aboutsummaryrefslogtreecommitdiff
path: root/llvm/test/CodeGen
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/CodeGen')
-rw-r--r--llvm/test/CodeGen/AArch64/GlobalISel/combine-fconstant.mir21
-rw-r--r--llvm/test/CodeGen/AArch64/GlobalISel/legalize-constant.mir5
-rw-r--r--llvm/test/CodeGen/AArch64/GlobalISel/legalize-fp16-fconstant.mir6
-rw-r--r--llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll13
-rw-r--r--llvm/test/CodeGen/AArch64/arm64-saddlp1d-uaddlp1d.mir50
-rw-r--r--llvm/test/CodeGen/AArch64/arm64-vadd.ll5
-rw-r--r--llvm/test/CodeGen/AArch64/f16-instructions.ll21
-rw-r--r--llvm/test/CodeGen/AArch64/fcvt-fixed.ll112
-rw-r--r--llvm/test/CodeGen/AArch64/fdiv-combine.ll152
-rw-r--r--llvm/test/CodeGen/AArch64/vecreduce-fadd-strict.ll20
-rw-r--r--llvm/test/CodeGen/AArch64/vecreduce-fadd.ll42
-rw-r--r--llvm/test/CodeGen/AArch64/vecreduce-fmul-strict.ll12
-rw-r--r--llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll4
-rw-r--r--llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll606
-rw-r--r--llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll40
-rw-r--r--llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll26
-rw-r--r--llvm/test/CodeGen/AMDGPU/annotate-kernel-features.ll18
-rw-r--r--llvm/test/CodeGen/AMDGPU/atomic_load_local.ll132
-rw-r--r--llvm/test/CodeGen/AMDGPU/atomic_store_local.ll196
-rw-r--r--llvm/test/CodeGen/AMDGPU/attributor-flatscratchinit-undefined-behavior.ll4
-rw-r--r--llvm/test/CodeGen/AMDGPU/attributor-flatscratchinit.ll38
-rw-r--r--llvm/test/CodeGen/AMDGPU/bf16.ll3693
-rw-r--r--llvm/test/CodeGen/AMDGPU/calling-conventions.ll6
-rw-r--r--llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll2
-rw-r--r--llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll2
-rw-r--r--llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll738
-rw-r--r--llvm/test/CodeGen/AMDGPU/fsub.ll38
-rw-r--r--llvm/test/CodeGen/AMDGPU/implicitarg-offset-attributes.ll26
-rw-r--r--llvm/test/CodeGen/AMDGPU/indirect-call-set-from-other-function.ll2
-rw-r--r--llvm/test/CodeGen/AMDGPU/issue120256-annotate-constexpr-addrspacecast.ll4
-rw-r--r--llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.f16.ll24
-rw-r--r--llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rcp.bf16.ll14
-rw-r--r--llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.bf16.ll6
-rw-r--r--llvm/test/CodeGen/AMDGPU/llvm.amdgcn.tanh.ll12
-rw-r--r--llvm/test/CodeGen/AMDGPU/min.ll92
-rw-r--r--llvm/test/CodeGen/AMDGPU/minmax.ll20
-rw-r--r--llvm/test/CodeGen/AMDGPU/propagate-flat-work-group-size.ll18
-rw-r--r--llvm/test/CodeGen/AMDGPU/propagate-waves-per-eu.ll42
-rw-r--r--llvm/test/CodeGen/AMDGPU/recursive_global_initializer.ll2
-rw-r--r--llvm/test/CodeGen/AMDGPU/remove-no-kernel-id-attribute.ll10
-rw-r--r--llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.v2f16.ll4064
-rw-r--r--llvm/test/CodeGen/AMDGPU/simple-indirect-call-2.ll12
-rw-r--r--llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll2
-rw-r--r--llvm/test/CodeGen/AMDGPU/uniform-work-group-attribute-missing.ll2
-rw-r--r--llvm/test/CodeGen/AMDGPU/uniform-work-group-multistep.ll4
-rw-r--r--llvm/test/CodeGen/AMDGPU/uniform-work-group-nested-function-calls.ll4
-rw-r--r--llvm/test/CodeGen/AMDGPU/uniform-work-group-prevent-attribute-propagation.ll4
-rw-r--r--llvm/test/CodeGen/AMDGPU/uniform-work-group-propagate-attribute.ll4
-rw-r--r--llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll6
-rw-r--r--llvm/test/CodeGen/AMDGPU/uniform-work-group-test.ll2
-rw-r--r--llvm/test/CodeGen/ARM/GlobalISel/arm-instruction-select-combos.mir16
-rw-r--r--llvm/test/CodeGen/ARM/GlobalISel/arm-select-copy_to_regclass-of-fptosi.mir2
-rw-r--r--llvm/test/CodeGen/ARM/GlobalISel/select-fp.mir388
-rw-r--r--llvm/test/CodeGen/ARM/GlobalISel/select-pr35926.mir2
-rw-r--r--llvm/test/CodeGen/ARM/bf16_fast_math.ll18
-rw-r--r--llvm/test/CodeGen/ARM/cortex-m7-wideops.mir17
-rw-r--r--llvm/test/CodeGen/ARM/fp16-litpool-arm.mir2
-rw-r--r--llvm/test/CodeGen/ARM/fp16-litpool-thumb.mir2
-rw-r--r--llvm/test/CodeGen/ARM/fp16-litpool2-arm.mir2
-rw-r--r--llvm/test/CodeGen/ARM/fp16-litpool3-arm.mir2
-rw-r--r--llvm/test/CodeGen/ARM/fp16_fast_math.ll86
-rw-r--r--llvm/test/CodeGen/ARM/ipra-reg-usage.ll2
-rw-r--r--llvm/test/CodeGen/ARM/llrint-conv.ll11
-rw-r--r--llvm/test/CodeGen/ARM/lrint-conv.ll48
-rw-r--r--llvm/test/CodeGen/ARM/misched-prevent-erase-history-of-subunits.mir4
-rw-r--r--llvm/test/CodeGen/ARM/vector-lrint.ll1301
-rw-r--r--llvm/test/CodeGen/ARM/vlldm-vlstm-uops.mir4
-rw-r--r--llvm/test/CodeGen/BPF/addr-space-memintrinsic-gep.ll60
-rw-r--r--llvm/test/CodeGen/BPF/addr-space-memintrinsic-no-gep.ll49
-rw-r--r--llvm/test/CodeGen/MIR2Vec/Inputs/mir2vec_dummy_2D_vocab.json11
-rw-r--r--llvm/test/CodeGen/MIR2Vec/Inputs/mir2vec_inconsistent_dims.json7
-rw-r--r--llvm/test/CodeGen/MIR2Vec/Inputs/mir2vec_invalid_vocab.json5
-rw-r--r--llvm/test/CodeGen/MIR2Vec/Inputs/mir2vec_zero_vocab.json12
-rw-r--r--llvm/test/CodeGen/MIR2Vec/Inputs/reference_x86_vocab_print.txt6882
-rw-r--r--llvm/test/CodeGen/MIR2Vec/Inputs/reference_x86_vocab_wo=0.5_print.txt6882
-rw-r--r--llvm/test/CodeGen/MIR2Vec/vocab-basic.ll14
-rw-r--r--llvm/test/CodeGen/MIR2Vec/vocab-error-handling.ll15
-rw-r--r--llvm/test/CodeGen/RISCV/GlobalISel/atomic-load-store-fp.ll118
-rw-r--r--llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomic-cmpxchg-rv32.mir8
-rw-r--r--llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomic-cmpxchg-rv64.mir10
-rw-r--r--llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomicrmw-add-sub-rv32.mir12
-rw-r--r--llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomicrmw-add-sub-rv64.mir16
-rw-r--r--llvm/test/CodeGen/RISCV/atomic-rmw.ll14130
-rw-r--r--llvm/test/CodeGen/RISCV/attributes.ll6
-rw-r--r--llvm/test/CodeGen/SPARC/atomicrmw-uinc-udec-wrap.ll24
-rw-r--r--llvm/test/CodeGen/SPARC/atomics-ordering.ll446
-rw-r--r--llvm/test/CodeGen/SPIRV/llc-pipeline.ll214
-rw-r--r--llvm/test/CodeGen/Thumb2/LowOverheadLoops/emptyblock.mir68
-rw-r--r--llvm/test/CodeGen/Thumb2/LowOverheadLoops/it-block-mov.mir16
-rw-r--r--llvm/test/CodeGen/Thumb2/LowOverheadLoops/lstp-insertion-position.mir12
-rw-r--r--llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-after-dlstp.mir8
-rw-r--r--llvm/test/CodeGen/Thumb2/pipeliner-inlineasm.mir16
-rw-r--r--llvm/test/CodeGen/Thumb2/scavenge-lr.mir16
-rw-r--r--llvm/test/CodeGen/Thumb2/swp-exitbranchdir.mir16
-rw-r--r--llvm/test/CodeGen/Thumb2/swp-fixedii-le.mir12
-rw-r--r--llvm/test/CodeGen/Thumb2/swp-fixedii.mir16
-rw-r--r--llvm/test/CodeGen/Thumb2/swp-regpressure.mir160
-rw-r--r--llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll13
98 files changed, 32823 insertions, 8736 deletions
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-fconstant.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-fconstant.mir
index 6362ed6..9381f0f4 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-fconstant.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-fconstant.mir
@@ -1,11 +1,12 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs -mtriple aarch64-unknown-unknown %s -o - | FileCheck %s
-# RUN: llc -debugify-and-strip-all-safe -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs -mtriple aarch64-unknown-unknown %s -o - | FileCheck %s
+# RUN: llc -run-pass=aarch64-postlegalizer-lowering -verify-machineinstrs -mtriple aarch64-unknown-unknown %s -o - | FileCheck %s
+# RUN: llc -debugify-and-strip-all-safe -run-pass=aarch64-postlegalizer-lowering -verify-machineinstrs -mtriple aarch64-unknown-unknown %s -o - | FileCheck %s
...
---
name: fconstant_to_constant_s32
alignment: 4
tracksRegLiveness: true
+legalized: true
frameInfo:
maxAlignment: 1
machineFunctionInfo: {}
@@ -24,16 +25,17 @@ body: |
; CHECK-NEXT: G_STORE [[C]](s32), [[PTR_ADD]](p0) :: (store (s32))
; CHECK-NEXT: RET_ReallyLR
%0:_(p0) = COPY $x0
- %3:_(s32) = G_FCONSTANT float 0x3FA99999A0000000
- %1:_(s64) = G_CONSTANT i64 524
- %2:_(p0) = G_PTR_ADD %0, %1(s64)
- G_STORE %3(s32), %2(p0) :: (store (s32))
+ %1:_(s32) = G_FCONSTANT float 0x3FA99999A0000000
+ %2:_(s64) = G_CONSTANT i64 524
+ %3:_(p0) = G_PTR_ADD %0, %2(s64)
+ G_STORE %1(s32), %3(p0) :: (store (s32))
RET_ReallyLR
...
---
name: fconstant_to_constant_s64
alignment: 4
tracksRegLiveness: true
+legalized: true
frameInfo:
maxAlignment: 1
machineFunctionInfo: {}
@@ -48,7 +50,7 @@ body: |
; CHECK-NEXT: G_STORE %c(s64), %ptr(p0) :: (store (s64))
; CHECK-NEXT: RET_ReallyLR
%ptr:_(p0) = COPY $x0
- %c:_(s64) = G_FCONSTANT double 0.0
+ %c:_(s64) = G_FCONSTANT double 0.000000e+00
G_STORE %c(s64), %ptr(p0) :: (store (s64))
RET_ReallyLR
...
@@ -56,6 +58,7 @@ body: |
name: no_store_means_no_combine
alignment: 4
tracksRegLiveness: true
+legalized: true
frameInfo:
maxAlignment: 1
machineFunctionInfo: {}
@@ -71,7 +74,7 @@ body: |
; CHECK-NEXT: %add:_(s64) = G_FADD %v, %c
; CHECK-NEXT: RET_ReallyLR implicit %add(s64)
%v:_(s64) = COPY $x0
- %c:_(s64) = G_FCONSTANT double 0.0
+ %c:_(s64) = G_FCONSTANT double 0.000000e+00
%add:_(s64) = G_FADD %v, %c
- RET_ReallyLR implicit %add
+ RET_ReallyLR implicit %add(s64)
...
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-constant.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-constant.mir
index c301e76..c00ce22 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-constant.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-constant.mir
@@ -48,8 +48,9 @@ body: |
; CHECK-NEXT: $w0 = COPY [[C]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 2.000000e+00
; CHECK-NEXT: $x0 = COPY [[C1]](s64)
- ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; CHECK-NEXT: $w0 = COPY [[C2]](s32)
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[C2]](s16)
+ ; CHECK-NEXT: $w0 = COPY [[ANYEXT]](s32)
%0:_(s32) = G_FCONSTANT float 1.0
$w0 = COPY %0
%1:_(s64) = G_FCONSTANT double 2.0
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fp16-fconstant.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fp16-fconstant.mir
index ddf219d..c6df345 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fp16-fconstant.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fp16-fconstant.mir
@@ -8,7 +8,7 @@ tracksRegLiveness: true
body: |
bb.0:
; NO-FP16-LABEL: name: fp16
- ; NO-FP16: %cst:_(s16) = G_CONSTANT i16 0
+ ; NO-FP16: %cst:_(s16) = G_FCONSTANT half 0xH0000
; NO-FP16-NEXT: $h0 = COPY %cst(s16)
; NO-FP16-NEXT: RET_ReallyLR implicit $h0
;
@@ -26,7 +26,7 @@ tracksRegLiveness: true
body: |
bb.0:
; NO-FP16-LABEL: name: fp16_non_zero
- ; NO-FP16: %cst:_(s16) = G_CONSTANT i16 16384
+ ; NO-FP16: %cst:_(s16) = G_FCONSTANT half 0xH4000
; NO-FP16-NEXT: $h0 = COPY %cst(s16)
; NO-FP16-NEXT: RET_ReallyLR implicit $h0
;
@@ -44,7 +44,7 @@ tracksRegLiveness: true
body: |
bb.1.entry:
; NO-FP16-LABEL: name: nan
- ; NO-FP16: %cst:_(s16) = G_CONSTANT i16 31745
+ ; NO-FP16: %cst:_(s16) = G_FCONSTANT half 0xH7C01
; NO-FP16-NEXT: %ext:_(s32) = G_FPEXT %cst(s16)
; NO-FP16-NEXT: $w0 = COPY %ext(s32)
; NO-FP16-NEXT: RET_ReallyLR implicit $w0
diff --git a/llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll b/llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll
index cb5df07..322a96a 100644
--- a/llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll
@@ -739,15 +739,14 @@ define ptr @postidx32_shalf(ptr %src, ptr %out, half %a) {
;
; GISEL-LABEL: postidx32_shalf:
; GISEL: ; %bb.0:
-; GISEL-NEXT: mov w8, #0 ; =0x0
-; GISEL-NEXT: ldr h1, [x0], #4
-; GISEL-NEXT: fmov s2, w8
+; GISEL-NEXT: movi d1, #0000000000000000
+; GISEL-NEXT: ldr h2, [x0], #4
; GISEL-NEXT: ; kill: def $h0 killed $h0 def $s0
; GISEL-NEXT: fmov w9, s0
-; GISEL-NEXT: fcvt s3, h1
-; GISEL-NEXT: fmov w8, s1
-; GISEL-NEXT: fcvt s2, h2
-; GISEL-NEXT: fcmp s3, s2
+; GISEL-NEXT: fcvt s3, h2
+; GISEL-NEXT: fmov w8, s2
+; GISEL-NEXT: fcvt s1, h1
+; GISEL-NEXT: fcmp s3, s1
; GISEL-NEXT: csel w8, w8, w9, mi
; GISEL-NEXT: strh w8, [x1]
; GISEL-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/arm64-saddlp1d-uaddlp1d.mir b/llvm/test/CodeGen/AArch64/arm64-saddlp1d-uaddlp1d.mir
new file mode 100644
index 0000000..074f75a
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/arm64-saddlp1d-uaddlp1d.mir
@@ -0,0 +1,50 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
+# RUN: llc -mtriple=aarch64 -run-pass=regbankselect -verify-machineinstrs %s -o - | FileCheck %s
+
+---
+name: saddlp1d
+legalized: true
+regBankSelected: false
+tracksRegLiveness: true
+body: |
+ bb.1.entry:
+ liveins: $x0
+
+ ; CHECK-LABEL: name: saddlp1d
+ ; CHECK: liveins: $x0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr(p0) = COPY $x0
+ ; CHECK-NEXT: [[LOAD:%[0-9]+]]:fpr(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load (<2 x s32>))
+ ; CHECK-NEXT: [[SADDLP:%[0-9]+]]:fpr(s64) = G_SADDLP [[LOAD]]
+ ; CHECK-NEXT: $d0 = COPY [[SADDLP]](s64)
+ ; CHECK-NEXT: RET_ReallyLR implicit $d0
+ %0:_(p0) = COPY $x0
+ %1:_(<2 x s32>) = G_LOAD %0(p0) :: (load (<2 x s32>))
+ %2:_(s64) = G_SADDLP %1
+ $d0 = COPY %2(s64)
+ RET_ReallyLR implicit $d0
+...
+---
+name: uaddlp1d
+legalized: true
+regBankSelected: false
+failedISel: false
+tracksRegLiveness: true
+body: |
+ bb.1.entry:
+ liveins: $x0
+
+ ; CHECK-LABEL: name: uaddlp1d
+ ; CHECK: liveins: $x0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr(p0) = COPY $x0
+ ; CHECK-NEXT: [[LOAD:%[0-9]+]]:fpr(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load (<2 x s32>))
+ ; CHECK-NEXT: [[UADDLP:%[0-9]+]]:fpr(s64) = G_UADDLP [[LOAD]]
+ ; CHECK-NEXT: $d0 = COPY [[UADDLP]](s64)
+ ; CHECK-NEXT: RET_ReallyLR implicit $d0
+ %0:_(p0) = COPY $x0
+ %1:_(<2 x s32>) = G_LOAD %0(p0) :: (load (<2 x s32>))
+ %2:_(s64) = G_UADDLP %1
+ $d0 = COPY %2(s64)
+ RET_ReallyLR implicit $d0
+...
diff --git a/llvm/test/CodeGen/AArch64/arm64-vadd.ll b/llvm/test/CodeGen/AArch64/arm64-vadd.ll
index 938712a..3cf0115 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vadd.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vadd.ll
@@ -1,9 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc < %s -mtriple=arm64-eabi | FileCheck %s --check-prefixes=CHECK,CHECK-SD
-; RUN: llc < %s -mtriple=arm64-eabi -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
-
-; CHECK-GI: warning: Instruction selection used fallback path for saddlp1d
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uaddlp1d
+; RUN: llc < %s -mtriple=arm64-eabi -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI
define <8 x i8> @addhn8b(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: addhn8b:
diff --git a/llvm/test/CodeGen/AArch64/f16-instructions.ll b/llvm/test/CodeGen/AArch64/f16-instructions.ll
index adc536d..b234ef7 100644
--- a/llvm/test/CodeGen/AArch64/f16-instructions.ll
+++ b/llvm/test/CodeGen/AArch64/f16-instructions.ll
@@ -782,18 +782,19 @@ define void @test_fccmp(half %in, ptr %out) {
;
; CHECK-CVT-GI-LABEL: test_fccmp:
; CHECK-CVT-GI: // %bb.0:
-; CHECK-CVT-GI-NEXT: mov w8, #17664 // =0x4500
-; CHECK-CVT-GI-NEXT: mov w9, #18432 // =0x4800
+; CHECK-CVT-GI-NEXT: adrp x8, .LCPI29_0
; CHECK-CVT-GI-NEXT: // kill: def $h0 killed $h0 def $s0
; CHECK-CVT-GI-NEXT: fcvt s2, h0
-; CHECK-CVT-GI-NEXT: fmov s1, w8
-; CHECK-CVT-GI-NEXT: fmov s3, w9
-; CHECK-CVT-GI-NEXT: fmov w9, s0
-; CHECK-CVT-GI-NEXT: fcvt s1, h1
-; CHECK-CVT-GI-NEXT: fcvt s3, h3
-; CHECK-CVT-GI-NEXT: fcmp s2, s1
-; CHECK-CVT-GI-NEXT: fccmp s2, s3, #4, mi
-; CHECK-CVT-GI-NEXT: csel w8, w9, w8, gt
+; CHECK-CVT-GI-NEXT: ldr h1, [x8, :lo12:.LCPI29_0]
+; CHECK-CVT-GI-NEXT: adrp x8, .LCPI29_1
+; CHECK-CVT-GI-NEXT: ldr h4, [x8, :lo12:.LCPI29_1]
+; CHECK-CVT-GI-NEXT: fmov w8, s0
+; CHECK-CVT-GI-NEXT: fcvt s3, h1
+; CHECK-CVT-GI-NEXT: fmov w9, s1
+; CHECK-CVT-GI-NEXT: fcvt s4, h4
+; CHECK-CVT-GI-NEXT: fcmp s2, s3
+; CHECK-CVT-GI-NEXT: fccmp s2, s4, #4, mi
+; CHECK-CVT-GI-NEXT: csel w8, w8, w9, gt
; CHECK-CVT-GI-NEXT: strh w8, [x0]
; CHECK-CVT-GI-NEXT: ret
;
diff --git a/llvm/test/CodeGen/AArch64/fcvt-fixed.ll b/llvm/test/CodeGen/AArch64/fcvt-fixed.ll
index 51aad4fe..7409bfb 100644
--- a/llvm/test/CodeGen/AArch64/fcvt-fixed.ll
+++ b/llvm/test/CodeGen/AArch64/fcvt-fixed.ll
@@ -166,9 +166,9 @@ define i32 @fcvtzs_f16_i32_7(half %flt) {
;
; CHECK-GI-NO16-LABEL: fcvtzs_f16_i32_7:
; CHECK-GI-NO16: // %bb.0:
-; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800
+; CHECK-GI-NO16-NEXT: adrp x8, .LCPI8_0
; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fmov s1, w8
+; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI8_0]
; CHECK-GI-NO16-NEXT: fcvt s1, h1
; CHECK-GI-NO16-NEXT: fmul s0, s0, s1
; CHECK-GI-NO16-NEXT: fcvt h0, s0
@@ -206,9 +206,9 @@ define i32 @fcvtzs_f16_i32_15(half %flt) {
;
; CHECK-GI-NO16-LABEL: fcvtzs_f16_i32_15:
; CHECK-GI-NO16: // %bb.0:
-; CHECK-GI-NO16-NEXT: mov w8, #30720 // =0x7800
+; CHECK-GI-NO16-NEXT: adrp x8, .LCPI9_0
; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fmov s1, w8
+; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI9_0]
; CHECK-GI-NO16-NEXT: fcvt s1, h1
; CHECK-GI-NO16-NEXT: fmul s0, s0, s1
; CHECK-GI-NO16-NEXT: fcvt h0, s0
@@ -246,9 +246,9 @@ define i64 @fcvtzs_f16_i64_7(half %flt) {
;
; CHECK-GI-NO16-LABEL: fcvtzs_f16_i64_7:
; CHECK-GI-NO16: // %bb.0:
-; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800
+; CHECK-GI-NO16-NEXT: adrp x8, .LCPI10_0
; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fmov s1, w8
+; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI10_0]
; CHECK-GI-NO16-NEXT: fcvt s1, h1
; CHECK-GI-NO16-NEXT: fmul s0, s0, s1
; CHECK-GI-NO16-NEXT: fcvt h0, s0
@@ -286,9 +286,9 @@ define i64 @fcvtzs_f16_i64_15(half %flt) {
;
; CHECK-GI-NO16-LABEL: fcvtzs_f16_i64_15:
; CHECK-GI-NO16: // %bb.0:
-; CHECK-GI-NO16-NEXT: mov w8, #30720 // =0x7800
+; CHECK-GI-NO16-NEXT: adrp x8, .LCPI11_0
; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fmov s1, w8
+; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI11_0]
; CHECK-GI-NO16-NEXT: fcvt s1, h1
; CHECK-GI-NO16-NEXT: fmul s0, s0, s1
; CHECK-GI-NO16-NEXT: fcvt h0, s0
@@ -470,9 +470,9 @@ define i32 @fcvtzu_f16_i32_7(half %flt) {
;
; CHECK-GI-NO16-LABEL: fcvtzu_f16_i32_7:
; CHECK-GI-NO16: // %bb.0:
-; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800
+; CHECK-GI-NO16-NEXT: adrp x8, .LCPI20_0
; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fmov s1, w8
+; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI20_0]
; CHECK-GI-NO16-NEXT: fcvt s1, h1
; CHECK-GI-NO16-NEXT: fmul s0, s0, s1
; CHECK-GI-NO16-NEXT: fcvt h0, s0
@@ -510,9 +510,9 @@ define i32 @fcvtzu_f16_i32_15(half %flt) {
;
; CHECK-GI-NO16-LABEL: fcvtzu_f16_i32_15:
; CHECK-GI-NO16: // %bb.0:
-; CHECK-GI-NO16-NEXT: mov w8, #30720 // =0x7800
+; CHECK-GI-NO16-NEXT: adrp x8, .LCPI21_0
; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fmov s1, w8
+; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI21_0]
; CHECK-GI-NO16-NEXT: fcvt s1, h1
; CHECK-GI-NO16-NEXT: fmul s0, s0, s1
; CHECK-GI-NO16-NEXT: fcvt h0, s0
@@ -550,9 +550,9 @@ define i64 @fcvtzu_f16_i64_7(half %flt) {
;
; CHECK-GI-NO16-LABEL: fcvtzu_f16_i64_7:
; CHECK-GI-NO16: // %bb.0:
-; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800
+; CHECK-GI-NO16-NEXT: adrp x8, .LCPI22_0
; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fmov s1, w8
+; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI22_0]
; CHECK-GI-NO16-NEXT: fcvt s1, h1
; CHECK-GI-NO16-NEXT: fmul s0, s0, s1
; CHECK-GI-NO16-NEXT: fcvt h0, s0
@@ -590,9 +590,9 @@ define i64 @fcvtzu_f16_i64_15(half %flt) {
;
; CHECK-GI-NO16-LABEL: fcvtzu_f16_i64_15:
; CHECK-GI-NO16: // %bb.0:
-; CHECK-GI-NO16-NEXT: mov w8, #30720 // =0x7800
+; CHECK-GI-NO16-NEXT: adrp x8, .LCPI23_0
; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fmov s1, w8
+; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI23_0]
; CHECK-GI-NO16-NEXT: fcvt s1, h1
; CHECK-GI-NO16-NEXT: fmul s0, s0, s1
; CHECK-GI-NO16-NEXT: fcvt h0, s0
@@ -775,10 +775,10 @@ define half @scvtf_f16_i32_7(i32 %int) {
; CHECK-GI-NO16-LABEL: scvtf_f16_i32_7:
; CHECK-GI-NO16: // %bb.0:
; CHECK-GI-NO16-NEXT: scvtf s0, w0
-; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800
-; CHECK-GI-NO16-NEXT: fmov s1, w8
-; CHECK-GI-NO16-NEXT: fcvt h0, s0
+; CHECK-GI-NO16-NEXT: adrp x8, .LCPI32_0
+; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI32_0]
; CHECK-GI-NO16-NEXT: fcvt s1, h1
+; CHECK-GI-NO16-NEXT: fcvt h0, s0
; CHECK-GI-NO16-NEXT: fcvt s0, h0
; CHECK-GI-NO16-NEXT: fdiv s0, s0, s1
; CHECK-GI-NO16-NEXT: fcvt h0, s0
@@ -815,10 +815,10 @@ define half @scvtf_f16_i32_15(i32 %int) {
; CHECK-GI-NO16-LABEL: scvtf_f16_i32_15:
; CHECK-GI-NO16: // %bb.0:
; CHECK-GI-NO16-NEXT: scvtf s0, w0
-; CHECK-GI-NO16-NEXT: mov w8, #30720 // =0x7800
-; CHECK-GI-NO16-NEXT: fmov s1, w8
-; CHECK-GI-NO16-NEXT: fcvt h0, s0
+; CHECK-GI-NO16-NEXT: adrp x8, .LCPI33_0
+; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI33_0]
; CHECK-GI-NO16-NEXT: fcvt s1, h1
+; CHECK-GI-NO16-NEXT: fcvt h0, s0
; CHECK-GI-NO16-NEXT: fcvt s0, h0
; CHECK-GI-NO16-NEXT: fdiv s0, s0, s1
; CHECK-GI-NO16-NEXT: fcvt h0, s0
@@ -855,10 +855,10 @@ define half @scvtf_f16_i64_7(i64 %long) {
; CHECK-GI-NO16-LABEL: scvtf_f16_i64_7:
; CHECK-GI-NO16: // %bb.0:
; CHECK-GI-NO16-NEXT: scvtf s0, x0
-; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800
-; CHECK-GI-NO16-NEXT: fmov s1, w8
-; CHECK-GI-NO16-NEXT: fcvt h0, s0
+; CHECK-GI-NO16-NEXT: adrp x8, .LCPI34_0
+; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI34_0]
; CHECK-GI-NO16-NEXT: fcvt s1, h1
+; CHECK-GI-NO16-NEXT: fcvt h0, s0
; CHECK-GI-NO16-NEXT: fcvt s0, h0
; CHECK-GI-NO16-NEXT: fdiv s0, s0, s1
; CHECK-GI-NO16-NEXT: fcvt h0, s0
@@ -895,10 +895,10 @@ define half @scvtf_f16_i64_15(i64 %long) {
; CHECK-GI-NO16-LABEL: scvtf_f16_i64_15:
; CHECK-GI-NO16: // %bb.0:
; CHECK-GI-NO16-NEXT: scvtf s0, x0
-; CHECK-GI-NO16-NEXT: mov w8, #30720 // =0x7800
-; CHECK-GI-NO16-NEXT: fmov s1, w8
-; CHECK-GI-NO16-NEXT: fcvt h0, s0
+; CHECK-GI-NO16-NEXT: adrp x8, .LCPI35_0
+; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI35_0]
; CHECK-GI-NO16-NEXT: fcvt s1, h1
+; CHECK-GI-NO16-NEXT: fcvt h0, s0
; CHECK-GI-NO16-NEXT: fcvt s0, h0
; CHECK-GI-NO16-NEXT: fdiv s0, s0, s1
; CHECK-GI-NO16-NEXT: fcvt h0, s0
@@ -1079,10 +1079,10 @@ define half @ucvtf_f16_i32_7(i32 %int) {
; CHECK-GI-NO16-LABEL: ucvtf_f16_i32_7:
; CHECK-GI-NO16: // %bb.0:
; CHECK-GI-NO16-NEXT: ucvtf s0, w0
-; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800
-; CHECK-GI-NO16-NEXT: fmov s1, w8
-; CHECK-GI-NO16-NEXT: fcvt h0, s0
+; CHECK-GI-NO16-NEXT: adrp x8, .LCPI44_0
+; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI44_0]
; CHECK-GI-NO16-NEXT: fcvt s1, h1
+; CHECK-GI-NO16-NEXT: fcvt h0, s0
; CHECK-GI-NO16-NEXT: fcvt s0, h0
; CHECK-GI-NO16-NEXT: fdiv s0, s0, s1
; CHECK-GI-NO16-NEXT: fcvt h0, s0
@@ -1119,10 +1119,10 @@ define half @ucvtf_f16_i32_15(i32 %int) {
; CHECK-GI-NO16-LABEL: ucvtf_f16_i32_15:
; CHECK-GI-NO16: // %bb.0:
; CHECK-GI-NO16-NEXT: ucvtf s0, w0
-; CHECK-GI-NO16-NEXT: mov w8, #30720 // =0x7800
-; CHECK-GI-NO16-NEXT: fmov s1, w8
-; CHECK-GI-NO16-NEXT: fcvt h0, s0
+; CHECK-GI-NO16-NEXT: adrp x8, .LCPI45_0
+; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI45_0]
; CHECK-GI-NO16-NEXT: fcvt s1, h1
+; CHECK-GI-NO16-NEXT: fcvt h0, s0
; CHECK-GI-NO16-NEXT: fcvt s0, h0
; CHECK-GI-NO16-NEXT: fdiv s0, s0, s1
; CHECK-GI-NO16-NEXT: fcvt h0, s0
@@ -1159,10 +1159,10 @@ define half @ucvtf_f16_i64_7(i64 %long) {
; CHECK-GI-NO16-LABEL: ucvtf_f16_i64_7:
; CHECK-GI-NO16: // %bb.0:
; CHECK-GI-NO16-NEXT: ucvtf s0, x0
-; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800
-; CHECK-GI-NO16-NEXT: fmov s1, w8
-; CHECK-GI-NO16-NEXT: fcvt h0, s0
+; CHECK-GI-NO16-NEXT: adrp x8, .LCPI46_0
+; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI46_0]
; CHECK-GI-NO16-NEXT: fcvt s1, h1
+; CHECK-GI-NO16-NEXT: fcvt h0, s0
; CHECK-GI-NO16-NEXT: fcvt s0, h0
; CHECK-GI-NO16-NEXT: fdiv s0, s0, s1
; CHECK-GI-NO16-NEXT: fcvt h0, s0
@@ -1199,10 +1199,10 @@ define half @ucvtf_f16_i64_15(i64 %long) {
; CHECK-GI-NO16-LABEL: ucvtf_f16_i64_15:
; CHECK-GI-NO16: // %bb.0:
; CHECK-GI-NO16-NEXT: ucvtf s0, x0
-; CHECK-GI-NO16-NEXT: mov w8, #30720 // =0x7800
-; CHECK-GI-NO16-NEXT: fmov s1, w8
-; CHECK-GI-NO16-NEXT: fcvt h0, s0
+; CHECK-GI-NO16-NEXT: adrp x8, .LCPI47_0
+; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI47_0]
; CHECK-GI-NO16-NEXT: fcvt s1, h1
+; CHECK-GI-NO16-NEXT: fcvt h0, s0
; CHECK-GI-NO16-NEXT: fcvt s0, h0
; CHECK-GI-NO16-NEXT: fdiv s0, s0, s1
; CHECK-GI-NO16-NEXT: fcvt h0, s0
@@ -1373,9 +1373,9 @@ define i32 @fcvtzs_sat_f16_i32_7(half %dbl) {
;
; CHECK-GI-NO16-LABEL: fcvtzs_sat_f16_i32_7:
; CHECK-GI-NO16: // %bb.0:
-; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800
+; CHECK-GI-NO16-NEXT: adrp x8, .LCPI55_0
; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fmov s1, w8
+; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI55_0]
; CHECK-GI-NO16-NEXT: fcvt s1, h1
; CHECK-GI-NO16-NEXT: fmul s0, s0, s1
; CHECK-GI-NO16-NEXT: fcvt h0, s0
@@ -1413,9 +1413,9 @@ define i32 @fcvtzs_sat_f16_i32_15(half %dbl) {
;
; CHECK-GI-NO16-LABEL: fcvtzs_sat_f16_i32_15:
; CHECK-GI-NO16: // %bb.0:
-; CHECK-GI-NO16-NEXT: mov w8, #30720 // =0x7800
+; CHECK-GI-NO16-NEXT: adrp x8, .LCPI56_0
; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fmov s1, w8
+; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI56_0]
; CHECK-GI-NO16-NEXT: fcvt s1, h1
; CHECK-GI-NO16-NEXT: fmul s0, s0, s1
; CHECK-GI-NO16-NEXT: fcvt h0, s0
@@ -1453,9 +1453,9 @@ define i64 @fcvtzs_sat_f16_i64_7(half %dbl) {
;
; CHECK-GI-NO16-LABEL: fcvtzs_sat_f16_i64_7:
; CHECK-GI-NO16: // %bb.0:
-; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800
+; CHECK-GI-NO16-NEXT: adrp x8, .LCPI57_0
; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fmov s1, w8
+; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI57_0]
; CHECK-GI-NO16-NEXT: fcvt s1, h1
; CHECK-GI-NO16-NEXT: fmul s0, s0, s1
; CHECK-GI-NO16-NEXT: fcvt h0, s0
@@ -1493,9 +1493,9 @@ define i64 @fcvtzs_sat_f16_i64_15(half %dbl) {
;
; CHECK-GI-NO16-LABEL: fcvtzs_sat_f16_i64_15:
; CHECK-GI-NO16: // %bb.0:
-; CHECK-GI-NO16-NEXT: mov w8, #30720 // =0x7800
+; CHECK-GI-NO16-NEXT: adrp x8, .LCPI58_0
; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fmov s1, w8
+; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI58_0]
; CHECK-GI-NO16-NEXT: fcvt s1, h1
; CHECK-GI-NO16-NEXT: fmul s0, s0, s1
; CHECK-GI-NO16-NEXT: fcvt h0, s0
@@ -1667,9 +1667,9 @@ define i32 @fcvtzu_sat_f16_i32_7(half %dbl) {
;
; CHECK-GI-NO16-LABEL: fcvtzu_sat_f16_i32_7:
; CHECK-GI-NO16: // %bb.0:
-; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800
+; CHECK-GI-NO16-NEXT: adrp x8, .LCPI66_0
; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fmov s1, w8
+; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI66_0]
; CHECK-GI-NO16-NEXT: fcvt s1, h1
; CHECK-GI-NO16-NEXT: fmul s0, s0, s1
; CHECK-GI-NO16-NEXT: fcvt h0, s0
@@ -1707,9 +1707,9 @@ define i32 @fcvtzu_sat_f16_i32_15(half %dbl) {
;
; CHECK-GI-NO16-LABEL: fcvtzu_sat_f16_i32_15:
; CHECK-GI-NO16: // %bb.0:
-; CHECK-GI-NO16-NEXT: mov w8, #30720 // =0x7800
+; CHECK-GI-NO16-NEXT: adrp x8, .LCPI67_0
; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fmov s1, w8
+; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI67_0]
; CHECK-GI-NO16-NEXT: fcvt s1, h1
; CHECK-GI-NO16-NEXT: fmul s0, s0, s1
; CHECK-GI-NO16-NEXT: fcvt h0, s0
@@ -1747,9 +1747,9 @@ define i64 @fcvtzu_sat_f16_i64_7(half %dbl) {
;
; CHECK-GI-NO16-LABEL: fcvtzu_sat_f16_i64_7:
; CHECK-GI-NO16: // %bb.0:
-; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800
+; CHECK-GI-NO16-NEXT: adrp x8, .LCPI68_0
; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fmov s1, w8
+; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI68_0]
; CHECK-GI-NO16-NEXT: fcvt s1, h1
; CHECK-GI-NO16-NEXT: fmul s0, s0, s1
; CHECK-GI-NO16-NEXT: fcvt h0, s0
@@ -1787,9 +1787,9 @@ define i64 @fcvtzu_sat_f16_i64_15(half %dbl) {
;
; CHECK-GI-NO16-LABEL: fcvtzu_sat_f16_i64_15:
; CHECK-GI-NO16: // %bb.0:
-; CHECK-GI-NO16-NEXT: mov w8, #30720 // =0x7800
+; CHECK-GI-NO16-NEXT: adrp x8, .LCPI69_0
; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fmov s1, w8
+; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI69_0]
; CHECK-GI-NO16-NEXT: fcvt s1, h1
; CHECK-GI-NO16-NEXT: fmul s0, s0, s1
; CHECK-GI-NO16-NEXT: fcvt h0, s0
diff --git a/llvm/test/CodeGen/AArch64/fdiv-combine.ll b/llvm/test/CodeGen/AArch64/fdiv-combine.ll
index 91bb8ac..9eacb61 100644
--- a/llvm/test/CodeGen/AArch64/fdiv-combine.ll
+++ b/llvm/test/CodeGen/AArch64/fdiv-combine.ll
@@ -12,22 +12,14 @@
; =>
; recip = 1.0 / D; a * recip; b * recip; c * recip;
define void @three_fdiv_float(float %D, float %a, float %b, float %c) {
-; CHECK-SD-LABEL: three_fdiv_float:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: fmov s4, #1.00000000
-; CHECK-SD-NEXT: fdiv s4, s4, s0
-; CHECK-SD-NEXT: fmul s0, s1, s4
-; CHECK-SD-NEXT: fmul s1, s2, s4
-; CHECK-SD-NEXT: fmul s2, s3, s4
-; CHECK-SD-NEXT: b foo_3f
-;
-; CHECK-GI-LABEL: three_fdiv_float:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: fdiv s4, s1, s0
-; CHECK-GI-NEXT: fdiv s1, s2, s0
-; CHECK-GI-NEXT: fdiv s2, s3, s0
-; CHECK-GI-NEXT: fmov s0, s4
-; CHECK-GI-NEXT: b foo_3f
+; CHECK-LABEL: three_fdiv_float:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fmov s4, #1.00000000
+; CHECK-NEXT: fdiv s4, s4, s0
+; CHECK-NEXT: fmul s0, s1, s4
+; CHECK-NEXT: fmul s1, s2, s4
+; CHECK-NEXT: fmul s2, s3, s4
+; CHECK-NEXT: b foo_3f
%div = fdiv arcp float %a, %D
%div1 = fdiv arcp float %b, %D
%div2 = fdiv arcp float %c, %D
@@ -36,22 +28,14 @@ define void @three_fdiv_float(float %D, float %a, float %b, float %c) {
}
define void @three_fdiv_double(double %D, double %a, double %b, double %c) {
-; CHECK-SD-LABEL: three_fdiv_double:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: fmov d4, #1.00000000
-; CHECK-SD-NEXT: fdiv d4, d4, d0
-; CHECK-SD-NEXT: fmul d0, d1, d4
-; CHECK-SD-NEXT: fmul d1, d2, d4
-; CHECK-SD-NEXT: fmul d2, d3, d4
-; CHECK-SD-NEXT: b foo_3d
-;
-; CHECK-GI-LABEL: three_fdiv_double:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: fdiv d4, d1, d0
-; CHECK-GI-NEXT: fdiv d1, d2, d0
-; CHECK-GI-NEXT: fdiv d2, d3, d0
-; CHECK-GI-NEXT: fmov d0, d4
-; CHECK-GI-NEXT: b foo_3d
+; CHECK-LABEL: three_fdiv_double:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fmov d4, #1.00000000
+; CHECK-NEXT: fdiv d4, d4, d0
+; CHECK-NEXT: fmul d0, d1, d4
+; CHECK-NEXT: fmul d1, d2, d4
+; CHECK-NEXT: fmul d2, d3, d4
+; CHECK-NEXT: b foo_3d
%div = fdiv arcp double %a, %D
%div1 = fdiv arcp double %b, %D
%div2 = fdiv arcp double %c, %D
@@ -60,22 +44,14 @@ define void @three_fdiv_double(double %D, double %a, double %b, double %c) {
}
define void @three_fdiv_4xfloat(<4 x float> %D, <4 x float> %a, <4 x float> %b, <4 x float> %c) {
-; CHECK-SD-LABEL: three_fdiv_4xfloat:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: fmov v4.4s, #1.00000000
-; CHECK-SD-NEXT: fdiv v4.4s, v4.4s, v0.4s
-; CHECK-SD-NEXT: fmul v0.4s, v1.4s, v4.4s
-; CHECK-SD-NEXT: fmul v1.4s, v2.4s, v4.4s
-; CHECK-SD-NEXT: fmul v2.4s, v3.4s, v4.4s
-; CHECK-SD-NEXT: b foo_3_4xf
-;
-; CHECK-GI-LABEL: three_fdiv_4xfloat:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: fdiv v4.4s, v1.4s, v0.4s
-; CHECK-GI-NEXT: fdiv v1.4s, v2.4s, v0.4s
-; CHECK-GI-NEXT: fdiv v2.4s, v3.4s, v0.4s
-; CHECK-GI-NEXT: mov v0.16b, v4.16b
-; CHECK-GI-NEXT: b foo_3_4xf
+; CHECK-LABEL: three_fdiv_4xfloat:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fmov v4.4s, #1.00000000
+; CHECK-NEXT: fdiv v4.4s, v4.4s, v0.4s
+; CHECK-NEXT: fmul v0.4s, v1.4s, v4.4s
+; CHECK-NEXT: fmul v1.4s, v2.4s, v4.4s
+; CHECK-NEXT: fmul v2.4s, v3.4s, v4.4s
+; CHECK-NEXT: b foo_3_4xf
%div = fdiv arcp <4 x float> %a, %D
%div1 = fdiv arcp <4 x float> %b, %D
%div2 = fdiv arcp <4 x float> %c, %D
@@ -84,22 +60,14 @@ define void @three_fdiv_4xfloat(<4 x float> %D, <4 x float> %a, <4 x float> %b,
}
define void @three_fdiv_2xdouble(<2 x double> %D, <2 x double> %a, <2 x double> %b, <2 x double> %c) {
-; CHECK-SD-LABEL: three_fdiv_2xdouble:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: fmov v4.2d, #1.00000000
-; CHECK-SD-NEXT: fdiv v4.2d, v4.2d, v0.2d
-; CHECK-SD-NEXT: fmul v0.2d, v1.2d, v4.2d
-; CHECK-SD-NEXT: fmul v1.2d, v2.2d, v4.2d
-; CHECK-SD-NEXT: fmul v2.2d, v3.2d, v4.2d
-; CHECK-SD-NEXT: b foo_3_2xd
-;
-; CHECK-GI-LABEL: three_fdiv_2xdouble:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: fdiv v4.2d, v1.2d, v0.2d
-; CHECK-GI-NEXT: fdiv v1.2d, v2.2d, v0.2d
-; CHECK-GI-NEXT: fdiv v2.2d, v3.2d, v0.2d
-; CHECK-GI-NEXT: mov v0.16b, v4.16b
-; CHECK-GI-NEXT: b foo_3_2xd
+; CHECK-LABEL: three_fdiv_2xdouble:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fmov v4.2d, #1.00000000
+; CHECK-NEXT: fdiv v4.2d, v4.2d, v0.2d
+; CHECK-NEXT: fmul v0.2d, v1.2d, v4.2d
+; CHECK-NEXT: fmul v1.2d, v2.2d, v4.2d
+; CHECK-NEXT: fmul v2.2d, v3.2d, v4.2d
+; CHECK-NEXT: b foo_3_2xd
%div = fdiv arcp <2 x double> %a, %D
%div1 = fdiv arcp <2 x double> %b, %D
%div2 = fdiv arcp <2 x double> %c, %D
@@ -135,26 +103,47 @@ define void @two_fdiv_double(double %D, double %a, double %b) {
ret void
}
-define void @splat_three_fdiv_4xfloat(float %D, <4 x float> %a, <4 x float> %b, <4 x float> %c) {
-; CHECK-SD-LABEL: splat_three_fdiv_4xfloat:
+define void @four_fdiv_multi_float(float %D, float %a, float %b, float %c) #0 {
+; CHECK-SD-LABEL: four_fdiv_multi_float:
; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0
-; CHECK-SD-NEXT: fmov v4.4s, #1.00000000
-; CHECK-SD-NEXT: dup v0.4s, v0.s[0]
-; CHECK-SD-NEXT: fdiv v4.4s, v4.4s, v0.4s
-; CHECK-SD-NEXT: fmul v0.4s, v1.4s, v4.4s
-; CHECK-SD-NEXT: fmul v1.4s, v2.4s, v4.4s
-; CHECK-SD-NEXT: fmul v2.4s, v3.4s, v4.4s
-; CHECK-SD-NEXT: b foo_3_4xf
+; CHECK-SD-NEXT: fmov s4, #1.00000000
+; CHECK-SD-NEXT: fdiv s5, s4, s0
+; CHECK-SD-NEXT: fmul s4, s1, s5
+; CHECK-SD-NEXT: fmul s1, s2, s5
+; CHECK-SD-NEXT: fmul s2, s3, s5
+; CHECK-SD-NEXT: fmul s3, s0, s5
+; CHECK-SD-NEXT: fmov s0, s4
+; CHECK-SD-NEXT: b foo_4f
;
-; CHECK-GI-LABEL: splat_three_fdiv_4xfloat:
+; CHECK-GI-LABEL: four_fdiv_multi_float:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0
-; CHECK-GI-NEXT: dup v4.4s, v0.s[0]
-; CHECK-GI-NEXT: fdiv v0.4s, v1.4s, v4.4s
-; CHECK-GI-NEXT: fdiv v1.4s, v2.4s, v4.4s
-; CHECK-GI-NEXT: fdiv v2.4s, v3.4s, v4.4s
-; CHECK-GI-NEXT: b foo_3_4xf
+; CHECK-GI-NEXT: fmov s4, #1.00000000
+; CHECK-GI-NEXT: fdiv s5, s4, s0
+; CHECK-GI-NEXT: fdiv s4, s0, s0
+; CHECK-GI-NEXT: fmul s0, s1, s5
+; CHECK-GI-NEXT: fmul s1, s2, s5
+; CHECK-GI-NEXT: fmul s2, s3, s5
+; CHECK-GI-NEXT: fmov s3, s4
+; CHECK-GI-NEXT: b foo_4f
+ %div = fdiv arcp float %a, %D
+ %div1 = fdiv arcp float %b, %D
+ %div2 = fdiv arcp float %c, %D
+ %div3 = fdiv arcp float %D, %D
+ tail call void @foo_4f(float %div, float %div1, float %div2, float %div3)
+ ret void
+}
+
+define void @splat_three_fdiv_4xfloat(float %D, <4 x float> %a, <4 x float> %b, <4 x float> %c) {
+; CHECK-LABEL: splat_three_fdiv_4xfloat:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0
+; CHECK-NEXT: fmov v4.4s, #1.00000000
+; CHECK-NEXT: dup v0.4s, v0.s[0]
+; CHECK-NEXT: fdiv v4.4s, v4.4s, v0.4s
+; CHECK-NEXT: fmul v0.4s, v1.4s, v4.4s
+; CHECK-NEXT: fmul v1.4s, v2.4s, v4.4s
+; CHECK-NEXT: fmul v2.4s, v3.4s, v4.4s
+; CHECK-NEXT: b foo_3_4xf
%D.ins = insertelement <4 x float> poison, float %D, i64 0
%splat = shufflevector <4 x float> %D.ins, <4 x float> poison, <4 x i32> zeroinitializer
%div = fdiv arcp <4 x float> %a, %splat
@@ -256,6 +245,7 @@ entry:
}
declare void @foo_3f(float, float, float)
+declare void @foo_4f(float, float, float, float)
declare void @foo_3d(double, double, double)
declare void @foo_3_4xf(<4 x float>, <4 x float>, <4 x float>)
declare void @foo_3_2xd(<2 x double>, <2 x double>, <2 x double>)
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fadd-strict.ll b/llvm/test/CodeGen/AArch64/vecreduce-fadd-strict.ll
index 594a3ab..be07978 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-fadd-strict.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-fadd-strict.ll
@@ -38,10 +38,10 @@ define half @add_v2HalfH(<2 x half> %bin.rdx) {
;
; CHECK-GI-NOFP16-LABEL: add_v2HalfH:
; CHECK-GI-NOFP16: // %bb.0:
-; CHECK-GI-NOFP16-NEXT: mov w8, #32768 // =0x8000
+; CHECK-GI-NOFP16-NEXT: adrp x8, .LCPI1_0
; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NOFP16-NEXT: fcvt s2, h0
-; CHECK-GI-NOFP16-NEXT: fmov s1, w8
+; CHECK-GI-NOFP16-NEXT: ldr h1, [x8, :lo12:.LCPI1_0]
; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[1]
; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
@@ -88,10 +88,10 @@ define half @add_v3HalfH(<3 x half> %bin.rdx) {
;
; CHECK-GI-NOFP16-LABEL: add_v3HalfH:
; CHECK-GI-NOFP16: // %bb.0:
-; CHECK-GI-NOFP16-NEXT: mov w8, #32768 // =0x8000
+; CHECK-GI-NOFP16-NEXT: adrp x8, .LCPI2_0
; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NOFP16-NEXT: fcvt s2, h0
-; CHECK-GI-NOFP16-NEXT: fmov s1, w8
+; CHECK-GI-NOFP16-NEXT: ldr h1, [x8, :lo12:.LCPI2_0]
; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
; CHECK-GI-NOFP16-NEXT: fadd s1, s1, s2
; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[1]
@@ -152,10 +152,10 @@ define half @add_HalfH(<4 x half> %bin.rdx) {
;
; CHECK-GI-NOFP16-LABEL: add_HalfH:
; CHECK-GI-NOFP16: // %bb.0:
-; CHECK-GI-NOFP16-NEXT: mov w8, #32768 // =0x8000
+; CHECK-GI-NOFP16-NEXT: adrp x8, .LCPI3_0
; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NOFP16-NEXT: fcvt s2, h0
-; CHECK-GI-NOFP16-NEXT: fmov s1, w8
+; CHECK-GI-NOFP16-NEXT: ldr h1, [x8, :lo12:.LCPI3_0]
; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
; CHECK-GI-NOFP16-NEXT: fadd s1, s1, s2
; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[1]
@@ -250,9 +250,9 @@ define half @add_H(<8 x half> %bin.rdx) {
;
; CHECK-GI-NOFP16-LABEL: add_H:
; CHECK-GI-NOFP16: // %bb.0:
-; CHECK-GI-NOFP16-NEXT: mov w8, #32768 // =0x8000
+; CHECK-GI-NOFP16-NEXT: adrp x8, .LCPI4_0
; CHECK-GI-NOFP16-NEXT: fcvt s2, h0
-; CHECK-GI-NOFP16-NEXT: fmov s1, w8
+; CHECK-GI-NOFP16-NEXT: ldr h1, [x8, :lo12:.LCPI4_0]
; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
; CHECK-GI-NOFP16-NEXT: fadd s1, s1, s2
; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[1]
@@ -448,9 +448,9 @@ define half @add_2H(<16 x half> %bin.rdx) {
;
; CHECK-GI-NOFP16-LABEL: add_2H:
; CHECK-GI-NOFP16: // %bb.0:
-; CHECK-GI-NOFP16-NEXT: mov w8, #32768 // =0x8000
+; CHECK-GI-NOFP16-NEXT: adrp x8, .LCPI7_0
; CHECK-GI-NOFP16-NEXT: fcvt s3, h0
-; CHECK-GI-NOFP16-NEXT: fmov s2, w8
+; CHECK-GI-NOFP16-NEXT: ldr h2, [x8, :lo12:.LCPI7_0]
; CHECK-GI-NOFP16-NEXT: fcvt s2, h2
; CHECK-GI-NOFP16-NEXT: fadd s2, s2, s3
; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[1]
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fadd.ll b/llvm/test/CodeGen/AArch64/vecreduce-fadd.ll
index 18f463c..40925da 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-fadd.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-fadd.ll
@@ -405,26 +405,23 @@ define half @fadd_reduction_v4f16_in_loop(ptr %ptr.start) {
;
; CHECK-GI-NOFP16-LABEL: fadd_reduction_v4f16_in_loop:
; CHECK-GI-NOFP16: // %bb.0: // %entry
+; CHECK-GI-NOFP16-NEXT: movi d0, #0000000000000000
; CHECK-GI-NOFP16-NEXT: mov x8, xzr
-; CHECK-GI-NOFP16-NEXT: mov w9, #0 // =0x0
; CHECK-GI-NOFP16-NEXT: .LBB13_1: // %loop
; CHECK-GI-NOFP16-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-GI-NOFP16-NEXT: ldr d0, [x0, x8]
-; CHECK-GI-NOFP16-NEXT: fmov s1, w9
+; CHECK-GI-NOFP16-NEXT: ldr d1, [x0, x8]
+; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
; CHECK-GI-NOFP16-NEXT: add x8, x8, #8
; CHECK-GI-NOFP16-NEXT: cmp w8, #56
-; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h
+; CHECK-GI-NOFP16-NEXT: faddp v1.4s, v1.4s, v1.4s
+; CHECK-GI-NOFP16-NEXT: faddp s1, v1.2s
+; CHECK-GI-NOFP16-NEXT: fcvt h1, s1
; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
-; CHECK-GI-NOFP16-NEXT: faddp v0.4s, v0.4s, v0.4s
-; CHECK-GI-NOFP16-NEXT: faddp s0, v0.2s
-; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
-; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
-; CHECK-GI-NOFP16-NEXT: fadd s0, s0, s1
+; CHECK-GI-NOFP16-NEXT: fadd s0, s1, s0
; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
-; CHECK-GI-NOFP16-NEXT: fmov w9, s0
; CHECK-GI-NOFP16-NEXT: b.ne .LBB13_1
; CHECK-GI-NOFP16-NEXT: // %bb.2: // %exit
-; CHECK-GI-NOFP16-NEXT: // kill: def $h0 killed $h0 killed $s0
; CHECK-GI-NOFP16-NEXT: ret
;
; CHECK-GI-FP16-LABEL: fadd_reduction_v4f16_in_loop:
@@ -521,28 +518,25 @@ define half @fadd_reduction_v8f16_in_loop(ptr %ptr.start) {
;
; CHECK-GI-NOFP16-LABEL: fadd_reduction_v8f16_in_loop:
; CHECK-GI-NOFP16: // %bb.0: // %entry
+; CHECK-GI-NOFP16-NEXT: movi d0, #0000000000000000
; CHECK-GI-NOFP16-NEXT: mov x8, xzr
-; CHECK-GI-NOFP16-NEXT: mov w9, #0 // =0x0
; CHECK-GI-NOFP16-NEXT: .LBB14_1: // %loop
; CHECK-GI-NOFP16-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-GI-NOFP16-NEXT: ldr q0, [x0, x8]
+; CHECK-GI-NOFP16-NEXT: ldr q1, [x0, x8]
+; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
; CHECK-GI-NOFP16-NEXT: add x8, x8, #8
; CHECK-GI-NOFP16-NEXT: cmp w8, #56
-; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v0.4h
-; CHECK-GI-NOFP16-NEXT: fcvtl2 v0.4s, v0.8h
-; CHECK-GI-NOFP16-NEXT: fadd v0.4s, v1.4s, v0.4s
-; CHECK-GI-NOFP16-NEXT: fmov s1, w9
+; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v1.4h
+; CHECK-GI-NOFP16-NEXT: fcvtl2 v1.4s, v1.8h
+; CHECK-GI-NOFP16-NEXT: fadd v1.4s, v2.4s, v1.4s
+; CHECK-GI-NOFP16-NEXT: faddp v1.4s, v1.4s, v1.4s
+; CHECK-GI-NOFP16-NEXT: faddp s1, v1.2s
+; CHECK-GI-NOFP16-NEXT: fcvt h1, s1
; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
-; CHECK-GI-NOFP16-NEXT: faddp v0.4s, v0.4s, v0.4s
-; CHECK-GI-NOFP16-NEXT: faddp s0, v0.2s
-; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
-; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
-; CHECK-GI-NOFP16-NEXT: fadd s0, s0, s1
+; CHECK-GI-NOFP16-NEXT: fadd s0, s1, s0
; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
-; CHECK-GI-NOFP16-NEXT: fmov w9, s0
; CHECK-GI-NOFP16-NEXT: b.ne .LBB14_1
; CHECK-GI-NOFP16-NEXT: // %bb.2: // %exit
-; CHECK-GI-NOFP16-NEXT: // kill: def $h0 killed $h0 killed $s0
; CHECK-GI-NOFP16-NEXT: ret
;
; CHECK-GI-FP16-LABEL: fadd_reduction_v8f16_in_loop:
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fmul-strict.ll b/llvm/test/CodeGen/AArch64/vecreduce-fmul-strict.ll
index e1b2170..c10d6e9 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-fmul-strict.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-fmul-strict.ll
@@ -52,10 +52,10 @@ define half @mul_HalfH(<4 x half> %bin.rdx) {
;
; CHECK-GI-NOFP16-LABEL: mul_HalfH:
; CHECK-GI-NOFP16: // %bb.0:
-; CHECK-GI-NOFP16-NEXT: mov w8, #15360 // =0x3c00
+; CHECK-GI-NOFP16-NEXT: adrp x8, .LCPI1_0
; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NOFP16-NEXT: fcvt s2, h0
-; CHECK-GI-NOFP16-NEXT: fmov s1, w8
+; CHECK-GI-NOFP16-NEXT: ldr h1, [x8, :lo12:.LCPI1_0]
; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
; CHECK-GI-NOFP16-NEXT: fmul s1, s1, s2
; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[1]
@@ -144,9 +144,9 @@ define half @mul_H(<8 x half> %bin.rdx) {
;
; CHECK-GI-NOFP16-LABEL: mul_H:
; CHECK-GI-NOFP16: // %bb.0:
-; CHECK-GI-NOFP16-NEXT: mov w8, #15360 // =0x3c00
+; CHECK-GI-NOFP16-NEXT: adrp x8, .LCPI2_0
; CHECK-GI-NOFP16-NEXT: fcvt s2, h0
-; CHECK-GI-NOFP16-NEXT: fmov s1, w8
+; CHECK-GI-NOFP16-NEXT: ldr h1, [x8, :lo12:.LCPI2_0]
; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
; CHECK-GI-NOFP16-NEXT: fmul s1, s1, s2
; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[1]
@@ -321,9 +321,9 @@ define half @mul_2H(<16 x half> %bin.rdx) {
;
; CHECK-GI-NOFP16-LABEL: mul_2H:
; CHECK-GI-NOFP16: // %bb.0:
-; CHECK-GI-NOFP16-NEXT: mov w8, #15360 // =0x3c00
+; CHECK-GI-NOFP16-NEXT: adrp x8, .LCPI5_0
; CHECK-GI-NOFP16-NEXT: fcvt s3, h0
-; CHECK-GI-NOFP16-NEXT: fmov s2, w8
+; CHECK-GI-NOFP16-NEXT: ldr h2, [x8, :lo12:.LCPI5_0]
; CHECK-GI-NOFP16-NEXT: fcvt s2, h2
; CHECK-GI-NOFP16-NEXT: fmul s2, s2, s3
; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[1]
diff --git a/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll b/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll
index 2d7ef2c..98fbbe1 100644
--- a/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll
@@ -169,6 +169,6 @@ attributes #1 = { nounwind }
;.
; HSA: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
-; HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
;.
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll
index 664dfa2..2ad6e68 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll
@@ -1,103 +1,166 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --check-globals all --version 4
; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a -passes=amdgpu-attributor %s | FileCheck %s
+; Shrink result attribute list by preventing use of most attributes.
+define internal void @use_most() {
+; CHECK-LABEL: define internal void @use_most(
+; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[ALLOCA:%.*]] = alloca [256 x i8], align 1, addrspace(5)
+; CHECK-NEXT: [[ALLOCA_CAST:%.*]] = addrspacecast ptr addrspace(5) [[ALLOCA]] to ptr
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
+; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.workitem.id.z()
+; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.amdgcn.workgroup.id.x()
+; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.amdgcn.workgroup.id.y()
+; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.amdgcn.workgroup.id.z()
+; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.amdgcn.cluster.id.x()
+; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.amdgcn.cluster.id.y()
+; CHECK-NEXT: [[TMP12:%.*]] = call i32 @llvm.amdgcn.cluster.id.z()
+; CHECK-NEXT: [[TMP7:%.*]] = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
+; CHECK-NEXT: [[TMP8:%.*]] = call ptr addrspace(4) @llvm.amdgcn.queue.ptr()
+; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.amdgcn.dispatch.id()
+; CHECK-NEXT: [[TMP13:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id()
+; CHECK-NEXT: [[IMPLICIT_ARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
+; CHECK-NEXT: call void @llvm.memcpy.p0.p4.i64(ptr [[ALLOCA_CAST]], ptr addrspace(4) [[IMPLICIT_ARG_PTR]], i64 256, i1 false)
+; CHECK-NEXT: ret void
+;
+ %alloca = alloca [256 x i8], addrspace(5)
+ %alloca.cast = addrspacecast ptr addrspace(5) %alloca to ptr
+ call i32 @llvm.amdgcn.workitem.id.x()
+ call i32 @llvm.amdgcn.workitem.id.y()
+ call i32 @llvm.amdgcn.workitem.id.z()
+ call i32 @llvm.amdgcn.workgroup.id.x()
+ call i32 @llvm.amdgcn.workgroup.id.y()
+ call i32 @llvm.amdgcn.workgroup.id.z()
+ call i32 @llvm.amdgcn.cluster.id.x()
+ call i32 @llvm.amdgcn.cluster.id.y()
+ call i32 @llvm.amdgcn.cluster.id.z()
+ call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
+ call ptr addrspace(4) @llvm.amdgcn.queue.ptr()
+ call i64 @llvm.amdgcn.dispatch.id()
+ call i32 @llvm.amdgcn.lds.kernel.id()
+ %implicit.arg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
+ call void @llvm.memcpy.p0.p4(ptr %alloca.cast, ptr addrspace(4) %implicit.arg.ptr, i64 256, i1 false)
+ ret void
+}
+
define amdgpu_kernel void @kernel_uses_asm_virtreg() {
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg(
-; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+; CHECK-SAME: ) #[[ATTR1:[0-9]+]] {
; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
;
call void asm sideeffect "; use $0", "a"(i32 poison)
+ call void @use_most()
ret void
}
define amdgpu_kernel void @kernel_uses_asm_virtreg_def() {
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_def(
-; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-SAME: ) #[[ATTR1]] {
; CHECK-NEXT: [[DEF:%.*]] = call i32 asm sideeffect "
+; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
;
%def = call i32 asm sideeffect "; def $0", "=a"()
+ call void @use_most()
ret void
}
define amdgpu_kernel void @kernel_uses_asm_physreg_def_tuple() {
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_physreg_def_tuple(
-; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-SAME: ) #[[ATTR1]] {
; CHECK-NEXT: [[DEF:%.*]] = call i64 asm sideeffect "
+; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
;
%def = call i64 asm sideeffect "; def $0", "={a[0:1]}"()
+ call void @use_most()
ret void
}
define amdgpu_kernel void @kernel_uses_asm_virtreg_second_arg() {
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_second_arg(
-; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-SAME: ) #[[ATTR1]] {
; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
;
call void asm sideeffect "; use $0", "v,a"(i32 poison, i32 poison)
+ call void @use_most()
ret void
}
define amdgpu_kernel void @kernel_uses_non_agpr_asm() {
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_non_agpr_asm(
-; CHECK-SAME: ) #[[ATTR1:[0-9]+]] {
+; CHECK-SAME: ) #[[ATTR0]] {
; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
;
call void asm sideeffect "; use $0", "v"(i32 poison)
+ call void @use_most()
ret void
}
define amdgpu_kernel void @kernel_uses_asm_physreg() {
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_physreg(
-; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-SAME: ) #[[ATTR1]] {
; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
;
call void asm sideeffect "; use $0", "{a0}"(i32 poison)
+ call void @use_most()
ret void
}
define amdgpu_kernel void @kernel_uses_asm_physreg_tuple() {
; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_physreg_tuple(
-; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-SAME: ) #[[ATTR1]] {
; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
;
call void asm sideeffect "; use $0", "{a[0:1]}"(i64 poison)
+ call void @use_most()
ret void
}
define void @func_uses_asm_virtreg_agpr() {
; CHECK-LABEL: define void @func_uses_asm_virtreg_agpr(
-; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-SAME: ) #[[ATTR1]] {
; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
;
call void asm sideeffect "; use $0", "a"(i32 poison)
+ call void @use_most()
ret void
}
define void @func_uses_asm_physreg_agpr() {
; CHECK-LABEL: define void @func_uses_asm_physreg_agpr(
-; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-SAME: ) #[[ATTR1]] {
; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
;
call void asm sideeffect "; use $0", "{a0}"(i32 poison)
+ call void @use_most()
ret void
}
define void @func_uses_asm_physreg_agpr_tuple() {
; CHECK-LABEL: define void @func_uses_asm_physreg_agpr_tuple(
-; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-SAME: ) #[[ATTR1]] {
; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
;
call void asm sideeffect "; use $0", "{a[0:1]}"(i64 poison)
+ call void @use_most()
ret void
}
@@ -105,99 +168,119 @@ declare void @unknown()
define amdgpu_kernel void @kernel_calls_extern() {
; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_extern(
-; CHECK-SAME: ) #[[ATTR2:[0-9]+]] {
+; CHECK-SAME: ) #[[ATTR1]] {
; CHECK-NEXT: call void @unknown()
+; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
;
call void @unknown()
+ call void @use_most()
ret void
}
define amdgpu_kernel void @kernel_calls_extern_marked_callsite() {
; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_extern_marked_callsite(
-; CHECK-SAME: ) #[[ATTR2]] {
-; CHECK-NEXT: call void @unknown() #[[ATTR6:[0-9]+]]
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT: call void @unknown() #[[ATTR10:[0-9]+]]
+; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
;
call void @unknown() #0
+ call void @use_most()
ret void
}
define amdgpu_kernel void @kernel_calls_indirect(ptr %indirect) {
; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_indirect(
-; CHECK-SAME: ptr [[INDIRECT:%.*]]) #[[ATTR2]] {
+; CHECK-SAME: ptr [[INDIRECT:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: call void [[INDIRECT]]()
+; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
;
call void %indirect()
+ call void @use_most()
ret void
}
define amdgpu_kernel void @kernel_calls_indirect_marked_callsite(ptr %indirect) {
; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_indirect_marked_callsite(
-; CHECK-SAME: ptr [[INDIRECT:%.*]]) #[[ATTR2]] {
-; CHECK-NEXT: call void [[INDIRECT]]() #[[ATTR6]]
+; CHECK-SAME: ptr [[INDIRECT:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: call void [[INDIRECT]]() #[[ATTR10]]
+; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
;
call void %indirect() #0
+ call void @use_most()
ret void
}
define amdgpu_kernel void @kernel_transitively_uses_agpr_asm() {
; CHECK-LABEL: define amdgpu_kernel void @kernel_transitively_uses_agpr_asm(
-; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-SAME: ) #[[ATTR1]] {
; CHECK-NEXT: call void @func_uses_asm_physreg_agpr()
+; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
;
call void @func_uses_asm_physreg_agpr()
+ call void @use_most()
ret void
}
define void @empty() {
; CHECK-LABEL: define void @empty(
-; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
;
+ call void @use_most()
ret void
}
define void @also_empty() {
; CHECK-LABEL: define void @also_empty(
-; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
;
+ call void @use_most()
ret void
}
define amdgpu_kernel void @kernel_calls_empty() {
; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_empty(
-; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-SAME: ) #[[ATTR0]] {
; CHECK-NEXT: call void @empty()
+; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
;
call void @empty()
+ call void @use_most()
ret void
}
define amdgpu_kernel void @kernel_calls_non_agpr_and_agpr() {
; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_non_agpr_and_agpr(
-; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-SAME: ) #[[ATTR1]] {
; CHECK-NEXT: call void @empty()
; CHECK-NEXT: call void @func_uses_asm_physreg_agpr()
+; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
;
call void @empty()
call void @func_uses_asm_physreg_agpr()
+ call void @use_most()
ret void
}
define amdgpu_kernel void @kernel_calls_generic_intrinsic(ptr %ptr0, ptr %ptr1, i64 %size) {
; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_generic_intrinsic(
-; CHECK-SAME: ptr [[PTR0:%.*]], ptr [[PTR1:%.*]], i64 [[SIZE:%.*]]) #[[ATTR1]] {
+; CHECK-SAME: ptr [[PTR0:%.*]], ptr [[PTR1:%.*]], i64 [[SIZE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[PTR0]], ptr [[PTR1]], i64 [[SIZE]], i1 false)
+; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
;
call void @llvm.memcpy.p0.p0.i64(ptr %ptr0, ptr %ptr1, i64 %size, i1 false)
+ call void @use_most()
ret void
}
@@ -205,31 +288,35 @@ declare <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float, float, <32 x float>
define amdgpu_kernel void @kernel_calls_mfma.f32.32x32x1f32(ptr addrspace(1) %out, float %a, float %b, <32 x float> %c) {
; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_mfma.f32.32x32x1f32(
-; CHECK-SAME: ptr addrspace(1) [[OUT:%.*]], float [[A:%.*]], float [[B:%.*]], <32 x float> [[C:%.*]]) #[[ATTR1]] {
+; CHECK-SAME: ptr addrspace(1) [[OUT:%.*]], float [[A:%.*]], float [[B:%.*]], <32 x float> [[C:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[RESULT:%.*]] = call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float [[A]], float [[B]], <32 x float> [[C]], i32 0, i32 0, i32 0)
; CHECK-NEXT: store <32 x float> [[RESULT]], ptr addrspace(1) [[OUT]], align 128
+; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
;
%result = call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float %a, float %b, <32 x float> %c, i32 0, i32 0, i32 0)
store <32 x float> %result, ptr addrspace(1) %out
+ call void @use_most()
ret void
}
define amdgpu_kernel void @kernel_calls_workitem_id_x(ptr addrspace(1) %out) {
; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_workitem_id_x(
-; CHECK-SAME: ptr addrspace(1) [[OUT:%.*]]) #[[ATTR1]] {
+; CHECK-SAME: ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[RESULT:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; CHECK-NEXT: store i32 [[RESULT]], ptr addrspace(1) [[OUT]], align 4
+; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
;
%result = call i32 @llvm.amdgcn.workitem.id.x()
store i32 %result, ptr addrspace(1) %out
+ call void @use_most()
ret void
}
define amdgpu_kernel void @indirect_calls_none_agpr(i1 %cond) {
; CHECK-LABEL: define amdgpu_kernel void @indirect_calls_none_agpr(
-; CHECK-SAME: i1 [[COND:%.*]]) #[[ATTR0]] {
+; CHECK-SAME: i1 [[COND:%.*]]) #[[ATTR1]] {
; CHECK-NEXT: [[FPTR:%.*]] = select i1 [[COND]], ptr @empty, ptr @also_empty
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq ptr [[FPTR]], @also_empty
; CHECK-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP3:%.*]]
@@ -244,21 +331,476 @@ define amdgpu_kernel void @indirect_calls_none_agpr(i1 %cond) {
; CHECK: 5:
; CHECK-NEXT: unreachable
; CHECK: 6:
+; CHECK-NEXT: call void @use_most()
; CHECK-NEXT: ret void
;
%fptr = select i1 %cond, ptr @empty, ptr @also_empty
call void %fptr()
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @kernel_uses_asm_virtreg_def_struct_0() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_def_struct_0(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT: [[DEF:%.*]] = call { i32, i32 } asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ %def = call {i32, i32} asm sideeffect "; def $0", "=a,=a"()
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @kernel_uses_asm_virtreg_use_struct_1() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_use_struct_1(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT: [[DEF:%.*]] = call { i32, <2 x i32> } asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ %def = call {i32, <2 x i32>} asm sideeffect "; def $0", "=a,=a"()
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @kernel_uses_asm_virtreg_use_struct_2() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_use_struct_2(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT: [[DEF:%.*]] = call { i32, <2 x i32> } asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ %def = call {i32, <2 x i32>} asm sideeffect "; def $0", "=a,=v"()
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @kernel_uses_asm_virtreg_ptr_ty() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_ptr_ty(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ call void asm sideeffect "; use $0", "a"(ptr poison)
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @kernel_uses_asm_virtreg_def_ptr_ty() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_def_ptr_ty(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT: [[DEF:%.*]] = call ptr asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ %def = call ptr asm sideeffect "; def $0", "=a"()
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @kernel_uses_asm_virtreg_def_vector_ptr_ty() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_def_vector_ptr_ty(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT: [[DEF:%.*]] = call <2 x ptr> asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ %def = call <2 x ptr> asm sideeffect "; def $0", "=a"()
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @kernel_uses_asm_physreg_def_struct_0() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_physreg_def_struct_0(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT: [[DEF:%.*]] = call { i32, i32 } asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ %def = call {i32, i32} asm sideeffect "; def $0", "={a0},={a[4:5]}"()
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @kernel_uses_asm_clobber() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_clobber(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ call void asm sideeffect "; clobber $0", "~{a4}"()
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @kernel_uses_asm_clobber_tuple() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_clobber_tuple(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ call void asm sideeffect "; clobber $0", "~{a[10:13]}"()
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @kernel_uses_asm_clobber_oob() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_clobber_oob(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ call void asm sideeffect "; clobber $0", "~{a256}"()
+ call void @use_most()
ret void
}
+define amdgpu_kernel void @kernel_uses_asm_clobber_max() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_clobber_max(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ call void asm sideeffect "; clobber $0", "~{a255}"()
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @kernel_uses_asm_physreg_oob() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_physreg_oob(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ call void asm sideeffect "; use $0", "{a256}"(i32 poison)
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @kernel_uses_asm_virtreg_def_max_ty() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_def_max_ty(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT: [[DEF:%.*]] = call <32 x i32> asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ %def = call <32 x i32> asm sideeffect "; def $0", "=a"()
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @kernel_uses_asm_virtreg_use_max_ty() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_use_max_ty(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ call void asm sideeffect "; use $0", "a"(<32 x i32> poison)
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @kernel_uses_asm_virtreg_use_def_max_ty() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_use_def_max_ty(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT: [[DEF:%.*]] = call <32 x i32> asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ %def = call <32 x i32> asm sideeffect "; use $0", "=a,a"(<32 x i32> poison)
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @vreg_use_exceeds_register_file() {
+; CHECK-LABEL: define amdgpu_kernel void @vreg_use_exceeds_register_file(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ call void asm sideeffect "; use $0", "a"(<257 x i32> poison)
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @vreg_def_exceeds_register_file() {
+; CHECK-LABEL: define amdgpu_kernel void @vreg_def_exceeds_register_file(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT: [[DEF:%.*]] = call <257 x i32> asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ %def = call <257 x i32> asm sideeffect "; def $0", "=a"()
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @multiple() {
+; CHECK-LABEL: define amdgpu_kernel void @multiple(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT: [[DEF:%.*]] = call { <16 x i32>, <8 x i32>, <8 x i32> } asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ %def = call {<16 x i32>, <8 x i32>, <8 x i32>} asm sideeffect "; def $0", "=a,=a,=a,a,a,a"(<4 x i32> splat (i32 0), <8 x i32> splat (i32 1), i64 999)
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @earlyclobber_0() {
+; CHECK-LABEL: define amdgpu_kernel void @earlyclobber_0(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT: [[DEF:%.*]] = call <8 x i32> asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ %def = call <8 x i32> asm sideeffect "; def $0", "=&a,a"(i32 0)
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @earlyclobber_1() {
+; CHECK-LABEL: define amdgpu_kernel void @earlyclobber_1(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT: [[DEF:%.*]] = call { <8 x i32>, <16 x i32> } asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ %def = call { <8 x i32>, <16 x i32 > } asm sideeffect "; def $0, $1", "=&a,=&a,a,a"(i32 0, <16 x i32> splat (i32 1))
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @physreg_a32__vreg_a256__vreg_a512() {
+; CHECK-LABEL: define amdgpu_kernel void @physreg_a32__vreg_a256__vreg_a512(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ call void asm sideeffect "; use $0, $1, $2", "{a16},a,a"(i32 poison, <8 x i32> poison, <16 x i32> poison)
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @physreg_def_a32__def_vreg_a256__def_vreg_a512() {
+; CHECK-LABEL: define amdgpu_kernel void @physreg_def_a32__def_vreg_a256__def_vreg_a512(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT: [[TMP1:%.*]] = call { i32, <8 x i32>, <16 x i32> } asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ call {i32, <8 x i32>, <16 x i32>} asm sideeffect "; def $0, $1, $2", "={a16},=a,=a"()
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @physreg_def_a32___def_vreg_a512_use_vreg_a256() {
+; CHECK-LABEL: define amdgpu_kernel void @physreg_def_a32___def_vreg_a512_use_vreg_a256(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT: [[TMP1:%.*]] = call { i32, <16 x i32> } asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ call {i32, <16 x i32>} asm sideeffect "; def $0, $1, $2", "={a16},=a,a"(<8 x i32> poison)
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @mixed_physreg_vreg_tuples_0() {
+; CHECK-LABEL: define amdgpu_kernel void @mixed_physreg_vreg_tuples_0(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ call void asm sideeffect "; use $0, $1", "{a[1:4]},a"(<4 x i32> poison, <4 x i32> poison)
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @mixed_physreg_vreg_tuples_1() {
+; CHECK-LABEL: define amdgpu_kernel void @mixed_physreg_vreg_tuples_1(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ call void asm sideeffect "; use $0, $1", "a,{a[0:3]}"(<4 x i32> poison, <4 x i32> poison)
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @physreg_raises_limit() {
+; CHECK-LABEL: define amdgpu_kernel void @physreg_raises_limit(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ call void asm sideeffect "; use $0, $1", "a,{a[5:8]}"(<4 x i32> poison, <4 x i32> poison)
+ call void @use_most()
+ ret void
+}
+
+; FIXME: This should require 9. We cannot allocate an a128 at a0.
+define amdgpu_kernel void @physreg_tuple_alignment_raises_limit() {
+; CHECK-LABEL: define amdgpu_kernel void @physreg_tuple_alignment_raises_limit(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ call void asm sideeffect "; use $0, $1", "a,{a[1:4]}"(<4 x i32> poison, <4 x i32> poison)
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @align3_virtreg() {
+; CHECK-LABEL: define amdgpu_kernel void @align3_virtreg(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ call void asm sideeffect "; use $0, $1", "a,a"(<3 x i32> poison, <3 x i32> poison)
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @align3_align4_virtreg() {
+; CHECK-LABEL: define amdgpu_kernel void @align3_align4_virtreg(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ call void asm sideeffect "; use $0, $1", "a,a"(<3 x i32> poison, <4 x i32> poison)
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @align2_align4_virtreg() {
+; CHECK-LABEL: define amdgpu_kernel void @align2_align4_virtreg(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ call void asm sideeffect "; use $0, $1", "a,a"(<2 x i32> poison, <4 x i32> poison)
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @kernel_uses_write_register_a55() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_write_register_a55(
+; CHECK-SAME: ) #[[ATTR3:[0-9]+]] {
+; CHECK-NEXT: call void @llvm.write_register.i32(metadata [[META0:![0-9]+]], i32 0)
+; CHECK-NEXT: ret void
+;
+ call void @llvm.write_register.i64(metadata !0, i32 0)
+ ret void
+}
+
+define amdgpu_kernel void @kernel_uses_write_register_v55() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_write_register_v55(
+; CHECK-SAME: ) #[[ATTR4:[0-9]+]] {
+; CHECK-NEXT: call void @llvm.write_register.i32(metadata [[META1:![0-9]+]], i32 0)
+; CHECK-NEXT: ret void
+;
+ call void @llvm.write_register.i64(metadata !1, i32 0)
+ ret void
+}
+
+define amdgpu_kernel void @kernel_uses_write_register_a55_57() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_write_register_a55_57(
+; CHECK-SAME: ) #[[ATTR3]] {
+; CHECK-NEXT: call void @llvm.write_register.i96(metadata [[META2:![0-9]+]], i96 0)
+; CHECK-NEXT: ret void
+;
+ call void @llvm.write_register.i64(metadata !2, i96 0)
+ ret void
+}
+
+define amdgpu_kernel void @kernel_uses_read_register_a55(ptr addrspace(1) %ptr) {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_read_register_a55(
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR3]] {
+; CHECK-NEXT: [[REG:%.*]] = call i32 @llvm.read_register.i32(metadata [[META0]])
+; CHECK-NEXT: store i32 [[REG]], ptr addrspace(1) [[PTR]], align 4
+; CHECK-NEXT: ret void
+;
+ %reg = call i32 @llvm.read_register.i64(metadata !0)
+ store i32 %reg, ptr addrspace(1) %ptr
+ ret void
+}
+
+define amdgpu_kernel void @kernel_uses_read_volatile_register_a55(ptr addrspace(1) %ptr) {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_read_volatile_register_a55(
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR3]] {
+; CHECK-NEXT: [[REG:%.*]] = call i32 @llvm.read_volatile_register.i32(metadata [[META0]])
+; CHECK-NEXT: store i32 [[REG]], ptr addrspace(1) [[PTR]], align 4
+; CHECK-NEXT: ret void
+;
+ %reg = call i32 @llvm.read_volatile_register.i64(metadata !0)
+ store i32 %reg, ptr addrspace(1) %ptr
+ ret void
+}
+
+define amdgpu_kernel void @kernel_uses_read_register_a56_59(ptr addrspace(1) %ptr) {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_read_register_a56_59(
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR3]] {
+; CHECK-NEXT: [[REG:%.*]] = call i128 @llvm.read_register.i128(metadata [[META3:![0-9]+]])
+; CHECK-NEXT: store i128 [[REG]], ptr addrspace(1) [[PTR]], align 8
+; CHECK-NEXT: ret void
+;
+ %reg = call i128 @llvm.read_register.i64(metadata !3)
+ store i128 %reg, ptr addrspace(1) %ptr
+ ret void
+}
+
+define amdgpu_kernel void @kernel_uses_write_register_out_of_bounds_a256() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_write_register_out_of_bounds_a256(
+; CHECK-SAME: ) #[[ATTR3]] {
+; CHECK-NEXT: call void @llvm.write_register.i32(metadata [[META4:![0-9]+]], i32 0)
+; CHECK-NEXT: ret void
+;
+ call void @llvm.write_register.i64(metadata !4, i32 0)
+ ret void
+}
attributes #0 = { "amdgpu-agpr-alloc"="0" }
+
+!0 = !{!"a55"}
+!1 = !{!"v55"}
+!2 = !{!"a[55:57]"}
+!3 = !{!"a[56:59]"}
+!4 = !{!"a256"}
+
+;.
+; CHECK: attributes #[[ATTR0]] = { "amdgpu-agpr-alloc"="0" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR1]] = { "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR2:[0-9]+]] = { convergent nocallback nofree nosync nounwind willreturn memory(none) "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR3]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR4]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR5:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR6:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR7:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(read) "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR8:[0-9]+]] = { nounwind "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR9:[0-9]+]] = { nocallback nounwind "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR10]] = { "amdgpu-agpr-alloc"="0" }
;.
-; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR1]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR2:[0-9]+]] = { "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR3:[0-9]+]] = { convergent nocallback nofree nosync nounwind willreturn memory(none) "target-cpu"="gfx90a" }
-; CHECK: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) "target-cpu"="gfx90a" }
-; CHECK: attributes #[[ATTR5:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) "target-cpu"="gfx90a" }
-; CHECK: attributes #[[ATTR6]] = { "amdgpu-agpr-alloc"="0" }
+; CHECK: [[META0]] = !{!"a55"}
+; CHECK: [[META1]] = !{!"v55"}
+; CHECK: [[META2]] = !{!"a[55:57]"}
+; CHECK: [[META3]] = !{!"a[56:59]"}
+; CHECK: [[META4]] = !{!"a256"}
;.
diff --git a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll
index fb566e5..9283bd5 100644
--- a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll
@@ -691,29 +691,29 @@ attributes #6 = { "enqueued-block" }
;.
; ATTRIBUTOR_HSA: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
-; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR3]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "target-cpu"="fiji" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR4]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR5]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR6]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR7]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR8]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR9]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR10]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR11]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR12]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR13]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx900" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR14]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx900" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR15:[0-9]+]] = { nounwind "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR16]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR17]] = { nounwind sanitize_address "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR18]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR3]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR4]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR5]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR6]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR7]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR8]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR9]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR10]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR11]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR12]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR13]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx900" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR14]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx900" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR15]] = { nounwind "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR16]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR17]] = { nounwind sanitize_address "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR18]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR19:[0-9]+]] = { nounwind sanitize_address "amdgpu-no-implicitarg-ptr" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR20:[0-9]+]] = { "enqueued-block" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR21]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "enqueued-block" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR21]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "enqueued-block" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR22]] = { "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR23]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR23]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR24]] = { nounwind }
; ATTRIBUTOR_HSA: attributes #[[ATTR25]] = { "enqueued-block" }
;.
diff --git a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll
index 484ff77..8554485 100644
--- a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll
+++ b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll
@@ -474,19 +474,19 @@ attributes #1 = { nounwind }
; AKF_HSA: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500}
;.
; HSA: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
-; HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; HSA: attributes #[[ATTR3]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; HSA: attributes #[[ATTR4]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; HSA: attributes #[[ATTR5]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; HSA: attributes #[[ATTR6]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "uniform-work-group-size"="false" }
-; HSA: attributes #[[ATTR7]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; HSA: attributes #[[ATTR8]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "uniform-work-group-size"="false" }
-; HSA: attributes #[[ATTR9]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "uniform-work-group-size"="false" }
-; HSA: attributes #[[ATTR10]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; HSA: attributes #[[ATTR11]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; HSA: attributes #[[ATTR12]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; HSA: attributes #[[ATTR13]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; HSA: attributes #[[ATTR3]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; HSA: attributes #[[ATTR4]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; HSA: attributes #[[ATTR5]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; HSA: attributes #[[ATTR6]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "uniform-work-group-size"="false" }
+; HSA: attributes #[[ATTR7]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; HSA: attributes #[[ATTR8]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "uniform-work-group-size"="false" }
+; HSA: attributes #[[ATTR9]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "uniform-work-group-size"="false" }
+; HSA: attributes #[[ATTR10]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; HSA: attributes #[[ATTR11]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; HSA: attributes #[[ATTR12]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; HSA: attributes #[[ATTR13]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
;.
; HSA: [[META0]] = !{i32 1, i32 3, i32 4, i32 10}
; HSA: [[META1]] = !{i32 1, i32 5, i32 6, i32 10}
diff --git a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features.ll b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features.ll
index 2efe024..e2a2deb 100644
--- a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features.ll
+++ b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features.ll
@@ -294,13 +294,13 @@ attributes #1 = { nounwind }
;.
; CHECK: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
-; CHECK: attributes #[[ATTR1]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR2]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR3]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR4]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR5]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR6]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR7]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR8]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR9]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR1]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR2]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR3]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR4]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR5]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR6]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR7]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR8]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR9]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "uniform-work-group-size"="false" }
;.
diff --git a/llvm/test/CodeGen/AMDGPU/atomic_load_local.ll b/llvm/test/CodeGen/AMDGPU/atomic_load_local.ll
index aaedb85..e67d7fdb 100644
--- a/llvm/test/CodeGen/AMDGPU/atomic_load_local.ll
+++ b/llvm/test/CodeGen/AMDGPU/atomic_load_local.ll
@@ -3,6 +3,8 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-TRUE16 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-FAKE16 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX1250,GFX1250-TRUE16 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX1250,GFX1250-FAKE16 %s
define i8 @atomic_load_monotonic_i8(ptr addrspace(3) %ptr) {
; CI-LABEL: atomic_load_monotonic_i8:
@@ -33,6 +35,14 @@ define i8 @atomic_load_monotonic_i8(ptr addrspace(3) %ptr) {
; GFX11-FAKE16-NEXT: ds_load_u8 v0, v0
; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: atomic_load_monotonic_i8:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: ds_load_u8 v0, v0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_set_pc_i64 s[30:31]
%load = load atomic i8, ptr addrspace(3) %ptr monotonic, align 1
ret i8 %load
}
@@ -66,6 +76,14 @@ define i8 @atomic_load_monotonic_i8_offset(ptr addrspace(3) %ptr) {
; GFX11-FAKE16-NEXT: ds_load_u8 v0, v0 offset:16
; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: atomic_load_monotonic_i8_offset:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: ds_load_u8 v0, v0 offset:16
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_set_pc_i64 s[30:31]
%gep = getelementptr inbounds i8, ptr addrspace(3) %ptr, i8 16
%load = load atomic i8, ptr addrspace(3) %gep monotonic, align 1
ret i8 %load
@@ -100,6 +118,14 @@ define i16 @atomic_load_monotonic_i16(ptr addrspace(3) %ptr) {
; GFX11-FAKE16-NEXT: ds_load_u16 v0, v0
; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: atomic_load_monotonic_i16:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: ds_load_u16 v0, v0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_set_pc_i64 s[30:31]
%load = load atomic i16, ptr addrspace(3) %ptr monotonic, align 2
ret i16 %load
}
@@ -133,6 +159,14 @@ define i16 @atomic_load_monotonic_i16_offset(ptr addrspace(3) %ptr) {
; GFX11-FAKE16-NEXT: ds_load_u16 v0, v0 offset:32
; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: atomic_load_monotonic_i16_offset:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: ds_load_u16 v0, v0 offset:32
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_set_pc_i64 s[30:31]
%gep = getelementptr inbounds i16, ptr addrspace(3) %ptr, i16 16
%load = load atomic i16, ptr addrspace(3) %gep monotonic, align 2
ret i16 %load
@@ -160,6 +194,14 @@ define i32 @atomic_load_monotonic_i32(ptr addrspace(3) %ptr) {
; GFX11-NEXT: ds_load_b32 v0, v0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: atomic_load_monotonic_i32:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: ds_load_b32 v0, v0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_set_pc_i64 s[30:31]
%load = load atomic i32, ptr addrspace(3) %ptr monotonic, align 4
ret i32 %load
}
@@ -186,6 +228,14 @@ define i32 @atomic_load_monotonic_i32_offset(ptr addrspace(3) %ptr) {
; GFX11-NEXT: ds_load_b32 v0, v0 offset:64
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: atomic_load_monotonic_i32_offset:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: ds_load_b32 v0, v0 offset:64
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_set_pc_i64 s[30:31]
%gep = getelementptr inbounds i32, ptr addrspace(3) %ptr, i32 16
%load = load atomic i32, ptr addrspace(3) %gep monotonic, align 4
ret i32 %load
@@ -213,6 +263,14 @@ define i64 @atomic_load_monotonic_i64(ptr addrspace(3) %ptr) {
; GFX11-NEXT: ds_load_b64 v[0:1], v0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: atomic_load_monotonic_i64:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: ds_load_b64 v[0:1], v0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_set_pc_i64 s[30:31]
%load = load atomic i64, ptr addrspace(3) %ptr monotonic, align 8
ret i64 %load
}
@@ -239,6 +297,14 @@ define i64 @atomic_load_monotonic_i64_offset(ptr addrspace(3) %ptr) {
; GFX11-NEXT: ds_load_b64 v[0:1], v0 offset:128
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: atomic_load_monotonic_i64_offset:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: ds_load_b64 v[0:1], v0 offset:128
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_set_pc_i64 s[30:31]
%gep = getelementptr inbounds i64, ptr addrspace(3) %ptr, i32 16
%load = load atomic i64, ptr addrspace(3) %gep monotonic, align 8
ret i64 %load
@@ -266,6 +332,14 @@ define float @atomic_load_monotonic_f32_offset(ptr addrspace(3) %ptr) {
; GFX11-NEXT: ds_load_b32 v0, v0 offset:64
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: atomic_load_monotonic_f32_offset:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: ds_load_b32 v0, v0 offset:64
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_set_pc_i64 s[30:31]
%gep = getelementptr inbounds float, ptr addrspace(3) %ptr, i32 16
%load = load atomic float, ptr addrspace(3) %gep monotonic, align 4
ret float %load
@@ -293,6 +367,14 @@ define double @atomic_load_monotonic_f64_offset(ptr addrspace(3) %ptr) {
; GFX11-NEXT: ds_load_b64 v[0:1], v0 offset:128
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: atomic_load_monotonic_f64_offset:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: ds_load_b64 v[0:1], v0 offset:128
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_set_pc_i64 s[30:31]
%gep = getelementptr inbounds double, ptr addrspace(3) %ptr, i32 16
%load = load atomic double, ptr addrspace(3) %gep monotonic, align 8
ret double %load
@@ -320,6 +402,14 @@ define ptr @atomic_load_monotonic_p0i8_offset(ptr addrspace(3) %ptr) {
; GFX11-NEXT: ds_load_b64 v[0:1], v0 offset:128
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: atomic_load_monotonic_p0i8_offset:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: ds_load_b64 v[0:1], v0 offset:128
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_set_pc_i64 s[30:31]
%gep = getelementptr inbounds ptr, ptr addrspace(3) %ptr, i32 16
%load = load atomic ptr, ptr addrspace(3) %gep monotonic, align 8
ret ptr %load
@@ -347,6 +437,14 @@ define ptr addrspace(3) @atomic_load_monotonic_p3i8_offset(ptr addrspace(3) %ptr
; GFX11-NEXT: ds_load_b32 v0, v0 offset:64
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: atomic_load_monotonic_p3i8_offset:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: ds_load_b32 v0, v0 offset:64
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_set_pc_i64 s[30:31]
%gep = getelementptr inbounds ptr addrspace(3), ptr addrspace(3) %ptr, i32 16
%load = load atomic ptr addrspace(3), ptr addrspace(3) %gep monotonic, align 4
ret ptr addrspace(3) %load
@@ -381,6 +479,14 @@ define i16 @atomic_load_monotonic_f16(ptr addrspace(3) %ptr) {
; GFX11-FAKE16-NEXT: ds_load_u16 v0, v0
; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: atomic_load_monotonic_f16:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: ds_load_u16 v0, v0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_set_pc_i64 s[30:31]
%load = load atomic half, ptr addrspace(3) %ptr monotonic, align 2
%ret = bitcast half %load to i16
ret i16 %ret
@@ -415,6 +521,14 @@ define i16 @atomic_load_monotonic_f16_offset(ptr addrspace(3) %ptr) {
; GFX11-FAKE16-NEXT: ds_load_u16 v0, v0 offset:32
; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: atomic_load_monotonic_f16_offset:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: ds_load_u16 v0, v0 offset:32
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_set_pc_i64 s[30:31]
%gep = getelementptr inbounds half, ptr addrspace(3) %ptr, i32 16
%load = load atomic half, ptr addrspace(3) %gep monotonic, align 2
%ret = bitcast half %load to i16
@@ -450,6 +564,14 @@ define i16 @atomic_load_monotonic_bf16(ptr addrspace(3) %ptr) {
; GFX11-FAKE16-NEXT: ds_load_u16 v0, v0
; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: atomic_load_monotonic_bf16:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: ds_load_u16 v0, v0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_set_pc_i64 s[30:31]
%load = load atomic bfloat, ptr addrspace(3) %ptr monotonic, align 2
%ret = bitcast bfloat %load to i16
ret i16 %ret
@@ -484,6 +606,14 @@ define i16 @atomic_load_monotonic_bf16_offset(ptr addrspace(3) %ptr) {
; GFX11-FAKE16-NEXT: ds_load_u16 v0, v0 offset:32
; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: atomic_load_monotonic_bf16_offset:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: ds_load_u16 v0, v0 offset:32
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_set_pc_i64 s[30:31]
%gep = getelementptr inbounds bfloat, ptr addrspace(3) %ptr, i32 16
%load = load atomic bfloat, ptr addrspace(3) %gep monotonic, align 2
%ret = bitcast bfloat %load to i16
@@ -491,3 +621,5 @@ define i16 @atomic_load_monotonic_bf16_offset(ptr addrspace(3) %ptr) {
}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GCN: {{.*}}
+; GFX1250-FAKE16: {{.*}}
+; GFX1250-TRUE16: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/atomic_store_local.ll b/llvm/test/CodeGen/AMDGPU/atomic_store_local.ll
index c2bb4f00..31065f2 100644
--- a/llvm/test/CodeGen/AMDGPU/atomic_store_local.ll
+++ b/llvm/test/CodeGen/AMDGPU/atomic_store_local.ll
@@ -3,6 +3,8 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-TRUE16 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-FAKE16 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX1250,GFX1250-TRUE16 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX1250,GFX1250-FAKE16 %s
define void @atomic_store_monotonic_i8(ptr addrspace(3) %ptr, i8 %val) {
; CI-LABEL: atomic_store_monotonic_i8:
@@ -41,6 +43,26 @@ define void @atomic_store_monotonic_i8(ptr addrspace(3) %ptr, i8 %val) {
; GFX11-FAKE16-NEXT: ds_store_b8 v0, v2
; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-TRUE16-LABEL: atomic_store_monotonic_i8:
+; GFX1250-TRUE16: ; %bb.0:
+; GFX1250-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-TRUE16-NEXT: v_add_nc_u16 v1.h, v1.l, 2
+; GFX1250-TRUE16-NEXT: ds_store_b8 v0, v1
+; GFX1250-TRUE16-NEXT: ds_store_b8_d16_hi v0, v1
+; GFX1250-TRUE16-NEXT: s_wait_dscnt 0x0
+; GFX1250-TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250-FAKE16-LABEL: atomic_store_monotonic_i8:
+; GFX1250-FAKE16: ; %bb.0:
+; GFX1250-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-FAKE16-NEXT: v_add_nc_u16 v2, v1, 2
+; GFX1250-FAKE16-NEXT: ds_store_b8 v0, v1
+; GFX1250-FAKE16-NEXT: ds_store_b8 v0, v2
+; GFX1250-FAKE16-NEXT: s_wait_dscnt 0x0
+; GFX1250-FAKE16-NEXT: s_set_pc_i64 s[30:31]
%val1 = add i8 %val, 2
store atomic i8 %val, ptr addrspace(3) %ptr monotonic, align 1
store atomic i8 %val1, ptr addrspace(3) %ptr monotonic, align 1
@@ -84,6 +106,26 @@ define void @atomic_store_monotonic_offset_i8(ptr addrspace(3) %ptr, i8 %val) {
; GFX11-FAKE16-NEXT: ds_store_b8 v0, v2 offset:16
; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-TRUE16-LABEL: atomic_store_monotonic_offset_i8:
+; GFX1250-TRUE16: ; %bb.0:
+; GFX1250-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-TRUE16-NEXT: v_add_nc_u16 v1.h, v1.l, 2
+; GFX1250-TRUE16-NEXT: ds_store_b8 v0, v1 offset:8
+; GFX1250-TRUE16-NEXT: ds_store_b8_d16_hi v0, v1 offset:16
+; GFX1250-TRUE16-NEXT: s_wait_dscnt 0x0
+; GFX1250-TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250-FAKE16-LABEL: atomic_store_monotonic_offset_i8:
+; GFX1250-FAKE16: ; %bb.0:
+; GFX1250-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-FAKE16-NEXT: v_add_nc_u16 v2, v1, 2
+; GFX1250-FAKE16-NEXT: ds_store_b8 v0, v1 offset:8
+; GFX1250-FAKE16-NEXT: ds_store_b8 v0, v2 offset:16
+; GFX1250-FAKE16-NEXT: s_wait_dscnt 0x0
+; GFX1250-FAKE16-NEXT: s_set_pc_i64 s[30:31]
%val1 = add i8 %val, 2
%gep_1 = getelementptr inbounds i8, ptr addrspace(3) %ptr, i8 8
%gep_2 = getelementptr inbounds i8, ptr addrspace(3) %ptr, i8 16
@@ -129,6 +171,26 @@ define void @atomic_store_monotonic_i16(ptr addrspace(3) %ptr, i16 %val) {
; GFX11-FAKE16-NEXT: ds_store_b16 v0, v2
; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-TRUE16-LABEL: atomic_store_monotonic_i16:
+; GFX1250-TRUE16: ; %bb.0:
+; GFX1250-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-TRUE16-NEXT: v_add_nc_u16 v1.h, v1.l, 2
+; GFX1250-TRUE16-NEXT: ds_store_b16 v0, v1
+; GFX1250-TRUE16-NEXT: ds_store_b16_d16_hi v0, v1
+; GFX1250-TRUE16-NEXT: s_wait_dscnt 0x0
+; GFX1250-TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250-FAKE16-LABEL: atomic_store_monotonic_i16:
+; GFX1250-FAKE16: ; %bb.0:
+; GFX1250-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-FAKE16-NEXT: v_add_nc_u16 v2, v1, 2
+; GFX1250-FAKE16-NEXT: ds_store_b16 v0, v1
+; GFX1250-FAKE16-NEXT: ds_store_b16 v0, v2
+; GFX1250-FAKE16-NEXT: s_wait_dscnt 0x0
+; GFX1250-FAKE16-NEXT: s_set_pc_i64 s[30:31]
%val1 = add i16 %val, 2
store atomic i16 %val, ptr addrspace(3) %ptr monotonic, align 2
store atomic i16 %val1, ptr addrspace(3) %ptr monotonic, align 2
@@ -172,6 +234,26 @@ define void @atomic_store_monotonic_offset_i16(ptr addrspace(3) %ptr, i16 %val)
; GFX11-FAKE16-NEXT: ds_store_b16 v0, v2 offset:32
; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-TRUE16-LABEL: atomic_store_monotonic_offset_i16:
+; GFX1250-TRUE16: ; %bb.0:
+; GFX1250-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-TRUE16-NEXT: v_add_nc_u16 v1.h, v1.l, 2
+; GFX1250-TRUE16-NEXT: ds_store_b16 v0, v1 offset:32
+; GFX1250-TRUE16-NEXT: ds_store_b16_d16_hi v0, v1 offset:32
+; GFX1250-TRUE16-NEXT: s_wait_dscnt 0x0
+; GFX1250-TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250-FAKE16-LABEL: atomic_store_monotonic_offset_i16:
+; GFX1250-FAKE16: ; %bb.0:
+; GFX1250-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-FAKE16-NEXT: v_add_nc_u16 v2, v1, 2
+; GFX1250-FAKE16-NEXT: ds_store_b16 v0, v1 offset:32
+; GFX1250-FAKE16-NEXT: ds_store_b16 v0, v2 offset:32
+; GFX1250-FAKE16-NEXT: s_wait_dscnt 0x0
+; GFX1250-FAKE16-NEXT: s_set_pc_i64 s[30:31]
%val1 = add i16 %val, 2
%gep = getelementptr inbounds i16, ptr addrspace(3) %ptr, i16 16
store atomic i16 %val, ptr addrspace(3) %gep monotonic, align 2
@@ -201,6 +283,14 @@ define void @atomic_store_monotonic_i32(ptr addrspace(3) %ptr, i32 %val) {
; GFX11-NEXT: ds_store_b32 v0, v1
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: atomic_store_monotonic_i32:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_set_pc_i64 s[30:31]
store atomic i32 %val, ptr addrspace(3) %ptr monotonic, align 4
ret void
}
@@ -227,6 +317,14 @@ define void @atomic_store_monotonic_offset_i32(ptr addrspace(3) %ptr, i32 %val)
; GFX11-NEXT: ds_store_b32 v0, v1 offset:64
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: atomic_store_monotonic_offset_i32:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1 offset:64
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_set_pc_i64 s[30:31]
%gep = getelementptr inbounds i32, ptr addrspace(3) %ptr, i32 16
store atomic i32 %val, ptr addrspace(3) %gep monotonic, align 4
ret void
@@ -254,6 +352,15 @@ define void @atomic_store_monotonic_i64(ptr addrspace(3) %ptr, i64 %val) {
; GFX11-NEXT: ds_store_b64 v0, v[1:2]
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: atomic_store_monotonic_i64:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v2, v1
+; GFX1250-NEXT: ds_store_b64 v0, v[2:3]
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_set_pc_i64 s[30:31]
store atomic i64 %val, ptr addrspace(3) %ptr monotonic, align 8
ret void
}
@@ -280,6 +387,15 @@ define void @atomic_store_monotonic_offset_i64(ptr addrspace(3) %ptr, i64 %val)
; GFX11-NEXT: ds_store_b64 v0, v[1:2] offset:128
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: atomic_store_monotonic_offset_i64:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v2, v1
+; GFX1250-NEXT: ds_store_b64 v0, v[2:3] offset:128
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_set_pc_i64 s[30:31]
%gep = getelementptr inbounds i64, ptr addrspace(3) %ptr, i64 16
store atomic i64 %val, ptr addrspace(3) %gep monotonic, align 8
ret void
@@ -322,6 +438,26 @@ define void @atomic_store_monotonic_f16(ptr addrspace(3) %ptr, i16 %arg.val) {
; GFX11-FAKE16-NEXT: ds_store_b16 v0, v2
; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-TRUE16-LABEL: atomic_store_monotonic_f16:
+; GFX1250-TRUE16: ; %bb.0:
+; GFX1250-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-TRUE16-NEXT: v_add_nc_u16 v1.h, v1.l, 2
+; GFX1250-TRUE16-NEXT: ds_store_b16 v0, v1
+; GFX1250-TRUE16-NEXT: ds_store_b16_d16_hi v0, v1
+; GFX1250-TRUE16-NEXT: s_wait_dscnt 0x0
+; GFX1250-TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250-FAKE16-LABEL: atomic_store_monotonic_f16:
+; GFX1250-FAKE16: ; %bb.0:
+; GFX1250-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-FAKE16-NEXT: v_add_nc_u16 v2, v1, 2
+; GFX1250-FAKE16-NEXT: ds_store_b16 v0, v1
+; GFX1250-FAKE16-NEXT: ds_store_b16 v0, v2
+; GFX1250-FAKE16-NEXT: s_wait_dscnt 0x0
+; GFX1250-FAKE16-NEXT: s_set_pc_i64 s[30:31]
%arg.val1 = add i16 %arg.val, 2
%val = bitcast i16 %arg.val to half
%val1 = bitcast i16 %arg.val1 to half
@@ -367,6 +503,26 @@ define void @atomic_store_monotonic_offset_f16(ptr addrspace(3) %ptr, i16 %arg.v
; GFX11-FAKE16-NEXT: ds_store_b16 v0, v2 offset:32
; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-TRUE16-LABEL: atomic_store_monotonic_offset_f16:
+; GFX1250-TRUE16: ; %bb.0:
+; GFX1250-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-TRUE16-NEXT: v_add_nc_u16 v1.h, v1.l, 2
+; GFX1250-TRUE16-NEXT: ds_store_b16 v0, v1 offset:32
+; GFX1250-TRUE16-NEXT: ds_store_b16_d16_hi v0, v1 offset:32
+; GFX1250-TRUE16-NEXT: s_wait_dscnt 0x0
+; GFX1250-TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250-FAKE16-LABEL: atomic_store_monotonic_offset_f16:
+; GFX1250-FAKE16: ; %bb.0:
+; GFX1250-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-FAKE16-NEXT: v_add_nc_u16 v2, v1, 2
+; GFX1250-FAKE16-NEXT: ds_store_b16 v0, v1 offset:32
+; GFX1250-FAKE16-NEXT: ds_store_b16 v0, v2 offset:32
+; GFX1250-FAKE16-NEXT: s_wait_dscnt 0x0
+; GFX1250-FAKE16-NEXT: s_set_pc_i64 s[30:31]
%arg.val1 = add i16 %arg.val, 2
%val1 = bitcast i16 %arg.val1 to half
%val = bitcast i16 %arg.val to half
@@ -413,6 +569,26 @@ define void @atomic_store_monotonic_bf16(ptr addrspace(3) %ptr, i16 %arg.val) {
; GFX11-FAKE16-NEXT: ds_store_b16 v0, v2
; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-TRUE16-LABEL: atomic_store_monotonic_bf16:
+; GFX1250-TRUE16: ; %bb.0:
+; GFX1250-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-TRUE16-NEXT: v_add_nc_u16 v1.h, v1.l, 2
+; GFX1250-TRUE16-NEXT: ds_store_b16 v0, v1
+; GFX1250-TRUE16-NEXT: ds_store_b16_d16_hi v0, v1
+; GFX1250-TRUE16-NEXT: s_wait_dscnt 0x0
+; GFX1250-TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250-FAKE16-LABEL: atomic_store_monotonic_bf16:
+; GFX1250-FAKE16: ; %bb.0:
+; GFX1250-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-FAKE16-NEXT: v_add_nc_u16 v2, v1, 2
+; GFX1250-FAKE16-NEXT: ds_store_b16 v0, v1
+; GFX1250-FAKE16-NEXT: ds_store_b16 v0, v2
+; GFX1250-FAKE16-NEXT: s_wait_dscnt 0x0
+; GFX1250-FAKE16-NEXT: s_set_pc_i64 s[30:31]
%arg.val1 = add i16 %arg.val, 2
%val1 = bitcast i16 %arg.val1 to bfloat
%val = bitcast i16 %arg.val to bfloat
@@ -458,6 +634,26 @@ define void @atomic_store_monotonic_offset_bf16(ptr addrspace(3) %ptr, i16 %arg.
; GFX11-FAKE16-NEXT: ds_store_b16 v0, v2 offset:32
; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-TRUE16-LABEL: atomic_store_monotonic_offset_bf16:
+; GFX1250-TRUE16: ; %bb.0:
+; GFX1250-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-TRUE16-NEXT: v_add_nc_u16 v1.h, v1.l, 2
+; GFX1250-TRUE16-NEXT: ds_store_b16 v0, v1 offset:32
+; GFX1250-TRUE16-NEXT: ds_store_b16_d16_hi v0, v1 offset:32
+; GFX1250-TRUE16-NEXT: s_wait_dscnt 0x0
+; GFX1250-TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250-FAKE16-LABEL: atomic_store_monotonic_offset_bf16:
+; GFX1250-FAKE16: ; %bb.0:
+; GFX1250-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-FAKE16-NEXT: v_add_nc_u16 v2, v1, 2
+; GFX1250-FAKE16-NEXT: ds_store_b16 v0, v1 offset:32
+; GFX1250-FAKE16-NEXT: ds_store_b16 v0, v2 offset:32
+; GFX1250-FAKE16-NEXT: s_wait_dscnt 0x0
+; GFX1250-FAKE16-NEXT: s_set_pc_i64 s[30:31]
%arg.val1 = add i16 %arg.val, 2
%val1 = bitcast i16 %arg.val1 to bfloat
%val = bitcast i16 %arg.val to bfloat
diff --git a/llvm/test/CodeGen/AMDGPU/attributor-flatscratchinit-undefined-behavior.ll b/llvm/test/CodeGen/AMDGPU/attributor-flatscratchinit-undefined-behavior.ll
index f63dd6e..c90611f 100644
--- a/llvm/test/CodeGen/AMDGPU/attributor-flatscratchinit-undefined-behavior.ll
+++ b/llvm/test/CodeGen/AMDGPU/attributor-flatscratchinit-undefined-behavior.ll
@@ -147,10 +147,10 @@ define amdgpu_kernel void @call_calls_intrin_ascast_cc_kernel(ptr addrspace(3) %
attributes #0 = { "amdgpu-no-flat-scratch-init" }
;.
-; GFX9: attributes #[[ATTR0]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx900" "uniform-work-group-size"="false" }
+; GFX9: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx900" "uniform-work-group-size"="false" }
; GFX9: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) "target-cpu"="gfx900" }
;.
-; GFX10: attributes #[[ATTR0]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx1010" "uniform-work-group-size"="false" }
+; GFX10: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx1010" "uniform-work-group-size"="false" }
; GFX10: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) "target-cpu"="gfx1010" }
;.
; GFX9: [[META0]] = !{i32 1, i32 5, i32 6, i32 10}
diff --git a/llvm/test/CodeGen/AMDGPU/attributor-flatscratchinit.ll b/llvm/test/CodeGen/AMDGPU/attributor-flatscratchinit.ll
index 60cd252..c005695a 100644
--- a/llvm/test/CodeGen/AMDGPU/attributor-flatscratchinit.ll
+++ b/llvm/test/CodeGen/AMDGPU/attributor-flatscratchinit.ll
@@ -723,7 +723,7 @@ define void @also_empty() {
define amdgpu_kernel void @indirect_call_known_callees(i1 %cond) {
; GFX9-LABEL: define amdgpu_kernel void @indirect_call_known_callees(
-; GFX9-SAME: i1 [[COND:%.*]]) #[[ATTR3:[0-9]+]] {
+; GFX9-SAME: i1 [[COND:%.*]]) #[[ATTR0]] {
; GFX9-NEXT: [[FPTR:%.*]] = select i1 [[COND]], ptr @empty, ptr @also_empty
; GFX9-NEXT: [[TMP1:%.*]] = icmp eq ptr [[FPTR]], @also_empty
; GFX9-NEXT: br i1 [[TMP1]], label %[[BB2:.*]], label %[[BB3:.*]]
@@ -741,7 +741,7 @@ define amdgpu_kernel void @indirect_call_known_callees(i1 %cond) {
; GFX9-NEXT: ret void
;
; GFX10-LABEL: define amdgpu_kernel void @indirect_call_known_callees(
-; GFX10-SAME: i1 [[COND:%.*]]) #[[ATTR3:[0-9]+]] {
+; GFX10-SAME: i1 [[COND:%.*]]) #[[ATTR0]] {
; GFX10-NEXT: [[FPTR:%.*]] = select i1 [[COND]], ptr @empty, ptr @also_empty
; GFX10-NEXT: [[TMP1:%.*]] = icmp eq ptr [[FPTR]], @also_empty
; GFX10-NEXT: br i1 [[TMP1]], label %[[BB2:.*]], label %[[BB3:.*]]
@@ -767,13 +767,13 @@ declare i32 @llvm.amdgcn.workgroup.id.x()
define void @use_intrinsic_workitem_id_x() {
; GFX9-LABEL: define void @use_intrinsic_workitem_id_x(
-; GFX9-SAME: ) #[[ATTR5:[0-9]+]] {
+; GFX9-SAME: ) #[[ATTR4:[0-9]+]] {
; GFX9-NEXT: [[VAL:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; GFX9-NEXT: store volatile i32 [[VAL]], ptr addrspace(1) null, align 4
; GFX9-NEXT: ret void
;
; GFX10-LABEL: define void @use_intrinsic_workitem_id_x(
-; GFX10-SAME: ) #[[ATTR5:[0-9]+]] {
+; GFX10-SAME: ) #[[ATTR4:[0-9]+]] {
; GFX10-NEXT: [[VAL:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; GFX10-NEXT: store volatile i32 [[VAL]], ptr addrspace(1) null, align 4
; GFX10-NEXT: ret void
@@ -803,12 +803,12 @@ define amdgpu_kernel void @use_intrinsic_workitem_id_x_cc_kernel() {
define void @call_use_intrinsic_workitem_id_x() {
; GFX9-LABEL: define void @call_use_intrinsic_workitem_id_x(
-; GFX9-SAME: ) #[[ATTR5]] {
+; GFX9-SAME: ) #[[ATTR4]] {
; GFX9-NEXT: call void @use_intrinsic_workitem_id_x()
; GFX9-NEXT: ret void
;
; GFX10-LABEL: define void @call_use_intrinsic_workitem_id_x(
-; GFX10-SAME: ) #[[ATTR5]] {
+; GFX10-SAME: ) #[[ATTR4]] {
; GFX10-NEXT: call void @use_intrinsic_workitem_id_x()
; GFX10-NEXT: ret void
;
@@ -818,12 +818,12 @@ define void @call_use_intrinsic_workitem_id_x() {
define amdgpu_kernel void @call_use_intrinsic_workitem_id_x_cc_kernel() {
; GFX9-LABEL: define amdgpu_kernel void @call_use_intrinsic_workitem_id_x_cc_kernel(
-; GFX9-SAME: ) #[[ATTR5]] {
+; GFX9-SAME: ) #[[ATTR4]] {
; GFX9-NEXT: call void @use_intrinsic_workitem_id_x()
; GFX9-NEXT: ret void
;
; GFX10-LABEL: define amdgpu_kernel void @call_use_intrinsic_workitem_id_x_cc_kernel(
-; GFX10-SAME: ) #[[ATTR5]] {
+; GFX10-SAME: ) #[[ATTR4]] {
; GFX10-NEXT: call void @use_intrinsic_workitem_id_x()
; GFX10-NEXT: ret void
;
@@ -851,12 +851,12 @@ define amdgpu_kernel void @calls_intrin_ascast_cc_kernel(ptr addrspace(3) %ptr)
define amdgpu_kernel void @with_inline_asm() {
; GFX9-LABEL: define amdgpu_kernel void @with_inline_asm(
-; GFX9-SAME: ) #[[ATTR3]] {
+; GFX9-SAME: ) #[[ATTR0]] {
; GFX9-NEXT: call void asm sideeffect "
; GFX9-NEXT: ret void
;
; GFX10-LABEL: define amdgpu_kernel void @with_inline_asm(
-; GFX10-SAME: ) #[[ATTR3]] {
+; GFX10-SAME: ) #[[ATTR0]] {
; GFX10-NEXT: call void asm sideeffect "
; GFX10-NEXT: ret void
;
@@ -865,19 +865,17 @@ define amdgpu_kernel void @with_inline_asm() {
}
;.
-; GFX9: attributes #[[ATTR0]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx900" "uniform-work-group-size"="false" }
-; GFX9: attributes #[[ATTR1]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx900" "uniform-work-group-size"="false" }
+; GFX9: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx900" "uniform-work-group-size"="false" }
+; GFX9: attributes #[[ATTR1]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx900" "uniform-work-group-size"="false" }
; GFX9: attributes #[[ATTR2]] = { "target-cpu"="gfx900" "uniform-work-group-size"="false" }
-; GFX9: attributes #[[ATTR3]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx900" "uniform-work-group-size"="false" }
-; GFX9: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) "target-cpu"="gfx900" }
-; GFX9: attributes #[[ATTR5]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx900" "uniform-work-group-size"="false" }
+; GFX9: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) "target-cpu"="gfx900" }
+; GFX9: attributes #[[ATTR4]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx900" "uniform-work-group-size"="false" }
;.
-; GFX10: attributes #[[ATTR0]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx1010" "uniform-work-group-size"="false" }
-; GFX10: attributes #[[ATTR1]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx1010" "uniform-work-group-size"="false" }
+; GFX10: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx1010" "uniform-work-group-size"="false" }
+; GFX10: attributes #[[ATTR1]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx1010" "uniform-work-group-size"="false" }
; GFX10: attributes #[[ATTR2]] = { "target-cpu"="gfx1010" "uniform-work-group-size"="false" }
-; GFX10: attributes #[[ATTR3]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx1010" "uniform-work-group-size"="false" }
-; GFX10: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) "target-cpu"="gfx1010" }
-; GFX10: attributes #[[ATTR5]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx1010" "uniform-work-group-size"="false" }
+; GFX10: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) "target-cpu"="gfx1010" }
+; GFX10: attributes #[[ATTR4]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx1010" "uniform-work-group-size"="false" }
;.
; GFX9: [[META0]] = !{i32 2, i32 10}
; GFX9: [[META1]] = !{i32 1, i32 2, i32 3, i32 10}
diff --git a/llvm/test/CodeGen/AMDGPU/bf16.ll b/llvm/test/CodeGen/AMDGPU/bf16.ll
index 6b5647e..4b14dc6 100644
--- a/llvm/test/CodeGen/AMDGPU/bf16.ll
+++ b/llvm/test/CodeGen/AMDGPU/bf16.ll
@@ -7,11 +7,9 @@
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1010 | FileCheck %s -check-prefixes=GFX10
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 | FileCheck %s -check-prefixes=GFX11,GFX11TRUE16
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 | FileCheck %s -check-prefixes=GFX11,GFX11FAKE16
-; xUN: llc < %s -mtriple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 | FileCheck %s -check-prefixes=GFX1250,GFX1250TRUE16
+; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 | FileCheck %s -check-prefixes=GFX1250,GFX1250TRUE16
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 | FileCheck %s -check-prefixes=GFX1250,GFX1250FAKE16
-; FIXME: real-true16 version of gfx1250 test fails
-
define void @test_load_store(ptr addrspace(1) %in, ptr addrspace(1) %out) {
; GCN-LABEL: test_load_store:
; GCN: ; %bb.0:
@@ -2393,15 +2391,25 @@ define void @test_store_fpimm(ptr addrspace(1) %ptr0, ptr addrspace(1) %ptr1) {
; GFX11FAKE16-NEXT: global_store_b16 v[2:3], v5, off
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: test_store_fpimm:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_mov_b32_e32 v4, 0x3f80
-; GFX1250-NEXT: v_mov_b32_e32 v5, 0x4228
-; GFX1250-NEXT: global_store_b16 v[0:1], v4, off
-; GFX1250-NEXT: global_store_b16 v[2:3], v5, off
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: test_store_fpimm:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v4.l, 0x3f80
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v4.h, 0x4228
+; GFX1250TRUE16-NEXT: global_store_b16 v[0:1], v4, off
+; GFX1250TRUE16-NEXT: global_store_d16_hi_b16 v[2:3], v4, off
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: test_store_fpimm:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_mov_b32_e32 v4, 0x3f80
+; GFX1250FAKE16-NEXT: v_mov_b32_e32 v5, 0x4228
+; GFX1250FAKE16-NEXT: global_store_b16 v[0:1], v4, off
+; GFX1250FAKE16-NEXT: global_store_b16 v[2:3], v5, off
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
store bfloat 1.0, ptr addrspace(1) %ptr0
store bfloat 42.0, ptr addrspace(1) %ptr1
ret void
@@ -3796,13 +3804,21 @@ define amdgpu_gfx void @test_inreg_arg_store(bfloat inreg %in, ptr addrspace(1)
; GFX11FAKE16-NEXT: global_store_b16 v[0:1], v2, off
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: test_inreg_arg_store:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_mov_b32_e32 v2, s4
-; GFX1250-NEXT: global_store_b16 v[0:1], v2, off
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: test_inreg_arg_store:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v2.l, s4
+; GFX1250TRUE16-NEXT: global_store_b16 v[0:1], v2, off
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: test_inreg_arg_store:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_mov_b32_e32 v2, s4
+; GFX1250FAKE16-NEXT: global_store_b16 v[0:1], v2, off
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
store bfloat %in, ptr addrspace(1) %out
ret void
}
@@ -3866,12 +3882,20 @@ define bfloat @test_byval(ptr addrspace(5) byval(bfloat) %bv, bfloat %val) {
; GFX11FAKE16-NEXT: scratch_store_b16 off, v0, s32
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: test_byval:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: scratch_store_b16 off, v0, s32
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: test_byval:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.l, v0.l
+; GFX1250TRUE16-NEXT: scratch_store_b16 off, v1, s32
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: test_byval:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: scratch_store_b16 off, v0, s32
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
store bfloat %val, ptr addrspace(5) %bv
%retval = load bfloat, ptr addrspace(5) %bv
ret bfloat %retval
@@ -6708,27 +6732,50 @@ define { <32 x i32>, bfloat } @test_overflow_stack(bfloat %a, <32 x i32> %b) {
; GFX11FAKE16-NEXT: scratch_store_b16 v0, v1, off offset:128
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: test_overflow_stack:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: s_clause 0x2
-; GFX1250-NEXT: scratch_load_b32 v33, off, s32 offset:8
-; GFX1250-NEXT: scratch_load_b32 v32, off, s32 offset:4
-; GFX1250-NEXT: scratch_load_b32 v31, off, s32
-; GFX1250-NEXT: s_clause 0x5
-; GFX1250-NEXT: scratch_store_b128 v0, v[22:25], off offset:80
-; GFX1250-NEXT: scratch_store_b128 v0, v[18:21], off offset:64
-; GFX1250-NEXT: scratch_store_b128 v0, v[14:17], off offset:48
-; GFX1250-NEXT: scratch_store_b128 v0, v[10:13], off offset:32
-; GFX1250-NEXT: scratch_store_b128 v0, v[6:9], off offset:16
-; GFX1250-NEXT: scratch_store_b128 v0, v[2:5], off
-; GFX1250-NEXT: s_wait_loadcnt 0x0
-; GFX1250-NEXT: s_clause 0x2
-; GFX1250-NEXT: scratch_store_b128 v0, v[30:33], off offset:112
-; GFX1250-NEXT: scratch_store_b128 v0, v[26:29], off offset:96
-; GFX1250-NEXT: scratch_store_b16 v0, v1, off offset:128
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: test_overflow_stack:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: s_clause 0x2
+; GFX1250TRUE16-NEXT: scratch_load_b32 v33, off, s32 offset:8
+; GFX1250TRUE16-NEXT: scratch_load_b32 v32, off, s32 offset:4
+; GFX1250TRUE16-NEXT: scratch_load_b32 v31, off, s32
+; GFX1250TRUE16-NEXT: s_clause 0x3
+; GFX1250TRUE16-NEXT: scratch_store_b128 v0, v[22:25], off offset:80
+; GFX1250TRUE16-NEXT: scratch_store_b128 v0, v[18:21], off offset:64
+; GFX1250TRUE16-NEXT: scratch_store_b128 v0, v[14:17], off offset:48
+; GFX1250TRUE16-NEXT: scratch_store_b128 v0, v[10:13], off offset:32
+; GFX1250TRUE16-NEXT: s_clause 0x1
+; GFX1250TRUE16-NEXT: scratch_store_b128 v0, v[6:9], off offset:16
+; GFX1250TRUE16-NEXT: scratch_store_b128 v0, v[2:5], off
+; GFX1250TRUE16-NEXT: s_wait_loadcnt 0x0
+; GFX1250TRUE16-NEXT: s_clause 0x2
+; GFX1250TRUE16-NEXT: scratch_store_b128 v0, v[30:33], off offset:112
+; GFX1250TRUE16-NEXT: scratch_store_b128 v0, v[26:29], off offset:96
+; GFX1250TRUE16-NEXT: scratch_store_b16 v0, v1, off offset:128
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: test_overflow_stack:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: s_clause 0x2
+; GFX1250FAKE16-NEXT: scratch_load_b32 v33, off, s32 offset:8
+; GFX1250FAKE16-NEXT: scratch_load_b32 v32, off, s32 offset:4
+; GFX1250FAKE16-NEXT: scratch_load_b32 v31, off, s32
+; GFX1250FAKE16-NEXT: s_clause 0x5
+; GFX1250FAKE16-NEXT: scratch_store_b128 v0, v[22:25], off offset:80
+; GFX1250FAKE16-NEXT: scratch_store_b128 v0, v[18:21], off offset:64
+; GFX1250FAKE16-NEXT: scratch_store_b128 v0, v[14:17], off offset:48
+; GFX1250FAKE16-NEXT: scratch_store_b128 v0, v[10:13], off offset:32
+; GFX1250FAKE16-NEXT: scratch_store_b128 v0, v[6:9], off offset:16
+; GFX1250FAKE16-NEXT: scratch_store_b128 v0, v[2:5], off
+; GFX1250FAKE16-NEXT: s_wait_loadcnt 0x0
+; GFX1250FAKE16-NEXT: s_clause 0x2
+; GFX1250FAKE16-NEXT: scratch_store_b128 v0, v[30:33], off offset:112
+; GFX1250FAKE16-NEXT: scratch_store_b128 v0, v[26:29], off offset:96
+; GFX1250FAKE16-NEXT: scratch_store_b16 v0, v1, off offset:128
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%ins.0 = insertvalue { <32 x i32>, bfloat } poison, <32 x i32> %b, 0
%ins.1 = insertvalue { <32 x i32>, bfloat } %ins.0 ,bfloat %a, 1
ret { <32 x i32>, bfloat } %ins.1
@@ -10726,15 +10773,29 @@ define bfloat @v_fadd_bf16(bfloat %a, bfloat %b) {
; GFX11FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_fadd_bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_dual_lshlrev_b32 v1, 16, v1 :: v_dual_lshlrev_b32 v0, 16, v0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT: v_add_f32_e32 v0, v0, v1
-; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_fadd_bf16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v2.l, 0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v2.h, v1.l
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.h, v0.l
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l
+; GFX1250TRUE16-NEXT: v_add_f32_e32 v0, v1, v2
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_fadd_bf16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_dual_lshlrev_b32 v1, 16, v1 :: v_dual_lshlrev_b32 v0, 16, v0
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_add_f32_e32 v0, v0, v1
+; GFX1250FAKE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = fadd bfloat %a, %b
ret bfloat %op
}
@@ -15268,15 +15329,26 @@ define bfloat @v_fadd_bf16_fpimm_0(bfloat %arg0) {
; GFX11FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_fadd_bf16_fpimm_0:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_lshlrev_b32_e32 v0, 16, v0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT: v_add_f32_e32 v0, 1.0, v0
-; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_fadd_bf16_fpimm_0:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.l, 0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.h, v0.l
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_add_f32_e32 v0, 1.0, v1
+; GFX1250TRUE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_fadd_bf16_fpimm_0:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_add_f32_e32 v0, 1.0, v0
+; GFX1250FAKE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%add = fadd bfloat %arg0, 1.0
ret bfloat %add
}
@@ -15382,15 +15454,26 @@ define bfloat @v_fadd_bf16_fpimm_1(bfloat %arg0) {
; GFX11FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_fadd_bf16_fpimm_1:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_lshlrev_b32_e32 v0, 16, v0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT: v_add_f32_e32 v0, 0x42280000, v0
-; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_fadd_bf16_fpimm_1:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.l, 0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.h, v0.l
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_add_f32_e32 v0, 0x42280000, v1
+; GFX1250TRUE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_fadd_bf16_fpimm_1:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_add_f32_e32 v0, 0x42280000, v0
+; GFX1250FAKE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%add = fadd bfloat %arg0, 42.0
ret bfloat %add
}
@@ -15507,15 +15590,29 @@ define bfloat @v_fsub_bf16(bfloat %a, bfloat %b) {
; GFX11FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_fsub_bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_dual_lshlrev_b32 v1, 16, v1 :: v_dual_lshlrev_b32 v0, 16, v0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_fsub_bf16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v2.l, 0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v2.h, v1.l
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.h, v0.l
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l
+; GFX1250TRUE16-NEXT: v_sub_f32_e32 v0, v1, v2
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_fsub_bf16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_dual_lshlrev_b32 v1, 16, v1 :: v_dual_lshlrev_b32 v0, 16, v0
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX1250FAKE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = fsub bfloat %a, %b
ret bfloat %op
}
@@ -15931,21 +16028,37 @@ define <3 x bfloat> @v_fsub_v3bf16(<3 x bfloat> %a, <3 x bfloat> %b) {
; GFX11FAKE16-NEXT: v_alignbit_b32 v1, s0, v1, 16
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_fsub_v3bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_lshlrev_b32_e32 v3, 16, v3
-; GFX1250-NEXT: v_and_b32_e32 v4, 0xffff0000, v2
-; GFX1250-NEXT: v_and_b32_e32 v5, 0xffff0000, v0
-; GFX1250-NEXT: v_dual_lshlrev_b32 v2, 16, v2 :: v_dual_lshlrev_b32 v0, 16, v0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT: v_dual_sub_f32 v4, v5, v4 :: v_dual_lshlrev_b32 v1, 16, v1
-; GFX1250-NEXT: v_dual_sub_f32 v0, v0, v2 :: v_dual_sub_f32 v1, v1, v3
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, v0, v4
-; GFX1250-NEXT: v_cvt_pk_bf16_f32 v1, v1, s0
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_fsub_v3bf16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_dual_lshlrev_b32 v3, 16, v3 :: v_dual_lshlrev_b32 v1, 16, v1
+; GFX1250TRUE16-NEXT: v_and_b32_e32 v4, 0xffff0000, v2
+; GFX1250TRUE16-NEXT: v_and_b32_e32 v5, 0xffff0000, v0
+; GFX1250TRUE16-NEXT: v_dual_lshlrev_b32 v2, 16, v2 :: v_dual_lshlrev_b32 v0, 16, v0
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250TRUE16-NEXT: v_sub_f32_e32 v1, v1, v3
+; GFX1250TRUE16-NEXT: v_dual_sub_f32 v3, v5, v4 :: v_dual_sub_f32 v0, v0, v2
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250TRUE16-NEXT: v_cvt_pk_bf16_f32 v1, v1, s0
+; GFX1250TRUE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, v3
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_fsub_v3bf16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_lshlrev_b32_e32 v3, 16, v3
+; GFX1250FAKE16-NEXT: v_and_b32_e32 v4, 0xffff0000, v2
+; GFX1250FAKE16-NEXT: v_and_b32_e32 v5, 0xffff0000, v0
+; GFX1250FAKE16-NEXT: v_dual_lshlrev_b32 v2, 16, v2 :: v_dual_lshlrev_b32 v0, 16, v0
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_dual_sub_f32 v4, v5, v4 :: v_dual_lshlrev_b32 v1, 16, v1
+; GFX1250FAKE16-NEXT: v_dual_sub_f32 v0, v0, v2 :: v_dual_sub_f32 v1, v1, v3
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250FAKE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, v4
+; GFX1250FAKE16-NEXT: v_cvt_pk_bf16_f32 v1, v1, s0
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = fsub <3 x bfloat> %a, %b
ret <3 x bfloat> %op
}
@@ -16371,12 +16484,26 @@ define bfloat @v_fmul_bf16(bfloat %a, bfloat %b) {
; GFX11FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_fmul_bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_fma_mixlo_bf16 v0, v0, v1, 0 op_sel_hi:[1,1,0]
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_fmul_bf16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v2.l, 0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v2.h, v1.l
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.h, v0.l
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l
+; GFX1250TRUE16-NEXT: v_mul_f32_e32 v0, v1, v2
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_fmul_bf16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_fma_mixlo_bf16 v0, v0, v1, 0 op_sel_hi:[1,1,0]
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = fmul bfloat %a, %b
ret bfloat %op
}
@@ -21012,31 +21139,60 @@ define bfloat @v_fdiv_bf16(bfloat %a, bfloat %b) {
; GFX11FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_fdiv_bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_dual_lshlrev_b32 v0, 16, v0 :: v_dual_lshlrev_b32 v1, 16, v1
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT: v_div_scale_f32 v2, null, v1, v1, v0
-; GFX1250-NEXT: v_rcp_f32_e32 v3, v2
-; GFX1250-NEXT: v_nop
-; GFX1250-NEXT: s_delay_alu instid0(TRANS32_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT: v_fma_f32 v4, -v2, v3, 1.0
-; GFX1250-NEXT: v_fmac_f32_e32 v3, v4, v3
-; GFX1250-NEXT: v_div_scale_f32 v4, vcc_lo, v0, v1, v0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT: v_mul_f32_e32 v5, v4, v3
-; GFX1250-NEXT: v_fma_f32 v6, -v2, v5, v4
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT: v_fmac_f32_e32 v5, v6, v3
-; GFX1250-NEXT: v_fma_f32 v2, -v2, v5, v4
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT: v_div_fmas_f32 v2, v2, v3, v5
-; GFX1250-NEXT: v_div_fixup_f32 v0, v2, v1, v0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_fdiv_bf16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v2.l, 0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v2.h, v0.l
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v0.l, v2.l
+; GFX1250TRUE16-NEXT: v_div_scale_f32 v1, null, v0, v0, v2
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(TRANS32_DEP_1)
+; GFX1250TRUE16-NEXT: v_rcp_f32_e32 v3, v1
+; GFX1250TRUE16-NEXT: v_nop
+; GFX1250TRUE16-NEXT: v_fma_f32 v4, -v1, v3, 1.0
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_fmac_f32_e32 v3, v4, v3
+; GFX1250TRUE16-NEXT: v_div_scale_f32 v4, vcc_lo, v2, v0, v2
+; GFX1250TRUE16-NEXT: v_mul_f32_e32 v5, v4, v3
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_fma_f32 v6, -v1, v5, v4
+; GFX1250TRUE16-NEXT: v_fmac_f32_e32 v5, v6, v3
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_fma_f32 v1, -v1, v5, v4
+; GFX1250TRUE16-NEXT: v_div_fmas_f32 v1, v1, v3, v5
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_div_fixup_f32 v0, v1, v0, v2
+; GFX1250TRUE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_fdiv_bf16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_dual_lshlrev_b32 v0, 16, v0 :: v_dual_lshlrev_b32 v1, 16, v1
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_div_scale_f32 v2, null, v1, v1, v0
+; GFX1250FAKE16-NEXT: v_rcp_f32_e32 v3, v2
+; GFX1250FAKE16-NEXT: v_nop
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(TRANS32_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_fma_f32 v4, -v2, v3, 1.0
+; GFX1250FAKE16-NEXT: v_fmac_f32_e32 v3, v4, v3
+; GFX1250FAKE16-NEXT: v_div_scale_f32 v4, vcc_lo, v0, v1, v0
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_mul_f32_e32 v5, v4, v3
+; GFX1250FAKE16-NEXT: v_fma_f32 v6, -v2, v5, v4
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_fmac_f32_e32 v5, v6, v3
+; GFX1250FAKE16-NEXT: v_fma_f32 v2, -v2, v5, v4
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_div_fmas_f32 v2, v2, v3, v5
+; GFX1250FAKE16-NEXT: v_div_fixup_f32 v0, v2, v1, v0
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = fdiv bfloat %a, %b
ret bfloat %op
}
@@ -21092,12 +21248,19 @@ define bfloat @v_fabs_bf16(bfloat %a) {
; GFX11FAKE16-NEXT: v_and_b32_e32 v0, 0x7fff, v0
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_fabs_bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_and_b32_e32 v0, 0x7fff, v0
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_fabs_bf16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_and_b16 v0.l, 0x7fff, v0.l
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_fabs_bf16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_and_b32_e32 v0, 0x7fff, v0
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = call bfloat @llvm.fabs.bf16(bfloat %a)
ret bfloat %op
}
@@ -21198,12 +21361,19 @@ define bfloat @v_fneg_bf16(bfloat %a) {
; GFX11FAKE16-NEXT: v_xor_b32_e32 v0, 0x8000, v0
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_fneg_bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_xor_b32_e32 v0, 0x8000, v0
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_fneg_bf16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_xor_b16 v0.l, 0x8000, v0.l
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_fneg_bf16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_xor_b32_e32 v0, 0x8000, v0
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = fneg bfloat %a
ret bfloat %op
}
@@ -21317,12 +21487,19 @@ define bfloat @v_fneg_fabs_bf16(bfloat %a) {
; GFX11FAKE16-NEXT: v_or_b32_e32 v0, 0x8000, v0
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_fneg_fabs_bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_or_b32_e32 v0, 0x8000, v0
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_fneg_fabs_bf16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_or_b16 v0.l, 0x8000, v0.l
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_fneg_fabs_bf16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_or_b32_e32 v0, 0x8000, v0
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%fabs = call bfloat @llvm.fabs.bf16(bfloat %a)
%op = fneg bfloat %fabs
ret bfloat %op
@@ -21511,15 +21688,29 @@ define bfloat @v_minnum_bf16(bfloat %a, bfloat %b) {
; GFX11FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_minnum_bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_dual_lshlrev_b32 v1, 16, v1 :: v_dual_lshlrev_b32 v0, 16, v0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT: v_min_num_f32_e32 v0, v0, v1
-; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_minnum_bf16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v2.l, 0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v2.h, v1.l
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.h, v0.l
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l
+; GFX1250TRUE16-NEXT: v_min_num_f32_e32 v0, v1, v2
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_minnum_bf16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_dual_lshlrev_b32 v1, 16, v1 :: v_dual_lshlrev_b32 v0, 16, v0
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_min_num_f32_e32 v0, v0, v1
+; GFX1250FAKE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = call bfloat @llvm.minnum.bf16(bfloat %a, bfloat %b)
ret bfloat %op
}
@@ -26073,15 +26264,29 @@ define bfloat @v_maxnum_bf16(bfloat %a, bfloat %b) {
; GFX11FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_maxnum_bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_dual_lshlrev_b32 v1, 16, v1 :: v_dual_lshlrev_b32 v0, 16, v0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT: v_max_num_f32_e32 v0, v0, v1
-; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_maxnum_bf16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v2.l, 0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v2.h, v1.l
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.h, v0.l
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l
+; GFX1250TRUE16-NEXT: v_max_num_f32_e32 v0, v1, v2
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_maxnum_bf16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_dual_lshlrev_b32 v1, 16, v1 :: v_dual_lshlrev_b32 v0, 16, v0
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_max_num_f32_e32 v0, v0, v1
+; GFX1250FAKE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = call bfloat @llvm.maxnum.bf16(bfloat %a, bfloat %b)
ret bfloat %op
}
@@ -30764,12 +30969,19 @@ define bfloat @v_sqrt_bf16(bfloat %a) {
; GFX11FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_sqrt_bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_sqrt_bf16_e32 v0, v0
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_sqrt_bf16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_sqrt_bf16_e32 v0.l, v0.l
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_sqrt_bf16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_sqrt_bf16_e32 v0, v0
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = call bfloat @llvm.sqrt.bf16(bfloat %a)
ret bfloat %op
}
@@ -30877,15 +31089,26 @@ define bfloat @v_ldexp_bf16_i32(bfloat %a, i32 %b) {
; GFX11FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_ldexp_bf16_i32:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_lshlrev_b32_e32 v0, 16, v0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT: v_ldexp_f32 v0, v0, v1
-; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_ldexp_bf16_i32:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v2.l, 0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v2.h, v0.l
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_ldexp_f32 v0, v2, v1
+; GFX1250TRUE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_ldexp_bf16_i32:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_ldexp_f32 v0, v0, v1
+; GFX1250FAKE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = call bfloat @llvm.ldexp.bf16.i32(bfloat %a, i32 %b)
ret bfloat %op
}
@@ -31005,16 +31228,28 @@ define { bfloat, i16 } @v_frexp_bf16_i16(bfloat %a) {
; GFX11FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_frexp_bf16_i16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_lshlrev_b32_e32 v1, 16, v0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX1250-NEXT: v_frexp_mant_f32_e32 v0, v1
-; GFX1250-NEXT: v_frexp_exp_i32_f32_e32 v1, v1
-; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_frexp_bf16_i16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.l, 0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.h, v0.l
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1250TRUE16-NEXT: v_frexp_mant_f32_e32 v0, v1
+; GFX1250TRUE16-NEXT: v_frexp_exp_i32_f32_e32 v1, v1
+; GFX1250TRUE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_frexp_bf16_i16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v0
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1250FAKE16-NEXT: v_frexp_mant_f32_e32 v0, v1
+; GFX1250FAKE16-NEXT: v_frexp_exp_i32_f32_e32 v1, v1
+; GFX1250FAKE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = call { bfloat, i16 } @llvm.frexp.bf16.i16(bfloat %a)
ret { bfloat, i16 } %op
}
@@ -31254,31 +31489,58 @@ define bfloat @v_log_bf16(bfloat %a) {
; GFX11FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_log_bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_lshlrev_b32_e32 v0, 16, v0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX1250-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX1250-NEXT: v_cndmask_b32_e64 v1, 0, 32, vcc_lo
-; GFX1250-NEXT: v_ldexp_f32 v0, v0, v1
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(TRANS32_DEP_1)
-; GFX1250-NEXT: v_log_f32_e32 v0, v0
-; GFX1250-NEXT: v_nop
-; GFX1250-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
-; GFX1250-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1
-; GFX1250-NEXT: v_fmamk_f32 v2, v0, 0x3377d1cf, v2
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT: v_add_f32_e32 v1, v1, v2
-; GFX1250-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0
-; GFX1250-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, vcc_lo
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_log_bf16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.l, 0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.h, v0.l
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v1
+; GFX1250TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 32, vcc_lo
+; GFX1250TRUE16-NEXT: v_ldexp_f32 v0, v1, v0
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(TRANS32_DEP_1)
+; GFX1250TRUE16-NEXT: v_log_f32_e32 v0, v0
+; GFX1250TRUE16-NEXT: v_nop
+; GFX1250TRUE16-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
+; GFX1250TRUE16-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1
+; GFX1250TRUE16-NEXT: v_fmamk_f32 v2, v0, 0x3377d1cf, v2
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX1250TRUE16-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0
+; GFX1250TRUE16-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, vcc_lo
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX1250TRUE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_log_bf16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e64 v1, 0, 32, vcc_lo
+; GFX1250FAKE16-NEXT: v_ldexp_f32 v0, v0, v1
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(TRANS32_DEP_1)
+; GFX1250FAKE16-NEXT: v_log_f32_e32 v0, v0
+; GFX1250FAKE16-NEXT: v_nop
+; GFX1250FAKE16-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
+; GFX1250FAKE16-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1
+; GFX1250FAKE16-NEXT: v_fmamk_f32 v2, v0, 0x3377d1cf, v2
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, vcc_lo
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX1250FAKE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = call bfloat @llvm.log.bf16(bfloat %a)
ret bfloat %op
}
@@ -31439,12 +31701,19 @@ define bfloat @v_log2_bf16(bfloat %a) {
; GFX11FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_log2_bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_log_bf16_e32 v0, v0
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_log2_bf16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_log_bf16_e32 v0.l, v0.l
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_log2_bf16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_log_bf16_e32 v0, v0
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = call bfloat @llvm.log2.bf16(bfloat %a)
ret bfloat %op
}
@@ -31679,31 +31948,58 @@ define bfloat @v_log10_bf16(bfloat %a) {
; GFX11FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_log10_bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_lshlrev_b32_e32 v0, 16, v0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX1250-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX1250-NEXT: v_cndmask_b32_e64 v1, 0, 32, vcc_lo
-; GFX1250-NEXT: v_ldexp_f32 v0, v0, v1
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(TRANS32_DEP_1)
-; GFX1250-NEXT: v_log_f32_e32 v0, v0
-; GFX1250-NEXT: v_nop
-; GFX1250-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
-; GFX1250-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1
-; GFX1250-NEXT: v_fmamk_f32 v2, v0, 0x3284fbcf, v2
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT: v_add_f32_e32 v1, v1, v2
-; GFX1250-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0
-; GFX1250-NEXT: v_cndmask_b32_e64 v1, 0, 0x411a209b, vcc_lo
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_log10_bf16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.l, 0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.h, v0.l
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v1
+; GFX1250TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 32, vcc_lo
+; GFX1250TRUE16-NEXT: v_ldexp_f32 v0, v1, v0
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(TRANS32_DEP_1)
+; GFX1250TRUE16-NEXT: v_log_f32_e32 v0, v0
+; GFX1250TRUE16-NEXT: v_nop
+; GFX1250TRUE16-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
+; GFX1250TRUE16-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1
+; GFX1250TRUE16-NEXT: v_fmamk_f32 v2, v0, 0x3284fbcf, v2
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX1250TRUE16-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0
+; GFX1250TRUE16-NEXT: v_cndmask_b32_e64 v1, 0, 0x411a209b, vcc_lo
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX1250TRUE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_log10_bf16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e64 v1, 0, 32, vcc_lo
+; GFX1250FAKE16-NEXT: v_ldexp_f32 v0, v0, v1
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(TRANS32_DEP_1)
+; GFX1250FAKE16-NEXT: v_log_f32_e32 v0, v0
+; GFX1250FAKE16-NEXT: v_nop
+; GFX1250FAKE16-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
+; GFX1250FAKE16-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1
+; GFX1250FAKE16-NEXT: v_fmamk_f32 v2, v0, 0x3284fbcf, v2
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e64 v1, 0, 0x411a209b, vcc_lo
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX1250FAKE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = call bfloat @llvm.log10.bf16(bfloat %a)
ret bfloat %op
}
@@ -31946,34 +32242,65 @@ define bfloat @v_exp_bf16(bfloat %a) {
; GFX11FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_exp_bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_lshlrev_b32_e32 v1, 16, v0
-; GFX1250-NEXT: s_mov_b32 s0, 0x3fb8aa3b
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v1
-; GFX1250-NEXT: v_rndne_f32_e32 v3, v2
-; GFX1250-NEXT: v_fma_mix_f32_bf16 v4, v0, s0, -v2 op_sel_hi:[1,0,0]
-; GFX1250-NEXT: s_mov_b32 s0, 0x32a5705f
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1250-NEXT: v_sub_f32_e32 v2, v2, v3
-; GFX1250-NEXT: v_fma_mix_f32_bf16 v0, v0, s0, v4 op_sel_hi:[1,0,0]
-; GFX1250-NEXT: v_cmp_ngt_f32_e32 vcc_lo, 0xc2ce8ed0, v1
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX1250-NEXT: v_add_f32_e32 v0, v2, v0
-; GFX1250-NEXT: v_cvt_i32_f32_e32 v2, v3
-; GFX1250-NEXT: v_exp_f32_e32 v0, v0
-; GFX1250-NEXT: v_nop
-; GFX1250-NEXT: s_delay_alu instid0(TRANS32_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT: v_ldexp_f32 v0, v0, v2
-; GFX1250-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc_lo
-; GFX1250-NEXT: v_cmp_nlt_f32_e32 vcc_lo, 0x42b17218, v1
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT: v_cndmask_b32_e32 v0, 0x7f800000, v0, vcc_lo
-; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_exp_bf16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.l, 0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.h, v0.l
+; GFX1250TRUE16-NEXT: s_mov_b32 s0, 0x3fb8aa3b
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1250TRUE16-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v1
+; GFX1250TRUE16-NEXT: v_cmp_ngt_f32_e32 vcc_lo, 0xc2ce8ed0, v1
+; GFX1250TRUE16-NEXT: v_fma_mix_f32_bf16 v3, v0, s0, -v2 op_sel_hi:[1,0,0]
+; GFX1250TRUE16-NEXT: v_rndne_f32_e32 v4, v2
+; GFX1250TRUE16-NEXT: s_mov_b32 s0, 0x32a5705f
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instid1(SALU_CYCLE_1)
+; GFX1250TRUE16-NEXT: v_fma_mix_f32_bf16 v0, v0, s0, v3 op_sel_hi:[1,0,0]
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_sub_f32_e32 v2, v2, v4
+; GFX1250TRUE16-NEXT: v_add_f32_e32 v0, v2, v0
+; GFX1250TRUE16-NEXT: v_cvt_i32_f32_e32 v2, v4
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(TRANS32_DEP_1)
+; GFX1250TRUE16-NEXT: v_exp_f32_e32 v0, v0
+; GFX1250TRUE16-NEXT: v_nop
+; GFX1250TRUE16-NEXT: v_ldexp_f32 v0, v0, v2
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1250TRUE16-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc_lo
+; GFX1250TRUE16-NEXT: v_cmp_nlt_f32_e32 vcc_lo, 0x42b17218, v1
+; GFX1250TRUE16-NEXT: v_cndmask_b32_e32 v0, 0x7f800000, v0, vcc_lo
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_exp_bf16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v0
+; GFX1250FAKE16-NEXT: s_mov_b32 s0, 0x3fb8aa3b
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v1
+; GFX1250FAKE16-NEXT: v_rndne_f32_e32 v3, v2
+; GFX1250FAKE16-NEXT: v_fma_mix_f32_bf16 v4, v0, s0, -v2 op_sel_hi:[1,0,0]
+; GFX1250FAKE16-NEXT: s_mov_b32 s0, 0x32a5705f
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250FAKE16-NEXT: v_sub_f32_e32 v2, v2, v3
+; GFX1250FAKE16-NEXT: v_fma_mix_f32_bf16 v0, v0, s0, v4 op_sel_hi:[1,0,0]
+; GFX1250FAKE16-NEXT: v_cmp_ngt_f32_e32 vcc_lo, 0xc2ce8ed0, v1
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1250FAKE16-NEXT: v_add_f32_e32 v0, v2, v0
+; GFX1250FAKE16-NEXT: v_cvt_i32_f32_e32 v2, v3
+; GFX1250FAKE16-NEXT: v_exp_f32_e32 v0, v0
+; GFX1250FAKE16-NEXT: v_nop
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(TRANS32_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_ldexp_f32 v0, v0, v2
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc_lo
+; GFX1250FAKE16-NEXT: v_cmp_nlt_f32_e32 vcc_lo, 0x42b17218, v1
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x7f800000, v0, vcc_lo
+; GFX1250FAKE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = call bfloat @llvm.exp.bf16(bfloat %a)
ret bfloat %op
}
@@ -32138,12 +32465,19 @@ define bfloat @v_exp2_bf16(bfloat %a) {
; GFX11FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_exp2_bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_exp_bf16_e32 v0, v0
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_exp2_bf16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_exp_bf16_e32 v0.l, v0.l
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_exp2_bf16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_exp_bf16_e32 v0, v0
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = call bfloat @llvm.exp2.bf16(bfloat %a)
ret bfloat %op
}
@@ -32382,34 +32716,65 @@ define bfloat @v_exp10_bf16(bfloat %a) {
; GFX11FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_exp10_bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_lshlrev_b32_e32 v1, 16, v0
-; GFX1250-NEXT: s_mov_b32 s0, 0x40549a78
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT: v_mul_f32_e32 v2, 0x40549a78, v1
-; GFX1250-NEXT: v_rndne_f32_e32 v3, v2
-; GFX1250-NEXT: v_fma_mix_f32_bf16 v4, v0, s0, -v2 op_sel_hi:[1,0,0]
-; GFX1250-NEXT: s_mov_b32 s0, 0x33979a37
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1250-NEXT: v_sub_f32_e32 v2, v2, v3
-; GFX1250-NEXT: v_fma_mix_f32_bf16 v0, v0, s0, v4 op_sel_hi:[1,0,0]
-; GFX1250-NEXT: v_cmp_ngt_f32_e32 vcc_lo, 0xc23369f4, v1
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX1250-NEXT: v_add_f32_e32 v0, v2, v0
-; GFX1250-NEXT: v_cvt_i32_f32_e32 v2, v3
-; GFX1250-NEXT: v_exp_f32_e32 v0, v0
-; GFX1250-NEXT: v_nop
-; GFX1250-NEXT: s_delay_alu instid0(TRANS32_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT: v_ldexp_f32 v0, v0, v2
-; GFX1250-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc_lo
-; GFX1250-NEXT: v_cmp_nlt_f32_e32 vcc_lo, 0x421a209b, v1
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT: v_cndmask_b32_e32 v0, 0x7f800000, v0, vcc_lo
-; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_exp10_bf16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.l, 0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.h, v0.l
+; GFX1250TRUE16-NEXT: s_mov_b32 s0, 0x40549a78
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1250TRUE16-NEXT: v_mul_f32_e32 v2, 0x40549a78, v1
+; GFX1250TRUE16-NEXT: v_cmp_ngt_f32_e32 vcc_lo, 0xc23369f4, v1
+; GFX1250TRUE16-NEXT: v_fma_mix_f32_bf16 v3, v0, s0, -v2 op_sel_hi:[1,0,0]
+; GFX1250TRUE16-NEXT: v_rndne_f32_e32 v4, v2
+; GFX1250TRUE16-NEXT: s_mov_b32 s0, 0x33979a37
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instid1(SALU_CYCLE_1)
+; GFX1250TRUE16-NEXT: v_fma_mix_f32_bf16 v0, v0, s0, v3 op_sel_hi:[1,0,0]
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_sub_f32_e32 v2, v2, v4
+; GFX1250TRUE16-NEXT: v_add_f32_e32 v0, v2, v0
+; GFX1250TRUE16-NEXT: v_cvt_i32_f32_e32 v2, v4
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(TRANS32_DEP_1)
+; GFX1250TRUE16-NEXT: v_exp_f32_e32 v0, v0
+; GFX1250TRUE16-NEXT: v_nop
+; GFX1250TRUE16-NEXT: v_ldexp_f32 v0, v0, v2
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1250TRUE16-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc_lo
+; GFX1250TRUE16-NEXT: v_cmp_nlt_f32_e32 vcc_lo, 0x421a209b, v1
+; GFX1250TRUE16-NEXT: v_cndmask_b32_e32 v0, 0x7f800000, v0, vcc_lo
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_exp10_bf16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v0
+; GFX1250FAKE16-NEXT: s_mov_b32 s0, 0x40549a78
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_mul_f32_e32 v2, 0x40549a78, v1
+; GFX1250FAKE16-NEXT: v_rndne_f32_e32 v3, v2
+; GFX1250FAKE16-NEXT: v_fma_mix_f32_bf16 v4, v0, s0, -v2 op_sel_hi:[1,0,0]
+; GFX1250FAKE16-NEXT: s_mov_b32 s0, 0x33979a37
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250FAKE16-NEXT: v_sub_f32_e32 v2, v2, v3
+; GFX1250FAKE16-NEXT: v_fma_mix_f32_bf16 v0, v0, s0, v4 op_sel_hi:[1,0,0]
+; GFX1250FAKE16-NEXT: v_cmp_ngt_f32_e32 vcc_lo, 0xc23369f4, v1
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1250FAKE16-NEXT: v_add_f32_e32 v0, v2, v0
+; GFX1250FAKE16-NEXT: v_cvt_i32_f32_e32 v2, v3
+; GFX1250FAKE16-NEXT: v_exp_f32_e32 v0, v0
+; GFX1250FAKE16-NEXT: v_nop
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(TRANS32_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_ldexp_f32 v0, v0, v2
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc_lo
+; GFX1250FAKE16-NEXT: v_cmp_nlt_f32_e32 vcc_lo, 0x421a209b, v1
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x7f800000, v0, vcc_lo
+; GFX1250FAKE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = call bfloat @llvm.exp10.bf16(bfloat %a)
ret bfloat %op
}
@@ -32517,15 +32882,26 @@ define bfloat @v_ceil_bf16(bfloat %a) {
; GFX11FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_ceil_bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_lshlrev_b32_e32 v0, 16, v0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT: v_ceil_f32_e32 v0, v0
-; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_ceil_bf16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.l, 0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.h, v0.l
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_ceil_f32_e32 v0, v1
+; GFX1250TRUE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_ceil_bf16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_ceil_f32_e32 v0, v0
+; GFX1250FAKE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = call bfloat @llvm.ceil.bf16(bfloat %a)
ret bfloat %op
}
@@ -32633,15 +33009,26 @@ define bfloat @v_trunc_bf16(bfloat %a) {
; GFX11FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_trunc_bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_lshlrev_b32_e32 v0, 16, v0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT: v_trunc_f32_e32 v0, v0
-; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_trunc_bf16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.l, 0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.h, v0.l
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_trunc_f32_e32 v0, v1
+; GFX1250TRUE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_trunc_bf16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_trunc_f32_e32 v0, v0
+; GFX1250FAKE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = call bfloat @llvm.trunc.bf16(bfloat %a)
ret bfloat %op
}
@@ -32749,15 +33136,26 @@ define bfloat @v_rint_bf16(bfloat %a) {
; GFX11FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_rint_bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_lshlrev_b32_e32 v0, 16, v0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT: v_rndne_f32_e32 v0, v0
-; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_rint_bf16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.l, 0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.h, v0.l
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_rndne_f32_e32 v0, v1
+; GFX1250TRUE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_rint_bf16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_rndne_f32_e32 v0, v0
+; GFX1250FAKE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = call bfloat @llvm.rint.bf16(bfloat %a)
ret bfloat %op
}
@@ -32865,15 +33263,26 @@ define bfloat @v_nearbyint_bf16(bfloat %a) {
; GFX11FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_nearbyint_bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_lshlrev_b32_e32 v0, 16, v0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT: v_rndne_f32_e32 v0, v0
-; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_nearbyint_bf16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.l, 0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.h, v0.l
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_rndne_f32_e32 v0, v1
+; GFX1250TRUE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_nearbyint_bf16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_rndne_f32_e32 v0, v0
+; GFX1250FAKE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = call bfloat @llvm.nearbyint.bf16(bfloat %a)
ret bfloat %op
}
@@ -33031,23 +33440,42 @@ define bfloat @v_round_bf16(bfloat %a) {
; GFX11FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_round_bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_lshlrev_b32_e32 v0, 16, v0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT: v_trunc_f32_e32 v1, v0
-; GFX1250-NEXT: v_sub_f32_e32 v2, v0, v1
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT: v_cmp_ge_f32_e64 s0, |v2|, 0.5
-; GFX1250-NEXT: v_cndmask_b32_e64 v2, 0, 1.0, s0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT: v_bfi_b32 v0, 0x7fffffff, v2, v0
-; GFX1250-NEXT: v_add_f32_e32 v0, v1, v0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_round_bf16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.l, 0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.h, v0.l
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_trunc_f32_e32 v0, v1
+; GFX1250TRUE16-NEXT: v_sub_f32_e32 v2, v1, v0
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_cmp_ge_f32_e64 s0, |v2|, 0.5
+; GFX1250TRUE16-NEXT: v_cndmask_b32_e64 v2, 0, 1.0, s0
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_bfi_b32 v1, 0x7fffffff, v2, v1
+; GFX1250TRUE16-NEXT: v_add_f32_e32 v0, v0, v1
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_round_bf16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_trunc_f32_e32 v1, v0
+; GFX1250FAKE16-NEXT: v_sub_f32_e32 v2, v0, v1
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_cmp_ge_f32_e64 s0, |v2|, 0.5
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e64 v2, 0, 1.0, s0
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_bfi_b32 v0, 0x7fffffff, v2, v0
+; GFX1250FAKE16-NEXT: v_add_f32_e32 v0, v1, v0
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = call bfloat @llvm.round.bf16(bfloat %a)
ret bfloat %op
}
@@ -33155,15 +33583,26 @@ define bfloat @v_roundeven_bf16(bfloat %a) {
; GFX11FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_roundeven_bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_lshlrev_b32_e32 v0, 16, v0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT: v_rndne_f32_e32 v0, v0
-; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_roundeven_bf16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.l, 0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.h, v0.l
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_rndne_f32_e32 v0, v1
+; GFX1250TRUE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_roundeven_bf16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_rndne_f32_e32 v0, v0
+; GFX1250FAKE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = call bfloat @llvm.roundeven.bf16(bfloat %a)
ret bfloat %op
}
@@ -33271,15 +33710,26 @@ define bfloat @v_floor_bf16(bfloat %a) {
; GFX11FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_floor_bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_lshlrev_b32_e32 v0, 16, v0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT: v_floor_f32_e32 v0, v0
-; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_floor_bf16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.l, 0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.h, v0.l
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_floor_f32_e32 v0, v1
+; GFX1250TRUE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_floor_bf16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_floor_f32_e32 v0, v0
+; GFX1250FAKE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = call bfloat @llvm.floor.bf16(bfloat %a)
ret bfloat %op
}
@@ -33385,15 +33835,26 @@ define bfloat @v_canonicalize_bf16(bfloat %a) {
; GFX11FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_canonicalize_bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_lshlrev_b32_e32 v0, 16, v0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT: v_max_num_f32_e32 v0, v0, v0
-; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_canonicalize_bf16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.l, 0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.h, v0.l
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_max_num_f32_e32 v0, v1, v1
+; GFX1250TRUE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_canonicalize_bf16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_max_num_f32_e32 v0, v0, v0
+; GFX1250FAKE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = call bfloat @llvm.canonicalize.bf16(bfloat %a)
ret bfloat %op
}
@@ -33535,15 +33996,28 @@ define i1 @v_fcmp_oeq_bf16(bfloat %a, bfloat %b) {
; GFX11FAKE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_fcmp_oeq_bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_dual_lshlrev_b32 v1, 16, v1 :: v_dual_lshlrev_b32 v0, 16, v0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1250-NEXT: v_cmp_eq_f32_e32 vcc_lo, v0, v1
-; GFX1250-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_fcmp_oeq_bf16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v2.l, 0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v2.h, v1.l
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.h, v0.l
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l
+; GFX1250TRUE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, v1, v2
+; GFX1250TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_fcmp_oeq_bf16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_dual_lshlrev_b32 v1, 16, v1 :: v_dual_lshlrev_b32 v0, 16, v0
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, v0, v1
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = fcmp oeq bfloat %a, %b
ret i1 %op
}
@@ -33630,15 +34104,28 @@ define i1 @v_fcmp_ogt_bf16(bfloat %a, bfloat %b) {
; GFX11FAKE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_fcmp_ogt_bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_dual_lshlrev_b32 v1, 16, v1 :: v_dual_lshlrev_b32 v0, 16, v0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1250-NEXT: v_cmp_gt_f32_e32 vcc_lo, v0, v1
-; GFX1250-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_fcmp_ogt_bf16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v2.l, 0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v2.h, v1.l
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.h, v0.l
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l
+; GFX1250TRUE16-NEXT: v_cmp_gt_f32_e32 vcc_lo, v1, v2
+; GFX1250TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_fcmp_ogt_bf16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_dual_lshlrev_b32 v1, 16, v1 :: v_dual_lshlrev_b32 v0, 16, v0
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_cmp_gt_f32_e32 vcc_lo, v0, v1
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = fcmp ogt bfloat %a, %b
ret i1 %op
}
@@ -33725,15 +34212,28 @@ define i1 @v_fcmp_oge_bf16(bfloat %a, bfloat %b) {
; GFX11FAKE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_fcmp_oge_bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_dual_lshlrev_b32 v1, 16, v1 :: v_dual_lshlrev_b32 v0, 16, v0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1250-NEXT: v_cmp_ge_f32_e32 vcc_lo, v0, v1
-; GFX1250-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_fcmp_oge_bf16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v2.l, 0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v2.h, v1.l
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.h, v0.l
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l
+; GFX1250TRUE16-NEXT: v_cmp_ge_f32_e32 vcc_lo, v1, v2
+; GFX1250TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_fcmp_oge_bf16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_dual_lshlrev_b32 v1, 16, v1 :: v_dual_lshlrev_b32 v0, 16, v0
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_cmp_ge_f32_e32 vcc_lo, v0, v1
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = fcmp oge bfloat %a, %b
ret i1 %op
}
@@ -33820,15 +34320,28 @@ define i1 @v_fcmp_olt_bf16(bfloat %a, bfloat %b) {
; GFX11FAKE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_fcmp_olt_bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_dual_lshlrev_b32 v1, 16, v1 :: v_dual_lshlrev_b32 v0, 16, v0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1250-NEXT: v_cmp_lt_f32_e32 vcc_lo, v0, v1
-; GFX1250-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_fcmp_olt_bf16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v2.l, 0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v2.h, v1.l
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.h, v0.l
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l
+; GFX1250TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v1, v2
+; GFX1250TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_fcmp_olt_bf16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_dual_lshlrev_b32 v1, 16, v1 :: v_dual_lshlrev_b32 v0, 16, v0
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v0, v1
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = fcmp olt bfloat %a, %b
ret i1 %op
}
@@ -33915,15 +34428,28 @@ define i1 @v_fcmp_ole_bf16(bfloat %a, bfloat %b) {
; GFX11FAKE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_fcmp_ole_bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_dual_lshlrev_b32 v1, 16, v1 :: v_dual_lshlrev_b32 v0, 16, v0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1250-NEXT: v_cmp_le_f32_e32 vcc_lo, v0, v1
-; GFX1250-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_fcmp_ole_bf16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v2.l, 0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v2.h, v1.l
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.h, v0.l
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l
+; GFX1250TRUE16-NEXT: v_cmp_le_f32_e32 vcc_lo, v1, v2
+; GFX1250TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_fcmp_ole_bf16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_dual_lshlrev_b32 v1, 16, v1 :: v_dual_lshlrev_b32 v0, 16, v0
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_cmp_le_f32_e32 vcc_lo, v0, v1
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = fcmp ole bfloat %a, %b
ret i1 %op
}
@@ -34010,15 +34536,28 @@ define i1 @v_fcmp_one_bf16(bfloat %a, bfloat %b) {
; GFX11FAKE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_fcmp_one_bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_dual_lshlrev_b32 v1, 16, v1 :: v_dual_lshlrev_b32 v0, 16, v0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1250-NEXT: v_cmp_lg_f32_e32 vcc_lo, v0, v1
-; GFX1250-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_fcmp_one_bf16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v2.l, 0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v2.h, v1.l
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.h, v0.l
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l
+; GFX1250TRUE16-NEXT: v_cmp_lg_f32_e32 vcc_lo, v1, v2
+; GFX1250TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_fcmp_one_bf16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_dual_lshlrev_b32 v1, 16, v1 :: v_dual_lshlrev_b32 v0, 16, v0
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_cmp_lg_f32_e32 vcc_lo, v0, v1
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = fcmp one bfloat %a, %b
ret i1 %op
}
@@ -34105,15 +34644,28 @@ define i1 @v_fcmp_uno_bf16(bfloat %a, bfloat %b) {
; GFX11FAKE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_fcmp_uno_bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_dual_lshlrev_b32 v1, 16, v1 :: v_dual_lshlrev_b32 v0, 16, v0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1250-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v1
-; GFX1250-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_fcmp_uno_bf16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v2.l, 0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v2.h, v1.l
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.h, v0.l
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l
+; GFX1250TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v2
+; GFX1250TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_fcmp_uno_bf16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_dual_lshlrev_b32 v1, 16, v1 :: v_dual_lshlrev_b32 v0, 16, v0
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v1
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = fcmp uno bfloat %a, %b
ret i1 %op
}
@@ -34200,15 +34752,28 @@ define i1 @v_fcmp_ueq_bf16(bfloat %a, bfloat %b) {
; GFX11FAKE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_fcmp_ueq_bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_dual_lshlrev_b32 v1, 16, v1 :: v_dual_lshlrev_b32 v0, 16, v0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1250-NEXT: v_cmp_nlg_f32_e32 vcc_lo, v0, v1
-; GFX1250-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_fcmp_ueq_bf16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v2.l, 0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v2.h, v1.l
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.h, v0.l
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l
+; GFX1250TRUE16-NEXT: v_cmp_nlg_f32_e32 vcc_lo, v1, v2
+; GFX1250TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_fcmp_ueq_bf16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_dual_lshlrev_b32 v1, 16, v1 :: v_dual_lshlrev_b32 v0, 16, v0
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_cmp_nlg_f32_e32 vcc_lo, v0, v1
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = fcmp ueq bfloat %a, %b
ret i1 %op
}
@@ -34295,15 +34860,28 @@ define i1 @v_fcmp_ugt_bf16(bfloat %a, bfloat %b) {
; GFX11FAKE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_fcmp_ugt_bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_dual_lshlrev_b32 v1, 16, v1 :: v_dual_lshlrev_b32 v0, 16, v0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1250-NEXT: v_cmp_nle_f32_e32 vcc_lo, v0, v1
-; GFX1250-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_fcmp_ugt_bf16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v2.l, 0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v2.h, v1.l
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.h, v0.l
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l
+; GFX1250TRUE16-NEXT: v_cmp_nle_f32_e32 vcc_lo, v1, v2
+; GFX1250TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_fcmp_ugt_bf16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_dual_lshlrev_b32 v1, 16, v1 :: v_dual_lshlrev_b32 v0, 16, v0
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_cmp_nle_f32_e32 vcc_lo, v0, v1
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = fcmp ugt bfloat %a, %b
ret i1 %op
}
@@ -34390,15 +34968,28 @@ define i1 @v_fcmp_uge_bf16(bfloat %a, bfloat %b) {
; GFX11FAKE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_fcmp_uge_bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_dual_lshlrev_b32 v1, 16, v1 :: v_dual_lshlrev_b32 v0, 16, v0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1250-NEXT: v_cmp_nlt_f32_e32 vcc_lo, v0, v1
-; GFX1250-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_fcmp_uge_bf16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v2.l, 0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v2.h, v1.l
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.h, v0.l
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l
+; GFX1250TRUE16-NEXT: v_cmp_nlt_f32_e32 vcc_lo, v1, v2
+; GFX1250TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_fcmp_uge_bf16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_dual_lshlrev_b32 v1, 16, v1 :: v_dual_lshlrev_b32 v0, 16, v0
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_cmp_nlt_f32_e32 vcc_lo, v0, v1
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = fcmp uge bfloat %a, %b
ret i1 %op
}
@@ -34485,15 +35076,28 @@ define i1 @v_fcmp_ult_bf16(bfloat %a, bfloat %b) {
; GFX11FAKE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_fcmp_ult_bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_dual_lshlrev_b32 v1, 16, v1 :: v_dual_lshlrev_b32 v0, 16, v0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1250-NEXT: v_cmp_nge_f32_e32 vcc_lo, v0, v1
-; GFX1250-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_fcmp_ult_bf16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v2.l, 0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v2.h, v1.l
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.h, v0.l
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l
+; GFX1250TRUE16-NEXT: v_cmp_nge_f32_e32 vcc_lo, v1, v2
+; GFX1250TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_fcmp_ult_bf16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_dual_lshlrev_b32 v1, 16, v1 :: v_dual_lshlrev_b32 v0, 16, v0
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_cmp_nge_f32_e32 vcc_lo, v0, v1
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = fcmp ult bfloat %a, %b
ret i1 %op
}
@@ -34580,15 +35184,28 @@ define i1 @v_fcmp_ule_bf16(bfloat %a, bfloat %b) {
; GFX11FAKE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_fcmp_ule_bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_dual_lshlrev_b32 v1, 16, v1 :: v_dual_lshlrev_b32 v0, 16, v0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1250-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v0, v1
-; GFX1250-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_fcmp_ule_bf16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v2.l, 0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v2.h, v1.l
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.h, v0.l
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l
+; GFX1250TRUE16-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v1, v2
+; GFX1250TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_fcmp_ule_bf16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_dual_lshlrev_b32 v1, 16, v1 :: v_dual_lshlrev_b32 v0, 16, v0
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v0, v1
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = fcmp ule bfloat %a, %b
ret i1 %op
}
@@ -34675,15 +35292,28 @@ define i1 @v_fcmp_une_bf16(bfloat %a, bfloat %b) {
; GFX11FAKE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_fcmp_une_bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_dual_lshlrev_b32 v1, 16, v1 :: v_dual_lshlrev_b32 v0, 16, v0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1250-NEXT: v_cmp_neq_f32_e32 vcc_lo, v0, v1
-; GFX1250-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_fcmp_une_bf16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v2.l, 0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v2.h, v1.l
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.h, v0.l
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l
+; GFX1250TRUE16-NEXT: v_cmp_neq_f32_e32 vcc_lo, v1, v2
+; GFX1250TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_fcmp_une_bf16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_dual_lshlrev_b32 v1, 16, v1 :: v_dual_lshlrev_b32 v0, 16, v0
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_cmp_neq_f32_e32 vcc_lo, v0, v1
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = fcmp une bfloat %a, %b
ret i1 %op
}
@@ -34790,14 +35420,24 @@ define i16 @v_fptosi_bf16_to_i16(bfloat %x) {
; GFX11FAKE16-NEXT: v_cvt_i32_f32_e32 v0, v0
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_fptosi_bf16_to_i16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_lshlrev_b32_e32 v0, 16, v0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1250-NEXT: v_cvt_i32_f32_e32 v0, v0
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_fptosi_bf16_to_i16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.l, 0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.h, v0.l
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_cvt_i32_f32_e32 v0, v1
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_fptosi_bf16_to_i16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_cvt_i32_f32_e32 v0, v0
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = fptosi bfloat %x to i16
ret i16 %op
}
@@ -34899,18 +35539,31 @@ define <2 x i16> @v_fptosi_v2bf16_to_v2i16(<2 x bfloat> %x) {
; GFX11FAKE16-NEXT: v_perm_b32 v0, v0, v1, 0x5040100
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_fptosi_v2bf16_to_v2i16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_lshlrev_b32_e32 v1, 16, v0
-; GFX1250-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1250-NEXT: v_cvt_i32_f32_e32 v1, v1
-; GFX1250-NEXT: v_cvt_i32_f32_e32 v0, v0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1250-NEXT: v_perm_b32 v0, v0, v1, 0x5040100
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_fptosi_v2bf16_to_v2i16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_and_b32_e32 v1, 0xffff0000, v0
+; GFX1250TRUE16-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250TRUE16-NEXT: v_cvt_i32_f32_e32 v1, v1
+; GFX1250TRUE16-NEXT: v_cvt_i32_f32_e32 v0, v0
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_fptosi_v2bf16_to_v2i16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v0
+; GFX1250FAKE16-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250FAKE16-NEXT: v_cvt_i32_f32_e32 v1, v1
+; GFX1250FAKE16-NEXT: v_cvt_i32_f32_e32 v0, v0
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_perm_b32 v0, v0, v1, 0x5040100
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = fptosi <2 x bfloat> %x to <2 x i16>
ret <2 x i16> %op
}
@@ -35032,19 +35685,33 @@ define <3 x i16> @v_fptosi_v3bf16_to_v3i16(<3 x bfloat> %x) {
; GFX11FAKE16-NEXT: v_perm_b32 v0, v0, v2, 0x5040100
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_fptosi_v3bf16_to_v3i16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_dual_lshlrev_b32 v2, 16, v0 :: v_dual_lshlrev_b32 v1, 16, v1
-; GFX1250-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX1250-NEXT: v_cvt_i32_f32_e32 v2, v2
-; GFX1250-NEXT: v_cvt_i32_f32_e32 v1, v1
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT: v_cvt_i32_f32_e32 v0, v0
-; GFX1250-NEXT: v_perm_b32 v0, v0, v2, 0x5040100
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_fptosi_v3bf16_to_v3i16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_and_b32_e32 v2, 0xffff0000, v0
+; GFX1250TRUE16-NEXT: v_dual_lshlrev_b32 v0, 16, v0 :: v_dual_lshlrev_b32 v1, 16, v1
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250TRUE16-NEXT: v_cvt_i32_f32_e32 v2, v2
+; GFX1250TRUE16-NEXT: v_cvt_i32_f32_e32 v0, v0
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1250TRUE16-NEXT: v_cvt_i32_f32_e32 v1, v1
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v0.h, v2.l
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_fptosi_v3bf16_to_v3i16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_dual_lshlrev_b32 v2, 16, v0 :: v_dual_lshlrev_b32 v1, 16, v1
+; GFX1250FAKE16-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1250FAKE16-NEXT: v_cvt_i32_f32_e32 v2, v2
+; GFX1250FAKE16-NEXT: v_cvt_i32_f32_e32 v1, v1
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_cvt_i32_f32_e32 v0, v0
+; GFX1250FAKE16-NEXT: v_perm_b32 v0, v0, v2, 0x5040100
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = fptosi <3 x bfloat> %x to <3 x i16>
ret <3 x i16> %op
}
@@ -35198,23 +35865,41 @@ define <4 x i16> @v_fptosi_v4bf16_to_v4i16(<4 x bfloat> %x) {
; GFX11FAKE16-NEXT: v_perm_b32 v1, v1, v2, 0x5040100
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_fptosi_v4bf16_to_v4i16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_dual_lshlrev_b32 v2, 16, v1 :: v_dual_lshlrev_b32 v3, 16, v0
-; GFX1250-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
-; GFX1250-NEXT: v_and_b32_e32 v1, 0xffff0000, v1
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX1250-NEXT: v_cvt_i32_f32_e32 v2, v2
-; GFX1250-NEXT: v_cvt_i32_f32_e32 v3, v3
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX1250-NEXT: v_cvt_i32_f32_e32 v0, v0
-; GFX1250-NEXT: v_cvt_i32_f32_e32 v1, v1
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1250-NEXT: v_perm_b32 v0, v0, v3, 0x5040100
-; GFX1250-NEXT: v_perm_b32 v1, v1, v2, 0x5040100
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_fptosi_v4bf16_to_v4i16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_and_b32_e32 v2, 0xffff0000, v0
+; GFX1250TRUE16-NEXT: v_and_b32_e32 v3, 0xffff0000, v1
+; GFX1250TRUE16-NEXT: v_dual_lshlrev_b32 v1, 16, v1 :: v_dual_lshlrev_b32 v0, 16, v0
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1250TRUE16-NEXT: v_cvt_i32_f32_e32 v2, v2
+; GFX1250TRUE16-NEXT: v_cvt_i32_f32_e32 v3, v3
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX1250TRUE16-NEXT: v_cvt_i32_f32_e32 v1, v1
+; GFX1250TRUE16-NEXT: v_cvt_i32_f32_e32 v0, v0
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v0.h, v2.l
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.h, v3.l
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_fptosi_v4bf16_to_v4i16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_dual_lshlrev_b32 v2, 16, v1 :: v_dual_lshlrev_b32 v3, 16, v0
+; GFX1250FAKE16-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
+; GFX1250FAKE16-NEXT: v_and_b32_e32 v1, 0xffff0000, v1
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX1250FAKE16-NEXT: v_cvt_i32_f32_e32 v2, v2
+; GFX1250FAKE16-NEXT: v_cvt_i32_f32_e32 v3, v3
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX1250FAKE16-NEXT: v_cvt_i32_f32_e32 v0, v0
+; GFX1250FAKE16-NEXT: v_cvt_i32_f32_e32 v1, v1
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250FAKE16-NEXT: v_perm_b32 v0, v0, v3, 0x5040100
+; GFX1250FAKE16-NEXT: v_perm_b32 v1, v1, v2, 0x5040100
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = fptosi <4 x bfloat> %x to <4 x i16>
ret <4 x i16> %op
}
@@ -35274,14 +35959,24 @@ define i32 @v_fptosi_bf16_to_i32(bfloat %x) {
; GFX11FAKE16-NEXT: v_cvt_i32_f32_e32 v0, v0
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_fptosi_bf16_to_i32:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_lshlrev_b32_e32 v0, 16, v0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1250-NEXT: v_cvt_i32_f32_e32 v0, v0
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_fptosi_bf16_to_i32:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.l, 0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.h, v0.l
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_cvt_i32_f32_e32 v0, v1
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_fptosi_bf16_to_i32:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_cvt_i32_f32_e32 v0, v0
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = fptosi bfloat %x to i32
ret i32 %op
}
@@ -35729,26 +36424,48 @@ define i64 @v_fptosi_bf16_to_i64(bfloat %x) {
; GFX11FAKE16-NEXT: v_sub_co_ci_u32_e64 v1, null, v1, v3, vcc_lo
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_fptosi_bf16_to_i64:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_lshlrev_b32_e32 v0, 16, v0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT: v_trunc_f32_e32 v0, v0
-; GFX1250-NEXT: v_mul_f32_e64 v1, 0x2f800000, |v0|
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT: v_floor_f32_e32 v1, v1
-; GFX1250-NEXT: v_fma_f32 v2, 0xcf800000, v1, |v0|
-; GFX1250-NEXT: v_ashrrev_i32_e32 v0, 31, v0
-; GFX1250-NEXT: v_cvt_u32_f32_e32 v3, v1
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1250-NEXT: v_cvt_u32_f32_e32 v2, v2
-; GFX1250-NEXT: v_dual_mov_b32 v1, v0 :: v_dual_bitop2_b32 v3, v3, v0 bitop3:0x14
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT: v_xor_b32_e32 v2, v2, v0
-; GFX1250-NEXT: v_sub_nc_u64_e32 v[0:1], v[2:3], v[0:1]
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_fptosi_bf16_to_i64:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.l, 0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.h, v0.l
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_trunc_f32_e32 v0, v1
+; GFX1250TRUE16-NEXT: v_mul_f32_e64 v1, 0x2f800000, |v0|
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_floor_f32_e32 v1, v1
+; GFX1250TRUE16-NEXT: v_fma_f32 v2, 0xcf800000, v1, |v0|
+; GFX1250TRUE16-NEXT: v_ashrrev_i32_e32 v0, 31, v0
+; GFX1250TRUE16-NEXT: v_cvt_u32_f32_e32 v3, v1
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250TRUE16-NEXT: v_cvt_u32_f32_e32 v2, v2
+; GFX1250TRUE16-NEXT: v_dual_mov_b32 v1, v0 :: v_dual_bitop2_b32 v3, v3, v0 bitop3:0x14
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_xor_b32_e32 v2, v2, v0
+; GFX1250TRUE16-NEXT: v_sub_nc_u64_e32 v[0:1], v[2:3], v[0:1]
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_fptosi_bf16_to_i64:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_trunc_f32_e32 v0, v0
+; GFX1250FAKE16-NEXT: v_mul_f32_e64 v1, 0x2f800000, |v0|
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_floor_f32_e32 v1, v1
+; GFX1250FAKE16-NEXT: v_fma_f32 v2, 0xcf800000, v1, |v0|
+; GFX1250FAKE16-NEXT: v_ashrrev_i32_e32 v0, 31, v0
+; GFX1250FAKE16-NEXT: v_cvt_u32_f32_e32 v3, v1
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250FAKE16-NEXT: v_cvt_u32_f32_e32 v2, v2
+; GFX1250FAKE16-NEXT: v_dual_mov_b32 v1, v0 :: v_dual_bitop2_b32 v3, v3, v0 bitop3:0x14
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_xor_b32_e32 v2, v2, v0
+; GFX1250FAKE16-NEXT: v_sub_nc_u64_e32 v[0:1], v[2:3], v[0:1]
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = fptosi bfloat %x to i64
ret i64 %op
}
@@ -37293,22 +38010,39 @@ define <3 x bfloat> @v_sitofp_v3i16_to_v3bf16(<3 x i16> %x) {
; GFX11FAKE16-NEXT: v_alignbit_b32 v1, s0, v1, 16
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_sitofp_v3i16_to_v3bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_ashrrev_i32_e32 v2, 16, v0
-; GFX1250-NEXT: v_bfe_i32 v0, v0, 0, 16
-; GFX1250-NEXT: v_bfe_i32 v1, v1, 0, 16
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX1250-NEXT: v_cvt_f32_i32_e32 v2, v2
-; GFX1250-NEXT: v_cvt_f32_i32_e32 v0, v0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1250-NEXT: v_cvt_f32_i32_e32 v1, v1
-; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, v0, v2
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX1250-NEXT: v_cvt_pk_bf16_f32 v1, v1, s0
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_sitofp_v3i16_to_v3bf16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_bfe_i32 v1, v1, 0, 16
+; GFX1250TRUE16-NEXT: v_ashrrev_i32_e32 v2, 16, v0
+; GFX1250TRUE16-NEXT: v_bfe_i32 v0, v0, 0, 16
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1250TRUE16-NEXT: v_cvt_f32_i32_e32 v1, v1
+; GFX1250TRUE16-NEXT: v_cvt_f32_i32_e32 v2, v2
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1250TRUE16-NEXT: v_cvt_f32_i32_e32 v0, v0
+; GFX1250TRUE16-NEXT: v_cvt_pk_bf16_f32 v1, v1, s0
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX1250TRUE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, v2
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_sitofp_v3i16_to_v3bf16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_ashrrev_i32_e32 v2, 16, v0
+; GFX1250FAKE16-NEXT: v_bfe_i32 v0, v0, 0, 16
+; GFX1250FAKE16-NEXT: v_bfe_i32 v1, v1, 0, 16
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1250FAKE16-NEXT: v_cvt_f32_i32_e32 v2, v2
+; GFX1250FAKE16-NEXT: v_cvt_f32_i32_e32 v0, v0
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250FAKE16-NEXT: v_cvt_f32_i32_e32 v1, v1
+; GFX1250FAKE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, v2
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX1250FAKE16-NEXT: v_cvt_pk_bf16_f32 v1, v1, s0
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = sitofp <3 x i16> %x to <3 x bfloat>
ret <3 x bfloat> %op
}
@@ -37972,17 +38706,31 @@ define <3 x bfloat> @v_sitofp_v3i32_to_v3bf16(<3 x i32> %x) {
; GFX11FAKE16-NEXT: v_alignbit_b32 v1, s0, v2, 16
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_sitofp_v3i32_to_v3bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_cvt_f32_i32_e32 v1, v1
-; GFX1250-NEXT: v_cvt_f32_i32_e32 v0, v0
-; GFX1250-NEXT: v_cvt_f32_i32_e32 v2, v2
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, v0, v1
-; GFX1250-NEXT: v_cvt_pk_bf16_f32 v1, v2, s0
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_sitofp_v3i32_to_v3bf16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_cvt_f32_i32_e32 v2, v2
+; GFX1250TRUE16-NEXT: v_cvt_f32_i32_e32 v1, v1
+; GFX1250TRUE16-NEXT: v_cvt_f32_i32_e32 v0, v0
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250TRUE16-NEXT: v_cvt_pk_bf16_f32 v2, v2, s0
+; GFX1250TRUE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, v1
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_sitofp_v3i32_to_v3bf16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_cvt_f32_i32_e32 v1, v1
+; GFX1250FAKE16-NEXT: v_cvt_f32_i32_e32 v0, v0
+; GFX1250FAKE16-NEXT: v_cvt_f32_i32_e32 v2, v2
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250FAKE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, v1
+; GFX1250FAKE16-NEXT: v_cvt_pk_bf16_f32 v1, v2, s0
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = sitofp <3 x i32> %x to <3 x bfloat>
ret <3 x bfloat> %op
}
@@ -39232,52 +39980,101 @@ define <3 x bfloat> @v_sitofp_v3i64_to_v3bf16(<3 x i64> %x) {
; GFX11FAKE16-NEXT: v_alignbit_b32 v1, s0, v1, 16
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_sitofp_v3i64_to_v3bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_xor_b32_e32 v8, v4, v5
-; GFX1250-NEXT: v_xor_b32_e32 v6, v2, v3
-; GFX1250-NEXT: v_cls_i32_e32 v10, v3
-; GFX1250-NEXT: v_cls_i32_e32 v9, v5
-; GFX1250-NEXT: v_cls_i32_e32 v11, v1
-; GFX1250-NEXT: v_dual_ashrrev_i32 v8, 31, v8 :: v_dual_bitop2_b32 v7, v0, v1 bitop3:0x14
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT: v_dual_ashrrev_i32 v6, 31, v6 :: v_dual_ashrrev_i32 v7, 31, v7
-; GFX1250-NEXT: v_dual_add_nc_u32 v6, 32, v6 :: v_dual_add_nc_u32 v7, 32, v7
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1250-NEXT: v_add_min_u32_e64 v6, v10, -1, v6
-; GFX1250-NEXT: v_add_min_u32_e64 v7, v11, -1, v7
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1250-NEXT: v_lshlrev_b64_e32 v[2:3], v6, v[2:3]
-; GFX1250-NEXT: v_lshlrev_b64_e32 v[0:1], v7, v[0:1]
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
-; GFX1250-NEXT: v_min_u32_e32 v2, 1, v2
-; GFX1250-NEXT: v_add_nc_u32_e32 v8, 32, v8
-; GFX1250-NEXT: v_min_u32_e32 v0, 1, v0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX1250-NEXT: v_or_b32_e32 v2, v3, v2
-; GFX1250-NEXT: v_add_min_u32_e64 v8, v9, -1, v8
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX1250-NEXT: v_dual_sub_nc_u32 v3, 32, v6 :: v_dual_bitop2_b32 v0, v1, v0 bitop3:0x54
-; GFX1250-NEXT: v_cvt_f32_i32_e32 v2, v2
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
-; GFX1250-NEXT: v_lshlrev_b64_e32 v[4:5], v8, v[4:5]
-; GFX1250-NEXT: v_sub_nc_u32_e32 v8, 32, v8
-; GFX1250-NEXT: v_ldexp_f32 v2, v2, v3
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT: v_min_u32_e32 v4, 1, v4
-; GFX1250-NEXT: v_dual_sub_nc_u32 v4, 32, v7 :: v_dual_bitop2_b32 v1, v5, v4 bitop3:0x54
-; GFX1250-NEXT: v_cvt_f32_i32_e32 v0, v0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1250-NEXT: v_cvt_f32_i32_e32 v1, v1
-; GFX1250-NEXT: v_ldexp_f32 v0, v0, v4
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1250-NEXT: v_ldexp_f32 v1, v1, v8
-; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, v0, v2
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX1250-NEXT: v_cvt_pk_bf16_f32 v1, v1, s0
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_sitofp_v3i64_to_v3bf16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_xor_b32_e32 v7, v2, v3
+; GFX1250TRUE16-NEXT: v_xor_b32_e32 v6, v4, v5
+; GFX1250TRUE16-NEXT: v_cls_i32_e32 v10, v3
+; GFX1250TRUE16-NEXT: v_cls_i32_e32 v9, v5
+; GFX1250TRUE16-NEXT: v_cls_i32_e32 v11, v1
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1250TRUE16-NEXT: v_dual_ashrrev_i32 v7, 31, v7 :: v_dual_ashrrev_i32 v6, 31, v6
+; GFX1250TRUE16-NEXT: v_xor_b32_e32 v8, v0, v1
+; GFX1250TRUE16-NEXT: v_dual_add_nc_u32 v7, 32, v7 :: v_dual_add_nc_u32 v6, 32, v6
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250TRUE16-NEXT: v_ashrrev_i32_e32 v8, 31, v8
+; GFX1250TRUE16-NEXT: v_add_min_u32_e64 v7, v10, -1, v7
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250TRUE16-NEXT: v_add_min_u32_e64 v6, v9, -1, v6
+; GFX1250TRUE16-NEXT: v_lshlrev_b64_e32 v[2:3], v7, v[2:3]
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250TRUE16-NEXT: v_lshlrev_b64_e32 v[4:5], v6, v[4:5]
+; GFX1250TRUE16-NEXT: v_min_u32_e32 v2, 1, v2
+; GFX1250TRUE16-NEXT: v_add_nc_u32_e32 v8, 32, v8
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1250TRUE16-NEXT: v_min_u32_e32 v4, 1, v4
+; GFX1250TRUE16-NEXT: v_or_b32_e32 v2, v3, v2
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1250TRUE16-NEXT: v_add_min_u32_e64 v8, v11, -1, v8
+; GFX1250TRUE16-NEXT: v_dual_sub_nc_u32 v3, 32, v6 :: v_dual_bitop2_b32 v4, v5, v4 bitop3:0x54
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1250TRUE16-NEXT: v_cvt_f32_i32_e32 v2, v2
+; GFX1250TRUE16-NEXT: v_lshlrev_b64_e32 v[0:1], v8, v[0:1]
+; GFX1250TRUE16-NEXT: v_sub_nc_u32_e32 v5, 32, v8
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_min_u32_e32 v0, 1, v0
+; GFX1250TRUE16-NEXT: v_or_b32_e32 v0, v1, v0
+; GFX1250TRUE16-NEXT: v_cvt_f32_i32_e32 v1, v4
+; GFX1250TRUE16-NEXT: v_sub_nc_u32_e32 v4, 32, v7
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1250TRUE16-NEXT: v_cvt_f32_i32_e32 v0, v0
+; GFX1250TRUE16-NEXT: v_ldexp_f32 v1, v1, v3
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1250TRUE16-NEXT: v_ldexp_f32 v2, v2, v4
+; GFX1250TRUE16-NEXT: v_ldexp_f32 v0, v0, v5
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250TRUE16-NEXT: v_cvt_pk_bf16_f32 v1, v1, s0
+; GFX1250TRUE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, v2
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_sitofp_v3i64_to_v3bf16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_xor_b32_e32 v8, v4, v5
+; GFX1250FAKE16-NEXT: v_xor_b32_e32 v6, v2, v3
+; GFX1250FAKE16-NEXT: v_cls_i32_e32 v10, v3
+; GFX1250FAKE16-NEXT: v_cls_i32_e32 v9, v5
+; GFX1250FAKE16-NEXT: v_cls_i32_e32 v11, v1
+; GFX1250FAKE16-NEXT: v_dual_ashrrev_i32 v8, 31, v8 :: v_dual_bitop2_b32 v7, v0, v1 bitop3:0x14
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_dual_ashrrev_i32 v6, 31, v6 :: v_dual_ashrrev_i32 v7, 31, v7
+; GFX1250FAKE16-NEXT: v_dual_add_nc_u32 v6, 32, v6 :: v_dual_add_nc_u32 v7, 32, v7
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250FAKE16-NEXT: v_add_min_u32_e64 v6, v10, -1, v6
+; GFX1250FAKE16-NEXT: v_add_min_u32_e64 v7, v11, -1, v7
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250FAKE16-NEXT: v_lshlrev_b64_e32 v[2:3], v6, v[2:3]
+; GFX1250FAKE16-NEXT: v_lshlrev_b64_e32 v[0:1], v7, v[0:1]
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
+; GFX1250FAKE16-NEXT: v_min_u32_e32 v2, 1, v2
+; GFX1250FAKE16-NEXT: v_add_nc_u32_e32 v8, 32, v8
+; GFX1250FAKE16-NEXT: v_min_u32_e32 v0, 1, v0
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1250FAKE16-NEXT: v_or_b32_e32 v2, v3, v2
+; GFX1250FAKE16-NEXT: v_add_min_u32_e64 v8, v9, -1, v8
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1250FAKE16-NEXT: v_dual_sub_nc_u32 v3, 32, v6 :: v_dual_bitop2_b32 v0, v1, v0 bitop3:0x54
+; GFX1250FAKE16-NEXT: v_cvt_f32_i32_e32 v2, v2
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
+; GFX1250FAKE16-NEXT: v_lshlrev_b64_e32 v[4:5], v8, v[4:5]
+; GFX1250FAKE16-NEXT: v_sub_nc_u32_e32 v8, 32, v8
+; GFX1250FAKE16-NEXT: v_ldexp_f32 v2, v2, v3
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_min_u32_e32 v4, 1, v4
+; GFX1250FAKE16-NEXT: v_dual_sub_nc_u32 v4, 32, v7 :: v_dual_bitop2_b32 v1, v5, v4 bitop3:0x54
+; GFX1250FAKE16-NEXT: v_cvt_f32_i32_e32 v0, v0
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250FAKE16-NEXT: v_cvt_f32_i32_e32 v1, v1
+; GFX1250FAKE16-NEXT: v_ldexp_f32 v0, v0, v4
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250FAKE16-NEXT: v_ldexp_f32 v1, v1, v8
+; GFX1250FAKE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, v2
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX1250FAKE16-NEXT: v_cvt_pk_bf16_f32 v1, v1, s0
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = sitofp <3 x i64> %x to <3 x bfloat>
ret <3 x bfloat> %op
}
@@ -40015,15 +40812,26 @@ define bfloat @v_uitofp_i16_to_bf16(i16 %x) {
; GFX11FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_uitofp_i16_to_bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT: v_cvt_f32_u32_e32 v0, v0
-; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_uitofp_i16_to_bf16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.h, 0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.l, v0.l
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_cvt_f32_u32_e32 v0, v1
+; GFX1250TRUE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_uitofp_i16_to_bf16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_cvt_f32_u32_e32 v0, v0
+; GFX1250FAKE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = uitofp i16 %x to bfloat
ret bfloat %op
}
@@ -40167,18 +40975,32 @@ define <2 x bfloat> @v_uitofp_v2i16_to_v2bf16(<2 x i16> %x) {
; GFX11FAKE16-NEXT: v_perm_b32 v0, v0, v1, 0x7060302
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_uitofp_v2i16_to_v2bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_lshrrev_b32_e32 v1, 16, v0
-; GFX1250-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1250-NEXT: v_cvt_f32_u32_e32 v1, v1
-; GFX1250-NEXT: v_cvt_f32_u32_e32 v0, v0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, v0, v1
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_uitofp_v2i16_to_v2bf16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.h, 0
+; GFX1250TRUE16-NEXT: v_and_b32_e32 v2, 0xffff, v0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.l, v0.h
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250TRUE16-NEXT: v_cvt_f32_u32_e32 v0, v2
+; GFX1250TRUE16-NEXT: v_cvt_f32_u32_e32 v1, v1
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, v1
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_uitofp_v2i16_to_v2bf16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX1250FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250FAKE16-NEXT: v_cvt_f32_u32_e32 v1, v1
+; GFX1250FAKE16-NEXT: v_cvt_f32_u32_e32 v0, v0
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, v1
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = uitofp <2 x i16> %x to <2 x bfloat>
ret <2 x bfloat> %op
}
@@ -40373,22 +41195,41 @@ define <3 x bfloat> @v_uitofp_v3i16_to_v3bf16(<3 x i16> %x) {
; GFX11FAKE16-NEXT: v_alignbit_b32 v1, s0, v1, 16
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_uitofp_v3i16_to_v3bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_lshrrev_b32_e32 v2, 16, v0
-; GFX1250-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX1250-NEXT: v_and_b32_e32 v1, 0xffff, v1
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX1250-NEXT: v_cvt_f32_u32_e32 v2, v2
-; GFX1250-NEXT: v_cvt_f32_u32_e32 v0, v0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1250-NEXT: v_cvt_f32_u32_e32 v1, v1
-; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, v0, v2
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX1250-NEXT: v_cvt_pk_bf16_f32 v1, v1, s0
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_uitofp_v3i16_to_v3bf16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; GFX1250TRUE16-NEXT: v_and_b32_e32 v2, 0xffff, v0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v3.h, 0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v3.l, v0.h
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX1250TRUE16-NEXT: v_cvt_f32_u32_e32 v0, v1
+; GFX1250TRUE16-NEXT: v_cvt_f32_u32_e32 v1, v2
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1250TRUE16-NEXT: v_cvt_f32_u32_e32 v2, v3
+; GFX1250TRUE16-NEXT: v_cvt_pk_bf16_f32 v3, v0, s0
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250TRUE16-NEXT: v_cvt_pk_bf16_f32 v0, v1, v2
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.l, v3.l
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_uitofp_v3i16_to_v3bf16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v0
+; GFX1250FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX1250FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1250FAKE16-NEXT: v_cvt_f32_u32_e32 v2, v2
+; GFX1250FAKE16-NEXT: v_cvt_f32_u32_e32 v0, v0
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250FAKE16-NEXT: v_cvt_f32_u32_e32 v1, v1
+; GFX1250FAKE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, v2
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX1250FAKE16-NEXT: v_cvt_pk_bf16_f32 v1, v1, s0
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = uitofp <3 x i16> %x to <3 x bfloat>
ret <3 x bfloat> %op
}
@@ -40626,23 +41467,43 @@ define <4 x bfloat> @v_uitofp_v4i16_to_v4bf16(<4 x i16> %x) {
; GFX11FAKE16-NEXT: v_perm_b32 v1, v1, v2, 0x7060302
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_uitofp_v4i16_to_v4bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_dual_lshrrev_b32 v2, 16, v1 :: v_dual_lshrrev_b32 v3, 16, v0
-; GFX1250-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX1250-NEXT: v_and_b32_e32 v1, 0xffff, v1
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX1250-NEXT: v_cvt_f32_u32_e32 v2, v2
-; GFX1250-NEXT: v_cvt_f32_u32_e32 v3, v3
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX1250-NEXT: v_cvt_f32_u32_e32 v0, v0
-; GFX1250-NEXT: v_cvt_f32_u32_e32 v1, v1
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, v0, v3
-; GFX1250-NEXT: v_cvt_pk_bf16_f32 v1, v1, v2
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_uitofp_v4i16_to_v4bf16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v2.h, 0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v2.l, v1.h
+; GFX1250TRUE16-NEXT: v_and_b32_e32 v3, 0xffff, v0
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_4)
+; GFX1250TRUE16-NEXT: v_cvt_f32_u32_e32 v4, v2
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.h
+; GFX1250TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v1
+; GFX1250TRUE16-NEXT: v_cvt_f32_u32_e32 v1, v3
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1250TRUE16-NEXT: v_cvt_f32_u32_e32 v2, v2
+; GFX1250TRUE16-NEXT: v_cvt_f32_u32_e32 v3, v0
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250TRUE16-NEXT: v_cvt_pk_bf16_f32 v0, v1, v2
+; GFX1250TRUE16-NEXT: v_cvt_pk_bf16_f32 v1, v3, v4
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_uitofp_v4i16_to_v4bf16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_dual_lshrrev_b32 v2, 16, v1 :: v_dual_lshrrev_b32 v3, 16, v0
+; GFX1250FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX1250FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX1250FAKE16-NEXT: v_cvt_f32_u32_e32 v2, v2
+; GFX1250FAKE16-NEXT: v_cvt_f32_u32_e32 v3, v3
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX1250FAKE16-NEXT: v_cvt_f32_u32_e32 v0, v0
+; GFX1250FAKE16-NEXT: v_cvt_f32_u32_e32 v1, v1
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250FAKE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, v3
+; GFX1250FAKE16-NEXT: v_cvt_pk_bf16_f32 v1, v1, v2
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = uitofp <4 x i16> %x to <4 x bfloat>
ret <4 x bfloat> %op
}
@@ -41058,17 +41919,31 @@ define <3 x bfloat> @v_uitofp_v3i32_to_v3bf16(<3 x i32> %x) {
; GFX11FAKE16-NEXT: v_alignbit_b32 v1, s0, v2, 16
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_uitofp_v3i32_to_v3bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_cvt_f32_u32_e32 v1, v1
-; GFX1250-NEXT: v_cvt_f32_u32_e32 v0, v0
-; GFX1250-NEXT: v_cvt_f32_u32_e32 v2, v2
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, v0, v1
-; GFX1250-NEXT: v_cvt_pk_bf16_f32 v1, v2, s0
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_uitofp_v3i32_to_v3bf16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_cvt_f32_u32_e32 v2, v2
+; GFX1250TRUE16-NEXT: v_cvt_f32_u32_e32 v1, v1
+; GFX1250TRUE16-NEXT: v_cvt_f32_u32_e32 v0, v0
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250TRUE16-NEXT: v_cvt_pk_bf16_f32 v2, v2, s0
+; GFX1250TRUE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, v1
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_uitofp_v3i32_to_v3bf16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_cvt_f32_u32_e32 v1, v1
+; GFX1250FAKE16-NEXT: v_cvt_f32_u32_e32 v0, v0
+; GFX1250FAKE16-NEXT: v_cvt_f32_u32_e32 v2, v2
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250FAKE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, v1
+; GFX1250FAKE16-NEXT: v_cvt_pk_bf16_f32 v1, v2, s0
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = uitofp <3 x i32> %x to <3 x bfloat>
ret <3 x bfloat> %op
}
@@ -42105,44 +42980,84 @@ define <3 x bfloat> @v_uitofp_v3i64_to_v3bf16(<3 x i64> %x) {
; GFX11FAKE16-NEXT: v_alignbit_b32 v1, s0, v1, 16
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_uitofp_v3i64_to_v3bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_clz_i32_u32_e32 v6, v3
-; GFX1250-NEXT: v_clz_i32_u32_e32 v7, v1
-; GFX1250-NEXT: v_clz_i32_u32_e32 v8, v5
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX1250-NEXT: v_min_u32_e32 v6, 32, v6
-; GFX1250-NEXT: v_min_u32_e32 v7, 32, v7
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX1250-NEXT: v_min_u32_e32 v8, 32, v8
-; GFX1250-NEXT: v_lshlrev_b64_e32 v[2:3], v6, v[2:3]
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX1250-NEXT: v_lshlrev_b64_e32 v[0:1], v7, v[0:1]
-; GFX1250-NEXT: v_lshlrev_b64_e32 v[4:5], v8, v[4:5]
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX1250-NEXT: v_min_u32_e32 v2, 1, v2
-; GFX1250-NEXT: v_min_u32_e32 v0, 1, v0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX1250-NEXT: v_min_u32_e32 v4, 1, v4
-; GFX1250-NEXT: v_dual_sub_nc_u32 v8, 32, v8 :: v_dual_bitop2_b32 v2, v3, v2 bitop3:0x54
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX1250-NEXT: v_dual_sub_nc_u32 v3, 32, v6 :: v_dual_bitop2_b32 v0, v1, v0 bitop3:0x54
-; GFX1250-NEXT: v_dual_sub_nc_u32 v4, 32, v7 :: v_dual_bitop2_b32 v1, v5, v4 bitop3:0x54
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX1250-NEXT: v_cvt_f32_u32_e32 v2, v2
-; GFX1250-NEXT: v_cvt_f32_u32_e32 v0, v0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX1250-NEXT: v_cvt_f32_u32_e32 v1, v1
-; GFX1250-NEXT: v_ldexp_f32 v2, v2, v3
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX1250-NEXT: v_ldexp_f32 v0, v0, v4
-; GFX1250-NEXT: v_ldexp_f32 v1, v1, v8
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, v0, v2
-; GFX1250-NEXT: v_cvt_pk_bf16_f32 v1, v1, s0
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_uitofp_v3i64_to_v3bf16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_clz_i32_u32_e32 v6, v5
+; GFX1250TRUE16-NEXT: v_clz_i32_u32_e32 v7, v3
+; GFX1250TRUE16-NEXT: v_clz_i32_u32_e32 v8, v1
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1250TRUE16-NEXT: v_min_u32_e32 v6, 32, v6
+; GFX1250TRUE16-NEXT: v_min_u32_e32 v7, 32, v7
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1250TRUE16-NEXT: v_min_u32_e32 v8, 32, v8
+; GFX1250TRUE16-NEXT: v_lshlrev_b64_e32 v[4:5], v6, v[4:5]
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1250TRUE16-NEXT: v_lshlrev_b64_e32 v[2:3], v7, v[2:3]
+; GFX1250TRUE16-NEXT: v_lshlrev_b64_e32 v[0:1], v8, v[0:1]
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1250TRUE16-NEXT: v_min_u32_e32 v4, 1, v4
+; GFX1250TRUE16-NEXT: v_min_u32_e32 v2, 1, v2
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1250TRUE16-NEXT: v_min_u32_e32 v0, 1, v0
+; GFX1250TRUE16-NEXT: v_or_b32_e32 v4, v5, v4
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1250TRUE16-NEXT: v_or_b32_e32 v2, v3, v2
+; GFX1250TRUE16-NEXT: v_dual_sub_nc_u32 v3, 32, v6 :: v_dual_bitop2_b32 v0, v1, v0 bitop3:0x54
+; GFX1250TRUE16-NEXT: v_sub_nc_u32_e32 v5, 32, v8
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_3) | instid1(VALU_DEP_4)
+; GFX1250TRUE16-NEXT: v_cvt_f32_u32_e32 v1, v4
+; GFX1250TRUE16-NEXT: v_sub_nc_u32_e32 v4, 32, v7
+; GFX1250TRUE16-NEXT: v_cvt_f32_u32_e32 v2, v2
+; GFX1250TRUE16-NEXT: v_cvt_f32_u32_e32 v0, v0
+; GFX1250TRUE16-NEXT: v_ldexp_f32 v1, v1, v3
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1250TRUE16-NEXT: v_ldexp_f32 v2, v2, v4
+; GFX1250TRUE16-NEXT: v_ldexp_f32 v0, v0, v5
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250TRUE16-NEXT: v_cvt_pk_bf16_f32 v1, v1, s0
+; GFX1250TRUE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, v2
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_uitofp_v3i64_to_v3bf16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_clz_i32_u32_e32 v6, v3
+; GFX1250FAKE16-NEXT: v_clz_i32_u32_e32 v7, v1
+; GFX1250FAKE16-NEXT: v_clz_i32_u32_e32 v8, v5
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1250FAKE16-NEXT: v_min_u32_e32 v6, 32, v6
+; GFX1250FAKE16-NEXT: v_min_u32_e32 v7, 32, v7
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1250FAKE16-NEXT: v_min_u32_e32 v8, 32, v8
+; GFX1250FAKE16-NEXT: v_lshlrev_b64_e32 v[2:3], v6, v[2:3]
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1250FAKE16-NEXT: v_lshlrev_b64_e32 v[0:1], v7, v[0:1]
+; GFX1250FAKE16-NEXT: v_lshlrev_b64_e32 v[4:5], v8, v[4:5]
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1250FAKE16-NEXT: v_min_u32_e32 v2, 1, v2
+; GFX1250FAKE16-NEXT: v_min_u32_e32 v0, 1, v0
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1250FAKE16-NEXT: v_min_u32_e32 v4, 1, v4
+; GFX1250FAKE16-NEXT: v_dual_sub_nc_u32 v8, 32, v8 :: v_dual_bitop2_b32 v2, v3, v2 bitop3:0x54
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1250FAKE16-NEXT: v_dual_sub_nc_u32 v3, 32, v6 :: v_dual_bitop2_b32 v0, v1, v0 bitop3:0x54
+; GFX1250FAKE16-NEXT: v_dual_sub_nc_u32 v4, 32, v7 :: v_dual_bitop2_b32 v1, v5, v4 bitop3:0x54
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1250FAKE16-NEXT: v_cvt_f32_u32_e32 v2, v2
+; GFX1250FAKE16-NEXT: v_cvt_f32_u32_e32 v0, v0
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1250FAKE16-NEXT: v_cvt_f32_u32_e32 v1, v1
+; GFX1250FAKE16-NEXT: v_ldexp_f32 v2, v2, v3
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1250FAKE16-NEXT: v_ldexp_f32 v0, v0, v4
+; GFX1250FAKE16-NEXT: v_ldexp_f32 v1, v1, v8
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250FAKE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, v2
+; GFX1250FAKE16-NEXT: v_cvt_pk_bf16_f32 v1, v1, s0
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = uitofp <3 x i64> %x to <3 x bfloat>
ret <3 x bfloat> %op
}
@@ -42717,15 +43632,25 @@ define bfloat @v_select_bf16(i1 %cond, bfloat %a, bfloat %b) {
; GFX11FAKE16-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc_lo
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_select_bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1250-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc_lo
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_select_bf16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_and_b32_e32 v0, 1, v0
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v1.l, vcc_lo
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_select_bf16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_and_b32_e32 v0, 1, v0
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc_lo
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = select i1 %cond, bfloat %a, bfloat %b
ret bfloat %op
}
@@ -42810,16 +43735,27 @@ define bfloat @v_select_fneg_lhs_bf16(i1 %cond, bfloat %a, bfloat %b) {
; GFX11FAKE16-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc_lo
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_select_fneg_lhs_bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1250-NEXT: v_xor_b32_e32 v1, 0x8000, v1
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1250-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc_lo
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_select_fneg_lhs_bf16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_and_b32_e32 v0, 1, v0
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
+; GFX1250TRUE16-NEXT: v_xor_b16 v0.l, 0x8000, v1.l
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v0.l, vcc_lo
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_select_fneg_lhs_bf16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_and_b32_e32 v0, 1, v0
+; GFX1250FAKE16-NEXT: v_xor_b32_e32 v1, 0x8000, v1
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc_lo
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%neg.a = fneg bfloat %a
%op = select i1 %cond, bfloat %neg.a, bfloat %b
ret bfloat %op
@@ -42905,16 +43841,27 @@ define bfloat @v_select_fneg_rhs_bf16(i1 %cond, bfloat %a, bfloat %b) {
; GFX11FAKE16-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc_lo
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_select_fneg_rhs_bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1250-NEXT: v_xor_b32_e32 v2, 0x8000, v2
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1250-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc_lo
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_select_fneg_rhs_bf16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_and_b32_e32 v0, 1, v0
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
+; GFX1250TRUE16-NEXT: v_xor_b16 v0.l, 0x8000, v2.l
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v0.l, v0.l, v1.l, vcc_lo
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_select_fneg_rhs_bf16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_and_b32_e32 v0, 1, v0
+; GFX1250FAKE16-NEXT: v_xor_b32_e32 v2, 0x8000, v2
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc_lo
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%neg.b = fneg bfloat %b
%op = select i1 %cond, bfloat %a, bfloat %neg.b
ret bfloat %op
@@ -43025,18 +43972,29 @@ define <2 x bfloat> @v_select_v2bf16(i1 %cond, <2 x bfloat> %a, <2 x bfloat> %b)
; GFX11FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_select_v2bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_dual_lshrrev_b32 v3, 16, v1 :: v_dual_bitop2_b32 v0, 1, v0 bitop3:0x40
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1250-NEXT: v_dual_lshrrev_b32 v4, 16, v2 :: v_dual_cndmask_b32 v0, v2, v1, vcc_lo
-; GFX1250-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc_lo
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1250-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_select_v2bf16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_and_b32_e32 v0, 1, v0
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v1.l, vcc_lo
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v0.h, v2.h, v1.h, vcc_lo
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_select_v2bf16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_dual_lshrrev_b32 v3, 16, v1 :: v_dual_bitop2_b32 v0, 1, v0 bitop3:0x40
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
+; GFX1250FAKE16-NEXT: v_dual_lshrrev_b32 v4, 16, v2 :: v_dual_cndmask_b32 v0, v2, v1, vcc_lo
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc_lo
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = select i1 %cond, <2 x bfloat> %a, <2 x bfloat> %b
ret <2 x bfloat> %op
}
@@ -43155,20 +44113,34 @@ define <2 x bfloat> @v_vselect_v2bf16(<2 x i1> %cond, <2 x bfloat> %a, <2 x bflo
; GFX11FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_vselect_v2bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_dual_lshrrev_b32 v4, 16, v2 :: v_dual_bitop2_b32 v0, 1, v0 bitop3:0x40
-; GFX1250-NEXT: v_dual_lshrrev_b32 v5, 16, v3 :: v_dual_bitop2_b32 v1, 1, v1 bitop3:0x40
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1250-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc_lo
-; GFX1250-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_vselect_v2bf16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_and_b16 v0.l, 1, v0.l
+; GFX1250TRUE16-NEXT: v_and_b16 v0.h, 1, v1.l
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250TRUE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 1, v0.l
+; GFX1250TRUE16-NEXT: v_cmp_eq_u16_e64 s0, 1, v0.h
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, v2.l, vcc_lo
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v0.h, v3.h, v2.h, s0
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_vselect_v2bf16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_dual_lshrrev_b32 v4, 16, v2 :: v_dual_bitop2_b32 v0, 1, v0 bitop3:0x40
+; GFX1250FAKE16-NEXT: v_dual_lshrrev_b32 v5, 16, v3 :: v_dual_bitop2_b32 v1, 1, v1 bitop3:0x40
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc_lo
+; GFX1250FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = select <2 x i1> %cond, <2 x bfloat> %a, <2 x bfloat> %b
ret <2 x bfloat> %op
}
@@ -43256,16 +44228,26 @@ define amdgpu_ps i32 @s_select_bf16(bfloat inreg %a, bfloat inreg %b, i32 %c) {
; GFX11FAKE16-NEXT: v_readfirstlane_b32 s0, v0
; GFX11FAKE16-NEXT: ; return to shader part epilog
;
-; GFX1250-LABEL: s_select_bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: v_mov_b32_e32 v1, s0
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT: v_cndmask_b32_e32 v0, s1, v1, vcc_lo
-; GFX1250-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1250-NEXT: v_readfirstlane_b32 s0, v0
-; GFX1250-NEXT: ; return to shader part epilog
+; GFX1250TRUE16-LABEL: s_select_bf16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.l, s0
+; GFX1250TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v0.h, 0
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v0.l, s1, v1.l, vcc_lo
+; GFX1250TRUE16-NEXT: v_readfirstlane_b32 s0, v0
+; GFX1250TRUE16-NEXT: ; return to shader part epilog
+;
+; GFX1250FAKE16-LABEL: s_select_bf16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e32 v0, s1, v1, vcc_lo
+; GFX1250FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_readfirstlane_b32 s0, v0
+; GFX1250FAKE16-NEXT: ; return to shader part epilog
%cond = icmp eq i32 %c, 0
%op = select i1 %cond, bfloat %a, bfloat %b
%cast = bitcast bfloat %op to i16
@@ -43402,20 +44384,34 @@ define amdgpu_ps i32 @s_select_v2bf16(<2 x bfloat> inreg %a, <2 x bfloat> inreg
; GFX11FAKE16-NEXT: v_readfirstlane_b32 s0, v0
; GFX11FAKE16-NEXT: ; return to shader part epilog
;
-; GFX1250-LABEL: s_select_v2bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_lshr_b32 s2, s0, 16
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX1250-NEXT: v_dual_mov_b32 v1, s2 :: v_dual_mov_b32 v2, s0
-; GFX1250-NEXT: s_lshr_b32 s3, s1, 16
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
-; GFX1250-NEXT: v_cndmask_b32_e32 v0, s3, v1, vcc_lo
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT: v_cndmask_b32_e32 v1, s1, v2, vcc_lo
-; GFX1250-NEXT: v_perm_b32 v0, v0, v1, 0x5040100
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1250-NEXT: v_readfirstlane_b32 s0, v0
-; GFX1250-NEXT: ; return to shader part epilog
+; GFX1250TRUE16-LABEL: s_select_v2bf16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_lshr_b32 s2, s0, 16
+; GFX1250TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.l, s2
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v0.l, s0
+; GFX1250TRUE16-NEXT: s_lshr_b32 s0, s1, 16
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instid1(SALU_CYCLE_1)
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v0.h, s0, v1.l, vcc_lo
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v0.l, s1, v0.l, vcc_lo
+; GFX1250TRUE16-NEXT: v_readfirstlane_b32 s0, v0
+; GFX1250TRUE16-NEXT: ; return to shader part epilog
+;
+; GFX1250FAKE16-LABEL: s_select_v2bf16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_lshr_b32 s2, s0, 16
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX1250FAKE16-NEXT: v_dual_mov_b32 v1, s2 :: v_dual_mov_b32 v2, s0
+; GFX1250FAKE16-NEXT: s_lshr_b32 s3, s1, 16
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e32 v0, s3, v1, vcc_lo
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e32 v1, s1, v2, vcc_lo
+; GFX1250FAKE16-NEXT: v_perm_b32 v0, v0, v1, 0x5040100
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_readfirstlane_b32 s0, v0
+; GFX1250FAKE16-NEXT: ; return to shader part epilog
%cond = icmp eq i32 %c, 0
%op = select i1 %cond, <2 x bfloat> %a, <2 x bfloat> %b
%cast = bitcast <2 x bfloat> %op to i32
@@ -43554,21 +44550,36 @@ define amdgpu_ps i32 @s_vselect_v2bf16(<2 x bfloat> inreg %a, <2 x bfloat> inreg
; GFX11FAKE16-NEXT: v_readfirstlane_b32 s0, v0
; GFX11FAKE16-NEXT: ; return to shader part epilog
;
-; GFX1250-LABEL: s_vselect_v2bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_lshr_b32 s2, s0, 16
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX1250-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s0
-; GFX1250-NEXT: s_lshr_b32 s0, s1, 16
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
-; GFX1250-NEXT: v_cndmask_b32_e32 v1, s0, v2, vcc_lo
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT: v_cndmask_b32_e32 v0, s1, v3, vcc_lo
-; GFX1250-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1250-NEXT: v_readfirstlane_b32 s0, v0
-; GFX1250-NEXT: ; return to shader part epilog
+; GFX1250TRUE16-LABEL: s_vselect_v2bf16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_lshr_b32 s3, s0, 16
+; GFX1250TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX1250TRUE16-NEXT: v_cmp_eq_u32_e64 s2, 0, v1
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v0.l, s3
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v0.h, s0
+; GFX1250TRUE16-NEXT: s_lshr_b32 s0, s1, 16
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instid1(SALU_CYCLE_1)
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v1.h, s0, v0.l, s2
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v1.l, s1, v0.h, vcc_lo
+; GFX1250TRUE16-NEXT: v_readfirstlane_b32 s0, v1
+; GFX1250TRUE16-NEXT: ; return to shader part epilog
+;
+; GFX1250FAKE16-LABEL: s_vselect_v2bf16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_lshr_b32 s2, s0, 16
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX1250FAKE16-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s0
+; GFX1250FAKE16-NEXT: s_lshr_b32 s0, s1, 16
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e32 v1, s0, v2, vcc_lo
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e32 v0, s1, v3, vcc_lo
+; GFX1250FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_readfirstlane_b32 s0, v0
+; GFX1250FAKE16-NEXT: ; return to shader part epilog
%cond = icmp eq <2 x i32> %c, zeroinitializer
%op = select <2 x i1> %cond, <2 x bfloat> %a, <2 x bfloat> %b
%cast = bitcast <2 x bfloat> %op to i32
@@ -45557,32 +46568,55 @@ define amdgpu_ps <2 x i32> @s_vselect_v4bf16(<4 x bfloat> inreg %a, <4 x bfloat>
; GFX11FAKE16-NEXT: v_readfirstlane_b32 s1, v1
; GFX11FAKE16-NEXT: ; return to shader part epilog
;
-; GFX1250-LABEL: s_vselect_v4bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_lshr_b32 s4, s1, 16
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3
-; GFX1250-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v5, s1
-; GFX1250-NEXT: s_lshr_b32 s4, s3, 16
-; GFX1250-NEXT: s_lshr_b32 s5, s0, 16
-; GFX1250-NEXT: v_mov_b32_e32 v6, s0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX1250-NEXT: v_cndmask_b32_e32 v3, s4, v4, vcc_lo
-; GFX1250-NEXT: v_mov_b32_e32 v4, s5
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX1250-NEXT: s_lshr_b32 s0, s2, 16
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instid1(SALU_CYCLE_1)
-; GFX1250-NEXT: v_cndmask_b32_e32 v1, s0, v4, vcc_lo
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX1250-NEXT: v_cndmask_b32_e32 v0, s2, v6, vcc_lo
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v2
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX1250-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX1250-NEXT: v_cndmask_b32_e32 v2, s3, v5, vcc_lo
-; GFX1250-NEXT: v_readfirstlane_b32 s0, v0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT: v_perm_b32 v1, v3, v2, 0x5040100
-; GFX1250-NEXT: v_readfirstlane_b32 s1, v1
-; GFX1250-NEXT: ; return to shader part epilog
+; GFX1250TRUE16-LABEL: s_vselect_v4bf16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_lshr_b32 s7, s1, 16
+; GFX1250TRUE16-NEXT: s_lshr_b32 s9, s0, 16
+; GFX1250TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX1250TRUE16-NEXT: v_cmp_eq_u32_e64 s4, 0, v1
+; GFX1250TRUE16-NEXT: v_cmp_eq_u32_e64 s5, 0, v2
+; GFX1250TRUE16-NEXT: v_cmp_eq_u32_e64 s6, 0, v3
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v0.l, s7
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v0.h, s9
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.l, s0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.h, s1
+; GFX1250TRUE16-NEXT: s_lshr_b32 s8, s3, 16
+; GFX1250TRUE16-NEXT: s_lshr_b32 s0, s2, 16
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v2.h, s8, v0.l, s6
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v0.h, s0, v0.h, s4
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v0.l, s2, v1.l, vcc_lo
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v2.l, s3, v1.h, s5
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250TRUE16-NEXT: v_readfirstlane_b32 s0, v0
+; GFX1250TRUE16-NEXT: v_readfirstlane_b32 s1, v2
+; GFX1250TRUE16-NEXT: ; return to shader part epilog
+;
+; GFX1250FAKE16-LABEL: s_vselect_v4bf16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_lshr_b32 s4, s1, 16
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3
+; GFX1250FAKE16-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v5, s1
+; GFX1250FAKE16-NEXT: s_lshr_b32 s4, s3, 16
+; GFX1250FAKE16-NEXT: s_lshr_b32 s5, s0, 16
+; GFX1250FAKE16-NEXT: v_mov_b32_e32 v6, s0
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e32 v3, s4, v4, vcc_lo
+; GFX1250FAKE16-NEXT: v_mov_b32_e32 v4, s5
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX1250FAKE16-NEXT: s_lshr_b32 s0, s2, 16
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instid1(SALU_CYCLE_1)
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e32 v1, s0, v4, vcc_lo
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e32 v0, s2, v6, vcc_lo
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v2
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1250FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e32 v2, s3, v5, vcc_lo
+; GFX1250FAKE16-NEXT: v_readfirstlane_b32 s0, v0
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_perm_b32 v1, v3, v2, 0x5040100
+; GFX1250FAKE16-NEXT: v_readfirstlane_b32 s1, v1
+; GFX1250FAKE16-NEXT: ; return to shader part epilog
%cond = icmp eq <4 x i32> %c, zeroinitializer
%op = select <4 x i1> %cond, <4 x bfloat> %a, <4 x bfloat> %b
%cast = bitcast <4 x bfloat> %op to <2 x i32>
@@ -45787,27 +46821,49 @@ define <4 x bfloat> @v_vselect_v4bf16(<4 x i1> %cond, <4 x bfloat> %a, <4 x bflo
; GFX11FAKE16-NEXT: v_perm_b32 v1, v3, v2, 0x5040100
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_vselect_v4bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_and_b32_e32 v2, 1, v2
-; GFX1250-NEXT: v_dual_lshrrev_b32 v8, 16, v4 :: v_dual_bitop2_b32 v1, 1, v1 bitop3:0x40
-; GFX1250-NEXT: v_dual_lshrrev_b32 v9, 16, v6 :: v_dual_bitop2_b32 v3, 1, v3 bitop3:0x40
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v2
-; GFX1250-NEXT: v_dual_cndmask_b32 v2, v7, v5, vcc_lo :: v_dual_bitop2_b32 v0, 1, v0 bitop3:0x40
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1250-NEXT: v_dual_lshrrev_b32 v7, 16, v7 :: v_dual_lshrrev_b32 v5, 16, v5
-; GFX1250-NEXT: v_cndmask_b32_e32 v0, v6, v4, vcc_lo
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1
-; GFX1250-NEXT: v_cndmask_b32_e32 v1, v9, v8, vcc_lo
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v3
-; GFX1250-NEXT: v_cndmask_b32_e32 v3, v7, v5, vcc_lo
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1250-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX1250-NEXT: v_perm_b32 v1, v3, v2, 0x5040100
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_vselect_v4bf16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_and_b16 v0.h, 1, v2.l
+; GFX1250TRUE16-NEXT: v_and_b16 v0.l, 1, v0.l
+; GFX1250TRUE16-NEXT: v_and_b16 v1.l, 1, v1.l
+; GFX1250TRUE16-NEXT: v_and_b16 v1.h, 1, v3.l
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX1250TRUE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 1, v0.h
+; GFX1250TRUE16-NEXT: v_cmp_eq_u16_e64 s0, 1, v0.l
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX1250TRUE16-NEXT: v_cmp_eq_u16_e64 s1, 1, v1.l
+; GFX1250TRUE16-NEXT: v_cmp_eq_u16_e64 s2, 1, v1.h
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v1.l, v7.l, v5.l, vcc_lo
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v0.l, v6.l, v4.l, s0
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v0.h, v6.h, v4.h, s1
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4)
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v1.h, v7.h, v5.h, s2
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_vselect_v4bf16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_and_b32_e32 v2, 1, v2
+; GFX1250FAKE16-NEXT: v_dual_lshrrev_b32 v8, 16, v4 :: v_dual_bitop2_b32 v1, 1, v1 bitop3:0x40
+; GFX1250FAKE16-NEXT: v_dual_lshrrev_b32 v9, 16, v6 :: v_dual_bitop2_b32 v3, 1, v3 bitop3:0x40
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v2
+; GFX1250FAKE16-NEXT: v_dual_cndmask_b32 v2, v7, v5, vcc_lo :: v_dual_bitop2_b32 v0, 1, v0 bitop3:0x40
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
+; GFX1250FAKE16-NEXT: v_dual_lshrrev_b32 v7, 16, v7 :: v_dual_lshrrev_b32 v5, 16, v5
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e32 v0, v6, v4, vcc_lo
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e32 v1, v9, v8, vcc_lo
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v3
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e32 v3, v7, v5, vcc_lo
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX1250FAKE16-NEXT: v_perm_b32 v1, v3, v2, 0x5040100
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = select <4 x i1> %cond, <4 x bfloat> %a, <4 x bfloat> %b
ret <4 x bfloat> %op
}
@@ -46161,45 +47217,77 @@ define <8 x bfloat> @v_vselect_v8bf16(<8 x i1> %cond, <8 x bfloat> %a, <8 x bflo
; GFX11FAKE16-NEXT: v_perm_b32 v3, v7, v6, 0x5040100
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_vselect_v8bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_and_b32_e32 v6, 1, v6
-; GFX1250-NEXT: v_and_b32_e32 v4, 1, v4
-; GFX1250-NEXT: v_dual_lshrrev_b32 v17, 16, v14 :: v_dual_bitop2_b32 v5, 1, v5 bitop3:0x40
-; GFX1250-NEXT: v_dual_lshrrev_b32 v16, 16, v10 :: v_dual_bitop2_b32 v3, 1, v3 bitop3:0x40
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_4)
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v6
-; GFX1250-NEXT: v_and_b32_e32 v1, 1, v1
-; GFX1250-NEXT: v_dual_cndmask_b32 v6, v15, v11, vcc_lo :: v_dual_bitop2_b32 v0, 1, v0 bitop3:0x40
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v4
-; GFX1250-NEXT: v_and_b32_e32 v7, 1, v7
-; GFX1250-NEXT: v_lshrrev_b32_e32 v11, 16, v11
-; GFX1250-NEXT: v_dual_cndmask_b32 v4, v14, v10 :: v_dual_lshrrev_b32 v15, 16, v15
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v5
-; GFX1250-NEXT: v_dual_lshrrev_b32 v14, 16, v12 :: v_dual_bitop2_b32 v2, 1, v2 bitop3:0x40
-; GFX1250-NEXT: v_lshrrev_b32_e32 v10, 16, v8
-; GFX1250-NEXT: v_cndmask_b32_e32 v5, v17, v16, vcc_lo
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3)
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v2
-; GFX1250-NEXT: v_cndmask_b32_e32 v2, v13, v9, vcc_lo
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1250-NEXT: v_lshrrev_b32_e32 v9, 16, v9
-; GFX1250-NEXT: v_dual_cndmask_b32 v0, v12, v8 :: v_dual_lshrrev_b32 v13, 16, v13
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1
-; GFX1250-NEXT: v_cndmask_b32_e32 v1, v14, v10, vcc_lo
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v3
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_3)
-; GFX1250-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX1250-NEXT: v_cndmask_b32_e32 v3, v13, v9, vcc_lo
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v7
-; GFX1250-NEXT: v_cndmask_b32_e32 v7, v15, v11, vcc_lo
-; GFX1250-NEXT: v_perm_b32 v1, v3, v2, 0x5040100
-; GFX1250-NEXT: v_perm_b32 v2, v5, v4, 0x5040100
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3)
-; GFX1250-NEXT: v_perm_b32 v3, v7, v6, 0x5040100
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_vselect_v8bf16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_and_b16 v0.h, 1, v1.l
+; GFX1250TRUE16-NEXT: v_and_b16 v0.l, 1, v0.l
+; GFX1250TRUE16-NEXT: v_and_b16 v1.l, 1, v3.l
+; GFX1250TRUE16-NEXT: v_and_b16 v1.h, 1, v5.l
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX1250TRUE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 1, v0.h
+; GFX1250TRUE16-NEXT: v_cmp_eq_u16_e64 s0, 1, v0.l
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4)
+; GFX1250TRUE16-NEXT: v_cmp_eq_u16_e64 s1, 1, v1.l
+; GFX1250TRUE16-NEXT: v_and_b16 v0.l, 1, v6.l
+; GFX1250TRUE16-NEXT: v_and_b16 v0.h, 1, v4.l
+; GFX1250TRUE16-NEXT: v_and_b16 v1.l, 1, v2.l
+; GFX1250TRUE16-NEXT: v_and_b16 v2.l, 1, v7.l
+; GFX1250TRUE16-NEXT: v_cmp_eq_u16_e64 s5, 1, v1.h
+; GFX1250TRUE16-NEXT: v_cmp_eq_u16_e64 s2, 1, v0.l
+; GFX1250TRUE16-NEXT: v_cmp_eq_u16_e64 s3, 1, v0.h
+; GFX1250TRUE16-NEXT: v_cmp_eq_u16_e64 s4, 1, v1.l
+; GFX1250TRUE16-NEXT: v_cmp_eq_u16_e64 s6, 1, v2.l
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v0.l, v12.l, v8.l, s0
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v3.l, v15.l, v11.l, s2
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v2.l, v14.l, v10.l, s3
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v1.l, v13.l, v9.l, s4
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v0.h, v12.h, v8.h, vcc_lo
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v1.h, v13.h, v9.h, s1
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v2.h, v14.h, v10.h, s5
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v3.h, v15.h, v11.h, s6
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_vselect_v8bf16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_and_b32_e32 v6, 1, v6
+; GFX1250FAKE16-NEXT: v_and_b32_e32 v4, 1, v4
+; GFX1250FAKE16-NEXT: v_dual_lshrrev_b32 v17, 16, v14 :: v_dual_bitop2_b32 v5, 1, v5 bitop3:0x40
+; GFX1250FAKE16-NEXT: v_dual_lshrrev_b32 v16, 16, v10 :: v_dual_bitop2_b32 v3, 1, v3 bitop3:0x40
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4)
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v6
+; GFX1250FAKE16-NEXT: v_and_b32_e32 v1, 1, v1
+; GFX1250FAKE16-NEXT: v_dual_cndmask_b32 v6, v15, v11, vcc_lo :: v_dual_bitop2_b32 v0, 1, v0 bitop3:0x40
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v4
+; GFX1250FAKE16-NEXT: v_and_b32_e32 v7, 1, v7
+; GFX1250FAKE16-NEXT: v_lshrrev_b32_e32 v11, 16, v11
+; GFX1250FAKE16-NEXT: v_dual_cndmask_b32 v4, v14, v10 :: v_dual_lshrrev_b32 v15, 16, v15
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v5
+; GFX1250FAKE16-NEXT: v_dual_lshrrev_b32 v14, 16, v12 :: v_dual_bitop2_b32 v2, 1, v2 bitop3:0x40
+; GFX1250FAKE16-NEXT: v_lshrrev_b32_e32 v10, 16, v8
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e32 v5, v17, v16, vcc_lo
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3)
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v2
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e32 v2, v13, v9, vcc_lo
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
+; GFX1250FAKE16-NEXT: v_lshrrev_b32_e32 v9, 16, v9
+; GFX1250FAKE16-NEXT: v_dual_cndmask_b32 v0, v12, v8 :: v_dual_lshrrev_b32 v13, 16, v13
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e32 v1, v14, v10, vcc_lo
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v3
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_3)
+; GFX1250FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e32 v3, v13, v9, vcc_lo
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v7
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e32 v7, v15, v11, vcc_lo
+; GFX1250FAKE16-NEXT: v_perm_b32 v1, v3, v2, 0x5040100
+; GFX1250FAKE16-NEXT: v_perm_b32 v2, v5, v4, 0x5040100
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3)
+; GFX1250FAKE16-NEXT: v_perm_b32 v3, v7, v6, 0x5040100
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = select <8 x i1> %cond, <8 x bfloat> %a, <8 x bfloat> %b
ret <8 x bfloat> %op
}
@@ -46939,73 +48027,129 @@ define <16 x bfloat> @v_vselect_v16bf16(<16 x i1> %cond, <16 x bfloat> %a, <16 x
; GFX11FAKE16-NEXT: v_perm_b32 v7, v15, v14, 0x5040100
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_vselect_v16bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: scratch_load_b32 v31, off, s32
-; GFX1250-NEXT: v_dual_lshrrev_b32 v52, 16, v25 :: v_dual_bitop2_b32 v12, 1, v12 bitop3:0x40
-; GFX1250-NEXT: v_dual_lshrrev_b32 v53, 16, v16 :: v_dual_bitop2_b32 v13, 1, v13 bitop3:0x40
-; GFX1250-NEXT: v_dual_lshrrev_b32 v33, 16, v22 :: v_dual_bitop2_b32 v0, 1, v0 bitop3:0x40
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3)
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v12
-; GFX1250-NEXT: v_dual_lshrrev_b32 v34, 16, v30 :: v_dual_bitop2_b32 v3, 1, v3 bitop3:0x40
-; GFX1250-NEXT: v_dual_lshrrev_b32 v51, 16, v17 :: v_dual_bitop2_b32 v10, 1, v10 bitop3:0x40
-; GFX1250-NEXT: v_cndmask_b32_e32 v12, v30, v22, vcc_lo
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v13
-; GFX1250-NEXT: v_dual_lshrrev_b32 v50, 16, v26 :: v_dual_bitop2_b32 v11, 1, v11 bitop3:0x40
-; GFX1250-NEXT: v_and_b32_e32 v14, 1, v14
-; GFX1250-NEXT: v_dual_lshrrev_b32 v35, 16, v21 :: v_dual_bitop2_b32 v2, 1, v2 bitop3:0x40
-; GFX1250-NEXT: v_cndmask_b32_e32 v13, v34, v33, vcc_lo
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v10
-; GFX1250-NEXT: v_dual_lshrrev_b32 v36, 16, v29 :: v_dual_bitop2_b32 v4, 1, v4 bitop3:0x40
-; GFX1250-NEXT: v_dual_lshrrev_b32 v49, 16, v18 :: v_dual_bitop2_b32 v8, 1, v8 bitop3:0x40
-; GFX1250-NEXT: v_cndmask_b32_e32 v10, v29, v21, vcc_lo
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v11
-; GFX1250-NEXT: v_dual_lshrrev_b32 v37, 16, v20 :: v_dual_bitop2_b32 v5, 1, v5 bitop3:0x40
-; GFX1250-NEXT: v_dual_lshrrev_b32 v38, 16, v28 :: v_dual_bitop2_b32 v7, 1, v7 bitop3:0x40
-; GFX1250-NEXT: v_dual_lshrrev_b32 v48, 16, v27 :: v_dual_bitop2_b32 v9, 1, v9 bitop3:0x40
-; GFX1250-NEXT: v_cndmask_b32_e32 v11, v36, v35, vcc_lo
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v8
-; GFX1250-NEXT: v_dual_lshrrev_b32 v39, 16, v19 :: v_dual_bitop2_b32 v6, 1, v6 bitop3:0x40
-; GFX1250-NEXT: v_dual_lshrrev_b32 v32, 16, v23 :: v_dual_bitop2_b32 v1, 1, v1 bitop3:0x40
-; GFX1250-NEXT: v_cndmask_b32_e32 v8, v28, v20, vcc_lo
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v9
-; GFX1250-NEXT: v_dual_lshrrev_b32 v54, 16, v24 :: v_dual_bitop2_b32 v15, 1, v15 bitop3:0x40
-; GFX1250-NEXT: v_cndmask_b32_e32 v9, v38, v37, vcc_lo
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v6
-; GFX1250-NEXT: v_cndmask_b32_e32 v6, v27, v19, vcc_lo
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v4
-; GFX1250-NEXT: v_cndmask_b32_e32 v4, v26, v18, vcc_lo
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v2
-; GFX1250-NEXT: v_cndmask_b32_e32 v2, v25, v17, vcc_lo
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v3
-; GFX1250-NEXT: v_cndmask_b32_e32 v3, v52, v51, vcc_lo
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1250-NEXT: v_cndmask_b32_e32 v0, v24, v16, vcc_lo
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1
-; GFX1250-NEXT: v_cndmask_b32_e32 v1, v54, v53, vcc_lo
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v5
-; GFX1250-NEXT: v_cndmask_b32_e32 v5, v50, v49, vcc_lo
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v7
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
-; GFX1250-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX1250-NEXT: v_perm_b32 v1, v3, v2, 0x5040100
-; GFX1250-NEXT: v_perm_b32 v2, v5, v4, 0x5040100
-; GFX1250-NEXT: v_perm_b32 v4, v9, v8, 0x5040100
-; GFX1250-NEXT: v_perm_b32 v5, v11, v10, 0x5040100
-; GFX1250-NEXT: s_wait_loadcnt 0x0
-; GFX1250-NEXT: v_lshrrev_b32_e32 v3, 16, v31
-; GFX1250-NEXT: v_cndmask_b32_e32 v7, v48, v39, vcc_lo
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v14
-; GFX1250-NEXT: v_cndmask_b32_e32 v14, v31, v23, vcc_lo
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v15
-; GFX1250-NEXT: v_cndmask_b32_e32 v15, v3, v32, vcc_lo
-; GFX1250-NEXT: v_perm_b32 v3, v7, v6, 0x5040100
-; GFX1250-NEXT: v_perm_b32 v6, v13, v12, 0x5040100
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3)
-; GFX1250-NEXT: v_perm_b32 v7, v15, v14, 0x5040100
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_vselect_v16bf16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: scratch_load_b32 v31, off, s32
+; GFX1250TRUE16-NEXT: v_and_b16 v0.h, 1, v1.l
+; GFX1250TRUE16-NEXT: v_and_b16 v0.l, 1, v0.l
+; GFX1250TRUE16-NEXT: v_and_b16 v1.l, 1, v3.l
+; GFX1250TRUE16-NEXT: v_and_b16 v1.h, 1, v2.l
+; GFX1250TRUE16-NEXT: v_and_b16 v2.l, 1, v5.l
+; GFX1250TRUE16-NEXT: v_and_b16 v2.h, 1, v4.l
+; GFX1250TRUE16-NEXT: v_and_b16 v3.l, 1, v7.l
+; GFX1250TRUE16-NEXT: v_and_b16 v3.h, 1, v6.l
+; GFX1250TRUE16-NEXT: v_and_b16 v4.l, 1, v9.l
+; GFX1250TRUE16-NEXT: v_and_b16 v4.h, 1, v8.l
+; GFX1250TRUE16-NEXT: v_and_b16 v5.l, 1, v11.l
+; GFX1250TRUE16-NEXT: v_and_b16 v5.h, 1, v10.l
+; GFX1250TRUE16-NEXT: v_and_b16 v6.l, 1, v13.l
+; GFX1250TRUE16-NEXT: v_and_b16 v6.h, 1, v12.l
+; GFX1250TRUE16-NEXT: v_and_b16 v7.l, 1, v15.l
+; GFX1250TRUE16-NEXT: v_and_b16 v7.h, 1, v14.l
+; GFX1250TRUE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 1, v0.h
+; GFX1250TRUE16-NEXT: v_cmp_eq_u16_e64 s0, 1, v0.l
+; GFX1250TRUE16-NEXT: v_cmp_eq_u16_e64 s1, 1, v1.l
+; GFX1250TRUE16-NEXT: v_cmp_eq_u16_e64 s2, 1, v1.h
+; GFX1250TRUE16-NEXT: v_cmp_eq_u16_e64 s3, 1, v2.l
+; GFX1250TRUE16-NEXT: v_cmp_eq_u16_e64 s4, 1, v2.h
+; GFX1250TRUE16-NEXT: v_cmp_eq_u16_e64 s5, 1, v3.l
+; GFX1250TRUE16-NEXT: v_cmp_eq_u16_e64 s6, 1, v3.h
+; GFX1250TRUE16-NEXT: v_cmp_eq_u16_e64 s7, 1, v4.l
+; GFX1250TRUE16-NEXT: v_cmp_eq_u16_e64 s8, 1, v4.h
+; GFX1250TRUE16-NEXT: v_cmp_eq_u16_e64 s9, 1, v5.l
+; GFX1250TRUE16-NEXT: v_cmp_eq_u16_e64 s10, 1, v6.h
+; GFX1250TRUE16-NEXT: v_cmp_eq_u16_e64 s11, 1, v6.l
+; GFX1250TRUE16-NEXT: v_cmp_eq_u16_e64 s12, 1, v5.h
+; GFX1250TRUE16-NEXT: v_cmp_eq_u16_e64 s13, 1, v7.l
+; GFX1250TRUE16-NEXT: v_cmp_eq_u16_e64 s14, 1, v7.h
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v6.l, v30.l, v22.l, s10
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v6.h, v30.h, v22.h, s11
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v5.l, v29.l, v21.l, s12
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v5.h, v29.h, v21.h, s9
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v4.l, v28.l, v20.l, s8
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v4.h, v28.h, v20.h, s7
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v3.l, v27.l, v19.l, s6
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v3.h, v27.h, v19.h, s5
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v2.l, v26.l, v18.l, s4
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v1.l, v25.l, v17.l, s2
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v0.l, v24.l, v16.l, s0
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v0.h, v24.h, v16.h, vcc_lo
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v1.h, v25.h, v17.h, s1
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v2.h, v26.h, v18.h, s3
+; GFX1250TRUE16-NEXT: s_wait_loadcnt 0x0
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v7.l, v31.l, v23.l, s14
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v7.h, v31.h, v23.h, s13
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_vselect_v16bf16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: scratch_load_b32 v31, off, s32
+; GFX1250FAKE16-NEXT: v_dual_lshrrev_b32 v52, 16, v25 :: v_dual_bitop2_b32 v12, 1, v12 bitop3:0x40
+; GFX1250FAKE16-NEXT: v_dual_lshrrev_b32 v53, 16, v16 :: v_dual_bitop2_b32 v13, 1, v13 bitop3:0x40
+; GFX1250FAKE16-NEXT: v_dual_lshrrev_b32 v33, 16, v22 :: v_dual_bitop2_b32 v0, 1, v0 bitop3:0x40
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3)
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v12
+; GFX1250FAKE16-NEXT: v_dual_lshrrev_b32 v34, 16, v30 :: v_dual_bitop2_b32 v3, 1, v3 bitop3:0x40
+; GFX1250FAKE16-NEXT: v_dual_lshrrev_b32 v51, 16, v17 :: v_dual_bitop2_b32 v10, 1, v10 bitop3:0x40
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e32 v12, v30, v22, vcc_lo
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v13
+; GFX1250FAKE16-NEXT: v_dual_lshrrev_b32 v50, 16, v26 :: v_dual_bitop2_b32 v11, 1, v11 bitop3:0x40
+; GFX1250FAKE16-NEXT: v_and_b32_e32 v14, 1, v14
+; GFX1250FAKE16-NEXT: v_dual_lshrrev_b32 v35, 16, v21 :: v_dual_bitop2_b32 v2, 1, v2 bitop3:0x40
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e32 v13, v34, v33, vcc_lo
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v10
+; GFX1250FAKE16-NEXT: v_dual_lshrrev_b32 v36, 16, v29 :: v_dual_bitop2_b32 v4, 1, v4 bitop3:0x40
+; GFX1250FAKE16-NEXT: v_dual_lshrrev_b32 v49, 16, v18 :: v_dual_bitop2_b32 v8, 1, v8 bitop3:0x40
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e32 v10, v29, v21, vcc_lo
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v11
+; GFX1250FAKE16-NEXT: v_dual_lshrrev_b32 v37, 16, v20 :: v_dual_bitop2_b32 v5, 1, v5 bitop3:0x40
+; GFX1250FAKE16-NEXT: v_dual_lshrrev_b32 v38, 16, v28 :: v_dual_bitop2_b32 v7, 1, v7 bitop3:0x40
+; GFX1250FAKE16-NEXT: v_dual_lshrrev_b32 v48, 16, v27 :: v_dual_bitop2_b32 v9, 1, v9 bitop3:0x40
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e32 v11, v36, v35, vcc_lo
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v8
+; GFX1250FAKE16-NEXT: v_dual_lshrrev_b32 v39, 16, v19 :: v_dual_bitop2_b32 v6, 1, v6 bitop3:0x40
+; GFX1250FAKE16-NEXT: v_dual_lshrrev_b32 v32, 16, v23 :: v_dual_bitop2_b32 v1, 1, v1 bitop3:0x40
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e32 v8, v28, v20, vcc_lo
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v9
+; GFX1250FAKE16-NEXT: v_dual_lshrrev_b32 v54, 16, v24 :: v_dual_bitop2_b32 v15, 1, v15 bitop3:0x40
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e32 v9, v38, v37, vcc_lo
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v6
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e32 v6, v27, v19, vcc_lo
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v4
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e32 v4, v26, v18, vcc_lo
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v2
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e32 v2, v25, v17, vcc_lo
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v3
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e32 v3, v52, v51, vcc_lo
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e32 v0, v24, v16, vcc_lo
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e32 v1, v54, v53, vcc_lo
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v5
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e32 v5, v50, v49, vcc_lo
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v7
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
+; GFX1250FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX1250FAKE16-NEXT: v_perm_b32 v1, v3, v2, 0x5040100
+; GFX1250FAKE16-NEXT: v_perm_b32 v2, v5, v4, 0x5040100
+; GFX1250FAKE16-NEXT: v_perm_b32 v4, v9, v8, 0x5040100
+; GFX1250FAKE16-NEXT: v_perm_b32 v5, v11, v10, 0x5040100
+; GFX1250FAKE16-NEXT: s_wait_loadcnt 0x0
+; GFX1250FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v31
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e32 v7, v48, v39, vcc_lo
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v14
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e32 v14, v31, v23, vcc_lo
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v15
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e32 v15, v3, v32, vcc_lo
+; GFX1250FAKE16-NEXT: v_perm_b32 v3, v7, v6, 0x5040100
+; GFX1250FAKE16-NEXT: v_perm_b32 v6, v13, v12, 0x5040100
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3)
+; GFX1250FAKE16-NEXT: v_perm_b32 v7, v15, v14, 0x5040100
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = select <16 x i1> %cond, <16 x bfloat> %a, <16 x bfloat> %b
ret <16 x bfloat> %op
}
@@ -48861,177 +50005,330 @@ define <32 x bfloat> @v_vselect_v32bf16(<32 x i1> %cond, <32 x bfloat> %a, <32 x
; GFX11FAKE16-NEXT: v_perm_b32 v15, v31, v30, 0x5040100
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_vselect_v32bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: s_clause 0x1b
-; GFX1250-NEXT: scratch_load_b32 v31, off, s32 offset:60
-; GFX1250-NEXT: scratch_load_b32 v32, off, s32 offset:124
-; GFX1250-NEXT: scratch_load_u16 v33, off, s32
-; GFX1250-NEXT: scratch_load_b32 v34, off, s32 offset:128
-; GFX1250-NEXT: scratch_load_b32 v35, off, s32 offset:64
-; GFX1250-NEXT: scratch_load_b32 v36, off, s32 offset:120
-; GFX1250-NEXT: scratch_load_b32 v37, off, s32 offset:56
-; GFX1250-NEXT: scratch_load_b32 v38, off, s32 offset:116
-; GFX1250-NEXT: scratch_load_b32 v39, off, s32 offset:52
-; GFX1250-NEXT: scratch_load_b32 v48, off, s32 offset:112
-; GFX1250-NEXT: scratch_load_b32 v49, off, s32 offset:48
-; GFX1250-NEXT: scratch_load_b32 v50, off, s32 offset:108
-; GFX1250-NEXT: scratch_load_b32 v51, off, s32 offset:44
-; GFX1250-NEXT: scratch_load_b32 v52, off, s32 offset:104
-; GFX1250-NEXT: scratch_load_b32 v53, off, s32 offset:40
-; GFX1250-NEXT: scratch_load_b32 v54, off, s32 offset:100
-; GFX1250-NEXT: scratch_load_b32 v55, off, s32 offset:36
-; GFX1250-NEXT: scratch_load_b32 v64, off, s32 offset:76
-; GFX1250-NEXT: scratch_load_b32 v65, off, s32 offset:12
-; GFX1250-NEXT: scratch_load_b32 v66, off, s32 offset:96
-; GFX1250-NEXT: scratch_load_b32 v67, off, s32 offset:32
-; GFX1250-NEXT: scratch_load_b32 v68, off, s32 offset:80
-; GFX1250-NEXT: scratch_load_b32 v69, off, s32 offset:84
-; GFX1250-NEXT: scratch_load_b32 v70, off, s32 offset:92
-; GFX1250-NEXT: scratch_load_b32 v71, off, s32 offset:28
-; GFX1250-NEXT: scratch_load_b32 v80, off, s32 offset:20
-; GFX1250-NEXT: scratch_load_b32 v81, off, s32 offset:88
-; GFX1250-NEXT: scratch_load_b32 v82, off, s32 offset:24
-; GFX1250-NEXT: v_and_b32_e32 v30, 1, v30
-; GFX1250-NEXT: v_and_b32_e32 v29, 1, v29
-; GFX1250-NEXT: v_and_b32_e32 v26, 1, v26
-; GFX1250-NEXT: v_and_b32_e32 v24, 1, v24
-; GFX1250-NEXT: v_and_b32_e32 v22, 1, v22
-; GFX1250-NEXT: v_and_b32_e32 v20, 1, v20
-; GFX1250-NEXT: v_and_b32_e32 v18, 1, v18
-; GFX1250-NEXT: v_and_b32_e32 v16, 1, v16
-; GFX1250-NEXT: v_and_b32_e32 v10, 1, v10
-; GFX1250-NEXT: v_and_b32_e32 v6, 1, v6
-; GFX1250-NEXT: v_and_b32_e32 v4, 1, v4
-; GFX1250-NEXT: v_and_b32_e32 v1, 1, v1
-; GFX1250-NEXT: v_and_b32_e32 v3, 1, v3
-; GFX1250-NEXT: v_and_b32_e32 v5, 1, v5
-; GFX1250-NEXT: v_and_b32_e32 v23, 1, v23
-; GFX1250-NEXT: v_and_b32_e32 v9, 1, v9
-; GFX1250-NEXT: v_and_b32_e32 v13, 1, v13
-; GFX1250-NEXT: v_and_b32_e32 v15, 1, v15
-; GFX1250-NEXT: v_and_b32_e32 v21, 1, v21
-; GFX1250-NEXT: v_and_b32_e32 v11, 1, v11
-; GFX1250-NEXT: v_and_b32_e32 v19, 1, v19
-; GFX1250-NEXT: s_wait_loadcnt 0x1a
-; GFX1250-NEXT: v_dual_lshrrev_b32 v83, 16, v32 :: v_dual_bitop2_b32 v17, 1, v17 bitop3:0x40
-; GFX1250-NEXT: v_cmp_eq_u32_e64 s1, 1, v30
-; GFX1250-NEXT: v_and_b32_e32 v28, 1, v28
-; GFX1250-NEXT: s_wait_loadcnt 0x17
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1250-NEXT: v_dual_cndmask_b32 v30, v34, v35, s1 :: v_dual_bitop2_b32 v33, 1, v33 bitop3:0x40
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v28
-; GFX1250-NEXT: v_lshrrev_b32_e32 v28, 16, v31
-; GFX1250-NEXT: v_cmp_eq_u32_e64 s0, 1, v29
-; GFX1250-NEXT: scratch_load_b32 v29, off, s32 offset:16
-; GFX1250-NEXT: v_dual_lshrrev_b32 v35, 16, v35 :: v_dual_lshrrev_b32 v34, 16, v34
-; GFX1250-NEXT: v_cndmask_b32_e32 v31, v32, v31, vcc_lo
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v33
-; GFX1250-NEXT: scratch_load_b32 v32, off, s32 offset:72
-; GFX1250-NEXT: v_cndmask_b32_e64 v28, v83, v28, s0
-; GFX1250-NEXT: scratch_load_b32 v83, off, s32 offset:4
-; GFX1250-NEXT: v_cndmask_b32_e32 v34, v34, v35, vcc_lo
-; GFX1250-NEXT: s_clause 0x1
-; GFX1250-NEXT: scratch_load_b32 v35, off, s32 offset:68
-; GFX1250-NEXT: scratch_load_b32 v33, off, s32 offset:8
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v26
-; GFX1250-NEXT: s_wait_loadcnt 0x1a
-; GFX1250-NEXT: v_dual_cndmask_b32 v26, v36, v37, vcc_lo :: v_dual_bitop2_b32 v0, 1, v0 bitop3:0x40
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v24
-; GFX1250-NEXT: v_dual_lshrrev_b32 v37, 16, v37 :: v_dual_bitop2_b32 v2, 1, v2 bitop3:0x40
-; GFX1250-NEXT: s_wait_loadcnt 0x18
-; GFX1250-NEXT: v_dual_lshrrev_b32 v36, 16, v36 :: v_dual_cndmask_b32 v24, v38, v39, vcc_lo
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v22
-; GFX1250-NEXT: v_dual_lshrrev_b32 v38, 16, v38 :: v_dual_bitop2_b32 v7, 1, v7 bitop3:0x40
-; GFX1250-NEXT: s_wait_loadcnt 0x16
-; GFX1250-NEXT: v_dual_cndmask_b32 v22, v48, v49 :: v_dual_lshrrev_b32 v39, 16, v39
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v20
-; GFX1250-NEXT: v_dual_lshrrev_b32 v49, 16, v49 :: v_dual_bitop2_b32 v8, 1, v8 bitop3:0x40
-; GFX1250-NEXT: s_wait_loadcnt 0x14
-; GFX1250-NEXT: v_dual_lshrrev_b32 v48, 16, v48 :: v_dual_cndmask_b32 v20, v50, v51, vcc_lo
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v18
-; GFX1250-NEXT: v_dual_lshrrev_b32 v51, 16, v51 :: v_dual_bitop2_b32 v12, 1, v12 bitop3:0x40
-; GFX1250-NEXT: s_wait_loadcnt 0x12
-; GFX1250-NEXT: v_dual_lshrrev_b32 v50, 16, v50 :: v_dual_cndmask_b32 v18, v52, v53, vcc_lo
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v16
-; GFX1250-NEXT: v_dual_lshrrev_b32 v53, 16, v53 :: v_dual_bitop2_b32 v14, 1, v14 bitop3:0x40
-; GFX1250-NEXT: s_wait_loadcnt 0x10
-; GFX1250-NEXT: v_dual_lshrrev_b32 v52, 16, v52 :: v_dual_cndmask_b32 v16, v54, v55, vcc_lo
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v14
-; GFX1250-NEXT: v_dual_lshrrev_b32 v55, 16, v55 :: v_dual_lshrrev_b32 v54, 16, v54
-; GFX1250-NEXT: s_wait_loadcnt 0xc
-; GFX1250-NEXT: v_cndmask_b32_e32 v14, v66, v67, vcc_lo
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v12
-; GFX1250-NEXT: v_dual_lshrrev_b32 v67, 16, v67 :: v_dual_lshrrev_b32 v66, 16, v66
-; GFX1250-NEXT: s_wait_loadcnt 0x8
-; GFX1250-NEXT: v_cndmask_b32_e32 v12, v70, v71, vcc_lo
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v10
-; GFX1250-NEXT: v_dual_lshrrev_b32 v70, 16, v70 :: v_dual_bitop2_b32 v25, 1, v25 bitop3:0x40
-; GFX1250-NEXT: s_wait_loadcnt 0x5
-; GFX1250-NEXT: v_dual_cndmask_b32 v10, v81, v82 :: v_dual_lshrrev_b32 v71, 16, v71
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v8
-; GFX1250-NEXT: v_dual_lshrrev_b32 v82, 16, v82 :: v_dual_bitop2_b32 v27, 1, v27 bitop3:0x40
-; GFX1250-NEXT: v_dual_cndmask_b32 v8, v69, v80 :: v_dual_lshrrev_b32 v81, 16, v81
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v6
-; GFX1250-NEXT: v_dual_lshrrev_b32 v80, 16, v80 :: v_dual_lshrrev_b32 v69, 16, v69
-; GFX1250-NEXT: s_wait_loadcnt 0x4
-; GFX1250-NEXT: v_dual_cndmask_b32 v6, v68, v29 :: v_dual_lshrrev_b32 v29, 16, v29
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v4
-; GFX1250-NEXT: v_dual_lshrrev_b32 v68, 16, v68 :: v_dual_cndmask_b32 v4, v64, v65, vcc_lo
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v2
-; GFX1250-NEXT: v_dual_lshrrev_b32 v65, 16, v65 :: v_dual_lshrrev_b32 v64, 16, v64
-; GFX1250-NEXT: s_wait_loadcnt 0x0
-; GFX1250-NEXT: v_dual_cndmask_b32 v2, v32, v33 :: v_dual_lshrrev_b32 v33, 16, v33
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1250-NEXT: v_dual_lshrrev_b32 v32, 16, v32 :: v_dual_cndmask_b32 v0, v35, v83, vcc_lo
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v27
-; GFX1250-NEXT: v_dual_lshrrev_b32 v83, 16, v83 :: v_dual_cndmask_b32 v27, v36, v37, vcc_lo
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v25
-; GFX1250-NEXT: v_cndmask_b32_e32 v25, v38, v39, vcc_lo
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v23
-; GFX1250-NEXT: v_dual_lshrrev_b32 v35, 16, v35 :: v_dual_cndmask_b32 v23, v48, v49, vcc_lo
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v21
-; GFX1250-NEXT: v_cndmask_b32_e32 v21, v50, v51, vcc_lo
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v19
-; GFX1250-NEXT: v_cndmask_b32_e32 v19, v52, v53, vcc_lo
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v17
-; GFX1250-NEXT: v_cndmask_b32_e32 v17, v54, v55, vcc_lo
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v15
-; GFX1250-NEXT: v_cndmask_b32_e32 v15, v66, v67, vcc_lo
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v13
-; GFX1250-NEXT: v_cndmask_b32_e32 v13, v70, v71, vcc_lo
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v11
-; GFX1250-NEXT: v_cndmask_b32_e32 v11, v81, v82, vcc_lo
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v7
-; GFX1250-NEXT: v_cndmask_b32_e32 v7, v68, v29, vcc_lo
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v3
-; GFX1250-NEXT: v_cndmask_b32_e32 v3, v32, v33, vcc_lo
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1
-; GFX1250-NEXT: v_cndmask_b32_e32 v1, v35, v83, vcc_lo
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v5
-; GFX1250-NEXT: v_cndmask_b32_e32 v5, v64, v65, vcc_lo
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v9
-; GFX1250-NEXT: v_cndmask_b32_e32 v9, v69, v80, vcc_lo
-; GFX1250-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX1250-NEXT: v_perm_b32 v1, v3, v2, 0x5040100
-; GFX1250-NEXT: v_perm_b32 v2, v5, v4, 0x5040100
-; GFX1250-NEXT: v_perm_b32 v3, v7, v6, 0x5040100
-; GFX1250-NEXT: v_perm_b32 v4, v9, v8, 0x5040100
-; GFX1250-NEXT: v_perm_b32 v5, v11, v10, 0x5040100
-; GFX1250-NEXT: v_perm_b32 v6, v13, v12, 0x5040100
-; GFX1250-NEXT: v_perm_b32 v7, v15, v14, 0x5040100
-; GFX1250-NEXT: v_perm_b32 v8, v17, v16, 0x5040100
-; GFX1250-NEXT: v_perm_b32 v9, v19, v18, 0x5040100
-; GFX1250-NEXT: v_perm_b32 v10, v21, v20, 0x5040100
-; GFX1250-NEXT: v_perm_b32 v11, v23, v22, 0x5040100
-; GFX1250-NEXT: v_perm_b32 v12, v25, v24, 0x5040100
-; GFX1250-NEXT: v_perm_b32 v13, v27, v26, 0x5040100
-; GFX1250-NEXT: v_perm_b32 v14, v28, v31, 0x5040100
-; GFX1250-NEXT: v_perm_b32 v15, v34, v30, 0x5040100
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_vselect_v32bf16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: s_clause 0x20
+; GFX1250TRUE16-NEXT: scratch_load_u16 v31, off, s32
+; GFX1250TRUE16-NEXT: scratch_load_b32 v32, off, s32 offset:68
+; GFX1250TRUE16-NEXT: scratch_load_b32 v33, off, s32 offset:72
+; GFX1250TRUE16-NEXT: scratch_load_b32 v34, off, s32 offset:76
+; GFX1250TRUE16-NEXT: scratch_load_b32 v35, off, s32 offset:124
+; GFX1250TRUE16-NEXT: scratch_load_b32 v36, off, s32 offset:128
+; GFX1250TRUE16-NEXT: scratch_load_b32 v37, off, s32 offset:64
+; GFX1250TRUE16-NEXT: scratch_load_b32 v38, off, s32 offset:60
+; GFX1250TRUE16-NEXT: scratch_load_b32 v39, off, s32 offset:120
+; GFX1250TRUE16-NEXT: scratch_load_b32 v48, off, s32 offset:56
+; GFX1250TRUE16-NEXT: scratch_load_b32 v49, off, s32 offset:116
+; GFX1250TRUE16-NEXT: scratch_load_b32 v50, off, s32 offset:52
+; GFX1250TRUE16-NEXT: scratch_load_b32 v51, off, s32 offset:112
+; GFX1250TRUE16-NEXT: scratch_load_b32 v52, off, s32 offset:48
+; GFX1250TRUE16-NEXT: scratch_load_b32 v53, off, s32 offset:108
+; GFX1250TRUE16-NEXT: scratch_load_b32 v54, off, s32 offset:44
+; GFX1250TRUE16-NEXT: scratch_load_b32 v55, off, s32 offset:104
+; GFX1250TRUE16-NEXT: scratch_load_b32 v64, off, s32 offset:40
+; GFX1250TRUE16-NEXT: scratch_load_b32 v65, off, s32 offset:100
+; GFX1250TRUE16-NEXT: scratch_load_b32 v66, off, s32 offset:36
+; GFX1250TRUE16-NEXT: scratch_load_b32 v67, off, s32 offset:96
+; GFX1250TRUE16-NEXT: scratch_load_b32 v68, off, s32 offset:32
+; GFX1250TRUE16-NEXT: scratch_load_b32 v69, off, s32 offset:92
+; GFX1250TRUE16-NEXT: scratch_load_b32 v70, off, s32 offset:28
+; GFX1250TRUE16-NEXT: scratch_load_b32 v71, off, s32 offset:88
+; GFX1250TRUE16-NEXT: scratch_load_b32 v80, off, s32 offset:24
+; GFX1250TRUE16-NEXT: scratch_load_b32 v81, off, s32 offset:84
+; GFX1250TRUE16-NEXT: scratch_load_b32 v82, off, s32 offset:20
+; GFX1250TRUE16-NEXT: scratch_load_b32 v83, off, s32 offset:80
+; GFX1250TRUE16-NEXT: scratch_load_b32 v84, off, s32 offset:16
+; GFX1250TRUE16-NEXT: scratch_load_b32 v85, off, s32 offset:12
+; GFX1250TRUE16-NEXT: scratch_load_b32 v86, off, s32 offset:8
+; GFX1250TRUE16-NEXT: scratch_load_b32 v87, off, s32 offset:4
+; GFX1250TRUE16-NEXT: v_and_b16 v0.h, 1, v1.l
+; GFX1250TRUE16-NEXT: v_and_b16 v0.l, 1, v0.l
+; GFX1250TRUE16-NEXT: v_and_b16 v1.l, 1, v3.l
+; GFX1250TRUE16-NEXT: v_and_b16 v1.h, 1, v2.l
+; GFX1250TRUE16-NEXT: v_and_b16 v2.l, 1, v9.l
+; GFX1250TRUE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 1, v0.h
+; GFX1250TRUE16-NEXT: v_and_b16 v0.h, 1, v4.l
+; GFX1250TRUE16-NEXT: v_cmp_eq_u16_e64 s1, 1, v0.l
+; GFX1250TRUE16-NEXT: v_cmp_eq_u16_e64 s0, 1, v1.l
+; GFX1250TRUE16-NEXT: v_cmp_eq_u16_e64 s2, 1, v1.h
+; GFX1250TRUE16-NEXT: v_and_b16 v0.l, 1, v5.l
+; GFX1250TRUE16-NEXT: v_and_b16 v1.l, 1, v7.l
+; GFX1250TRUE16-NEXT: v_and_b16 v1.h, 1, v6.l
+; GFX1250TRUE16-NEXT: v_and_b16 v2.h, 1, v8.l
+; GFX1250TRUE16-NEXT: v_and_b16 v3.l, 1, v11.l
+; GFX1250TRUE16-NEXT: v_and_b16 v3.h, 1, v10.l
+; GFX1250TRUE16-NEXT: v_and_b16 v4.l, 1, v13.l
+; GFX1250TRUE16-NEXT: v_and_b16 v4.h, 1, v12.l
+; GFX1250TRUE16-NEXT: v_and_b16 v5.l, 1, v15.l
+; GFX1250TRUE16-NEXT: v_and_b16 v5.h, 1, v14.l
+; GFX1250TRUE16-NEXT: v_and_b16 v6.l, 1, v17.l
+; GFX1250TRUE16-NEXT: v_and_b16 v6.h, 1, v16.l
+; GFX1250TRUE16-NEXT: v_and_b16 v7.l, 1, v19.l
+; GFX1250TRUE16-NEXT: v_and_b16 v7.h, 1, v18.l
+; GFX1250TRUE16-NEXT: v_and_b16 v8.l, 1, v21.l
+; GFX1250TRUE16-NEXT: v_and_b16 v8.h, 1, v20.l
+; GFX1250TRUE16-NEXT: v_and_b16 v9.l, 1, v23.l
+; GFX1250TRUE16-NEXT: v_and_b16 v9.h, 1, v22.l
+; GFX1250TRUE16-NEXT: v_and_b16 v10.l, 1, v25.l
+; GFX1250TRUE16-NEXT: v_and_b16 v10.h, 1, v24.l
+; GFX1250TRUE16-NEXT: v_and_b16 v11.l, 1, v27.l
+; GFX1250TRUE16-NEXT: v_and_b16 v11.h, 1, v26.l
+; GFX1250TRUE16-NEXT: v_and_b16 v12.l, 1, v29.l
+; GFX1250TRUE16-NEXT: v_and_b16 v12.h, 1, v28.l
+; GFX1250TRUE16-NEXT: v_and_b16 v13.l, 1, v30.l
+; GFX1250TRUE16-NEXT: v_cmp_eq_u16_e64 s4, 1, v0.h
+; GFX1250TRUE16-NEXT: v_cmp_eq_u16_e64 s3, 1, v0.l
+; GFX1250TRUE16-NEXT: v_cmp_eq_u16_e64 s5, 1, v1.l
+; GFX1250TRUE16-NEXT: v_cmp_eq_u16_e64 s6, 1, v1.h
+; GFX1250TRUE16-NEXT: v_cmp_eq_u16_e64 s7, 1, v2.l
+; GFX1250TRUE16-NEXT: v_cmp_eq_u16_e64 s8, 1, v2.h
+; GFX1250TRUE16-NEXT: v_cmp_eq_u16_e64 s9, 1, v3.l
+; GFX1250TRUE16-NEXT: v_cmp_eq_u16_e64 s10, 1, v3.h
+; GFX1250TRUE16-NEXT: v_cmp_eq_u16_e64 s11, 1, v4.l
+; GFX1250TRUE16-NEXT: v_cmp_eq_u16_e64 s12, 1, v4.h
+; GFX1250TRUE16-NEXT: v_cmp_eq_u16_e64 s13, 1, v5.l
+; GFX1250TRUE16-NEXT: v_cmp_eq_u16_e64 s14, 1, v5.h
+; GFX1250TRUE16-NEXT: v_cmp_eq_u16_e64 s15, 1, v6.l
+; GFX1250TRUE16-NEXT: v_cmp_eq_u16_e64 s16, 1, v6.h
+; GFX1250TRUE16-NEXT: v_cmp_eq_u16_e64 s17, 1, v7.l
+; GFX1250TRUE16-NEXT: v_cmp_eq_u16_e64 s18, 1, v7.h
+; GFX1250TRUE16-NEXT: v_cmp_eq_u16_e64 s19, 1, v8.l
+; GFX1250TRUE16-NEXT: v_cmp_eq_u16_e64 s20, 1, v8.h
+; GFX1250TRUE16-NEXT: v_cmp_eq_u16_e64 s21, 1, v9.l
+; GFX1250TRUE16-NEXT: v_cmp_eq_u16_e64 s22, 1, v9.h
+; GFX1250TRUE16-NEXT: v_cmp_eq_u16_e64 s23, 1, v10.l
+; GFX1250TRUE16-NEXT: v_cmp_eq_u16_e64 s24, 1, v10.h
+; GFX1250TRUE16-NEXT: v_cmp_eq_u16_e64 s25, 1, v11.l
+; GFX1250TRUE16-NEXT: v_cmp_eq_u16_e64 s26, 1, v13.l
+; GFX1250TRUE16-NEXT: v_cmp_eq_u16_e64 s27, 1, v12.h
+; GFX1250TRUE16-NEXT: v_cmp_eq_u16_e64 s28, 1, v12.l
+; GFX1250TRUE16-NEXT: v_cmp_eq_u16_e64 s29, 1, v11.h
+; GFX1250TRUE16-NEXT: s_wait_loadcnt 0x20
+; GFX1250TRUE16-NEXT: v_and_b16 v0.h, 1, v31.l
+; GFX1250TRUE16-NEXT: s_wait_loadcnt 0x1a
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v15.l, v36.l, v37.l, s26
+; GFX1250TRUE16-NEXT: s_wait_loadcnt 0x19
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v14.l, v35.l, v38.l, s27
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v14.h, v35.h, v38.h, s28
+; GFX1250TRUE16-NEXT: s_wait_loadcnt 0x17
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v13.l, v39.l, v48.l, s29
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v13.h, v39.h, v48.h, s25
+; GFX1250TRUE16-NEXT: s_wait_loadcnt 0x15
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v12.l, v49.l, v50.l, s24
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v12.h, v49.h, v50.h, s23
+; GFX1250TRUE16-NEXT: s_wait_loadcnt 0x13
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v11.l, v51.l, v52.l, s22
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v11.h, v51.h, v52.h, s21
+; GFX1250TRUE16-NEXT: s_wait_loadcnt 0x11
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v10.l, v53.l, v54.l, s20
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v10.h, v53.h, v54.h, s19
+; GFX1250TRUE16-NEXT: s_wait_loadcnt 0xf
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v9.l, v55.l, v64.l, s18
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v9.h, v55.h, v64.h, s17
+; GFX1250TRUE16-NEXT: s_wait_loadcnt 0xd
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v8.l, v65.l, v66.l, s16
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v8.h, v65.h, v66.h, s15
+; GFX1250TRUE16-NEXT: s_wait_loadcnt 0xb
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v7.l, v67.l, v68.l, s14
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v7.h, v67.h, v68.h, s13
+; GFX1250TRUE16-NEXT: s_wait_loadcnt 0x9
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v6.l, v69.l, v70.l, s12
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v6.h, v69.h, v70.h, s11
+; GFX1250TRUE16-NEXT: s_wait_loadcnt 0x7
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v5.l, v71.l, v80.l, s10
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v5.h, v71.h, v80.h, s9
+; GFX1250TRUE16-NEXT: s_wait_loadcnt 0x5
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v4.l, v81.l, v82.l, s8
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v4.h, v81.h, v82.h, s7
+; GFX1250TRUE16-NEXT: s_wait_loadcnt 0x3
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v3.l, v83.l, v84.l, s6
+; GFX1250TRUE16-NEXT: s_wait_loadcnt 0x2
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v2.l, v34.l, v85.l, s4
+; GFX1250TRUE16-NEXT: s_wait_loadcnt 0x1
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v1.l, v33.l, v86.l, s2
+; GFX1250TRUE16-NEXT: s_wait_loadcnt 0x0
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v0.l, v32.l, v87.l, s1
+; GFX1250TRUE16-NEXT: v_cmp_eq_u16_e64 s1, 1, v0.h
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v0.h, v32.h, v87.h, vcc_lo
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v1.h, v33.h, v86.h, s0
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v2.h, v34.h, v85.h, s3
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v3.h, v83.h, v84.h, s5
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v15.h, v36.h, v37.h, s1
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_vselect_v32bf16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: s_clause 0x1b
+; GFX1250FAKE16-NEXT: scratch_load_b32 v31, off, s32 offset:60
+; GFX1250FAKE16-NEXT: scratch_load_b32 v32, off, s32 offset:124
+; GFX1250FAKE16-NEXT: scratch_load_u16 v33, off, s32
+; GFX1250FAKE16-NEXT: scratch_load_b32 v34, off, s32 offset:128
+; GFX1250FAKE16-NEXT: scratch_load_b32 v35, off, s32 offset:64
+; GFX1250FAKE16-NEXT: scratch_load_b32 v36, off, s32 offset:120
+; GFX1250FAKE16-NEXT: scratch_load_b32 v37, off, s32 offset:56
+; GFX1250FAKE16-NEXT: scratch_load_b32 v38, off, s32 offset:116
+; GFX1250FAKE16-NEXT: scratch_load_b32 v39, off, s32 offset:52
+; GFX1250FAKE16-NEXT: scratch_load_b32 v48, off, s32 offset:112
+; GFX1250FAKE16-NEXT: scratch_load_b32 v49, off, s32 offset:48
+; GFX1250FAKE16-NEXT: scratch_load_b32 v50, off, s32 offset:108
+; GFX1250FAKE16-NEXT: scratch_load_b32 v51, off, s32 offset:44
+; GFX1250FAKE16-NEXT: scratch_load_b32 v52, off, s32 offset:104
+; GFX1250FAKE16-NEXT: scratch_load_b32 v53, off, s32 offset:40
+; GFX1250FAKE16-NEXT: scratch_load_b32 v54, off, s32 offset:100
+; GFX1250FAKE16-NEXT: scratch_load_b32 v55, off, s32 offset:36
+; GFX1250FAKE16-NEXT: scratch_load_b32 v64, off, s32 offset:76
+; GFX1250FAKE16-NEXT: scratch_load_b32 v65, off, s32 offset:12
+; GFX1250FAKE16-NEXT: scratch_load_b32 v66, off, s32 offset:96
+; GFX1250FAKE16-NEXT: scratch_load_b32 v67, off, s32 offset:32
+; GFX1250FAKE16-NEXT: scratch_load_b32 v68, off, s32 offset:80
+; GFX1250FAKE16-NEXT: scratch_load_b32 v69, off, s32 offset:84
+; GFX1250FAKE16-NEXT: scratch_load_b32 v70, off, s32 offset:92
+; GFX1250FAKE16-NEXT: scratch_load_b32 v71, off, s32 offset:28
+; GFX1250FAKE16-NEXT: scratch_load_b32 v80, off, s32 offset:20
+; GFX1250FAKE16-NEXT: scratch_load_b32 v81, off, s32 offset:88
+; GFX1250FAKE16-NEXT: scratch_load_b32 v82, off, s32 offset:24
+; GFX1250FAKE16-NEXT: v_and_b32_e32 v30, 1, v30
+; GFX1250FAKE16-NEXT: v_and_b32_e32 v29, 1, v29
+; GFX1250FAKE16-NEXT: v_and_b32_e32 v26, 1, v26
+; GFX1250FAKE16-NEXT: v_and_b32_e32 v24, 1, v24
+; GFX1250FAKE16-NEXT: v_and_b32_e32 v22, 1, v22
+; GFX1250FAKE16-NEXT: v_and_b32_e32 v20, 1, v20
+; GFX1250FAKE16-NEXT: v_and_b32_e32 v18, 1, v18
+; GFX1250FAKE16-NEXT: v_and_b32_e32 v16, 1, v16
+; GFX1250FAKE16-NEXT: v_and_b32_e32 v10, 1, v10
+; GFX1250FAKE16-NEXT: v_and_b32_e32 v6, 1, v6
+; GFX1250FAKE16-NEXT: v_and_b32_e32 v4, 1, v4
+; GFX1250FAKE16-NEXT: v_and_b32_e32 v1, 1, v1
+; GFX1250FAKE16-NEXT: v_and_b32_e32 v3, 1, v3
+; GFX1250FAKE16-NEXT: v_and_b32_e32 v5, 1, v5
+; GFX1250FAKE16-NEXT: v_and_b32_e32 v23, 1, v23
+; GFX1250FAKE16-NEXT: v_and_b32_e32 v9, 1, v9
+; GFX1250FAKE16-NEXT: v_and_b32_e32 v13, 1, v13
+; GFX1250FAKE16-NEXT: v_and_b32_e32 v15, 1, v15
+; GFX1250FAKE16-NEXT: v_and_b32_e32 v21, 1, v21
+; GFX1250FAKE16-NEXT: v_and_b32_e32 v11, 1, v11
+; GFX1250FAKE16-NEXT: v_and_b32_e32 v19, 1, v19
+; GFX1250FAKE16-NEXT: s_wait_loadcnt 0x1a
+; GFX1250FAKE16-NEXT: v_dual_lshrrev_b32 v83, 16, v32 :: v_dual_bitop2_b32 v17, 1, v17 bitop3:0x40
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e64 s1, 1, v30
+; GFX1250FAKE16-NEXT: v_and_b32_e32 v28, 1, v28
+; GFX1250FAKE16-NEXT: s_wait_loadcnt 0x17
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250FAKE16-NEXT: v_dual_cndmask_b32 v30, v34, v35, s1 :: v_dual_bitop2_b32 v33, 1, v33 bitop3:0x40
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v28
+; GFX1250FAKE16-NEXT: v_lshrrev_b32_e32 v28, 16, v31
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e64 s0, 1, v29
+; GFX1250FAKE16-NEXT: scratch_load_b32 v29, off, s32 offset:16
+; GFX1250FAKE16-NEXT: v_dual_lshrrev_b32 v35, 16, v35 :: v_dual_lshrrev_b32 v34, 16, v34
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e32 v31, v32, v31, vcc_lo
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v33
+; GFX1250FAKE16-NEXT: scratch_load_b32 v32, off, s32 offset:72
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e64 v28, v83, v28, s0
+; GFX1250FAKE16-NEXT: scratch_load_b32 v83, off, s32 offset:4
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e32 v34, v34, v35, vcc_lo
+; GFX1250FAKE16-NEXT: s_clause 0x1
+; GFX1250FAKE16-NEXT: scratch_load_b32 v35, off, s32 offset:68
+; GFX1250FAKE16-NEXT: scratch_load_b32 v33, off, s32 offset:8
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v26
+; GFX1250FAKE16-NEXT: s_wait_loadcnt 0x1a
+; GFX1250FAKE16-NEXT: v_dual_cndmask_b32 v26, v36, v37, vcc_lo :: v_dual_bitop2_b32 v0, 1, v0 bitop3:0x40
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v24
+; GFX1250FAKE16-NEXT: v_dual_lshrrev_b32 v37, 16, v37 :: v_dual_bitop2_b32 v2, 1, v2 bitop3:0x40
+; GFX1250FAKE16-NEXT: s_wait_loadcnt 0x18
+; GFX1250FAKE16-NEXT: v_dual_lshrrev_b32 v36, 16, v36 :: v_dual_cndmask_b32 v24, v38, v39, vcc_lo
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v22
+; GFX1250FAKE16-NEXT: v_dual_lshrrev_b32 v38, 16, v38 :: v_dual_bitop2_b32 v7, 1, v7 bitop3:0x40
+; GFX1250FAKE16-NEXT: s_wait_loadcnt 0x16
+; GFX1250FAKE16-NEXT: v_dual_cndmask_b32 v22, v48, v49 :: v_dual_lshrrev_b32 v39, 16, v39
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v20
+; GFX1250FAKE16-NEXT: v_dual_lshrrev_b32 v49, 16, v49 :: v_dual_bitop2_b32 v8, 1, v8 bitop3:0x40
+; GFX1250FAKE16-NEXT: s_wait_loadcnt 0x14
+; GFX1250FAKE16-NEXT: v_dual_lshrrev_b32 v48, 16, v48 :: v_dual_cndmask_b32 v20, v50, v51, vcc_lo
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v18
+; GFX1250FAKE16-NEXT: v_dual_lshrrev_b32 v51, 16, v51 :: v_dual_bitop2_b32 v12, 1, v12 bitop3:0x40
+; GFX1250FAKE16-NEXT: s_wait_loadcnt 0x12
+; GFX1250FAKE16-NEXT: v_dual_lshrrev_b32 v50, 16, v50 :: v_dual_cndmask_b32 v18, v52, v53, vcc_lo
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v16
+; GFX1250FAKE16-NEXT: v_dual_lshrrev_b32 v53, 16, v53 :: v_dual_bitop2_b32 v14, 1, v14 bitop3:0x40
+; GFX1250FAKE16-NEXT: s_wait_loadcnt 0x10
+; GFX1250FAKE16-NEXT: v_dual_lshrrev_b32 v52, 16, v52 :: v_dual_cndmask_b32 v16, v54, v55, vcc_lo
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v14
+; GFX1250FAKE16-NEXT: v_dual_lshrrev_b32 v55, 16, v55 :: v_dual_lshrrev_b32 v54, 16, v54
+; GFX1250FAKE16-NEXT: s_wait_loadcnt 0xc
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e32 v14, v66, v67, vcc_lo
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v12
+; GFX1250FAKE16-NEXT: v_dual_lshrrev_b32 v67, 16, v67 :: v_dual_lshrrev_b32 v66, 16, v66
+; GFX1250FAKE16-NEXT: s_wait_loadcnt 0x8
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e32 v12, v70, v71, vcc_lo
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v10
+; GFX1250FAKE16-NEXT: v_dual_lshrrev_b32 v70, 16, v70 :: v_dual_bitop2_b32 v25, 1, v25 bitop3:0x40
+; GFX1250FAKE16-NEXT: s_wait_loadcnt 0x5
+; GFX1250FAKE16-NEXT: v_dual_cndmask_b32 v10, v81, v82 :: v_dual_lshrrev_b32 v71, 16, v71
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v8
+; GFX1250FAKE16-NEXT: v_dual_lshrrev_b32 v82, 16, v82 :: v_dual_bitop2_b32 v27, 1, v27 bitop3:0x40
+; GFX1250FAKE16-NEXT: v_dual_cndmask_b32 v8, v69, v80 :: v_dual_lshrrev_b32 v81, 16, v81
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v6
+; GFX1250FAKE16-NEXT: v_dual_lshrrev_b32 v80, 16, v80 :: v_dual_lshrrev_b32 v69, 16, v69
+; GFX1250FAKE16-NEXT: s_wait_loadcnt 0x4
+; GFX1250FAKE16-NEXT: v_dual_cndmask_b32 v6, v68, v29 :: v_dual_lshrrev_b32 v29, 16, v29
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v4
+; GFX1250FAKE16-NEXT: v_dual_lshrrev_b32 v68, 16, v68 :: v_dual_cndmask_b32 v4, v64, v65, vcc_lo
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v2
+; GFX1250FAKE16-NEXT: v_dual_lshrrev_b32 v65, 16, v65 :: v_dual_lshrrev_b32 v64, 16, v64
+; GFX1250FAKE16-NEXT: s_wait_loadcnt 0x0
+; GFX1250FAKE16-NEXT: v_dual_cndmask_b32 v2, v32, v33 :: v_dual_lshrrev_b32 v33, 16, v33
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
+; GFX1250FAKE16-NEXT: v_dual_lshrrev_b32 v32, 16, v32 :: v_dual_cndmask_b32 v0, v35, v83, vcc_lo
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v27
+; GFX1250FAKE16-NEXT: v_dual_lshrrev_b32 v83, 16, v83 :: v_dual_cndmask_b32 v27, v36, v37, vcc_lo
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v25
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e32 v25, v38, v39, vcc_lo
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v23
+; GFX1250FAKE16-NEXT: v_dual_lshrrev_b32 v35, 16, v35 :: v_dual_cndmask_b32 v23, v48, v49, vcc_lo
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v21
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e32 v21, v50, v51, vcc_lo
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v19
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e32 v19, v52, v53, vcc_lo
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v17
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e32 v17, v54, v55, vcc_lo
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v15
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e32 v15, v66, v67, vcc_lo
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v13
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e32 v13, v70, v71, vcc_lo
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v11
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e32 v11, v81, v82, vcc_lo
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v7
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e32 v7, v68, v29, vcc_lo
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v3
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e32 v3, v32, v33, vcc_lo
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e32 v1, v35, v83, vcc_lo
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v5
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e32 v5, v64, v65, vcc_lo
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v9
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e32 v9, v69, v80, vcc_lo
+; GFX1250FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX1250FAKE16-NEXT: v_perm_b32 v1, v3, v2, 0x5040100
+; GFX1250FAKE16-NEXT: v_perm_b32 v2, v5, v4, 0x5040100
+; GFX1250FAKE16-NEXT: v_perm_b32 v3, v7, v6, 0x5040100
+; GFX1250FAKE16-NEXT: v_perm_b32 v4, v9, v8, 0x5040100
+; GFX1250FAKE16-NEXT: v_perm_b32 v5, v11, v10, 0x5040100
+; GFX1250FAKE16-NEXT: v_perm_b32 v6, v13, v12, 0x5040100
+; GFX1250FAKE16-NEXT: v_perm_b32 v7, v15, v14, 0x5040100
+; GFX1250FAKE16-NEXT: v_perm_b32 v8, v17, v16, 0x5040100
+; GFX1250FAKE16-NEXT: v_perm_b32 v9, v19, v18, 0x5040100
+; GFX1250FAKE16-NEXT: v_perm_b32 v10, v21, v20, 0x5040100
+; GFX1250FAKE16-NEXT: v_perm_b32 v11, v23, v22, 0x5040100
+; GFX1250FAKE16-NEXT: v_perm_b32 v12, v25, v24, 0x5040100
+; GFX1250FAKE16-NEXT: v_perm_b32 v13, v27, v26, 0x5040100
+; GFX1250FAKE16-NEXT: v_perm_b32 v14, v28, v31, 0x5040100
+; GFX1250FAKE16-NEXT: v_perm_b32 v15, v34, v30, 0x5040100
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = select <32 x i1> %cond, <32 x bfloat> %a, <32 x bfloat> %b
ret <32 x bfloat> %op
}
@@ -49167,12 +50464,21 @@ define bfloat @v_fma_bf16(bfloat %a, bfloat %b, bfloat %c) {
; GFX11FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_fma_bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_fma_mixlo_bf16 v0, v0, v1, v2 op_sel_hi:[1,1,1]
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_fma_bf16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_fma_mix_f32_bf16 v0, v0, v1, v2 op_sel_hi:[1,1,1]
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_fma_bf16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_fma_mixlo_bf16 v0, v0, v1, v2 op_sel_hi:[1,1,1]
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = call bfloat @llvm.fma.bf16(bfloat %a, bfloat %b, bfloat %c)
ret bfloat %op
}
@@ -54791,12 +56097,21 @@ define bfloat @v_fmuladd_bf16(bfloat %a, bfloat %b, bfloat %c) {
; GFX11FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_fmuladd_bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_fma_mixlo_bf16 v0, v0, v1, v2 op_sel_hi:[1,1,1]
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_fmuladd_bf16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_fma_mix_f32_bf16 v0, v0, v1, v2 op_sel_hi:[1,1,1]
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_fmuladd_bf16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_fma_mixlo_bf16 v0, v0, v1, v2 op_sel_hi:[1,1,1]
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = call bfloat @llvm.fmuladd.bf16(bfloat %a, bfloat %b, bfloat %c)
ret bfloat %op
}
@@ -55652,5 +56967,3 @@ define <4 x bfloat> @v_fmuladd_v4bf16(<4 x bfloat> %a, <4 x bfloat> %b, <4 x bfl
%op = call <4 x bfloat> @llvm.fmuladd.v4bf16(<4 x bfloat> %a, <4 x bfloat> %b, <4 x bfloat> %c)
ret <4 x bfloat> %op
}
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; GFX1250FAKE16: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/calling-conventions.ll b/llvm/test/CodeGen/AMDGPU/calling-conventions.ll
index 363a248..cbf6b66 100644
--- a/llvm/test/CodeGen/AMDGPU/calling-conventions.ll
+++ b/llvm/test/CodeGen/AMDGPU/calling-conventions.ll
@@ -1262,7 +1262,7 @@ define amdgpu_ps void @ps_mesa_i16(i16 %arg0) {
; GFX1250-TRUE16-LABEL: ps_mesa_i16:
; GFX1250-TRUE16: ; %bb.0:
; GFX1250-TRUE16-NEXT: v_add_nc_u16 v0.l, v0.l, v0.l
-; GFX1250-TRUE16-NEXT: flat_store_b16 v[0:1], v0
+; GFX1250-TRUE16-NEXT: global_store_b16 v[0:1], v0, off
; GFX1250-TRUE16-NEXT: s_endpgm
;
; GFX1250-FAKE16-LABEL: ps_mesa_i16:
@@ -3013,7 +3013,7 @@ define amdgpu_cs void @amdgpu_cs_v8i1(<8 x i1> %arg0) {
; GFX1250-TRUE16-NEXT: v_lshlrev_b16 v0.h, 4, v0.h
; GFX1250-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-TRUE16-NEXT: v_bitop3_b16 v0.l, v0.l, v0.h, 15 bitop3:0xec
-; GFX1250-TRUE16-NEXT: flat_store_b8 v[0:1], v0
+; GFX1250-TRUE16-NEXT: global_store_b8 v[0:1], v0, off
; GFX1250-TRUE16-NEXT: s_endpgm
;
; GFX1250-FAKE16-LABEL: amdgpu_cs_v8i1:
@@ -3297,7 +3297,7 @@ define amdgpu_cs void @amdgpu_cs_v16i1(<16 x i1> %arg0) {
; GFX1250-TRUE16-NEXT: v_or_b16 v0.h, v1.h, v1.l
; GFX1250-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-TRUE16-NEXT: v_bitop3_b16 v0.l, v0.l, v0.h, 0xff bitop3:0xec
-; GFX1250-TRUE16-NEXT: flat_store_b16 v[0:1], v0
+; GFX1250-TRUE16-NEXT: global_store_b16 v[0:1], v0, off
; GFX1250-TRUE16-NEXT: s_endpgm
;
; GFX1250-FAKE16-LABEL: amdgpu_cs_v16i1:
diff --git a/llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll b/llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll
index f706f53..eb40e5c 100644
--- a/llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll
@@ -35,6 +35,6 @@ define amdgpu_kernel void @test_direct_indirect_call() {
ret void
}
;.
-; CHECK: attributes #[[ATTR0]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; CHECK: attributes #[[ATTR1]] = { "uniform-work-group-size"="false" }
;.
diff --git a/llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll b/llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll
index 8da204b..c02ff28 100644
--- a/llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll
+++ b/llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll
@@ -28,6 +28,6 @@ define amdgpu_kernel void @test_simple_indirect_call() #0 {
attributes #0 = { "amdgpu-no-dispatch-id" }
;.
-; ATTRIBUTOR_GCN: attributes #[[ATTR0]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_GCN: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; ATTRIBUTOR_GCN: attributes #[[ATTR1]] = { "amdgpu-no-dispatch-id" "uniform-work-group-size"="false" }
;.
diff --git a/llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll b/llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll
index 40d2765..b0dd187 100644
--- a/llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll
@@ -11,9 +11,9 @@
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -global-isel=0 -mattr=-flat-for-global,-real-true16 -denormal-fp-math=preserve-sign < %s | FileCheck -enable-var-scope -check-prefixes=GFX11-SDAG-FAKE16 %s
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -global-isel=1 -mattr=-flat-for-global,+real-true16 -denormal-fp-math=preserve-sign < %s | FileCheck -enable-var-scope -check-prefixes=GFX11-GISEL-TRUE16 %s
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -global-isel=1 -mattr=-flat-for-global,-real-true16 -denormal-fp-math=preserve-sign < %s | FileCheck -enable-var-scope -check-prefixes=GFX11-GISEL-FAKE16 %s
-; TODO: FIXME-TRUE16 llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1250 -global-isel=0 -mattr=-flat-for-global,+real-true16 -denormal-fp-math=preserve-sign < %s | FileCheck -enable-var-scope -check-prefixes=GFX1250-SDAG-TRUE16 %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1250 -global-isel=0 -mattr=-flat-for-global,+real-true16 -denormal-fp-math=preserve-sign < %s | FileCheck -enable-var-scope -check-prefixes=GFX1250-SDAG-TRUE16 %s
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1250 -global-isel=0 -mattr=-flat-for-global,-real-true16 -denormal-fp-math=preserve-sign < %s | FileCheck -enable-var-scope -check-prefixes=GFX1250-SDAG-FAKE16 %s
-; TODO: FIXME-TRUE16 llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1250 -global-isel=1 -mattr=-flat-for-global,+real-true16 -denormal-fp-math=preserve-sign < %s | FileCheck -enable-var-scope -check-prefixes=GFX1250-GISEL-TRUE16 %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1250 -global-isel=1 -mattr=-flat-for-global,+real-true16 -denormal-fp-math=preserve-sign < %s | FileCheck -enable-var-scope -check-prefixes=GFX1250-GISEL-TRUE16 %s
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1250 -global-isel=1 -mattr=-flat-for-global,-real-true16 -denormal-fp-math=preserve-sign < %s | FileCheck -enable-var-scope -check-prefixes=GFX1250-GISEL-FAKE16 %s
define amdgpu_kernel void @fptrunc_f32_to_f16(
@@ -197,6 +197,24 @@ define amdgpu_kernel void @fptrunc_f32_to_f16(
; GFX11-GISEL-FAKE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0
; GFX11-GISEL-FAKE16-NEXT: s_endpgm
;
+; GFX1250-SDAG-TRUE16-LABEL: fptrunc_f32_to_f16:
+; GFX1250-SDAG-TRUE16: ; %bb.0: ; %entry
+; GFX1250-SDAG-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s6, -1
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s7, 0x31016000
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s10, s6
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s11, s7
+; GFX1250-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s8, s2
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s9, s3
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s4, s0
+; GFX1250-SDAG-TRUE16-NEXT: buffer_load_b32 v0, off, s[8:11], null
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s5, s1
+; GFX1250-SDAG-TRUE16-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.l, v0
+; GFX1250-SDAG-TRUE16-NEXT: buffer_store_b16 v0, off, s[4:7], null
+; GFX1250-SDAG-TRUE16-NEXT: s_endpgm
+;
; GFX1250-SDAG-FAKE16-LABEL: fptrunc_f32_to_f16:
; GFX1250-SDAG-FAKE16: ; %bb.0: ; %entry
; GFX1250-SDAG-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
@@ -215,6 +233,21 @@ define amdgpu_kernel void @fptrunc_f32_to_f16(
; GFX1250-SDAG-FAKE16-NEXT: buffer_store_b16 v0, off, s[4:7], null
; GFX1250-SDAG-FAKE16-NEXT: s_endpgm
;
+; GFX1250-GISEL-TRUE16-LABEL: fptrunc_f32_to_f16:
+; GFX1250-GISEL-TRUE16: ; %bb.0: ; %entry
+; GFX1250-GISEL-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-GISEL-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-GISEL-TRUE16-NEXT: s_load_b32 s2, s[2:3], 0x0
+; GFX1250-GISEL-TRUE16-NEXT: s_wait_xcnt 0x0
+; GFX1250-GISEL-TRUE16-NEXT: s_mov_b32 s3, 0x31016000
+; GFX1250-GISEL-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-GISEL-TRUE16-NEXT: s_cvt_f16_f32 s2, s2
+; GFX1250-GISEL-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
+; GFX1250-GISEL-TRUE16-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-GISEL-TRUE16-NEXT: s_mov_b32 s2, -1
+; GFX1250-GISEL-TRUE16-NEXT: buffer_store_b16 v0, off, s[0:3], null
+; GFX1250-GISEL-TRUE16-NEXT: s_endpgm
+;
; GFX1250-GISEL-FAKE16-LABEL: fptrunc_f32_to_f16:
; GFX1250-GISEL-FAKE16: ; %bb.0: ; %entry
; GFX1250-GISEL-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
@@ -419,6 +452,24 @@ define amdgpu_kernel void @fptrunc_f32_to_f16_afn(ptr addrspace(1) %r,
; GFX11-GISEL-FAKE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0
; GFX11-GISEL-FAKE16-NEXT: s_endpgm
;
+; GFX1250-SDAG-TRUE16-LABEL: fptrunc_f32_to_f16_afn:
+; GFX1250-SDAG-TRUE16: ; %bb.0: ; %entry
+; GFX1250-SDAG-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s6, -1
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s7, 0x31016000
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s10, s6
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s11, s7
+; GFX1250-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s8, s2
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s9, s3
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s4, s0
+; GFX1250-SDAG-TRUE16-NEXT: buffer_load_b32 v0, off, s[8:11], null
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s5, s1
+; GFX1250-SDAG-TRUE16-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.l, v0
+; GFX1250-SDAG-TRUE16-NEXT: buffer_store_b16 v0, off, s[4:7], null
+; GFX1250-SDAG-TRUE16-NEXT: s_endpgm
+;
; GFX1250-SDAG-FAKE16-LABEL: fptrunc_f32_to_f16_afn:
; GFX1250-SDAG-FAKE16: ; %bb.0: ; %entry
; GFX1250-SDAG-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
@@ -437,6 +488,21 @@ define amdgpu_kernel void @fptrunc_f32_to_f16_afn(ptr addrspace(1) %r,
; GFX1250-SDAG-FAKE16-NEXT: buffer_store_b16 v0, off, s[4:7], null
; GFX1250-SDAG-FAKE16-NEXT: s_endpgm
;
+; GFX1250-GISEL-TRUE16-LABEL: fptrunc_f32_to_f16_afn:
+; GFX1250-GISEL-TRUE16: ; %bb.0: ; %entry
+; GFX1250-GISEL-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-GISEL-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-GISEL-TRUE16-NEXT: s_load_b32 s2, s[2:3], 0x0
+; GFX1250-GISEL-TRUE16-NEXT: s_wait_xcnt 0x0
+; GFX1250-GISEL-TRUE16-NEXT: s_mov_b32 s3, 0x31016000
+; GFX1250-GISEL-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-GISEL-TRUE16-NEXT: s_cvt_f16_f32 s2, s2
+; GFX1250-GISEL-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
+; GFX1250-GISEL-TRUE16-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-GISEL-TRUE16-NEXT: s_mov_b32 s2, -1
+; GFX1250-GISEL-TRUE16-NEXT: buffer_store_b16 v0, off, s[0:3], null
+; GFX1250-GISEL-TRUE16-NEXT: s_endpgm
+;
; GFX1250-GISEL-FAKE16-LABEL: fptrunc_f32_to_f16_afn:
; GFX1250-GISEL-FAKE16: ; %bb.0: ; %entry
; GFX1250-GISEL-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
@@ -1160,6 +1226,73 @@ define amdgpu_kernel void @fptrunc_f64_to_f16(
; GFX11-GISEL-FAKE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0
; GFX11-GISEL-FAKE16-NEXT: s_endpgm
;
+; GFX1250-SDAG-TRUE16-LABEL: fptrunc_f64_to_f16:
+; GFX1250-SDAG-TRUE16: ; %bb.0: ; %entry
+; GFX1250-SDAG-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s6, -1
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s7, 0x31016000
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s10, s6
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s11, s7
+; GFX1250-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s8, s2
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s9, s3
+; GFX1250-SDAG-TRUE16-NEXT: buffer_load_b64 v[0:1], off, s[8:11], null
+; GFX1250-SDAG-TRUE16-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-TRUE16-NEXT: v_readfirstlane_b32 s2, v1
+; GFX1250-SDAG-TRUE16-NEXT: s_and_b32 s3, s2, 0x1ff
+; GFX1250-SDAG-TRUE16-NEXT: s_lshr_b32 s5, s2, 8
+; GFX1250-SDAG-TRUE16-NEXT: v_or_b32_e32 v0, s3, v0
+; GFX1250-SDAG-TRUE16-NEXT: s_bfe_u32 s3, s2, 0xb0014
+; GFX1250-SDAG-TRUE16-NEXT: s_and_b32 s5, s5, 0xffe
+; GFX1250-SDAG-TRUE16-NEXT: s_sub_co_i32 s4, 0x3f1, s3
+; GFX1250-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
+; GFX1250-SDAG-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX1250-SDAG-TRUE16-NEXT: v_med3_i32 v1, s4, 0, 13
+; GFX1250-SDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX1250-SDAG-TRUE16-NEXT: v_readfirstlane_b32 s8, v1
+; GFX1250-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
+; GFX1250-SDAG-TRUE16-NEXT: v_readfirstlane_b32 s4, v0
+; GFX1250-SDAG-TRUE16-NEXT: s_or_b32 s4, s5, s4
+; GFX1250-SDAG-TRUE16-NEXT: s_or_b32 s5, s4, 0x1000
+; GFX1250-SDAG-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX1250-SDAG-TRUE16-NEXT: s_lshr_b32 s9, s5, s8
+; GFX1250-SDAG-TRUE16-NEXT: s_lshl_b32 s8, s9, s8
+; GFX1250-SDAG-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_4) | instid1(SALU_CYCLE_1)
+; GFX1250-SDAG-TRUE16-NEXT: s_cmp_lg_u32 s8, s5
+; GFX1250-SDAG-TRUE16-NEXT: s_cselect_b32 s5, 1, 0
+; GFX1250-SDAG-TRUE16-NEXT: s_addk_co_i32 s3, 0xfc10
+; GFX1250-SDAG-TRUE16-NEXT: s_or_b32 s5, s9, s5
+; GFX1250-SDAG-TRUE16-NEXT: s_lshl_b32 s8, s3, 12
+; GFX1250-SDAG-TRUE16-NEXT: s_or_b32 s8, s4, s8
+; GFX1250-SDAG-TRUE16-NEXT: s_cmp_lt_i32 s3, 1
+; GFX1250-SDAG-TRUE16-NEXT: s_cselect_b32 s5, s5, s8
+; GFX1250-SDAG-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX1250-SDAG-TRUE16-NEXT: s_and_b32 s8, s5, 7
+; GFX1250-SDAG-TRUE16-NEXT: s_cmp_gt_i32 s8, 5
+; GFX1250-SDAG-TRUE16-NEXT: s_cselect_b32 s9, 1, 0
+; GFX1250-SDAG-TRUE16-NEXT: s_cmp_eq_u32 s8, 3
+; GFX1250-SDAG-TRUE16-NEXT: s_cselect_b32 s8, 1, 0
+; GFX1250-SDAG-TRUE16-NEXT: s_lshr_b32 s5, s5, 2
+; GFX1250-SDAG-TRUE16-NEXT: s_or_b32 s8, s8, s9
+; GFX1250-SDAG-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-SDAG-TRUE16-NEXT: s_add_co_i32 s5, s5, s8
+; GFX1250-SDAG-TRUE16-NEXT: s_cmp_lt_i32 s3, 31
+; GFX1250-SDAG-TRUE16-NEXT: s_movk_i32 s8, 0x7e00
+; GFX1250-SDAG-TRUE16-NEXT: s_cselect_b32 s5, s5, 0x7c00
+; GFX1250-SDAG-TRUE16-NEXT: s_cmp_lg_u32 s4, 0
+; GFX1250-SDAG-TRUE16-NEXT: s_cselect_b32 s4, s8, 0x7c00
+; GFX1250-SDAG-TRUE16-NEXT: s_cmp_eq_u32 s3, 0x40f
+; GFX1250-SDAG-TRUE16-NEXT: s_cselect_b32 s3, s4, s5
+; GFX1250-SDAG-TRUE16-NEXT: s_lshr_b32 s2, s2, 16
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s4, s0
+; GFX1250-SDAG-TRUE16-NEXT: s_and_b32 s2, s2, 0x8000
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s5, s1
+; GFX1250-SDAG-TRUE16-NEXT: s_or_b32 s2, s2, s3
+; GFX1250-SDAG-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-SDAG-TRUE16-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-SDAG-TRUE16-NEXT: buffer_store_b16 v0, off, s[4:7], null
+; GFX1250-SDAG-TRUE16-NEXT: s_endpgm
+;
; GFX1250-SDAG-FAKE16-LABEL: fptrunc_f64_to_f16:
; GFX1250-SDAG-FAKE16: ; %bb.0: ; %entry
; GFX1250-SDAG-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
@@ -1227,6 +1360,63 @@ define amdgpu_kernel void @fptrunc_f64_to_f16(
; GFX1250-SDAG-FAKE16-NEXT: buffer_store_b16 v0, off, s[4:7], null
; GFX1250-SDAG-FAKE16-NEXT: s_endpgm
;
+; GFX1250-GISEL-TRUE16-LABEL: fptrunc_f64_to_f16:
+; GFX1250-GISEL-TRUE16: ; %bb.0: ; %entry
+; GFX1250-GISEL-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-GISEL-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-GISEL-TRUE16-NEXT: s_load_b64 s[2:3], s[2:3], 0x0
+; GFX1250-GISEL-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-GISEL-TRUE16-NEXT: s_and_b32 s6, s3, 0x1ff
+; GFX1250-GISEL-TRUE16-NEXT: s_bfe_u32 s4, s3, 0xb0014
+; GFX1250-GISEL-TRUE16-NEXT: s_lshr_b32 s5, s3, 8
+; GFX1250-GISEL-TRUE16-NEXT: s_or_b32 s2, s6, s2
+; GFX1250-GISEL-TRUE16-NEXT: s_addk_co_i32 s4, 0xfc10
+; GFX1250-GISEL-TRUE16-NEXT: s_and_b32 s5, s5, 0xffe
+; GFX1250-GISEL-TRUE16-NEXT: s_cmp_lg_u32 s2, 0
+; GFX1250-GISEL-TRUE16-NEXT: s_cselect_b32 s2, 1, 0
+; GFX1250-GISEL-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX1250-GISEL-TRUE16-NEXT: s_or_b32 s2, s5, s2
+; GFX1250-GISEL-TRUE16-NEXT: s_cmp_lg_u32 s2, 0
+; GFX1250-GISEL-TRUE16-NEXT: s_cselect_b32 s5, 1, 0
+; GFX1250-GISEL-TRUE16-NEXT: s_sub_co_i32 s6, 1, s4
+; GFX1250-GISEL-TRUE16-NEXT: s_or_b32 s8, s2, 0x1000
+; GFX1250-GISEL-TRUE16-NEXT: s_max_i32 s6, s6, 0
+; GFX1250-GISEL-TRUE16-NEXT: s_lshl_b32 s7, s4, 12
+; GFX1250-GISEL-TRUE16-NEXT: s_min_i32 s6, s6, 13
+; GFX1250-GISEL-TRUE16-NEXT: s_lshl_b32 s5, s5, 9
+; GFX1250-GISEL-TRUE16-NEXT: s_lshr_b32 s9, s8, s6
+; GFX1250-GISEL-TRUE16-NEXT: s_or_b32 s2, s2, s7
+; GFX1250-GISEL-TRUE16-NEXT: s_lshl_b32 s6, s9, s6
+; GFX1250-GISEL-TRUE16-NEXT: s_or_b32 s5, s5, 0x7c00
+; GFX1250-GISEL-TRUE16-NEXT: s_cmp_lg_u32 s6, s8
+; GFX1250-GISEL-TRUE16-NEXT: s_cselect_b32 s6, 1, 0
+; GFX1250-GISEL-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
+; GFX1250-GISEL-TRUE16-NEXT: s_or_b32 s6, s9, s6
+; GFX1250-GISEL-TRUE16-NEXT: s_cmp_lt_i32 s4, 1
+; GFX1250-GISEL-TRUE16-NEXT: s_cselect_b32 s2, s6, s2
+; GFX1250-GISEL-TRUE16-NEXT: s_and_b32 s6, s2, 7
+; GFX1250-GISEL-TRUE16-NEXT: s_lshr_b32 s2, s2, 2
+; GFX1250-GISEL-TRUE16-NEXT: s_cmp_eq_u32 s6, 3
+; GFX1250-GISEL-TRUE16-NEXT: s_cselect_b32 s7, 1, 0
+; GFX1250-GISEL-TRUE16-NEXT: s_cmp_gt_i32 s6, 5
+; GFX1250-GISEL-TRUE16-NEXT: s_cselect_b32 s6, 1, 0
+; GFX1250-GISEL-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX1250-GISEL-TRUE16-NEXT: s_or_b32 s6, s7, s6
+; GFX1250-GISEL-TRUE16-NEXT: s_add_co_i32 s2, s2, s6
+; GFX1250-GISEL-TRUE16-NEXT: s_cmp_gt_i32 s4, 30
+; GFX1250-GISEL-TRUE16-NEXT: s_cselect_b32 s2, 0x7c00, s2
+; GFX1250-GISEL-TRUE16-NEXT: s_cmp_eq_u32 s4, 0x40f
+; GFX1250-GISEL-TRUE16-NEXT: s_cselect_b32 s2, s5, s2
+; GFX1250-GISEL-TRUE16-NEXT: s_lshr_b32 s3, s3, 16
+; GFX1250-GISEL-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX1250-GISEL-TRUE16-NEXT: s_and_b32 s3, s3, 0x8000
+; GFX1250-GISEL-TRUE16-NEXT: s_or_b32 s2, s3, s2
+; GFX1250-GISEL-TRUE16-NEXT: s_mov_b32 s3, 0x31016000
+; GFX1250-GISEL-TRUE16-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-GISEL-TRUE16-NEXT: s_mov_b32 s2, -1
+; GFX1250-GISEL-TRUE16-NEXT: buffer_store_b16 v0, off, s[0:3], null
+; GFX1250-GISEL-TRUE16-NEXT: s_endpgm
+;
; GFX1250-GISEL-FAKE16-LABEL: fptrunc_f64_to_f16:
; GFX1250-GISEL-FAKE16: ; %bb.0: ; %entry
; GFX1250-GISEL-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
@@ -1489,6 +1679,26 @@ define amdgpu_kernel void @fptrunc_f64_to_f16_afn(
; GFX11-GISEL-FAKE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0
; GFX11-GISEL-FAKE16-NEXT: s_endpgm
;
+; GFX1250-SDAG-TRUE16-LABEL: fptrunc_f64_to_f16_afn:
+; GFX1250-SDAG-TRUE16: ; %bb.0: ; %entry
+; GFX1250-SDAG-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s6, -1
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s7, 0x31016000
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s10, s6
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s11, s7
+; GFX1250-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s8, s2
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s9, s3
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s4, s0
+; GFX1250-SDAG-TRUE16-NEXT: buffer_load_b64 v[0:1], off, s[8:11], null
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s5, s1
+; GFX1250-SDAG-TRUE16-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-TRUE16-NEXT: v_cvt_f32_f64_e32 v0, v[0:1]
+; GFX1250-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.l, v0
+; GFX1250-SDAG-TRUE16-NEXT: buffer_store_b16 v0, off, s[4:7], null
+; GFX1250-SDAG-TRUE16-NEXT: s_endpgm
+;
; GFX1250-SDAG-FAKE16-LABEL: fptrunc_f64_to_f16_afn:
; GFX1250-SDAG-FAKE16: ; %bb.0: ; %entry
; GFX1250-SDAG-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
@@ -1509,6 +1719,20 @@ define amdgpu_kernel void @fptrunc_f64_to_f16_afn(
; GFX1250-SDAG-FAKE16-NEXT: buffer_store_b16 v0, off, s[4:7], null
; GFX1250-SDAG-FAKE16-NEXT: s_endpgm
;
+; GFX1250-GISEL-TRUE16-LABEL: fptrunc_f64_to_f16_afn:
+; GFX1250-GISEL-TRUE16: ; %bb.0: ; %entry
+; GFX1250-GISEL-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-GISEL-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-GISEL-TRUE16-NEXT: s_load_b64 s[2:3], s[2:3], 0x0
+; GFX1250-GISEL-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-GISEL-TRUE16-NEXT: v_cvt_f32_f64_e32 v0, s[2:3]
+; GFX1250-GISEL-TRUE16-NEXT: s_mov_b32 s2, -1
+; GFX1250-GISEL-TRUE16-NEXT: s_mov_b32 s3, 0x31016000
+; GFX1250-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-GISEL-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.l, v0
+; GFX1250-GISEL-TRUE16-NEXT: buffer_store_b16 v0, off, s[0:3], null
+; GFX1250-GISEL-TRUE16-NEXT: s_endpgm
+;
; GFX1250-GISEL-FAKE16-LABEL: fptrunc_f64_to_f16_afn:
; GFX1250-GISEL-FAKE16: ; %bb.0: ; %entry
; GFX1250-GISEL-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
@@ -1740,6 +1964,24 @@ define amdgpu_kernel void @fptrunc_v2f32_to_v2f16(
; GFX11-GISEL-FAKE16-NEXT: buffer_store_b32 v0, off, s[0:3], 0
; GFX11-GISEL-FAKE16-NEXT: s_endpgm
;
+; GFX1250-SDAG-TRUE16-LABEL: fptrunc_v2f32_to_v2f16:
+; GFX1250-SDAG-TRUE16: ; %bb.0: ; %entry
+; GFX1250-SDAG-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s6, -1
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s7, 0x31016000
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s10, s6
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s11, s7
+; GFX1250-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s8, s2
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s9, s3
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s4, s0
+; GFX1250-SDAG-TRUE16-NEXT: buffer_load_b64 v[0:1], off, s[8:11], null
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s5, s1
+; GFX1250-SDAG-TRUE16-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-TRUE16-NEXT: v_cvt_pk_f16_f32 v0, v0, v1
+; GFX1250-SDAG-TRUE16-NEXT: buffer_store_b32 v0, off, s[4:7], null
+; GFX1250-SDAG-TRUE16-NEXT: s_endpgm
+;
; GFX1250-SDAG-FAKE16-LABEL: fptrunc_v2f32_to_v2f16:
; GFX1250-SDAG-FAKE16: ; %bb.0: ; %entry
; GFX1250-SDAG-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
@@ -1758,6 +2000,20 @@ define amdgpu_kernel void @fptrunc_v2f32_to_v2f16(
; GFX1250-SDAG-FAKE16-NEXT: buffer_store_b32 v0, off, s[4:7], null
; GFX1250-SDAG-FAKE16-NEXT: s_endpgm
;
+; GFX1250-GISEL-TRUE16-LABEL: fptrunc_v2f32_to_v2f16:
+; GFX1250-GISEL-TRUE16: ; %bb.0: ; %entry
+; GFX1250-GISEL-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-GISEL-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-GISEL-TRUE16-NEXT: s_load_b64 s[2:3], s[2:3], 0x0
+; GFX1250-GISEL-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-GISEL-TRUE16-NEXT: v_mov_b64_e32 v[0:1], s[2:3]
+; GFX1250-GISEL-TRUE16-NEXT: s_mov_b32 s2, -1
+; GFX1250-GISEL-TRUE16-NEXT: s_mov_b32 s3, 0x31016000
+; GFX1250-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-GISEL-TRUE16-NEXT: v_cvt_pk_f16_f32 v0, v0, v1
+; GFX1250-GISEL-TRUE16-NEXT: buffer_store_b32 v0, off, s[0:3], null
+; GFX1250-GISEL-TRUE16-NEXT: s_endpgm
+;
; GFX1250-GISEL-FAKE16-LABEL: fptrunc_v2f32_to_v2f16:
; GFX1250-GISEL-FAKE16: ; %bb.0: ; %entry
; GFX1250-GISEL-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
@@ -3017,6 +3273,122 @@ define amdgpu_kernel void @fptrunc_v2f64_to_v2f16(
; GFX11-GISEL-FAKE16-NEXT: buffer_store_b32 v0, off, s[0:3], 0
; GFX11-GISEL-FAKE16-NEXT: s_endpgm
;
+; GFX1250-SDAG-TRUE16-LABEL: fptrunc_v2f64_to_v2f16:
+; GFX1250-SDAG-TRUE16: ; %bb.0: ; %entry
+; GFX1250-SDAG-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s6, -1
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s7, 0x31016000
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s10, s6
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s11, s7
+; GFX1250-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s8, s2
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s9, s3
+; GFX1250-SDAG-TRUE16-NEXT: buffer_load_b128 v[0:3], off, s[8:11], null
+; GFX1250-SDAG-TRUE16-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-TRUE16-NEXT: v_readfirstlane_b32 s2, v3
+; GFX1250-SDAG-TRUE16-NEXT: s_and_b32 s3, s2, 0x1ff
+; GFX1250-SDAG-TRUE16-NEXT: s_lshr_b32 s5, s2, 8
+; GFX1250-SDAG-TRUE16-NEXT: v_or_b32_e32 v2, s3, v2
+; GFX1250-SDAG-TRUE16-NEXT: s_bfe_u32 s3, s2, 0xb0014
+; GFX1250-SDAG-TRUE16-NEXT: s_and_b32 s5, s5, 0xffe
+; GFX1250-SDAG-TRUE16-NEXT: s_sub_co_i32 s4, 0x3f1, s3
+; GFX1250-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
+; GFX1250-SDAG-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v2
+; GFX1250-SDAG-TRUE16-NEXT: v_med3_i32 v3, s4, 0, 13
+; GFX1250-SDAG-TRUE16-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo
+; GFX1250-SDAG-TRUE16-NEXT: v_readfirstlane_b32 s8, v3
+; GFX1250-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
+; GFX1250-SDAG-TRUE16-NEXT: v_readfirstlane_b32 s4, v2
+; GFX1250-SDAG-TRUE16-NEXT: s_or_b32 s4, s5, s4
+; GFX1250-SDAG-TRUE16-NEXT: s_or_b32 s5, s4, 0x1000
+; GFX1250-SDAG-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX1250-SDAG-TRUE16-NEXT: s_lshr_b32 s9, s5, s8
+; GFX1250-SDAG-TRUE16-NEXT: s_lshl_b32 s8, s9, s8
+; GFX1250-SDAG-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_4) | instid1(SALU_CYCLE_1)
+; GFX1250-SDAG-TRUE16-NEXT: s_cmp_lg_u32 s8, s5
+; GFX1250-SDAG-TRUE16-NEXT: s_cselect_b32 s5, 1, 0
+; GFX1250-SDAG-TRUE16-NEXT: s_addk_co_i32 s3, 0xfc10
+; GFX1250-SDAG-TRUE16-NEXT: s_or_b32 s5, s9, s5
+; GFX1250-SDAG-TRUE16-NEXT: s_lshl_b32 s8, s3, 12
+; GFX1250-SDAG-TRUE16-NEXT: s_or_b32 s8, s4, s8
+; GFX1250-SDAG-TRUE16-NEXT: s_cmp_lt_i32 s3, 1
+; GFX1250-SDAG-TRUE16-NEXT: s_cselect_b32 s5, s5, s8
+; GFX1250-SDAG-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX1250-SDAG-TRUE16-NEXT: s_and_b32 s8, s5, 7
+; GFX1250-SDAG-TRUE16-NEXT: s_cmp_gt_i32 s8, 5
+; GFX1250-SDAG-TRUE16-NEXT: s_cselect_b32 s9, 1, 0
+; GFX1250-SDAG-TRUE16-NEXT: s_cmp_eq_u32 s8, 3
+; GFX1250-SDAG-TRUE16-NEXT: s_cselect_b32 s8, 1, 0
+; GFX1250-SDAG-TRUE16-NEXT: s_lshr_b32 s5, s5, 2
+; GFX1250-SDAG-TRUE16-NEXT: s_or_b32 s8, s8, s9
+; GFX1250-SDAG-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-SDAG-TRUE16-NEXT: s_add_co_i32 s5, s5, s8
+; GFX1250-SDAG-TRUE16-NEXT: s_cmp_lt_i32 s3, 31
+; GFX1250-SDAG-TRUE16-NEXT: s_movk_i32 s8, 0x7e00
+; GFX1250-SDAG-TRUE16-NEXT: s_cselect_b32 s5, s5, 0x7c00
+; GFX1250-SDAG-TRUE16-NEXT: s_cmp_lg_u32 s4, 0
+; GFX1250-SDAG-TRUE16-NEXT: v_readfirstlane_b32 s4, v1
+; GFX1250-SDAG-TRUE16-NEXT: s_cselect_b32 s9, s8, 0x7c00
+; GFX1250-SDAG-TRUE16-NEXT: s_cmp_eq_u32 s3, 0x40f
+; GFX1250-SDAG-TRUE16-NEXT: s_cselect_b32 s3, s9, s5
+; GFX1250-SDAG-TRUE16-NEXT: s_and_b32 s5, s4, 0x1ff
+; GFX1250-SDAG-TRUE16-NEXT: s_lshr_b32 s10, s4, 8
+; GFX1250-SDAG-TRUE16-NEXT: v_or_b32_e32 v0, s5, v0
+; GFX1250-SDAG-TRUE16-NEXT: s_bfe_u32 s5, s4, 0xb0014
+; GFX1250-SDAG-TRUE16-NEXT: s_and_b32 s10, s10, 0xffe
+; GFX1250-SDAG-TRUE16-NEXT: s_sub_co_i32 s9, 0x3f1, s5
+; GFX1250-SDAG-TRUE16-NEXT: s_lshr_b32 s2, s2, 16
+; GFX1250-SDAG-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX1250-SDAG-TRUE16-NEXT: v_med3_i32 v1, s9, 0, 13
+; GFX1250-SDAG-TRUE16-NEXT: s_and_b32 s2, s2, 0x8000
+; GFX1250-SDAG-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1250-SDAG-TRUE16-NEXT: s_or_b32 s2, s2, s3
+; GFX1250-SDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX1250-SDAG-TRUE16-NEXT: v_readfirstlane_b32 s11, v1
+; GFX1250-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
+; GFX1250-SDAG-TRUE16-NEXT: v_readfirstlane_b32 s9, v0
+; GFX1250-SDAG-TRUE16-NEXT: s_or_b32 s9, s10, s9
+; GFX1250-SDAG-TRUE16-NEXT: s_or_b32 s10, s9, 0x1000
+; GFX1250-SDAG-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX1250-SDAG-TRUE16-NEXT: s_lshr_b32 s12, s10, s11
+; GFX1250-SDAG-TRUE16-NEXT: s_lshl_b32 s11, s12, s11
+; GFX1250-SDAG-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_4) | instid1(SALU_CYCLE_1)
+; GFX1250-SDAG-TRUE16-NEXT: s_cmp_lg_u32 s11, s10
+; GFX1250-SDAG-TRUE16-NEXT: s_cselect_b32 s3, 1, 0
+; GFX1250-SDAG-TRUE16-NEXT: s_addk_co_i32 s5, 0xfc10
+; GFX1250-SDAG-TRUE16-NEXT: s_or_b32 s3, s12, s3
+; GFX1250-SDAG-TRUE16-NEXT: s_lshl_b32 s10, s5, 12
+; GFX1250-SDAG-TRUE16-NEXT: s_or_b32 s10, s9, s10
+; GFX1250-SDAG-TRUE16-NEXT: s_cmp_lt_i32 s5, 1
+; GFX1250-SDAG-TRUE16-NEXT: s_cselect_b32 s3, s3, s10
+; GFX1250-SDAG-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX1250-SDAG-TRUE16-NEXT: s_and_b32 s10, s3, 7
+; GFX1250-SDAG-TRUE16-NEXT: s_cmp_gt_i32 s10, 5
+; GFX1250-SDAG-TRUE16-NEXT: s_cselect_b32 s11, 1, 0
+; GFX1250-SDAG-TRUE16-NEXT: s_cmp_eq_u32 s10, 3
+; GFX1250-SDAG-TRUE16-NEXT: s_cselect_b32 s10, 1, 0
+; GFX1250-SDAG-TRUE16-NEXT: s_lshr_b32 s3, s3, 2
+; GFX1250-SDAG-TRUE16-NEXT: s_or_b32 s10, s10, s11
+; GFX1250-SDAG-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-SDAG-TRUE16-NEXT: s_add_co_i32 s3, s3, s10
+; GFX1250-SDAG-TRUE16-NEXT: s_cmp_lt_i32 s5, 31
+; GFX1250-SDAG-TRUE16-NEXT: s_cselect_b32 s3, s3, 0x7c00
+; GFX1250-SDAG-TRUE16-NEXT: s_cmp_lg_u32 s9, 0
+; GFX1250-SDAG-TRUE16-NEXT: s_cselect_b32 s8, s8, 0x7c00
+; GFX1250-SDAG-TRUE16-NEXT: s_cmp_eq_u32 s5, 0x40f
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s5, s1
+; GFX1250-SDAG-TRUE16-NEXT: s_cselect_b32 s3, s8, s3
+; GFX1250-SDAG-TRUE16-NEXT: s_lshr_b32 s4, s4, 16
+; GFX1250-SDAG-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX1250-SDAG-TRUE16-NEXT: s_and_b32 s4, s4, 0x8000
+; GFX1250-SDAG-TRUE16-NEXT: s_or_b32 s3, s4, s3
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s4, s0
+; GFX1250-SDAG-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s3, s2
+; GFX1250-SDAG-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-SDAG-TRUE16-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-SDAG-TRUE16-NEXT: buffer_store_b32 v0, off, s[4:7], null
+; GFX1250-SDAG-TRUE16-NEXT: s_endpgm
+;
; GFX1250-SDAG-FAKE16-LABEL: fptrunc_v2f64_to_v2f16:
; GFX1250-SDAG-FAKE16: ; %bb.0: ; %entry
; GFX1250-SDAG-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
@@ -3133,6 +3505,109 @@ define amdgpu_kernel void @fptrunc_v2f64_to_v2f16(
; GFX1250-SDAG-FAKE16-NEXT: buffer_store_b32 v0, off, s[4:7], null
; GFX1250-SDAG-FAKE16-NEXT: s_endpgm
;
+; GFX1250-GISEL-TRUE16-LABEL: fptrunc_v2f64_to_v2f16:
+; GFX1250-GISEL-TRUE16: ; %bb.0: ; %entry
+; GFX1250-GISEL-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-GISEL-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-GISEL-TRUE16-NEXT: s_load_b128 s[4:7], s[2:3], 0x0
+; GFX1250-GISEL-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-GISEL-TRUE16-NEXT: s_and_b32 s8, s5, 0x1ff
+; GFX1250-GISEL-TRUE16-NEXT: s_bfe_u32 s2, s5, 0xb0014
+; GFX1250-GISEL-TRUE16-NEXT: s_lshr_b32 s3, s5, 8
+; GFX1250-GISEL-TRUE16-NEXT: s_or_b32 s4, s8, s4
+; GFX1250-GISEL-TRUE16-NEXT: s_addk_co_i32 s2, 0xfc10
+; GFX1250-GISEL-TRUE16-NEXT: s_and_b32 s3, s3, 0xffe
+; GFX1250-GISEL-TRUE16-NEXT: s_cmp_lg_u32 s4, 0
+; GFX1250-GISEL-TRUE16-NEXT: s_cselect_b32 s4, 1, 0
+; GFX1250-GISEL-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX1250-GISEL-TRUE16-NEXT: s_or_b32 s3, s3, s4
+; GFX1250-GISEL-TRUE16-NEXT: s_cmp_lg_u32 s3, 0
+; GFX1250-GISEL-TRUE16-NEXT: s_cselect_b32 s4, 1, 0
+; GFX1250-GISEL-TRUE16-NEXT: s_sub_co_i32 s8, 1, s2
+; GFX1250-GISEL-TRUE16-NEXT: s_or_b32 s10, s3, 0x1000
+; GFX1250-GISEL-TRUE16-NEXT: s_max_i32 s8, s8, 0
+; GFX1250-GISEL-TRUE16-NEXT: s_lshl_b32 s9, s2, 12
+; GFX1250-GISEL-TRUE16-NEXT: s_min_i32 s8, s8, 13
+; GFX1250-GISEL-TRUE16-NEXT: s_lshl_b32 s4, s4, 9
+; GFX1250-GISEL-TRUE16-NEXT: s_lshr_b32 s11, s10, s8
+; GFX1250-GISEL-TRUE16-NEXT: s_or_b32 s3, s3, s9
+; GFX1250-GISEL-TRUE16-NEXT: s_lshl_b32 s8, s11, s8
+; GFX1250-GISEL-TRUE16-NEXT: s_or_b32 s4, s4, 0x7c00
+; GFX1250-GISEL-TRUE16-NEXT: s_cmp_lg_u32 s8, s10
+; GFX1250-GISEL-TRUE16-NEXT: s_cselect_b32 s8, 1, 0
+; GFX1250-GISEL-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
+; GFX1250-GISEL-TRUE16-NEXT: s_or_b32 s8, s11, s8
+; GFX1250-GISEL-TRUE16-NEXT: s_cmp_lt_i32 s2, 1
+; GFX1250-GISEL-TRUE16-NEXT: s_cselect_b32 s3, s8, s3
+; GFX1250-GISEL-TRUE16-NEXT: s_and_b32 s8, s3, 7
+; GFX1250-GISEL-TRUE16-NEXT: s_lshr_b32 s3, s3, 2
+; GFX1250-GISEL-TRUE16-NEXT: s_cmp_eq_u32 s8, 3
+; GFX1250-GISEL-TRUE16-NEXT: s_cselect_b32 s9, 1, 0
+; GFX1250-GISEL-TRUE16-NEXT: s_cmp_gt_i32 s8, 5
+; GFX1250-GISEL-TRUE16-NEXT: s_cselect_b32 s8, 1, 0
+; GFX1250-GISEL-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX1250-GISEL-TRUE16-NEXT: s_or_b32 s8, s9, s8
+; GFX1250-GISEL-TRUE16-NEXT: s_add_co_i32 s3, s3, s8
+; GFX1250-GISEL-TRUE16-NEXT: s_cmp_gt_i32 s2, 30
+; GFX1250-GISEL-TRUE16-NEXT: s_cselect_b32 s3, 0x7c00, s3
+; GFX1250-GISEL-TRUE16-NEXT: s_cmp_eq_u32 s2, 0x40f
+; GFX1250-GISEL-TRUE16-NEXT: s_cselect_b32 s2, s4, s3
+; GFX1250-GISEL-TRUE16-NEXT: s_lshr_b32 s3, s5, 16
+; GFX1250-GISEL-TRUE16-NEXT: s_and_b32 s8, s7, 0x1ff
+; GFX1250-GISEL-TRUE16-NEXT: s_bfe_u32 s4, s7, 0xb0014
+; GFX1250-GISEL-TRUE16-NEXT: s_lshr_b32 s5, s7, 8
+; GFX1250-GISEL-TRUE16-NEXT: s_and_b32 s3, s3, 0x8000
+; GFX1250-GISEL-TRUE16-NEXT: s_or_b32 s6, s8, s6
+; GFX1250-GISEL-TRUE16-NEXT: s_addk_co_i32 s4, 0xfc10
+; GFX1250-GISEL-TRUE16-NEXT: s_and_b32 s5, s5, 0xffe
+; GFX1250-GISEL-TRUE16-NEXT: s_or_b32 s2, s3, s2
+; GFX1250-GISEL-TRUE16-NEXT: s_cmp_lg_u32 s6, 0
+; GFX1250-GISEL-TRUE16-NEXT: s_cselect_b32 s3, 1, 0
+; GFX1250-GISEL-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX1250-GISEL-TRUE16-NEXT: s_or_b32 s3, s5, s3
+; GFX1250-GISEL-TRUE16-NEXT: s_cmp_lg_u32 s3, 0
+; GFX1250-GISEL-TRUE16-NEXT: s_cselect_b32 s5, 1, 0
+; GFX1250-GISEL-TRUE16-NEXT: s_sub_co_i32 s6, 1, s4
+; GFX1250-GISEL-TRUE16-NEXT: s_or_b32 s9, s3, 0x1000
+; GFX1250-GISEL-TRUE16-NEXT: s_max_i32 s6, s6, 0
+; GFX1250-GISEL-TRUE16-NEXT: s_lshl_b32 s8, s4, 12
+; GFX1250-GISEL-TRUE16-NEXT: s_min_i32 s6, s6, 13
+; GFX1250-GISEL-TRUE16-NEXT: s_lshl_b32 s5, s5, 9
+; GFX1250-GISEL-TRUE16-NEXT: s_lshr_b32 s10, s9, s6
+; GFX1250-GISEL-TRUE16-NEXT: s_or_b32 s3, s3, s8
+; GFX1250-GISEL-TRUE16-NEXT: s_lshl_b32 s6, s10, s6
+; GFX1250-GISEL-TRUE16-NEXT: s_or_b32 s5, s5, 0x7c00
+; GFX1250-GISEL-TRUE16-NEXT: s_cmp_lg_u32 s6, s9
+; GFX1250-GISEL-TRUE16-NEXT: s_cselect_b32 s6, 1, 0
+; GFX1250-GISEL-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
+; GFX1250-GISEL-TRUE16-NEXT: s_or_b32 s6, s10, s6
+; GFX1250-GISEL-TRUE16-NEXT: s_cmp_lt_i32 s4, 1
+; GFX1250-GISEL-TRUE16-NEXT: s_cselect_b32 s3, s6, s3
+; GFX1250-GISEL-TRUE16-NEXT: s_and_b32 s6, s3, 7
+; GFX1250-GISEL-TRUE16-NEXT: s_lshr_b32 s3, s3, 2
+; GFX1250-GISEL-TRUE16-NEXT: s_cmp_eq_u32 s6, 3
+; GFX1250-GISEL-TRUE16-NEXT: s_cselect_b32 s8, 1, 0
+; GFX1250-GISEL-TRUE16-NEXT: s_cmp_gt_i32 s6, 5
+; GFX1250-GISEL-TRUE16-NEXT: s_cselect_b32 s6, 1, 0
+; GFX1250-GISEL-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX1250-GISEL-TRUE16-NEXT: s_or_b32 s6, s8, s6
+; GFX1250-GISEL-TRUE16-NEXT: s_add_co_i32 s3, s3, s6
+; GFX1250-GISEL-TRUE16-NEXT: s_cmp_gt_i32 s4, 30
+; GFX1250-GISEL-TRUE16-NEXT: s_cselect_b32 s3, 0x7c00, s3
+; GFX1250-GISEL-TRUE16-NEXT: s_cmp_eq_u32 s4, 0x40f
+; GFX1250-GISEL-TRUE16-NEXT: s_cselect_b32 s3, s5, s3
+; GFX1250-GISEL-TRUE16-NEXT: s_lshr_b32 s4, s7, 16
+; GFX1250-GISEL-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX1250-GISEL-TRUE16-NEXT: s_and_b32 s4, s4, 0x8000
+; GFX1250-GISEL-TRUE16-NEXT: s_or_b32 s3, s4, s3
+; GFX1250-GISEL-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-GISEL-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s2, s3
+; GFX1250-GISEL-TRUE16-NEXT: s_mov_b32 s3, 0x31016000
+; GFX1250-GISEL-TRUE16-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-GISEL-TRUE16-NEXT: s_mov_b32 s2, -1
+; GFX1250-GISEL-TRUE16-NEXT: buffer_store_b32 v0, off, s[0:3], null
+; GFX1250-GISEL-TRUE16-NEXT: s_endpgm
+;
; GFX1250-GISEL-FAKE16-LABEL: fptrunc_v2f64_to_v2f16:
; GFX1250-GISEL-FAKE16: ; %bb.0: ; %entry
; GFX1250-GISEL-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
@@ -3481,6 +3956,27 @@ define amdgpu_kernel void @fptrunc_v2f64_to_v2f16_afn(
; GFX11-GISEL-FAKE16-NEXT: buffer_store_b32 v0, off, s[0:3], 0
; GFX11-GISEL-FAKE16-NEXT: s_endpgm
;
+; GFX1250-SDAG-TRUE16-LABEL: fptrunc_v2f64_to_v2f16_afn:
+; GFX1250-SDAG-TRUE16: ; %bb.0: ; %entry
+; GFX1250-SDAG-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s6, -1
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s7, 0x31016000
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s10, s6
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s11, s7
+; GFX1250-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s8, s2
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s9, s3
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s4, s0
+; GFX1250-SDAG-TRUE16-NEXT: buffer_load_b128 v[0:3], off, s[8:11], null
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s5, s1
+; GFX1250-SDAG-TRUE16-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-TRUE16-NEXT: v_cvt_f32_f64_e32 v2, v[2:3]
+; GFX1250-SDAG-TRUE16-NEXT: v_cvt_f32_f64_e32 v0, v[0:1]
+; GFX1250-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-TRUE16-NEXT: v_cvt_pk_f16_f32 v0, v0, v2
+; GFX1250-SDAG-TRUE16-NEXT: buffer_store_b32 v0, off, s[4:7], null
+; GFX1250-SDAG-TRUE16-NEXT: s_endpgm
+;
; GFX1250-SDAG-FAKE16-LABEL: fptrunc_v2f64_to_v2f16_afn:
; GFX1250-SDAG-FAKE16: ; %bb.0: ; %entry
; GFX1250-SDAG-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
@@ -3502,6 +3998,25 @@ define amdgpu_kernel void @fptrunc_v2f64_to_v2f16_afn(
; GFX1250-SDAG-FAKE16-NEXT: buffer_store_b32 v0, off, s[4:7], null
; GFX1250-SDAG-FAKE16-NEXT: s_endpgm
;
+; GFX1250-GISEL-TRUE16-LABEL: fptrunc_v2f64_to_v2f16_afn:
+; GFX1250-GISEL-TRUE16: ; %bb.0: ; %entry
+; GFX1250-GISEL-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-GISEL-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-GISEL-TRUE16-NEXT: s_load_b128 s[4:7], s[2:3], 0x0
+; GFX1250-GISEL-TRUE16-NEXT: s_wait_xcnt 0x0
+; GFX1250-GISEL-TRUE16-NEXT: s_mov_b32 s2, -1
+; GFX1250-GISEL-TRUE16-NEXT: s_mov_b32 s3, 0x31016000
+; GFX1250-GISEL-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-GISEL-TRUE16-NEXT: v_cvt_f32_f64_e32 v0, s[4:5]
+; GFX1250-GISEL-TRUE16-NEXT: v_cvt_f32_f64_e32 v1, s[6:7]
+; GFX1250-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250-GISEL-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.l, v0
+; GFX1250-GISEL-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.h, v1
+; GFX1250-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-GISEL-TRUE16-NEXT: v_pack_b32_f16 v0, v0.l, v0.h
+; GFX1250-GISEL-TRUE16-NEXT: buffer_store_b32 v0, off, s[0:3], null
+; GFX1250-GISEL-TRUE16-NEXT: s_endpgm
+;
; GFX1250-GISEL-FAKE16-LABEL: fptrunc_v2f64_to_v2f16_afn:
; GFX1250-GISEL-FAKE16: ; %bb.0: ; %entry
; GFX1250-GISEL-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
@@ -3710,6 +4225,26 @@ define amdgpu_kernel void @fneg_fptrunc_f32_to_f16(
; GFX11-GISEL-FAKE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0
; GFX11-GISEL-FAKE16-NEXT: s_endpgm
;
+; GFX1250-SDAG-TRUE16-LABEL: fneg_fptrunc_f32_to_f16:
+; GFX1250-SDAG-TRUE16: ; %bb.0: ; %entry
+; GFX1250-SDAG-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s6, -1
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s7, 0x31016000
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s10, s6
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s11, s7
+; GFX1250-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s8, s2
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s9, s3
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s4, s0
+; GFX1250-SDAG-TRUE16-NEXT: buffer_load_b32 v0, off, s[8:11], null
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s5, s1
+; GFX1250-SDAG-TRUE16-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-TRUE16-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
+; GFX1250-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.l, v0
+; GFX1250-SDAG-TRUE16-NEXT: buffer_store_b16 v0, off, s[4:7], null
+; GFX1250-SDAG-TRUE16-NEXT: s_endpgm
+;
; GFX1250-SDAG-FAKE16-LABEL: fneg_fptrunc_f32_to_f16:
; GFX1250-SDAG-FAKE16: ; %bb.0: ; %entry
; GFX1250-SDAG-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
@@ -3730,6 +4265,22 @@ define amdgpu_kernel void @fneg_fptrunc_f32_to_f16(
; GFX1250-SDAG-FAKE16-NEXT: buffer_store_b16 v0, off, s[4:7], null
; GFX1250-SDAG-FAKE16-NEXT: s_endpgm
;
+; GFX1250-GISEL-TRUE16-LABEL: fneg_fptrunc_f32_to_f16:
+; GFX1250-GISEL-TRUE16: ; %bb.0: ; %entry
+; GFX1250-GISEL-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-GISEL-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-GISEL-TRUE16-NEXT: s_load_b32 s2, s[2:3], 0x0
+; GFX1250-GISEL-TRUE16-NEXT: s_wait_xcnt 0x0
+; GFX1250-GISEL-TRUE16-NEXT: s_mov_b32 s3, 0x31016000
+; GFX1250-GISEL-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-GISEL-TRUE16-NEXT: s_xor_b32 s2, s2, 0x80000000
+; GFX1250-GISEL-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_3)
+; GFX1250-GISEL-TRUE16-NEXT: s_cvt_f16_f32 s2, s2
+; GFX1250-GISEL-TRUE16-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-GISEL-TRUE16-NEXT: s_mov_b32 s2, -1
+; GFX1250-GISEL-TRUE16-NEXT: buffer_store_b16 v0, off, s[0:3], null
+; GFX1250-GISEL-TRUE16-NEXT: s_endpgm
+;
; GFX1250-GISEL-FAKE16-LABEL: fneg_fptrunc_f32_to_f16:
; GFX1250-GISEL-FAKE16: ; %bb.0: ; %entry
; GFX1250-GISEL-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
@@ -3936,6 +4487,26 @@ define amdgpu_kernel void @fabs_fptrunc_f32_to_f16(
; GFX11-GISEL-FAKE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0
; GFX11-GISEL-FAKE16-NEXT: s_endpgm
;
+; GFX1250-SDAG-TRUE16-LABEL: fabs_fptrunc_f32_to_f16:
+; GFX1250-SDAG-TRUE16: ; %bb.0: ; %entry
+; GFX1250-SDAG-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s6, -1
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s7, 0x31016000
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s10, s6
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s11, s7
+; GFX1250-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s8, s2
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s9, s3
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s4, s0
+; GFX1250-SDAG-TRUE16-NEXT: buffer_load_b32 v0, off, s[8:11], null
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s5, s1
+; GFX1250-SDAG-TRUE16-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fffffff, v0
+; GFX1250-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.l, v0
+; GFX1250-SDAG-TRUE16-NEXT: buffer_store_b16 v0, off, s[4:7], null
+; GFX1250-SDAG-TRUE16-NEXT: s_endpgm
+;
; GFX1250-SDAG-FAKE16-LABEL: fabs_fptrunc_f32_to_f16:
; GFX1250-SDAG-FAKE16: ; %bb.0: ; %entry
; GFX1250-SDAG-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
@@ -3956,6 +4527,22 @@ define amdgpu_kernel void @fabs_fptrunc_f32_to_f16(
; GFX1250-SDAG-FAKE16-NEXT: buffer_store_b16 v0, off, s[4:7], null
; GFX1250-SDAG-FAKE16-NEXT: s_endpgm
;
+; GFX1250-GISEL-TRUE16-LABEL: fabs_fptrunc_f32_to_f16:
+; GFX1250-GISEL-TRUE16: ; %bb.0: ; %entry
+; GFX1250-GISEL-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-GISEL-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-GISEL-TRUE16-NEXT: s_load_b32 s2, s[2:3], 0x0
+; GFX1250-GISEL-TRUE16-NEXT: s_wait_xcnt 0x0
+; GFX1250-GISEL-TRUE16-NEXT: s_mov_b32 s3, 0x31016000
+; GFX1250-GISEL-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-GISEL-TRUE16-NEXT: s_bitset0_b32 s2, 31
+; GFX1250-GISEL-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_3)
+; GFX1250-GISEL-TRUE16-NEXT: s_cvt_f16_f32 s2, s2
+; GFX1250-GISEL-TRUE16-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-GISEL-TRUE16-NEXT: s_mov_b32 s2, -1
+; GFX1250-GISEL-TRUE16-NEXT: buffer_store_b16 v0, off, s[0:3], null
+; GFX1250-GISEL-TRUE16-NEXT: s_endpgm
+;
; GFX1250-GISEL-FAKE16-LABEL: fabs_fptrunc_f32_to_f16:
; GFX1250-GISEL-FAKE16: ; %bb.0: ; %entry
; GFX1250-GISEL-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
@@ -4162,6 +4749,26 @@ define amdgpu_kernel void @fneg_fabs_fptrunc_f32_to_f16(
; GFX11-GISEL-FAKE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0
; GFX11-GISEL-FAKE16-NEXT: s_endpgm
;
+; GFX1250-SDAG-TRUE16-LABEL: fneg_fabs_fptrunc_f32_to_f16:
+; GFX1250-SDAG-TRUE16: ; %bb.0: ; %entry
+; GFX1250-SDAG-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s6, -1
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s7, 0x31016000
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s10, s6
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s11, s7
+; GFX1250-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s8, s2
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s9, s3
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s4, s0
+; GFX1250-SDAG-TRUE16-NEXT: buffer_load_b32 v0, off, s[8:11], null
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s5, s1
+; GFX1250-SDAG-TRUE16-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-TRUE16-NEXT: v_or_b32_e32 v0, 0x80000000, v0
+; GFX1250-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.l, v0
+; GFX1250-SDAG-TRUE16-NEXT: buffer_store_b16 v0, off, s[4:7], null
+; GFX1250-SDAG-TRUE16-NEXT: s_endpgm
+;
; GFX1250-SDAG-FAKE16-LABEL: fneg_fabs_fptrunc_f32_to_f16:
; GFX1250-SDAG-FAKE16: ; %bb.0: ; %entry
; GFX1250-SDAG-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
@@ -4182,6 +4789,22 @@ define amdgpu_kernel void @fneg_fabs_fptrunc_f32_to_f16(
; GFX1250-SDAG-FAKE16-NEXT: buffer_store_b16 v0, off, s[4:7], null
; GFX1250-SDAG-FAKE16-NEXT: s_endpgm
;
+; GFX1250-GISEL-TRUE16-LABEL: fneg_fabs_fptrunc_f32_to_f16:
+; GFX1250-GISEL-TRUE16: ; %bb.0: ; %entry
+; GFX1250-GISEL-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-GISEL-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-GISEL-TRUE16-NEXT: s_load_b32 s2, s[2:3], 0x0
+; GFX1250-GISEL-TRUE16-NEXT: s_wait_xcnt 0x0
+; GFX1250-GISEL-TRUE16-NEXT: s_mov_b32 s3, 0x31016000
+; GFX1250-GISEL-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-GISEL-TRUE16-NEXT: s_bitset1_b32 s2, 31
+; GFX1250-GISEL-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_3)
+; GFX1250-GISEL-TRUE16-NEXT: s_cvt_f16_f32 s2, s2
+; GFX1250-GISEL-TRUE16-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-GISEL-TRUE16-NEXT: s_mov_b32 s2, -1
+; GFX1250-GISEL-TRUE16-NEXT: buffer_store_b16 v0, off, s[0:3], null
+; GFX1250-GISEL-TRUE16-NEXT: s_endpgm
+;
; GFX1250-GISEL-FAKE16-LABEL: fneg_fabs_fptrunc_f32_to_f16:
; GFX1250-GISEL-FAKE16: ; %bb.0: ; %entry
; GFX1250-GISEL-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
@@ -4396,6 +5019,26 @@ define amdgpu_kernel void @fptrunc_f32_to_f16_zext_i32(
; GFX11-GISEL-FAKE16-NEXT: buffer_store_b32 v0, off, s[0:3], 0
; GFX11-GISEL-FAKE16-NEXT: s_endpgm
;
+; GFX1250-SDAG-TRUE16-LABEL: fptrunc_f32_to_f16_zext_i32:
+; GFX1250-SDAG-TRUE16: ; %bb.0: ; %entry
+; GFX1250-SDAG-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s6, -1
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s7, 0x31016000
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s10, s6
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s11, s7
+; GFX1250-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s8, s2
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s9, s3
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s4, s0
+; GFX1250-SDAG-TRUE16-NEXT: buffer_load_b32 v0, off, s[8:11], null
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s5, s1
+; GFX1250-SDAG-TRUE16-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.l, v0
+; GFX1250-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX1250-SDAG-TRUE16-NEXT: buffer_store_b32 v0, off, s[4:7], null
+; GFX1250-SDAG-TRUE16-NEXT: s_endpgm
+;
; GFX1250-SDAG-FAKE16-LABEL: fptrunc_f32_to_f16_zext_i32:
; GFX1250-SDAG-FAKE16: ; %bb.0: ; %entry
; GFX1250-SDAG-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
@@ -4416,6 +5059,22 @@ define amdgpu_kernel void @fptrunc_f32_to_f16_zext_i32(
; GFX1250-SDAG-FAKE16-NEXT: buffer_store_b32 v0, off, s[4:7], null
; GFX1250-SDAG-FAKE16-NEXT: s_endpgm
;
+; GFX1250-GISEL-TRUE16-LABEL: fptrunc_f32_to_f16_zext_i32:
+; GFX1250-GISEL-TRUE16: ; %bb.0: ; %entry
+; GFX1250-GISEL-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-GISEL-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-GISEL-TRUE16-NEXT: s_load_b32 s2, s[2:3], 0x0
+; GFX1250-GISEL-TRUE16-NEXT: s_wait_xcnt 0x0
+; GFX1250-GISEL-TRUE16-NEXT: s_mov_b32 s3, 0x31016000
+; GFX1250-GISEL-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-GISEL-TRUE16-NEXT: s_cvt_f16_f32 s2, s2
+; GFX1250-GISEL-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_3) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX1250-GISEL-TRUE16-NEXT: s_and_b32 s2, 0xffff, s2
+; GFX1250-GISEL-TRUE16-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-GISEL-TRUE16-NEXT: s_mov_b32 s2, -1
+; GFX1250-GISEL-TRUE16-NEXT: buffer_store_b32 v0, off, s[0:3], null
+; GFX1250-GISEL-TRUE16-NEXT: s_endpgm
+;
; GFX1250-GISEL-FAKE16-LABEL: fptrunc_f32_to_f16_zext_i32:
; GFX1250-GISEL-FAKE16: ; %bb.0: ; %entry
; GFX1250-GISEL-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
@@ -4630,6 +5289,27 @@ define amdgpu_kernel void @fptrunc_fabs_f32_to_f16_zext_i32(
; GFX11-GISEL-FAKE16-NEXT: buffer_store_b32 v0, off, s[0:3], 0
; GFX11-GISEL-FAKE16-NEXT: s_endpgm
;
+; GFX1250-SDAG-TRUE16-LABEL: fptrunc_fabs_f32_to_f16_zext_i32:
+; GFX1250-SDAG-TRUE16: ; %bb.0: ; %entry
+; GFX1250-SDAG-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s6, -1
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s7, 0x31016000
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s10, s6
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s11, s7
+; GFX1250-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s8, s2
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s9, s3
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s4, s0
+; GFX1250-SDAG-TRUE16-NEXT: buffer_load_b32 v0, off, s[8:11], null
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s5, s1
+; GFX1250-SDAG-TRUE16-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fffffff, v0
+; GFX1250-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250-SDAG-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.l, v0
+; GFX1250-SDAG-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX1250-SDAG-TRUE16-NEXT: buffer_store_b32 v0, off, s[4:7], null
+; GFX1250-SDAG-TRUE16-NEXT: s_endpgm
+;
; GFX1250-SDAG-FAKE16-LABEL: fptrunc_fabs_f32_to_f16_zext_i32:
; GFX1250-SDAG-FAKE16: ; %bb.0: ; %entry
; GFX1250-SDAG-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
@@ -4651,6 +5331,24 @@ define amdgpu_kernel void @fptrunc_fabs_f32_to_f16_zext_i32(
; GFX1250-SDAG-FAKE16-NEXT: buffer_store_b32 v0, off, s[4:7], null
; GFX1250-SDAG-FAKE16-NEXT: s_endpgm
;
+; GFX1250-GISEL-TRUE16-LABEL: fptrunc_fabs_f32_to_f16_zext_i32:
+; GFX1250-GISEL-TRUE16: ; %bb.0: ; %entry
+; GFX1250-GISEL-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-GISEL-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-GISEL-TRUE16-NEXT: s_load_b32 s2, s[2:3], 0x0
+; GFX1250-GISEL-TRUE16-NEXT: s_wait_xcnt 0x0
+; GFX1250-GISEL-TRUE16-NEXT: s_mov_b32 s3, 0x31016000
+; GFX1250-GISEL-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-GISEL-TRUE16-NEXT: s_bitset0_b32 s2, 31
+; GFX1250-GISEL-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_3)
+; GFX1250-GISEL-TRUE16-NEXT: s_cvt_f16_f32 s2, s2
+; GFX1250-GISEL-TRUE16-NEXT: s_and_b32 s2, 0xffff, s2
+; GFX1250-GISEL-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-GISEL-TRUE16-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-GISEL-TRUE16-NEXT: s_mov_b32 s2, -1
+; GFX1250-GISEL-TRUE16-NEXT: buffer_store_b32 v0, off, s[0:3], null
+; GFX1250-GISEL-TRUE16-NEXT: s_endpgm
+;
; GFX1250-GISEL-FAKE16-LABEL: fptrunc_fabs_f32_to_f16_zext_i32:
; GFX1250-GISEL-FAKE16: ; %bb.0: ; %entry
; GFX1250-GISEL-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
@@ -4877,6 +5575,26 @@ define amdgpu_kernel void @fptrunc_f32_to_f16_sext_i32(
; GFX11-GISEL-FAKE16-NEXT: buffer_store_b32 v0, off, s[0:3], 0
; GFX11-GISEL-FAKE16-NEXT: s_endpgm
;
+; GFX1250-SDAG-TRUE16-LABEL: fptrunc_f32_to_f16_sext_i32:
+; GFX1250-SDAG-TRUE16: ; %bb.0: ; %entry
+; GFX1250-SDAG-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s6, -1
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s7, 0x31016000
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s10, s6
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s11, s7
+; GFX1250-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s8, s2
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s9, s3
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s4, s0
+; GFX1250-SDAG-TRUE16-NEXT: buffer_load_b32 v0, off, s[8:11], null
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s5, s1
+; GFX1250-SDAG-TRUE16-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.l, v0
+; GFX1250-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-TRUE16-NEXT: v_bfe_i32 v0, v0, 0, 16
+; GFX1250-SDAG-TRUE16-NEXT: buffer_store_b32 v0, off, s[4:7], null
+; GFX1250-SDAG-TRUE16-NEXT: s_endpgm
+;
; GFX1250-SDAG-FAKE16-LABEL: fptrunc_f32_to_f16_sext_i32:
; GFX1250-SDAG-FAKE16: ; %bb.0: ; %entry
; GFX1250-SDAG-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
@@ -4897,6 +5615,22 @@ define amdgpu_kernel void @fptrunc_f32_to_f16_sext_i32(
; GFX1250-SDAG-FAKE16-NEXT: buffer_store_b32 v0, off, s[4:7], null
; GFX1250-SDAG-FAKE16-NEXT: s_endpgm
;
+; GFX1250-GISEL-TRUE16-LABEL: fptrunc_f32_to_f16_sext_i32:
+; GFX1250-GISEL-TRUE16: ; %bb.0: ; %entry
+; GFX1250-GISEL-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-GISEL-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-GISEL-TRUE16-NEXT: s_load_b32 s2, s[2:3], 0x0
+; GFX1250-GISEL-TRUE16-NEXT: s_wait_xcnt 0x0
+; GFX1250-GISEL-TRUE16-NEXT: s_mov_b32 s3, 0x31016000
+; GFX1250-GISEL-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-GISEL-TRUE16-NEXT: s_cvt_f16_f32 s2, s2
+; GFX1250-GISEL-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_3) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX1250-GISEL-TRUE16-NEXT: s_sext_i32_i16 s2, s2
+; GFX1250-GISEL-TRUE16-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-GISEL-TRUE16-NEXT: s_mov_b32 s2, -1
+; GFX1250-GISEL-TRUE16-NEXT: buffer_store_b32 v0, off, s[0:3], null
+; GFX1250-GISEL-TRUE16-NEXT: s_endpgm
+;
; GFX1250-GISEL-FAKE16-LABEL: fptrunc_f32_to_f16_sext_i32:
; GFX1250-GISEL-FAKE16: ; %bb.0: ; %entry
; GFX1250-GISEL-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
diff --git a/llvm/test/CodeGen/AMDGPU/fsub.ll b/llvm/test/CodeGen/AMDGPU/fsub.ll
index 743431c..d6a9cb1 100644
--- a/llvm/test/CodeGen/AMDGPU/fsub.ll
+++ b/llvm/test/CodeGen/AMDGPU/fsub.ll
@@ -92,43 +92,11 @@ define amdgpu_kernel void @v_fneg_fsub_nsz_f32(ptr addrspace(1) %out, ptr addrsp
ret void
}
-; FUNC-LABEL: {{^}}v_fneg_fsub_nsz_attribute_f32:
-; SI: v_sub_f32_e32 [[SUB:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}
-; SI-NOT: xor
-define amdgpu_kernel void @v_fneg_fsub_nsz_attribute_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
- %b_ptr = getelementptr float, ptr addrspace(1) %in, i32 1
- %a = load float, ptr addrspace(1) %in, align 4
- %b = load float, ptr addrspace(1) %b_ptr, align 4
- %result = fsub float %a, %b
- %neg.result = fsub float -0.0, %result
- store float %neg.result, ptr addrspace(1) %out, align 4
- ret void
-}
-
-; For some reason the attribute has a string "true" or "false", so
-; make sure it is disabled and the fneg is not folded if it is not
-; "true".
-; FUNC-LABEL: {{^}}v_fneg_fsub_nsz_false_attribute_f32:
-; SI: v_sub_f32_e32 [[SUB:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}
-; SI: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[SUB]]
-define amdgpu_kernel void @v_fneg_fsub_nsz_false_attribute_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
- %b_ptr = getelementptr float, ptr addrspace(1) %in, i32 1
- %a = load float, ptr addrspace(1) %in, align 4
- %b = load float, ptr addrspace(1) %b_ptr, align 4
- %result = fsub float %a, %b
- %neg.result = fsub float -0.0, %result
- store float %neg.result, ptr addrspace(1) %out, align 4
- ret void
-}
-
-; FUNC-LABEL: {{^}}v_fsub_0_nsz_attribute_f32:
+; FUNC-LABEL: {{^}}v_fsub_0_nsz_flag_f32:
; SI-NOT: v_sub
-define amdgpu_kernel void @v_fsub_0_nsz_attribute_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @v_fsub_0_nsz_flag_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
%a = load float, ptr addrspace(1) %in, align 4
- %result = fsub float %a, 0.0
+ %result = fsub nsz float %a, 0.0
store float %result, ptr addrspace(1) %out, align 4
ret void
}
-
-attributes #0 = { nounwind "no-signed-zeros-fp-math"="true" }
-attributes #1 = { nounwind "no-signed-zeros-fp-math"="false" }
diff --git a/llvm/test/CodeGen/AMDGPU/implicitarg-offset-attributes.ll b/llvm/test/CodeGen/AMDGPU/implicitarg-offset-attributes.ll
index 3089054..32f7d6b 100644
--- a/llvm/test/CodeGen/AMDGPU/implicitarg-offset-attributes.ll
+++ b/llvm/test/CodeGen/AMDGPU/implicitarg-offset-attributes.ll
@@ -276,23 +276,23 @@ attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memo
;.
; V4: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
-; V4: attributes #[[ATTR1]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
-; V4: attributes #[[ATTR2]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
-; V4: attributes #[[ATTR3]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
-; V4: attributes #[[ATTR4]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-default-queue" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
-; V4: attributes #[[ATTR5]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
+; V4: attributes #[[ATTR1]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
+; V4: attributes #[[ATTR2]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
+; V4: attributes #[[ATTR3]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
+; V4: attributes #[[ATTR4]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-default-queue" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
+; V4: attributes #[[ATTR5]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
;.
; V5: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
-; V5: attributes #[[ATTR1]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
-; V5: attributes #[[ATTR2]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
-; V5: attributes #[[ATTR3]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-default-queue" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
-; V5: attributes #[[ATTR4]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
+; V5: attributes #[[ATTR1]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
+; V5: attributes #[[ATTR2]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
+; V5: attributes #[[ATTR3]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-default-queue" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
+; V5: attributes #[[ATTR4]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
;.
; V6: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
-; V6: attributes #[[ATTR1]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
-; V6: attributes #[[ATTR2]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
-; V6: attributes #[[ATTR3]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-default-queue" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
-; V6: attributes #[[ATTR4]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
+; V6: attributes #[[ATTR1]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
+; V6: attributes #[[ATTR2]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
+; V6: attributes #[[ATTR3]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-default-queue" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
+; V6: attributes #[[ATTR4]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
;.
; V4: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 400}
;.
diff --git a/llvm/test/CodeGen/AMDGPU/indirect-call-set-from-other-function.ll b/llvm/test/CodeGen/AMDGPU/indirect-call-set-from-other-function.ll
index d3ef1b7..a0f5d2f 100644
--- a/llvm/test/CodeGen/AMDGPU/indirect-call-set-from-other-function.ll
+++ b/llvm/test/CodeGen/AMDGPU/indirect-call-set-from-other-function.ll
@@ -68,6 +68,6 @@ if.end:
ret void
}
;.
-; CHECK: attributes #[[ATTR0]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; CHECK: attributes #[[ATTR1]] = { "uniform-work-group-size"="false" }
;.
diff --git a/llvm/test/CodeGen/AMDGPU/issue120256-annotate-constexpr-addrspacecast.ll b/llvm/test/CodeGen/AMDGPU/issue120256-annotate-constexpr-addrspacecast.ll
index 71a330e..4e952b6 100644
--- a/llvm/test/CodeGen/AMDGPU/issue120256-annotate-constexpr-addrspacecast.ll
+++ b/llvm/test/CodeGen/AMDGPU/issue120256-annotate-constexpr-addrspacecast.ll
@@ -55,8 +55,8 @@ define amdgpu_kernel void @issue120256_private(ptr addrspace(1) %out) {
; FIXME: Inference of amdgpu-no-queue-ptr should not depend on code object version.
!0 = !{i32 1, !"amdhsa_code_object_version", i32 400}
;.
-; CHECK: attributes #[[ATTR0]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx803" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR1]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx803" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx803" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx803" "uniform-work-group-size"="false" }
;.
; CHECK: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 400}
;.
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.f16.ll
index 6ccfad7..ff47563 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.f16.ll
@@ -14,7 +14,7 @@ define amdgpu_ps void @test_cvt_pk_bf8_f16_v(<2 x half> %a, ptr addrspace(1) %ou
; GFX1250-SDAG-REAL16: ; %bb.0:
; GFX1250-SDAG-REAL16-NEXT: v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v2, v1
; GFX1250-SDAG-REAL16-NEXT: v_cvt_pk_bf8_f16 v0.l, v0
-; GFX1250-SDAG-REAL16-NEXT: flat_store_b16 v[2:3], v0
+; GFX1250-SDAG-REAL16-NEXT: global_store_b16 v[2:3], v0, off
; GFX1250-SDAG-REAL16-NEXT: s_endpgm
;
; GFX1250-SDAG-FAKE16-LABEL: test_cvt_pk_bf8_f16_v:
@@ -28,7 +28,7 @@ define amdgpu_ps void @test_cvt_pk_bf8_f16_v(<2 x half> %a, ptr addrspace(1) %ou
; GFX1250-GISEL-REAL16: ; %bb.0:
; GFX1250-GISEL-REAL16-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
; GFX1250-GISEL-REAL16-NEXT: v_cvt_pk_bf8_f16 v0.l, v0
-; GFX1250-GISEL-REAL16-NEXT: flat_store_b16 v[4:5], v0
+; GFX1250-GISEL-REAL16-NEXT: global_store_b16 v[4:5], v0, off
; GFX1250-GISEL-REAL16-NEXT: s_endpgm
;
; GFX1250-GISEL-FAKE16-LABEL: test_cvt_pk_bf8_f16_v:
@@ -46,7 +46,7 @@ define amdgpu_ps void @test_cvt_pk_bf8_f16_s(<2 x half> inreg %a, ptr addrspace(
; GFX1250-SDAG-REAL16-LABEL: test_cvt_pk_bf8_f16_s:
; GFX1250-SDAG-REAL16: ; %bb.0:
; GFX1250-SDAG-REAL16-NEXT: v_cvt_pk_bf8_f16 v2.l, s0
-; GFX1250-SDAG-REAL16-NEXT: flat_store_b16 v[0:1], v2
+; GFX1250-SDAG-REAL16-NEXT: global_store_b16 v[0:1], v2, off
; GFX1250-SDAG-REAL16-NEXT: s_endpgm
;
; GFX1250-SDAG-FAKE16-LABEL: test_cvt_pk_bf8_f16_s:
@@ -58,7 +58,7 @@ define amdgpu_ps void @test_cvt_pk_bf8_f16_s(<2 x half> inreg %a, ptr addrspace(
; GFX1250-GISEL-REAL16-LABEL: test_cvt_pk_bf8_f16_s:
; GFX1250-GISEL-REAL16: ; %bb.0:
; GFX1250-GISEL-REAL16-NEXT: v_cvt_pk_bf8_f16 v2.l, s0
-; GFX1250-GISEL-REAL16-NEXT: flat_store_b16 v[0:1], v2
+; GFX1250-GISEL-REAL16-NEXT: global_store_b16 v[0:1], v2, off
; GFX1250-GISEL-REAL16-NEXT: s_endpgm
;
; GFX1250-GISEL-FAKE16-LABEL: test_cvt_pk_bf8_f16_s:
@@ -75,7 +75,7 @@ define amdgpu_ps void @test_cvt_pk_bf8_f16_l(ptr addrspace(1) %out) {
; GFX1250-SDAG-REAL16-LABEL: test_cvt_pk_bf8_f16_l:
; GFX1250-SDAG-REAL16: ; %bb.0:
; GFX1250-SDAG-REAL16-NEXT: v_cvt_pk_bf8_f16 v2.l, 0x56400000
-; GFX1250-SDAG-REAL16-NEXT: flat_store_b16 v[0:1], v2
+; GFX1250-SDAG-REAL16-NEXT: global_store_b16 v[0:1], v2, off
; GFX1250-SDAG-REAL16-NEXT: s_endpgm
;
; GFX1250-SDAG-FAKE16-LABEL: test_cvt_pk_bf8_f16_l:
@@ -87,7 +87,7 @@ define amdgpu_ps void @test_cvt_pk_bf8_f16_l(ptr addrspace(1) %out) {
; GFX1250-GISEL-REAL16-LABEL: test_cvt_pk_bf8_f16_l:
; GFX1250-GISEL-REAL16: ; %bb.0:
; GFX1250-GISEL-REAL16-NEXT: v_cvt_pk_bf8_f16 v2.l, 0x56400000
-; GFX1250-GISEL-REAL16-NEXT: flat_store_b16 v[0:1], v2
+; GFX1250-GISEL-REAL16-NEXT: global_store_b16 v[0:1], v2, off
; GFX1250-GISEL-REAL16-NEXT: s_endpgm
;
; GFX1250-GISEL-FAKE16-LABEL: test_cvt_pk_bf8_f16_l:
@@ -105,7 +105,7 @@ define amdgpu_ps void @test_cvt_pk_fp8_f16_v(<2 x half> %a, ptr addrspace(1) %ou
; GFX1250-SDAG-REAL16: ; %bb.0:
; GFX1250-SDAG-REAL16-NEXT: v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v2, v1
; GFX1250-SDAG-REAL16-NEXT: v_cvt_pk_fp8_f16 v0.l, v0
-; GFX1250-SDAG-REAL16-NEXT: flat_store_b16 v[2:3], v0
+; GFX1250-SDAG-REAL16-NEXT: global_store_b16 v[2:3], v0, off
; GFX1250-SDAG-REAL16-NEXT: s_endpgm
;
; GFX1250-SDAG-FAKE16-LABEL: test_cvt_pk_fp8_f16_v:
@@ -119,7 +119,7 @@ define amdgpu_ps void @test_cvt_pk_fp8_f16_v(<2 x half> %a, ptr addrspace(1) %ou
; GFX1250-GISEL-REAL16: ; %bb.0:
; GFX1250-GISEL-REAL16-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
; GFX1250-GISEL-REAL16-NEXT: v_cvt_pk_fp8_f16 v0.l, v0
-; GFX1250-GISEL-REAL16-NEXT: flat_store_b16 v[4:5], v0
+; GFX1250-GISEL-REAL16-NEXT: global_store_b16 v[4:5], v0, off
; GFX1250-GISEL-REAL16-NEXT: s_endpgm
;
; GFX1250-GISEL-FAKE16-LABEL: test_cvt_pk_fp8_f16_v:
@@ -137,7 +137,7 @@ define amdgpu_ps void @test_cvt_pk_fp8_f16_s(<2 x half> inreg %a, ptr addrspace(
; GFX1250-SDAG-REAL16-LABEL: test_cvt_pk_fp8_f16_s:
; GFX1250-SDAG-REAL16: ; %bb.0:
; GFX1250-SDAG-REAL16-NEXT: v_cvt_pk_fp8_f16 v2.l, s0
-; GFX1250-SDAG-REAL16-NEXT: flat_store_b16 v[0:1], v2
+; GFX1250-SDAG-REAL16-NEXT: global_store_b16 v[0:1], v2, off
; GFX1250-SDAG-REAL16-NEXT: s_endpgm
;
; GFX1250-SDAG-FAKE16-LABEL: test_cvt_pk_fp8_f16_s:
@@ -149,7 +149,7 @@ define amdgpu_ps void @test_cvt_pk_fp8_f16_s(<2 x half> inreg %a, ptr addrspace(
; GFX1250-GISEL-REAL16-LABEL: test_cvt_pk_fp8_f16_s:
; GFX1250-GISEL-REAL16: ; %bb.0:
; GFX1250-GISEL-REAL16-NEXT: v_cvt_pk_fp8_f16 v2.l, s0
-; GFX1250-GISEL-REAL16-NEXT: flat_store_b16 v[0:1], v2
+; GFX1250-GISEL-REAL16-NEXT: global_store_b16 v[0:1], v2, off
; GFX1250-GISEL-REAL16-NEXT: s_endpgm
;
; GFX1250-GISEL-FAKE16-LABEL: test_cvt_pk_fp8_f16_s:
@@ -166,7 +166,7 @@ define amdgpu_ps void @test_cvt_pk_fp8_f16_l(ptr addrspace(1) %out) {
; GFX1250-SDAG-REAL16-LABEL: test_cvt_pk_fp8_f16_l:
; GFX1250-SDAG-REAL16: ; %bb.0:
; GFX1250-SDAG-REAL16-NEXT: v_cvt_pk_fp8_f16 v2.l, 0x56400000
-; GFX1250-SDAG-REAL16-NEXT: flat_store_b16 v[0:1], v2
+; GFX1250-SDAG-REAL16-NEXT: global_store_b16 v[0:1], v2, off
; GFX1250-SDAG-REAL16-NEXT: s_endpgm
;
; GFX1250-SDAG-FAKE16-LABEL: test_cvt_pk_fp8_f16_l:
@@ -178,7 +178,7 @@ define amdgpu_ps void @test_cvt_pk_fp8_f16_l(ptr addrspace(1) %out) {
; GFX1250-GISEL-REAL16-LABEL: test_cvt_pk_fp8_f16_l:
; GFX1250-GISEL-REAL16: ; %bb.0:
; GFX1250-GISEL-REAL16-NEXT: v_cvt_pk_fp8_f16 v2.l, 0x56400000
-; GFX1250-GISEL-REAL16-NEXT: flat_store_b16 v[0:1], v2
+; GFX1250-GISEL-REAL16-NEXT: global_store_b16 v[0:1], v2, off
; GFX1250-GISEL-REAL16-NEXT: s_endpgm
;
; GFX1250-GISEL-FAKE16-LABEL: test_cvt_pk_fp8_f16_l:
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rcp.bf16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rcp.bf16.ll
index 1e44a09..dbea832 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rcp.bf16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rcp.bf16.ll
@@ -15,7 +15,7 @@ define amdgpu_kernel void @rcp_bf16(ptr addrspace(1) %out, bfloat %src) #1 {
; SDAG-TRUE16-NEXT: v_mov_b32_e32 v1, 0
; SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
; SDAG-TRUE16-NEXT: v_rcp_bf16_e32 v0.l, s2
-; SDAG-TRUE16-NEXT: flat_store_b16 v1, v0, s[0:1]
+; SDAG-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1]
; SDAG-TRUE16-NEXT: s_endpgm
;
; SDAG-FAKE16-LABEL: rcp_bf16:
@@ -35,10 +35,10 @@ define amdgpu_kernel void @rcp_bf16_constant_4(ptr addrspace(1) %out) #1 {
; SDAG-TRUE16-LABEL: rcp_bf16_constant_4:
; SDAG-TRUE16: ; %bb.0:
; SDAG-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, 0x3e80
; SDAG-TRUE16-NEXT: v_mov_b32_e32 v1, 0
+; SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, 0x3e80
; SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
-; SDAG-TRUE16-NEXT: flat_store_b16 v1, v0, s[0:1]
+; SDAG-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1]
; SDAG-TRUE16-NEXT: s_endpgm
;
; SDAG-FAKE16-LABEL: rcp_bf16_constant_4:
@@ -57,10 +57,10 @@ define amdgpu_kernel void @rcp_bf16_constant_100(ptr addrspace(1) %out) #1 {
; SDAG-TRUE16-LABEL: rcp_bf16_constant_100:
; SDAG-TRUE16: ; %bb.0:
; SDAG-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, 0x3c24
; SDAG-TRUE16-NEXT: v_mov_b32_e32 v1, 0
+; SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, 0x3c24
; SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
-; SDAG-TRUE16-NEXT: flat_store_b16 v1, v0, s[0:1]
+; SDAG-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1]
; SDAG-TRUE16-NEXT: s_endpgm
;
; SDAG-FAKE16-LABEL: rcp_bf16_constant_100:
@@ -79,10 +79,10 @@ define amdgpu_kernel void @rcp_undef_bf16(ptr addrspace(1) %out) #1 {
; SDAG-TRUE16-LABEL: rcp_undef_bf16:
; SDAG-TRUE16: ; %bb.0:
; SDAG-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, 0x7fc0
; SDAG-TRUE16-NEXT: v_mov_b32_e32 v1, 0
+; SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, 0x7fc0
; SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
-; SDAG-TRUE16-NEXT: flat_store_b16 v1, v0, s[0:1]
+; SDAG-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1]
; SDAG-TRUE16-NEXT: s_endpgm
;
; SDAG-FAKE16-LABEL: rcp_undef_bf16:
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.bf16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.bf16.ll
index 42d12fd..662dc613 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.bf16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.bf16.ll
@@ -15,7 +15,7 @@ define amdgpu_kernel void @rsq_bf16(ptr addrspace(1) %out, bfloat %src) #1 {
; SDAG-REAL16-NEXT: v_mov_b32_e32 v1, 0
; SDAG-REAL16-NEXT: s_wait_kmcnt 0x0
; SDAG-REAL16-NEXT: v_rsq_bf16_e32 v0.l, s2
-; SDAG-REAL16-NEXT: flat_store_b16 v1, v0, s[0:1]
+; SDAG-REAL16-NEXT: global_store_b16 v1, v0, s[0:1]
; SDAG-REAL16-NEXT: s_endpgm
;
; SDAG-FAKE16-LABEL: rsq_bf16:
@@ -38,7 +38,7 @@ define amdgpu_kernel void @rsq_bf16_constant_4(ptr addrspace(1) %out) #1 {
; SDAG-REAL16-NEXT: v_rsq_bf16_e32 v0.l, 4.0
; SDAG-REAL16-NEXT: v_mov_b32_e32 v1, 0
; SDAG-REAL16-NEXT: s_wait_kmcnt 0x0
-; SDAG-REAL16-NEXT: flat_store_b16 v1, v0, s[0:1]
+; SDAG-REAL16-NEXT: global_store_b16 v1, v0, s[0:1]
; SDAG-REAL16-NEXT: s_endpgm
;
; SDAG-FAKE16-LABEL: rsq_bf16_constant_4:
@@ -61,7 +61,7 @@ define amdgpu_kernel void @rsq_bf16_constant_100(ptr addrspace(1) %out) #1 {
; SDAG-REAL16-NEXT: v_rsq_bf16_e32 v0.l, 0x42c8
; SDAG-REAL16-NEXT: v_mov_b32_e32 v1, 0
; SDAG-REAL16-NEXT: s_wait_kmcnt 0x0
-; SDAG-REAL16-NEXT: flat_store_b16 v1, v0, s[0:1]
+; SDAG-REAL16-NEXT: global_store_b16 v1, v0, s[0:1]
; SDAG-REAL16-NEXT: s_endpgm
;
; SDAG-FAKE16-LABEL: rsq_bf16_constant_100:
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.tanh.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.tanh.ll
index dd89f80..ba769ef 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.tanh.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.tanh.ll
@@ -100,7 +100,7 @@ define amdgpu_kernel void @tanh_f16(ptr addrspace(1) %out, half %src) #1 {
; SDAG-REAL16-NEXT: v_mov_b32_e32 v1, 0
; SDAG-REAL16-NEXT: s_wait_kmcnt 0x0
; SDAG-REAL16-NEXT: v_tanh_f16_e32 v0.l, s2
-; SDAG-REAL16-NEXT: flat_store_b16 v1, v0, s[0:1]
+; SDAG-REAL16-NEXT: global_store_b16 v1, v0, s[0:1]
; SDAG-REAL16-NEXT: s_endpgm
;
; SDAG-FAKE16-LABEL: tanh_f16:
@@ -123,7 +123,7 @@ define amdgpu_kernel void @tanh_f16_constant_4.0(ptr addrspace(1) %out) #1 {
; SDAG-REAL16-NEXT: v_tanh_f16_e32 v0.l, 4.0
; SDAG-REAL16-NEXT: v_mov_b32_e32 v1, 0
; SDAG-REAL16-NEXT: s_wait_kmcnt 0x0
-; SDAG-REAL16-NEXT: flat_store_b16 v1, v0, s[0:1]
+; SDAG-REAL16-NEXT: global_store_b16 v1, v0, s[0:1]
; SDAG-REAL16-NEXT: s_endpgm
;
; SDAG-FAKE16-LABEL: tanh_f16_constant_4.0:
@@ -146,7 +146,7 @@ define amdgpu_kernel void @tanh_f16_constant_100.0(ptr addrspace(1) %out) #1 {
; SDAG-REAL16-NEXT: v_tanh_f16_e32 v0.l, 0x5640
; SDAG-REAL16-NEXT: v_mov_b32_e32 v1, 0
; SDAG-REAL16-NEXT: s_wait_kmcnt 0x0
-; SDAG-REAL16-NEXT: flat_store_b16 v1, v0, s[0:1]
+; SDAG-REAL16-NEXT: global_store_b16 v1, v0, s[0:1]
; SDAG-REAL16-NEXT: s_endpgm
;
; SDAG-FAKE16-LABEL: tanh_f16_constant_100.0:
@@ -182,7 +182,7 @@ define amdgpu_kernel void @tanh_bf16(ptr addrspace(1) %out, bfloat %src) #1 {
; SDAG-REAL16-NEXT: v_mov_b32_e32 v1, 0
; SDAG-REAL16-NEXT: s_wait_kmcnt 0x0
; SDAG-REAL16-NEXT: v_tanh_bf16_e32 v0.l, s2
-; SDAG-REAL16-NEXT: flat_store_b16 v1, v0, s[0:1]
+; SDAG-REAL16-NEXT: global_store_b16 v1, v0, s[0:1]
; SDAG-REAL16-NEXT: s_endpgm
;
; SDAG-FAKE16-LABEL: tanh_bf16:
@@ -205,7 +205,7 @@ define amdgpu_kernel void @tanh_bf16_constant_4(ptr addrspace(1) %out) #1 {
; SDAG-REAL16-NEXT: v_tanh_bf16_e32 v0.l, 4.0
; SDAG-REAL16-NEXT: v_mov_b32_e32 v1, 0
; SDAG-REAL16-NEXT: s_wait_kmcnt 0x0
-; SDAG-REAL16-NEXT: flat_store_b16 v1, v0, s[0:1]
+; SDAG-REAL16-NEXT: global_store_b16 v1, v0, s[0:1]
; SDAG-REAL16-NEXT: s_endpgm
;
; SDAG-FAKE16-LABEL: tanh_bf16_constant_4:
@@ -228,7 +228,7 @@ define amdgpu_kernel void @tanh_bf16_constant_100(ptr addrspace(1) %out) #1 {
; SDAG-REAL16-NEXT: v_tanh_bf16_e32 v0.l, 0x42c8
; SDAG-REAL16-NEXT: v_mov_b32_e32 v1, 0
; SDAG-REAL16-NEXT: s_wait_kmcnt 0x0
-; SDAG-REAL16-NEXT: flat_store_b16 v1, v0, s[0:1]
+; SDAG-REAL16-NEXT: global_store_b16 v1, v0, s[0:1]
; SDAG-REAL16-NEXT: s_endpgm
;
; SDAG-FAKE16-LABEL: tanh_bf16_constant_100:
diff --git a/llvm/test/CodeGen/AMDGPU/min.ll b/llvm/test/CodeGen/AMDGPU/min.ll
index 6a3d31f..0458a64 100644
--- a/llvm/test/CodeGen/AMDGPU/min.ll
+++ b/llvm/test/CodeGen/AMDGPU/min.ll
@@ -6,9 +6,7 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 < %s | FileCheck --check-prefix=GFX10 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=+real-true16 -amdgpu-enable-vopd=0 < %s | FileCheck --check-prefixes=GFX11,GFX11-TRUE16 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=-real-true16 -amdgpu-enable-vopd=0 < %s | FileCheck --check-prefixes=GFX11,GFX11-FAKE16 %s
-; TODO: FIXME-TRUE16 - Enable this llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -mattr=+real-true16 -amdgpu-enable-vopd=0 < %s | FileCheck --check-prefixes=GFX1250,GFX1250-TRUE16 %s
-; Crashing on v_test_imin_slt_i16
-; LLVM ERROR: Cannot select: 0x5f895f65b050: i16,ch = load<(load (s16) from %ir.b.gep, addrspace 1)>
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -mattr=+real-true16 -amdgpu-enable-vopd=0 < %s | FileCheck --check-prefixes=GFX1250,GFX1250-TRUE16 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -mattr=-real-true16 -amdgpu-enable-vopd=0 < %s | FileCheck --check-prefixes=GFX1250,GFX1250-FAKE16 %s
define amdgpu_kernel void @v_test_imin_sle_i32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
@@ -1482,20 +1480,35 @@ define amdgpu_kernel void @v_test_imin_slt_i16(ptr addrspace(1) %out, ptr addrsp
; GFX11-FAKE16-NEXT: global_store_b16 v0, v1, s[0:1]
; GFX11-FAKE16-NEXT: s_endpgm
;
-; GFX1250-LABEL: v_test_imin_slt_i16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_clause 0x1
-; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
-; GFX1250-NEXT: s_load_b64 s[6:7], s[4:5], 0x10
-; GFX1250-NEXT: v_and_b32_e32 v0, 0x3ff, v0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: s_clause 0x1
-; GFX1250-NEXT: global_load_u16 v1, v0, s[2:3] scale_offset
-; GFX1250-NEXT: global_load_u16 v2, v0, s[6:7] scale_offset
-; GFX1250-NEXT: s_wait_loadcnt 0x0
-; GFX1250-NEXT: v_min_i16 v1, v1, v2
-; GFX1250-NEXT: global_store_b16 v0, v1, s[0:1] scale_offset
-; GFX1250-NEXT: s_endpgm
+; GFX1250-TRUE16-LABEL: v_test_imin_slt_i16:
+; GFX1250-TRUE16: ; %bb.0:
+; GFX1250-TRUE16-NEXT: s_clause 0x1
+; GFX1250-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
+; GFX1250-TRUE16-NEXT: s_load_b64 s[6:7], s[4:5], 0x10
+; GFX1250-TRUE16-NEXT: v_and_b32_e32 v1, 0x3ff, v0
+; GFX1250-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-TRUE16-NEXT: s_clause 0x1
+; GFX1250-TRUE16-NEXT: global_load_u16 v0, v1, s[2:3] scale_offset
+; GFX1250-TRUE16-NEXT: global_load_u16 v2, v1, s[6:7] scale_offset
+; GFX1250-TRUE16-NEXT: s_wait_loadcnt 0x0
+; GFX1250-TRUE16-NEXT: v_min_i16 v0.l, v0.l, v2.l
+; GFX1250-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] scale_offset
+; GFX1250-TRUE16-NEXT: s_endpgm
+;
+; GFX1250-FAKE16-LABEL: v_test_imin_slt_i16:
+; GFX1250-FAKE16: ; %bb.0:
+; GFX1250-FAKE16-NEXT: s_clause 0x1
+; GFX1250-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
+; GFX1250-FAKE16-NEXT: s_load_b64 s[6:7], s[4:5], 0x10
+; GFX1250-FAKE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; GFX1250-FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-FAKE16-NEXT: s_clause 0x1
+; GFX1250-FAKE16-NEXT: global_load_u16 v1, v0, s[2:3] scale_offset
+; GFX1250-FAKE16-NEXT: global_load_u16 v2, v0, s[6:7] scale_offset
+; GFX1250-FAKE16-NEXT: s_wait_loadcnt 0x0
+; GFX1250-FAKE16-NEXT: v_min_i16 v1, v1, v2
+; GFX1250-FAKE16-NEXT: global_store_b16 v0, v1, s[0:1] scale_offset
+; GFX1250-FAKE16-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%a.gep = getelementptr inbounds i16, ptr addrspace(1) %aptr, i32 %tid
%b.gep = getelementptr inbounds i16, ptr addrspace(1) %bptr, i32 %tid
@@ -2769,20 +2782,35 @@ define amdgpu_kernel void @v_test_umin_ult_i8(ptr addrspace(1) %out, ptr addrspa
; GFX11-FAKE16-NEXT: global_store_b8 v0, v1, s[0:1]
; GFX11-FAKE16-NEXT: s_endpgm
;
-; GFX1250-LABEL: v_test_umin_ult_i8:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_clause 0x1
-; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
-; GFX1250-NEXT: s_load_b64 s[6:7], s[4:5], 0x10
-; GFX1250-NEXT: v_and_b32_e32 v0, 0x3ff, v0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: s_clause 0x1
-; GFX1250-NEXT: global_load_u8 v1, v0, s[2:3]
-; GFX1250-NEXT: global_load_u8 v2, v0, s[6:7]
-; GFX1250-NEXT: s_wait_loadcnt 0x0
-; GFX1250-NEXT: v_min_u16 v1, v1, v2
-; GFX1250-NEXT: global_store_b8 v0, v1, s[0:1]
-; GFX1250-NEXT: s_endpgm
+; GFX1250-TRUE16-LABEL: v_test_umin_ult_i8:
+; GFX1250-TRUE16: ; %bb.0:
+; GFX1250-TRUE16-NEXT: s_clause 0x1
+; GFX1250-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
+; GFX1250-TRUE16-NEXT: s_load_b64 s[6:7], s[4:5], 0x10
+; GFX1250-TRUE16-NEXT: v_and_b32_e32 v1, 0x3ff, v0
+; GFX1250-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-TRUE16-NEXT: s_clause 0x1
+; GFX1250-TRUE16-NEXT: global_load_u8 v0, v1, s[2:3]
+; GFX1250-TRUE16-NEXT: global_load_u8 v2, v1, s[6:7]
+; GFX1250-TRUE16-NEXT: s_wait_loadcnt 0x0
+; GFX1250-TRUE16-NEXT: v_min_u16 v0.l, v0.l, v2.l
+; GFX1250-TRUE16-NEXT: global_store_b8 v1, v0, s[0:1]
+; GFX1250-TRUE16-NEXT: s_endpgm
+;
+; GFX1250-FAKE16-LABEL: v_test_umin_ult_i8:
+; GFX1250-FAKE16: ; %bb.0:
+; GFX1250-FAKE16-NEXT: s_clause 0x1
+; GFX1250-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
+; GFX1250-FAKE16-NEXT: s_load_b64 s[6:7], s[4:5], 0x10
+; GFX1250-FAKE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; GFX1250-FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-FAKE16-NEXT: s_clause 0x1
+; GFX1250-FAKE16-NEXT: global_load_u8 v1, v0, s[2:3]
+; GFX1250-FAKE16-NEXT: global_load_u8 v2, v0, s[6:7]
+; GFX1250-FAKE16-NEXT: s_wait_loadcnt 0x0
+; GFX1250-FAKE16-NEXT: v_min_u16 v1, v1, v2
+; GFX1250-FAKE16-NEXT: global_store_b8 v0, v1, s[0:1]
+; GFX1250-FAKE16-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%a.gep = getelementptr inbounds i8, ptr addrspace(1) %a.ptr, i32 %tid
%b.gep = getelementptr inbounds i8, ptr addrspace(1) %b.ptr, i32 %tid
@@ -5069,5 +5097,3 @@ declare i32 @llvm.amdgcn.workitem.id.x() #1
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; GFX1250-FAKE16: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/minmax.ll b/llvm/test/CodeGen/AMDGPU/minmax.ll
index 57e6943..56f9c5d 100644
--- a/llvm/test/CodeGen/AMDGPU/minmax.ll
+++ b/llvm/test/CodeGen/AMDGPU/minmax.ll
@@ -638,6 +638,14 @@ define void @test_med3_minimumnum_maximumnum_f32(ptr addrspace(1) %arg, float %x
; GFX12-NEXT: v_med3_num_f32 v2, v2, v3, v4
; GFX12-NEXT: global_store_b32 v[0:1], v2, off
; GFX12-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: test_med3_minimumnum_maximumnum_f32:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_med3_num_f32 v2, v2, v3, v4
+; GFX1250-NEXT: global_store_b32 v[0:1], v2, off
+; GFX1250-NEXT: s_set_pc_i64 s[30:31]
%tmp0 = call float @llvm.minimumnum.f32(float %x, float %y)
%tmp1 = call float @llvm.maximumnum.f32(float %x, float %y)
%tmp2 = call float @llvm.minimumnum.f32(float %tmp1, float %z)
@@ -798,7 +806,7 @@ define amdgpu_ps void @s_test_minmax_f16_ieee_false(half inreg %a, half inreg %b
; SDAG-GFX1250-TRUE16-NEXT: s_mov_b32 s5, s4
; SDAG-GFX1250-TRUE16-NEXT: s_mov_b32 s4, s3
; SDAG-GFX1250-TRUE16-NEXT: v_maxmin_num_f16 v0.l, s0, s1, v0.l
-; SDAG-GFX1250-TRUE16-NEXT: flat_store_b16 v1, v0, s[4:5]
+; SDAG-GFX1250-TRUE16-NEXT: global_store_b16 v1, v0, s[4:5]
; SDAG-GFX1250-TRUE16-NEXT: s_endpgm
;
; SDAG-GFX1250-FAKE16-LABEL: s_test_minmax_f16_ieee_false:
@@ -813,12 +821,12 @@ define amdgpu_ps void @s_test_minmax_f16_ieee_false(half inreg %a, half inreg %b
; GISEL-GFX1250-TRUE16-LABEL: s_test_minmax_f16_ieee_false:
; GISEL-GFX1250-TRUE16: ; %bb.0:
; GISEL-GFX1250-TRUE16-NEXT: s_max_num_f16 s0, s0, s1
+; GISEL-GFX1250-TRUE16-NEXT: v_mov_b32_e32 v1, 0
; GISEL-GFX1250-TRUE16-NEXT: s_mov_b32 s6, s3
; GISEL-GFX1250-TRUE16-NEXT: s_mov_b32 s7, s4
-; GISEL-GFX1250-TRUE16-NEXT: v_mov_b32_e32 v1, 0
; GISEL-GFX1250-TRUE16-NEXT: s_min_num_f16 s0, s0, s2
-; GISEL-GFX1250-TRUE16-NEXT: v_mov_b32_e32 v0, s0
-; GISEL-GFX1250-TRUE16-NEXT: flat_store_b16 v1, v0, s[6:7]
+; GISEL-GFX1250-TRUE16-NEXT: v_mov_b16_e32 v0.l, s0
+; GISEL-GFX1250-TRUE16-NEXT: global_store_b16 v1, v0, s[6:7]
; GISEL-GFX1250-TRUE16-NEXT: s_endpgm
;
; GISEL-GFX1250-FAKE16-LABEL: s_test_minmax_f16_ieee_false:
@@ -1246,7 +1254,7 @@ define void @test_med3_f16(ptr addrspace(1) %arg, half %x, half %y, half %z) #0
; SDAG-GFX1250-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
; SDAG-GFX1250-TRUE16-NEXT: s_wait_kmcnt 0x0
; SDAG-GFX1250-TRUE16-NEXT: v_med3_num_f16 v2.l, v2.l, v3.l, v4.l
-; SDAG-GFX1250-TRUE16-NEXT: flat_store_b16 v[0:1], v2
+; SDAG-GFX1250-TRUE16-NEXT: global_store_b16 v[0:1], v2, off
; SDAG-GFX1250-TRUE16-NEXT: s_set_pc_i64 s[30:31]
;
; SDAG-GFX1250-FAKE16-LABEL: test_med3_f16:
@@ -1262,7 +1270,7 @@ define void @test_med3_f16(ptr addrspace(1) %arg, half %x, half %y, half %z) #0
; GISEL-GFX1250-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GISEL-GFX1250-TRUE16-NEXT: s_wait_kmcnt 0x0
; GISEL-GFX1250-TRUE16-NEXT: v_med3_num_f16 v2.l, v2.l, v3.l, v4.l
-; GISEL-GFX1250-TRUE16-NEXT: flat_store_b16 v[0:1], v2
+; GISEL-GFX1250-TRUE16-NEXT: global_store_b16 v[0:1], v2, off
; GISEL-GFX1250-TRUE16-NEXT: s_set_pc_i64 s[30:31]
;
; GISEL-GFX1250-FAKE16-LABEL: test_med3_f16:
diff --git a/llvm/test/CodeGen/AMDGPU/propagate-flat-work-group-size.ll b/llvm/test/CodeGen/AMDGPU/propagate-flat-work-group-size.ll
index 42469c8..23e90b3 100644
--- a/llvm/test/CodeGen/AMDGPU/propagate-flat-work-group-size.ll
+++ b/llvm/test/CodeGen/AMDGPU/propagate-flat-work-group-size.ll
@@ -202,13 +202,13 @@ attributes #5 = { "amdgpu-flat-work-group-size"="128,512" }
attributes #6 = { "amdgpu-flat-work-group-size"="512,512" }
attributes #7 = { "amdgpu-flat-work-group-size"="64,256" }
;.
-; CHECK: attributes #[[ATTR0]] = { "amdgpu-agpr-alloc"="0" "amdgpu-flat-work-group-size"="1,256" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR1]] = { "amdgpu-agpr-alloc"="0" "amdgpu-flat-work-group-size"="64,128" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR2]] = { "amdgpu-agpr-alloc"="0" "amdgpu-flat-work-group-size"="128,512" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR3]] = { "amdgpu-agpr-alloc"="0" "amdgpu-flat-work-group-size"="64,64" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR4]] = { "amdgpu-agpr-alloc"="0" "amdgpu-flat-work-group-size"="128,256" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR5]] = { "amdgpu-agpr-alloc"="0" "amdgpu-flat-work-group-size"="512,1024" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR6]] = { "amdgpu-agpr-alloc"="0" "amdgpu-flat-work-group-size"="512,512" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR7]] = { "amdgpu-agpr-alloc"="0" "amdgpu-flat-work-group-size"="64,256" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR8]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR0]] = { "amdgpu-flat-work-group-size"="1,256" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR1]] = { "amdgpu-flat-work-group-size"="64,128" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR2]] = { "amdgpu-flat-work-group-size"="128,512" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR3]] = { "amdgpu-flat-work-group-size"="64,64" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR4]] = { "amdgpu-flat-work-group-size"="128,256" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR5]] = { "amdgpu-flat-work-group-size"="512,1024" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR6]] = { "amdgpu-flat-work-group-size"="512,512" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR7]] = { "amdgpu-flat-work-group-size"="64,256" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR8]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
;.
diff --git a/llvm/test/CodeGen/AMDGPU/propagate-waves-per-eu.ll b/llvm/test/CodeGen/AMDGPU/propagate-waves-per-eu.ll
index 06533b4..0be3147 100644
--- a/llvm/test/CodeGen/AMDGPU/propagate-waves-per-eu.ll
+++ b/llvm/test/CodeGen/AMDGPU/propagate-waves-per-eu.ll
@@ -399,25 +399,25 @@ attributes #17 = { "amdgpu-waves-per-eu"="5,8" }
attributes #18 = { "amdgpu-waves-per-eu"="9,10" }
attributes #19 = { "amdgpu-waves-per-eu"="8,9" }
;.
-; CHECK: attributes #[[ATTR0]] = { "amdgpu-agpr-alloc"="0" "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="2,8" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR1]] = { "amdgpu-agpr-alloc"="0" "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="1,8" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR2]] = { "amdgpu-agpr-alloc"="0" "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="1,2" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR3]] = { "amdgpu-agpr-alloc"="0" "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="1,4" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR4]] = { "amdgpu-agpr-alloc"="0" "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="9,9" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR5]] = { "amdgpu-agpr-alloc"="0" "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="1,1" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR6]] = { "amdgpu-agpr-alloc"="0" "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="9,10" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR7]] = { "amdgpu-agpr-alloc"="0" "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="2,9" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR8]] = { "amdgpu-agpr-alloc"="0" "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="3,8" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR9]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR10]] = { "amdgpu-agpr-alloc"="0" "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR11]] = { "amdgpu-agpr-alloc"="0" "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="1,123" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR12]] = { "amdgpu-agpr-alloc"="0" "amdgpu-flat-work-group-size"="1,512" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR13]] = { "amdgpu-agpr-alloc"="0" "amdgpu-flat-work-group-size"="1,512" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="3,6" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR14]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="3,6" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR15]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,8" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR16]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="6,8" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR17]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="5,5" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR18]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="5,8" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR19]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="9,10" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR20]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="8,9" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR0]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="2,8" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR1]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="1,8" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR2]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="1,2" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR3]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="1,4" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR4]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="9,9" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR5]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="1,1" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR6]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="9,10" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR7]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="2,9" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR8]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="3,8" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR9]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR10]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR11]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="1,123" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR12]] = { "amdgpu-flat-work-group-size"="1,512" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR13]] = { "amdgpu-flat-work-group-size"="1,512" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="3,6" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR14]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="3,6" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR15]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,8" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR16]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="6,8" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR17]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="5,5" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR18]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="5,8" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR19]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="9,10" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR20]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="8,9" "uniform-work-group-size"="false" }
;.
diff --git a/llvm/test/CodeGen/AMDGPU/recursive_global_initializer.ll b/llvm/test/CodeGen/AMDGPU/recursive_global_initializer.ll
index 8930626..33da671 100644
--- a/llvm/test/CodeGen/AMDGPU/recursive_global_initializer.ll
+++ b/llvm/test/CodeGen/AMDGPU/recursive_global_initializer.ll
@@ -19,5 +19,5 @@ define void @hoge() {
ret void
}
;.
-; CHECK: attributes #[[ATTR0]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
;.
diff --git a/llvm/test/CodeGen/AMDGPU/remove-no-kernel-id-attribute.ll b/llvm/test/CodeGen/AMDGPU/remove-no-kernel-id-attribute.ll
index 3dfb0e1..f847d66 100644
--- a/llvm/test/CodeGen/AMDGPU/remove-no-kernel-id-attribute.ll
+++ b/llvm/test/CodeGen/AMDGPU/remove-no-kernel-id-attribute.ll
@@ -191,12 +191,12 @@ define amdgpu_kernel void @kernel_lds_recursion() {
!1 = !{i32 1, !"amdhsa_code_object_version", i32 400}
;.
-; CHECK: attributes #[[ATTR0]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR1]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR2]] = { "amdgpu-agpr-alloc"="0" "amdgpu-lds-size"="2" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR2]] = { "amdgpu-lds-size"="2" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; CHECK: attributes #[[ATTR3]] = { "amdgpu-lds-size"="4" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR4]] = { "amdgpu-agpr-alloc"="0" "amdgpu-lds-size"="2" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR5]] = { "amdgpu-agpr-alloc"="0" "amdgpu-lds-size"="4" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR4]] = { "amdgpu-lds-size"="2" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR5]] = { "amdgpu-lds-size"="4" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; CHECK: attributes #[[ATTR6:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) }
; CHECK: attributes #[[ATTR7:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
;.
diff --git a/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.v2f16.ll b/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.v2f16.ll
index bb22144..9814ed8 100644
--- a/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.v2f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.v2f16.ll
@@ -1,15 +1,9 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=amdgcn -mcpu=hawaii < %s | FileCheck -check-prefixes=CI,CI-SAFE %s
-; RUN: llc -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefixes=VI,VI-SAFE %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx906 < %s | FileCheck -check-prefixes=GFX9,GFX9-SAFE %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-SAFE,GFX11-SAFE-TRUE16 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-SAFE,GFX11-SAFE-FAKE16 %s
-
-; RUN: llc -mtriple=amdgcn -mcpu=hawaii -enable-no-signed-zeros-fp-math < %s | FileCheck -check-prefixes=CI,CI-NSZ %s
-; RUN: llc -mtriple=amdgcn -mcpu=fiji -enable-no-signed-zeros-fp-math < %s | FileCheck -check-prefixes=VI,VI-NSZ %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx906 -enable-no-signed-zeros-fp-math < %s | FileCheck -check-prefixes=GFX9,GFX9-NSZ %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -enable-no-signed-zeros-fp-math < %s | FileCheck -check-prefixes=GFX11,GFX11-NSZ,GFX11-NSZ-TRUE16 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -enable-no-signed-zeros-fp-math < %s | FileCheck -check-prefixes=GFX11,GFX11-NSZ,GFX11-NSZ-FAKE16 %s
+; RUN: llc -mtriple=amdgcn -mcpu=hawaii < %s | FileCheck -check-prefixes=CI %s
+; RUN: llc -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefixes=VI %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx906 < %s | FileCheck -check-prefixes=GFX9 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-TRUE16 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-FAKE16 %s
define <2 x half> @add_select_fabs_fabs_v2f16(<2 x i32> %c, <2 x half> %x, <2 x half> %y, <2 x half> %z) {
; CI-LABEL: add_select_fabs_fabs_v2f16:
@@ -63,69 +57,37 @@ define <2 x half> @add_select_fabs_fabs_v2f16(<2 x i32> %c, <2 x half> %x, <2 x
; GFX9-NEXT: v_pk_add_f16 v0, v0, v4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SAFE-TRUE16-LABEL: add_select_fabs_fabs_v2f16:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v2
-; GFX11-SAFE-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v3
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v0.l, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, v2.h, v0.h, s0
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_fabs_fabs_v2f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3
-; GFX11-SAFE-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_fabs_fabs_v2f16:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v2
-; GFX11-NSZ-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v3
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v0.l, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, v2.h, v0.h, s0
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_fabs_fabs_v2f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3
-; GFX11-NSZ-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_fabs_fabs_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v2
+; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v3
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v0.l, vcc_lo
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v2.h, v0.h, s0
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_fabs_fabs_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3
+; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo
+; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%fabs.x = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
%fabs.y = call <2 x half> @llvm.fabs.v2f16(<2 x half> %y)
@@ -198,73 +160,39 @@ define { <2 x half>, <2 x half> } @add_select_multi_use_lhs_fabs_fabs_v2f16(<2 x
; GFX9-NEXT: v_pk_add_f16 v1, v1, v4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SAFE-TRUE16-LABEL: add_select_multi_use_lhs_fabs_fabs_v2f16:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-SAFE-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v3
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v1, v2, v4
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, v0.l, v2.l, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, v0.h, v2.h, s0
-; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v0, v5
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_multi_use_lhs_fabs_fabs_v2f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-SAFE-FAKE16-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v7, 16, v3
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v1, v2, v4
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v0, v5
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_multi_use_lhs_fabs_fabs_v2f16:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-NSZ-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v3
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v1, v2, v4
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, v0.l, v2.l, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, v0.h, v2.h, s0
-; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v0, v5
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_multi_use_lhs_fabs_fabs_v2f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-NSZ-FAKE16-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v7, 16, v3
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v1, v2, v4
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v0, v5
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_multi_use_lhs_fabs_fabs_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v3
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-TRUE16-NEXT: v_pk_add_f16 v1, v2, v4
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v0.l, v2.l, vcc_lo
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v0.h, v2.h, s0
+; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v0, v5
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_multi_use_lhs_fabs_fabs_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v7, 16, v3
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc_lo
+; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT: v_pk_add_f16 v1, v2, v4
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v0, v5
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%fabs.x = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
%fabs.y = call <2 x half> @llvm.fabs.v2f16(<2 x half> %y)
@@ -328,73 +256,39 @@ define { <2 x half>, <2 x half> } @add_select_multi_store_use_lhs_fabs_fabs_v2f1
; GFX9-NEXT: v_pk_add_f16 v0, v0, v4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SAFE-TRUE16-LABEL: add_select_multi_store_use_lhs_fabs_fabs_v2f16:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-SAFE-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v3
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-SAFE-TRUE16-NEXT: v_mov_b32_e32 v1, v2
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, v0.l, v2.l, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, v0.h, v2.h, s0
-; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_multi_store_use_lhs_fabs_fabs_v2f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3
-; GFX11-SAFE-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT: v_mov_b32_e32 v1, v2
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_multi_store_use_lhs_fabs_fabs_v2f16:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-NSZ-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v3
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NSZ-TRUE16-NEXT: v_mov_b32_e32 v1, v2
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, v0.l, v2.l, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, v0.h, v2.h, s0
-; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_multi_store_use_lhs_fabs_fabs_v2f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3
-; GFX11-NSZ-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT: v_mov_b32_e32 v1, v2
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_multi_store_use_lhs_fabs_fabs_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v3
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-TRUE16-NEXT: v_mov_b32_e32 v1, v2
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v0.l, v2.l, vcc_lo
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v0.h, v2.h, s0
+; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_multi_store_use_lhs_fabs_fabs_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3
+; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo
+; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT: v_mov_b32_e32 v1, v2
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%fabs.x = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
%fabs.y = call <2 x half> @llvm.fabs.v2f16(<2 x half> %y)
@@ -469,73 +363,39 @@ define { <2 x half>, <2 x half> } @add_select_multi_use_rhs_fabs_fabs_v2f16(<2 x
; GFX9-NEXT: v_pk_add_f16 v1, v2, v5
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SAFE-TRUE16-LABEL: add_select_multi_use_rhs_fabs_fabs_v2f16:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v2
-; GFX11-SAFE-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v3
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v0.l, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, v2.h, v0.h, s0
-; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v1, v2, v5
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_multi_use_rhs_fabs_fabs_v2f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-SAFE-FAKE16-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v7, 16, v3
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v1, v3, v5
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_multi_use_rhs_fabs_fabs_v2f16:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v2
-; GFX11-NSZ-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v3
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v0.l, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, v2.h, v0.h, s0
-; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v1, v2, v5
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_multi_use_rhs_fabs_fabs_v2f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-NSZ-FAKE16-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v7, 16, v3
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v1, v3, v5
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_multi_use_rhs_fabs_fabs_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v2
+; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v3
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v0.l, vcc_lo
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v2.h, v0.h, s0
+; GFX11-TRUE16-NEXT: v_pk_add_f16 v1, v2, v5
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_multi_use_rhs_fabs_fabs_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v7, 16, v3
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc_lo
+; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT: v_pk_add_f16 v1, v3, v5
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%fabs.x = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
%fabs.y = call <2 x half> @llvm.fabs.v2f16(<2 x half> %y)
@@ -597,63 +457,34 @@ define <2 x half> @add_select_fabs_var_v2f16(<2 x i32> %c, <2 x half> %x, <2 x h
; GFX9-NEXT: v_pk_add_f16 v0, v0, v4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SAFE-TRUE16-LABEL: add_select_fabs_var_v2f16:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: v_and_b32_e32 v1, 0x7fff7fff, v2
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.h, v1.h, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, v1.l, s0
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_fabs_var_v2f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v3
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_fabs_var_v2f16:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: v_and_b32_e32 v1, 0x7fff7fff, v2
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.h, v1.h, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, v1.l, s0
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_fabs_var_v2f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v3
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_fabs_var_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0x7fff7fff, v2
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.h, v1.h, vcc_lo
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, v1.l, s0
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_fabs_var_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v3
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%fabs.x = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
%select = select <2 x i1> %cmp, <2 x half> %fabs.x, <2 x half> %y
@@ -709,61 +540,33 @@ define <2 x half> @add_select_fabs_negk_v2f16(<2 x i32> %c, <2 x half> %x, <2 x
; GFX9-NEXT: v_pk_add_f16 v0, v0, v3
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SAFE-TRUE16-LABEL: add_select_fabs_negk_v2f16:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v2
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0xbc00, v0.l, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0xbc00, v0.h, s0
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v0, v3
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_fabs_negk_v2f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xbc00, v4, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v0, v3
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_fabs_negk_v2f16:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v2
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0xbc00, v0.l, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, 0xbc00, v0.h, s0
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v0, v3
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_fabs_negk_v2f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xbc00, v4, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v0, v3
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_fabs_negk_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v2
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0xbc00, v0.l, vcc_lo
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0xbc00, v0.h, s0
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v0, v3
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_fabs_negk_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v2, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xbc00, v4, vcc_lo
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v0, v3
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
%select = select <2 x i1> %cmp, <2 x half> %fabs, <2 x half> <half -1.0, half -1.0>
@@ -815,61 +618,33 @@ define <2 x half> @add_select_fabs_negk_negk_v2f16(<2 x i32> %c, <2 x half> %x)
; GFX9-NEXT: v_pk_add_f16 v0, v2, v0 neg_lo:[0,1] neg_hi:[0,1]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SAFE-TRUE16-LABEL: add_select_fabs_negk_negk_v2f16:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_mov_b16_e32 v3.l, 0xbc00
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, 0xc000, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.l, 0xc000, s0
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT: v_pack_b32_f16 v0, v0.h, v0.l
-; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v2, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_fabs_negk_negk_v2f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_mov_b32_e32 v3, 0xc000
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xbc00, v3, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v3, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v2, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_fabs_negk_negk_v2f16:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_mov_b16_e32 v3.l, 0xbc00
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, 0xc000, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.l, 0xc000, s0
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT: v_pack_b32_f16 v0, v0.h, v0.l
-; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v2, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_fabs_negk_negk_v2f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_mov_b32_e32 v3, 0xc000
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xbc00, v3, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v3, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v2, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_fabs_negk_negk_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, 0xbc00
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, 0xc000, vcc_lo
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.l, 0xc000, s0
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_pack_b32_f16 v0, v0.h, v0.l
+; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v2, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_fabs_negk_negk_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_mov_b32_e32 v3, 0xc000
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xbc00, v3, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v3, vcc_lo
+; GFX11-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v2, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%select = select <2 x i1> %cmp, <2 x half> <half -2.0, half -2.0>, <2 x half> <half -1.0, half -1.0>
%fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %select)
@@ -920,61 +695,33 @@ define <2 x half> @add_select_posk_posk_v2f16(<2 x i32> %c, <2 x half> %x) {
; GFX9-NEXT: v_pk_add_f16 v0, v0, v2
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SAFE-TRUE16-LABEL: add_select_posk_posk_v2f16:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_mov_b16_e32 v3.l, 0x3c00
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, 0x4000, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.l, 0x4000, s0
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT: v_pack_b32_f16 v0, v0.h, v0.l
-; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v0, v2
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_posk_posk_v2f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_mov_b32_e32 v3, 0x4000
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v3, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x3c00, v3, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v0, v2
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_posk_posk_v2f16:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_mov_b16_e32 v3.l, 0x3c00
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, 0x4000, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.l, 0x4000, s0
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT: v_pack_b32_f16 v0, v0.h, v0.l
-; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v0, v2
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_posk_posk_v2f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_mov_b32_e32 v3, 0x4000
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v3, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x3c00, v3, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v0, v2
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_posk_posk_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, 0x3c00
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, 0x4000, vcc_lo
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.l, 0x4000, s0
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_pack_b32_f16 v0, v0.h, v0.l
+; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v0, v2
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_posk_posk_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_mov_b32_e32 v3, 0x4000
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v3, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x3c00, v3, vcc_lo
+; GFX11-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v0, v2
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%select = select <2 x i1> %cmp, <2 x half> <half 2.0, half 2.0>, <2 x half> <half 1.0, half 1.0>
%add = fadd <2 x half> %select, %x
@@ -1029,61 +776,33 @@ define <2 x half> @add_select_negk_fabs_v2f16(<2 x i32> %c, <2 x half> %x, <2 x
; GFX9-NEXT: v_pk_add_f16 v0, v0, v3
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SAFE-TRUE16-LABEL: add_select_negk_fabs_v2f16:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_ne_u32_e64 s0, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0xbc00, v2.l, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0xbc00, v2.h, s0
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v0, v3
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_negk_fabs_v2f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xbc00, v4, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v0, v3
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_negk_fabs_v2f16:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_ne_u32_e64 s0, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0xbc00, v2.l, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, 0xbc00, v2.h, s0
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v0, v3
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_negk_fabs_v2f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xbc00, v4, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v0, v3
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_negk_fabs_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e64 s0, 0, v1
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0xbc00, v2.l, vcc_lo
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0xbc00, v2.h, s0
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v0, v3
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_negk_fabs_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX11-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v2, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xbc00, v4, vcc_lo
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v0, v3
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
%select = select <2 x i1> %cmp, <2 x half> <half -1.0, half -1.0>, <2 x half> %fabs
@@ -1140,61 +859,33 @@ define <2 x half> @add_select_negliteralk_fabs_v2f16(<2 x i32> %c, <2 x half> %x
; GFX9-NEXT: v_pk_add_f16 v0, v0, v3
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SAFE-TRUE16-LABEL: add_select_negliteralk_fabs_v2f16:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_ne_u32_e64 s0, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0xe400, v2.l, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0xe400, v2.h, s0
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v0, v3
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_negliteralk_fabs_v2f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xe400, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xe400, v4, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v0, v3
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_negliteralk_fabs_v2f16:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_ne_u32_e64 s0, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0xe400, v2.l, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, 0xe400, v2.h, s0
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v0, v3
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_negliteralk_fabs_v2f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xe400, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xe400, v4, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v0, v3
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_negliteralk_fabs_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e64 s0, 0, v1
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0xe400, v2.l, vcc_lo
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0xe400, v2.h, s0
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v0, v3
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_negliteralk_fabs_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX11-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xe400, v2, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xe400, v4, vcc_lo
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v0, v3
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
%select = select <2 x i1> %cmp, <2 x half> <half -1024.0, half -1024.0>, <2 x half> %fabs
@@ -1250,61 +941,33 @@ define <2 x half> @add_select_fabs_posk_v2f16(<2 x i32> %c, <2 x half> %x, <2 x
; GFX9-NEXT: v_pk_add_f16 v0, v0, v3
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SAFE-TRUE16-LABEL: add_select_fabs_posk_v2f16:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v2
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x3c00, v0.l, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x3c00, v0.h, s0
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v0, v3
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_fabs_posk_v2f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x3c00, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v4, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v0, v3
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_fabs_posk_v2f16:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v2
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x3c00, v0.l, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x3c00, v0.h, s0
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v0, v3
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_fabs_posk_v2f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x3c00, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v4, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v0, v3
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_fabs_posk_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v2
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x3c00, v0.l, vcc_lo
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x3c00, v0.h, s0
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v0, v3
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_fabs_posk_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x3c00, v2, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v4, vcc_lo
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v0, v3
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
%select = select <2 x i1> %cmp, <2 x half> %fabs, <2 x half> <half 1.0, half 1.0>
@@ -1360,61 +1023,33 @@ define <2 x half> @add_select_posk_fabs_v2f16(<2 x i32> %c, <2 x half> %x, <2 x
; GFX9-NEXT: v_pk_add_f16 v0, v0, v3
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SAFE-TRUE16-LABEL: add_select_posk_fabs_v2f16:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_ne_u32_e64 s0, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x3c00, v2.l, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x3c00, v2.h, s0
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v0, v3
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_posk_fabs_v2f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x3c00, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v4, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v0, v3
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_posk_fabs_v2f16:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_ne_u32_e64 s0, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x3c00, v2.l, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x3c00, v2.h, s0
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v0, v3
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_posk_fabs_v2f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x3c00, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v4, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v0, v3
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_posk_fabs_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e64 s0, 0, v1
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x3c00, v2.l, vcc_lo
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x3c00, v2.h, s0
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v0, v3
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_posk_fabs_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX11-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x3c00, v2, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v4, vcc_lo
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v0, v3
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
%select = select <2 x i1> %cmp, <2 x half> <half 1.0, half 1.0>, <2 x half> %fabs
@@ -1470,57 +1105,31 @@ define <2 x half> @add_select_fneg_fneg_v2f16(<2 x i32> %c, <2 x half> %x, <2 x
; GFX9-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SAFE-TRUE16-LABEL: add_select_fneg_fneg_v2f16:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.h, v2.h, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, v2.l, s0
-; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_fneg_fneg_v2f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_fneg_fneg_v2f16:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.h, v2.h, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, v2.l, s0
-; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_fneg_fneg_v2f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_fneg_fneg_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.h, v2.h, vcc_lo
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, v2.l, s0
+; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_fneg_fneg_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
+; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%fneg.x = fneg <2 x half> %x
%fneg.y = fneg <2 x half> %y
@@ -1587,61 +1196,33 @@ define { <2 x half>, <2 x half> } @add_select_multi_use_lhs_fneg_fneg_v2f16(<2 x
; GFX9-NEXT: v_pk_add_f16 v1, v5, v2 neg_lo:[0,1] neg_hi:[0,1]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SAFE-TRUE16-LABEL: add_select_multi_use_lhs_fneg_fneg_v2f16:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v1, v5, v2 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.h, v2.h, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, v2.l, s0
-; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_multi_use_lhs_fneg_fneg_v2f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v7, 16, v3
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v1, v5, v2 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_multi_use_lhs_fneg_fneg_v2f16:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v1, v5, v2 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.h, v2.h, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, v2.l, s0
-; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_multi_use_lhs_fneg_fneg_v2f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v7, 16, v3
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v1, v5, v2 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_multi_use_lhs_fneg_fneg_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-TRUE16-NEXT: v_pk_add_f16 v1, v5, v2 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.h, v2.h, vcc_lo
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, v2.l, s0
+; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_multi_use_lhs_fneg_fneg_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v7, 16, v3
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc_lo
+; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT: v_pk_add_f16 v1, v5, v2 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%fneg.x = fneg <2 x half> %x
%fneg.y = fneg <2 x half> %y
@@ -1705,61 +1286,33 @@ define { <2 x half>, <2 x half> } @add_select_multi_store_use_lhs_fneg_fneg_v2f1
; GFX9-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SAFE-TRUE16-LABEL: add_select_multi_store_use_lhs_fneg_fneg_v2f16:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: v_xor_b32_e32 v1, 0x80008000, v2
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.h, v2.h, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, v2.l, s0
-; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_multi_store_use_lhs_fneg_fneg_v2f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT: v_xor_b32_e32 v1, 0x80008000, v2
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_multi_store_use_lhs_fneg_fneg_v2f16:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: v_xor_b32_e32 v1, 0x80008000, v2
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.h, v2.h, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, v2.l, s0
-; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_multi_store_use_lhs_fneg_fneg_v2f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT: v_xor_b32_e32 v1, 0x80008000, v2
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_multi_store_use_lhs_fneg_fneg_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-TRUE16-NEXT: v_xor_b32_e32 v1, 0x80008000, v2
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.h, v2.h, vcc_lo
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, v2.l, s0
+; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_multi_store_use_lhs_fneg_fneg_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo
+; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT: v_xor_b32_e32 v1, 0x80008000, v2
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%fneg.x = fneg <2 x half> %x
%fneg.y = fneg <2 x half> %y
@@ -1828,61 +1381,33 @@ define { <2 x half>, <2 x half> } @add_select_multi_use_rhs_fneg_fneg_v2f16(<2 x
; GFX9-NEXT: v_pk_add_f16 v1, v5, v3 neg_lo:[0,1] neg_hi:[0,1]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SAFE-TRUE16-LABEL: add_select_multi_use_rhs_fneg_fneg_v2f16:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v1, v5, v3 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.h, v2.h, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, v2.l, s0
-; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_multi_use_rhs_fneg_fneg_v2f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v7, 16, v3
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v1, v5, v3 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_multi_use_rhs_fneg_fneg_v2f16:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v1, v5, v3 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.h, v2.h, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, v2.l, s0
-; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_multi_use_rhs_fneg_fneg_v2f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v7, 16, v3
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v1, v5, v3 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_multi_use_rhs_fneg_fneg_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-TRUE16-NEXT: v_pk_add_f16 v1, v5, v3 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.h, v2.h, vcc_lo
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, v2.l, s0
+; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_multi_use_rhs_fneg_fneg_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v7, 16, v3
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc_lo
+; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT: v_pk_add_f16 v1, v5, v3 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%fneg.x = fneg <2 x half> %x
%fneg.y = fneg <2 x half> %y
@@ -1948,63 +1473,34 @@ define <2 x half> @add_select_fneg_var_v2f16(<2 x i32> %c, <2 x half> %x, <2 x h
; GFX9-NEXT: v_pk_add_f16 v0, v0, v4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SAFE-TRUE16-LABEL: add_select_fneg_var_v2f16:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: v_xor_b32_e32 v1, 0x80008000, v2
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.h, v1.h, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, v1.l, s0
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_fneg_var_v2f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v3
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_fneg_var_v2f16:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: v_xor_b32_e32 v1, 0x80008000, v2
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.h, v1.h, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, v1.l, s0
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_fneg_var_v2f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v3
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_fneg_var_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-TRUE16-NEXT: v_xor_b32_e32 v1, 0x80008000, v2
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.h, v1.h, vcc_lo
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, v1.l, s0
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_fneg_var_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v3
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%fneg.x = fneg <2 x half> %x
%select = select <2 x i1> %cmp, <2 x half> %fneg.x, <2 x half> %y
@@ -2058,55 +1554,30 @@ define <2 x half> @add_select_fneg_negk_v2f16(<2 x i32> %c, <2 x half> %x, <2 x
; GFX9-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SAFE-TRUE16-LABEL: add_select_fneg_negk_v2f16:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x3c00, v2.h, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x3c00, v2.l, s0
-; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_fneg_negk_v2f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v4, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x3c00, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_fneg_negk_v2f16:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x3c00, v2.h, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x3c00, v2.l, s0
-; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_fneg_negk_v2f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v4, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x3c00, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_fneg_negk_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x3c00, v2.h, vcc_lo
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x3c00, v2.l, s0
+; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_fneg_negk_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v4, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x3c00, v2, vcc_lo
+; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%fneg.x = fneg <2 x half> %x
%select = select <2 x i1> %cmp, <2 x half> %fneg.x, <2 x half> <half -1.0, half -1.0>
@@ -2161,55 +1632,30 @@ define <2 x half> @add_select_fneg_inv2pi_v2f16(<2 x i32> %c, <2 x half> %x, <2
; GFX9-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SAFE-TRUE16-LABEL: add_select_fneg_inv2pi_v2f16:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0xb118, v2.h, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0xb118, v2.l, s0
-; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_fneg_inv2pi_v2f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xb118, v4, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xb118, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_fneg_inv2pi_v2f16:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, 0xb118, v2.h, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0xb118, v2.l, s0
-; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_fneg_inv2pi_v2f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xb118, v4, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xb118, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_fneg_inv2pi_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0xb118, v2.h, vcc_lo
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0xb118, v2.l, s0
+; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_fneg_inv2pi_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xb118, v4, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xb118, v2, vcc_lo
+; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%fneg.x = fneg <2 x half> %x
%select = select <2 x i1> %cmp, <2 x half> %fneg.x, <2 x half> <half 0xH3118, half 0xH3118>
@@ -2264,55 +1710,30 @@ define <2 x half> @add_select_fneg_neginv2pi_v2f16(<2 x i32> %c, <2 x half> %x,
; GFX9-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SAFE-TRUE16-LABEL: add_select_fneg_neginv2pi_v2f16:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x3118, v2.h, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x3118, v2.l, s0
-; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_fneg_neginv2pi_v2f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x3118, v4, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x3118, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_fneg_neginv2pi_v2f16:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x3118, v2.h, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x3118, v2.l, s0
-; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_fneg_neginv2pi_v2f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x3118, v4, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x3118, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_fneg_neginv2pi_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x3118, v2.h, vcc_lo
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x3118, v2.l, s0
+; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_fneg_neginv2pi_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x3118, v4, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x3118, v2, vcc_lo
+; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%fneg.x = fneg <2 x half> %x
%select = select <2 x i1> %cmp, <2 x half> %fneg.x, <2 x half> <half 0xHB118, half 0xHB118>
@@ -2363,61 +1784,33 @@ define <2 x half> @add_select_negk_negk_v2f16(<2 x i32> %c, <2 x half> %x) {
; GFX9-NEXT: v_pk_add_f16 v0, v0, v2
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SAFE-TRUE16-LABEL: add_select_negk_negk_v2f16:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_mov_b16_e32 v3.l, 0xbc00
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, 0xc000, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.l, 0xc000, s0
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT: v_pack_b32_f16 v0, v0.h, v0.l
-; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v0, v2
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_negk_negk_v2f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_mov_b32_e32 v3, 0xc000
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xbc00, v3, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v3, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v0, v2
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_negk_negk_v2f16:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_mov_b16_e32 v3.l, 0xbc00
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, 0xc000, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.l, 0xc000, s0
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT: v_pack_b32_f16 v0, v0.h, v0.l
-; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v0, v2
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_negk_negk_v2f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_mov_b32_e32 v3, 0xc000
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xbc00, v3, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v3, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v0, v2
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_negk_negk_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, 0xbc00
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, 0xc000, vcc_lo
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.l, 0xc000, s0
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_pack_b32_f16 v0, v0.h, v0.l
+; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v0, v2
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_negk_negk_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_mov_b32_e32 v3, 0xc000
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xbc00, v3, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v3, vcc_lo
+; GFX11-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v0, v2
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%select = select <2 x i1> %cmp, <2 x half> <half -2.0, half -2.0>, <2 x half> <half -1.0, half -1.0>
%add = fadd <2 x half> %select, %x
@@ -2469,61 +1862,33 @@ define <2 x half> @add_select_negliteralk_negliteralk_v2f16(<2 x i32> %c, <2 x h
; GFX9-NEXT: v_pk_add_f16 v0, v0, v2
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SAFE-TRUE16-LABEL: add_select_negliteralk_negliteralk_v2f16:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_mov_b16_e32 v3.l, 0xec00
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, 0xe800, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.l, 0xe800, s0
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT: v_pack_b32_f16 v0, v0.h, v0.l
-; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v0, v2
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_negliteralk_negliteralk_v2f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_mov_b32_e32 v3, 0xe800
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xec00, v3, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xec00, v3, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v0, v2
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_negliteralk_negliteralk_v2f16:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_mov_b16_e32 v3.l, 0xec00
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, 0xe800, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.l, 0xe800, s0
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT: v_pack_b32_f16 v0, v0.h, v0.l
-; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v0, v2
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_negliteralk_negliteralk_v2f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_mov_b32_e32 v3, 0xe800
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xec00, v3, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xec00, v3, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v0, v2
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_negliteralk_negliteralk_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, 0xec00
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, 0xe800, vcc_lo
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.l, 0xe800, s0
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_pack_b32_f16 v0, v0.h, v0.l
+; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v0, v2
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_negliteralk_negliteralk_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_mov_b32_e32 v3, 0xe800
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xec00, v3, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xec00, v3, vcc_lo
+; GFX11-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v0, v2
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%select = select <2 x i1> %cmp, <2 x half> <half -2048.0, half -2048.0>, <2 x half> <half -4096.0, half -4096.0>
%add = fadd <2 x half> %select, %x
@@ -2573,61 +1938,33 @@ define <2 x half> @add_select_fneg_negk_negk_v2f16(<2 x i32> %c, <2 x half> %x)
; GFX9-NEXT: v_pk_add_f16 v0, v2, v0 neg_lo:[0,1] neg_hi:[0,1]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SAFE-TRUE16-LABEL: add_select_fneg_negk_negk_v2f16:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_mov_b16_e32 v3.l, 0xbc00
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, 0xc000, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.l, 0xc000, s0
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT: v_pack_b32_f16 v0, v0.h, v0.l
-; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v2, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_fneg_negk_negk_v2f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_mov_b32_e32 v3, 0xc000
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xbc00, v3, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v3, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v2, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_fneg_negk_negk_v2f16:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_mov_b16_e32 v3.l, 0xbc00
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, 0xc000, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.l, 0xc000, s0
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT: v_pack_b32_f16 v0, v0.h, v0.l
-; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v2, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_fneg_negk_negk_v2f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_mov_b32_e32 v3, 0xc000
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xbc00, v3, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v3, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v2, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_fneg_negk_negk_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, 0xbc00
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, 0xc000, vcc_lo
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.l, 0xc000, s0
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_pack_b32_f16 v0, v0.h, v0.l
+; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v2, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_fneg_negk_negk_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_mov_b32_e32 v3, 0xc000
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xbc00, v3, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v3, vcc_lo
+; GFX11-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v2, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%select = select <2 x i1> %cmp, <2 x half> <half -2.0, half -2.0>, <2 x half> <half -1.0, half -1.0>
%fneg.x = fneg <2 x half> %select
@@ -2681,55 +2018,30 @@ define <2 x half> @add_select_negk_fneg_v2f16(<2 x i32> %c, <2 x half> %x, <2 x
; GFX9-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SAFE-TRUE16-LABEL: add_select_negk_fneg_v2f16:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_ne_u32_e64 s0, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x3c00, v2.h, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x3c00, v2.l, s0
-; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_negk_fneg_v2f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v4, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x3c00, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_negk_fneg_v2f16:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_ne_u32_e64 s0, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x3c00, v2.h, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x3c00, v2.l, s0
-; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_negk_fneg_v2f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v4, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x3c00, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_negk_fneg_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1
+; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e64 s0, 0, v0
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x3c00, v2.h, vcc_lo
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x3c00, v2.l, s0
+; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_negk_fneg_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
+; GFX11-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v4, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x3c00, v2, vcc_lo
+; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%fneg.x = fneg <2 x half> %x
%select = select <2 x i1> %cmp, <2 x half> <half -1.0, half -1.0>, <2 x half> %fneg.x
@@ -2783,55 +2095,30 @@ define <2 x half> @add_select_fneg_posk_v2f16(<2 x i32> %c, <2 x half> %x, <2 x
; GFX9-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SAFE-TRUE16-LABEL: add_select_fneg_posk_v2f16:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0xbc00, v2.h, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0xbc00, v2.l, s0
-; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_fneg_posk_v2f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xbc00, v4, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_fneg_posk_v2f16:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, 0xbc00, v2.h, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0xbc00, v2.l, s0
-; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_fneg_posk_v2f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xbc00, v4, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_fneg_posk_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0xbc00, v2.h, vcc_lo
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0xbc00, v2.l, s0
+; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_fneg_posk_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xbc00, v4, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v2, vcc_lo
+; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%fneg.x = fneg <2 x half> %x
%select = select <2 x i1> %cmp, <2 x half> %fneg.x, <2 x half> <half 1.0, half 1.0>
@@ -2885,55 +2172,30 @@ define <2 x half> @add_select_posk_fneg_v2f16(<2 x i32> %c, <2 x half> %x, <2 x
; GFX9-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SAFE-TRUE16-LABEL: add_select_posk_fneg_v2f16:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_ne_u32_e64 s0, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0xbc00, v2.h, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0xbc00, v2.l, s0
-; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_posk_fneg_v2f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xbc00, v4, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_posk_fneg_v2f16:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_ne_u32_e64 s0, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, 0xbc00, v2.h, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0xbc00, v2.l, s0
-; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_posk_fneg_v2f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xbc00, v4, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_posk_fneg_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1
+; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e64 s0, 0, v0
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0xbc00, v2.h, vcc_lo
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0xbc00, v2.l, s0
+; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_posk_fneg_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
+; GFX11-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xbc00, v4, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v2, vcc_lo
+; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%fneg.x = fneg <2 x half> %x
%select = select <2 x i1> %cmp, <2 x half> <half 1.0, half 1.0>, <2 x half> %fneg.x
@@ -2997,69 +2259,37 @@ define <2 x half> @add_select_negfabs_fabs_v2f16(<2 x i32> %c, <2 x half> %x, <2
; GFX9-NEXT: v_pk_add_f16 v0, v0, v4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SAFE-TRUE16-LABEL: add_select_negfabs_fabs_v2f16:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: v_or_b32_e32 v0, 0x80008000, v2
-; GFX11-SAFE-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v3
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v0.l, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, v2.h, v0.h, s0
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_negfabs_fabs_v2f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_or_b32_e32 v2, 0x80008000, v2
-; GFX11-SAFE-FAKE16-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_negfabs_fabs_v2f16:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: v_or_b32_e32 v0, 0x80008000, v2
-; GFX11-NSZ-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v3
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v0.l, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, v2.h, v0.h, s0
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_negfabs_fabs_v2f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_or_b32_e32 v2, 0x80008000, v2
-; GFX11-NSZ-FAKE16-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_negfabs_fabs_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, 0x80008000, v2
+; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v3
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v0.l, vcc_lo
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v2.h, v0.h, s0
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_negfabs_fabs_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, 0x80008000, v2
+; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo
+; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%fabs.x = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
%fneg.fabs.x = fneg <2 x half> %fabs.x
@@ -3125,69 +2355,37 @@ define <2 x half> @add_select_fabs_negfabs_v2f16(<2 x i32> %c, <2 x half> %x, <2
; GFX9-NEXT: v_pk_add_f16 v0, v0, v4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SAFE-TRUE16-LABEL: add_select_fabs_negfabs_v2f16:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v2
-; GFX11-SAFE-TRUE16-NEXT: v_or_b32_e32 v2, 0x80008000, v3
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v0.l, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, v2.h, v0.h, s0
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_fabs_negfabs_v2f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_or_b32_e32 v3, 0x80008000, v3
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3
-; GFX11-SAFE-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_fabs_negfabs_v2f16:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v2
-; GFX11-NSZ-TRUE16-NEXT: v_or_b32_e32 v2, 0x80008000, v3
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v0.l, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, v2.h, v0.h, s0
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_fabs_negfabs_v2f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_or_b32_e32 v3, 0x80008000, v3
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3
-; GFX11-NSZ-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_fabs_negfabs_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v2
+; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x80008000, v3
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v0.l, vcc_lo
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v2.h, v0.h, s0
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_fabs_negfabs_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_or_b32_e32 v3, 0x80008000, v3
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3
+; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo
+; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%fabs.x = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
%fabs.y = call <2 x half> @llvm.fabs.v2f16(<2 x half> %y)
@@ -3253,69 +2451,37 @@ define <2 x half> @add_select_neg_fabs_v2f16(<2 x i32> %c, <2 x half> %x, <2 x h
; GFX9-NEXT: v_pk_add_f16 v0, v0, v4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SAFE-TRUE16-LABEL: add_select_neg_fabs_v2f16:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: v_xor_b32_e32 v0, 0x80008000, v2
-; GFX11-SAFE-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v3
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v0.l, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, v2.h, v0.h, s0
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_neg_fabs_v2f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
-; GFX11-SAFE-FAKE16-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_neg_fabs_v2f16:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: v_xor_b32_e32 v0, 0x80008000, v2
-; GFX11-NSZ-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v3
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v0.l, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, v2.h, v0.h, s0
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_neg_fabs_v2f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
-; GFX11-NSZ-FAKE16-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_neg_fabs_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-TRUE16-NEXT: v_xor_b32_e32 v0, 0x80008000, v2
+; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v3
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v0.l, vcc_lo
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v2.h, v0.h, s0
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_neg_fabs_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
+; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo
+; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%fneg.x = fneg <2 x half> %x
%fabs.y = call <2 x half> @llvm.fabs.v2f16(<2 x half> %y)
@@ -3380,69 +2546,37 @@ define <2 x half> @add_select_fabs_neg_v2f16(<2 x i32> %c, <2 x half> %x, <2 x h
; GFX9-NEXT: v_pk_add_f16 v0, v0, v4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SAFE-TRUE16-LABEL: add_select_fabs_neg_v2f16:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v2
-; GFX11-SAFE-TRUE16-NEXT: v_xor_b32_e32 v2, 0x80008000, v3
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v0.l, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, v2.h, v0.h, s0
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_fabs_neg_v2f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_xor_b32_e32 v3, 0x80008000, v3
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3
-; GFX11-SAFE-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_fabs_neg_v2f16:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v2
-; GFX11-NSZ-TRUE16-NEXT: v_xor_b32_e32 v2, 0x80008000, v3
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v0.l, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, v2.h, v0.h, s0
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_fabs_neg_v2f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_xor_b32_e32 v3, 0x80008000, v3
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3
-; GFX11-NSZ-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_fabs_neg_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v2
+; GFX11-TRUE16-NEXT: v_xor_b32_e32 v2, 0x80008000, v3
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v0.l, vcc_lo
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v2.h, v0.h, s0
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_fabs_neg_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_xor_b32_e32 v3, 0x80008000, v3
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3
+; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo
+; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%fabs.x = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
%fneg.y = fneg <2 x half> %y
@@ -3501,63 +2635,34 @@ define <2 x half> @add_select_neg_negfabs_v2f16(<2 x i32> %c, <2 x half> %x, <2
; GFX9-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SAFE-TRUE16-LABEL: add_select_neg_negfabs_v2f16:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: v_and_b32_e32 v1, 0x7fff7fff, v3
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, v1.h, v2.h, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, v1.l, v2.l, s0
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_neg_negfabs_v2f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_neg_negfabs_v2f16:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: v_and_b32_e32 v1, 0x7fff7fff, v3
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, v1.h, v2.h, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, v1.l, v2.l, s0
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_neg_negfabs_v2f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_neg_negfabs_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0x7fff7fff, v3
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v1.h, v2.h, vcc_lo
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v1.l, v2.l, s0
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_neg_negfabs_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%fneg.x = fneg <2 x half> %x
%fabs.y = call <2 x half> @llvm.fabs.v2f16(<2 x half> %y)
@@ -3617,63 +2722,34 @@ define <2 x half> @add_select_negfabs_neg_v2f16(<2 x i32> %c, <2 x half> %x, <2
; GFX9-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SAFE-TRUE16-LABEL: add_select_negfabs_neg_v2f16:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: v_and_b32_e32 v1, 0x7fff7fff, v2
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, v1.h, v3.h, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, v1.l, v3.l, s0
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_negfabs_neg_v2f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v3
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_negfabs_neg_v2f16:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: v_and_b32_e32 v1, 0x7fff7fff, v2
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, v1.h, v3.h, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, v1.l, v3.l, s0
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_negfabs_neg_v2f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v3
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_negfabs_neg_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0x7fff7fff, v2
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v1.h, v3.h, vcc_lo
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v1.l, v3.l, s0
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_negfabs_neg_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v3
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc_lo
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%fabs.x = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
%fneg.fabs.x = fneg <2 x half> %fabs.x
@@ -3735,61 +2811,33 @@ define <2 x half> @mul_select_negfabs_posk_v2f16(<2 x i32> %c, <2 x half> %x, <2
; GFX9-NEXT: v_pk_mul_f16 v0, v0, v3
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SAFE-TRUE16-LABEL: mul_select_negfabs_posk_v2f16:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: v_or_b32_e32 v0, 0x80008000, v2
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4400, v0.l, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4400, v0.h, s0
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT: v_pk_mul_f16 v0, v0, v3
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: mul_select_negfabs_posk_v2f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_or_b32_e32 v2, 0x80008000, v2
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4400, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4400, v4, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT: v_pk_mul_f16 v0, v0, v3
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: mul_select_negfabs_posk_v2f16:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: v_or_b32_e32 v0, 0x80008000, v2
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4400, v0.l, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4400, v0.h, s0
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT: v_pk_mul_f16 v0, v0, v3
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: mul_select_negfabs_posk_v2f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_or_b32_e32 v2, 0x80008000, v2
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4400, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4400, v4, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT: v_pk_mul_f16 v0, v0, v3
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: mul_select_negfabs_posk_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, 0x80008000, v2
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4400, v0.l, vcc_lo
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4400, v0.h, s0
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_pk_mul_f16 v0, v0, v3
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: mul_select_negfabs_posk_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, 0x80008000, v2
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4400, v2, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4400, v4, vcc_lo
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT: v_pk_mul_f16 v0, v0, v3
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%fabs.x = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
%fneg.fabs.x = fneg <2 x half> %fabs.x
@@ -3850,61 +2898,33 @@ define <2 x half> @mul_select_posk_negfabs_v2f16(<2 x i32> %c, <2 x half> %x, <2
; GFX9-NEXT: v_pk_mul_f16 v0, v0, v3
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SAFE-TRUE16-LABEL: mul_select_posk_negfabs_v2f16:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_or_b32_e32 v2, 0x80008000, v2
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_ne_u32_e64 s0, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4400, v2.l, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4400, v2.h, s0
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT: v_pk_mul_f16 v0, v0, v3
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: mul_select_posk_negfabs_v2f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_or_b32_e32 v2, 0x80008000, v2
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4400, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4400, v4, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT: v_pk_mul_f16 v0, v0, v3
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: mul_select_posk_negfabs_v2f16:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_or_b32_e32 v2, 0x80008000, v2
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_ne_u32_e64 s0, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4400, v2.l, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4400, v2.h, s0
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT: v_pk_mul_f16 v0, v0, v3
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: mul_select_posk_negfabs_v2f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_or_b32_e32 v2, 0x80008000, v2
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4400, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4400, v4, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT: v_pk_mul_f16 v0, v0, v3
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: mul_select_posk_negfabs_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x80008000, v2
+; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e64 s0, 0, v1
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4400, v2.l, vcc_lo
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4400, v2.h, s0
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_pk_mul_f16 v0, v0, v3
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: mul_select_posk_negfabs_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, 0x80008000, v2
+; GFX11-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4400, v2, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4400, v4, vcc_lo
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT: v_pk_mul_f16 v0, v0, v3
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%fabs.x = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
%fneg.fabs.x = fneg <2 x half> %fabs.x
@@ -3965,61 +2985,33 @@ define <2 x half> @mul_select_negfabs_negk_v2f16(<2 x i32> %c, <2 x half> %x, <2
; GFX9-NEXT: v_pk_mul_f16 v0, v0, v3
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SAFE-TRUE16-LABEL: mul_select_negfabs_negk_v2f16:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: v_or_b32_e32 v0, 0x80008000, v2
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0xc400, v0.l, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0xc400, v0.h, s0
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT: v_pk_mul_f16 v0, v0, v3
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: mul_select_negfabs_negk_v2f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_or_b32_e32 v2, 0x80008000, v2
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xc400, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xc400, v4, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT: v_pk_mul_f16 v0, v0, v3
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: mul_select_negfabs_negk_v2f16:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: v_or_b32_e32 v0, 0x80008000, v2
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0xc400, v0.l, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, 0xc400, v0.h, s0
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT: v_pk_mul_f16 v0, v0, v3
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: mul_select_negfabs_negk_v2f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_or_b32_e32 v2, 0x80008000, v2
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xc400, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xc400, v4, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT: v_pk_mul_f16 v0, v0, v3
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: mul_select_negfabs_negk_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, 0x80008000, v2
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0xc400, v0.l, vcc_lo
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0xc400, v0.h, s0
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_pk_mul_f16 v0, v0, v3
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: mul_select_negfabs_negk_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, 0x80008000, v2
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xc400, v2, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xc400, v4, vcc_lo
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT: v_pk_mul_f16 v0, v0, v3
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%fabs.x = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
%fneg.fabs.x = fneg <2 x half> %fabs.x
@@ -4080,61 +3072,33 @@ define <2 x half> @mul_select_negk_negfabs_v2f16(<2 x i32> %c, <2 x half> %x, <2
; GFX9-NEXT: v_pk_mul_f16 v0, v0, v3
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SAFE-TRUE16-LABEL: mul_select_negk_negfabs_v2f16:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_or_b32_e32 v2, 0x80008000, v2
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_ne_u32_e64 s0, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0xc400, v2.l, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0xc400, v2.h, s0
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT: v_pk_mul_f16 v0, v0, v3
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: mul_select_negk_negfabs_v2f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_or_b32_e32 v2, 0x80008000, v2
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xc400, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xc400, v4, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT: v_pk_mul_f16 v0, v0, v3
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: mul_select_negk_negfabs_v2f16:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_or_b32_e32 v2, 0x80008000, v2
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_ne_u32_e64 s0, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0xc400, v2.l, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, 0xc400, v2.h, s0
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT: v_pk_mul_f16 v0, v0, v3
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: mul_select_negk_negfabs_v2f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_or_b32_e32 v2, 0x80008000, v2
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xc400, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xc400, v4, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT: v_pk_mul_f16 v0, v0, v3
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: mul_select_negk_negfabs_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x80008000, v2
+; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e64 s0, 0, v1
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0xc400, v2.l, vcc_lo
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0xc400, v2.h, s0
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_pk_mul_f16 v0, v0, v3
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: mul_select_negk_negfabs_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, 0x80008000, v2
+; GFX11-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xc400, v2, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xc400, v4, vcc_lo
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT: v_pk_mul_f16 v0, v0, v3
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%fabs.x = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
%fneg.fabs.x = fneg <2 x half> %fabs.x
@@ -4171,115 +3135,63 @@ define <2 x half> @select_fneg_posk_src_add_v2f16(<2 x i32> %c, <2 x half> %x, <
; CI-NEXT: v_cndmask_b32_e32 v1, 2.0, v2, vcc
; CI-NEXT: s_setpc_b64 s[30:31]
;
-; VI-SAFE-LABEL: select_fneg_posk_src_add_v2f16:
-; VI-SAFE: ; %bb.0:
-; VI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-SAFE-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
-; VI-SAFE-NEXT: v_mov_b32_e32 v1, 0x4400
-; VI-SAFE-NEXT: v_add_f16_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; VI-SAFE-NEXT: v_add_f16_e32 v2, 4.0, v2
-; VI-SAFE-NEXT: v_or_b32_e32 v1, v2, v1
-; VI-SAFE-NEXT: v_xor_b32_e32 v1, 0x80008000, v1
-; VI-SAFE-NEXT: v_mov_b32_e32 v2, 0x4000
-; VI-SAFE-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
-; VI-SAFE-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[4:5]
-; VI-SAFE-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; VI-SAFE-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; VI-SAFE-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-SAFE-LABEL: select_fneg_posk_src_add_v2f16:
-; GFX9-SAFE: ; %bb.0:
-; GFX9-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SAFE-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
-; GFX9-SAFE-NEXT: v_pk_add_f16 v1, v2, 4.0 op_sel_hi:[1,0]
-; GFX9-SAFE-NEXT: v_xor_b32_e32 v1, 0x80008000, v1
-; GFX9-SAFE-NEXT: v_mov_b32_e32 v2, 0x4000
-; GFX9-SAFE-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
-; GFX9-SAFE-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[4:5]
-; GFX9-SAFE-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX9-SAFE-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-SAFE-NEXT: v_perm_b32 v0, v1, v0, s4
-; GFX9-SAFE-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-TRUE16-LABEL: select_fneg_posk_src_add_v2f16:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v2, v2, 4.0 op_sel_hi:[1,0]
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT: v_xor_b32_e32 v0, 0x80008000, v2
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: select_fneg_posk_src_add_v2f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v2, v2, 4.0 op_sel_hi:[1,0]
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; VI-NSZ-LABEL: select_fneg_posk_src_add_v2f16:
-; VI-NSZ: ; %bb.0:
-; VI-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-NSZ-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
-; VI-NSZ-NEXT: v_mov_b32_e32 v1, 0xc400
-; VI-NSZ-NEXT: v_sub_f16_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; VI-NSZ-NEXT: v_sub_f16_e32 v2, -4.0, v2
-; VI-NSZ-NEXT: v_mov_b32_e32 v3, 0x4000
-; VI-NSZ-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
-; VI-NSZ-NEXT: v_cndmask_b32_e64 v0, v3, v2, s[4:5]
-; VI-NSZ-NEXT: v_cndmask_b32_sdwa v1, v3, v1, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; VI-NSZ-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; VI-NSZ-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-NSZ-LABEL: select_fneg_posk_src_add_v2f16:
-; GFX9-NSZ: ; %bb.0:
-; GFX9-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NSZ-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
-; GFX9-NSZ-NEXT: v_pk_add_f16 v1, v2, -4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0]
-; GFX9-NSZ-NEXT: v_mov_b32_e32 v2, 0x4000
-; GFX9-NSZ-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
-; GFX9-NSZ-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[4:5]
-; GFX9-NSZ-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX9-NSZ-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NSZ-NEXT: v_perm_b32 v0, v1, v0, s4
-; GFX9-NSZ-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: select_fneg_posk_src_add_v2f16:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v2, -4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0]
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: select_fneg_posk_src_add_v2f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v2, v2, -4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0]
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; VI-LABEL: select_fneg_posk_src_add_v2f16:
+; VI: ; %bb.0:
+; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
+; VI-NEXT: v_mov_b32_e32 v1, 0x4400
+; VI-NEXT: v_add_f16_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; VI-NEXT: v_add_f16_e32 v2, 4.0, v2
+; VI-NEXT: v_or_b32_e32 v1, v2, v1
+; VI-NEXT: v_xor_b32_e32 v1, 0x80008000, v1
+; VI-NEXT: v_mov_b32_e32 v2, 0x4000
+; VI-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
+; VI-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[4:5]
+; VI-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; VI-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; VI-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: select_fneg_posk_src_add_v2f16:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
+; GFX9-NEXT: v_pk_add_f16 v1, v2, 4.0 op_sel_hi:[1,0]
+; GFX9-NEXT: v_xor_b32_e32 v1, 0x80008000, v1
+; GFX9-NEXT: v_mov_b32_e32 v2, 0x4000
+; GFX9-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
+; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[4:5]
+; GFX9-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT: s_mov_b32 s4, 0x5040100
+; GFX9-NEXT: v_perm_b32 v0, v1, v0, s4
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-LABEL: select_fneg_posk_src_add_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_pk_add_f16 v2, v2, 4.0 op_sel_hi:[1,0]
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_xor_b32_e32 v0, 0x80008000, v2
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: select_fneg_posk_src_add_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_pk_add_f16 v2, v2, 4.0 op_sel_hi:[1,0]
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
+; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%add = fadd <2 x half> %x, <half 4.0, half 4.0>
%fneg = fneg <2 x half> %add
@@ -4330,55 +3242,30 @@ define <2 x half> @select_fneg_posk_src_add_v2f16_nsz(<2 x i32> %c, <2 x half> %
; GFX9-NEXT: v_perm_b32 v0, v1, v0, s4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SAFE-TRUE16-LABEL: select_fneg_posk_src_add_v2f16_nsz:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v2, -4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0]
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: select_fneg_posk_src_add_v2f16_nsz:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v2, v2, -4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0]
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: select_fneg_posk_src_add_v2f16_nsz:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v2, -4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0]
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: select_fneg_posk_src_add_v2f16_nsz:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v2, v2, -4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0]
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: select_fneg_posk_src_add_v2f16_nsz:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v2, -4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0]
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: select_fneg_posk_src_add_v2f16_nsz:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_pk_add_f16 v2, v2, -4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0]
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%add = fadd nsz <2 x half> %x, <half 4.0, half 4.0>
%fneg = fneg <2 x half> %add
@@ -4387,153 +3274,86 @@ define <2 x half> @select_fneg_posk_src_add_v2f16_nsz(<2 x i32> %c, <2 x half> %
}
define <2 x half> @select_fneg_posk_src_sub_v2f16(<2 x i32> %c, <2 x half> %x) {
-; CI-SAFE-LABEL: select_fneg_posk_src_sub_v2f16:
-; CI-SAFE: ; %bb.0:
-; CI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CI-SAFE-NEXT: v_cvt_f16_f32_e32 v3, v3
-; CI-SAFE-NEXT: v_cvt_f16_f32_e32 v2, v2
-; CI-SAFE-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; CI-SAFE-NEXT: v_cvt_f32_f16_e32 v3, v3
-; CI-SAFE-NEXT: v_cvt_f32_f16_e32 v2, v2
-; CI-SAFE-NEXT: v_add_f32_e32 v3, -4.0, v3
-; CI-SAFE-NEXT: v_add_f32_e32 v2, -4.0, v2
-; CI-SAFE-NEXT: v_cvt_f16_f32_e32 v3, v3
-; CI-SAFE-NEXT: v_cvt_f16_f32_e32 v2, v2
-; CI-SAFE-NEXT: v_lshlrev_b32_e32 v3, 16, v3
-; CI-SAFE-NEXT: v_or_b32_e32 v2, v2, v3
-; CI-SAFE-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
-; CI-SAFE-NEXT: v_cvt_f32_f16_e32 v3, v2
-; CI-SAFE-NEXT: v_lshrrev_b32_e32 v2, 16, v2
-; CI-SAFE-NEXT: v_cvt_f32_f16_e32 v2, v2
-; CI-SAFE-NEXT: v_cndmask_b32_e32 v0, 2.0, v3, vcc
-; CI-SAFE-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
-; CI-SAFE-NEXT: v_cndmask_b32_e32 v1, 2.0, v2, vcc
-; CI-SAFE-NEXT: s_setpc_b64 s[30:31]
-;
-; VI-SAFE-LABEL: select_fneg_posk_src_sub_v2f16:
-; VI-SAFE: ; %bb.0:
-; VI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-SAFE-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
-; VI-SAFE-NEXT: v_mov_b32_e32 v1, 0xc400
-; VI-SAFE-NEXT: v_add_f16_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; VI-SAFE-NEXT: v_add_f16_e32 v2, -4.0, v2
-; VI-SAFE-NEXT: v_or_b32_e32 v1, v2, v1
-; VI-SAFE-NEXT: v_xor_b32_e32 v1, 0x80008000, v1
-; VI-SAFE-NEXT: v_mov_b32_e32 v2, 0x4000
-; VI-SAFE-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
-; VI-SAFE-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[4:5]
-; VI-SAFE-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; VI-SAFE-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; VI-SAFE-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-SAFE-LABEL: select_fneg_posk_src_sub_v2f16:
-; GFX9-SAFE: ; %bb.0:
-; GFX9-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SAFE-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
-; GFX9-SAFE-NEXT: v_pk_add_f16 v1, v2, -4.0 op_sel_hi:[1,0]
-; GFX9-SAFE-NEXT: v_xor_b32_e32 v1, 0x80008000, v1
-; GFX9-SAFE-NEXT: v_mov_b32_e32 v2, 0x4000
-; GFX9-SAFE-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
-; GFX9-SAFE-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[4:5]
-; GFX9-SAFE-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX9-SAFE-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-SAFE-NEXT: v_perm_b32 v0, v1, v0, s4
-; GFX9-SAFE-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-TRUE16-LABEL: select_fneg_posk_src_sub_v2f16:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v2, v2, -4.0 op_sel_hi:[1,0]
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT: v_xor_b32_e32 v0, 0x80008000, v2
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: select_fneg_posk_src_sub_v2f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v2, v2, -4.0 op_sel_hi:[1,0]
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; CI-NSZ-LABEL: select_fneg_posk_src_sub_v2f16:
-; CI-NSZ: ; %bb.0:
-; CI-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CI-NSZ-NEXT: v_cvt_f16_f32_e32 v2, v2
-; CI-NSZ-NEXT: v_cvt_f16_f32_e32 v3, v3
-; CI-NSZ-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; CI-NSZ-NEXT: v_cvt_f32_f16_e32 v2, v2
-; CI-NSZ-NEXT: v_cvt_f32_f16_e32 v3, v3
-; CI-NSZ-NEXT: v_sub_f32_e32 v2, 4.0, v2
-; CI-NSZ-NEXT: v_sub_f32_e32 v3, 4.0, v3
-; CI-NSZ-NEXT: v_cndmask_b32_e32 v0, 2.0, v2, vcc
-; CI-NSZ-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
-; CI-NSZ-NEXT: v_cndmask_b32_e32 v1, 2.0, v3, vcc
-; CI-NSZ-NEXT: s_setpc_b64 s[30:31]
-;
-; VI-NSZ-LABEL: select_fneg_posk_src_sub_v2f16:
-; VI-NSZ: ; %bb.0:
-; VI-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-NSZ-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
-; VI-NSZ-NEXT: v_mov_b32_e32 v1, 0x4400
-; VI-NSZ-NEXT: v_sub_f16_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; VI-NSZ-NEXT: v_sub_f16_e32 v2, 4.0, v2
-; VI-NSZ-NEXT: v_mov_b32_e32 v3, 0x4000
-; VI-NSZ-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
-; VI-NSZ-NEXT: v_cndmask_b32_e64 v0, v3, v2, s[4:5]
-; VI-NSZ-NEXT: v_cndmask_b32_sdwa v1, v3, v1, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; VI-NSZ-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; VI-NSZ-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-NSZ-LABEL: select_fneg_posk_src_sub_v2f16:
-; GFX9-NSZ: ; %bb.0:
-; GFX9-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NSZ-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
-; GFX9-NSZ-NEXT: v_pk_add_f16 v1, v2, 4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0]
-; GFX9-NSZ-NEXT: v_mov_b32_e32 v2, 0x4000
-; GFX9-NSZ-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
-; GFX9-NSZ-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[4:5]
-; GFX9-NSZ-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX9-NSZ-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NSZ-NEXT: v_perm_b32 v0, v1, v0, s4
-; GFX9-NSZ-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: select_fneg_posk_src_sub_v2f16:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v2, 4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0]
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: select_fneg_posk_src_sub_v2f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v2, v2, 4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0]
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; CI-LABEL: select_fneg_posk_src_sub_v2f16:
+; CI: ; %bb.0:
+; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CI-NEXT: v_cvt_f16_f32_e32 v3, v3
+; CI-NEXT: v_cvt_f16_f32_e32 v2, v2
+; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
+; CI-NEXT: v_cvt_f32_f16_e32 v3, v3
+; CI-NEXT: v_cvt_f32_f16_e32 v2, v2
+; CI-NEXT: v_add_f32_e32 v3, -4.0, v3
+; CI-NEXT: v_add_f32_e32 v2, -4.0, v2
+; CI-NEXT: v_cvt_f16_f32_e32 v3, v3
+; CI-NEXT: v_cvt_f16_f32_e32 v2, v2
+; CI-NEXT: v_lshlrev_b32_e32 v3, 16, v3
+; CI-NEXT: v_or_b32_e32 v2, v2, v3
+; CI-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
+; CI-NEXT: v_cvt_f32_f16_e32 v3, v2
+; CI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
+; CI-NEXT: v_cvt_f32_f16_e32 v2, v2
+; CI-NEXT: v_cndmask_b32_e32 v0, 2.0, v3, vcc
+; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
+; CI-NEXT: v_cndmask_b32_e32 v1, 2.0, v2, vcc
+; CI-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-LABEL: select_fneg_posk_src_sub_v2f16:
+; VI: ; %bb.0:
+; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
+; VI-NEXT: v_mov_b32_e32 v1, 0xc400
+; VI-NEXT: v_add_f16_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; VI-NEXT: v_add_f16_e32 v2, -4.0, v2
+; VI-NEXT: v_or_b32_e32 v1, v2, v1
+; VI-NEXT: v_xor_b32_e32 v1, 0x80008000, v1
+; VI-NEXT: v_mov_b32_e32 v2, 0x4000
+; VI-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
+; VI-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[4:5]
+; VI-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; VI-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; VI-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: select_fneg_posk_src_sub_v2f16:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
+; GFX9-NEXT: v_pk_add_f16 v1, v2, -4.0 op_sel_hi:[1,0]
+; GFX9-NEXT: v_xor_b32_e32 v1, 0x80008000, v1
+; GFX9-NEXT: v_mov_b32_e32 v2, 0x4000
+; GFX9-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
+; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[4:5]
+; GFX9-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT: s_mov_b32 s4, 0x5040100
+; GFX9-NEXT: v_perm_b32 v0, v1, v0, s4
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-LABEL: select_fneg_posk_src_sub_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_pk_add_f16 v2, v2, -4.0 op_sel_hi:[1,0]
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_xor_b32_e32 v0, 0x80008000, v2
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: select_fneg_posk_src_sub_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_pk_add_f16 v2, v2, -4.0 op_sel_hi:[1,0]
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
+; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%add = fsub <2 x half> %x, <half 4.0, half 4.0>
%fneg = fneg <2 x half> %add
@@ -4541,6 +3361,80 @@ define <2 x half> @select_fneg_posk_src_sub_v2f16(<2 x i32> %c, <2 x half> %x) {
ret <2 x half> %select
}
+define <2 x half> @select_fneg_posk_src_sub_v2f16_nsz(<2 x i32> %c, <2 x half> %x) {
+; CI-LABEL: select_fneg_posk_src_sub_v2f16_nsz:
+; CI: ; %bb.0:
+; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CI-NEXT: v_cvt_f16_f32_e32 v2, v2
+; CI-NEXT: v_cvt_f16_f32_e32 v3, v3
+; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
+; CI-NEXT: v_cvt_f32_f16_e32 v2, v2
+; CI-NEXT: v_cvt_f32_f16_e32 v3, v3
+; CI-NEXT: v_sub_f32_e32 v2, 4.0, v2
+; CI-NEXT: v_sub_f32_e32 v3, 4.0, v3
+; CI-NEXT: v_cndmask_b32_e32 v0, 2.0, v2, vcc
+; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
+; CI-NEXT: v_cndmask_b32_e32 v1, 2.0, v3, vcc
+; CI-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-LABEL: select_fneg_posk_src_sub_v2f16_nsz:
+; VI: ; %bb.0:
+; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
+; VI-NEXT: v_mov_b32_e32 v1, 0x4400
+; VI-NEXT: v_sub_f16_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; VI-NEXT: v_sub_f16_e32 v2, 4.0, v2
+; VI-NEXT: v_mov_b32_e32 v3, 0x4000
+; VI-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
+; VI-NEXT: v_cndmask_b32_e64 v0, v3, v2, s[4:5]
+; VI-NEXT: v_cndmask_b32_sdwa v1, v3, v1, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; VI-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; VI-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: select_fneg_posk_src_sub_v2f16_nsz:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
+; GFX9-NEXT: v_pk_add_f16 v1, v2, 4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0]
+; GFX9-NEXT: v_mov_b32_e32 v2, 0x4000
+; GFX9-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
+; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[4:5]
+; GFX9-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT: s_mov_b32 s4, 0x5040100
+; GFX9-NEXT: v_perm_b32 v0, v1, v0, s4
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-LABEL: select_fneg_posk_src_sub_v2f16_nsz:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v2, 4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0]
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: select_fneg_posk_src_sub_v2f16_nsz:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_pk_add_f16 v2, v2, 4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0]
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
+ %cmp = icmp eq <2 x i32> %c, zeroinitializer
+ %add = fsub <2 x half> %x, <half 4.0, half 4.0>
+ %fneg = fneg nsz <2 x half> %add
+ %select = select <2 x i1> %cmp, <2 x half> %fneg, <2 x half> <half 2.0, half 2.0>
+ ret <2 x half> %select
+}
+
define <2 x half> @select_fneg_posk_src_mul_v2f16(<2 x i32> %c, <2 x half> %x) {
; CI-LABEL: select_fneg_posk_src_mul_v2f16:
; CI: ; %bb.0:
@@ -4584,55 +3478,30 @@ define <2 x half> @select_fneg_posk_src_mul_v2f16(<2 x i32> %c, <2 x half> %x) {
; GFX9-NEXT: v_perm_b32 v0, v1, v0, s4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SAFE-TRUE16-LABEL: select_fneg_posk_src_mul_v2f16:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: v_pk_mul_f16 v0, v2, -4.0 op_sel_hi:[1,0]
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: select_fneg_posk_src_mul_v2f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_pk_mul_f16 v2, v2, -4.0 op_sel_hi:[1,0]
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: select_fneg_posk_src_mul_v2f16:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: v_pk_mul_f16 v0, v2, -4.0 op_sel_hi:[1,0]
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: select_fneg_posk_src_mul_v2f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_pk_mul_f16 v2, v2, -4.0 op_sel_hi:[1,0]
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: select_fneg_posk_src_mul_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-TRUE16-NEXT: v_pk_mul_f16 v0, v2, -4.0 op_sel_hi:[1,0]
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: select_fneg_posk_src_mul_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_pk_mul_f16 v2, v2, -4.0 op_sel_hi:[1,0]
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%mul = fmul <2 x half> %x, <half 4.0, half 4.0>
%fneg = fneg <2 x half> %mul
@@ -4668,118 +3537,65 @@ define <2 x half> @select_fneg_posk_src_fma_v2f16(<2 x i32> %c, <2 x half> %x, <
; CI-NEXT: v_cndmask_b32_e32 v1, 2.0, v2, vcc
; CI-NEXT: s_setpc_b64 s[30:31]
;
-; VI-SAFE-LABEL: select_fneg_posk_src_fma_v2f16:
-; VI-SAFE: ; %bb.0:
-; VI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-SAFE-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
-; VI-SAFE-NEXT: v_lshrrev_b32_e32 v1, 16, v3
-; VI-SAFE-NEXT: v_lshrrev_b32_e32 v4, 16, v2
-; VI-SAFE-NEXT: v_fma_f16 v1, v4, 4.0, v1
-; VI-SAFE-NEXT: v_lshlrev_b32_e32 v1, 16, v1
-; VI-SAFE-NEXT: v_fma_f16 v2, v2, 4.0, v3
-; VI-SAFE-NEXT: v_or_b32_e32 v1, v2, v1
-; VI-SAFE-NEXT: v_xor_b32_e32 v1, 0x80008000, v1
-; VI-SAFE-NEXT: v_mov_b32_e32 v2, 0x4000
-; VI-SAFE-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
-; VI-SAFE-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[4:5]
-; VI-SAFE-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; VI-SAFE-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; VI-SAFE-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-SAFE-LABEL: select_fneg_posk_src_fma_v2f16:
-; GFX9-SAFE: ; %bb.0:
-; GFX9-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SAFE-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
-; GFX9-SAFE-NEXT: v_pk_fma_f16 v1, v2, 4.0, v3 op_sel_hi:[1,0,1]
-; GFX9-SAFE-NEXT: v_xor_b32_e32 v1, 0x80008000, v1
-; GFX9-SAFE-NEXT: v_mov_b32_e32 v2, 0x4000
-; GFX9-SAFE-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
-; GFX9-SAFE-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[4:5]
-; GFX9-SAFE-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX9-SAFE-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-SAFE-NEXT: v_perm_b32 v0, v1, v0, s4
-; GFX9-SAFE-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-TRUE16-LABEL: select_fneg_posk_src_fma_v2f16:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_pk_fma_f16 v2, v2, 4.0, v3 op_sel_hi:[1,0,1]
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT: v_xor_b32_e32 v0, 0x80008000, v2
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: select_fneg_posk_src_fma_v2f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_pk_fma_f16 v2, v2, 4.0, v3 op_sel_hi:[1,0,1]
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; VI-NSZ-LABEL: select_fneg_posk_src_fma_v2f16:
-; VI-NSZ: ; %bb.0:
-; VI-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-NSZ-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
-; VI-NSZ-NEXT: v_lshrrev_b32_e32 v1, 16, v3
-; VI-NSZ-NEXT: v_lshrrev_b32_e32 v4, 16, v2
-; VI-NSZ-NEXT: v_fma_f16 v1, v4, -4.0, -v1
-; VI-NSZ-NEXT: v_fma_f16 v2, v2, -4.0, -v3
-; VI-NSZ-NEXT: v_mov_b32_e32 v3, 0x4000
-; VI-NSZ-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
-; VI-NSZ-NEXT: v_cndmask_b32_e64 v0, v3, v2, s[4:5]
-; VI-NSZ-NEXT: v_cndmask_b32_sdwa v1, v3, v1, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; VI-NSZ-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; VI-NSZ-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-NSZ-LABEL: select_fneg_posk_src_fma_v2f16:
-; GFX9-NSZ: ; %bb.0:
-; GFX9-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NSZ-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
-; GFX9-NSZ-NEXT: v_pk_fma_f16 v1, v2, -4.0, v3 op_sel_hi:[1,0,1] neg_lo:[0,0,1] neg_hi:[0,0,1]
-; GFX9-NSZ-NEXT: v_mov_b32_e32 v2, 0x4000
-; GFX9-NSZ-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
-; GFX9-NSZ-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[4:5]
-; GFX9-NSZ-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX9-NSZ-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NSZ-NEXT: v_perm_b32 v0, v1, v0, s4
-; GFX9-NSZ-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: select_fneg_posk_src_fma_v2f16:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: v_pk_fma_f16 v0, v2, -4.0, v3 op_sel_hi:[1,0,1] neg_lo:[0,0,1] neg_hi:[0,0,1]
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: select_fneg_posk_src_fma_v2f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_pk_fma_f16 v2, v2, -4.0, v3 op_sel_hi:[1,0,1] neg_lo:[0,0,1] neg_hi:[0,0,1]
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; VI-LABEL: select_fneg_posk_src_fma_v2f16:
+; VI: ; %bb.0:
+; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
+; VI-NEXT: v_lshrrev_b32_e32 v1, 16, v3
+; VI-NEXT: v_lshrrev_b32_e32 v4, 16, v2
+; VI-NEXT: v_fma_f16 v1, v4, 4.0, v1
+; VI-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; VI-NEXT: v_fma_f16 v2, v2, 4.0, v3
+; VI-NEXT: v_or_b32_e32 v1, v2, v1
+; VI-NEXT: v_xor_b32_e32 v1, 0x80008000, v1
+; VI-NEXT: v_mov_b32_e32 v2, 0x4000
+; VI-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
+; VI-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[4:5]
+; VI-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; VI-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; VI-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: select_fneg_posk_src_fma_v2f16:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
+; GFX9-NEXT: v_pk_fma_f16 v1, v2, 4.0, v3 op_sel_hi:[1,0,1]
+; GFX9-NEXT: v_xor_b32_e32 v1, 0x80008000, v1
+; GFX9-NEXT: v_mov_b32_e32 v2, 0x4000
+; GFX9-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
+; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[4:5]
+; GFX9-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT: s_mov_b32 s4, 0x5040100
+; GFX9-NEXT: v_perm_b32 v0, v1, v0, s4
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-LABEL: select_fneg_posk_src_fma_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_pk_fma_f16 v2, v2, 4.0, v3 op_sel_hi:[1,0,1]
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_xor_b32_e32 v0, 0x80008000, v2
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: select_fneg_posk_src_fma_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_pk_fma_f16 v2, v2, 4.0, v3 op_sel_hi:[1,0,1]
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
+; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%fma = call <2 x half> @llvm.fma.v2f16(<2 x half> %x, <2 x half> <half 4.0, half 4.0>, <2 x half> %z)
%fneg = fneg <2 x half> %fma
@@ -4817,118 +3633,65 @@ define <2 x half> @select_fneg_posk_src_fmad_v2f16(<2 x i32> %c, <2 x half> %x,
; CI-NEXT: v_cndmask_b32_e32 v1, 2.0, v2, vcc
; CI-NEXT: s_setpc_b64 s[30:31]
;
-; VI-SAFE-LABEL: select_fneg_posk_src_fmad_v2f16:
-; VI-SAFE: ; %bb.0:
-; VI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-SAFE-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
-; VI-SAFE-NEXT: v_lshrrev_b32_e32 v1, 16, v3
-; VI-SAFE-NEXT: v_lshrrev_b32_e32 v4, 16, v2
-; VI-SAFE-NEXT: v_fma_f16 v1, v4, 4.0, v1
-; VI-SAFE-NEXT: v_lshlrev_b32_e32 v1, 16, v1
-; VI-SAFE-NEXT: v_fma_f16 v2, v2, 4.0, v3
-; VI-SAFE-NEXT: v_or_b32_e32 v1, v2, v1
-; VI-SAFE-NEXT: v_xor_b32_e32 v1, 0x80008000, v1
-; VI-SAFE-NEXT: v_mov_b32_e32 v2, 0x4000
-; VI-SAFE-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
-; VI-SAFE-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[4:5]
-; VI-SAFE-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; VI-SAFE-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; VI-SAFE-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-SAFE-LABEL: select_fneg_posk_src_fmad_v2f16:
-; GFX9-SAFE: ; %bb.0:
-; GFX9-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SAFE-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
-; GFX9-SAFE-NEXT: v_pk_fma_f16 v1, v2, 4.0, v3 op_sel_hi:[1,0,1]
-; GFX9-SAFE-NEXT: v_xor_b32_e32 v1, 0x80008000, v1
-; GFX9-SAFE-NEXT: v_mov_b32_e32 v2, 0x4000
-; GFX9-SAFE-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
-; GFX9-SAFE-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[4:5]
-; GFX9-SAFE-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX9-SAFE-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-SAFE-NEXT: v_perm_b32 v0, v1, v0, s4
-; GFX9-SAFE-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-TRUE16-LABEL: select_fneg_posk_src_fmad_v2f16:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_pk_fma_f16 v2, v2, 4.0, v3 op_sel_hi:[1,0,1]
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT: v_xor_b32_e32 v0, 0x80008000, v2
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: select_fneg_posk_src_fmad_v2f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_pk_fma_f16 v2, v2, 4.0, v3 op_sel_hi:[1,0,1]
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; VI-NSZ-LABEL: select_fneg_posk_src_fmad_v2f16:
-; VI-NSZ: ; %bb.0:
-; VI-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-NSZ-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
-; VI-NSZ-NEXT: v_lshrrev_b32_e32 v1, 16, v3
-; VI-NSZ-NEXT: v_lshrrev_b32_e32 v4, 16, v2
-; VI-NSZ-NEXT: v_fma_f16 v1, v4, -4.0, -v1
-; VI-NSZ-NEXT: v_fma_f16 v2, v2, -4.0, -v3
-; VI-NSZ-NEXT: v_mov_b32_e32 v3, 0x4000
-; VI-NSZ-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
-; VI-NSZ-NEXT: v_cndmask_b32_e64 v0, v3, v2, s[4:5]
-; VI-NSZ-NEXT: v_cndmask_b32_sdwa v1, v3, v1, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; VI-NSZ-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; VI-NSZ-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-NSZ-LABEL: select_fneg_posk_src_fmad_v2f16:
-; GFX9-NSZ: ; %bb.0:
-; GFX9-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NSZ-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
-; GFX9-NSZ-NEXT: v_pk_fma_f16 v1, v2, -4.0, v3 op_sel_hi:[1,0,1] neg_lo:[0,0,1] neg_hi:[0,0,1]
-; GFX9-NSZ-NEXT: v_mov_b32_e32 v2, 0x4000
-; GFX9-NSZ-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
-; GFX9-NSZ-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[4:5]
-; GFX9-NSZ-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX9-NSZ-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NSZ-NEXT: v_perm_b32 v0, v1, v0, s4
-; GFX9-NSZ-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: select_fneg_posk_src_fmad_v2f16:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: v_pk_fma_f16 v0, v2, -4.0, v3 op_sel_hi:[1,0,1] neg_lo:[0,0,1] neg_hi:[0,0,1]
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: select_fneg_posk_src_fmad_v2f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_pk_fma_f16 v2, v2, -4.0, v3 op_sel_hi:[1,0,1] neg_lo:[0,0,1] neg_hi:[0,0,1]
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; VI-LABEL: select_fneg_posk_src_fmad_v2f16:
+; VI: ; %bb.0:
+; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
+; VI-NEXT: v_lshrrev_b32_e32 v1, 16, v3
+; VI-NEXT: v_lshrrev_b32_e32 v4, 16, v2
+; VI-NEXT: v_fma_f16 v1, v4, 4.0, v1
+; VI-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; VI-NEXT: v_fma_f16 v2, v2, 4.0, v3
+; VI-NEXT: v_or_b32_e32 v1, v2, v1
+; VI-NEXT: v_xor_b32_e32 v1, 0x80008000, v1
+; VI-NEXT: v_mov_b32_e32 v2, 0x4000
+; VI-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
+; VI-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[4:5]
+; VI-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; VI-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; VI-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: select_fneg_posk_src_fmad_v2f16:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
+; GFX9-NEXT: v_pk_fma_f16 v1, v2, 4.0, v3 op_sel_hi:[1,0,1]
+; GFX9-NEXT: v_xor_b32_e32 v1, 0x80008000, v1
+; GFX9-NEXT: v_mov_b32_e32 v2, 0x4000
+; GFX9-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
+; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[4:5]
+; GFX9-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT: s_mov_b32 s4, 0x5040100
+; GFX9-NEXT: v_perm_b32 v0, v1, v0, s4
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-LABEL: select_fneg_posk_src_fmad_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_pk_fma_f16 v2, v2, 4.0, v3 op_sel_hi:[1,0,1]
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_xor_b32_e32 v0, 0x80008000, v2
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: select_fneg_posk_src_fmad_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_pk_fma_f16 v2, v2, 4.0, v3 op_sel_hi:[1,0,1]
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
+; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%fmad = call <2 x half> @llvm.fmuladd.v2f16(<2 x half> %x, <2 x half> <half 4.0, half 4.0>, <2 x half> %z)
%fneg = fneg <2 x half> %fmad
@@ -4986,55 +3749,30 @@ define <2 x half> @select_fneg_posk_src_fmad_v2f16_nsz(<2 x i32> %c, <2 x half>
; GFX9-NEXT: v_perm_b32 v0, v1, v0, s4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SAFE-TRUE16-LABEL: select_fneg_posk_src_fmad_v2f16_nsz:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: v_pk_fma_f16 v0, v2, -4.0, v3 op_sel_hi:[1,0,1] neg_lo:[0,0,1] neg_hi:[0,0,1]
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: select_fneg_posk_src_fmad_v2f16_nsz:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_pk_fma_f16 v2, v2, -4.0, v3 op_sel_hi:[1,0,1] neg_lo:[0,0,1] neg_hi:[0,0,1]
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: select_fneg_posk_src_fmad_v2f16_nsz:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: v_pk_fma_f16 v0, v2, -4.0, v3 op_sel_hi:[1,0,1] neg_lo:[0,0,1] neg_hi:[0,0,1]
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: select_fneg_posk_src_fmad_v2f16_nsz:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_pk_fma_f16 v2, v2, -4.0, v3 op_sel_hi:[1,0,1] neg_lo:[0,0,1] neg_hi:[0,0,1]
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: select_fneg_posk_src_fmad_v2f16_nsz:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-TRUE16-NEXT: v_pk_fma_f16 v0, v2, -4.0, v3 op_sel_hi:[1,0,1] neg_lo:[0,0,1] neg_hi:[0,0,1]
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: select_fneg_posk_src_fmad_v2f16_nsz:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_pk_fma_f16 v2, v2, -4.0, v3 op_sel_hi:[1,0,1] neg_lo:[0,0,1] neg_hi:[0,0,1]
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%fmad = call nsz <2 x half> @llvm.fmuladd.v2f16(<2 x half> %x, <2 x half> <half 4.0, half 4.0>, <2 x half> %z)
%fneg = fneg <2 x half> %fmad
@@ -5049,5 +3787,3 @@ declare <2 x half> @llvm.fmuladd.v2f16(<2 x half>, <2 x half>, <2 x half>) #0
attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GFX11: {{.*}}
-; GFX11-NSZ: {{.*}}
-; GFX11-SAFE: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/simple-indirect-call-2.ll b/llvm/test/CodeGen/AMDGPU/simple-indirect-call-2.ll
index f1cadea..0868148 100644
--- a/llvm/test/CodeGen/AMDGPU/simple-indirect-call-2.ll
+++ b/llvm/test/CodeGen/AMDGPU/simple-indirect-call-2.ll
@@ -63,7 +63,7 @@ define amdgpu_kernel void @foo(ptr noundef %fp) {
; OW-NEXT: ret void
;
; CW-LABEL: define {{[^@]+}}@foo
-; CW-SAME: (ptr noundef [[FP:%.*]]) #[[ATTR1:[0-9]+]] {
+; CW-SAME: (ptr noundef [[FP:%.*]]) #[[ATTR0]] {
; CW-NEXT: entry:
; CW-NEXT: [[FP_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; CW-NEXT: store ptr [[FP]], ptr addrspace(5) [[FP_ADDR]], align 8
@@ -84,7 +84,7 @@ define amdgpu_kernel void @foo(ptr noundef %fp) {
; CW-NEXT: ret void
;
; NO-LABEL: define {{[^@]+}}@foo
-; NO-SAME: (ptr noundef [[FP:%.*]]) #[[ATTR1:[0-9]+]] {
+; NO-SAME: (ptr noundef [[FP:%.*]]) #[[ATTR0]] {
; NO-NEXT: entry:
; NO-NEXT: [[FP_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; NO-NEXT: store ptr [[FP]], ptr addrspace(5) [[FP_ADDR]], align 8
@@ -101,14 +101,12 @@ entry:
}
;.
-; NO: attributes #[[ATTR0]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; NO: attributes #[[ATTR1]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; NO: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
;.
-; OW: attributes #[[ATTR0]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; OW: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; OW: attributes #[[ATTR1]] = { "uniform-work-group-size"="false" }
;.
-; CW: attributes #[[ATTR0]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; CW: attributes #[[ATTR1]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CW: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
;.
; NO: [[META0]] = !{ptr @bar1, ptr @bar2}
;.
diff --git a/llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll b/llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll
index 775d2f9..8fcaf5e 100644
--- a/llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll
@@ -58,7 +58,7 @@ define amdgpu_kernel void @test_simple_indirect_call() {
;.
-; ATTRIBUTOR_GCN: attributes #[[ATTR0]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_GCN: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; ATTRIBUTOR_GCN: attributes #[[ATTR1]] = { "uniform-work-group-size"="false" }
;.
; ATTRIBUTOR_GCN: [[META0]] = !{i32 1, i32 5, i32 6, i32 10}
diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-attribute-missing.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-attribute-missing.ll
index a1557418..8dfd3b7 100644
--- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-attribute-missing.ll
+++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-attribute-missing.ll
@@ -31,5 +31,5 @@ define amdgpu_kernel void @kernel1() #1 {
attributes #0 = { "uniform-work-group-size"="true" }
;.
-; CHECK: attributes #[[ATTR0]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
;.
diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-multistep.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-multistep.ll
index fb225a9..fa01ee9 100644
--- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-multistep.ll
+++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-multistep.ll
@@ -98,7 +98,7 @@ define amdgpu_kernel void @kernel2() #0 {
attributes #0 = { "uniform-work-group-size"="true" }
;.
; CHECK: attributes #[[ATTR0]] = { "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR1]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; CHECK: attributes #[[ATTR2]] = { "uniform-work-group-size"="true" }
-; CHECK: attributes #[[ATTR3]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" }
+; CHECK: attributes #[[ATTR3]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" }
;.
diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-nested-function-calls.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-nested-function-calls.ll
index cfede0c..09001ca 100644
--- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-nested-function-calls.ll
+++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-nested-function-calls.ll
@@ -41,6 +41,6 @@ define amdgpu_kernel void @kernel3() #2 {
attributes #2 = { "uniform-work-group-size"="true" }
;.
-; CHECK: attributes #[[ATTR0]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR1]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" }
+; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" }
;.
diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-prevent-attribute-propagation.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-prevent-attribute-propagation.ll
index 854b724..4dede21 100644
--- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-prevent-attribute-propagation.ll
+++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-prevent-attribute-propagation.ll
@@ -41,6 +41,6 @@ define amdgpu_kernel void @kernel2() #2 {
attributes #1 = { "uniform-work-group-size"="true" }
;.
-; CHECK: attributes #[[ATTR0]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR1]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" }
+; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" }
;.
diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-propagate-attribute.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-propagate-attribute.ll
index c4e0a60..08e1556 100644
--- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-propagate-attribute.ll
+++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-propagate-attribute.ll
@@ -52,8 +52,8 @@ attributes #0 = { nounwind }
attributes #1 = { "uniform-work-group-size"="false" }
attributes #2 = { "uniform-work-group-size"="true" }
;.
-; CHECK: attributes #[[ATTR0]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR1]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR0]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; CHECK: attributes #[[ATTR2]] = { nounwind "uniform-work-group-size"="false" }
; CHECK: attributes #[[ATTR3]] = { "uniform-work-group-size"="true" }
;.
diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll
index 05af74d..9090d605 100644
--- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll
+++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll
@@ -101,7 +101,7 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %m) #1 {
attributes #0 = { nounwind readnone }
attributes #1 = { "uniform-work-group-size"="true" }
;.
-; CHECK: attributes #[[ATTR0]] = { nounwind memory(none) "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR1]] = { nounwind memory(none) "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" }
-; CHECK: attributes #[[ATTR2]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" }
+; CHECK: attributes #[[ATTR0]] = { nounwind memory(none) "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR1]] = { nounwind memory(none) "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" }
+; CHECK: attributes #[[ATTR2]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" }
;.
diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-test.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-test.ll
index cdbca7f..5e109f4 100644
--- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-test.ll
+++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-test.ll
@@ -61,5 +61,5 @@ define amdgpu_kernel void @kernel3() #0 {
attributes #0 = { "uniform-work-group-size"="false" }
;.
-; CHECK: attributes #[[ATTR0]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
;.
diff --git a/llvm/test/CodeGen/ARM/GlobalISel/arm-instruction-select-combos.mir b/llvm/test/CodeGen/ARM/GlobalISel/arm-instruction-select-combos.mir
index 77eeb34..4dd8af0 100644
--- a/llvm/test/CodeGen/ARM/GlobalISel/arm-instruction-select-combos.mir
+++ b/llvm/test/CodeGen/ARM/GlobalISel/arm-instruction-select-combos.mir
@@ -447,7 +447,7 @@ body: |
; CHECK-LABEL: name: test_vnmuls
; CHECK: [[COPY:%[0-9]+]]:spr = COPY $s0
; CHECK: [[COPY1:%[0-9]+]]:spr = COPY $s1
- ; CHECK: [[VNMULS:%[0-9]+]]:spr = VNMULS [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg
+ ; CHECK: [[VNMULS:%[0-9]+]]:spr = nofpexcept VNMULS [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg, implicit $fpscr
; CHECK: $s0 = COPY [[VNMULS]]
; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $s0
%0(s32) = COPY $s0
@@ -477,7 +477,7 @@ body: |
; CHECK-LABEL: name: test_vnmuls_reassociate
; CHECK: [[COPY:%[0-9]+]]:spr = COPY $s0
; CHECK: [[COPY1:%[0-9]+]]:spr = COPY $s1
- ; CHECK: [[VNMULS:%[0-9]+]]:spr = VNMULS [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg
+ ; CHECK: [[VNMULS:%[0-9]+]]:spr = nofpexcept VNMULS [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg, implicit $fpscr
; CHECK: $s0 = COPY [[VNMULS]]
; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $s0
%0(s32) = COPY $s0
@@ -507,7 +507,7 @@ body: |
; CHECK-LABEL: name: test_vnmuld
; CHECK: [[COPY:%[0-9]+]]:dpr = COPY $d0
; CHECK: [[COPY1:%[0-9]+]]:dpr = COPY $d1
- ; CHECK: [[VNMULD:%[0-9]+]]:dpr = VNMULD [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg
+ ; CHECK: [[VNMULD:%[0-9]+]]:dpr = nofpexcept VNMULD [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg, implicit $fpscr
; CHECK: $d0 = COPY [[VNMULD]]
; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $d0
%0(s64) = COPY $d0
@@ -539,7 +539,7 @@ body: |
; CHECK: [[COPY:%[0-9]+]]:spr = COPY $s0
; CHECK: [[COPY1:%[0-9]+]]:spr = COPY $s1
; CHECK: [[COPY2:%[0-9]+]]:spr = COPY $s2
- ; CHECK: [[VFNMAS:%[0-9]+]]:spr = VFNMAS [[COPY2]], [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg
+ ; CHECK: [[VFNMAS:%[0-9]+]]:spr = nofpexcept VFNMAS [[COPY2]], [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg, implicit $fpscr
; CHECK: $s0 = COPY [[VFNMAS]]
; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $s0
%0(s32) = COPY $s0
@@ -573,7 +573,7 @@ body: |
; CHECK: [[COPY:%[0-9]+]]:dpr = COPY $d0
; CHECK: [[COPY1:%[0-9]+]]:dpr = COPY $d1
; CHECK: [[COPY2:%[0-9]+]]:dpr = COPY $d2
- ; CHECK: [[VFNMAD:%[0-9]+]]:dpr = VFNMAD [[COPY2]], [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg
+ ; CHECK: [[VFNMAD:%[0-9]+]]:dpr = nofpexcept VFNMAD [[COPY2]], [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg, implicit $fpscr
; CHECK: $d0 = COPY [[VFNMAD]]
; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $d0
%0(s64) = COPY $d0
@@ -607,7 +607,7 @@ body: |
; CHECK: [[COPY:%[0-9]+]]:spr = COPY $s0
; CHECK: [[COPY1:%[0-9]+]]:spr = COPY $s1
; CHECK: [[COPY2:%[0-9]+]]:spr = COPY $s2
- ; CHECK: [[VFMSS:%[0-9]+]]:spr = VFMSS [[COPY2]], [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg
+ ; CHECK: [[VFMSS:%[0-9]+]]:spr = nofpexcept VFMSS [[COPY2]], [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg, implicit $fpscr
; CHECK: $s0 = COPY [[VFMSS]]
; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $s0
%0(s32) = COPY $s0
@@ -640,7 +640,7 @@ body: |
; CHECK: [[COPY:%[0-9]+]]:dpr = COPY $d0
; CHECK: [[COPY1:%[0-9]+]]:dpr = COPY $d1
; CHECK: [[COPY2:%[0-9]+]]:dpr = COPY $d2
- ; CHECK: [[VFMSD:%[0-9]+]]:dpr = VFMSD [[COPY2]], [[COPY1]], [[COPY]], 14 /* CC::al */, $noreg
+ ; CHECK: [[VFMSD:%[0-9]+]]:dpr = nofpexcept VFMSD [[COPY2]], [[COPY1]], [[COPY]], 14 /* CC::al */, $noreg, implicit $fpscr
; CHECK: $d0 = COPY [[VFMSD]]
; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $d0
%0(s64) = COPY $d0
@@ -673,7 +673,7 @@ body: |
; CHECK: [[COPY:%[0-9]+]]:spr = COPY $s0
; CHECK: [[COPY1:%[0-9]+]]:spr = COPY $s1
; CHECK: [[COPY2:%[0-9]+]]:spr = COPY $s2
- ; CHECK: [[VFNMSS:%[0-9]+]]:spr = VFNMSS [[COPY2]], [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg
+ ; CHECK: [[VFNMSS:%[0-9]+]]:spr = nofpexcept VFNMSS [[COPY2]], [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg, implicit $fpscr
; CHECK: $s0 = COPY [[VFNMSS]]
; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $s0
%0(s32) = COPY $s0
diff --git a/llvm/test/CodeGen/ARM/GlobalISel/arm-select-copy_to_regclass-of-fptosi.mir b/llvm/test/CodeGen/ARM/GlobalISel/arm-select-copy_to_regclass-of-fptosi.mir
index 45a846b..4cded13 100644
--- a/llvm/test/CodeGen/ARM/GlobalISel/arm-select-copy_to_regclass-of-fptosi.mir
+++ b/llvm/test/CodeGen/ARM/GlobalISel/arm-select-copy_to_regclass-of-fptosi.mir
@@ -19,7 +19,7 @@ body: |
bb.1:
; CHECK-LABEL: name: test_fptosi
; CHECK: [[COPY:%[0-9]+]]:spr = COPY $s0
- ; CHECK: [[VTOSIZS:%[0-9]+]]:spr = VTOSIZS [[COPY]], 14 /* CC::al */, $noreg
+ ; CHECK: [[VTOSIZS:%[0-9]+]]:spr = nofpexcept VTOSIZS [[COPY]], 14 /* CC::al */, $noreg
; CHECK: [[COPY1:%[0-9]+]]:gpr = COPY [[VTOSIZS]]
; CHECK: $r0 = COPY [[COPY1]]
; CHECK: MOVPCLR 14 /* CC::al */, $noreg, implicit $r0
diff --git a/llvm/test/CodeGen/ARM/GlobalISel/select-fp.mir b/llvm/test/CodeGen/ARM/GlobalISel/select-fp.mir
index ec834f1..4517fe6 100644
--- a/llvm/test/CodeGen/ARM/GlobalISel/select-fp.mir
+++ b/llvm/test/CodeGen/ARM/GlobalISel/select-fp.mir
@@ -1,3 +1,4 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
# RUN: llc -O0 -mtriple arm-- -mattr=+vfp4,-neonfp -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
# RUN: llc -O0 -mtriple thumb-- -mattr=+v6t2,+vfp4,-neonfp -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
--- |
@@ -76,11 +77,9 @@ body: |
...
---
name: test_fadd_s32
-# CHECK-LABEL: name: test_fadd_s32
legalized: true
regBankSelected: true
selected: false
-# CHECK: selected: true
registers:
- { id: 0, class: fprb }
- { id: 1, class: fprb }
@@ -89,28 +88,29 @@ body: |
bb.0:
liveins: $s0, $s1
+ ; CHECK-LABEL: name: test_fadd_s32
+ ; CHECK: liveins: $s0, $s1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:spr = COPY $s0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:spr = COPY $s1
+ ; CHECK-NEXT: [[VADDS:%[0-9]+]]:spr = nofpexcept VADDS [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
+ ; CHECK-NEXT: $s0 = COPY [[VADDS]]
+ ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $s0
%0(s32) = COPY $s0
- ; CHECK: [[VREGX:%[0-9]+]]:spr = COPY $s0
%1(s32) = COPY $s1
- ; CHECK: [[VREGY:%[0-9]+]]:spr = COPY $s1
%2(s32) = G_FADD %0, %1
- ; CHECK: [[VREGSUM:%[0-9]+]]:spr = VADDS [[VREGX]], [[VREGY]], 14 /* CC::al */, $noreg
$s0 = COPY %2(s32)
- ; CHECK: $s0 = COPY [[VREGSUM]]
BX_RET 14, $noreg, implicit $s0
- ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $s0
...
---
name: test_fadd_s64
-# CHECK-LABEL: name: test_fadd_s64
legalized: true
regBankSelected: true
selected: false
-# CHECK: selected: true
registers:
- { id: 0, class: fprb }
- { id: 1, class: fprb }
@@ -119,28 +119,29 @@ body: |
bb.0:
liveins: $d0, $d1
+ ; CHECK-LABEL: name: test_fadd_s64
+ ; CHECK: liveins: $d0, $d1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:dpr = COPY $d0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:dpr = COPY $d1
+ ; CHECK-NEXT: [[VADDD:%[0-9]+]]:dpr = nofpexcept VADDD [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
+ ; CHECK-NEXT: $d0 = COPY [[VADDD]]
+ ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $d0
%0(s64) = COPY $d0
- ; CHECK: [[VREGX:%[0-9]+]]:dpr = COPY $d0
%1(s64) = COPY $d1
- ; CHECK: [[VREGY:%[0-9]+]]:dpr = COPY $d1
%2(s64) = G_FADD %0, %1
- ; CHECK: [[VREGSUM:%[0-9]+]]:dpr = VADDD [[VREGX]], [[VREGY]], 14 /* CC::al */, $noreg
$d0 = COPY %2(s64)
- ; CHECK: $d0 = COPY [[VREGSUM]]
BX_RET 14, $noreg, implicit $d0
- ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $d0
...
---
name: test_fsub_s32
-# CHECK-LABEL: name: test_fsub_s32
legalized: true
regBankSelected: true
selected: false
-# CHECK: selected: true
registers:
- { id: 0, class: fprb }
- { id: 1, class: fprb }
@@ -149,28 +150,29 @@ body: |
bb.0:
liveins: $s0, $s1
+ ; CHECK-LABEL: name: test_fsub_s32
+ ; CHECK: liveins: $s0, $s1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:spr = COPY $s0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:spr = COPY $s1
+ ; CHECK-NEXT: [[VSUBS:%[0-9]+]]:spr = nofpexcept VSUBS [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
+ ; CHECK-NEXT: $s0 = COPY [[VSUBS]]
+ ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $s0
%0(s32) = COPY $s0
- ; CHECK: [[VREGX:%[0-9]+]]:spr = COPY $s0
%1(s32) = COPY $s1
- ; CHECK: [[VREGY:%[0-9]+]]:spr = COPY $s1
%2(s32) = G_FSUB %0, %1
- ; CHECK: [[VREGSUM:%[0-9]+]]:spr = VSUBS [[VREGX]], [[VREGY]], 14 /* CC::al */, $noreg
$s0 = COPY %2(s32)
- ; CHECK: $s0 = COPY [[VREGSUM]]
BX_RET 14, $noreg, implicit $s0
- ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $s0
...
---
name: test_fsub_s64
-# CHECK-LABEL: name: test_fsub_s64
legalized: true
regBankSelected: true
selected: false
-# CHECK: selected: true
registers:
- { id: 0, class: fprb }
- { id: 1, class: fprb }
@@ -179,28 +181,29 @@ body: |
bb.0:
liveins: $d0, $d1
+ ; CHECK-LABEL: name: test_fsub_s64
+ ; CHECK: liveins: $d0, $d1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:dpr = COPY $d0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:dpr = COPY $d1
+ ; CHECK-NEXT: [[VSUBD:%[0-9]+]]:dpr = nofpexcept VSUBD [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
+ ; CHECK-NEXT: $d0 = COPY [[VSUBD]]
+ ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $d0
%0(s64) = COPY $d0
- ; CHECK: [[VREGX:%[0-9]+]]:dpr = COPY $d0
%1(s64) = COPY $d1
- ; CHECK: [[VREGY:%[0-9]+]]:dpr = COPY $d1
%2(s64) = G_FSUB %0, %1
- ; CHECK: [[VREGSUM:%[0-9]+]]:dpr = VSUBD [[VREGX]], [[VREGY]], 14 /* CC::al */, $noreg
$d0 = COPY %2(s64)
- ; CHECK: $d0 = COPY [[VREGSUM]]
BX_RET 14, $noreg, implicit $d0
- ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $d0
...
---
name: test_fmul_s32
-# CHECK-LABEL: name: test_fmul_s32
legalized: true
regBankSelected: true
selected: false
-# CHECK: selected: true
registers:
- { id: 0, class: fprb }
- { id: 1, class: fprb }
@@ -209,28 +212,29 @@ body: |
bb.0:
liveins: $s0, $s1
+ ; CHECK-LABEL: name: test_fmul_s32
+ ; CHECK: liveins: $s0, $s1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:spr = COPY $s0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:spr = COPY $s1
+ ; CHECK-NEXT: [[VMULS:%[0-9]+]]:spr = nofpexcept VMULS [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
+ ; CHECK-NEXT: $s0 = COPY [[VMULS]]
+ ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $s0
%0(s32) = COPY $s0
- ; CHECK: [[VREGX:%[0-9]+]]:spr = COPY $s0
%1(s32) = COPY $s1
- ; CHECK: [[VREGY:%[0-9]+]]:spr = COPY $s1
%2(s32) = G_FMUL %0, %1
- ; CHECK: [[VREGSUM:%[0-9]+]]:spr = VMULS [[VREGX]], [[VREGY]], 14 /* CC::al */, $noreg
$s0 = COPY %2(s32)
- ; CHECK: $s0 = COPY [[VREGSUM]]
BX_RET 14, $noreg, implicit $s0
- ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $s0
...
---
name: test_fmul_s64
-# CHECK-LABEL: name: test_fmul_s64
legalized: true
regBankSelected: true
selected: false
-# CHECK: selected: true
registers:
- { id: 0, class: fprb }
- { id: 1, class: fprb }
@@ -239,28 +243,29 @@ body: |
bb.0:
liveins: $d0, $d1
+ ; CHECK-LABEL: name: test_fmul_s64
+ ; CHECK: liveins: $d0, $d1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:dpr = COPY $d0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:dpr = COPY $d1
+ ; CHECK-NEXT: [[VMULD:%[0-9]+]]:dpr = nofpexcept VMULD [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
+ ; CHECK-NEXT: $d0 = COPY [[VMULD]]
+ ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $d0
%0(s64) = COPY $d0
- ; CHECK: [[VREGX:%[0-9]+]]:dpr = COPY $d0
%1(s64) = COPY $d1
- ; CHECK: [[VREGY:%[0-9]+]]:dpr = COPY $d1
%2(s64) = G_FMUL %0, %1
- ; CHECK: [[VREGSUM:%[0-9]+]]:dpr = VMULD [[VREGX]], [[VREGY]], 14 /* CC::al */, $noreg
$d0 = COPY %2(s64)
- ; CHECK: $d0 = COPY [[VREGSUM]]
BX_RET 14, $noreg, implicit $d0
- ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $d0
...
---
name: test_fdiv_s32
-# CHECK-LABEL: name: test_fdiv_s32
legalized: true
regBankSelected: true
selected: false
-# CHECK: selected: true
registers:
- { id: 0, class: fprb }
- { id: 1, class: fprb }
@@ -269,28 +274,29 @@ body: |
bb.0:
liveins: $s0, $s1
+ ; CHECK-LABEL: name: test_fdiv_s32
+ ; CHECK: liveins: $s0, $s1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:spr = COPY $s0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:spr = COPY $s1
+ ; CHECK-NEXT: [[VDIVS:%[0-9]+]]:spr = nofpexcept VDIVS [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
+ ; CHECK-NEXT: $s0 = COPY [[VDIVS]]
+ ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $s0
%0(s32) = COPY $s0
- ; CHECK: [[VREGX:%[0-9]+]]:spr = COPY $s0
%1(s32) = COPY $s1
- ; CHECK: [[VREGY:%[0-9]+]]:spr = COPY $s1
%2(s32) = G_FDIV %0, %1
- ; CHECK: [[VREGSUM:%[0-9]+]]:spr = VDIVS [[VREGX]], [[VREGY]], 14 /* CC::al */, $noreg
$s0 = COPY %2(s32)
- ; CHECK: $s0 = COPY [[VREGSUM]]
BX_RET 14, $noreg, implicit $s0
- ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $s0
...
---
name: test_fdiv_s64
-# CHECK-LABEL: name: test_fdiv_s64
legalized: true
regBankSelected: true
selected: false
-# CHECK: selected: true
registers:
- { id: 0, class: fprb }
- { id: 1, class: fprb }
@@ -299,28 +305,29 @@ body: |
bb.0:
liveins: $d0, $d1
+ ; CHECK-LABEL: name: test_fdiv_s64
+ ; CHECK: liveins: $d0, $d1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:dpr = COPY $d0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:dpr = COPY $d1
+ ; CHECK-NEXT: [[VDIVD:%[0-9]+]]:dpr = nofpexcept VDIVD [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
+ ; CHECK-NEXT: $d0 = COPY [[VDIVD]]
+ ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $d0
%0(s64) = COPY $d0
- ; CHECK: [[VREGX:%[0-9]+]]:dpr = COPY $d0
%1(s64) = COPY $d1
- ; CHECK: [[VREGY:%[0-9]+]]:dpr = COPY $d1
%2(s64) = G_FDIV %0, %1
- ; CHECK: [[VREGSUM:%[0-9]+]]:dpr = VDIVD [[VREGX]], [[VREGY]], 14 /* CC::al */, $noreg
$d0 = COPY %2(s64)
- ; CHECK: $d0 = COPY [[VREGSUM]]
BX_RET 14, $noreg, implicit $d0
- ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $d0
...
---
name: test_fneg_s32
-# CHECK-LABEL: name: test_fneg_s32
legalized: true
regBankSelected: true
selected: false
-# CHECK: selected: true
registers:
- { id: 0, class: fprb }
- { id: 1, class: fprb }
@@ -328,25 +335,26 @@ body: |
bb.0:
liveins: $s0
+ ; CHECK-LABEL: name: test_fneg_s32
+ ; CHECK: liveins: $s0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:spr = COPY $s0
+ ; CHECK-NEXT: [[VNEGS:%[0-9]+]]:spr = VNEGS [[COPY]], 14 /* CC::al */, $noreg
+ ; CHECK-NEXT: $s0 = COPY [[VNEGS]]
+ ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $s0
%0(s32) = COPY $s0
- ; CHECK: [[VREGX:%[0-9]+]]:spr = COPY $s0
%1(s32) = G_FNEG %0
- ; CHECK: [[VREGSUM:%[0-9]+]]:spr = VNEGS [[VREGX]], 14 /* CC::al */, $noreg
$s0 = COPY %1(s32)
- ; CHECK: $s0 = COPY [[VREGSUM]]
BX_RET 14, $noreg, implicit $s0
- ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $s0
...
---
name: test_fneg_s64
-# CHECK-LABEL: name: test_fneg_s64
legalized: true
regBankSelected: true
selected: false
-# CHECK: selected: true
registers:
- { id: 0, class: fprb }
- { id: 1, class: fprb }
@@ -355,25 +363,26 @@ body: |
bb.0:
liveins: $d0
+ ; CHECK-LABEL: name: test_fneg_s64
+ ; CHECK: liveins: $d0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:dpr = COPY $d0
+ ; CHECK-NEXT: [[VNEGD:%[0-9]+]]:dpr = VNEGD [[COPY]], 14 /* CC::al */, $noreg
+ ; CHECK-NEXT: $d0 = COPY [[VNEGD]]
+ ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $d0
%0(s64) = COPY $d0
- ; CHECK: [[VREGX:%[0-9]+]]:dpr = COPY $d0
%1(s64) = G_FNEG %0
- ; CHECK: [[VREGSUM:%[0-9]+]]:dpr = VNEGD [[VREGX]], 14 /* CC::al */, $noreg
$d0 = COPY %1(s64)
- ; CHECK: $d0 = COPY [[VREGSUM]]
BX_RET 14, $noreg, implicit $d0
- ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $d0
...
---
name: test_fma_s32
-# CHECK-LABEL: name: test_fma_s32
legalized: true
regBankSelected: true
selected: false
-# CHECK: selected: true
registers:
- { id: 0, class: fprb }
- { id: 1, class: fprb }
@@ -383,31 +392,32 @@ body: |
bb.0:
liveins: $s0, $s1, $s2
+ ; CHECK-LABEL: name: test_fma_s32
+ ; CHECK: liveins: $s0, $s1, $s2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:spr = COPY $s0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:spr = COPY $s1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:spr = COPY $s2
+ ; CHECK-NEXT: [[VFMAS:%[0-9]+]]:spr = nofpexcept VFMAS [[COPY2]], [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
+ ; CHECK-NEXT: $s0 = COPY [[VFMAS]]
+ ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $s0
%0(s32) = COPY $s0
- ; CHECK: [[VREGX:%[0-9]+]]:spr = COPY $s0
%1(s32) = COPY $s1
- ; CHECK: [[VREGY:%[0-9]+]]:spr = COPY $s1
%2(s32) = COPY $s2
- ; CHECK: [[VREGZ:%[0-9]+]]:spr = COPY $s2
%3(s32) = G_FMA %0, %1, %2
- ; CHECK: [[VREGR:%[0-9]+]]:spr = VFMAS [[VREGZ]], [[VREGX]], [[VREGY]], 14 /* CC::al */, $noreg
$s0 = COPY %3(s32)
- ; CHECK: $s0 = COPY [[VREGR]]
BX_RET 14, $noreg, implicit $s0
- ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $s0
...
---
name: test_fma_s64
-# CHECK-LABEL: name: test_fma_s64
legalized: true
regBankSelected: true
selected: false
-# CHECK: selected: true
registers:
- { id: 0, class: fprb }
- { id: 1, class: fprb }
@@ -417,31 +427,32 @@ body: |
bb.0:
liveins: $d0, $d1, $d2
+ ; CHECK-LABEL: name: test_fma_s64
+ ; CHECK: liveins: $d0, $d1, $d2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:dpr = COPY $d0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:dpr = COPY $d1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:dpr = COPY $d2
+ ; CHECK-NEXT: [[VFMAD:%[0-9]+]]:dpr = nofpexcept VFMAD [[COPY2]], [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
+ ; CHECK-NEXT: $d0 = COPY [[VFMAD]]
+ ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $d0
%0(s64) = COPY $d0
- ; CHECK: [[VREGX:%[0-9]+]]:dpr = COPY $d0
%1(s64) = COPY $d1
- ; CHECK: [[VREGY:%[0-9]+]]:dpr = COPY $d1
%2(s64) = COPY $d2
- ; CHECK: [[VREGZ:%[0-9]+]]:dpr = COPY $d2
%3(s64) = G_FMA %0, %1, %2
- ; CHECK: [[VREGR:%[0-9]+]]:dpr = VFMAD [[VREGZ]], [[VREGX]], [[VREGY]], 14 /* CC::al */, $noreg
$d0 = COPY %3(s64)
- ; CHECK: $d0 = COPY [[VREGR]]
BX_RET 14, $noreg, implicit $d0
- ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $d0
...
---
name: test_fpext_s32_to_s64
-# CHECK-LABEL: name: test_fpext_s32_to_s64
legalized: true
regBankSelected: true
selected: false
-# CHECK: selected: true
registers:
- { id: 0, class: fprb }
- { id: 1, class: fprb }
@@ -449,25 +460,26 @@ body: |
bb.0:
liveins: $s0
+ ; CHECK-LABEL: name: test_fpext_s32_to_s64
+ ; CHECK: liveins: $s0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:spr = COPY $s0
+ ; CHECK-NEXT: [[VCVTDS:%[0-9]+]]:dpr = nofpexcept VCVTDS [[COPY]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
+ ; CHECK-NEXT: $d0 = COPY [[VCVTDS]]
+ ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $d0
%0(s32) = COPY $s0
- ; CHECK: [[VREGX:%[0-9]+]]:spr = COPY $s0
%1(s64) = G_FPEXT %0(s32)
- ; CHECK: [[VREGR:%[0-9]+]]:dpr = VCVTDS [[VREGX]], 14 /* CC::al */, $noreg
$d0 = COPY %1(s64)
- ; CHECK: $d0 = COPY [[VREGR]]
BX_RET 14, $noreg, implicit $d0
- ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $d0
...
---
name: test_fptrunc_s64_to_s32
-# CHECK-LABEL: name: test_fptrunc_s64_to_s32
legalized: true
regBankSelected: true
selected: false
-# CHECK: selected: true
registers:
- { id: 0, class: fprb }
- { id: 1, class: fprb }
@@ -475,25 +487,26 @@ body: |
bb.0:
liveins: $d0
+ ; CHECK-LABEL: name: test_fptrunc_s64_to_s32
+ ; CHECK: liveins: $d0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:dpr = COPY $d0
+ ; CHECK-NEXT: [[VCVTSD:%[0-9]+]]:spr = nofpexcept VCVTSD [[COPY]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
+ ; CHECK-NEXT: $s0 = COPY [[VCVTSD]]
+ ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $s0
%0(s64) = COPY $d0
- ; CHECK: [[VREGX:%[0-9]+]]:dpr = COPY $d0
%1(s32) = G_FPTRUNC %0(s64)
- ; CHECK: [[VREGR:%[0-9]+]]:spr = VCVTSD [[VREGX]], 14 /* CC::al */, $noreg
$s0 = COPY %1(s32)
- ; CHECK: $s0 = COPY [[VREGR]]
BX_RET 14, $noreg, implicit $s0
- ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $s0
...
---
name: test_fptosi_s32
-# CHECK-LABEL: name: test_fptosi_s32
legalized: true
regBankSelected: true
selected: false
-# CHECK: selected: true
registers:
- { id: 0, class: fprb }
- { id: 1, class: gprb }
@@ -501,26 +514,27 @@ body: |
bb.0:
liveins: $s0
+ ; CHECK-LABEL: name: test_fptosi_s32
+ ; CHECK: liveins: $s0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:spr = COPY $s0
+ ; CHECK-NEXT: [[VTOSIZS:%[0-9]+]]:spr = nofpexcept VTOSIZS [[COPY]], 14 /* CC::al */, $noreg
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY [[VTOSIZS]]
+ ; CHECK-NEXT: $r0 = COPY [[COPY1]]
+ ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $r0
%0(s32) = COPY $s0
- ; CHECK: [[VREGX:%[0-9]+]]:spr = COPY $s0
%1(s32) = G_FPTOSI %0(s32)
- ; CHECK: [[VREGI:%[0-9]+]]:spr = VTOSIZS [[VREGX]], 14 /* CC::al */, $noreg
- ; CHECK: [[VREGR:%[0-9]+]]:gpr = COPY [[VREGI]]
$r0 = COPY %1(s32)
- ; CHECK: $r0 = COPY [[VREGR]]
BX_RET 14, $noreg, implicit $r0
- ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $r0
...
---
name: test_fptosi_s64
-# CHECK-LABEL: name: test_fptosi_s64
legalized: true
regBankSelected: true
selected: false
-# CHECK: selected: true
registers:
- { id: 0, class: fprb }
- { id: 1, class: gprb }
@@ -528,26 +542,27 @@ body: |
bb.0:
liveins: $d0
+ ; CHECK-LABEL: name: test_fptosi_s64
+ ; CHECK: liveins: $d0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:dpr = COPY $d0
+ ; CHECK-NEXT: [[VTOSIZD:%[0-9]+]]:spr = nofpexcept VTOSIZD [[COPY]], 14 /* CC::al */, $noreg
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY [[VTOSIZD]]
+ ; CHECK-NEXT: $r0 = COPY [[COPY1]]
+ ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $r0
%0(s64) = COPY $d0
- ; CHECK: [[VREGX:%[0-9]+]]:dpr = COPY $d0
%1(s32) = G_FPTOSI %0(s64)
- ; CHECK: [[VREGI:%[0-9]+]]:spr = VTOSIZD [[VREGX]], 14 /* CC::al */, $noreg
- ; CHECK: [[VREGR:%[0-9]+]]:gpr = COPY [[VREGI]]
$r0 = COPY %1(s32)
- ; CHECK: $r0 = COPY [[VREGR]]
BX_RET 14, $noreg, implicit $r0
- ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $r0
...
---
name: test_fptoui_s32
-# CHECK-LABEL: name: test_fptoui_s32
legalized: true
regBankSelected: true
selected: false
-# CHECK: selected: true
registers:
- { id: 0, class: fprb }
- { id: 1, class: gprb }
@@ -555,26 +570,27 @@ body: |
bb.0:
liveins: $s0
+ ; CHECK-LABEL: name: test_fptoui_s32
+ ; CHECK: liveins: $s0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:spr = COPY $s0
+ ; CHECK-NEXT: [[VTOUIZS:%[0-9]+]]:spr = nofpexcept VTOUIZS [[COPY]], 14 /* CC::al */, $noreg
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY [[VTOUIZS]]
+ ; CHECK-NEXT: $r0 = COPY [[COPY1]]
+ ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $r0
%0(s32) = COPY $s0
- ; CHECK: [[VREGX:%[0-9]+]]:spr = COPY $s0
%1(s32) = G_FPTOUI %0(s32)
- ; CHECK: [[VREGI:%[0-9]+]]:spr = VTOUIZS [[VREGX]], 14 /* CC::al */, $noreg
- ; CHECK: [[VREGR:%[0-9]+]]:gpr = COPY [[VREGI]]
$r0 = COPY %1(s32)
- ; CHECK: $r0 = COPY [[VREGR]]
BX_RET 14, $noreg, implicit $r0
- ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $r0
...
---
name: test_fptoui_s64
-# CHECK-LABEL: name: test_fptoui_s64
legalized: true
regBankSelected: true
selected: false
-# CHECK: selected: true
registers:
- { id: 0, class: fprb }
- { id: 1, class: gprb }
@@ -582,26 +598,27 @@ body: |
bb.0:
liveins: $d0
+ ; CHECK-LABEL: name: test_fptoui_s64
+ ; CHECK: liveins: $d0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:dpr = COPY $d0
+ ; CHECK-NEXT: [[VTOUIZD:%[0-9]+]]:spr = nofpexcept VTOUIZD [[COPY]], 14 /* CC::al */, $noreg
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY [[VTOUIZD]]
+ ; CHECK-NEXT: $r0 = COPY [[COPY1]]
+ ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $r0
%0(s64) = COPY $d0
- ; CHECK: [[VREGX:%[0-9]+]]:dpr = COPY $d0
%1(s32) = G_FPTOUI %0(s64)
- ; CHECK: [[VREGI:%[0-9]+]]:spr = VTOUIZD [[VREGX]], 14 /* CC::al */, $noreg
- ; CHECK: [[VREGR:%[0-9]+]]:gpr = COPY [[VREGI]]
$r0 = COPY %1(s32)
- ; CHECK: $r0 = COPY [[VREGR]]
BX_RET 14, $noreg, implicit $r0
- ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $r0
...
---
name: test_sitofp_s32
-# CHECK-LABEL: name: test_sitofp_s32
legalized: true
regBankSelected: true
selected: false
-# CHECK: selected: true
registers:
- { id: 0, class: gprb }
- { id: 1, class: fprb }
@@ -609,26 +626,27 @@ body: |
bb.0:
liveins: $r0
+ ; CHECK-LABEL: name: test_sitofp_s32
+ ; CHECK: liveins: $r0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $r0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:spr = COPY [[COPY]]
+ ; CHECK-NEXT: [[VSITOS:%[0-9]+]]:spr = nofpexcept VSITOS [[COPY1]], 14 /* CC::al */, $noreg
+ ; CHECK-NEXT: $s0 = COPY [[VSITOS]]
+ ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $s0
%0(s32) = COPY $r0
- ; CHECK: [[VREGX:%[0-9]+]]:gpr = COPY $r0
%1(s32) = G_SITOFP %0(s32)
- ; CHECK: [[VREGF:%[0-9]+]]:spr = COPY [[VREGX]]
- ; CHECK: [[VREGR:%[0-9]+]]:spr = VSITOS [[VREGF]], 14 /* CC::al */, $noreg
$s0 = COPY %1(s32)
- ; CHECK: $s0 = COPY [[VREGR]]
BX_RET 14, $noreg, implicit $s0
- ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $s0
...
---
name: test_sitofp_s64
-# CHECK-LABEL: name: test_sitofp_s64
legalized: true
regBankSelected: true
selected: false
-# CHECK: selected: true
registers:
- { id: 0, class: gprb }
- { id: 1, class: fprb }
@@ -636,26 +654,27 @@ body: |
bb.0:
liveins: $r0
+ ; CHECK-LABEL: name: test_sitofp_s64
+ ; CHECK: liveins: $r0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $r0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:spr = COPY [[COPY]]
+ ; CHECK-NEXT: [[VSITOD:%[0-9]+]]:dpr = nofpexcept VSITOD [[COPY1]], 14 /* CC::al */, $noreg
+ ; CHECK-NEXT: $d0 = COPY [[VSITOD]]
+ ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $d0
%0(s32) = COPY $r0
- ; CHECK: [[VREGX:%[0-9]+]]:gpr = COPY $r0
%1(s64) = G_SITOFP %0(s32)
- ; CHECK: [[VREGF:%[0-9]+]]:spr = COPY [[VREGX]]
- ; CHECK: [[VREGR:%[0-9]+]]:dpr = VSITOD [[VREGF]], 14 /* CC::al */, $noreg
$d0 = COPY %1(s64)
- ; CHECK: $d0 = COPY [[VREGR]]
BX_RET 14, $noreg, implicit $d0
- ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $d0
...
---
name: test_uitofp_s32
-# CHECK-LABEL: name: test_uitofp_s32
legalized: true
regBankSelected: true
selected: false
-# CHECK: selected: true
registers:
- { id: 0, class: gprb }
- { id: 1, class: fprb }
@@ -663,26 +682,27 @@ body: |
bb.0:
liveins: $r0
+ ; CHECK-LABEL: name: test_uitofp_s32
+ ; CHECK: liveins: $r0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $r0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:spr = COPY [[COPY]]
+ ; CHECK-NEXT: [[VUITOS:%[0-9]+]]:spr = nofpexcept VUITOS [[COPY1]], 14 /* CC::al */, $noreg
+ ; CHECK-NEXT: $s0 = COPY [[VUITOS]]
+ ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $s0
%0(s32) = COPY $r0
- ; CHECK: [[VREGX:%[0-9]+]]:gpr = COPY $r0
%1(s32) = G_UITOFP %0(s32)
- ; CHECK: [[VREGF:%[0-9]+]]:spr = COPY [[VREGX]]
- ; CHECK: [[VREGR:%[0-9]+]]:spr = VUITOS [[VREGF]], 14 /* CC::al */, $noreg
$s0 = COPY %1(s32)
- ; CHECK: $s0 = COPY [[VREGR]]
BX_RET 14, $noreg, implicit $s0
- ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $s0
...
---
name: test_uitofp_s64
-# CHECK-LABEL: name: test_uitofp_s64
legalized: true
regBankSelected: true
selected: false
-# CHECK: selected: true
registers:
- { id: 0, class: gprb }
- { id: 1, class: fprb }
@@ -690,26 +710,27 @@ body: |
bb.0:
liveins: $r0
+ ; CHECK-LABEL: name: test_uitofp_s64
+ ; CHECK: liveins: $r0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $r0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:spr = COPY [[COPY]]
+ ; CHECK-NEXT: [[VUITOD:%[0-9]+]]:dpr = nofpexcept VUITOD [[COPY1]], 14 /* CC::al */, $noreg
+ ; CHECK-NEXT: $d0 = COPY [[VUITOD]]
+ ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $d0
%0(s32) = COPY $r0
- ; CHECK: [[VREGX:%[0-9]+]]:gpr = COPY $r0
%1(s64) = G_UITOFP %0(s32)
- ; CHECK: [[VREGF:%[0-9]+]]:spr = COPY [[VREGX]]
- ; CHECK: [[VREGR:%[0-9]+]]:dpr = VUITOD [[VREGF]], 14 /* CC::al */, $noreg
$d0 = COPY %1(s64)
- ; CHECK: $d0 = COPY [[VREGR]]
BX_RET 14, $noreg, implicit $d0
- ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $d0
...
---
name: test_load_f32
-# CHECK-LABEL: name: test_load_f32
legalized: true
regBankSelected: true
selected: false
-# CHECK: selected: true
registers:
- { id: 0, class: gprb }
- { id: 1, class: fprb }
@@ -717,25 +738,26 @@ body: |
bb.0:
liveins: $r0
+ ; CHECK-LABEL: name: test_load_f32
+ ; CHECK: liveins: $r0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $r0
+ ; CHECK-NEXT: [[VLDRS:%[0-9]+]]:spr = VLDRS [[COPY]], 0, 14 /* CC::al */, $noreg :: (load (s32))
+ ; CHECK-NEXT: $s0 = COPY [[VLDRS]]
+ ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $s0
%0(p0) = COPY $r0
- ; CHECK: %[[P:[0-9]+]]:gpr = COPY $r0
%1(s32) = G_LOAD %0(p0) :: (load (s32))
- ; CHECK: %[[V:[0-9]+]]:spr = VLDRS %[[P]], 0, 14 /* CC::al */, $noreg
$s0 = COPY %1
- ; CHECK: $s0 = COPY %[[V]]
BX_RET 14, $noreg, implicit $s0
- ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $s0
...
---
name: test_load_f64
-# CHECK-LABEL: name: test_load_f64
legalized: true
regBankSelected: true
selected: false
-# CHECK: selected: true
registers:
- { id: 0, class: gprb }
- { id: 1, class: fprb }
@@ -743,45 +765,50 @@ body: |
bb.0:
liveins: $r0
+ ; CHECK-LABEL: name: test_load_f64
+ ; CHECK: liveins: $r0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $r0
+ ; CHECK-NEXT: [[VLDRD:%[0-9]+]]:dpr = VLDRD [[COPY]], 0, 14 /* CC::al */, $noreg :: (load (s64))
+ ; CHECK-NEXT: $d0 = COPY [[VLDRD]]
+ ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $d0
%0(p0) = COPY $r0
- ; CHECK: %[[P:[0-9]+]]:gpr = COPY $r0
%1(s64) = G_LOAD %0(p0) :: (load (s64))
- ; CHECK: %[[V:[0-9]+]]:dpr = VLDRD %[[P]], 0, 14 /* CC::al */, $noreg
$d0 = COPY %1
- ; CHECK: $d0 = COPY %[[V]]
BX_RET 14, $noreg, implicit $d0
- ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $d0
...
---
name: test_stores
-# CHECK-LABEL: name: test_stores
legalized: true
regBankSelected: true
selected: false
-# CHECK: selected: true
registers:
- { id: 0, class: gprb }
- { id: 1, class: fprb }
- { id: 2, class: fprb }
-# CHECK: id: [[P:[0-9]+]], class: gpr
-# CHECK: id: [[F32:[0-9]+]], class: spr
-# CHECK: id: [[F64:[0-9]+]], class: dpr
body: |
bb.0:
liveins: $r0, $s0, $d0
+ ; CHECK-LABEL: name: test_stores
+ ; CHECK: liveins: $r0, $s0, $d0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $r0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:spr = COPY $s0
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:dpr = COPY $d2
+ ; CHECK-NEXT: VSTRS [[COPY1]], [[COPY]], 0, 14 /* CC::al */, $noreg :: (store (s32))
+ ; CHECK-NEXT: VSTRD [[COPY2]], [[COPY]], 0, 14 /* CC::al */, $noreg :: (store (s64))
+ ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg
%0(p0) = COPY $r0
%1(s32) = COPY $s0
%2(s64) = COPY $d2
G_STORE %1(s32), %0(p0) :: (store (s32))
- ; CHECK: VSTRS %[[F32]], %[[P]], 0, 14 /* CC::al */, $noreg
G_STORE %2(s64), %0(p0) :: (store (s64))
- ; CHECK: VSTRD %[[F64]], %[[P]], 0, 14 /* CC::al */, $noreg
BX_RET 14, $noreg
...
@@ -833,11 +860,9 @@ body: |
...
---
name: test_soft_fp_double
-# CHECK-LABEL: name: test_soft_fp_double
legalized: true
regBankSelected: true
selected: false
-# CHECK: selected: true
registers:
- { id: 0, class: gprb }
- { id: 1, class: gprb }
@@ -848,24 +873,27 @@ body: |
bb.0:
liveins: $r0, $r1, $r2, $r3
+ ; CHECK-LABEL: name: test_soft_fp_double
+ ; CHECK: liveins: $r0, $r1, $r2, $r3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $r2
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $r3
+ ; CHECK-NEXT: [[VMOVDRR:%[0-9]+]]:dpr = VMOVDRR [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg
+ ; CHECK-NEXT: [[VMOVRRD:%[0-9]+]]:gpr, [[VMOVRRD1:%[0-9]+]]:gpr = VMOVRRD [[VMOVDRR]], 14 /* CC::al */, $noreg
+ ; CHECK-NEXT: $r0 = COPY [[VMOVRRD]]
+ ; CHECK-NEXT: $r1 = COPY [[VMOVRRD1]]
+ ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $r0, implicit $r1
%0(s32) = COPY $r2
- ; CHECK: [[IN1:%[0-9]+]]:gpr = COPY $r2
%1(s32) = COPY $r3
- ; CHECK: [[IN2:%[0-9]+]]:gpr = COPY $r3
%2(s64) = G_MERGE_VALUES %0(s32), %1(s32)
- ; CHECK: %[[DREG:[0-9]+]]:dpr = VMOVDRR [[IN1]], [[IN2]]
%3(s32), %4(s32) = G_UNMERGE_VALUES %2(s64)
- ; CHECK: [[OUT1:%[0-9]+]]:gpr, [[OUT2:%[0-9]+]]:gpr = VMOVRRD %[[DREG]]
$r0 = COPY %3
- ; CHECK: $r0 = COPY [[OUT1]]
$r1 = COPY %4
- ; CHECK: $r1 = COPY [[OUT2]]
BX_RET 14, $noreg, implicit $r0, implicit $r1
- ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $r0, implicit $r1
...
diff --git a/llvm/test/CodeGen/ARM/GlobalISel/select-pr35926.mir b/llvm/test/CodeGen/ARM/GlobalISel/select-pr35926.mir
index a6fc4da..fa982d8 100644
--- a/llvm/test/CodeGen/ARM/GlobalISel/select-pr35926.mir
+++ b/llvm/test/CodeGen/ARM/GlobalISel/select-pr35926.mir
@@ -31,7 +31,7 @@ body: |
; CHECK: [[COPY:%[0-9]+]]:dpr = COPY $d0
; CHECK: [[COPY1:%[0-9]+]]:dpr = COPY $d1
; CHECK: [[COPY2:%[0-9]+]]:dpr = COPY $d2
- ; CHECK: [[VFNMSD:%[0-9]+]]:dpr = VFNMSD [[COPY2]], [[COPY1]], [[COPY]], 14 /* CC::al */, $noreg
+ ; CHECK: [[VFNMSD:%[0-9]+]]:dpr = nofpexcept VFNMSD [[COPY2]], [[COPY1]], [[COPY]], 14 /* CC::al */, $noreg, implicit $fpscr
; CHECK: $d0 = COPY [[VFNMSD]]
; CHECK: MOVPCLR 14 /* CC::al */, $noreg, implicit $d0
%0:fprb(s64) = COPY $d0
diff --git a/llvm/test/CodeGen/ARM/bf16_fast_math.ll b/llvm/test/CodeGen/ARM/bf16_fast_math.ll
index 1b18ea6..5f7e1e6 100644
--- a/llvm/test/CodeGen/ARM/bf16_fast_math.ll
+++ b/llvm/test/CodeGen/ARM/bf16_fast_math.ll
@@ -17,7 +17,7 @@ define bfloat @normal_fadd(bfloat %x, bfloat %y) {
; CHECK-NOBF16-NEXT: [[VMOVSR:%[0-9]+]]:spr = VMOVSR killed [[MOVsi]], 14 /* CC::al */, $noreg
; CHECK-NOBF16-NEXT: [[MOVsi1:%[0-9]+]]:gpr = MOVsi [[COPY1]], 130, 14 /* CC::al */, $noreg, $noreg
; CHECK-NOBF16-NEXT: [[VMOVSR1:%[0-9]+]]:spr = VMOVSR killed [[MOVsi1]], 14 /* CC::al */, $noreg
- ; CHECK-NOBF16-NEXT: [[VADDS:%[0-9]+]]:spr = VADDS killed [[VMOVSR1]], killed [[VMOVSR]], 14 /* CC::al */, $noreg
+ ; CHECK-NOBF16-NEXT: [[VADDS:%[0-9]+]]:spr = nofpexcept VADDS killed [[VMOVSR1]], killed [[VMOVSR]], 14 /* CC::al */, $noreg, implicit $fpscr
; CHECK-NOBF16-NEXT: [[VMOVRS:%[0-9]+]]:gpr = VMOVRS killed [[VADDS]], 14 /* CC::al */, $noreg
; CHECK-NOBF16-NEXT: ADJCALLSTACKDOWN 0, 0, 14 /* CC::al */, $noreg, implicit-def dead $sp, implicit $sp
; CHECK-NOBF16-NEXT: $r0 = COPY [[VMOVRS]]
@@ -44,7 +44,7 @@ define bfloat @fast_fadd(bfloat %x, bfloat %y) {
; CHECK-NOBF16-NEXT: [[VMOVSR:%[0-9]+]]:spr = VMOVSR killed [[MOVsi]], 14 /* CC::al */, $noreg
; CHECK-NOBF16-NEXT: [[MOVsi1:%[0-9]+]]:gpr = MOVsi [[COPY1]], 130, 14 /* CC::al */, $noreg, $noreg
; CHECK-NOBF16-NEXT: [[VMOVSR1:%[0-9]+]]:spr = VMOVSR killed [[MOVsi1]], 14 /* CC::al */, $noreg
- ; CHECK-NOBF16-NEXT: [[VADDS:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VADDS killed [[VMOVSR1]], killed [[VMOVSR]], 14 /* CC::al */, $noreg
+ ; CHECK-NOBF16-NEXT: [[VADDS:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc nofpexcept VADDS killed [[VMOVSR1]], killed [[VMOVSR]], 14 /* CC::al */, $noreg, implicit $fpscr
; CHECK-NOBF16-NEXT: [[VMOVRS:%[0-9]+]]:gpr = VMOVRS killed [[VADDS]], 14 /* CC::al */, $noreg
; CHECK-NOBF16-NEXT: ADJCALLSTACKDOWN 0, 0, 14 /* CC::al */, $noreg, implicit-def dead $sp, implicit $sp
; CHECK-NOBF16-NEXT: $r0 = COPY [[VMOVRS]]
@@ -71,7 +71,7 @@ define bfloat @ninf_fadd(bfloat %x, bfloat %y) {
; CHECK-NOBF16-NEXT: [[VMOVSR:%[0-9]+]]:spr = VMOVSR killed [[MOVsi]], 14 /* CC::al */, $noreg
; CHECK-NOBF16-NEXT: [[MOVsi1:%[0-9]+]]:gpr = MOVsi [[COPY1]], 130, 14 /* CC::al */, $noreg, $noreg
; CHECK-NOBF16-NEXT: [[VMOVSR1:%[0-9]+]]:spr = VMOVSR killed [[MOVsi1]], 14 /* CC::al */, $noreg
- ; CHECK-NOBF16-NEXT: [[VADDS:%[0-9]+]]:spr = ninf VADDS killed [[VMOVSR1]], killed [[VMOVSR]], 14 /* CC::al */, $noreg
+ ; CHECK-NOBF16-NEXT: [[VADDS:%[0-9]+]]:spr = ninf nofpexcept VADDS killed [[VMOVSR1]], killed [[VMOVSR]], 14 /* CC::al */, $noreg, implicit $fpscr
; CHECK-NOBF16-NEXT: [[VMOVRS:%[0-9]+]]:gpr = VMOVRS killed [[VADDS]], 14 /* CC::al */, $noreg
; CHECK-NOBF16-NEXT: ADJCALLSTACKDOWN 0, 0, 14 /* CC::al */, $noreg, implicit-def dead $sp, implicit $sp
; CHECK-NOBF16-NEXT: $r0 = COPY [[VMOVRS]]
@@ -102,7 +102,7 @@ define bfloat @normal_fadd_sequence(bfloat %x, bfloat %y, bfloat %z) {
; CHECK-NOBF16-NEXT: [[VMOVSR:%[0-9]+]]:spr = VMOVSR killed [[MOVsi]], 14 /* CC::al */, $noreg
; CHECK-NOBF16-NEXT: [[MOVsi1:%[0-9]+]]:gpr = MOVsi [[COPY2]], 130, 14 /* CC::al */, $noreg, $noreg
; CHECK-NOBF16-NEXT: [[VMOVSR1:%[0-9]+]]:spr = VMOVSR killed [[MOVsi1]], 14 /* CC::al */, $noreg
- ; CHECK-NOBF16-NEXT: [[VADDS:%[0-9]+]]:spr = VADDS killed [[VMOVSR1]], killed [[VMOVSR]], 14 /* CC::al */, $noreg
+ ; CHECK-NOBF16-NEXT: [[VADDS:%[0-9]+]]:spr = nofpexcept VADDS killed [[VMOVSR1]], killed [[VMOVSR]], 14 /* CC::al */, $noreg, implicit $fpscr
; CHECK-NOBF16-NEXT: [[VMOVRS:%[0-9]+]]:gpr = VMOVRS killed [[VADDS]], 14 /* CC::al */, $noreg
; CHECK-NOBF16-NEXT: ADJCALLSTACKDOWN 0, 0, 14 /* CC::al */, $noreg, implicit-def dead $sp, implicit $sp
; CHECK-NOBF16-NEXT: $r0 = COPY [[VMOVRS]]
@@ -113,7 +113,7 @@ define bfloat @normal_fadd_sequence(bfloat %x, bfloat %y, bfloat %z) {
; CHECK-NOBF16-NEXT: [[VMOVSR2:%[0-9]+]]:spr = VMOVSR killed [[MOVsi2]], 14 /* CC::al */, $noreg
; CHECK-NOBF16-NEXT: [[MOVsi3:%[0-9]+]]:gpr = MOVsi [[COPY3]], 130, 14 /* CC::al */, $noreg, $noreg
; CHECK-NOBF16-NEXT: [[VMOVSR3:%[0-9]+]]:spr = VMOVSR killed [[MOVsi3]], 14 /* CC::al */, $noreg
- ; CHECK-NOBF16-NEXT: [[VADDS1:%[0-9]+]]:spr = VADDS killed [[VMOVSR3]], killed [[VMOVSR2]], 14 /* CC::al */, $noreg
+ ; CHECK-NOBF16-NEXT: [[VADDS1:%[0-9]+]]:spr = nofpexcept VADDS killed [[VMOVSR3]], killed [[VMOVSR2]], 14 /* CC::al */, $noreg, implicit $fpscr
; CHECK-NOBF16-NEXT: [[VMOVRS1:%[0-9]+]]:gpr = VMOVRS killed [[VADDS1]], 14 /* CC::al */, $noreg
; CHECK-NOBF16-NEXT: ADJCALLSTACKDOWN 0, 0, 14 /* CC::al */, $noreg, implicit-def dead $sp, implicit $sp
; CHECK-NOBF16-NEXT: $r0 = COPY [[VMOVRS1]]
@@ -142,10 +142,10 @@ define bfloat @nnan_ninf_contract_fadd_sequence(bfloat %x, bfloat %y, bfloat %z)
; CHECK-NOBF16-NEXT: [[VMOVSR:%[0-9]+]]:spr = VMOVSR killed [[MOVsi]], 14 /* CC::al */, $noreg
; CHECK-NOBF16-NEXT: [[MOVsi1:%[0-9]+]]:gpr = MOVsi [[COPY2]], 130, 14 /* CC::al */, $noreg, $noreg
; CHECK-NOBF16-NEXT: [[VMOVSR1:%[0-9]+]]:spr = VMOVSR killed [[MOVsi1]], 14 /* CC::al */, $noreg
- ; CHECK-NOBF16-NEXT: [[VADDS:%[0-9]+]]:spr = nnan ninf contract VADDS killed [[VMOVSR1]], killed [[VMOVSR]], 14 /* CC::al */, $noreg
+ ; CHECK-NOBF16-NEXT: [[VADDS:%[0-9]+]]:spr = nnan ninf contract nofpexcept VADDS killed [[VMOVSR1]], killed [[VMOVSR]], 14 /* CC::al */, $noreg, implicit $fpscr
; CHECK-NOBF16-NEXT: [[MOVsi2:%[0-9]+]]:gpr = MOVsi [[COPY]], 130, 14 /* CC::al */, $noreg, $noreg
; CHECK-NOBF16-NEXT: [[VMOVSR2:%[0-9]+]]:spr = VMOVSR killed [[MOVsi2]], 14 /* CC::al */, $noreg
- ; CHECK-NOBF16-NEXT: [[VADDS1:%[0-9]+]]:spr = nnan ninf contract VADDS killed [[VADDS]], killed [[VMOVSR2]], 14 /* CC::al */, $noreg
+ ; CHECK-NOBF16-NEXT: [[VADDS1:%[0-9]+]]:spr = nnan ninf contract nofpexcept VADDS killed [[VADDS]], killed [[VMOVSR2]], 14 /* CC::al */, $noreg, implicit $fpscr
; CHECK-NOBF16-NEXT: [[VMOVRS:%[0-9]+]]:gpr = VMOVRS killed [[VADDS1]], 14 /* CC::al */, $noreg
; CHECK-NOBF16-NEXT: ADJCALLSTACKDOWN 0, 0, 14 /* CC::al */, $noreg, implicit-def dead $sp, implicit $sp
; CHECK-NOBF16-NEXT: $r0 = COPY [[VMOVRS]]
@@ -174,7 +174,7 @@ define bfloat @ninf_fadd_sequence(bfloat %x, bfloat %y, bfloat %z) {
; CHECK-NOBF16-NEXT: [[VMOVSR:%[0-9]+]]:spr = VMOVSR killed [[MOVsi]], 14 /* CC::al */, $noreg
; CHECK-NOBF16-NEXT: [[MOVsi1:%[0-9]+]]:gpr = MOVsi [[COPY2]], 130, 14 /* CC::al */, $noreg, $noreg
; CHECK-NOBF16-NEXT: [[VMOVSR1:%[0-9]+]]:spr = VMOVSR killed [[MOVsi1]], 14 /* CC::al */, $noreg
- ; CHECK-NOBF16-NEXT: [[VADDS:%[0-9]+]]:spr = ninf VADDS killed [[VMOVSR1]], killed [[VMOVSR]], 14 /* CC::al */, $noreg
+ ; CHECK-NOBF16-NEXT: [[VADDS:%[0-9]+]]:spr = ninf nofpexcept VADDS killed [[VMOVSR1]], killed [[VMOVSR]], 14 /* CC::al */, $noreg, implicit $fpscr
; CHECK-NOBF16-NEXT: [[VMOVRS:%[0-9]+]]:gpr = VMOVRS killed [[VADDS]], 14 /* CC::al */, $noreg
; CHECK-NOBF16-NEXT: ADJCALLSTACKDOWN 0, 0, 14 /* CC::al */, $noreg, implicit-def dead $sp, implicit $sp
; CHECK-NOBF16-NEXT: $r0 = COPY [[VMOVRS]]
@@ -185,7 +185,7 @@ define bfloat @ninf_fadd_sequence(bfloat %x, bfloat %y, bfloat %z) {
; CHECK-NOBF16-NEXT: [[VMOVSR2:%[0-9]+]]:spr = VMOVSR killed [[MOVsi2]], 14 /* CC::al */, $noreg
; CHECK-NOBF16-NEXT: [[MOVsi3:%[0-9]+]]:gpr = MOVsi [[COPY3]], 130, 14 /* CC::al */, $noreg, $noreg
; CHECK-NOBF16-NEXT: [[VMOVSR3:%[0-9]+]]:spr = VMOVSR killed [[MOVsi3]], 14 /* CC::al */, $noreg
- ; CHECK-NOBF16-NEXT: [[VADDS1:%[0-9]+]]:spr = ninf VADDS killed [[VMOVSR3]], killed [[VMOVSR2]], 14 /* CC::al */, $noreg
+ ; CHECK-NOBF16-NEXT: [[VADDS1:%[0-9]+]]:spr = ninf nofpexcept VADDS killed [[VMOVSR3]], killed [[VMOVSR2]], 14 /* CC::al */, $noreg, implicit $fpscr
; CHECK-NOBF16-NEXT: [[VMOVRS1:%[0-9]+]]:gpr = VMOVRS killed [[VADDS1]], 14 /* CC::al */, $noreg
; CHECK-NOBF16-NEXT: ADJCALLSTACKDOWN 0, 0, 14 /* CC::al */, $noreg, implicit-def dead $sp, implicit $sp
; CHECK-NOBF16-NEXT: $r0 = COPY [[VMOVRS1]]
diff --git a/llvm/test/CodeGen/ARM/cortex-m7-wideops.mir b/llvm/test/CodeGen/ARM/cortex-m7-wideops.mir
index 1bee32f..fe23e85 100644
--- a/llvm/test/CodeGen/ARM/cortex-m7-wideops.mir
+++ b/llvm/test/CodeGen/ARM/cortex-m7-wideops.mir
@@ -22,15 +22,16 @@ body: |
; CHECK-LABEL: name: test_groups
; CHECK: liveins: $d0, $r0, $r1, $r2, $r3, $r4
- ; CHECK: renamable $d0 = VADDD killed renamable $d0, renamable $d0, 14 /* CC::al */, $noreg
- ; CHECK: renamable $r3 = t2ADDrr killed renamable $r3, renamable $r3, 14 /* CC::al */, $noreg, $noreg
- ; CHECK: renamable $s2 = VLDRS killed renamable $r0, 0, 14 /* CC::al */, $noreg
- ; CHECK: VSTRS killed renamable $s2, killed renamable $r1, 0, 14 /* CC::al */, $noreg
- ; CHECK: t2STRi12 killed renamable $r3, killed renamable $r2, 0, 14 /* CC::al */, $noreg
- ; CHECK: renamable $r4 = t2ADDrr killed renamable $r4, renamable $r4, 14 /* CC::al */, $noreg, $noreg
- ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit killed $d0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: renamable $s2 = VLDRS killed renamable $r0, 0, 14 /* CC::al */, $noreg
+ ; CHECK-NEXT: renamable $r3 = t2ADDrr killed renamable $r3, renamable $r3, 14 /* CC::al */, $noreg, $noreg
+ ; CHECK-NEXT: renamable $d0 = VADDD killed renamable $d0, renamable $d0, 14 /* CC::al */, $noreg, implicit $fpscr_rm
+ ; CHECK-NEXT: renamable $r4 = t2ADDrr killed renamable $r4, renamable $r4, 14 /* CC::al */, $noreg, $noreg
+ ; CHECK-NEXT: VSTRS killed renamable $s2, killed renamable $r1, 0, 14 /* CC::al */, $noreg
+ ; CHECK-NEXT: t2STRi12 killed renamable $r3, killed renamable $r2, 0, 14 /* CC::al */, $noreg
+ ; CHECK-NEXT: tBX_RET 14 /* CC::al */, $noreg, implicit killed $d0
renamable $s2 = VLDRS killed renamable $r0, 0, 14 /* CC::al */, $noreg
- renamable $d0 = VADDD killed renamable $d0, renamable $d0, 14 /* CC::al */, $noreg
+ renamable $d0 = VADDD killed renamable $d0, renamable $d0, 14 /* CC::al */, $noreg, implicit $fpscr_rm
VSTRS killed renamable $s2, killed renamable $r1, 0, 14 /* CC::al */, $noreg
renamable $r3 = t2ADDrr killed renamable $r3, renamable $r3, 14 /* CC::al */, $noreg, $noreg
t2STRi12 killed renamable $r3, killed renamable $r2, 0, 14 /* CC::al */, $noreg
diff --git a/llvm/test/CodeGen/ARM/fp16-litpool-arm.mir b/llvm/test/CodeGen/ARM/fp16-litpool-arm.mir
index 8e671c9..f5b2e98 100644
--- a/llvm/test/CodeGen/ARM/fp16-litpool-arm.mir
+++ b/llvm/test/CodeGen/ARM/fp16-litpool-arm.mir
@@ -81,7 +81,7 @@ body: |
STRi12 killed renamable $r1, killed renamable $r0, 0, 14, $noreg :: (volatile store (s32) into %ir.LL, align 8)
dead renamable $r0 = SPACE 8920, undef renamable $r0
renamable $s2 = VLDRH $sp, 1, 14, $noreg :: (volatile dereferenceable load (s16) from %ir.S)
- renamable $s0 = VADDH killed renamable $s2, killed renamable $s0, 14, $noreg
+ renamable $s0 = VADDH killed renamable $s2, killed renamable $s0, 14, $noreg, implicit $fpscr_rm
VSTRH renamable $s0, $sp, 1, 14, $noreg :: (volatile store (s16) into %ir.S)
renamable $r0 = VMOVRH killed renamable $s0, 14, $noreg
dead renamable $r1 = SPACE 1350, undef renamable $r0
diff --git a/llvm/test/CodeGen/ARM/fp16-litpool-thumb.mir b/llvm/test/CodeGen/ARM/fp16-litpool-thumb.mir
index 03ddd80..4b66476 100644
--- a/llvm/test/CodeGen/ARM/fp16-litpool-thumb.mir
+++ b/llvm/test/CodeGen/ARM/fp16-litpool-thumb.mir
@@ -72,7 +72,7 @@ body: |
renamable $s2 = VLDRH $sp, 1, 14, $noreg :: (volatile dereferenceable load (s16) from %ir.S)
renamable $s0 = VLDRH %const.1, 0, 14, $noreg :: (load (s16) from constant-pool)
dead renamable $r0 = SPACE 1230, undef renamable $r0
- renamable $s0 = VADDH killed renamable $s2, killed renamable $s0, 14, $noreg
+ renamable $s0 = VADDH killed renamable $s2, killed renamable $s0, 14, $noreg, implicit $fpscr_rm
VSTRH renamable $s0, $sp, 1, 14, $noreg :: (volatile store (s16) into %ir.S)
renamable $r0 = VMOVRH killed renamable $s0, 14, $noreg
dead renamable $r1 = SPACE 1330, undef renamable $r0
diff --git a/llvm/test/CodeGen/ARM/fp16-litpool2-arm.mir b/llvm/test/CodeGen/ARM/fp16-litpool2-arm.mir
index 46f028b..c16a62a 100644
--- a/llvm/test/CodeGen/ARM/fp16-litpool2-arm.mir
+++ b/llvm/test/CodeGen/ARM/fp16-litpool2-arm.mir
@@ -89,7 +89,7 @@ body: |
$sp = frame-setup SUBri $sp, 4, 14, $noreg, $noreg
frame-setup CFI_INSTRUCTION def_cfa_offset 4
renamable $s0 = VLDRH %const.0, 0, 14, $noreg :: (load (s16) from constant-pool)
- VCMPZH renamable $s0, 14, $noreg, implicit-def $fpscr_nzcv
+ VCMPZH renamable $s0, 14, $noreg, implicit-def $fpscr_nzcv, implicit $fpscr_rm
VSTRH killed renamable $s0, $sp, 1, 14, $noreg :: (store (s16) into %ir.res)
FMSTAT 14, $noreg, implicit-def $cpsr, implicit killed $fpscr_nzcv
Bcc %bb.2, 0, killed $cpsr
diff --git a/llvm/test/CodeGen/ARM/fp16-litpool3-arm.mir b/llvm/test/CodeGen/ARM/fp16-litpool3-arm.mir
index 5a03fcd..049b7d9 100644
--- a/llvm/test/CodeGen/ARM/fp16-litpool3-arm.mir
+++ b/llvm/test/CodeGen/ARM/fp16-litpool3-arm.mir
@@ -95,7 +95,7 @@ body: |
$sp = frame-setup SUBri $sp, 4, 14, $noreg, $noreg
frame-setup CFI_INSTRUCTION def_cfa_offset 4
renamable $s0 = VLDRH %const.0, 0, 14, $noreg :: (load (s16) from constant-pool)
- VCMPZH renamable $s0, 14, $noreg, implicit-def $fpscr_nzcv
+ VCMPZH renamable $s0, 14, $noreg, implicit-def $fpscr_nzcv, implicit $fpscr_rm
VSTRH killed renamable $s0, $sp, 1, 14, $noreg :: (store (s16) into %ir.res)
FMSTAT 14, $noreg, implicit-def $cpsr, implicit killed $fpscr_nzcv
Bcc %bb.2, 0, killed $cpsr
diff --git a/llvm/test/CodeGen/ARM/fp16_fast_math.ll b/llvm/test/CodeGen/ARM/fp16_fast_math.ll
index 165eb4b..47e1f84f 100644
--- a/llvm/test/CodeGen/ARM/fp16_fast_math.ll
+++ b/llvm/test/CodeGen/ARM/fp16_fast_math.ll
@@ -16,11 +16,11 @@ define half @normal_fadd(half %x, half %y) {
; CHECK-CVT-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $r0
; CHECK-CVT-NEXT: [[COPY2:%[0-9]+]]:spr = COPY [[COPY1]]
; CHECK-CVT-NEXT: [[COPY3:%[0-9]+]]:spr = COPY [[COPY]]
- ; CHECK-CVT-NEXT: [[VCVTBHS:%[0-9]+]]:spr = VCVTBHS killed [[COPY3]], 14 /* CC::al */, $noreg
- ; CHECK-CVT-NEXT: [[VCVTBHS1:%[0-9]+]]:spr = VCVTBHS killed [[COPY2]], 14 /* CC::al */, $noreg
- ; CHECK-CVT-NEXT: [[VADDS:%[0-9]+]]:spr = VADDS killed [[VCVTBHS1]], killed [[VCVTBHS]], 14 /* CC::al */, $noreg
+ ; CHECK-CVT-NEXT: [[VCVTBHS:%[0-9]+]]:spr = nofpexcept VCVTBHS killed [[COPY3]], 14 /* CC::al */, $noreg, implicit $fpscr
+ ; CHECK-CVT-NEXT: [[VCVTBHS1:%[0-9]+]]:spr = nofpexcept VCVTBHS killed [[COPY2]], 14 /* CC::al */, $noreg, implicit $fpscr
+ ; CHECK-CVT-NEXT: [[VADDS:%[0-9]+]]:spr = nofpexcept VADDS killed [[VCVTBHS1]], killed [[VCVTBHS]], 14 /* CC::al */, $noreg, implicit $fpscr
; CHECK-CVT-NEXT: [[DEF:%[0-9]+]]:spr = IMPLICIT_DEF
- ; CHECK-CVT-NEXT: [[VCVTBSH:%[0-9]+]]:spr = VCVTBSH [[DEF]], killed [[VADDS]], 14 /* CC::al */, $noreg
+ ; CHECK-CVT-NEXT: [[VCVTBSH:%[0-9]+]]:spr = nofpexcept VCVTBSH [[DEF]], killed [[VADDS]], 14 /* CC::al */, $noreg, implicit $fpscr
; CHECK-CVT-NEXT: [[COPY4:%[0-9]+]]:gpr = COPY killed [[VCVTBSH]]
; CHECK-CVT-NEXT: $r0 = COPY [[COPY4]]
; CHECK-CVT-NEXT: MOVPCLR 14 /* CC::al */, $noreg, implicit $r0
@@ -33,7 +33,7 @@ define half @normal_fadd(half %x, half %y) {
; CHECK-FP16-NEXT: [[COPY1:%[0-9]+]]:rgpr = COPY $r0
; CHECK-FP16-NEXT: [[VMOVHR:%[0-9]+]]:hpr = VMOVHR [[COPY]], 14, $noreg
; CHECK-FP16-NEXT: [[VMOVHR1:%[0-9]+]]:hpr = VMOVHR [[COPY1]], 14, $noreg
- ; CHECK-FP16-NEXT: [[VADDH:%[0-9]+]]:hpr = VADDH killed [[VMOVHR1]], killed [[VMOVHR]], 14, $noreg
+ ; CHECK-FP16-NEXT: [[VADDH:%[0-9]+]]:hpr = nofpexcept VADDH killed [[VMOVHR1]], killed [[VMOVHR]], 14, $noreg, implicit $fpscr
; CHECK-FP16-NEXT: $r0 = COPY [[VADDH]]
; CHECK-FP16-NEXT: MOVPCLR 14 /* CC::al */, $noreg, implicit $r0
entry:
@@ -50,11 +50,11 @@ define half @fast_fadd(half %x, half %y) {
; CHECK-CVT-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $r0
; CHECK-CVT-NEXT: [[COPY2:%[0-9]+]]:spr = COPY [[COPY1]]
; CHECK-CVT-NEXT: [[COPY3:%[0-9]+]]:spr = COPY [[COPY]]
- ; CHECK-CVT-NEXT: [[VCVTBHS:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VCVTBHS killed [[COPY3]], 14 /* CC::al */, $noreg
- ; CHECK-CVT-NEXT: [[VCVTBHS1:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VCVTBHS killed [[COPY2]], 14 /* CC::al */, $noreg
- ; CHECK-CVT-NEXT: [[VADDS:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VADDS killed [[VCVTBHS1]], killed [[VCVTBHS]], 14 /* CC::al */, $noreg
+ ; CHECK-CVT-NEXT: [[VCVTBHS:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc nofpexcept VCVTBHS killed [[COPY3]], 14 /* CC::al */, $noreg, implicit $fpscr
+ ; CHECK-CVT-NEXT: [[VCVTBHS1:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc nofpexcept VCVTBHS killed [[COPY2]], 14 /* CC::al */, $noreg, implicit $fpscr
+ ; CHECK-CVT-NEXT: [[VADDS:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc nofpexcept VADDS killed [[VCVTBHS1]], killed [[VCVTBHS]], 14 /* CC::al */, $noreg, implicit $fpscr
; CHECK-CVT-NEXT: [[DEF:%[0-9]+]]:spr = IMPLICIT_DEF
- ; CHECK-CVT-NEXT: [[VCVTBSH:%[0-9]+]]:spr = VCVTBSH [[DEF]], killed [[VADDS]], 14 /* CC::al */, $noreg
+ ; CHECK-CVT-NEXT: [[VCVTBSH:%[0-9]+]]:spr = nofpexcept VCVTBSH [[DEF]], killed [[VADDS]], 14 /* CC::al */, $noreg, implicit $fpscr
; CHECK-CVT-NEXT: [[COPY4:%[0-9]+]]:gpr = COPY killed [[VCVTBSH]]
; CHECK-CVT-NEXT: $r0 = COPY [[COPY4]]
; CHECK-CVT-NEXT: MOVPCLR 14 /* CC::al */, $noreg, implicit $r0
@@ -67,7 +67,7 @@ define half @fast_fadd(half %x, half %y) {
; CHECK-FP16-NEXT: [[COPY1:%[0-9]+]]:rgpr = COPY $r0
; CHECK-FP16-NEXT: [[VMOVHR:%[0-9]+]]:hpr = VMOVHR [[COPY]], 14, $noreg
; CHECK-FP16-NEXT: [[VMOVHR1:%[0-9]+]]:hpr = VMOVHR [[COPY1]], 14, $noreg
- ; CHECK-FP16-NEXT: [[VADDH:%[0-9]+]]:hpr = nnan ninf nsz arcp contract afn reassoc VADDH killed [[VMOVHR1]], killed [[VMOVHR]], 14, $noreg
+ ; CHECK-FP16-NEXT: [[VADDH:%[0-9]+]]:hpr = nnan ninf nsz arcp contract afn reassoc nofpexcept VADDH killed [[VMOVHR1]], killed [[VMOVHR]], 14, $noreg, implicit $fpscr
; CHECK-FP16-NEXT: $r0 = COPY [[VADDH]]
; CHECK-FP16-NEXT: MOVPCLR 14 /* CC::al */, $noreg, implicit $r0
entry:
@@ -84,11 +84,11 @@ define half @ninf_fadd(half %x, half %y) {
; CHECK-CVT-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $r0
; CHECK-CVT-NEXT: [[COPY2:%[0-9]+]]:spr = COPY [[COPY1]]
; CHECK-CVT-NEXT: [[COPY3:%[0-9]+]]:spr = COPY [[COPY]]
- ; CHECK-CVT-NEXT: [[VCVTBHS:%[0-9]+]]:spr = ninf VCVTBHS killed [[COPY3]], 14 /* CC::al */, $noreg
- ; CHECK-CVT-NEXT: [[VCVTBHS1:%[0-9]+]]:spr = ninf VCVTBHS killed [[COPY2]], 14 /* CC::al */, $noreg
- ; CHECK-CVT-NEXT: [[VADDS:%[0-9]+]]:spr = ninf VADDS killed [[VCVTBHS1]], killed [[VCVTBHS]], 14 /* CC::al */, $noreg
+ ; CHECK-CVT-NEXT: [[VCVTBHS:%[0-9]+]]:spr = ninf nofpexcept VCVTBHS killed [[COPY3]], 14 /* CC::al */, $noreg, implicit $fpscr
+ ; CHECK-CVT-NEXT: [[VCVTBHS1:%[0-9]+]]:spr = ninf nofpexcept VCVTBHS killed [[COPY2]], 14 /* CC::al */, $noreg, implicit $fpscr
+ ; CHECK-CVT-NEXT: [[VADDS:%[0-9]+]]:spr = ninf nofpexcept VADDS killed [[VCVTBHS1]], killed [[VCVTBHS]], 14 /* CC::al */, $noreg, implicit $fpscr
; CHECK-CVT-NEXT: [[DEF:%[0-9]+]]:spr = IMPLICIT_DEF
- ; CHECK-CVT-NEXT: [[VCVTBSH:%[0-9]+]]:spr = VCVTBSH [[DEF]], killed [[VADDS]], 14 /* CC::al */, $noreg
+ ; CHECK-CVT-NEXT: [[VCVTBSH:%[0-9]+]]:spr = nofpexcept VCVTBSH [[DEF]], killed [[VADDS]], 14 /* CC::al */, $noreg, implicit $fpscr
; CHECK-CVT-NEXT: [[COPY4:%[0-9]+]]:gpr = COPY killed [[VCVTBSH]]
; CHECK-CVT-NEXT: $r0 = COPY [[COPY4]]
; CHECK-CVT-NEXT: MOVPCLR 14 /* CC::al */, $noreg, implicit $r0
@@ -101,7 +101,7 @@ define half @ninf_fadd(half %x, half %y) {
; CHECK-FP16-NEXT: [[COPY1:%[0-9]+]]:rgpr = COPY $r0
; CHECK-FP16-NEXT: [[VMOVHR:%[0-9]+]]:hpr = VMOVHR [[COPY]], 14, $noreg
; CHECK-FP16-NEXT: [[VMOVHR1:%[0-9]+]]:hpr = VMOVHR [[COPY1]], 14, $noreg
- ; CHECK-FP16-NEXT: [[VADDH:%[0-9]+]]:hpr = ninf VADDH killed [[VMOVHR1]], killed [[VMOVHR]], 14, $noreg
+ ; CHECK-FP16-NEXT: [[VADDH:%[0-9]+]]:hpr = ninf nofpexcept VADDH killed [[VMOVHR1]], killed [[VMOVHR]], 14, $noreg, implicit $fpscr
; CHECK-FP16-NEXT: $r0 = COPY [[VADDH]]
; CHECK-FP16-NEXT: MOVPCLR 14 /* CC::al */, $noreg, implicit $r0
entry:
@@ -122,19 +122,19 @@ define half @normal_fadd_sequence(half %x, half %y, half %z) {
; CHECK-CVT-NEXT: [[COPY2:%[0-9]+]]:gpr = COPY $r0
; CHECK-CVT-NEXT: [[COPY3:%[0-9]+]]:spr = COPY [[COPY2]]
; CHECK-CVT-NEXT: [[COPY4:%[0-9]+]]:spr = COPY [[COPY1]]
- ; CHECK-CVT-NEXT: [[VCVTBHS:%[0-9]+]]:spr = VCVTBHS killed [[COPY4]], 14 /* CC::al */, $noreg
- ; CHECK-CVT-NEXT: [[VCVTBHS1:%[0-9]+]]:spr = VCVTBHS killed [[COPY3]], 14 /* CC::al */, $noreg
- ; CHECK-CVT-NEXT: [[VADDS:%[0-9]+]]:spr = VADDS killed [[VCVTBHS1]], killed [[VCVTBHS]], 14 /* CC::al */, $noreg
+ ; CHECK-CVT-NEXT: [[VCVTBHS:%[0-9]+]]:spr = nofpexcept VCVTBHS killed [[COPY4]], 14 /* CC::al */, $noreg, implicit $fpscr
+ ; CHECK-CVT-NEXT: [[VCVTBHS1:%[0-9]+]]:spr = nofpexcept VCVTBHS killed [[COPY3]], 14 /* CC::al */, $noreg, implicit $fpscr
+ ; CHECK-CVT-NEXT: [[VADDS:%[0-9]+]]:spr = nofpexcept VADDS killed [[VCVTBHS1]], killed [[VCVTBHS]], 14 /* CC::al */, $noreg, implicit $fpscr
; CHECK-CVT-NEXT: [[COPY5:%[0-9]+]]:spr = COPY [[COPY]]
- ; CHECK-CVT-NEXT: [[VCVTBHS2:%[0-9]+]]:spr = VCVTBHS killed [[COPY5]], 14 /* CC::al */, $noreg
+ ; CHECK-CVT-NEXT: [[VCVTBHS2:%[0-9]+]]:spr = nofpexcept VCVTBHS killed [[COPY5]], 14 /* CC::al */, $noreg, implicit $fpscr
; CHECK-CVT-NEXT: [[DEF:%[0-9]+]]:spr = IMPLICIT_DEF
- ; CHECK-CVT-NEXT: [[VCVTBSH:%[0-9]+]]:spr = VCVTBSH [[DEF]], killed [[VADDS]], 14 /* CC::al */, $noreg
+ ; CHECK-CVT-NEXT: [[VCVTBSH:%[0-9]+]]:spr = nofpexcept VCVTBSH [[DEF]], killed [[VADDS]], 14 /* CC::al */, $noreg, implicit $fpscr
; CHECK-CVT-NEXT: [[COPY6:%[0-9]+]]:gpr = COPY killed [[VCVTBSH]]
; CHECK-CVT-NEXT: [[COPY7:%[0-9]+]]:spr = COPY killed [[COPY6]]
- ; CHECK-CVT-NEXT: [[VCVTBHS3:%[0-9]+]]:spr = VCVTBHS killed [[COPY7]], 14 /* CC::al */, $noreg
- ; CHECK-CVT-NEXT: [[VADDS1:%[0-9]+]]:spr = VADDS killed [[VCVTBHS3]], killed [[VCVTBHS2]], 14 /* CC::al */, $noreg
+ ; CHECK-CVT-NEXT: [[VCVTBHS3:%[0-9]+]]:spr = nofpexcept VCVTBHS killed [[COPY7]], 14 /* CC::al */, $noreg, implicit $fpscr
+ ; CHECK-CVT-NEXT: [[VADDS1:%[0-9]+]]:spr = nofpexcept VADDS killed [[VCVTBHS3]], killed [[VCVTBHS2]], 14 /* CC::al */, $noreg, implicit $fpscr
; CHECK-CVT-NEXT: [[DEF1:%[0-9]+]]:spr = IMPLICIT_DEF
- ; CHECK-CVT-NEXT: [[VCVTBSH1:%[0-9]+]]:spr = VCVTBSH [[DEF1]], killed [[VADDS1]], 14 /* CC::al */, $noreg
+ ; CHECK-CVT-NEXT: [[VCVTBSH1:%[0-9]+]]:spr = nofpexcept VCVTBSH [[DEF1]], killed [[VADDS1]], 14 /* CC::al */, $noreg, implicit $fpscr
; CHECK-CVT-NEXT: [[COPY8:%[0-9]+]]:gpr = COPY killed [[VCVTBSH1]]
; CHECK-CVT-NEXT: $r0 = COPY [[COPY8]]
; CHECK-CVT-NEXT: MOVPCLR 14 /* CC::al */, $noreg, implicit $r0
@@ -148,9 +148,9 @@ define half @normal_fadd_sequence(half %x, half %y, half %z) {
; CHECK-FP16-NEXT: [[COPY2:%[0-9]+]]:rgpr = COPY $r0
; CHECK-FP16-NEXT: [[VMOVHR:%[0-9]+]]:hpr = VMOVHR [[COPY1]], 14, $noreg
; CHECK-FP16-NEXT: [[VMOVHR1:%[0-9]+]]:hpr = VMOVHR [[COPY2]], 14, $noreg
- ; CHECK-FP16-NEXT: [[VADDH:%[0-9]+]]:hpr = VADDH killed [[VMOVHR1]], killed [[VMOVHR]], 14, $noreg
+ ; CHECK-FP16-NEXT: [[VADDH:%[0-9]+]]:hpr = nofpexcept VADDH killed [[VMOVHR1]], killed [[VMOVHR]], 14, $noreg, implicit $fpscr
; CHECK-FP16-NEXT: [[VMOVHR2:%[0-9]+]]:hpr = VMOVHR [[COPY]], 14, $noreg
- ; CHECK-FP16-NEXT: [[VADDH1:%[0-9]+]]:hpr = VADDH killed [[VADDH]], killed [[VMOVHR2]], 14, $noreg
+ ; CHECK-FP16-NEXT: [[VADDH1:%[0-9]+]]:hpr = nofpexcept VADDH killed [[VADDH]], killed [[VMOVHR2]], 14, $noreg, implicit $fpscr
; CHECK-FP16-NEXT: $r0 = COPY [[VADDH1]]
; CHECK-FP16-NEXT: MOVPCLR 14 /* CC::al */, $noreg, implicit $r0
entry:
@@ -169,14 +169,14 @@ define half @nnan_ninf_contract_fadd_sequence(half %x, half %y, half %z) {
; CHECK-CVT-NEXT: [[COPY2:%[0-9]+]]:gpr = COPY $r0
; CHECK-CVT-NEXT: [[COPY3:%[0-9]+]]:spr = COPY [[COPY2]]
; CHECK-CVT-NEXT: [[COPY4:%[0-9]+]]:spr = COPY [[COPY1]]
- ; CHECK-CVT-NEXT: [[VCVTBHS:%[0-9]+]]:spr = nnan ninf contract VCVTBHS killed [[COPY4]], 14 /* CC::al */, $noreg
- ; CHECK-CVT-NEXT: [[VCVTBHS1:%[0-9]+]]:spr = nnan ninf contract VCVTBHS killed [[COPY3]], 14 /* CC::al */, $noreg
- ; CHECK-CVT-NEXT: [[VADDS:%[0-9]+]]:spr = nnan ninf contract VADDS killed [[VCVTBHS1]], killed [[VCVTBHS]], 14 /* CC::al */, $noreg
+ ; CHECK-CVT-NEXT: [[VCVTBHS:%[0-9]+]]:spr = nnan ninf contract nofpexcept VCVTBHS killed [[COPY4]], 14 /* CC::al */, $noreg, implicit $fpscr
+ ; CHECK-CVT-NEXT: [[VCVTBHS1:%[0-9]+]]:spr = nnan ninf contract nofpexcept VCVTBHS killed [[COPY3]], 14 /* CC::al */, $noreg, implicit $fpscr
+ ; CHECK-CVT-NEXT: [[VADDS:%[0-9]+]]:spr = nnan ninf contract nofpexcept VADDS killed [[VCVTBHS1]], killed [[VCVTBHS]], 14 /* CC::al */, $noreg, implicit $fpscr
; CHECK-CVT-NEXT: [[COPY5:%[0-9]+]]:spr = COPY [[COPY]]
- ; CHECK-CVT-NEXT: [[VCVTBHS2:%[0-9]+]]:spr = nnan ninf contract VCVTBHS killed [[COPY5]], 14 /* CC::al */, $noreg
- ; CHECK-CVT-NEXT: [[VADDS1:%[0-9]+]]:spr = nnan ninf contract VADDS killed [[VADDS]], killed [[VCVTBHS2]], 14 /* CC::al */, $noreg
+ ; CHECK-CVT-NEXT: [[VCVTBHS2:%[0-9]+]]:spr = nnan ninf contract nofpexcept VCVTBHS killed [[COPY5]], 14 /* CC::al */, $noreg, implicit $fpscr
+ ; CHECK-CVT-NEXT: [[VADDS1:%[0-9]+]]:spr = nnan ninf contract nofpexcept VADDS killed [[VADDS]], killed [[VCVTBHS2]], 14 /* CC::al */, $noreg, implicit $fpscr
; CHECK-CVT-NEXT: [[DEF:%[0-9]+]]:spr = IMPLICIT_DEF
- ; CHECK-CVT-NEXT: [[VCVTBSH:%[0-9]+]]:spr = VCVTBSH [[DEF]], killed [[VADDS1]], 14 /* CC::al */, $noreg
+ ; CHECK-CVT-NEXT: [[VCVTBSH:%[0-9]+]]:spr = nofpexcept VCVTBSH [[DEF]], killed [[VADDS1]], 14 /* CC::al */, $noreg, implicit $fpscr
; CHECK-CVT-NEXT: [[COPY6:%[0-9]+]]:gpr = COPY killed [[VCVTBSH]]
; CHECK-CVT-NEXT: $r0 = COPY [[COPY6]]
; CHECK-CVT-NEXT: MOVPCLR 14 /* CC::al */, $noreg, implicit $r0
@@ -190,9 +190,9 @@ define half @nnan_ninf_contract_fadd_sequence(half %x, half %y, half %z) {
; CHECK-FP16-NEXT: [[COPY2:%[0-9]+]]:rgpr = COPY $r0
; CHECK-FP16-NEXT: [[VMOVHR:%[0-9]+]]:hpr = VMOVHR [[COPY1]], 14, $noreg
; CHECK-FP16-NEXT: [[VMOVHR1:%[0-9]+]]:hpr = VMOVHR [[COPY2]], 14, $noreg
- ; CHECK-FP16-NEXT: [[VADDH:%[0-9]+]]:hpr = nnan ninf contract VADDH killed [[VMOVHR1]], killed [[VMOVHR]], 14, $noreg
+ ; CHECK-FP16-NEXT: [[VADDH:%[0-9]+]]:hpr = nnan ninf contract nofpexcept VADDH killed [[VMOVHR1]], killed [[VMOVHR]], 14, $noreg, implicit $fpscr
; CHECK-FP16-NEXT: [[VMOVHR2:%[0-9]+]]:hpr = VMOVHR [[COPY]], 14, $noreg
- ; CHECK-FP16-NEXT: [[VADDH1:%[0-9]+]]:hpr = nnan ninf contract VADDH killed [[VADDH]], killed [[VMOVHR2]], 14, $noreg
+ ; CHECK-FP16-NEXT: [[VADDH1:%[0-9]+]]:hpr = nnan ninf contract nofpexcept VADDH killed [[VADDH]], killed [[VMOVHR2]], 14, $noreg, implicit $fpscr
; CHECK-FP16-NEXT: $r0 = COPY [[VADDH1]]
; CHECK-FP16-NEXT: MOVPCLR 14 /* CC::al */, $noreg, implicit $r0
entry:
@@ -211,19 +211,19 @@ define half @ninf_fadd_sequence(half %x, half %y, half %z) {
; CHECK-CVT-NEXT: [[COPY2:%[0-9]+]]:gpr = COPY $r0
; CHECK-CVT-NEXT: [[COPY3:%[0-9]+]]:spr = COPY [[COPY2]]
; CHECK-CVT-NEXT: [[COPY4:%[0-9]+]]:spr = COPY [[COPY1]]
- ; CHECK-CVT-NEXT: [[VCVTBHS:%[0-9]+]]:spr = ninf VCVTBHS killed [[COPY4]], 14 /* CC::al */, $noreg
- ; CHECK-CVT-NEXT: [[VCVTBHS1:%[0-9]+]]:spr = ninf VCVTBHS killed [[COPY3]], 14 /* CC::al */, $noreg
- ; CHECK-CVT-NEXT: [[VADDS:%[0-9]+]]:spr = ninf VADDS killed [[VCVTBHS1]], killed [[VCVTBHS]], 14 /* CC::al */, $noreg
+ ; CHECK-CVT-NEXT: [[VCVTBHS:%[0-9]+]]:spr = ninf nofpexcept VCVTBHS killed [[COPY4]], 14 /* CC::al */, $noreg, implicit $fpscr
+ ; CHECK-CVT-NEXT: [[VCVTBHS1:%[0-9]+]]:spr = ninf nofpexcept VCVTBHS killed [[COPY3]], 14 /* CC::al */, $noreg, implicit $fpscr
+ ; CHECK-CVT-NEXT: [[VADDS:%[0-9]+]]:spr = ninf nofpexcept VADDS killed [[VCVTBHS1]], killed [[VCVTBHS]], 14 /* CC::al */, $noreg, implicit $fpscr
; CHECK-CVT-NEXT: [[COPY5:%[0-9]+]]:spr = COPY [[COPY]]
- ; CHECK-CVT-NEXT: [[VCVTBHS2:%[0-9]+]]:spr = ninf VCVTBHS killed [[COPY5]], 14 /* CC::al */, $noreg
+ ; CHECK-CVT-NEXT: [[VCVTBHS2:%[0-9]+]]:spr = ninf nofpexcept VCVTBHS killed [[COPY5]], 14 /* CC::al */, $noreg, implicit $fpscr
; CHECK-CVT-NEXT: [[DEF:%[0-9]+]]:spr = IMPLICIT_DEF
- ; CHECK-CVT-NEXT: [[VCVTBSH:%[0-9]+]]:spr = VCVTBSH [[DEF]], killed [[VADDS]], 14 /* CC::al */, $noreg
+ ; CHECK-CVT-NEXT: [[VCVTBSH:%[0-9]+]]:spr = nofpexcept VCVTBSH [[DEF]], killed [[VADDS]], 14 /* CC::al */, $noreg, implicit $fpscr
; CHECK-CVT-NEXT: [[COPY6:%[0-9]+]]:gpr = COPY killed [[VCVTBSH]]
; CHECK-CVT-NEXT: [[COPY7:%[0-9]+]]:spr = COPY killed [[COPY6]]
- ; CHECK-CVT-NEXT: [[VCVTBHS3:%[0-9]+]]:spr = ninf VCVTBHS killed [[COPY7]], 14 /* CC::al */, $noreg
- ; CHECK-CVT-NEXT: [[VADDS1:%[0-9]+]]:spr = ninf VADDS killed [[VCVTBHS3]], killed [[VCVTBHS2]], 14 /* CC::al */, $noreg
+ ; CHECK-CVT-NEXT: [[VCVTBHS3:%[0-9]+]]:spr = ninf nofpexcept VCVTBHS killed [[COPY7]], 14 /* CC::al */, $noreg, implicit $fpscr
+ ; CHECK-CVT-NEXT: [[VADDS1:%[0-9]+]]:spr = ninf nofpexcept VADDS killed [[VCVTBHS3]], killed [[VCVTBHS2]], 14 /* CC::al */, $noreg, implicit $fpscr
; CHECK-CVT-NEXT: [[DEF1:%[0-9]+]]:spr = IMPLICIT_DEF
- ; CHECK-CVT-NEXT: [[VCVTBSH1:%[0-9]+]]:spr = VCVTBSH [[DEF1]], killed [[VADDS1]], 14 /* CC::al */, $noreg
+ ; CHECK-CVT-NEXT: [[VCVTBSH1:%[0-9]+]]:spr = nofpexcept VCVTBSH [[DEF1]], killed [[VADDS1]], 14 /* CC::al */, $noreg, implicit $fpscr
; CHECK-CVT-NEXT: [[COPY8:%[0-9]+]]:gpr = COPY killed [[VCVTBSH1]]
; CHECK-CVT-NEXT: $r0 = COPY [[COPY8]]
; CHECK-CVT-NEXT: MOVPCLR 14 /* CC::al */, $noreg, implicit $r0
@@ -237,9 +237,9 @@ define half @ninf_fadd_sequence(half %x, half %y, half %z) {
; CHECK-FP16-NEXT: [[COPY2:%[0-9]+]]:rgpr = COPY $r0
; CHECK-FP16-NEXT: [[VMOVHR:%[0-9]+]]:hpr = VMOVHR [[COPY1]], 14, $noreg
; CHECK-FP16-NEXT: [[VMOVHR1:%[0-9]+]]:hpr = VMOVHR [[COPY2]], 14, $noreg
- ; CHECK-FP16-NEXT: [[VADDH:%[0-9]+]]:hpr = ninf VADDH killed [[VMOVHR1]], killed [[VMOVHR]], 14, $noreg
+ ; CHECK-FP16-NEXT: [[VADDH:%[0-9]+]]:hpr = ninf nofpexcept VADDH killed [[VMOVHR1]], killed [[VMOVHR]], 14, $noreg, implicit $fpscr
; CHECK-FP16-NEXT: [[VMOVHR2:%[0-9]+]]:hpr = VMOVHR [[COPY]], 14, $noreg
- ; CHECK-FP16-NEXT: [[VADDH1:%[0-9]+]]:hpr = ninf VADDH killed [[VADDH]], killed [[VMOVHR2]], 14, $noreg
+ ; CHECK-FP16-NEXT: [[VADDH1:%[0-9]+]]:hpr = ninf nofpexcept VADDH killed [[VADDH]], killed [[VMOVHR2]], 14, $noreg, implicit $fpscr
; CHECK-FP16-NEXT: $r0 = COPY [[VADDH1]]
; CHECK-FP16-NEXT: MOVPCLR 14 /* CC::al */, $noreg, implicit $r0
entry:
diff --git a/llvm/test/CodeGen/ARM/ipra-reg-usage.ll b/llvm/test/CodeGen/ARM/ipra-reg-usage.ll
index c928390..90142cb 100644
--- a/llvm/test/CodeGen/ARM/ipra-reg-usage.ll
+++ b/llvm/test/CodeGen/ARM/ipra-reg-usage.ll
@@ -6,7 +6,7 @@ target triple = "armv7-eabi"
declare void @bar1()
define void @foo()#0 {
-; CHECK: foo Clobbered Registers: $apsr $apsr_nzcv $cpsr $fpcxtns $fpcxts $fpexc $fpinst $fpscr $fpscr_nzcv $fpscr_nzcvqc $fpsid $itstate $pc $ra_auth_code $sp $spsr $vpr $zr $d0 $d1 $d2 $d3 $d4 $d5 $d6 $d7 $d16 $d17 $d18 $d19 $d20 $d21 $d22 $d23 $d24 $d25 $d26 $d27 $d28 $d29 $d30 $d31 $fpinst2 $mvfr0 $mvfr1 $mvfr2 $p0 $q0 $q1 $q2 $q3 $q8 $q9 $q10 $q11 $q12 $q13 $q14 $q15 $r0 $r1 $r2 $r3 $r12 $s0 $s1 $s2 $s3 $s4 $s5 $s6 $s7 $s8 $s9 $s10 $s11 $s12 $s13 $s14 $s15 $d0_d2 $d1_d3 $d2_d4 $d3_d5 $d4_d6 $d5_d7 $d6_d8 $d7_d9 $d14_d16 $d15_d17 $d16_d18 $d17_d19 $d18_d20 $d19_d21 $d20_d22 $d21_d23 $d22_d24 $d23_d25 $d24_d26 $d25_d27 $d26_d28 $d27_d29 $d28_d30 $d29_d31 $q0_q1 $q1_q2 $q2_q3 $q3_q4 $q7_q8 $q8_q9 $q9_q10 $q10_q11 $q11_q12 $q12_q13 $q13_q14 $q14_q15 $q0_q1_q2_q3 $q1_q2_q3_q4 $q2_q3_q4_q5 $q3_q4_q5_q6 $q5_q6_q7_q8 $q6_q7_q8_q9 $q7_q8_q9_q10 $q8_q9_q10_q11 $q9_q10_q11_q12 $q10_q11_q12_q13 $q11_q12_q13_q14 $q12_q13_q14_q15 $r0_r1 $r2_r3 $r12_sp $d0_d1_d2 $d1_d2_d3 $d2_d3_d4 $d3_d4_d5 $d4_d5_d6 $d5_d6_d7 $d6_d7_d8 $d7_d8_d9 $d14_d15_d16 $d15_d16_d17 $d16_d17_d18 $d17_d18_d19 $d18_d19_d20 $d19_d20_d21 $d20_d21_d22 $d21_d22_d23 $d22_d23_d24 $d23_d24_d25 $d24_d25_d26 $d25_d26_d27 $d26_d27_d28 $d27_d28_d29 $d28_d29_d30 $d29_d30_d31 $d0_d2_d4 $d1_d3_d5 $d2_d4_d6 $d3_d5_d7 $d4_d6_d8 $d5_d7_d9 $d6_d8_d10 $d7_d9_d11 $d12_d14_d16 $d13_d15_d17 $d14_d16_d18 $d15_d17_d19 $d16_d18_d20 $d17_d19_d21 $d18_d20_d22 $d19_d21_d23 $d20_d22_d24 $d21_d23_d25 $d22_d24_d26 $d23_d25_d27 $d24_d26_d28 $d25_d27_d29 $d26_d28_d30 $d27_d29_d31 $d0_d2_d4_d6 $d1_d3_d5_d7 $d2_d4_d6_d8 $d3_d5_d7_d9 $d4_d6_d8_d10 $d5_d7_d9_d11 $d6_d8_d10_d12 $d7_d9_d11_d13 $d10_d12_d14_d16 $d11_d13_d15_d17 $d12_d14_d16_d18 $d13_d15_d17_d19 $d14_d16_d18_d20 $d15_d17_d19_d21 $d16_d18_d20_d22 $d17_d19_d21_d23 $d18_d20_d22_d24 $d19_d21_d23_d25 $d20_d22_d24_d26 $d21_d23_d25_d27 $d22_d24_d26_d28 $d23_d25_d27_d29 $d24_d26_d28_d30 $d25_d27_d29_d31 $d1_d2 $d3_d4 $d5_d6 $d7_d8 $d15_d16 $d17_d18 $d19_d20 $d21_d22 $d23_d24 $d25_d26 $d27_d28 $d29_d30 $d1_d2_d3_d4 $d3_d4_d5_d6 $d5_d6_d7_d8 $d7_d8_d9_d10 $d13_d14_d15_d16 $d15_d16_d17_d18 $d17_d18_d19_d20 $d19_d20_d21_d22 $d21_d22_d23_d24 $d23_d24_d25_d26 $d25_d26_d27_d28 $d27_d28_d29_d30
+; CHECK: foo Clobbered Registers: $apsr $apsr_nzcv $cpsr $fpcxtns $fpcxts $fpexc $fpinst $fpscr $fpscr_nzcv $fpscr_nzcvqc $fpscr_rm $fpsid $itstate $pc $ra_auth_code $sp $spsr $vpr $zr $d0 $d1 $d2 $d3 $d4 $d5 $d6 $d7 $d16 $d17 $d18 $d19 $d20 $d21 $d22 $d23 $d24 $d25 $d26 $d27 $d28 $d29 $d30 $d31 $fpinst2 $mvfr0 $mvfr1 $mvfr2 $p0 $q0 $q1 $q2 $q3 $q8 $q9 $q10 $q11 $q12 $q13 $q14 $q15 $r0 $r1 $r2 $r3 $r12 $s0 $s1 $s2 $s3 $s4 $s5 $s6 $s7 $s8 $s9 $s10 $s11 $s12 $s13 $s14 $s15 $d0_d2 $d1_d3 $d2_d4 $d3_d5 $d4_d6 $d5_d7 $d6_d8 $d7_d9 $d14_d16 $d15_d17 $d16_d18 $d17_d19 $d18_d20 $d19_d21 $d20_d22 $d21_d23 $d22_d24 $d23_d25 $d24_d26 $d25_d27 $d26_d28 $d27_d29 $d28_d30 $d29_d31 $q0_q1 $q1_q2 $q2_q3 $q3_q4 $q7_q8 $q8_q9 $q9_q10 $q10_q11 $q11_q12 $q12_q13 $q13_q14 $q14_q15 $q0_q1_q2_q3 $q1_q2_q3_q4 $q2_q3_q4_q5 $q3_q4_q5_q6 $q5_q6_q7_q8 $q6_q7_q8_q9 $q7_q8_q9_q10 $q8_q9_q10_q11 $q9_q10_q11_q12 $q10_q11_q12_q13 $q11_q12_q13_q14 $q12_q13_q14_q15 $r0_r1 $r2_r3 $r12_sp $d0_d1_d2 $d1_d2_d3 $d2_d3_d4 $d3_d4_d5 $d4_d5_d6 $d5_d6_d7 $d6_d7_d8 $d7_d8_d9 $d14_d15_d16 $d15_d16_d17 $d16_d17_d18 $d17_d18_d19 $d18_d19_d20 $d19_d20_d21 $d20_d21_d22 $d21_d22_d23 $d22_d23_d24 $d23_d24_d25 $d24_d25_d26 $d25_d26_d27 $d26_d27_d28 $d27_d28_d29 $d28_d29_d30 $d29_d30_d31 $d0_d2_d4 $d1_d3_d5 $d2_d4_d6 $d3_d5_d7 $d4_d6_d8 $d5_d7_d9 $d6_d8_d10 $d7_d9_d11 $d12_d14_d16 $d13_d15_d17 $d14_d16_d18 $d15_d17_d19 $d16_d18_d20 $d17_d19_d21 $d18_d20_d22 $d19_d21_d23 $d20_d22_d24 $d21_d23_d25 $d22_d24_d26 $d23_d25_d27 $d24_d26_d28 $d25_d27_d29 $d26_d28_d30 $d27_d29_d31 $d0_d2_d4_d6 $d1_d3_d5_d7 $d2_d4_d6_d8 $d3_d5_d7_d9 $d4_d6_d8_d10 $d5_d7_d9_d11 $d6_d8_d10_d12 $d7_d9_d11_d13 $d10_d12_d14_d16 $d11_d13_d15_d17 $d12_d14_d16_d18 $d13_d15_d17_d19 $d14_d16_d18_d20 $d15_d17_d19_d21 $d16_d18_d20_d22 $d17_d19_d21_d23 $d18_d20_d22_d24 $d19_d21_d23_d25 $d20_d22_d24_d26 $d21_d23_d25_d27 $d22_d24_d26_d28 $d23_d25_d27_d29 $d24_d26_d28_d30 $d25_d27_d29_d31 $d1_d2 $d3_d4 $d5_d6 $d7_d8 $d15_d16 $d17_d18 $d19_d20 $d21_d22 $d23_d24 $d25_d26 $d27_d28 $d29_d30 $d1_d2_d3_d4 $d3_d4_d5_d6 $d5_d6_d7_d8 $d7_d8_d9_d10 $d13_d14_d15_d16 $d15_d16_d17_d18 $d17_d18_d19_d20 $d19_d20_d21_d22 $d21_d22_d23_d24 $d23_d24_d25_d26 $d25_d26_d27_d28 $d27_d28_d29_d30
call void @bar1()
call void @bar2()
ret void
diff --git a/llvm/test/CodeGen/ARM/llrint-conv.ll b/llvm/test/CodeGen/ARM/llrint-conv.ll
index a1a04db..7274a8b 100644
--- a/llvm/test/CodeGen/ARM/llrint-conv.ll
+++ b/llvm/test/CodeGen/ARM/llrint-conv.ll
@@ -1,7 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=armv7-none-eabi -float-abi=soft | FileCheck %s --check-prefixes=CHECK,CHECK-SOFT
; RUN: llc < %s -mtriple=armv7-none-eabihf -mattr=+vfp2 -float-abi=hard | FileCheck %s --check-prefixes=CHECK,CHECK-NOFP16
-; RUN: llc < %s -mtriple=armv7-none-eabihf -mattr=+vfp2,+fullfp16 -float-abi=hard | FileCheck %s --check-prefixes=CHECK,CHECK-FP16
+; RUN: llc < %s -mtriple=armv8-none-eabihf -mattr=+fp-armv8 -float-abi=hard | FileCheck %s --check-prefixes=CHECK,CHECK-FPv8
+; RUN: llc < %s -mtriple=armv8-none-eabihf -mattr=+fp-armv8,+fullfp16 -float-abi=hard | FileCheck %s --check-prefixes=CHECK,CHECK-FP16
define i64 @testmsxh_builtin(half %x) {
; CHECK-SOFT-LABEL: testmsxh_builtin:
@@ -22,6 +23,14 @@ define i64 @testmsxh_builtin(half %x) {
; CHECK-NOFP16-NEXT: bl llrintf
; CHECK-NOFP16-NEXT: pop {r11, pc}
;
+; CHECK-FPv8-LABEL: testmsxh_builtin:
+; CHECK-FPv8: @ %bb.0: @ %entry
+; CHECK-FPv8-NEXT: .save {r11, lr}
+; CHECK-FPv8-NEXT: push {r11, lr}
+; CHECK-FPv8-NEXT: vcvtb.f32.f16 s0, s0
+; CHECK-FPv8-NEXT: bl llrintf
+; CHECK-FPv8-NEXT: pop {r11, pc}
+;
; CHECK-FP16-LABEL: testmsxh_builtin:
; CHECK-FP16: @ %bb.0: @ %entry
; CHECK-FP16-NEXT: .save {r11, lr}
diff --git a/llvm/test/CodeGen/ARM/lrint-conv.ll b/llvm/test/CodeGen/ARM/lrint-conv.ll
index 23a2685..2de2349 100644
--- a/llvm/test/CodeGen/ARM/lrint-conv.ll
+++ b/llvm/test/CodeGen/ARM/lrint-conv.ll
@@ -1,14 +1,43 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=armv7-none-eabi -float-abi=soft | FileCheck %s --check-prefixes=CHECK,CHECK-SOFT
; RUN: llc < %s -mtriple=armv7-none-eabihf -mattr=+vfp2 -float-abi=hard | FileCheck %s --check-prefixes=CHECK,CHECK-NOFP16
-; RUN: llc < %s -mtriple=armv7-none-eabihf -mattr=+vfp2,+fullfp16 -float-abi=hard | FileCheck %s --check-prefixes=CHECK,CHECK-FP16
+; RUN: llc < %s -mtriple=armv8-none-eabihf -mattr=+fp-armv8 -float-abi=hard | FileCheck %s --check-prefixes=CHECK,CHECK-FPv8
+; RUN: llc < %s -mtriple=armv8-none-eabihf -mattr=+fp-armv8,+fullfp16 -float-abi=hard | FileCheck %s --check-prefixes=CHECK,CHECK-FP16
-; FIXME: crash
-; define i32 @testmswh_builtin(half %x) {
-; entry:
-; %0 = tail call i32 @llvm.lrint.i32.f16(half %x)
-; ret i32 %0
-; }
+define i32 @testmswh_builtin(half %x) {
+; CHECK-SOFT-LABEL: testmswh_builtin:
+; CHECK-SOFT: @ %bb.0: @ %entry
+; CHECK-SOFT-NEXT: .save {r11, lr}
+; CHECK-SOFT-NEXT: push {r11, lr}
+; CHECK-SOFT-NEXT: bl __aeabi_h2f
+; CHECK-SOFT-NEXT: pop {r11, lr}
+; CHECK-SOFT-NEXT: b lrintf
+;
+; CHECK-NOFP16-LABEL: testmswh_builtin:
+; CHECK-NOFP16: @ %bb.0: @ %entry
+; CHECK-NOFP16-NEXT: .save {r11, lr}
+; CHECK-NOFP16-NEXT: push {r11, lr}
+; CHECK-NOFP16-NEXT: vmov r0, s0
+; CHECK-NOFP16-NEXT: bl __aeabi_h2f
+; CHECK-NOFP16-NEXT: vmov s0, r0
+; CHECK-NOFP16-NEXT: pop {r11, lr}
+; CHECK-NOFP16-NEXT: b lrintf
+;
+; CHECK-FPv8-LABEL: testmswh_builtin:
+; CHECK-FPv8: @ %bb.0: @ %entry
+; CHECK-FPv8-NEXT: vcvtb.f32.f16 s0, s0
+; CHECK-FPv8-NEXT: b lrintf
+;
+; CHECK-FP16-LABEL: testmswh_builtin:
+; CHECK-FP16: @ %bb.0: @ %entry
+; CHECK-FP16-NEXT: vrintx.f16 s0, s0
+; CHECK-FP16-NEXT: vcvt.s32.f16 s0, s0
+; CHECK-FP16-NEXT: vmov r0, s0
+; CHECK-FP16-NEXT: bx lr
+entry:
+ %0 = tail call i32 @llvm.lrint.i32.f16(half %x)
+ ret i32 %0
+}
define i32 @testmsws_builtin(float %x) {
; CHECK-LABEL: testmsws_builtin:
@@ -39,8 +68,3 @@ entry:
%0 = tail call i32 @llvm.lrint.i32.f128(fp128 %x)
ret i32 %0
}
-
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; CHECK-FP16: {{.*}}
-; CHECK-NOFP16: {{.*}}
-; CHECK-SOFT: {{.*}}
diff --git a/llvm/test/CodeGen/ARM/misched-prevent-erase-history-of-subunits.mir b/llvm/test/CodeGen/ARM/misched-prevent-erase-history-of-subunits.mir
index 46f3e4b..17d6619 100644
--- a/llvm/test/CodeGen/ARM/misched-prevent-erase-history-of-subunits.mir
+++ b/llvm/test/CodeGen/ARM/misched-prevent-erase-history-of-subunits.mir
@@ -14,7 +14,7 @@
# CHECK: SU(1): %1:dpr = VABSD %0:dpr, 14, $noreg
# CHECK: SU(2): %2:dpr = VLDRD %const.0, 0, 14, $noreg :: (load (s64) from constant-pool)
# CHECK: SU(4): %3:rgpr = t2MOVi 0, 14, $noreg, $noreg
-# CHECK: SU(3): VCMPD %1:dpr, %2:dpr, 14, $noreg, implicit-def $fpscr_nzcv
+# CHECK: SU(3): VCMPD %1:dpr, %2:dpr, 14, $noreg, implicit-def $fpscr_nzcv, implicit $fpscr_rm
# CHECK: SU(5): $r0 = COPY %3:rgpr
---
name: test
@@ -29,7 +29,7 @@ body: |
%0:dpr = COPY $d0
%1:dpr = VABSD %0, 14 /* CC::al */, $noreg
%2:dpr = VLDRD %const.0, 0, 14 /* CC::al */, $noreg :: (load (s64) from constant-pool)
- VCMPD %1, %2, 14 /* CC::al */, $noreg, implicit-def $fpscr_nzcv
+ VCMPD %1, %2, 14 /* CC::al */, $noreg, implicit-def $fpscr_nzcv, implicit $fpscr_rm
%4:rgpr = t2MOVi 0, 14 /* CC::al */, $noreg, $noreg
$r0 = COPY %4
tBX_RET 14 /* CC::al */, $noreg, implicit killed $r0
diff --git a/llvm/test/CodeGen/ARM/vector-lrint.ll b/llvm/test/CodeGen/ARM/vector-lrint.ll
index c1159da..c3c8884 100644
--- a/llvm/test/CodeGen/ARM/vector-lrint.ll
+++ b/llvm/test/CodeGen/ARM/vector-lrint.ll
@@ -9,31 +9,1290 @@
; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=armebv7-unknown-none-eabihf -mattr=+neon | FileCheck %s --check-prefixes=BE-I32
; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=armebv7-unknown-none-eabihf -mattr=+neon | FileCheck %s --check-prefixes=BE-I64
-; FIXME: crash "Do not know how to soft promote this operator's operand!"
-; define <1 x iXLen> @lrint_v1f16(<1 x half> %x) {
-; %a = call <1 x iXLen> @llvm.lrint.v1iXLen.v1f16(<1 x half> %x)
-; ret <1 x iXLen> %a
-; }
+define <1 x iXLen> @lrint_v1f16(<1 x half> %x) {
+; LE-I32-LABEL: lrint_v1f16:
+; LE-I32: @ %bb.0:
+; LE-I32-NEXT: .save {r11, lr}
+; LE-I32-NEXT: push {r11, lr}
+; LE-I32-NEXT: vmov r0, s0
+; LE-I32-NEXT: bl __aeabi_f2h
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: pop {r11, pc}
+;
+; LE-I64-LABEL: lrint_v1f16:
+; LE-I64: @ %bb.0:
+; LE-I64-NEXT: .save {r11, lr}
+; LE-I64-NEXT: push {r11, lr}
+; LE-I64-NEXT: vmov r0, s0
+; LE-I64-NEXT: bl __aeabi_f2h
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov s0, r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.32 d0[0], r0
+; LE-I64-NEXT: vmov.32 d0[1], r1
+; LE-I64-NEXT: pop {r11, pc}
+;
+; BE-I32-LABEL: lrint_v1f16:
+; BE-I32: @ %bb.0:
+; BE-I32-NEXT: .save {r11, lr}
+; BE-I32-NEXT: push {r11, lr}
+; BE-I32-NEXT: vmov r0, s0
+; BE-I32-NEXT: bl __aeabi_f2h
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: pop {r11, pc}
+;
+; BE-I64-LABEL: lrint_v1f16:
+; BE-I64: @ %bb.0:
+; BE-I64-NEXT: .save {r11, lr}
+; BE-I64-NEXT: push {r11, lr}
+; BE-I64-NEXT: vmov r0, s0
+; BE-I64-NEXT: bl __aeabi_f2h
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vmov s0, r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.32 d16[0], r0
+; BE-I64-NEXT: vmov.32 d16[1], r1
+; BE-I64-NEXT: vrev64.32 d0, d16
+; BE-I64-NEXT: pop {r11, pc}
+ %a = call <1 x iXLen> @llvm.lrint.v1iXLen.v1f16(<1 x half> %x)
+ ret <1 x iXLen> %a
+}
-; define <2 x iXLen> @lrint_v2f16(<2 x half> %x) {
-; %a = call <2 x iXLen> @llvm.lrint.v2iXLen.v2f16(<2 x half> %x)
-; ret <2 x iXLen> %a
-; }
+define <2 x iXLen> @lrint_v2f16(<2 x half> %x) {
+; LE-I32-LABEL: lrint_v2f16:
+; LE-I32: @ %bb.0:
+; LE-I32-NEXT: .save {r11, lr}
+; LE-I32-NEXT: push {r11, lr}
+; LE-I32-NEXT: .vsave {d8}
+; LE-I32-NEXT: vpush {d8}
+; LE-I32-NEXT: vmov r0, s0
+; LE-I32-NEXT: vmov.f32 s16, s1
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov r1, s16
+; LE-I32-NEXT: vmov.32 d8[0], r0
+; LE-I32-NEXT: mov r0, r1
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov.32 d8[1], r0
+; LE-I32-NEXT: vorr d0, d8, d8
+; LE-I32-NEXT: vpop {d8}
+; LE-I32-NEXT: pop {r11, pc}
+;
+; LE-I64-LABEL: lrint_v2f16:
+; LE-I64: @ %bb.0:
+; LE-I64-NEXT: .save {r4, r5, r11, lr}
+; LE-I64-NEXT: push {r4, r5, r11, lr}
+; LE-I64-NEXT: .vsave {d8, d9}
+; LE-I64-NEXT: vpush {d8, d9}
+; LE-I64-NEXT: vmov r0, s1
+; LE-I64-NEXT: vmov.f32 s16, s0
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov s0, r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: mov r4, r0
+; LE-I64-NEXT: vmov r0, s16
+; LE-I64-NEXT: mov r5, r1
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov s0, r0
+; LE-I64-NEXT: vmov.32 d9[0], r4
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.32 d8[0], r0
+; LE-I64-NEXT: vmov.32 d9[1], r5
+; LE-I64-NEXT: vmov.32 d8[1], r1
+; LE-I64-NEXT: vorr q0, q4, q4
+; LE-I64-NEXT: vpop {d8, d9}
+; LE-I64-NEXT: pop {r4, r5, r11, pc}
+;
+; BE-I32-LABEL: lrint_v2f16:
+; BE-I32: @ %bb.0:
+; BE-I32-NEXT: .save {r11, lr}
+; BE-I32-NEXT: push {r11, lr}
+; BE-I32-NEXT: .vsave {d8}
+; BE-I32-NEXT: vpush {d8}
+; BE-I32-NEXT: vmov r0, s0
+; BE-I32-NEXT: vmov.f32 s16, s1
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov r1, s16
+; BE-I32-NEXT: vmov.32 d8[0], r0
+; BE-I32-NEXT: mov r0, r1
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov.32 d8[1], r0
+; BE-I32-NEXT: vrev64.32 d0, d8
+; BE-I32-NEXT: vpop {d8}
+; BE-I32-NEXT: pop {r11, pc}
+;
+; BE-I64-LABEL: lrint_v2f16:
+; BE-I64: @ %bb.0:
+; BE-I64-NEXT: .save {r4, r5, r11, lr}
+; BE-I64-NEXT: push {r4, r5, r11, lr}
+; BE-I64-NEXT: .vsave {d8}
+; BE-I64-NEXT: vpush {d8}
+; BE-I64-NEXT: vmov r0, s1
+; BE-I64-NEXT: vmov.f32 s16, s0
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vmov s0, r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: mov r4, r0
+; BE-I64-NEXT: vmov r0, s16
+; BE-I64-NEXT: mov r5, r1
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vmov s0, r0
+; BE-I64-NEXT: vmov.32 d8[0], r4
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.32 d16[0], r0
+; BE-I64-NEXT: vmov.32 d8[1], r5
+; BE-I64-NEXT: vmov.32 d16[1], r1
+; BE-I64-NEXT: vrev64.32 d1, d8
+; BE-I64-NEXT: vrev64.32 d0, d16
+; BE-I64-NEXT: vpop {d8}
+; BE-I64-NEXT: pop {r4, r5, r11, pc}
+ %a = call <2 x iXLen> @llvm.lrint.v2iXLen.v2f16(<2 x half> %x)
+ ret <2 x iXLen> %a
+}
-; define <4 x iXLen> @lrint_v4f16(<4 x half> %x) {
-; %a = call <4 x iXLen> @llvm.lrint.v4iXLen.v4f16(<4 x half> %x)
-; ret <4 x iXLen> %a
-; }
+define <4 x iXLen> @lrint_v4f16(<4 x half> %x) {
+; LE-I32-LABEL: lrint_v4f16:
+; LE-I32: @ %bb.0:
+; LE-I32-NEXT: .save {r4, r5, r11, lr}
+; LE-I32-NEXT: push {r4, r5, r11, lr}
+; LE-I32-NEXT: .vsave {d8, d9, d10, d11}
+; LE-I32-NEXT: vpush {d8, d9, d10, d11}
+; LE-I32-NEXT: vmov r0, s3
+; LE-I32-NEXT: vmov.f32 s16, s2
+; LE-I32-NEXT: vmov.f32 s18, s1
+; LE-I32-NEXT: vmov.f32 s20, s0
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: mov r4, r0
+; LE-I32-NEXT: vmov r0, s16
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: mov r5, r0
+; LE-I32-NEXT: vmov r0, s20
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov s0, r5
+; LE-I32-NEXT: vmov.32 d10[0], r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov.32 d11[0], r0
+; LE-I32-NEXT: vmov r0, s18
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: vmov.32 d11[1], r4
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov.32 d10[1], r0
+; LE-I32-NEXT: vorr q0, q5, q5
+; LE-I32-NEXT: vpop {d8, d9, d10, d11}
+; LE-I32-NEXT: pop {r4, r5, r11, pc}
+;
+; LE-I64-LABEL: lrint_v4f16:
+; LE-I64: @ %bb.0:
+; LE-I64-NEXT: .save {r4, r5, r6, r7, r11, lr}
+; LE-I64-NEXT: push {r4, r5, r6, r7, r11, lr}
+; LE-I64-NEXT: .vsave {d12, d13}
+; LE-I64-NEXT: vpush {d12, d13}
+; LE-I64-NEXT: .vsave {d8, d9, d10}
+; LE-I64-NEXT: vpush {d8, d9, d10}
+; LE-I64-NEXT: vmov r0, s1
+; LE-I64-NEXT: vmov.f32 s16, s3
+; LE-I64-NEXT: vmov.f32 s20, s2
+; LE-I64-NEXT: vmov.f32 s18, s0
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov s0, r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: mov r5, r0
+; LE-I64-NEXT: vmov r0, s18
+; LE-I64-NEXT: mov r4, r1
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: mov r7, r0
+; LE-I64-NEXT: vmov r0, s16
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov s0, r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov s0, r7
+; LE-I64-NEXT: mov r6, r1
+; LE-I64-NEXT: vmov.32 d9[0], r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.32 d12[0], r0
+; LE-I64-NEXT: vmov r0, s20
+; LE-I64-NEXT: mov r7, r1
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov s0, r0
+; LE-I64-NEXT: vmov.32 d13[0], r5
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.32 d8[0], r0
+; LE-I64-NEXT: vmov.32 d13[1], r4
+; LE-I64-NEXT: vmov.32 d9[1], r6
+; LE-I64-NEXT: vmov.32 d12[1], r7
+; LE-I64-NEXT: vmov.32 d8[1], r1
+; LE-I64-NEXT: vorr q0, q6, q6
+; LE-I64-NEXT: vorr q1, q4, q4
+; LE-I64-NEXT: vpop {d8, d9, d10}
+; LE-I64-NEXT: vpop {d12, d13}
+; LE-I64-NEXT: pop {r4, r5, r6, r7, r11, pc}
+;
+; BE-I32-LABEL: lrint_v4f16:
+; BE-I32: @ %bb.0:
+; BE-I32-NEXT: .save {r4, r5, r11, lr}
+; BE-I32-NEXT: push {r4, r5, r11, lr}
+; BE-I32-NEXT: .vsave {d8, d9, d10, d11}
+; BE-I32-NEXT: vpush {d8, d9, d10, d11}
+; BE-I32-NEXT: vmov r0, s3
+; BE-I32-NEXT: vmov.f32 s16, s2
+; BE-I32-NEXT: vmov.f32 s18, s1
+; BE-I32-NEXT: vmov.f32 s20, s0
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: mov r4, r0
+; BE-I32-NEXT: vmov r0, s16
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: mov r5, r0
+; BE-I32-NEXT: vmov r0, s20
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov s0, r5
+; BE-I32-NEXT: vmov.32 d10[0], r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov.32 d11[0], r0
+; BE-I32-NEXT: vmov r0, s18
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: vmov.32 d11[1], r4
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov.32 d10[1], r0
+; BE-I32-NEXT: vrev64.32 q0, q5
+; BE-I32-NEXT: vpop {d8, d9, d10, d11}
+; BE-I32-NEXT: pop {r4, r5, r11, pc}
+;
+; BE-I64-LABEL: lrint_v4f16:
+; BE-I64: @ %bb.0:
+; BE-I64-NEXT: .save {r4, r5, r6, r7, r11, lr}
+; BE-I64-NEXT: push {r4, r5, r6, r7, r11, lr}
+; BE-I64-NEXT: .vsave {d8, d9, d10}
+; BE-I64-NEXT: vpush {d8, d9, d10}
+; BE-I64-NEXT: vmov r0, s1
+; BE-I64-NEXT: vmov.f32 s16, s3
+; BE-I64-NEXT: vmov.f32 s18, s2
+; BE-I64-NEXT: vmov.f32 s20, s0
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vmov s0, r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: mov r5, r0
+; BE-I64-NEXT: vmov r0, s20
+; BE-I64-NEXT: mov r4, r1
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: mov r7, r0
+; BE-I64-NEXT: vmov r0, s16
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vmov s0, r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov s0, r7
+; BE-I64-NEXT: mov r6, r1
+; BE-I64-NEXT: vmov.32 d8[0], r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.32 d10[0], r0
+; BE-I64-NEXT: vmov r0, s18
+; BE-I64-NEXT: mov r7, r1
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vmov s0, r0
+; BE-I64-NEXT: vmov.32 d9[0], r5
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.32 d16[0], r0
+; BE-I64-NEXT: vmov.32 d9[1], r4
+; BE-I64-NEXT: vmov.32 d8[1], r6
+; BE-I64-NEXT: vmov.32 d10[1], r7
+; BE-I64-NEXT: vmov.32 d16[1], r1
+; BE-I64-NEXT: vrev64.32 d1, d9
+; BE-I64-NEXT: vrev64.32 d3, d8
+; BE-I64-NEXT: vrev64.32 d0, d10
+; BE-I64-NEXT: vrev64.32 d2, d16
+; BE-I64-NEXT: vpop {d8, d9, d10}
+; BE-I64-NEXT: pop {r4, r5, r6, r7, r11, pc}
+ %a = call <4 x iXLen> @llvm.lrint.v4iXLen.v4f16(<4 x half> %x)
+ ret <4 x iXLen> %a
+}
-; define <8 x iXLen> @lrint_v8f16(<8 x half> %x) {
-; %a = call <8 x iXLen> @llvm.lrint.v8iXLen.v8f16(<8 x half> %x)
-; ret <8 x iXLen> %a
-; }
+define <8 x iXLen> @lrint_v8f16(<8 x half> %x) {
+; LE-I32-LABEL: lrint_v8f16:
+; LE-I32: @ %bb.0:
+; LE-I32-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr}
+; LE-I32-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr}
+; LE-I32-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14}
+; LE-I32-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14}
+; LE-I32-NEXT: vmov r0, s7
+; LE-I32-NEXT: vmov.f32 s18, s6
+; LE-I32-NEXT: vmov.f32 s16, s5
+; LE-I32-NEXT: vmov.f32 s20, s4
+; LE-I32-NEXT: vmov.f32 s22, s3
+; LE-I32-NEXT: vmov.f32 s24, s2
+; LE-I32-NEXT: vmov.f32 s26, s1
+; LE-I32-NEXT: vmov.f32 s28, s0
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: mov r8, r0
+; LE-I32-NEXT: vmov r0, s26
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: mov r9, r0
+; LE-I32-NEXT: vmov r0, s22
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: mov r6, r0
+; LE-I32-NEXT: vmov r0, s28
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: mov r7, r0
+; LE-I32-NEXT: vmov r0, s24
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: mov r4, r0
+; LE-I32-NEXT: vmov r0, s18
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: mov r5, r0
+; LE-I32-NEXT: vmov r0, s20
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov s0, r5
+; LE-I32-NEXT: vmov.32 d10[0], r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov s0, r4
+; LE-I32-NEXT: vmov.32 d11[0], r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov s0, r7
+; LE-I32-NEXT: vmov.32 d13[0], r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov s0, r6
+; LE-I32-NEXT: vmov.32 d12[0], r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov s0, r9
+; LE-I32-NEXT: vmov.32 d13[1], r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov.32 d12[1], r0
+; LE-I32-NEXT: vmov r0, s16
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: vmov.32 d11[1], r8
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov.32 d10[1], r0
+; LE-I32-NEXT: vorr q0, q6, q6
+; LE-I32-NEXT: vorr q1, q5, q5
+; LE-I32-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14}
+; LE-I32-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc}
+;
+; LE-I64-LABEL: lrint_v8f16:
+; LE-I64: @ %bb.0:
+; LE-I64-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-I64-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-I64-NEXT: .pad #4
+; LE-I64-NEXT: sub sp, sp, #4
+; LE-I64-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEXT: .pad #8
+; LE-I64-NEXT: sub sp, sp, #8
+; LE-I64-NEXT: vmov r0, s1
+; LE-I64-NEXT: vstr s6, [sp, #4] @ 4-byte Spill
+; LE-I64-NEXT: vmov.f32 s16, s7
+; LE-I64-NEXT: vmov.f32 s18, s5
+; LE-I64-NEXT: vmov.f32 s20, s4
+; LE-I64-NEXT: vmov.f32 s22, s3
+; LE-I64-NEXT: vmov.f32 s24, s2
+; LE-I64-NEXT: vmov.f32 s26, s0
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov s0, r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: mov r9, r0
+; LE-I64-NEXT: vmov r0, s26
+; LE-I64-NEXT: str r1, [sp] @ 4-byte Spill
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: mov r10, r0
+; LE-I64-NEXT: vmov r0, s22
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: mov r5, r0
+; LE-I64-NEXT: vmov r0, s24
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: mov r7, r0
+; LE-I64-NEXT: vmov r0, s18
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: mov r6, r0
+; LE-I64-NEXT: vmov r0, s20
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: mov r4, r0
+; LE-I64-NEXT: vmov r0, s16
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov s0, r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov s0, r4
+; LE-I64-NEXT: mov r11, r1
+; LE-I64-NEXT: vmov.32 d11[0], r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov s0, r6
+; LE-I64-NEXT: mov r8, r1
+; LE-I64-NEXT: vmov.32 d12[0], r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov s0, r7
+; LE-I64-NEXT: mov r6, r1
+; LE-I64-NEXT: vmov.32 d13[0], r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov s0, r5
+; LE-I64-NEXT: mov r7, r1
+; LE-I64-NEXT: vmov.32 d14[0], r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov s0, r10
+; LE-I64-NEXT: mov r5, r1
+; LE-I64-NEXT: vmov.32 d15[0], r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vldr s0, [sp, #4] @ 4-byte Reload
+; LE-I64-NEXT: mov r4, r1
+; LE-I64-NEXT: vmov.32 d8[0], r0
+; LE-I64-NEXT: vmov r0, s0
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov s0, r0
+; LE-I64-NEXT: vmov.32 d9[0], r9
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.32 d10[0], r0
+; LE-I64-NEXT: ldr r0, [sp] @ 4-byte Reload
+; LE-I64-NEXT: vmov.32 d15[1], r5
+; LE-I64-NEXT: vmov.32 d9[1], r0
+; LE-I64-NEXT: vmov.32 d13[1], r6
+; LE-I64-NEXT: vmov.32 d11[1], r11
+; LE-I64-NEXT: vmov.32 d8[1], r4
+; LE-I64-NEXT: vmov.32 d14[1], r7
+; LE-I64-NEXT: vorr q0, q4, q4
+; LE-I64-NEXT: vmov.32 d12[1], r8
+; LE-I64-NEXT: vorr q1, q7, q7
+; LE-I64-NEXT: vmov.32 d10[1], r1
+; LE-I64-NEXT: vorr q2, q6, q6
+; LE-I64-NEXT: vorr q3, q5, q5
+; LE-I64-NEXT: add sp, sp, #8
+; LE-I64-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEXT: add sp, sp, #4
+; LE-I64-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; BE-I32-LABEL: lrint_v8f16:
+; BE-I32: @ %bb.0:
+; BE-I32-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr}
+; BE-I32-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr}
+; BE-I32-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14}
+; BE-I32-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14}
+; BE-I32-NEXT: vmov r0, s1
+; BE-I32-NEXT: vmov.f32 s18, s7
+; BE-I32-NEXT: vmov.f32 s20, s6
+; BE-I32-NEXT: vmov.f32 s16, s5
+; BE-I32-NEXT: vmov.f32 s22, s4
+; BE-I32-NEXT: vmov.f32 s24, s3
+; BE-I32-NEXT: vmov.f32 s26, s2
+; BE-I32-NEXT: vmov.f32 s28, s0
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: mov r8, r0
+; BE-I32-NEXT: vmov r0, s24
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: mov r9, r0
+; BE-I32-NEXT: vmov r0, s18
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: mov r6, r0
+; BE-I32-NEXT: vmov r0, s26
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: mov r7, r0
+; BE-I32-NEXT: vmov r0, s20
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: mov r4, r0
+; BE-I32-NEXT: vmov r0, s28
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: mov r5, r0
+; BE-I32-NEXT: vmov r0, s22
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov s0, r5
+; BE-I32-NEXT: vmov.32 d10[0], r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov s0, r4
+; BE-I32-NEXT: vmov.32 d12[0], r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov s0, r7
+; BE-I32-NEXT: vmov.32 d11[0], r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov s0, r6
+; BE-I32-NEXT: vmov.32 d13[0], r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov s0, r9
+; BE-I32-NEXT: vmov.32 d11[1], r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov.32 d13[1], r0
+; BE-I32-NEXT: vmov r0, s16
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: vmov.32 d12[1], r8
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov.32 d10[1], r0
+; BE-I32-NEXT: vrev64.32 q0, q6
+; BE-I32-NEXT: vrev64.32 q1, q5
+; BE-I32-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14}
+; BE-I32-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc}
+;
+; BE-I64-LABEL: lrint_v8f16:
+; BE-I64: @ %bb.0:
+; BE-I64-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-I64-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-I64-NEXT: .pad #4
+; BE-I64-NEXT: sub sp, sp, #4
+; BE-I64-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14}
+; BE-I64-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14}
+; BE-I64-NEXT: .pad #8
+; BE-I64-NEXT: sub sp, sp, #8
+; BE-I64-NEXT: vmov r0, s1
+; BE-I64-NEXT: vmov.f32 s18, s7
+; BE-I64-NEXT: vmov.f32 s16, s6
+; BE-I64-NEXT: vmov.f32 s20, s5
+; BE-I64-NEXT: vmov.f32 s22, s4
+; BE-I64-NEXT: vmov.f32 s24, s3
+; BE-I64-NEXT: vmov.f32 s26, s2
+; BE-I64-NEXT: vmov.f32 s28, s0
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vmov s0, r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: mov r9, r0
+; BE-I64-NEXT: vmov r0, s28
+; BE-I64-NEXT: str r1, [sp, #4] @ 4-byte Spill
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: mov r10, r0
+; BE-I64-NEXT: vmov r0, s24
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: mov r5, r0
+; BE-I64-NEXT: vmov r0, s26
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: mov r7, r0
+; BE-I64-NEXT: vmov r0, s20
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: mov r6, r0
+; BE-I64-NEXT: vmov r0, s22
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: mov r4, r0
+; BE-I64-NEXT: vmov r0, s18
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vmov s0, r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov s0, r4
+; BE-I64-NEXT: mov r11, r1
+; BE-I64-NEXT: vmov.32 d9[0], r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov s0, r6
+; BE-I64-NEXT: mov r8, r1
+; BE-I64-NEXT: vmov.32 d10[0], r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov s0, r7
+; BE-I64-NEXT: mov r6, r1
+; BE-I64-NEXT: vmov.32 d11[0], r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov s0, r5
+; BE-I64-NEXT: mov r7, r1
+; BE-I64-NEXT: vmov.32 d12[0], r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov s0, r10
+; BE-I64-NEXT: mov r5, r1
+; BE-I64-NEXT: vmov.32 d13[0], r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.32 d14[0], r0
+; BE-I64-NEXT: vmov r0, s16
+; BE-I64-NEXT: mov r4, r1
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vmov s0, r0
+; BE-I64-NEXT: vmov.32 d8[0], r9
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.32 d16[0], r0
+; BE-I64-NEXT: ldr r0, [sp, #4] @ 4-byte Reload
+; BE-I64-NEXT: vmov.32 d13[1], r5
+; BE-I64-NEXT: vmov.32 d8[1], r0
+; BE-I64-NEXT: vmov.32 d11[1], r6
+; BE-I64-NEXT: vmov.32 d9[1], r11
+; BE-I64-NEXT: vmov.32 d14[1], r4
+; BE-I64-NEXT: vmov.32 d12[1], r7
+; BE-I64-NEXT: vmov.32 d10[1], r8
+; BE-I64-NEXT: vmov.32 d16[1], r1
+; BE-I64-NEXT: vrev64.32 d1, d8
+; BE-I64-NEXT: vrev64.32 d3, d13
+; BE-I64-NEXT: vrev64.32 d5, d11
+; BE-I64-NEXT: vrev64.32 d7, d9
+; BE-I64-NEXT: vrev64.32 d0, d14
+; BE-I64-NEXT: vrev64.32 d2, d12
+; BE-I64-NEXT: vrev64.32 d4, d10
+; BE-I64-NEXT: vrev64.32 d6, d16
+; BE-I64-NEXT: add sp, sp, #8
+; BE-I64-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14}
+; BE-I64-NEXT: add sp, sp, #4
+; BE-I64-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+ %a = call <8 x iXLen> @llvm.lrint.v8iXLen.v8f16(<8 x half> %x)
+ ret <8 x iXLen> %a
+}
-; define <16 x iXLen> @lrint_v16f16(<16 x half> %x) {
-; %a = call <16 x iXLen> @llvm.lrint.v16iXLen.v16f16(<16 x half> %x)
-; ret <16 x iXLen> %a
-; }
+define <16 x iXLen> @lrint_v16f16(<16 x half> %x) {
+; LE-I32-LABEL: lrint_v16f16:
+; LE-I32: @ %bb.0:
+; LE-I32-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr}
+; LE-I32-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr}
+; LE-I32-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEXT: .pad #8
+; LE-I32-NEXT: sub sp, sp, #8
+; LE-I32-NEXT: vmov r0, s15
+; LE-I32-NEXT: vstr s13, [sp, #4] @ 4-byte Spill
+; LE-I32-NEXT: vmov.f32 s26, s14
+; LE-I32-NEXT: vstr s0, [sp] @ 4-byte Spill
+; LE-I32-NEXT: vmov.f32 s20, s12
+; LE-I32-NEXT: vmov.f32 s22, s11
+; LE-I32-NEXT: vmov.f32 s18, s10
+; LE-I32-NEXT: vmov.f32 s17, s9
+; LE-I32-NEXT: vmov.f32 s24, s8
+; LE-I32-NEXT: vmov.f32 s19, s7
+; LE-I32-NEXT: vmov.f32 s30, s6
+; LE-I32-NEXT: vmov.f32 s21, s5
+; LE-I32-NEXT: vmov.f32 s16, s4
+; LE-I32-NEXT: vmov.f32 s23, s3
+; LE-I32-NEXT: vmov.f32 s28, s2
+; LE-I32-NEXT: vmov.f32 s25, s1
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: mov r8, r0
+; LE-I32-NEXT: vmov r0, s17
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: mov r9, r0
+; LE-I32-NEXT: vmov r0, s22
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: mov r10, r0
+; LE-I32-NEXT: vmov r0, s21
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: mov r7, r0
+; LE-I32-NEXT: vmov r0, s19
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: mov r4, r0
+; LE-I32-NEXT: vmov r0, s25
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: mov r5, r0
+; LE-I32-NEXT: vmov r0, s23
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: mov r6, r0
+; LE-I32-NEXT: vmov r0, s20
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov.32 d10[0], r0
+; LE-I32-NEXT: vmov r0, s26
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov.32 d11[0], r0
+; LE-I32-NEXT: vmov r0, s24
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov.32 d12[0], r0
+; LE-I32-NEXT: vmov r0, s18
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov.32 d13[0], r0
+; LE-I32-NEXT: vmov r0, s16
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov.32 d8[0], r0
+; LE-I32-NEXT: vmov r0, s30
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov.32 d9[0], r0
+; LE-I32-NEXT: vmov r0, s28
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vldr s0, [sp] @ 4-byte Reload
+; LE-I32-NEXT: vmov.32 d15[0], r0
+; LE-I32-NEXT: vmov r0, s0
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov s0, r6
+; LE-I32-NEXT: vmov.32 d14[0], r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov s0, r5
+; LE-I32-NEXT: vmov.32 d15[1], r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov s0, r4
+; LE-I32-NEXT: vmov.32 d14[1], r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov s0, r7
+; LE-I32-NEXT: vmov.32 d9[1], r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov s0, r10
+; LE-I32-NEXT: vmov.32 d8[1], r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov s0, r9
+; LE-I32-NEXT: vmov.32 d13[1], r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vldr s0, [sp, #4] @ 4-byte Reload
+; LE-I32-NEXT: vmov.32 d12[1], r0
+; LE-I32-NEXT: vmov r0, s0
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: vmov.32 d11[1], r8
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov.32 d10[1], r0
+; LE-I32-NEXT: vorr q0, q7, q7
+; LE-I32-NEXT: vorr q1, q4, q4
+; LE-I32-NEXT: vorr q2, q6, q6
+; LE-I32-NEXT: vorr q3, q5, q5
+; LE-I32-NEXT: add sp, sp, #8
+; LE-I32-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc}
+;
+; LE-I64-LABEL: lrint_v16f16:
+; LE-I64: @ %bb.0:
+; LE-I64-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-I64-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-I64-NEXT: .pad #4
+; LE-I64-NEXT: sub sp, sp, #4
+; LE-I64-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEXT: .pad #120
+; LE-I64-NEXT: sub sp, sp, #120
+; LE-I64-NEXT: mov r11, r0
+; LE-I64-NEXT: vmov r0, s7
+; LE-I64-NEXT: vstr s15, [sp, #24] @ 4-byte Spill
+; LE-I64-NEXT: vmov.f32 s23, s13
+; LE-I64-NEXT: vstr s14, [sp, #100] @ 4-byte Spill
+; LE-I64-NEXT: vmov.f32 s25, s12
+; LE-I64-NEXT: vmov.f32 s27, s11
+; LE-I64-NEXT: vstr s10, [sp, #104] @ 4-byte Spill
+; LE-I64-NEXT: vstr s9, [sp, #108] @ 4-byte Spill
+; LE-I64-NEXT: vmov.f32 s24, s8
+; LE-I64-NEXT: vmov.f32 s19, s6
+; LE-I64-NEXT: vmov.f32 s29, s5
+; LE-I64-NEXT: vmov.f32 s17, s4
+; LE-I64-NEXT: vmov.f32 s16, s3
+; LE-I64-NEXT: vmov.f32 s21, s2
+; LE-I64-NEXT: vmov.f32 s26, s1
+; LE-I64-NEXT: vmov.f32 s18, s0
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov s0, r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: mov r7, r0
+; LE-I64-NEXT: vmov r0, s25
+; LE-I64-NEXT: str r1, [sp, #56] @ 4-byte Spill
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov s0, r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: mov r5, r0
+; LE-I64-NEXT: vmov r0, s27
+; LE-I64-NEXT: str r1, [sp, #116] @ 4-byte Spill
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov s0, r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: mov r6, r0
+; LE-I64-NEXT: vmov r0, s29
+; LE-I64-NEXT: str r1, [sp, #112] @ 4-byte Spill
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov s0, r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.32 d15[0], r0
+; LE-I64-NEXT: vmov r0, s23
+; LE-I64-NEXT: mov r4, r1
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov s0, r0
+; LE-I64-NEXT: add lr, sp, #80
+; LE-I64-NEXT: vmov.32 d17[0], r6
+; LE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: mov r6, r0
+; LE-I64-NEXT: vmov r0, s17
+; LE-I64-NEXT: vmov r8, s21
+; LE-I64-NEXT: str r1, [sp, #76] @ 4-byte Spill
+; LE-I64-NEXT: vmov r10, s19
+; LE-I64-NEXT: vmov.32 d10[0], r5
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov s0, r0
+; LE-I64-NEXT: add lr, sp, #40
+; LE-I64-NEXT: vmov.32 d11[0], r6
+; LE-I64-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.32 d14[0], r0
+; LE-I64-NEXT: mov r0, r10
+; LE-I64-NEXT: mov r9, r1
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov s0, r0
+; LE-I64-NEXT: vmov.32 d11[0], r7
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.32 d10[0], r0
+; LE-I64-NEXT: mov r0, r8
+; LE-I64-NEXT: mov r7, r1
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: mov r6, r0
+; LE-I64-NEXT: ldr r0, [sp, #56] @ 4-byte Reload
+; LE-I64-NEXT: vmov.32 d11[1], r0
+; LE-I64-NEXT: vmov r0, s18
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: mov r5, r0
+; LE-I64-NEXT: vmov r0, s16
+; LE-I64-NEXT: vmov.32 d10[1], r7
+; LE-I64-NEXT: add lr, sp, #56
+; LE-I64-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov s0, r0
+; LE-I64-NEXT: vmov.32 d15[1], r4
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.32 d9[0], r0
+; LE-I64-NEXT: vmov r0, s26
+; LE-I64-NEXT: add lr, sp, #24
+; LE-I64-NEXT: vmov r8, s24
+; LE-I64-NEXT: vmov.32 d14[1], r9
+; LE-I64-NEXT: mov r10, r1
+; LE-I64-NEXT: vmov s24, r5
+; LE-I64-NEXT: vldr s0, [sp, #24] @ 4-byte Reload
+; LE-I64-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-I64-NEXT: vmov r7, s0
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov.f32 s0, s24
+; LE-I64-NEXT: vmov s22, r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.f32 s0, s22
+; LE-I64-NEXT: mov r5, r1
+; LE-I64-NEXT: vmov.32 d14[0], r0
+; LE-I64-NEXT: vmov s24, r6
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.32 d15[0], r0
+; LE-I64-NEXT: mov r0, r7
+; LE-I64-NEXT: mov r6, r1
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov.f32 s0, s24
+; LE-I64-NEXT: vmov s22, r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.f32 s0, s22
+; LE-I64-NEXT: vmov.32 d8[0], r0
+; LE-I64-NEXT: add lr, sp, #8
+; LE-I64-NEXT: mov r9, r1
+; LE-I64-NEXT: vmov.32 d15[1], r6
+; LE-I64-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.32 d13[0], r0
+; LE-I64-NEXT: mov r0, r8
+; LE-I64-NEXT: mov r6, r1
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vldr s0, [sp, #100] @ 4-byte Reload
+; LE-I64-NEXT: mov r7, r0
+; LE-I64-NEXT: vmov.32 d14[1], r5
+; LE-I64-NEXT: vmov r0, s0
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vldr s0, [sp, #104] @ 4-byte Reload
+; LE-I64-NEXT: vmov s20, r0
+; LE-I64-NEXT: vmov.32 d13[1], r6
+; LE-I64-NEXT: vmov r4, s0
+; LE-I64-NEXT: vldr s0, [sp, #108] @ 4-byte Reload
+; LE-I64-NEXT: vmov r0, s0
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov.f32 s0, s20
+; LE-I64-NEXT: vmov s16, r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.f32 s0, s16
+; LE-I64-NEXT: mov r5, r1
+; LE-I64-NEXT: vmov.32 d12[0], r0
+; LE-I64-NEXT: vmov s18, r7
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.32 d11[0], r0
+; LE-I64-NEXT: mov r0, r4
+; LE-I64-NEXT: mov r6, r1
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov.f32 s0, s18
+; LE-I64-NEXT: vmov s16, r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.f32 s0, s16
+; LE-I64-NEXT: vmov.32 d10[0], r0
+; LE-I64-NEXT: mov r4, r1
+; LE-I64-NEXT: vmov.32 d11[1], r6
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: add lr, sp, #80
+; LE-I64-NEXT: vmov.32 d10[1], r4
+; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT: add lr, sp, #40
+; LE-I64-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload
+; LE-I64-NEXT: add lr, sp, #8
+; LE-I64-NEXT: vmov.32 d16[0], r0
+; LE-I64-NEXT: ldr r0, [sp, #76] @ 4-byte Reload
+; LE-I64-NEXT: vldmia lr, {d20, d21} @ 16-byte Reload
+; LE-I64-NEXT: add lr, sp, #24
+; LE-I64-NEXT: vmov.32 d19[1], r0
+; LE-I64-NEXT: ldr r0, [sp, #116] @ 4-byte Reload
+; LE-I64-NEXT: vmov.32 d21[1], r10
+; LE-I64-NEXT: vmov.32 d18[1], r0
+; LE-I64-NEXT: ldr r0, [sp, #112] @ 4-byte Reload
+; LE-I64-NEXT: vmov.32 d12[1], r5
+; LE-I64-NEXT: vmov.32 d17[1], r0
+; LE-I64-NEXT: add r0, r11, #64
+; LE-I64-NEXT: vmov.32 d16[1], r1
+; LE-I64-NEXT: vst1.64 {d10, d11}, [r0:128]!
+; LE-I64-NEXT: vst1.64 {d16, d17}, [r0:128]!
+; LE-I64-NEXT: vst1.64 {d18, d19}, [r0:128]!
+; LE-I64-NEXT: vmov.32 d20[1], r9
+; LE-I64-NEXT: vst1.64 {d12, d13}, [r0:128]
+; LE-I64-NEXT: vst1.64 {d14, d15}, [r11:128]!
+; LE-I64-NEXT: vst1.64 {d20, d21}, [r11:128]!
+; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT: add lr, sp, #56
+; LE-I64-NEXT: vst1.64 {d16, d17}, [r11:128]!
+; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT: vst1.64 {d16, d17}, [r11:128]
+; LE-I64-NEXT: add sp, sp, #120
+; LE-I64-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEXT: add sp, sp, #4
+; LE-I64-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; BE-I32-LABEL: lrint_v16f16:
+; BE-I32: @ %bb.0:
+; BE-I32-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr}
+; BE-I32-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr}
+; BE-I32-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEXT: .pad #16
+; BE-I32-NEXT: sub sp, sp, #16
+; BE-I32-NEXT: vmov r0, s1
+; BE-I32-NEXT: vstr s14, [sp, #4] @ 4-byte Spill
+; BE-I32-NEXT: vmov.f32 s30, s15
+; BE-I32-NEXT: vstr s13, [sp, #12] @ 4-byte Spill
+; BE-I32-NEXT: vmov.f32 s17, s12
+; BE-I32-NEXT: vstr s10, [sp, #8] @ 4-byte Spill
+; BE-I32-NEXT: vmov.f32 s19, s11
+; BE-I32-NEXT: vstr s8, [sp] @ 4-byte Spill
+; BE-I32-NEXT: vmov.f32 s21, s9
+; BE-I32-NEXT: vmov.f32 s23, s7
+; BE-I32-NEXT: vmov.f32 s24, s6
+; BE-I32-NEXT: vmov.f32 s25, s5
+; BE-I32-NEXT: vmov.f32 s26, s4
+; BE-I32-NEXT: vmov.f32 s27, s3
+; BE-I32-NEXT: vmov.f32 s28, s2
+; BE-I32-NEXT: vmov.f32 s29, s0
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: mov r8, r0
+; BE-I32-NEXT: vmov r0, s27
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: mov r9, r0
+; BE-I32-NEXT: vmov r0, s25
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: mov r10, r0
+; BE-I32-NEXT: vmov r0, s23
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: mov r7, r0
+; BE-I32-NEXT: vmov r0, s21
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: mov r4, r0
+; BE-I32-NEXT: vmov r0, s19
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: mov r5, r0
+; BE-I32-NEXT: vmov r0, s30
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: mov r6, r0
+; BE-I32-NEXT: vmov r0, s17
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov.32 d8[0], r0
+; BE-I32-NEXT: vmov r0, s29
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov.32 d10[0], r0
+; BE-I32-NEXT: vmov r0, s28
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov.32 d11[0], r0
+; BE-I32-NEXT: vmov r0, s26
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov.32 d14[0], r0
+; BE-I32-NEXT: vmov r0, s24
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vldr s0, [sp] @ 4-byte Reload
+; BE-I32-NEXT: vmov.32 d15[0], r0
+; BE-I32-NEXT: vmov r0, s0
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vldr s0, [sp, #4] @ 4-byte Reload
+; BE-I32-NEXT: vmov.32 d12[0], r0
+; BE-I32-NEXT: vmov r0, s0
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vldr s0, [sp, #8] @ 4-byte Reload
+; BE-I32-NEXT: vmov.32 d9[0], r0
+; BE-I32-NEXT: vmov r0, s0
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov s0, r6
+; BE-I32-NEXT: vmov.32 d13[0], r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov s0, r5
+; BE-I32-NEXT: vmov.32 d9[1], r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov s0, r4
+; BE-I32-NEXT: vmov.32 d13[1], r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov s0, r7
+; BE-I32-NEXT: vmov.32 d12[1], r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov s0, r10
+; BE-I32-NEXT: vmov.32 d15[1], r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov s0, r9
+; BE-I32-NEXT: vmov.32 d14[1], r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vldr s0, [sp, #12] @ 4-byte Reload
+; BE-I32-NEXT: vmov.32 d11[1], r0
+; BE-I32-NEXT: vmov r0, s0
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: vmov.32 d10[1], r8
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov.32 d8[1], r0
+; BE-I32-NEXT: vrev64.32 q0, q5
+; BE-I32-NEXT: vrev64.32 q1, q7
+; BE-I32-NEXT: vrev64.32 q2, q6
+; BE-I32-NEXT: vrev64.32 q3, q4
+; BE-I32-NEXT: add sp, sp, #16
+; BE-I32-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc}
+;
+; BE-I64-LABEL: lrint_v16f16:
+; BE-I64: @ %bb.0:
+; BE-I64-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-I64-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-I64-NEXT: .pad #4
+; BE-I64-NEXT: sub sp, sp, #4
+; BE-I64-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEXT: .pad #112
+; BE-I64-NEXT: sub sp, sp, #112
+; BE-I64-NEXT: mov r11, r0
+; BE-I64-NEXT: vmov r0, s14
+; BE-I64-NEXT: vmov.f32 s17, s15
+; BE-I64-NEXT: vstr s13, [sp, #52] @ 4-byte Spill
+; BE-I64-NEXT: vmov.f32 s21, s12
+; BE-I64-NEXT: vstr s10, [sp, #68] @ 4-byte Spill
+; BE-I64-NEXT: vmov.f32 s23, s11
+; BE-I64-NEXT: vstr s7, [sp, #72] @ 4-byte Spill
+; BE-I64-NEXT: vmov.f32 s19, s9
+; BE-I64-NEXT: vstr s4, [sp, #28] @ 4-byte Spill
+; BE-I64-NEXT: vmov.f32 s26, s8
+; BE-I64-NEXT: vmov.f32 s24, s6
+; BE-I64-NEXT: vmov.f32 s18, s5
+; BE-I64-NEXT: vmov.f32 s25, s3
+; BE-I64-NEXT: vmov.f32 s16, s2
+; BE-I64-NEXT: vmov.f32 s27, s1
+; BE-I64-NEXT: vmov.f32 s29, s0
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vmov s0, r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: mov r8, r0
+; BE-I64-NEXT: vmov r0, s29
+; BE-I64-NEXT: mov r4, r1
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: mov r9, r0
+; BE-I64-NEXT: vmov r0, s27
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: mov r7, r0
+; BE-I64-NEXT: vmov r0, s21
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: mov r6, r0
+; BE-I64-NEXT: vmov r0, s25
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: mov r5, r0
+; BE-I64-NEXT: vmov r0, s23
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vmov s0, r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.32 d16[0], r0
+; BE-I64-NEXT: vmov s0, r5
+; BE-I64-NEXT: str r1, [sp, #108] @ 4-byte Spill
+; BE-I64-NEXT: vstr d16, [sp, #96] @ 8-byte Spill
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.32 d16[0], r0
+; BE-I64-NEXT: vmov s0, r6
+; BE-I64-NEXT: str r1, [sp, #92] @ 4-byte Spill
+; BE-I64-NEXT: vstr d16, [sp, #80] @ 8-byte Spill
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.32 d16[0], r0
+; BE-I64-NEXT: vmov s0, r7
+; BE-I64-NEXT: str r1, [sp, #76] @ 4-byte Spill
+; BE-I64-NEXT: vstr d16, [sp, #56] @ 8-byte Spill
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov s0, r9
+; BE-I64-NEXT: mov r10, r1
+; BE-I64-NEXT: vmov.32 d14[0], r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.32 d15[0], r0
+; BE-I64-NEXT: vmov r0, s17
+; BE-I64-NEXT: mov r5, r1
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vmov s0, r0
+; BE-I64-NEXT: vmov.32 d10[0], r8
+; BE-I64-NEXT: vmov r6, s19
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.32 d11[0], r0
+; BE-I64-NEXT: mov r0, r6
+; BE-I64-NEXT: mov r7, r1
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: mov r6, r0
+; BE-I64-NEXT: vmov r0, s18
+; BE-I64-NEXT: vmov.32 d10[1], r4
+; BE-I64-NEXT: vstr d10, [sp, #40] @ 8-byte Spill
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: mov r4, r0
+; BE-I64-NEXT: vmov r0, s16
+; BE-I64-NEXT: vmov.32 d11[1], r7
+; BE-I64-NEXT: vstr d11, [sp, #32] @ 8-byte Spill
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vmov.32 d15[1], r5
+; BE-I64-NEXT: vmov s0, r0
+; BE-I64-NEXT: vstr d15, [sp, #16] @ 8-byte Spill
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vldr s0, [sp, #28] @ 4-byte Reload
+; BE-I64-NEXT: vmov r5, s26
+; BE-I64-NEXT: vmov.32 d16[0], r0
+; BE-I64-NEXT: vmov s26, r4
+; BE-I64-NEXT: vmov r0, s0
+; BE-I64-NEXT: mov r8, r1
+; BE-I64-NEXT: vmov.32 d14[1], r10
+; BE-I64-NEXT: vmov r4, s24
+; BE-I64-NEXT: vstr d16, [sp] @ 8-byte Spill
+; BE-I64-NEXT: vstr d14, [sp, #8] @ 8-byte Spill
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vmov.f32 s0, s26
+; BE-I64-NEXT: vmov s22, r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.f32 s0, s22
+; BE-I64-NEXT: mov r7, r1
+; BE-I64-NEXT: vmov.32 d13[0], r0
+; BE-I64-NEXT: vmov s24, r6
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.32 d14[0], r0
+; BE-I64-NEXT: mov r0, r4
+; BE-I64-NEXT: mov r6, r1
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vmov.f32 s0, s24
+; BE-I64-NEXT: vmov s22, r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.f32 s0, s22
+; BE-I64-NEXT: mov r9, r1
+; BE-I64-NEXT: vmov.32 d12[0], r0
+; BE-I64-NEXT: vmov.32 d14[1], r6
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.32 d11[0], r0
+; BE-I64-NEXT: mov r0, r5
+; BE-I64-NEXT: mov r6, r1
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vldr s0, [sp, #52] @ 4-byte Reload
+; BE-I64-NEXT: mov r4, r0
+; BE-I64-NEXT: vmov.32 d13[1], r7
+; BE-I64-NEXT: vmov r0, s0
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vldr s0, [sp, #68] @ 4-byte Reload
+; BE-I64-NEXT: vmov s20, r0
+; BE-I64-NEXT: vmov.32 d11[1], r6
+; BE-I64-NEXT: vmov r7, s0
+; BE-I64-NEXT: vldr s0, [sp, #72] @ 4-byte Reload
+; BE-I64-NEXT: vmov r0, s0
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vmov.f32 s0, s20
+; BE-I64-NEXT: vmov s16, r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.f32 s0, s16
+; BE-I64-NEXT: mov r5, r1
+; BE-I64-NEXT: vmov.32 d10[0], r0
+; BE-I64-NEXT: vmov s18, r4
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.32 d15[0], r0
+; BE-I64-NEXT: mov r0, r7
+; BE-I64-NEXT: mov r4, r1
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vmov.f32 s0, s18
+; BE-I64-NEXT: vmov s16, r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.f32 s0, s16
+; BE-I64-NEXT: mov r6, r1
+; BE-I64-NEXT: vmov.32 d9[0], r0
+; BE-I64-NEXT: vmov.32 d15[1], r4
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.32 d24[0], r0
+; BE-I64-NEXT: ldr r0, [sp, #76] @ 4-byte Reload
+; BE-I64-NEXT: vldr d23, [sp, #56] @ 8-byte Reload
+; BE-I64-NEXT: vldr d20, [sp, #8] @ 8-byte Reload
+; BE-I64-NEXT: vmov.32 d23[1], r0
+; BE-I64-NEXT: ldr r0, [sp, #92] @ 4-byte Reload
+; BE-I64-NEXT: vldr d22, [sp, #80] @ 8-byte Reload
+; BE-I64-NEXT: vldr d26, [sp, #16] @ 8-byte Reload
+; BE-I64-NEXT: vrev64.32 d21, d20
+; BE-I64-NEXT: vmov.32 d22[1], r0
+; BE-I64-NEXT: ldr r0, [sp, #108] @ 4-byte Reload
+; BE-I64-NEXT: vldr d30, [sp] @ 8-byte Reload
+; BE-I64-NEXT: vldr d25, [sp, #96] @ 8-byte Reload
+; BE-I64-NEXT: vrev64.32 d20, d26
+; BE-I64-NEXT: vldr d26, [sp, #32] @ 8-byte Reload
+; BE-I64-NEXT: vmov.32 d10[1], r5
+; BE-I64-NEXT: vmov.32 d12[1], r9
+; BE-I64-NEXT: vldr d28, [sp, #40] @ 8-byte Reload
+; BE-I64-NEXT: vrev64.32 d27, d26
+; BE-I64-NEXT: vmov.32 d25[1], r0
+; BE-I64-NEXT: add r0, r11, #64
+; BE-I64-NEXT: vmov.32 d30[1], r8
+; BE-I64-NEXT: vmov.32 d9[1], r6
+; BE-I64-NEXT: vrev64.32 d26, d28
+; BE-I64-NEXT: vrev64.32 d29, d10
+; BE-I64-NEXT: vmov.32 d24[1], r1
+; BE-I64-NEXT: vrev64.32 d1, d12
+; BE-I64-NEXT: vrev64.32 d28, d23
+; BE-I64-NEXT: vrev64.32 d23, d22
+; BE-I64-NEXT: vrev64.32 d22, d30
+; BE-I64-NEXT: vrev64.32 d31, d25
+; BE-I64-NEXT: vrev64.32 d0, d9
+; BE-I64-NEXT: vrev64.32 d30, d24
+; BE-I64-NEXT: vst1.64 {d0, d1}, [r0:128]!
+; BE-I64-NEXT: vst1.64 {d30, d31}, [r0:128]!
+; BE-I64-NEXT: vst1.64 {d28, d29}, [r0:128]!
+; BE-I64-NEXT: vrev64.32 d19, d13
+; BE-I64-NEXT: vst1.64 {d26, d27}, [r0:128]
+; BE-I64-NEXT: vst1.64 {d20, d21}, [r11:128]!
+; BE-I64-NEXT: vrev64.32 d18, d14
+; BE-I64-NEXT: vst1.64 {d22, d23}, [r11:128]!
+; BE-I64-NEXT: vrev64.32 d17, d15
+; BE-I64-NEXT: vrev64.32 d16, d11
+; BE-I64-NEXT: vst1.64 {d18, d19}, [r11:128]!
+; BE-I64-NEXT: vst1.64 {d16, d17}, [r11:128]
+; BE-I64-NEXT: add sp, sp, #112
+; BE-I64-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEXT: add sp, sp, #4
+; BE-I64-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+ %a = call <16 x iXLen> @llvm.lrint.v16iXLen.v16f16(<16 x half> %x)
+ ret <16 x iXLen> %a
+}
define <1 x iXLen> @lrint_v1f32(<1 x float> %x) {
; LE-I32-LABEL: lrint_v1f32:
diff --git a/llvm/test/CodeGen/ARM/vlldm-vlstm-uops.mir b/llvm/test/CodeGen/ARM/vlldm-vlstm-uops.mir
index 8fa9337..03cb8e3 100644
--- a/llvm/test/CodeGen/ARM/vlldm-vlstm-uops.mir
+++ b/llvm/test/CodeGen/ARM/vlldm-vlstm-uops.mir
@@ -60,9 +60,9 @@ body: |
$sp = t2STMDB_UPD $sp, 14, $noreg, $r4, killed $r5, killed $r6, killed $r7, killed $r8, killed $r9, killed $r10, killed $r11
$r4 = t2BICri $r4, 1, 14, $noreg, $noreg
$sp = tSUBspi $sp, 34, 14, $noreg
- VLSTM $sp, 14 /* CC::al */, $noreg, 0, implicit-def $vpr, implicit-def $fpscr, implicit-def $fpscr_nzcv, implicit undef $vpr, implicit undef $fpscr, implicit undef $fpscr_nzcv, implicit undef $d0, implicit undef $d1, implicit undef $d2, implicit undef $d3, implicit undef $d4, implicit undef $d5, implicit undef $d6, implicit undef $d7, implicit $d8, implicit $d9, implicit $d10, implicit $d11, implicit $d12, implicit $d13, implicit $d14, implicit $d15
+ VLSTM $sp, 14 /* CC::al */, $noreg, 0, implicit-def $vpr, implicit-def $fpscr, implicit-def $fpscr_nzcv, implicit-def $fpscr_rm, implicit undef $vpr, implicit undef $fpscr, implicit undef $fpscr_nzcv, implicit undef $fpscr_rm, implicit undef $d0, implicit undef $d1, implicit undef $d2, implicit undef $d3, implicit undef $d4, implicit undef $d5, implicit undef $d6, implicit undef $d7, implicit $d8, implicit $d9, implicit $d10, implicit $d11, implicit $d12, implicit $d13, implicit $d14, implicit $d15
tBLXNSr 14, $noreg, killed $r4, csr_aapcs, implicit-def $lr, implicit $sp, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $q0, implicit-def $q1, implicit-def $q2, implicit-def $q3, implicit-def $q4, implicit-def $q5, implicit-def $q6, implicit-def $q7
- VLLDM $sp, 14 /* CC::al */, $noreg, 0, implicit-def $vpr, implicit-def $fpscr, implicit-def $fpscr_nzcv, implicit-def $d0, implicit-def $d1, implicit-def $d2, implicit-def $d3, implicit-def $d4, implicit-def $d5, implicit-def $d6, implicit-def $d7, implicit-def $d8, implicit-def $d9, implicit-def $d10, implicit-def $d11, implicit-def $d12, implicit-def $d13, implicit-def $d14, implicit-def $d15
+ VLLDM $sp, 14 /* CC::al */, $noreg, 0, implicit-def $vpr, implicit-def $fpscr, implicit-def $fpscr_nzcv, implicit-def $fpscr_rm, implicit-def $d0, implicit-def $d1, implicit-def $d2, implicit-def $d3, implicit-def $d4, implicit-def $d5, implicit-def $d6, implicit-def $d7, implicit-def $d8, implicit-def $d9, implicit-def $d10, implicit-def $d11, implicit-def $d12, implicit-def $d13, implicit-def $d14, implicit-def $d15
$sp = tADDspi $sp, 34, 14, $noreg
$sp = t2LDMIA_UPD $sp, 14, $noreg, def $r4, def $r5, def $r6, def $r7, def $r8, def $r9, def $r10, def $r11
$sp = t2LDMIA_RET $sp, 14, $noreg, def $r4, def $pc
diff --git a/llvm/test/CodeGen/BPF/addr-space-memintrinsic-gep.ll b/llvm/test/CodeGen/BPF/addr-space-memintrinsic-gep.ll
new file mode 100644
index 0000000..1db8391
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/addr-space-memintrinsic-gep.ll
@@ -0,0 +1,60 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt --bpf-check-and-opt-ir -S -mtriple=bpf-pc-linux < %s | FileCheck %s
+
+@page1 = dso_local local_unnamed_addr addrspace(1) global [10 x ptr] zeroinitializer, align 8
+@page2 = dso_local local_unnamed_addr addrspace(1) global [10 x ptr] zeroinitializer, align 8
+
+define dso_local void @test_memset() local_unnamed_addr {
+; CHECK-LABEL: define dso_local void @test_memset() local_unnamed_addr {
+; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 addrspacecast (ptr addrspace(1) getelementptr inbounds nuw (i8, ptr addrspace(1) @page1, i64 16) to ptr), i8 0, i64 16, i1 false)
+; CHECK-NEXT: ret void
+;
+ tail call void @llvm.memset.p1.i64(ptr addrspace(1) noundef nonnull align 8 dereferenceable(16) getelementptr inbounds nuw (i8, ptr addrspace(1) @page1, i64 16), i8 0, i64 16, i1 false)
+ ret void
+}
+
+declare void @llvm.memset.p1.i64(ptr addrspace(1) writeonly captures(none), i8, i64, i1 immarg)
+
+define dso_local void @test_memcpy() local_unnamed_addr {
+; CHECK-LABEL: define dso_local void @test_memcpy() local_unnamed_addr {
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 addrspacecast (ptr addrspace(1) getelementptr inbounds nuw (i8, ptr addrspace(1) @page2, i64 8) to ptr), ptr align 8 addrspacecast (ptr addrspace(1) getelementptr inbounds nuw (i8, ptr addrspace(1) @page1, i64 8) to ptr), i64 16, i1 false)
+; CHECK-NEXT: ret void
+;
+ tail call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) noundef nonnull align 8 dereferenceable(16) getelementptr inbounds nuw (i8, ptr addrspace(1) @page2, i64 8), ptr addrspace(1) noundef nonnull align 8 dereferenceable(16) getelementptr inbounds nuw (i8, ptr addrspace(1) @page1, i64 8), i64 16, i1 false)
+ ret void
+}
+
+declare void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) noalias writeonly captures(none), ptr addrspace(1) noalias readonly captures(none), i64, i1 immarg)
+
+define dso_local void @test_memmove() local_unnamed_addr {
+; CHECK-LABEL: define dso_local void @test_memmove() local_unnamed_addr {
+; CHECK-NEXT: call void @llvm.memmove.p0.p0.i64(ptr align 8 addrspacecast (ptr addrspace(1) getelementptr inbounds nuw (i8, ptr addrspace(1) @page2, i64 16) to ptr), ptr align 8 addrspacecast (ptr addrspace(1) getelementptr inbounds nuw (i8, ptr addrspace(1) @page2, i64 8) to ptr), i64 16, i1 false)
+; CHECK-NEXT: ret void
+;
+ tail call void @llvm.memmove.p1.p1.i64(ptr addrspace(1) noundef nonnull align 8 dereferenceable(16) getelementptr inbounds nuw (i8, ptr addrspace(1) @page2, i64 16), ptr addrspace(1) noundef nonnull align 8 dereferenceable(16) getelementptr inbounds nuw (i8, ptr addrspace(1) @page2, i64 8), i64 16, i1 false)
+ ret void
+}
+
+declare void @llvm.memmove.p1.p1.i64(ptr addrspace(1) writeonly captures(none), ptr addrspace(1) readonly captures(none), i64, i1 immarg)
+
+define dso_local void @test_memset_inline() local_unnamed_addr {
+; CHECK-LABEL: define dso_local void @test_memset_inline() local_unnamed_addr {
+; CHECK-NEXT: call void @llvm.memset.inline.p0.i64(ptr align 8 addrspacecast (ptr addrspace(1) getelementptr inbounds nuw (i8, ptr addrspace(1) @page1, i64 16) to ptr), i8 0, i64 16, i1 false)
+; CHECK-NEXT: ret void
+;
+ tail call void @llvm.memset.inline.p1.i64(ptr addrspace(1) nonnull align 8 getelementptr inbounds nuw (i8, ptr addrspace(1) @page1, i64 16), i8 0, i64 16, i1 false)
+ ret void
+}
+
+declare void @llvm.memset.inline.p1.i64(ptr addrspace(1) writeonly captures(none), i8, i64, i1 immarg)
+
+define dso_local void @test_memcpy_inline() local_unnamed_addr {
+; CHECK-LABEL: define dso_local void @test_memcpy_inline() local_unnamed_addr {
+; CHECK-NEXT: call void @llvm.memcpy.inline.p0.p0.i64(ptr align 8 addrspacecast (ptr addrspace(1) getelementptr inbounds nuw (i8, ptr addrspace(1) @page2, i64 8) to ptr), ptr align 8 addrspacecast (ptr addrspace(1) getelementptr inbounds nuw (i8, ptr addrspace(1) @page1, i64 8) to ptr), i64 16, i1 false)
+; CHECK-NEXT: ret void
+;
+ tail call void @llvm.memcpy.inline.p1.p1.i64(ptr addrspace(1) nonnull align 8 getelementptr inbounds nuw (i8, ptr addrspace(1) @page2, i64 8), ptr addrspace(1) nonnull align 8 getelementptr inbounds nuw (i8, ptr addrspace(1) @page1, i64 8), i64 16, i1 false)
+ ret void
+}
+
+declare void @llvm.memcpy.inline.p1.p1.i64(ptr addrspace(1) noalias writeonly captures(none), ptr addrspace(1) noalias readonly captures(none), i64, i1 immarg)
diff --git a/llvm/test/CodeGen/BPF/addr-space-memintrinsic-no-gep.ll b/llvm/test/CodeGen/BPF/addr-space-memintrinsic-no-gep.ll
new file mode 100644
index 0000000..62fa2e4
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/addr-space-memintrinsic-no-gep.ll
@@ -0,0 +1,49 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt --bpf-check-and-opt-ir -S -mtriple=bpf-pc-linux < %s | FileCheck %s
+
+@page1 = dso_local local_unnamed_addr addrspace(1) global [10 x ptr] zeroinitializer, align 8
+@page2 = dso_local local_unnamed_addr addrspace(1) global [10 x ptr] zeroinitializer, align 8
+
+define dso_local void @test_memset() local_unnamed_addr {
+; CHECK-LABEL: define dso_local void @test_memset() local_unnamed_addr {
+; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 addrspacecast (ptr addrspace(1) @page1 to ptr), i8 0, i64 16, i1 false)
+; CHECK-NEXT: ret void
+;
+ tail call void @llvm.memset.p1.i64(ptr addrspace(1) noundef align 8 dereferenceable(16) @page1, i8 0, i64 16, i1 false)
+ ret void
+}
+
+declare void @llvm.memset.p1.i64(ptr addrspace(1) writeonly captures(none), i8, i64, i1 immarg)
+
+define dso_local void @test_memcpy() local_unnamed_addr {
+; CHECK-LABEL: define dso_local void @test_memcpy() local_unnamed_addr {
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 addrspacecast (ptr addrspace(1) @page2 to ptr), ptr align 8 addrspacecast (ptr addrspace(1) @page1 to ptr), i64 16, i1 false)
+; CHECK-NEXT: ret void
+;
+ tail call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) noundef align 8 dereferenceable(16) @page2, ptr addrspace(1) noundef align 8 dereferenceable(16) @page1, i64 16, i1 false)
+ ret void
+}
+
+declare void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) noalias writeonly captures(none), ptr addrspace(1) noalias readonly captures(none), i64, i1 immarg)
+
+define dso_local void @test_memset_inline() local_unnamed_addr {
+; CHECK-LABEL: define dso_local void @test_memset_inline() local_unnamed_addr {
+; CHECK-NEXT: call void @llvm.memset.inline.p0.i64(ptr align 8 addrspacecast (ptr addrspace(1) @page1 to ptr), i8 0, i64 16, i1 false)
+; CHECK-NEXT: ret void
+;
+ tail call void @llvm.memset.inline.p1.i64(ptr addrspace(1) align 8 @page1, i8 0, i64 16, i1 false)
+ ret void
+}
+
+declare void @llvm.memset.inline.p1.i64(ptr addrspace(1) writeonly captures(none), i8, i64, i1 immarg)
+
+define dso_local void @test_memcpy_inline() local_unnamed_addr {
+; CHECK-LABEL: define dso_local void @test_memcpy_inline() local_unnamed_addr {
+; CHECK-NEXT: call void @llvm.memcpy.inline.p0.p0.i64(ptr align 8 addrspacecast (ptr addrspace(1) @page2 to ptr), ptr align 8 addrspacecast (ptr addrspace(1) @page1 to ptr), i64 16, i1 false)
+; CHECK-NEXT: ret void
+;
+ tail call void @llvm.memcpy.inline.p1.p1.i64(ptr addrspace(1) align 8 @page2, ptr addrspace(1) align 8 @page1, i64 16, i1 false)
+ ret void
+}
+
+declare void @llvm.memcpy.inline.p1.p1.i64(ptr addrspace(1) noalias writeonly captures(none), ptr addrspace(1) noalias readonly captures(none), i64, i1 immarg)
diff --git a/llvm/test/CodeGen/MIR2Vec/Inputs/mir2vec_dummy_2D_vocab.json b/llvm/test/CodeGen/MIR2Vec/Inputs/mir2vec_dummy_2D_vocab.json
new file mode 100644
index 0000000..2894fff
--- /dev/null
+++ b/llvm/test/CodeGen/MIR2Vec/Inputs/mir2vec_dummy_2D_vocab.json
@@ -0,0 +1,11 @@
+{
+ "entities" : {
+ "ABS_Fp":[1, 2],
+ "ADC":[3, 4],
+ "ADD":[5, 6],
+ "ADDPDrm":[7, 8],
+ "ADDPDrr":[9, 10],
+ "ADDPSrr":[11, 12],
+ "ADDSDrm":[13, 14]
+ }
+} \ No newline at end of file
diff --git a/llvm/test/CodeGen/MIR2Vec/Inputs/mir2vec_inconsistent_dims.json b/llvm/test/CodeGen/MIR2Vec/Inputs/mir2vec_inconsistent_dims.json
new file mode 100644
index 0000000..bf04163
--- /dev/null
+++ b/llvm/test/CodeGen/MIR2Vec/Inputs/mir2vec_inconsistent_dims.json
@@ -0,0 +1,7 @@
+{
+ "entities": {
+ "ADD": [1.0, 2.0, 3.0],
+ "SUB": [1.5],
+ "MUL": [2.0, 3.0]
+ }
+}
diff --git a/llvm/test/CodeGen/MIR2Vec/Inputs/mir2vec_invalid_vocab.json b/llvm/test/CodeGen/MIR2Vec/Inputs/mir2vec_invalid_vocab.json
new file mode 100644
index 0000000..585a85e
--- /dev/null
+++ b/llvm/test/CodeGen/MIR2Vec/Inputs/mir2vec_invalid_vocab.json
@@ -0,0 +1,5 @@
+{
+ "invalid_structure": {
+ "ADD": [ 1, 2, 3]
+ }
+} \ No newline at end of file
diff --git a/llvm/test/CodeGen/MIR2Vec/Inputs/mir2vec_zero_vocab.json b/llvm/test/CodeGen/MIR2Vec/Inputs/mir2vec_zero_vocab.json
new file mode 100644
index 0000000..63e8ccbd
--- /dev/null
+++ b/llvm/test/CodeGen/MIR2Vec/Inputs/mir2vec_zero_vocab.json
@@ -0,0 +1,12 @@
+{
+ "entities": {
+ "ADD": [],
+ "SUB": [],
+ "MUL": [],
+ "MOV": [],
+ "CMP": [],
+ "JMP": [],
+ "CALL": [],
+ "RET": []
+ }
+} \ No newline at end of file
diff --git a/llvm/test/CodeGen/MIR2Vec/Inputs/reference_x86_vocab_print.txt b/llvm/test/CodeGen/MIR2Vec/Inputs/reference_x86_vocab_print.txt
new file mode 100644
index 0000000..6327cff
--- /dev/null
+++ b/llvm/test/CodeGen/MIR2Vec/Inputs/reference_x86_vocab_print.txt
@@ -0,0 +1,6882 @@
+Key: AAA: [ 0.00 0.00 ]
+Key: AAD: [ 0.00 0.00 ]
+Key: AADD: [ 0.00 0.00 ]
+Key: AAM: [ 0.00 0.00 ]
+Key: AAND: [ 0.00 0.00 ]
+Key: AAS: [ 0.00 0.00 ]
+Key: ABS_F: [ 0.00 0.00 ]
+Key: ABS_Fp: [ 1.00 2.00 ]
+Key: ADC: [ 3.00 4.00 ]
+Key: ADCX: [ 0.00 0.00 ]
+Key: ADD: [ 5.00 6.00 ]
+Key: ADDPDrm: [ 7.00 8.00 ]
+Key: ADDPDrr: [ 9.00 10.00 ]
+Key: ADDPSrm: [ 0.00 0.00 ]
+Key: ADDPSrr: [ 11.00 12.00 ]
+Key: ADDR: [ 0.00 0.00 ]
+Key: ADDSDrm: [ 13.00 14.00 ]
+Key: ADDSDrm_Int: [ 0.00 0.00 ]
+Key: ADDSDrr: [ 0.00 0.00 ]
+Key: ADDSDrr_Int: [ 0.00 0.00 ]
+Key: ADDSSrm: [ 0.00 0.00 ]
+Key: ADDSSrm_Int: [ 0.00 0.00 ]
+Key: ADDSSrr: [ 0.00 0.00 ]
+Key: ADDSSrr_Int: [ 0.00 0.00 ]
+Key: ADDSUBPDrm: [ 0.00 0.00 ]
+Key: ADDSUBPDrr: [ 0.00 0.00 ]
+Key: ADDSUBPSrm: [ 0.00 0.00 ]
+Key: ADDSUBPSrr: [ 0.00 0.00 ]
+Key: ADD_F: [ 0.00 0.00 ]
+Key: ADD_FI: [ 0.00 0.00 ]
+Key: ADD_FPrST: [ 0.00 0.00 ]
+Key: ADD_FST: [ 0.00 0.00 ]
+Key: ADD_Fp: [ 0.00 0.00 ]
+Key: ADD_FpI: [ 0.00 0.00 ]
+Key: ADD_FrST: [ 0.00 0.00 ]
+Key: ADJCALLSTACKDOWN: [ 0.00 0.00 ]
+Key: ADJCALLSTACKUP: [ 0.00 0.00 ]
+Key: ADOX: [ 0.00 0.00 ]
+Key: AESDEC: [ 0.00 0.00 ]
+Key: AESDECLASTrm: [ 0.00 0.00 ]
+Key: AESDECLASTrr: [ 0.00 0.00 ]
+Key: AESDECWIDE: [ 0.00 0.00 ]
+Key: AESDECrm: [ 0.00 0.00 ]
+Key: AESDECrr: [ 0.00 0.00 ]
+Key: AESENC: [ 0.00 0.00 ]
+Key: AESENCLASTrm: [ 0.00 0.00 ]
+Key: AESENCLASTrr: [ 0.00 0.00 ]
+Key: AESENCWIDE: [ 0.00 0.00 ]
+Key: AESENCrm: [ 0.00 0.00 ]
+Key: AESENCrr: [ 0.00 0.00 ]
+Key: AESIMCrm: [ 0.00 0.00 ]
+Key: AESIMCrr: [ 0.00 0.00 ]
+Key: AESKEYGENASSISTrmi: [ 0.00 0.00 ]
+Key: AESKEYGENASSISTrri: [ 0.00 0.00 ]
+Key: AND: [ 0.00 0.00 ]
+Key: ANDN: [ 0.00 0.00 ]
+Key: ANDNPDrm: [ 0.00 0.00 ]
+Key: ANDNPDrr: [ 0.00 0.00 ]
+Key: ANDNPSrm: [ 0.00 0.00 ]
+Key: ANDNPSrr: [ 0.00 0.00 ]
+Key: ANDPDrm: [ 0.00 0.00 ]
+Key: ANDPDrr: [ 0.00 0.00 ]
+Key: ANDPSrm: [ 0.00 0.00 ]
+Key: ANDPSrr: [ 0.00 0.00 ]
+Key: ANNOTATION_LABEL: [ 0.00 0.00 ]
+Key: AOR: [ 0.00 0.00 ]
+Key: ARITH_FENCE: [ 0.00 0.00 ]
+Key: ARPL: [ 0.00 0.00 ]
+Key: ASAN_CHECK_MEMACCESS: [ 0.00 0.00 ]
+Key: AVX: [ 0.00 0.00 ]
+Key: AVX_SET: [ 0.00 0.00 ]
+Key: AXOR: [ 0.00 0.00 ]
+Key: BEXTR: [ 0.00 0.00 ]
+Key: BEXTRI: [ 0.00 0.00 ]
+Key: BLCFILL: [ 0.00 0.00 ]
+Key: BLCI: [ 0.00 0.00 ]
+Key: BLCIC: [ 0.00 0.00 ]
+Key: BLCMSK: [ 0.00 0.00 ]
+Key: BLCS: [ 0.00 0.00 ]
+Key: BLENDPDrmi: [ 0.00 0.00 ]
+Key: BLENDPDrri: [ 0.00 0.00 ]
+Key: BLENDPSrmi: [ 0.00 0.00 ]
+Key: BLENDPSrri: [ 0.00 0.00 ]
+Key: BLENDVPDrm: [ 0.00 0.00 ]
+Key: BLENDVPDrr: [ 0.00 0.00 ]
+Key: BLENDVPSrm: [ 0.00 0.00 ]
+Key: BLENDVPSrr: [ 0.00 0.00 ]
+Key: BLSFILL: [ 0.00 0.00 ]
+Key: BLSI: [ 0.00 0.00 ]
+Key: BLSIC: [ 0.00 0.00 ]
+Key: BLSMSK: [ 0.00 0.00 ]
+Key: BLSR: [ 0.00 0.00 ]
+Key: BOUNDS: [ 0.00 0.00 ]
+Key: BSF: [ 0.00 0.00 ]
+Key: BSR: [ 0.00 0.00 ]
+Key: BSWAP: [ 0.00 0.00 ]
+Key: BT: [ 0.00 0.00 ]
+Key: BTC: [ 0.00 0.00 ]
+Key: BTR: [ 0.00 0.00 ]
+Key: BTS: [ 0.00 0.00 ]
+Key: BUNDLE: [ 0.00 0.00 ]
+Key: BZHI: [ 0.00 0.00 ]
+Key: CALL: [ 0.00 0.00 ]
+Key: CALLpcrel: [ 0.00 0.00 ]
+Key: CATCHRET: [ 0.00 0.00 ]
+Key: CBW: [ 0.00 0.00 ]
+Key: CCMP: [ 0.00 0.00 ]
+Key: CDQ: [ 0.00 0.00 ]
+Key: CDQE: [ 0.00 0.00 ]
+Key: CFCMOV: [ 0.00 0.00 ]
+Key: CFI_INSTRUCTION: [ 0.00 0.00 ]
+Key: CHS_F: [ 0.00 0.00 ]
+Key: CHS_Fp: [ 0.00 0.00 ]
+Key: CLAC: [ 0.00 0.00 ]
+Key: CLC: [ 0.00 0.00 ]
+Key: CLD: [ 0.00 0.00 ]
+Key: CLDEMOTE: [ 0.00 0.00 ]
+Key: CLEANUPRET: [ 0.00 0.00 ]
+Key: CLFLUSH: [ 0.00 0.00 ]
+Key: CLFLUSHOPT: [ 0.00 0.00 ]
+Key: CLGI: [ 0.00 0.00 ]
+Key: CLI: [ 0.00 0.00 ]
+Key: CLRSSBSY: [ 0.00 0.00 ]
+Key: CLTS: [ 0.00 0.00 ]
+Key: CLUI: [ 0.00 0.00 ]
+Key: CLWB: [ 0.00 0.00 ]
+Key: CLZERO: [ 0.00 0.00 ]
+Key: CMC: [ 0.00 0.00 ]
+Key: CMOV: [ 0.00 0.00 ]
+Key: CMOVBE_F: [ 0.00 0.00 ]
+Key: CMOVBE_Fp: [ 0.00 0.00 ]
+Key: CMOVB_F: [ 0.00 0.00 ]
+Key: CMOVB_Fp: [ 0.00 0.00 ]
+Key: CMOVE_F: [ 0.00 0.00 ]
+Key: CMOVE_Fp: [ 0.00 0.00 ]
+Key: CMOVNBE_F: [ 0.00 0.00 ]
+Key: CMOVNBE_Fp: [ 0.00 0.00 ]
+Key: CMOVNB_F: [ 0.00 0.00 ]
+Key: CMOVNB_Fp: [ 0.00 0.00 ]
+Key: CMOVNE_F: [ 0.00 0.00 ]
+Key: CMOVNE_Fp: [ 0.00 0.00 ]
+Key: CMOVNP_F: [ 0.00 0.00 ]
+Key: CMOVNP_Fp: [ 0.00 0.00 ]
+Key: CMOVP_F: [ 0.00 0.00 ]
+Key: CMOVP_Fp: [ 0.00 0.00 ]
+Key: CMOV_FR: [ 0.00 0.00 ]
+Key: CMOV_GR: [ 0.00 0.00 ]
+Key: CMOV_RFP: [ 0.00 0.00 ]
+Key: CMOV_VK: [ 0.00 0.00 ]
+Key: CMOV_VR: [ 0.00 0.00 ]
+Key: CMP: [ 0.00 0.00 ]
+Key: CMPCCXADDmr: [ 0.00 0.00 ]
+Key: CMPPDrmi: [ 0.00 0.00 ]
+Key: CMPPDrri: [ 0.00 0.00 ]
+Key: CMPPSrmi: [ 0.00 0.00 ]
+Key: CMPPSrri: [ 0.00 0.00 ]
+Key: CMPSB: [ 0.00 0.00 ]
+Key: CMPSDrmi: [ 0.00 0.00 ]
+Key: CMPSDrmi_Int: [ 0.00 0.00 ]
+Key: CMPSDrri: [ 0.00 0.00 ]
+Key: CMPSDrri_Int: [ 0.00 0.00 ]
+Key: CMPSL: [ 0.00 0.00 ]
+Key: CMPSQ: [ 0.00 0.00 ]
+Key: CMPSSrmi: [ 0.00 0.00 ]
+Key: CMPSSrmi_Int: [ 0.00 0.00 ]
+Key: CMPSSrri: [ 0.00 0.00 ]
+Key: CMPSSrri_Int: [ 0.00 0.00 ]
+Key: CMPSW: [ 0.00 0.00 ]
+Key: CMPXCHG: [ 0.00 0.00 ]
+Key: COMISDrm: [ 0.00 0.00 ]
+Key: COMISDrm_Int: [ 0.00 0.00 ]
+Key: COMISDrr: [ 0.00 0.00 ]
+Key: COMISDrr_Int: [ 0.00 0.00 ]
+Key: COMISSrm: [ 0.00 0.00 ]
+Key: COMISSrm_Int: [ 0.00 0.00 ]
+Key: COMISSrr: [ 0.00 0.00 ]
+Key: COMISSrr_Int: [ 0.00 0.00 ]
+Key: COMP_FST: [ 0.00 0.00 ]
+Key: COM_FIPr: [ 0.00 0.00 ]
+Key: COM_FIr: [ 0.00 0.00 ]
+Key: COM_FST: [ 0.00 0.00 ]
+Key: COM_FpIr: [ 0.00 0.00 ]
+Key: COM_Fpr: [ 0.00 0.00 ]
+Key: CONVERGENCECTRL_ANCHOR: [ 0.00 0.00 ]
+Key: CONVERGENCECTRL_ENTRY: [ 0.00 0.00 ]
+Key: CONVERGENCECTRL_GLUE: [ 0.00 0.00 ]
+Key: CONVERGENCECTRL_LOOP: [ 0.00 0.00 ]
+Key: COPY: [ 0.00 0.00 ]
+Key: COPY_TO_REGCLASS: [ 0.00 0.00 ]
+Key: CPUID: [ 0.00 0.00 ]
+Key: CQO: [ 0.00 0.00 ]
+Key: CRC: [ 0.00 0.00 ]
+Key: CS_PREFIX: [ 0.00 0.00 ]
+Key: CTEST: [ 0.00 0.00 ]
+Key: CVTDQ: [ 0.00 0.00 ]
+Key: CVTPD: [ 0.00 0.00 ]
+Key: CVTPS: [ 0.00 0.00 ]
+Key: CVTSD: [ 0.00 0.00 ]
+Key: CVTSI: [ 0.00 0.00 ]
+Key: CVTSS: [ 0.00 0.00 ]
+Key: CVTTPD: [ 0.00 0.00 ]
+Key: CVTTPS: [ 0.00 0.00 ]
+Key: CVTTSD: [ 0.00 0.00 ]
+Key: CVTTSS: [ 0.00 0.00 ]
+Key: CWD: [ 0.00 0.00 ]
+Key: CWDE: [ 0.00 0.00 ]
+Key: DAA: [ 0.00 0.00 ]
+Key: DAS: [ 0.00 0.00 ]
+Key: DATA: [ 0.00 0.00 ]
+Key: DBG_INSTR_REF: [ 0.00 0.00 ]
+Key: DBG_LABEL: [ 0.00 0.00 ]
+Key: DBG_PHI: [ 0.00 0.00 ]
+Key: DBG_VALUE: [ 0.00 0.00 ]
+Key: DBG_VALUE_LIST: [ 0.00 0.00 ]
+Key: DEC: [ 0.00 0.00 ]
+Key: DIV: [ 0.00 0.00 ]
+Key: DIVPDrm: [ 0.00 0.00 ]
+Key: DIVPDrr: [ 0.00 0.00 ]
+Key: DIVPSrm: [ 0.00 0.00 ]
+Key: DIVPSrr: [ 0.00 0.00 ]
+Key: DIVR_F: [ 0.00 0.00 ]
+Key: DIVR_FI: [ 0.00 0.00 ]
+Key: DIVR_FPrST: [ 0.00 0.00 ]
+Key: DIVR_FST: [ 0.00 0.00 ]
+Key: DIVR_Fp: [ 0.00 0.00 ]
+Key: DIVR_FpI: [ 0.00 0.00 ]
+Key: DIVR_FrST: [ 0.00 0.00 ]
+Key: DIVSDrm: [ 0.00 0.00 ]
+Key: DIVSDrm_Int: [ 0.00 0.00 ]
+Key: DIVSDrr: [ 0.00 0.00 ]
+Key: DIVSDrr_Int: [ 0.00 0.00 ]
+Key: DIVSSrm: [ 0.00 0.00 ]
+Key: DIVSSrm_Int: [ 0.00 0.00 ]
+Key: DIVSSrr: [ 0.00 0.00 ]
+Key: DIVSSrr_Int: [ 0.00 0.00 ]
+Key: DIV_F: [ 0.00 0.00 ]
+Key: DIV_FI: [ 0.00 0.00 ]
+Key: DIV_FPrST: [ 0.00 0.00 ]
+Key: DIV_FST: [ 0.00 0.00 ]
+Key: DIV_Fp: [ 0.00 0.00 ]
+Key: DIV_FpI: [ 0.00 0.00 ]
+Key: DIV_FrST: [ 0.00 0.00 ]
+Key: DPPDrmi: [ 0.00 0.00 ]
+Key: DPPDrri: [ 0.00 0.00 ]
+Key: DPPSrmi: [ 0.00 0.00 ]
+Key: DPPSrri: [ 0.00 0.00 ]
+Key: DS_PREFIX: [ 0.00 0.00 ]
+Key: DYN_ALLOCA: [ 0.00 0.00 ]
+Key: EH_LABEL: [ 0.00 0.00 ]
+Key: EH_RETURN: [ 0.00 0.00 ]
+Key: EH_SjLj_LongJmp: [ 0.00 0.00 ]
+Key: EH_SjLj_SetJmp: [ 0.00 0.00 ]
+Key: EH_SjLj_Setup: [ 0.00 0.00 ]
+Key: ENCLS: [ 0.00 0.00 ]
+Key: ENCLU: [ 0.00 0.00 ]
+Key: ENCLV: [ 0.00 0.00 ]
+Key: ENCODEKEY: [ 0.00 0.00 ]
+Key: ENDBR: [ 0.00 0.00 ]
+Key: ENQCMD: [ 0.00 0.00 ]
+Key: ENQCMDS: [ 0.00 0.00 ]
+Key: ENTER: [ 0.00 0.00 ]
+Key: ERETS: [ 0.00 0.00 ]
+Key: ERETU: [ 0.00 0.00 ]
+Key: ES_PREFIX: [ 0.00 0.00 ]
+Key: EXTRACTPSmri: [ 0.00 0.00 ]
+Key: EXTRACTPSrri: [ 0.00 0.00 ]
+Key: EXTRACT_SUBREG: [ 0.00 0.00 ]
+Key: EXTRQ: [ 0.00 0.00 ]
+Key: EXTRQI: [ 0.00 0.00 ]
+Key: F: [ 0.00 0.00 ]
+Key: FAKE_USE: [ 0.00 0.00 ]
+Key: FARCALL: [ 0.00 0.00 ]
+Key: FARJMP: [ 0.00 0.00 ]
+Key: FAULTING_OP: [ 0.00 0.00 ]
+Key: FBLDm: [ 0.00 0.00 ]
+Key: FBSTPm: [ 0.00 0.00 ]
+Key: FCOM: [ 0.00 0.00 ]
+Key: FCOMP: [ 0.00 0.00 ]
+Key: FCOMPP: [ 0.00 0.00 ]
+Key: FCOS: [ 0.00 0.00 ]
+Key: FDECSTP: [ 0.00 0.00 ]
+Key: FEMMS: [ 0.00 0.00 ]
+Key: FENTRY_CALL: [ 0.00 0.00 ]
+Key: FFREE: [ 0.00 0.00 ]
+Key: FFREEP: [ 0.00 0.00 ]
+Key: FICOM: [ 0.00 0.00 ]
+Key: FICOMP: [ 0.00 0.00 ]
+Key: FINCSTP: [ 0.00 0.00 ]
+Key: FLDCW: [ 0.00 0.00 ]
+Key: FLDENVm: [ 0.00 0.00 ]
+Key: FLDL: [ 0.00 0.00 ]
+Key: FLDLG: [ 0.00 0.00 ]
+Key: FLDLN: [ 0.00 0.00 ]
+Key: FLDPI: [ 0.00 0.00 ]
+Key: FNCLEX: [ 0.00 0.00 ]
+Key: FNINIT: [ 0.00 0.00 ]
+Key: FNOP: [ 0.00 0.00 ]
+Key: FNSTCW: [ 0.00 0.00 ]
+Key: FNSTSW: [ 0.00 0.00 ]
+Key: FNSTSWm: [ 0.00 0.00 ]
+Key: FP: [ 0.00 0.00 ]
+Key: FPATAN: [ 0.00 0.00 ]
+Key: FPREM: [ 0.00 0.00 ]
+Key: FPTAN: [ 0.00 0.00 ]
+Key: FRNDINT: [ 0.00 0.00 ]
+Key: FRSTORm: [ 0.00 0.00 ]
+Key: FSAVEm: [ 0.00 0.00 ]
+Key: FSCALE: [ 0.00 0.00 ]
+Key: FSIN: [ 0.00 0.00 ]
+Key: FSINCOS: [ 0.00 0.00 ]
+Key: FSTENVm: [ 0.00 0.00 ]
+Key: FS_PREFIX: [ 0.00 0.00 ]
+Key: FXRSTOR: [ 0.00 0.00 ]
+Key: FXSAVE: [ 0.00 0.00 ]
+Key: FXTRACT: [ 0.00 0.00 ]
+Key: FYL: [ 0.00 0.00 ]
+Key: FsFLD: [ 0.00 0.00 ]
+Key: GC_LABEL: [ 0.00 0.00 ]
+Key: GETSEC: [ 0.00 0.00 ]
+Key: GF: [ 0.00 0.00 ]
+Key: GS_PREFIX: [ 0.00 0.00 ]
+Key: G_ABDS: [ 0.00 0.00 ]
+Key: G_ABDU: [ 0.00 0.00 ]
+Key: G_ABS: [ 0.00 0.00 ]
+Key: G_ADD: [ 0.00 0.00 ]
+Key: G_ADDRSPACE_CAST: [ 0.00 0.00 ]
+Key: G_AND: [ 0.00 0.00 ]
+Key: G_ANYEXT: [ 0.00 0.00 ]
+Key: G_ASHR: [ 0.00 0.00 ]
+Key: G_ASSERT_ALIGN: [ 0.00 0.00 ]
+Key: G_ASSERT_SEXT: [ 0.00 0.00 ]
+Key: G_ASSERT_ZEXT: [ 0.00 0.00 ]
+Key: G_ATOMICRMW_ADD: [ 0.00 0.00 ]
+Key: G_ATOMICRMW_AND: [ 0.00 0.00 ]
+Key: G_ATOMICRMW_FADD: [ 0.00 0.00 ]
+Key: G_ATOMICRMW_FMAX: [ 0.00 0.00 ]
+Key: G_ATOMICRMW_FMAXIMUM: [ 0.00 0.00 ]
+Key: G_ATOMICRMW_FMIN: [ 0.00 0.00 ]
+Key: G_ATOMICRMW_FMINIMUM: [ 0.00 0.00 ]
+Key: G_ATOMICRMW_FSUB: [ 0.00 0.00 ]
+Key: G_ATOMICRMW_MAX: [ 0.00 0.00 ]
+Key: G_ATOMICRMW_MIN: [ 0.00 0.00 ]
+Key: G_ATOMICRMW_NAND: [ 0.00 0.00 ]
+Key: G_ATOMICRMW_OR: [ 0.00 0.00 ]
+Key: G_ATOMICRMW_SUB: [ 0.00 0.00 ]
+Key: G_ATOMICRMW_UDEC_WRAP: [ 0.00 0.00 ]
+Key: G_ATOMICRMW_UINC_WRAP: [ 0.00 0.00 ]
+Key: G_ATOMICRMW_UMAX: [ 0.00 0.00 ]
+Key: G_ATOMICRMW_UMIN: [ 0.00 0.00 ]
+Key: G_ATOMICRMW_USUB_COND: [ 0.00 0.00 ]
+Key: G_ATOMICRMW_USUB_SAT: [ 0.00 0.00 ]
+Key: G_ATOMICRMW_XCHG: [ 0.00 0.00 ]
+Key: G_ATOMICRMW_XOR: [ 0.00 0.00 ]
+Key: G_ATOMIC_CMPXCHG: [ 0.00 0.00 ]
+Key: G_ATOMIC_CMPXCHG_WITH_SUCCESS: [ 0.00 0.00 ]
+Key: G_BITCAST: [ 0.00 0.00 ]
+Key: G_BITREVERSE: [ 0.00 0.00 ]
+Key: G_BLOCK_ADDR: [ 0.00 0.00 ]
+Key: G_BR: [ 0.00 0.00 ]
+Key: G_BRCOND: [ 0.00 0.00 ]
+Key: G_BRINDIRECT: [ 0.00 0.00 ]
+Key: G_BRJT: [ 0.00 0.00 ]
+Key: G_BSWAP: [ 0.00 0.00 ]
+Key: G_BUILD_VECTOR: [ 0.00 0.00 ]
+Key: G_BUILD_VECTOR_TRUNC: [ 0.00 0.00 ]
+Key: G_BZERO: [ 0.00 0.00 ]
+Key: G_CONCAT_VECTORS: [ 0.00 0.00 ]
+Key: G_CONSTANT: [ 0.00 0.00 ]
+Key: G_CONSTANT_FOLD_BARRIER: [ 0.00 0.00 ]
+Key: G_CONSTANT_POOL: [ 0.00 0.00 ]
+Key: G_CTLZ: [ 0.00 0.00 ]
+Key: G_CTLZ_ZERO_UNDEF: [ 0.00 0.00 ]
+Key: G_CTPOP: [ 0.00 0.00 ]
+Key: G_CTTZ: [ 0.00 0.00 ]
+Key: G_CTTZ_ZERO_UNDEF: [ 0.00 0.00 ]
+Key: G_DEBUGTRAP: [ 0.00 0.00 ]
+Key: G_DYN_STACKALLOC: [ 0.00 0.00 ]
+Key: G_EXTRACT: [ 0.00 0.00 ]
+Key: G_EXTRACT_SUBVECTOR: [ 0.00 0.00 ]
+Key: G_EXTRACT_VECTOR_ELT: [ 0.00 0.00 ]
+Key: G_FABS: [ 0.00 0.00 ]
+Key: G_FACOS: [ 0.00 0.00 ]
+Key: G_FADD: [ 0.00 0.00 ]
+Key: G_FASIN: [ 0.00 0.00 ]
+Key: G_FATAN: [ 0.00 0.00 ]
+Key: G_FCANONICALIZE: [ 0.00 0.00 ]
+Key: G_FCEIL: [ 0.00 0.00 ]
+Key: G_FCMP: [ 0.00 0.00 ]
+Key: G_FCONSTANT: [ 0.00 0.00 ]
+Key: G_FCOPYSIGN: [ 0.00 0.00 ]
+Key: G_FCOS: [ 0.00 0.00 ]
+Key: G_FCOSH: [ 0.00 0.00 ]
+Key: G_FDIV: [ 0.00 0.00 ]
+Key: G_FENCE: [ 0.00 0.00 ]
+Key: G_FEXP: [ 0.00 0.00 ]
+Key: G_FFLOOR: [ 0.00 0.00 ]
+Key: G_FFREXP: [ 0.00 0.00 ]
+Key: G_FILD: [ 0.00 0.00 ]
+Key: G_FIST: [ 0.00 0.00 ]
+Key: G_FLDCW: [ 0.00 0.00 ]
+Key: G_FLDEXP: [ 0.00 0.00 ]
+Key: G_FLOG: [ 0.00 0.00 ]
+Key: G_FMA: [ 0.00 0.00 ]
+Key: G_FMAD: [ 0.00 0.00 ]
+Key: G_FMAXIMUM: [ 0.00 0.00 ]
+Key: G_FMAXIMUMNUM: [ 0.00 0.00 ]
+Key: G_FMAXNUM: [ 0.00 0.00 ]
+Key: G_FMAXNUM_IEEE: [ 0.00 0.00 ]
+Key: G_FMINIMUM: [ 0.00 0.00 ]
+Key: G_FMINIMUMNUM: [ 0.00 0.00 ]
+Key: G_FMINNUM: [ 0.00 0.00 ]
+Key: G_FMINNUM_IEEE: [ 0.00 0.00 ]
+Key: G_FMODF: [ 0.00 0.00 ]
+Key: G_FMUL: [ 0.00 0.00 ]
+Key: G_FNEARBYINT: [ 0.00 0.00 ]
+Key: G_FNEG: [ 0.00 0.00 ]
+Key: G_FNSTCW: [ 0.00 0.00 ]
+Key: G_FPEXT: [ 0.00 0.00 ]
+Key: G_FPOW: [ 0.00 0.00 ]
+Key: G_FPOWI: [ 0.00 0.00 ]
+Key: G_FPTOSI: [ 0.00 0.00 ]
+Key: G_FPTOSI_SAT: [ 0.00 0.00 ]
+Key: G_FPTOUI: [ 0.00 0.00 ]
+Key: G_FPTOUI_SAT: [ 0.00 0.00 ]
+Key: G_FPTRUNC: [ 0.00 0.00 ]
+Key: G_FRAME_INDEX: [ 0.00 0.00 ]
+Key: G_FREEZE: [ 0.00 0.00 ]
+Key: G_FREM: [ 0.00 0.00 ]
+Key: G_FRINT: [ 0.00 0.00 ]
+Key: G_FSHL: [ 0.00 0.00 ]
+Key: G_FSHR: [ 0.00 0.00 ]
+Key: G_FSIN: [ 0.00 0.00 ]
+Key: G_FSINCOS: [ 0.00 0.00 ]
+Key: G_FSINH: [ 0.00 0.00 ]
+Key: G_FSQRT: [ 0.00 0.00 ]
+Key: G_FSUB: [ 0.00 0.00 ]
+Key: G_FTAN: [ 0.00 0.00 ]
+Key: G_FTANH: [ 0.00 0.00 ]
+Key: G_GET_FPENV: [ 0.00 0.00 ]
+Key: G_GET_FPMODE: [ 0.00 0.00 ]
+Key: G_GET_ROUNDING: [ 0.00 0.00 ]
+Key: G_GLOBAL_VALUE: [ 0.00 0.00 ]
+Key: G_ICMP: [ 0.00 0.00 ]
+Key: G_IMPLICIT_DEF: [ 0.00 0.00 ]
+Key: G_INDEXED_LOAD: [ 0.00 0.00 ]
+Key: G_INDEXED_SEXTLOAD: [ 0.00 0.00 ]
+Key: G_INDEXED_STORE: [ 0.00 0.00 ]
+Key: G_INDEXED_ZEXTLOAD: [ 0.00 0.00 ]
+Key: G_INSERT: [ 0.00 0.00 ]
+Key: G_INSERT_SUBVECTOR: [ 0.00 0.00 ]
+Key: G_INSERT_VECTOR_ELT: [ 0.00 0.00 ]
+Key: G_INTRINSIC: [ 0.00 0.00 ]
+Key: G_INTRINSIC_CONVERGENT: [ 0.00 0.00 ]
+Key: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS: [ 0.00 0.00 ]
+Key: G_INTRINSIC_FPTRUNC_ROUND: [ 0.00 0.00 ]
+Key: G_INTRINSIC_LLRINT: [ 0.00 0.00 ]
+Key: G_INTRINSIC_LRINT: [ 0.00 0.00 ]
+Key: G_INTRINSIC_ROUND: [ 0.00 0.00 ]
+Key: G_INTRINSIC_ROUNDEVEN: [ 0.00 0.00 ]
+Key: G_INTRINSIC_TRUNC: [ 0.00 0.00 ]
+Key: G_INTRINSIC_W_SIDE_EFFECTS: [ 0.00 0.00 ]
+Key: G_INTTOPTR: [ 0.00 0.00 ]
+Key: G_INVOKE_REGION_START: [ 0.00 0.00 ]
+Key: G_IS_FPCLASS: [ 0.00 0.00 ]
+Key: G_JUMP_TABLE: [ 0.00 0.00 ]
+Key: G_LLROUND: [ 0.00 0.00 ]
+Key: G_LOAD: [ 0.00 0.00 ]
+Key: G_LROUND: [ 0.00 0.00 ]
+Key: G_LSHR: [ 0.00 0.00 ]
+Key: G_MEMCPY: [ 0.00 0.00 ]
+Key: G_MEMCPY_INLINE: [ 0.00 0.00 ]
+Key: G_MEMMOVE: [ 0.00 0.00 ]
+Key: G_MEMSET: [ 0.00 0.00 ]
+Key: G_MERGE_VALUES: [ 0.00 0.00 ]
+Key: G_MUL: [ 0.00 0.00 ]
+Key: G_OR: [ 0.00 0.00 ]
+Key: G_PHI: [ 0.00 0.00 ]
+Key: G_PREFETCH: [ 0.00 0.00 ]
+Key: G_PTRAUTH_GLOBAL_VALUE: [ 0.00 0.00 ]
+Key: G_PTRMASK: [ 0.00 0.00 ]
+Key: G_PTRTOINT: [ 0.00 0.00 ]
+Key: G_PTR_ADD: [ 0.00 0.00 ]
+Key: G_READCYCLECOUNTER: [ 0.00 0.00 ]
+Key: G_READSTEADYCOUNTER: [ 0.00 0.00 ]
+Key: G_READ_REGISTER: [ 0.00 0.00 ]
+Key: G_RESET_FPENV: [ 0.00 0.00 ]
+Key: G_RESET_FPMODE: [ 0.00 0.00 ]
+Key: G_ROTL: [ 0.00 0.00 ]
+Key: G_ROTR: [ 0.00 0.00 ]
+Key: G_SADDE: [ 0.00 0.00 ]
+Key: G_SADDO: [ 0.00 0.00 ]
+Key: G_SADDSAT: [ 0.00 0.00 ]
+Key: G_SBFX: [ 0.00 0.00 ]
+Key: G_SCMP: [ 0.00 0.00 ]
+Key: G_SDIV: [ 0.00 0.00 ]
+Key: G_SDIVFIX: [ 0.00 0.00 ]
+Key: G_SDIVFIXSAT: [ 0.00 0.00 ]
+Key: G_SDIVREM: [ 0.00 0.00 ]
+Key: G_SELECT: [ 0.00 0.00 ]
+Key: G_SET_FPENV: [ 0.00 0.00 ]
+Key: G_SET_FPMODE: [ 0.00 0.00 ]
+Key: G_SET_ROUNDING: [ 0.00 0.00 ]
+Key: G_SEXT: [ 0.00 0.00 ]
+Key: G_SEXTLOAD: [ 0.00 0.00 ]
+Key: G_SEXT_INREG: [ 0.00 0.00 ]
+Key: G_SHL: [ 0.00 0.00 ]
+Key: G_SHUFFLE_VECTOR: [ 0.00 0.00 ]
+Key: G_SITOFP: [ 0.00 0.00 ]
+Key: G_SMAX: [ 0.00 0.00 ]
+Key: G_SMIN: [ 0.00 0.00 ]
+Key: G_SMULFIX: [ 0.00 0.00 ]
+Key: G_SMULFIXSAT: [ 0.00 0.00 ]
+Key: G_SMULH: [ 0.00 0.00 ]
+Key: G_SMULO: [ 0.00 0.00 ]
+Key: G_SPLAT_VECTOR: [ 0.00 0.00 ]
+Key: G_SREM: [ 0.00 0.00 ]
+Key: G_SSHLSAT: [ 0.00 0.00 ]
+Key: G_SSUBE: [ 0.00 0.00 ]
+Key: G_SSUBO: [ 0.00 0.00 ]
+Key: G_SSUBSAT: [ 0.00 0.00 ]
+Key: G_STACKRESTORE: [ 0.00 0.00 ]
+Key: G_STACKSAVE: [ 0.00 0.00 ]
+Key: G_STEP_VECTOR: [ 0.00 0.00 ]
+Key: G_STORE: [ 0.00 0.00 ]
+Key: G_STRICT_FADD: [ 0.00 0.00 ]
+Key: G_STRICT_FDIV: [ 0.00 0.00 ]
+Key: G_STRICT_FLDEXP: [ 0.00 0.00 ]
+Key: G_STRICT_FMA: [ 0.00 0.00 ]
+Key: G_STRICT_FMUL: [ 0.00 0.00 ]
+Key: G_STRICT_FREM: [ 0.00 0.00 ]
+Key: G_STRICT_FSQRT: [ 0.00 0.00 ]
+Key: G_STRICT_FSUB: [ 0.00 0.00 ]
+Key: G_SUB: [ 0.00 0.00 ]
+Key: G_TRAP: [ 0.00 0.00 ]
+Key: G_TRUNC: [ 0.00 0.00 ]
+Key: G_TRUNC_SSAT_S: [ 0.00 0.00 ]
+Key: G_TRUNC_SSAT_U: [ 0.00 0.00 ]
+Key: G_TRUNC_USAT_U: [ 0.00 0.00 ]
+Key: G_UADDE: [ 0.00 0.00 ]
+Key: G_UADDO: [ 0.00 0.00 ]
+Key: G_UADDSAT: [ 0.00 0.00 ]
+Key: G_UBFX: [ 0.00 0.00 ]
+Key: G_UBSANTRAP: [ 0.00 0.00 ]
+Key: G_UCMP: [ 0.00 0.00 ]
+Key: G_UDIV: [ 0.00 0.00 ]
+Key: G_UDIVFIX: [ 0.00 0.00 ]
+Key: G_UDIVFIXSAT: [ 0.00 0.00 ]
+Key: G_UDIVREM: [ 0.00 0.00 ]
+Key: G_UITOFP: [ 0.00 0.00 ]
+Key: G_UMAX: [ 0.00 0.00 ]
+Key: G_UMIN: [ 0.00 0.00 ]
+Key: G_UMULFIX: [ 0.00 0.00 ]
+Key: G_UMULFIXSAT: [ 0.00 0.00 ]
+Key: G_UMULH: [ 0.00 0.00 ]
+Key: G_UMULO: [ 0.00 0.00 ]
+Key: G_UNMERGE_VALUES: [ 0.00 0.00 ]
+Key: G_UREM: [ 0.00 0.00 ]
+Key: G_USHLSAT: [ 0.00 0.00 ]
+Key: G_USUBE: [ 0.00 0.00 ]
+Key: G_USUBO: [ 0.00 0.00 ]
+Key: G_USUBSAT: [ 0.00 0.00 ]
+Key: G_VAARG: [ 0.00 0.00 ]
+Key: G_VASTART: [ 0.00 0.00 ]
+Key: G_VECREDUCE_ADD: [ 0.00 0.00 ]
+Key: G_VECREDUCE_AND: [ 0.00 0.00 ]
+Key: G_VECREDUCE_FADD: [ 0.00 0.00 ]
+Key: G_VECREDUCE_FMAX: [ 0.00 0.00 ]
+Key: G_VECREDUCE_FMAXIMUM: [ 0.00 0.00 ]
+Key: G_VECREDUCE_FMIN: [ 0.00 0.00 ]
+Key: G_VECREDUCE_FMINIMUM: [ 0.00 0.00 ]
+Key: G_VECREDUCE_FMUL: [ 0.00 0.00 ]
+Key: G_VECREDUCE_MUL: [ 0.00 0.00 ]
+Key: G_VECREDUCE_OR: [ 0.00 0.00 ]
+Key: G_VECREDUCE_SEQ_FADD: [ 0.00 0.00 ]
+Key: G_VECREDUCE_SEQ_FMUL: [ 0.00 0.00 ]
+Key: G_VECREDUCE_SMAX: [ 0.00 0.00 ]
+Key: G_VECREDUCE_SMIN: [ 0.00 0.00 ]
+Key: G_VECREDUCE_UMAX: [ 0.00 0.00 ]
+Key: G_VECREDUCE_UMIN: [ 0.00 0.00 ]
+Key: G_VECREDUCE_XOR: [ 0.00 0.00 ]
+Key: G_VECTOR_COMPRESS: [ 0.00 0.00 ]
+Key: G_VSCALE: [ 0.00 0.00 ]
+Key: G_WRITE_REGISTER: [ 0.00 0.00 ]
+Key: G_XOR: [ 0.00 0.00 ]
+Key: G_ZEXT: [ 0.00 0.00 ]
+Key: G_ZEXTLOAD: [ 0.00 0.00 ]
+Key: HADDPDrm: [ 0.00 0.00 ]
+Key: HADDPDrr: [ 0.00 0.00 ]
+Key: HADDPSrm: [ 0.00 0.00 ]
+Key: HADDPSrr: [ 0.00 0.00 ]
+Key: HLT: [ 0.00 0.00 ]
+Key: HRESET: [ 0.00 0.00 ]
+Key: HSUBPDrm: [ 0.00 0.00 ]
+Key: HSUBPDrr: [ 0.00 0.00 ]
+Key: HSUBPSrm: [ 0.00 0.00 ]
+Key: HSUBPSrr: [ 0.00 0.00 ]
+Key: ICALL_BRANCH_FUNNEL: [ 0.00 0.00 ]
+Key: IDIV: [ 0.00 0.00 ]
+Key: ILD_F: [ 0.00 0.00 ]
+Key: ILD_Fp: [ 0.00 0.00 ]
+Key: IMPLICIT_DEF: [ 0.00 0.00 ]
+Key: IMUL: [ 0.00 0.00 ]
+Key: IMULZU: [ 0.00 0.00 ]
+Key: IN: [ 0.00 0.00 ]
+Key: INC: [ 0.00 0.00 ]
+Key: INCSSPD: [ 0.00 0.00 ]
+Key: INCSSPQ: [ 0.00 0.00 ]
+Key: INDIRECT_THUNK_CALL: [ 0.00 0.00 ]
+Key: INDIRECT_THUNK_TCRETURN: [ 0.00 0.00 ]
+Key: INIT_UNDEF: [ 0.00 0.00 ]
+Key: INLINEASM: [ 0.00 0.00 ]
+Key: INLINEASM_BR: [ 0.00 0.00 ]
+Key: INSB: [ 0.00 0.00 ]
+Key: INSERTPSrmi: [ 0.00 0.00 ]
+Key: INSERTPSrri: [ 0.00 0.00 ]
+Key: INSERTQ: [ 0.00 0.00 ]
+Key: INSERTQI: [ 0.00 0.00 ]
+Key: INSERT_SUBREG: [ 0.00 0.00 ]
+Key: INSL: [ 0.00 0.00 ]
+Key: INSW: [ 0.00 0.00 ]
+Key: INT: [ 0.00 0.00 ]
+Key: INTO: [ 0.00 0.00 ]
+Key: INVD: [ 0.00 0.00 ]
+Key: INVEPT: [ 0.00 0.00 ]
+Key: INVLPG: [ 0.00 0.00 ]
+Key: INVLPGA: [ 0.00 0.00 ]
+Key: INVLPGB: [ 0.00 0.00 ]
+Key: INVPCID: [ 0.00 0.00 ]
+Key: INVVPID: [ 0.00 0.00 ]
+Key: IRET: [ 0.00 0.00 ]
+Key: ISTT_FP: [ 0.00 0.00 ]
+Key: ISTT_Fp: [ 0.00 0.00 ]
+Key: IST_F: [ 0.00 0.00 ]
+Key: IST_FP: [ 0.00 0.00 ]
+Key: IST_Fp: [ 0.00 0.00 ]
+Key: Int_eh_sjlj_setup_dispatch: [ 0.00 0.00 ]
+Key: JCC: [ 0.00 0.00 ]
+Key: JCXZ: [ 0.00 0.00 ]
+Key: JECXZ: [ 0.00 0.00 ]
+Key: JMP: [ 0.00 0.00 ]
+Key: JMPABS: [ 0.00 0.00 ]
+Key: JRCXZ: [ 0.00 0.00 ]
+Key: JUMP_TABLE_DEBUG_INFO: [ 0.00 0.00 ]
+Key: KADDBkk: [ 0.00 0.00 ]
+Key: KADDDkk: [ 0.00 0.00 ]
+Key: KADDQkk: [ 0.00 0.00 ]
+Key: KADDWkk: [ 0.00 0.00 ]
+Key: KANDBkk: [ 0.00 0.00 ]
+Key: KANDDkk: [ 0.00 0.00 ]
+Key: KANDNBkk: [ 0.00 0.00 ]
+Key: KANDNDkk: [ 0.00 0.00 ]
+Key: KANDNQkk: [ 0.00 0.00 ]
+Key: KANDNWkk: [ 0.00 0.00 ]
+Key: KANDQkk: [ 0.00 0.00 ]
+Key: KANDWkk: [ 0.00 0.00 ]
+Key: KCFI_CHECK: [ 0.00 0.00 ]
+Key: KILL: [ 0.00 0.00 ]
+Key: KMOVBkk: [ 0.00 0.00 ]
+Key: KMOVBkk_EVEX: [ 0.00 0.00 ]
+Key: KMOVBkm: [ 0.00 0.00 ]
+Key: KMOVBkm_EVEX: [ 0.00 0.00 ]
+Key: KMOVBkr: [ 0.00 0.00 ]
+Key: KMOVBkr_EVEX: [ 0.00 0.00 ]
+Key: KMOVBmk: [ 0.00 0.00 ]
+Key: KMOVBmk_EVEX: [ 0.00 0.00 ]
+Key: KMOVBrk: [ 0.00 0.00 ]
+Key: KMOVBrk_EVEX: [ 0.00 0.00 ]
+Key: KMOVDkk: [ 0.00 0.00 ]
+Key: KMOVDkk_EVEX: [ 0.00 0.00 ]
+Key: KMOVDkm: [ 0.00 0.00 ]
+Key: KMOVDkm_EVEX: [ 0.00 0.00 ]
+Key: KMOVDkr: [ 0.00 0.00 ]
+Key: KMOVDkr_EVEX: [ 0.00 0.00 ]
+Key: KMOVDmk: [ 0.00 0.00 ]
+Key: KMOVDmk_EVEX: [ 0.00 0.00 ]
+Key: KMOVDrk: [ 0.00 0.00 ]
+Key: KMOVDrk_EVEX: [ 0.00 0.00 ]
+Key: KMOVQkk: [ 0.00 0.00 ]
+Key: KMOVQkk_EVEX: [ 0.00 0.00 ]
+Key: KMOVQkm: [ 0.00 0.00 ]
+Key: KMOVQkm_EVEX: [ 0.00 0.00 ]
+Key: KMOVQkr: [ 0.00 0.00 ]
+Key: KMOVQkr_EVEX: [ 0.00 0.00 ]
+Key: KMOVQmk: [ 0.00 0.00 ]
+Key: KMOVQmk_EVEX: [ 0.00 0.00 ]
+Key: KMOVQrk: [ 0.00 0.00 ]
+Key: KMOVQrk_EVEX: [ 0.00 0.00 ]
+Key: KMOVWkk: [ 0.00 0.00 ]
+Key: KMOVWkk_EVEX: [ 0.00 0.00 ]
+Key: KMOVWkm: [ 0.00 0.00 ]
+Key: KMOVWkm_EVEX: [ 0.00 0.00 ]
+Key: KMOVWkr: [ 0.00 0.00 ]
+Key: KMOVWkr_EVEX: [ 0.00 0.00 ]
+Key: KMOVWmk: [ 0.00 0.00 ]
+Key: KMOVWmk_EVEX: [ 0.00 0.00 ]
+Key: KMOVWrk: [ 0.00 0.00 ]
+Key: KMOVWrk_EVEX: [ 0.00 0.00 ]
+Key: KNOTBkk: [ 0.00 0.00 ]
+Key: KNOTDkk: [ 0.00 0.00 ]
+Key: KNOTQkk: [ 0.00 0.00 ]
+Key: KNOTWkk: [ 0.00 0.00 ]
+Key: KORBkk: [ 0.00 0.00 ]
+Key: KORDkk: [ 0.00 0.00 ]
+Key: KORQkk: [ 0.00 0.00 ]
+Key: KORTESTBkk: [ 0.00 0.00 ]
+Key: KORTESTDkk: [ 0.00 0.00 ]
+Key: KORTESTQkk: [ 0.00 0.00 ]
+Key: KORTESTWkk: [ 0.00 0.00 ]
+Key: KORWkk: [ 0.00 0.00 ]
+Key: KSET: [ 0.00 0.00 ]
+Key: KSHIFTLBki: [ 0.00 0.00 ]
+Key: KSHIFTLDki: [ 0.00 0.00 ]
+Key: KSHIFTLQki: [ 0.00 0.00 ]
+Key: KSHIFTLWki: [ 0.00 0.00 ]
+Key: KSHIFTRBki: [ 0.00 0.00 ]
+Key: KSHIFTRDki: [ 0.00 0.00 ]
+Key: KSHIFTRQki: [ 0.00 0.00 ]
+Key: KSHIFTRWki: [ 0.00 0.00 ]
+Key: KTESTBkk: [ 0.00 0.00 ]
+Key: KTESTDkk: [ 0.00 0.00 ]
+Key: KTESTQkk: [ 0.00 0.00 ]
+Key: KTESTWkk: [ 0.00 0.00 ]
+Key: KUNPCKBWkk: [ 0.00 0.00 ]
+Key: KUNPCKDQkk: [ 0.00 0.00 ]
+Key: KUNPCKWDkk: [ 0.00 0.00 ]
+Key: KXNORBkk: [ 0.00 0.00 ]
+Key: KXNORDkk: [ 0.00 0.00 ]
+Key: KXNORQkk: [ 0.00 0.00 ]
+Key: KXNORWkk: [ 0.00 0.00 ]
+Key: KXORBkk: [ 0.00 0.00 ]
+Key: KXORDkk: [ 0.00 0.00 ]
+Key: KXORQkk: [ 0.00 0.00 ]
+Key: KXORWkk: [ 0.00 0.00 ]
+Key: LAHF: [ 0.00 0.00 ]
+Key: LAR: [ 0.00 0.00 ]
+Key: LCMPXCHG: [ 0.00 0.00 ]
+Key: LDDQUrm: [ 0.00 0.00 ]
+Key: LDMXCSR: [ 0.00 0.00 ]
+Key: LDS: [ 0.00 0.00 ]
+Key: LDTILECFG: [ 0.00 0.00 ]
+Key: LDTILECFG_EVEX: [ 0.00 0.00 ]
+Key: LD_F: [ 0.00 0.00 ]
+Key: LD_Fp: [ 0.00 0.00 ]
+Key: LD_Frr: [ 0.00 0.00 ]
+Key: LEA: [ 0.00 0.00 ]
+Key: LEAVE: [ 0.00 0.00 ]
+Key: LES: [ 0.00 0.00 ]
+Key: LFENCE: [ 0.00 0.00 ]
+Key: LFS: [ 0.00 0.00 ]
+Key: LGDT: [ 0.00 0.00 ]
+Key: LGS: [ 0.00 0.00 ]
+Key: LIDT: [ 0.00 0.00 ]
+Key: LIFETIME_END: [ 0.00 0.00 ]
+Key: LIFETIME_START: [ 0.00 0.00 ]
+Key: LKGS: [ 0.00 0.00 ]
+Key: LLDT: [ 0.00 0.00 ]
+Key: LLWPCB: [ 0.00 0.00 ]
+Key: LMSW: [ 0.00 0.00 ]
+Key: LOADIWKEY: [ 0.00 0.00 ]
+Key: LOAD_STACK_GUARD: [ 0.00 0.00 ]
+Key: LOCAL_ESCAPE: [ 0.00 0.00 ]
+Key: LOCK_ADD: [ 0.00 0.00 ]
+Key: LOCK_AND: [ 0.00 0.00 ]
+Key: LOCK_BTC: [ 0.00 0.00 ]
+Key: LOCK_BTC_RM: [ 0.00 0.00 ]
+Key: LOCK_BTR: [ 0.00 0.00 ]
+Key: LOCK_BTR_RM: [ 0.00 0.00 ]
+Key: LOCK_BTS: [ 0.00 0.00 ]
+Key: LOCK_BTS_RM: [ 0.00 0.00 ]
+Key: LOCK_DEC: [ 0.00 0.00 ]
+Key: LOCK_INC: [ 0.00 0.00 ]
+Key: LOCK_OR: [ 0.00 0.00 ]
+Key: LOCK_PREFIX: [ 0.00 0.00 ]
+Key: LOCK_SUB: [ 0.00 0.00 ]
+Key: LOCK_XOR: [ 0.00 0.00 ]
+Key: LODSB: [ 0.00 0.00 ]
+Key: LODSL: [ 0.00 0.00 ]
+Key: LODSQ: [ 0.00 0.00 ]
+Key: LODSW: [ 0.00 0.00 ]
+Key: LOOP: [ 0.00 0.00 ]
+Key: LOOPE: [ 0.00 0.00 ]
+Key: LOOPNE: [ 0.00 0.00 ]
+Key: LRET: [ 0.00 0.00 ]
+Key: LRETI: [ 0.00 0.00 ]
+Key: LSL: [ 0.00 0.00 ]
+Key: LSS: [ 0.00 0.00 ]
+Key: LTRm: [ 0.00 0.00 ]
+Key: LTRr: [ 0.00 0.00 ]
+Key: LWPINS: [ 0.00 0.00 ]
+Key: LWPVAL: [ 0.00 0.00 ]
+Key: LXADD: [ 0.00 0.00 ]
+Key: LZCNT: [ 0.00 0.00 ]
+Key: MASKMOVDQU: [ 0.00 0.00 ]
+Key: MASKPAIR: [ 0.00 0.00 ]
+Key: MAXCPDrm: [ 0.00 0.00 ]
+Key: MAXCPDrr: [ 0.00 0.00 ]
+Key: MAXCPSrm: [ 0.00 0.00 ]
+Key: MAXCPSrr: [ 0.00 0.00 ]
+Key: MAXCSDrm: [ 0.00 0.00 ]
+Key: MAXCSDrr: [ 0.00 0.00 ]
+Key: MAXCSSrm: [ 0.00 0.00 ]
+Key: MAXCSSrr: [ 0.00 0.00 ]
+Key: MAXPDrm: [ 0.00 0.00 ]
+Key: MAXPDrr: [ 0.00 0.00 ]
+Key: MAXPSrm: [ 0.00 0.00 ]
+Key: MAXPSrr: [ 0.00 0.00 ]
+Key: MAXSDrm: [ 0.00 0.00 ]
+Key: MAXSDrm_Int: [ 0.00 0.00 ]
+Key: MAXSDrr: [ 0.00 0.00 ]
+Key: MAXSDrr_Int: [ 0.00 0.00 ]
+Key: MAXSSrm: [ 0.00 0.00 ]
+Key: MAXSSrm_Int: [ 0.00 0.00 ]
+Key: MAXSSrr: [ 0.00 0.00 ]
+Key: MAXSSrr_Int: [ 0.00 0.00 ]
+Key: MEMBARRIER: [ 0.00 0.00 ]
+Key: MFENCE: [ 0.00 0.00 ]
+Key: MINCPDrm: [ 0.00 0.00 ]
+Key: MINCPDrr: [ 0.00 0.00 ]
+Key: MINCPSrm: [ 0.00 0.00 ]
+Key: MINCPSrr: [ 0.00 0.00 ]
+Key: MINCSDrm: [ 0.00 0.00 ]
+Key: MINCSDrr: [ 0.00 0.00 ]
+Key: MINCSSrm: [ 0.00 0.00 ]
+Key: MINCSSrr: [ 0.00 0.00 ]
+Key: MINPDrm: [ 0.00 0.00 ]
+Key: MINPDrr: [ 0.00 0.00 ]
+Key: MINPSrm: [ 0.00 0.00 ]
+Key: MINPSrr: [ 0.00 0.00 ]
+Key: MINSDrm: [ 0.00 0.00 ]
+Key: MINSDrm_Int: [ 0.00 0.00 ]
+Key: MINSDrr: [ 0.00 0.00 ]
+Key: MINSDrr_Int: [ 0.00 0.00 ]
+Key: MINSSrm: [ 0.00 0.00 ]
+Key: MINSSrm_Int: [ 0.00 0.00 ]
+Key: MINSSrr: [ 0.00 0.00 ]
+Key: MINSSrr_Int: [ 0.00 0.00 ]
+Key: MMX_CVTPD: [ 0.00 0.00 ]
+Key: MMX_CVTPI: [ 0.00 0.00 ]
+Key: MMX_CVTPS: [ 0.00 0.00 ]
+Key: MMX_CVTTPD: [ 0.00 0.00 ]
+Key: MMX_CVTTPS: [ 0.00 0.00 ]
+Key: MMX_EMMS: [ 0.00 0.00 ]
+Key: MMX_MASKMOVQ: [ 0.00 0.00 ]
+Key: MMX_MOVD: [ 0.00 0.00 ]
+Key: MMX_MOVDQ: [ 0.00 0.00 ]
+Key: MMX_MOVFR: [ 0.00 0.00 ]
+Key: MMX_MOVNTQmr: [ 0.00 0.00 ]
+Key: MMX_MOVQ: [ 0.00 0.00 ]
+Key: MMX_PABSBrm: [ 0.00 0.00 ]
+Key: MMX_PABSBrr: [ 0.00 0.00 ]
+Key: MMX_PABSDrm: [ 0.00 0.00 ]
+Key: MMX_PABSDrr: [ 0.00 0.00 ]
+Key: MMX_PABSWrm: [ 0.00 0.00 ]
+Key: MMX_PABSWrr: [ 0.00 0.00 ]
+Key: MMX_PACKSSDWrm: [ 0.00 0.00 ]
+Key: MMX_PACKSSDWrr: [ 0.00 0.00 ]
+Key: MMX_PACKSSWBrm: [ 0.00 0.00 ]
+Key: MMX_PACKSSWBrr: [ 0.00 0.00 ]
+Key: MMX_PACKUSWBrm: [ 0.00 0.00 ]
+Key: MMX_PACKUSWBrr: [ 0.00 0.00 ]
+Key: MMX_PADDBrm: [ 0.00 0.00 ]
+Key: MMX_PADDBrr: [ 0.00 0.00 ]
+Key: MMX_PADDDrm: [ 0.00 0.00 ]
+Key: MMX_PADDDrr: [ 0.00 0.00 ]
+Key: MMX_PADDQrm: [ 0.00 0.00 ]
+Key: MMX_PADDQrr: [ 0.00 0.00 ]
+Key: MMX_PADDSBrm: [ 0.00 0.00 ]
+Key: MMX_PADDSBrr: [ 0.00 0.00 ]
+Key: MMX_PADDSWrm: [ 0.00 0.00 ]
+Key: MMX_PADDSWrr: [ 0.00 0.00 ]
+Key: MMX_PADDUSBrm: [ 0.00 0.00 ]
+Key: MMX_PADDUSBrr: [ 0.00 0.00 ]
+Key: MMX_PADDUSWrm: [ 0.00 0.00 ]
+Key: MMX_PADDUSWrr: [ 0.00 0.00 ]
+Key: MMX_PADDWrm: [ 0.00 0.00 ]
+Key: MMX_PADDWrr: [ 0.00 0.00 ]
+Key: MMX_PALIGNRrmi: [ 0.00 0.00 ]
+Key: MMX_PALIGNRrri: [ 0.00 0.00 ]
+Key: MMX_PANDNrm: [ 0.00 0.00 ]
+Key: MMX_PANDNrr: [ 0.00 0.00 ]
+Key: MMX_PANDrm: [ 0.00 0.00 ]
+Key: MMX_PANDrr: [ 0.00 0.00 ]
+Key: MMX_PAVGBrm: [ 0.00 0.00 ]
+Key: MMX_PAVGBrr: [ 0.00 0.00 ]
+Key: MMX_PAVGWrm: [ 0.00 0.00 ]
+Key: MMX_PAVGWrr: [ 0.00 0.00 ]
+Key: MMX_PCMPEQBrm: [ 0.00 0.00 ]
+Key: MMX_PCMPEQBrr: [ 0.00 0.00 ]
+Key: MMX_PCMPEQDrm: [ 0.00 0.00 ]
+Key: MMX_PCMPEQDrr: [ 0.00 0.00 ]
+Key: MMX_PCMPEQWrm: [ 0.00 0.00 ]
+Key: MMX_PCMPEQWrr: [ 0.00 0.00 ]
+Key: MMX_PCMPGTBrm: [ 0.00 0.00 ]
+Key: MMX_PCMPGTBrr: [ 0.00 0.00 ]
+Key: MMX_PCMPGTDrm: [ 0.00 0.00 ]
+Key: MMX_PCMPGTDrr: [ 0.00 0.00 ]
+Key: MMX_PCMPGTWrm: [ 0.00 0.00 ]
+Key: MMX_PCMPGTWrr: [ 0.00 0.00 ]
+Key: MMX_PEXTRWrri: [ 0.00 0.00 ]
+Key: MMX_PHADDDrm: [ 0.00 0.00 ]
+Key: MMX_PHADDDrr: [ 0.00 0.00 ]
+Key: MMX_PHADDSWrm: [ 0.00 0.00 ]
+Key: MMX_PHADDSWrr: [ 0.00 0.00 ]
+Key: MMX_PHADDWrm: [ 0.00 0.00 ]
+Key: MMX_PHADDWrr: [ 0.00 0.00 ]
+Key: MMX_PHSUBDrm: [ 0.00 0.00 ]
+Key: MMX_PHSUBDrr: [ 0.00 0.00 ]
+Key: MMX_PHSUBSWrm: [ 0.00 0.00 ]
+Key: MMX_PHSUBSWrr: [ 0.00 0.00 ]
+Key: MMX_PHSUBWrm: [ 0.00 0.00 ]
+Key: MMX_PHSUBWrr: [ 0.00 0.00 ]
+Key: MMX_PINSRWrmi: [ 0.00 0.00 ]
+Key: MMX_PINSRWrri: [ 0.00 0.00 ]
+Key: MMX_PMADDUBSWrm: [ 0.00 0.00 ]
+Key: MMX_PMADDUBSWrr: [ 0.00 0.00 ]
+Key: MMX_PMADDWDrm: [ 0.00 0.00 ]
+Key: MMX_PMADDWDrr: [ 0.00 0.00 ]
+Key: MMX_PMAXSWrm: [ 0.00 0.00 ]
+Key: MMX_PMAXSWrr: [ 0.00 0.00 ]
+Key: MMX_PMAXUBrm: [ 0.00 0.00 ]
+Key: MMX_PMAXUBrr: [ 0.00 0.00 ]
+Key: MMX_PMINSWrm: [ 0.00 0.00 ]
+Key: MMX_PMINSWrr: [ 0.00 0.00 ]
+Key: MMX_PMINUBrm: [ 0.00 0.00 ]
+Key: MMX_PMINUBrr: [ 0.00 0.00 ]
+Key: MMX_PMOVMSKBrr: [ 0.00 0.00 ]
+Key: MMX_PMULHRSWrm: [ 0.00 0.00 ]
+Key: MMX_PMULHRSWrr: [ 0.00 0.00 ]
+Key: MMX_PMULHUWrm: [ 0.00 0.00 ]
+Key: MMX_PMULHUWrr: [ 0.00 0.00 ]
+Key: MMX_PMULHWrm: [ 0.00 0.00 ]
+Key: MMX_PMULHWrr: [ 0.00 0.00 ]
+Key: MMX_PMULLWrm: [ 0.00 0.00 ]
+Key: MMX_PMULLWrr: [ 0.00 0.00 ]
+Key: MMX_PMULUDQrm: [ 0.00 0.00 ]
+Key: MMX_PMULUDQrr: [ 0.00 0.00 ]
+Key: MMX_PORrm: [ 0.00 0.00 ]
+Key: MMX_PORrr: [ 0.00 0.00 ]
+Key: MMX_PSADBWrm: [ 0.00 0.00 ]
+Key: MMX_PSADBWrr: [ 0.00 0.00 ]
+Key: MMX_PSHUFBrm: [ 0.00 0.00 ]
+Key: MMX_PSHUFBrr: [ 0.00 0.00 ]
+Key: MMX_PSHUFWmi: [ 0.00 0.00 ]
+Key: MMX_PSHUFWri: [ 0.00 0.00 ]
+Key: MMX_PSIGNBrm: [ 0.00 0.00 ]
+Key: MMX_PSIGNBrr: [ 0.00 0.00 ]
+Key: MMX_PSIGNDrm: [ 0.00 0.00 ]
+Key: MMX_PSIGNDrr: [ 0.00 0.00 ]
+Key: MMX_PSIGNWrm: [ 0.00 0.00 ]
+Key: MMX_PSIGNWrr: [ 0.00 0.00 ]
+Key: MMX_PSLLDri: [ 0.00 0.00 ]
+Key: MMX_PSLLDrm: [ 0.00 0.00 ]
+Key: MMX_PSLLDrr: [ 0.00 0.00 ]
+Key: MMX_PSLLQri: [ 0.00 0.00 ]
+Key: MMX_PSLLQrm: [ 0.00 0.00 ]
+Key: MMX_PSLLQrr: [ 0.00 0.00 ]
+Key: MMX_PSLLWri: [ 0.00 0.00 ]
+Key: MMX_PSLLWrm: [ 0.00 0.00 ]
+Key: MMX_PSLLWrr: [ 0.00 0.00 ]
+Key: MMX_PSRADri: [ 0.00 0.00 ]
+Key: MMX_PSRADrm: [ 0.00 0.00 ]
+Key: MMX_PSRADrr: [ 0.00 0.00 ]
+Key: MMX_PSRAWri: [ 0.00 0.00 ]
+Key: MMX_PSRAWrm: [ 0.00 0.00 ]
+Key: MMX_PSRAWrr: [ 0.00 0.00 ]
+Key: MMX_PSRLDri: [ 0.00 0.00 ]
+Key: MMX_PSRLDrm: [ 0.00 0.00 ]
+Key: MMX_PSRLDrr: [ 0.00 0.00 ]
+Key: MMX_PSRLQri: [ 0.00 0.00 ]
+Key: MMX_PSRLQrm: [ 0.00 0.00 ]
+Key: MMX_PSRLQrr: [ 0.00 0.00 ]
+Key: MMX_PSRLWri: [ 0.00 0.00 ]
+Key: MMX_PSRLWrm: [ 0.00 0.00 ]
+Key: MMX_PSRLWrr: [ 0.00 0.00 ]
+Key: MMX_PSUBBrm: [ 0.00 0.00 ]
+Key: MMX_PSUBBrr: [ 0.00 0.00 ]
+Key: MMX_PSUBDrm: [ 0.00 0.00 ]
+Key: MMX_PSUBDrr: [ 0.00 0.00 ]
+Key: MMX_PSUBQrm: [ 0.00 0.00 ]
+Key: MMX_PSUBQrr: [ 0.00 0.00 ]
+Key: MMX_PSUBSBrm: [ 0.00 0.00 ]
+Key: MMX_PSUBSBrr: [ 0.00 0.00 ]
+Key: MMX_PSUBSWrm: [ 0.00 0.00 ]
+Key: MMX_PSUBSWrr: [ 0.00 0.00 ]
+Key: MMX_PSUBUSBrm: [ 0.00 0.00 ]
+Key: MMX_PSUBUSBrr: [ 0.00 0.00 ]
+Key: MMX_PSUBUSWrm: [ 0.00 0.00 ]
+Key: MMX_PSUBUSWrr: [ 0.00 0.00 ]
+Key: MMX_PSUBWrm: [ 0.00 0.00 ]
+Key: MMX_PSUBWrr: [ 0.00 0.00 ]
+Key: MMX_PUNPCKHBWrm: [ 0.00 0.00 ]
+Key: MMX_PUNPCKHBWrr: [ 0.00 0.00 ]
+Key: MMX_PUNPCKHDQrm: [ 0.00 0.00 ]
+Key: MMX_PUNPCKHDQrr: [ 0.00 0.00 ]
+Key: MMX_PUNPCKHWDrm: [ 0.00 0.00 ]
+Key: MMX_PUNPCKHWDrr: [ 0.00 0.00 ]
+Key: MMX_PUNPCKLBWrm: [ 0.00 0.00 ]
+Key: MMX_PUNPCKLBWrr: [ 0.00 0.00 ]
+Key: MMX_PUNPCKLDQrm: [ 0.00 0.00 ]
+Key: MMX_PUNPCKLDQrr: [ 0.00 0.00 ]
+Key: MMX_PUNPCKLWDrm: [ 0.00 0.00 ]
+Key: MMX_PUNPCKLWDrr: [ 0.00 0.00 ]
+Key: MMX_PXORrm: [ 0.00 0.00 ]
+Key: MMX_PXORrr: [ 0.00 0.00 ]
+Key: MMX_SET: [ 0.00 0.00 ]
+Key: MONITOR: [ 0.00 0.00 ]
+Key: MONITORX: [ 0.00 0.00 ]
+Key: MONTMUL: [ 0.00 0.00 ]
+Key: MORESTACK_RET: [ 0.00 0.00 ]
+Key: MORESTACK_RET_RESTORE_R: [ 0.00 0.00 ]
+Key: MOV: [ 0.00 0.00 ]
+Key: MOVAPDmr: [ 0.00 0.00 ]
+Key: MOVAPDrm: [ 0.00 0.00 ]
+Key: MOVAPDrr: [ 0.00 0.00 ]
+Key: MOVAPDrr_REV: [ 0.00 0.00 ]
+Key: MOVAPSmr: [ 0.00 0.00 ]
+Key: MOVAPSrm: [ 0.00 0.00 ]
+Key: MOVAPSrr: [ 0.00 0.00 ]
+Key: MOVAPSrr_REV: [ 0.00 0.00 ]
+Key: MOVBE: [ 0.00 0.00 ]
+Key: MOVDDUPrm: [ 0.00 0.00 ]
+Key: MOVDDUPrr: [ 0.00 0.00 ]
+Key: MOVDI: [ 0.00 0.00 ]
+Key: MOVDIR: [ 0.00 0.00 ]
+Key: MOVDIRI: [ 0.00 0.00 ]
+Key: MOVDQAmr: [ 0.00 0.00 ]
+Key: MOVDQArm: [ 0.00 0.00 ]
+Key: MOVDQArr: [ 0.00 0.00 ]
+Key: MOVDQArr_REV: [ 0.00 0.00 ]
+Key: MOVDQUmr: [ 0.00 0.00 ]
+Key: MOVDQUrm: [ 0.00 0.00 ]
+Key: MOVDQUrr: [ 0.00 0.00 ]
+Key: MOVDQUrr_REV: [ 0.00 0.00 ]
+Key: MOVHLPSrr: [ 0.00 0.00 ]
+Key: MOVHPDmr: [ 0.00 0.00 ]
+Key: MOVHPDrm: [ 0.00 0.00 ]
+Key: MOVHPSmr: [ 0.00 0.00 ]
+Key: MOVHPSrm: [ 0.00 0.00 ]
+Key: MOVLHPSrr: [ 0.00 0.00 ]
+Key: MOVLPDmr: [ 0.00 0.00 ]
+Key: MOVLPDrm: [ 0.00 0.00 ]
+Key: MOVLPSmr: [ 0.00 0.00 ]
+Key: MOVLPSrm: [ 0.00 0.00 ]
+Key: MOVMSKPDrr: [ 0.00 0.00 ]
+Key: MOVMSKPSrr: [ 0.00 0.00 ]
+Key: MOVNTDQArm: [ 0.00 0.00 ]
+Key: MOVNTDQmr: [ 0.00 0.00 ]
+Key: MOVNTI: [ 0.00 0.00 ]
+Key: MOVNTImr: [ 0.00 0.00 ]
+Key: MOVNTPDmr: [ 0.00 0.00 ]
+Key: MOVNTPSmr: [ 0.00 0.00 ]
+Key: MOVNTSD: [ 0.00 0.00 ]
+Key: MOVNTSS: [ 0.00 0.00 ]
+Key: MOVPC: [ 0.00 0.00 ]
+Key: MOVPDI: [ 0.00 0.00 ]
+Key: MOVPQI: [ 0.00 0.00 ]
+Key: MOVPQIto: [ 0.00 0.00 ]
+Key: MOVQI: [ 0.00 0.00 ]
+Key: MOVRS: [ 0.00 0.00 ]
+Key: MOVSB: [ 0.00 0.00 ]
+Key: MOVSDmr: [ 0.00 0.00 ]
+Key: MOVSDrm: [ 0.00 0.00 ]
+Key: MOVSDrm_alt: [ 0.00 0.00 ]
+Key: MOVSDrr: [ 0.00 0.00 ]
+Key: MOVSDrr_REV: [ 0.00 0.00 ]
+Key: MOVSDto: [ 0.00 0.00 ]
+Key: MOVSHDUPrm: [ 0.00 0.00 ]
+Key: MOVSHDUPrr: [ 0.00 0.00 ]
+Key: MOVSHPmr: [ 0.00 0.00 ]
+Key: MOVSHPrm: [ 0.00 0.00 ]
+Key: MOVSL: [ 0.00 0.00 ]
+Key: MOVSLDUPrm: [ 0.00 0.00 ]
+Key: MOVSLDUPrr: [ 0.00 0.00 ]
+Key: MOVSQ: [ 0.00 0.00 ]
+Key: MOVSS: [ 0.00 0.00 ]
+Key: MOVSSmr: [ 0.00 0.00 ]
+Key: MOVSSrm: [ 0.00 0.00 ]
+Key: MOVSSrm_alt: [ 0.00 0.00 ]
+Key: MOVSSrr: [ 0.00 0.00 ]
+Key: MOVSSrr_REV: [ 0.00 0.00 ]
+Key: MOVSW: [ 0.00 0.00 ]
+Key: MOVSX: [ 0.00 0.00 ]
+Key: MOVUPDmr: [ 0.00 0.00 ]
+Key: MOVUPDrm: [ 0.00 0.00 ]
+Key: MOVUPDrr: [ 0.00 0.00 ]
+Key: MOVUPDrr_REV: [ 0.00 0.00 ]
+Key: MOVUPSmr: [ 0.00 0.00 ]
+Key: MOVUPSrm: [ 0.00 0.00 ]
+Key: MOVUPSrr: [ 0.00 0.00 ]
+Key: MOVUPSrr_REV: [ 0.00 0.00 ]
+Key: MOVZPQILo: [ 0.00 0.00 ]
+Key: MOVZX: [ 0.00 0.00 ]
+Key: MPSADBWrmi: [ 0.00 0.00 ]
+Key: MPSADBWrri: [ 0.00 0.00 ]
+Key: MUL: [ 0.00 0.00 ]
+Key: MULPDrm: [ 0.00 0.00 ]
+Key: MULPDrr: [ 0.00 0.00 ]
+Key: MULPSrm: [ 0.00 0.00 ]
+Key: MULPSrr: [ 0.00 0.00 ]
+Key: MULSDrm: [ 0.00 0.00 ]
+Key: MULSDrm_Int: [ 0.00 0.00 ]
+Key: MULSDrr: [ 0.00 0.00 ]
+Key: MULSDrr_Int: [ 0.00 0.00 ]
+Key: MULSSrm: [ 0.00 0.00 ]
+Key: MULSSrm_Int: [ 0.00 0.00 ]
+Key: MULSSrr: [ 0.00 0.00 ]
+Key: MULSSrr_Int: [ 0.00 0.00 ]
+Key: MULX: [ 0.00 0.00 ]
+Key: MUL_F: [ 0.00 0.00 ]
+Key: MUL_FI: [ 0.00 0.00 ]
+Key: MUL_FPrST: [ 0.00 0.00 ]
+Key: MUL_FST: [ 0.00 0.00 ]
+Key: MUL_Fp: [ 0.00 0.00 ]
+Key: MUL_FpI: [ 0.00 0.00 ]
+Key: MUL_FrST: [ 0.00 0.00 ]
+Key: MWAITX: [ 0.00 0.00 ]
+Key: MWAITX_SAVE_RBX: [ 0.00 0.00 ]
+Key: MWAITXrrr: [ 0.00 0.00 ]
+Key: MWAITrr: [ 0.00 0.00 ]
+Key: NEG: [ 0.00 0.00 ]
+Key: NOOP: [ 0.00 0.00 ]
+Key: NOOPL: [ 0.00 0.00 ]
+Key: NOOPLr: [ 0.00 0.00 ]
+Key: NOOPQ: [ 0.00 0.00 ]
+Key: NOOPQr: [ 0.00 0.00 ]
+Key: NOOPW: [ 0.00 0.00 ]
+Key: NOOPWr: [ 0.00 0.00 ]
+Key: NOT: [ 0.00 0.00 ]
+Key: OR: [ 0.00 0.00 ]
+Key: ORPDrm: [ 0.00 0.00 ]
+Key: ORPDrr: [ 0.00 0.00 ]
+Key: ORPSrm: [ 0.00 0.00 ]
+Key: ORPSrr: [ 0.00 0.00 ]
+Key: OUT: [ 0.00 0.00 ]
+Key: OUTSB: [ 0.00 0.00 ]
+Key: OUTSL: [ 0.00 0.00 ]
+Key: OUTSW: [ 0.00 0.00 ]
+Key: PABSBrm: [ 0.00 0.00 ]
+Key: PABSBrr: [ 0.00 0.00 ]
+Key: PABSDrm: [ 0.00 0.00 ]
+Key: PABSDrr: [ 0.00 0.00 ]
+Key: PABSWrm: [ 0.00 0.00 ]
+Key: PABSWrr: [ 0.00 0.00 ]
+Key: PACKSSDWrm: [ 0.00 0.00 ]
+Key: PACKSSDWrr: [ 0.00 0.00 ]
+Key: PACKSSWBrm: [ 0.00 0.00 ]
+Key: PACKSSWBrr: [ 0.00 0.00 ]
+Key: PACKUSDWrm: [ 0.00 0.00 ]
+Key: PACKUSDWrr: [ 0.00 0.00 ]
+Key: PACKUSWBrm: [ 0.00 0.00 ]
+Key: PACKUSWBrr: [ 0.00 0.00 ]
+Key: PADDBrm: [ 0.00 0.00 ]
+Key: PADDBrr: [ 0.00 0.00 ]
+Key: PADDDrm: [ 0.00 0.00 ]
+Key: PADDDrr: [ 0.00 0.00 ]
+Key: PADDQrm: [ 0.00 0.00 ]
+Key: PADDQrr: [ 0.00 0.00 ]
+Key: PADDSBrm: [ 0.00 0.00 ]
+Key: PADDSBrr: [ 0.00 0.00 ]
+Key: PADDSWrm: [ 0.00 0.00 ]
+Key: PADDSWrr: [ 0.00 0.00 ]
+Key: PADDUSBrm: [ 0.00 0.00 ]
+Key: PADDUSBrr: [ 0.00 0.00 ]
+Key: PADDUSWrm: [ 0.00 0.00 ]
+Key: PADDUSWrr: [ 0.00 0.00 ]
+Key: PADDWrm: [ 0.00 0.00 ]
+Key: PADDWrr: [ 0.00 0.00 ]
+Key: PALIGNRrmi: [ 0.00 0.00 ]
+Key: PALIGNRrri: [ 0.00 0.00 ]
+Key: PANDNrm: [ 0.00 0.00 ]
+Key: PANDNrr: [ 0.00 0.00 ]
+Key: PANDrm: [ 0.00 0.00 ]
+Key: PANDrr: [ 0.00 0.00 ]
+Key: PATCHABLE_EVENT_CALL: [ 0.00 0.00 ]
+Key: PATCHABLE_FUNCTION_ENTER: [ 0.00 0.00 ]
+Key: PATCHABLE_FUNCTION_EXIT: [ 0.00 0.00 ]
+Key: PATCHABLE_OP: [ 0.00 0.00 ]
+Key: PATCHABLE_RET: [ 0.00 0.00 ]
+Key: PATCHABLE_TAIL_CALL: [ 0.00 0.00 ]
+Key: PATCHABLE_TYPED_EVENT_CALL: [ 0.00 0.00 ]
+Key: PATCHPOINT: [ 0.00 0.00 ]
+Key: PAUSE: [ 0.00 0.00 ]
+Key: PAVGBrm: [ 0.00 0.00 ]
+Key: PAVGBrr: [ 0.00 0.00 ]
+Key: PAVGUSBrm: [ 0.00 0.00 ]
+Key: PAVGUSBrr: [ 0.00 0.00 ]
+Key: PAVGWrm: [ 0.00 0.00 ]
+Key: PAVGWrr: [ 0.00 0.00 ]
+Key: PBLENDVBrm: [ 0.00 0.00 ]
+Key: PBLENDVBrr: [ 0.00 0.00 ]
+Key: PBLENDWrmi: [ 0.00 0.00 ]
+Key: PBLENDWrri: [ 0.00 0.00 ]
+Key: PBNDKB: [ 0.00 0.00 ]
+Key: PCLMULQDQrmi: [ 0.00 0.00 ]
+Key: PCLMULQDQrri: [ 0.00 0.00 ]
+Key: PCMPEQBrm: [ 0.00 0.00 ]
+Key: PCMPEQBrr: [ 0.00 0.00 ]
+Key: PCMPEQDrm: [ 0.00 0.00 ]
+Key: PCMPEQDrr: [ 0.00 0.00 ]
+Key: PCMPEQQrm: [ 0.00 0.00 ]
+Key: PCMPEQQrr: [ 0.00 0.00 ]
+Key: PCMPEQWrm: [ 0.00 0.00 ]
+Key: PCMPEQWrr: [ 0.00 0.00 ]
+Key: PCMPESTRIrmi: [ 0.00 0.00 ]
+Key: PCMPESTRIrri: [ 0.00 0.00 ]
+Key: PCMPESTRMrmi: [ 0.00 0.00 ]
+Key: PCMPESTRMrri: [ 0.00 0.00 ]
+Key: PCMPGTBrm: [ 0.00 0.00 ]
+Key: PCMPGTBrr: [ 0.00 0.00 ]
+Key: PCMPGTDrm: [ 0.00 0.00 ]
+Key: PCMPGTDrr: [ 0.00 0.00 ]
+Key: PCMPGTQrm: [ 0.00 0.00 ]
+Key: PCMPGTQrr: [ 0.00 0.00 ]
+Key: PCMPGTWrm: [ 0.00 0.00 ]
+Key: PCMPGTWrr: [ 0.00 0.00 ]
+Key: PCMPISTRIrmi: [ 0.00 0.00 ]
+Key: PCMPISTRIrri: [ 0.00 0.00 ]
+Key: PCMPISTRMrmi: [ 0.00 0.00 ]
+Key: PCMPISTRMrri: [ 0.00 0.00 ]
+Key: PCONFIG: [ 0.00 0.00 ]
+Key: PDEP: [ 0.00 0.00 ]
+Key: PEXT: [ 0.00 0.00 ]
+Key: PEXTRBmri: [ 0.00 0.00 ]
+Key: PEXTRBrri: [ 0.00 0.00 ]
+Key: PEXTRDmri: [ 0.00 0.00 ]
+Key: PEXTRDrri: [ 0.00 0.00 ]
+Key: PEXTRQmri: [ 0.00 0.00 ]
+Key: PEXTRQrri: [ 0.00 0.00 ]
+Key: PEXTRWmri: [ 0.00 0.00 ]
+Key: PEXTRWrri: [ 0.00 0.00 ]
+Key: PEXTRWrri_REV: [ 0.00 0.00 ]
+Key: PF: [ 0.00 0.00 ]
+Key: PFACCrm: [ 0.00 0.00 ]
+Key: PFACCrr: [ 0.00 0.00 ]
+Key: PFADDrm: [ 0.00 0.00 ]
+Key: PFADDrr: [ 0.00 0.00 ]
+Key: PFCMPEQrm: [ 0.00 0.00 ]
+Key: PFCMPEQrr: [ 0.00 0.00 ]
+Key: PFCMPGErm: [ 0.00 0.00 ]
+Key: PFCMPGErr: [ 0.00 0.00 ]
+Key: PFCMPGTrm: [ 0.00 0.00 ]
+Key: PFCMPGTrr: [ 0.00 0.00 ]
+Key: PFMAXrm: [ 0.00 0.00 ]
+Key: PFMAXrr: [ 0.00 0.00 ]
+Key: PFMINrm: [ 0.00 0.00 ]
+Key: PFMINrr: [ 0.00 0.00 ]
+Key: PFMULrm: [ 0.00 0.00 ]
+Key: PFMULrr: [ 0.00 0.00 ]
+Key: PFNACCrm: [ 0.00 0.00 ]
+Key: PFNACCrr: [ 0.00 0.00 ]
+Key: PFPNACCrm: [ 0.00 0.00 ]
+Key: PFPNACCrr: [ 0.00 0.00 ]
+Key: PFRCPIT: [ 0.00 0.00 ]
+Key: PFRCPrm: [ 0.00 0.00 ]
+Key: PFRCPrr: [ 0.00 0.00 ]
+Key: PFRSQIT: [ 0.00 0.00 ]
+Key: PFRSQRTrm: [ 0.00 0.00 ]
+Key: PFRSQRTrr: [ 0.00 0.00 ]
+Key: PFSUBRrm: [ 0.00 0.00 ]
+Key: PFSUBRrr: [ 0.00 0.00 ]
+Key: PFSUBrm: [ 0.00 0.00 ]
+Key: PFSUBrr: [ 0.00 0.00 ]
+Key: PHADDDrm: [ 0.00 0.00 ]
+Key: PHADDDrr: [ 0.00 0.00 ]
+Key: PHADDSWrm: [ 0.00 0.00 ]
+Key: PHADDSWrr: [ 0.00 0.00 ]
+Key: PHADDWrm: [ 0.00 0.00 ]
+Key: PHADDWrr: [ 0.00 0.00 ]
+Key: PHI: [ 0.00 0.00 ]
+Key: PHMINPOSUWrm: [ 0.00 0.00 ]
+Key: PHMINPOSUWrr: [ 0.00 0.00 ]
+Key: PHSUBDrm: [ 0.00 0.00 ]
+Key: PHSUBDrr: [ 0.00 0.00 ]
+Key: PHSUBSWrm: [ 0.00 0.00 ]
+Key: PHSUBSWrr: [ 0.00 0.00 ]
+Key: PHSUBWrm: [ 0.00 0.00 ]
+Key: PHSUBWrr: [ 0.00 0.00 ]
+Key: PI: [ 0.00 0.00 ]
+Key: PINSRBrmi: [ 0.00 0.00 ]
+Key: PINSRBrri: [ 0.00 0.00 ]
+Key: PINSRDrmi: [ 0.00 0.00 ]
+Key: PINSRDrri: [ 0.00 0.00 ]
+Key: PINSRQrmi: [ 0.00 0.00 ]
+Key: PINSRQrri: [ 0.00 0.00 ]
+Key: PINSRWrmi: [ 0.00 0.00 ]
+Key: PINSRWrri: [ 0.00 0.00 ]
+Key: PLDTILECFGV: [ 0.00 0.00 ]
+Key: PLEA: [ 0.00 0.00 ]
+Key: PMADDUBSWrm: [ 0.00 0.00 ]
+Key: PMADDUBSWrr: [ 0.00 0.00 ]
+Key: PMADDWDrm: [ 0.00 0.00 ]
+Key: PMADDWDrr: [ 0.00 0.00 ]
+Key: PMAXSBrm: [ 0.00 0.00 ]
+Key: PMAXSBrr: [ 0.00 0.00 ]
+Key: PMAXSDrm: [ 0.00 0.00 ]
+Key: PMAXSDrr: [ 0.00 0.00 ]
+Key: PMAXSWrm: [ 0.00 0.00 ]
+Key: PMAXSWrr: [ 0.00 0.00 ]
+Key: PMAXUBrm: [ 0.00 0.00 ]
+Key: PMAXUBrr: [ 0.00 0.00 ]
+Key: PMAXUDrm: [ 0.00 0.00 ]
+Key: PMAXUDrr: [ 0.00 0.00 ]
+Key: PMAXUWrm: [ 0.00 0.00 ]
+Key: PMAXUWrr: [ 0.00 0.00 ]
+Key: PMINSBrm: [ 0.00 0.00 ]
+Key: PMINSBrr: [ 0.00 0.00 ]
+Key: PMINSDrm: [ 0.00 0.00 ]
+Key: PMINSDrr: [ 0.00 0.00 ]
+Key: PMINSWrm: [ 0.00 0.00 ]
+Key: PMINSWrr: [ 0.00 0.00 ]
+Key: PMINUBrm: [ 0.00 0.00 ]
+Key: PMINUBrr: [ 0.00 0.00 ]
+Key: PMINUDrm: [ 0.00 0.00 ]
+Key: PMINUDrr: [ 0.00 0.00 ]
+Key: PMINUWrm: [ 0.00 0.00 ]
+Key: PMINUWrr: [ 0.00 0.00 ]
+Key: PMOVMSKBrr: [ 0.00 0.00 ]
+Key: PMOVSXBDrm: [ 0.00 0.00 ]
+Key: PMOVSXBDrr: [ 0.00 0.00 ]
+Key: PMOVSXBQrm: [ 0.00 0.00 ]
+Key: PMOVSXBQrr: [ 0.00 0.00 ]
+Key: PMOVSXBWrm: [ 0.00 0.00 ]
+Key: PMOVSXBWrr: [ 0.00 0.00 ]
+Key: PMOVSXDQrm: [ 0.00 0.00 ]
+Key: PMOVSXDQrr: [ 0.00 0.00 ]
+Key: PMOVSXWDrm: [ 0.00 0.00 ]
+Key: PMOVSXWDrr: [ 0.00 0.00 ]
+Key: PMOVSXWQrm: [ 0.00 0.00 ]
+Key: PMOVSXWQrr: [ 0.00 0.00 ]
+Key: PMOVZXBDrm: [ 0.00 0.00 ]
+Key: PMOVZXBDrr: [ 0.00 0.00 ]
+Key: PMOVZXBQrm: [ 0.00 0.00 ]
+Key: PMOVZXBQrr: [ 0.00 0.00 ]
+Key: PMOVZXBWrm: [ 0.00 0.00 ]
+Key: PMOVZXBWrr: [ 0.00 0.00 ]
+Key: PMOVZXDQrm: [ 0.00 0.00 ]
+Key: PMOVZXDQrr: [ 0.00 0.00 ]
+Key: PMOVZXWDrm: [ 0.00 0.00 ]
+Key: PMOVZXWDrr: [ 0.00 0.00 ]
+Key: PMOVZXWQrm: [ 0.00 0.00 ]
+Key: PMOVZXWQrr: [ 0.00 0.00 ]
+Key: PMULDQrm: [ 0.00 0.00 ]
+Key: PMULDQrr: [ 0.00 0.00 ]
+Key: PMULHRSWrm: [ 0.00 0.00 ]
+Key: PMULHRSWrr: [ 0.00 0.00 ]
+Key: PMULHRWrm: [ 0.00 0.00 ]
+Key: PMULHRWrr: [ 0.00 0.00 ]
+Key: PMULHUWrm: [ 0.00 0.00 ]
+Key: PMULHUWrr: [ 0.00 0.00 ]
+Key: PMULHWrm: [ 0.00 0.00 ]
+Key: PMULHWrr: [ 0.00 0.00 ]
+Key: PMULLDrm: [ 0.00 0.00 ]
+Key: PMULLDrr: [ 0.00 0.00 ]
+Key: PMULLWrm: [ 0.00 0.00 ]
+Key: PMULLWrr: [ 0.00 0.00 ]
+Key: PMULUDQrm: [ 0.00 0.00 ]
+Key: PMULUDQrr: [ 0.00 0.00 ]
+Key: POP: [ 0.00 0.00 ]
+Key: POPA: [ 0.00 0.00 ]
+Key: POPCNT: [ 0.00 0.00 ]
+Key: POPDS: [ 0.00 0.00 ]
+Key: POPES: [ 0.00 0.00 ]
+Key: POPF: [ 0.00 0.00 ]
+Key: POPFS: [ 0.00 0.00 ]
+Key: POPGS: [ 0.00 0.00 ]
+Key: POPP: [ 0.00 0.00 ]
+Key: POPSS: [ 0.00 0.00 ]
+Key: PORrm: [ 0.00 0.00 ]
+Key: PORrr: [ 0.00 0.00 ]
+Key: PREALLOCATED_ARG: [ 0.00 0.00 ]
+Key: PREALLOCATED_SETUP: [ 0.00 0.00 ]
+Key: PREFETCH: [ 0.00 0.00 ]
+Key: PREFETCHIT: [ 0.00 0.00 ]
+Key: PREFETCHNTA: [ 0.00 0.00 ]
+Key: PREFETCHRST: [ 0.00 0.00 ]
+Key: PREFETCHT: [ 0.00 0.00 ]
+Key: PREFETCHW: [ 0.00 0.00 ]
+Key: PREFETCHWT: [ 0.00 0.00 ]
+Key: PROBED_ALLOCA: [ 0.00 0.00 ]
+Key: PSADBWrm: [ 0.00 0.00 ]
+Key: PSADBWrr: [ 0.00 0.00 ]
+Key: PSEUDO_PROBE: [ 0.00 0.00 ]
+Key: PSHUFBrm: [ 0.00 0.00 ]
+Key: PSHUFBrr: [ 0.00 0.00 ]
+Key: PSHUFDmi: [ 0.00 0.00 ]
+Key: PSHUFDri: [ 0.00 0.00 ]
+Key: PSHUFHWmi: [ 0.00 0.00 ]
+Key: PSHUFHWri: [ 0.00 0.00 ]
+Key: PSHUFLWmi: [ 0.00 0.00 ]
+Key: PSHUFLWri: [ 0.00 0.00 ]
+Key: PSIGNBrm: [ 0.00 0.00 ]
+Key: PSIGNBrr: [ 0.00 0.00 ]
+Key: PSIGNDrm: [ 0.00 0.00 ]
+Key: PSIGNDrr: [ 0.00 0.00 ]
+Key: PSIGNWrm: [ 0.00 0.00 ]
+Key: PSIGNWrr: [ 0.00 0.00 ]
+Key: PSLLDQri: [ 0.00 0.00 ]
+Key: PSLLDri: [ 0.00 0.00 ]
+Key: PSLLDrm: [ 0.00 0.00 ]
+Key: PSLLDrr: [ 0.00 0.00 ]
+Key: PSLLQri: [ 0.00 0.00 ]
+Key: PSLLQrm: [ 0.00 0.00 ]
+Key: PSLLQrr: [ 0.00 0.00 ]
+Key: PSLLWri: [ 0.00 0.00 ]
+Key: PSLLWrm: [ 0.00 0.00 ]
+Key: PSLLWrr: [ 0.00 0.00 ]
+Key: PSMASH: [ 0.00 0.00 ]
+Key: PSRADri: [ 0.00 0.00 ]
+Key: PSRADrm: [ 0.00 0.00 ]
+Key: PSRADrr: [ 0.00 0.00 ]
+Key: PSRAWri: [ 0.00 0.00 ]
+Key: PSRAWrm: [ 0.00 0.00 ]
+Key: PSRAWrr: [ 0.00 0.00 ]
+Key: PSRLDQri: [ 0.00 0.00 ]
+Key: PSRLDri: [ 0.00 0.00 ]
+Key: PSRLDrm: [ 0.00 0.00 ]
+Key: PSRLDrr: [ 0.00 0.00 ]
+Key: PSRLQri: [ 0.00 0.00 ]
+Key: PSRLQrm: [ 0.00 0.00 ]
+Key: PSRLQrr: [ 0.00 0.00 ]
+Key: PSRLWri: [ 0.00 0.00 ]
+Key: PSRLWrm: [ 0.00 0.00 ]
+Key: PSRLWrr: [ 0.00 0.00 ]
+Key: PSUBBrm: [ 0.00 0.00 ]
+Key: PSUBBrr: [ 0.00 0.00 ]
+Key: PSUBDrm: [ 0.00 0.00 ]
+Key: PSUBDrr: [ 0.00 0.00 ]
+Key: PSUBQrm: [ 0.00 0.00 ]
+Key: PSUBQrr: [ 0.00 0.00 ]
+Key: PSUBSBrm: [ 0.00 0.00 ]
+Key: PSUBSBrr: [ 0.00 0.00 ]
+Key: PSUBSWrm: [ 0.00 0.00 ]
+Key: PSUBSWrr: [ 0.00 0.00 ]
+Key: PSUBUSBrm: [ 0.00 0.00 ]
+Key: PSUBUSBrr: [ 0.00 0.00 ]
+Key: PSUBUSWrm: [ 0.00 0.00 ]
+Key: PSUBUSWrr: [ 0.00 0.00 ]
+Key: PSUBWrm: [ 0.00 0.00 ]
+Key: PSUBWrr: [ 0.00 0.00 ]
+Key: PSWAPDrm: [ 0.00 0.00 ]
+Key: PSWAPDrr: [ 0.00 0.00 ]
+Key: PT: [ 0.00 0.00 ]
+Key: PTCMMIMFP: [ 0.00 0.00 ]
+Key: PTCMMRLFP: [ 0.00 0.00 ]
+Key: PTCONJTCMMIMFP: [ 0.00 0.00 ]
+Key: PTCONJTFP: [ 0.00 0.00 ]
+Key: PTCVTROWD: [ 0.00 0.00 ]
+Key: PTCVTROWPS: [ 0.00 0.00 ]
+Key: PTDPBF: [ 0.00 0.00 ]
+Key: PTDPBHF: [ 0.00 0.00 ]
+Key: PTDPBSSD: [ 0.00 0.00 ]
+Key: PTDPBSSDV: [ 0.00 0.00 ]
+Key: PTDPBSUD: [ 0.00 0.00 ]
+Key: PTDPBSUDV: [ 0.00 0.00 ]
+Key: PTDPBUSD: [ 0.00 0.00 ]
+Key: PTDPBUSDV: [ 0.00 0.00 ]
+Key: PTDPBUUD: [ 0.00 0.00 ]
+Key: PTDPBUUDV: [ 0.00 0.00 ]
+Key: PTDPFP: [ 0.00 0.00 ]
+Key: PTDPHBF: [ 0.00 0.00 ]
+Key: PTDPHF: [ 0.00 0.00 ]
+Key: PTESTrm: [ 0.00 0.00 ]
+Key: PTESTrr: [ 0.00 0.00 ]
+Key: PTILELOADD: [ 0.00 0.00 ]
+Key: PTILELOADDRS: [ 0.00 0.00 ]
+Key: PTILELOADDRST: [ 0.00 0.00 ]
+Key: PTILELOADDRSV: [ 0.00 0.00 ]
+Key: PTILELOADDT: [ 0.00 0.00 ]
+Key: PTILELOADDV: [ 0.00 0.00 ]
+Key: PTILEMOVROWrre: [ 0.00 0.00 ]
+Key: PTILEMOVROWrreV: [ 0.00 0.00 ]
+Key: PTILEMOVROWrri: [ 0.00 0.00 ]
+Key: PTILEMOVROWrriV: [ 0.00 0.00 ]
+Key: PTILEPAIRLOAD: [ 0.00 0.00 ]
+Key: PTILEPAIRSTORE: [ 0.00 0.00 ]
+Key: PTILESTORED: [ 0.00 0.00 ]
+Key: PTILESTOREDV: [ 0.00 0.00 ]
+Key: PTILEZERO: [ 0.00 0.00 ]
+Key: PTILEZEROV: [ 0.00 0.00 ]
+Key: PTMMULTF: [ 0.00 0.00 ]
+Key: PTTCMMIMFP: [ 0.00 0.00 ]
+Key: PTTCMMRLFP: [ 0.00 0.00 ]
+Key: PTTDPBF: [ 0.00 0.00 ]
+Key: PTTDPFP: [ 0.00 0.00 ]
+Key: PTTMMULTF: [ 0.00 0.00 ]
+Key: PTTRANSPOSED: [ 0.00 0.00 ]
+Key: PTTRANSPOSEDV: [ 0.00 0.00 ]
+Key: PTWRITE: [ 0.00 0.00 ]
+Key: PTWRITEm: [ 0.00 0.00 ]
+Key: PTWRITEr: [ 0.00 0.00 ]
+Key: PUNPCKHBWrm: [ 0.00 0.00 ]
+Key: PUNPCKHBWrr: [ 0.00 0.00 ]
+Key: PUNPCKHDQrm: [ 0.00 0.00 ]
+Key: PUNPCKHDQrr: [ 0.00 0.00 ]
+Key: PUNPCKHQDQrm: [ 0.00 0.00 ]
+Key: PUNPCKHQDQrr: [ 0.00 0.00 ]
+Key: PUNPCKHWDrm: [ 0.00 0.00 ]
+Key: PUNPCKHWDrr: [ 0.00 0.00 ]
+Key: PUNPCKLBWrm: [ 0.00 0.00 ]
+Key: PUNPCKLBWrr: [ 0.00 0.00 ]
+Key: PUNPCKLDQrm: [ 0.00 0.00 ]
+Key: PUNPCKLDQrr: [ 0.00 0.00 ]
+Key: PUNPCKLQDQrm: [ 0.00 0.00 ]
+Key: PUNPCKLQDQrr: [ 0.00 0.00 ]
+Key: PUNPCKLWDrm: [ 0.00 0.00 ]
+Key: PUNPCKLWDrr: [ 0.00 0.00 ]
+Key: PUSH: [ 0.00 0.00 ]
+Key: PUSHA: [ 0.00 0.00 ]
+Key: PUSHCS: [ 0.00 0.00 ]
+Key: PUSHDS: [ 0.00 0.00 ]
+Key: PUSHES: [ 0.00 0.00 ]
+Key: PUSHF: [ 0.00 0.00 ]
+Key: PUSHFS: [ 0.00 0.00 ]
+Key: PUSHGS: [ 0.00 0.00 ]
+Key: PUSHP: [ 0.00 0.00 ]
+Key: PUSHSS: [ 0.00 0.00 ]
+Key: PVALIDATE: [ 0.00 0.00 ]
+Key: PXORrm: [ 0.00 0.00 ]
+Key: PXORrr: [ 0.00 0.00 ]
+Key: RCL: [ 0.00 0.00 ]
+Key: RCPPSm: [ 0.00 0.00 ]
+Key: RCPPSr: [ 0.00 0.00 ]
+Key: RCPSSm: [ 0.00 0.00 ]
+Key: RCPSSm_Int: [ 0.00 0.00 ]
+Key: RCPSSr: [ 0.00 0.00 ]
+Key: RCPSSr_Int: [ 0.00 0.00 ]
+Key: RCR: [ 0.00 0.00 ]
+Key: RDFLAGS: [ 0.00 0.00 ]
+Key: RDFSBASE: [ 0.00 0.00 ]
+Key: RDGSBASE: [ 0.00 0.00 ]
+Key: RDMSR: [ 0.00 0.00 ]
+Key: RDMSRLIST: [ 0.00 0.00 ]
+Key: RDMSRri: [ 0.00 0.00 ]
+Key: RDMSRri_EVEX: [ 0.00 0.00 ]
+Key: RDPID: [ 0.00 0.00 ]
+Key: RDPKRUr: [ 0.00 0.00 ]
+Key: RDPMC: [ 0.00 0.00 ]
+Key: RDPRU: [ 0.00 0.00 ]
+Key: RDRAND: [ 0.00 0.00 ]
+Key: RDSEED: [ 0.00 0.00 ]
+Key: RDSSPD: [ 0.00 0.00 ]
+Key: RDSSPQ: [ 0.00 0.00 ]
+Key: RDTSC: [ 0.00 0.00 ]
+Key: RDTSCP: [ 0.00 0.00 ]
+Key: REG_SEQUENCE: [ 0.00 0.00 ]
+Key: REPNE_PREFIX: [ 0.00 0.00 ]
+Key: REP_MOVSB: [ 0.00 0.00 ]
+Key: REP_MOVSD: [ 0.00 0.00 ]
+Key: REP_MOVSQ: [ 0.00 0.00 ]
+Key: REP_MOVSW: [ 0.00 0.00 ]
+Key: REP_PREFIX: [ 0.00 0.00 ]
+Key: REP_STOSB: [ 0.00 0.00 ]
+Key: REP_STOSD: [ 0.00 0.00 ]
+Key: REP_STOSQ: [ 0.00 0.00 ]
+Key: REP_STOSW: [ 0.00 0.00 ]
+Key: RET: [ 0.00 0.00 ]
+Key: RETI: [ 0.00 0.00 ]
+Key: REX: [ 0.00 0.00 ]
+Key: RMPADJUST: [ 0.00 0.00 ]
+Key: RMPQUERY: [ 0.00 0.00 ]
+Key: RMPUPDATE: [ 0.00 0.00 ]
+Key: ROL: [ 0.00 0.00 ]
+Key: ROR: [ 0.00 0.00 ]
+Key: RORX: [ 0.00 0.00 ]
+Key: ROUNDPDmi: [ 0.00 0.00 ]
+Key: ROUNDPDri: [ 0.00 0.00 ]
+Key: ROUNDPSmi: [ 0.00 0.00 ]
+Key: ROUNDPSri: [ 0.00 0.00 ]
+Key: ROUNDSDmi: [ 0.00 0.00 ]
+Key: ROUNDSDmi_Int: [ 0.00 0.00 ]
+Key: ROUNDSDri: [ 0.00 0.00 ]
+Key: ROUNDSDri_Int: [ 0.00 0.00 ]
+Key: ROUNDSSmi: [ 0.00 0.00 ]
+Key: ROUNDSSmi_Int: [ 0.00 0.00 ]
+Key: ROUNDSSri: [ 0.00 0.00 ]
+Key: ROUNDSSri_Int: [ 0.00 0.00 ]
+Key: RSM: [ 0.00 0.00 ]
+Key: RSQRTPSm: [ 0.00 0.00 ]
+Key: RSQRTPSr: [ 0.00 0.00 ]
+Key: RSQRTSSm: [ 0.00 0.00 ]
+Key: RSQRTSSm_Int: [ 0.00 0.00 ]
+Key: RSQRTSSr: [ 0.00 0.00 ]
+Key: RSQRTSSr_Int: [ 0.00 0.00 ]
+Key: RSTORSSP: [ 0.00 0.00 ]
+Key: SAHF: [ 0.00 0.00 ]
+Key: SALC: [ 0.00 0.00 ]
+Key: SAR: [ 0.00 0.00 ]
+Key: SARX: [ 0.00 0.00 ]
+Key: SAVEPREVSSP: [ 0.00 0.00 ]
+Key: SBB: [ 0.00 0.00 ]
+Key: SCASB: [ 0.00 0.00 ]
+Key: SCASL: [ 0.00 0.00 ]
+Key: SCASQ: [ 0.00 0.00 ]
+Key: SCASW: [ 0.00 0.00 ]
+Key: SEAMCALL: [ 0.00 0.00 ]
+Key: SEAMOPS: [ 0.00 0.00 ]
+Key: SEAMRET: [ 0.00 0.00 ]
+Key: SEG_ALLOCA: [ 0.00 0.00 ]
+Key: SEH_BeginEpilogue: [ 0.00 0.00 ]
+Key: SEH_EndEpilogue: [ 0.00 0.00 ]
+Key: SEH_EndPrologue: [ 0.00 0.00 ]
+Key: SEH_PushFrame: [ 0.00 0.00 ]
+Key: SEH_PushReg: [ 0.00 0.00 ]
+Key: SEH_SaveReg: [ 0.00 0.00 ]
+Key: SEH_SaveXMM: [ 0.00 0.00 ]
+Key: SEH_SetFrame: [ 0.00 0.00 ]
+Key: SEH_StackAlign: [ 0.00 0.00 ]
+Key: SEH_StackAlloc: [ 0.00 0.00 ]
+Key: SEH_UnwindV: [ 0.00 0.00 ]
+Key: SEH_UnwindVersion: [ 0.00 0.00 ]
+Key: SENDUIPI: [ 0.00 0.00 ]
+Key: SERIALIZE: [ 0.00 0.00 ]
+Key: SETB_C: [ 0.00 0.00 ]
+Key: SETCCm: [ 0.00 0.00 ]
+Key: SETCCm_EVEX: [ 0.00 0.00 ]
+Key: SETCCr: [ 0.00 0.00 ]
+Key: SETCCr_EVEX: [ 0.00 0.00 ]
+Key: SETSSBSY: [ 0.00 0.00 ]
+Key: SETZUCCm: [ 0.00 0.00 ]
+Key: SETZUCCr: [ 0.00 0.00 ]
+Key: SFENCE: [ 0.00 0.00 ]
+Key: SGDT: [ 0.00 0.00 ]
+Key: SHA: [ 0.00 0.00 ]
+Key: SHL: [ 0.00 0.00 ]
+Key: SHLD: [ 0.00 0.00 ]
+Key: SHLDROT: [ 0.00 0.00 ]
+Key: SHLX: [ 0.00 0.00 ]
+Key: SHR: [ 0.00 0.00 ]
+Key: SHRD: [ 0.00 0.00 ]
+Key: SHRDROT: [ 0.00 0.00 ]
+Key: SHRX: [ 0.00 0.00 ]
+Key: SHUFPDrmi: [ 0.00 0.00 ]
+Key: SHUFPDrri: [ 0.00 0.00 ]
+Key: SHUFPSrmi: [ 0.00 0.00 ]
+Key: SHUFPSrri: [ 0.00 0.00 ]
+Key: SIDT: [ 0.00 0.00 ]
+Key: SKINIT: [ 0.00 0.00 ]
+Key: SLDT: [ 0.00 0.00 ]
+Key: SLWPCB: [ 0.00 0.00 ]
+Key: SMSW: [ 0.00 0.00 ]
+Key: SQRTPDm: [ 0.00 0.00 ]
+Key: SQRTPDr: [ 0.00 0.00 ]
+Key: SQRTPSm: [ 0.00 0.00 ]
+Key: SQRTPSr: [ 0.00 0.00 ]
+Key: SQRTSDm: [ 0.00 0.00 ]
+Key: SQRTSDm_Int: [ 0.00 0.00 ]
+Key: SQRTSDr: [ 0.00 0.00 ]
+Key: SQRTSDr_Int: [ 0.00 0.00 ]
+Key: SQRTSSm: [ 0.00 0.00 ]
+Key: SQRTSSm_Int: [ 0.00 0.00 ]
+Key: SQRTSSr: [ 0.00 0.00 ]
+Key: SQRTSSr_Int: [ 0.00 0.00 ]
+Key: SQRT_F: [ 0.00 0.00 ]
+Key: SQRT_Fp: [ 0.00 0.00 ]
+Key: SS_PREFIX: [ 0.00 0.00 ]
+Key: STAC: [ 0.00 0.00 ]
+Key: STACKALLOC_W_PROBING: [ 0.00 0.00 ]
+Key: STACKMAP: [ 0.00 0.00 ]
+Key: STATEPOINT: [ 0.00 0.00 ]
+Key: STC: [ 0.00 0.00 ]
+Key: STD: [ 0.00 0.00 ]
+Key: STGI: [ 0.00 0.00 ]
+Key: STI: [ 0.00 0.00 ]
+Key: STMXCSR: [ 0.00 0.00 ]
+Key: STOSB: [ 0.00 0.00 ]
+Key: STOSL: [ 0.00 0.00 ]
+Key: STOSQ: [ 0.00 0.00 ]
+Key: STOSW: [ 0.00 0.00 ]
+Key: STR: [ 0.00 0.00 ]
+Key: STRm: [ 0.00 0.00 ]
+Key: STTILECFG: [ 0.00 0.00 ]
+Key: STTILECFG_EVEX: [ 0.00 0.00 ]
+Key: STUI: [ 0.00 0.00 ]
+Key: ST_F: [ 0.00 0.00 ]
+Key: ST_FP: [ 0.00 0.00 ]
+Key: ST_FPrr: [ 0.00 0.00 ]
+Key: ST_Fp: [ 0.00 0.00 ]
+Key: ST_FpP: [ 0.00 0.00 ]
+Key: ST_Frr: [ 0.00 0.00 ]
+Key: SUB: [ 0.00 0.00 ]
+Key: SUBPDrm: [ 0.00 0.00 ]
+Key: SUBPDrr: [ 0.00 0.00 ]
+Key: SUBPSrm: [ 0.00 0.00 ]
+Key: SUBPSrr: [ 0.00 0.00 ]
+Key: SUBREG_TO_REG: [ 0.00 0.00 ]
+Key: SUBR_F: [ 0.00 0.00 ]
+Key: SUBR_FI: [ 0.00 0.00 ]
+Key: SUBR_FPrST: [ 0.00 0.00 ]
+Key: SUBR_FST: [ 0.00 0.00 ]
+Key: SUBR_Fp: [ 0.00 0.00 ]
+Key: SUBR_FpI: [ 0.00 0.00 ]
+Key: SUBR_FrST: [ 0.00 0.00 ]
+Key: SUBSDrm: [ 0.00 0.00 ]
+Key: SUBSDrm_Int: [ 0.00 0.00 ]
+Key: SUBSDrr: [ 0.00 0.00 ]
+Key: SUBSDrr_Int: [ 0.00 0.00 ]
+Key: SUBSSrm: [ 0.00 0.00 ]
+Key: SUBSSrm_Int: [ 0.00 0.00 ]
+Key: SUBSSrr: [ 0.00 0.00 ]
+Key: SUBSSrr_Int: [ 0.00 0.00 ]
+Key: SUB_F: [ 0.00 0.00 ]
+Key: SUB_FI: [ 0.00 0.00 ]
+Key: SUB_FPrST: [ 0.00 0.00 ]
+Key: SUB_FST: [ 0.00 0.00 ]
+Key: SUB_Fp: [ 0.00 0.00 ]
+Key: SUB_FpI: [ 0.00 0.00 ]
+Key: SUB_FrST: [ 0.00 0.00 ]
+Key: SWAPGS: [ 0.00 0.00 ]
+Key: SYSCALL: [ 0.00 0.00 ]
+Key: SYSENTER: [ 0.00 0.00 ]
+Key: SYSEXIT: [ 0.00 0.00 ]
+Key: SYSRET: [ 0.00 0.00 ]
+Key: T: [ 0.00 0.00 ]
+Key: TAILJMPd: [ 0.00 0.00 ]
+Key: TAILJMPd_CC: [ 0.00 0.00 ]
+Key: TAILJMPm: [ 0.00 0.00 ]
+Key: TAILJMPr: [ 0.00 0.00 ]
+Key: TCMMIMFP: [ 0.00 0.00 ]
+Key: TCMMRLFP: [ 0.00 0.00 ]
+Key: TCONJTCMMIMFP: [ 0.00 0.00 ]
+Key: TCONJTFP: [ 0.00 0.00 ]
+Key: TCRETURN_HIPE: [ 0.00 0.00 ]
+Key: TCRETURN_WIN: [ 0.00 0.00 ]
+Key: TCRETURN_WINmi: [ 0.00 0.00 ]
+Key: TCRETURNdi: [ 0.00 0.00 ]
+Key: TCRETURNdicc: [ 0.00 0.00 ]
+Key: TCRETURNmi: [ 0.00 0.00 ]
+Key: TCRETURNri: [ 0.00 0.00 ]
+Key: TCVTROWD: [ 0.00 0.00 ]
+Key: TCVTROWPS: [ 0.00 0.00 ]
+Key: TDCALL: [ 0.00 0.00 ]
+Key: TDPBF: [ 0.00 0.00 ]
+Key: TDPBHF: [ 0.00 0.00 ]
+Key: TDPBSSD: [ 0.00 0.00 ]
+Key: TDPBSUD: [ 0.00 0.00 ]
+Key: TDPBUSD: [ 0.00 0.00 ]
+Key: TDPBUUD: [ 0.00 0.00 ]
+Key: TDPFP: [ 0.00 0.00 ]
+Key: TDPHBF: [ 0.00 0.00 ]
+Key: TDPHF: [ 0.00 0.00 ]
+Key: TEST: [ 0.00 0.00 ]
+Key: TESTUI: [ 0.00 0.00 ]
+Key: TILELOADD: [ 0.00 0.00 ]
+Key: TILELOADDRS: [ 0.00 0.00 ]
+Key: TILELOADDRST: [ 0.00 0.00 ]
+Key: TILELOADDRS_EVEX: [ 0.00 0.00 ]
+Key: TILELOADDT: [ 0.00 0.00 ]
+Key: TILELOADD_EVEX: [ 0.00 0.00 ]
+Key: TILEMOVROWrre: [ 0.00 0.00 ]
+Key: TILEMOVROWrri: [ 0.00 0.00 ]
+Key: TILERELEASE: [ 0.00 0.00 ]
+Key: TILESTORED: [ 0.00 0.00 ]
+Key: TILESTORED_EVEX: [ 0.00 0.00 ]
+Key: TILEZERO: [ 0.00 0.00 ]
+Key: TLBSYNC: [ 0.00 0.00 ]
+Key: TLSCall: [ 0.00 0.00 ]
+Key: TLS_addr: [ 0.00 0.00 ]
+Key: TLS_addrX: [ 0.00 0.00 ]
+Key: TLS_base_addr: [ 0.00 0.00 ]
+Key: TLS_base_addrX: [ 0.00 0.00 ]
+Key: TLS_desc: [ 0.00 0.00 ]
+Key: TMMULTF: [ 0.00 0.00 ]
+Key: TPAUSE: [ 0.00 0.00 ]
+Key: TRAP: [ 0.00 0.00 ]
+Key: TST_F: [ 0.00 0.00 ]
+Key: TST_Fp: [ 0.00 0.00 ]
+Key: TTCMMIMFP: [ 0.00 0.00 ]
+Key: TTCMMRLFP: [ 0.00 0.00 ]
+Key: TTDPBF: [ 0.00 0.00 ]
+Key: TTDPFP: [ 0.00 0.00 ]
+Key: TTMMULTF: [ 0.00 0.00 ]
+Key: TTRANSPOSED: [ 0.00 0.00 ]
+Key: TZCNT: [ 0.00 0.00 ]
+Key: TZMSK: [ 0.00 0.00 ]
+Key: UBSAN_UD: [ 0.00 0.00 ]
+Key: UCOMISDrm: [ 0.00 0.00 ]
+Key: UCOMISDrm_Int: [ 0.00 0.00 ]
+Key: UCOMISDrr: [ 0.00 0.00 ]
+Key: UCOMISDrr_Int: [ 0.00 0.00 ]
+Key: UCOMISSrm: [ 0.00 0.00 ]
+Key: UCOMISSrm_Int: [ 0.00 0.00 ]
+Key: UCOMISSrr: [ 0.00 0.00 ]
+Key: UCOMISSrr_Int: [ 0.00 0.00 ]
+Key: UCOM_FIPr: [ 0.00 0.00 ]
+Key: UCOM_FIr: [ 0.00 0.00 ]
+Key: UCOM_FPPr: [ 0.00 0.00 ]
+Key: UCOM_FPr: [ 0.00 0.00 ]
+Key: UCOM_FpIr: [ 0.00 0.00 ]
+Key: UCOM_Fpr: [ 0.00 0.00 ]
+Key: UCOM_Fr: [ 0.00 0.00 ]
+Key: UD: [ 0.00 0.00 ]
+Key: UIRET: [ 0.00 0.00 ]
+Key: UMONITOR: [ 0.00 0.00 ]
+Key: UMWAIT: [ 0.00 0.00 ]
+Key: UNPCKHPDrm: [ 0.00 0.00 ]
+Key: UNPCKHPDrr: [ 0.00 0.00 ]
+Key: UNPCKHPSrm: [ 0.00 0.00 ]
+Key: UNPCKHPSrr: [ 0.00 0.00 ]
+Key: UNPCKLPDrm: [ 0.00 0.00 ]
+Key: UNPCKLPDrr: [ 0.00 0.00 ]
+Key: UNPCKLPSrm: [ 0.00 0.00 ]
+Key: UNPCKLPSrr: [ 0.00 0.00 ]
+Key: URDMSRri: [ 0.00 0.00 ]
+Key: URDMSRri_EVEX: [ 0.00 0.00 ]
+Key: URDMSRrr: [ 0.00 0.00 ]
+Key: URDMSRrr_EVEX: [ 0.00 0.00 ]
+Key: UWRMSRir: [ 0.00 0.00 ]
+Key: UWRMSRir_EVEX: [ 0.00 0.00 ]
+Key: UWRMSRrr: [ 0.00 0.00 ]
+Key: UWRMSRrr_EVEX: [ 0.00 0.00 ]
+Key: V: [ 0.00 0.00 ]
+Key: VAARG: [ 0.00 0.00 ]
+Key: VAARG_X: [ 0.00 0.00 ]
+Key: VADDBF: [ 0.00 0.00 ]
+Key: VADDPDYrm: [ 0.00 0.00 ]
+Key: VADDPDYrr: [ 0.00 0.00 ]
+Key: VADDPDZ: [ 0.00 0.00 ]
+Key: VADDPDZrm: [ 0.00 0.00 ]
+Key: VADDPDZrmb: [ 0.00 0.00 ]
+Key: VADDPDZrmbk: [ 0.00 0.00 ]
+Key: VADDPDZrmbkz: [ 0.00 0.00 ]
+Key: VADDPDZrmk: [ 0.00 0.00 ]
+Key: VADDPDZrmkz: [ 0.00 0.00 ]
+Key: VADDPDZrr: [ 0.00 0.00 ]
+Key: VADDPDZrrb: [ 0.00 0.00 ]
+Key: VADDPDZrrbk: [ 0.00 0.00 ]
+Key: VADDPDZrrbkz: [ 0.00 0.00 ]
+Key: VADDPDZrrk: [ 0.00 0.00 ]
+Key: VADDPDZrrkz: [ 0.00 0.00 ]
+Key: VADDPDrm: [ 0.00 0.00 ]
+Key: VADDPDrr: [ 0.00 0.00 ]
+Key: VADDPHZ: [ 0.00 0.00 ]
+Key: VADDPHZrm: [ 0.00 0.00 ]
+Key: VADDPHZrmb: [ 0.00 0.00 ]
+Key: VADDPHZrmbk: [ 0.00 0.00 ]
+Key: VADDPHZrmbkz: [ 0.00 0.00 ]
+Key: VADDPHZrmk: [ 0.00 0.00 ]
+Key: VADDPHZrmkz: [ 0.00 0.00 ]
+Key: VADDPHZrr: [ 0.00 0.00 ]
+Key: VADDPHZrrb: [ 0.00 0.00 ]
+Key: VADDPHZrrbk: [ 0.00 0.00 ]
+Key: VADDPHZrrbkz: [ 0.00 0.00 ]
+Key: VADDPHZrrk: [ 0.00 0.00 ]
+Key: VADDPHZrrkz: [ 0.00 0.00 ]
+Key: VADDPSYrm: [ 0.00 0.00 ]
+Key: VADDPSYrr: [ 0.00 0.00 ]
+Key: VADDPSZ: [ 0.00 0.00 ]
+Key: VADDPSZrm: [ 0.00 0.00 ]
+Key: VADDPSZrmb: [ 0.00 0.00 ]
+Key: VADDPSZrmbk: [ 0.00 0.00 ]
+Key: VADDPSZrmbkz: [ 0.00 0.00 ]
+Key: VADDPSZrmk: [ 0.00 0.00 ]
+Key: VADDPSZrmkz: [ 0.00 0.00 ]
+Key: VADDPSZrr: [ 0.00 0.00 ]
+Key: VADDPSZrrb: [ 0.00 0.00 ]
+Key: VADDPSZrrbk: [ 0.00 0.00 ]
+Key: VADDPSZrrbkz: [ 0.00 0.00 ]
+Key: VADDPSZrrk: [ 0.00 0.00 ]
+Key: VADDPSZrrkz: [ 0.00 0.00 ]
+Key: VADDPSrm: [ 0.00 0.00 ]
+Key: VADDPSrr: [ 0.00 0.00 ]
+Key: VADDSDZrm: [ 0.00 0.00 ]
+Key: VADDSDZrm_Int: [ 0.00 0.00 ]
+Key: VADDSDZrmk_Int: [ 0.00 0.00 ]
+Key: VADDSDZrmkz_Int: [ 0.00 0.00 ]
+Key: VADDSDZrr: [ 0.00 0.00 ]
+Key: VADDSDZrr_Int: [ 0.00 0.00 ]
+Key: VADDSDZrrb_Int: [ 0.00 0.00 ]
+Key: VADDSDZrrbk_Int: [ 0.00 0.00 ]
+Key: VADDSDZrrbkz_Int: [ 0.00 0.00 ]
+Key: VADDSDZrrk_Int: [ 0.00 0.00 ]
+Key: VADDSDZrrkz_Int: [ 0.00 0.00 ]
+Key: VADDSDrm: [ 0.00 0.00 ]
+Key: VADDSDrm_Int: [ 0.00 0.00 ]
+Key: VADDSDrr: [ 0.00 0.00 ]
+Key: VADDSDrr_Int: [ 0.00 0.00 ]
+Key: VADDSHZrm: [ 0.00 0.00 ]
+Key: VADDSHZrm_Int: [ 0.00 0.00 ]
+Key: VADDSHZrmk_Int: [ 0.00 0.00 ]
+Key: VADDSHZrmkz_Int: [ 0.00 0.00 ]
+Key: VADDSHZrr: [ 0.00 0.00 ]
+Key: VADDSHZrr_Int: [ 0.00 0.00 ]
+Key: VADDSHZrrb_Int: [ 0.00 0.00 ]
+Key: VADDSHZrrbk_Int: [ 0.00 0.00 ]
+Key: VADDSHZrrbkz_Int: [ 0.00 0.00 ]
+Key: VADDSHZrrk_Int: [ 0.00 0.00 ]
+Key: VADDSHZrrkz_Int: [ 0.00 0.00 ]
+Key: VADDSSZrm: [ 0.00 0.00 ]
+Key: VADDSSZrm_Int: [ 0.00 0.00 ]
+Key: VADDSSZrmk_Int: [ 0.00 0.00 ]
+Key: VADDSSZrmkz_Int: [ 0.00 0.00 ]
+Key: VADDSSZrr: [ 0.00 0.00 ]
+Key: VADDSSZrr_Int: [ 0.00 0.00 ]
+Key: VADDSSZrrb_Int: [ 0.00 0.00 ]
+Key: VADDSSZrrbk_Int: [ 0.00 0.00 ]
+Key: VADDSSZrrbkz_Int: [ 0.00 0.00 ]
+Key: VADDSSZrrk_Int: [ 0.00 0.00 ]
+Key: VADDSSZrrkz_Int: [ 0.00 0.00 ]
+Key: VADDSSrm: [ 0.00 0.00 ]
+Key: VADDSSrm_Int: [ 0.00 0.00 ]
+Key: VADDSSrr: [ 0.00 0.00 ]
+Key: VADDSSrr_Int: [ 0.00 0.00 ]
+Key: VADDSUBPDYrm: [ 0.00 0.00 ]
+Key: VADDSUBPDYrr: [ 0.00 0.00 ]
+Key: VADDSUBPDrm: [ 0.00 0.00 ]
+Key: VADDSUBPDrr: [ 0.00 0.00 ]
+Key: VADDSUBPSYrm: [ 0.00 0.00 ]
+Key: VADDSUBPSYrr: [ 0.00 0.00 ]
+Key: VADDSUBPSrm: [ 0.00 0.00 ]
+Key: VADDSUBPSrr: [ 0.00 0.00 ]
+Key: VAESDECLASTYrm: [ 0.00 0.00 ]
+Key: VAESDECLASTYrr: [ 0.00 0.00 ]
+Key: VAESDECLASTZ: [ 0.00 0.00 ]
+Key: VAESDECLASTZrm: [ 0.00 0.00 ]
+Key: VAESDECLASTZrr: [ 0.00 0.00 ]
+Key: VAESDECLASTrm: [ 0.00 0.00 ]
+Key: VAESDECLASTrr: [ 0.00 0.00 ]
+Key: VAESDECYrm: [ 0.00 0.00 ]
+Key: VAESDECYrr: [ 0.00 0.00 ]
+Key: VAESDECZ: [ 0.00 0.00 ]
+Key: VAESDECZrm: [ 0.00 0.00 ]
+Key: VAESDECZrr: [ 0.00 0.00 ]
+Key: VAESDECrm: [ 0.00 0.00 ]
+Key: VAESDECrr: [ 0.00 0.00 ]
+Key: VAESENCLASTYrm: [ 0.00 0.00 ]
+Key: VAESENCLASTYrr: [ 0.00 0.00 ]
+Key: VAESENCLASTZ: [ 0.00 0.00 ]
+Key: VAESENCLASTZrm: [ 0.00 0.00 ]
+Key: VAESENCLASTZrr: [ 0.00 0.00 ]
+Key: VAESENCLASTrm: [ 0.00 0.00 ]
+Key: VAESENCLASTrr: [ 0.00 0.00 ]
+Key: VAESENCYrm: [ 0.00 0.00 ]
+Key: VAESENCYrr: [ 0.00 0.00 ]
+Key: VAESENCZ: [ 0.00 0.00 ]
+Key: VAESENCZrm: [ 0.00 0.00 ]
+Key: VAESENCZrr: [ 0.00 0.00 ]
+Key: VAESENCrm: [ 0.00 0.00 ]
+Key: VAESENCrr: [ 0.00 0.00 ]
+Key: VAESIMCrm: [ 0.00 0.00 ]
+Key: VAESIMCrr: [ 0.00 0.00 ]
+Key: VAESKEYGENASSISTrmi: [ 0.00 0.00 ]
+Key: VAESKEYGENASSISTrri: [ 0.00 0.00 ]
+Key: VALIGNDZ: [ 0.00 0.00 ]
+Key: VALIGNDZrmbi: [ 0.00 0.00 ]
+Key: VALIGNDZrmbik: [ 0.00 0.00 ]
+Key: VALIGNDZrmbikz: [ 0.00 0.00 ]
+Key: VALIGNDZrmi: [ 0.00 0.00 ]
+Key: VALIGNDZrmik: [ 0.00 0.00 ]
+Key: VALIGNDZrmikz: [ 0.00 0.00 ]
+Key: VALIGNDZrri: [ 0.00 0.00 ]
+Key: VALIGNDZrrik: [ 0.00 0.00 ]
+Key: VALIGNDZrrikz: [ 0.00 0.00 ]
+Key: VALIGNQZ: [ 0.00 0.00 ]
+Key: VALIGNQZrmbi: [ 0.00 0.00 ]
+Key: VALIGNQZrmbik: [ 0.00 0.00 ]
+Key: VALIGNQZrmbikz: [ 0.00 0.00 ]
+Key: VALIGNQZrmi: [ 0.00 0.00 ]
+Key: VALIGNQZrmik: [ 0.00 0.00 ]
+Key: VALIGNQZrmikz: [ 0.00 0.00 ]
+Key: VALIGNQZrri: [ 0.00 0.00 ]
+Key: VALIGNQZrrik: [ 0.00 0.00 ]
+Key: VALIGNQZrrikz: [ 0.00 0.00 ]
+Key: VANDNPDYrm: [ 0.00 0.00 ]
+Key: VANDNPDYrr: [ 0.00 0.00 ]
+Key: VANDNPDZ: [ 0.00 0.00 ]
+Key: VANDNPDZrm: [ 0.00 0.00 ]
+Key: VANDNPDZrmb: [ 0.00 0.00 ]
+Key: VANDNPDZrmbk: [ 0.00 0.00 ]
+Key: VANDNPDZrmbkz: [ 0.00 0.00 ]
+Key: VANDNPDZrmk: [ 0.00 0.00 ]
+Key: VANDNPDZrmkz: [ 0.00 0.00 ]
+Key: VANDNPDZrr: [ 0.00 0.00 ]
+Key: VANDNPDZrrk: [ 0.00 0.00 ]
+Key: VANDNPDZrrkz: [ 0.00 0.00 ]
+Key: VANDNPDrm: [ 0.00 0.00 ]
+Key: VANDNPDrr: [ 0.00 0.00 ]
+Key: VANDNPSYrm: [ 0.00 0.00 ]
+Key: VANDNPSYrr: [ 0.00 0.00 ]
+Key: VANDNPSZ: [ 0.00 0.00 ]
+Key: VANDNPSZrm: [ 0.00 0.00 ]
+Key: VANDNPSZrmb: [ 0.00 0.00 ]
+Key: VANDNPSZrmbk: [ 0.00 0.00 ]
+Key: VANDNPSZrmbkz: [ 0.00 0.00 ]
+Key: VANDNPSZrmk: [ 0.00 0.00 ]
+Key: VANDNPSZrmkz: [ 0.00 0.00 ]
+Key: VANDNPSZrr: [ 0.00 0.00 ]
+Key: VANDNPSZrrk: [ 0.00 0.00 ]
+Key: VANDNPSZrrkz: [ 0.00 0.00 ]
+Key: VANDNPSrm: [ 0.00 0.00 ]
+Key: VANDNPSrr: [ 0.00 0.00 ]
+Key: VANDPDYrm: [ 0.00 0.00 ]
+Key: VANDPDYrr: [ 0.00 0.00 ]
+Key: VANDPDZ: [ 0.00 0.00 ]
+Key: VANDPDZrm: [ 0.00 0.00 ]
+Key: VANDPDZrmb: [ 0.00 0.00 ]
+Key: VANDPDZrmbk: [ 0.00 0.00 ]
+Key: VANDPDZrmbkz: [ 0.00 0.00 ]
+Key: VANDPDZrmk: [ 0.00 0.00 ]
+Key: VANDPDZrmkz: [ 0.00 0.00 ]
+Key: VANDPDZrr: [ 0.00 0.00 ]
+Key: VANDPDZrrk: [ 0.00 0.00 ]
+Key: VANDPDZrrkz: [ 0.00 0.00 ]
+Key: VANDPDrm: [ 0.00 0.00 ]
+Key: VANDPDrr: [ 0.00 0.00 ]
+Key: VANDPSYrm: [ 0.00 0.00 ]
+Key: VANDPSYrr: [ 0.00 0.00 ]
+Key: VANDPSZ: [ 0.00 0.00 ]
+Key: VANDPSZrm: [ 0.00 0.00 ]
+Key: VANDPSZrmb: [ 0.00 0.00 ]
+Key: VANDPSZrmbk: [ 0.00 0.00 ]
+Key: VANDPSZrmbkz: [ 0.00 0.00 ]
+Key: VANDPSZrmk: [ 0.00 0.00 ]
+Key: VANDPSZrmkz: [ 0.00 0.00 ]
+Key: VANDPSZrr: [ 0.00 0.00 ]
+Key: VANDPSZrrk: [ 0.00 0.00 ]
+Key: VANDPSZrrkz: [ 0.00 0.00 ]
+Key: VANDPSrm: [ 0.00 0.00 ]
+Key: VANDPSrr: [ 0.00 0.00 ]
+Key: VASTART_SAVE_XMM_REGS: [ 0.00 0.00 ]
+Key: VBCSTNEBF: [ 0.00 0.00 ]
+Key: VBCSTNESH: [ 0.00 0.00 ]
+Key: VBLENDMPDZ: [ 0.00 0.00 ]
+Key: VBLENDMPDZrm: [ 0.00 0.00 ]
+Key: VBLENDMPDZrmb: [ 0.00 0.00 ]
+Key: VBLENDMPDZrmbk: [ 0.00 0.00 ]
+Key: VBLENDMPDZrmbkz: [ 0.00 0.00 ]
+Key: VBLENDMPDZrmk: [ 0.00 0.00 ]
+Key: VBLENDMPDZrmkz: [ 0.00 0.00 ]
+Key: VBLENDMPDZrr: [ 0.00 0.00 ]
+Key: VBLENDMPDZrrk: [ 0.00 0.00 ]
+Key: VBLENDMPDZrrkz: [ 0.00 0.00 ]
+Key: VBLENDMPSZ: [ 0.00 0.00 ]
+Key: VBLENDMPSZrm: [ 0.00 0.00 ]
+Key: VBLENDMPSZrmb: [ 0.00 0.00 ]
+Key: VBLENDMPSZrmbk: [ 0.00 0.00 ]
+Key: VBLENDMPSZrmbkz: [ 0.00 0.00 ]
+Key: VBLENDMPSZrmk: [ 0.00 0.00 ]
+Key: VBLENDMPSZrmkz: [ 0.00 0.00 ]
+Key: VBLENDMPSZrr: [ 0.00 0.00 ]
+Key: VBLENDMPSZrrk: [ 0.00 0.00 ]
+Key: VBLENDMPSZrrkz: [ 0.00 0.00 ]
+Key: VBLENDPDYrmi: [ 0.00 0.00 ]
+Key: VBLENDPDYrri: [ 0.00 0.00 ]
+Key: VBLENDPDrmi: [ 0.00 0.00 ]
+Key: VBLENDPDrri: [ 0.00 0.00 ]
+Key: VBLENDPSYrmi: [ 0.00 0.00 ]
+Key: VBLENDPSYrri: [ 0.00 0.00 ]
+Key: VBLENDPSrmi: [ 0.00 0.00 ]
+Key: VBLENDPSrri: [ 0.00 0.00 ]
+Key: VBLENDVPDYrmr: [ 0.00 0.00 ]
+Key: VBLENDVPDYrrr: [ 0.00 0.00 ]
+Key: VBLENDVPDrmr: [ 0.00 0.00 ]
+Key: VBLENDVPDrrr: [ 0.00 0.00 ]
+Key: VBLENDVPSYrmr: [ 0.00 0.00 ]
+Key: VBLENDVPSYrrr: [ 0.00 0.00 ]
+Key: VBLENDVPSrmr: [ 0.00 0.00 ]
+Key: VBLENDVPSrrr: [ 0.00 0.00 ]
+Key: VBROADCASTF: [ 0.00 0.00 ]
+Key: VBROADCASTI: [ 0.00 0.00 ]
+Key: VBROADCASTSDYrm: [ 0.00 0.00 ]
+Key: VBROADCASTSDYrr: [ 0.00 0.00 ]
+Key: VBROADCASTSDZ: [ 0.00 0.00 ]
+Key: VBROADCASTSDZrm: [ 0.00 0.00 ]
+Key: VBROADCASTSDZrmk: [ 0.00 0.00 ]
+Key: VBROADCASTSDZrmkz: [ 0.00 0.00 ]
+Key: VBROADCASTSDZrr: [ 0.00 0.00 ]
+Key: VBROADCASTSDZrrk: [ 0.00 0.00 ]
+Key: VBROADCASTSDZrrkz: [ 0.00 0.00 ]
+Key: VBROADCASTSSYrm: [ 0.00 0.00 ]
+Key: VBROADCASTSSYrr: [ 0.00 0.00 ]
+Key: VBROADCASTSSZ: [ 0.00 0.00 ]
+Key: VBROADCASTSSZrm: [ 0.00 0.00 ]
+Key: VBROADCASTSSZrmk: [ 0.00 0.00 ]
+Key: VBROADCASTSSZrmkz: [ 0.00 0.00 ]
+Key: VBROADCASTSSZrr: [ 0.00 0.00 ]
+Key: VBROADCASTSSZrrk: [ 0.00 0.00 ]
+Key: VBROADCASTSSZrrkz: [ 0.00 0.00 ]
+Key: VBROADCASTSSrm: [ 0.00 0.00 ]
+Key: VBROADCASTSSrr: [ 0.00 0.00 ]
+Key: VCMPBF: [ 0.00 0.00 ]
+Key: VCMPPDYrmi: [ 0.00 0.00 ]
+Key: VCMPPDYrri: [ 0.00 0.00 ]
+Key: VCMPPDZ: [ 0.00 0.00 ]
+Key: VCMPPDZrmbi: [ 0.00 0.00 ]
+Key: VCMPPDZrmbik: [ 0.00 0.00 ]
+Key: VCMPPDZrmi: [ 0.00 0.00 ]
+Key: VCMPPDZrmik: [ 0.00 0.00 ]
+Key: VCMPPDZrri: [ 0.00 0.00 ]
+Key: VCMPPDZrrib: [ 0.00 0.00 ]
+Key: VCMPPDZrribk: [ 0.00 0.00 ]
+Key: VCMPPDZrrik: [ 0.00 0.00 ]
+Key: VCMPPDrmi: [ 0.00 0.00 ]
+Key: VCMPPDrri: [ 0.00 0.00 ]
+Key: VCMPPHZ: [ 0.00 0.00 ]
+Key: VCMPPHZrmbi: [ 0.00 0.00 ]
+Key: VCMPPHZrmbik: [ 0.00 0.00 ]
+Key: VCMPPHZrmi: [ 0.00 0.00 ]
+Key: VCMPPHZrmik: [ 0.00 0.00 ]
+Key: VCMPPHZrri: [ 0.00 0.00 ]
+Key: VCMPPHZrrib: [ 0.00 0.00 ]
+Key: VCMPPHZrribk: [ 0.00 0.00 ]
+Key: VCMPPHZrrik: [ 0.00 0.00 ]
+Key: VCMPPSYrmi: [ 0.00 0.00 ]
+Key: VCMPPSYrri: [ 0.00 0.00 ]
+Key: VCMPPSZ: [ 0.00 0.00 ]
+Key: VCMPPSZrmbi: [ 0.00 0.00 ]
+Key: VCMPPSZrmbik: [ 0.00 0.00 ]
+Key: VCMPPSZrmi: [ 0.00 0.00 ]
+Key: VCMPPSZrmik: [ 0.00 0.00 ]
+Key: VCMPPSZrri: [ 0.00 0.00 ]
+Key: VCMPPSZrrib: [ 0.00 0.00 ]
+Key: VCMPPSZrribk: [ 0.00 0.00 ]
+Key: VCMPPSZrrik: [ 0.00 0.00 ]
+Key: VCMPPSrmi: [ 0.00 0.00 ]
+Key: VCMPPSrri: [ 0.00 0.00 ]
+Key: VCMPSDZrmi: [ 0.00 0.00 ]
+Key: VCMPSDZrmi_Int: [ 0.00 0.00 ]
+Key: VCMPSDZrmik_Int: [ 0.00 0.00 ]
+Key: VCMPSDZrri: [ 0.00 0.00 ]
+Key: VCMPSDZrri_Int: [ 0.00 0.00 ]
+Key: VCMPSDZrrib_Int: [ 0.00 0.00 ]
+Key: VCMPSDZrribk_Int: [ 0.00 0.00 ]
+Key: VCMPSDZrrik_Int: [ 0.00 0.00 ]
+Key: VCMPSDrmi: [ 0.00 0.00 ]
+Key: VCMPSDrmi_Int: [ 0.00 0.00 ]
+Key: VCMPSDrri: [ 0.00 0.00 ]
+Key: VCMPSDrri_Int: [ 0.00 0.00 ]
+Key: VCMPSHZrmi: [ 0.00 0.00 ]
+Key: VCMPSHZrmi_Int: [ 0.00 0.00 ]
+Key: VCMPSHZrmik_Int: [ 0.00 0.00 ]
+Key: VCMPSHZrri: [ 0.00 0.00 ]
+Key: VCMPSHZrri_Int: [ 0.00 0.00 ]
+Key: VCMPSHZrrib_Int: [ 0.00 0.00 ]
+Key: VCMPSHZrribk_Int: [ 0.00 0.00 ]
+Key: VCMPSHZrrik_Int: [ 0.00 0.00 ]
+Key: VCMPSSZrmi: [ 0.00 0.00 ]
+Key: VCMPSSZrmi_Int: [ 0.00 0.00 ]
+Key: VCMPSSZrmik_Int: [ 0.00 0.00 ]
+Key: VCMPSSZrri: [ 0.00 0.00 ]
+Key: VCMPSSZrri_Int: [ 0.00 0.00 ]
+Key: VCMPSSZrrib_Int: [ 0.00 0.00 ]
+Key: VCMPSSZrribk_Int: [ 0.00 0.00 ]
+Key: VCMPSSZrrik_Int: [ 0.00 0.00 ]
+Key: VCMPSSrmi: [ 0.00 0.00 ]
+Key: VCMPSSrmi_Int: [ 0.00 0.00 ]
+Key: VCMPSSrri: [ 0.00 0.00 ]
+Key: VCMPSSrri_Int: [ 0.00 0.00 ]
+Key: VCOMISBF: [ 0.00 0.00 ]
+Key: VCOMISDZrm: [ 0.00 0.00 ]
+Key: VCOMISDZrm_Int: [ 0.00 0.00 ]
+Key: VCOMISDZrr: [ 0.00 0.00 ]
+Key: VCOMISDZrr_Int: [ 0.00 0.00 ]
+Key: VCOMISDZrrb: [ 0.00 0.00 ]
+Key: VCOMISDrm: [ 0.00 0.00 ]
+Key: VCOMISDrm_Int: [ 0.00 0.00 ]
+Key: VCOMISDrr: [ 0.00 0.00 ]
+Key: VCOMISDrr_Int: [ 0.00 0.00 ]
+Key: VCOMISHZrm: [ 0.00 0.00 ]
+Key: VCOMISHZrm_Int: [ 0.00 0.00 ]
+Key: VCOMISHZrr: [ 0.00 0.00 ]
+Key: VCOMISHZrr_Int: [ 0.00 0.00 ]
+Key: VCOMISHZrrb: [ 0.00 0.00 ]
+Key: VCOMISSZrm: [ 0.00 0.00 ]
+Key: VCOMISSZrm_Int: [ 0.00 0.00 ]
+Key: VCOMISSZrr: [ 0.00 0.00 ]
+Key: VCOMISSZrr_Int: [ 0.00 0.00 ]
+Key: VCOMISSZrrb: [ 0.00 0.00 ]
+Key: VCOMISSrm: [ 0.00 0.00 ]
+Key: VCOMISSrm_Int: [ 0.00 0.00 ]
+Key: VCOMISSrr: [ 0.00 0.00 ]
+Key: VCOMISSrr_Int: [ 0.00 0.00 ]
+Key: VCOMPRESSPDZ: [ 0.00 0.00 ]
+Key: VCOMPRESSPDZmr: [ 0.00 0.00 ]
+Key: VCOMPRESSPDZmrk: [ 0.00 0.00 ]
+Key: VCOMPRESSPDZrr: [ 0.00 0.00 ]
+Key: VCOMPRESSPDZrrk: [ 0.00 0.00 ]
+Key: VCOMPRESSPDZrrkz: [ 0.00 0.00 ]
+Key: VCOMPRESSPSZ: [ 0.00 0.00 ]
+Key: VCOMPRESSPSZmr: [ 0.00 0.00 ]
+Key: VCOMPRESSPSZmrk: [ 0.00 0.00 ]
+Key: VCOMPRESSPSZrr: [ 0.00 0.00 ]
+Key: VCOMPRESSPSZrrk: [ 0.00 0.00 ]
+Key: VCOMPRESSPSZrrkz: [ 0.00 0.00 ]
+Key: VCOMXSDZrm_Int: [ 0.00 0.00 ]
+Key: VCOMXSDZrr_Int: [ 0.00 0.00 ]
+Key: VCOMXSDZrrb_Int: [ 0.00 0.00 ]
+Key: VCOMXSHZrm_Int: [ 0.00 0.00 ]
+Key: VCOMXSHZrr_Int: [ 0.00 0.00 ]
+Key: VCOMXSHZrrb_Int: [ 0.00 0.00 ]
+Key: VCOMXSSZrm_Int: [ 0.00 0.00 ]
+Key: VCOMXSSZrr_Int: [ 0.00 0.00 ]
+Key: VCOMXSSZrrb_Int: [ 0.00 0.00 ]
+Key: VCVT: [ 0.00 0.00 ]
+Key: VCVTBF: [ 0.00 0.00 ]
+Key: VCVTBIASPH: [ 0.00 0.00 ]
+Key: VCVTDQ: [ 0.00 0.00 ]
+Key: VCVTHF: [ 0.00 0.00 ]
+Key: VCVTNE: [ 0.00 0.00 ]
+Key: VCVTNEEBF: [ 0.00 0.00 ]
+Key: VCVTNEEPH: [ 0.00 0.00 ]
+Key: VCVTNEOBF: [ 0.00 0.00 ]
+Key: VCVTNEOPH: [ 0.00 0.00 ]
+Key: VCVTNEPS: [ 0.00 0.00 ]
+Key: VCVTPD: [ 0.00 0.00 ]
+Key: VCVTPH: [ 0.00 0.00 ]
+Key: VCVTPS: [ 0.00 0.00 ]
+Key: VCVTQQ: [ 0.00 0.00 ]
+Key: VCVTSD: [ 0.00 0.00 ]
+Key: VCVTSH: [ 0.00 0.00 ]
+Key: VCVTSI: [ 0.00 0.00 ]
+Key: VCVTSS: [ 0.00 0.00 ]
+Key: VCVTTBF: [ 0.00 0.00 ]
+Key: VCVTTPD: [ 0.00 0.00 ]
+Key: VCVTTPH: [ 0.00 0.00 ]
+Key: VCVTTPS: [ 0.00 0.00 ]
+Key: VCVTTSD: [ 0.00 0.00 ]
+Key: VCVTTSH: [ 0.00 0.00 ]
+Key: VCVTTSS: [ 0.00 0.00 ]
+Key: VCVTUDQ: [ 0.00 0.00 ]
+Key: VCVTUQQ: [ 0.00 0.00 ]
+Key: VCVTUSI: [ 0.00 0.00 ]
+Key: VCVTUW: [ 0.00 0.00 ]
+Key: VCVTW: [ 0.00 0.00 ]
+Key: VDBPSADBWZ: [ 0.00 0.00 ]
+Key: VDBPSADBWZrmi: [ 0.00 0.00 ]
+Key: VDBPSADBWZrmik: [ 0.00 0.00 ]
+Key: VDBPSADBWZrmikz: [ 0.00 0.00 ]
+Key: VDBPSADBWZrri: [ 0.00 0.00 ]
+Key: VDBPSADBWZrrik: [ 0.00 0.00 ]
+Key: VDBPSADBWZrrikz: [ 0.00 0.00 ]
+Key: VDIVBF: [ 0.00 0.00 ]
+Key: VDIVPDYrm: [ 0.00 0.00 ]
+Key: VDIVPDYrr: [ 0.00 0.00 ]
+Key: VDIVPDZ: [ 0.00 0.00 ]
+Key: VDIVPDZrm: [ 0.00 0.00 ]
+Key: VDIVPDZrmb: [ 0.00 0.00 ]
+Key: VDIVPDZrmbk: [ 0.00 0.00 ]
+Key: VDIVPDZrmbkz: [ 0.00 0.00 ]
+Key: VDIVPDZrmk: [ 0.00 0.00 ]
+Key: VDIVPDZrmkz: [ 0.00 0.00 ]
+Key: VDIVPDZrr: [ 0.00 0.00 ]
+Key: VDIVPDZrrb: [ 0.00 0.00 ]
+Key: VDIVPDZrrbk: [ 0.00 0.00 ]
+Key: VDIVPDZrrbkz: [ 0.00 0.00 ]
+Key: VDIVPDZrrk: [ 0.00 0.00 ]
+Key: VDIVPDZrrkz: [ 0.00 0.00 ]
+Key: VDIVPDrm: [ 0.00 0.00 ]
+Key: VDIVPDrr: [ 0.00 0.00 ]
+Key: VDIVPHZ: [ 0.00 0.00 ]
+Key: VDIVPHZrm: [ 0.00 0.00 ]
+Key: VDIVPHZrmb: [ 0.00 0.00 ]
+Key: VDIVPHZrmbk: [ 0.00 0.00 ]
+Key: VDIVPHZrmbkz: [ 0.00 0.00 ]
+Key: VDIVPHZrmk: [ 0.00 0.00 ]
+Key: VDIVPHZrmkz: [ 0.00 0.00 ]
+Key: VDIVPHZrr: [ 0.00 0.00 ]
+Key: VDIVPHZrrb: [ 0.00 0.00 ]
+Key: VDIVPHZrrbk: [ 0.00 0.00 ]
+Key: VDIVPHZrrbkz: [ 0.00 0.00 ]
+Key: VDIVPHZrrk: [ 0.00 0.00 ]
+Key: VDIVPHZrrkz: [ 0.00 0.00 ]
+Key: VDIVPSYrm: [ 0.00 0.00 ]
+Key: VDIVPSYrr: [ 0.00 0.00 ]
+Key: VDIVPSZ: [ 0.00 0.00 ]
+Key: VDIVPSZrm: [ 0.00 0.00 ]
+Key: VDIVPSZrmb: [ 0.00 0.00 ]
+Key: VDIVPSZrmbk: [ 0.00 0.00 ]
+Key: VDIVPSZrmbkz: [ 0.00 0.00 ]
+Key: VDIVPSZrmk: [ 0.00 0.00 ]
+Key: VDIVPSZrmkz: [ 0.00 0.00 ]
+Key: VDIVPSZrr: [ 0.00 0.00 ]
+Key: VDIVPSZrrb: [ 0.00 0.00 ]
+Key: VDIVPSZrrbk: [ 0.00 0.00 ]
+Key: VDIVPSZrrbkz: [ 0.00 0.00 ]
+Key: VDIVPSZrrk: [ 0.00 0.00 ]
+Key: VDIVPSZrrkz: [ 0.00 0.00 ]
+Key: VDIVPSrm: [ 0.00 0.00 ]
+Key: VDIVPSrr: [ 0.00 0.00 ]
+Key: VDIVSDZrm: [ 0.00 0.00 ]
+Key: VDIVSDZrm_Int: [ 0.00 0.00 ]
+Key: VDIVSDZrmk_Int: [ 0.00 0.00 ]
+Key: VDIVSDZrmkz_Int: [ 0.00 0.00 ]
+Key: VDIVSDZrr: [ 0.00 0.00 ]
+Key: VDIVSDZrr_Int: [ 0.00 0.00 ]
+Key: VDIVSDZrrb_Int: [ 0.00 0.00 ]
+Key: VDIVSDZrrbk_Int: [ 0.00 0.00 ]
+Key: VDIVSDZrrbkz_Int: [ 0.00 0.00 ]
+Key: VDIVSDZrrk_Int: [ 0.00 0.00 ]
+Key: VDIVSDZrrkz_Int: [ 0.00 0.00 ]
+Key: VDIVSDrm: [ 0.00 0.00 ]
+Key: VDIVSDrm_Int: [ 0.00 0.00 ]
+Key: VDIVSDrr: [ 0.00 0.00 ]
+Key: VDIVSDrr_Int: [ 0.00 0.00 ]
+Key: VDIVSHZrm: [ 0.00 0.00 ]
+Key: VDIVSHZrm_Int: [ 0.00 0.00 ]
+Key: VDIVSHZrmk_Int: [ 0.00 0.00 ]
+Key: VDIVSHZrmkz_Int: [ 0.00 0.00 ]
+Key: VDIVSHZrr: [ 0.00 0.00 ]
+Key: VDIVSHZrr_Int: [ 0.00 0.00 ]
+Key: VDIVSHZrrb_Int: [ 0.00 0.00 ]
+Key: VDIVSHZrrbk_Int: [ 0.00 0.00 ]
+Key: VDIVSHZrrbkz_Int: [ 0.00 0.00 ]
+Key: VDIVSHZrrk_Int: [ 0.00 0.00 ]
+Key: VDIVSHZrrkz_Int: [ 0.00 0.00 ]
+Key: VDIVSSZrm: [ 0.00 0.00 ]
+Key: VDIVSSZrm_Int: [ 0.00 0.00 ]
+Key: VDIVSSZrmk_Int: [ 0.00 0.00 ]
+Key: VDIVSSZrmkz_Int: [ 0.00 0.00 ]
+Key: VDIVSSZrr: [ 0.00 0.00 ]
+Key: VDIVSSZrr_Int: [ 0.00 0.00 ]
+Key: VDIVSSZrrb_Int: [ 0.00 0.00 ]
+Key: VDIVSSZrrbk_Int: [ 0.00 0.00 ]
+Key: VDIVSSZrrbkz_Int: [ 0.00 0.00 ]
+Key: VDIVSSZrrk_Int: [ 0.00 0.00 ]
+Key: VDIVSSZrrkz_Int: [ 0.00 0.00 ]
+Key: VDIVSSrm: [ 0.00 0.00 ]
+Key: VDIVSSrm_Int: [ 0.00 0.00 ]
+Key: VDIVSSrr: [ 0.00 0.00 ]
+Key: VDIVSSrr_Int: [ 0.00 0.00 ]
+Key: VDPBF: [ 0.00 0.00 ]
+Key: VDPPDrmi: [ 0.00 0.00 ]
+Key: VDPPDrri: [ 0.00 0.00 ]
+Key: VDPPHPSZ: [ 0.00 0.00 ]
+Key: VDPPHPSZm: [ 0.00 0.00 ]
+Key: VDPPHPSZmb: [ 0.00 0.00 ]
+Key: VDPPHPSZmbk: [ 0.00 0.00 ]
+Key: VDPPHPSZmbkz: [ 0.00 0.00 ]
+Key: VDPPHPSZmk: [ 0.00 0.00 ]
+Key: VDPPHPSZmkz: [ 0.00 0.00 ]
+Key: VDPPHPSZr: [ 0.00 0.00 ]
+Key: VDPPHPSZrk: [ 0.00 0.00 ]
+Key: VDPPHPSZrkz: [ 0.00 0.00 ]
+Key: VDPPSYrmi: [ 0.00 0.00 ]
+Key: VDPPSYrri: [ 0.00 0.00 ]
+Key: VDPPSrmi: [ 0.00 0.00 ]
+Key: VDPPSrri: [ 0.00 0.00 ]
+Key: VERRm: [ 0.00 0.00 ]
+Key: VERRr: [ 0.00 0.00 ]
+Key: VERWm: [ 0.00 0.00 ]
+Key: VERWr: [ 0.00 0.00 ]
+Key: VEXP: [ 0.00 0.00 ]
+Key: VEXPANDPDZ: [ 0.00 0.00 ]
+Key: VEXPANDPDZrm: [ 0.00 0.00 ]
+Key: VEXPANDPDZrmk: [ 0.00 0.00 ]
+Key: VEXPANDPDZrmkz: [ 0.00 0.00 ]
+Key: VEXPANDPDZrr: [ 0.00 0.00 ]
+Key: VEXPANDPDZrrk: [ 0.00 0.00 ]
+Key: VEXPANDPDZrrkz: [ 0.00 0.00 ]
+Key: VEXPANDPSZ: [ 0.00 0.00 ]
+Key: VEXPANDPSZrm: [ 0.00 0.00 ]
+Key: VEXPANDPSZrmk: [ 0.00 0.00 ]
+Key: VEXPANDPSZrmkz: [ 0.00 0.00 ]
+Key: VEXPANDPSZrr: [ 0.00 0.00 ]
+Key: VEXPANDPSZrrk: [ 0.00 0.00 ]
+Key: VEXPANDPSZrrkz: [ 0.00 0.00 ]
+Key: VEXTRACTF: [ 0.00 0.00 ]
+Key: VEXTRACTI: [ 0.00 0.00 ]
+Key: VEXTRACTPSZmri: [ 0.00 0.00 ]
+Key: VEXTRACTPSZrri: [ 0.00 0.00 ]
+Key: VEXTRACTPSmri: [ 0.00 0.00 ]
+Key: VEXTRACTPSrri: [ 0.00 0.00 ]
+Key: VFCMADDCPHZ: [ 0.00 0.00 ]
+Key: VFCMADDCPHZm: [ 0.00 0.00 ]
+Key: VFCMADDCPHZmb: [ 0.00 0.00 ]
+Key: VFCMADDCPHZmbk: [ 0.00 0.00 ]
+Key: VFCMADDCPHZmbkz: [ 0.00 0.00 ]
+Key: VFCMADDCPHZmk: [ 0.00 0.00 ]
+Key: VFCMADDCPHZmkz: [ 0.00 0.00 ]
+Key: VFCMADDCPHZr: [ 0.00 0.00 ]
+Key: VFCMADDCPHZrb: [ 0.00 0.00 ]
+Key: VFCMADDCPHZrbk: [ 0.00 0.00 ]
+Key: VFCMADDCPHZrbkz: [ 0.00 0.00 ]
+Key: VFCMADDCPHZrk: [ 0.00 0.00 ]
+Key: VFCMADDCPHZrkz: [ 0.00 0.00 ]
+Key: VFCMADDCSHZm: [ 0.00 0.00 ]
+Key: VFCMADDCSHZmk: [ 0.00 0.00 ]
+Key: VFCMADDCSHZmkz: [ 0.00 0.00 ]
+Key: VFCMADDCSHZr: [ 0.00 0.00 ]
+Key: VFCMADDCSHZrb: [ 0.00 0.00 ]
+Key: VFCMADDCSHZrbk: [ 0.00 0.00 ]
+Key: VFCMADDCSHZrbkz: [ 0.00 0.00 ]
+Key: VFCMADDCSHZrk: [ 0.00 0.00 ]
+Key: VFCMADDCSHZrkz: [ 0.00 0.00 ]
+Key: VFCMULCPHZ: [ 0.00 0.00 ]
+Key: VFCMULCPHZrm: [ 0.00 0.00 ]
+Key: VFCMULCPHZrmb: [ 0.00 0.00 ]
+Key: VFCMULCPHZrmbk: [ 0.00 0.00 ]
+Key: VFCMULCPHZrmbkz: [ 0.00 0.00 ]
+Key: VFCMULCPHZrmk: [ 0.00 0.00 ]
+Key: VFCMULCPHZrmkz: [ 0.00 0.00 ]
+Key: VFCMULCPHZrr: [ 0.00 0.00 ]
+Key: VFCMULCPHZrrb: [ 0.00 0.00 ]
+Key: VFCMULCPHZrrbk: [ 0.00 0.00 ]
+Key: VFCMULCPHZrrbkz: [ 0.00 0.00 ]
+Key: VFCMULCPHZrrk: [ 0.00 0.00 ]
+Key: VFCMULCPHZrrkz: [ 0.00 0.00 ]
+Key: VFCMULCSHZrm: [ 0.00 0.00 ]
+Key: VFCMULCSHZrmk: [ 0.00 0.00 ]
+Key: VFCMULCSHZrmkz: [ 0.00 0.00 ]
+Key: VFCMULCSHZrr: [ 0.00 0.00 ]
+Key: VFCMULCSHZrrb: [ 0.00 0.00 ]
+Key: VFCMULCSHZrrbk: [ 0.00 0.00 ]
+Key: VFCMULCSHZrrbkz: [ 0.00 0.00 ]
+Key: VFCMULCSHZrrk: [ 0.00 0.00 ]
+Key: VFCMULCSHZrrkz: [ 0.00 0.00 ]
+Key: VFIXUPIMMPDZ: [ 0.00 0.00 ]
+Key: VFIXUPIMMPDZrmbi: [ 0.00 0.00 ]
+Key: VFIXUPIMMPDZrmbik: [ 0.00 0.00 ]
+Key: VFIXUPIMMPDZrmbikz: [ 0.00 0.00 ]
+Key: VFIXUPIMMPDZrmi: [ 0.00 0.00 ]
+Key: VFIXUPIMMPDZrmik: [ 0.00 0.00 ]
+Key: VFIXUPIMMPDZrmikz: [ 0.00 0.00 ]
+Key: VFIXUPIMMPDZrri: [ 0.00 0.00 ]
+Key: VFIXUPIMMPDZrrib: [ 0.00 0.00 ]
+Key: VFIXUPIMMPDZrribk: [ 0.00 0.00 ]
+Key: VFIXUPIMMPDZrribkz: [ 0.00 0.00 ]
+Key: VFIXUPIMMPDZrrik: [ 0.00 0.00 ]
+Key: VFIXUPIMMPDZrrikz: [ 0.00 0.00 ]
+Key: VFIXUPIMMPSZ: [ 0.00 0.00 ]
+Key: VFIXUPIMMPSZrmbi: [ 0.00 0.00 ]
+Key: VFIXUPIMMPSZrmbik: [ 0.00 0.00 ]
+Key: VFIXUPIMMPSZrmbikz: [ 0.00 0.00 ]
+Key: VFIXUPIMMPSZrmi: [ 0.00 0.00 ]
+Key: VFIXUPIMMPSZrmik: [ 0.00 0.00 ]
+Key: VFIXUPIMMPSZrmikz: [ 0.00 0.00 ]
+Key: VFIXUPIMMPSZrri: [ 0.00 0.00 ]
+Key: VFIXUPIMMPSZrrib: [ 0.00 0.00 ]
+Key: VFIXUPIMMPSZrribk: [ 0.00 0.00 ]
+Key: VFIXUPIMMPSZrribkz: [ 0.00 0.00 ]
+Key: VFIXUPIMMPSZrrik: [ 0.00 0.00 ]
+Key: VFIXUPIMMPSZrrikz: [ 0.00 0.00 ]
+Key: VFIXUPIMMSDZrmi: [ 0.00 0.00 ]
+Key: VFIXUPIMMSDZrmik: [ 0.00 0.00 ]
+Key: VFIXUPIMMSDZrmikz: [ 0.00 0.00 ]
+Key: VFIXUPIMMSDZrri: [ 0.00 0.00 ]
+Key: VFIXUPIMMSDZrrib: [ 0.00 0.00 ]
+Key: VFIXUPIMMSDZrribk: [ 0.00 0.00 ]
+Key: VFIXUPIMMSDZrribkz: [ 0.00 0.00 ]
+Key: VFIXUPIMMSDZrrik: [ 0.00 0.00 ]
+Key: VFIXUPIMMSDZrrikz: [ 0.00 0.00 ]
+Key: VFIXUPIMMSSZrmi: [ 0.00 0.00 ]
+Key: VFIXUPIMMSSZrmik: [ 0.00 0.00 ]
+Key: VFIXUPIMMSSZrmikz: [ 0.00 0.00 ]
+Key: VFIXUPIMMSSZrri: [ 0.00 0.00 ]
+Key: VFIXUPIMMSSZrrib: [ 0.00 0.00 ]
+Key: VFIXUPIMMSSZrribk: [ 0.00 0.00 ]
+Key: VFIXUPIMMSSZrribkz: [ 0.00 0.00 ]
+Key: VFIXUPIMMSSZrrik: [ 0.00 0.00 ]
+Key: VFIXUPIMMSSZrrikz: [ 0.00 0.00 ]
+Key: VFMADD: [ 0.00 0.00 ]
+Key: VFMADDCPHZ: [ 0.00 0.00 ]
+Key: VFMADDCPHZm: [ 0.00 0.00 ]
+Key: VFMADDCPHZmb: [ 0.00 0.00 ]
+Key: VFMADDCPHZmbk: [ 0.00 0.00 ]
+Key: VFMADDCPHZmbkz: [ 0.00 0.00 ]
+Key: VFMADDCPHZmk: [ 0.00 0.00 ]
+Key: VFMADDCPHZmkz: [ 0.00 0.00 ]
+Key: VFMADDCPHZr: [ 0.00 0.00 ]
+Key: VFMADDCPHZrb: [ 0.00 0.00 ]
+Key: VFMADDCPHZrbk: [ 0.00 0.00 ]
+Key: VFMADDCPHZrbkz: [ 0.00 0.00 ]
+Key: VFMADDCPHZrk: [ 0.00 0.00 ]
+Key: VFMADDCPHZrkz: [ 0.00 0.00 ]
+Key: VFMADDCSHZm: [ 0.00 0.00 ]
+Key: VFMADDCSHZmk: [ 0.00 0.00 ]
+Key: VFMADDCSHZmkz: [ 0.00 0.00 ]
+Key: VFMADDCSHZr: [ 0.00 0.00 ]
+Key: VFMADDCSHZrb: [ 0.00 0.00 ]
+Key: VFMADDCSHZrbk: [ 0.00 0.00 ]
+Key: VFMADDCSHZrbkz: [ 0.00 0.00 ]
+Key: VFMADDCSHZrk: [ 0.00 0.00 ]
+Key: VFMADDCSHZrkz: [ 0.00 0.00 ]
+Key: VFMADDPD: [ 0.00 0.00 ]
+Key: VFMADDPS: [ 0.00 0.00 ]
+Key: VFMADDSD: [ 0.00 0.00 ]
+Key: VFMADDSS: [ 0.00 0.00 ]
+Key: VFMADDSUB: [ 0.00 0.00 ]
+Key: VFMADDSUBPD: [ 0.00 0.00 ]
+Key: VFMADDSUBPS: [ 0.00 0.00 ]
+Key: VFMSUB: [ 0.00 0.00 ]
+Key: VFMSUBADD: [ 0.00 0.00 ]
+Key: VFMSUBADDPD: [ 0.00 0.00 ]
+Key: VFMSUBADDPS: [ 0.00 0.00 ]
+Key: VFMSUBPD: [ 0.00 0.00 ]
+Key: VFMSUBPS: [ 0.00 0.00 ]
+Key: VFMSUBSD: [ 0.00 0.00 ]
+Key: VFMSUBSS: [ 0.00 0.00 ]
+Key: VFMULCPHZ: [ 0.00 0.00 ]
+Key: VFMULCPHZrm: [ 0.00 0.00 ]
+Key: VFMULCPHZrmb: [ 0.00 0.00 ]
+Key: VFMULCPHZrmbk: [ 0.00 0.00 ]
+Key: VFMULCPHZrmbkz: [ 0.00 0.00 ]
+Key: VFMULCPHZrmk: [ 0.00 0.00 ]
+Key: VFMULCPHZrmkz: [ 0.00 0.00 ]
+Key: VFMULCPHZrr: [ 0.00 0.00 ]
+Key: VFMULCPHZrrb: [ 0.00 0.00 ]
+Key: VFMULCPHZrrbk: [ 0.00 0.00 ]
+Key: VFMULCPHZrrbkz: [ 0.00 0.00 ]
+Key: VFMULCPHZrrk: [ 0.00 0.00 ]
+Key: VFMULCPHZrrkz: [ 0.00 0.00 ]
+Key: VFMULCSHZrm: [ 0.00 0.00 ]
+Key: VFMULCSHZrmk: [ 0.00 0.00 ]
+Key: VFMULCSHZrmkz: [ 0.00 0.00 ]
+Key: VFMULCSHZrr: [ 0.00 0.00 ]
+Key: VFMULCSHZrrb: [ 0.00 0.00 ]
+Key: VFMULCSHZrrbk: [ 0.00 0.00 ]
+Key: VFMULCSHZrrbkz: [ 0.00 0.00 ]
+Key: VFMULCSHZrrk: [ 0.00 0.00 ]
+Key: VFMULCSHZrrkz: [ 0.00 0.00 ]
+Key: VFNMADD: [ 0.00 0.00 ]
+Key: VFNMADDPD: [ 0.00 0.00 ]
+Key: VFNMADDPS: [ 0.00 0.00 ]
+Key: VFNMADDSD: [ 0.00 0.00 ]
+Key: VFNMADDSS: [ 0.00 0.00 ]
+Key: VFNMSUB: [ 0.00 0.00 ]
+Key: VFNMSUBPD: [ 0.00 0.00 ]
+Key: VFNMSUBPS: [ 0.00 0.00 ]
+Key: VFNMSUBSD: [ 0.00 0.00 ]
+Key: VFNMSUBSS: [ 0.00 0.00 ]
+Key: VFPCLASSBF: [ 0.00 0.00 ]
+Key: VFPCLASSPDZ: [ 0.00 0.00 ]
+Key: VFPCLASSPDZmbi: [ 0.00 0.00 ]
+Key: VFPCLASSPDZmbik: [ 0.00 0.00 ]
+Key: VFPCLASSPDZmi: [ 0.00 0.00 ]
+Key: VFPCLASSPDZmik: [ 0.00 0.00 ]
+Key: VFPCLASSPDZri: [ 0.00 0.00 ]
+Key: VFPCLASSPDZrik: [ 0.00 0.00 ]
+Key: VFPCLASSPHZ: [ 0.00 0.00 ]
+Key: VFPCLASSPHZmbi: [ 0.00 0.00 ]
+Key: VFPCLASSPHZmbik: [ 0.00 0.00 ]
+Key: VFPCLASSPHZmi: [ 0.00 0.00 ]
+Key: VFPCLASSPHZmik: [ 0.00 0.00 ]
+Key: VFPCLASSPHZri: [ 0.00 0.00 ]
+Key: VFPCLASSPHZrik: [ 0.00 0.00 ]
+Key: VFPCLASSPSZ: [ 0.00 0.00 ]
+Key: VFPCLASSPSZmbi: [ 0.00 0.00 ]
+Key: VFPCLASSPSZmbik: [ 0.00 0.00 ]
+Key: VFPCLASSPSZmi: [ 0.00 0.00 ]
+Key: VFPCLASSPSZmik: [ 0.00 0.00 ]
+Key: VFPCLASSPSZri: [ 0.00 0.00 ]
+Key: VFPCLASSPSZrik: [ 0.00 0.00 ]
+Key: VFPCLASSSDZmi: [ 0.00 0.00 ]
+Key: VFPCLASSSDZmik: [ 0.00 0.00 ]
+Key: VFPCLASSSDZri: [ 0.00 0.00 ]
+Key: VFPCLASSSDZrik: [ 0.00 0.00 ]
+Key: VFPCLASSSHZmi: [ 0.00 0.00 ]
+Key: VFPCLASSSHZmik: [ 0.00 0.00 ]
+Key: VFPCLASSSHZri: [ 0.00 0.00 ]
+Key: VFPCLASSSHZrik: [ 0.00 0.00 ]
+Key: VFPCLASSSSZmi: [ 0.00 0.00 ]
+Key: VFPCLASSSSZmik: [ 0.00 0.00 ]
+Key: VFPCLASSSSZri: [ 0.00 0.00 ]
+Key: VFPCLASSSSZrik: [ 0.00 0.00 ]
+Key: VFRCZPDYrm: [ 0.00 0.00 ]
+Key: VFRCZPDYrr: [ 0.00 0.00 ]
+Key: VFRCZPDrm: [ 0.00 0.00 ]
+Key: VFRCZPDrr: [ 0.00 0.00 ]
+Key: VFRCZPSYrm: [ 0.00 0.00 ]
+Key: VFRCZPSYrr: [ 0.00 0.00 ]
+Key: VFRCZPSrm: [ 0.00 0.00 ]
+Key: VFRCZPSrr: [ 0.00 0.00 ]
+Key: VFRCZSDrm: [ 0.00 0.00 ]
+Key: VFRCZSDrr: [ 0.00 0.00 ]
+Key: VFRCZSSrm: [ 0.00 0.00 ]
+Key: VFRCZSSrr: [ 0.00 0.00 ]
+Key: VGATHERDPDYrm: [ 0.00 0.00 ]
+Key: VGATHERDPDZ: [ 0.00 0.00 ]
+Key: VGATHERDPDZrm: [ 0.00 0.00 ]
+Key: VGATHERDPDrm: [ 0.00 0.00 ]
+Key: VGATHERDPSYrm: [ 0.00 0.00 ]
+Key: VGATHERDPSZ: [ 0.00 0.00 ]
+Key: VGATHERDPSZrm: [ 0.00 0.00 ]
+Key: VGATHERDPSrm: [ 0.00 0.00 ]
+Key: VGATHERPF: [ 0.00 0.00 ]
+Key: VGATHERQPDYrm: [ 0.00 0.00 ]
+Key: VGATHERQPDZ: [ 0.00 0.00 ]
+Key: VGATHERQPDZrm: [ 0.00 0.00 ]
+Key: VGATHERQPDrm: [ 0.00 0.00 ]
+Key: VGATHERQPSYrm: [ 0.00 0.00 ]
+Key: VGATHERQPSZ: [ 0.00 0.00 ]
+Key: VGATHERQPSZrm: [ 0.00 0.00 ]
+Key: VGATHERQPSrm: [ 0.00 0.00 ]
+Key: VGETEXPBF: [ 0.00 0.00 ]
+Key: VGETEXPPDZ: [ 0.00 0.00 ]
+Key: VGETEXPPDZm: [ 0.00 0.00 ]
+Key: VGETEXPPDZmb: [ 0.00 0.00 ]
+Key: VGETEXPPDZmbk: [ 0.00 0.00 ]
+Key: VGETEXPPDZmbkz: [ 0.00 0.00 ]
+Key: VGETEXPPDZmk: [ 0.00 0.00 ]
+Key: VGETEXPPDZmkz: [ 0.00 0.00 ]
+Key: VGETEXPPDZr: [ 0.00 0.00 ]
+Key: VGETEXPPDZrb: [ 0.00 0.00 ]
+Key: VGETEXPPDZrbk: [ 0.00 0.00 ]
+Key: VGETEXPPDZrbkz: [ 0.00 0.00 ]
+Key: VGETEXPPDZrk: [ 0.00 0.00 ]
+Key: VGETEXPPDZrkz: [ 0.00 0.00 ]
+Key: VGETEXPPHZ: [ 0.00 0.00 ]
+Key: VGETEXPPHZm: [ 0.00 0.00 ]
+Key: VGETEXPPHZmb: [ 0.00 0.00 ]
+Key: VGETEXPPHZmbk: [ 0.00 0.00 ]
+Key: VGETEXPPHZmbkz: [ 0.00 0.00 ]
+Key: VGETEXPPHZmk: [ 0.00 0.00 ]
+Key: VGETEXPPHZmkz: [ 0.00 0.00 ]
+Key: VGETEXPPHZr: [ 0.00 0.00 ]
+Key: VGETEXPPHZrb: [ 0.00 0.00 ]
+Key: VGETEXPPHZrbk: [ 0.00 0.00 ]
+Key: VGETEXPPHZrbkz: [ 0.00 0.00 ]
+Key: VGETEXPPHZrk: [ 0.00 0.00 ]
+Key: VGETEXPPHZrkz: [ 0.00 0.00 ]
+Key: VGETEXPPSZ: [ 0.00 0.00 ]
+Key: VGETEXPPSZm: [ 0.00 0.00 ]
+Key: VGETEXPPSZmb: [ 0.00 0.00 ]
+Key: VGETEXPPSZmbk: [ 0.00 0.00 ]
+Key: VGETEXPPSZmbkz: [ 0.00 0.00 ]
+Key: VGETEXPPSZmk: [ 0.00 0.00 ]
+Key: VGETEXPPSZmkz: [ 0.00 0.00 ]
+Key: VGETEXPPSZr: [ 0.00 0.00 ]
+Key: VGETEXPPSZrb: [ 0.00 0.00 ]
+Key: VGETEXPPSZrbk: [ 0.00 0.00 ]
+Key: VGETEXPPSZrbkz: [ 0.00 0.00 ]
+Key: VGETEXPPSZrk: [ 0.00 0.00 ]
+Key: VGETEXPPSZrkz: [ 0.00 0.00 ]
+Key: VGETEXPSDZm: [ 0.00 0.00 ]
+Key: VGETEXPSDZmk: [ 0.00 0.00 ]
+Key: VGETEXPSDZmkz: [ 0.00 0.00 ]
+Key: VGETEXPSDZr: [ 0.00 0.00 ]
+Key: VGETEXPSDZrb: [ 0.00 0.00 ]
+Key: VGETEXPSDZrbk: [ 0.00 0.00 ]
+Key: VGETEXPSDZrbkz: [ 0.00 0.00 ]
+Key: VGETEXPSDZrk: [ 0.00 0.00 ]
+Key: VGETEXPSDZrkz: [ 0.00 0.00 ]
+Key: VGETEXPSHZm: [ 0.00 0.00 ]
+Key: VGETEXPSHZmk: [ 0.00 0.00 ]
+Key: VGETEXPSHZmkz: [ 0.00 0.00 ]
+Key: VGETEXPSHZr: [ 0.00 0.00 ]
+Key: VGETEXPSHZrb: [ 0.00 0.00 ]
+Key: VGETEXPSHZrbk: [ 0.00 0.00 ]
+Key: VGETEXPSHZrbkz: [ 0.00 0.00 ]
+Key: VGETEXPSHZrk: [ 0.00 0.00 ]
+Key: VGETEXPSHZrkz: [ 0.00 0.00 ]
+Key: VGETEXPSSZm: [ 0.00 0.00 ]
+Key: VGETEXPSSZmk: [ 0.00 0.00 ]
+Key: VGETEXPSSZmkz: [ 0.00 0.00 ]
+Key: VGETEXPSSZr: [ 0.00 0.00 ]
+Key: VGETEXPSSZrb: [ 0.00 0.00 ]
+Key: VGETEXPSSZrbk: [ 0.00 0.00 ]
+Key: VGETEXPSSZrbkz: [ 0.00 0.00 ]
+Key: VGETEXPSSZrk: [ 0.00 0.00 ]
+Key: VGETEXPSSZrkz: [ 0.00 0.00 ]
+Key: VGETMANTBF: [ 0.00 0.00 ]
+Key: VGETMANTPDZ: [ 0.00 0.00 ]
+Key: VGETMANTPDZrmbi: [ 0.00 0.00 ]
+Key: VGETMANTPDZrmbik: [ 0.00 0.00 ]
+Key: VGETMANTPDZrmbikz: [ 0.00 0.00 ]
+Key: VGETMANTPDZrmi: [ 0.00 0.00 ]
+Key: VGETMANTPDZrmik: [ 0.00 0.00 ]
+Key: VGETMANTPDZrmikz: [ 0.00 0.00 ]
+Key: VGETMANTPDZrri: [ 0.00 0.00 ]
+Key: VGETMANTPDZrrib: [ 0.00 0.00 ]
+Key: VGETMANTPDZrribk: [ 0.00 0.00 ]
+Key: VGETMANTPDZrribkz: [ 0.00 0.00 ]
+Key: VGETMANTPDZrrik: [ 0.00 0.00 ]
+Key: VGETMANTPDZrrikz: [ 0.00 0.00 ]
+Key: VGETMANTPHZ: [ 0.00 0.00 ]
+Key: VGETMANTPHZrmbi: [ 0.00 0.00 ]
+Key: VGETMANTPHZrmbik: [ 0.00 0.00 ]
+Key: VGETMANTPHZrmbikz: [ 0.00 0.00 ]
+Key: VGETMANTPHZrmi: [ 0.00 0.00 ]
+Key: VGETMANTPHZrmik: [ 0.00 0.00 ]
+Key: VGETMANTPHZrmikz: [ 0.00 0.00 ]
+Key: VGETMANTPHZrri: [ 0.00 0.00 ]
+Key: VGETMANTPHZrrib: [ 0.00 0.00 ]
+Key: VGETMANTPHZrribk: [ 0.00 0.00 ]
+Key: VGETMANTPHZrribkz: [ 0.00 0.00 ]
+Key: VGETMANTPHZrrik: [ 0.00 0.00 ]
+Key: VGETMANTPHZrrikz: [ 0.00 0.00 ]
+Key: VGETMANTPSZ: [ 0.00 0.00 ]
+Key: VGETMANTPSZrmbi: [ 0.00 0.00 ]
+Key: VGETMANTPSZrmbik: [ 0.00 0.00 ]
+Key: VGETMANTPSZrmbikz: [ 0.00 0.00 ]
+Key: VGETMANTPSZrmi: [ 0.00 0.00 ]
+Key: VGETMANTPSZrmik: [ 0.00 0.00 ]
+Key: VGETMANTPSZrmikz: [ 0.00 0.00 ]
+Key: VGETMANTPSZrri: [ 0.00 0.00 ]
+Key: VGETMANTPSZrrib: [ 0.00 0.00 ]
+Key: VGETMANTPSZrribk: [ 0.00 0.00 ]
+Key: VGETMANTPSZrribkz: [ 0.00 0.00 ]
+Key: VGETMANTPSZrrik: [ 0.00 0.00 ]
+Key: VGETMANTPSZrrikz: [ 0.00 0.00 ]
+Key: VGETMANTSDZrmi: [ 0.00 0.00 ]
+Key: VGETMANTSDZrmik: [ 0.00 0.00 ]
+Key: VGETMANTSDZrmikz: [ 0.00 0.00 ]
+Key: VGETMANTSDZrri: [ 0.00 0.00 ]
+Key: VGETMANTSDZrrib: [ 0.00 0.00 ]
+Key: VGETMANTSDZrribk: [ 0.00 0.00 ]
+Key: VGETMANTSDZrribkz: [ 0.00 0.00 ]
+Key: VGETMANTSDZrrik: [ 0.00 0.00 ]
+Key: VGETMANTSDZrrikz: [ 0.00 0.00 ]
+Key: VGETMANTSHZrmi: [ 0.00 0.00 ]
+Key: VGETMANTSHZrmik: [ 0.00 0.00 ]
+Key: VGETMANTSHZrmikz: [ 0.00 0.00 ]
+Key: VGETMANTSHZrri: [ 0.00 0.00 ]
+Key: VGETMANTSHZrrib: [ 0.00 0.00 ]
+Key: VGETMANTSHZrribk: [ 0.00 0.00 ]
+Key: VGETMANTSHZrribkz: [ 0.00 0.00 ]
+Key: VGETMANTSHZrrik: [ 0.00 0.00 ]
+Key: VGETMANTSHZrrikz: [ 0.00 0.00 ]
+Key: VGETMANTSSZrmi: [ 0.00 0.00 ]
+Key: VGETMANTSSZrmik: [ 0.00 0.00 ]
+Key: VGETMANTSSZrmikz: [ 0.00 0.00 ]
+Key: VGETMANTSSZrri: [ 0.00 0.00 ]
+Key: VGETMANTSSZrrib: [ 0.00 0.00 ]
+Key: VGETMANTSSZrribk: [ 0.00 0.00 ]
+Key: VGETMANTSSZrribkz: [ 0.00 0.00 ]
+Key: VGETMANTSSZrrik: [ 0.00 0.00 ]
+Key: VGETMANTSSZrrikz: [ 0.00 0.00 ]
+Key: VGF: [ 0.00 0.00 ]
+Key: VHADDPDYrm: [ 0.00 0.00 ]
+Key: VHADDPDYrr: [ 0.00 0.00 ]
+Key: VHADDPDrm: [ 0.00 0.00 ]
+Key: VHADDPDrr: [ 0.00 0.00 ]
+Key: VHADDPSYrm: [ 0.00 0.00 ]
+Key: VHADDPSYrr: [ 0.00 0.00 ]
+Key: VHADDPSrm: [ 0.00 0.00 ]
+Key: VHADDPSrr: [ 0.00 0.00 ]
+Key: VHSUBPDYrm: [ 0.00 0.00 ]
+Key: VHSUBPDYrr: [ 0.00 0.00 ]
+Key: VHSUBPDrm: [ 0.00 0.00 ]
+Key: VHSUBPDrr: [ 0.00 0.00 ]
+Key: VHSUBPSYrm: [ 0.00 0.00 ]
+Key: VHSUBPSYrr: [ 0.00 0.00 ]
+Key: VHSUBPSrm: [ 0.00 0.00 ]
+Key: VHSUBPSrr: [ 0.00 0.00 ]
+Key: VINSERTF: [ 0.00 0.00 ]
+Key: VINSERTI: [ 0.00 0.00 ]
+Key: VINSERTPSZrmi: [ 0.00 0.00 ]
+Key: VINSERTPSZrri: [ 0.00 0.00 ]
+Key: VINSERTPSrmi: [ 0.00 0.00 ]
+Key: VINSERTPSrri: [ 0.00 0.00 ]
+Key: VLDDQUYrm: [ 0.00 0.00 ]
+Key: VLDDQUrm: [ 0.00 0.00 ]
+Key: VLDMXCSR: [ 0.00 0.00 ]
+Key: VMASKMOVDQU: [ 0.00 0.00 ]
+Key: VMASKMOVPDYmr: [ 0.00 0.00 ]
+Key: VMASKMOVPDYrm: [ 0.00 0.00 ]
+Key: VMASKMOVPDmr: [ 0.00 0.00 ]
+Key: VMASKMOVPDrm: [ 0.00 0.00 ]
+Key: VMASKMOVPSYmr: [ 0.00 0.00 ]
+Key: VMASKMOVPSYrm: [ 0.00 0.00 ]
+Key: VMASKMOVPSmr: [ 0.00 0.00 ]
+Key: VMASKMOVPSrm: [ 0.00 0.00 ]
+Key: VMAXBF: [ 0.00 0.00 ]
+Key: VMAXCPDYrm: [ 0.00 0.00 ]
+Key: VMAXCPDYrr: [ 0.00 0.00 ]
+Key: VMAXCPDZ: [ 0.00 0.00 ]
+Key: VMAXCPDZrm: [ 0.00 0.00 ]
+Key: VMAXCPDZrmb: [ 0.00 0.00 ]
+Key: VMAXCPDZrmbk: [ 0.00 0.00 ]
+Key: VMAXCPDZrmbkz: [ 0.00 0.00 ]
+Key: VMAXCPDZrmk: [ 0.00 0.00 ]
+Key: VMAXCPDZrmkz: [ 0.00 0.00 ]
+Key: VMAXCPDZrr: [ 0.00 0.00 ]
+Key: VMAXCPDZrrk: [ 0.00 0.00 ]
+Key: VMAXCPDZrrkz: [ 0.00 0.00 ]
+Key: VMAXCPDrm: [ 0.00 0.00 ]
+Key: VMAXCPDrr: [ 0.00 0.00 ]
+Key: VMAXCPHZ: [ 0.00 0.00 ]
+Key: VMAXCPHZrm: [ 0.00 0.00 ]
+Key: VMAXCPHZrmb: [ 0.00 0.00 ]
+Key: VMAXCPHZrmbk: [ 0.00 0.00 ]
+Key: VMAXCPHZrmbkz: [ 0.00 0.00 ]
+Key: VMAXCPHZrmk: [ 0.00 0.00 ]
+Key: VMAXCPHZrmkz: [ 0.00 0.00 ]
+Key: VMAXCPHZrr: [ 0.00 0.00 ]
+Key: VMAXCPHZrrk: [ 0.00 0.00 ]
+Key: VMAXCPHZrrkz: [ 0.00 0.00 ]
+Key: VMAXCPSYrm: [ 0.00 0.00 ]
+Key: VMAXCPSYrr: [ 0.00 0.00 ]
+Key: VMAXCPSZ: [ 0.00 0.00 ]
+Key: VMAXCPSZrm: [ 0.00 0.00 ]
+Key: VMAXCPSZrmb: [ 0.00 0.00 ]
+Key: VMAXCPSZrmbk: [ 0.00 0.00 ]
+Key: VMAXCPSZrmbkz: [ 0.00 0.00 ]
+Key: VMAXCPSZrmk: [ 0.00 0.00 ]
+Key: VMAXCPSZrmkz: [ 0.00 0.00 ]
+Key: VMAXCPSZrr: [ 0.00 0.00 ]
+Key: VMAXCPSZrrk: [ 0.00 0.00 ]
+Key: VMAXCPSZrrkz: [ 0.00 0.00 ]
+Key: VMAXCPSrm: [ 0.00 0.00 ]
+Key: VMAXCPSrr: [ 0.00 0.00 ]
+Key: VMAXCSDZrm: [ 0.00 0.00 ]
+Key: VMAXCSDZrr: [ 0.00 0.00 ]
+Key: VMAXCSDrm: [ 0.00 0.00 ]
+Key: VMAXCSDrr: [ 0.00 0.00 ]
+Key: VMAXCSHZrm: [ 0.00 0.00 ]
+Key: VMAXCSHZrr: [ 0.00 0.00 ]
+Key: VMAXCSSZrm: [ 0.00 0.00 ]
+Key: VMAXCSSZrr: [ 0.00 0.00 ]
+Key: VMAXCSSrm: [ 0.00 0.00 ]
+Key: VMAXCSSrr: [ 0.00 0.00 ]
+Key: VMAXPDYrm: [ 0.00 0.00 ]
+Key: VMAXPDYrr: [ 0.00 0.00 ]
+Key: VMAXPDZ: [ 0.00 0.00 ]
+Key: VMAXPDZrm: [ 0.00 0.00 ]
+Key: VMAXPDZrmb: [ 0.00 0.00 ]
+Key: VMAXPDZrmbk: [ 0.00 0.00 ]
+Key: VMAXPDZrmbkz: [ 0.00 0.00 ]
+Key: VMAXPDZrmk: [ 0.00 0.00 ]
+Key: VMAXPDZrmkz: [ 0.00 0.00 ]
+Key: VMAXPDZrr: [ 0.00 0.00 ]
+Key: VMAXPDZrrb: [ 0.00 0.00 ]
+Key: VMAXPDZrrbk: [ 0.00 0.00 ]
+Key: VMAXPDZrrbkz: [ 0.00 0.00 ]
+Key: VMAXPDZrrk: [ 0.00 0.00 ]
+Key: VMAXPDZrrkz: [ 0.00 0.00 ]
+Key: VMAXPDrm: [ 0.00 0.00 ]
+Key: VMAXPDrr: [ 0.00 0.00 ]
+Key: VMAXPHZ: [ 0.00 0.00 ]
+Key: VMAXPHZrm: [ 0.00 0.00 ]
+Key: VMAXPHZrmb: [ 0.00 0.00 ]
+Key: VMAXPHZrmbk: [ 0.00 0.00 ]
+Key: VMAXPHZrmbkz: [ 0.00 0.00 ]
+Key: VMAXPHZrmk: [ 0.00 0.00 ]
+Key: VMAXPHZrmkz: [ 0.00 0.00 ]
+Key: VMAXPHZrr: [ 0.00 0.00 ]
+Key: VMAXPHZrrb: [ 0.00 0.00 ]
+Key: VMAXPHZrrbk: [ 0.00 0.00 ]
+Key: VMAXPHZrrbkz: [ 0.00 0.00 ]
+Key: VMAXPHZrrk: [ 0.00 0.00 ]
+Key: VMAXPHZrrkz: [ 0.00 0.00 ]
+Key: VMAXPSYrm: [ 0.00 0.00 ]
+Key: VMAXPSYrr: [ 0.00 0.00 ]
+Key: VMAXPSZ: [ 0.00 0.00 ]
+Key: VMAXPSZrm: [ 0.00 0.00 ]
+Key: VMAXPSZrmb: [ 0.00 0.00 ]
+Key: VMAXPSZrmbk: [ 0.00 0.00 ]
+Key: VMAXPSZrmbkz: [ 0.00 0.00 ]
+Key: VMAXPSZrmk: [ 0.00 0.00 ]
+Key: VMAXPSZrmkz: [ 0.00 0.00 ]
+Key: VMAXPSZrr: [ 0.00 0.00 ]
+Key: VMAXPSZrrb: [ 0.00 0.00 ]
+Key: VMAXPSZrrbk: [ 0.00 0.00 ]
+Key: VMAXPSZrrbkz: [ 0.00 0.00 ]
+Key: VMAXPSZrrk: [ 0.00 0.00 ]
+Key: VMAXPSZrrkz: [ 0.00 0.00 ]
+Key: VMAXPSrm: [ 0.00 0.00 ]
+Key: VMAXPSrr: [ 0.00 0.00 ]
+Key: VMAXSDZrm: [ 0.00 0.00 ]
+Key: VMAXSDZrm_Int: [ 0.00 0.00 ]
+Key: VMAXSDZrmk_Int: [ 0.00 0.00 ]
+Key: VMAXSDZrmkz_Int: [ 0.00 0.00 ]
+Key: VMAXSDZrr: [ 0.00 0.00 ]
+Key: VMAXSDZrr_Int: [ 0.00 0.00 ]
+Key: VMAXSDZrrb_Int: [ 0.00 0.00 ]
+Key: VMAXSDZrrbk_Int: [ 0.00 0.00 ]
+Key: VMAXSDZrrbkz_Int: [ 0.00 0.00 ]
+Key: VMAXSDZrrk_Int: [ 0.00 0.00 ]
+Key: VMAXSDZrrkz_Int: [ 0.00 0.00 ]
+Key: VMAXSDrm: [ 0.00 0.00 ]
+Key: VMAXSDrm_Int: [ 0.00 0.00 ]
+Key: VMAXSDrr: [ 0.00 0.00 ]
+Key: VMAXSDrr_Int: [ 0.00 0.00 ]
+Key: VMAXSHZrm: [ 0.00 0.00 ]
+Key: VMAXSHZrm_Int: [ 0.00 0.00 ]
+Key: VMAXSHZrmk_Int: [ 0.00 0.00 ]
+Key: VMAXSHZrmkz_Int: [ 0.00 0.00 ]
+Key: VMAXSHZrr: [ 0.00 0.00 ]
+Key: VMAXSHZrr_Int: [ 0.00 0.00 ]
+Key: VMAXSHZrrb_Int: [ 0.00 0.00 ]
+Key: VMAXSHZrrbk_Int: [ 0.00 0.00 ]
+Key: VMAXSHZrrbkz_Int: [ 0.00 0.00 ]
+Key: VMAXSHZrrk_Int: [ 0.00 0.00 ]
+Key: VMAXSHZrrkz_Int: [ 0.00 0.00 ]
+Key: VMAXSSZrm: [ 0.00 0.00 ]
+Key: VMAXSSZrm_Int: [ 0.00 0.00 ]
+Key: VMAXSSZrmk_Int: [ 0.00 0.00 ]
+Key: VMAXSSZrmkz_Int: [ 0.00 0.00 ]
+Key: VMAXSSZrr: [ 0.00 0.00 ]
+Key: VMAXSSZrr_Int: [ 0.00 0.00 ]
+Key: VMAXSSZrrb_Int: [ 0.00 0.00 ]
+Key: VMAXSSZrrbk_Int: [ 0.00 0.00 ]
+Key: VMAXSSZrrbkz_Int: [ 0.00 0.00 ]
+Key: VMAXSSZrrk_Int: [ 0.00 0.00 ]
+Key: VMAXSSZrrkz_Int: [ 0.00 0.00 ]
+Key: VMAXSSrm: [ 0.00 0.00 ]
+Key: VMAXSSrm_Int: [ 0.00 0.00 ]
+Key: VMAXSSrr: [ 0.00 0.00 ]
+Key: VMAXSSrr_Int: [ 0.00 0.00 ]
+Key: VMCALL: [ 0.00 0.00 ]
+Key: VMCLEARm: [ 0.00 0.00 ]
+Key: VMFUNC: [ 0.00 0.00 ]
+Key: VMINBF: [ 0.00 0.00 ]
+Key: VMINCPDYrm: [ 0.00 0.00 ]
+Key: VMINCPDYrr: [ 0.00 0.00 ]
+Key: VMINCPDZ: [ 0.00 0.00 ]
+Key: VMINCPDZrm: [ 0.00 0.00 ]
+Key: VMINCPDZrmb: [ 0.00 0.00 ]
+Key: VMINCPDZrmbk: [ 0.00 0.00 ]
+Key: VMINCPDZrmbkz: [ 0.00 0.00 ]
+Key: VMINCPDZrmk: [ 0.00 0.00 ]
+Key: VMINCPDZrmkz: [ 0.00 0.00 ]
+Key: VMINCPDZrr: [ 0.00 0.00 ]
+Key: VMINCPDZrrk: [ 0.00 0.00 ]
+Key: VMINCPDZrrkz: [ 0.00 0.00 ]
+Key: VMINCPDrm: [ 0.00 0.00 ]
+Key: VMINCPDrr: [ 0.00 0.00 ]
+Key: VMINCPHZ: [ 0.00 0.00 ]
+Key: VMINCPHZrm: [ 0.00 0.00 ]
+Key: VMINCPHZrmb: [ 0.00 0.00 ]
+Key: VMINCPHZrmbk: [ 0.00 0.00 ]
+Key: VMINCPHZrmbkz: [ 0.00 0.00 ]
+Key: VMINCPHZrmk: [ 0.00 0.00 ]
+Key: VMINCPHZrmkz: [ 0.00 0.00 ]
+Key: VMINCPHZrr: [ 0.00 0.00 ]
+Key: VMINCPHZrrk: [ 0.00 0.00 ]
+Key: VMINCPHZrrkz: [ 0.00 0.00 ]
+Key: VMINCPSYrm: [ 0.00 0.00 ]
+Key: VMINCPSYrr: [ 0.00 0.00 ]
+Key: VMINCPSZ: [ 0.00 0.00 ]
+Key: VMINCPSZrm: [ 0.00 0.00 ]
+Key: VMINCPSZrmb: [ 0.00 0.00 ]
+Key: VMINCPSZrmbk: [ 0.00 0.00 ]
+Key: VMINCPSZrmbkz: [ 0.00 0.00 ]
+Key: VMINCPSZrmk: [ 0.00 0.00 ]
+Key: VMINCPSZrmkz: [ 0.00 0.00 ]
+Key: VMINCPSZrr: [ 0.00 0.00 ]
+Key: VMINCPSZrrk: [ 0.00 0.00 ]
+Key: VMINCPSZrrkz: [ 0.00 0.00 ]
+Key: VMINCPSrm: [ 0.00 0.00 ]
+Key: VMINCPSrr: [ 0.00 0.00 ]
+Key: VMINCSDZrm: [ 0.00 0.00 ]
+Key: VMINCSDZrr: [ 0.00 0.00 ]
+Key: VMINCSDrm: [ 0.00 0.00 ]
+Key: VMINCSDrr: [ 0.00 0.00 ]
+Key: VMINCSHZrm: [ 0.00 0.00 ]
+Key: VMINCSHZrr: [ 0.00 0.00 ]
+Key: VMINCSSZrm: [ 0.00 0.00 ]
+Key: VMINCSSZrr: [ 0.00 0.00 ]
+Key: VMINCSSrm: [ 0.00 0.00 ]
+Key: VMINCSSrr: [ 0.00 0.00 ]
+Key: VMINMAXBF: [ 0.00 0.00 ]
+Key: VMINMAXPDZ: [ 0.00 0.00 ]
+Key: VMINMAXPDZrmbi: [ 0.00 0.00 ]
+Key: VMINMAXPDZrmbik: [ 0.00 0.00 ]
+Key: VMINMAXPDZrmbikz: [ 0.00 0.00 ]
+Key: VMINMAXPDZrmi: [ 0.00 0.00 ]
+Key: VMINMAXPDZrmik: [ 0.00 0.00 ]
+Key: VMINMAXPDZrmikz: [ 0.00 0.00 ]
+Key: VMINMAXPDZrri: [ 0.00 0.00 ]
+Key: VMINMAXPDZrrib: [ 0.00 0.00 ]
+Key: VMINMAXPDZrribk: [ 0.00 0.00 ]
+Key: VMINMAXPDZrribkz: [ 0.00 0.00 ]
+Key: VMINMAXPDZrrik: [ 0.00 0.00 ]
+Key: VMINMAXPDZrrikz: [ 0.00 0.00 ]
+Key: VMINMAXPHZ: [ 0.00 0.00 ]
+Key: VMINMAXPHZrmbi: [ 0.00 0.00 ]
+Key: VMINMAXPHZrmbik: [ 0.00 0.00 ]
+Key: VMINMAXPHZrmbikz: [ 0.00 0.00 ]
+Key: VMINMAXPHZrmi: [ 0.00 0.00 ]
+Key: VMINMAXPHZrmik: [ 0.00 0.00 ]
+Key: VMINMAXPHZrmikz: [ 0.00 0.00 ]
+Key: VMINMAXPHZrri: [ 0.00 0.00 ]
+Key: VMINMAXPHZrrib: [ 0.00 0.00 ]
+Key: VMINMAXPHZrribk: [ 0.00 0.00 ]
+Key: VMINMAXPHZrribkz: [ 0.00 0.00 ]
+Key: VMINMAXPHZrrik: [ 0.00 0.00 ]
+Key: VMINMAXPHZrrikz: [ 0.00 0.00 ]
+Key: VMINMAXPSZ: [ 0.00 0.00 ]
+Key: VMINMAXPSZrmbi: [ 0.00 0.00 ]
+Key: VMINMAXPSZrmbik: [ 0.00 0.00 ]
+Key: VMINMAXPSZrmbikz: [ 0.00 0.00 ]
+Key: VMINMAXPSZrmi: [ 0.00 0.00 ]
+Key: VMINMAXPSZrmik: [ 0.00 0.00 ]
+Key: VMINMAXPSZrmikz: [ 0.00 0.00 ]
+Key: VMINMAXPSZrri: [ 0.00 0.00 ]
+Key: VMINMAXPSZrrib: [ 0.00 0.00 ]
+Key: VMINMAXPSZrribk: [ 0.00 0.00 ]
+Key: VMINMAXPSZrribkz: [ 0.00 0.00 ]
+Key: VMINMAXPSZrrik: [ 0.00 0.00 ]
+Key: VMINMAXPSZrrikz: [ 0.00 0.00 ]
+Key: VMINMAXSDrmi: [ 0.00 0.00 ]
+Key: VMINMAXSDrmi_Int: [ 0.00 0.00 ]
+Key: VMINMAXSDrmik_Int: [ 0.00 0.00 ]
+Key: VMINMAXSDrmikz_Int: [ 0.00 0.00 ]
+Key: VMINMAXSDrri: [ 0.00 0.00 ]
+Key: VMINMAXSDrri_Int: [ 0.00 0.00 ]
+Key: VMINMAXSDrrib_Int: [ 0.00 0.00 ]
+Key: VMINMAXSDrribk_Int: [ 0.00 0.00 ]
+Key: VMINMAXSDrribkz_Int: [ 0.00 0.00 ]
+Key: VMINMAXSDrrik_Int: [ 0.00 0.00 ]
+Key: VMINMAXSDrrikz_Int: [ 0.00 0.00 ]
+Key: VMINMAXSHrmi: [ 0.00 0.00 ]
+Key: VMINMAXSHrmi_Int: [ 0.00 0.00 ]
+Key: VMINMAXSHrmik_Int: [ 0.00 0.00 ]
+Key: VMINMAXSHrmikz_Int: [ 0.00 0.00 ]
+Key: VMINMAXSHrri: [ 0.00 0.00 ]
+Key: VMINMAXSHrri_Int: [ 0.00 0.00 ]
+Key: VMINMAXSHrrib_Int: [ 0.00 0.00 ]
+Key: VMINMAXSHrribk_Int: [ 0.00 0.00 ]
+Key: VMINMAXSHrribkz_Int: [ 0.00 0.00 ]
+Key: VMINMAXSHrrik_Int: [ 0.00 0.00 ]
+Key: VMINMAXSHrrikz_Int: [ 0.00 0.00 ]
+Key: VMINMAXSSrmi: [ 0.00 0.00 ]
+Key: VMINMAXSSrmi_Int: [ 0.00 0.00 ]
+Key: VMINMAXSSrmik_Int: [ 0.00 0.00 ]
+Key: VMINMAXSSrmikz_Int: [ 0.00 0.00 ]
+Key: VMINMAXSSrri: [ 0.00 0.00 ]
+Key: VMINMAXSSrri_Int: [ 0.00 0.00 ]
+Key: VMINMAXSSrrib_Int: [ 0.00 0.00 ]
+Key: VMINMAXSSrribk_Int: [ 0.00 0.00 ]
+Key: VMINMAXSSrribkz_Int: [ 0.00 0.00 ]
+Key: VMINMAXSSrrik_Int: [ 0.00 0.00 ]
+Key: VMINMAXSSrrikz_Int: [ 0.00 0.00 ]
+Key: VMINPDYrm: [ 0.00 0.00 ]
+Key: VMINPDYrr: [ 0.00 0.00 ]
+Key: VMINPDZ: [ 0.00 0.00 ]
+Key: VMINPDZrm: [ 0.00 0.00 ]
+Key: VMINPDZrmb: [ 0.00 0.00 ]
+Key: VMINPDZrmbk: [ 0.00 0.00 ]
+Key: VMINPDZrmbkz: [ 0.00 0.00 ]
+Key: VMINPDZrmk: [ 0.00 0.00 ]
+Key: VMINPDZrmkz: [ 0.00 0.00 ]
+Key: VMINPDZrr: [ 0.00 0.00 ]
+Key: VMINPDZrrb: [ 0.00 0.00 ]
+Key: VMINPDZrrbk: [ 0.00 0.00 ]
+Key: VMINPDZrrbkz: [ 0.00 0.00 ]
+Key: VMINPDZrrk: [ 0.00 0.00 ]
+Key: VMINPDZrrkz: [ 0.00 0.00 ]
+Key: VMINPDrm: [ 0.00 0.00 ]
+Key: VMINPDrr: [ 0.00 0.00 ]
+Key: VMINPHZ: [ 0.00 0.00 ]
+Key: VMINPHZrm: [ 0.00 0.00 ]
+Key: VMINPHZrmb: [ 0.00 0.00 ]
+Key: VMINPHZrmbk: [ 0.00 0.00 ]
+Key: VMINPHZrmbkz: [ 0.00 0.00 ]
+Key: VMINPHZrmk: [ 0.00 0.00 ]
+Key: VMINPHZrmkz: [ 0.00 0.00 ]
+Key: VMINPHZrr: [ 0.00 0.00 ]
+Key: VMINPHZrrb: [ 0.00 0.00 ]
+Key: VMINPHZrrbk: [ 0.00 0.00 ]
+Key: VMINPHZrrbkz: [ 0.00 0.00 ]
+Key: VMINPHZrrk: [ 0.00 0.00 ]
+Key: VMINPHZrrkz: [ 0.00 0.00 ]
+Key: VMINPSYrm: [ 0.00 0.00 ]
+Key: VMINPSYrr: [ 0.00 0.00 ]
+Key: VMINPSZ: [ 0.00 0.00 ]
+Key: VMINPSZrm: [ 0.00 0.00 ]
+Key: VMINPSZrmb: [ 0.00 0.00 ]
+Key: VMINPSZrmbk: [ 0.00 0.00 ]
+Key: VMINPSZrmbkz: [ 0.00 0.00 ]
+Key: VMINPSZrmk: [ 0.00 0.00 ]
+Key: VMINPSZrmkz: [ 0.00 0.00 ]
+Key: VMINPSZrr: [ 0.00 0.00 ]
+Key: VMINPSZrrb: [ 0.00 0.00 ]
+Key: VMINPSZrrbk: [ 0.00 0.00 ]
+Key: VMINPSZrrbkz: [ 0.00 0.00 ]
+Key: VMINPSZrrk: [ 0.00 0.00 ]
+Key: VMINPSZrrkz: [ 0.00 0.00 ]
+Key: VMINPSrm: [ 0.00 0.00 ]
+Key: VMINPSrr: [ 0.00 0.00 ]
+Key: VMINSDZrm: [ 0.00 0.00 ]
+Key: VMINSDZrm_Int: [ 0.00 0.00 ]
+Key: VMINSDZrmk_Int: [ 0.00 0.00 ]
+Key: VMINSDZrmkz_Int: [ 0.00 0.00 ]
+Key: VMINSDZrr: [ 0.00 0.00 ]
+Key: VMINSDZrr_Int: [ 0.00 0.00 ]
+Key: VMINSDZrrb_Int: [ 0.00 0.00 ]
+Key: VMINSDZrrbk_Int: [ 0.00 0.00 ]
+Key: VMINSDZrrbkz_Int: [ 0.00 0.00 ]
+Key: VMINSDZrrk_Int: [ 0.00 0.00 ]
+Key: VMINSDZrrkz_Int: [ 0.00 0.00 ]
+Key: VMINSDrm: [ 0.00 0.00 ]
+Key: VMINSDrm_Int: [ 0.00 0.00 ]
+Key: VMINSDrr: [ 0.00 0.00 ]
+Key: VMINSDrr_Int: [ 0.00 0.00 ]
+Key: VMINSHZrm: [ 0.00 0.00 ]
+Key: VMINSHZrm_Int: [ 0.00 0.00 ]
+Key: VMINSHZrmk_Int: [ 0.00 0.00 ]
+Key: VMINSHZrmkz_Int: [ 0.00 0.00 ]
+Key: VMINSHZrr: [ 0.00 0.00 ]
+Key: VMINSHZrr_Int: [ 0.00 0.00 ]
+Key: VMINSHZrrb_Int: [ 0.00 0.00 ]
+Key: VMINSHZrrbk_Int: [ 0.00 0.00 ]
+Key: VMINSHZrrbkz_Int: [ 0.00 0.00 ]
+Key: VMINSHZrrk_Int: [ 0.00 0.00 ]
+Key: VMINSHZrrkz_Int: [ 0.00 0.00 ]
+Key: VMINSSZrm: [ 0.00 0.00 ]
+Key: VMINSSZrm_Int: [ 0.00 0.00 ]
+Key: VMINSSZrmk_Int: [ 0.00 0.00 ]
+Key: VMINSSZrmkz_Int: [ 0.00 0.00 ]
+Key: VMINSSZrr: [ 0.00 0.00 ]
+Key: VMINSSZrr_Int: [ 0.00 0.00 ]
+Key: VMINSSZrrb_Int: [ 0.00 0.00 ]
+Key: VMINSSZrrbk_Int: [ 0.00 0.00 ]
+Key: VMINSSZrrbkz_Int: [ 0.00 0.00 ]
+Key: VMINSSZrrk_Int: [ 0.00 0.00 ]
+Key: VMINSSZrrkz_Int: [ 0.00 0.00 ]
+Key: VMINSSrm: [ 0.00 0.00 ]
+Key: VMINSSrm_Int: [ 0.00 0.00 ]
+Key: VMINSSrr: [ 0.00 0.00 ]
+Key: VMINSSrr_Int: [ 0.00 0.00 ]
+Key: VMLAUNCH: [ 0.00 0.00 ]
+Key: VMLOAD: [ 0.00 0.00 ]
+Key: VMMCALL: [ 0.00 0.00 ]
+Key: VMOV: [ 0.00 0.00 ]
+Key: VMOVAPDYmr: [ 0.00 0.00 ]
+Key: VMOVAPDYrm: [ 0.00 0.00 ]
+Key: VMOVAPDYrr: [ 0.00 0.00 ]
+Key: VMOVAPDYrr_REV: [ 0.00 0.00 ]
+Key: VMOVAPDZ: [ 0.00 0.00 ]
+Key: VMOVAPDZmr: [ 0.00 0.00 ]
+Key: VMOVAPDZmrk: [ 0.00 0.00 ]
+Key: VMOVAPDZrm: [ 0.00 0.00 ]
+Key: VMOVAPDZrmk: [ 0.00 0.00 ]
+Key: VMOVAPDZrmkz: [ 0.00 0.00 ]
+Key: VMOVAPDZrr: [ 0.00 0.00 ]
+Key: VMOVAPDZrr_REV: [ 0.00 0.00 ]
+Key: VMOVAPDZrrk: [ 0.00 0.00 ]
+Key: VMOVAPDZrrk_REV: [ 0.00 0.00 ]
+Key: VMOVAPDZrrkz: [ 0.00 0.00 ]
+Key: VMOVAPDZrrkz_REV: [ 0.00 0.00 ]
+Key: VMOVAPDmr: [ 0.00 0.00 ]
+Key: VMOVAPDrm: [ 0.00 0.00 ]
+Key: VMOVAPDrr: [ 0.00 0.00 ]
+Key: VMOVAPDrr_REV: [ 0.00 0.00 ]
+Key: VMOVAPSYmr: [ 0.00 0.00 ]
+Key: VMOVAPSYrm: [ 0.00 0.00 ]
+Key: VMOVAPSYrr: [ 0.00 0.00 ]
+Key: VMOVAPSYrr_REV: [ 0.00 0.00 ]
+Key: VMOVAPSZ: [ 0.00 0.00 ]
+Key: VMOVAPSZmr: [ 0.00 0.00 ]
+Key: VMOVAPSZmrk: [ 0.00 0.00 ]
+Key: VMOVAPSZrm: [ 0.00 0.00 ]
+Key: VMOVAPSZrmk: [ 0.00 0.00 ]
+Key: VMOVAPSZrmkz: [ 0.00 0.00 ]
+Key: VMOVAPSZrr: [ 0.00 0.00 ]
+Key: VMOVAPSZrr_REV: [ 0.00 0.00 ]
+Key: VMOVAPSZrrk: [ 0.00 0.00 ]
+Key: VMOVAPSZrrk_REV: [ 0.00 0.00 ]
+Key: VMOVAPSZrrkz: [ 0.00 0.00 ]
+Key: VMOVAPSZrrkz_REV: [ 0.00 0.00 ]
+Key: VMOVAPSmr: [ 0.00 0.00 ]
+Key: VMOVAPSrm: [ 0.00 0.00 ]
+Key: VMOVAPSrr: [ 0.00 0.00 ]
+Key: VMOVAPSrr_REV: [ 0.00 0.00 ]
+Key: VMOVDDUPYrm: [ 0.00 0.00 ]
+Key: VMOVDDUPYrr: [ 0.00 0.00 ]
+Key: VMOVDDUPZ: [ 0.00 0.00 ]
+Key: VMOVDDUPZrm: [ 0.00 0.00 ]
+Key: VMOVDDUPZrmk: [ 0.00 0.00 ]
+Key: VMOVDDUPZrmkz: [ 0.00 0.00 ]
+Key: VMOVDDUPZrr: [ 0.00 0.00 ]
+Key: VMOVDDUPZrrk: [ 0.00 0.00 ]
+Key: VMOVDDUPZrrkz: [ 0.00 0.00 ]
+Key: VMOVDDUPrm: [ 0.00 0.00 ]
+Key: VMOVDDUPrr: [ 0.00 0.00 ]
+Key: VMOVDI: [ 0.00 0.00 ]
+Key: VMOVDQA: [ 0.00 0.00 ]
+Key: VMOVDQAYmr: [ 0.00 0.00 ]
+Key: VMOVDQAYrm: [ 0.00 0.00 ]
+Key: VMOVDQAYrr: [ 0.00 0.00 ]
+Key: VMOVDQAYrr_REV: [ 0.00 0.00 ]
+Key: VMOVDQAmr: [ 0.00 0.00 ]
+Key: VMOVDQArm: [ 0.00 0.00 ]
+Key: VMOVDQArr: [ 0.00 0.00 ]
+Key: VMOVDQArr_REV: [ 0.00 0.00 ]
+Key: VMOVDQU: [ 0.00 0.00 ]
+Key: VMOVDQUYmr: [ 0.00 0.00 ]
+Key: VMOVDQUYrm: [ 0.00 0.00 ]
+Key: VMOVDQUYrr: [ 0.00 0.00 ]
+Key: VMOVDQUYrr_REV: [ 0.00 0.00 ]
+Key: VMOVDQUmr: [ 0.00 0.00 ]
+Key: VMOVDQUrm: [ 0.00 0.00 ]
+Key: VMOVDQUrr: [ 0.00 0.00 ]
+Key: VMOVDQUrr_REV: [ 0.00 0.00 ]
+Key: VMOVHLPSZrr: [ 0.00 0.00 ]
+Key: VMOVHLPSrr: [ 0.00 0.00 ]
+Key: VMOVHPDZ: [ 0.00 0.00 ]
+Key: VMOVHPDmr: [ 0.00 0.00 ]
+Key: VMOVHPDrm: [ 0.00 0.00 ]
+Key: VMOVHPSZ: [ 0.00 0.00 ]
+Key: VMOVHPSmr: [ 0.00 0.00 ]
+Key: VMOVHPSrm: [ 0.00 0.00 ]
+Key: VMOVLHPSZrr: [ 0.00 0.00 ]
+Key: VMOVLHPSrr: [ 0.00 0.00 ]
+Key: VMOVLPDZ: [ 0.00 0.00 ]
+Key: VMOVLPDmr: [ 0.00 0.00 ]
+Key: VMOVLPDrm: [ 0.00 0.00 ]
+Key: VMOVLPSZ: [ 0.00 0.00 ]
+Key: VMOVLPSmr: [ 0.00 0.00 ]
+Key: VMOVLPSrm: [ 0.00 0.00 ]
+Key: VMOVMSKPDYrr: [ 0.00 0.00 ]
+Key: VMOVMSKPDrr: [ 0.00 0.00 ]
+Key: VMOVMSKPSYrr: [ 0.00 0.00 ]
+Key: VMOVMSKPSrr: [ 0.00 0.00 ]
+Key: VMOVNTDQAYrm: [ 0.00 0.00 ]
+Key: VMOVNTDQAZ: [ 0.00 0.00 ]
+Key: VMOVNTDQAZrm: [ 0.00 0.00 ]
+Key: VMOVNTDQArm: [ 0.00 0.00 ]
+Key: VMOVNTDQYmr: [ 0.00 0.00 ]
+Key: VMOVNTDQZ: [ 0.00 0.00 ]
+Key: VMOVNTDQZmr: [ 0.00 0.00 ]
+Key: VMOVNTDQmr: [ 0.00 0.00 ]
+Key: VMOVNTPDYmr: [ 0.00 0.00 ]
+Key: VMOVNTPDZ: [ 0.00 0.00 ]
+Key: VMOVNTPDZmr: [ 0.00 0.00 ]
+Key: VMOVNTPDmr: [ 0.00 0.00 ]
+Key: VMOVNTPSYmr: [ 0.00 0.00 ]
+Key: VMOVNTPSZ: [ 0.00 0.00 ]
+Key: VMOVNTPSZmr: [ 0.00 0.00 ]
+Key: VMOVNTPSmr: [ 0.00 0.00 ]
+Key: VMOVPDI: [ 0.00 0.00 ]
+Key: VMOVPQI: [ 0.00 0.00 ]
+Key: VMOVPQIto: [ 0.00 0.00 ]
+Key: VMOVQI: [ 0.00 0.00 ]
+Key: VMOVRSBZ: [ 0.00 0.00 ]
+Key: VMOVRSBZm: [ 0.00 0.00 ]
+Key: VMOVRSBZmk: [ 0.00 0.00 ]
+Key: VMOVRSBZmkz: [ 0.00 0.00 ]
+Key: VMOVRSDZ: [ 0.00 0.00 ]
+Key: VMOVRSDZm: [ 0.00 0.00 ]
+Key: VMOVRSDZmk: [ 0.00 0.00 ]
+Key: VMOVRSDZmkz: [ 0.00 0.00 ]
+Key: VMOVRSQZ: [ 0.00 0.00 ]
+Key: VMOVRSQZm: [ 0.00 0.00 ]
+Key: VMOVRSQZmk: [ 0.00 0.00 ]
+Key: VMOVRSQZmkz: [ 0.00 0.00 ]
+Key: VMOVRSWZ: [ 0.00 0.00 ]
+Key: VMOVRSWZm: [ 0.00 0.00 ]
+Key: VMOVRSWZmk: [ 0.00 0.00 ]
+Key: VMOVRSWZmkz: [ 0.00 0.00 ]
+Key: VMOVSDZmr: [ 0.00 0.00 ]
+Key: VMOVSDZmrk: [ 0.00 0.00 ]
+Key: VMOVSDZrm: [ 0.00 0.00 ]
+Key: VMOVSDZrm_alt: [ 0.00 0.00 ]
+Key: VMOVSDZrmk: [ 0.00 0.00 ]
+Key: VMOVSDZrmkz: [ 0.00 0.00 ]
+Key: VMOVSDZrr: [ 0.00 0.00 ]
+Key: VMOVSDZrr_REV: [ 0.00 0.00 ]
+Key: VMOVSDZrrk: [ 0.00 0.00 ]
+Key: VMOVSDZrrk_REV: [ 0.00 0.00 ]
+Key: VMOVSDZrrkz: [ 0.00 0.00 ]
+Key: VMOVSDZrrkz_REV: [ 0.00 0.00 ]
+Key: VMOVSDmr: [ 0.00 0.00 ]
+Key: VMOVSDrm: [ 0.00 0.00 ]
+Key: VMOVSDrm_alt: [ 0.00 0.00 ]
+Key: VMOVSDrr: [ 0.00 0.00 ]
+Key: VMOVSDrr_REV: [ 0.00 0.00 ]
+Key: VMOVSDto: [ 0.00 0.00 ]
+Key: VMOVSH: [ 0.00 0.00 ]
+Key: VMOVSHDUPYrm: [ 0.00 0.00 ]
+Key: VMOVSHDUPYrr: [ 0.00 0.00 ]
+Key: VMOVSHDUPZ: [ 0.00 0.00 ]
+Key: VMOVSHDUPZrm: [ 0.00 0.00 ]
+Key: VMOVSHDUPZrmk: [ 0.00 0.00 ]
+Key: VMOVSHDUPZrmkz: [ 0.00 0.00 ]
+Key: VMOVSHDUPZrr: [ 0.00 0.00 ]
+Key: VMOVSHDUPZrrk: [ 0.00 0.00 ]
+Key: VMOVSHDUPZrrkz: [ 0.00 0.00 ]
+Key: VMOVSHDUPrm: [ 0.00 0.00 ]
+Key: VMOVSHDUPrr: [ 0.00 0.00 ]
+Key: VMOVSHZmr: [ 0.00 0.00 ]
+Key: VMOVSHZmrk: [ 0.00 0.00 ]
+Key: VMOVSHZrm: [ 0.00 0.00 ]
+Key: VMOVSHZrm_alt: [ 0.00 0.00 ]
+Key: VMOVSHZrmk: [ 0.00 0.00 ]
+Key: VMOVSHZrmkz: [ 0.00 0.00 ]
+Key: VMOVSHZrr: [ 0.00 0.00 ]
+Key: VMOVSHZrr_REV: [ 0.00 0.00 ]
+Key: VMOVSHZrrk: [ 0.00 0.00 ]
+Key: VMOVSHZrrk_REV: [ 0.00 0.00 ]
+Key: VMOVSHZrrkz: [ 0.00 0.00 ]
+Key: VMOVSHZrrkz_REV: [ 0.00 0.00 ]
+Key: VMOVSHtoW: [ 0.00 0.00 ]
+Key: VMOVSLDUPYrm: [ 0.00 0.00 ]
+Key: VMOVSLDUPYrr: [ 0.00 0.00 ]
+Key: VMOVSLDUPZ: [ 0.00 0.00 ]
+Key: VMOVSLDUPZrm: [ 0.00 0.00 ]
+Key: VMOVSLDUPZrmk: [ 0.00 0.00 ]
+Key: VMOVSLDUPZrmkz: [ 0.00 0.00 ]
+Key: VMOVSLDUPZrr: [ 0.00 0.00 ]
+Key: VMOVSLDUPZrrk: [ 0.00 0.00 ]
+Key: VMOVSLDUPZrrkz: [ 0.00 0.00 ]
+Key: VMOVSLDUPrm: [ 0.00 0.00 ]
+Key: VMOVSLDUPrr: [ 0.00 0.00 ]
+Key: VMOVSS: [ 0.00 0.00 ]
+Key: VMOVSSZmr: [ 0.00 0.00 ]
+Key: VMOVSSZmrk: [ 0.00 0.00 ]
+Key: VMOVSSZrm: [ 0.00 0.00 ]
+Key: VMOVSSZrm_alt: [ 0.00 0.00 ]
+Key: VMOVSSZrmk: [ 0.00 0.00 ]
+Key: VMOVSSZrmkz: [ 0.00 0.00 ]
+Key: VMOVSSZrr: [ 0.00 0.00 ]
+Key: VMOVSSZrr_REV: [ 0.00 0.00 ]
+Key: VMOVSSZrrk: [ 0.00 0.00 ]
+Key: VMOVSSZrrk_REV: [ 0.00 0.00 ]
+Key: VMOVSSZrrkz: [ 0.00 0.00 ]
+Key: VMOVSSZrrkz_REV: [ 0.00 0.00 ]
+Key: VMOVSSmr: [ 0.00 0.00 ]
+Key: VMOVSSrm: [ 0.00 0.00 ]
+Key: VMOVSSrm_alt: [ 0.00 0.00 ]
+Key: VMOVSSrr: [ 0.00 0.00 ]
+Key: VMOVSSrr_REV: [ 0.00 0.00 ]
+Key: VMOVUPDYmr: [ 0.00 0.00 ]
+Key: VMOVUPDYrm: [ 0.00 0.00 ]
+Key: VMOVUPDYrr: [ 0.00 0.00 ]
+Key: VMOVUPDYrr_REV: [ 0.00 0.00 ]
+Key: VMOVUPDZ: [ 0.00 0.00 ]
+Key: VMOVUPDZmr: [ 0.00 0.00 ]
+Key: VMOVUPDZmrk: [ 0.00 0.00 ]
+Key: VMOVUPDZrm: [ 0.00 0.00 ]
+Key: VMOVUPDZrmk: [ 0.00 0.00 ]
+Key: VMOVUPDZrmkz: [ 0.00 0.00 ]
+Key: VMOVUPDZrr: [ 0.00 0.00 ]
+Key: VMOVUPDZrr_REV: [ 0.00 0.00 ]
+Key: VMOVUPDZrrk: [ 0.00 0.00 ]
+Key: VMOVUPDZrrk_REV: [ 0.00 0.00 ]
+Key: VMOVUPDZrrkz: [ 0.00 0.00 ]
+Key: VMOVUPDZrrkz_REV: [ 0.00 0.00 ]
+Key: VMOVUPDmr: [ 0.00 0.00 ]
+Key: VMOVUPDrm: [ 0.00 0.00 ]
+Key: VMOVUPDrr: [ 0.00 0.00 ]
+Key: VMOVUPDrr_REV: [ 0.00 0.00 ]
+Key: VMOVUPSYmr: [ 0.00 0.00 ]
+Key: VMOVUPSYrm: [ 0.00 0.00 ]
+Key: VMOVUPSYrr: [ 0.00 0.00 ]
+Key: VMOVUPSYrr_REV: [ 0.00 0.00 ]
+Key: VMOVUPSZ: [ 0.00 0.00 ]
+Key: VMOVUPSZmr: [ 0.00 0.00 ]
+Key: VMOVUPSZmrk: [ 0.00 0.00 ]
+Key: VMOVUPSZrm: [ 0.00 0.00 ]
+Key: VMOVUPSZrmk: [ 0.00 0.00 ]
+Key: VMOVUPSZrmkz: [ 0.00 0.00 ]
+Key: VMOVUPSZrr: [ 0.00 0.00 ]
+Key: VMOVUPSZrr_REV: [ 0.00 0.00 ]
+Key: VMOVUPSZrrk: [ 0.00 0.00 ]
+Key: VMOVUPSZrrk_REV: [ 0.00 0.00 ]
+Key: VMOVUPSZrrkz: [ 0.00 0.00 ]
+Key: VMOVUPSZrrkz_REV: [ 0.00 0.00 ]
+Key: VMOVUPSmr: [ 0.00 0.00 ]
+Key: VMOVUPSrm: [ 0.00 0.00 ]
+Key: VMOVUPSrr: [ 0.00 0.00 ]
+Key: VMOVUPSrr_REV: [ 0.00 0.00 ]
+Key: VMOVW: [ 0.00 0.00 ]
+Key: VMOVWmr: [ 0.00 0.00 ]
+Key: VMOVWrm: [ 0.00 0.00 ]
+Key: VMOVZPDILo: [ 0.00 0.00 ]
+Key: VMOVZPQILo: [ 0.00 0.00 ]
+Key: VMOVZPWILo: [ 0.00 0.00 ]
+Key: VMPSADBWYrmi: [ 0.00 0.00 ]
+Key: VMPSADBWYrri: [ 0.00 0.00 ]
+Key: VMPSADBWZ: [ 0.00 0.00 ]
+Key: VMPSADBWZrmi: [ 0.00 0.00 ]
+Key: VMPSADBWZrmik: [ 0.00 0.00 ]
+Key: VMPSADBWZrmikz: [ 0.00 0.00 ]
+Key: VMPSADBWZrri: [ 0.00 0.00 ]
+Key: VMPSADBWZrrik: [ 0.00 0.00 ]
+Key: VMPSADBWZrrikz: [ 0.00 0.00 ]
+Key: VMPSADBWrmi: [ 0.00 0.00 ]
+Key: VMPSADBWrri: [ 0.00 0.00 ]
+Key: VMPTRLDm: [ 0.00 0.00 ]
+Key: VMPTRSTm: [ 0.00 0.00 ]
+Key: VMREAD: [ 0.00 0.00 ]
+Key: VMRESUME: [ 0.00 0.00 ]
+Key: VMRUN: [ 0.00 0.00 ]
+Key: VMSAVE: [ 0.00 0.00 ]
+Key: VMULBF: [ 0.00 0.00 ]
+Key: VMULPDYrm: [ 0.00 0.00 ]
+Key: VMULPDYrr: [ 0.00 0.00 ]
+Key: VMULPDZ: [ 0.00 0.00 ]
+Key: VMULPDZrm: [ 0.00 0.00 ]
+Key: VMULPDZrmb: [ 0.00 0.00 ]
+Key: VMULPDZrmbk: [ 0.00 0.00 ]
+Key: VMULPDZrmbkz: [ 0.00 0.00 ]
+Key: VMULPDZrmk: [ 0.00 0.00 ]
+Key: VMULPDZrmkz: [ 0.00 0.00 ]
+Key: VMULPDZrr: [ 0.00 0.00 ]
+Key: VMULPDZrrb: [ 0.00 0.00 ]
+Key: VMULPDZrrbk: [ 0.00 0.00 ]
+Key: VMULPDZrrbkz: [ 0.00 0.00 ]
+Key: VMULPDZrrk: [ 0.00 0.00 ]
+Key: VMULPDZrrkz: [ 0.00 0.00 ]
+Key: VMULPDrm: [ 0.00 0.00 ]
+Key: VMULPDrr: [ 0.00 0.00 ]
+Key: VMULPHZ: [ 0.00 0.00 ]
+Key: VMULPHZrm: [ 0.00 0.00 ]
+Key: VMULPHZrmb: [ 0.00 0.00 ]
+Key: VMULPHZrmbk: [ 0.00 0.00 ]
+Key: VMULPHZrmbkz: [ 0.00 0.00 ]
+Key: VMULPHZrmk: [ 0.00 0.00 ]
+Key: VMULPHZrmkz: [ 0.00 0.00 ]
+Key: VMULPHZrr: [ 0.00 0.00 ]
+Key: VMULPHZrrb: [ 0.00 0.00 ]
+Key: VMULPHZrrbk: [ 0.00 0.00 ]
+Key: VMULPHZrrbkz: [ 0.00 0.00 ]
+Key: VMULPHZrrk: [ 0.00 0.00 ]
+Key: VMULPHZrrkz: [ 0.00 0.00 ]
+Key: VMULPSYrm: [ 0.00 0.00 ]
+Key: VMULPSYrr: [ 0.00 0.00 ]
+Key: VMULPSZ: [ 0.00 0.00 ]
+Key: VMULPSZrm: [ 0.00 0.00 ]
+Key: VMULPSZrmb: [ 0.00 0.00 ]
+Key: VMULPSZrmbk: [ 0.00 0.00 ]
+Key: VMULPSZrmbkz: [ 0.00 0.00 ]
+Key: VMULPSZrmk: [ 0.00 0.00 ]
+Key: VMULPSZrmkz: [ 0.00 0.00 ]
+Key: VMULPSZrr: [ 0.00 0.00 ]
+Key: VMULPSZrrb: [ 0.00 0.00 ]
+Key: VMULPSZrrbk: [ 0.00 0.00 ]
+Key: VMULPSZrrbkz: [ 0.00 0.00 ]
+Key: VMULPSZrrk: [ 0.00 0.00 ]
+Key: VMULPSZrrkz: [ 0.00 0.00 ]
+Key: VMULPSrm: [ 0.00 0.00 ]
+Key: VMULPSrr: [ 0.00 0.00 ]
+Key: VMULSDZrm: [ 0.00 0.00 ]
+Key: VMULSDZrm_Int: [ 0.00 0.00 ]
+Key: VMULSDZrmk_Int: [ 0.00 0.00 ]
+Key: VMULSDZrmkz_Int: [ 0.00 0.00 ]
+Key: VMULSDZrr: [ 0.00 0.00 ]
+Key: VMULSDZrr_Int: [ 0.00 0.00 ]
+Key: VMULSDZrrb_Int: [ 0.00 0.00 ]
+Key: VMULSDZrrbk_Int: [ 0.00 0.00 ]
+Key: VMULSDZrrbkz_Int: [ 0.00 0.00 ]
+Key: VMULSDZrrk_Int: [ 0.00 0.00 ]
+Key: VMULSDZrrkz_Int: [ 0.00 0.00 ]
+Key: VMULSDrm: [ 0.00 0.00 ]
+Key: VMULSDrm_Int: [ 0.00 0.00 ]
+Key: VMULSDrr: [ 0.00 0.00 ]
+Key: VMULSDrr_Int: [ 0.00 0.00 ]
+Key: VMULSHZrm: [ 0.00 0.00 ]
+Key: VMULSHZrm_Int: [ 0.00 0.00 ]
+Key: VMULSHZrmk_Int: [ 0.00 0.00 ]
+Key: VMULSHZrmkz_Int: [ 0.00 0.00 ]
+Key: VMULSHZrr: [ 0.00 0.00 ]
+Key: VMULSHZrr_Int: [ 0.00 0.00 ]
+Key: VMULSHZrrb_Int: [ 0.00 0.00 ]
+Key: VMULSHZrrbk_Int: [ 0.00 0.00 ]
+Key: VMULSHZrrbkz_Int: [ 0.00 0.00 ]
+Key: VMULSHZrrk_Int: [ 0.00 0.00 ]
+Key: VMULSHZrrkz_Int: [ 0.00 0.00 ]
+Key: VMULSSZrm: [ 0.00 0.00 ]
+Key: VMULSSZrm_Int: [ 0.00 0.00 ]
+Key: VMULSSZrmk_Int: [ 0.00 0.00 ]
+Key: VMULSSZrmkz_Int: [ 0.00 0.00 ]
+Key: VMULSSZrr: [ 0.00 0.00 ]
+Key: VMULSSZrr_Int: [ 0.00 0.00 ]
+Key: VMULSSZrrb_Int: [ 0.00 0.00 ]
+Key: VMULSSZrrbk_Int: [ 0.00 0.00 ]
+Key: VMULSSZrrbkz_Int: [ 0.00 0.00 ]
+Key: VMULSSZrrk_Int: [ 0.00 0.00 ]
+Key: VMULSSZrrkz_Int: [ 0.00 0.00 ]
+Key: VMULSSrm: [ 0.00 0.00 ]
+Key: VMULSSrm_Int: [ 0.00 0.00 ]
+Key: VMULSSrr: [ 0.00 0.00 ]
+Key: VMULSSrr_Int: [ 0.00 0.00 ]
+Key: VMWRITE: [ 0.00 0.00 ]
+Key: VMXOFF: [ 0.00 0.00 ]
+Key: VMXON: [ 0.00 0.00 ]
+Key: VORPDYrm: [ 0.00 0.00 ]
+Key: VORPDYrr: [ 0.00 0.00 ]
+Key: VORPDZ: [ 0.00 0.00 ]
+Key: VORPDZrm: [ 0.00 0.00 ]
+Key: VORPDZrmb: [ 0.00 0.00 ]
+Key: VORPDZrmbk: [ 0.00 0.00 ]
+Key: VORPDZrmbkz: [ 0.00 0.00 ]
+Key: VORPDZrmk: [ 0.00 0.00 ]
+Key: VORPDZrmkz: [ 0.00 0.00 ]
+Key: VORPDZrr: [ 0.00 0.00 ]
+Key: VORPDZrrk: [ 0.00 0.00 ]
+Key: VORPDZrrkz: [ 0.00 0.00 ]
+Key: VORPDrm: [ 0.00 0.00 ]
+Key: VORPDrr: [ 0.00 0.00 ]
+Key: VORPSYrm: [ 0.00 0.00 ]
+Key: VORPSYrr: [ 0.00 0.00 ]
+Key: VORPSZ: [ 0.00 0.00 ]
+Key: VORPSZrm: [ 0.00 0.00 ]
+Key: VORPSZrmb: [ 0.00 0.00 ]
+Key: VORPSZrmbk: [ 0.00 0.00 ]
+Key: VORPSZrmbkz: [ 0.00 0.00 ]
+Key: VORPSZrmk: [ 0.00 0.00 ]
+Key: VORPSZrmkz: [ 0.00 0.00 ]
+Key: VORPSZrr: [ 0.00 0.00 ]
+Key: VORPSZrrk: [ 0.00 0.00 ]
+Key: VORPSZrrkz: [ 0.00 0.00 ]
+Key: VORPSrm: [ 0.00 0.00 ]
+Key: VORPSrr: [ 0.00 0.00 ]
+Key: VP: [ 0.00 0.00 ]
+Key: VPABSBYrm: [ 0.00 0.00 ]
+Key: VPABSBYrr: [ 0.00 0.00 ]
+Key: VPABSBZ: [ 0.00 0.00 ]
+Key: VPABSBZrm: [ 0.00 0.00 ]
+Key: VPABSBZrmk: [ 0.00 0.00 ]
+Key: VPABSBZrmkz: [ 0.00 0.00 ]
+Key: VPABSBZrr: [ 0.00 0.00 ]
+Key: VPABSBZrrk: [ 0.00 0.00 ]
+Key: VPABSBZrrkz: [ 0.00 0.00 ]
+Key: VPABSBrm: [ 0.00 0.00 ]
+Key: VPABSBrr: [ 0.00 0.00 ]
+Key: VPABSDYrm: [ 0.00 0.00 ]
+Key: VPABSDYrr: [ 0.00 0.00 ]
+Key: VPABSDZ: [ 0.00 0.00 ]
+Key: VPABSDZrm: [ 0.00 0.00 ]
+Key: VPABSDZrmb: [ 0.00 0.00 ]
+Key: VPABSDZrmbk: [ 0.00 0.00 ]
+Key: VPABSDZrmbkz: [ 0.00 0.00 ]
+Key: VPABSDZrmk: [ 0.00 0.00 ]
+Key: VPABSDZrmkz: [ 0.00 0.00 ]
+Key: VPABSDZrr: [ 0.00 0.00 ]
+Key: VPABSDZrrk: [ 0.00 0.00 ]
+Key: VPABSDZrrkz: [ 0.00 0.00 ]
+Key: VPABSDrm: [ 0.00 0.00 ]
+Key: VPABSDrr: [ 0.00 0.00 ]
+Key: VPABSQZ: [ 0.00 0.00 ]
+Key: VPABSQZrm: [ 0.00 0.00 ]
+Key: VPABSQZrmb: [ 0.00 0.00 ]
+Key: VPABSQZrmbk: [ 0.00 0.00 ]
+Key: VPABSQZrmbkz: [ 0.00 0.00 ]
+Key: VPABSQZrmk: [ 0.00 0.00 ]
+Key: VPABSQZrmkz: [ 0.00 0.00 ]
+Key: VPABSQZrr: [ 0.00 0.00 ]
+Key: VPABSQZrrk: [ 0.00 0.00 ]
+Key: VPABSQZrrkz: [ 0.00 0.00 ]
+Key: VPABSWYrm: [ 0.00 0.00 ]
+Key: VPABSWYrr: [ 0.00 0.00 ]
+Key: VPABSWZ: [ 0.00 0.00 ]
+Key: VPABSWZrm: [ 0.00 0.00 ]
+Key: VPABSWZrmk: [ 0.00 0.00 ]
+Key: VPABSWZrmkz: [ 0.00 0.00 ]
+Key: VPABSWZrr: [ 0.00 0.00 ]
+Key: VPABSWZrrk: [ 0.00 0.00 ]
+Key: VPABSWZrrkz: [ 0.00 0.00 ]
+Key: VPABSWrm: [ 0.00 0.00 ]
+Key: VPABSWrr: [ 0.00 0.00 ]
+Key: VPACKSSDWYrm: [ 0.00 0.00 ]
+Key: VPACKSSDWYrr: [ 0.00 0.00 ]
+Key: VPACKSSDWZ: [ 0.00 0.00 ]
+Key: VPACKSSDWZrm: [ 0.00 0.00 ]
+Key: VPACKSSDWZrmb: [ 0.00 0.00 ]
+Key: VPACKSSDWZrmbk: [ 0.00 0.00 ]
+Key: VPACKSSDWZrmbkz: [ 0.00 0.00 ]
+Key: VPACKSSDWZrmk: [ 0.00 0.00 ]
+Key: VPACKSSDWZrmkz: [ 0.00 0.00 ]
+Key: VPACKSSDWZrr: [ 0.00 0.00 ]
+Key: VPACKSSDWZrrk: [ 0.00 0.00 ]
+Key: VPACKSSDWZrrkz: [ 0.00 0.00 ]
+Key: VPACKSSDWrm: [ 0.00 0.00 ]
+Key: VPACKSSDWrr: [ 0.00 0.00 ]
+Key: VPACKSSWBYrm: [ 0.00 0.00 ]
+Key: VPACKSSWBYrr: [ 0.00 0.00 ]
+Key: VPACKSSWBZ: [ 0.00 0.00 ]
+Key: VPACKSSWBZrm: [ 0.00 0.00 ]
+Key: VPACKSSWBZrmk: [ 0.00 0.00 ]
+Key: VPACKSSWBZrmkz: [ 0.00 0.00 ]
+Key: VPACKSSWBZrr: [ 0.00 0.00 ]
+Key: VPACKSSWBZrrk: [ 0.00 0.00 ]
+Key: VPACKSSWBZrrkz: [ 0.00 0.00 ]
+Key: VPACKSSWBrm: [ 0.00 0.00 ]
+Key: VPACKSSWBrr: [ 0.00 0.00 ]
+Key: VPACKUSDWYrm: [ 0.00 0.00 ]
+Key: VPACKUSDWYrr: [ 0.00 0.00 ]
+Key: VPACKUSDWZ: [ 0.00 0.00 ]
+Key: VPACKUSDWZrm: [ 0.00 0.00 ]
+Key: VPACKUSDWZrmb: [ 0.00 0.00 ]
+Key: VPACKUSDWZrmbk: [ 0.00 0.00 ]
+Key: VPACKUSDWZrmbkz: [ 0.00 0.00 ]
+Key: VPACKUSDWZrmk: [ 0.00 0.00 ]
+Key: VPACKUSDWZrmkz: [ 0.00 0.00 ]
+Key: VPACKUSDWZrr: [ 0.00 0.00 ]
+Key: VPACKUSDWZrrk: [ 0.00 0.00 ]
+Key: VPACKUSDWZrrkz: [ 0.00 0.00 ]
+Key: VPACKUSDWrm: [ 0.00 0.00 ]
+Key: VPACKUSDWrr: [ 0.00 0.00 ]
+Key: VPACKUSWBYrm: [ 0.00 0.00 ]
+Key: VPACKUSWBYrr: [ 0.00 0.00 ]
+Key: VPACKUSWBZ: [ 0.00 0.00 ]
+Key: VPACKUSWBZrm: [ 0.00 0.00 ]
+Key: VPACKUSWBZrmk: [ 0.00 0.00 ]
+Key: VPACKUSWBZrmkz: [ 0.00 0.00 ]
+Key: VPACKUSWBZrr: [ 0.00 0.00 ]
+Key: VPACKUSWBZrrk: [ 0.00 0.00 ]
+Key: VPACKUSWBZrrkz: [ 0.00 0.00 ]
+Key: VPACKUSWBrm: [ 0.00 0.00 ]
+Key: VPACKUSWBrr: [ 0.00 0.00 ]
+Key: VPADDBYrm: [ 0.00 0.00 ]
+Key: VPADDBYrr: [ 0.00 0.00 ]
+Key: VPADDBZ: [ 0.00 0.00 ]
+Key: VPADDBZrm: [ 0.00 0.00 ]
+Key: VPADDBZrmk: [ 0.00 0.00 ]
+Key: VPADDBZrmkz: [ 0.00 0.00 ]
+Key: VPADDBZrr: [ 0.00 0.00 ]
+Key: VPADDBZrrk: [ 0.00 0.00 ]
+Key: VPADDBZrrkz: [ 0.00 0.00 ]
+Key: VPADDBrm: [ 0.00 0.00 ]
+Key: VPADDBrr: [ 0.00 0.00 ]
+Key: VPADDDYrm: [ 0.00 0.00 ]
+Key: VPADDDYrr: [ 0.00 0.00 ]
+Key: VPADDDZ: [ 0.00 0.00 ]
+Key: VPADDDZrm: [ 0.00 0.00 ]
+Key: VPADDDZrmb: [ 0.00 0.00 ]
+Key: VPADDDZrmbk: [ 0.00 0.00 ]
+Key: VPADDDZrmbkz: [ 0.00 0.00 ]
+Key: VPADDDZrmk: [ 0.00 0.00 ]
+Key: VPADDDZrmkz: [ 0.00 0.00 ]
+Key: VPADDDZrr: [ 0.00 0.00 ]
+Key: VPADDDZrrk: [ 0.00 0.00 ]
+Key: VPADDDZrrkz: [ 0.00 0.00 ]
+Key: VPADDDrm: [ 0.00 0.00 ]
+Key: VPADDDrr: [ 0.00 0.00 ]
+Key: VPADDQYrm: [ 0.00 0.00 ]
+Key: VPADDQYrr: [ 0.00 0.00 ]
+Key: VPADDQZ: [ 0.00 0.00 ]
+Key: VPADDQZrm: [ 0.00 0.00 ]
+Key: VPADDQZrmb: [ 0.00 0.00 ]
+Key: VPADDQZrmbk: [ 0.00 0.00 ]
+Key: VPADDQZrmbkz: [ 0.00 0.00 ]
+Key: VPADDQZrmk: [ 0.00 0.00 ]
+Key: VPADDQZrmkz: [ 0.00 0.00 ]
+Key: VPADDQZrr: [ 0.00 0.00 ]
+Key: VPADDQZrrk: [ 0.00 0.00 ]
+Key: VPADDQZrrkz: [ 0.00 0.00 ]
+Key: VPADDQrm: [ 0.00 0.00 ]
+Key: VPADDQrr: [ 0.00 0.00 ]
+Key: VPADDSBYrm: [ 0.00 0.00 ]
+Key: VPADDSBYrr: [ 0.00 0.00 ]
+Key: VPADDSBZ: [ 0.00 0.00 ]
+Key: VPADDSBZrm: [ 0.00 0.00 ]
+Key: VPADDSBZrmk: [ 0.00 0.00 ]
+Key: VPADDSBZrmkz: [ 0.00 0.00 ]
+Key: VPADDSBZrr: [ 0.00 0.00 ]
+Key: VPADDSBZrrk: [ 0.00 0.00 ]
+Key: VPADDSBZrrkz: [ 0.00 0.00 ]
+Key: VPADDSBrm: [ 0.00 0.00 ]
+Key: VPADDSBrr: [ 0.00 0.00 ]
+Key: VPADDSWYrm: [ 0.00 0.00 ]
+Key: VPADDSWYrr: [ 0.00 0.00 ]
+Key: VPADDSWZ: [ 0.00 0.00 ]
+Key: VPADDSWZrm: [ 0.00 0.00 ]
+Key: VPADDSWZrmk: [ 0.00 0.00 ]
+Key: VPADDSWZrmkz: [ 0.00 0.00 ]
+Key: VPADDSWZrr: [ 0.00 0.00 ]
+Key: VPADDSWZrrk: [ 0.00 0.00 ]
+Key: VPADDSWZrrkz: [ 0.00 0.00 ]
+Key: VPADDSWrm: [ 0.00 0.00 ]
+Key: VPADDSWrr: [ 0.00 0.00 ]
+Key: VPADDUSBYrm: [ 0.00 0.00 ]
+Key: VPADDUSBYrr: [ 0.00 0.00 ]
+Key: VPADDUSBZ: [ 0.00 0.00 ]
+Key: VPADDUSBZrm: [ 0.00 0.00 ]
+Key: VPADDUSBZrmk: [ 0.00 0.00 ]
+Key: VPADDUSBZrmkz: [ 0.00 0.00 ]
+Key: VPADDUSBZrr: [ 0.00 0.00 ]
+Key: VPADDUSBZrrk: [ 0.00 0.00 ]
+Key: VPADDUSBZrrkz: [ 0.00 0.00 ]
+Key: VPADDUSBrm: [ 0.00 0.00 ]
+Key: VPADDUSBrr: [ 0.00 0.00 ]
+Key: VPADDUSWYrm: [ 0.00 0.00 ]
+Key: VPADDUSWYrr: [ 0.00 0.00 ]
+Key: VPADDUSWZ: [ 0.00 0.00 ]
+Key: VPADDUSWZrm: [ 0.00 0.00 ]
+Key: VPADDUSWZrmk: [ 0.00 0.00 ]
+Key: VPADDUSWZrmkz: [ 0.00 0.00 ]
+Key: VPADDUSWZrr: [ 0.00 0.00 ]
+Key: VPADDUSWZrrk: [ 0.00 0.00 ]
+Key: VPADDUSWZrrkz: [ 0.00 0.00 ]
+Key: VPADDUSWrm: [ 0.00 0.00 ]
+Key: VPADDUSWrr: [ 0.00 0.00 ]
+Key: VPADDWYrm: [ 0.00 0.00 ]
+Key: VPADDWYrr: [ 0.00 0.00 ]
+Key: VPADDWZ: [ 0.00 0.00 ]
+Key: VPADDWZrm: [ 0.00 0.00 ]
+Key: VPADDWZrmk: [ 0.00 0.00 ]
+Key: VPADDWZrmkz: [ 0.00 0.00 ]
+Key: VPADDWZrr: [ 0.00 0.00 ]
+Key: VPADDWZrrk: [ 0.00 0.00 ]
+Key: VPADDWZrrkz: [ 0.00 0.00 ]
+Key: VPADDWrm: [ 0.00 0.00 ]
+Key: VPADDWrr: [ 0.00 0.00 ]
+Key: VPALIGNRYrmi: [ 0.00 0.00 ]
+Key: VPALIGNRYrri: [ 0.00 0.00 ]
+Key: VPALIGNRZ: [ 0.00 0.00 ]
+Key: VPALIGNRZrmi: [ 0.00 0.00 ]
+Key: VPALIGNRZrmik: [ 0.00 0.00 ]
+Key: VPALIGNRZrmikz: [ 0.00 0.00 ]
+Key: VPALIGNRZrri: [ 0.00 0.00 ]
+Key: VPALIGNRZrrik: [ 0.00 0.00 ]
+Key: VPALIGNRZrrikz: [ 0.00 0.00 ]
+Key: VPALIGNRrmi: [ 0.00 0.00 ]
+Key: VPALIGNRrri: [ 0.00 0.00 ]
+Key: VPANDDZ: [ 0.00 0.00 ]
+Key: VPANDDZrm: [ 0.00 0.00 ]
+Key: VPANDDZrmb: [ 0.00 0.00 ]
+Key: VPANDDZrmbk: [ 0.00 0.00 ]
+Key: VPANDDZrmbkz: [ 0.00 0.00 ]
+Key: VPANDDZrmk: [ 0.00 0.00 ]
+Key: VPANDDZrmkz: [ 0.00 0.00 ]
+Key: VPANDDZrr: [ 0.00 0.00 ]
+Key: VPANDDZrrk: [ 0.00 0.00 ]
+Key: VPANDDZrrkz: [ 0.00 0.00 ]
+Key: VPANDNDZ: [ 0.00 0.00 ]
+Key: VPANDNDZrm: [ 0.00 0.00 ]
+Key: VPANDNDZrmb: [ 0.00 0.00 ]
+Key: VPANDNDZrmbk: [ 0.00 0.00 ]
+Key: VPANDNDZrmbkz: [ 0.00 0.00 ]
+Key: VPANDNDZrmk: [ 0.00 0.00 ]
+Key: VPANDNDZrmkz: [ 0.00 0.00 ]
+Key: VPANDNDZrr: [ 0.00 0.00 ]
+Key: VPANDNDZrrk: [ 0.00 0.00 ]
+Key: VPANDNDZrrkz: [ 0.00 0.00 ]
+Key: VPANDNQZ: [ 0.00 0.00 ]
+Key: VPANDNQZrm: [ 0.00 0.00 ]
+Key: VPANDNQZrmb: [ 0.00 0.00 ]
+Key: VPANDNQZrmbk: [ 0.00 0.00 ]
+Key: VPANDNQZrmbkz: [ 0.00 0.00 ]
+Key: VPANDNQZrmk: [ 0.00 0.00 ]
+Key: VPANDNQZrmkz: [ 0.00 0.00 ]
+Key: VPANDNQZrr: [ 0.00 0.00 ]
+Key: VPANDNQZrrk: [ 0.00 0.00 ]
+Key: VPANDNQZrrkz: [ 0.00 0.00 ]
+Key: VPANDNYrm: [ 0.00 0.00 ]
+Key: VPANDNYrr: [ 0.00 0.00 ]
+Key: VPANDNrm: [ 0.00 0.00 ]
+Key: VPANDNrr: [ 0.00 0.00 ]
+Key: VPANDQZ: [ 0.00 0.00 ]
+Key: VPANDQZrm: [ 0.00 0.00 ]
+Key: VPANDQZrmb: [ 0.00 0.00 ]
+Key: VPANDQZrmbk: [ 0.00 0.00 ]
+Key: VPANDQZrmbkz: [ 0.00 0.00 ]
+Key: VPANDQZrmk: [ 0.00 0.00 ]
+Key: VPANDQZrmkz: [ 0.00 0.00 ]
+Key: VPANDQZrr: [ 0.00 0.00 ]
+Key: VPANDQZrrk: [ 0.00 0.00 ]
+Key: VPANDQZrrkz: [ 0.00 0.00 ]
+Key: VPANDYrm: [ 0.00 0.00 ]
+Key: VPANDYrr: [ 0.00 0.00 ]
+Key: VPANDrm: [ 0.00 0.00 ]
+Key: VPANDrr: [ 0.00 0.00 ]
+Key: VPAVGBYrm: [ 0.00 0.00 ]
+Key: VPAVGBYrr: [ 0.00 0.00 ]
+Key: VPAVGBZ: [ 0.00 0.00 ]
+Key: VPAVGBZrm: [ 0.00 0.00 ]
+Key: VPAVGBZrmk: [ 0.00 0.00 ]
+Key: VPAVGBZrmkz: [ 0.00 0.00 ]
+Key: VPAVGBZrr: [ 0.00 0.00 ]
+Key: VPAVGBZrrk: [ 0.00 0.00 ]
+Key: VPAVGBZrrkz: [ 0.00 0.00 ]
+Key: VPAVGBrm: [ 0.00 0.00 ]
+Key: VPAVGBrr: [ 0.00 0.00 ]
+Key: VPAVGWYrm: [ 0.00 0.00 ]
+Key: VPAVGWYrr: [ 0.00 0.00 ]
+Key: VPAVGWZ: [ 0.00 0.00 ]
+Key: VPAVGWZrm: [ 0.00 0.00 ]
+Key: VPAVGWZrmk: [ 0.00 0.00 ]
+Key: VPAVGWZrmkz: [ 0.00 0.00 ]
+Key: VPAVGWZrr: [ 0.00 0.00 ]
+Key: VPAVGWZrrk: [ 0.00 0.00 ]
+Key: VPAVGWZrrkz: [ 0.00 0.00 ]
+Key: VPAVGWrm: [ 0.00 0.00 ]
+Key: VPAVGWrr: [ 0.00 0.00 ]
+Key: VPBLENDDYrmi: [ 0.00 0.00 ]
+Key: VPBLENDDYrri: [ 0.00 0.00 ]
+Key: VPBLENDDrmi: [ 0.00 0.00 ]
+Key: VPBLENDDrri: [ 0.00 0.00 ]
+Key: VPBLENDMBZ: [ 0.00 0.00 ]
+Key: VPBLENDMBZrm: [ 0.00 0.00 ]
+Key: VPBLENDMBZrmk: [ 0.00 0.00 ]
+Key: VPBLENDMBZrmkz: [ 0.00 0.00 ]
+Key: VPBLENDMBZrr: [ 0.00 0.00 ]
+Key: VPBLENDMBZrrk: [ 0.00 0.00 ]
+Key: VPBLENDMBZrrkz: [ 0.00 0.00 ]
+Key: VPBLENDMDZ: [ 0.00 0.00 ]
+Key: VPBLENDMDZrm: [ 0.00 0.00 ]
+Key: VPBLENDMDZrmb: [ 0.00 0.00 ]
+Key: VPBLENDMDZrmbk: [ 0.00 0.00 ]
+Key: VPBLENDMDZrmbkz: [ 0.00 0.00 ]
+Key: VPBLENDMDZrmk: [ 0.00 0.00 ]
+Key: VPBLENDMDZrmkz: [ 0.00 0.00 ]
+Key: VPBLENDMDZrr: [ 0.00 0.00 ]
+Key: VPBLENDMDZrrk: [ 0.00 0.00 ]
+Key: VPBLENDMDZrrkz: [ 0.00 0.00 ]
+Key: VPBLENDMQZ: [ 0.00 0.00 ]
+Key: VPBLENDMQZrm: [ 0.00 0.00 ]
+Key: VPBLENDMQZrmb: [ 0.00 0.00 ]
+Key: VPBLENDMQZrmbk: [ 0.00 0.00 ]
+Key: VPBLENDMQZrmbkz: [ 0.00 0.00 ]
+Key: VPBLENDMQZrmk: [ 0.00 0.00 ]
+Key: VPBLENDMQZrmkz: [ 0.00 0.00 ]
+Key: VPBLENDMQZrr: [ 0.00 0.00 ]
+Key: VPBLENDMQZrrk: [ 0.00 0.00 ]
+Key: VPBLENDMQZrrkz: [ 0.00 0.00 ]
+Key: VPBLENDMWZ: [ 0.00 0.00 ]
+Key: VPBLENDMWZrm: [ 0.00 0.00 ]
+Key: VPBLENDMWZrmk: [ 0.00 0.00 ]
+Key: VPBLENDMWZrmkz: [ 0.00 0.00 ]
+Key: VPBLENDMWZrr: [ 0.00 0.00 ]
+Key: VPBLENDMWZrrk: [ 0.00 0.00 ]
+Key: VPBLENDMWZrrkz: [ 0.00 0.00 ]
+Key: VPBLENDVBYrmr: [ 0.00 0.00 ]
+Key: VPBLENDVBYrrr: [ 0.00 0.00 ]
+Key: VPBLENDVBrmr: [ 0.00 0.00 ]
+Key: VPBLENDVBrrr: [ 0.00 0.00 ]
+Key: VPBLENDWYrmi: [ 0.00 0.00 ]
+Key: VPBLENDWYrri: [ 0.00 0.00 ]
+Key: VPBLENDWrmi: [ 0.00 0.00 ]
+Key: VPBLENDWrri: [ 0.00 0.00 ]
+Key: VPBROADCASTBYrm: [ 0.00 0.00 ]
+Key: VPBROADCASTBYrr: [ 0.00 0.00 ]
+Key: VPBROADCASTBZ: [ 0.00 0.00 ]
+Key: VPBROADCASTBZrm: [ 0.00 0.00 ]
+Key: VPBROADCASTBZrmk: [ 0.00 0.00 ]
+Key: VPBROADCASTBZrmkz: [ 0.00 0.00 ]
+Key: VPBROADCASTBZrr: [ 0.00 0.00 ]
+Key: VPBROADCASTBZrrk: [ 0.00 0.00 ]
+Key: VPBROADCASTBZrrkz: [ 0.00 0.00 ]
+Key: VPBROADCASTBrZ: [ 0.00 0.00 ]
+Key: VPBROADCASTBrZrr: [ 0.00 0.00 ]
+Key: VPBROADCASTBrZrrk: [ 0.00 0.00 ]
+Key: VPBROADCASTBrZrrkz: [ 0.00 0.00 ]
+Key: VPBROADCASTBrm: [ 0.00 0.00 ]
+Key: VPBROADCASTBrr: [ 0.00 0.00 ]
+Key: VPBROADCASTDYrm: [ 0.00 0.00 ]
+Key: VPBROADCASTDYrr: [ 0.00 0.00 ]
+Key: VPBROADCASTDZ: [ 0.00 0.00 ]
+Key: VPBROADCASTDZrm: [ 0.00 0.00 ]
+Key: VPBROADCASTDZrmk: [ 0.00 0.00 ]
+Key: VPBROADCASTDZrmkz: [ 0.00 0.00 ]
+Key: VPBROADCASTDZrr: [ 0.00 0.00 ]
+Key: VPBROADCASTDZrrk: [ 0.00 0.00 ]
+Key: VPBROADCASTDZrrkz: [ 0.00 0.00 ]
+Key: VPBROADCASTDrZ: [ 0.00 0.00 ]
+Key: VPBROADCASTDrZrr: [ 0.00 0.00 ]
+Key: VPBROADCASTDrZrrk: [ 0.00 0.00 ]
+Key: VPBROADCASTDrZrrkz: [ 0.00 0.00 ]
+Key: VPBROADCASTDrm: [ 0.00 0.00 ]
+Key: VPBROADCASTDrr: [ 0.00 0.00 ]
+Key: VPBROADCASTMB: [ 0.00 0.00 ]
+Key: VPBROADCASTMW: [ 0.00 0.00 ]
+Key: VPBROADCASTQYrm: [ 0.00 0.00 ]
+Key: VPBROADCASTQYrr: [ 0.00 0.00 ]
+Key: VPBROADCASTQZ: [ 0.00 0.00 ]
+Key: VPBROADCASTQZrm: [ 0.00 0.00 ]
+Key: VPBROADCASTQZrmk: [ 0.00 0.00 ]
+Key: VPBROADCASTQZrmkz: [ 0.00 0.00 ]
+Key: VPBROADCASTQZrr: [ 0.00 0.00 ]
+Key: VPBROADCASTQZrrk: [ 0.00 0.00 ]
+Key: VPBROADCASTQZrrkz: [ 0.00 0.00 ]
+Key: VPBROADCASTQrZ: [ 0.00 0.00 ]
+Key: VPBROADCASTQrZrr: [ 0.00 0.00 ]
+Key: VPBROADCASTQrZrrk: [ 0.00 0.00 ]
+Key: VPBROADCASTQrZrrkz: [ 0.00 0.00 ]
+Key: VPBROADCASTQrm: [ 0.00 0.00 ]
+Key: VPBROADCASTQrr: [ 0.00 0.00 ]
+Key: VPBROADCASTWYrm: [ 0.00 0.00 ]
+Key: VPBROADCASTWYrr: [ 0.00 0.00 ]
+Key: VPBROADCASTWZ: [ 0.00 0.00 ]
+Key: VPBROADCASTWZrm: [ 0.00 0.00 ]
+Key: VPBROADCASTWZrmk: [ 0.00 0.00 ]
+Key: VPBROADCASTWZrmkz: [ 0.00 0.00 ]
+Key: VPBROADCASTWZrr: [ 0.00 0.00 ]
+Key: VPBROADCASTWZrrk: [ 0.00 0.00 ]
+Key: VPBROADCASTWZrrkz: [ 0.00 0.00 ]
+Key: VPBROADCASTWrZ: [ 0.00 0.00 ]
+Key: VPBROADCASTWrZrr: [ 0.00 0.00 ]
+Key: VPBROADCASTWrZrrk: [ 0.00 0.00 ]
+Key: VPBROADCASTWrZrrkz: [ 0.00 0.00 ]
+Key: VPBROADCASTWrm: [ 0.00 0.00 ]
+Key: VPBROADCASTWrr: [ 0.00 0.00 ]
+Key: VPCLMULQDQYrmi: [ 0.00 0.00 ]
+Key: VPCLMULQDQYrri: [ 0.00 0.00 ]
+Key: VPCLMULQDQZ: [ 0.00 0.00 ]
+Key: VPCLMULQDQZrmi: [ 0.00 0.00 ]
+Key: VPCLMULQDQZrri: [ 0.00 0.00 ]
+Key: VPCLMULQDQrmi: [ 0.00 0.00 ]
+Key: VPCLMULQDQrri: [ 0.00 0.00 ]
+Key: VPCMOVYrmr: [ 0.00 0.00 ]
+Key: VPCMOVYrrm: [ 0.00 0.00 ]
+Key: VPCMOVYrrr: [ 0.00 0.00 ]
+Key: VPCMOVYrrr_REV: [ 0.00 0.00 ]
+Key: VPCMOVrmr: [ 0.00 0.00 ]
+Key: VPCMOVrrm: [ 0.00 0.00 ]
+Key: VPCMOVrrr: [ 0.00 0.00 ]
+Key: VPCMOVrrr_REV: [ 0.00 0.00 ]
+Key: VPCMPBZ: [ 0.00 0.00 ]
+Key: VPCMPBZrmi: [ 0.00 0.00 ]
+Key: VPCMPBZrmik: [ 0.00 0.00 ]
+Key: VPCMPBZrri: [ 0.00 0.00 ]
+Key: VPCMPBZrrik: [ 0.00 0.00 ]
+Key: VPCMPDZ: [ 0.00 0.00 ]
+Key: VPCMPDZrmbi: [ 0.00 0.00 ]
+Key: VPCMPDZrmbik: [ 0.00 0.00 ]
+Key: VPCMPDZrmi: [ 0.00 0.00 ]
+Key: VPCMPDZrmik: [ 0.00 0.00 ]
+Key: VPCMPDZrri: [ 0.00 0.00 ]
+Key: VPCMPDZrrik: [ 0.00 0.00 ]
+Key: VPCMPEQBYrm: [ 0.00 0.00 ]
+Key: VPCMPEQBYrr: [ 0.00 0.00 ]
+Key: VPCMPEQBZ: [ 0.00 0.00 ]
+Key: VPCMPEQBZrm: [ 0.00 0.00 ]
+Key: VPCMPEQBZrmk: [ 0.00 0.00 ]
+Key: VPCMPEQBZrr: [ 0.00 0.00 ]
+Key: VPCMPEQBZrrk: [ 0.00 0.00 ]
+Key: VPCMPEQBrm: [ 0.00 0.00 ]
+Key: VPCMPEQBrr: [ 0.00 0.00 ]
+Key: VPCMPEQDYrm: [ 0.00 0.00 ]
+Key: VPCMPEQDYrr: [ 0.00 0.00 ]
+Key: VPCMPEQDZ: [ 0.00 0.00 ]
+Key: VPCMPEQDZrm: [ 0.00 0.00 ]
+Key: VPCMPEQDZrmb: [ 0.00 0.00 ]
+Key: VPCMPEQDZrmbk: [ 0.00 0.00 ]
+Key: VPCMPEQDZrmk: [ 0.00 0.00 ]
+Key: VPCMPEQDZrr: [ 0.00 0.00 ]
+Key: VPCMPEQDZrrk: [ 0.00 0.00 ]
+Key: VPCMPEQDrm: [ 0.00 0.00 ]
+Key: VPCMPEQDrr: [ 0.00 0.00 ]
+Key: VPCMPEQQYrm: [ 0.00 0.00 ]
+Key: VPCMPEQQYrr: [ 0.00 0.00 ]
+Key: VPCMPEQQZ: [ 0.00 0.00 ]
+Key: VPCMPEQQZrm: [ 0.00 0.00 ]
+Key: VPCMPEQQZrmb: [ 0.00 0.00 ]
+Key: VPCMPEQQZrmbk: [ 0.00 0.00 ]
+Key: VPCMPEQQZrmk: [ 0.00 0.00 ]
+Key: VPCMPEQQZrr: [ 0.00 0.00 ]
+Key: VPCMPEQQZrrk: [ 0.00 0.00 ]
+Key: VPCMPEQQrm: [ 0.00 0.00 ]
+Key: VPCMPEQQrr: [ 0.00 0.00 ]
+Key: VPCMPEQWYrm: [ 0.00 0.00 ]
+Key: VPCMPEQWYrr: [ 0.00 0.00 ]
+Key: VPCMPEQWZ: [ 0.00 0.00 ]
+Key: VPCMPEQWZrm: [ 0.00 0.00 ]
+Key: VPCMPEQWZrmk: [ 0.00 0.00 ]
+Key: VPCMPEQWZrr: [ 0.00 0.00 ]
+Key: VPCMPEQWZrrk: [ 0.00 0.00 ]
+Key: VPCMPEQWrm: [ 0.00 0.00 ]
+Key: VPCMPEQWrr: [ 0.00 0.00 ]
+Key: VPCMPESTRIrmi: [ 0.00 0.00 ]
+Key: VPCMPESTRIrri: [ 0.00 0.00 ]
+Key: VPCMPESTRMrmi: [ 0.00 0.00 ]
+Key: VPCMPESTRMrri: [ 0.00 0.00 ]
+Key: VPCMPGTBYrm: [ 0.00 0.00 ]
+Key: VPCMPGTBYrr: [ 0.00 0.00 ]
+Key: VPCMPGTBZ: [ 0.00 0.00 ]
+Key: VPCMPGTBZrm: [ 0.00 0.00 ]
+Key: VPCMPGTBZrmk: [ 0.00 0.00 ]
+Key: VPCMPGTBZrr: [ 0.00 0.00 ]
+Key: VPCMPGTBZrrk: [ 0.00 0.00 ]
+Key: VPCMPGTBrm: [ 0.00 0.00 ]
+Key: VPCMPGTBrr: [ 0.00 0.00 ]
+Key: VPCMPGTDYrm: [ 0.00 0.00 ]
+Key: VPCMPGTDYrr: [ 0.00 0.00 ]
+Key: VPCMPGTDZ: [ 0.00 0.00 ]
+Key: VPCMPGTDZrm: [ 0.00 0.00 ]
+Key: VPCMPGTDZrmb: [ 0.00 0.00 ]
+Key: VPCMPGTDZrmbk: [ 0.00 0.00 ]
+Key: VPCMPGTDZrmk: [ 0.00 0.00 ]
+Key: VPCMPGTDZrr: [ 0.00 0.00 ]
+Key: VPCMPGTDZrrk: [ 0.00 0.00 ]
+Key: VPCMPGTDrm: [ 0.00 0.00 ]
+Key: VPCMPGTDrr: [ 0.00 0.00 ]
+Key: VPCMPGTQYrm: [ 0.00 0.00 ]
+Key: VPCMPGTQYrr: [ 0.00 0.00 ]
+Key: VPCMPGTQZ: [ 0.00 0.00 ]
+Key: VPCMPGTQZrm: [ 0.00 0.00 ]
+Key: VPCMPGTQZrmb: [ 0.00 0.00 ]
+Key: VPCMPGTQZrmbk: [ 0.00 0.00 ]
+Key: VPCMPGTQZrmk: [ 0.00 0.00 ]
+Key: VPCMPGTQZrr: [ 0.00 0.00 ]
+Key: VPCMPGTQZrrk: [ 0.00 0.00 ]
+Key: VPCMPGTQrm: [ 0.00 0.00 ]
+Key: VPCMPGTQrr: [ 0.00 0.00 ]
+Key: VPCMPGTWYrm: [ 0.00 0.00 ]
+Key: VPCMPGTWYrr: [ 0.00 0.00 ]
+Key: VPCMPGTWZ: [ 0.00 0.00 ]
+Key: VPCMPGTWZrm: [ 0.00 0.00 ]
+Key: VPCMPGTWZrmk: [ 0.00 0.00 ]
+Key: VPCMPGTWZrr: [ 0.00 0.00 ]
+Key: VPCMPGTWZrrk: [ 0.00 0.00 ]
+Key: VPCMPGTWrm: [ 0.00 0.00 ]
+Key: VPCMPGTWrr: [ 0.00 0.00 ]
+Key: VPCMPISTRIrmi: [ 0.00 0.00 ]
+Key: VPCMPISTRIrri: [ 0.00 0.00 ]
+Key: VPCMPISTRMrmi: [ 0.00 0.00 ]
+Key: VPCMPISTRMrri: [ 0.00 0.00 ]
+Key: VPCMPQZ: [ 0.00 0.00 ]
+Key: VPCMPQZrmbi: [ 0.00 0.00 ]
+Key: VPCMPQZrmbik: [ 0.00 0.00 ]
+Key: VPCMPQZrmi: [ 0.00 0.00 ]
+Key: VPCMPQZrmik: [ 0.00 0.00 ]
+Key: VPCMPQZrri: [ 0.00 0.00 ]
+Key: VPCMPQZrrik: [ 0.00 0.00 ]
+Key: VPCMPUBZ: [ 0.00 0.00 ]
+Key: VPCMPUBZrmi: [ 0.00 0.00 ]
+Key: VPCMPUBZrmik: [ 0.00 0.00 ]
+Key: VPCMPUBZrri: [ 0.00 0.00 ]
+Key: VPCMPUBZrrik: [ 0.00 0.00 ]
+Key: VPCMPUDZ: [ 0.00 0.00 ]
+Key: VPCMPUDZrmbi: [ 0.00 0.00 ]
+Key: VPCMPUDZrmbik: [ 0.00 0.00 ]
+Key: VPCMPUDZrmi: [ 0.00 0.00 ]
+Key: VPCMPUDZrmik: [ 0.00 0.00 ]
+Key: VPCMPUDZrri: [ 0.00 0.00 ]
+Key: VPCMPUDZrrik: [ 0.00 0.00 ]
+Key: VPCMPUQZ: [ 0.00 0.00 ]
+Key: VPCMPUQZrmbi: [ 0.00 0.00 ]
+Key: VPCMPUQZrmbik: [ 0.00 0.00 ]
+Key: VPCMPUQZrmi: [ 0.00 0.00 ]
+Key: VPCMPUQZrmik: [ 0.00 0.00 ]
+Key: VPCMPUQZrri: [ 0.00 0.00 ]
+Key: VPCMPUQZrrik: [ 0.00 0.00 ]
+Key: VPCMPUWZ: [ 0.00 0.00 ]
+Key: VPCMPUWZrmi: [ 0.00 0.00 ]
+Key: VPCMPUWZrmik: [ 0.00 0.00 ]
+Key: VPCMPUWZrri: [ 0.00 0.00 ]
+Key: VPCMPUWZrrik: [ 0.00 0.00 ]
+Key: VPCMPWZ: [ 0.00 0.00 ]
+Key: VPCMPWZrmi: [ 0.00 0.00 ]
+Key: VPCMPWZrmik: [ 0.00 0.00 ]
+Key: VPCMPWZrri: [ 0.00 0.00 ]
+Key: VPCMPWZrrik: [ 0.00 0.00 ]
+Key: VPCOMBmi: [ 0.00 0.00 ]
+Key: VPCOMBri: [ 0.00 0.00 ]
+Key: VPCOMDmi: [ 0.00 0.00 ]
+Key: VPCOMDri: [ 0.00 0.00 ]
+Key: VPCOMPRESSBZ: [ 0.00 0.00 ]
+Key: VPCOMPRESSBZmr: [ 0.00 0.00 ]
+Key: VPCOMPRESSBZmrk: [ 0.00 0.00 ]
+Key: VPCOMPRESSBZrr: [ 0.00 0.00 ]
+Key: VPCOMPRESSBZrrk: [ 0.00 0.00 ]
+Key: VPCOMPRESSBZrrkz: [ 0.00 0.00 ]
+Key: VPCOMPRESSDZ: [ 0.00 0.00 ]
+Key: VPCOMPRESSDZmr: [ 0.00 0.00 ]
+Key: VPCOMPRESSDZmrk: [ 0.00 0.00 ]
+Key: VPCOMPRESSDZrr: [ 0.00 0.00 ]
+Key: VPCOMPRESSDZrrk: [ 0.00 0.00 ]
+Key: VPCOMPRESSDZrrkz: [ 0.00 0.00 ]
+Key: VPCOMPRESSQZ: [ 0.00 0.00 ]
+Key: VPCOMPRESSQZmr: [ 0.00 0.00 ]
+Key: VPCOMPRESSQZmrk: [ 0.00 0.00 ]
+Key: VPCOMPRESSQZrr: [ 0.00 0.00 ]
+Key: VPCOMPRESSQZrrk: [ 0.00 0.00 ]
+Key: VPCOMPRESSQZrrkz: [ 0.00 0.00 ]
+Key: VPCOMPRESSWZ: [ 0.00 0.00 ]
+Key: VPCOMPRESSWZmr: [ 0.00 0.00 ]
+Key: VPCOMPRESSWZmrk: [ 0.00 0.00 ]
+Key: VPCOMPRESSWZrr: [ 0.00 0.00 ]
+Key: VPCOMPRESSWZrrk: [ 0.00 0.00 ]
+Key: VPCOMPRESSWZrrkz: [ 0.00 0.00 ]
+Key: VPCOMQmi: [ 0.00 0.00 ]
+Key: VPCOMQri: [ 0.00 0.00 ]
+Key: VPCOMUBmi: [ 0.00 0.00 ]
+Key: VPCOMUBri: [ 0.00 0.00 ]
+Key: VPCOMUDmi: [ 0.00 0.00 ]
+Key: VPCOMUDri: [ 0.00 0.00 ]
+Key: VPCOMUQmi: [ 0.00 0.00 ]
+Key: VPCOMUQri: [ 0.00 0.00 ]
+Key: VPCOMUWmi: [ 0.00 0.00 ]
+Key: VPCOMUWri: [ 0.00 0.00 ]
+Key: VPCOMWmi: [ 0.00 0.00 ]
+Key: VPCOMWri: [ 0.00 0.00 ]
+Key: VPCONFLICTDZ: [ 0.00 0.00 ]
+Key: VPCONFLICTDZrm: [ 0.00 0.00 ]
+Key: VPCONFLICTDZrmb: [ 0.00 0.00 ]
+Key: VPCONFLICTDZrmbk: [ 0.00 0.00 ]
+Key: VPCONFLICTDZrmbkz: [ 0.00 0.00 ]
+Key: VPCONFLICTDZrmk: [ 0.00 0.00 ]
+Key: VPCONFLICTDZrmkz: [ 0.00 0.00 ]
+Key: VPCONFLICTDZrr: [ 0.00 0.00 ]
+Key: VPCONFLICTDZrrk: [ 0.00 0.00 ]
+Key: VPCONFLICTDZrrkz: [ 0.00 0.00 ]
+Key: VPCONFLICTQZ: [ 0.00 0.00 ]
+Key: VPCONFLICTQZrm: [ 0.00 0.00 ]
+Key: VPCONFLICTQZrmb: [ 0.00 0.00 ]
+Key: VPCONFLICTQZrmbk: [ 0.00 0.00 ]
+Key: VPCONFLICTQZrmbkz: [ 0.00 0.00 ]
+Key: VPCONFLICTQZrmk: [ 0.00 0.00 ]
+Key: VPCONFLICTQZrmkz: [ 0.00 0.00 ]
+Key: VPCONFLICTQZrr: [ 0.00 0.00 ]
+Key: VPCONFLICTQZrrk: [ 0.00 0.00 ]
+Key: VPCONFLICTQZrrkz: [ 0.00 0.00 ]
+Key: VPDPBSSDSYrm: [ 0.00 0.00 ]
+Key: VPDPBSSDSYrr: [ 0.00 0.00 ]
+Key: VPDPBSSDSZ: [ 0.00 0.00 ]
+Key: VPDPBSSDSZrm: [ 0.00 0.00 ]
+Key: VPDPBSSDSZrmb: [ 0.00 0.00 ]
+Key: VPDPBSSDSZrmbk: [ 0.00 0.00 ]
+Key: VPDPBSSDSZrmbkz: [ 0.00 0.00 ]
+Key: VPDPBSSDSZrmk: [ 0.00 0.00 ]
+Key: VPDPBSSDSZrmkz: [ 0.00 0.00 ]
+Key: VPDPBSSDSZrr: [ 0.00 0.00 ]
+Key: VPDPBSSDSZrrk: [ 0.00 0.00 ]
+Key: VPDPBSSDSZrrkz: [ 0.00 0.00 ]
+Key: VPDPBSSDSrm: [ 0.00 0.00 ]
+Key: VPDPBSSDSrr: [ 0.00 0.00 ]
+Key: VPDPBSSDYrm: [ 0.00 0.00 ]
+Key: VPDPBSSDYrr: [ 0.00 0.00 ]
+Key: VPDPBSSDZ: [ 0.00 0.00 ]
+Key: VPDPBSSDZrm: [ 0.00 0.00 ]
+Key: VPDPBSSDZrmb: [ 0.00 0.00 ]
+Key: VPDPBSSDZrmbk: [ 0.00 0.00 ]
+Key: VPDPBSSDZrmbkz: [ 0.00 0.00 ]
+Key: VPDPBSSDZrmk: [ 0.00 0.00 ]
+Key: VPDPBSSDZrmkz: [ 0.00 0.00 ]
+Key: VPDPBSSDZrr: [ 0.00 0.00 ]
+Key: VPDPBSSDZrrk: [ 0.00 0.00 ]
+Key: VPDPBSSDZrrkz: [ 0.00 0.00 ]
+Key: VPDPBSSDrm: [ 0.00 0.00 ]
+Key: VPDPBSSDrr: [ 0.00 0.00 ]
+Key: VPDPBSUDSYrm: [ 0.00 0.00 ]
+Key: VPDPBSUDSYrr: [ 0.00 0.00 ]
+Key: VPDPBSUDSZ: [ 0.00 0.00 ]
+Key: VPDPBSUDSZrm: [ 0.00 0.00 ]
+Key: VPDPBSUDSZrmb: [ 0.00 0.00 ]
+Key: VPDPBSUDSZrmbk: [ 0.00 0.00 ]
+Key: VPDPBSUDSZrmbkz: [ 0.00 0.00 ]
+Key: VPDPBSUDSZrmk: [ 0.00 0.00 ]
+Key: VPDPBSUDSZrmkz: [ 0.00 0.00 ]
+Key: VPDPBSUDSZrr: [ 0.00 0.00 ]
+Key: VPDPBSUDSZrrk: [ 0.00 0.00 ]
+Key: VPDPBSUDSZrrkz: [ 0.00 0.00 ]
+Key: VPDPBSUDSrm: [ 0.00 0.00 ]
+Key: VPDPBSUDSrr: [ 0.00 0.00 ]
+Key: VPDPBSUDYrm: [ 0.00 0.00 ]
+Key: VPDPBSUDYrr: [ 0.00 0.00 ]
+Key: VPDPBSUDZ: [ 0.00 0.00 ]
+Key: VPDPBSUDZrm: [ 0.00 0.00 ]
+Key: VPDPBSUDZrmb: [ 0.00 0.00 ]
+Key: VPDPBSUDZrmbk: [ 0.00 0.00 ]
+Key: VPDPBSUDZrmbkz: [ 0.00 0.00 ]
+Key: VPDPBSUDZrmk: [ 0.00 0.00 ]
+Key: VPDPBSUDZrmkz: [ 0.00 0.00 ]
+Key: VPDPBSUDZrr: [ 0.00 0.00 ]
+Key: VPDPBSUDZrrk: [ 0.00 0.00 ]
+Key: VPDPBSUDZrrkz: [ 0.00 0.00 ]
+Key: VPDPBSUDrm: [ 0.00 0.00 ]
+Key: VPDPBSUDrr: [ 0.00 0.00 ]
+Key: VPDPBUSDSYrm: [ 0.00 0.00 ]
+Key: VPDPBUSDSYrr: [ 0.00 0.00 ]
+Key: VPDPBUSDSZ: [ 0.00 0.00 ]
+Key: VPDPBUSDSZrm: [ 0.00 0.00 ]
+Key: VPDPBUSDSZrmb: [ 0.00 0.00 ]
+Key: VPDPBUSDSZrmbk: [ 0.00 0.00 ]
+Key: VPDPBUSDSZrmbkz: [ 0.00 0.00 ]
+Key: VPDPBUSDSZrmk: [ 0.00 0.00 ]
+Key: VPDPBUSDSZrmkz: [ 0.00 0.00 ]
+Key: VPDPBUSDSZrr: [ 0.00 0.00 ]
+Key: VPDPBUSDSZrrk: [ 0.00 0.00 ]
+Key: VPDPBUSDSZrrkz: [ 0.00 0.00 ]
+Key: VPDPBUSDSrm: [ 0.00 0.00 ]
+Key: VPDPBUSDSrr: [ 0.00 0.00 ]
+Key: VPDPBUSDYrm: [ 0.00 0.00 ]
+Key: VPDPBUSDYrr: [ 0.00 0.00 ]
+Key: VPDPBUSDZ: [ 0.00 0.00 ]
+Key: VPDPBUSDZrm: [ 0.00 0.00 ]
+Key: VPDPBUSDZrmb: [ 0.00 0.00 ]
+Key: VPDPBUSDZrmbk: [ 0.00 0.00 ]
+Key: VPDPBUSDZrmbkz: [ 0.00 0.00 ]
+Key: VPDPBUSDZrmk: [ 0.00 0.00 ]
+Key: VPDPBUSDZrmkz: [ 0.00 0.00 ]
+Key: VPDPBUSDZrr: [ 0.00 0.00 ]
+Key: VPDPBUSDZrrk: [ 0.00 0.00 ]
+Key: VPDPBUSDZrrkz: [ 0.00 0.00 ]
+Key: VPDPBUSDrm: [ 0.00 0.00 ]
+Key: VPDPBUSDrr: [ 0.00 0.00 ]
+Key: VPDPBUUDSYrm: [ 0.00 0.00 ]
+Key: VPDPBUUDSYrr: [ 0.00 0.00 ]
+Key: VPDPBUUDSZ: [ 0.00 0.00 ]
+Key: VPDPBUUDSZrm: [ 0.00 0.00 ]
+Key: VPDPBUUDSZrmb: [ 0.00 0.00 ]
+Key: VPDPBUUDSZrmbk: [ 0.00 0.00 ]
+Key: VPDPBUUDSZrmbkz: [ 0.00 0.00 ]
+Key: VPDPBUUDSZrmk: [ 0.00 0.00 ]
+Key: VPDPBUUDSZrmkz: [ 0.00 0.00 ]
+Key: VPDPBUUDSZrr: [ 0.00 0.00 ]
+Key: VPDPBUUDSZrrk: [ 0.00 0.00 ]
+Key: VPDPBUUDSZrrkz: [ 0.00 0.00 ]
+Key: VPDPBUUDSrm: [ 0.00 0.00 ]
+Key: VPDPBUUDSrr: [ 0.00 0.00 ]
+Key: VPDPBUUDYrm: [ 0.00 0.00 ]
+Key: VPDPBUUDYrr: [ 0.00 0.00 ]
+Key: VPDPBUUDZ: [ 0.00 0.00 ]
+Key: VPDPBUUDZrm: [ 0.00 0.00 ]
+Key: VPDPBUUDZrmb: [ 0.00 0.00 ]
+Key: VPDPBUUDZrmbk: [ 0.00 0.00 ]
+Key: VPDPBUUDZrmbkz: [ 0.00 0.00 ]
+Key: VPDPBUUDZrmk: [ 0.00 0.00 ]
+Key: VPDPBUUDZrmkz: [ 0.00 0.00 ]
+Key: VPDPBUUDZrr: [ 0.00 0.00 ]
+Key: VPDPBUUDZrrk: [ 0.00 0.00 ]
+Key: VPDPBUUDZrrkz: [ 0.00 0.00 ]
+Key: VPDPBUUDrm: [ 0.00 0.00 ]
+Key: VPDPBUUDrr: [ 0.00 0.00 ]
+Key: VPDPWSSDSYrm: [ 0.00 0.00 ]
+Key: VPDPWSSDSYrr: [ 0.00 0.00 ]
+Key: VPDPWSSDSZ: [ 0.00 0.00 ]
+Key: VPDPWSSDSZrm: [ 0.00 0.00 ]
+Key: VPDPWSSDSZrmb: [ 0.00 0.00 ]
+Key: VPDPWSSDSZrmbk: [ 0.00 0.00 ]
+Key: VPDPWSSDSZrmbkz: [ 0.00 0.00 ]
+Key: VPDPWSSDSZrmk: [ 0.00 0.00 ]
+Key: VPDPWSSDSZrmkz: [ 0.00 0.00 ]
+Key: VPDPWSSDSZrr: [ 0.00 0.00 ]
+Key: VPDPWSSDSZrrk: [ 0.00 0.00 ]
+Key: VPDPWSSDSZrrkz: [ 0.00 0.00 ]
+Key: VPDPWSSDSrm: [ 0.00 0.00 ]
+Key: VPDPWSSDSrr: [ 0.00 0.00 ]
+Key: VPDPWSSDYrm: [ 0.00 0.00 ]
+Key: VPDPWSSDYrr: [ 0.00 0.00 ]
+Key: VPDPWSSDZ: [ 0.00 0.00 ]
+Key: VPDPWSSDZrm: [ 0.00 0.00 ]
+Key: VPDPWSSDZrmb: [ 0.00 0.00 ]
+Key: VPDPWSSDZrmbk: [ 0.00 0.00 ]
+Key: VPDPWSSDZrmbkz: [ 0.00 0.00 ]
+Key: VPDPWSSDZrmk: [ 0.00 0.00 ]
+Key: VPDPWSSDZrmkz: [ 0.00 0.00 ]
+Key: VPDPWSSDZrr: [ 0.00 0.00 ]
+Key: VPDPWSSDZrrk: [ 0.00 0.00 ]
+Key: VPDPWSSDZrrkz: [ 0.00 0.00 ]
+Key: VPDPWSSDrm: [ 0.00 0.00 ]
+Key: VPDPWSSDrr: [ 0.00 0.00 ]
+Key: VPDPWSUDSYrm: [ 0.00 0.00 ]
+Key: VPDPWSUDSYrr: [ 0.00 0.00 ]
+Key: VPDPWSUDSZ: [ 0.00 0.00 ]
+Key: VPDPWSUDSZrm: [ 0.00 0.00 ]
+Key: VPDPWSUDSZrmb: [ 0.00 0.00 ]
+Key: VPDPWSUDSZrmbk: [ 0.00 0.00 ]
+Key: VPDPWSUDSZrmbkz: [ 0.00 0.00 ]
+Key: VPDPWSUDSZrmk: [ 0.00 0.00 ]
+Key: VPDPWSUDSZrmkz: [ 0.00 0.00 ]
+Key: VPDPWSUDSZrr: [ 0.00 0.00 ]
+Key: VPDPWSUDSZrrk: [ 0.00 0.00 ]
+Key: VPDPWSUDSZrrkz: [ 0.00 0.00 ]
+Key: VPDPWSUDSrm: [ 0.00 0.00 ]
+Key: VPDPWSUDSrr: [ 0.00 0.00 ]
+Key: VPDPWSUDYrm: [ 0.00 0.00 ]
+Key: VPDPWSUDYrr: [ 0.00 0.00 ]
+Key: VPDPWSUDZ: [ 0.00 0.00 ]
+Key: VPDPWSUDZrm: [ 0.00 0.00 ]
+Key: VPDPWSUDZrmb: [ 0.00 0.00 ]
+Key: VPDPWSUDZrmbk: [ 0.00 0.00 ]
+Key: VPDPWSUDZrmbkz: [ 0.00 0.00 ]
+Key: VPDPWSUDZrmk: [ 0.00 0.00 ]
+Key: VPDPWSUDZrmkz: [ 0.00 0.00 ]
+Key: VPDPWSUDZrr: [ 0.00 0.00 ]
+Key: VPDPWSUDZrrk: [ 0.00 0.00 ]
+Key: VPDPWSUDZrrkz: [ 0.00 0.00 ]
+Key: VPDPWSUDrm: [ 0.00 0.00 ]
+Key: VPDPWSUDrr: [ 0.00 0.00 ]
+Key: VPDPWUSDSYrm: [ 0.00 0.00 ]
+Key: VPDPWUSDSYrr: [ 0.00 0.00 ]
+Key: VPDPWUSDSZ: [ 0.00 0.00 ]
+Key: VPDPWUSDSZrm: [ 0.00 0.00 ]
+Key: VPDPWUSDSZrmb: [ 0.00 0.00 ]
+Key: VPDPWUSDSZrmbk: [ 0.00 0.00 ]
+Key: VPDPWUSDSZrmbkz: [ 0.00 0.00 ]
+Key: VPDPWUSDSZrmk: [ 0.00 0.00 ]
+Key: VPDPWUSDSZrmkz: [ 0.00 0.00 ]
+Key: VPDPWUSDSZrr: [ 0.00 0.00 ]
+Key: VPDPWUSDSZrrk: [ 0.00 0.00 ]
+Key: VPDPWUSDSZrrkz: [ 0.00 0.00 ]
+Key: VPDPWUSDSrm: [ 0.00 0.00 ]
+Key: VPDPWUSDSrr: [ 0.00 0.00 ]
+Key: VPDPWUSDYrm: [ 0.00 0.00 ]
+Key: VPDPWUSDYrr: [ 0.00 0.00 ]
+Key: VPDPWUSDZ: [ 0.00 0.00 ]
+Key: VPDPWUSDZrm: [ 0.00 0.00 ]
+Key: VPDPWUSDZrmb: [ 0.00 0.00 ]
+Key: VPDPWUSDZrmbk: [ 0.00 0.00 ]
+Key: VPDPWUSDZrmbkz: [ 0.00 0.00 ]
+Key: VPDPWUSDZrmk: [ 0.00 0.00 ]
+Key: VPDPWUSDZrmkz: [ 0.00 0.00 ]
+Key: VPDPWUSDZrr: [ 0.00 0.00 ]
+Key: VPDPWUSDZrrk: [ 0.00 0.00 ]
+Key: VPDPWUSDZrrkz: [ 0.00 0.00 ]
+Key: VPDPWUSDrm: [ 0.00 0.00 ]
+Key: VPDPWUSDrr: [ 0.00 0.00 ]
+Key: VPDPWUUDSYrm: [ 0.00 0.00 ]
+Key: VPDPWUUDSYrr: [ 0.00 0.00 ]
+Key: VPDPWUUDSZ: [ 0.00 0.00 ]
+Key: VPDPWUUDSZrm: [ 0.00 0.00 ]
+Key: VPDPWUUDSZrmb: [ 0.00 0.00 ]
+Key: VPDPWUUDSZrmbk: [ 0.00 0.00 ]
+Key: VPDPWUUDSZrmbkz: [ 0.00 0.00 ]
+Key: VPDPWUUDSZrmk: [ 0.00 0.00 ]
+Key: VPDPWUUDSZrmkz: [ 0.00 0.00 ]
+Key: VPDPWUUDSZrr: [ 0.00 0.00 ]
+Key: VPDPWUUDSZrrk: [ 0.00 0.00 ]
+Key: VPDPWUUDSZrrkz: [ 0.00 0.00 ]
+Key: VPDPWUUDSrm: [ 0.00 0.00 ]
+Key: VPDPWUUDSrr: [ 0.00 0.00 ]
+Key: VPDPWUUDYrm: [ 0.00 0.00 ]
+Key: VPDPWUUDYrr: [ 0.00 0.00 ]
+Key: VPDPWUUDZ: [ 0.00 0.00 ]
+Key: VPDPWUUDZrm: [ 0.00 0.00 ]
+Key: VPDPWUUDZrmb: [ 0.00 0.00 ]
+Key: VPDPWUUDZrmbk: [ 0.00 0.00 ]
+Key: VPDPWUUDZrmbkz: [ 0.00 0.00 ]
+Key: VPDPWUUDZrmk: [ 0.00 0.00 ]
+Key: VPDPWUUDZrmkz: [ 0.00 0.00 ]
+Key: VPDPWUUDZrr: [ 0.00 0.00 ]
+Key: VPDPWUUDZrrk: [ 0.00 0.00 ]
+Key: VPDPWUUDZrrkz: [ 0.00 0.00 ]
+Key: VPDPWUUDrm: [ 0.00 0.00 ]
+Key: VPDPWUUDrr: [ 0.00 0.00 ]
+Key: VPERM: [ 0.00 0.00 ]
+Key: VPERMBZ: [ 0.00 0.00 ]
+Key: VPERMBZrm: [ 0.00 0.00 ]
+Key: VPERMBZrmk: [ 0.00 0.00 ]
+Key: VPERMBZrmkz: [ 0.00 0.00 ]
+Key: VPERMBZrr: [ 0.00 0.00 ]
+Key: VPERMBZrrk: [ 0.00 0.00 ]
+Key: VPERMBZrrkz: [ 0.00 0.00 ]
+Key: VPERMDYrm: [ 0.00 0.00 ]
+Key: VPERMDYrr: [ 0.00 0.00 ]
+Key: VPERMDZ: [ 0.00 0.00 ]
+Key: VPERMDZrm: [ 0.00 0.00 ]
+Key: VPERMDZrmb: [ 0.00 0.00 ]
+Key: VPERMDZrmbk: [ 0.00 0.00 ]
+Key: VPERMDZrmbkz: [ 0.00 0.00 ]
+Key: VPERMDZrmk: [ 0.00 0.00 ]
+Key: VPERMDZrmkz: [ 0.00 0.00 ]
+Key: VPERMDZrr: [ 0.00 0.00 ]
+Key: VPERMDZrrk: [ 0.00 0.00 ]
+Key: VPERMDZrrkz: [ 0.00 0.00 ]
+Key: VPERMI: [ 0.00 0.00 ]
+Key: VPERMIL: [ 0.00 0.00 ]
+Key: VPERMILPDYmi: [ 0.00 0.00 ]
+Key: VPERMILPDYri: [ 0.00 0.00 ]
+Key: VPERMILPDYrm: [ 0.00 0.00 ]
+Key: VPERMILPDYrr: [ 0.00 0.00 ]
+Key: VPERMILPDZ: [ 0.00 0.00 ]
+Key: VPERMILPDZmbi: [ 0.00 0.00 ]
+Key: VPERMILPDZmbik: [ 0.00 0.00 ]
+Key: VPERMILPDZmbikz: [ 0.00 0.00 ]
+Key: VPERMILPDZmi: [ 0.00 0.00 ]
+Key: VPERMILPDZmik: [ 0.00 0.00 ]
+Key: VPERMILPDZmikz: [ 0.00 0.00 ]
+Key: VPERMILPDZri: [ 0.00 0.00 ]
+Key: VPERMILPDZrik: [ 0.00 0.00 ]
+Key: VPERMILPDZrikz: [ 0.00 0.00 ]
+Key: VPERMILPDZrm: [ 0.00 0.00 ]
+Key: VPERMILPDZrmb: [ 0.00 0.00 ]
+Key: VPERMILPDZrmbk: [ 0.00 0.00 ]
+Key: VPERMILPDZrmbkz: [ 0.00 0.00 ]
+Key: VPERMILPDZrmk: [ 0.00 0.00 ]
+Key: VPERMILPDZrmkz: [ 0.00 0.00 ]
+Key: VPERMILPDZrr: [ 0.00 0.00 ]
+Key: VPERMILPDZrrk: [ 0.00 0.00 ]
+Key: VPERMILPDZrrkz: [ 0.00 0.00 ]
+Key: VPERMILPDmi: [ 0.00 0.00 ]
+Key: VPERMILPDri: [ 0.00 0.00 ]
+Key: VPERMILPDrm: [ 0.00 0.00 ]
+Key: VPERMILPDrr: [ 0.00 0.00 ]
+Key: VPERMILPSYmi: [ 0.00 0.00 ]
+Key: VPERMILPSYri: [ 0.00 0.00 ]
+Key: VPERMILPSYrm: [ 0.00 0.00 ]
+Key: VPERMILPSYrr: [ 0.00 0.00 ]
+Key: VPERMILPSZ: [ 0.00 0.00 ]
+Key: VPERMILPSZmbi: [ 0.00 0.00 ]
+Key: VPERMILPSZmbik: [ 0.00 0.00 ]
+Key: VPERMILPSZmbikz: [ 0.00 0.00 ]
+Key: VPERMILPSZmi: [ 0.00 0.00 ]
+Key: VPERMILPSZmik: [ 0.00 0.00 ]
+Key: VPERMILPSZmikz: [ 0.00 0.00 ]
+Key: VPERMILPSZri: [ 0.00 0.00 ]
+Key: VPERMILPSZrik: [ 0.00 0.00 ]
+Key: VPERMILPSZrikz: [ 0.00 0.00 ]
+Key: VPERMILPSZrm: [ 0.00 0.00 ]
+Key: VPERMILPSZrmb: [ 0.00 0.00 ]
+Key: VPERMILPSZrmbk: [ 0.00 0.00 ]
+Key: VPERMILPSZrmbkz: [ 0.00 0.00 ]
+Key: VPERMILPSZrmk: [ 0.00 0.00 ]
+Key: VPERMILPSZrmkz: [ 0.00 0.00 ]
+Key: VPERMILPSZrr: [ 0.00 0.00 ]
+Key: VPERMILPSZrrk: [ 0.00 0.00 ]
+Key: VPERMILPSZrrkz: [ 0.00 0.00 ]
+Key: VPERMILPSmi: [ 0.00 0.00 ]
+Key: VPERMILPSri: [ 0.00 0.00 ]
+Key: VPERMILPSrm: [ 0.00 0.00 ]
+Key: VPERMILPSrr: [ 0.00 0.00 ]
+Key: VPERMPDYmi: [ 0.00 0.00 ]
+Key: VPERMPDYri: [ 0.00 0.00 ]
+Key: VPERMPDZ: [ 0.00 0.00 ]
+Key: VPERMPDZmbi: [ 0.00 0.00 ]
+Key: VPERMPDZmbik: [ 0.00 0.00 ]
+Key: VPERMPDZmbikz: [ 0.00 0.00 ]
+Key: VPERMPDZmi: [ 0.00 0.00 ]
+Key: VPERMPDZmik: [ 0.00 0.00 ]
+Key: VPERMPDZmikz: [ 0.00 0.00 ]
+Key: VPERMPDZri: [ 0.00 0.00 ]
+Key: VPERMPDZrik: [ 0.00 0.00 ]
+Key: VPERMPDZrikz: [ 0.00 0.00 ]
+Key: VPERMPDZrm: [ 0.00 0.00 ]
+Key: VPERMPDZrmb: [ 0.00 0.00 ]
+Key: VPERMPDZrmbk: [ 0.00 0.00 ]
+Key: VPERMPDZrmbkz: [ 0.00 0.00 ]
+Key: VPERMPDZrmk: [ 0.00 0.00 ]
+Key: VPERMPDZrmkz: [ 0.00 0.00 ]
+Key: VPERMPDZrr: [ 0.00 0.00 ]
+Key: VPERMPDZrrk: [ 0.00 0.00 ]
+Key: VPERMPDZrrkz: [ 0.00 0.00 ]
+Key: VPERMPSYrm: [ 0.00 0.00 ]
+Key: VPERMPSYrr: [ 0.00 0.00 ]
+Key: VPERMPSZ: [ 0.00 0.00 ]
+Key: VPERMPSZrm: [ 0.00 0.00 ]
+Key: VPERMPSZrmb: [ 0.00 0.00 ]
+Key: VPERMPSZrmbk: [ 0.00 0.00 ]
+Key: VPERMPSZrmbkz: [ 0.00 0.00 ]
+Key: VPERMPSZrmk: [ 0.00 0.00 ]
+Key: VPERMPSZrmkz: [ 0.00 0.00 ]
+Key: VPERMPSZrr: [ 0.00 0.00 ]
+Key: VPERMPSZrrk: [ 0.00 0.00 ]
+Key: VPERMPSZrrkz: [ 0.00 0.00 ]
+Key: VPERMQYmi: [ 0.00 0.00 ]
+Key: VPERMQYri: [ 0.00 0.00 ]
+Key: VPERMQZ: [ 0.00 0.00 ]
+Key: VPERMQZmbi: [ 0.00 0.00 ]
+Key: VPERMQZmbik: [ 0.00 0.00 ]
+Key: VPERMQZmbikz: [ 0.00 0.00 ]
+Key: VPERMQZmi: [ 0.00 0.00 ]
+Key: VPERMQZmik: [ 0.00 0.00 ]
+Key: VPERMQZmikz: [ 0.00 0.00 ]
+Key: VPERMQZri: [ 0.00 0.00 ]
+Key: VPERMQZrik: [ 0.00 0.00 ]
+Key: VPERMQZrikz: [ 0.00 0.00 ]
+Key: VPERMQZrm: [ 0.00 0.00 ]
+Key: VPERMQZrmb: [ 0.00 0.00 ]
+Key: VPERMQZrmbk: [ 0.00 0.00 ]
+Key: VPERMQZrmbkz: [ 0.00 0.00 ]
+Key: VPERMQZrmk: [ 0.00 0.00 ]
+Key: VPERMQZrmkz: [ 0.00 0.00 ]
+Key: VPERMQZrr: [ 0.00 0.00 ]
+Key: VPERMQZrrk: [ 0.00 0.00 ]
+Key: VPERMQZrrkz: [ 0.00 0.00 ]
+Key: VPERMT: [ 0.00 0.00 ]
+Key: VPERMWZ: [ 0.00 0.00 ]
+Key: VPERMWZrm: [ 0.00 0.00 ]
+Key: VPERMWZrmk: [ 0.00 0.00 ]
+Key: VPERMWZrmkz: [ 0.00 0.00 ]
+Key: VPERMWZrr: [ 0.00 0.00 ]
+Key: VPERMWZrrk: [ 0.00 0.00 ]
+Key: VPERMWZrrkz: [ 0.00 0.00 ]
+Key: VPEXPANDBZ: [ 0.00 0.00 ]
+Key: VPEXPANDBZrm: [ 0.00 0.00 ]
+Key: VPEXPANDBZrmk: [ 0.00 0.00 ]
+Key: VPEXPANDBZrmkz: [ 0.00 0.00 ]
+Key: VPEXPANDBZrr: [ 0.00 0.00 ]
+Key: VPEXPANDBZrrk: [ 0.00 0.00 ]
+Key: VPEXPANDBZrrkz: [ 0.00 0.00 ]
+Key: VPEXPANDDZ: [ 0.00 0.00 ]
+Key: VPEXPANDDZrm: [ 0.00 0.00 ]
+Key: VPEXPANDDZrmk: [ 0.00 0.00 ]
+Key: VPEXPANDDZrmkz: [ 0.00 0.00 ]
+Key: VPEXPANDDZrr: [ 0.00 0.00 ]
+Key: VPEXPANDDZrrk: [ 0.00 0.00 ]
+Key: VPEXPANDDZrrkz: [ 0.00 0.00 ]
+Key: VPEXPANDQZ: [ 0.00 0.00 ]
+Key: VPEXPANDQZrm: [ 0.00 0.00 ]
+Key: VPEXPANDQZrmk: [ 0.00 0.00 ]
+Key: VPEXPANDQZrmkz: [ 0.00 0.00 ]
+Key: VPEXPANDQZrr: [ 0.00 0.00 ]
+Key: VPEXPANDQZrrk: [ 0.00 0.00 ]
+Key: VPEXPANDQZrrkz: [ 0.00 0.00 ]
+Key: VPEXPANDWZ: [ 0.00 0.00 ]
+Key: VPEXPANDWZrm: [ 0.00 0.00 ]
+Key: VPEXPANDWZrmk: [ 0.00 0.00 ]
+Key: VPEXPANDWZrmkz: [ 0.00 0.00 ]
+Key: VPEXPANDWZrr: [ 0.00 0.00 ]
+Key: VPEXPANDWZrrk: [ 0.00 0.00 ]
+Key: VPEXPANDWZrrkz: [ 0.00 0.00 ]
+Key: VPEXTRBZmri: [ 0.00 0.00 ]
+Key: VPEXTRBZrri: [ 0.00 0.00 ]
+Key: VPEXTRBmri: [ 0.00 0.00 ]
+Key: VPEXTRBrri: [ 0.00 0.00 ]
+Key: VPEXTRDZmri: [ 0.00 0.00 ]
+Key: VPEXTRDZrri: [ 0.00 0.00 ]
+Key: VPEXTRDmri: [ 0.00 0.00 ]
+Key: VPEXTRDrri: [ 0.00 0.00 ]
+Key: VPEXTRQZmri: [ 0.00 0.00 ]
+Key: VPEXTRQZrri: [ 0.00 0.00 ]
+Key: VPEXTRQmri: [ 0.00 0.00 ]
+Key: VPEXTRQrri: [ 0.00 0.00 ]
+Key: VPEXTRWZmri: [ 0.00 0.00 ]
+Key: VPEXTRWZrri: [ 0.00 0.00 ]
+Key: VPEXTRWZrri_REV: [ 0.00 0.00 ]
+Key: VPEXTRWmri: [ 0.00 0.00 ]
+Key: VPEXTRWrri: [ 0.00 0.00 ]
+Key: VPEXTRWrri_REV: [ 0.00 0.00 ]
+Key: VPGATHERDDYrm: [ 0.00 0.00 ]
+Key: VPGATHERDDZ: [ 0.00 0.00 ]
+Key: VPGATHERDDZrm: [ 0.00 0.00 ]
+Key: VPGATHERDDrm: [ 0.00 0.00 ]
+Key: VPGATHERDQYrm: [ 0.00 0.00 ]
+Key: VPGATHERDQZ: [ 0.00 0.00 ]
+Key: VPGATHERDQZrm: [ 0.00 0.00 ]
+Key: VPGATHERDQrm: [ 0.00 0.00 ]
+Key: VPGATHERQDYrm: [ 0.00 0.00 ]
+Key: VPGATHERQDZ: [ 0.00 0.00 ]
+Key: VPGATHERQDZrm: [ 0.00 0.00 ]
+Key: VPGATHERQDrm: [ 0.00 0.00 ]
+Key: VPGATHERQQYrm: [ 0.00 0.00 ]
+Key: VPGATHERQQZ: [ 0.00 0.00 ]
+Key: VPGATHERQQZrm: [ 0.00 0.00 ]
+Key: VPGATHERQQrm: [ 0.00 0.00 ]
+Key: VPHADDBDrm: [ 0.00 0.00 ]
+Key: VPHADDBDrr: [ 0.00 0.00 ]
+Key: VPHADDBQrm: [ 0.00 0.00 ]
+Key: VPHADDBQrr: [ 0.00 0.00 ]
+Key: VPHADDBWrm: [ 0.00 0.00 ]
+Key: VPHADDBWrr: [ 0.00 0.00 ]
+Key: VPHADDDQrm: [ 0.00 0.00 ]
+Key: VPHADDDQrr: [ 0.00 0.00 ]
+Key: VPHADDDYrm: [ 0.00 0.00 ]
+Key: VPHADDDYrr: [ 0.00 0.00 ]
+Key: VPHADDDrm: [ 0.00 0.00 ]
+Key: VPHADDDrr: [ 0.00 0.00 ]
+Key: VPHADDSWYrm: [ 0.00 0.00 ]
+Key: VPHADDSWYrr: [ 0.00 0.00 ]
+Key: VPHADDSWrm: [ 0.00 0.00 ]
+Key: VPHADDSWrr: [ 0.00 0.00 ]
+Key: VPHADDUBDrm: [ 0.00 0.00 ]
+Key: VPHADDUBDrr: [ 0.00 0.00 ]
+Key: VPHADDUBQrm: [ 0.00 0.00 ]
+Key: VPHADDUBQrr: [ 0.00 0.00 ]
+Key: VPHADDUBWrm: [ 0.00 0.00 ]
+Key: VPHADDUBWrr: [ 0.00 0.00 ]
+Key: VPHADDUDQrm: [ 0.00 0.00 ]
+Key: VPHADDUDQrr: [ 0.00 0.00 ]
+Key: VPHADDUWDrm: [ 0.00 0.00 ]
+Key: VPHADDUWDrr: [ 0.00 0.00 ]
+Key: VPHADDUWQrm: [ 0.00 0.00 ]
+Key: VPHADDUWQrr: [ 0.00 0.00 ]
+Key: VPHADDWDrm: [ 0.00 0.00 ]
+Key: VPHADDWDrr: [ 0.00 0.00 ]
+Key: VPHADDWQrm: [ 0.00 0.00 ]
+Key: VPHADDWQrr: [ 0.00 0.00 ]
+Key: VPHADDWYrm: [ 0.00 0.00 ]
+Key: VPHADDWYrr: [ 0.00 0.00 ]
+Key: VPHADDWrm: [ 0.00 0.00 ]
+Key: VPHADDWrr: [ 0.00 0.00 ]
+Key: VPHMINPOSUWrm: [ 0.00 0.00 ]
+Key: VPHMINPOSUWrr: [ 0.00 0.00 ]
+Key: VPHSUBBWrm: [ 0.00 0.00 ]
+Key: VPHSUBBWrr: [ 0.00 0.00 ]
+Key: VPHSUBDQrm: [ 0.00 0.00 ]
+Key: VPHSUBDQrr: [ 0.00 0.00 ]
+Key: VPHSUBDYrm: [ 0.00 0.00 ]
+Key: VPHSUBDYrr: [ 0.00 0.00 ]
+Key: VPHSUBDrm: [ 0.00 0.00 ]
+Key: VPHSUBDrr: [ 0.00 0.00 ]
+Key: VPHSUBSWYrm: [ 0.00 0.00 ]
+Key: VPHSUBSWYrr: [ 0.00 0.00 ]
+Key: VPHSUBSWrm: [ 0.00 0.00 ]
+Key: VPHSUBSWrr: [ 0.00 0.00 ]
+Key: VPHSUBWDrm: [ 0.00 0.00 ]
+Key: VPHSUBWDrr: [ 0.00 0.00 ]
+Key: VPHSUBWYrm: [ 0.00 0.00 ]
+Key: VPHSUBWYrr: [ 0.00 0.00 ]
+Key: VPHSUBWrm: [ 0.00 0.00 ]
+Key: VPHSUBWrr: [ 0.00 0.00 ]
+Key: VPINSRBZrmi: [ 0.00 0.00 ]
+Key: VPINSRBZrri: [ 0.00 0.00 ]
+Key: VPINSRBrmi: [ 0.00 0.00 ]
+Key: VPINSRBrri: [ 0.00 0.00 ]
+Key: VPINSRDZrmi: [ 0.00 0.00 ]
+Key: VPINSRDZrri: [ 0.00 0.00 ]
+Key: VPINSRDrmi: [ 0.00 0.00 ]
+Key: VPINSRDrri: [ 0.00 0.00 ]
+Key: VPINSRQZrmi: [ 0.00 0.00 ]
+Key: VPINSRQZrri: [ 0.00 0.00 ]
+Key: VPINSRQrmi: [ 0.00 0.00 ]
+Key: VPINSRQrri: [ 0.00 0.00 ]
+Key: VPINSRWZrmi: [ 0.00 0.00 ]
+Key: VPINSRWZrri: [ 0.00 0.00 ]
+Key: VPINSRWrmi: [ 0.00 0.00 ]
+Key: VPINSRWrri: [ 0.00 0.00 ]
+Key: VPLZCNTDZ: [ 0.00 0.00 ]
+Key: VPLZCNTDZrm: [ 0.00 0.00 ]
+Key: VPLZCNTDZrmb: [ 0.00 0.00 ]
+Key: VPLZCNTDZrmbk: [ 0.00 0.00 ]
+Key: VPLZCNTDZrmbkz: [ 0.00 0.00 ]
+Key: VPLZCNTDZrmk: [ 0.00 0.00 ]
+Key: VPLZCNTDZrmkz: [ 0.00 0.00 ]
+Key: VPLZCNTDZrr: [ 0.00 0.00 ]
+Key: VPLZCNTDZrrk: [ 0.00 0.00 ]
+Key: VPLZCNTDZrrkz: [ 0.00 0.00 ]
+Key: VPLZCNTQZ: [ 0.00 0.00 ]
+Key: VPLZCNTQZrm: [ 0.00 0.00 ]
+Key: VPLZCNTQZrmb: [ 0.00 0.00 ]
+Key: VPLZCNTQZrmbk: [ 0.00 0.00 ]
+Key: VPLZCNTQZrmbkz: [ 0.00 0.00 ]
+Key: VPLZCNTQZrmk: [ 0.00 0.00 ]
+Key: VPLZCNTQZrmkz: [ 0.00 0.00 ]
+Key: VPLZCNTQZrr: [ 0.00 0.00 ]
+Key: VPLZCNTQZrrk: [ 0.00 0.00 ]
+Key: VPLZCNTQZrrkz: [ 0.00 0.00 ]
+Key: VPMACSDDrm: [ 0.00 0.00 ]
+Key: VPMACSDDrr: [ 0.00 0.00 ]
+Key: VPMACSDQHrm: [ 0.00 0.00 ]
+Key: VPMACSDQHrr: [ 0.00 0.00 ]
+Key: VPMACSDQLrm: [ 0.00 0.00 ]
+Key: VPMACSDQLrr: [ 0.00 0.00 ]
+Key: VPMACSSDDrm: [ 0.00 0.00 ]
+Key: VPMACSSDDrr: [ 0.00 0.00 ]
+Key: VPMACSSDQHrm: [ 0.00 0.00 ]
+Key: VPMACSSDQHrr: [ 0.00 0.00 ]
+Key: VPMACSSDQLrm: [ 0.00 0.00 ]
+Key: VPMACSSDQLrr: [ 0.00 0.00 ]
+Key: VPMACSSWDrm: [ 0.00 0.00 ]
+Key: VPMACSSWDrr: [ 0.00 0.00 ]
+Key: VPMACSSWWrm: [ 0.00 0.00 ]
+Key: VPMACSSWWrr: [ 0.00 0.00 ]
+Key: VPMACSWDrm: [ 0.00 0.00 ]
+Key: VPMACSWDrr: [ 0.00 0.00 ]
+Key: VPMACSWWrm: [ 0.00 0.00 ]
+Key: VPMACSWWrr: [ 0.00 0.00 ]
+Key: VPMADCSSWDrm: [ 0.00 0.00 ]
+Key: VPMADCSSWDrr: [ 0.00 0.00 ]
+Key: VPMADCSWDrm: [ 0.00 0.00 ]
+Key: VPMADCSWDrr: [ 0.00 0.00 ]
+Key: VPMADD: [ 0.00 0.00 ]
+Key: VPMADDUBSWYrm: [ 0.00 0.00 ]
+Key: VPMADDUBSWYrr: [ 0.00 0.00 ]
+Key: VPMADDUBSWZ: [ 0.00 0.00 ]
+Key: VPMADDUBSWZrm: [ 0.00 0.00 ]
+Key: VPMADDUBSWZrmk: [ 0.00 0.00 ]
+Key: VPMADDUBSWZrmkz: [ 0.00 0.00 ]
+Key: VPMADDUBSWZrr: [ 0.00 0.00 ]
+Key: VPMADDUBSWZrrk: [ 0.00 0.00 ]
+Key: VPMADDUBSWZrrkz: [ 0.00 0.00 ]
+Key: VPMADDUBSWrm: [ 0.00 0.00 ]
+Key: VPMADDUBSWrr: [ 0.00 0.00 ]
+Key: VPMADDWDYrm: [ 0.00 0.00 ]
+Key: VPMADDWDYrr: [ 0.00 0.00 ]
+Key: VPMADDWDZ: [ 0.00 0.00 ]
+Key: VPMADDWDZrm: [ 0.00 0.00 ]
+Key: VPMADDWDZrmk: [ 0.00 0.00 ]
+Key: VPMADDWDZrmkz: [ 0.00 0.00 ]
+Key: VPMADDWDZrr: [ 0.00 0.00 ]
+Key: VPMADDWDZrrk: [ 0.00 0.00 ]
+Key: VPMADDWDZrrkz: [ 0.00 0.00 ]
+Key: VPMADDWDrm: [ 0.00 0.00 ]
+Key: VPMADDWDrr: [ 0.00 0.00 ]
+Key: VPMASKMOVDYmr: [ 0.00 0.00 ]
+Key: VPMASKMOVDYrm: [ 0.00 0.00 ]
+Key: VPMASKMOVDmr: [ 0.00 0.00 ]
+Key: VPMASKMOVDrm: [ 0.00 0.00 ]
+Key: VPMASKMOVQYmr: [ 0.00 0.00 ]
+Key: VPMASKMOVQYrm: [ 0.00 0.00 ]
+Key: VPMASKMOVQmr: [ 0.00 0.00 ]
+Key: VPMASKMOVQrm: [ 0.00 0.00 ]
+Key: VPMAXSBYrm: [ 0.00 0.00 ]
+Key: VPMAXSBYrr: [ 0.00 0.00 ]
+Key: VPMAXSBZ: [ 0.00 0.00 ]
+Key: VPMAXSBZrm: [ 0.00 0.00 ]
+Key: VPMAXSBZrmk: [ 0.00 0.00 ]
+Key: VPMAXSBZrmkz: [ 0.00 0.00 ]
+Key: VPMAXSBZrr: [ 0.00 0.00 ]
+Key: VPMAXSBZrrk: [ 0.00 0.00 ]
+Key: VPMAXSBZrrkz: [ 0.00 0.00 ]
+Key: VPMAXSBrm: [ 0.00 0.00 ]
+Key: VPMAXSBrr: [ 0.00 0.00 ]
+Key: VPMAXSDYrm: [ 0.00 0.00 ]
+Key: VPMAXSDYrr: [ 0.00 0.00 ]
+Key: VPMAXSDZ: [ 0.00 0.00 ]
+Key: VPMAXSDZrm: [ 0.00 0.00 ]
+Key: VPMAXSDZrmb: [ 0.00 0.00 ]
+Key: VPMAXSDZrmbk: [ 0.00 0.00 ]
+Key: VPMAXSDZrmbkz: [ 0.00 0.00 ]
+Key: VPMAXSDZrmk: [ 0.00 0.00 ]
+Key: VPMAXSDZrmkz: [ 0.00 0.00 ]
+Key: VPMAXSDZrr: [ 0.00 0.00 ]
+Key: VPMAXSDZrrk: [ 0.00 0.00 ]
+Key: VPMAXSDZrrkz: [ 0.00 0.00 ]
+Key: VPMAXSDrm: [ 0.00 0.00 ]
+Key: VPMAXSDrr: [ 0.00 0.00 ]
+Key: VPMAXSQZ: [ 0.00 0.00 ]
+Key: VPMAXSQZrm: [ 0.00 0.00 ]
+Key: VPMAXSQZrmb: [ 0.00 0.00 ]
+Key: VPMAXSQZrmbk: [ 0.00 0.00 ]
+Key: VPMAXSQZrmbkz: [ 0.00 0.00 ]
+Key: VPMAXSQZrmk: [ 0.00 0.00 ]
+Key: VPMAXSQZrmkz: [ 0.00 0.00 ]
+Key: VPMAXSQZrr: [ 0.00 0.00 ]
+Key: VPMAXSQZrrk: [ 0.00 0.00 ]
+Key: VPMAXSQZrrkz: [ 0.00 0.00 ]
+Key: VPMAXSWYrm: [ 0.00 0.00 ]
+Key: VPMAXSWYrr: [ 0.00 0.00 ]
+Key: VPMAXSWZ: [ 0.00 0.00 ]
+Key: VPMAXSWZrm: [ 0.00 0.00 ]
+Key: VPMAXSWZrmk: [ 0.00 0.00 ]
+Key: VPMAXSWZrmkz: [ 0.00 0.00 ]
+Key: VPMAXSWZrr: [ 0.00 0.00 ]
+Key: VPMAXSWZrrk: [ 0.00 0.00 ]
+Key: VPMAXSWZrrkz: [ 0.00 0.00 ]
+Key: VPMAXSWrm: [ 0.00 0.00 ]
+Key: VPMAXSWrr: [ 0.00 0.00 ]
+Key: VPMAXUBYrm: [ 0.00 0.00 ]
+Key: VPMAXUBYrr: [ 0.00 0.00 ]
+Key: VPMAXUBZ: [ 0.00 0.00 ]
+Key: VPMAXUBZrm: [ 0.00 0.00 ]
+Key: VPMAXUBZrmk: [ 0.00 0.00 ]
+Key: VPMAXUBZrmkz: [ 0.00 0.00 ]
+Key: VPMAXUBZrr: [ 0.00 0.00 ]
+Key: VPMAXUBZrrk: [ 0.00 0.00 ]
+Key: VPMAXUBZrrkz: [ 0.00 0.00 ]
+Key: VPMAXUBrm: [ 0.00 0.00 ]
+Key: VPMAXUBrr: [ 0.00 0.00 ]
+Key: VPMAXUDYrm: [ 0.00 0.00 ]
+Key: VPMAXUDYrr: [ 0.00 0.00 ]
+Key: VPMAXUDZ: [ 0.00 0.00 ]
+Key: VPMAXUDZrm: [ 0.00 0.00 ]
+Key: VPMAXUDZrmb: [ 0.00 0.00 ]
+Key: VPMAXUDZrmbk: [ 0.00 0.00 ]
+Key: VPMAXUDZrmbkz: [ 0.00 0.00 ]
+Key: VPMAXUDZrmk: [ 0.00 0.00 ]
+Key: VPMAXUDZrmkz: [ 0.00 0.00 ]
+Key: VPMAXUDZrr: [ 0.00 0.00 ]
+Key: VPMAXUDZrrk: [ 0.00 0.00 ]
+Key: VPMAXUDZrrkz: [ 0.00 0.00 ]
+Key: VPMAXUDrm: [ 0.00 0.00 ]
+Key: VPMAXUDrr: [ 0.00 0.00 ]
+Key: VPMAXUQZ: [ 0.00 0.00 ]
+Key: VPMAXUQZrm: [ 0.00 0.00 ]
+Key: VPMAXUQZrmb: [ 0.00 0.00 ]
+Key: VPMAXUQZrmbk: [ 0.00 0.00 ]
+Key: VPMAXUQZrmbkz: [ 0.00 0.00 ]
+Key: VPMAXUQZrmk: [ 0.00 0.00 ]
+Key: VPMAXUQZrmkz: [ 0.00 0.00 ]
+Key: VPMAXUQZrr: [ 0.00 0.00 ]
+Key: VPMAXUQZrrk: [ 0.00 0.00 ]
+Key: VPMAXUQZrrkz: [ 0.00 0.00 ]
+Key: VPMAXUWYrm: [ 0.00 0.00 ]
+Key: VPMAXUWYrr: [ 0.00 0.00 ]
+Key: VPMAXUWZ: [ 0.00 0.00 ]
+Key: VPMAXUWZrm: [ 0.00 0.00 ]
+Key: VPMAXUWZrmk: [ 0.00 0.00 ]
+Key: VPMAXUWZrmkz: [ 0.00 0.00 ]
+Key: VPMAXUWZrr: [ 0.00 0.00 ]
+Key: VPMAXUWZrrk: [ 0.00 0.00 ]
+Key: VPMAXUWZrrkz: [ 0.00 0.00 ]
+Key: VPMAXUWrm: [ 0.00 0.00 ]
+Key: VPMAXUWrr: [ 0.00 0.00 ]
+Key: VPMINSBYrm: [ 0.00 0.00 ]
+Key: VPMINSBYrr: [ 0.00 0.00 ]
+Key: VPMINSBZ: [ 0.00 0.00 ]
+Key: VPMINSBZrm: [ 0.00 0.00 ]
+Key: VPMINSBZrmk: [ 0.00 0.00 ]
+Key: VPMINSBZrmkz: [ 0.00 0.00 ]
+Key: VPMINSBZrr: [ 0.00 0.00 ]
+Key: VPMINSBZrrk: [ 0.00 0.00 ]
+Key: VPMINSBZrrkz: [ 0.00 0.00 ]
+Key: VPMINSBrm: [ 0.00 0.00 ]
+Key: VPMINSBrr: [ 0.00 0.00 ]
+Key: VPMINSDYrm: [ 0.00 0.00 ]
+Key: VPMINSDYrr: [ 0.00 0.00 ]
+Key: VPMINSDZ: [ 0.00 0.00 ]
+Key: VPMINSDZrm: [ 0.00 0.00 ]
+Key: VPMINSDZrmb: [ 0.00 0.00 ]
+Key: VPMINSDZrmbk: [ 0.00 0.00 ]
+Key: VPMINSDZrmbkz: [ 0.00 0.00 ]
+Key: VPMINSDZrmk: [ 0.00 0.00 ]
+Key: VPMINSDZrmkz: [ 0.00 0.00 ]
+Key: VPMINSDZrr: [ 0.00 0.00 ]
+Key: VPMINSDZrrk: [ 0.00 0.00 ]
+Key: VPMINSDZrrkz: [ 0.00 0.00 ]
+Key: VPMINSDrm: [ 0.00 0.00 ]
+Key: VPMINSDrr: [ 0.00 0.00 ]
+Key: VPMINSQZ: [ 0.00 0.00 ]
+Key: VPMINSQZrm: [ 0.00 0.00 ]
+Key: VPMINSQZrmb: [ 0.00 0.00 ]
+Key: VPMINSQZrmbk: [ 0.00 0.00 ]
+Key: VPMINSQZrmbkz: [ 0.00 0.00 ]
+Key: VPMINSQZrmk: [ 0.00 0.00 ]
+Key: VPMINSQZrmkz: [ 0.00 0.00 ]
+Key: VPMINSQZrr: [ 0.00 0.00 ]
+Key: VPMINSQZrrk: [ 0.00 0.00 ]
+Key: VPMINSQZrrkz: [ 0.00 0.00 ]
+Key: VPMINSWYrm: [ 0.00 0.00 ]
+Key: VPMINSWYrr: [ 0.00 0.00 ]
+Key: VPMINSWZ: [ 0.00 0.00 ]
+Key: VPMINSWZrm: [ 0.00 0.00 ]
+Key: VPMINSWZrmk: [ 0.00 0.00 ]
+Key: VPMINSWZrmkz: [ 0.00 0.00 ]
+Key: VPMINSWZrr: [ 0.00 0.00 ]
+Key: VPMINSWZrrk: [ 0.00 0.00 ]
+Key: VPMINSWZrrkz: [ 0.00 0.00 ]
+Key: VPMINSWrm: [ 0.00 0.00 ]
+Key: VPMINSWrr: [ 0.00 0.00 ]
+Key: VPMINUBYrm: [ 0.00 0.00 ]
+Key: VPMINUBYrr: [ 0.00 0.00 ]
+Key: VPMINUBZ: [ 0.00 0.00 ]
+Key: VPMINUBZrm: [ 0.00 0.00 ]
+Key: VPMINUBZrmk: [ 0.00 0.00 ]
+Key: VPMINUBZrmkz: [ 0.00 0.00 ]
+Key: VPMINUBZrr: [ 0.00 0.00 ]
+Key: VPMINUBZrrk: [ 0.00 0.00 ]
+Key: VPMINUBZrrkz: [ 0.00 0.00 ]
+Key: VPMINUBrm: [ 0.00 0.00 ]
+Key: VPMINUBrr: [ 0.00 0.00 ]
+Key: VPMINUDYrm: [ 0.00 0.00 ]
+Key: VPMINUDYrr: [ 0.00 0.00 ]
+Key: VPMINUDZ: [ 0.00 0.00 ]
+Key: VPMINUDZrm: [ 0.00 0.00 ]
+Key: VPMINUDZrmb: [ 0.00 0.00 ]
+Key: VPMINUDZrmbk: [ 0.00 0.00 ]
+Key: VPMINUDZrmbkz: [ 0.00 0.00 ]
+Key: VPMINUDZrmk: [ 0.00 0.00 ]
+Key: VPMINUDZrmkz: [ 0.00 0.00 ]
+Key: VPMINUDZrr: [ 0.00 0.00 ]
+Key: VPMINUDZrrk: [ 0.00 0.00 ]
+Key: VPMINUDZrrkz: [ 0.00 0.00 ]
+Key: VPMINUDrm: [ 0.00 0.00 ]
+Key: VPMINUDrr: [ 0.00 0.00 ]
+Key: VPMINUQZ: [ 0.00 0.00 ]
+Key: VPMINUQZrm: [ 0.00 0.00 ]
+Key: VPMINUQZrmb: [ 0.00 0.00 ]
+Key: VPMINUQZrmbk: [ 0.00 0.00 ]
+Key: VPMINUQZrmbkz: [ 0.00 0.00 ]
+Key: VPMINUQZrmk: [ 0.00 0.00 ]
+Key: VPMINUQZrmkz: [ 0.00 0.00 ]
+Key: VPMINUQZrr: [ 0.00 0.00 ]
+Key: VPMINUQZrrk: [ 0.00 0.00 ]
+Key: VPMINUQZrrkz: [ 0.00 0.00 ]
+Key: VPMINUWYrm: [ 0.00 0.00 ]
+Key: VPMINUWYrr: [ 0.00 0.00 ]
+Key: VPMINUWZ: [ 0.00 0.00 ]
+Key: VPMINUWZrm: [ 0.00 0.00 ]
+Key: VPMINUWZrmk: [ 0.00 0.00 ]
+Key: VPMINUWZrmkz: [ 0.00 0.00 ]
+Key: VPMINUWZrr: [ 0.00 0.00 ]
+Key: VPMINUWZrrk: [ 0.00 0.00 ]
+Key: VPMINUWZrrkz: [ 0.00 0.00 ]
+Key: VPMINUWrm: [ 0.00 0.00 ]
+Key: VPMINUWrr: [ 0.00 0.00 ]
+Key: VPMOVB: [ 0.00 0.00 ]
+Key: VPMOVD: [ 0.00 0.00 ]
+Key: VPMOVDBZ: [ 0.00 0.00 ]
+Key: VPMOVDBZmr: [ 0.00 0.00 ]
+Key: VPMOVDBZmrk: [ 0.00 0.00 ]
+Key: VPMOVDBZrr: [ 0.00 0.00 ]
+Key: VPMOVDBZrrk: [ 0.00 0.00 ]
+Key: VPMOVDBZrrkz: [ 0.00 0.00 ]
+Key: VPMOVDWZ: [ 0.00 0.00 ]
+Key: VPMOVDWZmr: [ 0.00 0.00 ]
+Key: VPMOVDWZmrk: [ 0.00 0.00 ]
+Key: VPMOVDWZrr: [ 0.00 0.00 ]
+Key: VPMOVDWZrrk: [ 0.00 0.00 ]
+Key: VPMOVDWZrrkz: [ 0.00 0.00 ]
+Key: VPMOVM: [ 0.00 0.00 ]
+Key: VPMOVMSKBYrr: [ 0.00 0.00 ]
+Key: VPMOVMSKBrr: [ 0.00 0.00 ]
+Key: VPMOVQ: [ 0.00 0.00 ]
+Key: VPMOVQBZ: [ 0.00 0.00 ]
+Key: VPMOVQBZmr: [ 0.00 0.00 ]
+Key: VPMOVQBZmrk: [ 0.00 0.00 ]
+Key: VPMOVQBZrr: [ 0.00 0.00 ]
+Key: VPMOVQBZrrk: [ 0.00 0.00 ]
+Key: VPMOVQBZrrkz: [ 0.00 0.00 ]
+Key: VPMOVQDZ: [ 0.00 0.00 ]
+Key: VPMOVQDZmr: [ 0.00 0.00 ]
+Key: VPMOVQDZmrk: [ 0.00 0.00 ]
+Key: VPMOVQDZrr: [ 0.00 0.00 ]
+Key: VPMOVQDZrrk: [ 0.00 0.00 ]
+Key: VPMOVQDZrrkz: [ 0.00 0.00 ]
+Key: VPMOVQWZ: [ 0.00 0.00 ]
+Key: VPMOVQWZmr: [ 0.00 0.00 ]
+Key: VPMOVQWZmrk: [ 0.00 0.00 ]
+Key: VPMOVQWZrr: [ 0.00 0.00 ]
+Key: VPMOVQWZrrk: [ 0.00 0.00 ]
+Key: VPMOVQWZrrkz: [ 0.00 0.00 ]
+Key: VPMOVSDBZ: [ 0.00 0.00 ]
+Key: VPMOVSDBZmr: [ 0.00 0.00 ]
+Key: VPMOVSDBZmrk: [ 0.00 0.00 ]
+Key: VPMOVSDBZrr: [ 0.00 0.00 ]
+Key: VPMOVSDBZrrk: [ 0.00 0.00 ]
+Key: VPMOVSDBZrrkz: [ 0.00 0.00 ]
+Key: VPMOVSDWZ: [ 0.00 0.00 ]
+Key: VPMOVSDWZmr: [ 0.00 0.00 ]
+Key: VPMOVSDWZmrk: [ 0.00 0.00 ]
+Key: VPMOVSDWZrr: [ 0.00 0.00 ]
+Key: VPMOVSDWZrrk: [ 0.00 0.00 ]
+Key: VPMOVSDWZrrkz: [ 0.00 0.00 ]
+Key: VPMOVSQBZ: [ 0.00 0.00 ]
+Key: VPMOVSQBZmr: [ 0.00 0.00 ]
+Key: VPMOVSQBZmrk: [ 0.00 0.00 ]
+Key: VPMOVSQBZrr: [ 0.00 0.00 ]
+Key: VPMOVSQBZrrk: [ 0.00 0.00 ]
+Key: VPMOVSQBZrrkz: [ 0.00 0.00 ]
+Key: VPMOVSQDZ: [ 0.00 0.00 ]
+Key: VPMOVSQDZmr: [ 0.00 0.00 ]
+Key: VPMOVSQDZmrk: [ 0.00 0.00 ]
+Key: VPMOVSQDZrr: [ 0.00 0.00 ]
+Key: VPMOVSQDZrrk: [ 0.00 0.00 ]
+Key: VPMOVSQDZrrkz: [ 0.00 0.00 ]
+Key: VPMOVSQWZ: [ 0.00 0.00 ]
+Key: VPMOVSQWZmr: [ 0.00 0.00 ]
+Key: VPMOVSQWZmrk: [ 0.00 0.00 ]
+Key: VPMOVSQWZrr: [ 0.00 0.00 ]
+Key: VPMOVSQWZrrk: [ 0.00 0.00 ]
+Key: VPMOVSQWZrrkz: [ 0.00 0.00 ]
+Key: VPMOVSWBZ: [ 0.00 0.00 ]
+Key: VPMOVSWBZmr: [ 0.00 0.00 ]
+Key: VPMOVSWBZmrk: [ 0.00 0.00 ]
+Key: VPMOVSWBZrr: [ 0.00 0.00 ]
+Key: VPMOVSWBZrrk: [ 0.00 0.00 ]
+Key: VPMOVSWBZrrkz: [ 0.00 0.00 ]
+Key: VPMOVSXBDYrm: [ 0.00 0.00 ]
+Key: VPMOVSXBDYrr: [ 0.00 0.00 ]
+Key: VPMOVSXBDZ: [ 0.00 0.00 ]
+Key: VPMOVSXBDZrm: [ 0.00 0.00 ]
+Key: VPMOVSXBDZrmk: [ 0.00 0.00 ]
+Key: VPMOVSXBDZrmkz: [ 0.00 0.00 ]
+Key: VPMOVSXBDZrr: [ 0.00 0.00 ]
+Key: VPMOVSXBDZrrk: [ 0.00 0.00 ]
+Key: VPMOVSXBDZrrkz: [ 0.00 0.00 ]
+Key: VPMOVSXBDrm: [ 0.00 0.00 ]
+Key: VPMOVSXBDrr: [ 0.00 0.00 ]
+Key: VPMOVSXBQYrm: [ 0.00 0.00 ]
+Key: VPMOVSXBQYrr: [ 0.00 0.00 ]
+Key: VPMOVSXBQZ: [ 0.00 0.00 ]
+Key: VPMOVSXBQZrm: [ 0.00 0.00 ]
+Key: VPMOVSXBQZrmk: [ 0.00 0.00 ]
+Key: VPMOVSXBQZrmkz: [ 0.00 0.00 ]
+Key: VPMOVSXBQZrr: [ 0.00 0.00 ]
+Key: VPMOVSXBQZrrk: [ 0.00 0.00 ]
+Key: VPMOVSXBQZrrkz: [ 0.00 0.00 ]
+Key: VPMOVSXBQrm: [ 0.00 0.00 ]
+Key: VPMOVSXBQrr: [ 0.00 0.00 ]
+Key: VPMOVSXBWYrm: [ 0.00 0.00 ]
+Key: VPMOVSXBWYrr: [ 0.00 0.00 ]
+Key: VPMOVSXBWZ: [ 0.00 0.00 ]
+Key: VPMOVSXBWZrm: [ 0.00 0.00 ]
+Key: VPMOVSXBWZrmk: [ 0.00 0.00 ]
+Key: VPMOVSXBWZrmkz: [ 0.00 0.00 ]
+Key: VPMOVSXBWZrr: [ 0.00 0.00 ]
+Key: VPMOVSXBWZrrk: [ 0.00 0.00 ]
+Key: VPMOVSXBWZrrkz: [ 0.00 0.00 ]
+Key: VPMOVSXBWrm: [ 0.00 0.00 ]
+Key: VPMOVSXBWrr: [ 0.00 0.00 ]
+Key: VPMOVSXDQYrm: [ 0.00 0.00 ]
+Key: VPMOVSXDQYrr: [ 0.00 0.00 ]
+Key: VPMOVSXDQZ: [ 0.00 0.00 ]
+Key: VPMOVSXDQZrm: [ 0.00 0.00 ]
+Key: VPMOVSXDQZrmk: [ 0.00 0.00 ]
+Key: VPMOVSXDQZrmkz: [ 0.00 0.00 ]
+Key: VPMOVSXDQZrr: [ 0.00 0.00 ]
+Key: VPMOVSXDQZrrk: [ 0.00 0.00 ]
+Key: VPMOVSXDQZrrkz: [ 0.00 0.00 ]
+Key: VPMOVSXDQrm: [ 0.00 0.00 ]
+Key: VPMOVSXDQrr: [ 0.00 0.00 ]
+Key: VPMOVSXWDYrm: [ 0.00 0.00 ]
+Key: VPMOVSXWDYrr: [ 0.00 0.00 ]
+Key: VPMOVSXWDZ: [ 0.00 0.00 ]
+Key: VPMOVSXWDZrm: [ 0.00 0.00 ]
+Key: VPMOVSXWDZrmk: [ 0.00 0.00 ]
+Key: VPMOVSXWDZrmkz: [ 0.00 0.00 ]
+Key: VPMOVSXWDZrr: [ 0.00 0.00 ]
+Key: VPMOVSXWDZrrk: [ 0.00 0.00 ]
+Key: VPMOVSXWDZrrkz: [ 0.00 0.00 ]
+Key: VPMOVSXWDrm: [ 0.00 0.00 ]
+Key: VPMOVSXWDrr: [ 0.00 0.00 ]
+Key: VPMOVSXWQYrm: [ 0.00 0.00 ]
+Key: VPMOVSXWQYrr: [ 0.00 0.00 ]
+Key: VPMOVSXWQZ: [ 0.00 0.00 ]
+Key: VPMOVSXWQZrm: [ 0.00 0.00 ]
+Key: VPMOVSXWQZrmk: [ 0.00 0.00 ]
+Key: VPMOVSXWQZrmkz: [ 0.00 0.00 ]
+Key: VPMOVSXWQZrr: [ 0.00 0.00 ]
+Key: VPMOVSXWQZrrk: [ 0.00 0.00 ]
+Key: VPMOVSXWQZrrkz: [ 0.00 0.00 ]
+Key: VPMOVSXWQrm: [ 0.00 0.00 ]
+Key: VPMOVSXWQrr: [ 0.00 0.00 ]
+Key: VPMOVUSDBZ: [ 0.00 0.00 ]
+Key: VPMOVUSDBZmr: [ 0.00 0.00 ]
+Key: VPMOVUSDBZmrk: [ 0.00 0.00 ]
+Key: VPMOVUSDBZrr: [ 0.00 0.00 ]
+Key: VPMOVUSDBZrrk: [ 0.00 0.00 ]
+Key: VPMOVUSDBZrrkz: [ 0.00 0.00 ]
+Key: VPMOVUSDWZ: [ 0.00 0.00 ]
+Key: VPMOVUSDWZmr: [ 0.00 0.00 ]
+Key: VPMOVUSDWZmrk: [ 0.00 0.00 ]
+Key: VPMOVUSDWZrr: [ 0.00 0.00 ]
+Key: VPMOVUSDWZrrk: [ 0.00 0.00 ]
+Key: VPMOVUSDWZrrkz: [ 0.00 0.00 ]
+Key: VPMOVUSQBZ: [ 0.00 0.00 ]
+Key: VPMOVUSQBZmr: [ 0.00 0.00 ]
+Key: VPMOVUSQBZmrk: [ 0.00 0.00 ]
+Key: VPMOVUSQBZrr: [ 0.00 0.00 ]
+Key: VPMOVUSQBZrrk: [ 0.00 0.00 ]
+Key: VPMOVUSQBZrrkz: [ 0.00 0.00 ]
+Key: VPMOVUSQDZ: [ 0.00 0.00 ]
+Key: VPMOVUSQDZmr: [ 0.00 0.00 ]
+Key: VPMOVUSQDZmrk: [ 0.00 0.00 ]
+Key: VPMOVUSQDZrr: [ 0.00 0.00 ]
+Key: VPMOVUSQDZrrk: [ 0.00 0.00 ]
+Key: VPMOVUSQDZrrkz: [ 0.00 0.00 ]
+Key: VPMOVUSQWZ: [ 0.00 0.00 ]
+Key: VPMOVUSQWZmr: [ 0.00 0.00 ]
+Key: VPMOVUSQWZmrk: [ 0.00 0.00 ]
+Key: VPMOVUSQWZrr: [ 0.00 0.00 ]
+Key: VPMOVUSQWZrrk: [ 0.00 0.00 ]
+Key: VPMOVUSQWZrrkz: [ 0.00 0.00 ]
+Key: VPMOVUSWBZ: [ 0.00 0.00 ]
+Key: VPMOVUSWBZmr: [ 0.00 0.00 ]
+Key: VPMOVUSWBZmrk: [ 0.00 0.00 ]
+Key: VPMOVUSWBZrr: [ 0.00 0.00 ]
+Key: VPMOVUSWBZrrk: [ 0.00 0.00 ]
+Key: VPMOVUSWBZrrkz: [ 0.00 0.00 ]
+Key: VPMOVW: [ 0.00 0.00 ]
+Key: VPMOVWBZ: [ 0.00 0.00 ]
+Key: VPMOVWBZmr: [ 0.00 0.00 ]
+Key: VPMOVWBZmrk: [ 0.00 0.00 ]
+Key: VPMOVWBZrr: [ 0.00 0.00 ]
+Key: VPMOVWBZrrk: [ 0.00 0.00 ]
+Key: VPMOVWBZrrkz: [ 0.00 0.00 ]
+Key: VPMOVZXBDYrm: [ 0.00 0.00 ]
+Key: VPMOVZXBDYrr: [ 0.00 0.00 ]
+Key: VPMOVZXBDZ: [ 0.00 0.00 ]
+Key: VPMOVZXBDZrm: [ 0.00 0.00 ]
+Key: VPMOVZXBDZrmk: [ 0.00 0.00 ]
+Key: VPMOVZXBDZrmkz: [ 0.00 0.00 ]
+Key: VPMOVZXBDZrr: [ 0.00 0.00 ]
+Key: VPMOVZXBDZrrk: [ 0.00 0.00 ]
+Key: VPMOVZXBDZrrkz: [ 0.00 0.00 ]
+Key: VPMOVZXBDrm: [ 0.00 0.00 ]
+Key: VPMOVZXBDrr: [ 0.00 0.00 ]
+Key: VPMOVZXBQYrm: [ 0.00 0.00 ]
+Key: VPMOVZXBQYrr: [ 0.00 0.00 ]
+Key: VPMOVZXBQZ: [ 0.00 0.00 ]
+Key: VPMOVZXBQZrm: [ 0.00 0.00 ]
+Key: VPMOVZXBQZrmk: [ 0.00 0.00 ]
+Key: VPMOVZXBQZrmkz: [ 0.00 0.00 ]
+Key: VPMOVZXBQZrr: [ 0.00 0.00 ]
+Key: VPMOVZXBQZrrk: [ 0.00 0.00 ]
+Key: VPMOVZXBQZrrkz: [ 0.00 0.00 ]
+Key: VPMOVZXBQrm: [ 0.00 0.00 ]
+Key: VPMOVZXBQrr: [ 0.00 0.00 ]
+Key: VPMOVZXBWYrm: [ 0.00 0.00 ]
+Key: VPMOVZXBWYrr: [ 0.00 0.00 ]
+Key: VPMOVZXBWZ: [ 0.00 0.00 ]
+Key: VPMOVZXBWZrm: [ 0.00 0.00 ]
+Key: VPMOVZXBWZrmk: [ 0.00 0.00 ]
+Key: VPMOVZXBWZrmkz: [ 0.00 0.00 ]
+Key: VPMOVZXBWZrr: [ 0.00 0.00 ]
+Key: VPMOVZXBWZrrk: [ 0.00 0.00 ]
+Key: VPMOVZXBWZrrkz: [ 0.00 0.00 ]
+Key: VPMOVZXBWrm: [ 0.00 0.00 ]
+Key: VPMOVZXBWrr: [ 0.00 0.00 ]
+Key: VPMOVZXDQYrm: [ 0.00 0.00 ]
+Key: VPMOVZXDQYrr: [ 0.00 0.00 ]
+Key: VPMOVZXDQZ: [ 0.00 0.00 ]
+Key: VPMOVZXDQZrm: [ 0.00 0.00 ]
+Key: VPMOVZXDQZrmk: [ 0.00 0.00 ]
+Key: VPMOVZXDQZrmkz: [ 0.00 0.00 ]
+Key: VPMOVZXDQZrr: [ 0.00 0.00 ]
+Key: VPMOVZXDQZrrk: [ 0.00 0.00 ]
+Key: VPMOVZXDQZrrkz: [ 0.00 0.00 ]
+Key: VPMOVZXDQrm: [ 0.00 0.00 ]
+Key: VPMOVZXDQrr: [ 0.00 0.00 ]
+Key: VPMOVZXWDYrm: [ 0.00 0.00 ]
+Key: VPMOVZXWDYrr: [ 0.00 0.00 ]
+Key: VPMOVZXWDZ: [ 0.00 0.00 ]
+Key: VPMOVZXWDZrm: [ 0.00 0.00 ]
+Key: VPMOVZXWDZrmk: [ 0.00 0.00 ]
+Key: VPMOVZXWDZrmkz: [ 0.00 0.00 ]
+Key: VPMOVZXWDZrr: [ 0.00 0.00 ]
+Key: VPMOVZXWDZrrk: [ 0.00 0.00 ]
+Key: VPMOVZXWDZrrkz: [ 0.00 0.00 ]
+Key: VPMOVZXWDrm: [ 0.00 0.00 ]
+Key: VPMOVZXWDrr: [ 0.00 0.00 ]
+Key: VPMOVZXWQYrm: [ 0.00 0.00 ]
+Key: VPMOVZXWQYrr: [ 0.00 0.00 ]
+Key: VPMOVZXWQZ: [ 0.00 0.00 ]
+Key: VPMOVZXWQZrm: [ 0.00 0.00 ]
+Key: VPMOVZXWQZrmk: [ 0.00 0.00 ]
+Key: VPMOVZXWQZrmkz: [ 0.00 0.00 ]
+Key: VPMOVZXWQZrr: [ 0.00 0.00 ]
+Key: VPMOVZXWQZrrk: [ 0.00 0.00 ]
+Key: VPMOVZXWQZrrkz: [ 0.00 0.00 ]
+Key: VPMOVZXWQrm: [ 0.00 0.00 ]
+Key: VPMOVZXWQrr: [ 0.00 0.00 ]
+Key: VPMULDQYrm: [ 0.00 0.00 ]
+Key: VPMULDQYrr: [ 0.00 0.00 ]
+Key: VPMULDQZ: [ 0.00 0.00 ]
+Key: VPMULDQZrm: [ 0.00 0.00 ]
+Key: VPMULDQZrmb: [ 0.00 0.00 ]
+Key: VPMULDQZrmbk: [ 0.00 0.00 ]
+Key: VPMULDQZrmbkz: [ 0.00 0.00 ]
+Key: VPMULDQZrmk: [ 0.00 0.00 ]
+Key: VPMULDQZrmkz: [ 0.00 0.00 ]
+Key: VPMULDQZrr: [ 0.00 0.00 ]
+Key: VPMULDQZrrk: [ 0.00 0.00 ]
+Key: VPMULDQZrrkz: [ 0.00 0.00 ]
+Key: VPMULDQrm: [ 0.00 0.00 ]
+Key: VPMULDQrr: [ 0.00 0.00 ]
+Key: VPMULHRSWYrm: [ 0.00 0.00 ]
+Key: VPMULHRSWYrr: [ 0.00 0.00 ]
+Key: VPMULHRSWZ: [ 0.00 0.00 ]
+Key: VPMULHRSWZrm: [ 0.00 0.00 ]
+Key: VPMULHRSWZrmk: [ 0.00 0.00 ]
+Key: VPMULHRSWZrmkz: [ 0.00 0.00 ]
+Key: VPMULHRSWZrr: [ 0.00 0.00 ]
+Key: VPMULHRSWZrrk: [ 0.00 0.00 ]
+Key: VPMULHRSWZrrkz: [ 0.00 0.00 ]
+Key: VPMULHRSWrm: [ 0.00 0.00 ]
+Key: VPMULHRSWrr: [ 0.00 0.00 ]
+Key: VPMULHUWYrm: [ 0.00 0.00 ]
+Key: VPMULHUWYrr: [ 0.00 0.00 ]
+Key: VPMULHUWZ: [ 0.00 0.00 ]
+Key: VPMULHUWZrm: [ 0.00 0.00 ]
+Key: VPMULHUWZrmk: [ 0.00 0.00 ]
+Key: VPMULHUWZrmkz: [ 0.00 0.00 ]
+Key: VPMULHUWZrr: [ 0.00 0.00 ]
+Key: VPMULHUWZrrk: [ 0.00 0.00 ]
+Key: VPMULHUWZrrkz: [ 0.00 0.00 ]
+Key: VPMULHUWrm: [ 0.00 0.00 ]
+Key: VPMULHUWrr: [ 0.00 0.00 ]
+Key: VPMULHWYrm: [ 0.00 0.00 ]
+Key: VPMULHWYrr: [ 0.00 0.00 ]
+Key: VPMULHWZ: [ 0.00 0.00 ]
+Key: VPMULHWZrm: [ 0.00 0.00 ]
+Key: VPMULHWZrmk: [ 0.00 0.00 ]
+Key: VPMULHWZrmkz: [ 0.00 0.00 ]
+Key: VPMULHWZrr: [ 0.00 0.00 ]
+Key: VPMULHWZrrk: [ 0.00 0.00 ]
+Key: VPMULHWZrrkz: [ 0.00 0.00 ]
+Key: VPMULHWrm: [ 0.00 0.00 ]
+Key: VPMULHWrr: [ 0.00 0.00 ]
+Key: VPMULLDYrm: [ 0.00 0.00 ]
+Key: VPMULLDYrr: [ 0.00 0.00 ]
+Key: VPMULLDZ: [ 0.00 0.00 ]
+Key: VPMULLDZrm: [ 0.00 0.00 ]
+Key: VPMULLDZrmb: [ 0.00 0.00 ]
+Key: VPMULLDZrmbk: [ 0.00 0.00 ]
+Key: VPMULLDZrmbkz: [ 0.00 0.00 ]
+Key: VPMULLDZrmk: [ 0.00 0.00 ]
+Key: VPMULLDZrmkz: [ 0.00 0.00 ]
+Key: VPMULLDZrr: [ 0.00 0.00 ]
+Key: VPMULLDZrrk: [ 0.00 0.00 ]
+Key: VPMULLDZrrkz: [ 0.00 0.00 ]
+Key: VPMULLDrm: [ 0.00 0.00 ]
+Key: VPMULLDrr: [ 0.00 0.00 ]
+Key: VPMULLQZ: [ 0.00 0.00 ]
+Key: VPMULLQZrm: [ 0.00 0.00 ]
+Key: VPMULLQZrmb: [ 0.00 0.00 ]
+Key: VPMULLQZrmbk: [ 0.00 0.00 ]
+Key: VPMULLQZrmbkz: [ 0.00 0.00 ]
+Key: VPMULLQZrmk: [ 0.00 0.00 ]
+Key: VPMULLQZrmkz: [ 0.00 0.00 ]
+Key: VPMULLQZrr: [ 0.00 0.00 ]
+Key: VPMULLQZrrk: [ 0.00 0.00 ]
+Key: VPMULLQZrrkz: [ 0.00 0.00 ]
+Key: VPMULLWYrm: [ 0.00 0.00 ]
+Key: VPMULLWYrr: [ 0.00 0.00 ]
+Key: VPMULLWZ: [ 0.00 0.00 ]
+Key: VPMULLWZrm: [ 0.00 0.00 ]
+Key: VPMULLWZrmk: [ 0.00 0.00 ]
+Key: VPMULLWZrmkz: [ 0.00 0.00 ]
+Key: VPMULLWZrr: [ 0.00 0.00 ]
+Key: VPMULLWZrrk: [ 0.00 0.00 ]
+Key: VPMULLWZrrkz: [ 0.00 0.00 ]
+Key: VPMULLWrm: [ 0.00 0.00 ]
+Key: VPMULLWrr: [ 0.00 0.00 ]
+Key: VPMULTISHIFTQBZ: [ 0.00 0.00 ]
+Key: VPMULTISHIFTQBZrm: [ 0.00 0.00 ]
+Key: VPMULTISHIFTQBZrmb: [ 0.00 0.00 ]
+Key: VPMULTISHIFTQBZrmbk: [ 0.00 0.00 ]
+Key: VPMULTISHIFTQBZrmbkz: [ 0.00 0.00 ]
+Key: VPMULTISHIFTQBZrmk: [ 0.00 0.00 ]
+Key: VPMULTISHIFTQBZrmkz: [ 0.00 0.00 ]
+Key: VPMULTISHIFTQBZrr: [ 0.00 0.00 ]
+Key: VPMULTISHIFTQBZrrk: [ 0.00 0.00 ]
+Key: VPMULTISHIFTQBZrrkz: [ 0.00 0.00 ]
+Key: VPMULUDQYrm: [ 0.00 0.00 ]
+Key: VPMULUDQYrr: [ 0.00 0.00 ]
+Key: VPMULUDQZ: [ 0.00 0.00 ]
+Key: VPMULUDQZrm: [ 0.00 0.00 ]
+Key: VPMULUDQZrmb: [ 0.00 0.00 ]
+Key: VPMULUDQZrmbk: [ 0.00 0.00 ]
+Key: VPMULUDQZrmbkz: [ 0.00 0.00 ]
+Key: VPMULUDQZrmk: [ 0.00 0.00 ]
+Key: VPMULUDQZrmkz: [ 0.00 0.00 ]
+Key: VPMULUDQZrr: [ 0.00 0.00 ]
+Key: VPMULUDQZrrk: [ 0.00 0.00 ]
+Key: VPMULUDQZrrkz: [ 0.00 0.00 ]
+Key: VPMULUDQrm: [ 0.00 0.00 ]
+Key: VPMULUDQrr: [ 0.00 0.00 ]
+Key: VPOPCNTBZ: [ 0.00 0.00 ]
+Key: VPOPCNTBZrm: [ 0.00 0.00 ]
+Key: VPOPCNTBZrmk: [ 0.00 0.00 ]
+Key: VPOPCNTBZrmkz: [ 0.00 0.00 ]
+Key: VPOPCNTBZrr: [ 0.00 0.00 ]
+Key: VPOPCNTBZrrk: [ 0.00 0.00 ]
+Key: VPOPCNTBZrrkz: [ 0.00 0.00 ]
+Key: VPOPCNTDZ: [ 0.00 0.00 ]
+Key: VPOPCNTDZrm: [ 0.00 0.00 ]
+Key: VPOPCNTDZrmb: [ 0.00 0.00 ]
+Key: VPOPCNTDZrmbk: [ 0.00 0.00 ]
+Key: VPOPCNTDZrmbkz: [ 0.00 0.00 ]
+Key: VPOPCNTDZrmk: [ 0.00 0.00 ]
+Key: VPOPCNTDZrmkz: [ 0.00 0.00 ]
+Key: VPOPCNTDZrr: [ 0.00 0.00 ]
+Key: VPOPCNTDZrrk: [ 0.00 0.00 ]
+Key: VPOPCNTDZrrkz: [ 0.00 0.00 ]
+Key: VPOPCNTQZ: [ 0.00 0.00 ]
+Key: VPOPCNTQZrm: [ 0.00 0.00 ]
+Key: VPOPCNTQZrmb: [ 0.00 0.00 ]
+Key: VPOPCNTQZrmbk: [ 0.00 0.00 ]
+Key: VPOPCNTQZrmbkz: [ 0.00 0.00 ]
+Key: VPOPCNTQZrmk: [ 0.00 0.00 ]
+Key: VPOPCNTQZrmkz: [ 0.00 0.00 ]
+Key: VPOPCNTQZrr: [ 0.00 0.00 ]
+Key: VPOPCNTQZrrk: [ 0.00 0.00 ]
+Key: VPOPCNTQZrrkz: [ 0.00 0.00 ]
+Key: VPOPCNTWZ: [ 0.00 0.00 ]
+Key: VPOPCNTWZrm: [ 0.00 0.00 ]
+Key: VPOPCNTWZrmk: [ 0.00 0.00 ]
+Key: VPOPCNTWZrmkz: [ 0.00 0.00 ]
+Key: VPOPCNTWZrr: [ 0.00 0.00 ]
+Key: VPOPCNTWZrrk: [ 0.00 0.00 ]
+Key: VPOPCNTWZrrkz: [ 0.00 0.00 ]
+Key: VPORDZ: [ 0.00 0.00 ]
+Key: VPORDZrm: [ 0.00 0.00 ]
+Key: VPORDZrmb: [ 0.00 0.00 ]
+Key: VPORDZrmbk: [ 0.00 0.00 ]
+Key: VPORDZrmbkz: [ 0.00 0.00 ]
+Key: VPORDZrmk: [ 0.00 0.00 ]
+Key: VPORDZrmkz: [ 0.00 0.00 ]
+Key: VPORDZrr: [ 0.00 0.00 ]
+Key: VPORDZrrk: [ 0.00 0.00 ]
+Key: VPORDZrrkz: [ 0.00 0.00 ]
+Key: VPORQZ: [ 0.00 0.00 ]
+Key: VPORQZrm: [ 0.00 0.00 ]
+Key: VPORQZrmb: [ 0.00 0.00 ]
+Key: VPORQZrmbk: [ 0.00 0.00 ]
+Key: VPORQZrmbkz: [ 0.00 0.00 ]
+Key: VPORQZrmk: [ 0.00 0.00 ]
+Key: VPORQZrmkz: [ 0.00 0.00 ]
+Key: VPORQZrr: [ 0.00 0.00 ]
+Key: VPORQZrrk: [ 0.00 0.00 ]
+Key: VPORQZrrkz: [ 0.00 0.00 ]
+Key: VPORYrm: [ 0.00 0.00 ]
+Key: VPORYrr: [ 0.00 0.00 ]
+Key: VPORrm: [ 0.00 0.00 ]
+Key: VPORrr: [ 0.00 0.00 ]
+Key: VPPERMrmr: [ 0.00 0.00 ]
+Key: VPPERMrrm: [ 0.00 0.00 ]
+Key: VPPERMrrr: [ 0.00 0.00 ]
+Key: VPPERMrrr_REV: [ 0.00 0.00 ]
+Key: VPROLDZ: [ 0.00 0.00 ]
+Key: VPROLDZmbi: [ 0.00 0.00 ]
+Key: VPROLDZmbik: [ 0.00 0.00 ]
+Key: VPROLDZmbikz: [ 0.00 0.00 ]
+Key: VPROLDZmi: [ 0.00 0.00 ]
+Key: VPROLDZmik: [ 0.00 0.00 ]
+Key: VPROLDZmikz: [ 0.00 0.00 ]
+Key: VPROLDZri: [ 0.00 0.00 ]
+Key: VPROLDZrik: [ 0.00 0.00 ]
+Key: VPROLDZrikz: [ 0.00 0.00 ]
+Key: VPROLQZ: [ 0.00 0.00 ]
+Key: VPROLQZmbi: [ 0.00 0.00 ]
+Key: VPROLQZmbik: [ 0.00 0.00 ]
+Key: VPROLQZmbikz: [ 0.00 0.00 ]
+Key: VPROLQZmi: [ 0.00 0.00 ]
+Key: VPROLQZmik: [ 0.00 0.00 ]
+Key: VPROLQZmikz: [ 0.00 0.00 ]
+Key: VPROLQZri: [ 0.00 0.00 ]
+Key: VPROLQZrik: [ 0.00 0.00 ]
+Key: VPROLQZrikz: [ 0.00 0.00 ]
+Key: VPROLVDZ: [ 0.00 0.00 ]
+Key: VPROLVDZrm: [ 0.00 0.00 ]
+Key: VPROLVDZrmb: [ 0.00 0.00 ]
+Key: VPROLVDZrmbk: [ 0.00 0.00 ]
+Key: VPROLVDZrmbkz: [ 0.00 0.00 ]
+Key: VPROLVDZrmk: [ 0.00 0.00 ]
+Key: VPROLVDZrmkz: [ 0.00 0.00 ]
+Key: VPROLVDZrr: [ 0.00 0.00 ]
+Key: VPROLVDZrrk: [ 0.00 0.00 ]
+Key: VPROLVDZrrkz: [ 0.00 0.00 ]
+Key: VPROLVQZ: [ 0.00 0.00 ]
+Key: VPROLVQZrm: [ 0.00 0.00 ]
+Key: VPROLVQZrmb: [ 0.00 0.00 ]
+Key: VPROLVQZrmbk: [ 0.00 0.00 ]
+Key: VPROLVQZrmbkz: [ 0.00 0.00 ]
+Key: VPROLVQZrmk: [ 0.00 0.00 ]
+Key: VPROLVQZrmkz: [ 0.00 0.00 ]
+Key: VPROLVQZrr: [ 0.00 0.00 ]
+Key: VPROLVQZrrk: [ 0.00 0.00 ]
+Key: VPROLVQZrrkz: [ 0.00 0.00 ]
+Key: VPRORDZ: [ 0.00 0.00 ]
+Key: VPRORDZmbi: [ 0.00 0.00 ]
+Key: VPRORDZmbik: [ 0.00 0.00 ]
+Key: VPRORDZmbikz: [ 0.00 0.00 ]
+Key: VPRORDZmi: [ 0.00 0.00 ]
+Key: VPRORDZmik: [ 0.00 0.00 ]
+Key: VPRORDZmikz: [ 0.00 0.00 ]
+Key: VPRORDZri: [ 0.00 0.00 ]
+Key: VPRORDZrik: [ 0.00 0.00 ]
+Key: VPRORDZrikz: [ 0.00 0.00 ]
+Key: VPRORQZ: [ 0.00 0.00 ]
+Key: VPRORQZmbi: [ 0.00 0.00 ]
+Key: VPRORQZmbik: [ 0.00 0.00 ]
+Key: VPRORQZmbikz: [ 0.00 0.00 ]
+Key: VPRORQZmi: [ 0.00 0.00 ]
+Key: VPRORQZmik: [ 0.00 0.00 ]
+Key: VPRORQZmikz: [ 0.00 0.00 ]
+Key: VPRORQZri: [ 0.00 0.00 ]
+Key: VPRORQZrik: [ 0.00 0.00 ]
+Key: VPRORQZrikz: [ 0.00 0.00 ]
+Key: VPRORVDZ: [ 0.00 0.00 ]
+Key: VPRORVDZrm: [ 0.00 0.00 ]
+Key: VPRORVDZrmb: [ 0.00 0.00 ]
+Key: VPRORVDZrmbk: [ 0.00 0.00 ]
+Key: VPRORVDZrmbkz: [ 0.00 0.00 ]
+Key: VPRORVDZrmk: [ 0.00 0.00 ]
+Key: VPRORVDZrmkz: [ 0.00 0.00 ]
+Key: VPRORVDZrr: [ 0.00 0.00 ]
+Key: VPRORVDZrrk: [ 0.00 0.00 ]
+Key: VPRORVDZrrkz: [ 0.00 0.00 ]
+Key: VPRORVQZ: [ 0.00 0.00 ]
+Key: VPRORVQZrm: [ 0.00 0.00 ]
+Key: VPRORVQZrmb: [ 0.00 0.00 ]
+Key: VPRORVQZrmbk: [ 0.00 0.00 ]
+Key: VPRORVQZrmbkz: [ 0.00 0.00 ]
+Key: VPRORVQZrmk: [ 0.00 0.00 ]
+Key: VPRORVQZrmkz: [ 0.00 0.00 ]
+Key: VPRORVQZrr: [ 0.00 0.00 ]
+Key: VPRORVQZrrk: [ 0.00 0.00 ]
+Key: VPRORVQZrrkz: [ 0.00 0.00 ]
+Key: VPROTBmi: [ 0.00 0.00 ]
+Key: VPROTBmr: [ 0.00 0.00 ]
+Key: VPROTBri: [ 0.00 0.00 ]
+Key: VPROTBrm: [ 0.00 0.00 ]
+Key: VPROTBrr: [ 0.00 0.00 ]
+Key: VPROTBrr_REV: [ 0.00 0.00 ]
+Key: VPROTDmi: [ 0.00 0.00 ]
+Key: VPROTDmr: [ 0.00 0.00 ]
+Key: VPROTDri: [ 0.00 0.00 ]
+Key: VPROTDrm: [ 0.00 0.00 ]
+Key: VPROTDrr: [ 0.00 0.00 ]
+Key: VPROTDrr_REV: [ 0.00 0.00 ]
+Key: VPROTQmi: [ 0.00 0.00 ]
+Key: VPROTQmr: [ 0.00 0.00 ]
+Key: VPROTQri: [ 0.00 0.00 ]
+Key: VPROTQrm: [ 0.00 0.00 ]
+Key: VPROTQrr: [ 0.00 0.00 ]
+Key: VPROTQrr_REV: [ 0.00 0.00 ]
+Key: VPROTWmi: [ 0.00 0.00 ]
+Key: VPROTWmr: [ 0.00 0.00 ]
+Key: VPROTWri: [ 0.00 0.00 ]
+Key: VPROTWrm: [ 0.00 0.00 ]
+Key: VPROTWrr: [ 0.00 0.00 ]
+Key: VPROTWrr_REV: [ 0.00 0.00 ]
+Key: VPSADBWYrm: [ 0.00 0.00 ]
+Key: VPSADBWYrr: [ 0.00 0.00 ]
+Key: VPSADBWZ: [ 0.00 0.00 ]
+Key: VPSADBWZrm: [ 0.00 0.00 ]
+Key: VPSADBWZrr: [ 0.00 0.00 ]
+Key: VPSADBWrm: [ 0.00 0.00 ]
+Key: VPSADBWrr: [ 0.00 0.00 ]
+Key: VPSCATTERDDZ: [ 0.00 0.00 ]
+Key: VPSCATTERDDZmr: [ 0.00 0.00 ]
+Key: VPSCATTERDQZ: [ 0.00 0.00 ]
+Key: VPSCATTERDQZmr: [ 0.00 0.00 ]
+Key: VPSCATTERQDZ: [ 0.00 0.00 ]
+Key: VPSCATTERQDZmr: [ 0.00 0.00 ]
+Key: VPSCATTERQQZ: [ 0.00 0.00 ]
+Key: VPSCATTERQQZmr: [ 0.00 0.00 ]
+Key: VPSHABmr: [ 0.00 0.00 ]
+Key: VPSHABrm: [ 0.00 0.00 ]
+Key: VPSHABrr: [ 0.00 0.00 ]
+Key: VPSHABrr_REV: [ 0.00 0.00 ]
+Key: VPSHADmr: [ 0.00 0.00 ]
+Key: VPSHADrm: [ 0.00 0.00 ]
+Key: VPSHADrr: [ 0.00 0.00 ]
+Key: VPSHADrr_REV: [ 0.00 0.00 ]
+Key: VPSHAQmr: [ 0.00 0.00 ]
+Key: VPSHAQrm: [ 0.00 0.00 ]
+Key: VPSHAQrr: [ 0.00 0.00 ]
+Key: VPSHAQrr_REV: [ 0.00 0.00 ]
+Key: VPSHAWmr: [ 0.00 0.00 ]
+Key: VPSHAWrm: [ 0.00 0.00 ]
+Key: VPSHAWrr: [ 0.00 0.00 ]
+Key: VPSHAWrr_REV: [ 0.00 0.00 ]
+Key: VPSHLBmr: [ 0.00 0.00 ]
+Key: VPSHLBrm: [ 0.00 0.00 ]
+Key: VPSHLBrr: [ 0.00 0.00 ]
+Key: VPSHLBrr_REV: [ 0.00 0.00 ]
+Key: VPSHLDDZ: [ 0.00 0.00 ]
+Key: VPSHLDDZrmbi: [ 0.00 0.00 ]
+Key: VPSHLDDZrmbik: [ 0.00 0.00 ]
+Key: VPSHLDDZrmbikz: [ 0.00 0.00 ]
+Key: VPSHLDDZrmi: [ 0.00 0.00 ]
+Key: VPSHLDDZrmik: [ 0.00 0.00 ]
+Key: VPSHLDDZrmikz: [ 0.00 0.00 ]
+Key: VPSHLDDZrri: [ 0.00 0.00 ]
+Key: VPSHLDDZrrik: [ 0.00 0.00 ]
+Key: VPSHLDDZrrikz: [ 0.00 0.00 ]
+Key: VPSHLDQZ: [ 0.00 0.00 ]
+Key: VPSHLDQZrmbi: [ 0.00 0.00 ]
+Key: VPSHLDQZrmbik: [ 0.00 0.00 ]
+Key: VPSHLDQZrmbikz: [ 0.00 0.00 ]
+Key: VPSHLDQZrmi: [ 0.00 0.00 ]
+Key: VPSHLDQZrmik: [ 0.00 0.00 ]
+Key: VPSHLDQZrmikz: [ 0.00 0.00 ]
+Key: VPSHLDQZrri: [ 0.00 0.00 ]
+Key: VPSHLDQZrrik: [ 0.00 0.00 ]
+Key: VPSHLDQZrrikz: [ 0.00 0.00 ]
+Key: VPSHLDVDZ: [ 0.00 0.00 ]
+Key: VPSHLDVDZm: [ 0.00 0.00 ]
+Key: VPSHLDVDZmb: [ 0.00 0.00 ]
+Key: VPSHLDVDZmbk: [ 0.00 0.00 ]
+Key: VPSHLDVDZmbkz: [ 0.00 0.00 ]
+Key: VPSHLDVDZmk: [ 0.00 0.00 ]
+Key: VPSHLDVDZmkz: [ 0.00 0.00 ]
+Key: VPSHLDVDZr: [ 0.00 0.00 ]
+Key: VPSHLDVDZrk: [ 0.00 0.00 ]
+Key: VPSHLDVDZrkz: [ 0.00 0.00 ]
+Key: VPSHLDVQZ: [ 0.00 0.00 ]
+Key: VPSHLDVQZm: [ 0.00 0.00 ]
+Key: VPSHLDVQZmb: [ 0.00 0.00 ]
+Key: VPSHLDVQZmbk: [ 0.00 0.00 ]
+Key: VPSHLDVQZmbkz: [ 0.00 0.00 ]
+Key: VPSHLDVQZmk: [ 0.00 0.00 ]
+Key: VPSHLDVQZmkz: [ 0.00 0.00 ]
+Key: VPSHLDVQZr: [ 0.00 0.00 ]
+Key: VPSHLDVQZrk: [ 0.00 0.00 ]
+Key: VPSHLDVQZrkz: [ 0.00 0.00 ]
+Key: VPSHLDVWZ: [ 0.00 0.00 ]
+Key: VPSHLDVWZm: [ 0.00 0.00 ]
+Key: VPSHLDVWZmk: [ 0.00 0.00 ]
+Key: VPSHLDVWZmkz: [ 0.00 0.00 ]
+Key: VPSHLDVWZr: [ 0.00 0.00 ]
+Key: VPSHLDVWZrk: [ 0.00 0.00 ]
+Key: VPSHLDVWZrkz: [ 0.00 0.00 ]
+Key: VPSHLDWZ: [ 0.00 0.00 ]
+Key: VPSHLDWZrmi: [ 0.00 0.00 ]
+Key: VPSHLDWZrmik: [ 0.00 0.00 ]
+Key: VPSHLDWZrmikz: [ 0.00 0.00 ]
+Key: VPSHLDWZrri: [ 0.00 0.00 ]
+Key: VPSHLDWZrrik: [ 0.00 0.00 ]
+Key: VPSHLDWZrrikz: [ 0.00 0.00 ]
+Key: VPSHLDmr: [ 0.00 0.00 ]
+Key: VPSHLDrm: [ 0.00 0.00 ]
+Key: VPSHLDrr: [ 0.00 0.00 ]
+Key: VPSHLDrr_REV: [ 0.00 0.00 ]
+Key: VPSHLQmr: [ 0.00 0.00 ]
+Key: VPSHLQrm: [ 0.00 0.00 ]
+Key: VPSHLQrr: [ 0.00 0.00 ]
+Key: VPSHLQrr_REV: [ 0.00 0.00 ]
+Key: VPSHLWmr: [ 0.00 0.00 ]
+Key: VPSHLWrm: [ 0.00 0.00 ]
+Key: VPSHLWrr: [ 0.00 0.00 ]
+Key: VPSHLWrr_REV: [ 0.00 0.00 ]
+Key: VPSHRDDZ: [ 0.00 0.00 ]
+Key: VPSHRDDZrmbi: [ 0.00 0.00 ]
+Key: VPSHRDDZrmbik: [ 0.00 0.00 ]
+Key: VPSHRDDZrmbikz: [ 0.00 0.00 ]
+Key: VPSHRDDZrmi: [ 0.00 0.00 ]
+Key: VPSHRDDZrmik: [ 0.00 0.00 ]
+Key: VPSHRDDZrmikz: [ 0.00 0.00 ]
+Key: VPSHRDDZrri: [ 0.00 0.00 ]
+Key: VPSHRDDZrrik: [ 0.00 0.00 ]
+Key: VPSHRDDZrrikz: [ 0.00 0.00 ]
+Key: VPSHRDQZ: [ 0.00 0.00 ]
+Key: VPSHRDQZrmbi: [ 0.00 0.00 ]
+Key: VPSHRDQZrmbik: [ 0.00 0.00 ]
+Key: VPSHRDQZrmbikz: [ 0.00 0.00 ]
+Key: VPSHRDQZrmi: [ 0.00 0.00 ]
+Key: VPSHRDQZrmik: [ 0.00 0.00 ]
+Key: VPSHRDQZrmikz: [ 0.00 0.00 ]
+Key: VPSHRDQZrri: [ 0.00 0.00 ]
+Key: VPSHRDQZrrik: [ 0.00 0.00 ]
+Key: VPSHRDQZrrikz: [ 0.00 0.00 ]
+Key: VPSHRDVDZ: [ 0.00 0.00 ]
+Key: VPSHRDVDZm: [ 0.00 0.00 ]
+Key: VPSHRDVDZmb: [ 0.00 0.00 ]
+Key: VPSHRDVDZmbk: [ 0.00 0.00 ]
+Key: VPSHRDVDZmbkz: [ 0.00 0.00 ]
+Key: VPSHRDVDZmk: [ 0.00 0.00 ]
+Key: VPSHRDVDZmkz: [ 0.00 0.00 ]
+Key: VPSHRDVDZr: [ 0.00 0.00 ]
+Key: VPSHRDVDZrk: [ 0.00 0.00 ]
+Key: VPSHRDVDZrkz: [ 0.00 0.00 ]
+Key: VPSHRDVQZ: [ 0.00 0.00 ]
+Key: VPSHRDVQZm: [ 0.00 0.00 ]
+Key: VPSHRDVQZmb: [ 0.00 0.00 ]
+Key: VPSHRDVQZmbk: [ 0.00 0.00 ]
+Key: VPSHRDVQZmbkz: [ 0.00 0.00 ]
+Key: VPSHRDVQZmk: [ 0.00 0.00 ]
+Key: VPSHRDVQZmkz: [ 0.00 0.00 ]
+Key: VPSHRDVQZr: [ 0.00 0.00 ]
+Key: VPSHRDVQZrk: [ 0.00 0.00 ]
+Key: VPSHRDVQZrkz: [ 0.00 0.00 ]
+Key: VPSHRDVWZ: [ 0.00 0.00 ]
+Key: VPSHRDVWZm: [ 0.00 0.00 ]
+Key: VPSHRDVWZmk: [ 0.00 0.00 ]
+Key: VPSHRDVWZmkz: [ 0.00 0.00 ]
+Key: VPSHRDVWZr: [ 0.00 0.00 ]
+Key: VPSHRDVWZrk: [ 0.00 0.00 ]
+Key: VPSHRDVWZrkz: [ 0.00 0.00 ]
+Key: VPSHRDWZ: [ 0.00 0.00 ]
+Key: VPSHRDWZrmi: [ 0.00 0.00 ]
+Key: VPSHRDWZrmik: [ 0.00 0.00 ]
+Key: VPSHRDWZrmikz: [ 0.00 0.00 ]
+Key: VPSHRDWZrri: [ 0.00 0.00 ]
+Key: VPSHRDWZrrik: [ 0.00 0.00 ]
+Key: VPSHRDWZrrikz: [ 0.00 0.00 ]
+Key: VPSHUFBITQMBZ: [ 0.00 0.00 ]
+Key: VPSHUFBITQMBZrm: [ 0.00 0.00 ]
+Key: VPSHUFBITQMBZrmk: [ 0.00 0.00 ]
+Key: VPSHUFBITQMBZrr: [ 0.00 0.00 ]
+Key: VPSHUFBITQMBZrrk: [ 0.00 0.00 ]
+Key: VPSHUFBYrm: [ 0.00 0.00 ]
+Key: VPSHUFBYrr: [ 0.00 0.00 ]
+Key: VPSHUFBZ: [ 0.00 0.00 ]
+Key: VPSHUFBZrm: [ 0.00 0.00 ]
+Key: VPSHUFBZrmk: [ 0.00 0.00 ]
+Key: VPSHUFBZrmkz: [ 0.00 0.00 ]
+Key: VPSHUFBZrr: [ 0.00 0.00 ]
+Key: VPSHUFBZrrk: [ 0.00 0.00 ]
+Key: VPSHUFBZrrkz: [ 0.00 0.00 ]
+Key: VPSHUFBrm: [ 0.00 0.00 ]
+Key: VPSHUFBrr: [ 0.00 0.00 ]
+Key: VPSHUFDYmi: [ 0.00 0.00 ]
+Key: VPSHUFDYri: [ 0.00 0.00 ]
+Key: VPSHUFDZ: [ 0.00 0.00 ]
+Key: VPSHUFDZmbi: [ 0.00 0.00 ]
+Key: VPSHUFDZmbik: [ 0.00 0.00 ]
+Key: VPSHUFDZmbikz: [ 0.00 0.00 ]
+Key: VPSHUFDZmi: [ 0.00 0.00 ]
+Key: VPSHUFDZmik: [ 0.00 0.00 ]
+Key: VPSHUFDZmikz: [ 0.00 0.00 ]
+Key: VPSHUFDZri: [ 0.00 0.00 ]
+Key: VPSHUFDZrik: [ 0.00 0.00 ]
+Key: VPSHUFDZrikz: [ 0.00 0.00 ]
+Key: VPSHUFDmi: [ 0.00 0.00 ]
+Key: VPSHUFDri: [ 0.00 0.00 ]
+Key: VPSHUFHWYmi: [ 0.00 0.00 ]
+Key: VPSHUFHWYri: [ 0.00 0.00 ]
+Key: VPSHUFHWZ: [ 0.00 0.00 ]
+Key: VPSHUFHWZmi: [ 0.00 0.00 ]
+Key: VPSHUFHWZmik: [ 0.00 0.00 ]
+Key: VPSHUFHWZmikz: [ 0.00 0.00 ]
+Key: VPSHUFHWZri: [ 0.00 0.00 ]
+Key: VPSHUFHWZrik: [ 0.00 0.00 ]
+Key: VPSHUFHWZrikz: [ 0.00 0.00 ]
+Key: VPSHUFHWmi: [ 0.00 0.00 ]
+Key: VPSHUFHWri: [ 0.00 0.00 ]
+Key: VPSHUFLWYmi: [ 0.00 0.00 ]
+Key: VPSHUFLWYri: [ 0.00 0.00 ]
+Key: VPSHUFLWZ: [ 0.00 0.00 ]
+Key: VPSHUFLWZmi: [ 0.00 0.00 ]
+Key: VPSHUFLWZmik: [ 0.00 0.00 ]
+Key: VPSHUFLWZmikz: [ 0.00 0.00 ]
+Key: VPSHUFLWZri: [ 0.00 0.00 ]
+Key: VPSHUFLWZrik: [ 0.00 0.00 ]
+Key: VPSHUFLWZrikz: [ 0.00 0.00 ]
+Key: VPSHUFLWmi: [ 0.00 0.00 ]
+Key: VPSHUFLWri: [ 0.00 0.00 ]
+Key: VPSIGNBYrm: [ 0.00 0.00 ]
+Key: VPSIGNBYrr: [ 0.00 0.00 ]
+Key: VPSIGNBrm: [ 0.00 0.00 ]
+Key: VPSIGNBrr: [ 0.00 0.00 ]
+Key: VPSIGNDYrm: [ 0.00 0.00 ]
+Key: VPSIGNDYrr: [ 0.00 0.00 ]
+Key: VPSIGNDrm: [ 0.00 0.00 ]
+Key: VPSIGNDrr: [ 0.00 0.00 ]
+Key: VPSIGNWYrm: [ 0.00 0.00 ]
+Key: VPSIGNWYrr: [ 0.00 0.00 ]
+Key: VPSIGNWrm: [ 0.00 0.00 ]
+Key: VPSIGNWrr: [ 0.00 0.00 ]
+Key: VPSLLDQYri: [ 0.00 0.00 ]
+Key: VPSLLDQZ: [ 0.00 0.00 ]
+Key: VPSLLDQZmi: [ 0.00 0.00 ]
+Key: VPSLLDQZri: [ 0.00 0.00 ]
+Key: VPSLLDQri: [ 0.00 0.00 ]
+Key: VPSLLDYri: [ 0.00 0.00 ]
+Key: VPSLLDYrm: [ 0.00 0.00 ]
+Key: VPSLLDYrr: [ 0.00 0.00 ]
+Key: VPSLLDZ: [ 0.00 0.00 ]
+Key: VPSLLDZmbi: [ 0.00 0.00 ]
+Key: VPSLLDZmbik: [ 0.00 0.00 ]
+Key: VPSLLDZmbikz: [ 0.00 0.00 ]
+Key: VPSLLDZmi: [ 0.00 0.00 ]
+Key: VPSLLDZmik: [ 0.00 0.00 ]
+Key: VPSLLDZmikz: [ 0.00 0.00 ]
+Key: VPSLLDZri: [ 0.00 0.00 ]
+Key: VPSLLDZrik: [ 0.00 0.00 ]
+Key: VPSLLDZrikz: [ 0.00 0.00 ]
+Key: VPSLLDZrm: [ 0.00 0.00 ]
+Key: VPSLLDZrmk: [ 0.00 0.00 ]
+Key: VPSLLDZrmkz: [ 0.00 0.00 ]
+Key: VPSLLDZrr: [ 0.00 0.00 ]
+Key: VPSLLDZrrk: [ 0.00 0.00 ]
+Key: VPSLLDZrrkz: [ 0.00 0.00 ]
+Key: VPSLLDri: [ 0.00 0.00 ]
+Key: VPSLLDrm: [ 0.00 0.00 ]
+Key: VPSLLDrr: [ 0.00 0.00 ]
+Key: VPSLLQYri: [ 0.00 0.00 ]
+Key: VPSLLQYrm: [ 0.00 0.00 ]
+Key: VPSLLQYrr: [ 0.00 0.00 ]
+Key: VPSLLQZ: [ 0.00 0.00 ]
+Key: VPSLLQZmbi: [ 0.00 0.00 ]
+Key: VPSLLQZmbik: [ 0.00 0.00 ]
+Key: VPSLLQZmbikz: [ 0.00 0.00 ]
+Key: VPSLLQZmi: [ 0.00 0.00 ]
+Key: VPSLLQZmik: [ 0.00 0.00 ]
+Key: VPSLLQZmikz: [ 0.00 0.00 ]
+Key: VPSLLQZri: [ 0.00 0.00 ]
+Key: VPSLLQZrik: [ 0.00 0.00 ]
+Key: VPSLLQZrikz: [ 0.00 0.00 ]
+Key: VPSLLQZrm: [ 0.00 0.00 ]
+Key: VPSLLQZrmk: [ 0.00 0.00 ]
+Key: VPSLLQZrmkz: [ 0.00 0.00 ]
+Key: VPSLLQZrr: [ 0.00 0.00 ]
+Key: VPSLLQZrrk: [ 0.00 0.00 ]
+Key: VPSLLQZrrkz: [ 0.00 0.00 ]
+Key: VPSLLQri: [ 0.00 0.00 ]
+Key: VPSLLQrm: [ 0.00 0.00 ]
+Key: VPSLLQrr: [ 0.00 0.00 ]
+Key: VPSLLVDYrm: [ 0.00 0.00 ]
+Key: VPSLLVDYrr: [ 0.00 0.00 ]
+Key: VPSLLVDZ: [ 0.00 0.00 ]
+Key: VPSLLVDZrm: [ 0.00 0.00 ]
+Key: VPSLLVDZrmb: [ 0.00 0.00 ]
+Key: VPSLLVDZrmbk: [ 0.00 0.00 ]
+Key: VPSLLVDZrmbkz: [ 0.00 0.00 ]
+Key: VPSLLVDZrmk: [ 0.00 0.00 ]
+Key: VPSLLVDZrmkz: [ 0.00 0.00 ]
+Key: VPSLLVDZrr: [ 0.00 0.00 ]
+Key: VPSLLVDZrrk: [ 0.00 0.00 ]
+Key: VPSLLVDZrrkz: [ 0.00 0.00 ]
+Key: VPSLLVDrm: [ 0.00 0.00 ]
+Key: VPSLLVDrr: [ 0.00 0.00 ]
+Key: VPSLLVQYrm: [ 0.00 0.00 ]
+Key: VPSLLVQYrr: [ 0.00 0.00 ]
+Key: VPSLLVQZ: [ 0.00 0.00 ]
+Key: VPSLLVQZrm: [ 0.00 0.00 ]
+Key: VPSLLVQZrmb: [ 0.00 0.00 ]
+Key: VPSLLVQZrmbk: [ 0.00 0.00 ]
+Key: VPSLLVQZrmbkz: [ 0.00 0.00 ]
+Key: VPSLLVQZrmk: [ 0.00 0.00 ]
+Key: VPSLLVQZrmkz: [ 0.00 0.00 ]
+Key: VPSLLVQZrr: [ 0.00 0.00 ]
+Key: VPSLLVQZrrk: [ 0.00 0.00 ]
+Key: VPSLLVQZrrkz: [ 0.00 0.00 ]
+Key: VPSLLVQrm: [ 0.00 0.00 ]
+Key: VPSLLVQrr: [ 0.00 0.00 ]
+Key: VPSLLVWZ: [ 0.00 0.00 ]
+Key: VPSLLVWZrm: [ 0.00 0.00 ]
+Key: VPSLLVWZrmk: [ 0.00 0.00 ]
+Key: VPSLLVWZrmkz: [ 0.00 0.00 ]
+Key: VPSLLVWZrr: [ 0.00 0.00 ]
+Key: VPSLLVWZrrk: [ 0.00 0.00 ]
+Key: VPSLLVWZrrkz: [ 0.00 0.00 ]
+Key: VPSLLWYri: [ 0.00 0.00 ]
+Key: VPSLLWYrm: [ 0.00 0.00 ]
+Key: VPSLLWYrr: [ 0.00 0.00 ]
+Key: VPSLLWZ: [ 0.00 0.00 ]
+Key: VPSLLWZmi: [ 0.00 0.00 ]
+Key: VPSLLWZmik: [ 0.00 0.00 ]
+Key: VPSLLWZmikz: [ 0.00 0.00 ]
+Key: VPSLLWZri: [ 0.00 0.00 ]
+Key: VPSLLWZrik: [ 0.00 0.00 ]
+Key: VPSLLWZrikz: [ 0.00 0.00 ]
+Key: VPSLLWZrm: [ 0.00 0.00 ]
+Key: VPSLLWZrmk: [ 0.00 0.00 ]
+Key: VPSLLWZrmkz: [ 0.00 0.00 ]
+Key: VPSLLWZrr: [ 0.00 0.00 ]
+Key: VPSLLWZrrk: [ 0.00 0.00 ]
+Key: VPSLLWZrrkz: [ 0.00 0.00 ]
+Key: VPSLLWri: [ 0.00 0.00 ]
+Key: VPSLLWrm: [ 0.00 0.00 ]
+Key: VPSLLWrr: [ 0.00 0.00 ]
+Key: VPSRADYri: [ 0.00 0.00 ]
+Key: VPSRADYrm: [ 0.00 0.00 ]
+Key: VPSRADYrr: [ 0.00 0.00 ]
+Key: VPSRADZ: [ 0.00 0.00 ]
+Key: VPSRADZmbi: [ 0.00 0.00 ]
+Key: VPSRADZmbik: [ 0.00 0.00 ]
+Key: VPSRADZmbikz: [ 0.00 0.00 ]
+Key: VPSRADZmi: [ 0.00 0.00 ]
+Key: VPSRADZmik: [ 0.00 0.00 ]
+Key: VPSRADZmikz: [ 0.00 0.00 ]
+Key: VPSRADZri: [ 0.00 0.00 ]
+Key: VPSRADZrik: [ 0.00 0.00 ]
+Key: VPSRADZrikz: [ 0.00 0.00 ]
+Key: VPSRADZrm: [ 0.00 0.00 ]
+Key: VPSRADZrmk: [ 0.00 0.00 ]
+Key: VPSRADZrmkz: [ 0.00 0.00 ]
+Key: VPSRADZrr: [ 0.00 0.00 ]
+Key: VPSRADZrrk: [ 0.00 0.00 ]
+Key: VPSRADZrrkz: [ 0.00 0.00 ]
+Key: VPSRADri: [ 0.00 0.00 ]
+Key: VPSRADrm: [ 0.00 0.00 ]
+Key: VPSRADrr: [ 0.00 0.00 ]
+Key: VPSRAQZ: [ 0.00 0.00 ]
+Key: VPSRAQZmbi: [ 0.00 0.00 ]
+Key: VPSRAQZmbik: [ 0.00 0.00 ]
+Key: VPSRAQZmbikz: [ 0.00 0.00 ]
+Key: VPSRAQZmi: [ 0.00 0.00 ]
+Key: VPSRAQZmik: [ 0.00 0.00 ]
+Key: VPSRAQZmikz: [ 0.00 0.00 ]
+Key: VPSRAQZri: [ 0.00 0.00 ]
+Key: VPSRAQZrik: [ 0.00 0.00 ]
+Key: VPSRAQZrikz: [ 0.00 0.00 ]
+Key: VPSRAQZrm: [ 0.00 0.00 ]
+Key: VPSRAQZrmk: [ 0.00 0.00 ]
+Key: VPSRAQZrmkz: [ 0.00 0.00 ]
+Key: VPSRAQZrr: [ 0.00 0.00 ]
+Key: VPSRAQZrrk: [ 0.00 0.00 ]
+Key: VPSRAQZrrkz: [ 0.00 0.00 ]
+Key: VPSRAVDYrm: [ 0.00 0.00 ]
+Key: VPSRAVDYrr: [ 0.00 0.00 ]
+Key: VPSRAVDZ: [ 0.00 0.00 ]
+Key: VPSRAVDZrm: [ 0.00 0.00 ]
+Key: VPSRAVDZrmb: [ 0.00 0.00 ]
+Key: VPSRAVDZrmbk: [ 0.00 0.00 ]
+Key: VPSRAVDZrmbkz: [ 0.00 0.00 ]
+Key: VPSRAVDZrmk: [ 0.00 0.00 ]
+Key: VPSRAVDZrmkz: [ 0.00 0.00 ]
+Key: VPSRAVDZrr: [ 0.00 0.00 ]
+Key: VPSRAVDZrrk: [ 0.00 0.00 ]
+Key: VPSRAVDZrrkz: [ 0.00 0.00 ]
+Key: VPSRAVDrm: [ 0.00 0.00 ]
+Key: VPSRAVDrr: [ 0.00 0.00 ]
+Key: VPSRAVQZ: [ 0.00 0.00 ]
+Key: VPSRAVQZrm: [ 0.00 0.00 ]
+Key: VPSRAVQZrmb: [ 0.00 0.00 ]
+Key: VPSRAVQZrmbk: [ 0.00 0.00 ]
+Key: VPSRAVQZrmbkz: [ 0.00 0.00 ]
+Key: VPSRAVQZrmk: [ 0.00 0.00 ]
+Key: VPSRAVQZrmkz: [ 0.00 0.00 ]
+Key: VPSRAVQZrr: [ 0.00 0.00 ]
+Key: VPSRAVQZrrk: [ 0.00 0.00 ]
+Key: VPSRAVQZrrkz: [ 0.00 0.00 ]
+Key: VPSRAVWZ: [ 0.00 0.00 ]
+Key: VPSRAVWZrm: [ 0.00 0.00 ]
+Key: VPSRAVWZrmk: [ 0.00 0.00 ]
+Key: VPSRAVWZrmkz: [ 0.00 0.00 ]
+Key: VPSRAVWZrr: [ 0.00 0.00 ]
+Key: VPSRAVWZrrk: [ 0.00 0.00 ]
+Key: VPSRAVWZrrkz: [ 0.00 0.00 ]
+Key: VPSRAWYri: [ 0.00 0.00 ]
+Key: VPSRAWYrm: [ 0.00 0.00 ]
+Key: VPSRAWYrr: [ 0.00 0.00 ]
+Key: VPSRAWZ: [ 0.00 0.00 ]
+Key: VPSRAWZmi: [ 0.00 0.00 ]
+Key: VPSRAWZmik: [ 0.00 0.00 ]
+Key: VPSRAWZmikz: [ 0.00 0.00 ]
+Key: VPSRAWZri: [ 0.00 0.00 ]
+Key: VPSRAWZrik: [ 0.00 0.00 ]
+Key: VPSRAWZrikz: [ 0.00 0.00 ]
+Key: VPSRAWZrm: [ 0.00 0.00 ]
+Key: VPSRAWZrmk: [ 0.00 0.00 ]
+Key: VPSRAWZrmkz: [ 0.00 0.00 ]
+Key: VPSRAWZrr: [ 0.00 0.00 ]
+Key: VPSRAWZrrk: [ 0.00 0.00 ]
+Key: VPSRAWZrrkz: [ 0.00 0.00 ]
+Key: VPSRAWri: [ 0.00 0.00 ]
+Key: VPSRAWrm: [ 0.00 0.00 ]
+Key: VPSRAWrr: [ 0.00 0.00 ]
+Key: VPSRLDQYri: [ 0.00 0.00 ]
+Key: VPSRLDQZ: [ 0.00 0.00 ]
+Key: VPSRLDQZmi: [ 0.00 0.00 ]
+Key: VPSRLDQZri: [ 0.00 0.00 ]
+Key: VPSRLDQri: [ 0.00 0.00 ]
+Key: VPSRLDYri: [ 0.00 0.00 ]
+Key: VPSRLDYrm: [ 0.00 0.00 ]
+Key: VPSRLDYrr: [ 0.00 0.00 ]
+Key: VPSRLDZ: [ 0.00 0.00 ]
+Key: VPSRLDZmbi: [ 0.00 0.00 ]
+Key: VPSRLDZmbik: [ 0.00 0.00 ]
+Key: VPSRLDZmbikz: [ 0.00 0.00 ]
+Key: VPSRLDZmi: [ 0.00 0.00 ]
+Key: VPSRLDZmik: [ 0.00 0.00 ]
+Key: VPSRLDZmikz: [ 0.00 0.00 ]
+Key: VPSRLDZri: [ 0.00 0.00 ]
+Key: VPSRLDZrik: [ 0.00 0.00 ]
+Key: VPSRLDZrikz: [ 0.00 0.00 ]
+Key: VPSRLDZrm: [ 0.00 0.00 ]
+Key: VPSRLDZrmk: [ 0.00 0.00 ]
+Key: VPSRLDZrmkz: [ 0.00 0.00 ]
+Key: VPSRLDZrr: [ 0.00 0.00 ]
+Key: VPSRLDZrrk: [ 0.00 0.00 ]
+Key: VPSRLDZrrkz: [ 0.00 0.00 ]
+Key: VPSRLDri: [ 0.00 0.00 ]
+Key: VPSRLDrm: [ 0.00 0.00 ]
+Key: VPSRLDrr: [ 0.00 0.00 ]
+Key: VPSRLQYri: [ 0.00 0.00 ]
+Key: VPSRLQYrm: [ 0.00 0.00 ]
+Key: VPSRLQYrr: [ 0.00 0.00 ]
+Key: VPSRLQZ: [ 0.00 0.00 ]
+Key: VPSRLQZmbi: [ 0.00 0.00 ]
+Key: VPSRLQZmbik: [ 0.00 0.00 ]
+Key: VPSRLQZmbikz: [ 0.00 0.00 ]
+Key: VPSRLQZmi: [ 0.00 0.00 ]
+Key: VPSRLQZmik: [ 0.00 0.00 ]
+Key: VPSRLQZmikz: [ 0.00 0.00 ]
+Key: VPSRLQZri: [ 0.00 0.00 ]
+Key: VPSRLQZrik: [ 0.00 0.00 ]
+Key: VPSRLQZrikz: [ 0.00 0.00 ]
+Key: VPSRLQZrm: [ 0.00 0.00 ]
+Key: VPSRLQZrmk: [ 0.00 0.00 ]
+Key: VPSRLQZrmkz: [ 0.00 0.00 ]
+Key: VPSRLQZrr: [ 0.00 0.00 ]
+Key: VPSRLQZrrk: [ 0.00 0.00 ]
+Key: VPSRLQZrrkz: [ 0.00 0.00 ]
+Key: VPSRLQri: [ 0.00 0.00 ]
+Key: VPSRLQrm: [ 0.00 0.00 ]
+Key: VPSRLQrr: [ 0.00 0.00 ]
+Key: VPSRLVDYrm: [ 0.00 0.00 ]
+Key: VPSRLVDYrr: [ 0.00 0.00 ]
+Key: VPSRLVDZ: [ 0.00 0.00 ]
+Key: VPSRLVDZrm: [ 0.00 0.00 ]
+Key: VPSRLVDZrmb: [ 0.00 0.00 ]
+Key: VPSRLVDZrmbk: [ 0.00 0.00 ]
+Key: VPSRLVDZrmbkz: [ 0.00 0.00 ]
+Key: VPSRLVDZrmk: [ 0.00 0.00 ]
+Key: VPSRLVDZrmkz: [ 0.00 0.00 ]
+Key: VPSRLVDZrr: [ 0.00 0.00 ]
+Key: VPSRLVDZrrk: [ 0.00 0.00 ]
+Key: VPSRLVDZrrkz: [ 0.00 0.00 ]
+Key: VPSRLVDrm: [ 0.00 0.00 ]
+Key: VPSRLVDrr: [ 0.00 0.00 ]
+Key: VPSRLVQYrm: [ 0.00 0.00 ]
+Key: VPSRLVQYrr: [ 0.00 0.00 ]
+Key: VPSRLVQZ: [ 0.00 0.00 ]
+Key: VPSRLVQZrm: [ 0.00 0.00 ]
+Key: VPSRLVQZrmb: [ 0.00 0.00 ]
+Key: VPSRLVQZrmbk: [ 0.00 0.00 ]
+Key: VPSRLVQZrmbkz: [ 0.00 0.00 ]
+Key: VPSRLVQZrmk: [ 0.00 0.00 ]
+Key: VPSRLVQZrmkz: [ 0.00 0.00 ]
+Key: VPSRLVQZrr: [ 0.00 0.00 ]
+Key: VPSRLVQZrrk: [ 0.00 0.00 ]
+Key: VPSRLVQZrrkz: [ 0.00 0.00 ]
+Key: VPSRLVQrm: [ 0.00 0.00 ]
+Key: VPSRLVQrr: [ 0.00 0.00 ]
+Key: VPSRLVWZ: [ 0.00 0.00 ]
+Key: VPSRLVWZrm: [ 0.00 0.00 ]
+Key: VPSRLVWZrmk: [ 0.00 0.00 ]
+Key: VPSRLVWZrmkz: [ 0.00 0.00 ]
+Key: VPSRLVWZrr: [ 0.00 0.00 ]
+Key: VPSRLVWZrrk: [ 0.00 0.00 ]
+Key: VPSRLVWZrrkz: [ 0.00 0.00 ]
+Key: VPSRLWYri: [ 0.00 0.00 ]
+Key: VPSRLWYrm: [ 0.00 0.00 ]
+Key: VPSRLWYrr: [ 0.00 0.00 ]
+Key: VPSRLWZ: [ 0.00 0.00 ]
+Key: VPSRLWZmi: [ 0.00 0.00 ]
+Key: VPSRLWZmik: [ 0.00 0.00 ]
+Key: VPSRLWZmikz: [ 0.00 0.00 ]
+Key: VPSRLWZri: [ 0.00 0.00 ]
+Key: VPSRLWZrik: [ 0.00 0.00 ]
+Key: VPSRLWZrikz: [ 0.00 0.00 ]
+Key: VPSRLWZrm: [ 0.00 0.00 ]
+Key: VPSRLWZrmk: [ 0.00 0.00 ]
+Key: VPSRLWZrmkz: [ 0.00 0.00 ]
+Key: VPSRLWZrr: [ 0.00 0.00 ]
+Key: VPSRLWZrrk: [ 0.00 0.00 ]
+Key: VPSRLWZrrkz: [ 0.00 0.00 ]
+Key: VPSRLWri: [ 0.00 0.00 ]
+Key: VPSRLWrm: [ 0.00 0.00 ]
+Key: VPSRLWrr: [ 0.00 0.00 ]
+Key: VPSUBBYrm: [ 0.00 0.00 ]
+Key: VPSUBBYrr: [ 0.00 0.00 ]
+Key: VPSUBBZ: [ 0.00 0.00 ]
+Key: VPSUBBZrm: [ 0.00 0.00 ]
+Key: VPSUBBZrmk: [ 0.00 0.00 ]
+Key: VPSUBBZrmkz: [ 0.00 0.00 ]
+Key: VPSUBBZrr: [ 0.00 0.00 ]
+Key: VPSUBBZrrk: [ 0.00 0.00 ]
+Key: VPSUBBZrrkz: [ 0.00 0.00 ]
+Key: VPSUBBrm: [ 0.00 0.00 ]
+Key: VPSUBBrr: [ 0.00 0.00 ]
+Key: VPSUBDYrm: [ 0.00 0.00 ]
+Key: VPSUBDYrr: [ 0.00 0.00 ]
+Key: VPSUBDZ: [ 0.00 0.00 ]
+Key: VPSUBDZrm: [ 0.00 0.00 ]
+Key: VPSUBDZrmb: [ 0.00 0.00 ]
+Key: VPSUBDZrmbk: [ 0.00 0.00 ]
+Key: VPSUBDZrmbkz: [ 0.00 0.00 ]
+Key: VPSUBDZrmk: [ 0.00 0.00 ]
+Key: VPSUBDZrmkz: [ 0.00 0.00 ]
+Key: VPSUBDZrr: [ 0.00 0.00 ]
+Key: VPSUBDZrrk: [ 0.00 0.00 ]
+Key: VPSUBDZrrkz: [ 0.00 0.00 ]
+Key: VPSUBDrm: [ 0.00 0.00 ]
+Key: VPSUBDrr: [ 0.00 0.00 ]
+Key: VPSUBQYrm: [ 0.00 0.00 ]
+Key: VPSUBQYrr: [ 0.00 0.00 ]
+Key: VPSUBQZ: [ 0.00 0.00 ]
+Key: VPSUBQZrm: [ 0.00 0.00 ]
+Key: VPSUBQZrmb: [ 0.00 0.00 ]
+Key: VPSUBQZrmbk: [ 0.00 0.00 ]
+Key: VPSUBQZrmbkz: [ 0.00 0.00 ]
+Key: VPSUBQZrmk: [ 0.00 0.00 ]
+Key: VPSUBQZrmkz: [ 0.00 0.00 ]
+Key: VPSUBQZrr: [ 0.00 0.00 ]
+Key: VPSUBQZrrk: [ 0.00 0.00 ]
+Key: VPSUBQZrrkz: [ 0.00 0.00 ]
+Key: VPSUBQrm: [ 0.00 0.00 ]
+Key: VPSUBQrr: [ 0.00 0.00 ]
+Key: VPSUBSBYrm: [ 0.00 0.00 ]
+Key: VPSUBSBYrr: [ 0.00 0.00 ]
+Key: VPSUBSBZ: [ 0.00 0.00 ]
+Key: VPSUBSBZrm: [ 0.00 0.00 ]
+Key: VPSUBSBZrmk: [ 0.00 0.00 ]
+Key: VPSUBSBZrmkz: [ 0.00 0.00 ]
+Key: VPSUBSBZrr: [ 0.00 0.00 ]
+Key: VPSUBSBZrrk: [ 0.00 0.00 ]
+Key: VPSUBSBZrrkz: [ 0.00 0.00 ]
+Key: VPSUBSBrm: [ 0.00 0.00 ]
+Key: VPSUBSBrr: [ 0.00 0.00 ]
+Key: VPSUBSWYrm: [ 0.00 0.00 ]
+Key: VPSUBSWYrr: [ 0.00 0.00 ]
+Key: VPSUBSWZ: [ 0.00 0.00 ]
+Key: VPSUBSWZrm: [ 0.00 0.00 ]
+Key: VPSUBSWZrmk: [ 0.00 0.00 ]
+Key: VPSUBSWZrmkz: [ 0.00 0.00 ]
+Key: VPSUBSWZrr: [ 0.00 0.00 ]
+Key: VPSUBSWZrrk: [ 0.00 0.00 ]
+Key: VPSUBSWZrrkz: [ 0.00 0.00 ]
+Key: VPSUBSWrm: [ 0.00 0.00 ]
+Key: VPSUBSWrr: [ 0.00 0.00 ]
+Key: VPSUBUSBYrm: [ 0.00 0.00 ]
+Key: VPSUBUSBYrr: [ 0.00 0.00 ]
+Key: VPSUBUSBZ: [ 0.00 0.00 ]
+Key: VPSUBUSBZrm: [ 0.00 0.00 ]
+Key: VPSUBUSBZrmk: [ 0.00 0.00 ]
+Key: VPSUBUSBZrmkz: [ 0.00 0.00 ]
+Key: VPSUBUSBZrr: [ 0.00 0.00 ]
+Key: VPSUBUSBZrrk: [ 0.00 0.00 ]
+Key: VPSUBUSBZrrkz: [ 0.00 0.00 ]
+Key: VPSUBUSBrm: [ 0.00 0.00 ]
+Key: VPSUBUSBrr: [ 0.00 0.00 ]
+Key: VPSUBUSWYrm: [ 0.00 0.00 ]
+Key: VPSUBUSWYrr: [ 0.00 0.00 ]
+Key: VPSUBUSWZ: [ 0.00 0.00 ]
+Key: VPSUBUSWZrm: [ 0.00 0.00 ]
+Key: VPSUBUSWZrmk: [ 0.00 0.00 ]
+Key: VPSUBUSWZrmkz: [ 0.00 0.00 ]
+Key: VPSUBUSWZrr: [ 0.00 0.00 ]
+Key: VPSUBUSWZrrk: [ 0.00 0.00 ]
+Key: VPSUBUSWZrrkz: [ 0.00 0.00 ]
+Key: VPSUBUSWrm: [ 0.00 0.00 ]
+Key: VPSUBUSWrr: [ 0.00 0.00 ]
+Key: VPSUBWYrm: [ 0.00 0.00 ]
+Key: VPSUBWYrr: [ 0.00 0.00 ]
+Key: VPSUBWZ: [ 0.00 0.00 ]
+Key: VPSUBWZrm: [ 0.00 0.00 ]
+Key: VPSUBWZrmk: [ 0.00 0.00 ]
+Key: VPSUBWZrmkz: [ 0.00 0.00 ]
+Key: VPSUBWZrr: [ 0.00 0.00 ]
+Key: VPSUBWZrrk: [ 0.00 0.00 ]
+Key: VPSUBWZrrkz: [ 0.00 0.00 ]
+Key: VPSUBWrm: [ 0.00 0.00 ]
+Key: VPSUBWrr: [ 0.00 0.00 ]
+Key: VPTERNLOGDZ: [ 0.00 0.00 ]
+Key: VPTERNLOGDZrmbi: [ 0.00 0.00 ]
+Key: VPTERNLOGDZrmbik: [ 0.00 0.00 ]
+Key: VPTERNLOGDZrmbikz: [ 0.00 0.00 ]
+Key: VPTERNLOGDZrmi: [ 0.00 0.00 ]
+Key: VPTERNLOGDZrmik: [ 0.00 0.00 ]
+Key: VPTERNLOGDZrmikz: [ 0.00 0.00 ]
+Key: VPTERNLOGDZrri: [ 0.00 0.00 ]
+Key: VPTERNLOGDZrrik: [ 0.00 0.00 ]
+Key: VPTERNLOGDZrrikz: [ 0.00 0.00 ]
+Key: VPTERNLOGQZ: [ 0.00 0.00 ]
+Key: VPTERNLOGQZrmbi: [ 0.00 0.00 ]
+Key: VPTERNLOGQZrmbik: [ 0.00 0.00 ]
+Key: VPTERNLOGQZrmbikz: [ 0.00 0.00 ]
+Key: VPTERNLOGQZrmi: [ 0.00 0.00 ]
+Key: VPTERNLOGQZrmik: [ 0.00 0.00 ]
+Key: VPTERNLOGQZrmikz: [ 0.00 0.00 ]
+Key: VPTERNLOGQZrri: [ 0.00 0.00 ]
+Key: VPTERNLOGQZrrik: [ 0.00 0.00 ]
+Key: VPTERNLOGQZrrikz: [ 0.00 0.00 ]
+Key: VPTESTMBZ: [ 0.00 0.00 ]
+Key: VPTESTMBZrm: [ 0.00 0.00 ]
+Key: VPTESTMBZrmk: [ 0.00 0.00 ]
+Key: VPTESTMBZrr: [ 0.00 0.00 ]
+Key: VPTESTMBZrrk: [ 0.00 0.00 ]
+Key: VPTESTMDZ: [ 0.00 0.00 ]
+Key: VPTESTMDZrm: [ 0.00 0.00 ]
+Key: VPTESTMDZrmb: [ 0.00 0.00 ]
+Key: VPTESTMDZrmbk: [ 0.00 0.00 ]
+Key: VPTESTMDZrmk: [ 0.00 0.00 ]
+Key: VPTESTMDZrr: [ 0.00 0.00 ]
+Key: VPTESTMDZrrk: [ 0.00 0.00 ]
+Key: VPTESTMQZ: [ 0.00 0.00 ]
+Key: VPTESTMQZrm: [ 0.00 0.00 ]
+Key: VPTESTMQZrmb: [ 0.00 0.00 ]
+Key: VPTESTMQZrmbk: [ 0.00 0.00 ]
+Key: VPTESTMQZrmk: [ 0.00 0.00 ]
+Key: VPTESTMQZrr: [ 0.00 0.00 ]
+Key: VPTESTMQZrrk: [ 0.00 0.00 ]
+Key: VPTESTMWZ: [ 0.00 0.00 ]
+Key: VPTESTMWZrm: [ 0.00 0.00 ]
+Key: VPTESTMWZrmk: [ 0.00 0.00 ]
+Key: VPTESTMWZrr: [ 0.00 0.00 ]
+Key: VPTESTMWZrrk: [ 0.00 0.00 ]
+Key: VPTESTNMBZ: [ 0.00 0.00 ]
+Key: VPTESTNMBZrm: [ 0.00 0.00 ]
+Key: VPTESTNMBZrmk: [ 0.00 0.00 ]
+Key: VPTESTNMBZrr: [ 0.00 0.00 ]
+Key: VPTESTNMBZrrk: [ 0.00 0.00 ]
+Key: VPTESTNMDZ: [ 0.00 0.00 ]
+Key: VPTESTNMDZrm: [ 0.00 0.00 ]
+Key: VPTESTNMDZrmb: [ 0.00 0.00 ]
+Key: VPTESTNMDZrmbk: [ 0.00 0.00 ]
+Key: VPTESTNMDZrmk: [ 0.00 0.00 ]
+Key: VPTESTNMDZrr: [ 0.00 0.00 ]
+Key: VPTESTNMDZrrk: [ 0.00 0.00 ]
+Key: VPTESTNMQZ: [ 0.00 0.00 ]
+Key: VPTESTNMQZrm: [ 0.00 0.00 ]
+Key: VPTESTNMQZrmb: [ 0.00 0.00 ]
+Key: VPTESTNMQZrmbk: [ 0.00 0.00 ]
+Key: VPTESTNMQZrmk: [ 0.00 0.00 ]
+Key: VPTESTNMQZrr: [ 0.00 0.00 ]
+Key: VPTESTNMQZrrk: [ 0.00 0.00 ]
+Key: VPTESTNMWZ: [ 0.00 0.00 ]
+Key: VPTESTNMWZrm: [ 0.00 0.00 ]
+Key: VPTESTNMWZrmk: [ 0.00 0.00 ]
+Key: VPTESTNMWZrr: [ 0.00 0.00 ]
+Key: VPTESTNMWZrrk: [ 0.00 0.00 ]
+Key: VPTESTYrm: [ 0.00 0.00 ]
+Key: VPTESTYrr: [ 0.00 0.00 ]
+Key: VPTESTrm: [ 0.00 0.00 ]
+Key: VPTESTrr: [ 0.00 0.00 ]
+Key: VPUNPCKHBWYrm: [ 0.00 0.00 ]
+Key: VPUNPCKHBWYrr: [ 0.00 0.00 ]
+Key: VPUNPCKHBWZ: [ 0.00 0.00 ]
+Key: VPUNPCKHBWZrm: [ 0.00 0.00 ]
+Key: VPUNPCKHBWZrmk: [ 0.00 0.00 ]
+Key: VPUNPCKHBWZrmkz: [ 0.00 0.00 ]
+Key: VPUNPCKHBWZrr: [ 0.00 0.00 ]
+Key: VPUNPCKHBWZrrk: [ 0.00 0.00 ]
+Key: VPUNPCKHBWZrrkz: [ 0.00 0.00 ]
+Key: VPUNPCKHBWrm: [ 0.00 0.00 ]
+Key: VPUNPCKHBWrr: [ 0.00 0.00 ]
+Key: VPUNPCKHDQYrm: [ 0.00 0.00 ]
+Key: VPUNPCKHDQYrr: [ 0.00 0.00 ]
+Key: VPUNPCKHDQZ: [ 0.00 0.00 ]
+Key: VPUNPCKHDQZrm: [ 0.00 0.00 ]
+Key: VPUNPCKHDQZrmb: [ 0.00 0.00 ]
+Key: VPUNPCKHDQZrmbk: [ 0.00 0.00 ]
+Key: VPUNPCKHDQZrmbkz: [ 0.00 0.00 ]
+Key: VPUNPCKHDQZrmk: [ 0.00 0.00 ]
+Key: VPUNPCKHDQZrmkz: [ 0.00 0.00 ]
+Key: VPUNPCKHDQZrr: [ 0.00 0.00 ]
+Key: VPUNPCKHDQZrrk: [ 0.00 0.00 ]
+Key: VPUNPCKHDQZrrkz: [ 0.00 0.00 ]
+Key: VPUNPCKHDQrm: [ 0.00 0.00 ]
+Key: VPUNPCKHDQrr: [ 0.00 0.00 ]
+Key: VPUNPCKHQDQYrm: [ 0.00 0.00 ]
+Key: VPUNPCKHQDQYrr: [ 0.00 0.00 ]
+Key: VPUNPCKHQDQZ: [ 0.00 0.00 ]
+Key: VPUNPCKHQDQZrm: [ 0.00 0.00 ]
+Key: VPUNPCKHQDQZrmb: [ 0.00 0.00 ]
+Key: VPUNPCKHQDQZrmbk: [ 0.00 0.00 ]
+Key: VPUNPCKHQDQZrmbkz: [ 0.00 0.00 ]
+Key: VPUNPCKHQDQZrmk: [ 0.00 0.00 ]
+Key: VPUNPCKHQDQZrmkz: [ 0.00 0.00 ]
+Key: VPUNPCKHQDQZrr: [ 0.00 0.00 ]
+Key: VPUNPCKHQDQZrrk: [ 0.00 0.00 ]
+Key: VPUNPCKHQDQZrrkz: [ 0.00 0.00 ]
+Key: VPUNPCKHQDQrm: [ 0.00 0.00 ]
+Key: VPUNPCKHQDQrr: [ 0.00 0.00 ]
+Key: VPUNPCKHWDYrm: [ 0.00 0.00 ]
+Key: VPUNPCKHWDYrr: [ 0.00 0.00 ]
+Key: VPUNPCKHWDZ: [ 0.00 0.00 ]
+Key: VPUNPCKHWDZrm: [ 0.00 0.00 ]
+Key: VPUNPCKHWDZrmk: [ 0.00 0.00 ]
+Key: VPUNPCKHWDZrmkz: [ 0.00 0.00 ]
+Key: VPUNPCKHWDZrr: [ 0.00 0.00 ]
+Key: VPUNPCKHWDZrrk: [ 0.00 0.00 ]
+Key: VPUNPCKHWDZrrkz: [ 0.00 0.00 ]
+Key: VPUNPCKHWDrm: [ 0.00 0.00 ]
+Key: VPUNPCKHWDrr: [ 0.00 0.00 ]
+Key: VPUNPCKLBWYrm: [ 0.00 0.00 ]
+Key: VPUNPCKLBWYrr: [ 0.00 0.00 ]
+Key: VPUNPCKLBWZ: [ 0.00 0.00 ]
+Key: VPUNPCKLBWZrm: [ 0.00 0.00 ]
+Key: VPUNPCKLBWZrmk: [ 0.00 0.00 ]
+Key: VPUNPCKLBWZrmkz: [ 0.00 0.00 ]
+Key: VPUNPCKLBWZrr: [ 0.00 0.00 ]
+Key: VPUNPCKLBWZrrk: [ 0.00 0.00 ]
+Key: VPUNPCKLBWZrrkz: [ 0.00 0.00 ]
+Key: VPUNPCKLBWrm: [ 0.00 0.00 ]
+Key: VPUNPCKLBWrr: [ 0.00 0.00 ]
+Key: VPUNPCKLDQYrm: [ 0.00 0.00 ]
+Key: VPUNPCKLDQYrr: [ 0.00 0.00 ]
+Key: VPUNPCKLDQZ: [ 0.00 0.00 ]
+Key: VPUNPCKLDQZrm: [ 0.00 0.00 ]
+Key: VPUNPCKLDQZrmb: [ 0.00 0.00 ]
+Key: VPUNPCKLDQZrmbk: [ 0.00 0.00 ]
+Key: VPUNPCKLDQZrmbkz: [ 0.00 0.00 ]
+Key: VPUNPCKLDQZrmk: [ 0.00 0.00 ]
+Key: VPUNPCKLDQZrmkz: [ 0.00 0.00 ]
+Key: VPUNPCKLDQZrr: [ 0.00 0.00 ]
+Key: VPUNPCKLDQZrrk: [ 0.00 0.00 ]
+Key: VPUNPCKLDQZrrkz: [ 0.00 0.00 ]
+Key: VPUNPCKLDQrm: [ 0.00 0.00 ]
+Key: VPUNPCKLDQrr: [ 0.00 0.00 ]
+Key: VPUNPCKLQDQYrm: [ 0.00 0.00 ]
+Key: VPUNPCKLQDQYrr: [ 0.00 0.00 ]
+Key: VPUNPCKLQDQZ: [ 0.00 0.00 ]
+Key: VPUNPCKLQDQZrm: [ 0.00 0.00 ]
+Key: VPUNPCKLQDQZrmb: [ 0.00 0.00 ]
+Key: VPUNPCKLQDQZrmbk: [ 0.00 0.00 ]
+Key: VPUNPCKLQDQZrmbkz: [ 0.00 0.00 ]
+Key: VPUNPCKLQDQZrmk: [ 0.00 0.00 ]
+Key: VPUNPCKLQDQZrmkz: [ 0.00 0.00 ]
+Key: VPUNPCKLQDQZrr: [ 0.00 0.00 ]
+Key: VPUNPCKLQDQZrrk: [ 0.00 0.00 ]
+Key: VPUNPCKLQDQZrrkz: [ 0.00 0.00 ]
+Key: VPUNPCKLQDQrm: [ 0.00 0.00 ]
+Key: VPUNPCKLQDQrr: [ 0.00 0.00 ]
+Key: VPUNPCKLWDYrm: [ 0.00 0.00 ]
+Key: VPUNPCKLWDYrr: [ 0.00 0.00 ]
+Key: VPUNPCKLWDZ: [ 0.00 0.00 ]
+Key: VPUNPCKLWDZrm: [ 0.00 0.00 ]
+Key: VPUNPCKLWDZrmk: [ 0.00 0.00 ]
+Key: VPUNPCKLWDZrmkz: [ 0.00 0.00 ]
+Key: VPUNPCKLWDZrr: [ 0.00 0.00 ]
+Key: VPUNPCKLWDZrrk: [ 0.00 0.00 ]
+Key: VPUNPCKLWDZrrkz: [ 0.00 0.00 ]
+Key: VPUNPCKLWDrm: [ 0.00 0.00 ]
+Key: VPUNPCKLWDrr: [ 0.00 0.00 ]
+Key: VPXORDZ: [ 0.00 0.00 ]
+Key: VPXORDZrm: [ 0.00 0.00 ]
+Key: VPXORDZrmb: [ 0.00 0.00 ]
+Key: VPXORDZrmbk: [ 0.00 0.00 ]
+Key: VPXORDZrmbkz: [ 0.00 0.00 ]
+Key: VPXORDZrmk: [ 0.00 0.00 ]
+Key: VPXORDZrmkz: [ 0.00 0.00 ]
+Key: VPXORDZrr: [ 0.00 0.00 ]
+Key: VPXORDZrrk: [ 0.00 0.00 ]
+Key: VPXORDZrrkz: [ 0.00 0.00 ]
+Key: VPXORQZ: [ 0.00 0.00 ]
+Key: VPXORQZrm: [ 0.00 0.00 ]
+Key: VPXORQZrmb: [ 0.00 0.00 ]
+Key: VPXORQZrmbk: [ 0.00 0.00 ]
+Key: VPXORQZrmbkz: [ 0.00 0.00 ]
+Key: VPXORQZrmk: [ 0.00 0.00 ]
+Key: VPXORQZrmkz: [ 0.00 0.00 ]
+Key: VPXORQZrr: [ 0.00 0.00 ]
+Key: VPXORQZrrk: [ 0.00 0.00 ]
+Key: VPXORQZrrkz: [ 0.00 0.00 ]
+Key: VPXORYrm: [ 0.00 0.00 ]
+Key: VPXORYrr: [ 0.00 0.00 ]
+Key: VPXORrm: [ 0.00 0.00 ]
+Key: VPXORrr: [ 0.00 0.00 ]
+Key: VRANGEPDZ: [ 0.00 0.00 ]
+Key: VRANGEPDZrmbi: [ 0.00 0.00 ]
+Key: VRANGEPDZrmbik: [ 0.00 0.00 ]
+Key: VRANGEPDZrmbikz: [ 0.00 0.00 ]
+Key: VRANGEPDZrmi: [ 0.00 0.00 ]
+Key: VRANGEPDZrmik: [ 0.00 0.00 ]
+Key: VRANGEPDZrmikz: [ 0.00 0.00 ]
+Key: VRANGEPDZrri: [ 0.00 0.00 ]
+Key: VRANGEPDZrrib: [ 0.00 0.00 ]
+Key: VRANGEPDZrribk: [ 0.00 0.00 ]
+Key: VRANGEPDZrribkz: [ 0.00 0.00 ]
+Key: VRANGEPDZrrik: [ 0.00 0.00 ]
+Key: VRANGEPDZrrikz: [ 0.00 0.00 ]
+Key: VRANGEPSZ: [ 0.00 0.00 ]
+Key: VRANGEPSZrmbi: [ 0.00 0.00 ]
+Key: VRANGEPSZrmbik: [ 0.00 0.00 ]
+Key: VRANGEPSZrmbikz: [ 0.00 0.00 ]
+Key: VRANGEPSZrmi: [ 0.00 0.00 ]
+Key: VRANGEPSZrmik: [ 0.00 0.00 ]
+Key: VRANGEPSZrmikz: [ 0.00 0.00 ]
+Key: VRANGEPSZrri: [ 0.00 0.00 ]
+Key: VRANGEPSZrrib: [ 0.00 0.00 ]
+Key: VRANGEPSZrribk: [ 0.00 0.00 ]
+Key: VRANGEPSZrribkz: [ 0.00 0.00 ]
+Key: VRANGEPSZrrik: [ 0.00 0.00 ]
+Key: VRANGEPSZrrikz: [ 0.00 0.00 ]
+Key: VRANGESDZrmi: [ 0.00 0.00 ]
+Key: VRANGESDZrmik: [ 0.00 0.00 ]
+Key: VRANGESDZrmikz: [ 0.00 0.00 ]
+Key: VRANGESDZrri: [ 0.00 0.00 ]
+Key: VRANGESDZrrib: [ 0.00 0.00 ]
+Key: VRANGESDZrribk: [ 0.00 0.00 ]
+Key: VRANGESDZrribkz: [ 0.00 0.00 ]
+Key: VRANGESDZrrik: [ 0.00 0.00 ]
+Key: VRANGESDZrrikz: [ 0.00 0.00 ]
+Key: VRANGESSZrmi: [ 0.00 0.00 ]
+Key: VRANGESSZrmik: [ 0.00 0.00 ]
+Key: VRANGESSZrmikz: [ 0.00 0.00 ]
+Key: VRANGESSZrri: [ 0.00 0.00 ]
+Key: VRANGESSZrrib: [ 0.00 0.00 ]
+Key: VRANGESSZrribk: [ 0.00 0.00 ]
+Key: VRANGESSZrribkz: [ 0.00 0.00 ]
+Key: VRANGESSZrrik: [ 0.00 0.00 ]
+Key: VRANGESSZrrikz: [ 0.00 0.00 ]
+Key: VRCP: [ 0.00 0.00 ]
+Key: VRCPBF: [ 0.00 0.00 ]
+Key: VRCPPHZ: [ 0.00 0.00 ]
+Key: VRCPPHZm: [ 0.00 0.00 ]
+Key: VRCPPHZmb: [ 0.00 0.00 ]
+Key: VRCPPHZmbk: [ 0.00 0.00 ]
+Key: VRCPPHZmbkz: [ 0.00 0.00 ]
+Key: VRCPPHZmk: [ 0.00 0.00 ]
+Key: VRCPPHZmkz: [ 0.00 0.00 ]
+Key: VRCPPHZr: [ 0.00 0.00 ]
+Key: VRCPPHZrk: [ 0.00 0.00 ]
+Key: VRCPPHZrkz: [ 0.00 0.00 ]
+Key: VRCPPSYm: [ 0.00 0.00 ]
+Key: VRCPPSYr: [ 0.00 0.00 ]
+Key: VRCPPSm: [ 0.00 0.00 ]
+Key: VRCPPSr: [ 0.00 0.00 ]
+Key: VRCPSHZrm: [ 0.00 0.00 ]
+Key: VRCPSHZrmk: [ 0.00 0.00 ]
+Key: VRCPSHZrmkz: [ 0.00 0.00 ]
+Key: VRCPSHZrr: [ 0.00 0.00 ]
+Key: VRCPSHZrrk: [ 0.00 0.00 ]
+Key: VRCPSHZrrkz: [ 0.00 0.00 ]
+Key: VRCPSSm: [ 0.00 0.00 ]
+Key: VRCPSSm_Int: [ 0.00 0.00 ]
+Key: VRCPSSr: [ 0.00 0.00 ]
+Key: VRCPSSr_Int: [ 0.00 0.00 ]
+Key: VREDUCEBF: [ 0.00 0.00 ]
+Key: VREDUCEPDZ: [ 0.00 0.00 ]
+Key: VREDUCEPDZrmbi: [ 0.00 0.00 ]
+Key: VREDUCEPDZrmbik: [ 0.00 0.00 ]
+Key: VREDUCEPDZrmbikz: [ 0.00 0.00 ]
+Key: VREDUCEPDZrmi: [ 0.00 0.00 ]
+Key: VREDUCEPDZrmik: [ 0.00 0.00 ]
+Key: VREDUCEPDZrmikz: [ 0.00 0.00 ]
+Key: VREDUCEPDZrri: [ 0.00 0.00 ]
+Key: VREDUCEPDZrrib: [ 0.00 0.00 ]
+Key: VREDUCEPDZrribk: [ 0.00 0.00 ]
+Key: VREDUCEPDZrribkz: [ 0.00 0.00 ]
+Key: VREDUCEPDZrrik: [ 0.00 0.00 ]
+Key: VREDUCEPDZrrikz: [ 0.00 0.00 ]
+Key: VREDUCEPHZ: [ 0.00 0.00 ]
+Key: VREDUCEPHZrmbi: [ 0.00 0.00 ]
+Key: VREDUCEPHZrmbik: [ 0.00 0.00 ]
+Key: VREDUCEPHZrmbikz: [ 0.00 0.00 ]
+Key: VREDUCEPHZrmi: [ 0.00 0.00 ]
+Key: VREDUCEPHZrmik: [ 0.00 0.00 ]
+Key: VREDUCEPHZrmikz: [ 0.00 0.00 ]
+Key: VREDUCEPHZrri: [ 0.00 0.00 ]
+Key: VREDUCEPHZrrib: [ 0.00 0.00 ]
+Key: VREDUCEPHZrribk: [ 0.00 0.00 ]
+Key: VREDUCEPHZrribkz: [ 0.00 0.00 ]
+Key: VREDUCEPHZrrik: [ 0.00 0.00 ]
+Key: VREDUCEPHZrrikz: [ 0.00 0.00 ]
+Key: VREDUCEPSZ: [ 0.00 0.00 ]
+Key: VREDUCEPSZrmbi: [ 0.00 0.00 ]
+Key: VREDUCEPSZrmbik: [ 0.00 0.00 ]
+Key: VREDUCEPSZrmbikz: [ 0.00 0.00 ]
+Key: VREDUCEPSZrmi: [ 0.00 0.00 ]
+Key: VREDUCEPSZrmik: [ 0.00 0.00 ]
+Key: VREDUCEPSZrmikz: [ 0.00 0.00 ]
+Key: VREDUCEPSZrri: [ 0.00 0.00 ]
+Key: VREDUCEPSZrrib: [ 0.00 0.00 ]
+Key: VREDUCEPSZrribk: [ 0.00 0.00 ]
+Key: VREDUCEPSZrribkz: [ 0.00 0.00 ]
+Key: VREDUCEPSZrrik: [ 0.00 0.00 ]
+Key: VREDUCEPSZrrikz: [ 0.00 0.00 ]
+Key: VREDUCESDZrmi: [ 0.00 0.00 ]
+Key: VREDUCESDZrmik: [ 0.00 0.00 ]
+Key: VREDUCESDZrmikz: [ 0.00 0.00 ]
+Key: VREDUCESDZrri: [ 0.00 0.00 ]
+Key: VREDUCESDZrrib: [ 0.00 0.00 ]
+Key: VREDUCESDZrribk: [ 0.00 0.00 ]
+Key: VREDUCESDZrribkz: [ 0.00 0.00 ]
+Key: VREDUCESDZrrik: [ 0.00 0.00 ]
+Key: VREDUCESDZrrikz: [ 0.00 0.00 ]
+Key: VREDUCESHZrmi: [ 0.00 0.00 ]
+Key: VREDUCESHZrmik: [ 0.00 0.00 ]
+Key: VREDUCESHZrmikz: [ 0.00 0.00 ]
+Key: VREDUCESHZrri: [ 0.00 0.00 ]
+Key: VREDUCESHZrrib: [ 0.00 0.00 ]
+Key: VREDUCESHZrribk: [ 0.00 0.00 ]
+Key: VREDUCESHZrribkz: [ 0.00 0.00 ]
+Key: VREDUCESHZrrik: [ 0.00 0.00 ]
+Key: VREDUCESHZrrikz: [ 0.00 0.00 ]
+Key: VREDUCESSZrmi: [ 0.00 0.00 ]
+Key: VREDUCESSZrmik: [ 0.00 0.00 ]
+Key: VREDUCESSZrmikz: [ 0.00 0.00 ]
+Key: VREDUCESSZrri: [ 0.00 0.00 ]
+Key: VREDUCESSZrrib: [ 0.00 0.00 ]
+Key: VREDUCESSZrribk: [ 0.00 0.00 ]
+Key: VREDUCESSZrribkz: [ 0.00 0.00 ]
+Key: VREDUCESSZrrik: [ 0.00 0.00 ]
+Key: VREDUCESSZrrikz: [ 0.00 0.00 ]
+Key: VRNDSCALEBF: [ 0.00 0.00 ]
+Key: VRNDSCALEPDZ: [ 0.00 0.00 ]
+Key: VRNDSCALEPDZrmbi: [ 0.00 0.00 ]
+Key: VRNDSCALEPDZrmbik: [ 0.00 0.00 ]
+Key: VRNDSCALEPDZrmbikz: [ 0.00 0.00 ]
+Key: VRNDSCALEPDZrmi: [ 0.00 0.00 ]
+Key: VRNDSCALEPDZrmik: [ 0.00 0.00 ]
+Key: VRNDSCALEPDZrmikz: [ 0.00 0.00 ]
+Key: VRNDSCALEPDZrri: [ 0.00 0.00 ]
+Key: VRNDSCALEPDZrrib: [ 0.00 0.00 ]
+Key: VRNDSCALEPDZrribk: [ 0.00 0.00 ]
+Key: VRNDSCALEPDZrribkz: [ 0.00 0.00 ]
+Key: VRNDSCALEPDZrrik: [ 0.00 0.00 ]
+Key: VRNDSCALEPDZrrikz: [ 0.00 0.00 ]
+Key: VRNDSCALEPHZ: [ 0.00 0.00 ]
+Key: VRNDSCALEPHZrmbi: [ 0.00 0.00 ]
+Key: VRNDSCALEPHZrmbik: [ 0.00 0.00 ]
+Key: VRNDSCALEPHZrmbikz: [ 0.00 0.00 ]
+Key: VRNDSCALEPHZrmi: [ 0.00 0.00 ]
+Key: VRNDSCALEPHZrmik: [ 0.00 0.00 ]
+Key: VRNDSCALEPHZrmikz: [ 0.00 0.00 ]
+Key: VRNDSCALEPHZrri: [ 0.00 0.00 ]
+Key: VRNDSCALEPHZrrib: [ 0.00 0.00 ]
+Key: VRNDSCALEPHZrribk: [ 0.00 0.00 ]
+Key: VRNDSCALEPHZrribkz: [ 0.00 0.00 ]
+Key: VRNDSCALEPHZrrik: [ 0.00 0.00 ]
+Key: VRNDSCALEPHZrrikz: [ 0.00 0.00 ]
+Key: VRNDSCALEPSZ: [ 0.00 0.00 ]
+Key: VRNDSCALEPSZrmbi: [ 0.00 0.00 ]
+Key: VRNDSCALEPSZrmbik: [ 0.00 0.00 ]
+Key: VRNDSCALEPSZrmbikz: [ 0.00 0.00 ]
+Key: VRNDSCALEPSZrmi: [ 0.00 0.00 ]
+Key: VRNDSCALEPSZrmik: [ 0.00 0.00 ]
+Key: VRNDSCALEPSZrmikz: [ 0.00 0.00 ]
+Key: VRNDSCALEPSZrri: [ 0.00 0.00 ]
+Key: VRNDSCALEPSZrrib: [ 0.00 0.00 ]
+Key: VRNDSCALEPSZrribk: [ 0.00 0.00 ]
+Key: VRNDSCALEPSZrribkz: [ 0.00 0.00 ]
+Key: VRNDSCALEPSZrrik: [ 0.00 0.00 ]
+Key: VRNDSCALEPSZrrikz: [ 0.00 0.00 ]
+Key: VRNDSCALESDZrmi: [ 0.00 0.00 ]
+Key: VRNDSCALESDZrmi_Int: [ 0.00 0.00 ]
+Key: VRNDSCALESDZrmik_Int: [ 0.00 0.00 ]
+Key: VRNDSCALESDZrmikz_Int: [ 0.00 0.00 ]
+Key: VRNDSCALESDZrri: [ 0.00 0.00 ]
+Key: VRNDSCALESDZrri_Int: [ 0.00 0.00 ]
+Key: VRNDSCALESDZrrib_Int: [ 0.00 0.00 ]
+Key: VRNDSCALESDZrribk_Int: [ 0.00 0.00 ]
+Key: VRNDSCALESDZrribkz_Int: [ 0.00 0.00 ]
+Key: VRNDSCALESDZrrik_Int: [ 0.00 0.00 ]
+Key: VRNDSCALESDZrrikz_Int: [ 0.00 0.00 ]
+Key: VRNDSCALESHZrmi: [ 0.00 0.00 ]
+Key: VRNDSCALESHZrmi_Int: [ 0.00 0.00 ]
+Key: VRNDSCALESHZrmik_Int: [ 0.00 0.00 ]
+Key: VRNDSCALESHZrmikz_Int: [ 0.00 0.00 ]
+Key: VRNDSCALESHZrri: [ 0.00 0.00 ]
+Key: VRNDSCALESHZrri_Int: [ 0.00 0.00 ]
+Key: VRNDSCALESHZrrib_Int: [ 0.00 0.00 ]
+Key: VRNDSCALESHZrribk_Int: [ 0.00 0.00 ]
+Key: VRNDSCALESHZrribkz_Int: [ 0.00 0.00 ]
+Key: VRNDSCALESHZrrik_Int: [ 0.00 0.00 ]
+Key: VRNDSCALESHZrrikz_Int: [ 0.00 0.00 ]
+Key: VRNDSCALESSZrmi: [ 0.00 0.00 ]
+Key: VRNDSCALESSZrmi_Int: [ 0.00 0.00 ]
+Key: VRNDSCALESSZrmik_Int: [ 0.00 0.00 ]
+Key: VRNDSCALESSZrmikz_Int: [ 0.00 0.00 ]
+Key: VRNDSCALESSZrri: [ 0.00 0.00 ]
+Key: VRNDSCALESSZrri_Int: [ 0.00 0.00 ]
+Key: VRNDSCALESSZrrib_Int: [ 0.00 0.00 ]
+Key: VRNDSCALESSZrribk_Int: [ 0.00 0.00 ]
+Key: VRNDSCALESSZrribkz_Int: [ 0.00 0.00 ]
+Key: VRNDSCALESSZrrik_Int: [ 0.00 0.00 ]
+Key: VRNDSCALESSZrrikz_Int: [ 0.00 0.00 ]
+Key: VROUNDPDYmi: [ 0.00 0.00 ]
+Key: VROUNDPDYri: [ 0.00 0.00 ]
+Key: VROUNDPDmi: [ 0.00 0.00 ]
+Key: VROUNDPDri: [ 0.00 0.00 ]
+Key: VROUNDPSYmi: [ 0.00 0.00 ]
+Key: VROUNDPSYri: [ 0.00 0.00 ]
+Key: VROUNDPSmi: [ 0.00 0.00 ]
+Key: VROUNDPSri: [ 0.00 0.00 ]
+Key: VROUNDSDmi: [ 0.00 0.00 ]
+Key: VROUNDSDmi_Int: [ 0.00 0.00 ]
+Key: VROUNDSDri: [ 0.00 0.00 ]
+Key: VROUNDSDri_Int: [ 0.00 0.00 ]
+Key: VROUNDSSmi: [ 0.00 0.00 ]
+Key: VROUNDSSmi_Int: [ 0.00 0.00 ]
+Key: VROUNDSSri: [ 0.00 0.00 ]
+Key: VROUNDSSri_Int: [ 0.00 0.00 ]
+Key: VRSQRT: [ 0.00 0.00 ]
+Key: VRSQRTBF: [ 0.00 0.00 ]
+Key: VRSQRTPHZ: [ 0.00 0.00 ]
+Key: VRSQRTPHZm: [ 0.00 0.00 ]
+Key: VRSQRTPHZmb: [ 0.00 0.00 ]
+Key: VRSQRTPHZmbk: [ 0.00 0.00 ]
+Key: VRSQRTPHZmbkz: [ 0.00 0.00 ]
+Key: VRSQRTPHZmk: [ 0.00 0.00 ]
+Key: VRSQRTPHZmkz: [ 0.00 0.00 ]
+Key: VRSQRTPHZr: [ 0.00 0.00 ]
+Key: VRSQRTPHZrk: [ 0.00 0.00 ]
+Key: VRSQRTPHZrkz: [ 0.00 0.00 ]
+Key: VRSQRTPSYm: [ 0.00 0.00 ]
+Key: VRSQRTPSYr: [ 0.00 0.00 ]
+Key: VRSQRTPSm: [ 0.00 0.00 ]
+Key: VRSQRTPSr: [ 0.00 0.00 ]
+Key: VRSQRTSHZrm: [ 0.00 0.00 ]
+Key: VRSQRTSHZrmk: [ 0.00 0.00 ]
+Key: VRSQRTSHZrmkz: [ 0.00 0.00 ]
+Key: VRSQRTSHZrr: [ 0.00 0.00 ]
+Key: VRSQRTSHZrrk: [ 0.00 0.00 ]
+Key: VRSQRTSHZrrkz: [ 0.00 0.00 ]
+Key: VRSQRTSSm: [ 0.00 0.00 ]
+Key: VRSQRTSSm_Int: [ 0.00 0.00 ]
+Key: VRSQRTSSr: [ 0.00 0.00 ]
+Key: VRSQRTSSr_Int: [ 0.00 0.00 ]
+Key: VSCALEFBF: [ 0.00 0.00 ]
+Key: VSCALEFPDZ: [ 0.00 0.00 ]
+Key: VSCALEFPDZrm: [ 0.00 0.00 ]
+Key: VSCALEFPDZrmb: [ 0.00 0.00 ]
+Key: VSCALEFPDZrmbk: [ 0.00 0.00 ]
+Key: VSCALEFPDZrmbkz: [ 0.00 0.00 ]
+Key: VSCALEFPDZrmk: [ 0.00 0.00 ]
+Key: VSCALEFPDZrmkz: [ 0.00 0.00 ]
+Key: VSCALEFPDZrr: [ 0.00 0.00 ]
+Key: VSCALEFPDZrrb: [ 0.00 0.00 ]
+Key: VSCALEFPDZrrbk: [ 0.00 0.00 ]
+Key: VSCALEFPDZrrbkz: [ 0.00 0.00 ]
+Key: VSCALEFPDZrrk: [ 0.00 0.00 ]
+Key: VSCALEFPDZrrkz: [ 0.00 0.00 ]
+Key: VSCALEFPHZ: [ 0.00 0.00 ]
+Key: VSCALEFPHZrm: [ 0.00 0.00 ]
+Key: VSCALEFPHZrmb: [ 0.00 0.00 ]
+Key: VSCALEFPHZrmbk: [ 0.00 0.00 ]
+Key: VSCALEFPHZrmbkz: [ 0.00 0.00 ]
+Key: VSCALEFPHZrmk: [ 0.00 0.00 ]
+Key: VSCALEFPHZrmkz: [ 0.00 0.00 ]
+Key: VSCALEFPHZrr: [ 0.00 0.00 ]
+Key: VSCALEFPHZrrb: [ 0.00 0.00 ]
+Key: VSCALEFPHZrrbk: [ 0.00 0.00 ]
+Key: VSCALEFPHZrrbkz: [ 0.00 0.00 ]
+Key: VSCALEFPHZrrk: [ 0.00 0.00 ]
+Key: VSCALEFPHZrrkz: [ 0.00 0.00 ]
+Key: VSCALEFPSZ: [ 0.00 0.00 ]
+Key: VSCALEFPSZrm: [ 0.00 0.00 ]
+Key: VSCALEFPSZrmb: [ 0.00 0.00 ]
+Key: VSCALEFPSZrmbk: [ 0.00 0.00 ]
+Key: VSCALEFPSZrmbkz: [ 0.00 0.00 ]
+Key: VSCALEFPSZrmk: [ 0.00 0.00 ]
+Key: VSCALEFPSZrmkz: [ 0.00 0.00 ]
+Key: VSCALEFPSZrr: [ 0.00 0.00 ]
+Key: VSCALEFPSZrrb: [ 0.00 0.00 ]
+Key: VSCALEFPSZrrbk: [ 0.00 0.00 ]
+Key: VSCALEFPSZrrbkz: [ 0.00 0.00 ]
+Key: VSCALEFPSZrrk: [ 0.00 0.00 ]
+Key: VSCALEFPSZrrkz: [ 0.00 0.00 ]
+Key: VSCALEFSDZrm: [ 0.00 0.00 ]
+Key: VSCALEFSDZrmk: [ 0.00 0.00 ]
+Key: VSCALEFSDZrmkz: [ 0.00 0.00 ]
+Key: VSCALEFSDZrr: [ 0.00 0.00 ]
+Key: VSCALEFSDZrrb_Int: [ 0.00 0.00 ]
+Key: VSCALEFSDZrrbk_Int: [ 0.00 0.00 ]
+Key: VSCALEFSDZrrbkz_Int: [ 0.00 0.00 ]
+Key: VSCALEFSDZrrk: [ 0.00 0.00 ]
+Key: VSCALEFSDZrrkz: [ 0.00 0.00 ]
+Key: VSCALEFSHZrm: [ 0.00 0.00 ]
+Key: VSCALEFSHZrmk: [ 0.00 0.00 ]
+Key: VSCALEFSHZrmkz: [ 0.00 0.00 ]
+Key: VSCALEFSHZrr: [ 0.00 0.00 ]
+Key: VSCALEFSHZrrb_Int: [ 0.00 0.00 ]
+Key: VSCALEFSHZrrbk_Int: [ 0.00 0.00 ]
+Key: VSCALEFSHZrrbkz_Int: [ 0.00 0.00 ]
+Key: VSCALEFSHZrrk: [ 0.00 0.00 ]
+Key: VSCALEFSHZrrkz: [ 0.00 0.00 ]
+Key: VSCALEFSSZrm: [ 0.00 0.00 ]
+Key: VSCALEFSSZrmk: [ 0.00 0.00 ]
+Key: VSCALEFSSZrmkz: [ 0.00 0.00 ]
+Key: VSCALEFSSZrr: [ 0.00 0.00 ]
+Key: VSCALEFSSZrrb_Int: [ 0.00 0.00 ]
+Key: VSCALEFSSZrrbk_Int: [ 0.00 0.00 ]
+Key: VSCALEFSSZrrbkz_Int: [ 0.00 0.00 ]
+Key: VSCALEFSSZrrk: [ 0.00 0.00 ]
+Key: VSCALEFSSZrrkz: [ 0.00 0.00 ]
+Key: VSCATTERDPDZ: [ 0.00 0.00 ]
+Key: VSCATTERDPDZmr: [ 0.00 0.00 ]
+Key: VSCATTERDPSZ: [ 0.00 0.00 ]
+Key: VSCATTERDPSZmr: [ 0.00 0.00 ]
+Key: VSCATTERPF: [ 0.00 0.00 ]
+Key: VSCATTERQPDZ: [ 0.00 0.00 ]
+Key: VSCATTERQPDZmr: [ 0.00 0.00 ]
+Key: VSCATTERQPSZ: [ 0.00 0.00 ]
+Key: VSCATTERQPSZmr: [ 0.00 0.00 ]
+Key: VSHA: [ 0.00 0.00 ]
+Key: VSHUFF: [ 0.00 0.00 ]
+Key: VSHUFI: [ 0.00 0.00 ]
+Key: VSHUFPDYrmi: [ 0.00 0.00 ]
+Key: VSHUFPDYrri: [ 0.00 0.00 ]
+Key: VSHUFPDZ: [ 0.00 0.00 ]
+Key: VSHUFPDZrmbi: [ 0.00 0.00 ]
+Key: VSHUFPDZrmbik: [ 0.00 0.00 ]
+Key: VSHUFPDZrmbikz: [ 0.00 0.00 ]
+Key: VSHUFPDZrmi: [ 0.00 0.00 ]
+Key: VSHUFPDZrmik: [ 0.00 0.00 ]
+Key: VSHUFPDZrmikz: [ 0.00 0.00 ]
+Key: VSHUFPDZrri: [ 0.00 0.00 ]
+Key: VSHUFPDZrrik: [ 0.00 0.00 ]
+Key: VSHUFPDZrrikz: [ 0.00 0.00 ]
+Key: VSHUFPDrmi: [ 0.00 0.00 ]
+Key: VSHUFPDrri: [ 0.00 0.00 ]
+Key: VSHUFPSYrmi: [ 0.00 0.00 ]
+Key: VSHUFPSYrri: [ 0.00 0.00 ]
+Key: VSHUFPSZ: [ 0.00 0.00 ]
+Key: VSHUFPSZrmbi: [ 0.00 0.00 ]
+Key: VSHUFPSZrmbik: [ 0.00 0.00 ]
+Key: VSHUFPSZrmbikz: [ 0.00 0.00 ]
+Key: VSHUFPSZrmi: [ 0.00 0.00 ]
+Key: VSHUFPSZrmik: [ 0.00 0.00 ]
+Key: VSHUFPSZrmikz: [ 0.00 0.00 ]
+Key: VSHUFPSZrri: [ 0.00 0.00 ]
+Key: VSHUFPSZrrik: [ 0.00 0.00 ]
+Key: VSHUFPSZrrikz: [ 0.00 0.00 ]
+Key: VSHUFPSrmi: [ 0.00 0.00 ]
+Key: VSHUFPSrri: [ 0.00 0.00 ]
+Key: VSM: [ 0.00 0.00 ]
+Key: VSQRTBF: [ 0.00 0.00 ]
+Key: VSQRTPDYm: [ 0.00 0.00 ]
+Key: VSQRTPDYr: [ 0.00 0.00 ]
+Key: VSQRTPDZ: [ 0.00 0.00 ]
+Key: VSQRTPDZm: [ 0.00 0.00 ]
+Key: VSQRTPDZmb: [ 0.00 0.00 ]
+Key: VSQRTPDZmbk: [ 0.00 0.00 ]
+Key: VSQRTPDZmbkz: [ 0.00 0.00 ]
+Key: VSQRTPDZmk: [ 0.00 0.00 ]
+Key: VSQRTPDZmkz: [ 0.00 0.00 ]
+Key: VSQRTPDZr: [ 0.00 0.00 ]
+Key: VSQRTPDZrb: [ 0.00 0.00 ]
+Key: VSQRTPDZrbk: [ 0.00 0.00 ]
+Key: VSQRTPDZrbkz: [ 0.00 0.00 ]
+Key: VSQRTPDZrk: [ 0.00 0.00 ]
+Key: VSQRTPDZrkz: [ 0.00 0.00 ]
+Key: VSQRTPDm: [ 0.00 0.00 ]
+Key: VSQRTPDr: [ 0.00 0.00 ]
+Key: VSQRTPHZ: [ 0.00 0.00 ]
+Key: VSQRTPHZm: [ 0.00 0.00 ]
+Key: VSQRTPHZmb: [ 0.00 0.00 ]
+Key: VSQRTPHZmbk: [ 0.00 0.00 ]
+Key: VSQRTPHZmbkz: [ 0.00 0.00 ]
+Key: VSQRTPHZmk: [ 0.00 0.00 ]
+Key: VSQRTPHZmkz: [ 0.00 0.00 ]
+Key: VSQRTPHZr: [ 0.00 0.00 ]
+Key: VSQRTPHZrb: [ 0.00 0.00 ]
+Key: VSQRTPHZrbk: [ 0.00 0.00 ]
+Key: VSQRTPHZrbkz: [ 0.00 0.00 ]
+Key: VSQRTPHZrk: [ 0.00 0.00 ]
+Key: VSQRTPHZrkz: [ 0.00 0.00 ]
+Key: VSQRTPSYm: [ 0.00 0.00 ]
+Key: VSQRTPSYr: [ 0.00 0.00 ]
+Key: VSQRTPSZ: [ 0.00 0.00 ]
+Key: VSQRTPSZm: [ 0.00 0.00 ]
+Key: VSQRTPSZmb: [ 0.00 0.00 ]
+Key: VSQRTPSZmbk: [ 0.00 0.00 ]
+Key: VSQRTPSZmbkz: [ 0.00 0.00 ]
+Key: VSQRTPSZmk: [ 0.00 0.00 ]
+Key: VSQRTPSZmkz: [ 0.00 0.00 ]
+Key: VSQRTPSZr: [ 0.00 0.00 ]
+Key: VSQRTPSZrb: [ 0.00 0.00 ]
+Key: VSQRTPSZrbk: [ 0.00 0.00 ]
+Key: VSQRTPSZrbkz: [ 0.00 0.00 ]
+Key: VSQRTPSZrk: [ 0.00 0.00 ]
+Key: VSQRTPSZrkz: [ 0.00 0.00 ]
+Key: VSQRTPSm: [ 0.00 0.00 ]
+Key: VSQRTPSr: [ 0.00 0.00 ]
+Key: VSQRTSDZm: [ 0.00 0.00 ]
+Key: VSQRTSDZm_Int: [ 0.00 0.00 ]
+Key: VSQRTSDZmk_Int: [ 0.00 0.00 ]
+Key: VSQRTSDZmkz_Int: [ 0.00 0.00 ]
+Key: VSQRTSDZr: [ 0.00 0.00 ]
+Key: VSQRTSDZr_Int: [ 0.00 0.00 ]
+Key: VSQRTSDZrb_Int: [ 0.00 0.00 ]
+Key: VSQRTSDZrbk_Int: [ 0.00 0.00 ]
+Key: VSQRTSDZrbkz_Int: [ 0.00 0.00 ]
+Key: VSQRTSDZrk_Int: [ 0.00 0.00 ]
+Key: VSQRTSDZrkz_Int: [ 0.00 0.00 ]
+Key: VSQRTSDm: [ 0.00 0.00 ]
+Key: VSQRTSDm_Int: [ 0.00 0.00 ]
+Key: VSQRTSDr: [ 0.00 0.00 ]
+Key: VSQRTSDr_Int: [ 0.00 0.00 ]
+Key: VSQRTSHZm: [ 0.00 0.00 ]
+Key: VSQRTSHZm_Int: [ 0.00 0.00 ]
+Key: VSQRTSHZmk_Int: [ 0.00 0.00 ]
+Key: VSQRTSHZmkz_Int: [ 0.00 0.00 ]
+Key: VSQRTSHZr: [ 0.00 0.00 ]
+Key: VSQRTSHZr_Int: [ 0.00 0.00 ]
+Key: VSQRTSHZrb_Int: [ 0.00 0.00 ]
+Key: VSQRTSHZrbk_Int: [ 0.00 0.00 ]
+Key: VSQRTSHZrbkz_Int: [ 0.00 0.00 ]
+Key: VSQRTSHZrk_Int: [ 0.00 0.00 ]
+Key: VSQRTSHZrkz_Int: [ 0.00 0.00 ]
+Key: VSQRTSSZm: [ 0.00 0.00 ]
+Key: VSQRTSSZm_Int: [ 0.00 0.00 ]
+Key: VSQRTSSZmk_Int: [ 0.00 0.00 ]
+Key: VSQRTSSZmkz_Int: [ 0.00 0.00 ]
+Key: VSQRTSSZr: [ 0.00 0.00 ]
+Key: VSQRTSSZr_Int: [ 0.00 0.00 ]
+Key: VSQRTSSZrb_Int: [ 0.00 0.00 ]
+Key: VSQRTSSZrbk_Int: [ 0.00 0.00 ]
+Key: VSQRTSSZrbkz_Int: [ 0.00 0.00 ]
+Key: VSQRTSSZrk_Int: [ 0.00 0.00 ]
+Key: VSQRTSSZrkz_Int: [ 0.00 0.00 ]
+Key: VSQRTSSm: [ 0.00 0.00 ]
+Key: VSQRTSSm_Int: [ 0.00 0.00 ]
+Key: VSQRTSSr: [ 0.00 0.00 ]
+Key: VSQRTSSr_Int: [ 0.00 0.00 ]
+Key: VSTMXCSR: [ 0.00 0.00 ]
+Key: VSUBBF: [ 0.00 0.00 ]
+Key: VSUBPDYrm: [ 0.00 0.00 ]
+Key: VSUBPDYrr: [ 0.00 0.00 ]
+Key: VSUBPDZ: [ 0.00 0.00 ]
+Key: VSUBPDZrm: [ 0.00 0.00 ]
+Key: VSUBPDZrmb: [ 0.00 0.00 ]
+Key: VSUBPDZrmbk: [ 0.00 0.00 ]
+Key: VSUBPDZrmbkz: [ 0.00 0.00 ]
+Key: VSUBPDZrmk: [ 0.00 0.00 ]
+Key: VSUBPDZrmkz: [ 0.00 0.00 ]
+Key: VSUBPDZrr: [ 0.00 0.00 ]
+Key: VSUBPDZrrb: [ 0.00 0.00 ]
+Key: VSUBPDZrrbk: [ 0.00 0.00 ]
+Key: VSUBPDZrrbkz: [ 0.00 0.00 ]
+Key: VSUBPDZrrk: [ 0.00 0.00 ]
+Key: VSUBPDZrrkz: [ 0.00 0.00 ]
+Key: VSUBPDrm: [ 0.00 0.00 ]
+Key: VSUBPDrr: [ 0.00 0.00 ]
+Key: VSUBPHZ: [ 0.00 0.00 ]
+Key: VSUBPHZrm: [ 0.00 0.00 ]
+Key: VSUBPHZrmb: [ 0.00 0.00 ]
+Key: VSUBPHZrmbk: [ 0.00 0.00 ]
+Key: VSUBPHZrmbkz: [ 0.00 0.00 ]
+Key: VSUBPHZrmk: [ 0.00 0.00 ]
+Key: VSUBPHZrmkz: [ 0.00 0.00 ]
+Key: VSUBPHZrr: [ 0.00 0.00 ]
+Key: VSUBPHZrrb: [ 0.00 0.00 ]
+Key: VSUBPHZrrbk: [ 0.00 0.00 ]
+Key: VSUBPHZrrbkz: [ 0.00 0.00 ]
+Key: VSUBPHZrrk: [ 0.00 0.00 ]
+Key: VSUBPHZrrkz: [ 0.00 0.00 ]
+Key: VSUBPSYrm: [ 0.00 0.00 ]
+Key: VSUBPSYrr: [ 0.00 0.00 ]
+Key: VSUBPSZ: [ 0.00 0.00 ]
+Key: VSUBPSZrm: [ 0.00 0.00 ]
+Key: VSUBPSZrmb: [ 0.00 0.00 ]
+Key: VSUBPSZrmbk: [ 0.00 0.00 ]
+Key: VSUBPSZrmbkz: [ 0.00 0.00 ]
+Key: VSUBPSZrmk: [ 0.00 0.00 ]
+Key: VSUBPSZrmkz: [ 0.00 0.00 ]
+Key: VSUBPSZrr: [ 0.00 0.00 ]
+Key: VSUBPSZrrb: [ 0.00 0.00 ]
+Key: VSUBPSZrrbk: [ 0.00 0.00 ]
+Key: VSUBPSZrrbkz: [ 0.00 0.00 ]
+Key: VSUBPSZrrk: [ 0.00 0.00 ]
+Key: VSUBPSZrrkz: [ 0.00 0.00 ]
+Key: VSUBPSrm: [ 0.00 0.00 ]
+Key: VSUBPSrr: [ 0.00 0.00 ]
+Key: VSUBSDZrm: [ 0.00 0.00 ]
+Key: VSUBSDZrm_Int: [ 0.00 0.00 ]
+Key: VSUBSDZrmk_Int: [ 0.00 0.00 ]
+Key: VSUBSDZrmkz_Int: [ 0.00 0.00 ]
+Key: VSUBSDZrr: [ 0.00 0.00 ]
+Key: VSUBSDZrr_Int: [ 0.00 0.00 ]
+Key: VSUBSDZrrb_Int: [ 0.00 0.00 ]
+Key: VSUBSDZrrbk_Int: [ 0.00 0.00 ]
+Key: VSUBSDZrrbkz_Int: [ 0.00 0.00 ]
+Key: VSUBSDZrrk_Int: [ 0.00 0.00 ]
+Key: VSUBSDZrrkz_Int: [ 0.00 0.00 ]
+Key: VSUBSDrm: [ 0.00 0.00 ]
+Key: VSUBSDrm_Int: [ 0.00 0.00 ]
+Key: VSUBSDrr: [ 0.00 0.00 ]
+Key: VSUBSDrr_Int: [ 0.00 0.00 ]
+Key: VSUBSHZrm: [ 0.00 0.00 ]
+Key: VSUBSHZrm_Int: [ 0.00 0.00 ]
+Key: VSUBSHZrmk_Int: [ 0.00 0.00 ]
+Key: VSUBSHZrmkz_Int: [ 0.00 0.00 ]
+Key: VSUBSHZrr: [ 0.00 0.00 ]
+Key: VSUBSHZrr_Int: [ 0.00 0.00 ]
+Key: VSUBSHZrrb_Int: [ 0.00 0.00 ]
+Key: VSUBSHZrrbk_Int: [ 0.00 0.00 ]
+Key: VSUBSHZrrbkz_Int: [ 0.00 0.00 ]
+Key: VSUBSHZrrk_Int: [ 0.00 0.00 ]
+Key: VSUBSHZrrkz_Int: [ 0.00 0.00 ]
+Key: VSUBSSZrm: [ 0.00 0.00 ]
+Key: VSUBSSZrm_Int: [ 0.00 0.00 ]
+Key: VSUBSSZrmk_Int: [ 0.00 0.00 ]
+Key: VSUBSSZrmkz_Int: [ 0.00 0.00 ]
+Key: VSUBSSZrr: [ 0.00 0.00 ]
+Key: VSUBSSZrr_Int: [ 0.00 0.00 ]
+Key: VSUBSSZrrb_Int: [ 0.00 0.00 ]
+Key: VSUBSSZrrbk_Int: [ 0.00 0.00 ]
+Key: VSUBSSZrrbkz_Int: [ 0.00 0.00 ]
+Key: VSUBSSZrrk_Int: [ 0.00 0.00 ]
+Key: VSUBSSZrrkz_Int: [ 0.00 0.00 ]
+Key: VSUBSSrm: [ 0.00 0.00 ]
+Key: VSUBSSrm_Int: [ 0.00 0.00 ]
+Key: VSUBSSrr: [ 0.00 0.00 ]
+Key: VSUBSSrr_Int: [ 0.00 0.00 ]
+Key: VTESTPDYrm: [ 0.00 0.00 ]
+Key: VTESTPDYrr: [ 0.00 0.00 ]
+Key: VTESTPDrm: [ 0.00 0.00 ]
+Key: VTESTPDrr: [ 0.00 0.00 ]
+Key: VTESTPSYrm: [ 0.00 0.00 ]
+Key: VTESTPSYrr: [ 0.00 0.00 ]
+Key: VTESTPSrm: [ 0.00 0.00 ]
+Key: VTESTPSrr: [ 0.00 0.00 ]
+Key: VUCOMISDZrm: [ 0.00 0.00 ]
+Key: VUCOMISDZrm_Int: [ 0.00 0.00 ]
+Key: VUCOMISDZrr: [ 0.00 0.00 ]
+Key: VUCOMISDZrr_Int: [ 0.00 0.00 ]
+Key: VUCOMISDZrrb: [ 0.00 0.00 ]
+Key: VUCOMISDrm: [ 0.00 0.00 ]
+Key: VUCOMISDrm_Int: [ 0.00 0.00 ]
+Key: VUCOMISDrr: [ 0.00 0.00 ]
+Key: VUCOMISDrr_Int: [ 0.00 0.00 ]
+Key: VUCOMISHZrm: [ 0.00 0.00 ]
+Key: VUCOMISHZrm_Int: [ 0.00 0.00 ]
+Key: VUCOMISHZrr: [ 0.00 0.00 ]
+Key: VUCOMISHZrr_Int: [ 0.00 0.00 ]
+Key: VUCOMISHZrrb: [ 0.00 0.00 ]
+Key: VUCOMISSZrm: [ 0.00 0.00 ]
+Key: VUCOMISSZrm_Int: [ 0.00 0.00 ]
+Key: VUCOMISSZrr: [ 0.00 0.00 ]
+Key: VUCOMISSZrr_Int: [ 0.00 0.00 ]
+Key: VUCOMISSZrrb: [ 0.00 0.00 ]
+Key: VUCOMISSrm: [ 0.00 0.00 ]
+Key: VUCOMISSrm_Int: [ 0.00 0.00 ]
+Key: VUCOMISSrr: [ 0.00 0.00 ]
+Key: VUCOMISSrr_Int: [ 0.00 0.00 ]
+Key: VUCOMXSDZrm: [ 0.00 0.00 ]
+Key: VUCOMXSDZrm_Int: [ 0.00 0.00 ]
+Key: VUCOMXSDZrr: [ 0.00 0.00 ]
+Key: VUCOMXSDZrr_Int: [ 0.00 0.00 ]
+Key: VUCOMXSDZrrb_Int: [ 0.00 0.00 ]
+Key: VUCOMXSHZrm: [ 0.00 0.00 ]
+Key: VUCOMXSHZrm_Int: [ 0.00 0.00 ]
+Key: VUCOMXSHZrr: [ 0.00 0.00 ]
+Key: VUCOMXSHZrr_Int: [ 0.00 0.00 ]
+Key: VUCOMXSHZrrb_Int: [ 0.00 0.00 ]
+Key: VUCOMXSSZrm: [ 0.00 0.00 ]
+Key: VUCOMXSSZrm_Int: [ 0.00 0.00 ]
+Key: VUCOMXSSZrr: [ 0.00 0.00 ]
+Key: VUCOMXSSZrr_Int: [ 0.00 0.00 ]
+Key: VUCOMXSSZrrb_Int: [ 0.00 0.00 ]
+Key: VUNPCKHPDYrm: [ 0.00 0.00 ]
+Key: VUNPCKHPDYrr: [ 0.00 0.00 ]
+Key: VUNPCKHPDZ: [ 0.00 0.00 ]
+Key: VUNPCKHPDZrm: [ 0.00 0.00 ]
+Key: VUNPCKHPDZrmb: [ 0.00 0.00 ]
+Key: VUNPCKHPDZrmbk: [ 0.00 0.00 ]
+Key: VUNPCKHPDZrmbkz: [ 0.00 0.00 ]
+Key: VUNPCKHPDZrmk: [ 0.00 0.00 ]
+Key: VUNPCKHPDZrmkz: [ 0.00 0.00 ]
+Key: VUNPCKHPDZrr: [ 0.00 0.00 ]
+Key: VUNPCKHPDZrrk: [ 0.00 0.00 ]
+Key: VUNPCKHPDZrrkz: [ 0.00 0.00 ]
+Key: VUNPCKHPDrm: [ 0.00 0.00 ]
+Key: VUNPCKHPDrr: [ 0.00 0.00 ]
+Key: VUNPCKHPSYrm: [ 0.00 0.00 ]
+Key: VUNPCKHPSYrr: [ 0.00 0.00 ]
+Key: VUNPCKHPSZ: [ 0.00 0.00 ]
+Key: VUNPCKHPSZrm: [ 0.00 0.00 ]
+Key: VUNPCKHPSZrmb: [ 0.00 0.00 ]
+Key: VUNPCKHPSZrmbk: [ 0.00 0.00 ]
+Key: VUNPCKHPSZrmbkz: [ 0.00 0.00 ]
+Key: VUNPCKHPSZrmk: [ 0.00 0.00 ]
+Key: VUNPCKHPSZrmkz: [ 0.00 0.00 ]
+Key: VUNPCKHPSZrr: [ 0.00 0.00 ]
+Key: VUNPCKHPSZrrk: [ 0.00 0.00 ]
+Key: VUNPCKHPSZrrkz: [ 0.00 0.00 ]
+Key: VUNPCKHPSrm: [ 0.00 0.00 ]
+Key: VUNPCKHPSrr: [ 0.00 0.00 ]
+Key: VUNPCKLPDYrm: [ 0.00 0.00 ]
+Key: VUNPCKLPDYrr: [ 0.00 0.00 ]
+Key: VUNPCKLPDZ: [ 0.00 0.00 ]
+Key: VUNPCKLPDZrm: [ 0.00 0.00 ]
+Key: VUNPCKLPDZrmb: [ 0.00 0.00 ]
+Key: VUNPCKLPDZrmbk: [ 0.00 0.00 ]
+Key: VUNPCKLPDZrmbkz: [ 0.00 0.00 ]
+Key: VUNPCKLPDZrmk: [ 0.00 0.00 ]
+Key: VUNPCKLPDZrmkz: [ 0.00 0.00 ]
+Key: VUNPCKLPDZrr: [ 0.00 0.00 ]
+Key: VUNPCKLPDZrrk: [ 0.00 0.00 ]
+Key: VUNPCKLPDZrrkz: [ 0.00 0.00 ]
+Key: VUNPCKLPDrm: [ 0.00 0.00 ]
+Key: VUNPCKLPDrr: [ 0.00 0.00 ]
+Key: VUNPCKLPSYrm: [ 0.00 0.00 ]
+Key: VUNPCKLPSYrr: [ 0.00 0.00 ]
+Key: VUNPCKLPSZ: [ 0.00 0.00 ]
+Key: VUNPCKLPSZrm: [ 0.00 0.00 ]
+Key: VUNPCKLPSZrmb: [ 0.00 0.00 ]
+Key: VUNPCKLPSZrmbk: [ 0.00 0.00 ]
+Key: VUNPCKLPSZrmbkz: [ 0.00 0.00 ]
+Key: VUNPCKLPSZrmk: [ 0.00 0.00 ]
+Key: VUNPCKLPSZrmkz: [ 0.00 0.00 ]
+Key: VUNPCKLPSZrr: [ 0.00 0.00 ]
+Key: VUNPCKLPSZrrk: [ 0.00 0.00 ]
+Key: VUNPCKLPSZrrkz: [ 0.00 0.00 ]
+Key: VUNPCKLPSrm: [ 0.00 0.00 ]
+Key: VUNPCKLPSrr: [ 0.00 0.00 ]
+Key: VXORPDYrm: [ 0.00 0.00 ]
+Key: VXORPDYrr: [ 0.00 0.00 ]
+Key: VXORPDZ: [ 0.00 0.00 ]
+Key: VXORPDZrm: [ 0.00 0.00 ]
+Key: VXORPDZrmb: [ 0.00 0.00 ]
+Key: VXORPDZrmbk: [ 0.00 0.00 ]
+Key: VXORPDZrmbkz: [ 0.00 0.00 ]
+Key: VXORPDZrmk: [ 0.00 0.00 ]
+Key: VXORPDZrmkz: [ 0.00 0.00 ]
+Key: VXORPDZrr: [ 0.00 0.00 ]
+Key: VXORPDZrrk: [ 0.00 0.00 ]
+Key: VXORPDZrrkz: [ 0.00 0.00 ]
+Key: VXORPDrm: [ 0.00 0.00 ]
+Key: VXORPDrr: [ 0.00 0.00 ]
+Key: VXORPSYrm: [ 0.00 0.00 ]
+Key: VXORPSYrr: [ 0.00 0.00 ]
+Key: VXORPSZ: [ 0.00 0.00 ]
+Key: VXORPSZrm: [ 0.00 0.00 ]
+Key: VXORPSZrmb: [ 0.00 0.00 ]
+Key: VXORPSZrmbk: [ 0.00 0.00 ]
+Key: VXORPSZrmbkz: [ 0.00 0.00 ]
+Key: VXORPSZrmk: [ 0.00 0.00 ]
+Key: VXORPSZrmkz: [ 0.00 0.00 ]
+Key: VXORPSZrr: [ 0.00 0.00 ]
+Key: VXORPSZrrk: [ 0.00 0.00 ]
+Key: VXORPSZrrkz: [ 0.00 0.00 ]
+Key: VXORPSrm: [ 0.00 0.00 ]
+Key: VXORPSrr: [ 0.00 0.00 ]
+Key: VZEROALL: [ 0.00 0.00 ]
+Key: VZEROUPPER: [ 0.00 0.00 ]
+Key: V_SET: [ 0.00 0.00 ]
+Key: V_SETALLONES: [ 0.00 0.00 ]
+Key: WAIT: [ 0.00 0.00 ]
+Key: WBINVD: [ 0.00 0.00 ]
+Key: WBNOINVD: [ 0.00 0.00 ]
+Key: WRFLAGS: [ 0.00 0.00 ]
+Key: WRFSBASE: [ 0.00 0.00 ]
+Key: WRGSBASE: [ 0.00 0.00 ]
+Key: WRMSR: [ 0.00 0.00 ]
+Key: WRMSRLIST: [ 0.00 0.00 ]
+Key: WRMSRNS: [ 0.00 0.00 ]
+Key: WRMSRNSir: [ 0.00 0.00 ]
+Key: WRMSRNSir_EVEX: [ 0.00 0.00 ]
+Key: WRPKRUr: [ 0.00 0.00 ]
+Key: WRSSD: [ 0.00 0.00 ]
+Key: WRSSD_EVEX: [ 0.00 0.00 ]
+Key: WRSSQ: [ 0.00 0.00 ]
+Key: WRSSQ_EVEX: [ 0.00 0.00 ]
+Key: WRUSSD: [ 0.00 0.00 ]
+Key: WRUSSD_EVEX: [ 0.00 0.00 ]
+Key: WRUSSQ: [ 0.00 0.00 ]
+Key: WRUSSQ_EVEX: [ 0.00 0.00 ]
+Key: XABORT: [ 0.00 0.00 ]
+Key: XABORT_DEF: [ 0.00 0.00 ]
+Key: XACQUIRE_PREFIX: [ 0.00 0.00 ]
+Key: XADD: [ 0.00 0.00 ]
+Key: XAM_F: [ 0.00 0.00 ]
+Key: XAM_Fp: [ 0.00 0.00 ]
+Key: XBEGIN: [ 0.00 0.00 ]
+Key: XCHG: [ 0.00 0.00 ]
+Key: XCH_F: [ 0.00 0.00 ]
+Key: XCRYPTCBC: [ 0.00 0.00 ]
+Key: XCRYPTCFB: [ 0.00 0.00 ]
+Key: XCRYPTCTR: [ 0.00 0.00 ]
+Key: XCRYPTECB: [ 0.00 0.00 ]
+Key: XCRYPTOFB: [ 0.00 0.00 ]
+Key: XEND: [ 0.00 0.00 ]
+Key: XGETBV: [ 0.00 0.00 ]
+Key: XLAT: [ 0.00 0.00 ]
+Key: XOR: [ 0.00 0.00 ]
+Key: XORPDrm: [ 0.00 0.00 ]
+Key: XORPDrr: [ 0.00 0.00 ]
+Key: XORPSrm: [ 0.00 0.00 ]
+Key: XORPSrr: [ 0.00 0.00 ]
+Key: XRELEASE_PREFIX: [ 0.00 0.00 ]
+Key: XRESLDTRK: [ 0.00 0.00 ]
+Key: XRSTOR: [ 0.00 0.00 ]
+Key: XRSTORS: [ 0.00 0.00 ]
+Key: XSAVE: [ 0.00 0.00 ]
+Key: XSAVEC: [ 0.00 0.00 ]
+Key: XSAVEOPT: [ 0.00 0.00 ]
+Key: XSAVES: [ 0.00 0.00 ]
+Key: XSETBV: [ 0.00 0.00 ]
+Key: XSHA: [ 0.00 0.00 ]
+Key: XSTORE: [ 0.00 0.00 ]
+Key: XSUSLDTRK: [ 0.00 0.00 ]
+Key: XTEST: [ 0.00 0.00 ]
diff --git a/llvm/test/CodeGen/MIR2Vec/Inputs/reference_x86_vocab_wo=0.5_print.txt b/llvm/test/CodeGen/MIR2Vec/Inputs/reference_x86_vocab_wo=0.5_print.txt
new file mode 100644
index 0000000..4409e6d
--- /dev/null
+++ b/llvm/test/CodeGen/MIR2Vec/Inputs/reference_x86_vocab_wo=0.5_print.txt
@@ -0,0 +1,6882 @@
+Key: AAA: [ 0.00 0.00 ]
+Key: AAD: [ 0.00 0.00 ]
+Key: AADD: [ 0.00 0.00 ]
+Key: AAM: [ 0.00 0.00 ]
+Key: AAND: [ 0.00 0.00 ]
+Key: AAS: [ 0.00 0.00 ]
+Key: ABS_F: [ 0.00 0.00 ]
+Key: ABS_Fp: [ 0.50 1.00 ]
+Key: ADC: [ 1.50 2.00 ]
+Key: ADCX: [ 0.00 0.00 ]
+Key: ADD: [ 2.50 3.00 ]
+Key: ADDPDrm: [ 3.50 4.00 ]
+Key: ADDPDrr: [ 4.50 5.00 ]
+Key: ADDPSrm: [ 0.00 0.00 ]
+Key: ADDPSrr: [ 5.50 6.00 ]
+Key: ADDR: [ 0.00 0.00 ]
+Key: ADDSDrm: [ 6.50 7.00 ]
+Key: ADDSDrm_Int: [ 0.00 0.00 ]
+Key: ADDSDrr: [ 0.00 0.00 ]
+Key: ADDSDrr_Int: [ 0.00 0.00 ]
+Key: ADDSSrm: [ 0.00 0.00 ]
+Key: ADDSSrm_Int: [ 0.00 0.00 ]
+Key: ADDSSrr: [ 0.00 0.00 ]
+Key: ADDSSrr_Int: [ 0.00 0.00 ]
+Key: ADDSUBPDrm: [ 0.00 0.00 ]
+Key: ADDSUBPDrr: [ 0.00 0.00 ]
+Key: ADDSUBPSrm: [ 0.00 0.00 ]
+Key: ADDSUBPSrr: [ 0.00 0.00 ]
+Key: ADD_F: [ 0.00 0.00 ]
+Key: ADD_FI: [ 0.00 0.00 ]
+Key: ADD_FPrST: [ 0.00 0.00 ]
+Key: ADD_FST: [ 0.00 0.00 ]
+Key: ADD_Fp: [ 0.00 0.00 ]
+Key: ADD_FpI: [ 0.00 0.00 ]
+Key: ADD_FrST: [ 0.00 0.00 ]
+Key: ADJCALLSTACKDOWN: [ 0.00 0.00 ]
+Key: ADJCALLSTACKUP: [ 0.00 0.00 ]
+Key: ADOX: [ 0.00 0.00 ]
+Key: AESDEC: [ 0.00 0.00 ]
+Key: AESDECLASTrm: [ 0.00 0.00 ]
+Key: AESDECLASTrr: [ 0.00 0.00 ]
+Key: AESDECWIDE: [ 0.00 0.00 ]
+Key: AESDECrm: [ 0.00 0.00 ]
+Key: AESDECrr: [ 0.00 0.00 ]
+Key: AESENC: [ 0.00 0.00 ]
+Key: AESENCLASTrm: [ 0.00 0.00 ]
+Key: AESENCLASTrr: [ 0.00 0.00 ]
+Key: AESENCWIDE: [ 0.00 0.00 ]
+Key: AESENCrm: [ 0.00 0.00 ]
+Key: AESENCrr: [ 0.00 0.00 ]
+Key: AESIMCrm: [ 0.00 0.00 ]
+Key: AESIMCrr: [ 0.00 0.00 ]
+Key: AESKEYGENASSISTrmi: [ 0.00 0.00 ]
+Key: AESKEYGENASSISTrri: [ 0.00 0.00 ]
+Key: AND: [ 0.00 0.00 ]
+Key: ANDN: [ 0.00 0.00 ]
+Key: ANDNPDrm: [ 0.00 0.00 ]
+Key: ANDNPDrr: [ 0.00 0.00 ]
+Key: ANDNPSrm: [ 0.00 0.00 ]
+Key: ANDNPSrr: [ 0.00 0.00 ]
+Key: ANDPDrm: [ 0.00 0.00 ]
+Key: ANDPDrr: [ 0.00 0.00 ]
+Key: ANDPSrm: [ 0.00 0.00 ]
+Key: ANDPSrr: [ 0.00 0.00 ]
+Key: ANNOTATION_LABEL: [ 0.00 0.00 ]
+Key: AOR: [ 0.00 0.00 ]
+Key: ARITH_FENCE: [ 0.00 0.00 ]
+Key: ARPL: [ 0.00 0.00 ]
+Key: ASAN_CHECK_MEMACCESS: [ 0.00 0.00 ]
+Key: AVX: [ 0.00 0.00 ]
+Key: AVX_SET: [ 0.00 0.00 ]
+Key: AXOR: [ 0.00 0.00 ]
+Key: BEXTR: [ 0.00 0.00 ]
+Key: BEXTRI: [ 0.00 0.00 ]
+Key: BLCFILL: [ 0.00 0.00 ]
+Key: BLCI: [ 0.00 0.00 ]
+Key: BLCIC: [ 0.00 0.00 ]
+Key: BLCMSK: [ 0.00 0.00 ]
+Key: BLCS: [ 0.00 0.00 ]
+Key: BLENDPDrmi: [ 0.00 0.00 ]
+Key: BLENDPDrri: [ 0.00 0.00 ]
+Key: BLENDPSrmi: [ 0.00 0.00 ]
+Key: BLENDPSrri: [ 0.00 0.00 ]
+Key: BLENDVPDrm: [ 0.00 0.00 ]
+Key: BLENDVPDrr: [ 0.00 0.00 ]
+Key: BLENDVPSrm: [ 0.00 0.00 ]
+Key: BLENDVPSrr: [ 0.00 0.00 ]
+Key: BLSFILL: [ 0.00 0.00 ]
+Key: BLSI: [ 0.00 0.00 ]
+Key: BLSIC: [ 0.00 0.00 ]
+Key: BLSMSK: [ 0.00 0.00 ]
+Key: BLSR: [ 0.00 0.00 ]
+Key: BOUNDS: [ 0.00 0.00 ]
+Key: BSF: [ 0.00 0.00 ]
+Key: BSR: [ 0.00 0.00 ]
+Key: BSWAP: [ 0.00 0.00 ]
+Key: BT: [ 0.00 0.00 ]
+Key: BTC: [ 0.00 0.00 ]
+Key: BTR: [ 0.00 0.00 ]
+Key: BTS: [ 0.00 0.00 ]
+Key: BUNDLE: [ 0.00 0.00 ]
+Key: BZHI: [ 0.00 0.00 ]
+Key: CALL: [ 0.00 0.00 ]
+Key: CALLpcrel: [ 0.00 0.00 ]
+Key: CATCHRET: [ 0.00 0.00 ]
+Key: CBW: [ 0.00 0.00 ]
+Key: CCMP: [ 0.00 0.00 ]
+Key: CDQ: [ 0.00 0.00 ]
+Key: CDQE: [ 0.00 0.00 ]
+Key: CFCMOV: [ 0.00 0.00 ]
+Key: CFI_INSTRUCTION: [ 0.00 0.00 ]
+Key: CHS_F: [ 0.00 0.00 ]
+Key: CHS_Fp: [ 0.00 0.00 ]
+Key: CLAC: [ 0.00 0.00 ]
+Key: CLC: [ 0.00 0.00 ]
+Key: CLD: [ 0.00 0.00 ]
+Key: CLDEMOTE: [ 0.00 0.00 ]
+Key: CLEANUPRET: [ 0.00 0.00 ]
+Key: CLFLUSH: [ 0.00 0.00 ]
+Key: CLFLUSHOPT: [ 0.00 0.00 ]
+Key: CLGI: [ 0.00 0.00 ]
+Key: CLI: [ 0.00 0.00 ]
+Key: CLRSSBSY: [ 0.00 0.00 ]
+Key: CLTS: [ 0.00 0.00 ]
+Key: CLUI: [ 0.00 0.00 ]
+Key: CLWB: [ 0.00 0.00 ]
+Key: CLZERO: [ 0.00 0.00 ]
+Key: CMC: [ 0.00 0.00 ]
+Key: CMOV: [ 0.00 0.00 ]
+Key: CMOVBE_F: [ 0.00 0.00 ]
+Key: CMOVBE_Fp: [ 0.00 0.00 ]
+Key: CMOVB_F: [ 0.00 0.00 ]
+Key: CMOVB_Fp: [ 0.00 0.00 ]
+Key: CMOVE_F: [ 0.00 0.00 ]
+Key: CMOVE_Fp: [ 0.00 0.00 ]
+Key: CMOVNBE_F: [ 0.00 0.00 ]
+Key: CMOVNBE_Fp: [ 0.00 0.00 ]
+Key: CMOVNB_F: [ 0.00 0.00 ]
+Key: CMOVNB_Fp: [ 0.00 0.00 ]
+Key: CMOVNE_F: [ 0.00 0.00 ]
+Key: CMOVNE_Fp: [ 0.00 0.00 ]
+Key: CMOVNP_F: [ 0.00 0.00 ]
+Key: CMOVNP_Fp: [ 0.00 0.00 ]
+Key: CMOVP_F: [ 0.00 0.00 ]
+Key: CMOVP_Fp: [ 0.00 0.00 ]
+Key: CMOV_FR: [ 0.00 0.00 ]
+Key: CMOV_GR: [ 0.00 0.00 ]
+Key: CMOV_RFP: [ 0.00 0.00 ]
+Key: CMOV_VK: [ 0.00 0.00 ]
+Key: CMOV_VR: [ 0.00 0.00 ]
+Key: CMP: [ 0.00 0.00 ]
+Key: CMPCCXADDmr: [ 0.00 0.00 ]
+Key: CMPPDrmi: [ 0.00 0.00 ]
+Key: CMPPDrri: [ 0.00 0.00 ]
+Key: CMPPSrmi: [ 0.00 0.00 ]
+Key: CMPPSrri: [ 0.00 0.00 ]
+Key: CMPSB: [ 0.00 0.00 ]
+Key: CMPSDrmi: [ 0.00 0.00 ]
+Key: CMPSDrmi_Int: [ 0.00 0.00 ]
+Key: CMPSDrri: [ 0.00 0.00 ]
+Key: CMPSDrri_Int: [ 0.00 0.00 ]
+Key: CMPSL: [ 0.00 0.00 ]
+Key: CMPSQ: [ 0.00 0.00 ]
+Key: CMPSSrmi: [ 0.00 0.00 ]
+Key: CMPSSrmi_Int: [ 0.00 0.00 ]
+Key: CMPSSrri: [ 0.00 0.00 ]
+Key: CMPSSrri_Int: [ 0.00 0.00 ]
+Key: CMPSW: [ 0.00 0.00 ]
+Key: CMPXCHG: [ 0.00 0.00 ]
+Key: COMISDrm: [ 0.00 0.00 ]
+Key: COMISDrm_Int: [ 0.00 0.00 ]
+Key: COMISDrr: [ 0.00 0.00 ]
+Key: COMISDrr_Int: [ 0.00 0.00 ]
+Key: COMISSrm: [ 0.00 0.00 ]
+Key: COMISSrm_Int: [ 0.00 0.00 ]
+Key: COMISSrr: [ 0.00 0.00 ]
+Key: COMISSrr_Int: [ 0.00 0.00 ]
+Key: COMP_FST: [ 0.00 0.00 ]
+Key: COM_FIPr: [ 0.00 0.00 ]
+Key: COM_FIr: [ 0.00 0.00 ]
+Key: COM_FST: [ 0.00 0.00 ]
+Key: COM_FpIr: [ 0.00 0.00 ]
+Key: COM_Fpr: [ 0.00 0.00 ]
+Key: CONVERGENCECTRL_ANCHOR: [ 0.00 0.00 ]
+Key: CONVERGENCECTRL_ENTRY: [ 0.00 0.00 ]
+Key: CONVERGENCECTRL_GLUE: [ 0.00 0.00 ]
+Key: CONVERGENCECTRL_LOOP: [ 0.00 0.00 ]
+Key: COPY: [ 0.00 0.00 ]
+Key: COPY_TO_REGCLASS: [ 0.00 0.00 ]
+Key: CPUID: [ 0.00 0.00 ]
+Key: CQO: [ 0.00 0.00 ]
+Key: CRC: [ 0.00 0.00 ]
+Key: CS_PREFIX: [ 0.00 0.00 ]
+Key: CTEST: [ 0.00 0.00 ]
+Key: CVTDQ: [ 0.00 0.00 ]
+Key: CVTPD: [ 0.00 0.00 ]
+Key: CVTPS: [ 0.00 0.00 ]
+Key: CVTSD: [ 0.00 0.00 ]
+Key: CVTSI: [ 0.00 0.00 ]
+Key: CVTSS: [ 0.00 0.00 ]
+Key: CVTTPD: [ 0.00 0.00 ]
+Key: CVTTPS: [ 0.00 0.00 ]
+Key: CVTTSD: [ 0.00 0.00 ]
+Key: CVTTSS: [ 0.00 0.00 ]
+Key: CWD: [ 0.00 0.00 ]
+Key: CWDE: [ 0.00 0.00 ]
+Key: DAA: [ 0.00 0.00 ]
+Key: DAS: [ 0.00 0.00 ]
+Key: DATA: [ 0.00 0.00 ]
+Key: DBG_INSTR_REF: [ 0.00 0.00 ]
+Key: DBG_LABEL: [ 0.00 0.00 ]
+Key: DBG_PHI: [ 0.00 0.00 ]
+Key: DBG_VALUE: [ 0.00 0.00 ]
+Key: DBG_VALUE_LIST: [ 0.00 0.00 ]
+Key: DEC: [ 0.00 0.00 ]
+Key: DIV: [ 0.00 0.00 ]
+Key: DIVPDrm: [ 0.00 0.00 ]
+Key: DIVPDrr: [ 0.00 0.00 ]
+Key: DIVPSrm: [ 0.00 0.00 ]
+Key: DIVPSrr: [ 0.00 0.00 ]
+Key: DIVR_F: [ 0.00 0.00 ]
+Key: DIVR_FI: [ 0.00 0.00 ]
+Key: DIVR_FPrST: [ 0.00 0.00 ]
+Key: DIVR_FST: [ 0.00 0.00 ]
+Key: DIVR_Fp: [ 0.00 0.00 ]
+Key: DIVR_FpI: [ 0.00 0.00 ]
+Key: DIVR_FrST: [ 0.00 0.00 ]
+Key: DIVSDrm: [ 0.00 0.00 ]
+Key: DIVSDrm_Int: [ 0.00 0.00 ]
+Key: DIVSDrr: [ 0.00 0.00 ]
+Key: DIVSDrr_Int: [ 0.00 0.00 ]
+Key: DIVSSrm: [ 0.00 0.00 ]
+Key: DIVSSrm_Int: [ 0.00 0.00 ]
+Key: DIVSSrr: [ 0.00 0.00 ]
+Key: DIVSSrr_Int: [ 0.00 0.00 ]
+Key: DIV_F: [ 0.00 0.00 ]
+Key: DIV_FI: [ 0.00 0.00 ]
+Key: DIV_FPrST: [ 0.00 0.00 ]
+Key: DIV_FST: [ 0.00 0.00 ]
+Key: DIV_Fp: [ 0.00 0.00 ]
+Key: DIV_FpI: [ 0.00 0.00 ]
+Key: DIV_FrST: [ 0.00 0.00 ]
+Key: DPPDrmi: [ 0.00 0.00 ]
+Key: DPPDrri: [ 0.00 0.00 ]
+Key: DPPSrmi: [ 0.00 0.00 ]
+Key: DPPSrri: [ 0.00 0.00 ]
+Key: DS_PREFIX: [ 0.00 0.00 ]
+Key: DYN_ALLOCA: [ 0.00 0.00 ]
+Key: EH_LABEL: [ 0.00 0.00 ]
+Key: EH_RETURN: [ 0.00 0.00 ]
+Key: EH_SjLj_LongJmp: [ 0.00 0.00 ]
+Key: EH_SjLj_SetJmp: [ 0.00 0.00 ]
+Key: EH_SjLj_Setup: [ 0.00 0.00 ]
+Key: ENCLS: [ 0.00 0.00 ]
+Key: ENCLU: [ 0.00 0.00 ]
+Key: ENCLV: [ 0.00 0.00 ]
+Key: ENCODEKEY: [ 0.00 0.00 ]
+Key: ENDBR: [ 0.00 0.00 ]
+Key: ENQCMD: [ 0.00 0.00 ]
+Key: ENQCMDS: [ 0.00 0.00 ]
+Key: ENTER: [ 0.00 0.00 ]
+Key: ERETS: [ 0.00 0.00 ]
+Key: ERETU: [ 0.00 0.00 ]
+Key: ES_PREFIX: [ 0.00 0.00 ]
+Key: EXTRACTPSmri: [ 0.00 0.00 ]
+Key: EXTRACTPSrri: [ 0.00 0.00 ]
+Key: EXTRACT_SUBREG: [ 0.00 0.00 ]
+Key: EXTRQ: [ 0.00 0.00 ]
+Key: EXTRQI: [ 0.00 0.00 ]
+Key: F: [ 0.00 0.00 ]
+Key: FAKE_USE: [ 0.00 0.00 ]
+Key: FARCALL: [ 0.00 0.00 ]
+Key: FARJMP: [ 0.00 0.00 ]
+Key: FAULTING_OP: [ 0.00 0.00 ]
+Key: FBLDm: [ 0.00 0.00 ]
+Key: FBSTPm: [ 0.00 0.00 ]
+Key: FCOM: [ 0.00 0.00 ]
+Key: FCOMP: [ 0.00 0.00 ]
+Key: FCOMPP: [ 0.00 0.00 ]
+Key: FCOS: [ 0.00 0.00 ]
+Key: FDECSTP: [ 0.00 0.00 ]
+Key: FEMMS: [ 0.00 0.00 ]
+Key: FENTRY_CALL: [ 0.00 0.00 ]
+Key: FFREE: [ 0.00 0.00 ]
+Key: FFREEP: [ 0.00 0.00 ]
+Key: FICOM: [ 0.00 0.00 ]
+Key: FICOMP: [ 0.00 0.00 ]
+Key: FINCSTP: [ 0.00 0.00 ]
+Key: FLDCW: [ 0.00 0.00 ]
+Key: FLDENVm: [ 0.00 0.00 ]
+Key: FLDL: [ 0.00 0.00 ]
+Key: FLDLG: [ 0.00 0.00 ]
+Key: FLDLN: [ 0.00 0.00 ]
+Key: FLDPI: [ 0.00 0.00 ]
+Key: FNCLEX: [ 0.00 0.00 ]
+Key: FNINIT: [ 0.00 0.00 ]
+Key: FNOP: [ 0.00 0.00 ]
+Key: FNSTCW: [ 0.00 0.00 ]
+Key: FNSTSW: [ 0.00 0.00 ]
+Key: FNSTSWm: [ 0.00 0.00 ]
+Key: FP: [ 0.00 0.00 ]
+Key: FPATAN: [ 0.00 0.00 ]
+Key: FPREM: [ 0.00 0.00 ]
+Key: FPTAN: [ 0.00 0.00 ]
+Key: FRNDINT: [ 0.00 0.00 ]
+Key: FRSTORm: [ 0.00 0.00 ]
+Key: FSAVEm: [ 0.00 0.00 ]
+Key: FSCALE: [ 0.00 0.00 ]
+Key: FSIN: [ 0.00 0.00 ]
+Key: FSINCOS: [ 0.00 0.00 ]
+Key: FSTENVm: [ 0.00 0.00 ]
+Key: FS_PREFIX: [ 0.00 0.00 ]
+Key: FXRSTOR: [ 0.00 0.00 ]
+Key: FXSAVE: [ 0.00 0.00 ]
+Key: FXTRACT: [ 0.00 0.00 ]
+Key: FYL: [ 0.00 0.00 ]
+Key: FsFLD: [ 0.00 0.00 ]
+Key: GC_LABEL: [ 0.00 0.00 ]
+Key: GETSEC: [ 0.00 0.00 ]
+Key: GF: [ 0.00 0.00 ]
+Key: GS_PREFIX: [ 0.00 0.00 ]
+Key: G_ABDS: [ 0.00 0.00 ]
+Key: G_ABDU: [ 0.00 0.00 ]
+Key: G_ABS: [ 0.00 0.00 ]
+Key: G_ADD: [ 0.00 0.00 ]
+Key: G_ADDRSPACE_CAST: [ 0.00 0.00 ]
+Key: G_AND: [ 0.00 0.00 ]
+Key: G_ANYEXT: [ 0.00 0.00 ]
+Key: G_ASHR: [ 0.00 0.00 ]
+Key: G_ASSERT_ALIGN: [ 0.00 0.00 ]
+Key: G_ASSERT_SEXT: [ 0.00 0.00 ]
+Key: G_ASSERT_ZEXT: [ 0.00 0.00 ]
+Key: G_ATOMICRMW_ADD: [ 0.00 0.00 ]
+Key: G_ATOMICRMW_AND: [ 0.00 0.00 ]
+Key: G_ATOMICRMW_FADD: [ 0.00 0.00 ]
+Key: G_ATOMICRMW_FMAX: [ 0.00 0.00 ]
+Key: G_ATOMICRMW_FMAXIMUM: [ 0.00 0.00 ]
+Key: G_ATOMICRMW_FMIN: [ 0.00 0.00 ]
+Key: G_ATOMICRMW_FMINIMUM: [ 0.00 0.00 ]
+Key: G_ATOMICRMW_FSUB: [ 0.00 0.00 ]
+Key: G_ATOMICRMW_MAX: [ 0.00 0.00 ]
+Key: G_ATOMICRMW_MIN: [ 0.00 0.00 ]
+Key: G_ATOMICRMW_NAND: [ 0.00 0.00 ]
+Key: G_ATOMICRMW_OR: [ 0.00 0.00 ]
+Key: G_ATOMICRMW_SUB: [ 0.00 0.00 ]
+Key: G_ATOMICRMW_UDEC_WRAP: [ 0.00 0.00 ]
+Key: G_ATOMICRMW_UINC_WRAP: [ 0.00 0.00 ]
+Key: G_ATOMICRMW_UMAX: [ 0.00 0.00 ]
+Key: G_ATOMICRMW_UMIN: [ 0.00 0.00 ]
+Key: G_ATOMICRMW_USUB_COND: [ 0.00 0.00 ]
+Key: G_ATOMICRMW_USUB_SAT: [ 0.00 0.00 ]
+Key: G_ATOMICRMW_XCHG: [ 0.00 0.00 ]
+Key: G_ATOMICRMW_XOR: [ 0.00 0.00 ]
+Key: G_ATOMIC_CMPXCHG: [ 0.00 0.00 ]
+Key: G_ATOMIC_CMPXCHG_WITH_SUCCESS: [ 0.00 0.00 ]
+Key: G_BITCAST: [ 0.00 0.00 ]
+Key: G_BITREVERSE: [ 0.00 0.00 ]
+Key: G_BLOCK_ADDR: [ 0.00 0.00 ]
+Key: G_BR: [ 0.00 0.00 ]
+Key: G_BRCOND: [ 0.00 0.00 ]
+Key: G_BRINDIRECT: [ 0.00 0.00 ]
+Key: G_BRJT: [ 0.00 0.00 ]
+Key: G_BSWAP: [ 0.00 0.00 ]
+Key: G_BUILD_VECTOR: [ 0.00 0.00 ]
+Key: G_BUILD_VECTOR_TRUNC: [ 0.00 0.00 ]
+Key: G_BZERO: [ 0.00 0.00 ]
+Key: G_CONCAT_VECTORS: [ 0.00 0.00 ]
+Key: G_CONSTANT: [ 0.00 0.00 ]
+Key: G_CONSTANT_FOLD_BARRIER: [ 0.00 0.00 ]
+Key: G_CONSTANT_POOL: [ 0.00 0.00 ]
+Key: G_CTLZ: [ 0.00 0.00 ]
+Key: G_CTLZ_ZERO_UNDEF: [ 0.00 0.00 ]
+Key: G_CTPOP: [ 0.00 0.00 ]
+Key: G_CTTZ: [ 0.00 0.00 ]
+Key: G_CTTZ_ZERO_UNDEF: [ 0.00 0.00 ]
+Key: G_DEBUGTRAP: [ 0.00 0.00 ]
+Key: G_DYN_STACKALLOC: [ 0.00 0.00 ]
+Key: G_EXTRACT: [ 0.00 0.00 ]
+Key: G_EXTRACT_SUBVECTOR: [ 0.00 0.00 ]
+Key: G_EXTRACT_VECTOR_ELT: [ 0.00 0.00 ]
+Key: G_FABS: [ 0.00 0.00 ]
+Key: G_FACOS: [ 0.00 0.00 ]
+Key: G_FADD: [ 0.00 0.00 ]
+Key: G_FASIN: [ 0.00 0.00 ]
+Key: G_FATAN: [ 0.00 0.00 ]
+Key: G_FCANONICALIZE: [ 0.00 0.00 ]
+Key: G_FCEIL: [ 0.00 0.00 ]
+Key: G_FCMP: [ 0.00 0.00 ]
+Key: G_FCONSTANT: [ 0.00 0.00 ]
+Key: G_FCOPYSIGN: [ 0.00 0.00 ]
+Key: G_FCOS: [ 0.00 0.00 ]
+Key: G_FCOSH: [ 0.00 0.00 ]
+Key: G_FDIV: [ 0.00 0.00 ]
+Key: G_FENCE: [ 0.00 0.00 ]
+Key: G_FEXP: [ 0.00 0.00 ]
+Key: G_FFLOOR: [ 0.00 0.00 ]
+Key: G_FFREXP: [ 0.00 0.00 ]
+Key: G_FILD: [ 0.00 0.00 ]
+Key: G_FIST: [ 0.00 0.00 ]
+Key: G_FLDCW: [ 0.00 0.00 ]
+Key: G_FLDEXP: [ 0.00 0.00 ]
+Key: G_FLOG: [ 0.00 0.00 ]
+Key: G_FMA: [ 0.00 0.00 ]
+Key: G_FMAD: [ 0.00 0.00 ]
+Key: G_FMAXIMUM: [ 0.00 0.00 ]
+Key: G_FMAXIMUMNUM: [ 0.00 0.00 ]
+Key: G_FMAXNUM: [ 0.00 0.00 ]
+Key: G_FMAXNUM_IEEE: [ 0.00 0.00 ]
+Key: G_FMINIMUM: [ 0.00 0.00 ]
+Key: G_FMINIMUMNUM: [ 0.00 0.00 ]
+Key: G_FMINNUM: [ 0.00 0.00 ]
+Key: G_FMINNUM_IEEE: [ 0.00 0.00 ]
+Key: G_FMODF: [ 0.00 0.00 ]
+Key: G_FMUL: [ 0.00 0.00 ]
+Key: G_FNEARBYINT: [ 0.00 0.00 ]
+Key: G_FNEG: [ 0.00 0.00 ]
+Key: G_FNSTCW: [ 0.00 0.00 ]
+Key: G_FPEXT: [ 0.00 0.00 ]
+Key: G_FPOW: [ 0.00 0.00 ]
+Key: G_FPOWI: [ 0.00 0.00 ]
+Key: G_FPTOSI: [ 0.00 0.00 ]
+Key: G_FPTOSI_SAT: [ 0.00 0.00 ]
+Key: G_FPTOUI: [ 0.00 0.00 ]
+Key: G_FPTOUI_SAT: [ 0.00 0.00 ]
+Key: G_FPTRUNC: [ 0.00 0.00 ]
+Key: G_FRAME_INDEX: [ 0.00 0.00 ]
+Key: G_FREEZE: [ 0.00 0.00 ]
+Key: G_FREM: [ 0.00 0.00 ]
+Key: G_FRINT: [ 0.00 0.00 ]
+Key: G_FSHL: [ 0.00 0.00 ]
+Key: G_FSHR: [ 0.00 0.00 ]
+Key: G_FSIN: [ 0.00 0.00 ]
+Key: G_FSINCOS: [ 0.00 0.00 ]
+Key: G_FSINH: [ 0.00 0.00 ]
+Key: G_FSQRT: [ 0.00 0.00 ]
+Key: G_FSUB: [ 0.00 0.00 ]
+Key: G_FTAN: [ 0.00 0.00 ]
+Key: G_FTANH: [ 0.00 0.00 ]
+Key: G_GET_FPENV: [ 0.00 0.00 ]
+Key: G_GET_FPMODE: [ 0.00 0.00 ]
+Key: G_GET_ROUNDING: [ 0.00 0.00 ]
+Key: G_GLOBAL_VALUE: [ 0.00 0.00 ]
+Key: G_ICMP: [ 0.00 0.00 ]
+Key: G_IMPLICIT_DEF: [ 0.00 0.00 ]
+Key: G_INDEXED_LOAD: [ 0.00 0.00 ]
+Key: G_INDEXED_SEXTLOAD: [ 0.00 0.00 ]
+Key: G_INDEXED_STORE: [ 0.00 0.00 ]
+Key: G_INDEXED_ZEXTLOAD: [ 0.00 0.00 ]
+Key: G_INSERT: [ 0.00 0.00 ]
+Key: G_INSERT_SUBVECTOR: [ 0.00 0.00 ]
+Key: G_INSERT_VECTOR_ELT: [ 0.00 0.00 ]
+Key: G_INTRINSIC: [ 0.00 0.00 ]
+Key: G_INTRINSIC_CONVERGENT: [ 0.00 0.00 ]
+Key: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS: [ 0.00 0.00 ]
+Key: G_INTRINSIC_FPTRUNC_ROUND: [ 0.00 0.00 ]
+Key: G_INTRINSIC_LLRINT: [ 0.00 0.00 ]
+Key: G_INTRINSIC_LRINT: [ 0.00 0.00 ]
+Key: G_INTRINSIC_ROUND: [ 0.00 0.00 ]
+Key: G_INTRINSIC_ROUNDEVEN: [ 0.00 0.00 ]
+Key: G_INTRINSIC_TRUNC: [ 0.00 0.00 ]
+Key: G_INTRINSIC_W_SIDE_EFFECTS: [ 0.00 0.00 ]
+Key: G_INTTOPTR: [ 0.00 0.00 ]
+Key: G_INVOKE_REGION_START: [ 0.00 0.00 ]
+Key: G_IS_FPCLASS: [ 0.00 0.00 ]
+Key: G_JUMP_TABLE: [ 0.00 0.00 ]
+Key: G_LLROUND: [ 0.00 0.00 ]
+Key: G_LOAD: [ 0.00 0.00 ]
+Key: G_LROUND: [ 0.00 0.00 ]
+Key: G_LSHR: [ 0.00 0.00 ]
+Key: G_MEMCPY: [ 0.00 0.00 ]
+Key: G_MEMCPY_INLINE: [ 0.00 0.00 ]
+Key: G_MEMMOVE: [ 0.00 0.00 ]
+Key: G_MEMSET: [ 0.00 0.00 ]
+Key: G_MERGE_VALUES: [ 0.00 0.00 ]
+Key: G_MUL: [ 0.00 0.00 ]
+Key: G_OR: [ 0.00 0.00 ]
+Key: G_PHI: [ 0.00 0.00 ]
+Key: G_PREFETCH: [ 0.00 0.00 ]
+Key: G_PTRAUTH_GLOBAL_VALUE: [ 0.00 0.00 ]
+Key: G_PTRMASK: [ 0.00 0.00 ]
+Key: G_PTRTOINT: [ 0.00 0.00 ]
+Key: G_PTR_ADD: [ 0.00 0.00 ]
+Key: G_READCYCLECOUNTER: [ 0.00 0.00 ]
+Key: G_READSTEADYCOUNTER: [ 0.00 0.00 ]
+Key: G_READ_REGISTER: [ 0.00 0.00 ]
+Key: G_RESET_FPENV: [ 0.00 0.00 ]
+Key: G_RESET_FPMODE: [ 0.00 0.00 ]
+Key: G_ROTL: [ 0.00 0.00 ]
+Key: G_ROTR: [ 0.00 0.00 ]
+Key: G_SADDE: [ 0.00 0.00 ]
+Key: G_SADDO: [ 0.00 0.00 ]
+Key: G_SADDSAT: [ 0.00 0.00 ]
+Key: G_SBFX: [ 0.00 0.00 ]
+Key: G_SCMP: [ 0.00 0.00 ]
+Key: G_SDIV: [ 0.00 0.00 ]
+Key: G_SDIVFIX: [ 0.00 0.00 ]
+Key: G_SDIVFIXSAT: [ 0.00 0.00 ]
+Key: G_SDIVREM: [ 0.00 0.00 ]
+Key: G_SELECT: [ 0.00 0.00 ]
+Key: G_SET_FPENV: [ 0.00 0.00 ]
+Key: G_SET_FPMODE: [ 0.00 0.00 ]
+Key: G_SET_ROUNDING: [ 0.00 0.00 ]
+Key: G_SEXT: [ 0.00 0.00 ]
+Key: G_SEXTLOAD: [ 0.00 0.00 ]
+Key: G_SEXT_INREG: [ 0.00 0.00 ]
+Key: G_SHL: [ 0.00 0.00 ]
+Key: G_SHUFFLE_VECTOR: [ 0.00 0.00 ]
+Key: G_SITOFP: [ 0.00 0.00 ]
+Key: G_SMAX: [ 0.00 0.00 ]
+Key: G_SMIN: [ 0.00 0.00 ]
+Key: G_SMULFIX: [ 0.00 0.00 ]
+Key: G_SMULFIXSAT: [ 0.00 0.00 ]
+Key: G_SMULH: [ 0.00 0.00 ]
+Key: G_SMULO: [ 0.00 0.00 ]
+Key: G_SPLAT_VECTOR: [ 0.00 0.00 ]
+Key: G_SREM: [ 0.00 0.00 ]
+Key: G_SSHLSAT: [ 0.00 0.00 ]
+Key: G_SSUBE: [ 0.00 0.00 ]
+Key: G_SSUBO: [ 0.00 0.00 ]
+Key: G_SSUBSAT: [ 0.00 0.00 ]
+Key: G_STACKRESTORE: [ 0.00 0.00 ]
+Key: G_STACKSAVE: [ 0.00 0.00 ]
+Key: G_STEP_VECTOR: [ 0.00 0.00 ]
+Key: G_STORE: [ 0.00 0.00 ]
+Key: G_STRICT_FADD: [ 0.00 0.00 ]
+Key: G_STRICT_FDIV: [ 0.00 0.00 ]
+Key: G_STRICT_FLDEXP: [ 0.00 0.00 ]
+Key: G_STRICT_FMA: [ 0.00 0.00 ]
+Key: G_STRICT_FMUL: [ 0.00 0.00 ]
+Key: G_STRICT_FREM: [ 0.00 0.00 ]
+Key: G_STRICT_FSQRT: [ 0.00 0.00 ]
+Key: G_STRICT_FSUB: [ 0.00 0.00 ]
+Key: G_SUB: [ 0.00 0.00 ]
+Key: G_TRAP: [ 0.00 0.00 ]
+Key: G_TRUNC: [ 0.00 0.00 ]
+Key: G_TRUNC_SSAT_S: [ 0.00 0.00 ]
+Key: G_TRUNC_SSAT_U: [ 0.00 0.00 ]
+Key: G_TRUNC_USAT_U: [ 0.00 0.00 ]
+Key: G_UADDE: [ 0.00 0.00 ]
+Key: G_UADDO: [ 0.00 0.00 ]
+Key: G_UADDSAT: [ 0.00 0.00 ]
+Key: G_UBFX: [ 0.00 0.00 ]
+Key: G_UBSANTRAP: [ 0.00 0.00 ]
+Key: G_UCMP: [ 0.00 0.00 ]
+Key: G_UDIV: [ 0.00 0.00 ]
+Key: G_UDIVFIX: [ 0.00 0.00 ]
+Key: G_UDIVFIXSAT: [ 0.00 0.00 ]
+Key: G_UDIVREM: [ 0.00 0.00 ]
+Key: G_UITOFP: [ 0.00 0.00 ]
+Key: G_UMAX: [ 0.00 0.00 ]
+Key: G_UMIN: [ 0.00 0.00 ]
+Key: G_UMULFIX: [ 0.00 0.00 ]
+Key: G_UMULFIXSAT: [ 0.00 0.00 ]
+Key: G_UMULH: [ 0.00 0.00 ]
+Key: G_UMULO: [ 0.00 0.00 ]
+Key: G_UNMERGE_VALUES: [ 0.00 0.00 ]
+Key: G_UREM: [ 0.00 0.00 ]
+Key: G_USHLSAT: [ 0.00 0.00 ]
+Key: G_USUBE: [ 0.00 0.00 ]
+Key: G_USUBO: [ 0.00 0.00 ]
+Key: G_USUBSAT: [ 0.00 0.00 ]
+Key: G_VAARG: [ 0.00 0.00 ]
+Key: G_VASTART: [ 0.00 0.00 ]
+Key: G_VECREDUCE_ADD: [ 0.00 0.00 ]
+Key: G_VECREDUCE_AND: [ 0.00 0.00 ]
+Key: G_VECREDUCE_FADD: [ 0.00 0.00 ]
+Key: G_VECREDUCE_FMAX: [ 0.00 0.00 ]
+Key: G_VECREDUCE_FMAXIMUM: [ 0.00 0.00 ]
+Key: G_VECREDUCE_FMIN: [ 0.00 0.00 ]
+Key: G_VECREDUCE_FMINIMUM: [ 0.00 0.00 ]
+Key: G_VECREDUCE_FMUL: [ 0.00 0.00 ]
+Key: G_VECREDUCE_MUL: [ 0.00 0.00 ]
+Key: G_VECREDUCE_OR: [ 0.00 0.00 ]
+Key: G_VECREDUCE_SEQ_FADD: [ 0.00 0.00 ]
+Key: G_VECREDUCE_SEQ_FMUL: [ 0.00 0.00 ]
+Key: G_VECREDUCE_SMAX: [ 0.00 0.00 ]
+Key: G_VECREDUCE_SMIN: [ 0.00 0.00 ]
+Key: G_VECREDUCE_UMAX: [ 0.00 0.00 ]
+Key: G_VECREDUCE_UMIN: [ 0.00 0.00 ]
+Key: G_VECREDUCE_XOR: [ 0.00 0.00 ]
+Key: G_VECTOR_COMPRESS: [ 0.00 0.00 ]
+Key: G_VSCALE: [ 0.00 0.00 ]
+Key: G_WRITE_REGISTER: [ 0.00 0.00 ]
+Key: G_XOR: [ 0.00 0.00 ]
+Key: G_ZEXT: [ 0.00 0.00 ]
+Key: G_ZEXTLOAD: [ 0.00 0.00 ]
+Key: HADDPDrm: [ 0.00 0.00 ]
+Key: HADDPDrr: [ 0.00 0.00 ]
+Key: HADDPSrm: [ 0.00 0.00 ]
+Key: HADDPSrr: [ 0.00 0.00 ]
+Key: HLT: [ 0.00 0.00 ]
+Key: HRESET: [ 0.00 0.00 ]
+Key: HSUBPDrm: [ 0.00 0.00 ]
+Key: HSUBPDrr: [ 0.00 0.00 ]
+Key: HSUBPSrm: [ 0.00 0.00 ]
+Key: HSUBPSrr: [ 0.00 0.00 ]
+Key: ICALL_BRANCH_FUNNEL: [ 0.00 0.00 ]
+Key: IDIV: [ 0.00 0.00 ]
+Key: ILD_F: [ 0.00 0.00 ]
+Key: ILD_Fp: [ 0.00 0.00 ]
+Key: IMPLICIT_DEF: [ 0.00 0.00 ]
+Key: IMUL: [ 0.00 0.00 ]
+Key: IMULZU: [ 0.00 0.00 ]
+Key: IN: [ 0.00 0.00 ]
+Key: INC: [ 0.00 0.00 ]
+Key: INCSSPD: [ 0.00 0.00 ]
+Key: INCSSPQ: [ 0.00 0.00 ]
+Key: INDIRECT_THUNK_CALL: [ 0.00 0.00 ]
+Key: INDIRECT_THUNK_TCRETURN: [ 0.00 0.00 ]
+Key: INIT_UNDEF: [ 0.00 0.00 ]
+Key: INLINEASM: [ 0.00 0.00 ]
+Key: INLINEASM_BR: [ 0.00 0.00 ]
+Key: INSB: [ 0.00 0.00 ]
+Key: INSERTPSrmi: [ 0.00 0.00 ]
+Key: INSERTPSrri: [ 0.00 0.00 ]
+Key: INSERTQ: [ 0.00 0.00 ]
+Key: INSERTQI: [ 0.00 0.00 ]
+Key: INSERT_SUBREG: [ 0.00 0.00 ]
+Key: INSL: [ 0.00 0.00 ]
+Key: INSW: [ 0.00 0.00 ]
+Key: INT: [ 0.00 0.00 ]
+Key: INTO: [ 0.00 0.00 ]
+Key: INVD: [ 0.00 0.00 ]
+Key: INVEPT: [ 0.00 0.00 ]
+Key: INVLPG: [ 0.00 0.00 ]
+Key: INVLPGA: [ 0.00 0.00 ]
+Key: INVLPGB: [ 0.00 0.00 ]
+Key: INVPCID: [ 0.00 0.00 ]
+Key: INVVPID: [ 0.00 0.00 ]
+Key: IRET: [ 0.00 0.00 ]
+Key: ISTT_FP: [ 0.00 0.00 ]
+Key: ISTT_Fp: [ 0.00 0.00 ]
+Key: IST_F: [ 0.00 0.00 ]
+Key: IST_FP: [ 0.00 0.00 ]
+Key: IST_Fp: [ 0.00 0.00 ]
+Key: Int_eh_sjlj_setup_dispatch: [ 0.00 0.00 ]
+Key: JCC: [ 0.00 0.00 ]
+Key: JCXZ: [ 0.00 0.00 ]
+Key: JECXZ: [ 0.00 0.00 ]
+Key: JMP: [ 0.00 0.00 ]
+Key: JMPABS: [ 0.00 0.00 ]
+Key: JRCXZ: [ 0.00 0.00 ]
+Key: JUMP_TABLE_DEBUG_INFO: [ 0.00 0.00 ]
+Key: KADDBkk: [ 0.00 0.00 ]
+Key: KADDDkk: [ 0.00 0.00 ]
+Key: KADDQkk: [ 0.00 0.00 ]
+Key: KADDWkk: [ 0.00 0.00 ]
+Key: KANDBkk: [ 0.00 0.00 ]
+Key: KANDDkk: [ 0.00 0.00 ]
+Key: KANDNBkk: [ 0.00 0.00 ]
+Key: KANDNDkk: [ 0.00 0.00 ]
+Key: KANDNQkk: [ 0.00 0.00 ]
+Key: KANDNWkk: [ 0.00 0.00 ]
+Key: KANDQkk: [ 0.00 0.00 ]
+Key: KANDWkk: [ 0.00 0.00 ]
+Key: KCFI_CHECK: [ 0.00 0.00 ]
+Key: KILL: [ 0.00 0.00 ]
+Key: KMOVBkk: [ 0.00 0.00 ]
+Key: KMOVBkk_EVEX: [ 0.00 0.00 ]
+Key: KMOVBkm: [ 0.00 0.00 ]
+Key: KMOVBkm_EVEX: [ 0.00 0.00 ]
+Key: KMOVBkr: [ 0.00 0.00 ]
+Key: KMOVBkr_EVEX: [ 0.00 0.00 ]
+Key: KMOVBmk: [ 0.00 0.00 ]
+Key: KMOVBmk_EVEX: [ 0.00 0.00 ]
+Key: KMOVBrk: [ 0.00 0.00 ]
+Key: KMOVBrk_EVEX: [ 0.00 0.00 ]
+Key: KMOVDkk: [ 0.00 0.00 ]
+Key: KMOVDkk_EVEX: [ 0.00 0.00 ]
+Key: KMOVDkm: [ 0.00 0.00 ]
+Key: KMOVDkm_EVEX: [ 0.00 0.00 ]
+Key: KMOVDkr: [ 0.00 0.00 ]
+Key: KMOVDkr_EVEX: [ 0.00 0.00 ]
+Key: KMOVDmk: [ 0.00 0.00 ]
+Key: KMOVDmk_EVEX: [ 0.00 0.00 ]
+Key: KMOVDrk: [ 0.00 0.00 ]
+Key: KMOVDrk_EVEX: [ 0.00 0.00 ]
+Key: KMOVQkk: [ 0.00 0.00 ]
+Key: KMOVQkk_EVEX: [ 0.00 0.00 ]
+Key: KMOVQkm: [ 0.00 0.00 ]
+Key: KMOVQkm_EVEX: [ 0.00 0.00 ]
+Key: KMOVQkr: [ 0.00 0.00 ]
+Key: KMOVQkr_EVEX: [ 0.00 0.00 ]
+Key: KMOVQmk: [ 0.00 0.00 ]
+Key: KMOVQmk_EVEX: [ 0.00 0.00 ]
+Key: KMOVQrk: [ 0.00 0.00 ]
+Key: KMOVQrk_EVEX: [ 0.00 0.00 ]
+Key: KMOVWkk: [ 0.00 0.00 ]
+Key: KMOVWkk_EVEX: [ 0.00 0.00 ]
+Key: KMOVWkm: [ 0.00 0.00 ]
+Key: KMOVWkm_EVEX: [ 0.00 0.00 ]
+Key: KMOVWkr: [ 0.00 0.00 ]
+Key: KMOVWkr_EVEX: [ 0.00 0.00 ]
+Key: KMOVWmk: [ 0.00 0.00 ]
+Key: KMOVWmk_EVEX: [ 0.00 0.00 ]
+Key: KMOVWrk: [ 0.00 0.00 ]
+Key: KMOVWrk_EVEX: [ 0.00 0.00 ]
+Key: KNOTBkk: [ 0.00 0.00 ]
+Key: KNOTDkk: [ 0.00 0.00 ]
+Key: KNOTQkk: [ 0.00 0.00 ]
+Key: KNOTWkk: [ 0.00 0.00 ]
+Key: KORBkk: [ 0.00 0.00 ]
+Key: KORDkk: [ 0.00 0.00 ]
+Key: KORQkk: [ 0.00 0.00 ]
+Key: KORTESTBkk: [ 0.00 0.00 ]
+Key: KORTESTDkk: [ 0.00 0.00 ]
+Key: KORTESTQkk: [ 0.00 0.00 ]
+Key: KORTESTWkk: [ 0.00 0.00 ]
+Key: KORWkk: [ 0.00 0.00 ]
+Key: KSET: [ 0.00 0.00 ]
+Key: KSHIFTLBki: [ 0.00 0.00 ]
+Key: KSHIFTLDki: [ 0.00 0.00 ]
+Key: KSHIFTLQki: [ 0.00 0.00 ]
+Key: KSHIFTLWki: [ 0.00 0.00 ]
+Key: KSHIFTRBki: [ 0.00 0.00 ]
+Key: KSHIFTRDki: [ 0.00 0.00 ]
+Key: KSHIFTRQki: [ 0.00 0.00 ]
+Key: KSHIFTRWki: [ 0.00 0.00 ]
+Key: KTESTBkk: [ 0.00 0.00 ]
+Key: KTESTDkk: [ 0.00 0.00 ]
+Key: KTESTQkk: [ 0.00 0.00 ]
+Key: KTESTWkk: [ 0.00 0.00 ]
+Key: KUNPCKBWkk: [ 0.00 0.00 ]
+Key: KUNPCKDQkk: [ 0.00 0.00 ]
+Key: KUNPCKWDkk: [ 0.00 0.00 ]
+Key: KXNORBkk: [ 0.00 0.00 ]
+Key: KXNORDkk: [ 0.00 0.00 ]
+Key: KXNORQkk: [ 0.00 0.00 ]
+Key: KXNORWkk: [ 0.00 0.00 ]
+Key: KXORBkk: [ 0.00 0.00 ]
+Key: KXORDkk: [ 0.00 0.00 ]
+Key: KXORQkk: [ 0.00 0.00 ]
+Key: KXORWkk: [ 0.00 0.00 ]
+Key: LAHF: [ 0.00 0.00 ]
+Key: LAR: [ 0.00 0.00 ]
+Key: LCMPXCHG: [ 0.00 0.00 ]
+Key: LDDQUrm: [ 0.00 0.00 ]
+Key: LDMXCSR: [ 0.00 0.00 ]
+Key: LDS: [ 0.00 0.00 ]
+Key: LDTILECFG: [ 0.00 0.00 ]
+Key: LDTILECFG_EVEX: [ 0.00 0.00 ]
+Key: LD_F: [ 0.00 0.00 ]
+Key: LD_Fp: [ 0.00 0.00 ]
+Key: LD_Frr: [ 0.00 0.00 ]
+Key: LEA: [ 0.00 0.00 ]
+Key: LEAVE: [ 0.00 0.00 ]
+Key: LES: [ 0.00 0.00 ]
+Key: LFENCE: [ 0.00 0.00 ]
+Key: LFS: [ 0.00 0.00 ]
+Key: LGDT: [ 0.00 0.00 ]
+Key: LGS: [ 0.00 0.00 ]
+Key: LIDT: [ 0.00 0.00 ]
+Key: LIFETIME_END: [ 0.00 0.00 ]
+Key: LIFETIME_START: [ 0.00 0.00 ]
+Key: LKGS: [ 0.00 0.00 ]
+Key: LLDT: [ 0.00 0.00 ]
+Key: LLWPCB: [ 0.00 0.00 ]
+Key: LMSW: [ 0.00 0.00 ]
+Key: LOADIWKEY: [ 0.00 0.00 ]
+Key: LOAD_STACK_GUARD: [ 0.00 0.00 ]
+Key: LOCAL_ESCAPE: [ 0.00 0.00 ]
+Key: LOCK_ADD: [ 0.00 0.00 ]
+Key: LOCK_AND: [ 0.00 0.00 ]
+Key: LOCK_BTC: [ 0.00 0.00 ]
+Key: LOCK_BTC_RM: [ 0.00 0.00 ]
+Key: LOCK_BTR: [ 0.00 0.00 ]
+Key: LOCK_BTR_RM: [ 0.00 0.00 ]
+Key: LOCK_BTS: [ 0.00 0.00 ]
+Key: LOCK_BTS_RM: [ 0.00 0.00 ]
+Key: LOCK_DEC: [ 0.00 0.00 ]
+Key: LOCK_INC: [ 0.00 0.00 ]
+Key: LOCK_OR: [ 0.00 0.00 ]
+Key: LOCK_PREFIX: [ 0.00 0.00 ]
+Key: LOCK_SUB: [ 0.00 0.00 ]
+Key: LOCK_XOR: [ 0.00 0.00 ]
+Key: LODSB: [ 0.00 0.00 ]
+Key: LODSL: [ 0.00 0.00 ]
+Key: LODSQ: [ 0.00 0.00 ]
+Key: LODSW: [ 0.00 0.00 ]
+Key: LOOP: [ 0.00 0.00 ]
+Key: LOOPE: [ 0.00 0.00 ]
+Key: LOOPNE: [ 0.00 0.00 ]
+Key: LRET: [ 0.00 0.00 ]
+Key: LRETI: [ 0.00 0.00 ]
+Key: LSL: [ 0.00 0.00 ]
+Key: LSS: [ 0.00 0.00 ]
+Key: LTRm: [ 0.00 0.00 ]
+Key: LTRr: [ 0.00 0.00 ]
+Key: LWPINS: [ 0.00 0.00 ]
+Key: LWPVAL: [ 0.00 0.00 ]
+Key: LXADD: [ 0.00 0.00 ]
+Key: LZCNT: [ 0.00 0.00 ]
+Key: MASKMOVDQU: [ 0.00 0.00 ]
+Key: MASKPAIR: [ 0.00 0.00 ]
+Key: MAXCPDrm: [ 0.00 0.00 ]
+Key: MAXCPDrr: [ 0.00 0.00 ]
+Key: MAXCPSrm: [ 0.00 0.00 ]
+Key: MAXCPSrr: [ 0.00 0.00 ]
+Key: MAXCSDrm: [ 0.00 0.00 ]
+Key: MAXCSDrr: [ 0.00 0.00 ]
+Key: MAXCSSrm: [ 0.00 0.00 ]
+Key: MAXCSSrr: [ 0.00 0.00 ]
+Key: MAXPDrm: [ 0.00 0.00 ]
+Key: MAXPDrr: [ 0.00 0.00 ]
+Key: MAXPSrm: [ 0.00 0.00 ]
+Key: MAXPSrr: [ 0.00 0.00 ]
+Key: MAXSDrm: [ 0.00 0.00 ]
+Key: MAXSDrm_Int: [ 0.00 0.00 ]
+Key: MAXSDrr: [ 0.00 0.00 ]
+Key: MAXSDrr_Int: [ 0.00 0.00 ]
+Key: MAXSSrm: [ 0.00 0.00 ]
+Key: MAXSSrm_Int: [ 0.00 0.00 ]
+Key: MAXSSrr: [ 0.00 0.00 ]
+Key: MAXSSrr_Int: [ 0.00 0.00 ]
+Key: MEMBARRIER: [ 0.00 0.00 ]
+Key: MFENCE: [ 0.00 0.00 ]
+Key: MINCPDrm: [ 0.00 0.00 ]
+Key: MINCPDrr: [ 0.00 0.00 ]
+Key: MINCPSrm: [ 0.00 0.00 ]
+Key: MINCPSrr: [ 0.00 0.00 ]
+Key: MINCSDrm: [ 0.00 0.00 ]
+Key: MINCSDrr: [ 0.00 0.00 ]
+Key: MINCSSrm: [ 0.00 0.00 ]
+Key: MINCSSrr: [ 0.00 0.00 ]
+Key: MINPDrm: [ 0.00 0.00 ]
+Key: MINPDrr: [ 0.00 0.00 ]
+Key: MINPSrm: [ 0.00 0.00 ]
+Key: MINPSrr: [ 0.00 0.00 ]
+Key: MINSDrm: [ 0.00 0.00 ]
+Key: MINSDrm_Int: [ 0.00 0.00 ]
+Key: MINSDrr: [ 0.00 0.00 ]
+Key: MINSDrr_Int: [ 0.00 0.00 ]
+Key: MINSSrm: [ 0.00 0.00 ]
+Key: MINSSrm_Int: [ 0.00 0.00 ]
+Key: MINSSrr: [ 0.00 0.00 ]
+Key: MINSSrr_Int: [ 0.00 0.00 ]
+Key: MMX_CVTPD: [ 0.00 0.00 ]
+Key: MMX_CVTPI: [ 0.00 0.00 ]
+Key: MMX_CVTPS: [ 0.00 0.00 ]
+Key: MMX_CVTTPD: [ 0.00 0.00 ]
+Key: MMX_CVTTPS: [ 0.00 0.00 ]
+Key: MMX_EMMS: [ 0.00 0.00 ]
+Key: MMX_MASKMOVQ: [ 0.00 0.00 ]
+Key: MMX_MOVD: [ 0.00 0.00 ]
+Key: MMX_MOVDQ: [ 0.00 0.00 ]
+Key: MMX_MOVFR: [ 0.00 0.00 ]
+Key: MMX_MOVNTQmr: [ 0.00 0.00 ]
+Key: MMX_MOVQ: [ 0.00 0.00 ]
+Key: MMX_PABSBrm: [ 0.00 0.00 ]
+Key: MMX_PABSBrr: [ 0.00 0.00 ]
+Key: MMX_PABSDrm: [ 0.00 0.00 ]
+Key: MMX_PABSDrr: [ 0.00 0.00 ]
+Key: MMX_PABSWrm: [ 0.00 0.00 ]
+Key: MMX_PABSWrr: [ 0.00 0.00 ]
+Key: MMX_PACKSSDWrm: [ 0.00 0.00 ]
+Key: MMX_PACKSSDWrr: [ 0.00 0.00 ]
+Key: MMX_PACKSSWBrm: [ 0.00 0.00 ]
+Key: MMX_PACKSSWBrr: [ 0.00 0.00 ]
+Key: MMX_PACKUSWBrm: [ 0.00 0.00 ]
+Key: MMX_PACKUSWBrr: [ 0.00 0.00 ]
+Key: MMX_PADDBrm: [ 0.00 0.00 ]
+Key: MMX_PADDBrr: [ 0.00 0.00 ]
+Key: MMX_PADDDrm: [ 0.00 0.00 ]
+Key: MMX_PADDDrr: [ 0.00 0.00 ]
+Key: MMX_PADDQrm: [ 0.00 0.00 ]
+Key: MMX_PADDQrr: [ 0.00 0.00 ]
+Key: MMX_PADDSBrm: [ 0.00 0.00 ]
+Key: MMX_PADDSBrr: [ 0.00 0.00 ]
+Key: MMX_PADDSWrm: [ 0.00 0.00 ]
+Key: MMX_PADDSWrr: [ 0.00 0.00 ]
+Key: MMX_PADDUSBrm: [ 0.00 0.00 ]
+Key: MMX_PADDUSBrr: [ 0.00 0.00 ]
+Key: MMX_PADDUSWrm: [ 0.00 0.00 ]
+Key: MMX_PADDUSWrr: [ 0.00 0.00 ]
+Key: MMX_PADDWrm: [ 0.00 0.00 ]
+Key: MMX_PADDWrr: [ 0.00 0.00 ]
+Key: MMX_PALIGNRrmi: [ 0.00 0.00 ]
+Key: MMX_PALIGNRrri: [ 0.00 0.00 ]
+Key: MMX_PANDNrm: [ 0.00 0.00 ]
+Key: MMX_PANDNrr: [ 0.00 0.00 ]
+Key: MMX_PANDrm: [ 0.00 0.00 ]
+Key: MMX_PANDrr: [ 0.00 0.00 ]
+Key: MMX_PAVGBrm: [ 0.00 0.00 ]
+Key: MMX_PAVGBrr: [ 0.00 0.00 ]
+Key: MMX_PAVGWrm: [ 0.00 0.00 ]
+Key: MMX_PAVGWrr: [ 0.00 0.00 ]
+Key: MMX_PCMPEQBrm: [ 0.00 0.00 ]
+Key: MMX_PCMPEQBrr: [ 0.00 0.00 ]
+Key: MMX_PCMPEQDrm: [ 0.00 0.00 ]
+Key: MMX_PCMPEQDrr: [ 0.00 0.00 ]
+Key: MMX_PCMPEQWrm: [ 0.00 0.00 ]
+Key: MMX_PCMPEQWrr: [ 0.00 0.00 ]
+Key: MMX_PCMPGTBrm: [ 0.00 0.00 ]
+Key: MMX_PCMPGTBrr: [ 0.00 0.00 ]
+Key: MMX_PCMPGTDrm: [ 0.00 0.00 ]
+Key: MMX_PCMPGTDrr: [ 0.00 0.00 ]
+Key: MMX_PCMPGTWrm: [ 0.00 0.00 ]
+Key: MMX_PCMPGTWrr: [ 0.00 0.00 ]
+Key: MMX_PEXTRWrri: [ 0.00 0.00 ]
+Key: MMX_PHADDDrm: [ 0.00 0.00 ]
+Key: MMX_PHADDDrr: [ 0.00 0.00 ]
+Key: MMX_PHADDSWrm: [ 0.00 0.00 ]
+Key: MMX_PHADDSWrr: [ 0.00 0.00 ]
+Key: MMX_PHADDWrm: [ 0.00 0.00 ]
+Key: MMX_PHADDWrr: [ 0.00 0.00 ]
+Key: MMX_PHSUBDrm: [ 0.00 0.00 ]
+Key: MMX_PHSUBDrr: [ 0.00 0.00 ]
+Key: MMX_PHSUBSWrm: [ 0.00 0.00 ]
+Key: MMX_PHSUBSWrr: [ 0.00 0.00 ]
+Key: MMX_PHSUBWrm: [ 0.00 0.00 ]
+Key: MMX_PHSUBWrr: [ 0.00 0.00 ]
+Key: MMX_PINSRWrmi: [ 0.00 0.00 ]
+Key: MMX_PINSRWrri: [ 0.00 0.00 ]
+Key: MMX_PMADDUBSWrm: [ 0.00 0.00 ]
+Key: MMX_PMADDUBSWrr: [ 0.00 0.00 ]
+Key: MMX_PMADDWDrm: [ 0.00 0.00 ]
+Key: MMX_PMADDWDrr: [ 0.00 0.00 ]
+Key: MMX_PMAXSWrm: [ 0.00 0.00 ]
+Key: MMX_PMAXSWrr: [ 0.00 0.00 ]
+Key: MMX_PMAXUBrm: [ 0.00 0.00 ]
+Key: MMX_PMAXUBrr: [ 0.00 0.00 ]
+Key: MMX_PMINSWrm: [ 0.00 0.00 ]
+Key: MMX_PMINSWrr: [ 0.00 0.00 ]
+Key: MMX_PMINUBrm: [ 0.00 0.00 ]
+Key: MMX_PMINUBrr: [ 0.00 0.00 ]
+Key: MMX_PMOVMSKBrr: [ 0.00 0.00 ]
+Key: MMX_PMULHRSWrm: [ 0.00 0.00 ]
+Key: MMX_PMULHRSWrr: [ 0.00 0.00 ]
+Key: MMX_PMULHUWrm: [ 0.00 0.00 ]
+Key: MMX_PMULHUWrr: [ 0.00 0.00 ]
+Key: MMX_PMULHWrm: [ 0.00 0.00 ]
+Key: MMX_PMULHWrr: [ 0.00 0.00 ]
+Key: MMX_PMULLWrm: [ 0.00 0.00 ]
+Key: MMX_PMULLWrr: [ 0.00 0.00 ]
+Key: MMX_PMULUDQrm: [ 0.00 0.00 ]
+Key: MMX_PMULUDQrr: [ 0.00 0.00 ]
+Key: MMX_PORrm: [ 0.00 0.00 ]
+Key: MMX_PORrr: [ 0.00 0.00 ]
+Key: MMX_PSADBWrm: [ 0.00 0.00 ]
+Key: MMX_PSADBWrr: [ 0.00 0.00 ]
+Key: MMX_PSHUFBrm: [ 0.00 0.00 ]
+Key: MMX_PSHUFBrr: [ 0.00 0.00 ]
+Key: MMX_PSHUFWmi: [ 0.00 0.00 ]
+Key: MMX_PSHUFWri: [ 0.00 0.00 ]
+Key: MMX_PSIGNBrm: [ 0.00 0.00 ]
+Key: MMX_PSIGNBrr: [ 0.00 0.00 ]
+Key: MMX_PSIGNDrm: [ 0.00 0.00 ]
+Key: MMX_PSIGNDrr: [ 0.00 0.00 ]
+Key: MMX_PSIGNWrm: [ 0.00 0.00 ]
+Key: MMX_PSIGNWrr: [ 0.00 0.00 ]
+Key: MMX_PSLLDri: [ 0.00 0.00 ]
+Key: MMX_PSLLDrm: [ 0.00 0.00 ]
+Key: MMX_PSLLDrr: [ 0.00 0.00 ]
+Key: MMX_PSLLQri: [ 0.00 0.00 ]
+Key: MMX_PSLLQrm: [ 0.00 0.00 ]
+Key: MMX_PSLLQrr: [ 0.00 0.00 ]
+Key: MMX_PSLLWri: [ 0.00 0.00 ]
+Key: MMX_PSLLWrm: [ 0.00 0.00 ]
+Key: MMX_PSLLWrr: [ 0.00 0.00 ]
+Key: MMX_PSRADri: [ 0.00 0.00 ]
+Key: MMX_PSRADrm: [ 0.00 0.00 ]
+Key: MMX_PSRADrr: [ 0.00 0.00 ]
+Key: MMX_PSRAWri: [ 0.00 0.00 ]
+Key: MMX_PSRAWrm: [ 0.00 0.00 ]
+Key: MMX_PSRAWrr: [ 0.00 0.00 ]
+Key: MMX_PSRLDri: [ 0.00 0.00 ]
+Key: MMX_PSRLDrm: [ 0.00 0.00 ]
+Key: MMX_PSRLDrr: [ 0.00 0.00 ]
+Key: MMX_PSRLQri: [ 0.00 0.00 ]
+Key: MMX_PSRLQrm: [ 0.00 0.00 ]
+Key: MMX_PSRLQrr: [ 0.00 0.00 ]
+Key: MMX_PSRLWri: [ 0.00 0.00 ]
+Key: MMX_PSRLWrm: [ 0.00 0.00 ]
+Key: MMX_PSRLWrr: [ 0.00 0.00 ]
+Key: MMX_PSUBBrm: [ 0.00 0.00 ]
+Key: MMX_PSUBBrr: [ 0.00 0.00 ]
+Key: MMX_PSUBDrm: [ 0.00 0.00 ]
+Key: MMX_PSUBDrr: [ 0.00 0.00 ]
+Key: MMX_PSUBQrm: [ 0.00 0.00 ]
+Key: MMX_PSUBQrr: [ 0.00 0.00 ]
+Key: MMX_PSUBSBrm: [ 0.00 0.00 ]
+Key: MMX_PSUBSBrr: [ 0.00 0.00 ]
+Key: MMX_PSUBSWrm: [ 0.00 0.00 ]
+Key: MMX_PSUBSWrr: [ 0.00 0.00 ]
+Key: MMX_PSUBUSBrm: [ 0.00 0.00 ]
+Key: MMX_PSUBUSBrr: [ 0.00 0.00 ]
+Key: MMX_PSUBUSWrm: [ 0.00 0.00 ]
+Key: MMX_PSUBUSWrr: [ 0.00 0.00 ]
+Key: MMX_PSUBWrm: [ 0.00 0.00 ]
+Key: MMX_PSUBWrr: [ 0.00 0.00 ]
+Key: MMX_PUNPCKHBWrm: [ 0.00 0.00 ]
+Key: MMX_PUNPCKHBWrr: [ 0.00 0.00 ]
+Key: MMX_PUNPCKHDQrm: [ 0.00 0.00 ]
+Key: MMX_PUNPCKHDQrr: [ 0.00 0.00 ]
+Key: MMX_PUNPCKHWDrm: [ 0.00 0.00 ]
+Key: MMX_PUNPCKHWDrr: [ 0.00 0.00 ]
+Key: MMX_PUNPCKLBWrm: [ 0.00 0.00 ]
+Key: MMX_PUNPCKLBWrr: [ 0.00 0.00 ]
+Key: MMX_PUNPCKLDQrm: [ 0.00 0.00 ]
+Key: MMX_PUNPCKLDQrr: [ 0.00 0.00 ]
+Key: MMX_PUNPCKLWDrm: [ 0.00 0.00 ]
+Key: MMX_PUNPCKLWDrr: [ 0.00 0.00 ]
+Key: MMX_PXORrm: [ 0.00 0.00 ]
+Key: MMX_PXORrr: [ 0.00 0.00 ]
+Key: MMX_SET: [ 0.00 0.00 ]
+Key: MONITOR: [ 0.00 0.00 ]
+Key: MONITORX: [ 0.00 0.00 ]
+Key: MONTMUL: [ 0.00 0.00 ]
+Key: MORESTACK_RET: [ 0.00 0.00 ]
+Key: MORESTACK_RET_RESTORE_R: [ 0.00 0.00 ]
+Key: MOV: [ 0.00 0.00 ]
+Key: MOVAPDmr: [ 0.00 0.00 ]
+Key: MOVAPDrm: [ 0.00 0.00 ]
+Key: MOVAPDrr: [ 0.00 0.00 ]
+Key: MOVAPDrr_REV: [ 0.00 0.00 ]
+Key: MOVAPSmr: [ 0.00 0.00 ]
+Key: MOVAPSrm: [ 0.00 0.00 ]
+Key: MOVAPSrr: [ 0.00 0.00 ]
+Key: MOVAPSrr_REV: [ 0.00 0.00 ]
+Key: MOVBE: [ 0.00 0.00 ]
+Key: MOVDDUPrm: [ 0.00 0.00 ]
+Key: MOVDDUPrr: [ 0.00 0.00 ]
+Key: MOVDI: [ 0.00 0.00 ]
+Key: MOVDIR: [ 0.00 0.00 ]
+Key: MOVDIRI: [ 0.00 0.00 ]
+Key: MOVDQAmr: [ 0.00 0.00 ]
+Key: MOVDQArm: [ 0.00 0.00 ]
+Key: MOVDQArr: [ 0.00 0.00 ]
+Key: MOVDQArr_REV: [ 0.00 0.00 ]
+Key: MOVDQUmr: [ 0.00 0.00 ]
+Key: MOVDQUrm: [ 0.00 0.00 ]
+Key: MOVDQUrr: [ 0.00 0.00 ]
+Key: MOVDQUrr_REV: [ 0.00 0.00 ]
+Key: MOVHLPSrr: [ 0.00 0.00 ]
+Key: MOVHPDmr: [ 0.00 0.00 ]
+Key: MOVHPDrm: [ 0.00 0.00 ]
+Key: MOVHPSmr: [ 0.00 0.00 ]
+Key: MOVHPSrm: [ 0.00 0.00 ]
+Key: MOVLHPSrr: [ 0.00 0.00 ]
+Key: MOVLPDmr: [ 0.00 0.00 ]
+Key: MOVLPDrm: [ 0.00 0.00 ]
+Key: MOVLPSmr: [ 0.00 0.00 ]
+Key: MOVLPSrm: [ 0.00 0.00 ]
+Key: MOVMSKPDrr: [ 0.00 0.00 ]
+Key: MOVMSKPSrr: [ 0.00 0.00 ]
+Key: MOVNTDQArm: [ 0.00 0.00 ]
+Key: MOVNTDQmr: [ 0.00 0.00 ]
+Key: MOVNTI: [ 0.00 0.00 ]
+Key: MOVNTImr: [ 0.00 0.00 ]
+Key: MOVNTPDmr: [ 0.00 0.00 ]
+Key: MOVNTPSmr: [ 0.00 0.00 ]
+Key: MOVNTSD: [ 0.00 0.00 ]
+Key: MOVNTSS: [ 0.00 0.00 ]
+Key: MOVPC: [ 0.00 0.00 ]
+Key: MOVPDI: [ 0.00 0.00 ]
+Key: MOVPQI: [ 0.00 0.00 ]
+Key: MOVPQIto: [ 0.00 0.00 ]
+Key: MOVQI: [ 0.00 0.00 ]
+Key: MOVRS: [ 0.00 0.00 ]
+Key: MOVSB: [ 0.00 0.00 ]
+Key: MOVSDmr: [ 0.00 0.00 ]
+Key: MOVSDrm: [ 0.00 0.00 ]
+Key: MOVSDrm_alt: [ 0.00 0.00 ]
+Key: MOVSDrr: [ 0.00 0.00 ]
+Key: MOVSDrr_REV: [ 0.00 0.00 ]
+Key: MOVSDto: [ 0.00 0.00 ]
+Key: MOVSHDUPrm: [ 0.00 0.00 ]
+Key: MOVSHDUPrr: [ 0.00 0.00 ]
+Key: MOVSHPmr: [ 0.00 0.00 ]
+Key: MOVSHPrm: [ 0.00 0.00 ]
+Key: MOVSL: [ 0.00 0.00 ]
+Key: MOVSLDUPrm: [ 0.00 0.00 ]
+Key: MOVSLDUPrr: [ 0.00 0.00 ]
+Key: MOVSQ: [ 0.00 0.00 ]
+Key: MOVSS: [ 0.00 0.00 ]
+Key: MOVSSmr: [ 0.00 0.00 ]
+Key: MOVSSrm: [ 0.00 0.00 ]
+Key: MOVSSrm_alt: [ 0.00 0.00 ]
+Key: MOVSSrr: [ 0.00 0.00 ]
+Key: MOVSSrr_REV: [ 0.00 0.00 ]
+Key: MOVSW: [ 0.00 0.00 ]
+Key: MOVSX: [ 0.00 0.00 ]
+Key: MOVUPDmr: [ 0.00 0.00 ]
+Key: MOVUPDrm: [ 0.00 0.00 ]
+Key: MOVUPDrr: [ 0.00 0.00 ]
+Key: MOVUPDrr_REV: [ 0.00 0.00 ]
+Key: MOVUPSmr: [ 0.00 0.00 ]
+Key: MOVUPSrm: [ 0.00 0.00 ]
+Key: MOVUPSrr: [ 0.00 0.00 ]
+Key: MOVUPSrr_REV: [ 0.00 0.00 ]
+Key: MOVZPQILo: [ 0.00 0.00 ]
+Key: MOVZX: [ 0.00 0.00 ]
+Key: MPSADBWrmi: [ 0.00 0.00 ]
+Key: MPSADBWrri: [ 0.00 0.00 ]
+Key: MUL: [ 0.00 0.00 ]
+Key: MULPDrm: [ 0.00 0.00 ]
+Key: MULPDrr: [ 0.00 0.00 ]
+Key: MULPSrm: [ 0.00 0.00 ]
+Key: MULPSrr: [ 0.00 0.00 ]
+Key: MULSDrm: [ 0.00 0.00 ]
+Key: MULSDrm_Int: [ 0.00 0.00 ]
+Key: MULSDrr: [ 0.00 0.00 ]
+Key: MULSDrr_Int: [ 0.00 0.00 ]
+Key: MULSSrm: [ 0.00 0.00 ]
+Key: MULSSrm_Int: [ 0.00 0.00 ]
+Key: MULSSrr: [ 0.00 0.00 ]
+Key: MULSSrr_Int: [ 0.00 0.00 ]
+Key: MULX: [ 0.00 0.00 ]
+Key: MUL_F: [ 0.00 0.00 ]
+Key: MUL_FI: [ 0.00 0.00 ]
+Key: MUL_FPrST: [ 0.00 0.00 ]
+Key: MUL_FST: [ 0.00 0.00 ]
+Key: MUL_Fp: [ 0.00 0.00 ]
+Key: MUL_FpI: [ 0.00 0.00 ]
+Key: MUL_FrST: [ 0.00 0.00 ]
+Key: MWAITX: [ 0.00 0.00 ]
+Key: MWAITX_SAVE_RBX: [ 0.00 0.00 ]
+Key: MWAITXrrr: [ 0.00 0.00 ]
+Key: MWAITrr: [ 0.00 0.00 ]
+Key: NEG: [ 0.00 0.00 ]
+Key: NOOP: [ 0.00 0.00 ]
+Key: NOOPL: [ 0.00 0.00 ]
+Key: NOOPLr: [ 0.00 0.00 ]
+Key: NOOPQ: [ 0.00 0.00 ]
+Key: NOOPQr: [ 0.00 0.00 ]
+Key: NOOPW: [ 0.00 0.00 ]
+Key: NOOPWr: [ 0.00 0.00 ]
+Key: NOT: [ 0.00 0.00 ]
+Key: OR: [ 0.00 0.00 ]
+Key: ORPDrm: [ 0.00 0.00 ]
+Key: ORPDrr: [ 0.00 0.00 ]
+Key: ORPSrm: [ 0.00 0.00 ]
+Key: ORPSrr: [ 0.00 0.00 ]
+Key: OUT: [ 0.00 0.00 ]
+Key: OUTSB: [ 0.00 0.00 ]
+Key: OUTSL: [ 0.00 0.00 ]
+Key: OUTSW: [ 0.00 0.00 ]
+Key: PABSBrm: [ 0.00 0.00 ]
+Key: PABSBrr: [ 0.00 0.00 ]
+Key: PABSDrm: [ 0.00 0.00 ]
+Key: PABSDrr: [ 0.00 0.00 ]
+Key: PABSWrm: [ 0.00 0.00 ]
+Key: PABSWrr: [ 0.00 0.00 ]
+Key: PACKSSDWrm: [ 0.00 0.00 ]
+Key: PACKSSDWrr: [ 0.00 0.00 ]
+Key: PACKSSWBrm: [ 0.00 0.00 ]
+Key: PACKSSWBrr: [ 0.00 0.00 ]
+Key: PACKUSDWrm: [ 0.00 0.00 ]
+Key: PACKUSDWrr: [ 0.00 0.00 ]
+Key: PACKUSWBrm: [ 0.00 0.00 ]
+Key: PACKUSWBrr: [ 0.00 0.00 ]
+Key: PADDBrm: [ 0.00 0.00 ]
+Key: PADDBrr: [ 0.00 0.00 ]
+Key: PADDDrm: [ 0.00 0.00 ]
+Key: PADDDrr: [ 0.00 0.00 ]
+Key: PADDQrm: [ 0.00 0.00 ]
+Key: PADDQrr: [ 0.00 0.00 ]
+Key: PADDSBrm: [ 0.00 0.00 ]
+Key: PADDSBrr: [ 0.00 0.00 ]
+Key: PADDSWrm: [ 0.00 0.00 ]
+Key: PADDSWrr: [ 0.00 0.00 ]
+Key: PADDUSBrm: [ 0.00 0.00 ]
+Key: PADDUSBrr: [ 0.00 0.00 ]
+Key: PADDUSWrm: [ 0.00 0.00 ]
+Key: PADDUSWrr: [ 0.00 0.00 ]
+Key: PADDWrm: [ 0.00 0.00 ]
+Key: PADDWrr: [ 0.00 0.00 ]
+Key: PALIGNRrmi: [ 0.00 0.00 ]
+Key: PALIGNRrri: [ 0.00 0.00 ]
+Key: PANDNrm: [ 0.00 0.00 ]
+Key: PANDNrr: [ 0.00 0.00 ]
+Key: PANDrm: [ 0.00 0.00 ]
+Key: PANDrr: [ 0.00 0.00 ]
+Key: PATCHABLE_EVENT_CALL: [ 0.00 0.00 ]
+Key: PATCHABLE_FUNCTION_ENTER: [ 0.00 0.00 ]
+Key: PATCHABLE_FUNCTION_EXIT: [ 0.00 0.00 ]
+Key: PATCHABLE_OP: [ 0.00 0.00 ]
+Key: PATCHABLE_RET: [ 0.00 0.00 ]
+Key: PATCHABLE_TAIL_CALL: [ 0.00 0.00 ]
+Key: PATCHABLE_TYPED_EVENT_CALL: [ 0.00 0.00 ]
+Key: PATCHPOINT: [ 0.00 0.00 ]
+Key: PAUSE: [ 0.00 0.00 ]
+Key: PAVGBrm: [ 0.00 0.00 ]
+Key: PAVGBrr: [ 0.00 0.00 ]
+Key: PAVGUSBrm: [ 0.00 0.00 ]
+Key: PAVGUSBrr: [ 0.00 0.00 ]
+Key: PAVGWrm: [ 0.00 0.00 ]
+Key: PAVGWrr: [ 0.00 0.00 ]
+Key: PBLENDVBrm: [ 0.00 0.00 ]
+Key: PBLENDVBrr: [ 0.00 0.00 ]
+Key: PBLENDWrmi: [ 0.00 0.00 ]
+Key: PBLENDWrri: [ 0.00 0.00 ]
+Key: PBNDKB: [ 0.00 0.00 ]
+Key: PCLMULQDQrmi: [ 0.00 0.00 ]
+Key: PCLMULQDQrri: [ 0.00 0.00 ]
+Key: PCMPEQBrm: [ 0.00 0.00 ]
+Key: PCMPEQBrr: [ 0.00 0.00 ]
+Key: PCMPEQDrm: [ 0.00 0.00 ]
+Key: PCMPEQDrr: [ 0.00 0.00 ]
+Key: PCMPEQQrm: [ 0.00 0.00 ]
+Key: PCMPEQQrr: [ 0.00 0.00 ]
+Key: PCMPEQWrm: [ 0.00 0.00 ]
+Key: PCMPEQWrr: [ 0.00 0.00 ]
+Key: PCMPESTRIrmi: [ 0.00 0.00 ]
+Key: PCMPESTRIrri: [ 0.00 0.00 ]
+Key: PCMPESTRMrmi: [ 0.00 0.00 ]
+Key: PCMPESTRMrri: [ 0.00 0.00 ]
+Key: PCMPGTBrm: [ 0.00 0.00 ]
+Key: PCMPGTBrr: [ 0.00 0.00 ]
+Key: PCMPGTDrm: [ 0.00 0.00 ]
+Key: PCMPGTDrr: [ 0.00 0.00 ]
+Key: PCMPGTQrm: [ 0.00 0.00 ]
+Key: PCMPGTQrr: [ 0.00 0.00 ]
+Key: PCMPGTWrm: [ 0.00 0.00 ]
+Key: PCMPGTWrr: [ 0.00 0.00 ]
+Key: PCMPISTRIrmi: [ 0.00 0.00 ]
+Key: PCMPISTRIrri: [ 0.00 0.00 ]
+Key: PCMPISTRMrmi: [ 0.00 0.00 ]
+Key: PCMPISTRMrri: [ 0.00 0.00 ]
+Key: PCONFIG: [ 0.00 0.00 ]
+Key: PDEP: [ 0.00 0.00 ]
+Key: PEXT: [ 0.00 0.00 ]
+Key: PEXTRBmri: [ 0.00 0.00 ]
+Key: PEXTRBrri: [ 0.00 0.00 ]
+Key: PEXTRDmri: [ 0.00 0.00 ]
+Key: PEXTRDrri: [ 0.00 0.00 ]
+Key: PEXTRQmri: [ 0.00 0.00 ]
+Key: PEXTRQrri: [ 0.00 0.00 ]
+Key: PEXTRWmri: [ 0.00 0.00 ]
+Key: PEXTRWrri: [ 0.00 0.00 ]
+Key: PEXTRWrri_REV: [ 0.00 0.00 ]
+Key: PF: [ 0.00 0.00 ]
+Key: PFACCrm: [ 0.00 0.00 ]
+Key: PFACCrr: [ 0.00 0.00 ]
+Key: PFADDrm: [ 0.00 0.00 ]
+Key: PFADDrr: [ 0.00 0.00 ]
+Key: PFCMPEQrm: [ 0.00 0.00 ]
+Key: PFCMPEQrr: [ 0.00 0.00 ]
+Key: PFCMPGErm: [ 0.00 0.00 ]
+Key: PFCMPGErr: [ 0.00 0.00 ]
+Key: PFCMPGTrm: [ 0.00 0.00 ]
+Key: PFCMPGTrr: [ 0.00 0.00 ]
+Key: PFMAXrm: [ 0.00 0.00 ]
+Key: PFMAXrr: [ 0.00 0.00 ]
+Key: PFMINrm: [ 0.00 0.00 ]
+Key: PFMINrr: [ 0.00 0.00 ]
+Key: PFMULrm: [ 0.00 0.00 ]
+Key: PFMULrr: [ 0.00 0.00 ]
+Key: PFNACCrm: [ 0.00 0.00 ]
+Key: PFNACCrr: [ 0.00 0.00 ]
+Key: PFPNACCrm: [ 0.00 0.00 ]
+Key: PFPNACCrr: [ 0.00 0.00 ]
+Key: PFRCPIT: [ 0.00 0.00 ]
+Key: PFRCPrm: [ 0.00 0.00 ]
+Key: PFRCPrr: [ 0.00 0.00 ]
+Key: PFRSQIT: [ 0.00 0.00 ]
+Key: PFRSQRTrm: [ 0.00 0.00 ]
+Key: PFRSQRTrr: [ 0.00 0.00 ]
+Key: PFSUBRrm: [ 0.00 0.00 ]
+Key: PFSUBRrr: [ 0.00 0.00 ]
+Key: PFSUBrm: [ 0.00 0.00 ]
+Key: PFSUBrr: [ 0.00 0.00 ]
+Key: PHADDDrm: [ 0.00 0.00 ]
+Key: PHADDDrr: [ 0.00 0.00 ]
+Key: PHADDSWrm: [ 0.00 0.00 ]
+Key: PHADDSWrr: [ 0.00 0.00 ]
+Key: PHADDWrm: [ 0.00 0.00 ]
+Key: PHADDWrr: [ 0.00 0.00 ]
+Key: PHI: [ 0.00 0.00 ]
+Key: PHMINPOSUWrm: [ 0.00 0.00 ]
+Key: PHMINPOSUWrr: [ 0.00 0.00 ]
+Key: PHSUBDrm: [ 0.00 0.00 ]
+Key: PHSUBDrr: [ 0.00 0.00 ]
+Key: PHSUBSWrm: [ 0.00 0.00 ]
+Key: PHSUBSWrr: [ 0.00 0.00 ]
+Key: PHSUBWrm: [ 0.00 0.00 ]
+Key: PHSUBWrr: [ 0.00 0.00 ]
+Key: PI: [ 0.00 0.00 ]
+Key: PINSRBrmi: [ 0.00 0.00 ]
+Key: PINSRBrri: [ 0.00 0.00 ]
+Key: PINSRDrmi: [ 0.00 0.00 ]
+Key: PINSRDrri: [ 0.00 0.00 ]
+Key: PINSRQrmi: [ 0.00 0.00 ]
+Key: PINSRQrri: [ 0.00 0.00 ]
+Key: PINSRWrmi: [ 0.00 0.00 ]
+Key: PINSRWrri: [ 0.00 0.00 ]
+Key: PLDTILECFGV: [ 0.00 0.00 ]
+Key: PLEA: [ 0.00 0.00 ]
+Key: PMADDUBSWrm: [ 0.00 0.00 ]
+Key: PMADDUBSWrr: [ 0.00 0.00 ]
+Key: PMADDWDrm: [ 0.00 0.00 ]
+Key: PMADDWDrr: [ 0.00 0.00 ]
+Key: PMAXSBrm: [ 0.00 0.00 ]
+Key: PMAXSBrr: [ 0.00 0.00 ]
+Key: PMAXSDrm: [ 0.00 0.00 ]
+Key: PMAXSDrr: [ 0.00 0.00 ]
+Key: PMAXSWrm: [ 0.00 0.00 ]
+Key: PMAXSWrr: [ 0.00 0.00 ]
+Key: PMAXUBrm: [ 0.00 0.00 ]
+Key: PMAXUBrr: [ 0.00 0.00 ]
+Key: PMAXUDrm: [ 0.00 0.00 ]
+Key: PMAXUDrr: [ 0.00 0.00 ]
+Key: PMAXUWrm: [ 0.00 0.00 ]
+Key: PMAXUWrr: [ 0.00 0.00 ]
+Key: PMINSBrm: [ 0.00 0.00 ]
+Key: PMINSBrr: [ 0.00 0.00 ]
+Key: PMINSDrm: [ 0.00 0.00 ]
+Key: PMINSDrr: [ 0.00 0.00 ]
+Key: PMINSWrm: [ 0.00 0.00 ]
+Key: PMINSWrr: [ 0.00 0.00 ]
+Key: PMINUBrm: [ 0.00 0.00 ]
+Key: PMINUBrr: [ 0.00 0.00 ]
+Key: PMINUDrm: [ 0.00 0.00 ]
+Key: PMINUDrr: [ 0.00 0.00 ]
+Key: PMINUWrm: [ 0.00 0.00 ]
+Key: PMINUWrr: [ 0.00 0.00 ]
+Key: PMOVMSKBrr: [ 0.00 0.00 ]
+Key: PMOVSXBDrm: [ 0.00 0.00 ]
+Key: PMOVSXBDrr: [ 0.00 0.00 ]
+Key: PMOVSXBQrm: [ 0.00 0.00 ]
+Key: PMOVSXBQrr: [ 0.00 0.00 ]
+Key: PMOVSXBWrm: [ 0.00 0.00 ]
+Key: PMOVSXBWrr: [ 0.00 0.00 ]
+Key: PMOVSXDQrm: [ 0.00 0.00 ]
+Key: PMOVSXDQrr: [ 0.00 0.00 ]
+Key: PMOVSXWDrm: [ 0.00 0.00 ]
+Key: PMOVSXWDrr: [ 0.00 0.00 ]
+Key: PMOVSXWQrm: [ 0.00 0.00 ]
+Key: PMOVSXWQrr: [ 0.00 0.00 ]
+Key: PMOVZXBDrm: [ 0.00 0.00 ]
+Key: PMOVZXBDrr: [ 0.00 0.00 ]
+Key: PMOVZXBQrm: [ 0.00 0.00 ]
+Key: PMOVZXBQrr: [ 0.00 0.00 ]
+Key: PMOVZXBWrm: [ 0.00 0.00 ]
+Key: PMOVZXBWrr: [ 0.00 0.00 ]
+Key: PMOVZXDQrm: [ 0.00 0.00 ]
+Key: PMOVZXDQrr: [ 0.00 0.00 ]
+Key: PMOVZXWDrm: [ 0.00 0.00 ]
+Key: PMOVZXWDrr: [ 0.00 0.00 ]
+Key: PMOVZXWQrm: [ 0.00 0.00 ]
+Key: PMOVZXWQrr: [ 0.00 0.00 ]
+Key: PMULDQrm: [ 0.00 0.00 ]
+Key: PMULDQrr: [ 0.00 0.00 ]
+Key: PMULHRSWrm: [ 0.00 0.00 ]
+Key: PMULHRSWrr: [ 0.00 0.00 ]
+Key: PMULHRWrm: [ 0.00 0.00 ]
+Key: PMULHRWrr: [ 0.00 0.00 ]
+Key: PMULHUWrm: [ 0.00 0.00 ]
+Key: PMULHUWrr: [ 0.00 0.00 ]
+Key: PMULHWrm: [ 0.00 0.00 ]
+Key: PMULHWrr: [ 0.00 0.00 ]
+Key: PMULLDrm: [ 0.00 0.00 ]
+Key: PMULLDrr: [ 0.00 0.00 ]
+Key: PMULLWrm: [ 0.00 0.00 ]
+Key: PMULLWrr: [ 0.00 0.00 ]
+Key: PMULUDQrm: [ 0.00 0.00 ]
+Key: PMULUDQrr: [ 0.00 0.00 ]
+Key: POP: [ 0.00 0.00 ]
+Key: POPA: [ 0.00 0.00 ]
+Key: POPCNT: [ 0.00 0.00 ]
+Key: POPDS: [ 0.00 0.00 ]
+Key: POPES: [ 0.00 0.00 ]
+Key: POPF: [ 0.00 0.00 ]
+Key: POPFS: [ 0.00 0.00 ]
+Key: POPGS: [ 0.00 0.00 ]
+Key: POPP: [ 0.00 0.00 ]
+Key: POPSS: [ 0.00 0.00 ]
+Key: PORrm: [ 0.00 0.00 ]
+Key: PORrr: [ 0.00 0.00 ]
+Key: PREALLOCATED_ARG: [ 0.00 0.00 ]
+Key: PREALLOCATED_SETUP: [ 0.00 0.00 ]
+Key: PREFETCH: [ 0.00 0.00 ]
+Key: PREFETCHIT: [ 0.00 0.00 ]
+Key: PREFETCHNTA: [ 0.00 0.00 ]
+Key: PREFETCHRST: [ 0.00 0.00 ]
+Key: PREFETCHT: [ 0.00 0.00 ]
+Key: PREFETCHW: [ 0.00 0.00 ]
+Key: PREFETCHWT: [ 0.00 0.00 ]
+Key: PROBED_ALLOCA: [ 0.00 0.00 ]
+Key: PSADBWrm: [ 0.00 0.00 ]
+Key: PSADBWrr: [ 0.00 0.00 ]
+Key: PSEUDO_PROBE: [ 0.00 0.00 ]
+Key: PSHUFBrm: [ 0.00 0.00 ]
+Key: PSHUFBrr: [ 0.00 0.00 ]
+Key: PSHUFDmi: [ 0.00 0.00 ]
+Key: PSHUFDri: [ 0.00 0.00 ]
+Key: PSHUFHWmi: [ 0.00 0.00 ]
+Key: PSHUFHWri: [ 0.00 0.00 ]
+Key: PSHUFLWmi: [ 0.00 0.00 ]
+Key: PSHUFLWri: [ 0.00 0.00 ]
+Key: PSIGNBrm: [ 0.00 0.00 ]
+Key: PSIGNBrr: [ 0.00 0.00 ]
+Key: PSIGNDrm: [ 0.00 0.00 ]
+Key: PSIGNDrr: [ 0.00 0.00 ]
+Key: PSIGNWrm: [ 0.00 0.00 ]
+Key: PSIGNWrr: [ 0.00 0.00 ]
+Key: PSLLDQri: [ 0.00 0.00 ]
+Key: PSLLDri: [ 0.00 0.00 ]
+Key: PSLLDrm: [ 0.00 0.00 ]
+Key: PSLLDrr: [ 0.00 0.00 ]
+Key: PSLLQri: [ 0.00 0.00 ]
+Key: PSLLQrm: [ 0.00 0.00 ]
+Key: PSLLQrr: [ 0.00 0.00 ]
+Key: PSLLWri: [ 0.00 0.00 ]
+Key: PSLLWrm: [ 0.00 0.00 ]
+Key: PSLLWrr: [ 0.00 0.00 ]
+Key: PSMASH: [ 0.00 0.00 ]
+Key: PSRADri: [ 0.00 0.00 ]
+Key: PSRADrm: [ 0.00 0.00 ]
+Key: PSRADrr: [ 0.00 0.00 ]
+Key: PSRAWri: [ 0.00 0.00 ]
+Key: PSRAWrm: [ 0.00 0.00 ]
+Key: PSRAWrr: [ 0.00 0.00 ]
+Key: PSRLDQri: [ 0.00 0.00 ]
+Key: PSRLDri: [ 0.00 0.00 ]
+Key: PSRLDrm: [ 0.00 0.00 ]
+Key: PSRLDrr: [ 0.00 0.00 ]
+Key: PSRLQri: [ 0.00 0.00 ]
+Key: PSRLQrm: [ 0.00 0.00 ]
+Key: PSRLQrr: [ 0.00 0.00 ]
+Key: PSRLWri: [ 0.00 0.00 ]
+Key: PSRLWrm: [ 0.00 0.00 ]
+Key: PSRLWrr: [ 0.00 0.00 ]
+Key: PSUBBrm: [ 0.00 0.00 ]
+Key: PSUBBrr: [ 0.00 0.00 ]
+Key: PSUBDrm: [ 0.00 0.00 ]
+Key: PSUBDrr: [ 0.00 0.00 ]
+Key: PSUBQrm: [ 0.00 0.00 ]
+Key: PSUBQrr: [ 0.00 0.00 ]
+Key: PSUBSBrm: [ 0.00 0.00 ]
+Key: PSUBSBrr: [ 0.00 0.00 ]
+Key: PSUBSWrm: [ 0.00 0.00 ]
+Key: PSUBSWrr: [ 0.00 0.00 ]
+Key: PSUBUSBrm: [ 0.00 0.00 ]
+Key: PSUBUSBrr: [ 0.00 0.00 ]
+Key: PSUBUSWrm: [ 0.00 0.00 ]
+Key: PSUBUSWrr: [ 0.00 0.00 ]
+Key: PSUBWrm: [ 0.00 0.00 ]
+Key: PSUBWrr: [ 0.00 0.00 ]
+Key: PSWAPDrm: [ 0.00 0.00 ]
+Key: PSWAPDrr: [ 0.00 0.00 ]
+Key: PT: [ 0.00 0.00 ]
+Key: PTCMMIMFP: [ 0.00 0.00 ]
+Key: PTCMMRLFP: [ 0.00 0.00 ]
+Key: PTCONJTCMMIMFP: [ 0.00 0.00 ]
+Key: PTCONJTFP: [ 0.00 0.00 ]
+Key: PTCVTROWD: [ 0.00 0.00 ]
+Key: PTCVTROWPS: [ 0.00 0.00 ]
+Key: PTDPBF: [ 0.00 0.00 ]
+Key: PTDPBHF: [ 0.00 0.00 ]
+Key: PTDPBSSD: [ 0.00 0.00 ]
+Key: PTDPBSSDV: [ 0.00 0.00 ]
+Key: PTDPBSUD: [ 0.00 0.00 ]
+Key: PTDPBSUDV: [ 0.00 0.00 ]
+Key: PTDPBUSD: [ 0.00 0.00 ]
+Key: PTDPBUSDV: [ 0.00 0.00 ]
+Key: PTDPBUUD: [ 0.00 0.00 ]
+Key: PTDPBUUDV: [ 0.00 0.00 ]
+Key: PTDPFP: [ 0.00 0.00 ]
+Key: PTDPHBF: [ 0.00 0.00 ]
+Key: PTDPHF: [ 0.00 0.00 ]
+Key: PTESTrm: [ 0.00 0.00 ]
+Key: PTESTrr: [ 0.00 0.00 ]
+Key: PTILELOADD: [ 0.00 0.00 ]
+Key: PTILELOADDRS: [ 0.00 0.00 ]
+Key: PTILELOADDRST: [ 0.00 0.00 ]
+Key: PTILELOADDRSV: [ 0.00 0.00 ]
+Key: PTILELOADDT: [ 0.00 0.00 ]
+Key: PTILELOADDV: [ 0.00 0.00 ]
+Key: PTILEMOVROWrre: [ 0.00 0.00 ]
+Key: PTILEMOVROWrreV: [ 0.00 0.00 ]
+Key: PTILEMOVROWrri: [ 0.00 0.00 ]
+Key: PTILEMOVROWrriV: [ 0.00 0.00 ]
+Key: PTILEPAIRLOAD: [ 0.00 0.00 ]
+Key: PTILEPAIRSTORE: [ 0.00 0.00 ]
+Key: PTILESTORED: [ 0.00 0.00 ]
+Key: PTILESTOREDV: [ 0.00 0.00 ]
+Key: PTILEZERO: [ 0.00 0.00 ]
+Key: PTILEZEROV: [ 0.00 0.00 ]
+Key: PTMMULTF: [ 0.00 0.00 ]
+Key: PTTCMMIMFP: [ 0.00 0.00 ]
+Key: PTTCMMRLFP: [ 0.00 0.00 ]
+Key: PTTDPBF: [ 0.00 0.00 ]
+Key: PTTDPFP: [ 0.00 0.00 ]
+Key: PTTMMULTF: [ 0.00 0.00 ]
+Key: PTTRANSPOSED: [ 0.00 0.00 ]
+Key: PTTRANSPOSEDV: [ 0.00 0.00 ]
+Key: PTWRITE: [ 0.00 0.00 ]
+Key: PTWRITEm: [ 0.00 0.00 ]
+Key: PTWRITEr: [ 0.00 0.00 ]
+Key: PUNPCKHBWrm: [ 0.00 0.00 ]
+Key: PUNPCKHBWrr: [ 0.00 0.00 ]
+Key: PUNPCKHDQrm: [ 0.00 0.00 ]
+Key: PUNPCKHDQrr: [ 0.00 0.00 ]
+Key: PUNPCKHQDQrm: [ 0.00 0.00 ]
+Key: PUNPCKHQDQrr: [ 0.00 0.00 ]
+Key: PUNPCKHWDrm: [ 0.00 0.00 ]
+Key: PUNPCKHWDrr: [ 0.00 0.00 ]
+Key: PUNPCKLBWrm: [ 0.00 0.00 ]
+Key: PUNPCKLBWrr: [ 0.00 0.00 ]
+Key: PUNPCKLDQrm: [ 0.00 0.00 ]
+Key: PUNPCKLDQrr: [ 0.00 0.00 ]
+Key: PUNPCKLQDQrm: [ 0.00 0.00 ]
+Key: PUNPCKLQDQrr: [ 0.00 0.00 ]
+Key: PUNPCKLWDrm: [ 0.00 0.00 ]
+Key: PUNPCKLWDrr: [ 0.00 0.00 ]
+Key: PUSH: [ 0.00 0.00 ]
+Key: PUSHA: [ 0.00 0.00 ]
+Key: PUSHCS: [ 0.00 0.00 ]
+Key: PUSHDS: [ 0.00 0.00 ]
+Key: PUSHES: [ 0.00 0.00 ]
+Key: PUSHF: [ 0.00 0.00 ]
+Key: PUSHFS: [ 0.00 0.00 ]
+Key: PUSHGS: [ 0.00 0.00 ]
+Key: PUSHP: [ 0.00 0.00 ]
+Key: PUSHSS: [ 0.00 0.00 ]
+Key: PVALIDATE: [ 0.00 0.00 ]
+Key: PXORrm: [ 0.00 0.00 ]
+Key: PXORrr: [ 0.00 0.00 ]
+Key: RCL: [ 0.00 0.00 ]
+Key: RCPPSm: [ 0.00 0.00 ]
+Key: RCPPSr: [ 0.00 0.00 ]
+Key: RCPSSm: [ 0.00 0.00 ]
+Key: RCPSSm_Int: [ 0.00 0.00 ]
+Key: RCPSSr: [ 0.00 0.00 ]
+Key: RCPSSr_Int: [ 0.00 0.00 ]
+Key: RCR: [ 0.00 0.00 ]
+Key: RDFLAGS: [ 0.00 0.00 ]
+Key: RDFSBASE: [ 0.00 0.00 ]
+Key: RDGSBASE: [ 0.00 0.00 ]
+Key: RDMSR: [ 0.00 0.00 ]
+Key: RDMSRLIST: [ 0.00 0.00 ]
+Key: RDMSRri: [ 0.00 0.00 ]
+Key: RDMSRri_EVEX: [ 0.00 0.00 ]
+Key: RDPID: [ 0.00 0.00 ]
+Key: RDPKRUr: [ 0.00 0.00 ]
+Key: RDPMC: [ 0.00 0.00 ]
+Key: RDPRU: [ 0.00 0.00 ]
+Key: RDRAND: [ 0.00 0.00 ]
+Key: RDSEED: [ 0.00 0.00 ]
+Key: RDSSPD: [ 0.00 0.00 ]
+Key: RDSSPQ: [ 0.00 0.00 ]
+Key: RDTSC: [ 0.00 0.00 ]
+Key: RDTSCP: [ 0.00 0.00 ]
+Key: REG_SEQUENCE: [ 0.00 0.00 ]
+Key: REPNE_PREFIX: [ 0.00 0.00 ]
+Key: REP_MOVSB: [ 0.00 0.00 ]
+Key: REP_MOVSD: [ 0.00 0.00 ]
+Key: REP_MOVSQ: [ 0.00 0.00 ]
+Key: REP_MOVSW: [ 0.00 0.00 ]
+Key: REP_PREFIX: [ 0.00 0.00 ]
+Key: REP_STOSB: [ 0.00 0.00 ]
+Key: REP_STOSD: [ 0.00 0.00 ]
+Key: REP_STOSQ: [ 0.00 0.00 ]
+Key: REP_STOSW: [ 0.00 0.00 ]
+Key: RET: [ 0.00 0.00 ]
+Key: RETI: [ 0.00 0.00 ]
+Key: REX: [ 0.00 0.00 ]
+Key: RMPADJUST: [ 0.00 0.00 ]
+Key: RMPQUERY: [ 0.00 0.00 ]
+Key: RMPUPDATE: [ 0.00 0.00 ]
+Key: ROL: [ 0.00 0.00 ]
+Key: ROR: [ 0.00 0.00 ]
+Key: RORX: [ 0.00 0.00 ]
+Key: ROUNDPDmi: [ 0.00 0.00 ]
+Key: ROUNDPDri: [ 0.00 0.00 ]
+Key: ROUNDPSmi: [ 0.00 0.00 ]
+Key: ROUNDPSri: [ 0.00 0.00 ]
+Key: ROUNDSDmi: [ 0.00 0.00 ]
+Key: ROUNDSDmi_Int: [ 0.00 0.00 ]
+Key: ROUNDSDri: [ 0.00 0.00 ]
+Key: ROUNDSDri_Int: [ 0.00 0.00 ]
+Key: ROUNDSSmi: [ 0.00 0.00 ]
+Key: ROUNDSSmi_Int: [ 0.00 0.00 ]
+Key: ROUNDSSri: [ 0.00 0.00 ]
+Key: ROUNDSSri_Int: [ 0.00 0.00 ]
+Key: RSM: [ 0.00 0.00 ]
+Key: RSQRTPSm: [ 0.00 0.00 ]
+Key: RSQRTPSr: [ 0.00 0.00 ]
+Key: RSQRTSSm: [ 0.00 0.00 ]
+Key: RSQRTSSm_Int: [ 0.00 0.00 ]
+Key: RSQRTSSr: [ 0.00 0.00 ]
+Key: RSQRTSSr_Int: [ 0.00 0.00 ]
+Key: RSTORSSP: [ 0.00 0.00 ]
+Key: SAHF: [ 0.00 0.00 ]
+Key: SALC: [ 0.00 0.00 ]
+Key: SAR: [ 0.00 0.00 ]
+Key: SARX: [ 0.00 0.00 ]
+Key: SAVEPREVSSP: [ 0.00 0.00 ]
+Key: SBB: [ 0.00 0.00 ]
+Key: SCASB: [ 0.00 0.00 ]
+Key: SCASL: [ 0.00 0.00 ]
+Key: SCASQ: [ 0.00 0.00 ]
+Key: SCASW: [ 0.00 0.00 ]
+Key: SEAMCALL: [ 0.00 0.00 ]
+Key: SEAMOPS: [ 0.00 0.00 ]
+Key: SEAMRET: [ 0.00 0.00 ]
+Key: SEG_ALLOCA: [ 0.00 0.00 ]
+Key: SEH_BeginEpilogue: [ 0.00 0.00 ]
+Key: SEH_EndEpilogue: [ 0.00 0.00 ]
+Key: SEH_EndPrologue: [ 0.00 0.00 ]
+Key: SEH_PushFrame: [ 0.00 0.00 ]
+Key: SEH_PushReg: [ 0.00 0.00 ]
+Key: SEH_SaveReg: [ 0.00 0.00 ]
+Key: SEH_SaveXMM: [ 0.00 0.00 ]
+Key: SEH_SetFrame: [ 0.00 0.00 ]
+Key: SEH_StackAlign: [ 0.00 0.00 ]
+Key: SEH_StackAlloc: [ 0.00 0.00 ]
+Key: SEH_UnwindV: [ 0.00 0.00 ]
+Key: SEH_UnwindVersion: [ 0.00 0.00 ]
+Key: SENDUIPI: [ 0.00 0.00 ]
+Key: SERIALIZE: [ 0.00 0.00 ]
+Key: SETB_C: [ 0.00 0.00 ]
+Key: SETCCm: [ 0.00 0.00 ]
+Key: SETCCm_EVEX: [ 0.00 0.00 ]
+Key: SETCCr: [ 0.00 0.00 ]
+Key: SETCCr_EVEX: [ 0.00 0.00 ]
+Key: SETSSBSY: [ 0.00 0.00 ]
+Key: SETZUCCm: [ 0.00 0.00 ]
+Key: SETZUCCr: [ 0.00 0.00 ]
+Key: SFENCE: [ 0.00 0.00 ]
+Key: SGDT: [ 0.00 0.00 ]
+Key: SHA: [ 0.00 0.00 ]
+Key: SHL: [ 0.00 0.00 ]
+Key: SHLD: [ 0.00 0.00 ]
+Key: SHLDROT: [ 0.00 0.00 ]
+Key: SHLX: [ 0.00 0.00 ]
+Key: SHR: [ 0.00 0.00 ]
+Key: SHRD: [ 0.00 0.00 ]
+Key: SHRDROT: [ 0.00 0.00 ]
+Key: SHRX: [ 0.00 0.00 ]
+Key: SHUFPDrmi: [ 0.00 0.00 ]
+Key: SHUFPDrri: [ 0.00 0.00 ]
+Key: SHUFPSrmi: [ 0.00 0.00 ]
+Key: SHUFPSrri: [ 0.00 0.00 ]
+Key: SIDT: [ 0.00 0.00 ]
+Key: SKINIT: [ 0.00 0.00 ]
+Key: SLDT: [ 0.00 0.00 ]
+Key: SLWPCB: [ 0.00 0.00 ]
+Key: SMSW: [ 0.00 0.00 ]
+Key: SQRTPDm: [ 0.00 0.00 ]
+Key: SQRTPDr: [ 0.00 0.00 ]
+Key: SQRTPSm: [ 0.00 0.00 ]
+Key: SQRTPSr: [ 0.00 0.00 ]
+Key: SQRTSDm: [ 0.00 0.00 ]
+Key: SQRTSDm_Int: [ 0.00 0.00 ]
+Key: SQRTSDr: [ 0.00 0.00 ]
+Key: SQRTSDr_Int: [ 0.00 0.00 ]
+Key: SQRTSSm: [ 0.00 0.00 ]
+Key: SQRTSSm_Int: [ 0.00 0.00 ]
+Key: SQRTSSr: [ 0.00 0.00 ]
+Key: SQRTSSr_Int: [ 0.00 0.00 ]
+Key: SQRT_F: [ 0.00 0.00 ]
+Key: SQRT_Fp: [ 0.00 0.00 ]
+Key: SS_PREFIX: [ 0.00 0.00 ]
+Key: STAC: [ 0.00 0.00 ]
+Key: STACKALLOC_W_PROBING: [ 0.00 0.00 ]
+Key: STACKMAP: [ 0.00 0.00 ]
+Key: STATEPOINT: [ 0.00 0.00 ]
+Key: STC: [ 0.00 0.00 ]
+Key: STD: [ 0.00 0.00 ]
+Key: STGI: [ 0.00 0.00 ]
+Key: STI: [ 0.00 0.00 ]
+Key: STMXCSR: [ 0.00 0.00 ]
+Key: STOSB: [ 0.00 0.00 ]
+Key: STOSL: [ 0.00 0.00 ]
+Key: STOSQ: [ 0.00 0.00 ]
+Key: STOSW: [ 0.00 0.00 ]
+Key: STR: [ 0.00 0.00 ]
+Key: STRm: [ 0.00 0.00 ]
+Key: STTILECFG: [ 0.00 0.00 ]
+Key: STTILECFG_EVEX: [ 0.00 0.00 ]
+Key: STUI: [ 0.00 0.00 ]
+Key: ST_F: [ 0.00 0.00 ]
+Key: ST_FP: [ 0.00 0.00 ]
+Key: ST_FPrr: [ 0.00 0.00 ]
+Key: ST_Fp: [ 0.00 0.00 ]
+Key: ST_FpP: [ 0.00 0.00 ]
+Key: ST_Frr: [ 0.00 0.00 ]
+Key: SUB: [ 0.00 0.00 ]
+Key: SUBPDrm: [ 0.00 0.00 ]
+Key: SUBPDrr: [ 0.00 0.00 ]
+Key: SUBPSrm: [ 0.00 0.00 ]
+Key: SUBPSrr: [ 0.00 0.00 ]
+Key: SUBREG_TO_REG: [ 0.00 0.00 ]
+Key: SUBR_F: [ 0.00 0.00 ]
+Key: SUBR_FI: [ 0.00 0.00 ]
+Key: SUBR_FPrST: [ 0.00 0.00 ]
+Key: SUBR_FST: [ 0.00 0.00 ]
+Key: SUBR_Fp: [ 0.00 0.00 ]
+Key: SUBR_FpI: [ 0.00 0.00 ]
+Key: SUBR_FrST: [ 0.00 0.00 ]
+Key: SUBSDrm: [ 0.00 0.00 ]
+Key: SUBSDrm_Int: [ 0.00 0.00 ]
+Key: SUBSDrr: [ 0.00 0.00 ]
+Key: SUBSDrr_Int: [ 0.00 0.00 ]
+Key: SUBSSrm: [ 0.00 0.00 ]
+Key: SUBSSrm_Int: [ 0.00 0.00 ]
+Key: SUBSSrr: [ 0.00 0.00 ]
+Key: SUBSSrr_Int: [ 0.00 0.00 ]
+Key: SUB_F: [ 0.00 0.00 ]
+Key: SUB_FI: [ 0.00 0.00 ]
+Key: SUB_FPrST: [ 0.00 0.00 ]
+Key: SUB_FST: [ 0.00 0.00 ]
+Key: SUB_Fp: [ 0.00 0.00 ]
+Key: SUB_FpI: [ 0.00 0.00 ]
+Key: SUB_FrST: [ 0.00 0.00 ]
+Key: SWAPGS: [ 0.00 0.00 ]
+Key: SYSCALL: [ 0.00 0.00 ]
+Key: SYSENTER: [ 0.00 0.00 ]
+Key: SYSEXIT: [ 0.00 0.00 ]
+Key: SYSRET: [ 0.00 0.00 ]
+Key: T: [ 0.00 0.00 ]
+Key: TAILJMPd: [ 0.00 0.00 ]
+Key: TAILJMPd_CC: [ 0.00 0.00 ]
+Key: TAILJMPm: [ 0.00 0.00 ]
+Key: TAILJMPr: [ 0.00 0.00 ]
+Key: TCMMIMFP: [ 0.00 0.00 ]
+Key: TCMMRLFP: [ 0.00 0.00 ]
+Key: TCONJTCMMIMFP: [ 0.00 0.00 ]
+Key: TCONJTFP: [ 0.00 0.00 ]
+Key: TCRETURN_HIPE: [ 0.00 0.00 ]
+Key: TCRETURN_WIN: [ 0.00 0.00 ]
+Key: TCRETURN_WINmi: [ 0.00 0.00 ]
+Key: TCRETURNdi: [ 0.00 0.00 ]
+Key: TCRETURNdicc: [ 0.00 0.00 ]
+Key: TCRETURNmi: [ 0.00 0.00 ]
+Key: TCRETURNri: [ 0.00 0.00 ]
+Key: TCVTROWD: [ 0.00 0.00 ]
+Key: TCVTROWPS: [ 0.00 0.00 ]
+Key: TDCALL: [ 0.00 0.00 ]
+Key: TDPBF: [ 0.00 0.00 ]
+Key: TDPBHF: [ 0.00 0.00 ]
+Key: TDPBSSD: [ 0.00 0.00 ]
+Key: TDPBSUD: [ 0.00 0.00 ]
+Key: TDPBUSD: [ 0.00 0.00 ]
+Key: TDPBUUD: [ 0.00 0.00 ]
+Key: TDPFP: [ 0.00 0.00 ]
+Key: TDPHBF: [ 0.00 0.00 ]
+Key: TDPHF: [ 0.00 0.00 ]
+Key: TEST: [ 0.00 0.00 ]
+Key: TESTUI: [ 0.00 0.00 ]
+Key: TILELOADD: [ 0.00 0.00 ]
+Key: TILELOADDRS: [ 0.00 0.00 ]
+Key: TILELOADDRST: [ 0.00 0.00 ]
+Key: TILELOADDRS_EVEX: [ 0.00 0.00 ]
+Key: TILELOADDT: [ 0.00 0.00 ]
+Key: TILELOADD_EVEX: [ 0.00 0.00 ]
+Key: TILEMOVROWrre: [ 0.00 0.00 ]
+Key: TILEMOVROWrri: [ 0.00 0.00 ]
+Key: TILERELEASE: [ 0.00 0.00 ]
+Key: TILESTORED: [ 0.00 0.00 ]
+Key: TILESTORED_EVEX: [ 0.00 0.00 ]
+Key: TILEZERO: [ 0.00 0.00 ]
+Key: TLBSYNC: [ 0.00 0.00 ]
+Key: TLSCall: [ 0.00 0.00 ]
+Key: TLS_addr: [ 0.00 0.00 ]
+Key: TLS_addrX: [ 0.00 0.00 ]
+Key: TLS_base_addr: [ 0.00 0.00 ]
+Key: TLS_base_addrX: [ 0.00 0.00 ]
+Key: TLS_desc: [ 0.00 0.00 ]
+Key: TMMULTF: [ 0.00 0.00 ]
+Key: TPAUSE: [ 0.00 0.00 ]
+Key: TRAP: [ 0.00 0.00 ]
+Key: TST_F: [ 0.00 0.00 ]
+Key: TST_Fp: [ 0.00 0.00 ]
+Key: TTCMMIMFP: [ 0.00 0.00 ]
+Key: TTCMMRLFP: [ 0.00 0.00 ]
+Key: TTDPBF: [ 0.00 0.00 ]
+Key: TTDPFP: [ 0.00 0.00 ]
+Key: TTMMULTF: [ 0.00 0.00 ]
+Key: TTRANSPOSED: [ 0.00 0.00 ]
+Key: TZCNT: [ 0.00 0.00 ]
+Key: TZMSK: [ 0.00 0.00 ]
+Key: UBSAN_UD: [ 0.00 0.00 ]
+Key: UCOMISDrm: [ 0.00 0.00 ]
+Key: UCOMISDrm_Int: [ 0.00 0.00 ]
+Key: UCOMISDrr: [ 0.00 0.00 ]
+Key: UCOMISDrr_Int: [ 0.00 0.00 ]
+Key: UCOMISSrm: [ 0.00 0.00 ]
+Key: UCOMISSrm_Int: [ 0.00 0.00 ]
+Key: UCOMISSrr: [ 0.00 0.00 ]
+Key: UCOMISSrr_Int: [ 0.00 0.00 ]
+Key: UCOM_FIPr: [ 0.00 0.00 ]
+Key: UCOM_FIr: [ 0.00 0.00 ]
+Key: UCOM_FPPr: [ 0.00 0.00 ]
+Key: UCOM_FPr: [ 0.00 0.00 ]
+Key: UCOM_FpIr: [ 0.00 0.00 ]
+Key: UCOM_Fpr: [ 0.00 0.00 ]
+Key: UCOM_Fr: [ 0.00 0.00 ]
+Key: UD: [ 0.00 0.00 ]
+Key: UIRET: [ 0.00 0.00 ]
+Key: UMONITOR: [ 0.00 0.00 ]
+Key: UMWAIT: [ 0.00 0.00 ]
+Key: UNPCKHPDrm: [ 0.00 0.00 ]
+Key: UNPCKHPDrr: [ 0.00 0.00 ]
+Key: UNPCKHPSrm: [ 0.00 0.00 ]
+Key: UNPCKHPSrr: [ 0.00 0.00 ]
+Key: UNPCKLPDrm: [ 0.00 0.00 ]
+Key: UNPCKLPDrr: [ 0.00 0.00 ]
+Key: UNPCKLPSrm: [ 0.00 0.00 ]
+Key: UNPCKLPSrr: [ 0.00 0.00 ]
+Key: URDMSRri: [ 0.00 0.00 ]
+Key: URDMSRri_EVEX: [ 0.00 0.00 ]
+Key: URDMSRrr: [ 0.00 0.00 ]
+Key: URDMSRrr_EVEX: [ 0.00 0.00 ]
+Key: UWRMSRir: [ 0.00 0.00 ]
+Key: UWRMSRir_EVEX: [ 0.00 0.00 ]
+Key: UWRMSRrr: [ 0.00 0.00 ]
+Key: UWRMSRrr_EVEX: [ 0.00 0.00 ]
+Key: V: [ 0.00 0.00 ]
+Key: VAARG: [ 0.00 0.00 ]
+Key: VAARG_X: [ 0.00 0.00 ]
+Key: VADDBF: [ 0.00 0.00 ]
+Key: VADDPDYrm: [ 0.00 0.00 ]
+Key: VADDPDYrr: [ 0.00 0.00 ]
+Key: VADDPDZ: [ 0.00 0.00 ]
+Key: VADDPDZrm: [ 0.00 0.00 ]
+Key: VADDPDZrmb: [ 0.00 0.00 ]
+Key: VADDPDZrmbk: [ 0.00 0.00 ]
+Key: VADDPDZrmbkz: [ 0.00 0.00 ]
+Key: VADDPDZrmk: [ 0.00 0.00 ]
+Key: VADDPDZrmkz: [ 0.00 0.00 ]
+Key: VADDPDZrr: [ 0.00 0.00 ]
+Key: VADDPDZrrb: [ 0.00 0.00 ]
+Key: VADDPDZrrbk: [ 0.00 0.00 ]
+Key: VADDPDZrrbkz: [ 0.00 0.00 ]
+Key: VADDPDZrrk: [ 0.00 0.00 ]
+Key: VADDPDZrrkz: [ 0.00 0.00 ]
+Key: VADDPDrm: [ 0.00 0.00 ]
+Key: VADDPDrr: [ 0.00 0.00 ]
+Key: VADDPHZ: [ 0.00 0.00 ]
+Key: VADDPHZrm: [ 0.00 0.00 ]
+Key: VADDPHZrmb: [ 0.00 0.00 ]
+Key: VADDPHZrmbk: [ 0.00 0.00 ]
+Key: VADDPHZrmbkz: [ 0.00 0.00 ]
+Key: VADDPHZrmk: [ 0.00 0.00 ]
+Key: VADDPHZrmkz: [ 0.00 0.00 ]
+Key: VADDPHZrr: [ 0.00 0.00 ]
+Key: VADDPHZrrb: [ 0.00 0.00 ]
+Key: VADDPHZrrbk: [ 0.00 0.00 ]
+Key: VADDPHZrrbkz: [ 0.00 0.00 ]
+Key: VADDPHZrrk: [ 0.00 0.00 ]
+Key: VADDPHZrrkz: [ 0.00 0.00 ]
+Key: VADDPSYrm: [ 0.00 0.00 ]
+Key: VADDPSYrr: [ 0.00 0.00 ]
+Key: VADDPSZ: [ 0.00 0.00 ]
+Key: VADDPSZrm: [ 0.00 0.00 ]
+Key: VADDPSZrmb: [ 0.00 0.00 ]
+Key: VADDPSZrmbk: [ 0.00 0.00 ]
+Key: VADDPSZrmbkz: [ 0.00 0.00 ]
+Key: VADDPSZrmk: [ 0.00 0.00 ]
+Key: VADDPSZrmkz: [ 0.00 0.00 ]
+Key: VADDPSZrr: [ 0.00 0.00 ]
+Key: VADDPSZrrb: [ 0.00 0.00 ]
+Key: VADDPSZrrbk: [ 0.00 0.00 ]
+Key: VADDPSZrrbkz: [ 0.00 0.00 ]
+Key: VADDPSZrrk: [ 0.00 0.00 ]
+Key: VADDPSZrrkz: [ 0.00 0.00 ]
+Key: VADDPSrm: [ 0.00 0.00 ]
+Key: VADDPSrr: [ 0.00 0.00 ]
+Key: VADDSDZrm: [ 0.00 0.00 ]
+Key: VADDSDZrm_Int: [ 0.00 0.00 ]
+Key: VADDSDZrmk_Int: [ 0.00 0.00 ]
+Key: VADDSDZrmkz_Int: [ 0.00 0.00 ]
+Key: VADDSDZrr: [ 0.00 0.00 ]
+Key: VADDSDZrr_Int: [ 0.00 0.00 ]
+Key: VADDSDZrrb_Int: [ 0.00 0.00 ]
+Key: VADDSDZrrbk_Int: [ 0.00 0.00 ]
+Key: VADDSDZrrbkz_Int: [ 0.00 0.00 ]
+Key: VADDSDZrrk_Int: [ 0.00 0.00 ]
+Key: VADDSDZrrkz_Int: [ 0.00 0.00 ]
+Key: VADDSDrm: [ 0.00 0.00 ]
+Key: VADDSDrm_Int: [ 0.00 0.00 ]
+Key: VADDSDrr: [ 0.00 0.00 ]
+Key: VADDSDrr_Int: [ 0.00 0.00 ]
+Key: VADDSHZrm: [ 0.00 0.00 ]
+Key: VADDSHZrm_Int: [ 0.00 0.00 ]
+Key: VADDSHZrmk_Int: [ 0.00 0.00 ]
+Key: VADDSHZrmkz_Int: [ 0.00 0.00 ]
+Key: VADDSHZrr: [ 0.00 0.00 ]
+Key: VADDSHZrr_Int: [ 0.00 0.00 ]
+Key: VADDSHZrrb_Int: [ 0.00 0.00 ]
+Key: VADDSHZrrbk_Int: [ 0.00 0.00 ]
+Key: VADDSHZrrbkz_Int: [ 0.00 0.00 ]
+Key: VADDSHZrrk_Int: [ 0.00 0.00 ]
+Key: VADDSHZrrkz_Int: [ 0.00 0.00 ]
+Key: VADDSSZrm: [ 0.00 0.00 ]
+Key: VADDSSZrm_Int: [ 0.00 0.00 ]
+Key: VADDSSZrmk_Int: [ 0.00 0.00 ]
+Key: VADDSSZrmkz_Int: [ 0.00 0.00 ]
+Key: VADDSSZrr: [ 0.00 0.00 ]
+Key: VADDSSZrr_Int: [ 0.00 0.00 ]
+Key: VADDSSZrrb_Int: [ 0.00 0.00 ]
+Key: VADDSSZrrbk_Int: [ 0.00 0.00 ]
+Key: VADDSSZrrbkz_Int: [ 0.00 0.00 ]
+Key: VADDSSZrrk_Int: [ 0.00 0.00 ]
+Key: VADDSSZrrkz_Int: [ 0.00 0.00 ]
+Key: VADDSSrm: [ 0.00 0.00 ]
+Key: VADDSSrm_Int: [ 0.00 0.00 ]
+Key: VADDSSrr: [ 0.00 0.00 ]
+Key: VADDSSrr_Int: [ 0.00 0.00 ]
+Key: VADDSUBPDYrm: [ 0.00 0.00 ]
+Key: VADDSUBPDYrr: [ 0.00 0.00 ]
+Key: VADDSUBPDrm: [ 0.00 0.00 ]
+Key: VADDSUBPDrr: [ 0.00 0.00 ]
+Key: VADDSUBPSYrm: [ 0.00 0.00 ]
+Key: VADDSUBPSYrr: [ 0.00 0.00 ]
+Key: VADDSUBPSrm: [ 0.00 0.00 ]
+Key: VADDSUBPSrr: [ 0.00 0.00 ]
+Key: VAESDECLASTYrm: [ 0.00 0.00 ]
+Key: VAESDECLASTYrr: [ 0.00 0.00 ]
+Key: VAESDECLASTZ: [ 0.00 0.00 ]
+Key: VAESDECLASTZrm: [ 0.00 0.00 ]
+Key: VAESDECLASTZrr: [ 0.00 0.00 ]
+Key: VAESDECLASTrm: [ 0.00 0.00 ]
+Key: VAESDECLASTrr: [ 0.00 0.00 ]
+Key: VAESDECYrm: [ 0.00 0.00 ]
+Key: VAESDECYrr: [ 0.00 0.00 ]
+Key: VAESDECZ: [ 0.00 0.00 ]
+Key: VAESDECZrm: [ 0.00 0.00 ]
+Key: VAESDECZrr: [ 0.00 0.00 ]
+Key: VAESDECrm: [ 0.00 0.00 ]
+Key: VAESDECrr: [ 0.00 0.00 ]
+Key: VAESENCLASTYrm: [ 0.00 0.00 ]
+Key: VAESENCLASTYrr: [ 0.00 0.00 ]
+Key: VAESENCLASTZ: [ 0.00 0.00 ]
+Key: VAESENCLASTZrm: [ 0.00 0.00 ]
+Key: VAESENCLASTZrr: [ 0.00 0.00 ]
+Key: VAESENCLASTrm: [ 0.00 0.00 ]
+Key: VAESENCLASTrr: [ 0.00 0.00 ]
+Key: VAESENCYrm: [ 0.00 0.00 ]
+Key: VAESENCYrr: [ 0.00 0.00 ]
+Key: VAESENCZ: [ 0.00 0.00 ]
+Key: VAESENCZrm: [ 0.00 0.00 ]
+Key: VAESENCZrr: [ 0.00 0.00 ]
+Key: VAESENCrm: [ 0.00 0.00 ]
+Key: VAESENCrr: [ 0.00 0.00 ]
+Key: VAESIMCrm: [ 0.00 0.00 ]
+Key: VAESIMCrr: [ 0.00 0.00 ]
+Key: VAESKEYGENASSISTrmi: [ 0.00 0.00 ]
+Key: VAESKEYGENASSISTrri: [ 0.00 0.00 ]
+Key: VALIGNDZ: [ 0.00 0.00 ]
+Key: VALIGNDZrmbi: [ 0.00 0.00 ]
+Key: VALIGNDZrmbik: [ 0.00 0.00 ]
+Key: VALIGNDZrmbikz: [ 0.00 0.00 ]
+Key: VALIGNDZrmi: [ 0.00 0.00 ]
+Key: VALIGNDZrmik: [ 0.00 0.00 ]
+Key: VALIGNDZrmikz: [ 0.00 0.00 ]
+Key: VALIGNDZrri: [ 0.00 0.00 ]
+Key: VALIGNDZrrik: [ 0.00 0.00 ]
+Key: VALIGNDZrrikz: [ 0.00 0.00 ]
+Key: VALIGNQZ: [ 0.00 0.00 ]
+Key: VALIGNQZrmbi: [ 0.00 0.00 ]
+Key: VALIGNQZrmbik: [ 0.00 0.00 ]
+Key: VALIGNQZrmbikz: [ 0.00 0.00 ]
+Key: VALIGNQZrmi: [ 0.00 0.00 ]
+Key: VALIGNQZrmik: [ 0.00 0.00 ]
+Key: VALIGNQZrmikz: [ 0.00 0.00 ]
+Key: VALIGNQZrri: [ 0.00 0.00 ]
+Key: VALIGNQZrrik: [ 0.00 0.00 ]
+Key: VALIGNQZrrikz: [ 0.00 0.00 ]
+Key: VANDNPDYrm: [ 0.00 0.00 ]
+Key: VANDNPDYrr: [ 0.00 0.00 ]
+Key: VANDNPDZ: [ 0.00 0.00 ]
+Key: VANDNPDZrm: [ 0.00 0.00 ]
+Key: VANDNPDZrmb: [ 0.00 0.00 ]
+Key: VANDNPDZrmbk: [ 0.00 0.00 ]
+Key: VANDNPDZrmbkz: [ 0.00 0.00 ]
+Key: VANDNPDZrmk: [ 0.00 0.00 ]
+Key: VANDNPDZrmkz: [ 0.00 0.00 ]
+Key: VANDNPDZrr: [ 0.00 0.00 ]
+Key: VANDNPDZrrk: [ 0.00 0.00 ]
+Key: VANDNPDZrrkz: [ 0.00 0.00 ]
+Key: VANDNPDrm: [ 0.00 0.00 ]
+Key: VANDNPDrr: [ 0.00 0.00 ]
+Key: VANDNPSYrm: [ 0.00 0.00 ]
+Key: VANDNPSYrr: [ 0.00 0.00 ]
+Key: VANDNPSZ: [ 0.00 0.00 ]
+Key: VANDNPSZrm: [ 0.00 0.00 ]
+Key: VANDNPSZrmb: [ 0.00 0.00 ]
+Key: VANDNPSZrmbk: [ 0.00 0.00 ]
+Key: VANDNPSZrmbkz: [ 0.00 0.00 ]
+Key: VANDNPSZrmk: [ 0.00 0.00 ]
+Key: VANDNPSZrmkz: [ 0.00 0.00 ]
+Key: VANDNPSZrr: [ 0.00 0.00 ]
+Key: VANDNPSZrrk: [ 0.00 0.00 ]
+Key: VANDNPSZrrkz: [ 0.00 0.00 ]
+Key: VANDNPSrm: [ 0.00 0.00 ]
+Key: VANDNPSrr: [ 0.00 0.00 ]
+Key: VANDPDYrm: [ 0.00 0.00 ]
+Key: VANDPDYrr: [ 0.00 0.00 ]
+Key: VANDPDZ: [ 0.00 0.00 ]
+Key: VANDPDZrm: [ 0.00 0.00 ]
+Key: VANDPDZrmb: [ 0.00 0.00 ]
+Key: VANDPDZrmbk: [ 0.00 0.00 ]
+Key: VANDPDZrmbkz: [ 0.00 0.00 ]
+Key: VANDPDZrmk: [ 0.00 0.00 ]
+Key: VANDPDZrmkz: [ 0.00 0.00 ]
+Key: VANDPDZrr: [ 0.00 0.00 ]
+Key: VANDPDZrrk: [ 0.00 0.00 ]
+Key: VANDPDZrrkz: [ 0.00 0.00 ]
+Key: VANDPDrm: [ 0.00 0.00 ]
+Key: VANDPDrr: [ 0.00 0.00 ]
+Key: VANDPSYrm: [ 0.00 0.00 ]
+Key: VANDPSYrr: [ 0.00 0.00 ]
+Key: VANDPSZ: [ 0.00 0.00 ]
+Key: VANDPSZrm: [ 0.00 0.00 ]
+Key: VANDPSZrmb: [ 0.00 0.00 ]
+Key: VANDPSZrmbk: [ 0.00 0.00 ]
+Key: VANDPSZrmbkz: [ 0.00 0.00 ]
+Key: VANDPSZrmk: [ 0.00 0.00 ]
+Key: VANDPSZrmkz: [ 0.00 0.00 ]
+Key: VANDPSZrr: [ 0.00 0.00 ]
+Key: VANDPSZrrk: [ 0.00 0.00 ]
+Key: VANDPSZrrkz: [ 0.00 0.00 ]
+Key: VANDPSrm: [ 0.00 0.00 ]
+Key: VANDPSrr: [ 0.00 0.00 ]
+Key: VASTART_SAVE_XMM_REGS: [ 0.00 0.00 ]
+Key: VBCSTNEBF: [ 0.00 0.00 ]
+Key: VBCSTNESH: [ 0.00 0.00 ]
+Key: VBLENDMPDZ: [ 0.00 0.00 ]
+Key: VBLENDMPDZrm: [ 0.00 0.00 ]
+Key: VBLENDMPDZrmb: [ 0.00 0.00 ]
+Key: VBLENDMPDZrmbk: [ 0.00 0.00 ]
+Key: VBLENDMPDZrmbkz: [ 0.00 0.00 ]
+Key: VBLENDMPDZrmk: [ 0.00 0.00 ]
+Key: VBLENDMPDZrmkz: [ 0.00 0.00 ]
+Key: VBLENDMPDZrr: [ 0.00 0.00 ]
+Key: VBLENDMPDZrrk: [ 0.00 0.00 ]
+Key: VBLENDMPDZrrkz: [ 0.00 0.00 ]
+Key: VBLENDMPSZ: [ 0.00 0.00 ]
+Key: VBLENDMPSZrm: [ 0.00 0.00 ]
+Key: VBLENDMPSZrmb: [ 0.00 0.00 ]
+Key: VBLENDMPSZrmbk: [ 0.00 0.00 ]
+Key: VBLENDMPSZrmbkz: [ 0.00 0.00 ]
+Key: VBLENDMPSZrmk: [ 0.00 0.00 ]
+Key: VBLENDMPSZrmkz: [ 0.00 0.00 ]
+Key: VBLENDMPSZrr: [ 0.00 0.00 ]
+Key: VBLENDMPSZrrk: [ 0.00 0.00 ]
+Key: VBLENDMPSZrrkz: [ 0.00 0.00 ]
+Key: VBLENDPDYrmi: [ 0.00 0.00 ]
+Key: VBLENDPDYrri: [ 0.00 0.00 ]
+Key: VBLENDPDrmi: [ 0.00 0.00 ]
+Key: VBLENDPDrri: [ 0.00 0.00 ]
+Key: VBLENDPSYrmi: [ 0.00 0.00 ]
+Key: VBLENDPSYrri: [ 0.00 0.00 ]
+Key: VBLENDPSrmi: [ 0.00 0.00 ]
+Key: VBLENDPSrri: [ 0.00 0.00 ]
+Key: VBLENDVPDYrmr: [ 0.00 0.00 ]
+Key: VBLENDVPDYrrr: [ 0.00 0.00 ]
+Key: VBLENDVPDrmr: [ 0.00 0.00 ]
+Key: VBLENDVPDrrr: [ 0.00 0.00 ]
+Key: VBLENDVPSYrmr: [ 0.00 0.00 ]
+Key: VBLENDVPSYrrr: [ 0.00 0.00 ]
+Key: VBLENDVPSrmr: [ 0.00 0.00 ]
+Key: VBLENDVPSrrr: [ 0.00 0.00 ]
+Key: VBROADCASTF: [ 0.00 0.00 ]
+Key: VBROADCASTI: [ 0.00 0.00 ]
+Key: VBROADCASTSDYrm: [ 0.00 0.00 ]
+Key: VBROADCASTSDYrr: [ 0.00 0.00 ]
+Key: VBROADCASTSDZ: [ 0.00 0.00 ]
+Key: VBROADCASTSDZrm: [ 0.00 0.00 ]
+Key: VBROADCASTSDZrmk: [ 0.00 0.00 ]
+Key: VBROADCASTSDZrmkz: [ 0.00 0.00 ]
+Key: VBROADCASTSDZrr: [ 0.00 0.00 ]
+Key: VBROADCASTSDZrrk: [ 0.00 0.00 ]
+Key: VBROADCASTSDZrrkz: [ 0.00 0.00 ]
+Key: VBROADCASTSSYrm: [ 0.00 0.00 ]
+Key: VBROADCASTSSYrr: [ 0.00 0.00 ]
+Key: VBROADCASTSSZ: [ 0.00 0.00 ]
+Key: VBROADCASTSSZrm: [ 0.00 0.00 ]
+Key: VBROADCASTSSZrmk: [ 0.00 0.00 ]
+Key: VBROADCASTSSZrmkz: [ 0.00 0.00 ]
+Key: VBROADCASTSSZrr: [ 0.00 0.00 ]
+Key: VBROADCASTSSZrrk: [ 0.00 0.00 ]
+Key: VBROADCASTSSZrrkz: [ 0.00 0.00 ]
+Key: VBROADCASTSSrm: [ 0.00 0.00 ]
+Key: VBROADCASTSSrr: [ 0.00 0.00 ]
+Key: VCMPBF: [ 0.00 0.00 ]
+Key: VCMPPDYrmi: [ 0.00 0.00 ]
+Key: VCMPPDYrri: [ 0.00 0.00 ]
+Key: VCMPPDZ: [ 0.00 0.00 ]
+Key: VCMPPDZrmbi: [ 0.00 0.00 ]
+Key: VCMPPDZrmbik: [ 0.00 0.00 ]
+Key: VCMPPDZrmi: [ 0.00 0.00 ]
+Key: VCMPPDZrmik: [ 0.00 0.00 ]
+Key: VCMPPDZrri: [ 0.00 0.00 ]
+Key: VCMPPDZrrib: [ 0.00 0.00 ]
+Key: VCMPPDZrribk: [ 0.00 0.00 ]
+Key: VCMPPDZrrik: [ 0.00 0.00 ]
+Key: VCMPPDrmi: [ 0.00 0.00 ]
+Key: VCMPPDrri: [ 0.00 0.00 ]
+Key: VCMPPHZ: [ 0.00 0.00 ]
+Key: VCMPPHZrmbi: [ 0.00 0.00 ]
+Key: VCMPPHZrmbik: [ 0.00 0.00 ]
+Key: VCMPPHZrmi: [ 0.00 0.00 ]
+Key: VCMPPHZrmik: [ 0.00 0.00 ]
+Key: VCMPPHZrri: [ 0.00 0.00 ]
+Key: VCMPPHZrrib: [ 0.00 0.00 ]
+Key: VCMPPHZrribk: [ 0.00 0.00 ]
+Key: VCMPPHZrrik: [ 0.00 0.00 ]
+Key: VCMPPSYrmi: [ 0.00 0.00 ]
+Key: VCMPPSYrri: [ 0.00 0.00 ]
+Key: VCMPPSZ: [ 0.00 0.00 ]
+Key: VCMPPSZrmbi: [ 0.00 0.00 ]
+Key: VCMPPSZrmbik: [ 0.00 0.00 ]
+Key: VCMPPSZrmi: [ 0.00 0.00 ]
+Key: VCMPPSZrmik: [ 0.00 0.00 ]
+Key: VCMPPSZrri: [ 0.00 0.00 ]
+Key: VCMPPSZrrib: [ 0.00 0.00 ]
+Key: VCMPPSZrribk: [ 0.00 0.00 ]
+Key: VCMPPSZrrik: [ 0.00 0.00 ]
+Key: VCMPPSrmi: [ 0.00 0.00 ]
+Key: VCMPPSrri: [ 0.00 0.00 ]
+Key: VCMPSDZrmi: [ 0.00 0.00 ]
+Key: VCMPSDZrmi_Int: [ 0.00 0.00 ]
+Key: VCMPSDZrmik_Int: [ 0.00 0.00 ]
+Key: VCMPSDZrri: [ 0.00 0.00 ]
+Key: VCMPSDZrri_Int: [ 0.00 0.00 ]
+Key: VCMPSDZrrib_Int: [ 0.00 0.00 ]
+Key: VCMPSDZrribk_Int: [ 0.00 0.00 ]
+Key: VCMPSDZrrik_Int: [ 0.00 0.00 ]
+Key: VCMPSDrmi: [ 0.00 0.00 ]
+Key: VCMPSDrmi_Int: [ 0.00 0.00 ]
+Key: VCMPSDrri: [ 0.00 0.00 ]
+Key: VCMPSDrri_Int: [ 0.00 0.00 ]
+Key: VCMPSHZrmi: [ 0.00 0.00 ]
+Key: VCMPSHZrmi_Int: [ 0.00 0.00 ]
+Key: VCMPSHZrmik_Int: [ 0.00 0.00 ]
+Key: VCMPSHZrri: [ 0.00 0.00 ]
+Key: VCMPSHZrri_Int: [ 0.00 0.00 ]
+Key: VCMPSHZrrib_Int: [ 0.00 0.00 ]
+Key: VCMPSHZrribk_Int: [ 0.00 0.00 ]
+Key: VCMPSHZrrik_Int: [ 0.00 0.00 ]
+Key: VCMPSSZrmi: [ 0.00 0.00 ]
+Key: VCMPSSZrmi_Int: [ 0.00 0.00 ]
+Key: VCMPSSZrmik_Int: [ 0.00 0.00 ]
+Key: VCMPSSZrri: [ 0.00 0.00 ]
+Key: VCMPSSZrri_Int: [ 0.00 0.00 ]
+Key: VCMPSSZrrib_Int: [ 0.00 0.00 ]
+Key: VCMPSSZrribk_Int: [ 0.00 0.00 ]
+Key: VCMPSSZrrik_Int: [ 0.00 0.00 ]
+Key: VCMPSSrmi: [ 0.00 0.00 ]
+Key: VCMPSSrmi_Int: [ 0.00 0.00 ]
+Key: VCMPSSrri: [ 0.00 0.00 ]
+Key: VCMPSSrri_Int: [ 0.00 0.00 ]
+Key: VCOMISBF: [ 0.00 0.00 ]
+Key: VCOMISDZrm: [ 0.00 0.00 ]
+Key: VCOMISDZrm_Int: [ 0.00 0.00 ]
+Key: VCOMISDZrr: [ 0.00 0.00 ]
+Key: VCOMISDZrr_Int: [ 0.00 0.00 ]
+Key: VCOMISDZrrb: [ 0.00 0.00 ]
+Key: VCOMISDrm: [ 0.00 0.00 ]
+Key: VCOMISDrm_Int: [ 0.00 0.00 ]
+Key: VCOMISDrr: [ 0.00 0.00 ]
+Key: VCOMISDrr_Int: [ 0.00 0.00 ]
+Key: VCOMISHZrm: [ 0.00 0.00 ]
+Key: VCOMISHZrm_Int: [ 0.00 0.00 ]
+Key: VCOMISHZrr: [ 0.00 0.00 ]
+Key: VCOMISHZrr_Int: [ 0.00 0.00 ]
+Key: VCOMISHZrrb: [ 0.00 0.00 ]
+Key: VCOMISSZrm: [ 0.00 0.00 ]
+Key: VCOMISSZrm_Int: [ 0.00 0.00 ]
+Key: VCOMISSZrr: [ 0.00 0.00 ]
+Key: VCOMISSZrr_Int: [ 0.00 0.00 ]
+Key: VCOMISSZrrb: [ 0.00 0.00 ]
+Key: VCOMISSrm: [ 0.00 0.00 ]
+Key: VCOMISSrm_Int: [ 0.00 0.00 ]
+Key: VCOMISSrr: [ 0.00 0.00 ]
+Key: VCOMISSrr_Int: [ 0.00 0.00 ]
+Key: VCOMPRESSPDZ: [ 0.00 0.00 ]
+Key: VCOMPRESSPDZmr: [ 0.00 0.00 ]
+Key: VCOMPRESSPDZmrk: [ 0.00 0.00 ]
+Key: VCOMPRESSPDZrr: [ 0.00 0.00 ]
+Key: VCOMPRESSPDZrrk: [ 0.00 0.00 ]
+Key: VCOMPRESSPDZrrkz: [ 0.00 0.00 ]
+Key: VCOMPRESSPSZ: [ 0.00 0.00 ]
+Key: VCOMPRESSPSZmr: [ 0.00 0.00 ]
+Key: VCOMPRESSPSZmrk: [ 0.00 0.00 ]
+Key: VCOMPRESSPSZrr: [ 0.00 0.00 ]
+Key: VCOMPRESSPSZrrk: [ 0.00 0.00 ]
+Key: VCOMPRESSPSZrrkz: [ 0.00 0.00 ]
+Key: VCOMXSDZrm_Int: [ 0.00 0.00 ]
+Key: VCOMXSDZrr_Int: [ 0.00 0.00 ]
+Key: VCOMXSDZrrb_Int: [ 0.00 0.00 ]
+Key: VCOMXSHZrm_Int: [ 0.00 0.00 ]
+Key: VCOMXSHZrr_Int: [ 0.00 0.00 ]
+Key: VCOMXSHZrrb_Int: [ 0.00 0.00 ]
+Key: VCOMXSSZrm_Int: [ 0.00 0.00 ]
+Key: VCOMXSSZrr_Int: [ 0.00 0.00 ]
+Key: VCOMXSSZrrb_Int: [ 0.00 0.00 ]
+Key: VCVT: [ 0.00 0.00 ]
+Key: VCVTBF: [ 0.00 0.00 ]
+Key: VCVTBIASPH: [ 0.00 0.00 ]
+Key: VCVTDQ: [ 0.00 0.00 ]
+Key: VCVTHF: [ 0.00 0.00 ]
+Key: VCVTNE: [ 0.00 0.00 ]
+Key: VCVTNEEBF: [ 0.00 0.00 ]
+Key: VCVTNEEPH: [ 0.00 0.00 ]
+Key: VCVTNEOBF: [ 0.00 0.00 ]
+Key: VCVTNEOPH: [ 0.00 0.00 ]
+Key: VCVTNEPS: [ 0.00 0.00 ]
+Key: VCVTPD: [ 0.00 0.00 ]
+Key: VCVTPH: [ 0.00 0.00 ]
+Key: VCVTPS: [ 0.00 0.00 ]
+Key: VCVTQQ: [ 0.00 0.00 ]
+Key: VCVTSD: [ 0.00 0.00 ]
+Key: VCVTSH: [ 0.00 0.00 ]
+Key: VCVTSI: [ 0.00 0.00 ]
+Key: VCVTSS: [ 0.00 0.00 ]
+Key: VCVTTBF: [ 0.00 0.00 ]
+Key: VCVTTPD: [ 0.00 0.00 ]
+Key: VCVTTPH: [ 0.00 0.00 ]
+Key: VCVTTPS: [ 0.00 0.00 ]
+Key: VCVTTSD: [ 0.00 0.00 ]
+Key: VCVTTSH: [ 0.00 0.00 ]
+Key: VCVTTSS: [ 0.00 0.00 ]
+Key: VCVTUDQ: [ 0.00 0.00 ]
+Key: VCVTUQQ: [ 0.00 0.00 ]
+Key: VCVTUSI: [ 0.00 0.00 ]
+Key: VCVTUW: [ 0.00 0.00 ]
+Key: VCVTW: [ 0.00 0.00 ]
+Key: VDBPSADBWZ: [ 0.00 0.00 ]
+Key: VDBPSADBWZrmi: [ 0.00 0.00 ]
+Key: VDBPSADBWZrmik: [ 0.00 0.00 ]
+Key: VDBPSADBWZrmikz: [ 0.00 0.00 ]
+Key: VDBPSADBWZrri: [ 0.00 0.00 ]
+Key: VDBPSADBWZrrik: [ 0.00 0.00 ]
+Key: VDBPSADBWZrrikz: [ 0.00 0.00 ]
+Key: VDIVBF: [ 0.00 0.00 ]
+Key: VDIVPDYrm: [ 0.00 0.00 ]
+Key: VDIVPDYrr: [ 0.00 0.00 ]
+Key: VDIVPDZ: [ 0.00 0.00 ]
+Key: VDIVPDZrm: [ 0.00 0.00 ]
+Key: VDIVPDZrmb: [ 0.00 0.00 ]
+Key: VDIVPDZrmbk: [ 0.00 0.00 ]
+Key: VDIVPDZrmbkz: [ 0.00 0.00 ]
+Key: VDIVPDZrmk: [ 0.00 0.00 ]
+Key: VDIVPDZrmkz: [ 0.00 0.00 ]
+Key: VDIVPDZrr: [ 0.00 0.00 ]
+Key: VDIVPDZrrb: [ 0.00 0.00 ]
+Key: VDIVPDZrrbk: [ 0.00 0.00 ]
+Key: VDIVPDZrrbkz: [ 0.00 0.00 ]
+Key: VDIVPDZrrk: [ 0.00 0.00 ]
+Key: VDIVPDZrrkz: [ 0.00 0.00 ]
+Key: VDIVPDrm: [ 0.00 0.00 ]
+Key: VDIVPDrr: [ 0.00 0.00 ]
+Key: VDIVPHZ: [ 0.00 0.00 ]
+Key: VDIVPHZrm: [ 0.00 0.00 ]
+Key: VDIVPHZrmb: [ 0.00 0.00 ]
+Key: VDIVPHZrmbk: [ 0.00 0.00 ]
+Key: VDIVPHZrmbkz: [ 0.00 0.00 ]
+Key: VDIVPHZrmk: [ 0.00 0.00 ]
+Key: VDIVPHZrmkz: [ 0.00 0.00 ]
+Key: VDIVPHZrr: [ 0.00 0.00 ]
+Key: VDIVPHZrrb: [ 0.00 0.00 ]
+Key: VDIVPHZrrbk: [ 0.00 0.00 ]
+Key: VDIVPHZrrbkz: [ 0.00 0.00 ]
+Key: VDIVPHZrrk: [ 0.00 0.00 ]
+Key: VDIVPHZrrkz: [ 0.00 0.00 ]
+Key: VDIVPSYrm: [ 0.00 0.00 ]
+Key: VDIVPSYrr: [ 0.00 0.00 ]
+Key: VDIVPSZ: [ 0.00 0.00 ]
+Key: VDIVPSZrm: [ 0.00 0.00 ]
+Key: VDIVPSZrmb: [ 0.00 0.00 ]
+Key: VDIVPSZrmbk: [ 0.00 0.00 ]
+Key: VDIVPSZrmbkz: [ 0.00 0.00 ]
+Key: VDIVPSZrmk: [ 0.00 0.00 ]
+Key: VDIVPSZrmkz: [ 0.00 0.00 ]
+Key: VDIVPSZrr: [ 0.00 0.00 ]
+Key: VDIVPSZrrb: [ 0.00 0.00 ]
+Key: VDIVPSZrrbk: [ 0.00 0.00 ]
+Key: VDIVPSZrrbkz: [ 0.00 0.00 ]
+Key: VDIVPSZrrk: [ 0.00 0.00 ]
+Key: VDIVPSZrrkz: [ 0.00 0.00 ]
+Key: VDIVPSrm: [ 0.00 0.00 ]
+Key: VDIVPSrr: [ 0.00 0.00 ]
+Key: VDIVSDZrm: [ 0.00 0.00 ]
+Key: VDIVSDZrm_Int: [ 0.00 0.00 ]
+Key: VDIVSDZrmk_Int: [ 0.00 0.00 ]
+Key: VDIVSDZrmkz_Int: [ 0.00 0.00 ]
+Key: VDIVSDZrr: [ 0.00 0.00 ]
+Key: VDIVSDZrr_Int: [ 0.00 0.00 ]
+Key: VDIVSDZrrb_Int: [ 0.00 0.00 ]
+Key: VDIVSDZrrbk_Int: [ 0.00 0.00 ]
+Key: VDIVSDZrrbkz_Int: [ 0.00 0.00 ]
+Key: VDIVSDZrrk_Int: [ 0.00 0.00 ]
+Key: VDIVSDZrrkz_Int: [ 0.00 0.00 ]
+Key: VDIVSDrm: [ 0.00 0.00 ]
+Key: VDIVSDrm_Int: [ 0.00 0.00 ]
+Key: VDIVSDrr: [ 0.00 0.00 ]
+Key: VDIVSDrr_Int: [ 0.00 0.00 ]
+Key: VDIVSHZrm: [ 0.00 0.00 ]
+Key: VDIVSHZrm_Int: [ 0.00 0.00 ]
+Key: VDIVSHZrmk_Int: [ 0.00 0.00 ]
+Key: VDIVSHZrmkz_Int: [ 0.00 0.00 ]
+Key: VDIVSHZrr: [ 0.00 0.00 ]
+Key: VDIVSHZrr_Int: [ 0.00 0.00 ]
+Key: VDIVSHZrrb_Int: [ 0.00 0.00 ]
+Key: VDIVSHZrrbk_Int: [ 0.00 0.00 ]
+Key: VDIVSHZrrbkz_Int: [ 0.00 0.00 ]
+Key: VDIVSHZrrk_Int: [ 0.00 0.00 ]
+Key: VDIVSHZrrkz_Int: [ 0.00 0.00 ]
+Key: VDIVSSZrm: [ 0.00 0.00 ]
+Key: VDIVSSZrm_Int: [ 0.00 0.00 ]
+Key: VDIVSSZrmk_Int: [ 0.00 0.00 ]
+Key: VDIVSSZrmkz_Int: [ 0.00 0.00 ]
+Key: VDIVSSZrr: [ 0.00 0.00 ]
+Key: VDIVSSZrr_Int: [ 0.00 0.00 ]
+Key: VDIVSSZrrb_Int: [ 0.00 0.00 ]
+Key: VDIVSSZrrbk_Int: [ 0.00 0.00 ]
+Key: VDIVSSZrrbkz_Int: [ 0.00 0.00 ]
+Key: VDIVSSZrrk_Int: [ 0.00 0.00 ]
+Key: VDIVSSZrrkz_Int: [ 0.00 0.00 ]
+Key: VDIVSSrm: [ 0.00 0.00 ]
+Key: VDIVSSrm_Int: [ 0.00 0.00 ]
+Key: VDIVSSrr: [ 0.00 0.00 ]
+Key: VDIVSSrr_Int: [ 0.00 0.00 ]
+Key: VDPBF: [ 0.00 0.00 ]
+Key: VDPPDrmi: [ 0.00 0.00 ]
+Key: VDPPDrri: [ 0.00 0.00 ]
+Key: VDPPHPSZ: [ 0.00 0.00 ]
+Key: VDPPHPSZm: [ 0.00 0.00 ]
+Key: VDPPHPSZmb: [ 0.00 0.00 ]
+Key: VDPPHPSZmbk: [ 0.00 0.00 ]
+Key: VDPPHPSZmbkz: [ 0.00 0.00 ]
+Key: VDPPHPSZmk: [ 0.00 0.00 ]
+Key: VDPPHPSZmkz: [ 0.00 0.00 ]
+Key: VDPPHPSZr: [ 0.00 0.00 ]
+Key: VDPPHPSZrk: [ 0.00 0.00 ]
+Key: VDPPHPSZrkz: [ 0.00 0.00 ]
+Key: VDPPSYrmi: [ 0.00 0.00 ]
+Key: VDPPSYrri: [ 0.00 0.00 ]
+Key: VDPPSrmi: [ 0.00 0.00 ]
+Key: VDPPSrri: [ 0.00 0.00 ]
+Key: VERRm: [ 0.00 0.00 ]
+Key: VERRr: [ 0.00 0.00 ]
+Key: VERWm: [ 0.00 0.00 ]
+Key: VERWr: [ 0.00 0.00 ]
+Key: VEXP: [ 0.00 0.00 ]
+Key: VEXPANDPDZ: [ 0.00 0.00 ]
+Key: VEXPANDPDZrm: [ 0.00 0.00 ]
+Key: VEXPANDPDZrmk: [ 0.00 0.00 ]
+Key: VEXPANDPDZrmkz: [ 0.00 0.00 ]
+Key: VEXPANDPDZrr: [ 0.00 0.00 ]
+Key: VEXPANDPDZrrk: [ 0.00 0.00 ]
+Key: VEXPANDPDZrrkz: [ 0.00 0.00 ]
+Key: VEXPANDPSZ: [ 0.00 0.00 ]
+Key: VEXPANDPSZrm: [ 0.00 0.00 ]
+Key: VEXPANDPSZrmk: [ 0.00 0.00 ]
+Key: VEXPANDPSZrmkz: [ 0.00 0.00 ]
+Key: VEXPANDPSZrr: [ 0.00 0.00 ]
+Key: VEXPANDPSZrrk: [ 0.00 0.00 ]
+Key: VEXPANDPSZrrkz: [ 0.00 0.00 ]
+Key: VEXTRACTF: [ 0.00 0.00 ]
+Key: VEXTRACTI: [ 0.00 0.00 ]
+Key: VEXTRACTPSZmri: [ 0.00 0.00 ]
+Key: VEXTRACTPSZrri: [ 0.00 0.00 ]
+Key: VEXTRACTPSmri: [ 0.00 0.00 ]
+Key: VEXTRACTPSrri: [ 0.00 0.00 ]
+Key: VFCMADDCPHZ: [ 0.00 0.00 ]
+Key: VFCMADDCPHZm: [ 0.00 0.00 ]
+Key: VFCMADDCPHZmb: [ 0.00 0.00 ]
+Key: VFCMADDCPHZmbk: [ 0.00 0.00 ]
+Key: VFCMADDCPHZmbkz: [ 0.00 0.00 ]
+Key: VFCMADDCPHZmk: [ 0.00 0.00 ]
+Key: VFCMADDCPHZmkz: [ 0.00 0.00 ]
+Key: VFCMADDCPHZr: [ 0.00 0.00 ]
+Key: VFCMADDCPHZrb: [ 0.00 0.00 ]
+Key: VFCMADDCPHZrbk: [ 0.00 0.00 ]
+Key: VFCMADDCPHZrbkz: [ 0.00 0.00 ]
+Key: VFCMADDCPHZrk: [ 0.00 0.00 ]
+Key: VFCMADDCPHZrkz: [ 0.00 0.00 ]
+Key: VFCMADDCSHZm: [ 0.00 0.00 ]
+Key: VFCMADDCSHZmk: [ 0.00 0.00 ]
+Key: VFCMADDCSHZmkz: [ 0.00 0.00 ]
+Key: VFCMADDCSHZr: [ 0.00 0.00 ]
+Key: VFCMADDCSHZrb: [ 0.00 0.00 ]
+Key: VFCMADDCSHZrbk: [ 0.00 0.00 ]
+Key: VFCMADDCSHZrbkz: [ 0.00 0.00 ]
+Key: VFCMADDCSHZrk: [ 0.00 0.00 ]
+Key: VFCMADDCSHZrkz: [ 0.00 0.00 ]
+Key: VFCMULCPHZ: [ 0.00 0.00 ]
+Key: VFCMULCPHZrm: [ 0.00 0.00 ]
+Key: VFCMULCPHZrmb: [ 0.00 0.00 ]
+Key: VFCMULCPHZrmbk: [ 0.00 0.00 ]
+Key: VFCMULCPHZrmbkz: [ 0.00 0.00 ]
+Key: VFCMULCPHZrmk: [ 0.00 0.00 ]
+Key: VFCMULCPHZrmkz: [ 0.00 0.00 ]
+Key: VFCMULCPHZrr: [ 0.00 0.00 ]
+Key: VFCMULCPHZrrb: [ 0.00 0.00 ]
+Key: VFCMULCPHZrrbk: [ 0.00 0.00 ]
+Key: VFCMULCPHZrrbkz: [ 0.00 0.00 ]
+Key: VFCMULCPHZrrk: [ 0.00 0.00 ]
+Key: VFCMULCPHZrrkz: [ 0.00 0.00 ]
+Key: VFCMULCSHZrm: [ 0.00 0.00 ]
+Key: VFCMULCSHZrmk: [ 0.00 0.00 ]
+Key: VFCMULCSHZrmkz: [ 0.00 0.00 ]
+Key: VFCMULCSHZrr: [ 0.00 0.00 ]
+Key: VFCMULCSHZrrb: [ 0.00 0.00 ]
+Key: VFCMULCSHZrrbk: [ 0.00 0.00 ]
+Key: VFCMULCSHZrrbkz: [ 0.00 0.00 ]
+Key: VFCMULCSHZrrk: [ 0.00 0.00 ]
+Key: VFCMULCSHZrrkz: [ 0.00 0.00 ]
+Key: VFIXUPIMMPDZ: [ 0.00 0.00 ]
+Key: VFIXUPIMMPDZrmbi: [ 0.00 0.00 ]
+Key: VFIXUPIMMPDZrmbik: [ 0.00 0.00 ]
+Key: VFIXUPIMMPDZrmbikz: [ 0.00 0.00 ]
+Key: VFIXUPIMMPDZrmi: [ 0.00 0.00 ]
+Key: VFIXUPIMMPDZrmik: [ 0.00 0.00 ]
+Key: VFIXUPIMMPDZrmikz: [ 0.00 0.00 ]
+Key: VFIXUPIMMPDZrri: [ 0.00 0.00 ]
+Key: VFIXUPIMMPDZrrib: [ 0.00 0.00 ]
+Key: VFIXUPIMMPDZrribk: [ 0.00 0.00 ]
+Key: VFIXUPIMMPDZrribkz: [ 0.00 0.00 ]
+Key: VFIXUPIMMPDZrrik: [ 0.00 0.00 ]
+Key: VFIXUPIMMPDZrrikz: [ 0.00 0.00 ]
+Key: VFIXUPIMMPSZ: [ 0.00 0.00 ]
+Key: VFIXUPIMMPSZrmbi: [ 0.00 0.00 ]
+Key: VFIXUPIMMPSZrmbik: [ 0.00 0.00 ]
+Key: VFIXUPIMMPSZrmbikz: [ 0.00 0.00 ]
+Key: VFIXUPIMMPSZrmi: [ 0.00 0.00 ]
+Key: VFIXUPIMMPSZrmik: [ 0.00 0.00 ]
+Key: VFIXUPIMMPSZrmikz: [ 0.00 0.00 ]
+Key: VFIXUPIMMPSZrri: [ 0.00 0.00 ]
+Key: VFIXUPIMMPSZrrib: [ 0.00 0.00 ]
+Key: VFIXUPIMMPSZrribk: [ 0.00 0.00 ]
+Key: VFIXUPIMMPSZrribkz: [ 0.00 0.00 ]
+Key: VFIXUPIMMPSZrrik: [ 0.00 0.00 ]
+Key: VFIXUPIMMPSZrrikz: [ 0.00 0.00 ]
+Key: VFIXUPIMMSDZrmi: [ 0.00 0.00 ]
+Key: VFIXUPIMMSDZrmik: [ 0.00 0.00 ]
+Key: VFIXUPIMMSDZrmikz: [ 0.00 0.00 ]
+Key: VFIXUPIMMSDZrri: [ 0.00 0.00 ]
+Key: VFIXUPIMMSDZrrib: [ 0.00 0.00 ]
+Key: VFIXUPIMMSDZrribk: [ 0.00 0.00 ]
+Key: VFIXUPIMMSDZrribkz: [ 0.00 0.00 ]
+Key: VFIXUPIMMSDZrrik: [ 0.00 0.00 ]
+Key: VFIXUPIMMSDZrrikz: [ 0.00 0.00 ]
+Key: VFIXUPIMMSSZrmi: [ 0.00 0.00 ]
+Key: VFIXUPIMMSSZrmik: [ 0.00 0.00 ]
+Key: VFIXUPIMMSSZrmikz: [ 0.00 0.00 ]
+Key: VFIXUPIMMSSZrri: [ 0.00 0.00 ]
+Key: VFIXUPIMMSSZrrib: [ 0.00 0.00 ]
+Key: VFIXUPIMMSSZrribk: [ 0.00 0.00 ]
+Key: VFIXUPIMMSSZrribkz: [ 0.00 0.00 ]
+Key: VFIXUPIMMSSZrrik: [ 0.00 0.00 ]
+Key: VFIXUPIMMSSZrrikz: [ 0.00 0.00 ]
+Key: VFMADD: [ 0.00 0.00 ]
+Key: VFMADDCPHZ: [ 0.00 0.00 ]
+Key: VFMADDCPHZm: [ 0.00 0.00 ]
+Key: VFMADDCPHZmb: [ 0.00 0.00 ]
+Key: VFMADDCPHZmbk: [ 0.00 0.00 ]
+Key: VFMADDCPHZmbkz: [ 0.00 0.00 ]
+Key: VFMADDCPHZmk: [ 0.00 0.00 ]
+Key: VFMADDCPHZmkz: [ 0.00 0.00 ]
+Key: VFMADDCPHZr: [ 0.00 0.00 ]
+Key: VFMADDCPHZrb: [ 0.00 0.00 ]
+Key: VFMADDCPHZrbk: [ 0.00 0.00 ]
+Key: VFMADDCPHZrbkz: [ 0.00 0.00 ]
+Key: VFMADDCPHZrk: [ 0.00 0.00 ]
+Key: VFMADDCPHZrkz: [ 0.00 0.00 ]
+Key: VFMADDCSHZm: [ 0.00 0.00 ]
+Key: VFMADDCSHZmk: [ 0.00 0.00 ]
+Key: VFMADDCSHZmkz: [ 0.00 0.00 ]
+Key: VFMADDCSHZr: [ 0.00 0.00 ]
+Key: VFMADDCSHZrb: [ 0.00 0.00 ]
+Key: VFMADDCSHZrbk: [ 0.00 0.00 ]
+Key: VFMADDCSHZrbkz: [ 0.00 0.00 ]
+Key: VFMADDCSHZrk: [ 0.00 0.00 ]
+Key: VFMADDCSHZrkz: [ 0.00 0.00 ]
+Key: VFMADDPD: [ 0.00 0.00 ]
+Key: VFMADDPS: [ 0.00 0.00 ]
+Key: VFMADDSD: [ 0.00 0.00 ]
+Key: VFMADDSS: [ 0.00 0.00 ]
+Key: VFMADDSUB: [ 0.00 0.00 ]
+Key: VFMADDSUBPD: [ 0.00 0.00 ]
+Key: VFMADDSUBPS: [ 0.00 0.00 ]
+Key: VFMSUB: [ 0.00 0.00 ]
+Key: VFMSUBADD: [ 0.00 0.00 ]
+Key: VFMSUBADDPD: [ 0.00 0.00 ]
+Key: VFMSUBADDPS: [ 0.00 0.00 ]
+Key: VFMSUBPD: [ 0.00 0.00 ]
+Key: VFMSUBPS: [ 0.00 0.00 ]
+Key: VFMSUBSD: [ 0.00 0.00 ]
+Key: VFMSUBSS: [ 0.00 0.00 ]
+Key: VFMULCPHZ: [ 0.00 0.00 ]
+Key: VFMULCPHZrm: [ 0.00 0.00 ]
+Key: VFMULCPHZrmb: [ 0.00 0.00 ]
+Key: VFMULCPHZrmbk: [ 0.00 0.00 ]
+Key: VFMULCPHZrmbkz: [ 0.00 0.00 ]
+Key: VFMULCPHZrmk: [ 0.00 0.00 ]
+Key: VFMULCPHZrmkz: [ 0.00 0.00 ]
+Key: VFMULCPHZrr: [ 0.00 0.00 ]
+Key: VFMULCPHZrrb: [ 0.00 0.00 ]
+Key: VFMULCPHZrrbk: [ 0.00 0.00 ]
+Key: VFMULCPHZrrbkz: [ 0.00 0.00 ]
+Key: VFMULCPHZrrk: [ 0.00 0.00 ]
+Key: VFMULCPHZrrkz: [ 0.00 0.00 ]
+Key: VFMULCSHZrm: [ 0.00 0.00 ]
+Key: VFMULCSHZrmk: [ 0.00 0.00 ]
+Key: VFMULCSHZrmkz: [ 0.00 0.00 ]
+Key: VFMULCSHZrr: [ 0.00 0.00 ]
+Key: VFMULCSHZrrb: [ 0.00 0.00 ]
+Key: VFMULCSHZrrbk: [ 0.00 0.00 ]
+Key: VFMULCSHZrrbkz: [ 0.00 0.00 ]
+Key: VFMULCSHZrrk: [ 0.00 0.00 ]
+Key: VFMULCSHZrrkz: [ 0.00 0.00 ]
+Key: VFNMADD: [ 0.00 0.00 ]
+Key: VFNMADDPD: [ 0.00 0.00 ]
+Key: VFNMADDPS: [ 0.00 0.00 ]
+Key: VFNMADDSD: [ 0.00 0.00 ]
+Key: VFNMADDSS: [ 0.00 0.00 ]
+Key: VFNMSUB: [ 0.00 0.00 ]
+Key: VFNMSUBPD: [ 0.00 0.00 ]
+Key: VFNMSUBPS: [ 0.00 0.00 ]
+Key: VFNMSUBSD: [ 0.00 0.00 ]
+Key: VFNMSUBSS: [ 0.00 0.00 ]
+Key: VFPCLASSBF: [ 0.00 0.00 ]
+Key: VFPCLASSPDZ: [ 0.00 0.00 ]
+Key: VFPCLASSPDZmbi: [ 0.00 0.00 ]
+Key: VFPCLASSPDZmbik: [ 0.00 0.00 ]
+Key: VFPCLASSPDZmi: [ 0.00 0.00 ]
+Key: VFPCLASSPDZmik: [ 0.00 0.00 ]
+Key: VFPCLASSPDZri: [ 0.00 0.00 ]
+Key: VFPCLASSPDZrik: [ 0.00 0.00 ]
+Key: VFPCLASSPHZ: [ 0.00 0.00 ]
+Key: VFPCLASSPHZmbi: [ 0.00 0.00 ]
+Key: VFPCLASSPHZmbik: [ 0.00 0.00 ]
+Key: VFPCLASSPHZmi: [ 0.00 0.00 ]
+Key: VFPCLASSPHZmik: [ 0.00 0.00 ]
+Key: VFPCLASSPHZri: [ 0.00 0.00 ]
+Key: VFPCLASSPHZrik: [ 0.00 0.00 ]
+Key: VFPCLASSPSZ: [ 0.00 0.00 ]
+Key: VFPCLASSPSZmbi: [ 0.00 0.00 ]
+Key: VFPCLASSPSZmbik: [ 0.00 0.00 ]
+Key: VFPCLASSPSZmi: [ 0.00 0.00 ]
+Key: VFPCLASSPSZmik: [ 0.00 0.00 ]
+Key: VFPCLASSPSZri: [ 0.00 0.00 ]
+Key: VFPCLASSPSZrik: [ 0.00 0.00 ]
+Key: VFPCLASSSDZmi: [ 0.00 0.00 ]
+Key: VFPCLASSSDZmik: [ 0.00 0.00 ]
+Key: VFPCLASSSDZri: [ 0.00 0.00 ]
+Key: VFPCLASSSDZrik: [ 0.00 0.00 ]
+Key: VFPCLASSSHZmi: [ 0.00 0.00 ]
+Key: VFPCLASSSHZmik: [ 0.00 0.00 ]
+Key: VFPCLASSSHZri: [ 0.00 0.00 ]
+Key: VFPCLASSSHZrik: [ 0.00 0.00 ]
+Key: VFPCLASSSSZmi: [ 0.00 0.00 ]
+Key: VFPCLASSSSZmik: [ 0.00 0.00 ]
+Key: VFPCLASSSSZri: [ 0.00 0.00 ]
+Key: VFPCLASSSSZrik: [ 0.00 0.00 ]
+Key: VFRCZPDYrm: [ 0.00 0.00 ]
+Key: VFRCZPDYrr: [ 0.00 0.00 ]
+Key: VFRCZPDrm: [ 0.00 0.00 ]
+Key: VFRCZPDrr: [ 0.00 0.00 ]
+Key: VFRCZPSYrm: [ 0.00 0.00 ]
+Key: VFRCZPSYrr: [ 0.00 0.00 ]
+Key: VFRCZPSrm: [ 0.00 0.00 ]
+Key: VFRCZPSrr: [ 0.00 0.00 ]
+Key: VFRCZSDrm: [ 0.00 0.00 ]
+Key: VFRCZSDrr: [ 0.00 0.00 ]
+Key: VFRCZSSrm: [ 0.00 0.00 ]
+Key: VFRCZSSrr: [ 0.00 0.00 ]
+Key: VGATHERDPDYrm: [ 0.00 0.00 ]
+Key: VGATHERDPDZ: [ 0.00 0.00 ]
+Key: VGATHERDPDZrm: [ 0.00 0.00 ]
+Key: VGATHERDPDrm: [ 0.00 0.00 ]
+Key: VGATHERDPSYrm: [ 0.00 0.00 ]
+Key: VGATHERDPSZ: [ 0.00 0.00 ]
+Key: VGATHERDPSZrm: [ 0.00 0.00 ]
+Key: VGATHERDPSrm: [ 0.00 0.00 ]
+Key: VGATHERPF: [ 0.00 0.00 ]
+Key: VGATHERQPDYrm: [ 0.00 0.00 ]
+Key: VGATHERQPDZ: [ 0.00 0.00 ]
+Key: VGATHERQPDZrm: [ 0.00 0.00 ]
+Key: VGATHERQPDrm: [ 0.00 0.00 ]
+Key: VGATHERQPSYrm: [ 0.00 0.00 ]
+Key: VGATHERQPSZ: [ 0.00 0.00 ]
+Key: VGATHERQPSZrm: [ 0.00 0.00 ]
+Key: VGATHERQPSrm: [ 0.00 0.00 ]
+Key: VGETEXPBF: [ 0.00 0.00 ]
+Key: VGETEXPPDZ: [ 0.00 0.00 ]
+Key: VGETEXPPDZm: [ 0.00 0.00 ]
+Key: VGETEXPPDZmb: [ 0.00 0.00 ]
+Key: VGETEXPPDZmbk: [ 0.00 0.00 ]
+Key: VGETEXPPDZmbkz: [ 0.00 0.00 ]
+Key: VGETEXPPDZmk: [ 0.00 0.00 ]
+Key: VGETEXPPDZmkz: [ 0.00 0.00 ]
+Key: VGETEXPPDZr: [ 0.00 0.00 ]
+Key: VGETEXPPDZrb: [ 0.00 0.00 ]
+Key: VGETEXPPDZrbk: [ 0.00 0.00 ]
+Key: VGETEXPPDZrbkz: [ 0.00 0.00 ]
+Key: VGETEXPPDZrk: [ 0.00 0.00 ]
+Key: VGETEXPPDZrkz: [ 0.00 0.00 ]
+Key: VGETEXPPHZ: [ 0.00 0.00 ]
+Key: VGETEXPPHZm: [ 0.00 0.00 ]
+Key: VGETEXPPHZmb: [ 0.00 0.00 ]
+Key: VGETEXPPHZmbk: [ 0.00 0.00 ]
+Key: VGETEXPPHZmbkz: [ 0.00 0.00 ]
+Key: VGETEXPPHZmk: [ 0.00 0.00 ]
+Key: VGETEXPPHZmkz: [ 0.00 0.00 ]
+Key: VGETEXPPHZr: [ 0.00 0.00 ]
+Key: VGETEXPPHZrb: [ 0.00 0.00 ]
+Key: VGETEXPPHZrbk: [ 0.00 0.00 ]
+Key: VGETEXPPHZrbkz: [ 0.00 0.00 ]
+Key: VGETEXPPHZrk: [ 0.00 0.00 ]
+Key: VGETEXPPHZrkz: [ 0.00 0.00 ]
+Key: VGETEXPPSZ: [ 0.00 0.00 ]
+Key: VGETEXPPSZm: [ 0.00 0.00 ]
+Key: VGETEXPPSZmb: [ 0.00 0.00 ]
+Key: VGETEXPPSZmbk: [ 0.00 0.00 ]
+Key: VGETEXPPSZmbkz: [ 0.00 0.00 ]
+Key: VGETEXPPSZmk: [ 0.00 0.00 ]
+Key: VGETEXPPSZmkz: [ 0.00 0.00 ]
+Key: VGETEXPPSZr: [ 0.00 0.00 ]
+Key: VGETEXPPSZrb: [ 0.00 0.00 ]
+Key: VGETEXPPSZrbk: [ 0.00 0.00 ]
+Key: VGETEXPPSZrbkz: [ 0.00 0.00 ]
+Key: VGETEXPPSZrk: [ 0.00 0.00 ]
+Key: VGETEXPPSZrkz: [ 0.00 0.00 ]
+Key: VGETEXPSDZm: [ 0.00 0.00 ]
+Key: VGETEXPSDZmk: [ 0.00 0.00 ]
+Key: VGETEXPSDZmkz: [ 0.00 0.00 ]
+Key: VGETEXPSDZr: [ 0.00 0.00 ]
+Key: VGETEXPSDZrb: [ 0.00 0.00 ]
+Key: VGETEXPSDZrbk: [ 0.00 0.00 ]
+Key: VGETEXPSDZrbkz: [ 0.00 0.00 ]
+Key: VGETEXPSDZrk: [ 0.00 0.00 ]
+Key: VGETEXPSDZrkz: [ 0.00 0.00 ]
+Key: VGETEXPSHZm: [ 0.00 0.00 ]
+Key: VGETEXPSHZmk: [ 0.00 0.00 ]
+Key: VGETEXPSHZmkz: [ 0.00 0.00 ]
+Key: VGETEXPSHZr: [ 0.00 0.00 ]
+Key: VGETEXPSHZrb: [ 0.00 0.00 ]
+Key: VGETEXPSHZrbk: [ 0.00 0.00 ]
+Key: VGETEXPSHZrbkz: [ 0.00 0.00 ]
+Key: VGETEXPSHZrk: [ 0.00 0.00 ]
+Key: VGETEXPSHZrkz: [ 0.00 0.00 ]
+Key: VGETEXPSSZm: [ 0.00 0.00 ]
+Key: VGETEXPSSZmk: [ 0.00 0.00 ]
+Key: VGETEXPSSZmkz: [ 0.00 0.00 ]
+Key: VGETEXPSSZr: [ 0.00 0.00 ]
+Key: VGETEXPSSZrb: [ 0.00 0.00 ]
+Key: VGETEXPSSZrbk: [ 0.00 0.00 ]
+Key: VGETEXPSSZrbkz: [ 0.00 0.00 ]
+Key: VGETEXPSSZrk: [ 0.00 0.00 ]
+Key: VGETEXPSSZrkz: [ 0.00 0.00 ]
+Key: VGETMANTBF: [ 0.00 0.00 ]
+Key: VGETMANTPDZ: [ 0.00 0.00 ]
+Key: VGETMANTPDZrmbi: [ 0.00 0.00 ]
+Key: VGETMANTPDZrmbik: [ 0.00 0.00 ]
+Key: VGETMANTPDZrmbikz: [ 0.00 0.00 ]
+Key: VGETMANTPDZrmi: [ 0.00 0.00 ]
+Key: VGETMANTPDZrmik: [ 0.00 0.00 ]
+Key: VGETMANTPDZrmikz: [ 0.00 0.00 ]
+Key: VGETMANTPDZrri: [ 0.00 0.00 ]
+Key: VGETMANTPDZrrib: [ 0.00 0.00 ]
+Key: VGETMANTPDZrribk: [ 0.00 0.00 ]
+Key: VGETMANTPDZrribkz: [ 0.00 0.00 ]
+Key: VGETMANTPDZrrik: [ 0.00 0.00 ]
+Key: VGETMANTPDZrrikz: [ 0.00 0.00 ]
+Key: VGETMANTPHZ: [ 0.00 0.00 ]
+Key: VGETMANTPHZrmbi: [ 0.00 0.00 ]
+Key: VGETMANTPHZrmbik: [ 0.00 0.00 ]
+Key: VGETMANTPHZrmbikz: [ 0.00 0.00 ]
+Key: VGETMANTPHZrmi: [ 0.00 0.00 ]
+Key: VGETMANTPHZrmik: [ 0.00 0.00 ]
+Key: VGETMANTPHZrmikz: [ 0.00 0.00 ]
+Key: VGETMANTPHZrri: [ 0.00 0.00 ]
+Key: VGETMANTPHZrrib: [ 0.00 0.00 ]
+Key: VGETMANTPHZrribk: [ 0.00 0.00 ]
+Key: VGETMANTPHZrribkz: [ 0.00 0.00 ]
+Key: VGETMANTPHZrrik: [ 0.00 0.00 ]
+Key: VGETMANTPHZrrikz: [ 0.00 0.00 ]
+Key: VGETMANTPSZ: [ 0.00 0.00 ]
+Key: VGETMANTPSZrmbi: [ 0.00 0.00 ]
+Key: VGETMANTPSZrmbik: [ 0.00 0.00 ]
+Key: VGETMANTPSZrmbikz: [ 0.00 0.00 ]
+Key: VGETMANTPSZrmi: [ 0.00 0.00 ]
+Key: VGETMANTPSZrmik: [ 0.00 0.00 ]
+Key: VGETMANTPSZrmikz: [ 0.00 0.00 ]
+Key: VGETMANTPSZrri: [ 0.00 0.00 ]
+Key: VGETMANTPSZrrib: [ 0.00 0.00 ]
+Key: VGETMANTPSZrribk: [ 0.00 0.00 ]
+Key: VGETMANTPSZrribkz: [ 0.00 0.00 ]
+Key: VGETMANTPSZrrik: [ 0.00 0.00 ]
+Key: VGETMANTPSZrrikz: [ 0.00 0.00 ]
+Key: VGETMANTSDZrmi: [ 0.00 0.00 ]
+Key: VGETMANTSDZrmik: [ 0.00 0.00 ]
+Key: VGETMANTSDZrmikz: [ 0.00 0.00 ]
+Key: VGETMANTSDZrri: [ 0.00 0.00 ]
+Key: VGETMANTSDZrrib: [ 0.00 0.00 ]
+Key: VGETMANTSDZrribk: [ 0.00 0.00 ]
+Key: VGETMANTSDZrribkz: [ 0.00 0.00 ]
+Key: VGETMANTSDZrrik: [ 0.00 0.00 ]
+Key: VGETMANTSDZrrikz: [ 0.00 0.00 ]
+Key: VGETMANTSHZrmi: [ 0.00 0.00 ]
+Key: VGETMANTSHZrmik: [ 0.00 0.00 ]
+Key: VGETMANTSHZrmikz: [ 0.00 0.00 ]
+Key: VGETMANTSHZrri: [ 0.00 0.00 ]
+Key: VGETMANTSHZrrib: [ 0.00 0.00 ]
+Key: VGETMANTSHZrribk: [ 0.00 0.00 ]
+Key: VGETMANTSHZrribkz: [ 0.00 0.00 ]
+Key: VGETMANTSHZrrik: [ 0.00 0.00 ]
+Key: VGETMANTSHZrrikz: [ 0.00 0.00 ]
+Key: VGETMANTSSZrmi: [ 0.00 0.00 ]
+Key: VGETMANTSSZrmik: [ 0.00 0.00 ]
+Key: VGETMANTSSZrmikz: [ 0.00 0.00 ]
+Key: VGETMANTSSZrri: [ 0.00 0.00 ]
+Key: VGETMANTSSZrrib: [ 0.00 0.00 ]
+Key: VGETMANTSSZrribk: [ 0.00 0.00 ]
+Key: VGETMANTSSZrribkz: [ 0.00 0.00 ]
+Key: VGETMANTSSZrrik: [ 0.00 0.00 ]
+Key: VGETMANTSSZrrikz: [ 0.00 0.00 ]
+Key: VGF: [ 0.00 0.00 ]
+Key: VHADDPDYrm: [ 0.00 0.00 ]
+Key: VHADDPDYrr: [ 0.00 0.00 ]
+Key: VHADDPDrm: [ 0.00 0.00 ]
+Key: VHADDPDrr: [ 0.00 0.00 ]
+Key: VHADDPSYrm: [ 0.00 0.00 ]
+Key: VHADDPSYrr: [ 0.00 0.00 ]
+Key: VHADDPSrm: [ 0.00 0.00 ]
+Key: VHADDPSrr: [ 0.00 0.00 ]
+Key: VHSUBPDYrm: [ 0.00 0.00 ]
+Key: VHSUBPDYrr: [ 0.00 0.00 ]
+Key: VHSUBPDrm: [ 0.00 0.00 ]
+Key: VHSUBPDrr: [ 0.00 0.00 ]
+Key: VHSUBPSYrm: [ 0.00 0.00 ]
+Key: VHSUBPSYrr: [ 0.00 0.00 ]
+Key: VHSUBPSrm: [ 0.00 0.00 ]
+Key: VHSUBPSrr: [ 0.00 0.00 ]
+Key: VINSERTF: [ 0.00 0.00 ]
+Key: VINSERTI: [ 0.00 0.00 ]
+Key: VINSERTPSZrmi: [ 0.00 0.00 ]
+Key: VINSERTPSZrri: [ 0.00 0.00 ]
+Key: VINSERTPSrmi: [ 0.00 0.00 ]
+Key: VINSERTPSrri: [ 0.00 0.00 ]
+Key: VLDDQUYrm: [ 0.00 0.00 ]
+Key: VLDDQUrm: [ 0.00 0.00 ]
+Key: VLDMXCSR: [ 0.00 0.00 ]
+Key: VMASKMOVDQU: [ 0.00 0.00 ]
+Key: VMASKMOVPDYmr: [ 0.00 0.00 ]
+Key: VMASKMOVPDYrm: [ 0.00 0.00 ]
+Key: VMASKMOVPDmr: [ 0.00 0.00 ]
+Key: VMASKMOVPDrm: [ 0.00 0.00 ]
+Key: VMASKMOVPSYmr: [ 0.00 0.00 ]
+Key: VMASKMOVPSYrm: [ 0.00 0.00 ]
+Key: VMASKMOVPSmr: [ 0.00 0.00 ]
+Key: VMASKMOVPSrm: [ 0.00 0.00 ]
+Key: VMAXBF: [ 0.00 0.00 ]
+Key: VMAXCPDYrm: [ 0.00 0.00 ]
+Key: VMAXCPDYrr: [ 0.00 0.00 ]
+Key: VMAXCPDZ: [ 0.00 0.00 ]
+Key: VMAXCPDZrm: [ 0.00 0.00 ]
+Key: VMAXCPDZrmb: [ 0.00 0.00 ]
+Key: VMAXCPDZrmbk: [ 0.00 0.00 ]
+Key: VMAXCPDZrmbkz: [ 0.00 0.00 ]
+Key: VMAXCPDZrmk: [ 0.00 0.00 ]
+Key: VMAXCPDZrmkz: [ 0.00 0.00 ]
+Key: VMAXCPDZrr: [ 0.00 0.00 ]
+Key: VMAXCPDZrrk: [ 0.00 0.00 ]
+Key: VMAXCPDZrrkz: [ 0.00 0.00 ]
+Key: VMAXCPDrm: [ 0.00 0.00 ]
+Key: VMAXCPDrr: [ 0.00 0.00 ]
+Key: VMAXCPHZ: [ 0.00 0.00 ]
+Key: VMAXCPHZrm: [ 0.00 0.00 ]
+Key: VMAXCPHZrmb: [ 0.00 0.00 ]
+Key: VMAXCPHZrmbk: [ 0.00 0.00 ]
+Key: VMAXCPHZrmbkz: [ 0.00 0.00 ]
+Key: VMAXCPHZrmk: [ 0.00 0.00 ]
+Key: VMAXCPHZrmkz: [ 0.00 0.00 ]
+Key: VMAXCPHZrr: [ 0.00 0.00 ]
+Key: VMAXCPHZrrk: [ 0.00 0.00 ]
+Key: VMAXCPHZrrkz: [ 0.00 0.00 ]
+Key: VMAXCPSYrm: [ 0.00 0.00 ]
+Key: VMAXCPSYrr: [ 0.00 0.00 ]
+Key: VMAXCPSZ: [ 0.00 0.00 ]
+Key: VMAXCPSZrm: [ 0.00 0.00 ]
+Key: VMAXCPSZrmb: [ 0.00 0.00 ]
+Key: VMAXCPSZrmbk: [ 0.00 0.00 ]
+Key: VMAXCPSZrmbkz: [ 0.00 0.00 ]
+Key: VMAXCPSZrmk: [ 0.00 0.00 ]
+Key: VMAXCPSZrmkz: [ 0.00 0.00 ]
+Key: VMAXCPSZrr: [ 0.00 0.00 ]
+Key: VMAXCPSZrrk: [ 0.00 0.00 ]
+Key: VMAXCPSZrrkz: [ 0.00 0.00 ]
+Key: VMAXCPSrm: [ 0.00 0.00 ]
+Key: VMAXCPSrr: [ 0.00 0.00 ]
+Key: VMAXCSDZrm: [ 0.00 0.00 ]
+Key: VMAXCSDZrr: [ 0.00 0.00 ]
+Key: VMAXCSDrm: [ 0.00 0.00 ]
+Key: VMAXCSDrr: [ 0.00 0.00 ]
+Key: VMAXCSHZrm: [ 0.00 0.00 ]
+Key: VMAXCSHZrr: [ 0.00 0.00 ]
+Key: VMAXCSSZrm: [ 0.00 0.00 ]
+Key: VMAXCSSZrr: [ 0.00 0.00 ]
+Key: VMAXCSSrm: [ 0.00 0.00 ]
+Key: VMAXCSSrr: [ 0.00 0.00 ]
+Key: VMAXPDYrm: [ 0.00 0.00 ]
+Key: VMAXPDYrr: [ 0.00 0.00 ]
+Key: VMAXPDZ: [ 0.00 0.00 ]
+Key: VMAXPDZrm: [ 0.00 0.00 ]
+Key: VMAXPDZrmb: [ 0.00 0.00 ]
+Key: VMAXPDZrmbk: [ 0.00 0.00 ]
+Key: VMAXPDZrmbkz: [ 0.00 0.00 ]
+Key: VMAXPDZrmk: [ 0.00 0.00 ]
+Key: VMAXPDZrmkz: [ 0.00 0.00 ]
+Key: VMAXPDZrr: [ 0.00 0.00 ]
+Key: VMAXPDZrrb: [ 0.00 0.00 ]
+Key: VMAXPDZrrbk: [ 0.00 0.00 ]
+Key: VMAXPDZrrbkz: [ 0.00 0.00 ]
+Key: VMAXPDZrrk: [ 0.00 0.00 ]
+Key: VMAXPDZrrkz: [ 0.00 0.00 ]
+Key: VMAXPDrm: [ 0.00 0.00 ]
+Key: VMAXPDrr: [ 0.00 0.00 ]
+Key: VMAXPHZ: [ 0.00 0.00 ]
+Key: VMAXPHZrm: [ 0.00 0.00 ]
+Key: VMAXPHZrmb: [ 0.00 0.00 ]
+Key: VMAXPHZrmbk: [ 0.00 0.00 ]
+Key: VMAXPHZrmbkz: [ 0.00 0.00 ]
+Key: VMAXPHZrmk: [ 0.00 0.00 ]
+Key: VMAXPHZrmkz: [ 0.00 0.00 ]
+Key: VMAXPHZrr: [ 0.00 0.00 ]
+Key: VMAXPHZrrb: [ 0.00 0.00 ]
+Key: VMAXPHZrrbk: [ 0.00 0.00 ]
+Key: VMAXPHZrrbkz: [ 0.00 0.00 ]
+Key: VMAXPHZrrk: [ 0.00 0.00 ]
+Key: VMAXPHZrrkz: [ 0.00 0.00 ]
+Key: VMAXPSYrm: [ 0.00 0.00 ]
+Key: VMAXPSYrr: [ 0.00 0.00 ]
+Key: VMAXPSZ: [ 0.00 0.00 ]
+Key: VMAXPSZrm: [ 0.00 0.00 ]
+Key: VMAXPSZrmb: [ 0.00 0.00 ]
+Key: VMAXPSZrmbk: [ 0.00 0.00 ]
+Key: VMAXPSZrmbkz: [ 0.00 0.00 ]
+Key: VMAXPSZrmk: [ 0.00 0.00 ]
+Key: VMAXPSZrmkz: [ 0.00 0.00 ]
+Key: VMAXPSZrr: [ 0.00 0.00 ]
+Key: VMAXPSZrrb: [ 0.00 0.00 ]
+Key: VMAXPSZrrbk: [ 0.00 0.00 ]
+Key: VMAXPSZrrbkz: [ 0.00 0.00 ]
+Key: VMAXPSZrrk: [ 0.00 0.00 ]
+Key: VMAXPSZrrkz: [ 0.00 0.00 ]
+Key: VMAXPSrm: [ 0.00 0.00 ]
+Key: VMAXPSrr: [ 0.00 0.00 ]
+Key: VMAXSDZrm: [ 0.00 0.00 ]
+Key: VMAXSDZrm_Int: [ 0.00 0.00 ]
+Key: VMAXSDZrmk_Int: [ 0.00 0.00 ]
+Key: VMAXSDZrmkz_Int: [ 0.00 0.00 ]
+Key: VMAXSDZrr: [ 0.00 0.00 ]
+Key: VMAXSDZrr_Int: [ 0.00 0.00 ]
+Key: VMAXSDZrrb_Int: [ 0.00 0.00 ]
+Key: VMAXSDZrrbk_Int: [ 0.00 0.00 ]
+Key: VMAXSDZrrbkz_Int: [ 0.00 0.00 ]
+Key: VMAXSDZrrk_Int: [ 0.00 0.00 ]
+Key: VMAXSDZrrkz_Int: [ 0.00 0.00 ]
+Key: VMAXSDrm: [ 0.00 0.00 ]
+Key: VMAXSDrm_Int: [ 0.00 0.00 ]
+Key: VMAXSDrr: [ 0.00 0.00 ]
+Key: VMAXSDrr_Int: [ 0.00 0.00 ]
+Key: VMAXSHZrm: [ 0.00 0.00 ]
+Key: VMAXSHZrm_Int: [ 0.00 0.00 ]
+Key: VMAXSHZrmk_Int: [ 0.00 0.00 ]
+Key: VMAXSHZrmkz_Int: [ 0.00 0.00 ]
+Key: VMAXSHZrr: [ 0.00 0.00 ]
+Key: VMAXSHZrr_Int: [ 0.00 0.00 ]
+Key: VMAXSHZrrb_Int: [ 0.00 0.00 ]
+Key: VMAXSHZrrbk_Int: [ 0.00 0.00 ]
+Key: VMAXSHZrrbkz_Int: [ 0.00 0.00 ]
+Key: VMAXSHZrrk_Int: [ 0.00 0.00 ]
+Key: VMAXSHZrrkz_Int: [ 0.00 0.00 ]
+Key: VMAXSSZrm: [ 0.00 0.00 ]
+Key: VMAXSSZrm_Int: [ 0.00 0.00 ]
+Key: VMAXSSZrmk_Int: [ 0.00 0.00 ]
+Key: VMAXSSZrmkz_Int: [ 0.00 0.00 ]
+Key: VMAXSSZrr: [ 0.00 0.00 ]
+Key: VMAXSSZrr_Int: [ 0.00 0.00 ]
+Key: VMAXSSZrrb_Int: [ 0.00 0.00 ]
+Key: VMAXSSZrrbk_Int: [ 0.00 0.00 ]
+Key: VMAXSSZrrbkz_Int: [ 0.00 0.00 ]
+Key: VMAXSSZrrk_Int: [ 0.00 0.00 ]
+Key: VMAXSSZrrkz_Int: [ 0.00 0.00 ]
+Key: VMAXSSrm: [ 0.00 0.00 ]
+Key: VMAXSSrm_Int: [ 0.00 0.00 ]
+Key: VMAXSSrr: [ 0.00 0.00 ]
+Key: VMAXSSrr_Int: [ 0.00 0.00 ]
+Key: VMCALL: [ 0.00 0.00 ]
+Key: VMCLEARm: [ 0.00 0.00 ]
+Key: VMFUNC: [ 0.00 0.00 ]
+Key: VMINBF: [ 0.00 0.00 ]
+Key: VMINCPDYrm: [ 0.00 0.00 ]
+Key: VMINCPDYrr: [ 0.00 0.00 ]
+Key: VMINCPDZ: [ 0.00 0.00 ]
+Key: VMINCPDZrm: [ 0.00 0.00 ]
+Key: VMINCPDZrmb: [ 0.00 0.00 ]
+Key: VMINCPDZrmbk: [ 0.00 0.00 ]
+Key: VMINCPDZrmbkz: [ 0.00 0.00 ]
+Key: VMINCPDZrmk: [ 0.00 0.00 ]
+Key: VMINCPDZrmkz: [ 0.00 0.00 ]
+Key: VMINCPDZrr: [ 0.00 0.00 ]
+Key: VMINCPDZrrk: [ 0.00 0.00 ]
+Key: VMINCPDZrrkz: [ 0.00 0.00 ]
+Key: VMINCPDrm: [ 0.00 0.00 ]
+Key: VMINCPDrr: [ 0.00 0.00 ]
+Key: VMINCPHZ: [ 0.00 0.00 ]
+Key: VMINCPHZrm: [ 0.00 0.00 ]
+Key: VMINCPHZrmb: [ 0.00 0.00 ]
+Key: VMINCPHZrmbk: [ 0.00 0.00 ]
+Key: VMINCPHZrmbkz: [ 0.00 0.00 ]
+Key: VMINCPHZrmk: [ 0.00 0.00 ]
+Key: VMINCPHZrmkz: [ 0.00 0.00 ]
+Key: VMINCPHZrr: [ 0.00 0.00 ]
+Key: VMINCPHZrrk: [ 0.00 0.00 ]
+Key: VMINCPHZrrkz: [ 0.00 0.00 ]
+Key: VMINCPSYrm: [ 0.00 0.00 ]
+Key: VMINCPSYrr: [ 0.00 0.00 ]
+Key: VMINCPSZ: [ 0.00 0.00 ]
+Key: VMINCPSZrm: [ 0.00 0.00 ]
+Key: VMINCPSZrmb: [ 0.00 0.00 ]
+Key: VMINCPSZrmbk: [ 0.00 0.00 ]
+Key: VMINCPSZrmbkz: [ 0.00 0.00 ]
+Key: VMINCPSZrmk: [ 0.00 0.00 ]
+Key: VMINCPSZrmkz: [ 0.00 0.00 ]
+Key: VMINCPSZrr: [ 0.00 0.00 ]
+Key: VMINCPSZrrk: [ 0.00 0.00 ]
+Key: VMINCPSZrrkz: [ 0.00 0.00 ]
+Key: VMINCPSrm: [ 0.00 0.00 ]
+Key: VMINCPSrr: [ 0.00 0.00 ]
+Key: VMINCSDZrm: [ 0.00 0.00 ]
+Key: VMINCSDZrr: [ 0.00 0.00 ]
+Key: VMINCSDrm: [ 0.00 0.00 ]
+Key: VMINCSDrr: [ 0.00 0.00 ]
+Key: VMINCSHZrm: [ 0.00 0.00 ]
+Key: VMINCSHZrr: [ 0.00 0.00 ]
+Key: VMINCSSZrm: [ 0.00 0.00 ]
+Key: VMINCSSZrr: [ 0.00 0.00 ]
+Key: VMINCSSrm: [ 0.00 0.00 ]
+Key: VMINCSSrr: [ 0.00 0.00 ]
+Key: VMINMAXBF: [ 0.00 0.00 ]
+Key: VMINMAXPDZ: [ 0.00 0.00 ]
+Key: VMINMAXPDZrmbi: [ 0.00 0.00 ]
+Key: VMINMAXPDZrmbik: [ 0.00 0.00 ]
+Key: VMINMAXPDZrmbikz: [ 0.00 0.00 ]
+Key: VMINMAXPDZrmi: [ 0.00 0.00 ]
+Key: VMINMAXPDZrmik: [ 0.00 0.00 ]
+Key: VMINMAXPDZrmikz: [ 0.00 0.00 ]
+Key: VMINMAXPDZrri: [ 0.00 0.00 ]
+Key: VMINMAXPDZrrib: [ 0.00 0.00 ]
+Key: VMINMAXPDZrribk: [ 0.00 0.00 ]
+Key: VMINMAXPDZrribkz: [ 0.00 0.00 ]
+Key: VMINMAXPDZrrik: [ 0.00 0.00 ]
+Key: VMINMAXPDZrrikz: [ 0.00 0.00 ]
+Key: VMINMAXPHZ: [ 0.00 0.00 ]
+Key: VMINMAXPHZrmbi: [ 0.00 0.00 ]
+Key: VMINMAXPHZrmbik: [ 0.00 0.00 ]
+Key: VMINMAXPHZrmbikz: [ 0.00 0.00 ]
+Key: VMINMAXPHZrmi: [ 0.00 0.00 ]
+Key: VMINMAXPHZrmik: [ 0.00 0.00 ]
+Key: VMINMAXPHZrmikz: [ 0.00 0.00 ]
+Key: VMINMAXPHZrri: [ 0.00 0.00 ]
+Key: VMINMAXPHZrrib: [ 0.00 0.00 ]
+Key: VMINMAXPHZrribk: [ 0.00 0.00 ]
+Key: VMINMAXPHZrribkz: [ 0.00 0.00 ]
+Key: VMINMAXPHZrrik: [ 0.00 0.00 ]
+Key: VMINMAXPHZrrikz: [ 0.00 0.00 ]
+Key: VMINMAXPSZ: [ 0.00 0.00 ]
+Key: VMINMAXPSZrmbi: [ 0.00 0.00 ]
+Key: VMINMAXPSZrmbik: [ 0.00 0.00 ]
+Key: VMINMAXPSZrmbikz: [ 0.00 0.00 ]
+Key: VMINMAXPSZrmi: [ 0.00 0.00 ]
+Key: VMINMAXPSZrmik: [ 0.00 0.00 ]
+Key: VMINMAXPSZrmikz: [ 0.00 0.00 ]
+Key: VMINMAXPSZrri: [ 0.00 0.00 ]
+Key: VMINMAXPSZrrib: [ 0.00 0.00 ]
+Key: VMINMAXPSZrribk: [ 0.00 0.00 ]
+Key: VMINMAXPSZrribkz: [ 0.00 0.00 ]
+Key: VMINMAXPSZrrik: [ 0.00 0.00 ]
+Key: VMINMAXPSZrrikz: [ 0.00 0.00 ]
+Key: VMINMAXSDrmi: [ 0.00 0.00 ]
+Key: VMINMAXSDrmi_Int: [ 0.00 0.00 ]
+Key: VMINMAXSDrmik_Int: [ 0.00 0.00 ]
+Key: VMINMAXSDrmikz_Int: [ 0.00 0.00 ]
+Key: VMINMAXSDrri: [ 0.00 0.00 ]
+Key: VMINMAXSDrri_Int: [ 0.00 0.00 ]
+Key: VMINMAXSDrrib_Int: [ 0.00 0.00 ]
+Key: VMINMAXSDrribk_Int: [ 0.00 0.00 ]
+Key: VMINMAXSDrribkz_Int: [ 0.00 0.00 ]
+Key: VMINMAXSDrrik_Int: [ 0.00 0.00 ]
+Key: VMINMAXSDrrikz_Int: [ 0.00 0.00 ]
+Key: VMINMAXSHrmi: [ 0.00 0.00 ]
+Key: VMINMAXSHrmi_Int: [ 0.00 0.00 ]
+Key: VMINMAXSHrmik_Int: [ 0.00 0.00 ]
+Key: VMINMAXSHrmikz_Int: [ 0.00 0.00 ]
+Key: VMINMAXSHrri: [ 0.00 0.00 ]
+Key: VMINMAXSHrri_Int: [ 0.00 0.00 ]
+Key: VMINMAXSHrrib_Int: [ 0.00 0.00 ]
+Key: VMINMAXSHrribk_Int: [ 0.00 0.00 ]
+Key: VMINMAXSHrribkz_Int: [ 0.00 0.00 ]
+Key: VMINMAXSHrrik_Int: [ 0.00 0.00 ]
+Key: VMINMAXSHrrikz_Int: [ 0.00 0.00 ]
+Key: VMINMAXSSrmi: [ 0.00 0.00 ]
+Key: VMINMAXSSrmi_Int: [ 0.00 0.00 ]
+Key: VMINMAXSSrmik_Int: [ 0.00 0.00 ]
+Key: VMINMAXSSrmikz_Int: [ 0.00 0.00 ]
+Key: VMINMAXSSrri: [ 0.00 0.00 ]
+Key: VMINMAXSSrri_Int: [ 0.00 0.00 ]
+Key: VMINMAXSSrrib_Int: [ 0.00 0.00 ]
+Key: VMINMAXSSrribk_Int: [ 0.00 0.00 ]
+Key: VMINMAXSSrribkz_Int: [ 0.00 0.00 ]
+Key: VMINMAXSSrrik_Int: [ 0.00 0.00 ]
+Key: VMINMAXSSrrikz_Int: [ 0.00 0.00 ]
+Key: VMINPDYrm: [ 0.00 0.00 ]
+Key: VMINPDYrr: [ 0.00 0.00 ]
+Key: VMINPDZ: [ 0.00 0.00 ]
+Key: VMINPDZrm: [ 0.00 0.00 ]
+Key: VMINPDZrmb: [ 0.00 0.00 ]
+Key: VMINPDZrmbk: [ 0.00 0.00 ]
+Key: VMINPDZrmbkz: [ 0.00 0.00 ]
+Key: VMINPDZrmk: [ 0.00 0.00 ]
+Key: VMINPDZrmkz: [ 0.00 0.00 ]
+Key: VMINPDZrr: [ 0.00 0.00 ]
+Key: VMINPDZrrb: [ 0.00 0.00 ]
+Key: VMINPDZrrbk: [ 0.00 0.00 ]
+Key: VMINPDZrrbkz: [ 0.00 0.00 ]
+Key: VMINPDZrrk: [ 0.00 0.00 ]
+Key: VMINPDZrrkz: [ 0.00 0.00 ]
+Key: VMINPDrm: [ 0.00 0.00 ]
+Key: VMINPDrr: [ 0.00 0.00 ]
+Key: VMINPHZ: [ 0.00 0.00 ]
+Key: VMINPHZrm: [ 0.00 0.00 ]
+Key: VMINPHZrmb: [ 0.00 0.00 ]
+Key: VMINPHZrmbk: [ 0.00 0.00 ]
+Key: VMINPHZrmbkz: [ 0.00 0.00 ]
+Key: VMINPHZrmk: [ 0.00 0.00 ]
+Key: VMINPHZrmkz: [ 0.00 0.00 ]
+Key: VMINPHZrr: [ 0.00 0.00 ]
+Key: VMINPHZrrb: [ 0.00 0.00 ]
+Key: VMINPHZrrbk: [ 0.00 0.00 ]
+Key: VMINPHZrrbkz: [ 0.00 0.00 ]
+Key: VMINPHZrrk: [ 0.00 0.00 ]
+Key: VMINPHZrrkz: [ 0.00 0.00 ]
+Key: VMINPSYrm: [ 0.00 0.00 ]
+Key: VMINPSYrr: [ 0.00 0.00 ]
+Key: VMINPSZ: [ 0.00 0.00 ]
+Key: VMINPSZrm: [ 0.00 0.00 ]
+Key: VMINPSZrmb: [ 0.00 0.00 ]
+Key: VMINPSZrmbk: [ 0.00 0.00 ]
+Key: VMINPSZrmbkz: [ 0.00 0.00 ]
+Key: VMINPSZrmk: [ 0.00 0.00 ]
+Key: VMINPSZrmkz: [ 0.00 0.00 ]
+Key: VMINPSZrr: [ 0.00 0.00 ]
+Key: VMINPSZrrb: [ 0.00 0.00 ]
+Key: VMINPSZrrbk: [ 0.00 0.00 ]
+Key: VMINPSZrrbkz: [ 0.00 0.00 ]
+Key: VMINPSZrrk: [ 0.00 0.00 ]
+Key: VMINPSZrrkz: [ 0.00 0.00 ]
+Key: VMINPSrm: [ 0.00 0.00 ]
+Key: VMINPSrr: [ 0.00 0.00 ]
+Key: VMINSDZrm: [ 0.00 0.00 ]
+Key: VMINSDZrm_Int: [ 0.00 0.00 ]
+Key: VMINSDZrmk_Int: [ 0.00 0.00 ]
+Key: VMINSDZrmkz_Int: [ 0.00 0.00 ]
+Key: VMINSDZrr: [ 0.00 0.00 ]
+Key: VMINSDZrr_Int: [ 0.00 0.00 ]
+Key: VMINSDZrrb_Int: [ 0.00 0.00 ]
+Key: VMINSDZrrbk_Int: [ 0.00 0.00 ]
+Key: VMINSDZrrbkz_Int: [ 0.00 0.00 ]
+Key: VMINSDZrrk_Int: [ 0.00 0.00 ]
+Key: VMINSDZrrkz_Int: [ 0.00 0.00 ]
+Key: VMINSDrm: [ 0.00 0.00 ]
+Key: VMINSDrm_Int: [ 0.00 0.00 ]
+Key: VMINSDrr: [ 0.00 0.00 ]
+Key: VMINSDrr_Int: [ 0.00 0.00 ]
+Key: VMINSHZrm: [ 0.00 0.00 ]
+Key: VMINSHZrm_Int: [ 0.00 0.00 ]
+Key: VMINSHZrmk_Int: [ 0.00 0.00 ]
+Key: VMINSHZrmkz_Int: [ 0.00 0.00 ]
+Key: VMINSHZrr: [ 0.00 0.00 ]
+Key: VMINSHZrr_Int: [ 0.00 0.00 ]
+Key: VMINSHZrrb_Int: [ 0.00 0.00 ]
+Key: VMINSHZrrbk_Int: [ 0.00 0.00 ]
+Key: VMINSHZrrbkz_Int: [ 0.00 0.00 ]
+Key: VMINSHZrrk_Int: [ 0.00 0.00 ]
+Key: VMINSHZrrkz_Int: [ 0.00 0.00 ]
+Key: VMINSSZrm: [ 0.00 0.00 ]
+Key: VMINSSZrm_Int: [ 0.00 0.00 ]
+Key: VMINSSZrmk_Int: [ 0.00 0.00 ]
+Key: VMINSSZrmkz_Int: [ 0.00 0.00 ]
+Key: VMINSSZrr: [ 0.00 0.00 ]
+Key: VMINSSZrr_Int: [ 0.00 0.00 ]
+Key: VMINSSZrrb_Int: [ 0.00 0.00 ]
+Key: VMINSSZrrbk_Int: [ 0.00 0.00 ]
+Key: VMINSSZrrbkz_Int: [ 0.00 0.00 ]
+Key: VMINSSZrrk_Int: [ 0.00 0.00 ]
+Key: VMINSSZrrkz_Int: [ 0.00 0.00 ]
+Key: VMINSSrm: [ 0.00 0.00 ]
+Key: VMINSSrm_Int: [ 0.00 0.00 ]
+Key: VMINSSrr: [ 0.00 0.00 ]
+Key: VMINSSrr_Int: [ 0.00 0.00 ]
+Key: VMLAUNCH: [ 0.00 0.00 ]
+Key: VMLOAD: [ 0.00 0.00 ]
+Key: VMMCALL: [ 0.00 0.00 ]
+Key: VMOV: [ 0.00 0.00 ]
+Key: VMOVAPDYmr: [ 0.00 0.00 ]
+Key: VMOVAPDYrm: [ 0.00 0.00 ]
+Key: VMOVAPDYrr: [ 0.00 0.00 ]
+Key: VMOVAPDYrr_REV: [ 0.00 0.00 ]
+Key: VMOVAPDZ: [ 0.00 0.00 ]
+Key: VMOVAPDZmr: [ 0.00 0.00 ]
+Key: VMOVAPDZmrk: [ 0.00 0.00 ]
+Key: VMOVAPDZrm: [ 0.00 0.00 ]
+Key: VMOVAPDZrmk: [ 0.00 0.00 ]
+Key: VMOVAPDZrmkz: [ 0.00 0.00 ]
+Key: VMOVAPDZrr: [ 0.00 0.00 ]
+Key: VMOVAPDZrr_REV: [ 0.00 0.00 ]
+Key: VMOVAPDZrrk: [ 0.00 0.00 ]
+Key: VMOVAPDZrrk_REV: [ 0.00 0.00 ]
+Key: VMOVAPDZrrkz: [ 0.00 0.00 ]
+Key: VMOVAPDZrrkz_REV: [ 0.00 0.00 ]
+Key: VMOVAPDmr: [ 0.00 0.00 ]
+Key: VMOVAPDrm: [ 0.00 0.00 ]
+Key: VMOVAPDrr: [ 0.00 0.00 ]
+Key: VMOVAPDrr_REV: [ 0.00 0.00 ]
+Key: VMOVAPSYmr: [ 0.00 0.00 ]
+Key: VMOVAPSYrm: [ 0.00 0.00 ]
+Key: VMOVAPSYrr: [ 0.00 0.00 ]
+Key: VMOVAPSYrr_REV: [ 0.00 0.00 ]
+Key: VMOVAPSZ: [ 0.00 0.00 ]
+Key: VMOVAPSZmr: [ 0.00 0.00 ]
+Key: VMOVAPSZmrk: [ 0.00 0.00 ]
+Key: VMOVAPSZrm: [ 0.00 0.00 ]
+Key: VMOVAPSZrmk: [ 0.00 0.00 ]
+Key: VMOVAPSZrmkz: [ 0.00 0.00 ]
+Key: VMOVAPSZrr: [ 0.00 0.00 ]
+Key: VMOVAPSZrr_REV: [ 0.00 0.00 ]
+Key: VMOVAPSZrrk: [ 0.00 0.00 ]
+Key: VMOVAPSZrrk_REV: [ 0.00 0.00 ]
+Key: VMOVAPSZrrkz: [ 0.00 0.00 ]
+Key: VMOVAPSZrrkz_REV: [ 0.00 0.00 ]
+Key: VMOVAPSmr: [ 0.00 0.00 ]
+Key: VMOVAPSrm: [ 0.00 0.00 ]
+Key: VMOVAPSrr: [ 0.00 0.00 ]
+Key: VMOVAPSrr_REV: [ 0.00 0.00 ]
+Key: VMOVDDUPYrm: [ 0.00 0.00 ]
+Key: VMOVDDUPYrr: [ 0.00 0.00 ]
+Key: VMOVDDUPZ: [ 0.00 0.00 ]
+Key: VMOVDDUPZrm: [ 0.00 0.00 ]
+Key: VMOVDDUPZrmk: [ 0.00 0.00 ]
+Key: VMOVDDUPZrmkz: [ 0.00 0.00 ]
+Key: VMOVDDUPZrr: [ 0.00 0.00 ]
+Key: VMOVDDUPZrrk: [ 0.00 0.00 ]
+Key: VMOVDDUPZrrkz: [ 0.00 0.00 ]
+Key: VMOVDDUPrm: [ 0.00 0.00 ]
+Key: VMOVDDUPrr: [ 0.00 0.00 ]
+Key: VMOVDI: [ 0.00 0.00 ]
+Key: VMOVDQA: [ 0.00 0.00 ]
+Key: VMOVDQAYmr: [ 0.00 0.00 ]
+Key: VMOVDQAYrm: [ 0.00 0.00 ]
+Key: VMOVDQAYrr: [ 0.00 0.00 ]
+Key: VMOVDQAYrr_REV: [ 0.00 0.00 ]
+Key: VMOVDQAmr: [ 0.00 0.00 ]
+Key: VMOVDQArm: [ 0.00 0.00 ]
+Key: VMOVDQArr: [ 0.00 0.00 ]
+Key: VMOVDQArr_REV: [ 0.00 0.00 ]
+Key: VMOVDQU: [ 0.00 0.00 ]
+Key: VMOVDQUYmr: [ 0.00 0.00 ]
+Key: VMOVDQUYrm: [ 0.00 0.00 ]
+Key: VMOVDQUYrr: [ 0.00 0.00 ]
+Key: VMOVDQUYrr_REV: [ 0.00 0.00 ]
+Key: VMOVDQUmr: [ 0.00 0.00 ]
+Key: VMOVDQUrm: [ 0.00 0.00 ]
+Key: VMOVDQUrr: [ 0.00 0.00 ]
+Key: VMOVDQUrr_REV: [ 0.00 0.00 ]
+Key: VMOVHLPSZrr: [ 0.00 0.00 ]
+Key: VMOVHLPSrr: [ 0.00 0.00 ]
+Key: VMOVHPDZ: [ 0.00 0.00 ]
+Key: VMOVHPDmr: [ 0.00 0.00 ]
+Key: VMOVHPDrm: [ 0.00 0.00 ]
+Key: VMOVHPSZ: [ 0.00 0.00 ]
+Key: VMOVHPSmr: [ 0.00 0.00 ]
+Key: VMOVHPSrm: [ 0.00 0.00 ]
+Key: VMOVLHPSZrr: [ 0.00 0.00 ]
+Key: VMOVLHPSrr: [ 0.00 0.00 ]
+Key: VMOVLPDZ: [ 0.00 0.00 ]
+Key: VMOVLPDmr: [ 0.00 0.00 ]
+Key: VMOVLPDrm: [ 0.00 0.00 ]
+Key: VMOVLPSZ: [ 0.00 0.00 ]
+Key: VMOVLPSmr: [ 0.00 0.00 ]
+Key: VMOVLPSrm: [ 0.00 0.00 ]
+Key: VMOVMSKPDYrr: [ 0.00 0.00 ]
+Key: VMOVMSKPDrr: [ 0.00 0.00 ]
+Key: VMOVMSKPSYrr: [ 0.00 0.00 ]
+Key: VMOVMSKPSrr: [ 0.00 0.00 ]
+Key: VMOVNTDQAYrm: [ 0.00 0.00 ]
+Key: VMOVNTDQAZ: [ 0.00 0.00 ]
+Key: VMOVNTDQAZrm: [ 0.00 0.00 ]
+Key: VMOVNTDQArm: [ 0.00 0.00 ]
+Key: VMOVNTDQYmr: [ 0.00 0.00 ]
+Key: VMOVNTDQZ: [ 0.00 0.00 ]
+Key: VMOVNTDQZmr: [ 0.00 0.00 ]
+Key: VMOVNTDQmr: [ 0.00 0.00 ]
+Key: VMOVNTPDYmr: [ 0.00 0.00 ]
+Key: VMOVNTPDZ: [ 0.00 0.00 ]
+Key: VMOVNTPDZmr: [ 0.00 0.00 ]
+Key: VMOVNTPDmr: [ 0.00 0.00 ]
+Key: VMOVNTPSYmr: [ 0.00 0.00 ]
+Key: VMOVNTPSZ: [ 0.00 0.00 ]
+Key: VMOVNTPSZmr: [ 0.00 0.00 ]
+Key: VMOVNTPSmr: [ 0.00 0.00 ]
+Key: VMOVPDI: [ 0.00 0.00 ]
+Key: VMOVPQI: [ 0.00 0.00 ]
+Key: VMOVPQIto: [ 0.00 0.00 ]
+Key: VMOVQI: [ 0.00 0.00 ]
+Key: VMOVRSBZ: [ 0.00 0.00 ]
+Key: VMOVRSBZm: [ 0.00 0.00 ]
+Key: VMOVRSBZmk: [ 0.00 0.00 ]
+Key: VMOVRSBZmkz: [ 0.00 0.00 ]
+Key: VMOVRSDZ: [ 0.00 0.00 ]
+Key: VMOVRSDZm: [ 0.00 0.00 ]
+Key: VMOVRSDZmk: [ 0.00 0.00 ]
+Key: VMOVRSDZmkz: [ 0.00 0.00 ]
+Key: VMOVRSQZ: [ 0.00 0.00 ]
+Key: VMOVRSQZm: [ 0.00 0.00 ]
+Key: VMOVRSQZmk: [ 0.00 0.00 ]
+Key: VMOVRSQZmkz: [ 0.00 0.00 ]
+Key: VMOVRSWZ: [ 0.00 0.00 ]
+Key: VMOVRSWZm: [ 0.00 0.00 ]
+Key: VMOVRSWZmk: [ 0.00 0.00 ]
+Key: VMOVRSWZmkz: [ 0.00 0.00 ]
+Key: VMOVSDZmr: [ 0.00 0.00 ]
+Key: VMOVSDZmrk: [ 0.00 0.00 ]
+Key: VMOVSDZrm: [ 0.00 0.00 ]
+Key: VMOVSDZrm_alt: [ 0.00 0.00 ]
+Key: VMOVSDZrmk: [ 0.00 0.00 ]
+Key: VMOVSDZrmkz: [ 0.00 0.00 ]
+Key: VMOVSDZrr: [ 0.00 0.00 ]
+Key: VMOVSDZrr_REV: [ 0.00 0.00 ]
+Key: VMOVSDZrrk: [ 0.00 0.00 ]
+Key: VMOVSDZrrk_REV: [ 0.00 0.00 ]
+Key: VMOVSDZrrkz: [ 0.00 0.00 ]
+Key: VMOVSDZrrkz_REV: [ 0.00 0.00 ]
+Key: VMOVSDmr: [ 0.00 0.00 ]
+Key: VMOVSDrm: [ 0.00 0.00 ]
+Key: VMOVSDrm_alt: [ 0.00 0.00 ]
+Key: VMOVSDrr: [ 0.00 0.00 ]
+Key: VMOVSDrr_REV: [ 0.00 0.00 ]
+Key: VMOVSDto: [ 0.00 0.00 ]
+Key: VMOVSH: [ 0.00 0.00 ]
+Key: VMOVSHDUPYrm: [ 0.00 0.00 ]
+Key: VMOVSHDUPYrr: [ 0.00 0.00 ]
+Key: VMOVSHDUPZ: [ 0.00 0.00 ]
+Key: VMOVSHDUPZrm: [ 0.00 0.00 ]
+Key: VMOVSHDUPZrmk: [ 0.00 0.00 ]
+Key: VMOVSHDUPZrmkz: [ 0.00 0.00 ]
+Key: VMOVSHDUPZrr: [ 0.00 0.00 ]
+Key: VMOVSHDUPZrrk: [ 0.00 0.00 ]
+Key: VMOVSHDUPZrrkz: [ 0.00 0.00 ]
+Key: VMOVSHDUPrm: [ 0.00 0.00 ]
+Key: VMOVSHDUPrr: [ 0.00 0.00 ]
+Key: VMOVSHZmr: [ 0.00 0.00 ]
+Key: VMOVSHZmrk: [ 0.00 0.00 ]
+Key: VMOVSHZrm: [ 0.00 0.00 ]
+Key: VMOVSHZrm_alt: [ 0.00 0.00 ]
+Key: VMOVSHZrmk: [ 0.00 0.00 ]
+Key: VMOVSHZrmkz: [ 0.00 0.00 ]
+Key: VMOVSHZrr: [ 0.00 0.00 ]
+Key: VMOVSHZrr_REV: [ 0.00 0.00 ]
+Key: VMOVSHZrrk: [ 0.00 0.00 ]
+Key: VMOVSHZrrk_REV: [ 0.00 0.00 ]
+Key: VMOVSHZrrkz: [ 0.00 0.00 ]
+Key: VMOVSHZrrkz_REV: [ 0.00 0.00 ]
+Key: VMOVSHtoW: [ 0.00 0.00 ]
+Key: VMOVSLDUPYrm: [ 0.00 0.00 ]
+Key: VMOVSLDUPYrr: [ 0.00 0.00 ]
+Key: VMOVSLDUPZ: [ 0.00 0.00 ]
+Key: VMOVSLDUPZrm: [ 0.00 0.00 ]
+Key: VMOVSLDUPZrmk: [ 0.00 0.00 ]
+Key: VMOVSLDUPZrmkz: [ 0.00 0.00 ]
+Key: VMOVSLDUPZrr: [ 0.00 0.00 ]
+Key: VMOVSLDUPZrrk: [ 0.00 0.00 ]
+Key: VMOVSLDUPZrrkz: [ 0.00 0.00 ]
+Key: VMOVSLDUPrm: [ 0.00 0.00 ]
+Key: VMOVSLDUPrr: [ 0.00 0.00 ]
+Key: VMOVSS: [ 0.00 0.00 ]
+Key: VMOVSSZmr: [ 0.00 0.00 ]
+Key: VMOVSSZmrk: [ 0.00 0.00 ]
+Key: VMOVSSZrm: [ 0.00 0.00 ]
+Key: VMOVSSZrm_alt: [ 0.00 0.00 ]
+Key: VMOVSSZrmk: [ 0.00 0.00 ]
+Key: VMOVSSZrmkz: [ 0.00 0.00 ]
+Key: VMOVSSZrr: [ 0.00 0.00 ]
+Key: VMOVSSZrr_REV: [ 0.00 0.00 ]
+Key: VMOVSSZrrk: [ 0.00 0.00 ]
+Key: VMOVSSZrrk_REV: [ 0.00 0.00 ]
+Key: VMOVSSZrrkz: [ 0.00 0.00 ]
+Key: VMOVSSZrrkz_REV: [ 0.00 0.00 ]
+Key: VMOVSSmr: [ 0.00 0.00 ]
+Key: VMOVSSrm: [ 0.00 0.00 ]
+Key: VMOVSSrm_alt: [ 0.00 0.00 ]
+Key: VMOVSSrr: [ 0.00 0.00 ]
+Key: VMOVSSrr_REV: [ 0.00 0.00 ]
+Key: VMOVUPDYmr: [ 0.00 0.00 ]
+Key: VMOVUPDYrm: [ 0.00 0.00 ]
+Key: VMOVUPDYrr: [ 0.00 0.00 ]
+Key: VMOVUPDYrr_REV: [ 0.00 0.00 ]
+Key: VMOVUPDZ: [ 0.00 0.00 ]
+Key: VMOVUPDZmr: [ 0.00 0.00 ]
+Key: VMOVUPDZmrk: [ 0.00 0.00 ]
+Key: VMOVUPDZrm: [ 0.00 0.00 ]
+Key: VMOVUPDZrmk: [ 0.00 0.00 ]
+Key: VMOVUPDZrmkz: [ 0.00 0.00 ]
+Key: VMOVUPDZrr: [ 0.00 0.00 ]
+Key: VMOVUPDZrr_REV: [ 0.00 0.00 ]
+Key: VMOVUPDZrrk: [ 0.00 0.00 ]
+Key: VMOVUPDZrrk_REV: [ 0.00 0.00 ]
+Key: VMOVUPDZrrkz: [ 0.00 0.00 ]
+Key: VMOVUPDZrrkz_REV: [ 0.00 0.00 ]
+Key: VMOVUPDmr: [ 0.00 0.00 ]
+Key: VMOVUPDrm: [ 0.00 0.00 ]
+Key: VMOVUPDrr: [ 0.00 0.00 ]
+Key: VMOVUPDrr_REV: [ 0.00 0.00 ]
+Key: VMOVUPSYmr: [ 0.00 0.00 ]
+Key: VMOVUPSYrm: [ 0.00 0.00 ]
+Key: VMOVUPSYrr: [ 0.00 0.00 ]
+Key: VMOVUPSYrr_REV: [ 0.00 0.00 ]
+Key: VMOVUPSZ: [ 0.00 0.00 ]
+Key: VMOVUPSZmr: [ 0.00 0.00 ]
+Key: VMOVUPSZmrk: [ 0.00 0.00 ]
+Key: VMOVUPSZrm: [ 0.00 0.00 ]
+Key: VMOVUPSZrmk: [ 0.00 0.00 ]
+Key: VMOVUPSZrmkz: [ 0.00 0.00 ]
+Key: VMOVUPSZrr: [ 0.00 0.00 ]
+Key: VMOVUPSZrr_REV: [ 0.00 0.00 ]
+Key: VMOVUPSZrrk: [ 0.00 0.00 ]
+Key: VMOVUPSZrrk_REV: [ 0.00 0.00 ]
+Key: VMOVUPSZrrkz: [ 0.00 0.00 ]
+Key: VMOVUPSZrrkz_REV: [ 0.00 0.00 ]
+Key: VMOVUPSmr: [ 0.00 0.00 ]
+Key: VMOVUPSrm: [ 0.00 0.00 ]
+Key: VMOVUPSrr: [ 0.00 0.00 ]
+Key: VMOVUPSrr_REV: [ 0.00 0.00 ]
+Key: VMOVW: [ 0.00 0.00 ]
+Key: VMOVWmr: [ 0.00 0.00 ]
+Key: VMOVWrm: [ 0.00 0.00 ]
+Key: VMOVZPDILo: [ 0.00 0.00 ]
+Key: VMOVZPQILo: [ 0.00 0.00 ]
+Key: VMOVZPWILo: [ 0.00 0.00 ]
+Key: VMPSADBWYrmi: [ 0.00 0.00 ]
+Key: VMPSADBWYrri: [ 0.00 0.00 ]
+Key: VMPSADBWZ: [ 0.00 0.00 ]
+Key: VMPSADBWZrmi: [ 0.00 0.00 ]
+Key: VMPSADBWZrmik: [ 0.00 0.00 ]
+Key: VMPSADBWZrmikz: [ 0.00 0.00 ]
+Key: VMPSADBWZrri: [ 0.00 0.00 ]
+Key: VMPSADBWZrrik: [ 0.00 0.00 ]
+Key: VMPSADBWZrrikz: [ 0.00 0.00 ]
+Key: VMPSADBWrmi: [ 0.00 0.00 ]
+Key: VMPSADBWrri: [ 0.00 0.00 ]
+Key: VMPTRLDm: [ 0.00 0.00 ]
+Key: VMPTRSTm: [ 0.00 0.00 ]
+Key: VMREAD: [ 0.00 0.00 ]
+Key: VMRESUME: [ 0.00 0.00 ]
+Key: VMRUN: [ 0.00 0.00 ]
+Key: VMSAVE: [ 0.00 0.00 ]
+Key: VMULBF: [ 0.00 0.00 ]
+Key: VMULPDYrm: [ 0.00 0.00 ]
+Key: VMULPDYrr: [ 0.00 0.00 ]
+Key: VMULPDZ: [ 0.00 0.00 ]
+Key: VMULPDZrm: [ 0.00 0.00 ]
+Key: VMULPDZrmb: [ 0.00 0.00 ]
+Key: VMULPDZrmbk: [ 0.00 0.00 ]
+Key: VMULPDZrmbkz: [ 0.00 0.00 ]
+Key: VMULPDZrmk: [ 0.00 0.00 ]
+Key: VMULPDZrmkz: [ 0.00 0.00 ]
+Key: VMULPDZrr: [ 0.00 0.00 ]
+Key: VMULPDZrrb: [ 0.00 0.00 ]
+Key: VMULPDZrrbk: [ 0.00 0.00 ]
+Key: VMULPDZrrbkz: [ 0.00 0.00 ]
+Key: VMULPDZrrk: [ 0.00 0.00 ]
+Key: VMULPDZrrkz: [ 0.00 0.00 ]
+Key: VMULPDrm: [ 0.00 0.00 ]
+Key: VMULPDrr: [ 0.00 0.00 ]
+Key: VMULPHZ: [ 0.00 0.00 ]
+Key: VMULPHZrm: [ 0.00 0.00 ]
+Key: VMULPHZrmb: [ 0.00 0.00 ]
+Key: VMULPHZrmbk: [ 0.00 0.00 ]
+Key: VMULPHZrmbkz: [ 0.00 0.00 ]
+Key: VMULPHZrmk: [ 0.00 0.00 ]
+Key: VMULPHZrmkz: [ 0.00 0.00 ]
+Key: VMULPHZrr: [ 0.00 0.00 ]
+Key: VMULPHZrrb: [ 0.00 0.00 ]
+Key: VMULPHZrrbk: [ 0.00 0.00 ]
+Key: VMULPHZrrbkz: [ 0.00 0.00 ]
+Key: VMULPHZrrk: [ 0.00 0.00 ]
+Key: VMULPHZrrkz: [ 0.00 0.00 ]
+Key: VMULPSYrm: [ 0.00 0.00 ]
+Key: VMULPSYrr: [ 0.00 0.00 ]
+Key: VMULPSZ: [ 0.00 0.00 ]
+Key: VMULPSZrm: [ 0.00 0.00 ]
+Key: VMULPSZrmb: [ 0.00 0.00 ]
+Key: VMULPSZrmbk: [ 0.00 0.00 ]
+Key: VMULPSZrmbkz: [ 0.00 0.00 ]
+Key: VMULPSZrmk: [ 0.00 0.00 ]
+Key: VMULPSZrmkz: [ 0.00 0.00 ]
+Key: VMULPSZrr: [ 0.00 0.00 ]
+Key: VMULPSZrrb: [ 0.00 0.00 ]
+Key: VMULPSZrrbk: [ 0.00 0.00 ]
+Key: VMULPSZrrbkz: [ 0.00 0.00 ]
+Key: VMULPSZrrk: [ 0.00 0.00 ]
+Key: VMULPSZrrkz: [ 0.00 0.00 ]
+Key: VMULPSrm: [ 0.00 0.00 ]
+Key: VMULPSrr: [ 0.00 0.00 ]
+Key: VMULSDZrm: [ 0.00 0.00 ]
+Key: VMULSDZrm_Int: [ 0.00 0.00 ]
+Key: VMULSDZrmk_Int: [ 0.00 0.00 ]
+Key: VMULSDZrmkz_Int: [ 0.00 0.00 ]
+Key: VMULSDZrr: [ 0.00 0.00 ]
+Key: VMULSDZrr_Int: [ 0.00 0.00 ]
+Key: VMULSDZrrb_Int: [ 0.00 0.00 ]
+Key: VMULSDZrrbk_Int: [ 0.00 0.00 ]
+Key: VMULSDZrrbkz_Int: [ 0.00 0.00 ]
+Key: VMULSDZrrk_Int: [ 0.00 0.00 ]
+Key: VMULSDZrrkz_Int: [ 0.00 0.00 ]
+Key: VMULSDrm: [ 0.00 0.00 ]
+Key: VMULSDrm_Int: [ 0.00 0.00 ]
+Key: VMULSDrr: [ 0.00 0.00 ]
+Key: VMULSDrr_Int: [ 0.00 0.00 ]
+Key: VMULSHZrm: [ 0.00 0.00 ]
+Key: VMULSHZrm_Int: [ 0.00 0.00 ]
+Key: VMULSHZrmk_Int: [ 0.00 0.00 ]
+Key: VMULSHZrmkz_Int: [ 0.00 0.00 ]
+Key: VMULSHZrr: [ 0.00 0.00 ]
+Key: VMULSHZrr_Int: [ 0.00 0.00 ]
+Key: VMULSHZrrb_Int: [ 0.00 0.00 ]
+Key: VMULSHZrrbk_Int: [ 0.00 0.00 ]
+Key: VMULSHZrrbkz_Int: [ 0.00 0.00 ]
+Key: VMULSHZrrk_Int: [ 0.00 0.00 ]
+Key: VMULSHZrrkz_Int: [ 0.00 0.00 ]
+Key: VMULSSZrm: [ 0.00 0.00 ]
+Key: VMULSSZrm_Int: [ 0.00 0.00 ]
+Key: VMULSSZrmk_Int: [ 0.00 0.00 ]
+Key: VMULSSZrmkz_Int: [ 0.00 0.00 ]
+Key: VMULSSZrr: [ 0.00 0.00 ]
+Key: VMULSSZrr_Int: [ 0.00 0.00 ]
+Key: VMULSSZrrb_Int: [ 0.00 0.00 ]
+Key: VMULSSZrrbk_Int: [ 0.00 0.00 ]
+Key: VMULSSZrrbkz_Int: [ 0.00 0.00 ]
+Key: VMULSSZrrk_Int: [ 0.00 0.00 ]
+Key: VMULSSZrrkz_Int: [ 0.00 0.00 ]
+Key: VMULSSrm: [ 0.00 0.00 ]
+Key: VMULSSrm_Int: [ 0.00 0.00 ]
+Key: VMULSSrr: [ 0.00 0.00 ]
+Key: VMULSSrr_Int: [ 0.00 0.00 ]
+Key: VMWRITE: [ 0.00 0.00 ]
+Key: VMXOFF: [ 0.00 0.00 ]
+Key: VMXON: [ 0.00 0.00 ]
+Key: VORPDYrm: [ 0.00 0.00 ]
+Key: VORPDYrr: [ 0.00 0.00 ]
+Key: VORPDZ: [ 0.00 0.00 ]
+Key: VORPDZrm: [ 0.00 0.00 ]
+Key: VORPDZrmb: [ 0.00 0.00 ]
+Key: VORPDZrmbk: [ 0.00 0.00 ]
+Key: VORPDZrmbkz: [ 0.00 0.00 ]
+Key: VORPDZrmk: [ 0.00 0.00 ]
+Key: VORPDZrmkz: [ 0.00 0.00 ]
+Key: VORPDZrr: [ 0.00 0.00 ]
+Key: VORPDZrrk: [ 0.00 0.00 ]
+Key: VORPDZrrkz: [ 0.00 0.00 ]
+Key: VORPDrm: [ 0.00 0.00 ]
+Key: VORPDrr: [ 0.00 0.00 ]
+Key: VORPSYrm: [ 0.00 0.00 ]
+Key: VORPSYrr: [ 0.00 0.00 ]
+Key: VORPSZ: [ 0.00 0.00 ]
+Key: VORPSZrm: [ 0.00 0.00 ]
+Key: VORPSZrmb: [ 0.00 0.00 ]
+Key: VORPSZrmbk: [ 0.00 0.00 ]
+Key: VORPSZrmbkz: [ 0.00 0.00 ]
+Key: VORPSZrmk: [ 0.00 0.00 ]
+Key: VORPSZrmkz: [ 0.00 0.00 ]
+Key: VORPSZrr: [ 0.00 0.00 ]
+Key: VORPSZrrk: [ 0.00 0.00 ]
+Key: VORPSZrrkz: [ 0.00 0.00 ]
+Key: VORPSrm: [ 0.00 0.00 ]
+Key: VORPSrr: [ 0.00 0.00 ]
+Key: VP: [ 0.00 0.00 ]
+Key: VPABSBYrm: [ 0.00 0.00 ]
+Key: VPABSBYrr: [ 0.00 0.00 ]
+Key: VPABSBZ: [ 0.00 0.00 ]
+Key: VPABSBZrm: [ 0.00 0.00 ]
+Key: VPABSBZrmk: [ 0.00 0.00 ]
+Key: VPABSBZrmkz: [ 0.00 0.00 ]
+Key: VPABSBZrr: [ 0.00 0.00 ]
+Key: VPABSBZrrk: [ 0.00 0.00 ]
+Key: VPABSBZrrkz: [ 0.00 0.00 ]
+Key: VPABSBrm: [ 0.00 0.00 ]
+Key: VPABSBrr: [ 0.00 0.00 ]
+Key: VPABSDYrm: [ 0.00 0.00 ]
+Key: VPABSDYrr: [ 0.00 0.00 ]
+Key: VPABSDZ: [ 0.00 0.00 ]
+Key: VPABSDZrm: [ 0.00 0.00 ]
+Key: VPABSDZrmb: [ 0.00 0.00 ]
+Key: VPABSDZrmbk: [ 0.00 0.00 ]
+Key: VPABSDZrmbkz: [ 0.00 0.00 ]
+Key: VPABSDZrmk: [ 0.00 0.00 ]
+Key: VPABSDZrmkz: [ 0.00 0.00 ]
+Key: VPABSDZrr: [ 0.00 0.00 ]
+Key: VPABSDZrrk: [ 0.00 0.00 ]
+Key: VPABSDZrrkz: [ 0.00 0.00 ]
+Key: VPABSDrm: [ 0.00 0.00 ]
+Key: VPABSDrr: [ 0.00 0.00 ]
+Key: VPABSQZ: [ 0.00 0.00 ]
+Key: VPABSQZrm: [ 0.00 0.00 ]
+Key: VPABSQZrmb: [ 0.00 0.00 ]
+Key: VPABSQZrmbk: [ 0.00 0.00 ]
+Key: VPABSQZrmbkz: [ 0.00 0.00 ]
+Key: VPABSQZrmk: [ 0.00 0.00 ]
+Key: VPABSQZrmkz: [ 0.00 0.00 ]
+Key: VPABSQZrr: [ 0.00 0.00 ]
+Key: VPABSQZrrk: [ 0.00 0.00 ]
+Key: VPABSQZrrkz: [ 0.00 0.00 ]
+Key: VPABSWYrm: [ 0.00 0.00 ]
+Key: VPABSWYrr: [ 0.00 0.00 ]
+Key: VPABSWZ: [ 0.00 0.00 ]
+Key: VPABSWZrm: [ 0.00 0.00 ]
+Key: VPABSWZrmk: [ 0.00 0.00 ]
+Key: VPABSWZrmkz: [ 0.00 0.00 ]
+Key: VPABSWZrr: [ 0.00 0.00 ]
+Key: VPABSWZrrk: [ 0.00 0.00 ]
+Key: VPABSWZrrkz: [ 0.00 0.00 ]
+Key: VPABSWrm: [ 0.00 0.00 ]
+Key: VPABSWrr: [ 0.00 0.00 ]
+Key: VPACKSSDWYrm: [ 0.00 0.00 ]
+Key: VPACKSSDWYrr: [ 0.00 0.00 ]
+Key: VPACKSSDWZ: [ 0.00 0.00 ]
+Key: VPACKSSDWZrm: [ 0.00 0.00 ]
+Key: VPACKSSDWZrmb: [ 0.00 0.00 ]
+Key: VPACKSSDWZrmbk: [ 0.00 0.00 ]
+Key: VPACKSSDWZrmbkz: [ 0.00 0.00 ]
+Key: VPACKSSDWZrmk: [ 0.00 0.00 ]
+Key: VPACKSSDWZrmkz: [ 0.00 0.00 ]
+Key: VPACKSSDWZrr: [ 0.00 0.00 ]
+Key: VPACKSSDWZrrk: [ 0.00 0.00 ]
+Key: VPACKSSDWZrrkz: [ 0.00 0.00 ]
+Key: VPACKSSDWrm: [ 0.00 0.00 ]
+Key: VPACKSSDWrr: [ 0.00 0.00 ]
+Key: VPACKSSWBYrm: [ 0.00 0.00 ]
+Key: VPACKSSWBYrr: [ 0.00 0.00 ]
+Key: VPACKSSWBZ: [ 0.00 0.00 ]
+Key: VPACKSSWBZrm: [ 0.00 0.00 ]
+Key: VPACKSSWBZrmk: [ 0.00 0.00 ]
+Key: VPACKSSWBZrmkz: [ 0.00 0.00 ]
+Key: VPACKSSWBZrr: [ 0.00 0.00 ]
+Key: VPACKSSWBZrrk: [ 0.00 0.00 ]
+Key: VPACKSSWBZrrkz: [ 0.00 0.00 ]
+Key: VPACKSSWBrm: [ 0.00 0.00 ]
+Key: VPACKSSWBrr: [ 0.00 0.00 ]
+Key: VPACKUSDWYrm: [ 0.00 0.00 ]
+Key: VPACKUSDWYrr: [ 0.00 0.00 ]
+Key: VPACKUSDWZ: [ 0.00 0.00 ]
+Key: VPACKUSDWZrm: [ 0.00 0.00 ]
+Key: VPACKUSDWZrmb: [ 0.00 0.00 ]
+Key: VPACKUSDWZrmbk: [ 0.00 0.00 ]
+Key: VPACKUSDWZrmbkz: [ 0.00 0.00 ]
+Key: VPACKUSDWZrmk: [ 0.00 0.00 ]
+Key: VPACKUSDWZrmkz: [ 0.00 0.00 ]
+Key: VPACKUSDWZrr: [ 0.00 0.00 ]
+Key: VPACKUSDWZrrk: [ 0.00 0.00 ]
+Key: VPACKUSDWZrrkz: [ 0.00 0.00 ]
+Key: VPACKUSDWrm: [ 0.00 0.00 ]
+Key: VPACKUSDWrr: [ 0.00 0.00 ]
+Key: VPACKUSWBYrm: [ 0.00 0.00 ]
+Key: VPACKUSWBYrr: [ 0.00 0.00 ]
+Key: VPACKUSWBZ: [ 0.00 0.00 ]
+Key: VPACKUSWBZrm: [ 0.00 0.00 ]
+Key: VPACKUSWBZrmk: [ 0.00 0.00 ]
+Key: VPACKUSWBZrmkz: [ 0.00 0.00 ]
+Key: VPACKUSWBZrr: [ 0.00 0.00 ]
+Key: VPACKUSWBZrrk: [ 0.00 0.00 ]
+Key: VPACKUSWBZrrkz: [ 0.00 0.00 ]
+Key: VPACKUSWBrm: [ 0.00 0.00 ]
+Key: VPACKUSWBrr: [ 0.00 0.00 ]
+Key: VPADDBYrm: [ 0.00 0.00 ]
+Key: VPADDBYrr: [ 0.00 0.00 ]
+Key: VPADDBZ: [ 0.00 0.00 ]
+Key: VPADDBZrm: [ 0.00 0.00 ]
+Key: VPADDBZrmk: [ 0.00 0.00 ]
+Key: VPADDBZrmkz: [ 0.00 0.00 ]
+Key: VPADDBZrr: [ 0.00 0.00 ]
+Key: VPADDBZrrk: [ 0.00 0.00 ]
+Key: VPADDBZrrkz: [ 0.00 0.00 ]
+Key: VPADDBrm: [ 0.00 0.00 ]
+Key: VPADDBrr: [ 0.00 0.00 ]
+Key: VPADDDYrm: [ 0.00 0.00 ]
+Key: VPADDDYrr: [ 0.00 0.00 ]
+Key: VPADDDZ: [ 0.00 0.00 ]
+Key: VPADDDZrm: [ 0.00 0.00 ]
+Key: VPADDDZrmb: [ 0.00 0.00 ]
+Key: VPADDDZrmbk: [ 0.00 0.00 ]
+Key: VPADDDZrmbkz: [ 0.00 0.00 ]
+Key: VPADDDZrmk: [ 0.00 0.00 ]
+Key: VPADDDZrmkz: [ 0.00 0.00 ]
+Key: VPADDDZrr: [ 0.00 0.00 ]
+Key: VPADDDZrrk: [ 0.00 0.00 ]
+Key: VPADDDZrrkz: [ 0.00 0.00 ]
+Key: VPADDDrm: [ 0.00 0.00 ]
+Key: VPADDDrr: [ 0.00 0.00 ]
+Key: VPADDQYrm: [ 0.00 0.00 ]
+Key: VPADDQYrr: [ 0.00 0.00 ]
+Key: VPADDQZ: [ 0.00 0.00 ]
+Key: VPADDQZrm: [ 0.00 0.00 ]
+Key: VPADDQZrmb: [ 0.00 0.00 ]
+Key: VPADDQZrmbk: [ 0.00 0.00 ]
+Key: VPADDQZrmbkz: [ 0.00 0.00 ]
+Key: VPADDQZrmk: [ 0.00 0.00 ]
+Key: VPADDQZrmkz: [ 0.00 0.00 ]
+Key: VPADDQZrr: [ 0.00 0.00 ]
+Key: VPADDQZrrk: [ 0.00 0.00 ]
+Key: VPADDQZrrkz: [ 0.00 0.00 ]
+Key: VPADDQrm: [ 0.00 0.00 ]
+Key: VPADDQrr: [ 0.00 0.00 ]
+Key: VPADDSBYrm: [ 0.00 0.00 ]
+Key: VPADDSBYrr: [ 0.00 0.00 ]
+Key: VPADDSBZ: [ 0.00 0.00 ]
+Key: VPADDSBZrm: [ 0.00 0.00 ]
+Key: VPADDSBZrmk: [ 0.00 0.00 ]
+Key: VPADDSBZrmkz: [ 0.00 0.00 ]
+Key: VPADDSBZrr: [ 0.00 0.00 ]
+Key: VPADDSBZrrk: [ 0.00 0.00 ]
+Key: VPADDSBZrrkz: [ 0.00 0.00 ]
+Key: VPADDSBrm: [ 0.00 0.00 ]
+Key: VPADDSBrr: [ 0.00 0.00 ]
+Key: VPADDSWYrm: [ 0.00 0.00 ]
+Key: VPADDSWYrr: [ 0.00 0.00 ]
+Key: VPADDSWZ: [ 0.00 0.00 ]
+Key: VPADDSWZrm: [ 0.00 0.00 ]
+Key: VPADDSWZrmk: [ 0.00 0.00 ]
+Key: VPADDSWZrmkz: [ 0.00 0.00 ]
+Key: VPADDSWZrr: [ 0.00 0.00 ]
+Key: VPADDSWZrrk: [ 0.00 0.00 ]
+Key: VPADDSWZrrkz: [ 0.00 0.00 ]
+Key: VPADDSWrm: [ 0.00 0.00 ]
+Key: VPADDSWrr: [ 0.00 0.00 ]
+Key: VPADDUSBYrm: [ 0.00 0.00 ]
+Key: VPADDUSBYrr: [ 0.00 0.00 ]
+Key: VPADDUSBZ: [ 0.00 0.00 ]
+Key: VPADDUSBZrm: [ 0.00 0.00 ]
+Key: VPADDUSBZrmk: [ 0.00 0.00 ]
+Key: VPADDUSBZrmkz: [ 0.00 0.00 ]
+Key: VPADDUSBZrr: [ 0.00 0.00 ]
+Key: VPADDUSBZrrk: [ 0.00 0.00 ]
+Key: VPADDUSBZrrkz: [ 0.00 0.00 ]
+Key: VPADDUSBrm: [ 0.00 0.00 ]
+Key: VPADDUSBrr: [ 0.00 0.00 ]
+Key: VPADDUSWYrm: [ 0.00 0.00 ]
+Key: VPADDUSWYrr: [ 0.00 0.00 ]
+Key: VPADDUSWZ: [ 0.00 0.00 ]
+Key: VPADDUSWZrm: [ 0.00 0.00 ]
+Key: VPADDUSWZrmk: [ 0.00 0.00 ]
+Key: VPADDUSWZrmkz: [ 0.00 0.00 ]
+Key: VPADDUSWZrr: [ 0.00 0.00 ]
+Key: VPADDUSWZrrk: [ 0.00 0.00 ]
+Key: VPADDUSWZrrkz: [ 0.00 0.00 ]
+Key: VPADDUSWrm: [ 0.00 0.00 ]
+Key: VPADDUSWrr: [ 0.00 0.00 ]
+Key: VPADDWYrm: [ 0.00 0.00 ]
+Key: VPADDWYrr: [ 0.00 0.00 ]
+Key: VPADDWZ: [ 0.00 0.00 ]
+Key: VPADDWZrm: [ 0.00 0.00 ]
+Key: VPADDWZrmk: [ 0.00 0.00 ]
+Key: VPADDWZrmkz: [ 0.00 0.00 ]
+Key: VPADDWZrr: [ 0.00 0.00 ]
+Key: VPADDWZrrk: [ 0.00 0.00 ]
+Key: VPADDWZrrkz: [ 0.00 0.00 ]
+Key: VPADDWrm: [ 0.00 0.00 ]
+Key: VPADDWrr: [ 0.00 0.00 ]
+Key: VPALIGNRYrmi: [ 0.00 0.00 ]
+Key: VPALIGNRYrri: [ 0.00 0.00 ]
+Key: VPALIGNRZ: [ 0.00 0.00 ]
+Key: VPALIGNRZrmi: [ 0.00 0.00 ]
+Key: VPALIGNRZrmik: [ 0.00 0.00 ]
+Key: VPALIGNRZrmikz: [ 0.00 0.00 ]
+Key: VPALIGNRZrri: [ 0.00 0.00 ]
+Key: VPALIGNRZrrik: [ 0.00 0.00 ]
+Key: VPALIGNRZrrikz: [ 0.00 0.00 ]
+Key: VPALIGNRrmi: [ 0.00 0.00 ]
+Key: VPALIGNRrri: [ 0.00 0.00 ]
+Key: VPANDDZ: [ 0.00 0.00 ]
+Key: VPANDDZrm: [ 0.00 0.00 ]
+Key: VPANDDZrmb: [ 0.00 0.00 ]
+Key: VPANDDZrmbk: [ 0.00 0.00 ]
+Key: VPANDDZrmbkz: [ 0.00 0.00 ]
+Key: VPANDDZrmk: [ 0.00 0.00 ]
+Key: VPANDDZrmkz: [ 0.00 0.00 ]
+Key: VPANDDZrr: [ 0.00 0.00 ]
+Key: VPANDDZrrk: [ 0.00 0.00 ]
+Key: VPANDDZrrkz: [ 0.00 0.00 ]
+Key: VPANDNDZ: [ 0.00 0.00 ]
+Key: VPANDNDZrm: [ 0.00 0.00 ]
+Key: VPANDNDZrmb: [ 0.00 0.00 ]
+Key: VPANDNDZrmbk: [ 0.00 0.00 ]
+Key: VPANDNDZrmbkz: [ 0.00 0.00 ]
+Key: VPANDNDZrmk: [ 0.00 0.00 ]
+Key: VPANDNDZrmkz: [ 0.00 0.00 ]
+Key: VPANDNDZrr: [ 0.00 0.00 ]
+Key: VPANDNDZrrk: [ 0.00 0.00 ]
+Key: VPANDNDZrrkz: [ 0.00 0.00 ]
+Key: VPANDNQZ: [ 0.00 0.00 ]
+Key: VPANDNQZrm: [ 0.00 0.00 ]
+Key: VPANDNQZrmb: [ 0.00 0.00 ]
+Key: VPANDNQZrmbk: [ 0.00 0.00 ]
+Key: VPANDNQZrmbkz: [ 0.00 0.00 ]
+Key: VPANDNQZrmk: [ 0.00 0.00 ]
+Key: VPANDNQZrmkz: [ 0.00 0.00 ]
+Key: VPANDNQZrr: [ 0.00 0.00 ]
+Key: VPANDNQZrrk: [ 0.00 0.00 ]
+Key: VPANDNQZrrkz: [ 0.00 0.00 ]
+Key: VPANDNYrm: [ 0.00 0.00 ]
+Key: VPANDNYrr: [ 0.00 0.00 ]
+Key: VPANDNrm: [ 0.00 0.00 ]
+Key: VPANDNrr: [ 0.00 0.00 ]
+Key: VPANDQZ: [ 0.00 0.00 ]
+Key: VPANDQZrm: [ 0.00 0.00 ]
+Key: VPANDQZrmb: [ 0.00 0.00 ]
+Key: VPANDQZrmbk: [ 0.00 0.00 ]
+Key: VPANDQZrmbkz: [ 0.00 0.00 ]
+Key: VPANDQZrmk: [ 0.00 0.00 ]
+Key: VPANDQZrmkz: [ 0.00 0.00 ]
+Key: VPANDQZrr: [ 0.00 0.00 ]
+Key: VPANDQZrrk: [ 0.00 0.00 ]
+Key: VPANDQZrrkz: [ 0.00 0.00 ]
+Key: VPANDYrm: [ 0.00 0.00 ]
+Key: VPANDYrr: [ 0.00 0.00 ]
+Key: VPANDrm: [ 0.00 0.00 ]
+Key: VPANDrr: [ 0.00 0.00 ]
+Key: VPAVGBYrm: [ 0.00 0.00 ]
+Key: VPAVGBYrr: [ 0.00 0.00 ]
+Key: VPAVGBZ: [ 0.00 0.00 ]
+Key: VPAVGBZrm: [ 0.00 0.00 ]
+Key: VPAVGBZrmk: [ 0.00 0.00 ]
+Key: VPAVGBZrmkz: [ 0.00 0.00 ]
+Key: VPAVGBZrr: [ 0.00 0.00 ]
+Key: VPAVGBZrrk: [ 0.00 0.00 ]
+Key: VPAVGBZrrkz: [ 0.00 0.00 ]
+Key: VPAVGBrm: [ 0.00 0.00 ]
+Key: VPAVGBrr: [ 0.00 0.00 ]
+Key: VPAVGWYrm: [ 0.00 0.00 ]
+Key: VPAVGWYrr: [ 0.00 0.00 ]
+Key: VPAVGWZ: [ 0.00 0.00 ]
+Key: VPAVGWZrm: [ 0.00 0.00 ]
+Key: VPAVGWZrmk: [ 0.00 0.00 ]
+Key: VPAVGWZrmkz: [ 0.00 0.00 ]
+Key: VPAVGWZrr: [ 0.00 0.00 ]
+Key: VPAVGWZrrk: [ 0.00 0.00 ]
+Key: VPAVGWZrrkz: [ 0.00 0.00 ]
+Key: VPAVGWrm: [ 0.00 0.00 ]
+Key: VPAVGWrr: [ 0.00 0.00 ]
+Key: VPBLENDDYrmi: [ 0.00 0.00 ]
+Key: VPBLENDDYrri: [ 0.00 0.00 ]
+Key: VPBLENDDrmi: [ 0.00 0.00 ]
+Key: VPBLENDDrri: [ 0.00 0.00 ]
+Key: VPBLENDMBZ: [ 0.00 0.00 ]
+Key: VPBLENDMBZrm: [ 0.00 0.00 ]
+Key: VPBLENDMBZrmk: [ 0.00 0.00 ]
+Key: VPBLENDMBZrmkz: [ 0.00 0.00 ]
+Key: VPBLENDMBZrr: [ 0.00 0.00 ]
+Key: VPBLENDMBZrrk: [ 0.00 0.00 ]
+Key: VPBLENDMBZrrkz: [ 0.00 0.00 ]
+Key: VPBLENDMDZ: [ 0.00 0.00 ]
+Key: VPBLENDMDZrm: [ 0.00 0.00 ]
+Key: VPBLENDMDZrmb: [ 0.00 0.00 ]
+Key: VPBLENDMDZrmbk: [ 0.00 0.00 ]
+Key: VPBLENDMDZrmbkz: [ 0.00 0.00 ]
+Key: VPBLENDMDZrmk: [ 0.00 0.00 ]
+Key: VPBLENDMDZrmkz: [ 0.00 0.00 ]
+Key: VPBLENDMDZrr: [ 0.00 0.00 ]
+Key: VPBLENDMDZrrk: [ 0.00 0.00 ]
+Key: VPBLENDMDZrrkz: [ 0.00 0.00 ]
+Key: VPBLENDMQZ: [ 0.00 0.00 ]
+Key: VPBLENDMQZrm: [ 0.00 0.00 ]
+Key: VPBLENDMQZrmb: [ 0.00 0.00 ]
+Key: VPBLENDMQZrmbk: [ 0.00 0.00 ]
+Key: VPBLENDMQZrmbkz: [ 0.00 0.00 ]
+Key: VPBLENDMQZrmk: [ 0.00 0.00 ]
+Key: VPBLENDMQZrmkz: [ 0.00 0.00 ]
+Key: VPBLENDMQZrr: [ 0.00 0.00 ]
+Key: VPBLENDMQZrrk: [ 0.00 0.00 ]
+Key: VPBLENDMQZrrkz: [ 0.00 0.00 ]
+Key: VPBLENDMWZ: [ 0.00 0.00 ]
+Key: VPBLENDMWZrm: [ 0.00 0.00 ]
+Key: VPBLENDMWZrmk: [ 0.00 0.00 ]
+Key: VPBLENDMWZrmkz: [ 0.00 0.00 ]
+Key: VPBLENDMWZrr: [ 0.00 0.00 ]
+Key: VPBLENDMWZrrk: [ 0.00 0.00 ]
+Key: VPBLENDMWZrrkz: [ 0.00 0.00 ]
+Key: VPBLENDVBYrmr: [ 0.00 0.00 ]
+Key: VPBLENDVBYrrr: [ 0.00 0.00 ]
+Key: VPBLENDVBrmr: [ 0.00 0.00 ]
+Key: VPBLENDVBrrr: [ 0.00 0.00 ]
+Key: VPBLENDWYrmi: [ 0.00 0.00 ]
+Key: VPBLENDWYrri: [ 0.00 0.00 ]
+Key: VPBLENDWrmi: [ 0.00 0.00 ]
+Key: VPBLENDWrri: [ 0.00 0.00 ]
+Key: VPBROADCASTBYrm: [ 0.00 0.00 ]
+Key: VPBROADCASTBYrr: [ 0.00 0.00 ]
+Key: VPBROADCASTBZ: [ 0.00 0.00 ]
+Key: VPBROADCASTBZrm: [ 0.00 0.00 ]
+Key: VPBROADCASTBZrmk: [ 0.00 0.00 ]
+Key: VPBROADCASTBZrmkz: [ 0.00 0.00 ]
+Key: VPBROADCASTBZrr: [ 0.00 0.00 ]
+Key: VPBROADCASTBZrrk: [ 0.00 0.00 ]
+Key: VPBROADCASTBZrrkz: [ 0.00 0.00 ]
+Key: VPBROADCASTBrZ: [ 0.00 0.00 ]
+Key: VPBROADCASTBrZrr: [ 0.00 0.00 ]
+Key: VPBROADCASTBrZrrk: [ 0.00 0.00 ]
+Key: VPBROADCASTBrZrrkz: [ 0.00 0.00 ]
+Key: VPBROADCASTBrm: [ 0.00 0.00 ]
+Key: VPBROADCASTBrr: [ 0.00 0.00 ]
+Key: VPBROADCASTDYrm: [ 0.00 0.00 ]
+Key: VPBROADCASTDYrr: [ 0.00 0.00 ]
+Key: VPBROADCASTDZ: [ 0.00 0.00 ]
+Key: VPBROADCASTDZrm: [ 0.00 0.00 ]
+Key: VPBROADCASTDZrmk: [ 0.00 0.00 ]
+Key: VPBROADCASTDZrmkz: [ 0.00 0.00 ]
+Key: VPBROADCASTDZrr: [ 0.00 0.00 ]
+Key: VPBROADCASTDZrrk: [ 0.00 0.00 ]
+Key: VPBROADCASTDZrrkz: [ 0.00 0.00 ]
+Key: VPBROADCASTDrZ: [ 0.00 0.00 ]
+Key: VPBROADCASTDrZrr: [ 0.00 0.00 ]
+Key: VPBROADCASTDrZrrk: [ 0.00 0.00 ]
+Key: VPBROADCASTDrZrrkz: [ 0.00 0.00 ]
+Key: VPBROADCASTDrm: [ 0.00 0.00 ]
+Key: VPBROADCASTDrr: [ 0.00 0.00 ]
+Key: VPBROADCASTMB: [ 0.00 0.00 ]
+Key: VPBROADCASTMW: [ 0.00 0.00 ]
+Key: VPBROADCASTQYrm: [ 0.00 0.00 ]
+Key: VPBROADCASTQYrr: [ 0.00 0.00 ]
+Key: VPBROADCASTQZ: [ 0.00 0.00 ]
+Key: VPBROADCASTQZrm: [ 0.00 0.00 ]
+Key: VPBROADCASTQZrmk: [ 0.00 0.00 ]
+Key: VPBROADCASTQZrmkz: [ 0.00 0.00 ]
+Key: VPBROADCASTQZrr: [ 0.00 0.00 ]
+Key: VPBROADCASTQZrrk: [ 0.00 0.00 ]
+Key: VPBROADCASTQZrrkz: [ 0.00 0.00 ]
+Key: VPBROADCASTQrZ: [ 0.00 0.00 ]
+Key: VPBROADCASTQrZrr: [ 0.00 0.00 ]
+Key: VPBROADCASTQrZrrk: [ 0.00 0.00 ]
+Key: VPBROADCASTQrZrrkz: [ 0.00 0.00 ]
+Key: VPBROADCASTQrm: [ 0.00 0.00 ]
+Key: VPBROADCASTQrr: [ 0.00 0.00 ]
+Key: VPBROADCASTWYrm: [ 0.00 0.00 ]
+Key: VPBROADCASTWYrr: [ 0.00 0.00 ]
+Key: VPBROADCASTWZ: [ 0.00 0.00 ]
+Key: VPBROADCASTWZrm: [ 0.00 0.00 ]
+Key: VPBROADCASTWZrmk: [ 0.00 0.00 ]
+Key: VPBROADCASTWZrmkz: [ 0.00 0.00 ]
+Key: VPBROADCASTWZrr: [ 0.00 0.00 ]
+Key: VPBROADCASTWZrrk: [ 0.00 0.00 ]
+Key: VPBROADCASTWZrrkz: [ 0.00 0.00 ]
+Key: VPBROADCASTWrZ: [ 0.00 0.00 ]
+Key: VPBROADCASTWrZrr: [ 0.00 0.00 ]
+Key: VPBROADCASTWrZrrk: [ 0.00 0.00 ]
+Key: VPBROADCASTWrZrrkz: [ 0.00 0.00 ]
+Key: VPBROADCASTWrm: [ 0.00 0.00 ]
+Key: VPBROADCASTWrr: [ 0.00 0.00 ]
+Key: VPCLMULQDQYrmi: [ 0.00 0.00 ]
+Key: VPCLMULQDQYrri: [ 0.00 0.00 ]
+Key: VPCLMULQDQZ: [ 0.00 0.00 ]
+Key: VPCLMULQDQZrmi: [ 0.00 0.00 ]
+Key: VPCLMULQDQZrri: [ 0.00 0.00 ]
+Key: VPCLMULQDQrmi: [ 0.00 0.00 ]
+Key: VPCLMULQDQrri: [ 0.00 0.00 ]
+Key: VPCMOVYrmr: [ 0.00 0.00 ]
+Key: VPCMOVYrrm: [ 0.00 0.00 ]
+Key: VPCMOVYrrr: [ 0.00 0.00 ]
+Key: VPCMOVYrrr_REV: [ 0.00 0.00 ]
+Key: VPCMOVrmr: [ 0.00 0.00 ]
+Key: VPCMOVrrm: [ 0.00 0.00 ]
+Key: VPCMOVrrr: [ 0.00 0.00 ]
+Key: VPCMOVrrr_REV: [ 0.00 0.00 ]
+Key: VPCMPBZ: [ 0.00 0.00 ]
+Key: VPCMPBZrmi: [ 0.00 0.00 ]
+Key: VPCMPBZrmik: [ 0.00 0.00 ]
+Key: VPCMPBZrri: [ 0.00 0.00 ]
+Key: VPCMPBZrrik: [ 0.00 0.00 ]
+Key: VPCMPDZ: [ 0.00 0.00 ]
+Key: VPCMPDZrmbi: [ 0.00 0.00 ]
+Key: VPCMPDZrmbik: [ 0.00 0.00 ]
+Key: VPCMPDZrmi: [ 0.00 0.00 ]
+Key: VPCMPDZrmik: [ 0.00 0.00 ]
+Key: VPCMPDZrri: [ 0.00 0.00 ]
+Key: VPCMPDZrrik: [ 0.00 0.00 ]
+Key: VPCMPEQBYrm: [ 0.00 0.00 ]
+Key: VPCMPEQBYrr: [ 0.00 0.00 ]
+Key: VPCMPEQBZ: [ 0.00 0.00 ]
+Key: VPCMPEQBZrm: [ 0.00 0.00 ]
+Key: VPCMPEQBZrmk: [ 0.00 0.00 ]
+Key: VPCMPEQBZrr: [ 0.00 0.00 ]
+Key: VPCMPEQBZrrk: [ 0.00 0.00 ]
+Key: VPCMPEQBrm: [ 0.00 0.00 ]
+Key: VPCMPEQBrr: [ 0.00 0.00 ]
+Key: VPCMPEQDYrm: [ 0.00 0.00 ]
+Key: VPCMPEQDYrr: [ 0.00 0.00 ]
+Key: VPCMPEQDZ: [ 0.00 0.00 ]
+Key: VPCMPEQDZrm: [ 0.00 0.00 ]
+Key: VPCMPEQDZrmb: [ 0.00 0.00 ]
+Key: VPCMPEQDZrmbk: [ 0.00 0.00 ]
+Key: VPCMPEQDZrmk: [ 0.00 0.00 ]
+Key: VPCMPEQDZrr: [ 0.00 0.00 ]
+Key: VPCMPEQDZrrk: [ 0.00 0.00 ]
+Key: VPCMPEQDrm: [ 0.00 0.00 ]
+Key: VPCMPEQDrr: [ 0.00 0.00 ]
+Key: VPCMPEQQYrm: [ 0.00 0.00 ]
+Key: VPCMPEQQYrr: [ 0.00 0.00 ]
+Key: VPCMPEQQZ: [ 0.00 0.00 ]
+Key: VPCMPEQQZrm: [ 0.00 0.00 ]
+Key: VPCMPEQQZrmb: [ 0.00 0.00 ]
+Key: VPCMPEQQZrmbk: [ 0.00 0.00 ]
+Key: VPCMPEQQZrmk: [ 0.00 0.00 ]
+Key: VPCMPEQQZrr: [ 0.00 0.00 ]
+Key: VPCMPEQQZrrk: [ 0.00 0.00 ]
+Key: VPCMPEQQrm: [ 0.00 0.00 ]
+Key: VPCMPEQQrr: [ 0.00 0.00 ]
+Key: VPCMPEQWYrm: [ 0.00 0.00 ]
+Key: VPCMPEQWYrr: [ 0.00 0.00 ]
+Key: VPCMPEQWZ: [ 0.00 0.00 ]
+Key: VPCMPEQWZrm: [ 0.00 0.00 ]
+Key: VPCMPEQWZrmk: [ 0.00 0.00 ]
+Key: VPCMPEQWZrr: [ 0.00 0.00 ]
+Key: VPCMPEQWZrrk: [ 0.00 0.00 ]
+Key: VPCMPEQWrm: [ 0.00 0.00 ]
+Key: VPCMPEQWrr: [ 0.00 0.00 ]
+Key: VPCMPESTRIrmi: [ 0.00 0.00 ]
+Key: VPCMPESTRIrri: [ 0.00 0.00 ]
+Key: VPCMPESTRMrmi: [ 0.00 0.00 ]
+Key: VPCMPESTRMrri: [ 0.00 0.00 ]
+Key: VPCMPGTBYrm: [ 0.00 0.00 ]
+Key: VPCMPGTBYrr: [ 0.00 0.00 ]
+Key: VPCMPGTBZ: [ 0.00 0.00 ]
+Key: VPCMPGTBZrm: [ 0.00 0.00 ]
+Key: VPCMPGTBZrmk: [ 0.00 0.00 ]
+Key: VPCMPGTBZrr: [ 0.00 0.00 ]
+Key: VPCMPGTBZrrk: [ 0.00 0.00 ]
+Key: VPCMPGTBrm: [ 0.00 0.00 ]
+Key: VPCMPGTBrr: [ 0.00 0.00 ]
+Key: VPCMPGTDYrm: [ 0.00 0.00 ]
+Key: VPCMPGTDYrr: [ 0.00 0.00 ]
+Key: VPCMPGTDZ: [ 0.00 0.00 ]
+Key: VPCMPGTDZrm: [ 0.00 0.00 ]
+Key: VPCMPGTDZrmb: [ 0.00 0.00 ]
+Key: VPCMPGTDZrmbk: [ 0.00 0.00 ]
+Key: VPCMPGTDZrmk: [ 0.00 0.00 ]
+Key: VPCMPGTDZrr: [ 0.00 0.00 ]
+Key: VPCMPGTDZrrk: [ 0.00 0.00 ]
+Key: VPCMPGTDrm: [ 0.00 0.00 ]
+Key: VPCMPGTDrr: [ 0.00 0.00 ]
+Key: VPCMPGTQYrm: [ 0.00 0.00 ]
+Key: VPCMPGTQYrr: [ 0.00 0.00 ]
+Key: VPCMPGTQZ: [ 0.00 0.00 ]
+Key: VPCMPGTQZrm: [ 0.00 0.00 ]
+Key: VPCMPGTQZrmb: [ 0.00 0.00 ]
+Key: VPCMPGTQZrmbk: [ 0.00 0.00 ]
+Key: VPCMPGTQZrmk: [ 0.00 0.00 ]
+Key: VPCMPGTQZrr: [ 0.00 0.00 ]
+Key: VPCMPGTQZrrk: [ 0.00 0.00 ]
+Key: VPCMPGTQrm: [ 0.00 0.00 ]
+Key: VPCMPGTQrr: [ 0.00 0.00 ]
+Key: VPCMPGTWYrm: [ 0.00 0.00 ]
+Key: VPCMPGTWYrr: [ 0.00 0.00 ]
+Key: VPCMPGTWZ: [ 0.00 0.00 ]
+Key: VPCMPGTWZrm: [ 0.00 0.00 ]
+Key: VPCMPGTWZrmk: [ 0.00 0.00 ]
+Key: VPCMPGTWZrr: [ 0.00 0.00 ]
+Key: VPCMPGTWZrrk: [ 0.00 0.00 ]
+Key: VPCMPGTWrm: [ 0.00 0.00 ]
+Key: VPCMPGTWrr: [ 0.00 0.00 ]
+Key: VPCMPISTRIrmi: [ 0.00 0.00 ]
+Key: VPCMPISTRIrri: [ 0.00 0.00 ]
+Key: VPCMPISTRMrmi: [ 0.00 0.00 ]
+Key: VPCMPISTRMrri: [ 0.00 0.00 ]
+Key: VPCMPQZ: [ 0.00 0.00 ]
+Key: VPCMPQZrmbi: [ 0.00 0.00 ]
+Key: VPCMPQZrmbik: [ 0.00 0.00 ]
+Key: VPCMPQZrmi: [ 0.00 0.00 ]
+Key: VPCMPQZrmik: [ 0.00 0.00 ]
+Key: VPCMPQZrri: [ 0.00 0.00 ]
+Key: VPCMPQZrrik: [ 0.00 0.00 ]
+Key: VPCMPUBZ: [ 0.00 0.00 ]
+Key: VPCMPUBZrmi: [ 0.00 0.00 ]
+Key: VPCMPUBZrmik: [ 0.00 0.00 ]
+Key: VPCMPUBZrri: [ 0.00 0.00 ]
+Key: VPCMPUBZrrik: [ 0.00 0.00 ]
+Key: VPCMPUDZ: [ 0.00 0.00 ]
+Key: VPCMPUDZrmbi: [ 0.00 0.00 ]
+Key: VPCMPUDZrmbik: [ 0.00 0.00 ]
+Key: VPCMPUDZrmi: [ 0.00 0.00 ]
+Key: VPCMPUDZrmik: [ 0.00 0.00 ]
+Key: VPCMPUDZrri: [ 0.00 0.00 ]
+Key: VPCMPUDZrrik: [ 0.00 0.00 ]
+Key: VPCMPUQZ: [ 0.00 0.00 ]
+Key: VPCMPUQZrmbi: [ 0.00 0.00 ]
+Key: VPCMPUQZrmbik: [ 0.00 0.00 ]
+Key: VPCMPUQZrmi: [ 0.00 0.00 ]
+Key: VPCMPUQZrmik: [ 0.00 0.00 ]
+Key: VPCMPUQZrri: [ 0.00 0.00 ]
+Key: VPCMPUQZrrik: [ 0.00 0.00 ]
+Key: VPCMPUWZ: [ 0.00 0.00 ]
+Key: VPCMPUWZrmi: [ 0.00 0.00 ]
+Key: VPCMPUWZrmik: [ 0.00 0.00 ]
+Key: VPCMPUWZrri: [ 0.00 0.00 ]
+Key: VPCMPUWZrrik: [ 0.00 0.00 ]
+Key: VPCMPWZ: [ 0.00 0.00 ]
+Key: VPCMPWZrmi: [ 0.00 0.00 ]
+Key: VPCMPWZrmik: [ 0.00 0.00 ]
+Key: VPCMPWZrri: [ 0.00 0.00 ]
+Key: VPCMPWZrrik: [ 0.00 0.00 ]
+Key: VPCOMBmi: [ 0.00 0.00 ]
+Key: VPCOMBri: [ 0.00 0.00 ]
+Key: VPCOMDmi: [ 0.00 0.00 ]
+Key: VPCOMDri: [ 0.00 0.00 ]
+Key: VPCOMPRESSBZ: [ 0.00 0.00 ]
+Key: VPCOMPRESSBZmr: [ 0.00 0.00 ]
+Key: VPCOMPRESSBZmrk: [ 0.00 0.00 ]
+Key: VPCOMPRESSBZrr: [ 0.00 0.00 ]
+Key: VPCOMPRESSBZrrk: [ 0.00 0.00 ]
+Key: VPCOMPRESSBZrrkz: [ 0.00 0.00 ]
+Key: VPCOMPRESSDZ: [ 0.00 0.00 ]
+Key: VPCOMPRESSDZmr: [ 0.00 0.00 ]
+Key: VPCOMPRESSDZmrk: [ 0.00 0.00 ]
+Key: VPCOMPRESSDZrr: [ 0.00 0.00 ]
+Key: VPCOMPRESSDZrrk: [ 0.00 0.00 ]
+Key: VPCOMPRESSDZrrkz: [ 0.00 0.00 ]
+Key: VPCOMPRESSQZ: [ 0.00 0.00 ]
+Key: VPCOMPRESSQZmr: [ 0.00 0.00 ]
+Key: VPCOMPRESSQZmrk: [ 0.00 0.00 ]
+Key: VPCOMPRESSQZrr: [ 0.00 0.00 ]
+Key: VPCOMPRESSQZrrk: [ 0.00 0.00 ]
+Key: VPCOMPRESSQZrrkz: [ 0.00 0.00 ]
+Key: VPCOMPRESSWZ: [ 0.00 0.00 ]
+Key: VPCOMPRESSWZmr: [ 0.00 0.00 ]
+Key: VPCOMPRESSWZmrk: [ 0.00 0.00 ]
+Key: VPCOMPRESSWZrr: [ 0.00 0.00 ]
+Key: VPCOMPRESSWZrrk: [ 0.00 0.00 ]
+Key: VPCOMPRESSWZrrkz: [ 0.00 0.00 ]
+Key: VPCOMQmi: [ 0.00 0.00 ]
+Key: VPCOMQri: [ 0.00 0.00 ]
+Key: VPCOMUBmi: [ 0.00 0.00 ]
+Key: VPCOMUBri: [ 0.00 0.00 ]
+Key: VPCOMUDmi: [ 0.00 0.00 ]
+Key: VPCOMUDri: [ 0.00 0.00 ]
+Key: VPCOMUQmi: [ 0.00 0.00 ]
+Key: VPCOMUQri: [ 0.00 0.00 ]
+Key: VPCOMUWmi: [ 0.00 0.00 ]
+Key: VPCOMUWri: [ 0.00 0.00 ]
+Key: VPCOMWmi: [ 0.00 0.00 ]
+Key: VPCOMWri: [ 0.00 0.00 ]
+Key: VPCONFLICTDZ: [ 0.00 0.00 ]
+Key: VPCONFLICTDZrm: [ 0.00 0.00 ]
+Key: VPCONFLICTDZrmb: [ 0.00 0.00 ]
+Key: VPCONFLICTDZrmbk: [ 0.00 0.00 ]
+Key: VPCONFLICTDZrmbkz: [ 0.00 0.00 ]
+Key: VPCONFLICTDZrmk: [ 0.00 0.00 ]
+Key: VPCONFLICTDZrmkz: [ 0.00 0.00 ]
+Key: VPCONFLICTDZrr: [ 0.00 0.00 ]
+Key: VPCONFLICTDZrrk: [ 0.00 0.00 ]
+Key: VPCONFLICTDZrrkz: [ 0.00 0.00 ]
+Key: VPCONFLICTQZ: [ 0.00 0.00 ]
+Key: VPCONFLICTQZrm: [ 0.00 0.00 ]
+Key: VPCONFLICTQZrmb: [ 0.00 0.00 ]
+Key: VPCONFLICTQZrmbk: [ 0.00 0.00 ]
+Key: VPCONFLICTQZrmbkz: [ 0.00 0.00 ]
+Key: VPCONFLICTQZrmk: [ 0.00 0.00 ]
+Key: VPCONFLICTQZrmkz: [ 0.00 0.00 ]
+Key: VPCONFLICTQZrr: [ 0.00 0.00 ]
+Key: VPCONFLICTQZrrk: [ 0.00 0.00 ]
+Key: VPCONFLICTQZrrkz: [ 0.00 0.00 ]
+Key: VPDPBSSDSYrm: [ 0.00 0.00 ]
+Key: VPDPBSSDSYrr: [ 0.00 0.00 ]
+Key: VPDPBSSDSZ: [ 0.00 0.00 ]
+Key: VPDPBSSDSZrm: [ 0.00 0.00 ]
+Key: VPDPBSSDSZrmb: [ 0.00 0.00 ]
+Key: VPDPBSSDSZrmbk: [ 0.00 0.00 ]
+Key: VPDPBSSDSZrmbkz: [ 0.00 0.00 ]
+Key: VPDPBSSDSZrmk: [ 0.00 0.00 ]
+Key: VPDPBSSDSZrmkz: [ 0.00 0.00 ]
+Key: VPDPBSSDSZrr: [ 0.00 0.00 ]
+Key: VPDPBSSDSZrrk: [ 0.00 0.00 ]
+Key: VPDPBSSDSZrrkz: [ 0.00 0.00 ]
+Key: VPDPBSSDSrm: [ 0.00 0.00 ]
+Key: VPDPBSSDSrr: [ 0.00 0.00 ]
+Key: VPDPBSSDYrm: [ 0.00 0.00 ]
+Key: VPDPBSSDYrr: [ 0.00 0.00 ]
+Key: VPDPBSSDZ: [ 0.00 0.00 ]
+Key: VPDPBSSDZrm: [ 0.00 0.00 ]
+Key: VPDPBSSDZrmb: [ 0.00 0.00 ]
+Key: VPDPBSSDZrmbk: [ 0.00 0.00 ]
+Key: VPDPBSSDZrmbkz: [ 0.00 0.00 ]
+Key: VPDPBSSDZrmk: [ 0.00 0.00 ]
+Key: VPDPBSSDZrmkz: [ 0.00 0.00 ]
+Key: VPDPBSSDZrr: [ 0.00 0.00 ]
+Key: VPDPBSSDZrrk: [ 0.00 0.00 ]
+Key: VPDPBSSDZrrkz: [ 0.00 0.00 ]
+Key: VPDPBSSDrm: [ 0.00 0.00 ]
+Key: VPDPBSSDrr: [ 0.00 0.00 ]
+Key: VPDPBSUDSYrm: [ 0.00 0.00 ]
+Key: VPDPBSUDSYrr: [ 0.00 0.00 ]
+Key: VPDPBSUDSZ: [ 0.00 0.00 ]
+Key: VPDPBSUDSZrm: [ 0.00 0.00 ]
+Key: VPDPBSUDSZrmb: [ 0.00 0.00 ]
+Key: VPDPBSUDSZrmbk: [ 0.00 0.00 ]
+Key: VPDPBSUDSZrmbkz: [ 0.00 0.00 ]
+Key: VPDPBSUDSZrmk: [ 0.00 0.00 ]
+Key: VPDPBSUDSZrmkz: [ 0.00 0.00 ]
+Key: VPDPBSUDSZrr: [ 0.00 0.00 ]
+Key: VPDPBSUDSZrrk: [ 0.00 0.00 ]
+Key: VPDPBSUDSZrrkz: [ 0.00 0.00 ]
+Key: VPDPBSUDSrm: [ 0.00 0.00 ]
+Key: VPDPBSUDSrr: [ 0.00 0.00 ]
+Key: VPDPBSUDYrm: [ 0.00 0.00 ]
+Key: VPDPBSUDYrr: [ 0.00 0.00 ]
+Key: VPDPBSUDZ: [ 0.00 0.00 ]
+Key: VPDPBSUDZrm: [ 0.00 0.00 ]
+Key: VPDPBSUDZrmb: [ 0.00 0.00 ]
+Key: VPDPBSUDZrmbk: [ 0.00 0.00 ]
+Key: VPDPBSUDZrmbkz: [ 0.00 0.00 ]
+Key: VPDPBSUDZrmk: [ 0.00 0.00 ]
+Key: VPDPBSUDZrmkz: [ 0.00 0.00 ]
+Key: VPDPBSUDZrr: [ 0.00 0.00 ]
+Key: VPDPBSUDZrrk: [ 0.00 0.00 ]
+Key: VPDPBSUDZrrkz: [ 0.00 0.00 ]
+Key: VPDPBSUDrm: [ 0.00 0.00 ]
+Key: VPDPBSUDrr: [ 0.00 0.00 ]
+Key: VPDPBUSDSYrm: [ 0.00 0.00 ]
+Key: VPDPBUSDSYrr: [ 0.00 0.00 ]
+Key: VPDPBUSDSZ: [ 0.00 0.00 ]
+Key: VPDPBUSDSZrm: [ 0.00 0.00 ]
+Key: VPDPBUSDSZrmb: [ 0.00 0.00 ]
+Key: VPDPBUSDSZrmbk: [ 0.00 0.00 ]
+Key: VPDPBUSDSZrmbkz: [ 0.00 0.00 ]
+Key: VPDPBUSDSZrmk: [ 0.00 0.00 ]
+Key: VPDPBUSDSZrmkz: [ 0.00 0.00 ]
+Key: VPDPBUSDSZrr: [ 0.00 0.00 ]
+Key: VPDPBUSDSZrrk: [ 0.00 0.00 ]
+Key: VPDPBUSDSZrrkz: [ 0.00 0.00 ]
+Key: VPDPBUSDSrm: [ 0.00 0.00 ]
+Key: VPDPBUSDSrr: [ 0.00 0.00 ]
+Key: VPDPBUSDYrm: [ 0.00 0.00 ]
+Key: VPDPBUSDYrr: [ 0.00 0.00 ]
+Key: VPDPBUSDZ: [ 0.00 0.00 ]
+Key: VPDPBUSDZrm: [ 0.00 0.00 ]
+Key: VPDPBUSDZrmb: [ 0.00 0.00 ]
+Key: VPDPBUSDZrmbk: [ 0.00 0.00 ]
+Key: VPDPBUSDZrmbkz: [ 0.00 0.00 ]
+Key: VPDPBUSDZrmk: [ 0.00 0.00 ]
+Key: VPDPBUSDZrmkz: [ 0.00 0.00 ]
+Key: VPDPBUSDZrr: [ 0.00 0.00 ]
+Key: VPDPBUSDZrrk: [ 0.00 0.00 ]
+Key: VPDPBUSDZrrkz: [ 0.00 0.00 ]
+Key: VPDPBUSDrm: [ 0.00 0.00 ]
+Key: VPDPBUSDrr: [ 0.00 0.00 ]
+Key: VPDPBUUDSYrm: [ 0.00 0.00 ]
+Key: VPDPBUUDSYrr: [ 0.00 0.00 ]
+Key: VPDPBUUDSZ: [ 0.00 0.00 ]
+Key: VPDPBUUDSZrm: [ 0.00 0.00 ]
+Key: VPDPBUUDSZrmb: [ 0.00 0.00 ]
+Key: VPDPBUUDSZrmbk: [ 0.00 0.00 ]
+Key: VPDPBUUDSZrmbkz: [ 0.00 0.00 ]
+Key: VPDPBUUDSZrmk: [ 0.00 0.00 ]
+Key: VPDPBUUDSZrmkz: [ 0.00 0.00 ]
+Key: VPDPBUUDSZrr: [ 0.00 0.00 ]
+Key: VPDPBUUDSZrrk: [ 0.00 0.00 ]
+Key: VPDPBUUDSZrrkz: [ 0.00 0.00 ]
+Key: VPDPBUUDSrm: [ 0.00 0.00 ]
+Key: VPDPBUUDSrr: [ 0.00 0.00 ]
+Key: VPDPBUUDYrm: [ 0.00 0.00 ]
+Key: VPDPBUUDYrr: [ 0.00 0.00 ]
+Key: VPDPBUUDZ: [ 0.00 0.00 ]
+Key: VPDPBUUDZrm: [ 0.00 0.00 ]
+Key: VPDPBUUDZrmb: [ 0.00 0.00 ]
+Key: VPDPBUUDZrmbk: [ 0.00 0.00 ]
+Key: VPDPBUUDZrmbkz: [ 0.00 0.00 ]
+Key: VPDPBUUDZrmk: [ 0.00 0.00 ]
+Key: VPDPBUUDZrmkz: [ 0.00 0.00 ]
+Key: VPDPBUUDZrr: [ 0.00 0.00 ]
+Key: VPDPBUUDZrrk: [ 0.00 0.00 ]
+Key: VPDPBUUDZrrkz: [ 0.00 0.00 ]
+Key: VPDPBUUDrm: [ 0.00 0.00 ]
+Key: VPDPBUUDrr: [ 0.00 0.00 ]
+Key: VPDPWSSDSYrm: [ 0.00 0.00 ]
+Key: VPDPWSSDSYrr: [ 0.00 0.00 ]
+Key: VPDPWSSDSZ: [ 0.00 0.00 ]
+Key: VPDPWSSDSZrm: [ 0.00 0.00 ]
+Key: VPDPWSSDSZrmb: [ 0.00 0.00 ]
+Key: VPDPWSSDSZrmbk: [ 0.00 0.00 ]
+Key: VPDPWSSDSZrmbkz: [ 0.00 0.00 ]
+Key: VPDPWSSDSZrmk: [ 0.00 0.00 ]
+Key: VPDPWSSDSZrmkz: [ 0.00 0.00 ]
+Key: VPDPWSSDSZrr: [ 0.00 0.00 ]
+Key: VPDPWSSDSZrrk: [ 0.00 0.00 ]
+Key: VPDPWSSDSZrrkz: [ 0.00 0.00 ]
+Key: VPDPWSSDSrm: [ 0.00 0.00 ]
+Key: VPDPWSSDSrr: [ 0.00 0.00 ]
+Key: VPDPWSSDYrm: [ 0.00 0.00 ]
+Key: VPDPWSSDYrr: [ 0.00 0.00 ]
+Key: VPDPWSSDZ: [ 0.00 0.00 ]
+Key: VPDPWSSDZrm: [ 0.00 0.00 ]
+Key: VPDPWSSDZrmb: [ 0.00 0.00 ]
+Key: VPDPWSSDZrmbk: [ 0.00 0.00 ]
+Key: VPDPWSSDZrmbkz: [ 0.00 0.00 ]
+Key: VPDPWSSDZrmk: [ 0.00 0.00 ]
+Key: VPDPWSSDZrmkz: [ 0.00 0.00 ]
+Key: VPDPWSSDZrr: [ 0.00 0.00 ]
+Key: VPDPWSSDZrrk: [ 0.00 0.00 ]
+Key: VPDPWSSDZrrkz: [ 0.00 0.00 ]
+Key: VPDPWSSDrm: [ 0.00 0.00 ]
+Key: VPDPWSSDrr: [ 0.00 0.00 ]
+Key: VPDPWSUDSYrm: [ 0.00 0.00 ]
+Key: VPDPWSUDSYrr: [ 0.00 0.00 ]
+Key: VPDPWSUDSZ: [ 0.00 0.00 ]
+Key: VPDPWSUDSZrm: [ 0.00 0.00 ]
+Key: VPDPWSUDSZrmb: [ 0.00 0.00 ]
+Key: VPDPWSUDSZrmbk: [ 0.00 0.00 ]
+Key: VPDPWSUDSZrmbkz: [ 0.00 0.00 ]
+Key: VPDPWSUDSZrmk: [ 0.00 0.00 ]
+Key: VPDPWSUDSZrmkz: [ 0.00 0.00 ]
+Key: VPDPWSUDSZrr: [ 0.00 0.00 ]
+Key: VPDPWSUDSZrrk: [ 0.00 0.00 ]
+Key: VPDPWSUDSZrrkz: [ 0.00 0.00 ]
+Key: VPDPWSUDSrm: [ 0.00 0.00 ]
+Key: VPDPWSUDSrr: [ 0.00 0.00 ]
+Key: VPDPWSUDYrm: [ 0.00 0.00 ]
+Key: VPDPWSUDYrr: [ 0.00 0.00 ]
+Key: VPDPWSUDZ: [ 0.00 0.00 ]
+Key: VPDPWSUDZrm: [ 0.00 0.00 ]
+Key: VPDPWSUDZrmb: [ 0.00 0.00 ]
+Key: VPDPWSUDZrmbk: [ 0.00 0.00 ]
+Key: VPDPWSUDZrmbkz: [ 0.00 0.00 ]
+Key: VPDPWSUDZrmk: [ 0.00 0.00 ]
+Key: VPDPWSUDZrmkz: [ 0.00 0.00 ]
+Key: VPDPWSUDZrr: [ 0.00 0.00 ]
+Key: VPDPWSUDZrrk: [ 0.00 0.00 ]
+Key: VPDPWSUDZrrkz: [ 0.00 0.00 ]
+Key: VPDPWSUDrm: [ 0.00 0.00 ]
+Key: VPDPWSUDrr: [ 0.00 0.00 ]
+Key: VPDPWUSDSYrm: [ 0.00 0.00 ]
+Key: VPDPWUSDSYrr: [ 0.00 0.00 ]
+Key: VPDPWUSDSZ: [ 0.00 0.00 ]
+Key: VPDPWUSDSZrm: [ 0.00 0.00 ]
+Key: VPDPWUSDSZrmb: [ 0.00 0.00 ]
+Key: VPDPWUSDSZrmbk: [ 0.00 0.00 ]
+Key: VPDPWUSDSZrmbkz: [ 0.00 0.00 ]
+Key: VPDPWUSDSZrmk: [ 0.00 0.00 ]
+Key: VPDPWUSDSZrmkz: [ 0.00 0.00 ]
+Key: VPDPWUSDSZrr: [ 0.00 0.00 ]
+Key: VPDPWUSDSZrrk: [ 0.00 0.00 ]
+Key: VPDPWUSDSZrrkz: [ 0.00 0.00 ]
+Key: VPDPWUSDSrm: [ 0.00 0.00 ]
+Key: VPDPWUSDSrr: [ 0.00 0.00 ]
+Key: VPDPWUSDYrm: [ 0.00 0.00 ]
+Key: VPDPWUSDYrr: [ 0.00 0.00 ]
+Key: VPDPWUSDZ: [ 0.00 0.00 ]
+Key: VPDPWUSDZrm: [ 0.00 0.00 ]
+Key: VPDPWUSDZrmb: [ 0.00 0.00 ]
+Key: VPDPWUSDZrmbk: [ 0.00 0.00 ]
+Key: VPDPWUSDZrmbkz: [ 0.00 0.00 ]
+Key: VPDPWUSDZrmk: [ 0.00 0.00 ]
+Key: VPDPWUSDZrmkz: [ 0.00 0.00 ]
+Key: VPDPWUSDZrr: [ 0.00 0.00 ]
+Key: VPDPWUSDZrrk: [ 0.00 0.00 ]
+Key: VPDPWUSDZrrkz: [ 0.00 0.00 ]
+Key: VPDPWUSDrm: [ 0.00 0.00 ]
+Key: VPDPWUSDrr: [ 0.00 0.00 ]
+Key: VPDPWUUDSYrm: [ 0.00 0.00 ]
+Key: VPDPWUUDSYrr: [ 0.00 0.00 ]
+Key: VPDPWUUDSZ: [ 0.00 0.00 ]
+Key: VPDPWUUDSZrm: [ 0.00 0.00 ]
+Key: VPDPWUUDSZrmb: [ 0.00 0.00 ]
+Key: VPDPWUUDSZrmbk: [ 0.00 0.00 ]
+Key: VPDPWUUDSZrmbkz: [ 0.00 0.00 ]
+Key: VPDPWUUDSZrmk: [ 0.00 0.00 ]
+Key: VPDPWUUDSZrmkz: [ 0.00 0.00 ]
+Key: VPDPWUUDSZrr: [ 0.00 0.00 ]
+Key: VPDPWUUDSZrrk: [ 0.00 0.00 ]
+Key: VPDPWUUDSZrrkz: [ 0.00 0.00 ]
+Key: VPDPWUUDSrm: [ 0.00 0.00 ]
+Key: VPDPWUUDSrr: [ 0.00 0.00 ]
+Key: VPDPWUUDYrm: [ 0.00 0.00 ]
+Key: VPDPWUUDYrr: [ 0.00 0.00 ]
+Key: VPDPWUUDZ: [ 0.00 0.00 ]
+Key: VPDPWUUDZrm: [ 0.00 0.00 ]
+Key: VPDPWUUDZrmb: [ 0.00 0.00 ]
+Key: VPDPWUUDZrmbk: [ 0.00 0.00 ]
+Key: VPDPWUUDZrmbkz: [ 0.00 0.00 ]
+Key: VPDPWUUDZrmk: [ 0.00 0.00 ]
+Key: VPDPWUUDZrmkz: [ 0.00 0.00 ]
+Key: VPDPWUUDZrr: [ 0.00 0.00 ]
+Key: VPDPWUUDZrrk: [ 0.00 0.00 ]
+Key: VPDPWUUDZrrkz: [ 0.00 0.00 ]
+Key: VPDPWUUDrm: [ 0.00 0.00 ]
+Key: VPDPWUUDrr: [ 0.00 0.00 ]
+Key: VPERM: [ 0.00 0.00 ]
+Key: VPERMBZ: [ 0.00 0.00 ]
+Key: VPERMBZrm: [ 0.00 0.00 ]
+Key: VPERMBZrmk: [ 0.00 0.00 ]
+Key: VPERMBZrmkz: [ 0.00 0.00 ]
+Key: VPERMBZrr: [ 0.00 0.00 ]
+Key: VPERMBZrrk: [ 0.00 0.00 ]
+Key: VPERMBZrrkz: [ 0.00 0.00 ]
+Key: VPERMDYrm: [ 0.00 0.00 ]
+Key: VPERMDYrr: [ 0.00 0.00 ]
+Key: VPERMDZ: [ 0.00 0.00 ]
+Key: VPERMDZrm: [ 0.00 0.00 ]
+Key: VPERMDZrmb: [ 0.00 0.00 ]
+Key: VPERMDZrmbk: [ 0.00 0.00 ]
+Key: VPERMDZrmbkz: [ 0.00 0.00 ]
+Key: VPERMDZrmk: [ 0.00 0.00 ]
+Key: VPERMDZrmkz: [ 0.00 0.00 ]
+Key: VPERMDZrr: [ 0.00 0.00 ]
+Key: VPERMDZrrk: [ 0.00 0.00 ]
+Key: VPERMDZrrkz: [ 0.00 0.00 ]
+Key: VPERMI: [ 0.00 0.00 ]
+Key: VPERMIL: [ 0.00 0.00 ]
+Key: VPERMILPDYmi: [ 0.00 0.00 ]
+Key: VPERMILPDYri: [ 0.00 0.00 ]
+Key: VPERMILPDYrm: [ 0.00 0.00 ]
+Key: VPERMILPDYrr: [ 0.00 0.00 ]
+Key: VPERMILPDZ: [ 0.00 0.00 ]
+Key: VPERMILPDZmbi: [ 0.00 0.00 ]
+Key: VPERMILPDZmbik: [ 0.00 0.00 ]
+Key: VPERMILPDZmbikz: [ 0.00 0.00 ]
+Key: VPERMILPDZmi: [ 0.00 0.00 ]
+Key: VPERMILPDZmik: [ 0.00 0.00 ]
+Key: VPERMILPDZmikz: [ 0.00 0.00 ]
+Key: VPERMILPDZri: [ 0.00 0.00 ]
+Key: VPERMILPDZrik: [ 0.00 0.00 ]
+Key: VPERMILPDZrikz: [ 0.00 0.00 ]
+Key: VPERMILPDZrm: [ 0.00 0.00 ]
+Key: VPERMILPDZrmb: [ 0.00 0.00 ]
+Key: VPERMILPDZrmbk: [ 0.00 0.00 ]
+Key: VPERMILPDZrmbkz: [ 0.00 0.00 ]
+Key: VPERMILPDZrmk: [ 0.00 0.00 ]
+Key: VPERMILPDZrmkz: [ 0.00 0.00 ]
+Key: VPERMILPDZrr: [ 0.00 0.00 ]
+Key: VPERMILPDZrrk: [ 0.00 0.00 ]
+Key: VPERMILPDZrrkz: [ 0.00 0.00 ]
+Key: VPERMILPDmi: [ 0.00 0.00 ]
+Key: VPERMILPDri: [ 0.00 0.00 ]
+Key: VPERMILPDrm: [ 0.00 0.00 ]
+Key: VPERMILPDrr: [ 0.00 0.00 ]
+Key: VPERMILPSYmi: [ 0.00 0.00 ]
+Key: VPERMILPSYri: [ 0.00 0.00 ]
+Key: VPERMILPSYrm: [ 0.00 0.00 ]
+Key: VPERMILPSYrr: [ 0.00 0.00 ]
+Key: VPERMILPSZ: [ 0.00 0.00 ]
+Key: VPERMILPSZmbi: [ 0.00 0.00 ]
+Key: VPERMILPSZmbik: [ 0.00 0.00 ]
+Key: VPERMILPSZmbikz: [ 0.00 0.00 ]
+Key: VPERMILPSZmi: [ 0.00 0.00 ]
+Key: VPERMILPSZmik: [ 0.00 0.00 ]
+Key: VPERMILPSZmikz: [ 0.00 0.00 ]
+Key: VPERMILPSZri: [ 0.00 0.00 ]
+Key: VPERMILPSZrik: [ 0.00 0.00 ]
+Key: VPERMILPSZrikz: [ 0.00 0.00 ]
+Key: VPERMILPSZrm: [ 0.00 0.00 ]
+Key: VPERMILPSZrmb: [ 0.00 0.00 ]
+Key: VPERMILPSZrmbk: [ 0.00 0.00 ]
+Key: VPERMILPSZrmbkz: [ 0.00 0.00 ]
+Key: VPERMILPSZrmk: [ 0.00 0.00 ]
+Key: VPERMILPSZrmkz: [ 0.00 0.00 ]
+Key: VPERMILPSZrr: [ 0.00 0.00 ]
+Key: VPERMILPSZrrk: [ 0.00 0.00 ]
+Key: VPERMILPSZrrkz: [ 0.00 0.00 ]
+Key: VPERMILPSmi: [ 0.00 0.00 ]
+Key: VPERMILPSri: [ 0.00 0.00 ]
+Key: VPERMILPSrm: [ 0.00 0.00 ]
+Key: VPERMILPSrr: [ 0.00 0.00 ]
+Key: VPERMPDYmi: [ 0.00 0.00 ]
+Key: VPERMPDYri: [ 0.00 0.00 ]
+Key: VPERMPDZ: [ 0.00 0.00 ]
+Key: VPERMPDZmbi: [ 0.00 0.00 ]
+Key: VPERMPDZmbik: [ 0.00 0.00 ]
+Key: VPERMPDZmbikz: [ 0.00 0.00 ]
+Key: VPERMPDZmi: [ 0.00 0.00 ]
+Key: VPERMPDZmik: [ 0.00 0.00 ]
+Key: VPERMPDZmikz: [ 0.00 0.00 ]
+Key: VPERMPDZri: [ 0.00 0.00 ]
+Key: VPERMPDZrik: [ 0.00 0.00 ]
+Key: VPERMPDZrikz: [ 0.00 0.00 ]
+Key: VPERMPDZrm: [ 0.00 0.00 ]
+Key: VPERMPDZrmb: [ 0.00 0.00 ]
+Key: VPERMPDZrmbk: [ 0.00 0.00 ]
+Key: VPERMPDZrmbkz: [ 0.00 0.00 ]
+Key: VPERMPDZrmk: [ 0.00 0.00 ]
+Key: VPERMPDZrmkz: [ 0.00 0.00 ]
+Key: VPERMPDZrr: [ 0.00 0.00 ]
+Key: VPERMPDZrrk: [ 0.00 0.00 ]
+Key: VPERMPDZrrkz: [ 0.00 0.00 ]
+Key: VPERMPSYrm: [ 0.00 0.00 ]
+Key: VPERMPSYrr: [ 0.00 0.00 ]
+Key: VPERMPSZ: [ 0.00 0.00 ]
+Key: VPERMPSZrm: [ 0.00 0.00 ]
+Key: VPERMPSZrmb: [ 0.00 0.00 ]
+Key: VPERMPSZrmbk: [ 0.00 0.00 ]
+Key: VPERMPSZrmbkz: [ 0.00 0.00 ]
+Key: VPERMPSZrmk: [ 0.00 0.00 ]
+Key: VPERMPSZrmkz: [ 0.00 0.00 ]
+Key: VPERMPSZrr: [ 0.00 0.00 ]
+Key: VPERMPSZrrk: [ 0.00 0.00 ]
+Key: VPERMPSZrrkz: [ 0.00 0.00 ]
+Key: VPERMQYmi: [ 0.00 0.00 ]
+Key: VPERMQYri: [ 0.00 0.00 ]
+Key: VPERMQZ: [ 0.00 0.00 ]
+Key: VPERMQZmbi: [ 0.00 0.00 ]
+Key: VPERMQZmbik: [ 0.00 0.00 ]
+Key: VPERMQZmbikz: [ 0.00 0.00 ]
+Key: VPERMQZmi: [ 0.00 0.00 ]
+Key: VPERMQZmik: [ 0.00 0.00 ]
+Key: VPERMQZmikz: [ 0.00 0.00 ]
+Key: VPERMQZri: [ 0.00 0.00 ]
+Key: VPERMQZrik: [ 0.00 0.00 ]
+Key: VPERMQZrikz: [ 0.00 0.00 ]
+Key: VPERMQZrm: [ 0.00 0.00 ]
+Key: VPERMQZrmb: [ 0.00 0.00 ]
+Key: VPERMQZrmbk: [ 0.00 0.00 ]
+Key: VPERMQZrmbkz: [ 0.00 0.00 ]
+Key: VPERMQZrmk: [ 0.00 0.00 ]
+Key: VPERMQZrmkz: [ 0.00 0.00 ]
+Key: VPERMQZrr: [ 0.00 0.00 ]
+Key: VPERMQZrrk: [ 0.00 0.00 ]
+Key: VPERMQZrrkz: [ 0.00 0.00 ]
+Key: VPERMT: [ 0.00 0.00 ]
+Key: VPERMWZ: [ 0.00 0.00 ]
+Key: VPERMWZrm: [ 0.00 0.00 ]
+Key: VPERMWZrmk: [ 0.00 0.00 ]
+Key: VPERMWZrmkz: [ 0.00 0.00 ]
+Key: VPERMWZrr: [ 0.00 0.00 ]
+Key: VPERMWZrrk: [ 0.00 0.00 ]
+Key: VPERMWZrrkz: [ 0.00 0.00 ]
+Key: VPEXPANDBZ: [ 0.00 0.00 ]
+Key: VPEXPANDBZrm: [ 0.00 0.00 ]
+Key: VPEXPANDBZrmk: [ 0.00 0.00 ]
+Key: VPEXPANDBZrmkz: [ 0.00 0.00 ]
+Key: VPEXPANDBZrr: [ 0.00 0.00 ]
+Key: VPEXPANDBZrrk: [ 0.00 0.00 ]
+Key: VPEXPANDBZrrkz: [ 0.00 0.00 ]
+Key: VPEXPANDDZ: [ 0.00 0.00 ]
+Key: VPEXPANDDZrm: [ 0.00 0.00 ]
+Key: VPEXPANDDZrmk: [ 0.00 0.00 ]
+Key: VPEXPANDDZrmkz: [ 0.00 0.00 ]
+Key: VPEXPANDDZrr: [ 0.00 0.00 ]
+Key: VPEXPANDDZrrk: [ 0.00 0.00 ]
+Key: VPEXPANDDZrrkz: [ 0.00 0.00 ]
+Key: VPEXPANDQZ: [ 0.00 0.00 ]
+Key: VPEXPANDQZrm: [ 0.00 0.00 ]
+Key: VPEXPANDQZrmk: [ 0.00 0.00 ]
+Key: VPEXPANDQZrmkz: [ 0.00 0.00 ]
+Key: VPEXPANDQZrr: [ 0.00 0.00 ]
+Key: VPEXPANDQZrrk: [ 0.00 0.00 ]
+Key: VPEXPANDQZrrkz: [ 0.00 0.00 ]
+Key: VPEXPANDWZ: [ 0.00 0.00 ]
+Key: VPEXPANDWZrm: [ 0.00 0.00 ]
+Key: VPEXPANDWZrmk: [ 0.00 0.00 ]
+Key: VPEXPANDWZrmkz: [ 0.00 0.00 ]
+Key: VPEXPANDWZrr: [ 0.00 0.00 ]
+Key: VPEXPANDWZrrk: [ 0.00 0.00 ]
+Key: VPEXPANDWZrrkz: [ 0.00 0.00 ]
+Key: VPEXTRBZmri: [ 0.00 0.00 ]
+Key: VPEXTRBZrri: [ 0.00 0.00 ]
+Key: VPEXTRBmri: [ 0.00 0.00 ]
+Key: VPEXTRBrri: [ 0.00 0.00 ]
+Key: VPEXTRDZmri: [ 0.00 0.00 ]
+Key: VPEXTRDZrri: [ 0.00 0.00 ]
+Key: VPEXTRDmri: [ 0.00 0.00 ]
+Key: VPEXTRDrri: [ 0.00 0.00 ]
+Key: VPEXTRQZmri: [ 0.00 0.00 ]
+Key: VPEXTRQZrri: [ 0.00 0.00 ]
+Key: VPEXTRQmri: [ 0.00 0.00 ]
+Key: VPEXTRQrri: [ 0.00 0.00 ]
+Key: VPEXTRWZmri: [ 0.00 0.00 ]
+Key: VPEXTRWZrri: [ 0.00 0.00 ]
+Key: VPEXTRWZrri_REV: [ 0.00 0.00 ]
+Key: VPEXTRWmri: [ 0.00 0.00 ]
+Key: VPEXTRWrri: [ 0.00 0.00 ]
+Key: VPEXTRWrri_REV: [ 0.00 0.00 ]
+Key: VPGATHERDDYrm: [ 0.00 0.00 ]
+Key: VPGATHERDDZ: [ 0.00 0.00 ]
+Key: VPGATHERDDZrm: [ 0.00 0.00 ]
+Key: VPGATHERDDrm: [ 0.00 0.00 ]
+Key: VPGATHERDQYrm: [ 0.00 0.00 ]
+Key: VPGATHERDQZ: [ 0.00 0.00 ]
+Key: VPGATHERDQZrm: [ 0.00 0.00 ]
+Key: VPGATHERDQrm: [ 0.00 0.00 ]
+Key: VPGATHERQDYrm: [ 0.00 0.00 ]
+Key: VPGATHERQDZ: [ 0.00 0.00 ]
+Key: VPGATHERQDZrm: [ 0.00 0.00 ]
+Key: VPGATHERQDrm: [ 0.00 0.00 ]
+Key: VPGATHERQQYrm: [ 0.00 0.00 ]
+Key: VPGATHERQQZ: [ 0.00 0.00 ]
+Key: VPGATHERQQZrm: [ 0.00 0.00 ]
+Key: VPGATHERQQrm: [ 0.00 0.00 ]
+Key: VPHADDBDrm: [ 0.00 0.00 ]
+Key: VPHADDBDrr: [ 0.00 0.00 ]
+Key: VPHADDBQrm: [ 0.00 0.00 ]
+Key: VPHADDBQrr: [ 0.00 0.00 ]
+Key: VPHADDBWrm: [ 0.00 0.00 ]
+Key: VPHADDBWrr: [ 0.00 0.00 ]
+Key: VPHADDDQrm: [ 0.00 0.00 ]
+Key: VPHADDDQrr: [ 0.00 0.00 ]
+Key: VPHADDDYrm: [ 0.00 0.00 ]
+Key: VPHADDDYrr: [ 0.00 0.00 ]
+Key: VPHADDDrm: [ 0.00 0.00 ]
+Key: VPHADDDrr: [ 0.00 0.00 ]
+Key: VPHADDSWYrm: [ 0.00 0.00 ]
+Key: VPHADDSWYrr: [ 0.00 0.00 ]
+Key: VPHADDSWrm: [ 0.00 0.00 ]
+Key: VPHADDSWrr: [ 0.00 0.00 ]
+Key: VPHADDUBDrm: [ 0.00 0.00 ]
+Key: VPHADDUBDrr: [ 0.00 0.00 ]
+Key: VPHADDUBQrm: [ 0.00 0.00 ]
+Key: VPHADDUBQrr: [ 0.00 0.00 ]
+Key: VPHADDUBWrm: [ 0.00 0.00 ]
+Key: VPHADDUBWrr: [ 0.00 0.00 ]
+Key: VPHADDUDQrm: [ 0.00 0.00 ]
+Key: VPHADDUDQrr: [ 0.00 0.00 ]
+Key: VPHADDUWDrm: [ 0.00 0.00 ]
+Key: VPHADDUWDrr: [ 0.00 0.00 ]
+Key: VPHADDUWQrm: [ 0.00 0.00 ]
+Key: VPHADDUWQrr: [ 0.00 0.00 ]
+Key: VPHADDWDrm: [ 0.00 0.00 ]
+Key: VPHADDWDrr: [ 0.00 0.00 ]
+Key: VPHADDWQrm: [ 0.00 0.00 ]
+Key: VPHADDWQrr: [ 0.00 0.00 ]
+Key: VPHADDWYrm: [ 0.00 0.00 ]
+Key: VPHADDWYrr: [ 0.00 0.00 ]
+Key: VPHADDWrm: [ 0.00 0.00 ]
+Key: VPHADDWrr: [ 0.00 0.00 ]
+Key: VPHMINPOSUWrm: [ 0.00 0.00 ]
+Key: VPHMINPOSUWrr: [ 0.00 0.00 ]
+Key: VPHSUBBWrm: [ 0.00 0.00 ]
+Key: VPHSUBBWrr: [ 0.00 0.00 ]
+Key: VPHSUBDQrm: [ 0.00 0.00 ]
+Key: VPHSUBDQrr: [ 0.00 0.00 ]
+Key: VPHSUBDYrm: [ 0.00 0.00 ]
+Key: VPHSUBDYrr: [ 0.00 0.00 ]
+Key: VPHSUBDrm: [ 0.00 0.00 ]
+Key: VPHSUBDrr: [ 0.00 0.00 ]
+Key: VPHSUBSWYrm: [ 0.00 0.00 ]
+Key: VPHSUBSWYrr: [ 0.00 0.00 ]
+Key: VPHSUBSWrm: [ 0.00 0.00 ]
+Key: VPHSUBSWrr: [ 0.00 0.00 ]
+Key: VPHSUBWDrm: [ 0.00 0.00 ]
+Key: VPHSUBWDrr: [ 0.00 0.00 ]
+Key: VPHSUBWYrm: [ 0.00 0.00 ]
+Key: VPHSUBWYrr: [ 0.00 0.00 ]
+Key: VPHSUBWrm: [ 0.00 0.00 ]
+Key: VPHSUBWrr: [ 0.00 0.00 ]
+Key: VPINSRBZrmi: [ 0.00 0.00 ]
+Key: VPINSRBZrri: [ 0.00 0.00 ]
+Key: VPINSRBrmi: [ 0.00 0.00 ]
+Key: VPINSRBrri: [ 0.00 0.00 ]
+Key: VPINSRDZrmi: [ 0.00 0.00 ]
+Key: VPINSRDZrri: [ 0.00 0.00 ]
+Key: VPINSRDrmi: [ 0.00 0.00 ]
+Key: VPINSRDrri: [ 0.00 0.00 ]
+Key: VPINSRQZrmi: [ 0.00 0.00 ]
+Key: VPINSRQZrri: [ 0.00 0.00 ]
+Key: VPINSRQrmi: [ 0.00 0.00 ]
+Key: VPINSRQrri: [ 0.00 0.00 ]
+Key: VPINSRWZrmi: [ 0.00 0.00 ]
+Key: VPINSRWZrri: [ 0.00 0.00 ]
+Key: VPINSRWrmi: [ 0.00 0.00 ]
+Key: VPINSRWrri: [ 0.00 0.00 ]
+Key: VPLZCNTDZ: [ 0.00 0.00 ]
+Key: VPLZCNTDZrm: [ 0.00 0.00 ]
+Key: VPLZCNTDZrmb: [ 0.00 0.00 ]
+Key: VPLZCNTDZrmbk: [ 0.00 0.00 ]
+Key: VPLZCNTDZrmbkz: [ 0.00 0.00 ]
+Key: VPLZCNTDZrmk: [ 0.00 0.00 ]
+Key: VPLZCNTDZrmkz: [ 0.00 0.00 ]
+Key: VPLZCNTDZrr: [ 0.00 0.00 ]
+Key: VPLZCNTDZrrk: [ 0.00 0.00 ]
+Key: VPLZCNTDZrrkz: [ 0.00 0.00 ]
+Key: VPLZCNTQZ: [ 0.00 0.00 ]
+Key: VPLZCNTQZrm: [ 0.00 0.00 ]
+Key: VPLZCNTQZrmb: [ 0.00 0.00 ]
+Key: VPLZCNTQZrmbk: [ 0.00 0.00 ]
+Key: VPLZCNTQZrmbkz: [ 0.00 0.00 ]
+Key: VPLZCNTQZrmk: [ 0.00 0.00 ]
+Key: VPLZCNTQZrmkz: [ 0.00 0.00 ]
+Key: VPLZCNTQZrr: [ 0.00 0.00 ]
+Key: VPLZCNTQZrrk: [ 0.00 0.00 ]
+Key: VPLZCNTQZrrkz: [ 0.00 0.00 ]
+Key: VPMACSDDrm: [ 0.00 0.00 ]
+Key: VPMACSDDrr: [ 0.00 0.00 ]
+Key: VPMACSDQHrm: [ 0.00 0.00 ]
+Key: VPMACSDQHrr: [ 0.00 0.00 ]
+Key: VPMACSDQLrm: [ 0.00 0.00 ]
+Key: VPMACSDQLrr: [ 0.00 0.00 ]
+Key: VPMACSSDDrm: [ 0.00 0.00 ]
+Key: VPMACSSDDrr: [ 0.00 0.00 ]
+Key: VPMACSSDQHrm: [ 0.00 0.00 ]
+Key: VPMACSSDQHrr: [ 0.00 0.00 ]
+Key: VPMACSSDQLrm: [ 0.00 0.00 ]
+Key: VPMACSSDQLrr: [ 0.00 0.00 ]
+Key: VPMACSSWDrm: [ 0.00 0.00 ]
+Key: VPMACSSWDrr: [ 0.00 0.00 ]
+Key: VPMACSSWWrm: [ 0.00 0.00 ]
+Key: VPMACSSWWrr: [ 0.00 0.00 ]
+Key: VPMACSWDrm: [ 0.00 0.00 ]
+Key: VPMACSWDrr: [ 0.00 0.00 ]
+Key: VPMACSWWrm: [ 0.00 0.00 ]
+Key: VPMACSWWrr: [ 0.00 0.00 ]
+Key: VPMADCSSWDrm: [ 0.00 0.00 ]
+Key: VPMADCSSWDrr: [ 0.00 0.00 ]
+Key: VPMADCSWDrm: [ 0.00 0.00 ]
+Key: VPMADCSWDrr: [ 0.00 0.00 ]
+Key: VPMADD: [ 0.00 0.00 ]
+Key: VPMADDUBSWYrm: [ 0.00 0.00 ]
+Key: VPMADDUBSWYrr: [ 0.00 0.00 ]
+Key: VPMADDUBSWZ: [ 0.00 0.00 ]
+Key: VPMADDUBSWZrm: [ 0.00 0.00 ]
+Key: VPMADDUBSWZrmk: [ 0.00 0.00 ]
+Key: VPMADDUBSWZrmkz: [ 0.00 0.00 ]
+Key: VPMADDUBSWZrr: [ 0.00 0.00 ]
+Key: VPMADDUBSWZrrk: [ 0.00 0.00 ]
+Key: VPMADDUBSWZrrkz: [ 0.00 0.00 ]
+Key: VPMADDUBSWrm: [ 0.00 0.00 ]
+Key: VPMADDUBSWrr: [ 0.00 0.00 ]
+Key: VPMADDWDYrm: [ 0.00 0.00 ]
+Key: VPMADDWDYrr: [ 0.00 0.00 ]
+Key: VPMADDWDZ: [ 0.00 0.00 ]
+Key: VPMADDWDZrm: [ 0.00 0.00 ]
+Key: VPMADDWDZrmk: [ 0.00 0.00 ]
+Key: VPMADDWDZrmkz: [ 0.00 0.00 ]
+Key: VPMADDWDZrr: [ 0.00 0.00 ]
+Key: VPMADDWDZrrk: [ 0.00 0.00 ]
+Key: VPMADDWDZrrkz: [ 0.00 0.00 ]
+Key: VPMADDWDrm: [ 0.00 0.00 ]
+Key: VPMADDWDrr: [ 0.00 0.00 ]
+Key: VPMASKMOVDYmr: [ 0.00 0.00 ]
+Key: VPMASKMOVDYrm: [ 0.00 0.00 ]
+Key: VPMASKMOVDmr: [ 0.00 0.00 ]
+Key: VPMASKMOVDrm: [ 0.00 0.00 ]
+Key: VPMASKMOVQYmr: [ 0.00 0.00 ]
+Key: VPMASKMOVQYrm: [ 0.00 0.00 ]
+Key: VPMASKMOVQmr: [ 0.00 0.00 ]
+Key: VPMASKMOVQrm: [ 0.00 0.00 ]
+Key: VPMAXSBYrm: [ 0.00 0.00 ]
+Key: VPMAXSBYrr: [ 0.00 0.00 ]
+Key: VPMAXSBZ: [ 0.00 0.00 ]
+Key: VPMAXSBZrm: [ 0.00 0.00 ]
+Key: VPMAXSBZrmk: [ 0.00 0.00 ]
+Key: VPMAXSBZrmkz: [ 0.00 0.00 ]
+Key: VPMAXSBZrr: [ 0.00 0.00 ]
+Key: VPMAXSBZrrk: [ 0.00 0.00 ]
+Key: VPMAXSBZrrkz: [ 0.00 0.00 ]
+Key: VPMAXSBrm: [ 0.00 0.00 ]
+Key: VPMAXSBrr: [ 0.00 0.00 ]
+Key: VPMAXSDYrm: [ 0.00 0.00 ]
+Key: VPMAXSDYrr: [ 0.00 0.00 ]
+Key: VPMAXSDZ: [ 0.00 0.00 ]
+Key: VPMAXSDZrm: [ 0.00 0.00 ]
+Key: VPMAXSDZrmb: [ 0.00 0.00 ]
+Key: VPMAXSDZrmbk: [ 0.00 0.00 ]
+Key: VPMAXSDZrmbkz: [ 0.00 0.00 ]
+Key: VPMAXSDZrmk: [ 0.00 0.00 ]
+Key: VPMAXSDZrmkz: [ 0.00 0.00 ]
+Key: VPMAXSDZrr: [ 0.00 0.00 ]
+Key: VPMAXSDZrrk: [ 0.00 0.00 ]
+Key: VPMAXSDZrrkz: [ 0.00 0.00 ]
+Key: VPMAXSDrm: [ 0.00 0.00 ]
+Key: VPMAXSDrr: [ 0.00 0.00 ]
+Key: VPMAXSQZ: [ 0.00 0.00 ]
+Key: VPMAXSQZrm: [ 0.00 0.00 ]
+Key: VPMAXSQZrmb: [ 0.00 0.00 ]
+Key: VPMAXSQZrmbk: [ 0.00 0.00 ]
+Key: VPMAXSQZrmbkz: [ 0.00 0.00 ]
+Key: VPMAXSQZrmk: [ 0.00 0.00 ]
+Key: VPMAXSQZrmkz: [ 0.00 0.00 ]
+Key: VPMAXSQZrr: [ 0.00 0.00 ]
+Key: VPMAXSQZrrk: [ 0.00 0.00 ]
+Key: VPMAXSQZrrkz: [ 0.00 0.00 ]
+Key: VPMAXSWYrm: [ 0.00 0.00 ]
+Key: VPMAXSWYrr: [ 0.00 0.00 ]
+Key: VPMAXSWZ: [ 0.00 0.00 ]
+Key: VPMAXSWZrm: [ 0.00 0.00 ]
+Key: VPMAXSWZrmk: [ 0.00 0.00 ]
+Key: VPMAXSWZrmkz: [ 0.00 0.00 ]
+Key: VPMAXSWZrr: [ 0.00 0.00 ]
+Key: VPMAXSWZrrk: [ 0.00 0.00 ]
+Key: VPMAXSWZrrkz: [ 0.00 0.00 ]
+Key: VPMAXSWrm: [ 0.00 0.00 ]
+Key: VPMAXSWrr: [ 0.00 0.00 ]
+Key: VPMAXUBYrm: [ 0.00 0.00 ]
+Key: VPMAXUBYrr: [ 0.00 0.00 ]
+Key: VPMAXUBZ: [ 0.00 0.00 ]
+Key: VPMAXUBZrm: [ 0.00 0.00 ]
+Key: VPMAXUBZrmk: [ 0.00 0.00 ]
+Key: VPMAXUBZrmkz: [ 0.00 0.00 ]
+Key: VPMAXUBZrr: [ 0.00 0.00 ]
+Key: VPMAXUBZrrk: [ 0.00 0.00 ]
+Key: VPMAXUBZrrkz: [ 0.00 0.00 ]
+Key: VPMAXUBrm: [ 0.00 0.00 ]
+Key: VPMAXUBrr: [ 0.00 0.00 ]
+Key: VPMAXUDYrm: [ 0.00 0.00 ]
+Key: VPMAXUDYrr: [ 0.00 0.00 ]
+Key: VPMAXUDZ: [ 0.00 0.00 ]
+Key: VPMAXUDZrm: [ 0.00 0.00 ]
+Key: VPMAXUDZrmb: [ 0.00 0.00 ]
+Key: VPMAXUDZrmbk: [ 0.00 0.00 ]
+Key: VPMAXUDZrmbkz: [ 0.00 0.00 ]
+Key: VPMAXUDZrmk: [ 0.00 0.00 ]
+Key: VPMAXUDZrmkz: [ 0.00 0.00 ]
+Key: VPMAXUDZrr: [ 0.00 0.00 ]
+Key: VPMAXUDZrrk: [ 0.00 0.00 ]
+Key: VPMAXUDZrrkz: [ 0.00 0.00 ]
+Key: VPMAXUDrm: [ 0.00 0.00 ]
+Key: VPMAXUDrr: [ 0.00 0.00 ]
+Key: VPMAXUQZ: [ 0.00 0.00 ]
+Key: VPMAXUQZrm: [ 0.00 0.00 ]
+Key: VPMAXUQZrmb: [ 0.00 0.00 ]
+Key: VPMAXUQZrmbk: [ 0.00 0.00 ]
+Key: VPMAXUQZrmbkz: [ 0.00 0.00 ]
+Key: VPMAXUQZrmk: [ 0.00 0.00 ]
+Key: VPMAXUQZrmkz: [ 0.00 0.00 ]
+Key: VPMAXUQZrr: [ 0.00 0.00 ]
+Key: VPMAXUQZrrk: [ 0.00 0.00 ]
+Key: VPMAXUQZrrkz: [ 0.00 0.00 ]
+Key: VPMAXUWYrm: [ 0.00 0.00 ]
+Key: VPMAXUWYrr: [ 0.00 0.00 ]
+Key: VPMAXUWZ: [ 0.00 0.00 ]
+Key: VPMAXUWZrm: [ 0.00 0.00 ]
+Key: VPMAXUWZrmk: [ 0.00 0.00 ]
+Key: VPMAXUWZrmkz: [ 0.00 0.00 ]
+Key: VPMAXUWZrr: [ 0.00 0.00 ]
+Key: VPMAXUWZrrk: [ 0.00 0.00 ]
+Key: VPMAXUWZrrkz: [ 0.00 0.00 ]
+Key: VPMAXUWrm: [ 0.00 0.00 ]
+Key: VPMAXUWrr: [ 0.00 0.00 ]
+Key: VPMINSBYrm: [ 0.00 0.00 ]
+Key: VPMINSBYrr: [ 0.00 0.00 ]
+Key: VPMINSBZ: [ 0.00 0.00 ]
+Key: VPMINSBZrm: [ 0.00 0.00 ]
+Key: VPMINSBZrmk: [ 0.00 0.00 ]
+Key: VPMINSBZrmkz: [ 0.00 0.00 ]
+Key: VPMINSBZrr: [ 0.00 0.00 ]
+Key: VPMINSBZrrk: [ 0.00 0.00 ]
+Key: VPMINSBZrrkz: [ 0.00 0.00 ]
+Key: VPMINSBrm: [ 0.00 0.00 ]
+Key: VPMINSBrr: [ 0.00 0.00 ]
+Key: VPMINSDYrm: [ 0.00 0.00 ]
+Key: VPMINSDYrr: [ 0.00 0.00 ]
+Key: VPMINSDZ: [ 0.00 0.00 ]
+Key: VPMINSDZrm: [ 0.00 0.00 ]
+Key: VPMINSDZrmb: [ 0.00 0.00 ]
+Key: VPMINSDZrmbk: [ 0.00 0.00 ]
+Key: VPMINSDZrmbkz: [ 0.00 0.00 ]
+Key: VPMINSDZrmk: [ 0.00 0.00 ]
+Key: VPMINSDZrmkz: [ 0.00 0.00 ]
+Key: VPMINSDZrr: [ 0.00 0.00 ]
+Key: VPMINSDZrrk: [ 0.00 0.00 ]
+Key: VPMINSDZrrkz: [ 0.00 0.00 ]
+Key: VPMINSDrm: [ 0.00 0.00 ]
+Key: VPMINSDrr: [ 0.00 0.00 ]
+Key: VPMINSQZ: [ 0.00 0.00 ]
+Key: VPMINSQZrm: [ 0.00 0.00 ]
+Key: VPMINSQZrmb: [ 0.00 0.00 ]
+Key: VPMINSQZrmbk: [ 0.00 0.00 ]
+Key: VPMINSQZrmbkz: [ 0.00 0.00 ]
+Key: VPMINSQZrmk: [ 0.00 0.00 ]
+Key: VPMINSQZrmkz: [ 0.00 0.00 ]
+Key: VPMINSQZrr: [ 0.00 0.00 ]
+Key: VPMINSQZrrk: [ 0.00 0.00 ]
+Key: VPMINSQZrrkz: [ 0.00 0.00 ]
+Key: VPMINSWYrm: [ 0.00 0.00 ]
+Key: VPMINSWYrr: [ 0.00 0.00 ]
+Key: VPMINSWZ: [ 0.00 0.00 ]
+Key: VPMINSWZrm: [ 0.00 0.00 ]
+Key: VPMINSWZrmk: [ 0.00 0.00 ]
+Key: VPMINSWZrmkz: [ 0.00 0.00 ]
+Key: VPMINSWZrr: [ 0.00 0.00 ]
+Key: VPMINSWZrrk: [ 0.00 0.00 ]
+Key: VPMINSWZrrkz: [ 0.00 0.00 ]
+Key: VPMINSWrm: [ 0.00 0.00 ]
+Key: VPMINSWrr: [ 0.00 0.00 ]
+Key: VPMINUBYrm: [ 0.00 0.00 ]
+Key: VPMINUBYrr: [ 0.00 0.00 ]
+Key: VPMINUBZ: [ 0.00 0.00 ]
+Key: VPMINUBZrm: [ 0.00 0.00 ]
+Key: VPMINUBZrmk: [ 0.00 0.00 ]
+Key: VPMINUBZrmkz: [ 0.00 0.00 ]
+Key: VPMINUBZrr: [ 0.00 0.00 ]
+Key: VPMINUBZrrk: [ 0.00 0.00 ]
+Key: VPMINUBZrrkz: [ 0.00 0.00 ]
+Key: VPMINUBrm: [ 0.00 0.00 ]
+Key: VPMINUBrr: [ 0.00 0.00 ]
+Key: VPMINUDYrm: [ 0.00 0.00 ]
+Key: VPMINUDYrr: [ 0.00 0.00 ]
+Key: VPMINUDZ: [ 0.00 0.00 ]
+Key: VPMINUDZrm: [ 0.00 0.00 ]
+Key: VPMINUDZrmb: [ 0.00 0.00 ]
+Key: VPMINUDZrmbk: [ 0.00 0.00 ]
+Key: VPMINUDZrmbkz: [ 0.00 0.00 ]
+Key: VPMINUDZrmk: [ 0.00 0.00 ]
+Key: VPMINUDZrmkz: [ 0.00 0.00 ]
+Key: VPMINUDZrr: [ 0.00 0.00 ]
+Key: VPMINUDZrrk: [ 0.00 0.00 ]
+Key: VPMINUDZrrkz: [ 0.00 0.00 ]
+Key: VPMINUDrm: [ 0.00 0.00 ]
+Key: VPMINUDrr: [ 0.00 0.00 ]
+Key: VPMINUQZ: [ 0.00 0.00 ]
+Key: VPMINUQZrm: [ 0.00 0.00 ]
+Key: VPMINUQZrmb: [ 0.00 0.00 ]
+Key: VPMINUQZrmbk: [ 0.00 0.00 ]
+Key: VPMINUQZrmbkz: [ 0.00 0.00 ]
+Key: VPMINUQZrmk: [ 0.00 0.00 ]
+Key: VPMINUQZrmkz: [ 0.00 0.00 ]
+Key: VPMINUQZrr: [ 0.00 0.00 ]
+Key: VPMINUQZrrk: [ 0.00 0.00 ]
+Key: VPMINUQZrrkz: [ 0.00 0.00 ]
+Key: VPMINUWYrm: [ 0.00 0.00 ]
+Key: VPMINUWYrr: [ 0.00 0.00 ]
+Key: VPMINUWZ: [ 0.00 0.00 ]
+Key: VPMINUWZrm: [ 0.00 0.00 ]
+Key: VPMINUWZrmk: [ 0.00 0.00 ]
+Key: VPMINUWZrmkz: [ 0.00 0.00 ]
+Key: VPMINUWZrr: [ 0.00 0.00 ]
+Key: VPMINUWZrrk: [ 0.00 0.00 ]
+Key: VPMINUWZrrkz: [ 0.00 0.00 ]
+Key: VPMINUWrm: [ 0.00 0.00 ]
+Key: VPMINUWrr: [ 0.00 0.00 ]
+Key: VPMOVB: [ 0.00 0.00 ]
+Key: VPMOVD: [ 0.00 0.00 ]
+Key: VPMOVDBZ: [ 0.00 0.00 ]
+Key: VPMOVDBZmr: [ 0.00 0.00 ]
+Key: VPMOVDBZmrk: [ 0.00 0.00 ]
+Key: VPMOVDBZrr: [ 0.00 0.00 ]
+Key: VPMOVDBZrrk: [ 0.00 0.00 ]
+Key: VPMOVDBZrrkz: [ 0.00 0.00 ]
+Key: VPMOVDWZ: [ 0.00 0.00 ]
+Key: VPMOVDWZmr: [ 0.00 0.00 ]
+Key: VPMOVDWZmrk: [ 0.00 0.00 ]
+Key: VPMOVDWZrr: [ 0.00 0.00 ]
+Key: VPMOVDWZrrk: [ 0.00 0.00 ]
+Key: VPMOVDWZrrkz: [ 0.00 0.00 ]
+Key: VPMOVM: [ 0.00 0.00 ]
+Key: VPMOVMSKBYrr: [ 0.00 0.00 ]
+Key: VPMOVMSKBrr: [ 0.00 0.00 ]
+Key: VPMOVQ: [ 0.00 0.00 ]
+Key: VPMOVQBZ: [ 0.00 0.00 ]
+Key: VPMOVQBZmr: [ 0.00 0.00 ]
+Key: VPMOVQBZmrk: [ 0.00 0.00 ]
+Key: VPMOVQBZrr: [ 0.00 0.00 ]
+Key: VPMOVQBZrrk: [ 0.00 0.00 ]
+Key: VPMOVQBZrrkz: [ 0.00 0.00 ]
+Key: VPMOVQDZ: [ 0.00 0.00 ]
+Key: VPMOVQDZmr: [ 0.00 0.00 ]
+Key: VPMOVQDZmrk: [ 0.00 0.00 ]
+Key: VPMOVQDZrr: [ 0.00 0.00 ]
+Key: VPMOVQDZrrk: [ 0.00 0.00 ]
+Key: VPMOVQDZrrkz: [ 0.00 0.00 ]
+Key: VPMOVQWZ: [ 0.00 0.00 ]
+Key: VPMOVQWZmr: [ 0.00 0.00 ]
+Key: VPMOVQWZmrk: [ 0.00 0.00 ]
+Key: VPMOVQWZrr: [ 0.00 0.00 ]
+Key: VPMOVQWZrrk: [ 0.00 0.00 ]
+Key: VPMOVQWZrrkz: [ 0.00 0.00 ]
+Key: VPMOVSDBZ: [ 0.00 0.00 ]
+Key: VPMOVSDBZmr: [ 0.00 0.00 ]
+Key: VPMOVSDBZmrk: [ 0.00 0.00 ]
+Key: VPMOVSDBZrr: [ 0.00 0.00 ]
+Key: VPMOVSDBZrrk: [ 0.00 0.00 ]
+Key: VPMOVSDBZrrkz: [ 0.00 0.00 ]
+Key: VPMOVSDWZ: [ 0.00 0.00 ]
+Key: VPMOVSDWZmr: [ 0.00 0.00 ]
+Key: VPMOVSDWZmrk: [ 0.00 0.00 ]
+Key: VPMOVSDWZrr: [ 0.00 0.00 ]
+Key: VPMOVSDWZrrk: [ 0.00 0.00 ]
+Key: VPMOVSDWZrrkz: [ 0.00 0.00 ]
+Key: VPMOVSQBZ: [ 0.00 0.00 ]
+Key: VPMOVSQBZmr: [ 0.00 0.00 ]
+Key: VPMOVSQBZmrk: [ 0.00 0.00 ]
+Key: VPMOVSQBZrr: [ 0.00 0.00 ]
+Key: VPMOVSQBZrrk: [ 0.00 0.00 ]
+Key: VPMOVSQBZrrkz: [ 0.00 0.00 ]
+Key: VPMOVSQDZ: [ 0.00 0.00 ]
+Key: VPMOVSQDZmr: [ 0.00 0.00 ]
+Key: VPMOVSQDZmrk: [ 0.00 0.00 ]
+Key: VPMOVSQDZrr: [ 0.00 0.00 ]
+Key: VPMOVSQDZrrk: [ 0.00 0.00 ]
+Key: VPMOVSQDZrrkz: [ 0.00 0.00 ]
+Key: VPMOVSQWZ: [ 0.00 0.00 ]
+Key: VPMOVSQWZmr: [ 0.00 0.00 ]
+Key: VPMOVSQWZmrk: [ 0.00 0.00 ]
+Key: VPMOVSQWZrr: [ 0.00 0.00 ]
+Key: VPMOVSQWZrrk: [ 0.00 0.00 ]
+Key: VPMOVSQWZrrkz: [ 0.00 0.00 ]
+Key: VPMOVSWBZ: [ 0.00 0.00 ]
+Key: VPMOVSWBZmr: [ 0.00 0.00 ]
+Key: VPMOVSWBZmrk: [ 0.00 0.00 ]
+Key: VPMOVSWBZrr: [ 0.00 0.00 ]
+Key: VPMOVSWBZrrk: [ 0.00 0.00 ]
+Key: VPMOVSWBZrrkz: [ 0.00 0.00 ]
+Key: VPMOVSXBDYrm: [ 0.00 0.00 ]
+Key: VPMOVSXBDYrr: [ 0.00 0.00 ]
+Key: VPMOVSXBDZ: [ 0.00 0.00 ]
+Key: VPMOVSXBDZrm: [ 0.00 0.00 ]
+Key: VPMOVSXBDZrmk: [ 0.00 0.00 ]
+Key: VPMOVSXBDZrmkz: [ 0.00 0.00 ]
+Key: VPMOVSXBDZrr: [ 0.00 0.00 ]
+Key: VPMOVSXBDZrrk: [ 0.00 0.00 ]
+Key: VPMOVSXBDZrrkz: [ 0.00 0.00 ]
+Key: VPMOVSXBDrm: [ 0.00 0.00 ]
+Key: VPMOVSXBDrr: [ 0.00 0.00 ]
+Key: VPMOVSXBQYrm: [ 0.00 0.00 ]
+Key: VPMOVSXBQYrr: [ 0.00 0.00 ]
+Key: VPMOVSXBQZ: [ 0.00 0.00 ]
+Key: VPMOVSXBQZrm: [ 0.00 0.00 ]
+Key: VPMOVSXBQZrmk: [ 0.00 0.00 ]
+Key: VPMOVSXBQZrmkz: [ 0.00 0.00 ]
+Key: VPMOVSXBQZrr: [ 0.00 0.00 ]
+Key: VPMOVSXBQZrrk: [ 0.00 0.00 ]
+Key: VPMOVSXBQZrrkz: [ 0.00 0.00 ]
+Key: VPMOVSXBQrm: [ 0.00 0.00 ]
+Key: VPMOVSXBQrr: [ 0.00 0.00 ]
+Key: VPMOVSXBWYrm: [ 0.00 0.00 ]
+Key: VPMOVSXBWYrr: [ 0.00 0.00 ]
+Key: VPMOVSXBWZ: [ 0.00 0.00 ]
+Key: VPMOVSXBWZrm: [ 0.00 0.00 ]
+Key: VPMOVSXBWZrmk: [ 0.00 0.00 ]
+Key: VPMOVSXBWZrmkz: [ 0.00 0.00 ]
+Key: VPMOVSXBWZrr: [ 0.00 0.00 ]
+Key: VPMOVSXBWZrrk: [ 0.00 0.00 ]
+Key: VPMOVSXBWZrrkz: [ 0.00 0.00 ]
+Key: VPMOVSXBWrm: [ 0.00 0.00 ]
+Key: VPMOVSXBWrr: [ 0.00 0.00 ]
+Key: VPMOVSXDQYrm: [ 0.00 0.00 ]
+Key: VPMOVSXDQYrr: [ 0.00 0.00 ]
+Key: VPMOVSXDQZ: [ 0.00 0.00 ]
+Key: VPMOVSXDQZrm: [ 0.00 0.00 ]
+Key: VPMOVSXDQZrmk: [ 0.00 0.00 ]
+Key: VPMOVSXDQZrmkz: [ 0.00 0.00 ]
+Key: VPMOVSXDQZrr: [ 0.00 0.00 ]
+Key: VPMOVSXDQZrrk: [ 0.00 0.00 ]
+Key: VPMOVSXDQZrrkz: [ 0.00 0.00 ]
+Key: VPMOVSXDQrm: [ 0.00 0.00 ]
+Key: VPMOVSXDQrr: [ 0.00 0.00 ]
+Key: VPMOVSXWDYrm: [ 0.00 0.00 ]
+Key: VPMOVSXWDYrr: [ 0.00 0.00 ]
+Key: VPMOVSXWDZ: [ 0.00 0.00 ]
+Key: VPMOVSXWDZrm: [ 0.00 0.00 ]
+Key: VPMOVSXWDZrmk: [ 0.00 0.00 ]
+Key: VPMOVSXWDZrmkz: [ 0.00 0.00 ]
+Key: VPMOVSXWDZrr: [ 0.00 0.00 ]
+Key: VPMOVSXWDZrrk: [ 0.00 0.00 ]
+Key: VPMOVSXWDZrrkz: [ 0.00 0.00 ]
+Key: VPMOVSXWDrm: [ 0.00 0.00 ]
+Key: VPMOVSXWDrr: [ 0.00 0.00 ]
+Key: VPMOVSXWQYrm: [ 0.00 0.00 ]
+Key: VPMOVSXWQYrr: [ 0.00 0.00 ]
+Key: VPMOVSXWQZ: [ 0.00 0.00 ]
+Key: VPMOVSXWQZrm: [ 0.00 0.00 ]
+Key: VPMOVSXWQZrmk: [ 0.00 0.00 ]
+Key: VPMOVSXWQZrmkz: [ 0.00 0.00 ]
+Key: VPMOVSXWQZrr: [ 0.00 0.00 ]
+Key: VPMOVSXWQZrrk: [ 0.00 0.00 ]
+Key: VPMOVSXWQZrrkz: [ 0.00 0.00 ]
+Key: VPMOVSXWQrm: [ 0.00 0.00 ]
+Key: VPMOVSXWQrr: [ 0.00 0.00 ]
+Key: VPMOVUSDBZ: [ 0.00 0.00 ]
+Key: VPMOVUSDBZmr: [ 0.00 0.00 ]
+Key: VPMOVUSDBZmrk: [ 0.00 0.00 ]
+Key: VPMOVUSDBZrr: [ 0.00 0.00 ]
+Key: VPMOVUSDBZrrk: [ 0.00 0.00 ]
+Key: VPMOVUSDBZrrkz: [ 0.00 0.00 ]
+Key: VPMOVUSDWZ: [ 0.00 0.00 ]
+Key: VPMOVUSDWZmr: [ 0.00 0.00 ]
+Key: VPMOVUSDWZmrk: [ 0.00 0.00 ]
+Key: VPMOVUSDWZrr: [ 0.00 0.00 ]
+Key: VPMOVUSDWZrrk: [ 0.00 0.00 ]
+Key: VPMOVUSDWZrrkz: [ 0.00 0.00 ]
+Key: VPMOVUSQBZ: [ 0.00 0.00 ]
+Key: VPMOVUSQBZmr: [ 0.00 0.00 ]
+Key: VPMOVUSQBZmrk: [ 0.00 0.00 ]
+Key: VPMOVUSQBZrr: [ 0.00 0.00 ]
+Key: VPMOVUSQBZrrk: [ 0.00 0.00 ]
+Key: VPMOVUSQBZrrkz: [ 0.00 0.00 ]
+Key: VPMOVUSQDZ: [ 0.00 0.00 ]
+Key: VPMOVUSQDZmr: [ 0.00 0.00 ]
+Key: VPMOVUSQDZmrk: [ 0.00 0.00 ]
+Key: VPMOVUSQDZrr: [ 0.00 0.00 ]
+Key: VPMOVUSQDZrrk: [ 0.00 0.00 ]
+Key: VPMOVUSQDZrrkz: [ 0.00 0.00 ]
+Key: VPMOVUSQWZ: [ 0.00 0.00 ]
+Key: VPMOVUSQWZmr: [ 0.00 0.00 ]
+Key: VPMOVUSQWZmrk: [ 0.00 0.00 ]
+Key: VPMOVUSQWZrr: [ 0.00 0.00 ]
+Key: VPMOVUSQWZrrk: [ 0.00 0.00 ]
+Key: VPMOVUSQWZrrkz: [ 0.00 0.00 ]
+Key: VPMOVUSWBZ: [ 0.00 0.00 ]
+Key: VPMOVUSWBZmr: [ 0.00 0.00 ]
+Key: VPMOVUSWBZmrk: [ 0.00 0.00 ]
+Key: VPMOVUSWBZrr: [ 0.00 0.00 ]
+Key: VPMOVUSWBZrrk: [ 0.00 0.00 ]
+Key: VPMOVUSWBZrrkz: [ 0.00 0.00 ]
+Key: VPMOVW: [ 0.00 0.00 ]
+Key: VPMOVWBZ: [ 0.00 0.00 ]
+Key: VPMOVWBZmr: [ 0.00 0.00 ]
+Key: VPMOVWBZmrk: [ 0.00 0.00 ]
+Key: VPMOVWBZrr: [ 0.00 0.00 ]
+Key: VPMOVWBZrrk: [ 0.00 0.00 ]
+Key: VPMOVWBZrrkz: [ 0.00 0.00 ]
+Key: VPMOVZXBDYrm: [ 0.00 0.00 ]
+Key: VPMOVZXBDYrr: [ 0.00 0.00 ]
+Key: VPMOVZXBDZ: [ 0.00 0.00 ]
+Key: VPMOVZXBDZrm: [ 0.00 0.00 ]
+Key: VPMOVZXBDZrmk: [ 0.00 0.00 ]
+Key: VPMOVZXBDZrmkz: [ 0.00 0.00 ]
+Key: VPMOVZXBDZrr: [ 0.00 0.00 ]
+Key: VPMOVZXBDZrrk: [ 0.00 0.00 ]
+Key: VPMOVZXBDZrrkz: [ 0.00 0.00 ]
+Key: VPMOVZXBDrm: [ 0.00 0.00 ]
+Key: VPMOVZXBDrr: [ 0.00 0.00 ]
+Key: VPMOVZXBQYrm: [ 0.00 0.00 ]
+Key: VPMOVZXBQYrr: [ 0.00 0.00 ]
+Key: VPMOVZXBQZ: [ 0.00 0.00 ]
+Key: VPMOVZXBQZrm: [ 0.00 0.00 ]
+Key: VPMOVZXBQZrmk: [ 0.00 0.00 ]
+Key: VPMOVZXBQZrmkz: [ 0.00 0.00 ]
+Key: VPMOVZXBQZrr: [ 0.00 0.00 ]
+Key: VPMOVZXBQZrrk: [ 0.00 0.00 ]
+Key: VPMOVZXBQZrrkz: [ 0.00 0.00 ]
+Key: VPMOVZXBQrm: [ 0.00 0.00 ]
+Key: VPMOVZXBQrr: [ 0.00 0.00 ]
+Key: VPMOVZXBWYrm: [ 0.00 0.00 ]
+Key: VPMOVZXBWYrr: [ 0.00 0.00 ]
+Key: VPMOVZXBWZ: [ 0.00 0.00 ]
+Key: VPMOVZXBWZrm: [ 0.00 0.00 ]
+Key: VPMOVZXBWZrmk: [ 0.00 0.00 ]
+Key: VPMOVZXBWZrmkz: [ 0.00 0.00 ]
+Key: VPMOVZXBWZrr: [ 0.00 0.00 ]
+Key: VPMOVZXBWZrrk: [ 0.00 0.00 ]
+Key: VPMOVZXBWZrrkz: [ 0.00 0.00 ]
+Key: VPMOVZXBWrm: [ 0.00 0.00 ]
+Key: VPMOVZXBWrr: [ 0.00 0.00 ]
+Key: VPMOVZXDQYrm: [ 0.00 0.00 ]
+Key: VPMOVZXDQYrr: [ 0.00 0.00 ]
+Key: VPMOVZXDQZ: [ 0.00 0.00 ]
+Key: VPMOVZXDQZrm: [ 0.00 0.00 ]
+Key: VPMOVZXDQZrmk: [ 0.00 0.00 ]
+Key: VPMOVZXDQZrmkz: [ 0.00 0.00 ]
+Key: VPMOVZXDQZrr: [ 0.00 0.00 ]
+Key: VPMOVZXDQZrrk: [ 0.00 0.00 ]
+Key: VPMOVZXDQZrrkz: [ 0.00 0.00 ]
+Key: VPMOVZXDQrm: [ 0.00 0.00 ]
+Key: VPMOVZXDQrr: [ 0.00 0.00 ]
+Key: VPMOVZXWDYrm: [ 0.00 0.00 ]
+Key: VPMOVZXWDYrr: [ 0.00 0.00 ]
+Key: VPMOVZXWDZ: [ 0.00 0.00 ]
+Key: VPMOVZXWDZrm: [ 0.00 0.00 ]
+Key: VPMOVZXWDZrmk: [ 0.00 0.00 ]
+Key: VPMOVZXWDZrmkz: [ 0.00 0.00 ]
+Key: VPMOVZXWDZrr: [ 0.00 0.00 ]
+Key: VPMOVZXWDZrrk: [ 0.00 0.00 ]
+Key: VPMOVZXWDZrrkz: [ 0.00 0.00 ]
+Key: VPMOVZXWDrm: [ 0.00 0.00 ]
+Key: VPMOVZXWDrr: [ 0.00 0.00 ]
+Key: VPMOVZXWQYrm: [ 0.00 0.00 ]
+Key: VPMOVZXWQYrr: [ 0.00 0.00 ]
+Key: VPMOVZXWQZ: [ 0.00 0.00 ]
+Key: VPMOVZXWQZrm: [ 0.00 0.00 ]
+Key: VPMOVZXWQZrmk: [ 0.00 0.00 ]
+Key: VPMOVZXWQZrmkz: [ 0.00 0.00 ]
+Key: VPMOVZXWQZrr: [ 0.00 0.00 ]
+Key: VPMOVZXWQZrrk: [ 0.00 0.00 ]
+Key: VPMOVZXWQZrrkz: [ 0.00 0.00 ]
+Key: VPMOVZXWQrm: [ 0.00 0.00 ]
+Key: VPMOVZXWQrr: [ 0.00 0.00 ]
+Key: VPMULDQYrm: [ 0.00 0.00 ]
+Key: VPMULDQYrr: [ 0.00 0.00 ]
+Key: VPMULDQZ: [ 0.00 0.00 ]
+Key: VPMULDQZrm: [ 0.00 0.00 ]
+Key: VPMULDQZrmb: [ 0.00 0.00 ]
+Key: VPMULDQZrmbk: [ 0.00 0.00 ]
+Key: VPMULDQZrmbkz: [ 0.00 0.00 ]
+Key: VPMULDQZrmk: [ 0.00 0.00 ]
+Key: VPMULDQZrmkz: [ 0.00 0.00 ]
+Key: VPMULDQZrr: [ 0.00 0.00 ]
+Key: VPMULDQZrrk: [ 0.00 0.00 ]
+Key: VPMULDQZrrkz: [ 0.00 0.00 ]
+Key: VPMULDQrm: [ 0.00 0.00 ]
+Key: VPMULDQrr: [ 0.00 0.00 ]
+Key: VPMULHRSWYrm: [ 0.00 0.00 ]
+Key: VPMULHRSWYrr: [ 0.00 0.00 ]
+Key: VPMULHRSWZ: [ 0.00 0.00 ]
+Key: VPMULHRSWZrm: [ 0.00 0.00 ]
+Key: VPMULHRSWZrmk: [ 0.00 0.00 ]
+Key: VPMULHRSWZrmkz: [ 0.00 0.00 ]
+Key: VPMULHRSWZrr: [ 0.00 0.00 ]
+Key: VPMULHRSWZrrk: [ 0.00 0.00 ]
+Key: VPMULHRSWZrrkz: [ 0.00 0.00 ]
+Key: VPMULHRSWrm: [ 0.00 0.00 ]
+Key: VPMULHRSWrr: [ 0.00 0.00 ]
+Key: VPMULHUWYrm: [ 0.00 0.00 ]
+Key: VPMULHUWYrr: [ 0.00 0.00 ]
+Key: VPMULHUWZ: [ 0.00 0.00 ]
+Key: VPMULHUWZrm: [ 0.00 0.00 ]
+Key: VPMULHUWZrmk: [ 0.00 0.00 ]
+Key: VPMULHUWZrmkz: [ 0.00 0.00 ]
+Key: VPMULHUWZrr: [ 0.00 0.00 ]
+Key: VPMULHUWZrrk: [ 0.00 0.00 ]
+Key: VPMULHUWZrrkz: [ 0.00 0.00 ]
+Key: VPMULHUWrm: [ 0.00 0.00 ]
+Key: VPMULHUWrr: [ 0.00 0.00 ]
+Key: VPMULHWYrm: [ 0.00 0.00 ]
+Key: VPMULHWYrr: [ 0.00 0.00 ]
+Key: VPMULHWZ: [ 0.00 0.00 ]
+Key: VPMULHWZrm: [ 0.00 0.00 ]
+Key: VPMULHWZrmk: [ 0.00 0.00 ]
+Key: VPMULHWZrmkz: [ 0.00 0.00 ]
+Key: VPMULHWZrr: [ 0.00 0.00 ]
+Key: VPMULHWZrrk: [ 0.00 0.00 ]
+Key: VPMULHWZrrkz: [ 0.00 0.00 ]
+Key: VPMULHWrm: [ 0.00 0.00 ]
+Key: VPMULHWrr: [ 0.00 0.00 ]
+Key: VPMULLDYrm: [ 0.00 0.00 ]
+Key: VPMULLDYrr: [ 0.00 0.00 ]
+Key: VPMULLDZ: [ 0.00 0.00 ]
+Key: VPMULLDZrm: [ 0.00 0.00 ]
+Key: VPMULLDZrmb: [ 0.00 0.00 ]
+Key: VPMULLDZrmbk: [ 0.00 0.00 ]
+Key: VPMULLDZrmbkz: [ 0.00 0.00 ]
+Key: VPMULLDZrmk: [ 0.00 0.00 ]
+Key: VPMULLDZrmkz: [ 0.00 0.00 ]
+Key: VPMULLDZrr: [ 0.00 0.00 ]
+Key: VPMULLDZrrk: [ 0.00 0.00 ]
+Key: VPMULLDZrrkz: [ 0.00 0.00 ]
+Key: VPMULLDrm: [ 0.00 0.00 ]
+Key: VPMULLDrr: [ 0.00 0.00 ]
+Key: VPMULLQZ: [ 0.00 0.00 ]
+Key: VPMULLQZrm: [ 0.00 0.00 ]
+Key: VPMULLQZrmb: [ 0.00 0.00 ]
+Key: VPMULLQZrmbk: [ 0.00 0.00 ]
+Key: VPMULLQZrmbkz: [ 0.00 0.00 ]
+Key: VPMULLQZrmk: [ 0.00 0.00 ]
+Key: VPMULLQZrmkz: [ 0.00 0.00 ]
+Key: VPMULLQZrr: [ 0.00 0.00 ]
+Key: VPMULLQZrrk: [ 0.00 0.00 ]
+Key: VPMULLQZrrkz: [ 0.00 0.00 ]
+Key: VPMULLWYrm: [ 0.00 0.00 ]
+Key: VPMULLWYrr: [ 0.00 0.00 ]
+Key: VPMULLWZ: [ 0.00 0.00 ]
+Key: VPMULLWZrm: [ 0.00 0.00 ]
+Key: VPMULLWZrmk: [ 0.00 0.00 ]
+Key: VPMULLWZrmkz: [ 0.00 0.00 ]
+Key: VPMULLWZrr: [ 0.00 0.00 ]
+Key: VPMULLWZrrk: [ 0.00 0.00 ]
+Key: VPMULLWZrrkz: [ 0.00 0.00 ]
+Key: VPMULLWrm: [ 0.00 0.00 ]
+Key: VPMULLWrr: [ 0.00 0.00 ]
+Key: VPMULTISHIFTQBZ: [ 0.00 0.00 ]
+Key: VPMULTISHIFTQBZrm: [ 0.00 0.00 ]
+Key: VPMULTISHIFTQBZrmb: [ 0.00 0.00 ]
+Key: VPMULTISHIFTQBZrmbk: [ 0.00 0.00 ]
+Key: VPMULTISHIFTQBZrmbkz: [ 0.00 0.00 ]
+Key: VPMULTISHIFTQBZrmk: [ 0.00 0.00 ]
+Key: VPMULTISHIFTQBZrmkz: [ 0.00 0.00 ]
+Key: VPMULTISHIFTQBZrr: [ 0.00 0.00 ]
+Key: VPMULTISHIFTQBZrrk: [ 0.00 0.00 ]
+Key: VPMULTISHIFTQBZrrkz: [ 0.00 0.00 ]
+Key: VPMULUDQYrm: [ 0.00 0.00 ]
+Key: VPMULUDQYrr: [ 0.00 0.00 ]
+Key: VPMULUDQZ: [ 0.00 0.00 ]
+Key: VPMULUDQZrm: [ 0.00 0.00 ]
+Key: VPMULUDQZrmb: [ 0.00 0.00 ]
+Key: VPMULUDQZrmbk: [ 0.00 0.00 ]
+Key: VPMULUDQZrmbkz: [ 0.00 0.00 ]
+Key: VPMULUDQZrmk: [ 0.00 0.00 ]
+Key: VPMULUDQZrmkz: [ 0.00 0.00 ]
+Key: VPMULUDQZrr: [ 0.00 0.00 ]
+Key: VPMULUDQZrrk: [ 0.00 0.00 ]
+Key: VPMULUDQZrrkz: [ 0.00 0.00 ]
+Key: VPMULUDQrm: [ 0.00 0.00 ]
+Key: VPMULUDQrr: [ 0.00 0.00 ]
+Key: VPOPCNTBZ: [ 0.00 0.00 ]
+Key: VPOPCNTBZrm: [ 0.00 0.00 ]
+Key: VPOPCNTBZrmk: [ 0.00 0.00 ]
+Key: VPOPCNTBZrmkz: [ 0.00 0.00 ]
+Key: VPOPCNTBZrr: [ 0.00 0.00 ]
+Key: VPOPCNTBZrrk: [ 0.00 0.00 ]
+Key: VPOPCNTBZrrkz: [ 0.00 0.00 ]
+Key: VPOPCNTDZ: [ 0.00 0.00 ]
+Key: VPOPCNTDZrm: [ 0.00 0.00 ]
+Key: VPOPCNTDZrmb: [ 0.00 0.00 ]
+Key: VPOPCNTDZrmbk: [ 0.00 0.00 ]
+Key: VPOPCNTDZrmbkz: [ 0.00 0.00 ]
+Key: VPOPCNTDZrmk: [ 0.00 0.00 ]
+Key: VPOPCNTDZrmkz: [ 0.00 0.00 ]
+Key: VPOPCNTDZrr: [ 0.00 0.00 ]
+Key: VPOPCNTDZrrk: [ 0.00 0.00 ]
+Key: VPOPCNTDZrrkz: [ 0.00 0.00 ]
+Key: VPOPCNTQZ: [ 0.00 0.00 ]
+Key: VPOPCNTQZrm: [ 0.00 0.00 ]
+Key: VPOPCNTQZrmb: [ 0.00 0.00 ]
+Key: VPOPCNTQZrmbk: [ 0.00 0.00 ]
+Key: VPOPCNTQZrmbkz: [ 0.00 0.00 ]
+Key: VPOPCNTQZrmk: [ 0.00 0.00 ]
+Key: VPOPCNTQZrmkz: [ 0.00 0.00 ]
+Key: VPOPCNTQZrr: [ 0.00 0.00 ]
+Key: VPOPCNTQZrrk: [ 0.00 0.00 ]
+Key: VPOPCNTQZrrkz: [ 0.00 0.00 ]
+Key: VPOPCNTWZ: [ 0.00 0.00 ]
+Key: VPOPCNTWZrm: [ 0.00 0.00 ]
+Key: VPOPCNTWZrmk: [ 0.00 0.00 ]
+Key: VPOPCNTWZrmkz: [ 0.00 0.00 ]
+Key: VPOPCNTWZrr: [ 0.00 0.00 ]
+Key: VPOPCNTWZrrk: [ 0.00 0.00 ]
+Key: VPOPCNTWZrrkz: [ 0.00 0.00 ]
+Key: VPORDZ: [ 0.00 0.00 ]
+Key: VPORDZrm: [ 0.00 0.00 ]
+Key: VPORDZrmb: [ 0.00 0.00 ]
+Key: VPORDZrmbk: [ 0.00 0.00 ]
+Key: VPORDZrmbkz: [ 0.00 0.00 ]
+Key: VPORDZrmk: [ 0.00 0.00 ]
+Key: VPORDZrmkz: [ 0.00 0.00 ]
+Key: VPORDZrr: [ 0.00 0.00 ]
+Key: VPORDZrrk: [ 0.00 0.00 ]
+Key: VPORDZrrkz: [ 0.00 0.00 ]
+Key: VPORQZ: [ 0.00 0.00 ]
+Key: VPORQZrm: [ 0.00 0.00 ]
+Key: VPORQZrmb: [ 0.00 0.00 ]
+Key: VPORQZrmbk: [ 0.00 0.00 ]
+Key: VPORQZrmbkz: [ 0.00 0.00 ]
+Key: VPORQZrmk: [ 0.00 0.00 ]
+Key: VPORQZrmkz: [ 0.00 0.00 ]
+Key: VPORQZrr: [ 0.00 0.00 ]
+Key: VPORQZrrk: [ 0.00 0.00 ]
+Key: VPORQZrrkz: [ 0.00 0.00 ]
+Key: VPORYrm: [ 0.00 0.00 ]
+Key: VPORYrr: [ 0.00 0.00 ]
+Key: VPORrm: [ 0.00 0.00 ]
+Key: VPORrr: [ 0.00 0.00 ]
+Key: VPPERMrmr: [ 0.00 0.00 ]
+Key: VPPERMrrm: [ 0.00 0.00 ]
+Key: VPPERMrrr: [ 0.00 0.00 ]
+Key: VPPERMrrr_REV: [ 0.00 0.00 ]
+Key: VPROLDZ: [ 0.00 0.00 ]
+Key: VPROLDZmbi: [ 0.00 0.00 ]
+Key: VPROLDZmbik: [ 0.00 0.00 ]
+Key: VPROLDZmbikz: [ 0.00 0.00 ]
+Key: VPROLDZmi: [ 0.00 0.00 ]
+Key: VPROLDZmik: [ 0.00 0.00 ]
+Key: VPROLDZmikz: [ 0.00 0.00 ]
+Key: VPROLDZri: [ 0.00 0.00 ]
+Key: VPROLDZrik: [ 0.00 0.00 ]
+Key: VPROLDZrikz: [ 0.00 0.00 ]
+Key: VPROLQZ: [ 0.00 0.00 ]
+Key: VPROLQZmbi: [ 0.00 0.00 ]
+Key: VPROLQZmbik: [ 0.00 0.00 ]
+Key: VPROLQZmbikz: [ 0.00 0.00 ]
+Key: VPROLQZmi: [ 0.00 0.00 ]
+Key: VPROLQZmik: [ 0.00 0.00 ]
+Key: VPROLQZmikz: [ 0.00 0.00 ]
+Key: VPROLQZri: [ 0.00 0.00 ]
+Key: VPROLQZrik: [ 0.00 0.00 ]
+Key: VPROLQZrikz: [ 0.00 0.00 ]
+Key: VPROLVDZ: [ 0.00 0.00 ]
+Key: VPROLVDZrm: [ 0.00 0.00 ]
+Key: VPROLVDZrmb: [ 0.00 0.00 ]
+Key: VPROLVDZrmbk: [ 0.00 0.00 ]
+Key: VPROLVDZrmbkz: [ 0.00 0.00 ]
+Key: VPROLVDZrmk: [ 0.00 0.00 ]
+Key: VPROLVDZrmkz: [ 0.00 0.00 ]
+Key: VPROLVDZrr: [ 0.00 0.00 ]
+Key: VPROLVDZrrk: [ 0.00 0.00 ]
+Key: VPROLVDZrrkz: [ 0.00 0.00 ]
+Key: VPROLVQZ: [ 0.00 0.00 ]
+Key: VPROLVQZrm: [ 0.00 0.00 ]
+Key: VPROLVQZrmb: [ 0.00 0.00 ]
+Key: VPROLVQZrmbk: [ 0.00 0.00 ]
+Key: VPROLVQZrmbkz: [ 0.00 0.00 ]
+Key: VPROLVQZrmk: [ 0.00 0.00 ]
+Key: VPROLVQZrmkz: [ 0.00 0.00 ]
+Key: VPROLVQZrr: [ 0.00 0.00 ]
+Key: VPROLVQZrrk: [ 0.00 0.00 ]
+Key: VPROLVQZrrkz: [ 0.00 0.00 ]
+Key: VPRORDZ: [ 0.00 0.00 ]
+Key: VPRORDZmbi: [ 0.00 0.00 ]
+Key: VPRORDZmbik: [ 0.00 0.00 ]
+Key: VPRORDZmbikz: [ 0.00 0.00 ]
+Key: VPRORDZmi: [ 0.00 0.00 ]
+Key: VPRORDZmik: [ 0.00 0.00 ]
+Key: VPRORDZmikz: [ 0.00 0.00 ]
+Key: VPRORDZri: [ 0.00 0.00 ]
+Key: VPRORDZrik: [ 0.00 0.00 ]
+Key: VPRORDZrikz: [ 0.00 0.00 ]
+Key: VPRORQZ: [ 0.00 0.00 ]
+Key: VPRORQZmbi: [ 0.00 0.00 ]
+Key: VPRORQZmbik: [ 0.00 0.00 ]
+Key: VPRORQZmbikz: [ 0.00 0.00 ]
+Key: VPRORQZmi: [ 0.00 0.00 ]
+Key: VPRORQZmik: [ 0.00 0.00 ]
+Key: VPRORQZmikz: [ 0.00 0.00 ]
+Key: VPRORQZri: [ 0.00 0.00 ]
+Key: VPRORQZrik: [ 0.00 0.00 ]
+Key: VPRORQZrikz: [ 0.00 0.00 ]
+Key: VPRORVDZ: [ 0.00 0.00 ]
+Key: VPRORVDZrm: [ 0.00 0.00 ]
+Key: VPRORVDZrmb: [ 0.00 0.00 ]
+Key: VPRORVDZrmbk: [ 0.00 0.00 ]
+Key: VPRORVDZrmbkz: [ 0.00 0.00 ]
+Key: VPRORVDZrmk: [ 0.00 0.00 ]
+Key: VPRORVDZrmkz: [ 0.00 0.00 ]
+Key: VPRORVDZrr: [ 0.00 0.00 ]
+Key: VPRORVDZrrk: [ 0.00 0.00 ]
+Key: VPRORVDZrrkz: [ 0.00 0.00 ]
+Key: VPRORVQZ: [ 0.00 0.00 ]
+Key: VPRORVQZrm: [ 0.00 0.00 ]
+Key: VPRORVQZrmb: [ 0.00 0.00 ]
+Key: VPRORVQZrmbk: [ 0.00 0.00 ]
+Key: VPRORVQZrmbkz: [ 0.00 0.00 ]
+Key: VPRORVQZrmk: [ 0.00 0.00 ]
+Key: VPRORVQZrmkz: [ 0.00 0.00 ]
+Key: VPRORVQZrr: [ 0.00 0.00 ]
+Key: VPRORVQZrrk: [ 0.00 0.00 ]
+Key: VPRORVQZrrkz: [ 0.00 0.00 ]
+Key: VPROTBmi: [ 0.00 0.00 ]
+Key: VPROTBmr: [ 0.00 0.00 ]
+Key: VPROTBri: [ 0.00 0.00 ]
+Key: VPROTBrm: [ 0.00 0.00 ]
+Key: VPROTBrr: [ 0.00 0.00 ]
+Key: VPROTBrr_REV: [ 0.00 0.00 ]
+Key: VPROTDmi: [ 0.00 0.00 ]
+Key: VPROTDmr: [ 0.00 0.00 ]
+Key: VPROTDri: [ 0.00 0.00 ]
+Key: VPROTDrm: [ 0.00 0.00 ]
+Key: VPROTDrr: [ 0.00 0.00 ]
+Key: VPROTDrr_REV: [ 0.00 0.00 ]
+Key: VPROTQmi: [ 0.00 0.00 ]
+Key: VPROTQmr: [ 0.00 0.00 ]
+Key: VPROTQri: [ 0.00 0.00 ]
+Key: VPROTQrm: [ 0.00 0.00 ]
+Key: VPROTQrr: [ 0.00 0.00 ]
+Key: VPROTQrr_REV: [ 0.00 0.00 ]
+Key: VPROTWmi: [ 0.00 0.00 ]
+Key: VPROTWmr: [ 0.00 0.00 ]
+Key: VPROTWri: [ 0.00 0.00 ]
+Key: VPROTWrm: [ 0.00 0.00 ]
+Key: VPROTWrr: [ 0.00 0.00 ]
+Key: VPROTWrr_REV: [ 0.00 0.00 ]
+Key: VPSADBWYrm: [ 0.00 0.00 ]
+Key: VPSADBWYrr: [ 0.00 0.00 ]
+Key: VPSADBWZ: [ 0.00 0.00 ]
+Key: VPSADBWZrm: [ 0.00 0.00 ]
+Key: VPSADBWZrr: [ 0.00 0.00 ]
+Key: VPSADBWrm: [ 0.00 0.00 ]
+Key: VPSADBWrr: [ 0.00 0.00 ]
+Key: VPSCATTERDDZ: [ 0.00 0.00 ]
+Key: VPSCATTERDDZmr: [ 0.00 0.00 ]
+Key: VPSCATTERDQZ: [ 0.00 0.00 ]
+Key: VPSCATTERDQZmr: [ 0.00 0.00 ]
+Key: VPSCATTERQDZ: [ 0.00 0.00 ]
+Key: VPSCATTERQDZmr: [ 0.00 0.00 ]
+Key: VPSCATTERQQZ: [ 0.00 0.00 ]
+Key: VPSCATTERQQZmr: [ 0.00 0.00 ]
+Key: VPSHABmr: [ 0.00 0.00 ]
+Key: VPSHABrm: [ 0.00 0.00 ]
+Key: VPSHABrr: [ 0.00 0.00 ]
+Key: VPSHABrr_REV: [ 0.00 0.00 ]
+Key: VPSHADmr: [ 0.00 0.00 ]
+Key: VPSHADrm: [ 0.00 0.00 ]
+Key: VPSHADrr: [ 0.00 0.00 ]
+Key: VPSHADrr_REV: [ 0.00 0.00 ]
+Key: VPSHAQmr: [ 0.00 0.00 ]
+Key: VPSHAQrm: [ 0.00 0.00 ]
+Key: VPSHAQrr: [ 0.00 0.00 ]
+Key: VPSHAQrr_REV: [ 0.00 0.00 ]
+Key: VPSHAWmr: [ 0.00 0.00 ]
+Key: VPSHAWrm: [ 0.00 0.00 ]
+Key: VPSHAWrr: [ 0.00 0.00 ]
+Key: VPSHAWrr_REV: [ 0.00 0.00 ]
+Key: VPSHLBmr: [ 0.00 0.00 ]
+Key: VPSHLBrm: [ 0.00 0.00 ]
+Key: VPSHLBrr: [ 0.00 0.00 ]
+Key: VPSHLBrr_REV: [ 0.00 0.00 ]
+Key: VPSHLDDZ: [ 0.00 0.00 ]
+Key: VPSHLDDZrmbi: [ 0.00 0.00 ]
+Key: VPSHLDDZrmbik: [ 0.00 0.00 ]
+Key: VPSHLDDZrmbikz: [ 0.00 0.00 ]
+Key: VPSHLDDZrmi: [ 0.00 0.00 ]
+Key: VPSHLDDZrmik: [ 0.00 0.00 ]
+Key: VPSHLDDZrmikz: [ 0.00 0.00 ]
+Key: VPSHLDDZrri: [ 0.00 0.00 ]
+Key: VPSHLDDZrrik: [ 0.00 0.00 ]
+Key: VPSHLDDZrrikz: [ 0.00 0.00 ]
+Key: VPSHLDQZ: [ 0.00 0.00 ]
+Key: VPSHLDQZrmbi: [ 0.00 0.00 ]
+Key: VPSHLDQZrmbik: [ 0.00 0.00 ]
+Key: VPSHLDQZrmbikz: [ 0.00 0.00 ]
+Key: VPSHLDQZrmi: [ 0.00 0.00 ]
+Key: VPSHLDQZrmik: [ 0.00 0.00 ]
+Key: VPSHLDQZrmikz: [ 0.00 0.00 ]
+Key: VPSHLDQZrri: [ 0.00 0.00 ]
+Key: VPSHLDQZrrik: [ 0.00 0.00 ]
+Key: VPSHLDQZrrikz: [ 0.00 0.00 ]
+Key: VPSHLDVDZ: [ 0.00 0.00 ]
+Key: VPSHLDVDZm: [ 0.00 0.00 ]
+Key: VPSHLDVDZmb: [ 0.00 0.00 ]
+Key: VPSHLDVDZmbk: [ 0.00 0.00 ]
+Key: VPSHLDVDZmbkz: [ 0.00 0.00 ]
+Key: VPSHLDVDZmk: [ 0.00 0.00 ]
+Key: VPSHLDVDZmkz: [ 0.00 0.00 ]
+Key: VPSHLDVDZr: [ 0.00 0.00 ]
+Key: VPSHLDVDZrk: [ 0.00 0.00 ]
+Key: VPSHLDVDZrkz: [ 0.00 0.00 ]
+Key: VPSHLDVQZ: [ 0.00 0.00 ]
+Key: VPSHLDVQZm: [ 0.00 0.00 ]
+Key: VPSHLDVQZmb: [ 0.00 0.00 ]
+Key: VPSHLDVQZmbk: [ 0.00 0.00 ]
+Key: VPSHLDVQZmbkz: [ 0.00 0.00 ]
+Key: VPSHLDVQZmk: [ 0.00 0.00 ]
+Key: VPSHLDVQZmkz: [ 0.00 0.00 ]
+Key: VPSHLDVQZr: [ 0.00 0.00 ]
+Key: VPSHLDVQZrk: [ 0.00 0.00 ]
+Key: VPSHLDVQZrkz: [ 0.00 0.00 ]
+Key: VPSHLDVWZ: [ 0.00 0.00 ]
+Key: VPSHLDVWZm: [ 0.00 0.00 ]
+Key: VPSHLDVWZmk: [ 0.00 0.00 ]
+Key: VPSHLDVWZmkz: [ 0.00 0.00 ]
+Key: VPSHLDVWZr: [ 0.00 0.00 ]
+Key: VPSHLDVWZrk: [ 0.00 0.00 ]
+Key: VPSHLDVWZrkz: [ 0.00 0.00 ]
+Key: VPSHLDWZ: [ 0.00 0.00 ]
+Key: VPSHLDWZrmi: [ 0.00 0.00 ]
+Key: VPSHLDWZrmik: [ 0.00 0.00 ]
+Key: VPSHLDWZrmikz: [ 0.00 0.00 ]
+Key: VPSHLDWZrri: [ 0.00 0.00 ]
+Key: VPSHLDWZrrik: [ 0.00 0.00 ]
+Key: VPSHLDWZrrikz: [ 0.00 0.00 ]
+Key: VPSHLDmr: [ 0.00 0.00 ]
+Key: VPSHLDrm: [ 0.00 0.00 ]
+Key: VPSHLDrr: [ 0.00 0.00 ]
+Key: VPSHLDrr_REV: [ 0.00 0.00 ]
+Key: VPSHLQmr: [ 0.00 0.00 ]
+Key: VPSHLQrm: [ 0.00 0.00 ]
+Key: VPSHLQrr: [ 0.00 0.00 ]
+Key: VPSHLQrr_REV: [ 0.00 0.00 ]
+Key: VPSHLWmr: [ 0.00 0.00 ]
+Key: VPSHLWrm: [ 0.00 0.00 ]
+Key: VPSHLWrr: [ 0.00 0.00 ]
+Key: VPSHLWrr_REV: [ 0.00 0.00 ]
+Key: VPSHRDDZ: [ 0.00 0.00 ]
+Key: VPSHRDDZrmbi: [ 0.00 0.00 ]
+Key: VPSHRDDZrmbik: [ 0.00 0.00 ]
+Key: VPSHRDDZrmbikz: [ 0.00 0.00 ]
+Key: VPSHRDDZrmi: [ 0.00 0.00 ]
+Key: VPSHRDDZrmik: [ 0.00 0.00 ]
+Key: VPSHRDDZrmikz: [ 0.00 0.00 ]
+Key: VPSHRDDZrri: [ 0.00 0.00 ]
+Key: VPSHRDDZrrik: [ 0.00 0.00 ]
+Key: VPSHRDDZrrikz: [ 0.00 0.00 ]
+Key: VPSHRDQZ: [ 0.00 0.00 ]
+Key: VPSHRDQZrmbi: [ 0.00 0.00 ]
+Key: VPSHRDQZrmbik: [ 0.00 0.00 ]
+Key: VPSHRDQZrmbikz: [ 0.00 0.00 ]
+Key: VPSHRDQZrmi: [ 0.00 0.00 ]
+Key: VPSHRDQZrmik: [ 0.00 0.00 ]
+Key: VPSHRDQZrmikz: [ 0.00 0.00 ]
+Key: VPSHRDQZrri: [ 0.00 0.00 ]
+Key: VPSHRDQZrrik: [ 0.00 0.00 ]
+Key: VPSHRDQZrrikz: [ 0.00 0.00 ]
+Key: VPSHRDVDZ: [ 0.00 0.00 ]
+Key: VPSHRDVDZm: [ 0.00 0.00 ]
+Key: VPSHRDVDZmb: [ 0.00 0.00 ]
+Key: VPSHRDVDZmbk: [ 0.00 0.00 ]
+Key: VPSHRDVDZmbkz: [ 0.00 0.00 ]
+Key: VPSHRDVDZmk: [ 0.00 0.00 ]
+Key: VPSHRDVDZmkz: [ 0.00 0.00 ]
+Key: VPSHRDVDZr: [ 0.00 0.00 ]
+Key: VPSHRDVDZrk: [ 0.00 0.00 ]
+Key: VPSHRDVDZrkz: [ 0.00 0.00 ]
+Key: VPSHRDVQZ: [ 0.00 0.00 ]
+Key: VPSHRDVQZm: [ 0.00 0.00 ]
+Key: VPSHRDVQZmb: [ 0.00 0.00 ]
+Key: VPSHRDVQZmbk: [ 0.00 0.00 ]
+Key: VPSHRDVQZmbkz: [ 0.00 0.00 ]
+Key: VPSHRDVQZmk: [ 0.00 0.00 ]
+Key: VPSHRDVQZmkz: [ 0.00 0.00 ]
+Key: VPSHRDVQZr: [ 0.00 0.00 ]
+Key: VPSHRDVQZrk: [ 0.00 0.00 ]
+Key: VPSHRDVQZrkz: [ 0.00 0.00 ]
+Key: VPSHRDVWZ: [ 0.00 0.00 ]
+Key: VPSHRDVWZm: [ 0.00 0.00 ]
+Key: VPSHRDVWZmk: [ 0.00 0.00 ]
+Key: VPSHRDVWZmkz: [ 0.00 0.00 ]
+Key: VPSHRDVWZr: [ 0.00 0.00 ]
+Key: VPSHRDVWZrk: [ 0.00 0.00 ]
+Key: VPSHRDVWZrkz: [ 0.00 0.00 ]
+Key: VPSHRDWZ: [ 0.00 0.00 ]
+Key: VPSHRDWZrmi: [ 0.00 0.00 ]
+Key: VPSHRDWZrmik: [ 0.00 0.00 ]
+Key: VPSHRDWZrmikz: [ 0.00 0.00 ]
+Key: VPSHRDWZrri: [ 0.00 0.00 ]
+Key: VPSHRDWZrrik: [ 0.00 0.00 ]
+Key: VPSHRDWZrrikz: [ 0.00 0.00 ]
+Key: VPSHUFBITQMBZ: [ 0.00 0.00 ]
+Key: VPSHUFBITQMBZrm: [ 0.00 0.00 ]
+Key: VPSHUFBITQMBZrmk: [ 0.00 0.00 ]
+Key: VPSHUFBITQMBZrr: [ 0.00 0.00 ]
+Key: VPSHUFBITQMBZrrk: [ 0.00 0.00 ]
+Key: VPSHUFBYrm: [ 0.00 0.00 ]
+Key: VPSHUFBYrr: [ 0.00 0.00 ]
+Key: VPSHUFBZ: [ 0.00 0.00 ]
+Key: VPSHUFBZrm: [ 0.00 0.00 ]
+Key: VPSHUFBZrmk: [ 0.00 0.00 ]
+Key: VPSHUFBZrmkz: [ 0.00 0.00 ]
+Key: VPSHUFBZrr: [ 0.00 0.00 ]
+Key: VPSHUFBZrrk: [ 0.00 0.00 ]
+Key: VPSHUFBZrrkz: [ 0.00 0.00 ]
+Key: VPSHUFBrm: [ 0.00 0.00 ]
+Key: VPSHUFBrr: [ 0.00 0.00 ]
+Key: VPSHUFDYmi: [ 0.00 0.00 ]
+Key: VPSHUFDYri: [ 0.00 0.00 ]
+Key: VPSHUFDZ: [ 0.00 0.00 ]
+Key: VPSHUFDZmbi: [ 0.00 0.00 ]
+Key: VPSHUFDZmbik: [ 0.00 0.00 ]
+Key: VPSHUFDZmbikz: [ 0.00 0.00 ]
+Key: VPSHUFDZmi: [ 0.00 0.00 ]
+Key: VPSHUFDZmik: [ 0.00 0.00 ]
+Key: VPSHUFDZmikz: [ 0.00 0.00 ]
+Key: VPSHUFDZri: [ 0.00 0.00 ]
+Key: VPSHUFDZrik: [ 0.00 0.00 ]
+Key: VPSHUFDZrikz: [ 0.00 0.00 ]
+Key: VPSHUFDmi: [ 0.00 0.00 ]
+Key: VPSHUFDri: [ 0.00 0.00 ]
+Key: VPSHUFHWYmi: [ 0.00 0.00 ]
+Key: VPSHUFHWYri: [ 0.00 0.00 ]
+Key: VPSHUFHWZ: [ 0.00 0.00 ]
+Key: VPSHUFHWZmi: [ 0.00 0.00 ]
+Key: VPSHUFHWZmik: [ 0.00 0.00 ]
+Key: VPSHUFHWZmikz: [ 0.00 0.00 ]
+Key: VPSHUFHWZri: [ 0.00 0.00 ]
+Key: VPSHUFHWZrik: [ 0.00 0.00 ]
+Key: VPSHUFHWZrikz: [ 0.00 0.00 ]
+Key: VPSHUFHWmi: [ 0.00 0.00 ]
+Key: VPSHUFHWri: [ 0.00 0.00 ]
+Key: VPSHUFLWYmi: [ 0.00 0.00 ]
+Key: VPSHUFLWYri: [ 0.00 0.00 ]
+Key: VPSHUFLWZ: [ 0.00 0.00 ]
+Key: VPSHUFLWZmi: [ 0.00 0.00 ]
+Key: VPSHUFLWZmik: [ 0.00 0.00 ]
+Key: VPSHUFLWZmikz: [ 0.00 0.00 ]
+Key: VPSHUFLWZri: [ 0.00 0.00 ]
+Key: VPSHUFLWZrik: [ 0.00 0.00 ]
+Key: VPSHUFLWZrikz: [ 0.00 0.00 ]
+Key: VPSHUFLWmi: [ 0.00 0.00 ]
+Key: VPSHUFLWri: [ 0.00 0.00 ]
+Key: VPSIGNBYrm: [ 0.00 0.00 ]
+Key: VPSIGNBYrr: [ 0.00 0.00 ]
+Key: VPSIGNBrm: [ 0.00 0.00 ]
+Key: VPSIGNBrr: [ 0.00 0.00 ]
+Key: VPSIGNDYrm: [ 0.00 0.00 ]
+Key: VPSIGNDYrr: [ 0.00 0.00 ]
+Key: VPSIGNDrm: [ 0.00 0.00 ]
+Key: VPSIGNDrr: [ 0.00 0.00 ]
+Key: VPSIGNWYrm: [ 0.00 0.00 ]
+Key: VPSIGNWYrr: [ 0.00 0.00 ]
+Key: VPSIGNWrm: [ 0.00 0.00 ]
+Key: VPSIGNWrr: [ 0.00 0.00 ]
+Key: VPSLLDQYri: [ 0.00 0.00 ]
+Key: VPSLLDQZ: [ 0.00 0.00 ]
+Key: VPSLLDQZmi: [ 0.00 0.00 ]
+Key: VPSLLDQZri: [ 0.00 0.00 ]
+Key: VPSLLDQri: [ 0.00 0.00 ]
+Key: VPSLLDYri: [ 0.00 0.00 ]
+Key: VPSLLDYrm: [ 0.00 0.00 ]
+Key: VPSLLDYrr: [ 0.00 0.00 ]
+Key: VPSLLDZ: [ 0.00 0.00 ]
+Key: VPSLLDZmbi: [ 0.00 0.00 ]
+Key: VPSLLDZmbik: [ 0.00 0.00 ]
+Key: VPSLLDZmbikz: [ 0.00 0.00 ]
+Key: VPSLLDZmi: [ 0.00 0.00 ]
+Key: VPSLLDZmik: [ 0.00 0.00 ]
+Key: VPSLLDZmikz: [ 0.00 0.00 ]
+Key: VPSLLDZri: [ 0.00 0.00 ]
+Key: VPSLLDZrik: [ 0.00 0.00 ]
+Key: VPSLLDZrikz: [ 0.00 0.00 ]
+Key: VPSLLDZrm: [ 0.00 0.00 ]
+Key: VPSLLDZrmk: [ 0.00 0.00 ]
+Key: VPSLLDZrmkz: [ 0.00 0.00 ]
+Key: VPSLLDZrr: [ 0.00 0.00 ]
+Key: VPSLLDZrrk: [ 0.00 0.00 ]
+Key: VPSLLDZrrkz: [ 0.00 0.00 ]
+Key: VPSLLDri: [ 0.00 0.00 ]
+Key: VPSLLDrm: [ 0.00 0.00 ]
+Key: VPSLLDrr: [ 0.00 0.00 ]
+Key: VPSLLQYri: [ 0.00 0.00 ]
+Key: VPSLLQYrm: [ 0.00 0.00 ]
+Key: VPSLLQYrr: [ 0.00 0.00 ]
+Key: VPSLLQZ: [ 0.00 0.00 ]
+Key: VPSLLQZmbi: [ 0.00 0.00 ]
+Key: VPSLLQZmbik: [ 0.00 0.00 ]
+Key: VPSLLQZmbikz: [ 0.00 0.00 ]
+Key: VPSLLQZmi: [ 0.00 0.00 ]
+Key: VPSLLQZmik: [ 0.00 0.00 ]
+Key: VPSLLQZmikz: [ 0.00 0.00 ]
+Key: VPSLLQZri: [ 0.00 0.00 ]
+Key: VPSLLQZrik: [ 0.00 0.00 ]
+Key: VPSLLQZrikz: [ 0.00 0.00 ]
+Key: VPSLLQZrm: [ 0.00 0.00 ]
+Key: VPSLLQZrmk: [ 0.00 0.00 ]
+Key: VPSLLQZrmkz: [ 0.00 0.00 ]
+Key: VPSLLQZrr: [ 0.00 0.00 ]
+Key: VPSLLQZrrk: [ 0.00 0.00 ]
+Key: VPSLLQZrrkz: [ 0.00 0.00 ]
+Key: VPSLLQri: [ 0.00 0.00 ]
+Key: VPSLLQrm: [ 0.00 0.00 ]
+Key: VPSLLQrr: [ 0.00 0.00 ]
+Key: VPSLLVDYrm: [ 0.00 0.00 ]
+Key: VPSLLVDYrr: [ 0.00 0.00 ]
+Key: VPSLLVDZ: [ 0.00 0.00 ]
+Key: VPSLLVDZrm: [ 0.00 0.00 ]
+Key: VPSLLVDZrmb: [ 0.00 0.00 ]
+Key: VPSLLVDZrmbk: [ 0.00 0.00 ]
+Key: VPSLLVDZrmbkz: [ 0.00 0.00 ]
+Key: VPSLLVDZrmk: [ 0.00 0.00 ]
+Key: VPSLLVDZrmkz: [ 0.00 0.00 ]
+Key: VPSLLVDZrr: [ 0.00 0.00 ]
+Key: VPSLLVDZrrk: [ 0.00 0.00 ]
+Key: VPSLLVDZrrkz: [ 0.00 0.00 ]
+Key: VPSLLVDrm: [ 0.00 0.00 ]
+Key: VPSLLVDrr: [ 0.00 0.00 ]
+Key: VPSLLVQYrm: [ 0.00 0.00 ]
+Key: VPSLLVQYrr: [ 0.00 0.00 ]
+Key: VPSLLVQZ: [ 0.00 0.00 ]
+Key: VPSLLVQZrm: [ 0.00 0.00 ]
+Key: VPSLLVQZrmb: [ 0.00 0.00 ]
+Key: VPSLLVQZrmbk: [ 0.00 0.00 ]
+Key: VPSLLVQZrmbkz: [ 0.00 0.00 ]
+Key: VPSLLVQZrmk: [ 0.00 0.00 ]
+Key: VPSLLVQZrmkz: [ 0.00 0.00 ]
+Key: VPSLLVQZrr: [ 0.00 0.00 ]
+Key: VPSLLVQZrrk: [ 0.00 0.00 ]
+Key: VPSLLVQZrrkz: [ 0.00 0.00 ]
+Key: VPSLLVQrm: [ 0.00 0.00 ]
+Key: VPSLLVQrr: [ 0.00 0.00 ]
+Key: VPSLLVWZ: [ 0.00 0.00 ]
+Key: VPSLLVWZrm: [ 0.00 0.00 ]
+Key: VPSLLVWZrmk: [ 0.00 0.00 ]
+Key: VPSLLVWZrmkz: [ 0.00 0.00 ]
+Key: VPSLLVWZrr: [ 0.00 0.00 ]
+Key: VPSLLVWZrrk: [ 0.00 0.00 ]
+Key: VPSLLVWZrrkz: [ 0.00 0.00 ]
+Key: VPSLLWYri: [ 0.00 0.00 ]
+Key: VPSLLWYrm: [ 0.00 0.00 ]
+Key: VPSLLWYrr: [ 0.00 0.00 ]
+Key: VPSLLWZ: [ 0.00 0.00 ]
+Key: VPSLLWZmi: [ 0.00 0.00 ]
+Key: VPSLLWZmik: [ 0.00 0.00 ]
+Key: VPSLLWZmikz: [ 0.00 0.00 ]
+Key: VPSLLWZri: [ 0.00 0.00 ]
+Key: VPSLLWZrik: [ 0.00 0.00 ]
+Key: VPSLLWZrikz: [ 0.00 0.00 ]
+Key: VPSLLWZrm: [ 0.00 0.00 ]
+Key: VPSLLWZrmk: [ 0.00 0.00 ]
+Key: VPSLLWZrmkz: [ 0.00 0.00 ]
+Key: VPSLLWZrr: [ 0.00 0.00 ]
+Key: VPSLLWZrrk: [ 0.00 0.00 ]
+Key: VPSLLWZrrkz: [ 0.00 0.00 ]
+Key: VPSLLWri: [ 0.00 0.00 ]
+Key: VPSLLWrm: [ 0.00 0.00 ]
+Key: VPSLLWrr: [ 0.00 0.00 ]
+Key: VPSRADYri: [ 0.00 0.00 ]
+Key: VPSRADYrm: [ 0.00 0.00 ]
+Key: VPSRADYrr: [ 0.00 0.00 ]
+Key: VPSRADZ: [ 0.00 0.00 ]
+Key: VPSRADZmbi: [ 0.00 0.00 ]
+Key: VPSRADZmbik: [ 0.00 0.00 ]
+Key: VPSRADZmbikz: [ 0.00 0.00 ]
+Key: VPSRADZmi: [ 0.00 0.00 ]
+Key: VPSRADZmik: [ 0.00 0.00 ]
+Key: VPSRADZmikz: [ 0.00 0.00 ]
+Key: VPSRADZri: [ 0.00 0.00 ]
+Key: VPSRADZrik: [ 0.00 0.00 ]
+Key: VPSRADZrikz: [ 0.00 0.00 ]
+Key: VPSRADZrm: [ 0.00 0.00 ]
+Key: VPSRADZrmk: [ 0.00 0.00 ]
+Key: VPSRADZrmkz: [ 0.00 0.00 ]
+Key: VPSRADZrr: [ 0.00 0.00 ]
+Key: VPSRADZrrk: [ 0.00 0.00 ]
+Key: VPSRADZrrkz: [ 0.00 0.00 ]
+Key: VPSRADri: [ 0.00 0.00 ]
+Key: VPSRADrm: [ 0.00 0.00 ]
+Key: VPSRADrr: [ 0.00 0.00 ]
+Key: VPSRAQZ: [ 0.00 0.00 ]
+Key: VPSRAQZmbi: [ 0.00 0.00 ]
+Key: VPSRAQZmbik: [ 0.00 0.00 ]
+Key: VPSRAQZmbikz: [ 0.00 0.00 ]
+Key: VPSRAQZmi: [ 0.00 0.00 ]
+Key: VPSRAQZmik: [ 0.00 0.00 ]
+Key: VPSRAQZmikz: [ 0.00 0.00 ]
+Key: VPSRAQZri: [ 0.00 0.00 ]
+Key: VPSRAQZrik: [ 0.00 0.00 ]
+Key: VPSRAQZrikz: [ 0.00 0.00 ]
+Key: VPSRAQZrm: [ 0.00 0.00 ]
+Key: VPSRAQZrmk: [ 0.00 0.00 ]
+Key: VPSRAQZrmkz: [ 0.00 0.00 ]
+Key: VPSRAQZrr: [ 0.00 0.00 ]
+Key: VPSRAQZrrk: [ 0.00 0.00 ]
+Key: VPSRAQZrrkz: [ 0.00 0.00 ]
+Key: VPSRAVDYrm: [ 0.00 0.00 ]
+Key: VPSRAVDYrr: [ 0.00 0.00 ]
+Key: VPSRAVDZ: [ 0.00 0.00 ]
+Key: VPSRAVDZrm: [ 0.00 0.00 ]
+Key: VPSRAVDZrmb: [ 0.00 0.00 ]
+Key: VPSRAVDZrmbk: [ 0.00 0.00 ]
+Key: VPSRAVDZrmbkz: [ 0.00 0.00 ]
+Key: VPSRAVDZrmk: [ 0.00 0.00 ]
+Key: VPSRAVDZrmkz: [ 0.00 0.00 ]
+Key: VPSRAVDZrr: [ 0.00 0.00 ]
+Key: VPSRAVDZrrk: [ 0.00 0.00 ]
+Key: VPSRAVDZrrkz: [ 0.00 0.00 ]
+Key: VPSRAVDrm: [ 0.00 0.00 ]
+Key: VPSRAVDrr: [ 0.00 0.00 ]
+Key: VPSRAVQZ: [ 0.00 0.00 ]
+Key: VPSRAVQZrm: [ 0.00 0.00 ]
+Key: VPSRAVQZrmb: [ 0.00 0.00 ]
+Key: VPSRAVQZrmbk: [ 0.00 0.00 ]
+Key: VPSRAVQZrmbkz: [ 0.00 0.00 ]
+Key: VPSRAVQZrmk: [ 0.00 0.00 ]
+Key: VPSRAVQZrmkz: [ 0.00 0.00 ]
+Key: VPSRAVQZrr: [ 0.00 0.00 ]
+Key: VPSRAVQZrrk: [ 0.00 0.00 ]
+Key: VPSRAVQZrrkz: [ 0.00 0.00 ]
+Key: VPSRAVWZ: [ 0.00 0.00 ]
+Key: VPSRAVWZrm: [ 0.00 0.00 ]
+Key: VPSRAVWZrmk: [ 0.00 0.00 ]
+Key: VPSRAVWZrmkz: [ 0.00 0.00 ]
+Key: VPSRAVWZrr: [ 0.00 0.00 ]
+Key: VPSRAVWZrrk: [ 0.00 0.00 ]
+Key: VPSRAVWZrrkz: [ 0.00 0.00 ]
+Key: VPSRAWYri: [ 0.00 0.00 ]
+Key: VPSRAWYrm: [ 0.00 0.00 ]
+Key: VPSRAWYrr: [ 0.00 0.00 ]
+Key: VPSRAWZ: [ 0.00 0.00 ]
+Key: VPSRAWZmi: [ 0.00 0.00 ]
+Key: VPSRAWZmik: [ 0.00 0.00 ]
+Key: VPSRAWZmikz: [ 0.00 0.00 ]
+Key: VPSRAWZri: [ 0.00 0.00 ]
+Key: VPSRAWZrik: [ 0.00 0.00 ]
+Key: VPSRAWZrikz: [ 0.00 0.00 ]
+Key: VPSRAWZrm: [ 0.00 0.00 ]
+Key: VPSRAWZrmk: [ 0.00 0.00 ]
+Key: VPSRAWZrmkz: [ 0.00 0.00 ]
+Key: VPSRAWZrr: [ 0.00 0.00 ]
+Key: VPSRAWZrrk: [ 0.00 0.00 ]
+Key: VPSRAWZrrkz: [ 0.00 0.00 ]
+Key: VPSRAWri: [ 0.00 0.00 ]
+Key: VPSRAWrm: [ 0.00 0.00 ]
+Key: VPSRAWrr: [ 0.00 0.00 ]
+Key: VPSRLDQYri: [ 0.00 0.00 ]
+Key: VPSRLDQZ: [ 0.00 0.00 ]
+Key: VPSRLDQZmi: [ 0.00 0.00 ]
+Key: VPSRLDQZri: [ 0.00 0.00 ]
+Key: VPSRLDQri: [ 0.00 0.00 ]
+Key: VPSRLDYri: [ 0.00 0.00 ]
+Key: VPSRLDYrm: [ 0.00 0.00 ]
+Key: VPSRLDYrr: [ 0.00 0.00 ]
+Key: VPSRLDZ: [ 0.00 0.00 ]
+Key: VPSRLDZmbi: [ 0.00 0.00 ]
+Key: VPSRLDZmbik: [ 0.00 0.00 ]
+Key: VPSRLDZmbikz: [ 0.00 0.00 ]
+Key: VPSRLDZmi: [ 0.00 0.00 ]
+Key: VPSRLDZmik: [ 0.00 0.00 ]
+Key: VPSRLDZmikz: [ 0.00 0.00 ]
+Key: VPSRLDZri: [ 0.00 0.00 ]
+Key: VPSRLDZrik: [ 0.00 0.00 ]
+Key: VPSRLDZrikz: [ 0.00 0.00 ]
+Key: VPSRLDZrm: [ 0.00 0.00 ]
+Key: VPSRLDZrmk: [ 0.00 0.00 ]
+Key: VPSRLDZrmkz: [ 0.00 0.00 ]
+Key: VPSRLDZrr: [ 0.00 0.00 ]
+Key: VPSRLDZrrk: [ 0.00 0.00 ]
+Key: VPSRLDZrrkz: [ 0.00 0.00 ]
+Key: VPSRLDri: [ 0.00 0.00 ]
+Key: VPSRLDrm: [ 0.00 0.00 ]
+Key: VPSRLDrr: [ 0.00 0.00 ]
+Key: VPSRLQYri: [ 0.00 0.00 ]
+Key: VPSRLQYrm: [ 0.00 0.00 ]
+Key: VPSRLQYrr: [ 0.00 0.00 ]
+Key: VPSRLQZ: [ 0.00 0.00 ]
+Key: VPSRLQZmbi: [ 0.00 0.00 ]
+Key: VPSRLQZmbik: [ 0.00 0.00 ]
+Key: VPSRLQZmbikz: [ 0.00 0.00 ]
+Key: VPSRLQZmi: [ 0.00 0.00 ]
+Key: VPSRLQZmik: [ 0.00 0.00 ]
+Key: VPSRLQZmikz: [ 0.00 0.00 ]
+Key: VPSRLQZri: [ 0.00 0.00 ]
+Key: VPSRLQZrik: [ 0.00 0.00 ]
+Key: VPSRLQZrikz: [ 0.00 0.00 ]
+Key: VPSRLQZrm: [ 0.00 0.00 ]
+Key: VPSRLQZrmk: [ 0.00 0.00 ]
+Key: VPSRLQZrmkz: [ 0.00 0.00 ]
+Key: VPSRLQZrr: [ 0.00 0.00 ]
+Key: VPSRLQZrrk: [ 0.00 0.00 ]
+Key: VPSRLQZrrkz: [ 0.00 0.00 ]
+Key: VPSRLQri: [ 0.00 0.00 ]
+Key: VPSRLQrm: [ 0.00 0.00 ]
+Key: VPSRLQrr: [ 0.00 0.00 ]
+Key: VPSRLVDYrm: [ 0.00 0.00 ]
+Key: VPSRLVDYrr: [ 0.00 0.00 ]
+Key: VPSRLVDZ: [ 0.00 0.00 ]
+Key: VPSRLVDZrm: [ 0.00 0.00 ]
+Key: VPSRLVDZrmb: [ 0.00 0.00 ]
+Key: VPSRLVDZrmbk: [ 0.00 0.00 ]
+Key: VPSRLVDZrmbkz: [ 0.00 0.00 ]
+Key: VPSRLVDZrmk: [ 0.00 0.00 ]
+Key: VPSRLVDZrmkz: [ 0.00 0.00 ]
+Key: VPSRLVDZrr: [ 0.00 0.00 ]
+Key: VPSRLVDZrrk: [ 0.00 0.00 ]
+Key: VPSRLVDZrrkz: [ 0.00 0.00 ]
+Key: VPSRLVDrm: [ 0.00 0.00 ]
+Key: VPSRLVDrr: [ 0.00 0.00 ]
+Key: VPSRLVQYrm: [ 0.00 0.00 ]
+Key: VPSRLVQYrr: [ 0.00 0.00 ]
+Key: VPSRLVQZ: [ 0.00 0.00 ]
+Key: VPSRLVQZrm: [ 0.00 0.00 ]
+Key: VPSRLVQZrmb: [ 0.00 0.00 ]
+Key: VPSRLVQZrmbk: [ 0.00 0.00 ]
+Key: VPSRLVQZrmbkz: [ 0.00 0.00 ]
+Key: VPSRLVQZrmk: [ 0.00 0.00 ]
+Key: VPSRLVQZrmkz: [ 0.00 0.00 ]
+Key: VPSRLVQZrr: [ 0.00 0.00 ]
+Key: VPSRLVQZrrk: [ 0.00 0.00 ]
+Key: VPSRLVQZrrkz: [ 0.00 0.00 ]
+Key: VPSRLVQrm: [ 0.00 0.00 ]
+Key: VPSRLVQrr: [ 0.00 0.00 ]
+Key: VPSRLVWZ: [ 0.00 0.00 ]
+Key: VPSRLVWZrm: [ 0.00 0.00 ]
+Key: VPSRLVWZrmk: [ 0.00 0.00 ]
+Key: VPSRLVWZrmkz: [ 0.00 0.00 ]
+Key: VPSRLVWZrr: [ 0.00 0.00 ]
+Key: VPSRLVWZrrk: [ 0.00 0.00 ]
+Key: VPSRLVWZrrkz: [ 0.00 0.00 ]
+Key: VPSRLWYri: [ 0.00 0.00 ]
+Key: VPSRLWYrm: [ 0.00 0.00 ]
+Key: VPSRLWYrr: [ 0.00 0.00 ]
+Key: VPSRLWZ: [ 0.00 0.00 ]
+Key: VPSRLWZmi: [ 0.00 0.00 ]
+Key: VPSRLWZmik: [ 0.00 0.00 ]
+Key: VPSRLWZmikz: [ 0.00 0.00 ]
+Key: VPSRLWZri: [ 0.00 0.00 ]
+Key: VPSRLWZrik: [ 0.00 0.00 ]
+Key: VPSRLWZrikz: [ 0.00 0.00 ]
+Key: VPSRLWZrm: [ 0.00 0.00 ]
+Key: VPSRLWZrmk: [ 0.00 0.00 ]
+Key: VPSRLWZrmkz: [ 0.00 0.00 ]
+Key: VPSRLWZrr: [ 0.00 0.00 ]
+Key: VPSRLWZrrk: [ 0.00 0.00 ]
+Key: VPSRLWZrrkz: [ 0.00 0.00 ]
+Key: VPSRLWri: [ 0.00 0.00 ]
+Key: VPSRLWrm: [ 0.00 0.00 ]
+Key: VPSRLWrr: [ 0.00 0.00 ]
+Key: VPSUBBYrm: [ 0.00 0.00 ]
+Key: VPSUBBYrr: [ 0.00 0.00 ]
+Key: VPSUBBZ: [ 0.00 0.00 ]
+Key: VPSUBBZrm: [ 0.00 0.00 ]
+Key: VPSUBBZrmk: [ 0.00 0.00 ]
+Key: VPSUBBZrmkz: [ 0.00 0.00 ]
+Key: VPSUBBZrr: [ 0.00 0.00 ]
+Key: VPSUBBZrrk: [ 0.00 0.00 ]
+Key: VPSUBBZrrkz: [ 0.00 0.00 ]
+Key: VPSUBBrm: [ 0.00 0.00 ]
+Key: VPSUBBrr: [ 0.00 0.00 ]
+Key: VPSUBDYrm: [ 0.00 0.00 ]
+Key: VPSUBDYrr: [ 0.00 0.00 ]
+Key: VPSUBDZ: [ 0.00 0.00 ]
+Key: VPSUBDZrm: [ 0.00 0.00 ]
+Key: VPSUBDZrmb: [ 0.00 0.00 ]
+Key: VPSUBDZrmbk: [ 0.00 0.00 ]
+Key: VPSUBDZrmbkz: [ 0.00 0.00 ]
+Key: VPSUBDZrmk: [ 0.00 0.00 ]
+Key: VPSUBDZrmkz: [ 0.00 0.00 ]
+Key: VPSUBDZrr: [ 0.00 0.00 ]
+Key: VPSUBDZrrk: [ 0.00 0.00 ]
+Key: VPSUBDZrrkz: [ 0.00 0.00 ]
+Key: VPSUBDrm: [ 0.00 0.00 ]
+Key: VPSUBDrr: [ 0.00 0.00 ]
+Key: VPSUBQYrm: [ 0.00 0.00 ]
+Key: VPSUBQYrr: [ 0.00 0.00 ]
+Key: VPSUBQZ: [ 0.00 0.00 ]
+Key: VPSUBQZrm: [ 0.00 0.00 ]
+Key: VPSUBQZrmb: [ 0.00 0.00 ]
+Key: VPSUBQZrmbk: [ 0.00 0.00 ]
+Key: VPSUBQZrmbkz: [ 0.00 0.00 ]
+Key: VPSUBQZrmk: [ 0.00 0.00 ]
+Key: VPSUBQZrmkz: [ 0.00 0.00 ]
+Key: VPSUBQZrr: [ 0.00 0.00 ]
+Key: VPSUBQZrrk: [ 0.00 0.00 ]
+Key: VPSUBQZrrkz: [ 0.00 0.00 ]
+Key: VPSUBQrm: [ 0.00 0.00 ]
+Key: VPSUBQrr: [ 0.00 0.00 ]
+Key: VPSUBSBYrm: [ 0.00 0.00 ]
+Key: VPSUBSBYrr: [ 0.00 0.00 ]
+Key: VPSUBSBZ: [ 0.00 0.00 ]
+Key: VPSUBSBZrm: [ 0.00 0.00 ]
+Key: VPSUBSBZrmk: [ 0.00 0.00 ]
+Key: VPSUBSBZrmkz: [ 0.00 0.00 ]
+Key: VPSUBSBZrr: [ 0.00 0.00 ]
+Key: VPSUBSBZrrk: [ 0.00 0.00 ]
+Key: VPSUBSBZrrkz: [ 0.00 0.00 ]
+Key: VPSUBSBrm: [ 0.00 0.00 ]
+Key: VPSUBSBrr: [ 0.00 0.00 ]
+Key: VPSUBSWYrm: [ 0.00 0.00 ]
+Key: VPSUBSWYrr: [ 0.00 0.00 ]
+Key: VPSUBSWZ: [ 0.00 0.00 ]
+Key: VPSUBSWZrm: [ 0.00 0.00 ]
+Key: VPSUBSWZrmk: [ 0.00 0.00 ]
+Key: VPSUBSWZrmkz: [ 0.00 0.00 ]
+Key: VPSUBSWZrr: [ 0.00 0.00 ]
+Key: VPSUBSWZrrk: [ 0.00 0.00 ]
+Key: VPSUBSWZrrkz: [ 0.00 0.00 ]
+Key: VPSUBSWrm: [ 0.00 0.00 ]
+Key: VPSUBSWrr: [ 0.00 0.00 ]
+Key: VPSUBUSBYrm: [ 0.00 0.00 ]
+Key: VPSUBUSBYrr: [ 0.00 0.00 ]
+Key: VPSUBUSBZ: [ 0.00 0.00 ]
+Key: VPSUBUSBZrm: [ 0.00 0.00 ]
+Key: VPSUBUSBZrmk: [ 0.00 0.00 ]
+Key: VPSUBUSBZrmkz: [ 0.00 0.00 ]
+Key: VPSUBUSBZrr: [ 0.00 0.00 ]
+Key: VPSUBUSBZrrk: [ 0.00 0.00 ]
+Key: VPSUBUSBZrrkz: [ 0.00 0.00 ]
+Key: VPSUBUSBrm: [ 0.00 0.00 ]
+Key: VPSUBUSBrr: [ 0.00 0.00 ]
+Key: VPSUBUSWYrm: [ 0.00 0.00 ]
+Key: VPSUBUSWYrr: [ 0.00 0.00 ]
+Key: VPSUBUSWZ: [ 0.00 0.00 ]
+Key: VPSUBUSWZrm: [ 0.00 0.00 ]
+Key: VPSUBUSWZrmk: [ 0.00 0.00 ]
+Key: VPSUBUSWZrmkz: [ 0.00 0.00 ]
+Key: VPSUBUSWZrr: [ 0.00 0.00 ]
+Key: VPSUBUSWZrrk: [ 0.00 0.00 ]
+Key: VPSUBUSWZrrkz: [ 0.00 0.00 ]
+Key: VPSUBUSWrm: [ 0.00 0.00 ]
+Key: VPSUBUSWrr: [ 0.00 0.00 ]
+Key: VPSUBWYrm: [ 0.00 0.00 ]
+Key: VPSUBWYrr: [ 0.00 0.00 ]
+Key: VPSUBWZ: [ 0.00 0.00 ]
+Key: VPSUBWZrm: [ 0.00 0.00 ]
+Key: VPSUBWZrmk: [ 0.00 0.00 ]
+Key: VPSUBWZrmkz: [ 0.00 0.00 ]
+Key: VPSUBWZrr: [ 0.00 0.00 ]
+Key: VPSUBWZrrk: [ 0.00 0.00 ]
+Key: VPSUBWZrrkz: [ 0.00 0.00 ]
+Key: VPSUBWrm: [ 0.00 0.00 ]
+Key: VPSUBWrr: [ 0.00 0.00 ]
+Key: VPTERNLOGDZ: [ 0.00 0.00 ]
+Key: VPTERNLOGDZrmbi: [ 0.00 0.00 ]
+Key: VPTERNLOGDZrmbik: [ 0.00 0.00 ]
+Key: VPTERNLOGDZrmbikz: [ 0.00 0.00 ]
+Key: VPTERNLOGDZrmi: [ 0.00 0.00 ]
+Key: VPTERNLOGDZrmik: [ 0.00 0.00 ]
+Key: VPTERNLOGDZrmikz: [ 0.00 0.00 ]
+Key: VPTERNLOGDZrri: [ 0.00 0.00 ]
+Key: VPTERNLOGDZrrik: [ 0.00 0.00 ]
+Key: VPTERNLOGDZrrikz: [ 0.00 0.00 ]
+Key: VPTERNLOGQZ: [ 0.00 0.00 ]
+Key: VPTERNLOGQZrmbi: [ 0.00 0.00 ]
+Key: VPTERNLOGQZrmbik: [ 0.00 0.00 ]
+Key: VPTERNLOGQZrmbikz: [ 0.00 0.00 ]
+Key: VPTERNLOGQZrmi: [ 0.00 0.00 ]
+Key: VPTERNLOGQZrmik: [ 0.00 0.00 ]
+Key: VPTERNLOGQZrmikz: [ 0.00 0.00 ]
+Key: VPTERNLOGQZrri: [ 0.00 0.00 ]
+Key: VPTERNLOGQZrrik: [ 0.00 0.00 ]
+Key: VPTERNLOGQZrrikz: [ 0.00 0.00 ]
+Key: VPTESTMBZ: [ 0.00 0.00 ]
+Key: VPTESTMBZrm: [ 0.00 0.00 ]
+Key: VPTESTMBZrmk: [ 0.00 0.00 ]
+Key: VPTESTMBZrr: [ 0.00 0.00 ]
+Key: VPTESTMBZrrk: [ 0.00 0.00 ]
+Key: VPTESTMDZ: [ 0.00 0.00 ]
+Key: VPTESTMDZrm: [ 0.00 0.00 ]
+Key: VPTESTMDZrmb: [ 0.00 0.00 ]
+Key: VPTESTMDZrmbk: [ 0.00 0.00 ]
+Key: VPTESTMDZrmk: [ 0.00 0.00 ]
+Key: VPTESTMDZrr: [ 0.00 0.00 ]
+Key: VPTESTMDZrrk: [ 0.00 0.00 ]
+Key: VPTESTMQZ: [ 0.00 0.00 ]
+Key: VPTESTMQZrm: [ 0.00 0.00 ]
+Key: VPTESTMQZrmb: [ 0.00 0.00 ]
+Key: VPTESTMQZrmbk: [ 0.00 0.00 ]
+Key: VPTESTMQZrmk: [ 0.00 0.00 ]
+Key: VPTESTMQZrr: [ 0.00 0.00 ]
+Key: VPTESTMQZrrk: [ 0.00 0.00 ]
+Key: VPTESTMWZ: [ 0.00 0.00 ]
+Key: VPTESTMWZrm: [ 0.00 0.00 ]
+Key: VPTESTMWZrmk: [ 0.00 0.00 ]
+Key: VPTESTMWZrr: [ 0.00 0.00 ]
+Key: VPTESTMWZrrk: [ 0.00 0.00 ]
+Key: VPTESTNMBZ: [ 0.00 0.00 ]
+Key: VPTESTNMBZrm: [ 0.00 0.00 ]
+Key: VPTESTNMBZrmk: [ 0.00 0.00 ]
+Key: VPTESTNMBZrr: [ 0.00 0.00 ]
+Key: VPTESTNMBZrrk: [ 0.00 0.00 ]
+Key: VPTESTNMDZ: [ 0.00 0.00 ]
+Key: VPTESTNMDZrm: [ 0.00 0.00 ]
+Key: VPTESTNMDZrmb: [ 0.00 0.00 ]
+Key: VPTESTNMDZrmbk: [ 0.00 0.00 ]
+Key: VPTESTNMDZrmk: [ 0.00 0.00 ]
+Key: VPTESTNMDZrr: [ 0.00 0.00 ]
+Key: VPTESTNMDZrrk: [ 0.00 0.00 ]
+Key: VPTESTNMQZ: [ 0.00 0.00 ]
+Key: VPTESTNMQZrm: [ 0.00 0.00 ]
+Key: VPTESTNMQZrmb: [ 0.00 0.00 ]
+Key: VPTESTNMQZrmbk: [ 0.00 0.00 ]
+Key: VPTESTNMQZrmk: [ 0.00 0.00 ]
+Key: VPTESTNMQZrr: [ 0.00 0.00 ]
+Key: VPTESTNMQZrrk: [ 0.00 0.00 ]
+Key: VPTESTNMWZ: [ 0.00 0.00 ]
+Key: VPTESTNMWZrm: [ 0.00 0.00 ]
+Key: VPTESTNMWZrmk: [ 0.00 0.00 ]
+Key: VPTESTNMWZrr: [ 0.00 0.00 ]
+Key: VPTESTNMWZrrk: [ 0.00 0.00 ]
+Key: VPTESTYrm: [ 0.00 0.00 ]
+Key: VPTESTYrr: [ 0.00 0.00 ]
+Key: VPTESTrm: [ 0.00 0.00 ]
+Key: VPTESTrr: [ 0.00 0.00 ]
+Key: VPUNPCKHBWYrm: [ 0.00 0.00 ]
+Key: VPUNPCKHBWYrr: [ 0.00 0.00 ]
+Key: VPUNPCKHBWZ: [ 0.00 0.00 ]
+Key: VPUNPCKHBWZrm: [ 0.00 0.00 ]
+Key: VPUNPCKHBWZrmk: [ 0.00 0.00 ]
+Key: VPUNPCKHBWZrmkz: [ 0.00 0.00 ]
+Key: VPUNPCKHBWZrr: [ 0.00 0.00 ]
+Key: VPUNPCKHBWZrrk: [ 0.00 0.00 ]
+Key: VPUNPCKHBWZrrkz: [ 0.00 0.00 ]
+Key: VPUNPCKHBWrm: [ 0.00 0.00 ]
+Key: VPUNPCKHBWrr: [ 0.00 0.00 ]
+Key: VPUNPCKHDQYrm: [ 0.00 0.00 ]
+Key: VPUNPCKHDQYrr: [ 0.00 0.00 ]
+Key: VPUNPCKHDQZ: [ 0.00 0.00 ]
+Key: VPUNPCKHDQZrm: [ 0.00 0.00 ]
+Key: VPUNPCKHDQZrmb: [ 0.00 0.00 ]
+Key: VPUNPCKHDQZrmbk: [ 0.00 0.00 ]
+Key: VPUNPCKHDQZrmbkz: [ 0.00 0.00 ]
+Key: VPUNPCKHDQZrmk: [ 0.00 0.00 ]
+Key: VPUNPCKHDQZrmkz: [ 0.00 0.00 ]
+Key: VPUNPCKHDQZrr: [ 0.00 0.00 ]
+Key: VPUNPCKHDQZrrk: [ 0.00 0.00 ]
+Key: VPUNPCKHDQZrrkz: [ 0.00 0.00 ]
+Key: VPUNPCKHDQrm: [ 0.00 0.00 ]
+Key: VPUNPCKHDQrr: [ 0.00 0.00 ]
+Key: VPUNPCKHQDQYrm: [ 0.00 0.00 ]
+Key: VPUNPCKHQDQYrr: [ 0.00 0.00 ]
+Key: VPUNPCKHQDQZ: [ 0.00 0.00 ]
+Key: VPUNPCKHQDQZrm: [ 0.00 0.00 ]
+Key: VPUNPCKHQDQZrmb: [ 0.00 0.00 ]
+Key: VPUNPCKHQDQZrmbk: [ 0.00 0.00 ]
+Key: VPUNPCKHQDQZrmbkz: [ 0.00 0.00 ]
+Key: VPUNPCKHQDQZrmk: [ 0.00 0.00 ]
+Key: VPUNPCKHQDQZrmkz: [ 0.00 0.00 ]
+Key: VPUNPCKHQDQZrr: [ 0.00 0.00 ]
+Key: VPUNPCKHQDQZrrk: [ 0.00 0.00 ]
+Key: VPUNPCKHQDQZrrkz: [ 0.00 0.00 ]
+Key: VPUNPCKHQDQrm: [ 0.00 0.00 ]
+Key: VPUNPCKHQDQrr: [ 0.00 0.00 ]
+Key: VPUNPCKHWDYrm: [ 0.00 0.00 ]
+Key: VPUNPCKHWDYrr: [ 0.00 0.00 ]
+Key: VPUNPCKHWDZ: [ 0.00 0.00 ]
+Key: VPUNPCKHWDZrm: [ 0.00 0.00 ]
+Key: VPUNPCKHWDZrmk: [ 0.00 0.00 ]
+Key: VPUNPCKHWDZrmkz: [ 0.00 0.00 ]
+Key: VPUNPCKHWDZrr: [ 0.00 0.00 ]
+Key: VPUNPCKHWDZrrk: [ 0.00 0.00 ]
+Key: VPUNPCKHWDZrrkz: [ 0.00 0.00 ]
+Key: VPUNPCKHWDrm: [ 0.00 0.00 ]
+Key: VPUNPCKHWDrr: [ 0.00 0.00 ]
+Key: VPUNPCKLBWYrm: [ 0.00 0.00 ]
+Key: VPUNPCKLBWYrr: [ 0.00 0.00 ]
+Key: VPUNPCKLBWZ: [ 0.00 0.00 ]
+Key: VPUNPCKLBWZrm: [ 0.00 0.00 ]
+Key: VPUNPCKLBWZrmk: [ 0.00 0.00 ]
+Key: VPUNPCKLBWZrmkz: [ 0.00 0.00 ]
+Key: VPUNPCKLBWZrr: [ 0.00 0.00 ]
+Key: VPUNPCKLBWZrrk: [ 0.00 0.00 ]
+Key: VPUNPCKLBWZrrkz: [ 0.00 0.00 ]
+Key: VPUNPCKLBWrm: [ 0.00 0.00 ]
+Key: VPUNPCKLBWrr: [ 0.00 0.00 ]
+Key: VPUNPCKLDQYrm: [ 0.00 0.00 ]
+Key: VPUNPCKLDQYrr: [ 0.00 0.00 ]
+Key: VPUNPCKLDQZ: [ 0.00 0.00 ]
+Key: VPUNPCKLDQZrm: [ 0.00 0.00 ]
+Key: VPUNPCKLDQZrmb: [ 0.00 0.00 ]
+Key: VPUNPCKLDQZrmbk: [ 0.00 0.00 ]
+Key: VPUNPCKLDQZrmbkz: [ 0.00 0.00 ]
+Key: VPUNPCKLDQZrmk: [ 0.00 0.00 ]
+Key: VPUNPCKLDQZrmkz: [ 0.00 0.00 ]
+Key: VPUNPCKLDQZrr: [ 0.00 0.00 ]
+Key: VPUNPCKLDQZrrk: [ 0.00 0.00 ]
+Key: VPUNPCKLDQZrrkz: [ 0.00 0.00 ]
+Key: VPUNPCKLDQrm: [ 0.00 0.00 ]
+Key: VPUNPCKLDQrr: [ 0.00 0.00 ]
+Key: VPUNPCKLQDQYrm: [ 0.00 0.00 ]
+Key: VPUNPCKLQDQYrr: [ 0.00 0.00 ]
+Key: VPUNPCKLQDQZ: [ 0.00 0.00 ]
+Key: VPUNPCKLQDQZrm: [ 0.00 0.00 ]
+Key: VPUNPCKLQDQZrmb: [ 0.00 0.00 ]
+Key: VPUNPCKLQDQZrmbk: [ 0.00 0.00 ]
+Key: VPUNPCKLQDQZrmbkz: [ 0.00 0.00 ]
+Key: VPUNPCKLQDQZrmk: [ 0.00 0.00 ]
+Key: VPUNPCKLQDQZrmkz: [ 0.00 0.00 ]
+Key: VPUNPCKLQDQZrr: [ 0.00 0.00 ]
+Key: VPUNPCKLQDQZrrk: [ 0.00 0.00 ]
+Key: VPUNPCKLQDQZrrkz: [ 0.00 0.00 ]
+Key: VPUNPCKLQDQrm: [ 0.00 0.00 ]
+Key: VPUNPCKLQDQrr: [ 0.00 0.00 ]
+Key: VPUNPCKLWDYrm: [ 0.00 0.00 ]
+Key: VPUNPCKLWDYrr: [ 0.00 0.00 ]
+Key: VPUNPCKLWDZ: [ 0.00 0.00 ]
+Key: VPUNPCKLWDZrm: [ 0.00 0.00 ]
+Key: VPUNPCKLWDZrmk: [ 0.00 0.00 ]
+Key: VPUNPCKLWDZrmkz: [ 0.00 0.00 ]
+Key: VPUNPCKLWDZrr: [ 0.00 0.00 ]
+Key: VPUNPCKLWDZrrk: [ 0.00 0.00 ]
+Key: VPUNPCKLWDZrrkz: [ 0.00 0.00 ]
+Key: VPUNPCKLWDrm: [ 0.00 0.00 ]
+Key: VPUNPCKLWDrr: [ 0.00 0.00 ]
+Key: VPXORDZ: [ 0.00 0.00 ]
+Key: VPXORDZrm: [ 0.00 0.00 ]
+Key: VPXORDZrmb: [ 0.00 0.00 ]
+Key: VPXORDZrmbk: [ 0.00 0.00 ]
+Key: VPXORDZrmbkz: [ 0.00 0.00 ]
+Key: VPXORDZrmk: [ 0.00 0.00 ]
+Key: VPXORDZrmkz: [ 0.00 0.00 ]
+Key: VPXORDZrr: [ 0.00 0.00 ]
+Key: VPXORDZrrk: [ 0.00 0.00 ]
+Key: VPXORDZrrkz: [ 0.00 0.00 ]
+Key: VPXORQZ: [ 0.00 0.00 ]
+Key: VPXORQZrm: [ 0.00 0.00 ]
+Key: VPXORQZrmb: [ 0.00 0.00 ]
+Key: VPXORQZrmbk: [ 0.00 0.00 ]
+Key: VPXORQZrmbkz: [ 0.00 0.00 ]
+Key: VPXORQZrmk: [ 0.00 0.00 ]
+Key: VPXORQZrmkz: [ 0.00 0.00 ]
+Key: VPXORQZrr: [ 0.00 0.00 ]
+Key: VPXORQZrrk: [ 0.00 0.00 ]
+Key: VPXORQZrrkz: [ 0.00 0.00 ]
+Key: VPXORYrm: [ 0.00 0.00 ]
+Key: VPXORYrr: [ 0.00 0.00 ]
+Key: VPXORrm: [ 0.00 0.00 ]
+Key: VPXORrr: [ 0.00 0.00 ]
+Key: VRANGEPDZ: [ 0.00 0.00 ]
+Key: VRANGEPDZrmbi: [ 0.00 0.00 ]
+Key: VRANGEPDZrmbik: [ 0.00 0.00 ]
+Key: VRANGEPDZrmbikz: [ 0.00 0.00 ]
+Key: VRANGEPDZrmi: [ 0.00 0.00 ]
+Key: VRANGEPDZrmik: [ 0.00 0.00 ]
+Key: VRANGEPDZrmikz: [ 0.00 0.00 ]
+Key: VRANGEPDZrri: [ 0.00 0.00 ]
+Key: VRANGEPDZrrib: [ 0.00 0.00 ]
+Key: VRANGEPDZrribk: [ 0.00 0.00 ]
+Key: VRANGEPDZrribkz: [ 0.00 0.00 ]
+Key: VRANGEPDZrrik: [ 0.00 0.00 ]
+Key: VRANGEPDZrrikz: [ 0.00 0.00 ]
+Key: VRANGEPSZ: [ 0.00 0.00 ]
+Key: VRANGEPSZrmbi: [ 0.00 0.00 ]
+Key: VRANGEPSZrmbik: [ 0.00 0.00 ]
+Key: VRANGEPSZrmbikz: [ 0.00 0.00 ]
+Key: VRANGEPSZrmi: [ 0.00 0.00 ]
+Key: VRANGEPSZrmik: [ 0.00 0.00 ]
+Key: VRANGEPSZrmikz: [ 0.00 0.00 ]
+Key: VRANGEPSZrri: [ 0.00 0.00 ]
+Key: VRANGEPSZrrib: [ 0.00 0.00 ]
+Key: VRANGEPSZrribk: [ 0.00 0.00 ]
+Key: VRANGEPSZrribkz: [ 0.00 0.00 ]
+Key: VRANGEPSZrrik: [ 0.00 0.00 ]
+Key: VRANGEPSZrrikz: [ 0.00 0.00 ]
+Key: VRANGESDZrmi: [ 0.00 0.00 ]
+Key: VRANGESDZrmik: [ 0.00 0.00 ]
+Key: VRANGESDZrmikz: [ 0.00 0.00 ]
+Key: VRANGESDZrri: [ 0.00 0.00 ]
+Key: VRANGESDZrrib: [ 0.00 0.00 ]
+Key: VRANGESDZrribk: [ 0.00 0.00 ]
+Key: VRANGESDZrribkz: [ 0.00 0.00 ]
+Key: VRANGESDZrrik: [ 0.00 0.00 ]
+Key: VRANGESDZrrikz: [ 0.00 0.00 ]
+Key: VRANGESSZrmi: [ 0.00 0.00 ]
+Key: VRANGESSZrmik: [ 0.00 0.00 ]
+Key: VRANGESSZrmikz: [ 0.00 0.00 ]
+Key: VRANGESSZrri: [ 0.00 0.00 ]
+Key: VRANGESSZrrib: [ 0.00 0.00 ]
+Key: VRANGESSZrribk: [ 0.00 0.00 ]
+Key: VRANGESSZrribkz: [ 0.00 0.00 ]
+Key: VRANGESSZrrik: [ 0.00 0.00 ]
+Key: VRANGESSZrrikz: [ 0.00 0.00 ]
+Key: VRCP: [ 0.00 0.00 ]
+Key: VRCPBF: [ 0.00 0.00 ]
+Key: VRCPPHZ: [ 0.00 0.00 ]
+Key: VRCPPHZm: [ 0.00 0.00 ]
+Key: VRCPPHZmb: [ 0.00 0.00 ]
+Key: VRCPPHZmbk: [ 0.00 0.00 ]
+Key: VRCPPHZmbkz: [ 0.00 0.00 ]
+Key: VRCPPHZmk: [ 0.00 0.00 ]
+Key: VRCPPHZmkz: [ 0.00 0.00 ]
+Key: VRCPPHZr: [ 0.00 0.00 ]
+Key: VRCPPHZrk: [ 0.00 0.00 ]
+Key: VRCPPHZrkz: [ 0.00 0.00 ]
+Key: VRCPPSYm: [ 0.00 0.00 ]
+Key: VRCPPSYr: [ 0.00 0.00 ]
+Key: VRCPPSm: [ 0.00 0.00 ]
+Key: VRCPPSr: [ 0.00 0.00 ]
+Key: VRCPSHZrm: [ 0.00 0.00 ]
+Key: VRCPSHZrmk: [ 0.00 0.00 ]
+Key: VRCPSHZrmkz: [ 0.00 0.00 ]
+Key: VRCPSHZrr: [ 0.00 0.00 ]
+Key: VRCPSHZrrk: [ 0.00 0.00 ]
+Key: VRCPSHZrrkz: [ 0.00 0.00 ]
+Key: VRCPSSm: [ 0.00 0.00 ]
+Key: VRCPSSm_Int: [ 0.00 0.00 ]
+Key: VRCPSSr: [ 0.00 0.00 ]
+Key: VRCPSSr_Int: [ 0.00 0.00 ]
+Key: VREDUCEBF: [ 0.00 0.00 ]
+Key: VREDUCEPDZ: [ 0.00 0.00 ]
+Key: VREDUCEPDZrmbi: [ 0.00 0.00 ]
+Key: VREDUCEPDZrmbik: [ 0.00 0.00 ]
+Key: VREDUCEPDZrmbikz: [ 0.00 0.00 ]
+Key: VREDUCEPDZrmi: [ 0.00 0.00 ]
+Key: VREDUCEPDZrmik: [ 0.00 0.00 ]
+Key: VREDUCEPDZrmikz: [ 0.00 0.00 ]
+Key: VREDUCEPDZrri: [ 0.00 0.00 ]
+Key: VREDUCEPDZrrib: [ 0.00 0.00 ]
+Key: VREDUCEPDZrribk: [ 0.00 0.00 ]
+Key: VREDUCEPDZrribkz: [ 0.00 0.00 ]
+Key: VREDUCEPDZrrik: [ 0.00 0.00 ]
+Key: VREDUCEPDZrrikz: [ 0.00 0.00 ]
+Key: VREDUCEPHZ: [ 0.00 0.00 ]
+Key: VREDUCEPHZrmbi: [ 0.00 0.00 ]
+Key: VREDUCEPHZrmbik: [ 0.00 0.00 ]
+Key: VREDUCEPHZrmbikz: [ 0.00 0.00 ]
+Key: VREDUCEPHZrmi: [ 0.00 0.00 ]
+Key: VREDUCEPHZrmik: [ 0.00 0.00 ]
+Key: VREDUCEPHZrmikz: [ 0.00 0.00 ]
+Key: VREDUCEPHZrri: [ 0.00 0.00 ]
+Key: VREDUCEPHZrrib: [ 0.00 0.00 ]
+Key: VREDUCEPHZrribk: [ 0.00 0.00 ]
+Key: VREDUCEPHZrribkz: [ 0.00 0.00 ]
+Key: VREDUCEPHZrrik: [ 0.00 0.00 ]
+Key: VREDUCEPHZrrikz: [ 0.00 0.00 ]
+Key: VREDUCEPSZ: [ 0.00 0.00 ]
+Key: VREDUCEPSZrmbi: [ 0.00 0.00 ]
+Key: VREDUCEPSZrmbik: [ 0.00 0.00 ]
+Key: VREDUCEPSZrmbikz: [ 0.00 0.00 ]
+Key: VREDUCEPSZrmi: [ 0.00 0.00 ]
+Key: VREDUCEPSZrmik: [ 0.00 0.00 ]
+Key: VREDUCEPSZrmikz: [ 0.00 0.00 ]
+Key: VREDUCEPSZrri: [ 0.00 0.00 ]
+Key: VREDUCEPSZrrib: [ 0.00 0.00 ]
+Key: VREDUCEPSZrribk: [ 0.00 0.00 ]
+Key: VREDUCEPSZrribkz: [ 0.00 0.00 ]
+Key: VREDUCEPSZrrik: [ 0.00 0.00 ]
+Key: VREDUCEPSZrrikz: [ 0.00 0.00 ]
+Key: VREDUCESDZrmi: [ 0.00 0.00 ]
+Key: VREDUCESDZrmik: [ 0.00 0.00 ]
+Key: VREDUCESDZrmikz: [ 0.00 0.00 ]
+Key: VREDUCESDZrri: [ 0.00 0.00 ]
+Key: VREDUCESDZrrib: [ 0.00 0.00 ]
+Key: VREDUCESDZrribk: [ 0.00 0.00 ]
+Key: VREDUCESDZrribkz: [ 0.00 0.00 ]
+Key: VREDUCESDZrrik: [ 0.00 0.00 ]
+Key: VREDUCESDZrrikz: [ 0.00 0.00 ]
+Key: VREDUCESHZrmi: [ 0.00 0.00 ]
+Key: VREDUCESHZrmik: [ 0.00 0.00 ]
+Key: VREDUCESHZrmikz: [ 0.00 0.00 ]
+Key: VREDUCESHZrri: [ 0.00 0.00 ]
+Key: VREDUCESHZrrib: [ 0.00 0.00 ]
+Key: VREDUCESHZrribk: [ 0.00 0.00 ]
+Key: VREDUCESHZrribkz: [ 0.00 0.00 ]
+Key: VREDUCESHZrrik: [ 0.00 0.00 ]
+Key: VREDUCESHZrrikz: [ 0.00 0.00 ]
+Key: VREDUCESSZrmi: [ 0.00 0.00 ]
+Key: VREDUCESSZrmik: [ 0.00 0.00 ]
+Key: VREDUCESSZrmikz: [ 0.00 0.00 ]
+Key: VREDUCESSZrri: [ 0.00 0.00 ]
+Key: VREDUCESSZrrib: [ 0.00 0.00 ]
+Key: VREDUCESSZrribk: [ 0.00 0.00 ]
+Key: VREDUCESSZrribkz: [ 0.00 0.00 ]
+Key: VREDUCESSZrrik: [ 0.00 0.00 ]
+Key: VREDUCESSZrrikz: [ 0.00 0.00 ]
+Key: VRNDSCALEBF: [ 0.00 0.00 ]
+Key: VRNDSCALEPDZ: [ 0.00 0.00 ]
+Key: VRNDSCALEPDZrmbi: [ 0.00 0.00 ]
+Key: VRNDSCALEPDZrmbik: [ 0.00 0.00 ]
+Key: VRNDSCALEPDZrmbikz: [ 0.00 0.00 ]
+Key: VRNDSCALEPDZrmi: [ 0.00 0.00 ]
+Key: VRNDSCALEPDZrmik: [ 0.00 0.00 ]
+Key: VRNDSCALEPDZrmikz: [ 0.00 0.00 ]
+Key: VRNDSCALEPDZrri: [ 0.00 0.00 ]
+Key: VRNDSCALEPDZrrib: [ 0.00 0.00 ]
+Key: VRNDSCALEPDZrribk: [ 0.00 0.00 ]
+Key: VRNDSCALEPDZrribkz: [ 0.00 0.00 ]
+Key: VRNDSCALEPDZrrik: [ 0.00 0.00 ]
+Key: VRNDSCALEPDZrrikz: [ 0.00 0.00 ]
+Key: VRNDSCALEPHZ: [ 0.00 0.00 ]
+Key: VRNDSCALEPHZrmbi: [ 0.00 0.00 ]
+Key: VRNDSCALEPHZrmbik: [ 0.00 0.00 ]
+Key: VRNDSCALEPHZrmbikz: [ 0.00 0.00 ]
+Key: VRNDSCALEPHZrmi: [ 0.00 0.00 ]
+Key: VRNDSCALEPHZrmik: [ 0.00 0.00 ]
+Key: VRNDSCALEPHZrmikz: [ 0.00 0.00 ]
+Key: VRNDSCALEPHZrri: [ 0.00 0.00 ]
+Key: VRNDSCALEPHZrrib: [ 0.00 0.00 ]
+Key: VRNDSCALEPHZrribk: [ 0.00 0.00 ]
+Key: VRNDSCALEPHZrribkz: [ 0.00 0.00 ]
+Key: VRNDSCALEPHZrrik: [ 0.00 0.00 ]
+Key: VRNDSCALEPHZrrikz: [ 0.00 0.00 ]
+Key: VRNDSCALEPSZ: [ 0.00 0.00 ]
+Key: VRNDSCALEPSZrmbi: [ 0.00 0.00 ]
+Key: VRNDSCALEPSZrmbik: [ 0.00 0.00 ]
+Key: VRNDSCALEPSZrmbikz: [ 0.00 0.00 ]
+Key: VRNDSCALEPSZrmi: [ 0.00 0.00 ]
+Key: VRNDSCALEPSZrmik: [ 0.00 0.00 ]
+Key: VRNDSCALEPSZrmikz: [ 0.00 0.00 ]
+Key: VRNDSCALEPSZrri: [ 0.00 0.00 ]
+Key: VRNDSCALEPSZrrib: [ 0.00 0.00 ]
+Key: VRNDSCALEPSZrribk: [ 0.00 0.00 ]
+Key: VRNDSCALEPSZrribkz: [ 0.00 0.00 ]
+Key: VRNDSCALEPSZrrik: [ 0.00 0.00 ]
+Key: VRNDSCALEPSZrrikz: [ 0.00 0.00 ]
+Key: VRNDSCALESDZrmi: [ 0.00 0.00 ]
+Key: VRNDSCALESDZrmi_Int: [ 0.00 0.00 ]
+Key: VRNDSCALESDZrmik_Int: [ 0.00 0.00 ]
+Key: VRNDSCALESDZrmikz_Int: [ 0.00 0.00 ]
+Key: VRNDSCALESDZrri: [ 0.00 0.00 ]
+Key: VRNDSCALESDZrri_Int: [ 0.00 0.00 ]
+Key: VRNDSCALESDZrrib_Int: [ 0.00 0.00 ]
+Key: VRNDSCALESDZrribk_Int: [ 0.00 0.00 ]
+Key: VRNDSCALESDZrribkz_Int: [ 0.00 0.00 ]
+Key: VRNDSCALESDZrrik_Int: [ 0.00 0.00 ]
+Key: VRNDSCALESDZrrikz_Int: [ 0.00 0.00 ]
+Key: VRNDSCALESHZrmi: [ 0.00 0.00 ]
+Key: VRNDSCALESHZrmi_Int: [ 0.00 0.00 ]
+Key: VRNDSCALESHZrmik_Int: [ 0.00 0.00 ]
+Key: VRNDSCALESHZrmikz_Int: [ 0.00 0.00 ]
+Key: VRNDSCALESHZrri: [ 0.00 0.00 ]
+Key: VRNDSCALESHZrri_Int: [ 0.00 0.00 ]
+Key: VRNDSCALESHZrrib_Int: [ 0.00 0.00 ]
+Key: VRNDSCALESHZrribk_Int: [ 0.00 0.00 ]
+Key: VRNDSCALESHZrribkz_Int: [ 0.00 0.00 ]
+Key: VRNDSCALESHZrrik_Int: [ 0.00 0.00 ]
+Key: VRNDSCALESHZrrikz_Int: [ 0.00 0.00 ]
+Key: VRNDSCALESSZrmi: [ 0.00 0.00 ]
+Key: VRNDSCALESSZrmi_Int: [ 0.00 0.00 ]
+Key: VRNDSCALESSZrmik_Int: [ 0.00 0.00 ]
+Key: VRNDSCALESSZrmikz_Int: [ 0.00 0.00 ]
+Key: VRNDSCALESSZrri: [ 0.00 0.00 ]
+Key: VRNDSCALESSZrri_Int: [ 0.00 0.00 ]
+Key: VRNDSCALESSZrrib_Int: [ 0.00 0.00 ]
+Key: VRNDSCALESSZrribk_Int: [ 0.00 0.00 ]
+Key: VRNDSCALESSZrribkz_Int: [ 0.00 0.00 ]
+Key: VRNDSCALESSZrrik_Int: [ 0.00 0.00 ]
+Key: VRNDSCALESSZrrikz_Int: [ 0.00 0.00 ]
+Key: VROUNDPDYmi: [ 0.00 0.00 ]
+Key: VROUNDPDYri: [ 0.00 0.00 ]
+Key: VROUNDPDmi: [ 0.00 0.00 ]
+Key: VROUNDPDri: [ 0.00 0.00 ]
+Key: VROUNDPSYmi: [ 0.00 0.00 ]
+Key: VROUNDPSYri: [ 0.00 0.00 ]
+Key: VROUNDPSmi: [ 0.00 0.00 ]
+Key: VROUNDPSri: [ 0.00 0.00 ]
+Key: VROUNDSDmi: [ 0.00 0.00 ]
+Key: VROUNDSDmi_Int: [ 0.00 0.00 ]
+Key: VROUNDSDri: [ 0.00 0.00 ]
+Key: VROUNDSDri_Int: [ 0.00 0.00 ]
+Key: VROUNDSSmi: [ 0.00 0.00 ]
+Key: VROUNDSSmi_Int: [ 0.00 0.00 ]
+Key: VROUNDSSri: [ 0.00 0.00 ]
+Key: VROUNDSSri_Int: [ 0.00 0.00 ]
+Key: VRSQRT: [ 0.00 0.00 ]
+Key: VRSQRTBF: [ 0.00 0.00 ]
+Key: VRSQRTPHZ: [ 0.00 0.00 ]
+Key: VRSQRTPHZm: [ 0.00 0.00 ]
+Key: VRSQRTPHZmb: [ 0.00 0.00 ]
+Key: VRSQRTPHZmbk: [ 0.00 0.00 ]
+Key: VRSQRTPHZmbkz: [ 0.00 0.00 ]
+Key: VRSQRTPHZmk: [ 0.00 0.00 ]
+Key: VRSQRTPHZmkz: [ 0.00 0.00 ]
+Key: VRSQRTPHZr: [ 0.00 0.00 ]
+Key: VRSQRTPHZrk: [ 0.00 0.00 ]
+Key: VRSQRTPHZrkz: [ 0.00 0.00 ]
+Key: VRSQRTPSYm: [ 0.00 0.00 ]
+Key: VRSQRTPSYr: [ 0.00 0.00 ]
+Key: VRSQRTPSm: [ 0.00 0.00 ]
+Key: VRSQRTPSr: [ 0.00 0.00 ]
+Key: VRSQRTSHZrm: [ 0.00 0.00 ]
+Key: VRSQRTSHZrmk: [ 0.00 0.00 ]
+Key: VRSQRTSHZrmkz: [ 0.00 0.00 ]
+Key: VRSQRTSHZrr: [ 0.00 0.00 ]
+Key: VRSQRTSHZrrk: [ 0.00 0.00 ]
+Key: VRSQRTSHZrrkz: [ 0.00 0.00 ]
+Key: VRSQRTSSm: [ 0.00 0.00 ]
+Key: VRSQRTSSm_Int: [ 0.00 0.00 ]
+Key: VRSQRTSSr: [ 0.00 0.00 ]
+Key: VRSQRTSSr_Int: [ 0.00 0.00 ]
+Key: VSCALEFBF: [ 0.00 0.00 ]
+Key: VSCALEFPDZ: [ 0.00 0.00 ]
+Key: VSCALEFPDZrm: [ 0.00 0.00 ]
+Key: VSCALEFPDZrmb: [ 0.00 0.00 ]
+Key: VSCALEFPDZrmbk: [ 0.00 0.00 ]
+Key: VSCALEFPDZrmbkz: [ 0.00 0.00 ]
+Key: VSCALEFPDZrmk: [ 0.00 0.00 ]
+Key: VSCALEFPDZrmkz: [ 0.00 0.00 ]
+Key: VSCALEFPDZrr: [ 0.00 0.00 ]
+Key: VSCALEFPDZrrb: [ 0.00 0.00 ]
+Key: VSCALEFPDZrrbk: [ 0.00 0.00 ]
+Key: VSCALEFPDZrrbkz: [ 0.00 0.00 ]
+Key: VSCALEFPDZrrk: [ 0.00 0.00 ]
+Key: VSCALEFPDZrrkz: [ 0.00 0.00 ]
+Key: VSCALEFPHZ: [ 0.00 0.00 ]
+Key: VSCALEFPHZrm: [ 0.00 0.00 ]
+Key: VSCALEFPHZrmb: [ 0.00 0.00 ]
+Key: VSCALEFPHZrmbk: [ 0.00 0.00 ]
+Key: VSCALEFPHZrmbkz: [ 0.00 0.00 ]
+Key: VSCALEFPHZrmk: [ 0.00 0.00 ]
+Key: VSCALEFPHZrmkz: [ 0.00 0.00 ]
+Key: VSCALEFPHZrr: [ 0.00 0.00 ]
+Key: VSCALEFPHZrrb: [ 0.00 0.00 ]
+Key: VSCALEFPHZrrbk: [ 0.00 0.00 ]
+Key: VSCALEFPHZrrbkz: [ 0.00 0.00 ]
+Key: VSCALEFPHZrrk: [ 0.00 0.00 ]
+Key: VSCALEFPHZrrkz: [ 0.00 0.00 ]
+Key: VSCALEFPSZ: [ 0.00 0.00 ]
+Key: VSCALEFPSZrm: [ 0.00 0.00 ]
+Key: VSCALEFPSZrmb: [ 0.00 0.00 ]
+Key: VSCALEFPSZrmbk: [ 0.00 0.00 ]
+Key: VSCALEFPSZrmbkz: [ 0.00 0.00 ]
+Key: VSCALEFPSZrmk: [ 0.00 0.00 ]
+Key: VSCALEFPSZrmkz: [ 0.00 0.00 ]
+Key: VSCALEFPSZrr: [ 0.00 0.00 ]
+Key: VSCALEFPSZrrb: [ 0.00 0.00 ]
+Key: VSCALEFPSZrrbk: [ 0.00 0.00 ]
+Key: VSCALEFPSZrrbkz: [ 0.00 0.00 ]
+Key: VSCALEFPSZrrk: [ 0.00 0.00 ]
+Key: VSCALEFPSZrrkz: [ 0.00 0.00 ]
+Key: VSCALEFSDZrm: [ 0.00 0.00 ]
+Key: VSCALEFSDZrmk: [ 0.00 0.00 ]
+Key: VSCALEFSDZrmkz: [ 0.00 0.00 ]
+Key: VSCALEFSDZrr: [ 0.00 0.00 ]
+Key: VSCALEFSDZrrb_Int: [ 0.00 0.00 ]
+Key: VSCALEFSDZrrbk_Int: [ 0.00 0.00 ]
+Key: VSCALEFSDZrrbkz_Int: [ 0.00 0.00 ]
+Key: VSCALEFSDZrrk: [ 0.00 0.00 ]
+Key: VSCALEFSDZrrkz: [ 0.00 0.00 ]
+Key: VSCALEFSHZrm: [ 0.00 0.00 ]
+Key: VSCALEFSHZrmk: [ 0.00 0.00 ]
+Key: VSCALEFSHZrmkz: [ 0.00 0.00 ]
+Key: VSCALEFSHZrr: [ 0.00 0.00 ]
+Key: VSCALEFSHZrrb_Int: [ 0.00 0.00 ]
+Key: VSCALEFSHZrrbk_Int: [ 0.00 0.00 ]
+Key: VSCALEFSHZrrbkz_Int: [ 0.00 0.00 ]
+Key: VSCALEFSHZrrk: [ 0.00 0.00 ]
+Key: VSCALEFSHZrrkz: [ 0.00 0.00 ]
+Key: VSCALEFSSZrm: [ 0.00 0.00 ]
+Key: VSCALEFSSZrmk: [ 0.00 0.00 ]
+Key: VSCALEFSSZrmkz: [ 0.00 0.00 ]
+Key: VSCALEFSSZrr: [ 0.00 0.00 ]
+Key: VSCALEFSSZrrb_Int: [ 0.00 0.00 ]
+Key: VSCALEFSSZrrbk_Int: [ 0.00 0.00 ]
+Key: VSCALEFSSZrrbkz_Int: [ 0.00 0.00 ]
+Key: VSCALEFSSZrrk: [ 0.00 0.00 ]
+Key: VSCALEFSSZrrkz: [ 0.00 0.00 ]
+Key: VSCATTERDPDZ: [ 0.00 0.00 ]
+Key: VSCATTERDPDZmr: [ 0.00 0.00 ]
+Key: VSCATTERDPSZ: [ 0.00 0.00 ]
+Key: VSCATTERDPSZmr: [ 0.00 0.00 ]
+Key: VSCATTERPF: [ 0.00 0.00 ]
+Key: VSCATTERQPDZ: [ 0.00 0.00 ]
+Key: VSCATTERQPDZmr: [ 0.00 0.00 ]
+Key: VSCATTERQPSZ: [ 0.00 0.00 ]
+Key: VSCATTERQPSZmr: [ 0.00 0.00 ]
+Key: VSHA: [ 0.00 0.00 ]
+Key: VSHUFF: [ 0.00 0.00 ]
+Key: VSHUFI: [ 0.00 0.00 ]
+Key: VSHUFPDYrmi: [ 0.00 0.00 ]
+Key: VSHUFPDYrri: [ 0.00 0.00 ]
+Key: VSHUFPDZ: [ 0.00 0.00 ]
+Key: VSHUFPDZrmbi: [ 0.00 0.00 ]
+Key: VSHUFPDZrmbik: [ 0.00 0.00 ]
+Key: VSHUFPDZrmbikz: [ 0.00 0.00 ]
+Key: VSHUFPDZrmi: [ 0.00 0.00 ]
+Key: VSHUFPDZrmik: [ 0.00 0.00 ]
+Key: VSHUFPDZrmikz: [ 0.00 0.00 ]
+Key: VSHUFPDZrri: [ 0.00 0.00 ]
+Key: VSHUFPDZrrik: [ 0.00 0.00 ]
+Key: VSHUFPDZrrikz: [ 0.00 0.00 ]
+Key: VSHUFPDrmi: [ 0.00 0.00 ]
+Key: VSHUFPDrri: [ 0.00 0.00 ]
+Key: VSHUFPSYrmi: [ 0.00 0.00 ]
+Key: VSHUFPSYrri: [ 0.00 0.00 ]
+Key: VSHUFPSZ: [ 0.00 0.00 ]
+Key: VSHUFPSZrmbi: [ 0.00 0.00 ]
+Key: VSHUFPSZrmbik: [ 0.00 0.00 ]
+Key: VSHUFPSZrmbikz: [ 0.00 0.00 ]
+Key: VSHUFPSZrmi: [ 0.00 0.00 ]
+Key: VSHUFPSZrmik: [ 0.00 0.00 ]
+Key: VSHUFPSZrmikz: [ 0.00 0.00 ]
+Key: VSHUFPSZrri: [ 0.00 0.00 ]
+Key: VSHUFPSZrrik: [ 0.00 0.00 ]
+Key: VSHUFPSZrrikz: [ 0.00 0.00 ]
+Key: VSHUFPSrmi: [ 0.00 0.00 ]
+Key: VSHUFPSrri: [ 0.00 0.00 ]
+Key: VSM: [ 0.00 0.00 ]
+Key: VSQRTBF: [ 0.00 0.00 ]
+Key: VSQRTPDYm: [ 0.00 0.00 ]
+Key: VSQRTPDYr: [ 0.00 0.00 ]
+Key: VSQRTPDZ: [ 0.00 0.00 ]
+Key: VSQRTPDZm: [ 0.00 0.00 ]
+Key: VSQRTPDZmb: [ 0.00 0.00 ]
+Key: VSQRTPDZmbk: [ 0.00 0.00 ]
+Key: VSQRTPDZmbkz: [ 0.00 0.00 ]
+Key: VSQRTPDZmk: [ 0.00 0.00 ]
+Key: VSQRTPDZmkz: [ 0.00 0.00 ]
+Key: VSQRTPDZr: [ 0.00 0.00 ]
+Key: VSQRTPDZrb: [ 0.00 0.00 ]
+Key: VSQRTPDZrbk: [ 0.00 0.00 ]
+Key: VSQRTPDZrbkz: [ 0.00 0.00 ]
+Key: VSQRTPDZrk: [ 0.00 0.00 ]
+Key: VSQRTPDZrkz: [ 0.00 0.00 ]
+Key: VSQRTPDm: [ 0.00 0.00 ]
+Key: VSQRTPDr: [ 0.00 0.00 ]
+Key: VSQRTPHZ: [ 0.00 0.00 ]
+Key: VSQRTPHZm: [ 0.00 0.00 ]
+Key: VSQRTPHZmb: [ 0.00 0.00 ]
+Key: VSQRTPHZmbk: [ 0.00 0.00 ]
+Key: VSQRTPHZmbkz: [ 0.00 0.00 ]
+Key: VSQRTPHZmk: [ 0.00 0.00 ]
+Key: VSQRTPHZmkz: [ 0.00 0.00 ]
+Key: VSQRTPHZr: [ 0.00 0.00 ]
+Key: VSQRTPHZrb: [ 0.00 0.00 ]
+Key: VSQRTPHZrbk: [ 0.00 0.00 ]
+Key: VSQRTPHZrbkz: [ 0.00 0.00 ]
+Key: VSQRTPHZrk: [ 0.00 0.00 ]
+Key: VSQRTPHZrkz: [ 0.00 0.00 ]
+Key: VSQRTPSYm: [ 0.00 0.00 ]
+Key: VSQRTPSYr: [ 0.00 0.00 ]
+Key: VSQRTPSZ: [ 0.00 0.00 ]
+Key: VSQRTPSZm: [ 0.00 0.00 ]
+Key: VSQRTPSZmb: [ 0.00 0.00 ]
+Key: VSQRTPSZmbk: [ 0.00 0.00 ]
+Key: VSQRTPSZmbkz: [ 0.00 0.00 ]
+Key: VSQRTPSZmk: [ 0.00 0.00 ]
+Key: VSQRTPSZmkz: [ 0.00 0.00 ]
+Key: VSQRTPSZr: [ 0.00 0.00 ]
+Key: VSQRTPSZrb: [ 0.00 0.00 ]
+Key: VSQRTPSZrbk: [ 0.00 0.00 ]
+Key: VSQRTPSZrbkz: [ 0.00 0.00 ]
+Key: VSQRTPSZrk: [ 0.00 0.00 ]
+Key: VSQRTPSZrkz: [ 0.00 0.00 ]
+Key: VSQRTPSm: [ 0.00 0.00 ]
+Key: VSQRTPSr: [ 0.00 0.00 ]
+Key: VSQRTSDZm: [ 0.00 0.00 ]
+Key: VSQRTSDZm_Int: [ 0.00 0.00 ]
+Key: VSQRTSDZmk_Int: [ 0.00 0.00 ]
+Key: VSQRTSDZmkz_Int: [ 0.00 0.00 ]
+Key: VSQRTSDZr: [ 0.00 0.00 ]
+Key: VSQRTSDZr_Int: [ 0.00 0.00 ]
+Key: VSQRTSDZrb_Int: [ 0.00 0.00 ]
+Key: VSQRTSDZrbk_Int: [ 0.00 0.00 ]
+Key: VSQRTSDZrbkz_Int: [ 0.00 0.00 ]
+Key: VSQRTSDZrk_Int: [ 0.00 0.00 ]
+Key: VSQRTSDZrkz_Int: [ 0.00 0.00 ]
+Key: VSQRTSDm: [ 0.00 0.00 ]
+Key: VSQRTSDm_Int: [ 0.00 0.00 ]
+Key: VSQRTSDr: [ 0.00 0.00 ]
+Key: VSQRTSDr_Int: [ 0.00 0.00 ]
+Key: VSQRTSHZm: [ 0.00 0.00 ]
+Key: VSQRTSHZm_Int: [ 0.00 0.00 ]
+Key: VSQRTSHZmk_Int: [ 0.00 0.00 ]
+Key: VSQRTSHZmkz_Int: [ 0.00 0.00 ]
+Key: VSQRTSHZr: [ 0.00 0.00 ]
+Key: VSQRTSHZr_Int: [ 0.00 0.00 ]
+Key: VSQRTSHZrb_Int: [ 0.00 0.00 ]
+Key: VSQRTSHZrbk_Int: [ 0.00 0.00 ]
+Key: VSQRTSHZrbkz_Int: [ 0.00 0.00 ]
+Key: VSQRTSHZrk_Int: [ 0.00 0.00 ]
+Key: VSQRTSHZrkz_Int: [ 0.00 0.00 ]
+Key: VSQRTSSZm: [ 0.00 0.00 ]
+Key: VSQRTSSZm_Int: [ 0.00 0.00 ]
+Key: VSQRTSSZmk_Int: [ 0.00 0.00 ]
+Key: VSQRTSSZmkz_Int: [ 0.00 0.00 ]
+Key: VSQRTSSZr: [ 0.00 0.00 ]
+Key: VSQRTSSZr_Int: [ 0.00 0.00 ]
+Key: VSQRTSSZrb_Int: [ 0.00 0.00 ]
+Key: VSQRTSSZrbk_Int: [ 0.00 0.00 ]
+Key: VSQRTSSZrbkz_Int: [ 0.00 0.00 ]
+Key: VSQRTSSZrk_Int: [ 0.00 0.00 ]
+Key: VSQRTSSZrkz_Int: [ 0.00 0.00 ]
+Key: VSQRTSSm: [ 0.00 0.00 ]
+Key: VSQRTSSm_Int: [ 0.00 0.00 ]
+Key: VSQRTSSr: [ 0.00 0.00 ]
+Key: VSQRTSSr_Int: [ 0.00 0.00 ]
+Key: VSTMXCSR: [ 0.00 0.00 ]
+Key: VSUBBF: [ 0.00 0.00 ]
+Key: VSUBPDYrm: [ 0.00 0.00 ]
+Key: VSUBPDYrr: [ 0.00 0.00 ]
+Key: VSUBPDZ: [ 0.00 0.00 ]
+Key: VSUBPDZrm: [ 0.00 0.00 ]
+Key: VSUBPDZrmb: [ 0.00 0.00 ]
+Key: VSUBPDZrmbk: [ 0.00 0.00 ]
+Key: VSUBPDZrmbkz: [ 0.00 0.00 ]
+Key: VSUBPDZrmk: [ 0.00 0.00 ]
+Key: VSUBPDZrmkz: [ 0.00 0.00 ]
+Key: VSUBPDZrr: [ 0.00 0.00 ]
+Key: VSUBPDZrrb: [ 0.00 0.00 ]
+Key: VSUBPDZrrbk: [ 0.00 0.00 ]
+Key: VSUBPDZrrbkz: [ 0.00 0.00 ]
+Key: VSUBPDZrrk: [ 0.00 0.00 ]
+Key: VSUBPDZrrkz: [ 0.00 0.00 ]
+Key: VSUBPDrm: [ 0.00 0.00 ]
+Key: VSUBPDrr: [ 0.00 0.00 ]
+Key: VSUBPHZ: [ 0.00 0.00 ]
+Key: VSUBPHZrm: [ 0.00 0.00 ]
+Key: VSUBPHZrmb: [ 0.00 0.00 ]
+Key: VSUBPHZrmbk: [ 0.00 0.00 ]
+Key: VSUBPHZrmbkz: [ 0.00 0.00 ]
+Key: VSUBPHZrmk: [ 0.00 0.00 ]
+Key: VSUBPHZrmkz: [ 0.00 0.00 ]
+Key: VSUBPHZrr: [ 0.00 0.00 ]
+Key: VSUBPHZrrb: [ 0.00 0.00 ]
+Key: VSUBPHZrrbk: [ 0.00 0.00 ]
+Key: VSUBPHZrrbkz: [ 0.00 0.00 ]
+Key: VSUBPHZrrk: [ 0.00 0.00 ]
+Key: VSUBPHZrrkz: [ 0.00 0.00 ]
+Key: VSUBPSYrm: [ 0.00 0.00 ]
+Key: VSUBPSYrr: [ 0.00 0.00 ]
+Key: VSUBPSZ: [ 0.00 0.00 ]
+Key: VSUBPSZrm: [ 0.00 0.00 ]
+Key: VSUBPSZrmb: [ 0.00 0.00 ]
+Key: VSUBPSZrmbk: [ 0.00 0.00 ]
+Key: VSUBPSZrmbkz: [ 0.00 0.00 ]
+Key: VSUBPSZrmk: [ 0.00 0.00 ]
+Key: VSUBPSZrmkz: [ 0.00 0.00 ]
+Key: VSUBPSZrr: [ 0.00 0.00 ]
+Key: VSUBPSZrrb: [ 0.00 0.00 ]
+Key: VSUBPSZrrbk: [ 0.00 0.00 ]
+Key: VSUBPSZrrbkz: [ 0.00 0.00 ]
+Key: VSUBPSZrrk: [ 0.00 0.00 ]
+Key: VSUBPSZrrkz: [ 0.00 0.00 ]
+Key: VSUBPSrm: [ 0.00 0.00 ]
+Key: VSUBPSrr: [ 0.00 0.00 ]
+Key: VSUBSDZrm: [ 0.00 0.00 ]
+Key: VSUBSDZrm_Int: [ 0.00 0.00 ]
+Key: VSUBSDZrmk_Int: [ 0.00 0.00 ]
+Key: VSUBSDZrmkz_Int: [ 0.00 0.00 ]
+Key: VSUBSDZrr: [ 0.00 0.00 ]
+Key: VSUBSDZrr_Int: [ 0.00 0.00 ]
+Key: VSUBSDZrrb_Int: [ 0.00 0.00 ]
+Key: VSUBSDZrrbk_Int: [ 0.00 0.00 ]
+Key: VSUBSDZrrbkz_Int: [ 0.00 0.00 ]
+Key: VSUBSDZrrk_Int: [ 0.00 0.00 ]
+Key: VSUBSDZrrkz_Int: [ 0.00 0.00 ]
+Key: VSUBSDrm: [ 0.00 0.00 ]
+Key: VSUBSDrm_Int: [ 0.00 0.00 ]
+Key: VSUBSDrr: [ 0.00 0.00 ]
+Key: VSUBSDrr_Int: [ 0.00 0.00 ]
+Key: VSUBSHZrm: [ 0.00 0.00 ]
+Key: VSUBSHZrm_Int: [ 0.00 0.00 ]
+Key: VSUBSHZrmk_Int: [ 0.00 0.00 ]
+Key: VSUBSHZrmkz_Int: [ 0.00 0.00 ]
+Key: VSUBSHZrr: [ 0.00 0.00 ]
+Key: VSUBSHZrr_Int: [ 0.00 0.00 ]
+Key: VSUBSHZrrb_Int: [ 0.00 0.00 ]
+Key: VSUBSHZrrbk_Int: [ 0.00 0.00 ]
+Key: VSUBSHZrrbkz_Int: [ 0.00 0.00 ]
+Key: VSUBSHZrrk_Int: [ 0.00 0.00 ]
+Key: VSUBSHZrrkz_Int: [ 0.00 0.00 ]
+Key: VSUBSSZrm: [ 0.00 0.00 ]
+Key: VSUBSSZrm_Int: [ 0.00 0.00 ]
+Key: VSUBSSZrmk_Int: [ 0.00 0.00 ]
+Key: VSUBSSZrmkz_Int: [ 0.00 0.00 ]
+Key: VSUBSSZrr: [ 0.00 0.00 ]
+Key: VSUBSSZrr_Int: [ 0.00 0.00 ]
+Key: VSUBSSZrrb_Int: [ 0.00 0.00 ]
+Key: VSUBSSZrrbk_Int: [ 0.00 0.00 ]
+Key: VSUBSSZrrbkz_Int: [ 0.00 0.00 ]
+Key: VSUBSSZrrk_Int: [ 0.00 0.00 ]
+Key: VSUBSSZrrkz_Int: [ 0.00 0.00 ]
+Key: VSUBSSrm: [ 0.00 0.00 ]
+Key: VSUBSSrm_Int: [ 0.00 0.00 ]
+Key: VSUBSSrr: [ 0.00 0.00 ]
+Key: VSUBSSrr_Int: [ 0.00 0.00 ]
+Key: VTESTPDYrm: [ 0.00 0.00 ]
+Key: VTESTPDYrr: [ 0.00 0.00 ]
+Key: VTESTPDrm: [ 0.00 0.00 ]
+Key: VTESTPDrr: [ 0.00 0.00 ]
+Key: VTESTPSYrm: [ 0.00 0.00 ]
+Key: VTESTPSYrr: [ 0.00 0.00 ]
+Key: VTESTPSrm: [ 0.00 0.00 ]
+Key: VTESTPSrr: [ 0.00 0.00 ]
+Key: VUCOMISDZrm: [ 0.00 0.00 ]
+Key: VUCOMISDZrm_Int: [ 0.00 0.00 ]
+Key: VUCOMISDZrr: [ 0.00 0.00 ]
+Key: VUCOMISDZrr_Int: [ 0.00 0.00 ]
+Key: VUCOMISDZrrb: [ 0.00 0.00 ]
+Key: VUCOMISDrm: [ 0.00 0.00 ]
+Key: VUCOMISDrm_Int: [ 0.00 0.00 ]
+Key: VUCOMISDrr: [ 0.00 0.00 ]
+Key: VUCOMISDrr_Int: [ 0.00 0.00 ]
+Key: VUCOMISHZrm: [ 0.00 0.00 ]
+Key: VUCOMISHZrm_Int: [ 0.00 0.00 ]
+Key: VUCOMISHZrr: [ 0.00 0.00 ]
+Key: VUCOMISHZrr_Int: [ 0.00 0.00 ]
+Key: VUCOMISHZrrb: [ 0.00 0.00 ]
+Key: VUCOMISSZrm: [ 0.00 0.00 ]
+Key: VUCOMISSZrm_Int: [ 0.00 0.00 ]
+Key: VUCOMISSZrr: [ 0.00 0.00 ]
+Key: VUCOMISSZrr_Int: [ 0.00 0.00 ]
+Key: VUCOMISSZrrb: [ 0.00 0.00 ]
+Key: VUCOMISSrm: [ 0.00 0.00 ]
+Key: VUCOMISSrm_Int: [ 0.00 0.00 ]
+Key: VUCOMISSrr: [ 0.00 0.00 ]
+Key: VUCOMISSrr_Int: [ 0.00 0.00 ]
+Key: VUCOMXSDZrm: [ 0.00 0.00 ]
+Key: VUCOMXSDZrm_Int: [ 0.00 0.00 ]
+Key: VUCOMXSDZrr: [ 0.00 0.00 ]
+Key: VUCOMXSDZrr_Int: [ 0.00 0.00 ]
+Key: VUCOMXSDZrrb_Int: [ 0.00 0.00 ]
+Key: VUCOMXSHZrm: [ 0.00 0.00 ]
+Key: VUCOMXSHZrm_Int: [ 0.00 0.00 ]
+Key: VUCOMXSHZrr: [ 0.00 0.00 ]
+Key: VUCOMXSHZrr_Int: [ 0.00 0.00 ]
+Key: VUCOMXSHZrrb_Int: [ 0.00 0.00 ]
+Key: VUCOMXSSZrm: [ 0.00 0.00 ]
+Key: VUCOMXSSZrm_Int: [ 0.00 0.00 ]
+Key: VUCOMXSSZrr: [ 0.00 0.00 ]
+Key: VUCOMXSSZrr_Int: [ 0.00 0.00 ]
+Key: VUCOMXSSZrrb_Int: [ 0.00 0.00 ]
+Key: VUNPCKHPDYrm: [ 0.00 0.00 ]
+Key: VUNPCKHPDYrr: [ 0.00 0.00 ]
+Key: VUNPCKHPDZ: [ 0.00 0.00 ]
+Key: VUNPCKHPDZrm: [ 0.00 0.00 ]
+Key: VUNPCKHPDZrmb: [ 0.00 0.00 ]
+Key: VUNPCKHPDZrmbk: [ 0.00 0.00 ]
+Key: VUNPCKHPDZrmbkz: [ 0.00 0.00 ]
+Key: VUNPCKHPDZrmk: [ 0.00 0.00 ]
+Key: VUNPCKHPDZrmkz: [ 0.00 0.00 ]
+Key: VUNPCKHPDZrr: [ 0.00 0.00 ]
+Key: VUNPCKHPDZrrk: [ 0.00 0.00 ]
+Key: VUNPCKHPDZrrkz: [ 0.00 0.00 ]
+Key: VUNPCKHPDrm: [ 0.00 0.00 ]
+Key: VUNPCKHPDrr: [ 0.00 0.00 ]
+Key: VUNPCKHPSYrm: [ 0.00 0.00 ]
+Key: VUNPCKHPSYrr: [ 0.00 0.00 ]
+Key: VUNPCKHPSZ: [ 0.00 0.00 ]
+Key: VUNPCKHPSZrm: [ 0.00 0.00 ]
+Key: VUNPCKHPSZrmb: [ 0.00 0.00 ]
+Key: VUNPCKHPSZrmbk: [ 0.00 0.00 ]
+Key: VUNPCKHPSZrmbkz: [ 0.00 0.00 ]
+Key: VUNPCKHPSZrmk: [ 0.00 0.00 ]
+Key: VUNPCKHPSZrmkz: [ 0.00 0.00 ]
+Key: VUNPCKHPSZrr: [ 0.00 0.00 ]
+Key: VUNPCKHPSZrrk: [ 0.00 0.00 ]
+Key: VUNPCKHPSZrrkz: [ 0.00 0.00 ]
+Key: VUNPCKHPSrm: [ 0.00 0.00 ]
+Key: VUNPCKHPSrr: [ 0.00 0.00 ]
+Key: VUNPCKLPDYrm: [ 0.00 0.00 ]
+Key: VUNPCKLPDYrr: [ 0.00 0.00 ]
+Key: VUNPCKLPDZ: [ 0.00 0.00 ]
+Key: VUNPCKLPDZrm: [ 0.00 0.00 ]
+Key: VUNPCKLPDZrmb: [ 0.00 0.00 ]
+Key: VUNPCKLPDZrmbk: [ 0.00 0.00 ]
+Key: VUNPCKLPDZrmbkz: [ 0.00 0.00 ]
+Key: VUNPCKLPDZrmk: [ 0.00 0.00 ]
+Key: VUNPCKLPDZrmkz: [ 0.00 0.00 ]
+Key: VUNPCKLPDZrr: [ 0.00 0.00 ]
+Key: VUNPCKLPDZrrk: [ 0.00 0.00 ]
+Key: VUNPCKLPDZrrkz: [ 0.00 0.00 ]
+Key: VUNPCKLPDrm: [ 0.00 0.00 ]
+Key: VUNPCKLPDrr: [ 0.00 0.00 ]
+Key: VUNPCKLPSYrm: [ 0.00 0.00 ]
+Key: VUNPCKLPSYrr: [ 0.00 0.00 ]
+Key: VUNPCKLPSZ: [ 0.00 0.00 ]
+Key: VUNPCKLPSZrm: [ 0.00 0.00 ]
+Key: VUNPCKLPSZrmb: [ 0.00 0.00 ]
+Key: VUNPCKLPSZrmbk: [ 0.00 0.00 ]
+Key: VUNPCKLPSZrmbkz: [ 0.00 0.00 ]
+Key: VUNPCKLPSZrmk: [ 0.00 0.00 ]
+Key: VUNPCKLPSZrmkz: [ 0.00 0.00 ]
+Key: VUNPCKLPSZrr: [ 0.00 0.00 ]
+Key: VUNPCKLPSZrrk: [ 0.00 0.00 ]
+Key: VUNPCKLPSZrrkz: [ 0.00 0.00 ]
+Key: VUNPCKLPSrm: [ 0.00 0.00 ]
+Key: VUNPCKLPSrr: [ 0.00 0.00 ]
+Key: VXORPDYrm: [ 0.00 0.00 ]
+Key: VXORPDYrr: [ 0.00 0.00 ]
+Key: VXORPDZ: [ 0.00 0.00 ]
+Key: VXORPDZrm: [ 0.00 0.00 ]
+Key: VXORPDZrmb: [ 0.00 0.00 ]
+Key: VXORPDZrmbk: [ 0.00 0.00 ]
+Key: VXORPDZrmbkz: [ 0.00 0.00 ]
+Key: VXORPDZrmk: [ 0.00 0.00 ]
+Key: VXORPDZrmkz: [ 0.00 0.00 ]
+Key: VXORPDZrr: [ 0.00 0.00 ]
+Key: VXORPDZrrk: [ 0.00 0.00 ]
+Key: VXORPDZrrkz: [ 0.00 0.00 ]
+Key: VXORPDrm: [ 0.00 0.00 ]
+Key: VXORPDrr: [ 0.00 0.00 ]
+Key: VXORPSYrm: [ 0.00 0.00 ]
+Key: VXORPSYrr: [ 0.00 0.00 ]
+Key: VXORPSZ: [ 0.00 0.00 ]
+Key: VXORPSZrm: [ 0.00 0.00 ]
+Key: VXORPSZrmb: [ 0.00 0.00 ]
+Key: VXORPSZrmbk: [ 0.00 0.00 ]
+Key: VXORPSZrmbkz: [ 0.00 0.00 ]
+Key: VXORPSZrmk: [ 0.00 0.00 ]
+Key: VXORPSZrmkz: [ 0.00 0.00 ]
+Key: VXORPSZrr: [ 0.00 0.00 ]
+Key: VXORPSZrrk: [ 0.00 0.00 ]
+Key: VXORPSZrrkz: [ 0.00 0.00 ]
+Key: VXORPSrm: [ 0.00 0.00 ]
+Key: VXORPSrr: [ 0.00 0.00 ]
+Key: VZEROALL: [ 0.00 0.00 ]
+Key: VZEROUPPER: [ 0.00 0.00 ]
+Key: V_SET: [ 0.00 0.00 ]
+Key: V_SETALLONES: [ 0.00 0.00 ]
+Key: WAIT: [ 0.00 0.00 ]
+Key: WBINVD: [ 0.00 0.00 ]
+Key: WBNOINVD: [ 0.00 0.00 ]
+Key: WRFLAGS: [ 0.00 0.00 ]
+Key: WRFSBASE: [ 0.00 0.00 ]
+Key: WRGSBASE: [ 0.00 0.00 ]
+Key: WRMSR: [ 0.00 0.00 ]
+Key: WRMSRLIST: [ 0.00 0.00 ]
+Key: WRMSRNS: [ 0.00 0.00 ]
+Key: WRMSRNSir: [ 0.00 0.00 ]
+Key: WRMSRNSir_EVEX: [ 0.00 0.00 ]
+Key: WRPKRUr: [ 0.00 0.00 ]
+Key: WRSSD: [ 0.00 0.00 ]
+Key: WRSSD_EVEX: [ 0.00 0.00 ]
+Key: WRSSQ: [ 0.00 0.00 ]
+Key: WRSSQ_EVEX: [ 0.00 0.00 ]
+Key: WRUSSD: [ 0.00 0.00 ]
+Key: WRUSSD_EVEX: [ 0.00 0.00 ]
+Key: WRUSSQ: [ 0.00 0.00 ]
+Key: WRUSSQ_EVEX: [ 0.00 0.00 ]
+Key: XABORT: [ 0.00 0.00 ]
+Key: XABORT_DEF: [ 0.00 0.00 ]
+Key: XACQUIRE_PREFIX: [ 0.00 0.00 ]
+Key: XADD: [ 0.00 0.00 ]
+Key: XAM_F: [ 0.00 0.00 ]
+Key: XAM_Fp: [ 0.00 0.00 ]
+Key: XBEGIN: [ 0.00 0.00 ]
+Key: XCHG: [ 0.00 0.00 ]
+Key: XCH_F: [ 0.00 0.00 ]
+Key: XCRYPTCBC: [ 0.00 0.00 ]
+Key: XCRYPTCFB: [ 0.00 0.00 ]
+Key: XCRYPTCTR: [ 0.00 0.00 ]
+Key: XCRYPTECB: [ 0.00 0.00 ]
+Key: XCRYPTOFB: [ 0.00 0.00 ]
+Key: XEND: [ 0.00 0.00 ]
+Key: XGETBV: [ 0.00 0.00 ]
+Key: XLAT: [ 0.00 0.00 ]
+Key: XOR: [ 0.00 0.00 ]
+Key: XORPDrm: [ 0.00 0.00 ]
+Key: XORPDrr: [ 0.00 0.00 ]
+Key: XORPSrm: [ 0.00 0.00 ]
+Key: XORPSrr: [ 0.00 0.00 ]
+Key: XRELEASE_PREFIX: [ 0.00 0.00 ]
+Key: XRESLDTRK: [ 0.00 0.00 ]
+Key: XRSTOR: [ 0.00 0.00 ]
+Key: XRSTORS: [ 0.00 0.00 ]
+Key: XSAVE: [ 0.00 0.00 ]
+Key: XSAVEC: [ 0.00 0.00 ]
+Key: XSAVEOPT: [ 0.00 0.00 ]
+Key: XSAVES: [ 0.00 0.00 ]
+Key: XSETBV: [ 0.00 0.00 ]
+Key: XSHA: [ 0.00 0.00 ]
+Key: XSTORE: [ 0.00 0.00 ]
+Key: XSUSLDTRK: [ 0.00 0.00 ]
+Key: XTEST: [ 0.00 0.00 ]
diff --git a/llvm/test/CodeGen/MIR2Vec/vocab-basic.ll b/llvm/test/CodeGen/MIR2Vec/vocab-basic.ll
new file mode 100644
index 0000000..a57dd0b
--- /dev/null
+++ b/llvm/test/CodeGen/MIR2Vec/vocab-basic.ll
@@ -0,0 +1,14 @@
+; REQUIRES: x86_64-linux
+; RUN: llc -o /dev/null -print-mir2vec-vocab -mir2vec-vocab-path=%S/Inputs/mir2vec_dummy_2D_vocab.json %s 2> %t1.log
+; RUN: diff %S/Inputs/reference_x86_vocab_print.txt %t1.log
+
+; RUN: llc -o /dev/null -print-mir2vec-vocab -mir2vec-opc-weight=1 -mir2vec-vocab-path=%S/Inputs/mir2vec_dummy_2D_vocab.json %s 2> %t1.log
+; RUN: diff %S/Inputs/reference_x86_vocab_print.txt %t1.log
+
+; RUN: llc -o /dev/null -print-mir2vec-vocab -mir2vec-opc-weight=0.5 -mir2vec-vocab-path=%S/Inputs/mir2vec_dummy_2D_vocab.json %s 2> %t1.log
+; RUN: diff %S/Inputs/reference_x86_vocab_wo=0.5_print.txt %t1.log
+
+define dso_local void @test() {
+ entry:
+ ret void
+}
diff --git a/llvm/test/CodeGen/MIR2Vec/vocab-error-handling.ll b/llvm/test/CodeGen/MIR2Vec/vocab-error-handling.ll
new file mode 100644
index 0000000..1da516a
--- /dev/null
+++ b/llvm/test/CodeGen/MIR2Vec/vocab-error-handling.ll
@@ -0,0 +1,15 @@
+; REQUIRES: x86_64-linux
+; RUN: not llc -o /dev/null -print-mir2vec-vocab %s 2>&1 | FileCheck %s --check-prefix=CHECK-INVALID
+; RUN: not llc -o /dev/null -print-mir2vec-vocab -mir2vec-vocab-path=%S/Inputs/mir2vec_zero_vocab.json %s 2>&1 | FileCheck %s --check-prefix=CHECK-ZERO-DIM
+; RUN: not llc -o /dev/null -print-mir2vec-vocab -mir2vec-vocab-path=%S/Inputs/mir2vec_invalid_vocab.json %s 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ENTITIES
+; RUN: not llc -o /dev/null -print-mir2vec-vocab -mir2vec-vocab-path=%S/Inputs/mir2vec_inconsistent_dims.json %s 2>&1 | FileCheck %s --check-prefix=CHECK-INCONSISTENT-DIMS
+
+define dso_local void @test() {
+ entry:
+ ret void
+}
+
+; CHECK-INVALID: error: MIR2Vec vocabulary file path not specified; set it using --mir2vec-vocab-path
+; CHECK-ZERO-DIM: error: Dimension of 'entities' section of the vocabulary is zero
+; CHECK-NO-ENTITIES: error: Missing 'entities' section in vocabulary file
+; CHECK-INCONSISTENT-DIMS: error: All vectors in the 'entities' section of the vocabulary are not of the same dimension
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/atomic-load-store-fp.ll b/llvm/test/CodeGen/RISCV/GlobalISel/atomic-load-store-fp.ll
index 4ad2d2c..4914357 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/atomic-load-store-fp.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/atomic-load-store-fp.ll
@@ -23,6 +23,16 @@
; RUN: llc -mtriple=riscv64 -global-isel -mattr=+d,+a,+ztso -verify-machineinstrs < %s \
; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-TSO-TRAILING-FENCE %s
+; RUN: llc -mtriple=riscv32 -global-isel -mattr=+d,+a,+experimental-zalasr -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-ZALASR,RV32IA-ZALASR-WMO %s
+; RUN: llc -mtriple=riscv32 -global-isel -mattr=+d,+a,+experimental-zalasr,+ztso -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-ZALASR,RV32IA-ZALASR-TSO %s
+
+; RUN: llc -mtriple=riscv64 -global-isel -mattr=+d,+a,+experimental-zalasr -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-ZALASR,RV64IA-ZALASR-WMO %s
+; RUN: llc -mtriple=riscv64 -global-isel -mattr=+d,+a,+experimental-zalasr,+ztso -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-ZALASR,RV64IA-ZALASR-TSO %s
+
define float @atomic_load_f32_unordered(ptr %a) nounwind {
; RV32I-LABEL: atomic_load_f32_unordered:
@@ -171,6 +181,30 @@ define float @atomic_load_f32_acquire(ptr %a) nounwind {
; RV64IA-TSO-TRAILING-FENCE-NEXT: lw a0, 0(a0)
; RV64IA-TSO-TRAILING-FENCE-NEXT: fmv.w.x fa0, a0
; RV64IA-TSO-TRAILING-FENCE-NEXT: ret
+;
+; RV32IA-ZALASR-WMO-LABEL: atomic_load_f32_acquire:
+; RV32IA-ZALASR-WMO: # %bb.0:
+; RV32IA-ZALASR-WMO-NEXT: lw.aq a0, (a0)
+; RV32IA-ZALASR-WMO-NEXT: fmv.w.x fa0, a0
+; RV32IA-ZALASR-WMO-NEXT: ret
+;
+; RV32IA-ZALASR-TSO-LABEL: atomic_load_f32_acquire:
+; RV32IA-ZALASR-TSO: # %bb.0:
+; RV32IA-ZALASR-TSO-NEXT: lw a0, 0(a0)
+; RV32IA-ZALASR-TSO-NEXT: fmv.w.x fa0, a0
+; RV32IA-ZALASR-TSO-NEXT: ret
+;
+; RV64IA-ZALASR-WMO-LABEL: atomic_load_f32_acquire:
+; RV64IA-ZALASR-WMO: # %bb.0:
+; RV64IA-ZALASR-WMO-NEXT: lw.aq a0, (a0)
+; RV64IA-ZALASR-WMO-NEXT: fmv.w.x fa0, a0
+; RV64IA-ZALASR-WMO-NEXT: ret
+;
+; RV64IA-ZALASR-TSO-LABEL: atomic_load_f32_acquire:
+; RV64IA-ZALASR-TSO: # %bb.0:
+; RV64IA-ZALASR-TSO-NEXT: lw a0, 0(a0)
+; RV64IA-ZALASR-TSO-NEXT: fmv.w.x fa0, a0
+; RV64IA-ZALASR-TSO-NEXT: ret
%1 = load atomic float, ptr %a acquire, align 4
ret float %1
}
@@ -256,6 +290,18 @@ define float @atomic_load_f32_seq_cst(ptr %a) nounwind {
; RV64IA-TSO-TRAILING-FENCE-NEXT: lw a0, 0(a0)
; RV64IA-TSO-TRAILING-FENCE-NEXT: fmv.w.x fa0, a0
; RV64IA-TSO-TRAILING-FENCE-NEXT: ret
+;
+; RV32IA-ZALASR-LABEL: atomic_load_f32_seq_cst:
+; RV32IA-ZALASR: # %bb.0:
+; RV32IA-ZALASR-NEXT: lw.aq a0, (a0)
+; RV32IA-ZALASR-NEXT: fmv.w.x fa0, a0
+; RV32IA-ZALASR-NEXT: ret
+;
+; RV64IA-ZALASR-LABEL: atomic_load_f32_seq_cst:
+; RV64IA-ZALASR: # %bb.0:
+; RV64IA-ZALASR-NEXT: lw.aq a0, (a0)
+; RV64IA-ZALASR-NEXT: fmv.w.x fa0, a0
+; RV64IA-ZALASR-NEXT: ret
%1 = load atomic float, ptr %a seq_cst, align 4
ret float %1
}
@@ -414,6 +460,18 @@ define double @atomic_load_f64_acquire(ptr %a) nounwind {
; RV64IA-TSO-TRAILING-FENCE-NEXT: ld a0, 0(a0)
; RV64IA-TSO-TRAILING-FENCE-NEXT: fmv.d.x fa0, a0
; RV64IA-TSO-TRAILING-FENCE-NEXT: ret
+;
+; RV64IA-ZALASR-WMO-LABEL: atomic_load_f64_acquire:
+; RV64IA-ZALASR-WMO: # %bb.0:
+; RV64IA-ZALASR-WMO-NEXT: ld.aq a0, (a0)
+; RV64IA-ZALASR-WMO-NEXT: fmv.d.x fa0, a0
+; RV64IA-ZALASR-WMO-NEXT: ret
+;
+; RV64IA-ZALASR-TSO-LABEL: atomic_load_f64_acquire:
+; RV64IA-ZALASR-TSO: # %bb.0:
+; RV64IA-ZALASR-TSO-NEXT: ld a0, 0(a0)
+; RV64IA-ZALASR-TSO-NEXT: fmv.d.x fa0, a0
+; RV64IA-ZALASR-TSO-NEXT: ret
%1 = load atomic double, ptr %a acquire, align 8
ret double %1
}
@@ -484,6 +542,12 @@ define double @atomic_load_f64_seq_cst(ptr %a) nounwind {
; RV64IA-TSO-TRAILING-FENCE-NEXT: ld a0, 0(a0)
; RV64IA-TSO-TRAILING-FENCE-NEXT: fmv.d.x fa0, a0
; RV64IA-TSO-TRAILING-FENCE-NEXT: ret
+;
+; RV64IA-ZALASR-LABEL: atomic_load_f64_seq_cst:
+; RV64IA-ZALASR: # %bb.0:
+; RV64IA-ZALASR-NEXT: ld.aq a0, (a0)
+; RV64IA-ZALASR-NEXT: fmv.d.x fa0, a0
+; RV64IA-ZALASR-NEXT: ret
%1 = load atomic double, ptr %a seq_cst, align 8
ret double %1
}
@@ -635,6 +699,30 @@ define void @atomic_store_f32_release(ptr %a, float %b) nounwind {
; RV64IA-TSO-TRAILING-FENCE-NEXT: fmv.x.w a1, fa0
; RV64IA-TSO-TRAILING-FENCE-NEXT: sw a1, 0(a0)
; RV64IA-TSO-TRAILING-FENCE-NEXT: ret
+;
+; RV32IA-ZALASR-WMO-LABEL: atomic_store_f32_release:
+; RV32IA-ZALASR-WMO: # %bb.0:
+; RV32IA-ZALASR-WMO-NEXT: fmv.x.w a1, fa0
+; RV32IA-ZALASR-WMO-NEXT: sw.rl a1, (a0)
+; RV32IA-ZALASR-WMO-NEXT: ret
+;
+; RV32IA-ZALASR-TSO-LABEL: atomic_store_f32_release:
+; RV32IA-ZALASR-TSO: # %bb.0:
+; RV32IA-ZALASR-TSO-NEXT: fmv.x.w a1, fa0
+; RV32IA-ZALASR-TSO-NEXT: sw a1, 0(a0)
+; RV32IA-ZALASR-TSO-NEXT: ret
+;
+; RV64IA-ZALASR-WMO-LABEL: atomic_store_f32_release:
+; RV64IA-ZALASR-WMO: # %bb.0:
+; RV64IA-ZALASR-WMO-NEXT: fmv.x.w a1, fa0
+; RV64IA-ZALASR-WMO-NEXT: sw.rl a1, (a0)
+; RV64IA-ZALASR-WMO-NEXT: ret
+;
+; RV64IA-ZALASR-TSO-LABEL: atomic_store_f32_release:
+; RV64IA-ZALASR-TSO: # %bb.0:
+; RV64IA-ZALASR-TSO-NEXT: fmv.x.w a1, fa0
+; RV64IA-ZALASR-TSO-NEXT: sw a1, 0(a0)
+; RV64IA-ZALASR-TSO-NEXT: ret
store atomic float %b, ptr %a release, align 4
ret void
}
@@ -718,6 +806,18 @@ define void @atomic_store_f32_seq_cst(ptr %a, float %b) nounwind {
; RV64IA-TSO-TRAILING-FENCE-NEXT: sw a1, 0(a0)
; RV64IA-TSO-TRAILING-FENCE-NEXT: fence rw, rw
; RV64IA-TSO-TRAILING-FENCE-NEXT: ret
+;
+; RV32IA-ZALASR-LABEL: atomic_store_f32_seq_cst:
+; RV32IA-ZALASR: # %bb.0:
+; RV32IA-ZALASR-NEXT: fmv.x.w a1, fa0
+; RV32IA-ZALASR-NEXT: sw.rl a1, (a0)
+; RV32IA-ZALASR-NEXT: ret
+;
+; RV64IA-ZALASR-LABEL: atomic_store_f32_seq_cst:
+; RV64IA-ZALASR: # %bb.0:
+; RV64IA-ZALASR-NEXT: fmv.x.w a1, fa0
+; RV64IA-ZALASR-NEXT: sw.rl a1, (a0)
+; RV64IA-ZALASR-NEXT: ret
store atomic float %b, ptr %a seq_cst, align 4
ret void
}
@@ -876,6 +976,18 @@ define void @atomic_store_f64_release(ptr %a, double %b) nounwind {
; RV64IA-TSO-TRAILING-FENCE-NEXT: fmv.x.d a1, fa0
; RV64IA-TSO-TRAILING-FENCE-NEXT: sd a1, 0(a0)
; RV64IA-TSO-TRAILING-FENCE-NEXT: ret
+;
+; RV64IA-ZALASR-WMO-LABEL: atomic_store_f64_release:
+; RV64IA-ZALASR-WMO: # %bb.0:
+; RV64IA-ZALASR-WMO-NEXT: fmv.x.d a1, fa0
+; RV64IA-ZALASR-WMO-NEXT: sd.rl a1, (a0)
+; RV64IA-ZALASR-WMO-NEXT: ret
+;
+; RV64IA-ZALASR-TSO-LABEL: atomic_store_f64_release:
+; RV64IA-ZALASR-TSO: # %bb.0:
+; RV64IA-ZALASR-TSO-NEXT: fmv.x.d a1, fa0
+; RV64IA-ZALASR-TSO-NEXT: sd a1, 0(a0)
+; RV64IA-ZALASR-TSO-NEXT: ret
store atomic double %b, ptr %a release, align 8
ret void
}
@@ -945,6 +1057,12 @@ define void @atomic_store_f64_seq_cst(ptr %a, double %b) nounwind {
; RV64IA-TSO-TRAILING-FENCE-NEXT: sd a1, 0(a0)
; RV64IA-TSO-TRAILING-FENCE-NEXT: fence rw, rw
; RV64IA-TSO-TRAILING-FENCE-NEXT: ret
+;
+; RV64IA-ZALASR-LABEL: atomic_store_f64_seq_cst:
+; RV64IA-ZALASR: # %bb.0:
+; RV64IA-ZALASR-NEXT: fmv.x.d a1, fa0
+; RV64IA-ZALASR-NEXT: sd.rl a1, (a0)
+; RV64IA-ZALASR-NEXT: ret
store atomic double %b, ptr %a seq_cst, align 8
ret void
}
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomic-cmpxchg-rv32.mir b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomic-cmpxchg-rv32.mir
index 74249c1..e2d3bff 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomic-cmpxchg-rv32.mir
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomic-cmpxchg-rv32.mir
@@ -17,7 +17,7 @@ body: |
; RV32IA-ZABHA-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
; RV32IA-ZABHA-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x0
; RV32IA-ZABHA-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 1
- ; RV32IA-ZABHA-NEXT: [[AMOCAS_B:%[0-9]+]]:gpr = AMOCAS_B [[COPY1]], [[COPY]], [[ADDI]] :: (load store monotonic (s8))
+ ; RV32IA-ZABHA-NEXT: [[AMOCAS_B:%[0-9]+]]:gpr = AMOCAS_B [[COPY1]], [[ADDI]], [[COPY]] :: (load store monotonic (s8))
; RV32IA-ZABHA-NEXT: $x10 = COPY [[AMOCAS_B]]
; RV32IA-ZABHA-NEXT: PseudoRET implicit $x10
%0:gpr(p0) = COPY $x10
@@ -42,7 +42,7 @@ body: |
; RV32IA-ZABHA-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
; RV32IA-ZABHA-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x0
; RV32IA-ZABHA-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 1
- ; RV32IA-ZABHA-NEXT: [[AMOCAS_H:%[0-9]+]]:gpr = AMOCAS_H [[COPY1]], [[COPY]], [[ADDI]] :: (load store monotonic (s16))
+ ; RV32IA-ZABHA-NEXT: [[AMOCAS_H:%[0-9]+]]:gpr = AMOCAS_H [[COPY1]], [[ADDI]], [[COPY]] :: (load store monotonic (s16))
; RV32IA-ZABHA-NEXT: $x10 = COPY [[AMOCAS_H]]
; RV32IA-ZABHA-NEXT: PseudoRET implicit $x10
%0:gpr(p0) = COPY $x10
@@ -67,7 +67,7 @@ body: |
; RV32IA-ZABHA-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
; RV32IA-ZABHA-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x0
; RV32IA-ZABHA-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 1
- ; RV32IA-ZABHA-NEXT: [[AMOCAS_W:%[0-9]+]]:gpr = AMOCAS_W [[COPY1]], [[COPY]], [[ADDI]] :: (load store monotonic (s32))
+ ; RV32IA-ZABHA-NEXT: [[AMOCAS_W:%[0-9]+]]:gpr = AMOCAS_W [[COPY1]], [[ADDI]], [[COPY]] :: (load store monotonic (s32))
; RV32IA-ZABHA-NEXT: $x10 = COPY [[AMOCAS_W]]
; RV32IA-ZABHA-NEXT: PseudoRET implicit $x10
%0:gpr(p0) = COPY $x10
@@ -92,7 +92,7 @@ body: |
; RV32IA-ZABHA-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
; RV32IA-ZABHA-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x0
; RV32IA-ZABHA-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 1
- ; RV32IA-ZABHA-NEXT: [[AMOCAS_W:%[0-9]+]]:gpr = AMOCAS_W [[COPY1]], [[COPY]], [[ADDI]] :: (load store monotonic (s32))
+ ; RV32IA-ZABHA-NEXT: [[AMOCAS_W:%[0-9]+]]:gpr = AMOCAS_W [[COPY1]], [[ADDI]], [[COPY]] :: (load store monotonic (s32))
; RV32IA-ZABHA-NEXT: [[SLTIU:%[0-9]+]]:gpr = SLTIU [[AMOCAS_W]], 1
; RV32IA-ZABHA-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
; RV32IA-ZABHA-NEXT: $x10 = COPY [[AMOCAS_W]]
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomic-cmpxchg-rv64.mir b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomic-cmpxchg-rv64.mir
index a2f7e30..ab537ea 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomic-cmpxchg-rv64.mir
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomic-cmpxchg-rv64.mir
@@ -17,7 +17,7 @@ body: |
; RV64IA-ZABHA-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
; RV64IA-ZABHA-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x0
; RV64IA-ZABHA-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 1
- ; RV64IA-ZABHA-NEXT: [[AMOCAS_B:%[0-9]+]]:gpr = AMOCAS_B [[COPY1]], [[COPY]], [[ADDI]] :: (load store monotonic (s8))
+ ; RV64IA-ZABHA-NEXT: [[AMOCAS_B:%[0-9]+]]:gpr = AMOCAS_B [[COPY1]], [[ADDI]], [[COPY]] :: (load store monotonic (s8))
; RV64IA-ZABHA-NEXT: $x10 = COPY [[AMOCAS_B]]
; RV64IA-ZABHA-NEXT: PseudoRET implicit $x10
%0:gpr(p0) = COPY $x10
@@ -42,7 +42,7 @@ body: |
; RV64IA-ZABHA-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
; RV64IA-ZABHA-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x0
; RV64IA-ZABHA-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 1
- ; RV64IA-ZABHA-NEXT: [[AMOCAS_H:%[0-9]+]]:gpr = AMOCAS_H [[COPY1]], [[COPY]], [[ADDI]] :: (load store monotonic (s16))
+ ; RV64IA-ZABHA-NEXT: [[AMOCAS_H:%[0-9]+]]:gpr = AMOCAS_H [[COPY1]], [[ADDI]], [[COPY]] :: (load store monotonic (s16))
; RV64IA-ZABHA-NEXT: $x10 = COPY [[AMOCAS_H]]
; RV64IA-ZABHA-NEXT: PseudoRET implicit $x10
%0:gpr(p0) = COPY $x10
@@ -67,7 +67,7 @@ body: |
; RV64IA-ZABHA-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
; RV64IA-ZABHA-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x0
; RV64IA-ZABHA-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 1
- ; RV64IA-ZABHA-NEXT: [[AMOCAS_W:%[0-9]+]]:gpr = AMOCAS_W [[COPY1]], [[COPY]], [[ADDI]] :: (load store monotonic (s32))
+ ; RV64IA-ZABHA-NEXT: [[AMOCAS_W:%[0-9]+]]:gpr = AMOCAS_W [[COPY1]], [[ADDI]], [[COPY]] :: (load store monotonic (s32))
; RV64IA-ZABHA-NEXT: $x10 = COPY [[AMOCAS_W]]
; RV64IA-ZABHA-NEXT: PseudoRET implicit $x10
%0:gpr(p0) = COPY $x10
@@ -92,7 +92,7 @@ body: |
; RV64IA-ZABHA-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
; RV64IA-ZABHA-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x0
; RV64IA-ZABHA-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 1
- ; RV64IA-ZABHA-NEXT: [[AMOCAS_D_RV64_:%[0-9]+]]:gpr = AMOCAS_D_RV64 [[COPY1]], [[COPY]], [[ADDI]] :: (load store monotonic (s64))
+ ; RV64IA-ZABHA-NEXT: [[AMOCAS_D_RV64_:%[0-9]+]]:gpr = AMOCAS_D_RV64 [[COPY1]], [[ADDI]], [[COPY]] :: (load store monotonic (s64))
; RV64IA-ZABHA-NEXT: $x10 = COPY [[AMOCAS_D_RV64_]]
; RV64IA-ZABHA-NEXT: PseudoRET implicit $x10
%0:gpr(p0) = COPY $x10
@@ -116,7 +116,7 @@ body: |
; RV64IA-ZABHA-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
; RV64IA-ZABHA-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x0
; RV64IA-ZABHA-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 1
- ; RV64IA-ZABHA-NEXT: [[AMOCAS_D_RV64_:%[0-9]+]]:gpr = AMOCAS_D_RV64 [[COPY1]], [[COPY]], [[ADDI]] :: (load store monotonic (s64))
+ ; RV64IA-ZABHA-NEXT: [[AMOCAS_D_RV64_:%[0-9]+]]:gpr = AMOCAS_D_RV64 [[COPY1]], [[ADDI]], [[COPY]] :: (load store monotonic (s64))
; RV64IA-ZABHA-NEXT: [[SLTIU:%[0-9]+]]:gpr = SLTIU [[AMOCAS_D_RV64_]], 1
; RV64IA-ZABHA-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
; RV64IA-ZABHA-NEXT: $x10 = COPY [[AMOCAS_D_RV64_]]
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomicrmw-add-sub-rv32.mir b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomicrmw-add-sub-rv32.mir
index f7fdc33..e547972 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomicrmw-add-sub-rv32.mir
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomicrmw-add-sub-rv32.mir
@@ -15,7 +15,7 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11
- ; CHECK-NEXT: [[AMOADD_B:%[0-9]+]]:gpr = AMOADD_B [[COPY]], [[COPY1]] :: (load store monotonic (s8))
+ ; CHECK-NEXT: [[AMOADD_B:%[0-9]+]]:gpr = AMOADD_B [[COPY1]], [[COPY]] :: (load store monotonic (s8))
; CHECK-NEXT: $x10 = COPY [[AMOADD_B]]
; CHECK-NEXT: PseudoRET implicit $x10
%0:gprb(p0) = COPY $x10
@@ -38,7 +38,7 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11
- ; CHECK-NEXT: [[AMOADD_H:%[0-9]+]]:gpr = AMOADD_H [[COPY]], [[COPY1]] :: (load store monotonic (s16))
+ ; CHECK-NEXT: [[AMOADD_H:%[0-9]+]]:gpr = AMOADD_H [[COPY1]], [[COPY]] :: (load store monotonic (s16))
; CHECK-NEXT: $x10 = COPY [[AMOADD_H]]
; CHECK-NEXT: PseudoRET implicit $x10
%0:gprb(p0) = COPY $x10
@@ -61,7 +61,7 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11
- ; CHECK-NEXT: [[AMOADD_W:%[0-9]+]]:gpr = AMOADD_W [[COPY]], [[COPY1]] :: (load store monotonic (s32))
+ ; CHECK-NEXT: [[AMOADD_W:%[0-9]+]]:gpr = AMOADD_W [[COPY1]], [[COPY]] :: (load store monotonic (s32))
; CHECK-NEXT: $x10 = COPY [[AMOADD_W]]
; CHECK-NEXT: PseudoRET implicit $x10
%0:gprb(p0) = COPY $x10
@@ -86,7 +86,7 @@ body: |
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11
; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr = COPY $x0
; CHECK-NEXT: [[SUB:%[0-9]+]]:gpr = SUB [[COPY2]], [[COPY1]]
- ; CHECK-NEXT: [[AMOADD_B:%[0-9]+]]:gpr = AMOADD_B [[COPY]], [[SUB]] :: (load store monotonic (s8))
+ ; CHECK-NEXT: [[AMOADD_B:%[0-9]+]]:gpr = AMOADD_B [[SUB]], [[COPY]] :: (load store monotonic (s8))
; CHECK-NEXT: $x10 = COPY [[AMOADD_B]]
; CHECK-NEXT: PseudoRET implicit $x10
%0:gprb(p0) = COPY $x10
@@ -113,7 +113,7 @@ body: |
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11
; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr = COPY $x0
; CHECK-NEXT: [[SUB:%[0-9]+]]:gpr = SUB [[COPY2]], [[COPY1]]
- ; CHECK-NEXT: [[AMOADD_H:%[0-9]+]]:gpr = AMOADD_H [[COPY]], [[SUB]] :: (load store monotonic (s16))
+ ; CHECK-NEXT: [[AMOADD_H:%[0-9]+]]:gpr = AMOADD_H [[SUB]], [[COPY]] :: (load store monotonic (s16))
; CHECK-NEXT: $x10 = COPY [[AMOADD_H]]
; CHECK-NEXT: PseudoRET implicit $x10
%0:gprb(p0) = COPY $x10
@@ -140,7 +140,7 @@ body: |
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11
; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr = COPY $x0
; CHECK-NEXT: [[SUB:%[0-9]+]]:gpr = SUB [[COPY2]], [[COPY1]]
- ; CHECK-NEXT: [[AMOADD_B:%[0-9]+]]:gpr = AMOADD_B [[COPY]], [[SUB]] :: (load store monotonic (s8))
+ ; CHECK-NEXT: [[AMOADD_B:%[0-9]+]]:gpr = AMOADD_B [[SUB]], [[COPY]] :: (load store monotonic (s8))
; CHECK-NEXT: $x10 = COPY [[AMOADD_B]]
; CHECK-NEXT: PseudoRET implicit $x10
%0:gprb(p0) = COPY $x10
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomicrmw-add-sub-rv64.mir b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomicrmw-add-sub-rv64.mir
index 178586c..f34826c 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomicrmw-add-sub-rv64.mir
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomicrmw-add-sub-rv64.mir
@@ -15,7 +15,7 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11
- ; CHECK-NEXT: [[AMOADD_B:%[0-9]+]]:gpr = AMOADD_B [[COPY]], [[COPY1]] :: (load store monotonic (s8))
+ ; CHECK-NEXT: [[AMOADD_B:%[0-9]+]]:gpr = AMOADD_B [[COPY1]], [[COPY]] :: (load store monotonic (s8))
; CHECK-NEXT: $x10 = COPY [[AMOADD_B]]
; CHECK-NEXT: PseudoRET implicit $x10
%0:gprb(p0) = COPY $x10
@@ -38,7 +38,7 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11
- ; CHECK-NEXT: [[AMOADD_H:%[0-9]+]]:gpr = AMOADD_H [[COPY]], [[COPY1]] :: (load store monotonic (s16))
+ ; CHECK-NEXT: [[AMOADD_H:%[0-9]+]]:gpr = AMOADD_H [[COPY1]], [[COPY]] :: (load store monotonic (s16))
; CHECK-NEXT: $x10 = COPY [[AMOADD_H]]
; CHECK-NEXT: PseudoRET implicit $x10
%0:gprb(p0) = COPY $x10
@@ -61,7 +61,7 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11
- ; CHECK-NEXT: [[AMOADD_W:%[0-9]+]]:gpr = AMOADD_W [[COPY]], [[COPY1]] :: (load store monotonic (s32))
+ ; CHECK-NEXT: [[AMOADD_W:%[0-9]+]]:gpr = AMOADD_W [[COPY1]], [[COPY]] :: (load store monotonic (s32))
; CHECK-NEXT: $x10 = COPY [[AMOADD_W]]
; CHECK-NEXT: PseudoRET implicit $x10
%0:gprb(p0) = COPY $x10
@@ -84,7 +84,7 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11
- ; CHECK-NEXT: [[AMOADD_D:%[0-9]+]]:gpr = AMOADD_D [[COPY]], [[COPY1]] :: (load store monotonic (s64))
+ ; CHECK-NEXT: [[AMOADD_D:%[0-9]+]]:gpr = AMOADD_D [[COPY1]], [[COPY]] :: (load store monotonic (s64))
; CHECK-NEXT: $x10 = COPY [[AMOADD_D]]
; CHECK-NEXT: PseudoRET implicit $x10
%0:gprb(p0) = COPY $x10
@@ -109,7 +109,7 @@ body: |
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11
; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr = COPY $x0
; CHECK-NEXT: [[SUB:%[0-9]+]]:gpr = SUB [[COPY2]], [[COPY1]]
- ; CHECK-NEXT: [[AMOADD_B:%[0-9]+]]:gpr = AMOADD_B [[COPY]], [[SUB]] :: (load store monotonic (s8))
+ ; CHECK-NEXT: [[AMOADD_B:%[0-9]+]]:gpr = AMOADD_B [[SUB]], [[COPY]] :: (load store monotonic (s8))
; CHECK-NEXT: $x10 = COPY [[AMOADD_B]]
; CHECK-NEXT: PseudoRET implicit $x10
%0:gprb(p0) = COPY $x10
@@ -136,7 +136,7 @@ body: |
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11
; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr = COPY $x0
; CHECK-NEXT: [[SUB:%[0-9]+]]:gpr = SUB [[COPY2]], [[COPY1]]
- ; CHECK-NEXT: [[AMOADD_H:%[0-9]+]]:gpr = AMOADD_H [[COPY]], [[SUB]] :: (load store monotonic (s16))
+ ; CHECK-NEXT: [[AMOADD_H:%[0-9]+]]:gpr = AMOADD_H [[SUB]], [[COPY]] :: (load store monotonic (s16))
; CHECK-NEXT: $x10 = COPY [[AMOADD_H]]
; CHECK-NEXT: PseudoRET implicit $x10
%0:gprb(p0) = COPY $x10
@@ -163,7 +163,7 @@ body: |
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11
; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr = COPY $x0
; CHECK-NEXT: [[SUB:%[0-9]+]]:gpr = SUB [[COPY2]], [[COPY1]]
- ; CHECK-NEXT: [[AMOADD_W:%[0-9]+]]:gpr = AMOADD_W [[COPY]], [[SUB]] :: (load store monotonic (s32))
+ ; CHECK-NEXT: [[AMOADD_W:%[0-9]+]]:gpr = AMOADD_W [[SUB]], [[COPY]] :: (load store monotonic (s32))
; CHECK-NEXT: $x10 = COPY [[AMOADD_W]]
; CHECK-NEXT: PseudoRET implicit $x10
%0:gprb(p0) = COPY $x10
@@ -190,7 +190,7 @@ body: |
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11
; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr = COPY $x0
; CHECK-NEXT: [[SUB:%[0-9]+]]:gpr = SUB [[COPY2]], [[COPY1]]
- ; CHECK-NEXT: [[AMOADD_B:%[0-9]+]]:gpr = AMOADD_B [[COPY]], [[SUB]] :: (load store monotonic (s8))
+ ; CHECK-NEXT: [[AMOADD_B:%[0-9]+]]:gpr = AMOADD_B [[SUB]], [[COPY]] :: (load store monotonic (s8))
; CHECK-NEXT: $x10 = COPY [[AMOADD_B]]
; CHECK-NEXT: PseudoRET implicit $x10
%0:gprb(p0) = COPY $x10
diff --git a/llvm/test/CodeGen/RISCV/atomic-rmw.ll b/llvm/test/CodeGen/RISCV/atomic-rmw.ll
index b0510f8..1213256 100644
--- a/llvm/test/CodeGen/RISCV/atomic-rmw.ll
+++ b/llvm/test/CodeGen/RISCV/atomic-rmw.ll
@@ -21,10 +21,19 @@
; RUN: llc -mtriple=riscv64 -mattr=+a,+ztso,+zacas -verify-machineinstrs < %s \
; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-ZACAS,RV64IA-TSO,RV64IA-TSO-ZACAS %s
+; RUN: llc -mtriple=riscv32 -mattr=+a,+zabha -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-WMO,RV32IA-WMO-ZABHA,RV32IA-WMO-ZABHA-NOZACAS %s
+; RUN: llc -mtriple=riscv32 -mattr=+a,+ztso,+zabha -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-TSO,RV32IA-TSO-ZABHA,RV32IA-TSO-ZABHA-NOZACAS %s
; RUN: llc -mtriple=riscv64 -mattr=+a,+zabha -verify-machineinstrs < %s \
; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-WMO,RV64IA-WMO-ZABHA,RV64IA-WMO-ZABHA-NOZACAS %s
; RUN: llc -mtriple=riscv64 -mattr=+a,+ztso,+zabha -verify-machineinstrs < %s \
; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-TSO,RV64IA-TSO-ZABHA,RV64IA-TSO-ZABHA-NOZACAS %s
+
+; RUN: llc -mtriple=riscv32 -mattr=+a,+zabha,+zacas -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-WMO,RV32IA-WMO-ZABHA,RV32IA-WMO-ZABHA-ZACAS %s
+; RUN: llc -mtriple=riscv32 -mattr=+a,+ztso,+zabha,+zacas -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-TSO,RV32IA-TSO-ZABHA,RV32IA-TSO-ZABHA-ZACAS %s
; RUN: llc -mtriple=riscv64 -mattr=+a,+zabha,+zacas -verify-machineinstrs < %s \
; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-WMO,RV64IA-WMO-ZABHA,RV64IA-WMO-ZABHA-ZACAS %s
; RUN: llc -mtriple=riscv64 -mattr=+a,+ztso,+zabha,+zacas -verify-machineinstrs < %s \
@@ -41,25 +50,25 @@ define i8 @atomicrmw_xchg_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-LABEL: atomicrmw_xchg_i8_monotonic:
-; RV32IA: # %bb.0:
-; RV32IA-NEXT: andi a2, a0, -4
-; RV32IA-NEXT: slli a0, a0, 3
-; RV32IA-NEXT: li a3, 255
-; RV32IA-NEXT: zext.b a1, a1
-; RV32IA-NEXT: sll a3, a3, a0
-; RV32IA-NEXT: sll a1, a1, a0
-; RV32IA-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: lr.w a4, (a2)
-; RV32IA-NEXT: mv a5, a1
-; RV32IA-NEXT: xor a5, a4, a5
-; RV32IA-NEXT: and a5, a5, a3
-; RV32IA-NEXT: xor a5, a4, a5
-; RV32IA-NEXT: sc.w a5, a5, (a2)
-; RV32IA-NEXT: bnez a5, .LBB0_1
-; RV32IA-NEXT: # %bb.2:
-; RV32IA-NEXT: srl a0, a4, a0
-; RV32IA-NEXT: ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_xchg_i8_monotonic:
+; RV32IA-NOZACAS: # %bb.0:
+; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT: li a3, 255
+; RV32IA-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-NOZACAS-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NOZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-NOZACAS-NEXT: mv a5, a1
+; RV32IA-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-NOZACAS-NEXT: and a5, a5, a3
+; RV32IA-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-NOZACAS-NEXT: bnez a5, .LBB0_1
+; RV32IA-NOZACAS-NEXT: # %bb.2:
+; RV32IA-NOZACAS-NEXT: srl a0, a4, a0
+; RV32IA-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_xchg_i8_monotonic:
; RV64I: # %bb.0:
@@ -91,6 +100,26 @@ define i8 @atomicrmw_xchg_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV64IA-NOZACAS-NEXT: srlw a0, a4, a0
; RV64IA-NOZACAS-NEXT: ret
;
+; RV32IA-ZACAS-LABEL: atomicrmw_xchg_i8_monotonic:
+; RV32IA-ZACAS: # %bb.0:
+; RV32IA-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-ZACAS-NEXT: li a3, 255
+; RV32IA-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-ZACAS-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-ZACAS-NEXT: mv a5, a1
+; RV32IA-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-ZACAS-NEXT: and a5, a5, a3
+; RV32IA-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-ZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-ZACAS-NEXT: bnez a5, .LBB0_1
+; RV32IA-ZACAS-NEXT: # %bb.2:
+; RV32IA-ZACAS-NEXT: srl a0, a4, a0
+; RV32IA-ZACAS-NEXT: ret
+;
; RV64IA-ZACAS-LABEL: atomicrmw_xchg_i8_monotonic:
; RV64IA-ZACAS: # %bb.0:
; RV64IA-ZACAS-NEXT: andi a2, a0, -4
@@ -111,6 +140,16 @@ define i8 @atomicrmw_xchg_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV64IA-ZACAS-NEXT: srlw a0, a4, a0
; RV64IA-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xchg_i8_monotonic:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amoswap.b a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_xchg_i8_monotonic:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amoswap.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_xchg_i8_monotonic:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amoswap.b a0, a1, (a0)
@@ -135,45 +174,45 @@ define i8 @atomicrmw_xchg_i8_acquire(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_xchg_i8_acquire:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: li a3, 255
-; RV32IA-WMO-NEXT: zext.b a1, a1
-; RV32IA-WMO-NEXT: sll a3, a3, a0
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT: lr.w.aq a4, (a2)
-; RV32IA-WMO-NEXT: mv a5, a1
-; RV32IA-WMO-NEXT: xor a5, a4, a5
-; RV32IA-WMO-NEXT: and a5, a5, a3
-; RV32IA-WMO-NEXT: xor a5, a4, a5
-; RV32IA-WMO-NEXT: sc.w a5, a5, (a2)
-; RV32IA-WMO-NEXT: bnez a5, .LBB1_1
-; RV32IA-WMO-NEXT: # %bb.2:
-; RV32IA-WMO-NEXT: srl a0, a4, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_i8_acquire:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: li a3, 255
+; RV32IA-WMO-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: lr.w.aq a4, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: mv a5, a1
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-NOZACAS-NEXT: and a5, a5, a3
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: bnez a5, .LBB1_1
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a4, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_xchg_i8_acquire:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: li a3, 255
-; RV32IA-TSO-NEXT: zext.b a1, a1
-; RV32IA-TSO-NEXT: sll a3, a3, a0
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT: lr.w a4, (a2)
-; RV32IA-TSO-NEXT: mv a5, a1
-; RV32IA-TSO-NEXT: xor a5, a4, a5
-; RV32IA-TSO-NEXT: and a5, a5, a3
-; RV32IA-TSO-NEXT: xor a5, a4, a5
-; RV32IA-TSO-NEXT: sc.w a5, a5, (a2)
-; RV32IA-TSO-NEXT: bnez a5, .LBB1_1
-; RV32IA-TSO-NEXT: # %bb.2:
-; RV32IA-TSO-NEXT: srl a0, a4, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_xchg_i8_acquire:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: li a3, 255
+; RV32IA-TSO-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: mv a5, a1
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-NOZACAS-NEXT: and a5, a5, a3
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: bnez a5, .LBB1_1
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a4, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_xchg_i8_acquire:
; RV64I: # %bb.0:
@@ -225,6 +264,46 @@ define i8 @atomicrmw_xchg_i8_acquire(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a4, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_xchg_i8_acquire:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: li a3, 255
+; RV32IA-WMO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a4, (a2)
+; RV32IA-WMO-ZACAS-NEXT: mv a5, a1
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a3
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-ZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB1_1
+; RV32IA-WMO-ZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a4, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_xchg_i8_acquire:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: li a3, 255
+; RV32IA-TSO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-TSO-ZACAS-NEXT: mv a5, a1
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a3
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT: bnez a5, .LBB1_1
+; RV32IA-TSO-ZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a4, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_xchg_i8_acquire:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -265,6 +344,16 @@ define i8 @atomicrmw_xchg_i8_acquire(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a4, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xchg_i8_acquire:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amoswap.b.aq a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_xchg_i8_acquire:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amoswap.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_xchg_i8_acquire:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amoswap.b.aq a0, a1, (a0)
@@ -289,45 +378,45 @@ define i8 @atomicrmw_xchg_i8_release(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_xchg_i8_release:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: li a3, 255
-; RV32IA-WMO-NEXT: zext.b a1, a1
-; RV32IA-WMO-NEXT: sll a3, a3, a0
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT: lr.w a4, (a2)
-; RV32IA-WMO-NEXT: mv a5, a1
-; RV32IA-WMO-NEXT: xor a5, a4, a5
-; RV32IA-WMO-NEXT: and a5, a5, a3
-; RV32IA-WMO-NEXT: xor a5, a4, a5
-; RV32IA-WMO-NEXT: sc.w.rl a5, a5, (a2)
-; RV32IA-WMO-NEXT: bnez a5, .LBB2_1
-; RV32IA-WMO-NEXT: # %bb.2:
-; RV32IA-WMO-NEXT: srl a0, a4, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_i8_release:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: li a3, 255
+; RV32IA-WMO-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: mv a5, a1
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-NOZACAS-NEXT: and a5, a5, a3
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-NOZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: bnez a5, .LBB2_1
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a4, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_xchg_i8_release:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: li a3, 255
-; RV32IA-TSO-NEXT: zext.b a1, a1
-; RV32IA-TSO-NEXT: sll a3, a3, a0
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT: lr.w a4, (a2)
-; RV32IA-TSO-NEXT: mv a5, a1
-; RV32IA-TSO-NEXT: xor a5, a4, a5
-; RV32IA-TSO-NEXT: and a5, a5, a3
-; RV32IA-TSO-NEXT: xor a5, a4, a5
-; RV32IA-TSO-NEXT: sc.w a5, a5, (a2)
-; RV32IA-TSO-NEXT: bnez a5, .LBB2_1
-; RV32IA-TSO-NEXT: # %bb.2:
-; RV32IA-TSO-NEXT: srl a0, a4, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_xchg_i8_release:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: li a3, 255
+; RV32IA-TSO-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: mv a5, a1
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-NOZACAS-NEXT: and a5, a5, a3
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: bnez a5, .LBB2_1
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a4, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_xchg_i8_release:
; RV64I: # %bb.0:
@@ -379,6 +468,46 @@ define i8 @atomicrmw_xchg_i8_release(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a4, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_xchg_i8_release:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: li a3, 255
+; RV32IA-WMO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-WMO-ZACAS-NEXT: mv a5, a1
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a3
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB2_1
+; RV32IA-WMO-ZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a4, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_xchg_i8_release:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: li a3, 255
+; RV32IA-TSO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-TSO-ZACAS-NEXT: mv a5, a1
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a3
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT: bnez a5, .LBB2_1
+; RV32IA-TSO-ZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a4, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_xchg_i8_release:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -419,6 +548,16 @@ define i8 @atomicrmw_xchg_i8_release(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a4, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xchg_i8_release:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amoswap.b.rl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_xchg_i8_release:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amoswap.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_xchg_i8_release:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amoswap.b.rl a0, a1, (a0)
@@ -443,45 +582,45 @@ define i8 @atomicrmw_xchg_i8_acq_rel(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_xchg_i8_acq_rel:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: li a3, 255
-; RV32IA-WMO-NEXT: zext.b a1, a1
-; RV32IA-WMO-NEXT: sll a3, a3, a0
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT: lr.w.aq a4, (a2)
-; RV32IA-WMO-NEXT: mv a5, a1
-; RV32IA-WMO-NEXT: xor a5, a4, a5
-; RV32IA-WMO-NEXT: and a5, a5, a3
-; RV32IA-WMO-NEXT: xor a5, a4, a5
-; RV32IA-WMO-NEXT: sc.w.rl a5, a5, (a2)
-; RV32IA-WMO-NEXT: bnez a5, .LBB3_1
-; RV32IA-WMO-NEXT: # %bb.2:
-; RV32IA-WMO-NEXT: srl a0, a4, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_i8_acq_rel:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: li a3, 255
+; RV32IA-WMO-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: lr.w.aq a4, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: mv a5, a1
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-NOZACAS-NEXT: and a5, a5, a3
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-NOZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: bnez a5, .LBB3_1
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a4, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_xchg_i8_acq_rel:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: li a3, 255
-; RV32IA-TSO-NEXT: zext.b a1, a1
-; RV32IA-TSO-NEXT: sll a3, a3, a0
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT: lr.w a4, (a2)
-; RV32IA-TSO-NEXT: mv a5, a1
-; RV32IA-TSO-NEXT: xor a5, a4, a5
-; RV32IA-TSO-NEXT: and a5, a5, a3
-; RV32IA-TSO-NEXT: xor a5, a4, a5
-; RV32IA-TSO-NEXT: sc.w a5, a5, (a2)
-; RV32IA-TSO-NEXT: bnez a5, .LBB3_1
-; RV32IA-TSO-NEXT: # %bb.2:
-; RV32IA-TSO-NEXT: srl a0, a4, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_xchg_i8_acq_rel:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: li a3, 255
+; RV32IA-TSO-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: mv a5, a1
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-NOZACAS-NEXT: and a5, a5, a3
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: bnez a5, .LBB3_1
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a4, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_xchg_i8_acq_rel:
; RV64I: # %bb.0:
@@ -533,6 +672,46 @@ define i8 @atomicrmw_xchg_i8_acq_rel(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a4, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_xchg_i8_acq_rel:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: li a3, 255
+; RV32IA-WMO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a4, (a2)
+; RV32IA-WMO-ZACAS-NEXT: mv a5, a1
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a3
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB3_1
+; RV32IA-WMO-ZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a4, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_xchg_i8_acq_rel:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: li a3, 255
+; RV32IA-TSO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-TSO-ZACAS-NEXT: mv a5, a1
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a3
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT: bnez a5, .LBB3_1
+; RV32IA-TSO-ZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a4, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_xchg_i8_acq_rel:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -573,6 +752,16 @@ define i8 @atomicrmw_xchg_i8_acq_rel(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a4, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xchg_i8_acq_rel:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amoswap.b.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_xchg_i8_acq_rel:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amoswap.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_xchg_i8_acq_rel:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amoswap.b.aqrl a0, a1, (a0)
@@ -597,25 +786,25 @@ define i8 @atomicrmw_xchg_i8_seq_cst(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-LABEL: atomicrmw_xchg_i8_seq_cst:
-; RV32IA: # %bb.0:
-; RV32IA-NEXT: andi a2, a0, -4
-; RV32IA-NEXT: slli a0, a0, 3
-; RV32IA-NEXT: li a3, 255
-; RV32IA-NEXT: zext.b a1, a1
-; RV32IA-NEXT: sll a3, a3, a0
-; RV32IA-NEXT: sll a1, a1, a0
-; RV32IA-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: lr.w.aqrl a4, (a2)
-; RV32IA-NEXT: mv a5, a1
-; RV32IA-NEXT: xor a5, a4, a5
-; RV32IA-NEXT: and a5, a5, a3
-; RV32IA-NEXT: xor a5, a4, a5
-; RV32IA-NEXT: sc.w.rl a5, a5, (a2)
-; RV32IA-NEXT: bnez a5, .LBB4_1
-; RV32IA-NEXT: # %bb.2:
-; RV32IA-NEXT: srl a0, a4, a0
-; RV32IA-NEXT: ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_xchg_i8_seq_cst:
+; RV32IA-NOZACAS: # %bb.0:
+; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT: li a3, 255
+; RV32IA-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-NOZACAS-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NOZACAS-NEXT: lr.w.aqrl a4, (a2)
+; RV32IA-NOZACAS-NEXT: mv a5, a1
+; RV32IA-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-NOZACAS-NEXT: and a5, a5, a3
+; RV32IA-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-NOZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-NOZACAS-NEXT: bnez a5, .LBB4_1
+; RV32IA-NOZACAS-NEXT: # %bb.2:
+; RV32IA-NOZACAS-NEXT: srl a0, a4, a0
+; RV32IA-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_xchg_i8_seq_cst:
; RV64I: # %bb.0:
@@ -647,6 +836,26 @@ define i8 @atomicrmw_xchg_i8_seq_cst(ptr %a, i8 %b) nounwind {
; RV64IA-NOZACAS-NEXT: srlw a0, a4, a0
; RV64IA-NOZACAS-NEXT: ret
;
+; RV32IA-ZACAS-LABEL: atomicrmw_xchg_i8_seq_cst:
+; RV32IA-ZACAS: # %bb.0:
+; RV32IA-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-ZACAS-NEXT: li a3, 255
+; RV32IA-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-ZACAS-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT: lr.w.aqrl a4, (a2)
+; RV32IA-ZACAS-NEXT: mv a5, a1
+; RV32IA-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-ZACAS-NEXT: and a5, a5, a3
+; RV32IA-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-ZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-ZACAS-NEXT: bnez a5, .LBB4_1
+; RV32IA-ZACAS-NEXT: # %bb.2:
+; RV32IA-ZACAS-NEXT: srl a0, a4, a0
+; RV32IA-ZACAS-NEXT: ret
+;
; RV64IA-ZACAS-LABEL: atomicrmw_xchg_i8_seq_cst:
; RV64IA-ZACAS: # %bb.0:
; RV64IA-ZACAS-NEXT: andi a2, a0, -4
@@ -667,6 +876,16 @@ define i8 @atomicrmw_xchg_i8_seq_cst(ptr %a, i8 %b) nounwind {
; RV64IA-ZACAS-NEXT: srlw a0, a4, a0
; RV64IA-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xchg_i8_seq_cst:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amoswap.b.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_xchg_i8_seq_cst:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amoswap.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_xchg_i8_seq_cst:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amoswap.b.aqrl a0, a1, (a0)
@@ -695,16 +914,16 @@ define i8 @atomicrmw_xchg_0_i8_monotonic(ptr %a) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-LABEL: atomicrmw_xchg_0_i8_monotonic:
-; RV32IA: # %bb.0:
-; RV32IA-NEXT: andi a1, a0, -4
-; RV32IA-NEXT: slli a0, a0, 3
-; RV32IA-NEXT: li a2, 255
-; RV32IA-NEXT: sll a2, a2, a0
-; RV32IA-NEXT: not a2, a2
-; RV32IA-NEXT: amoand.w a1, a2, (a1)
-; RV32IA-NEXT: srl a0, a1, a0
-; RV32IA-NEXT: ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_xchg_0_i8_monotonic:
+; RV32IA-NOZACAS: # %bb.0:
+; RV32IA-NOZACAS-NEXT: andi a1, a0, -4
+; RV32IA-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT: li a2, 255
+; RV32IA-NOZACAS-NEXT: sll a2, a2, a0
+; RV32IA-NOZACAS-NEXT: not a2, a2
+; RV32IA-NOZACAS-NEXT: amoand.w a1, a2, (a1)
+; RV32IA-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_xchg_0_i8_monotonic:
; RV64I: # %bb.0:
@@ -728,6 +947,17 @@ define i8 @atomicrmw_xchg_0_i8_monotonic(ptr %a) nounwind {
; RV64IA-NOZACAS-NEXT: srlw a0, a1, a0
; RV64IA-NOZACAS-NEXT: ret
;
+; RV32IA-ZACAS-LABEL: atomicrmw_xchg_0_i8_monotonic:
+; RV32IA-ZACAS: # %bb.0:
+; RV32IA-ZACAS-NEXT: andi a1, a0, -4
+; RV32IA-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-ZACAS-NEXT: li a2, 255
+; RV32IA-ZACAS-NEXT: sll a2, a2, a0
+; RV32IA-ZACAS-NEXT: not a2, a2
+; RV32IA-ZACAS-NEXT: amoand.w a1, a2, (a1)
+; RV32IA-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-ZACAS-NEXT: ret
+;
; RV64IA-ZACAS-LABEL: atomicrmw_xchg_0_i8_monotonic:
; RV64IA-ZACAS: # %bb.0:
; RV64IA-ZACAS-NEXT: andi a1, a0, -4
@@ -739,6 +969,16 @@ define i8 @atomicrmw_xchg_0_i8_monotonic(ptr %a) nounwind {
; RV64IA-ZACAS-NEXT: srlw a0, a1, a0
; RV64IA-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xchg_0_i8_monotonic:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amoswap.b a0, zero, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_xchg_0_i8_monotonic:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amoswap.b a0, zero, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_xchg_0_i8_monotonic:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amoswap.b a0, zero, (a0)
@@ -764,27 +1004,27 @@ define i8 @atomicrmw_xchg_0_i8_acquire(ptr %a) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_xchg_0_i8_acquire:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a1, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: li a2, 255
-; RV32IA-WMO-NEXT: sll a2, a2, a0
-; RV32IA-WMO-NEXT: not a2, a2
-; RV32IA-WMO-NEXT: amoand.w.aq a1, a2, (a1)
-; RV32IA-WMO-NEXT: srl a0, a1, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_0_i8_acquire:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a1, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: li a2, 255
+; RV32IA-WMO-NOZACAS-NEXT: sll a2, a2, a0
+; RV32IA-WMO-NOZACAS-NEXT: not a2, a2
+; RV32IA-WMO-NOZACAS-NEXT: amoand.w.aq a1, a2, (a1)
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_xchg_0_i8_acquire:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a1, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: li a2, 255
-; RV32IA-TSO-NEXT: sll a2, a2, a0
-; RV32IA-TSO-NEXT: not a2, a2
-; RV32IA-TSO-NEXT: amoand.w a1, a2, (a1)
-; RV32IA-TSO-NEXT: srl a0, a1, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_xchg_0_i8_acquire:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a1, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: li a2, 255
+; RV32IA-TSO-NOZACAS-NEXT: sll a2, a2, a0
+; RV32IA-TSO-NOZACAS-NEXT: not a2, a2
+; RV32IA-TSO-NOZACAS-NEXT: amoand.w a1, a2, (a1)
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_xchg_0_i8_acquire:
; RV64I: # %bb.0:
@@ -819,6 +1059,28 @@ define i8 @atomicrmw_xchg_0_i8_acquire(ptr %a) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_xchg_0_i8_acquire:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a1, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: li a2, 255
+; RV32IA-WMO-ZACAS-NEXT: sll a2, a2, a0
+; RV32IA-WMO-ZACAS-NEXT: not a2, a2
+; RV32IA-WMO-ZACAS-NEXT: amoand.w.aq a1, a2, (a1)
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_xchg_0_i8_acquire:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a1, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: li a2, 255
+; RV32IA-TSO-ZACAS-NEXT: sll a2, a2, a0
+; RV32IA-TSO-ZACAS-NEXT: not a2, a2
+; RV32IA-TSO-ZACAS-NEXT: amoand.w a1, a2, (a1)
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_xchg_0_i8_acquire:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a1, a0, -4
@@ -841,6 +1103,16 @@ define i8 @atomicrmw_xchg_0_i8_acquire(ptr %a) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xchg_0_i8_acquire:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amoswap.b.aq a0, zero, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_xchg_0_i8_acquire:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amoswap.b a0, zero, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_xchg_0_i8_acquire:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amoswap.b.aq a0, zero, (a0)
@@ -866,27 +1138,27 @@ define i8 @atomicrmw_xchg_0_i8_release(ptr %a) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_xchg_0_i8_release:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a1, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: li a2, 255
-; RV32IA-WMO-NEXT: sll a2, a2, a0
-; RV32IA-WMO-NEXT: not a2, a2
-; RV32IA-WMO-NEXT: amoand.w.rl a1, a2, (a1)
-; RV32IA-WMO-NEXT: srl a0, a1, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_0_i8_release:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a1, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: li a2, 255
+; RV32IA-WMO-NOZACAS-NEXT: sll a2, a2, a0
+; RV32IA-WMO-NOZACAS-NEXT: not a2, a2
+; RV32IA-WMO-NOZACAS-NEXT: amoand.w.rl a1, a2, (a1)
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_xchg_0_i8_release:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a1, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: li a2, 255
-; RV32IA-TSO-NEXT: sll a2, a2, a0
-; RV32IA-TSO-NEXT: not a2, a2
-; RV32IA-TSO-NEXT: amoand.w a1, a2, (a1)
-; RV32IA-TSO-NEXT: srl a0, a1, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_xchg_0_i8_release:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a1, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: li a2, 255
+; RV32IA-TSO-NOZACAS-NEXT: sll a2, a2, a0
+; RV32IA-TSO-NOZACAS-NEXT: not a2, a2
+; RV32IA-TSO-NOZACAS-NEXT: amoand.w a1, a2, (a1)
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_xchg_0_i8_release:
; RV64I: # %bb.0:
@@ -921,6 +1193,28 @@ define i8 @atomicrmw_xchg_0_i8_release(ptr %a) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_xchg_0_i8_release:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a1, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: li a2, 255
+; RV32IA-WMO-ZACAS-NEXT: sll a2, a2, a0
+; RV32IA-WMO-ZACAS-NEXT: not a2, a2
+; RV32IA-WMO-ZACAS-NEXT: amoand.w.rl a1, a2, (a1)
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_xchg_0_i8_release:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a1, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: li a2, 255
+; RV32IA-TSO-ZACAS-NEXT: sll a2, a2, a0
+; RV32IA-TSO-ZACAS-NEXT: not a2, a2
+; RV32IA-TSO-ZACAS-NEXT: amoand.w a1, a2, (a1)
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_xchg_0_i8_release:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a1, a0, -4
@@ -943,6 +1237,16 @@ define i8 @atomicrmw_xchg_0_i8_release(ptr %a) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xchg_0_i8_release:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amoswap.b.rl a0, zero, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_xchg_0_i8_release:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amoswap.b a0, zero, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_xchg_0_i8_release:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amoswap.b.rl a0, zero, (a0)
@@ -968,27 +1272,27 @@ define i8 @atomicrmw_xchg_0_i8_acq_rel(ptr %a) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_xchg_0_i8_acq_rel:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a1, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: li a2, 255
-; RV32IA-WMO-NEXT: sll a2, a2, a0
-; RV32IA-WMO-NEXT: not a2, a2
-; RV32IA-WMO-NEXT: amoand.w.aqrl a1, a2, (a1)
-; RV32IA-WMO-NEXT: srl a0, a1, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_0_i8_acq_rel:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a1, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: li a2, 255
+; RV32IA-WMO-NOZACAS-NEXT: sll a2, a2, a0
+; RV32IA-WMO-NOZACAS-NEXT: not a2, a2
+; RV32IA-WMO-NOZACAS-NEXT: amoand.w.aqrl a1, a2, (a1)
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_xchg_0_i8_acq_rel:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a1, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: li a2, 255
-; RV32IA-TSO-NEXT: sll a2, a2, a0
-; RV32IA-TSO-NEXT: not a2, a2
-; RV32IA-TSO-NEXT: amoand.w a1, a2, (a1)
-; RV32IA-TSO-NEXT: srl a0, a1, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_xchg_0_i8_acq_rel:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a1, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: li a2, 255
+; RV32IA-TSO-NOZACAS-NEXT: sll a2, a2, a0
+; RV32IA-TSO-NOZACAS-NEXT: not a2, a2
+; RV32IA-TSO-NOZACAS-NEXT: amoand.w a1, a2, (a1)
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_xchg_0_i8_acq_rel:
; RV64I: # %bb.0:
@@ -1023,6 +1327,28 @@ define i8 @atomicrmw_xchg_0_i8_acq_rel(ptr %a) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_xchg_0_i8_acq_rel:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a1, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: li a2, 255
+; RV32IA-WMO-ZACAS-NEXT: sll a2, a2, a0
+; RV32IA-WMO-ZACAS-NEXT: not a2, a2
+; RV32IA-WMO-ZACAS-NEXT: amoand.w.aqrl a1, a2, (a1)
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_xchg_0_i8_acq_rel:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a1, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: li a2, 255
+; RV32IA-TSO-ZACAS-NEXT: sll a2, a2, a0
+; RV32IA-TSO-ZACAS-NEXT: not a2, a2
+; RV32IA-TSO-ZACAS-NEXT: amoand.w a1, a2, (a1)
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_xchg_0_i8_acq_rel:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a1, a0, -4
@@ -1045,6 +1371,16 @@ define i8 @atomicrmw_xchg_0_i8_acq_rel(ptr %a) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xchg_0_i8_acq_rel:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amoswap.b.aqrl a0, zero, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_xchg_0_i8_acq_rel:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amoswap.b a0, zero, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_xchg_0_i8_acq_rel:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amoswap.b.aqrl a0, zero, (a0)
@@ -1070,27 +1406,27 @@ define i8 @atomicrmw_xchg_0_i8_seq_cst(ptr %a) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_xchg_0_i8_seq_cst:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a1, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: li a2, 255
-; RV32IA-WMO-NEXT: sll a2, a2, a0
-; RV32IA-WMO-NEXT: not a2, a2
-; RV32IA-WMO-NEXT: amoand.w.aqrl a1, a2, (a1)
-; RV32IA-WMO-NEXT: srl a0, a1, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_0_i8_seq_cst:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a1, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: li a2, 255
+; RV32IA-WMO-NOZACAS-NEXT: sll a2, a2, a0
+; RV32IA-WMO-NOZACAS-NEXT: not a2, a2
+; RV32IA-WMO-NOZACAS-NEXT: amoand.w.aqrl a1, a2, (a1)
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_xchg_0_i8_seq_cst:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a1, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: li a2, 255
-; RV32IA-TSO-NEXT: sll a2, a2, a0
-; RV32IA-TSO-NEXT: not a2, a2
-; RV32IA-TSO-NEXT: amoand.w a1, a2, (a1)
-; RV32IA-TSO-NEXT: srl a0, a1, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_xchg_0_i8_seq_cst:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a1, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: li a2, 255
+; RV32IA-TSO-NOZACAS-NEXT: sll a2, a2, a0
+; RV32IA-TSO-NOZACAS-NEXT: not a2, a2
+; RV32IA-TSO-NOZACAS-NEXT: amoand.w a1, a2, (a1)
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_xchg_0_i8_seq_cst:
; RV64I: # %bb.0:
@@ -1125,6 +1461,28 @@ define i8 @atomicrmw_xchg_0_i8_seq_cst(ptr %a) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_xchg_0_i8_seq_cst:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a1, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: li a2, 255
+; RV32IA-WMO-ZACAS-NEXT: sll a2, a2, a0
+; RV32IA-WMO-ZACAS-NEXT: not a2, a2
+; RV32IA-WMO-ZACAS-NEXT: amoand.w.aqrl a1, a2, (a1)
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_xchg_0_i8_seq_cst:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a1, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: li a2, 255
+; RV32IA-TSO-ZACAS-NEXT: sll a2, a2, a0
+; RV32IA-TSO-ZACAS-NEXT: not a2, a2
+; RV32IA-TSO-ZACAS-NEXT: amoand.w a1, a2, (a1)
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_xchg_0_i8_seq_cst:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a1, a0, -4
@@ -1147,6 +1505,16 @@ define i8 @atomicrmw_xchg_0_i8_seq_cst(ptr %a) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xchg_0_i8_seq_cst:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amoswap.b.aqrl a0, zero, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_xchg_0_i8_seq_cst:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amoswap.b a0, zero, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_xchg_0_i8_seq_cst:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amoswap.b.aqrl a0, zero, (a0)
@@ -1172,15 +1540,15 @@ define i8 @atomicrmw_xchg_minus_1_i8_monotonic(ptr %a) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-LABEL: atomicrmw_xchg_minus_1_i8_monotonic:
-; RV32IA: # %bb.0:
-; RV32IA-NEXT: andi a1, a0, -4
-; RV32IA-NEXT: slli a0, a0, 3
-; RV32IA-NEXT: li a2, 255
-; RV32IA-NEXT: sll a2, a2, a0
-; RV32IA-NEXT: amoor.w a1, a2, (a1)
-; RV32IA-NEXT: srl a0, a1, a0
-; RV32IA-NEXT: ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i8_monotonic:
+; RV32IA-NOZACAS: # %bb.0:
+; RV32IA-NOZACAS-NEXT: andi a1, a0, -4
+; RV32IA-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT: li a2, 255
+; RV32IA-NOZACAS-NEXT: sll a2, a2, a0
+; RV32IA-NOZACAS-NEXT: amoor.w a1, a2, (a1)
+; RV32IA-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_xchg_minus_1_i8_monotonic:
; RV64I: # %bb.0:
@@ -1203,6 +1571,16 @@ define i8 @atomicrmw_xchg_minus_1_i8_monotonic(ptr %a) nounwind {
; RV64IA-NOZACAS-NEXT: srlw a0, a1, a0
; RV64IA-NOZACAS-NEXT: ret
;
+; RV32IA-ZACAS-LABEL: atomicrmw_xchg_minus_1_i8_monotonic:
+; RV32IA-ZACAS: # %bb.0:
+; RV32IA-ZACAS-NEXT: andi a1, a0, -4
+; RV32IA-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-ZACAS-NEXT: li a2, 255
+; RV32IA-ZACAS-NEXT: sll a2, a2, a0
+; RV32IA-ZACAS-NEXT: amoor.w a1, a2, (a1)
+; RV32IA-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-ZACAS-NEXT: ret
+;
; RV64IA-ZACAS-LABEL: atomicrmw_xchg_minus_1_i8_monotonic:
; RV64IA-ZACAS: # %bb.0:
; RV64IA-ZACAS-NEXT: andi a1, a0, -4
@@ -1213,6 +1591,18 @@ define i8 @atomicrmw_xchg_minus_1_i8_monotonic(ptr %a) nounwind {
; RV64IA-ZACAS-NEXT: srlw a0, a1, a0
; RV64IA-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xchg_minus_1_i8_monotonic:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: li a1, -1
+; RV32IA-WMO-ZABHA-NEXT: amoswap.b a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_xchg_minus_1_i8_monotonic:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: li a1, -1
+; RV32IA-TSO-ZABHA-NEXT: amoswap.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_xchg_minus_1_i8_monotonic:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: li a1, -1
@@ -1240,25 +1630,25 @@ define i8 @atomicrmw_xchg_minus_1_i8_acquire(ptr %a) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_xchg_minus_1_i8_acquire:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a1, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: li a2, 255
-; RV32IA-WMO-NEXT: sll a2, a2, a0
-; RV32IA-WMO-NEXT: amoor.w.aq a1, a2, (a1)
-; RV32IA-WMO-NEXT: srl a0, a1, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i8_acquire:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a1, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: li a2, 255
+; RV32IA-WMO-NOZACAS-NEXT: sll a2, a2, a0
+; RV32IA-WMO-NOZACAS-NEXT: amoor.w.aq a1, a2, (a1)
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_xchg_minus_1_i8_acquire:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a1, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: li a2, 255
-; RV32IA-TSO-NEXT: sll a2, a2, a0
-; RV32IA-TSO-NEXT: amoor.w a1, a2, (a1)
-; RV32IA-TSO-NEXT: srl a0, a1, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i8_acquire:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a1, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: li a2, 255
+; RV32IA-TSO-NOZACAS-NEXT: sll a2, a2, a0
+; RV32IA-TSO-NOZACAS-NEXT: amoor.w a1, a2, (a1)
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_xchg_minus_1_i8_acquire:
; RV64I: # %bb.0:
@@ -1291,6 +1681,26 @@ define i8 @atomicrmw_xchg_minus_1_i8_acquire(ptr %a) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_xchg_minus_1_i8_acquire:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a1, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: li a2, 255
+; RV32IA-WMO-ZACAS-NEXT: sll a2, a2, a0
+; RV32IA-WMO-ZACAS-NEXT: amoor.w.aq a1, a2, (a1)
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_xchg_minus_1_i8_acquire:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a1, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: li a2, 255
+; RV32IA-TSO-ZACAS-NEXT: sll a2, a2, a0
+; RV32IA-TSO-ZACAS-NEXT: amoor.w a1, a2, (a1)
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_xchg_minus_1_i8_acquire:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a1, a0, -4
@@ -1311,6 +1721,18 @@ define i8 @atomicrmw_xchg_minus_1_i8_acquire(ptr %a) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xchg_minus_1_i8_acquire:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: li a1, -1
+; RV32IA-WMO-ZABHA-NEXT: amoswap.b.aq a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_xchg_minus_1_i8_acquire:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: li a1, -1
+; RV32IA-TSO-ZABHA-NEXT: amoswap.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_xchg_minus_1_i8_acquire:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: li a1, -1
@@ -1338,25 +1760,25 @@ define i8 @atomicrmw_xchg_minus_1_i8_release(ptr %a) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_xchg_minus_1_i8_release:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a1, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: li a2, 255
-; RV32IA-WMO-NEXT: sll a2, a2, a0
-; RV32IA-WMO-NEXT: amoor.w.rl a1, a2, (a1)
-; RV32IA-WMO-NEXT: srl a0, a1, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i8_release:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a1, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: li a2, 255
+; RV32IA-WMO-NOZACAS-NEXT: sll a2, a2, a0
+; RV32IA-WMO-NOZACAS-NEXT: amoor.w.rl a1, a2, (a1)
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_xchg_minus_1_i8_release:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a1, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: li a2, 255
-; RV32IA-TSO-NEXT: sll a2, a2, a0
-; RV32IA-TSO-NEXT: amoor.w a1, a2, (a1)
-; RV32IA-TSO-NEXT: srl a0, a1, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i8_release:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a1, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: li a2, 255
+; RV32IA-TSO-NOZACAS-NEXT: sll a2, a2, a0
+; RV32IA-TSO-NOZACAS-NEXT: amoor.w a1, a2, (a1)
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_xchg_minus_1_i8_release:
; RV64I: # %bb.0:
@@ -1389,6 +1811,26 @@ define i8 @atomicrmw_xchg_minus_1_i8_release(ptr %a) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_xchg_minus_1_i8_release:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a1, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: li a2, 255
+; RV32IA-WMO-ZACAS-NEXT: sll a2, a2, a0
+; RV32IA-WMO-ZACAS-NEXT: amoor.w.rl a1, a2, (a1)
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_xchg_minus_1_i8_release:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a1, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: li a2, 255
+; RV32IA-TSO-ZACAS-NEXT: sll a2, a2, a0
+; RV32IA-TSO-ZACAS-NEXT: amoor.w a1, a2, (a1)
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_xchg_minus_1_i8_release:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a1, a0, -4
@@ -1409,6 +1851,18 @@ define i8 @atomicrmw_xchg_minus_1_i8_release(ptr %a) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xchg_minus_1_i8_release:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: li a1, -1
+; RV32IA-WMO-ZABHA-NEXT: amoswap.b.rl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_xchg_minus_1_i8_release:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: li a1, -1
+; RV32IA-TSO-ZABHA-NEXT: amoswap.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_xchg_minus_1_i8_release:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: li a1, -1
@@ -1436,25 +1890,25 @@ define i8 @atomicrmw_xchg_minus_1_i8_acq_rel(ptr %a) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_xchg_minus_1_i8_acq_rel:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a1, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: li a2, 255
-; RV32IA-WMO-NEXT: sll a2, a2, a0
-; RV32IA-WMO-NEXT: amoor.w.aqrl a1, a2, (a1)
-; RV32IA-WMO-NEXT: srl a0, a1, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i8_acq_rel:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a1, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: li a2, 255
+; RV32IA-WMO-NOZACAS-NEXT: sll a2, a2, a0
+; RV32IA-WMO-NOZACAS-NEXT: amoor.w.aqrl a1, a2, (a1)
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_xchg_minus_1_i8_acq_rel:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a1, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: li a2, 255
-; RV32IA-TSO-NEXT: sll a2, a2, a0
-; RV32IA-TSO-NEXT: amoor.w a1, a2, (a1)
-; RV32IA-TSO-NEXT: srl a0, a1, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i8_acq_rel:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a1, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: li a2, 255
+; RV32IA-TSO-NOZACAS-NEXT: sll a2, a2, a0
+; RV32IA-TSO-NOZACAS-NEXT: amoor.w a1, a2, (a1)
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_xchg_minus_1_i8_acq_rel:
; RV64I: # %bb.0:
@@ -1487,6 +1941,26 @@ define i8 @atomicrmw_xchg_minus_1_i8_acq_rel(ptr %a) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_xchg_minus_1_i8_acq_rel:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a1, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: li a2, 255
+; RV32IA-WMO-ZACAS-NEXT: sll a2, a2, a0
+; RV32IA-WMO-ZACAS-NEXT: amoor.w.aqrl a1, a2, (a1)
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_xchg_minus_1_i8_acq_rel:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a1, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: li a2, 255
+; RV32IA-TSO-ZACAS-NEXT: sll a2, a2, a0
+; RV32IA-TSO-ZACAS-NEXT: amoor.w a1, a2, (a1)
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_xchg_minus_1_i8_acq_rel:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a1, a0, -4
@@ -1507,6 +1981,18 @@ define i8 @atomicrmw_xchg_minus_1_i8_acq_rel(ptr %a) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xchg_minus_1_i8_acq_rel:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: li a1, -1
+; RV32IA-WMO-ZABHA-NEXT: amoswap.b.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_xchg_minus_1_i8_acq_rel:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: li a1, -1
+; RV32IA-TSO-ZABHA-NEXT: amoswap.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_xchg_minus_1_i8_acq_rel:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: li a1, -1
@@ -1534,25 +2020,25 @@ define i8 @atomicrmw_xchg_minus_1_i8_seq_cst(ptr %a) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_xchg_minus_1_i8_seq_cst:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a1, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: li a2, 255
-; RV32IA-WMO-NEXT: sll a2, a2, a0
-; RV32IA-WMO-NEXT: amoor.w.aqrl a1, a2, (a1)
-; RV32IA-WMO-NEXT: srl a0, a1, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i8_seq_cst:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a1, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: li a2, 255
+; RV32IA-WMO-NOZACAS-NEXT: sll a2, a2, a0
+; RV32IA-WMO-NOZACAS-NEXT: amoor.w.aqrl a1, a2, (a1)
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_xchg_minus_1_i8_seq_cst:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a1, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: li a2, 255
-; RV32IA-TSO-NEXT: sll a2, a2, a0
-; RV32IA-TSO-NEXT: amoor.w a1, a2, (a1)
-; RV32IA-TSO-NEXT: srl a0, a1, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i8_seq_cst:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a1, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: li a2, 255
+; RV32IA-TSO-NOZACAS-NEXT: sll a2, a2, a0
+; RV32IA-TSO-NOZACAS-NEXT: amoor.w a1, a2, (a1)
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_xchg_minus_1_i8_seq_cst:
; RV64I: # %bb.0:
@@ -1585,6 +2071,26 @@ define i8 @atomicrmw_xchg_minus_1_i8_seq_cst(ptr %a) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_xchg_minus_1_i8_seq_cst:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a1, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: li a2, 255
+; RV32IA-WMO-ZACAS-NEXT: sll a2, a2, a0
+; RV32IA-WMO-ZACAS-NEXT: amoor.w.aqrl a1, a2, (a1)
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_xchg_minus_1_i8_seq_cst:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a1, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: li a2, 255
+; RV32IA-TSO-ZACAS-NEXT: sll a2, a2, a0
+; RV32IA-TSO-ZACAS-NEXT: amoor.w a1, a2, (a1)
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_xchg_minus_1_i8_seq_cst:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a1, a0, -4
@@ -1605,6 +2111,18 @@ define i8 @atomicrmw_xchg_minus_1_i8_seq_cst(ptr %a) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xchg_minus_1_i8_seq_cst:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: li a1, -1
+; RV32IA-WMO-ZABHA-NEXT: amoswap.b.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_xchg_minus_1_i8_seq_cst:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: li a1, -1
+; RV32IA-TSO-ZABHA-NEXT: amoswap.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_xchg_minus_1_i8_seq_cst:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: li a1, -1
@@ -1631,25 +2149,25 @@ define i8 @atomicrmw_add_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-LABEL: atomicrmw_add_i8_monotonic:
-; RV32IA: # %bb.0:
-; RV32IA-NEXT: andi a2, a0, -4
-; RV32IA-NEXT: slli a0, a0, 3
-; RV32IA-NEXT: li a3, 255
-; RV32IA-NEXT: zext.b a1, a1
-; RV32IA-NEXT: sll a3, a3, a0
-; RV32IA-NEXT: sll a1, a1, a0
-; RV32IA-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: lr.w a4, (a2)
-; RV32IA-NEXT: add a5, a4, a1
-; RV32IA-NEXT: xor a5, a4, a5
-; RV32IA-NEXT: and a5, a5, a3
-; RV32IA-NEXT: xor a5, a4, a5
-; RV32IA-NEXT: sc.w a5, a5, (a2)
-; RV32IA-NEXT: bnez a5, .LBB15_1
-; RV32IA-NEXT: # %bb.2:
-; RV32IA-NEXT: srl a0, a4, a0
-; RV32IA-NEXT: ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_add_i8_monotonic:
+; RV32IA-NOZACAS: # %bb.0:
+; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT: li a3, 255
+; RV32IA-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-NOZACAS-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NOZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-NOZACAS-NEXT: add a5, a4, a1
+; RV32IA-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-NOZACAS-NEXT: and a5, a5, a3
+; RV32IA-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-NOZACAS-NEXT: bnez a5, .LBB15_1
+; RV32IA-NOZACAS-NEXT: # %bb.2:
+; RV32IA-NOZACAS-NEXT: srl a0, a4, a0
+; RV32IA-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_add_i8_monotonic:
; RV64I: # %bb.0:
@@ -1681,6 +2199,26 @@ define i8 @atomicrmw_add_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV64IA-NOZACAS-NEXT: srlw a0, a4, a0
; RV64IA-NOZACAS-NEXT: ret
;
+; RV32IA-ZACAS-LABEL: atomicrmw_add_i8_monotonic:
+; RV32IA-ZACAS: # %bb.0:
+; RV32IA-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-ZACAS-NEXT: li a3, 255
+; RV32IA-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-ZACAS-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-ZACAS-NEXT: add a5, a4, a1
+; RV32IA-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-ZACAS-NEXT: and a5, a5, a3
+; RV32IA-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-ZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-ZACAS-NEXT: bnez a5, .LBB15_1
+; RV32IA-ZACAS-NEXT: # %bb.2:
+; RV32IA-ZACAS-NEXT: srl a0, a4, a0
+; RV32IA-ZACAS-NEXT: ret
+;
; RV64IA-ZACAS-LABEL: atomicrmw_add_i8_monotonic:
; RV64IA-ZACAS: # %bb.0:
; RV64IA-ZACAS-NEXT: andi a2, a0, -4
@@ -1701,6 +2239,16 @@ define i8 @atomicrmw_add_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV64IA-ZACAS-NEXT: srlw a0, a4, a0
; RV64IA-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_add_i8_monotonic:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amoadd.b a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_add_i8_monotonic:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amoadd.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_add_i8_monotonic:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amoadd.b a0, a1, (a0)
@@ -1725,45 +2273,45 @@ define i8 @atomicrmw_add_i8_acquire(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_add_i8_acquire:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: li a3, 255
-; RV32IA-WMO-NEXT: zext.b a1, a1
-; RV32IA-WMO-NEXT: sll a3, a3, a0
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT: lr.w.aq a4, (a2)
-; RV32IA-WMO-NEXT: add a5, a4, a1
-; RV32IA-WMO-NEXT: xor a5, a4, a5
-; RV32IA-WMO-NEXT: and a5, a5, a3
-; RV32IA-WMO-NEXT: xor a5, a4, a5
-; RV32IA-WMO-NEXT: sc.w a5, a5, (a2)
-; RV32IA-WMO-NEXT: bnez a5, .LBB16_1
-; RV32IA-WMO-NEXT: # %bb.2:
-; RV32IA-WMO-NEXT: srl a0, a4, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_add_i8_acquire:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: li a3, 255
+; RV32IA-WMO-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: lr.w.aq a4, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: add a5, a4, a1
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-NOZACAS-NEXT: and a5, a5, a3
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: bnez a5, .LBB16_1
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a4, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_add_i8_acquire:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: li a3, 255
-; RV32IA-TSO-NEXT: zext.b a1, a1
-; RV32IA-TSO-NEXT: sll a3, a3, a0
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT: lr.w a4, (a2)
-; RV32IA-TSO-NEXT: add a5, a4, a1
-; RV32IA-TSO-NEXT: xor a5, a4, a5
-; RV32IA-TSO-NEXT: and a5, a5, a3
-; RV32IA-TSO-NEXT: xor a5, a4, a5
-; RV32IA-TSO-NEXT: sc.w a5, a5, (a2)
-; RV32IA-TSO-NEXT: bnez a5, .LBB16_1
-; RV32IA-TSO-NEXT: # %bb.2:
-; RV32IA-TSO-NEXT: srl a0, a4, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_add_i8_acquire:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: li a3, 255
+; RV32IA-TSO-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: add a5, a4, a1
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-NOZACAS-NEXT: and a5, a5, a3
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: bnez a5, .LBB16_1
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a4, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_add_i8_acquire:
; RV64I: # %bb.0:
@@ -1815,6 +2363,46 @@ define i8 @atomicrmw_add_i8_acquire(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a4, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_add_i8_acquire:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: li a3, 255
+; RV32IA-WMO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a4, (a2)
+; RV32IA-WMO-ZACAS-NEXT: add a5, a4, a1
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a3
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-ZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB16_1
+; RV32IA-WMO-ZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a4, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_add_i8_acquire:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: li a3, 255
+; RV32IA-TSO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-TSO-ZACAS-NEXT: add a5, a4, a1
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a3
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT: bnez a5, .LBB16_1
+; RV32IA-TSO-ZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a4, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_add_i8_acquire:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -1855,6 +2443,16 @@ define i8 @atomicrmw_add_i8_acquire(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a4, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_add_i8_acquire:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amoadd.b.aq a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_add_i8_acquire:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amoadd.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_add_i8_acquire:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amoadd.b.aq a0, a1, (a0)
@@ -1879,45 +2477,45 @@ define i8 @atomicrmw_add_i8_release(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_add_i8_release:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: li a3, 255
-; RV32IA-WMO-NEXT: zext.b a1, a1
-; RV32IA-WMO-NEXT: sll a3, a3, a0
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: .LBB17_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT: lr.w a4, (a2)
-; RV32IA-WMO-NEXT: add a5, a4, a1
-; RV32IA-WMO-NEXT: xor a5, a4, a5
-; RV32IA-WMO-NEXT: and a5, a5, a3
-; RV32IA-WMO-NEXT: xor a5, a4, a5
-; RV32IA-WMO-NEXT: sc.w.rl a5, a5, (a2)
-; RV32IA-WMO-NEXT: bnez a5, .LBB17_1
-; RV32IA-WMO-NEXT: # %bb.2:
-; RV32IA-WMO-NEXT: srl a0, a4, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_add_i8_release:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: li a3, 255
+; RV32IA-WMO-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: .LBB17_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: add a5, a4, a1
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-NOZACAS-NEXT: and a5, a5, a3
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-NOZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: bnez a5, .LBB17_1
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a4, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_add_i8_release:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: li a3, 255
-; RV32IA-TSO-NEXT: zext.b a1, a1
-; RV32IA-TSO-NEXT: sll a3, a3, a0
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: .LBB17_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT: lr.w a4, (a2)
-; RV32IA-TSO-NEXT: add a5, a4, a1
-; RV32IA-TSO-NEXT: xor a5, a4, a5
-; RV32IA-TSO-NEXT: and a5, a5, a3
-; RV32IA-TSO-NEXT: xor a5, a4, a5
-; RV32IA-TSO-NEXT: sc.w a5, a5, (a2)
-; RV32IA-TSO-NEXT: bnez a5, .LBB17_1
-; RV32IA-TSO-NEXT: # %bb.2:
-; RV32IA-TSO-NEXT: srl a0, a4, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_add_i8_release:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: li a3, 255
+; RV32IA-TSO-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: .LBB17_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: add a5, a4, a1
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-NOZACAS-NEXT: and a5, a5, a3
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: bnez a5, .LBB17_1
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a4, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_add_i8_release:
; RV64I: # %bb.0:
@@ -1969,6 +2567,46 @@ define i8 @atomicrmw_add_i8_release(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a4, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_add_i8_release:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: li a3, 255
+; RV32IA-WMO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: .LBB17_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-WMO-ZACAS-NEXT: add a5, a4, a1
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a3
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB17_1
+; RV32IA-WMO-ZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a4, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_add_i8_release:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: li a3, 255
+; RV32IA-TSO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: .LBB17_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-TSO-ZACAS-NEXT: add a5, a4, a1
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a3
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT: bnez a5, .LBB17_1
+; RV32IA-TSO-ZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a4, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_add_i8_release:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -2009,6 +2647,16 @@ define i8 @atomicrmw_add_i8_release(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a4, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_add_i8_release:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amoadd.b.rl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_add_i8_release:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amoadd.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_add_i8_release:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amoadd.b.rl a0, a1, (a0)
@@ -2033,45 +2681,45 @@ define i8 @atomicrmw_add_i8_acq_rel(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_add_i8_acq_rel:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: li a3, 255
-; RV32IA-WMO-NEXT: zext.b a1, a1
-; RV32IA-WMO-NEXT: sll a3, a3, a0
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: .LBB18_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT: lr.w.aq a4, (a2)
-; RV32IA-WMO-NEXT: add a5, a4, a1
-; RV32IA-WMO-NEXT: xor a5, a4, a5
-; RV32IA-WMO-NEXT: and a5, a5, a3
-; RV32IA-WMO-NEXT: xor a5, a4, a5
-; RV32IA-WMO-NEXT: sc.w.rl a5, a5, (a2)
-; RV32IA-WMO-NEXT: bnez a5, .LBB18_1
-; RV32IA-WMO-NEXT: # %bb.2:
-; RV32IA-WMO-NEXT: srl a0, a4, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_add_i8_acq_rel:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: li a3, 255
+; RV32IA-WMO-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: .LBB18_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: lr.w.aq a4, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: add a5, a4, a1
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-NOZACAS-NEXT: and a5, a5, a3
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-NOZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: bnez a5, .LBB18_1
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a4, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_add_i8_acq_rel:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: li a3, 255
-; RV32IA-TSO-NEXT: zext.b a1, a1
-; RV32IA-TSO-NEXT: sll a3, a3, a0
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: .LBB18_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT: lr.w a4, (a2)
-; RV32IA-TSO-NEXT: add a5, a4, a1
-; RV32IA-TSO-NEXT: xor a5, a4, a5
-; RV32IA-TSO-NEXT: and a5, a5, a3
-; RV32IA-TSO-NEXT: xor a5, a4, a5
-; RV32IA-TSO-NEXT: sc.w a5, a5, (a2)
-; RV32IA-TSO-NEXT: bnez a5, .LBB18_1
-; RV32IA-TSO-NEXT: # %bb.2:
-; RV32IA-TSO-NEXT: srl a0, a4, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_add_i8_acq_rel:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: li a3, 255
+; RV32IA-TSO-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: .LBB18_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: add a5, a4, a1
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-NOZACAS-NEXT: and a5, a5, a3
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: bnez a5, .LBB18_1
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a4, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_add_i8_acq_rel:
; RV64I: # %bb.0:
@@ -2123,6 +2771,46 @@ define i8 @atomicrmw_add_i8_acq_rel(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a4, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_add_i8_acq_rel:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: li a3, 255
+; RV32IA-WMO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: .LBB18_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a4, (a2)
+; RV32IA-WMO-ZACAS-NEXT: add a5, a4, a1
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a3
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB18_1
+; RV32IA-WMO-ZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a4, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_add_i8_acq_rel:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: li a3, 255
+; RV32IA-TSO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: .LBB18_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-TSO-ZACAS-NEXT: add a5, a4, a1
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a3
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT: bnez a5, .LBB18_1
+; RV32IA-TSO-ZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a4, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_add_i8_acq_rel:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -2163,6 +2851,16 @@ define i8 @atomicrmw_add_i8_acq_rel(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a4, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_add_i8_acq_rel:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amoadd.b.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_add_i8_acq_rel:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amoadd.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_add_i8_acq_rel:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amoadd.b.aqrl a0, a1, (a0)
@@ -2187,25 +2885,25 @@ define i8 @atomicrmw_add_i8_seq_cst(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-LABEL: atomicrmw_add_i8_seq_cst:
-; RV32IA: # %bb.0:
-; RV32IA-NEXT: andi a2, a0, -4
-; RV32IA-NEXT: slli a0, a0, 3
-; RV32IA-NEXT: li a3, 255
-; RV32IA-NEXT: zext.b a1, a1
-; RV32IA-NEXT: sll a3, a3, a0
-; RV32IA-NEXT: sll a1, a1, a0
-; RV32IA-NEXT: .LBB19_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: lr.w.aqrl a4, (a2)
-; RV32IA-NEXT: add a5, a4, a1
-; RV32IA-NEXT: xor a5, a4, a5
-; RV32IA-NEXT: and a5, a5, a3
-; RV32IA-NEXT: xor a5, a4, a5
-; RV32IA-NEXT: sc.w.rl a5, a5, (a2)
-; RV32IA-NEXT: bnez a5, .LBB19_1
-; RV32IA-NEXT: # %bb.2:
-; RV32IA-NEXT: srl a0, a4, a0
-; RV32IA-NEXT: ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_add_i8_seq_cst:
+; RV32IA-NOZACAS: # %bb.0:
+; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT: li a3, 255
+; RV32IA-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-NOZACAS-NEXT: .LBB19_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NOZACAS-NEXT: lr.w.aqrl a4, (a2)
+; RV32IA-NOZACAS-NEXT: add a5, a4, a1
+; RV32IA-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-NOZACAS-NEXT: and a5, a5, a3
+; RV32IA-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-NOZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-NOZACAS-NEXT: bnez a5, .LBB19_1
+; RV32IA-NOZACAS-NEXT: # %bb.2:
+; RV32IA-NOZACAS-NEXT: srl a0, a4, a0
+; RV32IA-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_add_i8_seq_cst:
; RV64I: # %bb.0:
@@ -2237,6 +2935,26 @@ define i8 @atomicrmw_add_i8_seq_cst(ptr %a, i8 %b) nounwind {
; RV64IA-NOZACAS-NEXT: srlw a0, a4, a0
; RV64IA-NOZACAS-NEXT: ret
;
+; RV32IA-ZACAS-LABEL: atomicrmw_add_i8_seq_cst:
+; RV32IA-ZACAS: # %bb.0:
+; RV32IA-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-ZACAS-NEXT: li a3, 255
+; RV32IA-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-ZACAS-NEXT: .LBB19_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT: lr.w.aqrl a4, (a2)
+; RV32IA-ZACAS-NEXT: add a5, a4, a1
+; RV32IA-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-ZACAS-NEXT: and a5, a5, a3
+; RV32IA-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-ZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-ZACAS-NEXT: bnez a5, .LBB19_1
+; RV32IA-ZACAS-NEXT: # %bb.2:
+; RV32IA-ZACAS-NEXT: srl a0, a4, a0
+; RV32IA-ZACAS-NEXT: ret
+;
; RV64IA-ZACAS-LABEL: atomicrmw_add_i8_seq_cst:
; RV64IA-ZACAS: # %bb.0:
; RV64IA-ZACAS-NEXT: andi a2, a0, -4
@@ -2257,6 +2975,16 @@ define i8 @atomicrmw_add_i8_seq_cst(ptr %a, i8 %b) nounwind {
; RV64IA-ZACAS-NEXT: srlw a0, a4, a0
; RV64IA-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_add_i8_seq_cst:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amoadd.b.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_add_i8_seq_cst:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amoadd.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_add_i8_seq_cst:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amoadd.b.aqrl a0, a1, (a0)
@@ -2281,25 +3009,25 @@ define i8 @atomicrmw_sub_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-LABEL: atomicrmw_sub_i8_monotonic:
-; RV32IA: # %bb.0:
-; RV32IA-NEXT: andi a2, a0, -4
-; RV32IA-NEXT: slli a0, a0, 3
-; RV32IA-NEXT: li a3, 255
-; RV32IA-NEXT: zext.b a1, a1
-; RV32IA-NEXT: sll a3, a3, a0
-; RV32IA-NEXT: sll a1, a1, a0
-; RV32IA-NEXT: .LBB20_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: lr.w a4, (a2)
-; RV32IA-NEXT: sub a5, a4, a1
-; RV32IA-NEXT: xor a5, a4, a5
-; RV32IA-NEXT: and a5, a5, a3
-; RV32IA-NEXT: xor a5, a4, a5
-; RV32IA-NEXT: sc.w a5, a5, (a2)
-; RV32IA-NEXT: bnez a5, .LBB20_1
-; RV32IA-NEXT: # %bb.2:
-; RV32IA-NEXT: srl a0, a4, a0
-; RV32IA-NEXT: ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_sub_i8_monotonic:
+; RV32IA-NOZACAS: # %bb.0:
+; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT: li a3, 255
+; RV32IA-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-NOZACAS-NEXT: .LBB20_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NOZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-NOZACAS-NEXT: sub a5, a4, a1
+; RV32IA-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-NOZACAS-NEXT: and a5, a5, a3
+; RV32IA-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-NOZACAS-NEXT: bnez a5, .LBB20_1
+; RV32IA-NOZACAS-NEXT: # %bb.2:
+; RV32IA-NOZACAS-NEXT: srl a0, a4, a0
+; RV32IA-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_sub_i8_monotonic:
; RV64I: # %bb.0:
@@ -2331,6 +3059,26 @@ define i8 @atomicrmw_sub_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV64IA-NOZACAS-NEXT: srlw a0, a4, a0
; RV64IA-NOZACAS-NEXT: ret
;
+; RV32IA-ZACAS-LABEL: atomicrmw_sub_i8_monotonic:
+; RV32IA-ZACAS: # %bb.0:
+; RV32IA-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-ZACAS-NEXT: li a3, 255
+; RV32IA-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-ZACAS-NEXT: .LBB20_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-ZACAS-NEXT: sub a5, a4, a1
+; RV32IA-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-ZACAS-NEXT: and a5, a5, a3
+; RV32IA-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-ZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-ZACAS-NEXT: bnez a5, .LBB20_1
+; RV32IA-ZACAS-NEXT: # %bb.2:
+; RV32IA-ZACAS-NEXT: srl a0, a4, a0
+; RV32IA-ZACAS-NEXT: ret
+;
; RV64IA-ZACAS-LABEL: atomicrmw_sub_i8_monotonic:
; RV64IA-ZACAS: # %bb.0:
; RV64IA-ZACAS-NEXT: andi a2, a0, -4
@@ -2351,6 +3099,18 @@ define i8 @atomicrmw_sub_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV64IA-ZACAS-NEXT: srlw a0, a4, a0
; RV64IA-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_sub_i8_monotonic:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: neg a1, a1
+; RV32IA-WMO-ZABHA-NEXT: amoadd.b a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_sub_i8_monotonic:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: neg a1, a1
+; RV32IA-TSO-ZABHA-NEXT: amoadd.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_sub_i8_monotonic:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: neg a1, a1
@@ -2377,45 +3137,45 @@ define i8 @atomicrmw_sub_i8_acquire(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_sub_i8_acquire:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: li a3, 255
-; RV32IA-WMO-NEXT: zext.b a1, a1
-; RV32IA-WMO-NEXT: sll a3, a3, a0
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: .LBB21_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT: lr.w.aq a4, (a2)
-; RV32IA-WMO-NEXT: sub a5, a4, a1
-; RV32IA-WMO-NEXT: xor a5, a4, a5
-; RV32IA-WMO-NEXT: and a5, a5, a3
-; RV32IA-WMO-NEXT: xor a5, a4, a5
-; RV32IA-WMO-NEXT: sc.w a5, a5, (a2)
-; RV32IA-WMO-NEXT: bnez a5, .LBB21_1
-; RV32IA-WMO-NEXT: # %bb.2:
-; RV32IA-WMO-NEXT: srl a0, a4, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_sub_i8_acquire:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: li a3, 255
+; RV32IA-WMO-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: .LBB21_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: lr.w.aq a4, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: sub a5, a4, a1
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-NOZACAS-NEXT: and a5, a5, a3
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: bnez a5, .LBB21_1
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a4, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_sub_i8_acquire:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: li a3, 255
-; RV32IA-TSO-NEXT: zext.b a1, a1
-; RV32IA-TSO-NEXT: sll a3, a3, a0
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: .LBB21_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT: lr.w a4, (a2)
-; RV32IA-TSO-NEXT: sub a5, a4, a1
-; RV32IA-TSO-NEXT: xor a5, a4, a5
-; RV32IA-TSO-NEXT: and a5, a5, a3
-; RV32IA-TSO-NEXT: xor a5, a4, a5
-; RV32IA-TSO-NEXT: sc.w a5, a5, (a2)
-; RV32IA-TSO-NEXT: bnez a5, .LBB21_1
-; RV32IA-TSO-NEXT: # %bb.2:
-; RV32IA-TSO-NEXT: srl a0, a4, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_sub_i8_acquire:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: li a3, 255
+; RV32IA-TSO-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: .LBB21_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: sub a5, a4, a1
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-NOZACAS-NEXT: and a5, a5, a3
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: bnez a5, .LBB21_1
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a4, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_sub_i8_acquire:
; RV64I: # %bb.0:
@@ -2467,6 +3227,46 @@ define i8 @atomicrmw_sub_i8_acquire(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a4, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_sub_i8_acquire:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: li a3, 255
+; RV32IA-WMO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: .LBB21_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a4, (a2)
+; RV32IA-WMO-ZACAS-NEXT: sub a5, a4, a1
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a3
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-ZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB21_1
+; RV32IA-WMO-ZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a4, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_sub_i8_acquire:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: li a3, 255
+; RV32IA-TSO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: .LBB21_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-TSO-ZACAS-NEXT: sub a5, a4, a1
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a3
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT: bnez a5, .LBB21_1
+; RV32IA-TSO-ZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a4, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_sub_i8_acquire:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -2507,6 +3307,18 @@ define i8 @atomicrmw_sub_i8_acquire(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a4, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_sub_i8_acquire:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: neg a1, a1
+; RV32IA-WMO-ZABHA-NEXT: amoadd.b.aq a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_sub_i8_acquire:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: neg a1, a1
+; RV32IA-TSO-ZABHA-NEXT: amoadd.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_sub_i8_acquire:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: neg a1, a1
@@ -2533,45 +3345,45 @@ define i8 @atomicrmw_sub_i8_release(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_sub_i8_release:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: li a3, 255
-; RV32IA-WMO-NEXT: zext.b a1, a1
-; RV32IA-WMO-NEXT: sll a3, a3, a0
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: .LBB22_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT: lr.w a4, (a2)
-; RV32IA-WMO-NEXT: sub a5, a4, a1
-; RV32IA-WMO-NEXT: xor a5, a4, a5
-; RV32IA-WMO-NEXT: and a5, a5, a3
-; RV32IA-WMO-NEXT: xor a5, a4, a5
-; RV32IA-WMO-NEXT: sc.w.rl a5, a5, (a2)
-; RV32IA-WMO-NEXT: bnez a5, .LBB22_1
-; RV32IA-WMO-NEXT: # %bb.2:
-; RV32IA-WMO-NEXT: srl a0, a4, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_sub_i8_release:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: li a3, 255
+; RV32IA-WMO-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: .LBB22_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: sub a5, a4, a1
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-NOZACAS-NEXT: and a5, a5, a3
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-NOZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: bnez a5, .LBB22_1
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a4, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_sub_i8_release:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: li a3, 255
-; RV32IA-TSO-NEXT: zext.b a1, a1
-; RV32IA-TSO-NEXT: sll a3, a3, a0
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: .LBB22_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT: lr.w a4, (a2)
-; RV32IA-TSO-NEXT: sub a5, a4, a1
-; RV32IA-TSO-NEXT: xor a5, a4, a5
-; RV32IA-TSO-NEXT: and a5, a5, a3
-; RV32IA-TSO-NEXT: xor a5, a4, a5
-; RV32IA-TSO-NEXT: sc.w a5, a5, (a2)
-; RV32IA-TSO-NEXT: bnez a5, .LBB22_1
-; RV32IA-TSO-NEXT: # %bb.2:
-; RV32IA-TSO-NEXT: srl a0, a4, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_sub_i8_release:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: li a3, 255
+; RV32IA-TSO-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: .LBB22_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: sub a5, a4, a1
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-NOZACAS-NEXT: and a5, a5, a3
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: bnez a5, .LBB22_1
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a4, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_sub_i8_release:
; RV64I: # %bb.0:
@@ -2623,6 +3435,46 @@ define i8 @atomicrmw_sub_i8_release(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a4, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_sub_i8_release:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: li a3, 255
+; RV32IA-WMO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: .LBB22_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-WMO-ZACAS-NEXT: sub a5, a4, a1
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a3
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB22_1
+; RV32IA-WMO-ZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a4, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_sub_i8_release:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: li a3, 255
+; RV32IA-TSO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: .LBB22_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-TSO-ZACAS-NEXT: sub a5, a4, a1
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a3
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT: bnez a5, .LBB22_1
+; RV32IA-TSO-ZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a4, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_sub_i8_release:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -2663,6 +3515,18 @@ define i8 @atomicrmw_sub_i8_release(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a4, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_sub_i8_release:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: neg a1, a1
+; RV32IA-WMO-ZABHA-NEXT: amoadd.b.rl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_sub_i8_release:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: neg a1, a1
+; RV32IA-TSO-ZABHA-NEXT: amoadd.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_sub_i8_release:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: neg a1, a1
@@ -2689,45 +3553,45 @@ define i8 @atomicrmw_sub_i8_acq_rel(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_sub_i8_acq_rel:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: li a3, 255
-; RV32IA-WMO-NEXT: zext.b a1, a1
-; RV32IA-WMO-NEXT: sll a3, a3, a0
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: .LBB23_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT: lr.w.aq a4, (a2)
-; RV32IA-WMO-NEXT: sub a5, a4, a1
-; RV32IA-WMO-NEXT: xor a5, a4, a5
-; RV32IA-WMO-NEXT: and a5, a5, a3
-; RV32IA-WMO-NEXT: xor a5, a4, a5
-; RV32IA-WMO-NEXT: sc.w.rl a5, a5, (a2)
-; RV32IA-WMO-NEXT: bnez a5, .LBB23_1
-; RV32IA-WMO-NEXT: # %bb.2:
-; RV32IA-WMO-NEXT: srl a0, a4, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_sub_i8_acq_rel:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: li a3, 255
+; RV32IA-WMO-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: .LBB23_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: lr.w.aq a4, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: sub a5, a4, a1
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-NOZACAS-NEXT: and a5, a5, a3
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-NOZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: bnez a5, .LBB23_1
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a4, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_sub_i8_acq_rel:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: li a3, 255
-; RV32IA-TSO-NEXT: zext.b a1, a1
-; RV32IA-TSO-NEXT: sll a3, a3, a0
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: .LBB23_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT: lr.w a4, (a2)
-; RV32IA-TSO-NEXT: sub a5, a4, a1
-; RV32IA-TSO-NEXT: xor a5, a4, a5
-; RV32IA-TSO-NEXT: and a5, a5, a3
-; RV32IA-TSO-NEXT: xor a5, a4, a5
-; RV32IA-TSO-NEXT: sc.w a5, a5, (a2)
-; RV32IA-TSO-NEXT: bnez a5, .LBB23_1
-; RV32IA-TSO-NEXT: # %bb.2:
-; RV32IA-TSO-NEXT: srl a0, a4, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_sub_i8_acq_rel:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: li a3, 255
+; RV32IA-TSO-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: .LBB23_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: sub a5, a4, a1
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-NOZACAS-NEXT: and a5, a5, a3
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: bnez a5, .LBB23_1
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a4, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_sub_i8_acq_rel:
; RV64I: # %bb.0:
@@ -2779,6 +3643,46 @@ define i8 @atomicrmw_sub_i8_acq_rel(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a4, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_sub_i8_acq_rel:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: li a3, 255
+; RV32IA-WMO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: .LBB23_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a4, (a2)
+; RV32IA-WMO-ZACAS-NEXT: sub a5, a4, a1
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a3
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB23_1
+; RV32IA-WMO-ZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a4, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_sub_i8_acq_rel:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: li a3, 255
+; RV32IA-TSO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: .LBB23_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-TSO-ZACAS-NEXT: sub a5, a4, a1
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a3
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT: bnez a5, .LBB23_1
+; RV32IA-TSO-ZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a4, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_sub_i8_acq_rel:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -2819,6 +3723,18 @@ define i8 @atomicrmw_sub_i8_acq_rel(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a4, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_sub_i8_acq_rel:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: neg a1, a1
+; RV32IA-WMO-ZABHA-NEXT: amoadd.b.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_sub_i8_acq_rel:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: neg a1, a1
+; RV32IA-TSO-ZABHA-NEXT: amoadd.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_sub_i8_acq_rel:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: neg a1, a1
@@ -2845,25 +3761,25 @@ define i8 @atomicrmw_sub_i8_seq_cst(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-LABEL: atomicrmw_sub_i8_seq_cst:
-; RV32IA: # %bb.0:
-; RV32IA-NEXT: andi a2, a0, -4
-; RV32IA-NEXT: slli a0, a0, 3
-; RV32IA-NEXT: li a3, 255
-; RV32IA-NEXT: zext.b a1, a1
-; RV32IA-NEXT: sll a3, a3, a0
-; RV32IA-NEXT: sll a1, a1, a0
-; RV32IA-NEXT: .LBB24_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: lr.w.aqrl a4, (a2)
-; RV32IA-NEXT: sub a5, a4, a1
-; RV32IA-NEXT: xor a5, a4, a5
-; RV32IA-NEXT: and a5, a5, a3
-; RV32IA-NEXT: xor a5, a4, a5
-; RV32IA-NEXT: sc.w.rl a5, a5, (a2)
-; RV32IA-NEXT: bnez a5, .LBB24_1
-; RV32IA-NEXT: # %bb.2:
-; RV32IA-NEXT: srl a0, a4, a0
-; RV32IA-NEXT: ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_sub_i8_seq_cst:
+; RV32IA-NOZACAS: # %bb.0:
+; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT: li a3, 255
+; RV32IA-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-NOZACAS-NEXT: .LBB24_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NOZACAS-NEXT: lr.w.aqrl a4, (a2)
+; RV32IA-NOZACAS-NEXT: sub a5, a4, a1
+; RV32IA-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-NOZACAS-NEXT: and a5, a5, a3
+; RV32IA-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-NOZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-NOZACAS-NEXT: bnez a5, .LBB24_1
+; RV32IA-NOZACAS-NEXT: # %bb.2:
+; RV32IA-NOZACAS-NEXT: srl a0, a4, a0
+; RV32IA-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_sub_i8_seq_cst:
; RV64I: # %bb.0:
@@ -2895,6 +3811,26 @@ define i8 @atomicrmw_sub_i8_seq_cst(ptr %a, i8 %b) nounwind {
; RV64IA-NOZACAS-NEXT: srlw a0, a4, a0
; RV64IA-NOZACAS-NEXT: ret
;
+; RV32IA-ZACAS-LABEL: atomicrmw_sub_i8_seq_cst:
+; RV32IA-ZACAS: # %bb.0:
+; RV32IA-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-ZACAS-NEXT: li a3, 255
+; RV32IA-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-ZACAS-NEXT: .LBB24_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT: lr.w.aqrl a4, (a2)
+; RV32IA-ZACAS-NEXT: sub a5, a4, a1
+; RV32IA-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-ZACAS-NEXT: and a5, a5, a3
+; RV32IA-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-ZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-ZACAS-NEXT: bnez a5, .LBB24_1
+; RV32IA-ZACAS-NEXT: # %bb.2:
+; RV32IA-ZACAS-NEXT: srl a0, a4, a0
+; RV32IA-ZACAS-NEXT: ret
+;
; RV64IA-ZACAS-LABEL: atomicrmw_sub_i8_seq_cst:
; RV64IA-ZACAS: # %bb.0:
; RV64IA-ZACAS-NEXT: andi a2, a0, -4
@@ -2915,6 +3851,18 @@ define i8 @atomicrmw_sub_i8_seq_cst(ptr %a, i8 %b) nounwind {
; RV64IA-ZACAS-NEXT: srlw a0, a4, a0
; RV64IA-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_sub_i8_seq_cst:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: neg a1, a1
+; RV32IA-WMO-ZABHA-NEXT: amoadd.b.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_sub_i8_seq_cst:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: neg a1, a1
+; RV32IA-TSO-ZABHA-NEXT: amoadd.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_sub_i8_seq_cst:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: neg a1, a1
@@ -2941,19 +3889,19 @@ define i8 @atomicrmw_and_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-LABEL: atomicrmw_and_i8_monotonic:
-; RV32IA: # %bb.0:
-; RV32IA-NEXT: andi a2, a0, -4
-; RV32IA-NEXT: slli a0, a0, 3
-; RV32IA-NEXT: li a3, 255
-; RV32IA-NEXT: zext.b a1, a1
-; RV32IA-NEXT: sll a3, a3, a0
-; RV32IA-NEXT: not a3, a3
-; RV32IA-NEXT: sll a1, a1, a0
-; RV32IA-NEXT: or a1, a1, a3
-; RV32IA-NEXT: amoand.w a1, a1, (a2)
-; RV32IA-NEXT: srl a0, a1, a0
-; RV32IA-NEXT: ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_and_i8_monotonic:
+; RV32IA-NOZACAS: # %bb.0:
+; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT: li a3, 255
+; RV32IA-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-NOZACAS-NEXT: not a3, a3
+; RV32IA-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-NOZACAS-NEXT: or a1, a1, a3
+; RV32IA-NOZACAS-NEXT: amoand.w a1, a1, (a2)
+; RV32IA-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_and_i8_monotonic:
; RV64I: # %bb.0:
@@ -2979,6 +3927,20 @@ define i8 @atomicrmw_and_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV64IA-NOZACAS-NEXT: srlw a0, a1, a0
; RV64IA-NOZACAS-NEXT: ret
;
+; RV32IA-ZACAS-LABEL: atomicrmw_and_i8_monotonic:
+; RV32IA-ZACAS: # %bb.0:
+; RV32IA-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-ZACAS-NEXT: li a3, 255
+; RV32IA-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-ZACAS-NEXT: not a3, a3
+; RV32IA-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-ZACAS-NEXT: or a1, a1, a3
+; RV32IA-ZACAS-NEXT: amoand.w a1, a1, (a2)
+; RV32IA-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-ZACAS-NEXT: ret
+;
; RV64IA-ZACAS-LABEL: atomicrmw_and_i8_monotonic:
; RV64IA-ZACAS: # %bb.0:
; RV64IA-ZACAS-NEXT: andi a2, a0, -4
@@ -2993,6 +3955,16 @@ define i8 @atomicrmw_and_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV64IA-ZACAS-NEXT: srlw a0, a1, a0
; RV64IA-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_and_i8_monotonic:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amoand.b a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_and_i8_monotonic:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amoand.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_and_i8_monotonic:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amoand.b a0, a1, (a0)
@@ -3017,33 +3989,33 @@ define i8 @atomicrmw_and_i8_acquire(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_and_i8_acquire:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: li a3, 255
-; RV32IA-WMO-NEXT: zext.b a1, a1
-; RV32IA-WMO-NEXT: sll a3, a3, a0
-; RV32IA-WMO-NEXT: not a3, a3
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: or a1, a1, a3
-; RV32IA-WMO-NEXT: amoand.w.aq a1, a1, (a2)
-; RV32IA-WMO-NEXT: srl a0, a1, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_and_i8_acquire:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: li a3, 255
+; RV32IA-WMO-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: not a3, a3
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: or a1, a1, a3
+; RV32IA-WMO-NOZACAS-NEXT: amoand.w.aq a1, a1, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_and_i8_acquire:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: li a3, 255
-; RV32IA-TSO-NEXT: zext.b a1, a1
-; RV32IA-TSO-NEXT: sll a3, a3, a0
-; RV32IA-TSO-NEXT: not a3, a3
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: or a1, a1, a3
-; RV32IA-TSO-NEXT: amoand.w a1, a1, (a2)
-; RV32IA-TSO-NEXT: srl a0, a1, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_and_i8_acquire:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: li a3, 255
+; RV32IA-TSO-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: not a3, a3
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: or a1, a1, a3
+; RV32IA-TSO-NOZACAS-NEXT: amoand.w a1, a1, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_and_i8_acquire:
; RV64I: # %bb.0:
@@ -3083,6 +4055,34 @@ define i8 @atomicrmw_and_i8_acquire(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_and_i8_acquire:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: li a3, 255
+; RV32IA-WMO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: not a3, a3
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: or a1, a1, a3
+; RV32IA-WMO-ZACAS-NEXT: amoand.w.aq a1, a1, (a2)
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_and_i8_acquire:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: li a3, 255
+; RV32IA-TSO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: not a3, a3
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: or a1, a1, a3
+; RV32IA-TSO-ZACAS-NEXT: amoand.w a1, a1, (a2)
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_and_i8_acquire:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -3111,6 +4111,16 @@ define i8 @atomicrmw_and_i8_acquire(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_and_i8_acquire:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amoand.b.aq a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_and_i8_acquire:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amoand.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_and_i8_acquire:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amoand.b.aq a0, a1, (a0)
@@ -3135,33 +4145,33 @@ define i8 @atomicrmw_and_i8_release(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_and_i8_release:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: li a3, 255
-; RV32IA-WMO-NEXT: zext.b a1, a1
-; RV32IA-WMO-NEXT: sll a3, a3, a0
-; RV32IA-WMO-NEXT: not a3, a3
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: or a1, a1, a3
-; RV32IA-WMO-NEXT: amoand.w.rl a1, a1, (a2)
-; RV32IA-WMO-NEXT: srl a0, a1, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_and_i8_release:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: li a3, 255
+; RV32IA-WMO-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: not a3, a3
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: or a1, a1, a3
+; RV32IA-WMO-NOZACAS-NEXT: amoand.w.rl a1, a1, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_and_i8_release:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: li a3, 255
-; RV32IA-TSO-NEXT: zext.b a1, a1
-; RV32IA-TSO-NEXT: sll a3, a3, a0
-; RV32IA-TSO-NEXT: not a3, a3
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: or a1, a1, a3
-; RV32IA-TSO-NEXT: amoand.w a1, a1, (a2)
-; RV32IA-TSO-NEXT: srl a0, a1, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_and_i8_release:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: li a3, 255
+; RV32IA-TSO-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: not a3, a3
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: or a1, a1, a3
+; RV32IA-TSO-NOZACAS-NEXT: amoand.w a1, a1, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_and_i8_release:
; RV64I: # %bb.0:
@@ -3201,6 +4211,34 @@ define i8 @atomicrmw_and_i8_release(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_and_i8_release:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: li a3, 255
+; RV32IA-WMO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: not a3, a3
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: or a1, a1, a3
+; RV32IA-WMO-ZACAS-NEXT: amoand.w.rl a1, a1, (a2)
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_and_i8_release:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: li a3, 255
+; RV32IA-TSO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: not a3, a3
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: or a1, a1, a3
+; RV32IA-TSO-ZACAS-NEXT: amoand.w a1, a1, (a2)
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_and_i8_release:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -3229,6 +4267,16 @@ define i8 @atomicrmw_and_i8_release(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_and_i8_release:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amoand.b.rl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_and_i8_release:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amoand.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_and_i8_release:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amoand.b.rl a0, a1, (a0)
@@ -3253,33 +4301,33 @@ define i8 @atomicrmw_and_i8_acq_rel(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_and_i8_acq_rel:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: li a3, 255
-; RV32IA-WMO-NEXT: zext.b a1, a1
-; RV32IA-WMO-NEXT: sll a3, a3, a0
-; RV32IA-WMO-NEXT: not a3, a3
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: or a1, a1, a3
-; RV32IA-WMO-NEXT: amoand.w.aqrl a1, a1, (a2)
-; RV32IA-WMO-NEXT: srl a0, a1, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_and_i8_acq_rel:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: li a3, 255
+; RV32IA-WMO-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: not a3, a3
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: or a1, a1, a3
+; RV32IA-WMO-NOZACAS-NEXT: amoand.w.aqrl a1, a1, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_and_i8_acq_rel:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: li a3, 255
-; RV32IA-TSO-NEXT: zext.b a1, a1
-; RV32IA-TSO-NEXT: sll a3, a3, a0
-; RV32IA-TSO-NEXT: not a3, a3
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: or a1, a1, a3
-; RV32IA-TSO-NEXT: amoand.w a1, a1, (a2)
-; RV32IA-TSO-NEXT: srl a0, a1, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_and_i8_acq_rel:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: li a3, 255
+; RV32IA-TSO-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: not a3, a3
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: or a1, a1, a3
+; RV32IA-TSO-NOZACAS-NEXT: amoand.w a1, a1, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_and_i8_acq_rel:
; RV64I: # %bb.0:
@@ -3319,6 +4367,34 @@ define i8 @atomicrmw_and_i8_acq_rel(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_and_i8_acq_rel:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: li a3, 255
+; RV32IA-WMO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: not a3, a3
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: or a1, a1, a3
+; RV32IA-WMO-ZACAS-NEXT: amoand.w.aqrl a1, a1, (a2)
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_and_i8_acq_rel:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: li a3, 255
+; RV32IA-TSO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: not a3, a3
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: or a1, a1, a3
+; RV32IA-TSO-ZACAS-NEXT: amoand.w a1, a1, (a2)
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_and_i8_acq_rel:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -3347,6 +4423,16 @@ define i8 @atomicrmw_and_i8_acq_rel(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_and_i8_acq_rel:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amoand.b.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_and_i8_acq_rel:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amoand.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_and_i8_acq_rel:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amoand.b.aqrl a0, a1, (a0)
@@ -3371,33 +4457,33 @@ define i8 @atomicrmw_and_i8_seq_cst(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_and_i8_seq_cst:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: li a3, 255
-; RV32IA-WMO-NEXT: zext.b a1, a1
-; RV32IA-WMO-NEXT: sll a3, a3, a0
-; RV32IA-WMO-NEXT: not a3, a3
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: or a1, a1, a3
-; RV32IA-WMO-NEXT: amoand.w.aqrl a1, a1, (a2)
-; RV32IA-WMO-NEXT: srl a0, a1, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_and_i8_seq_cst:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: li a3, 255
+; RV32IA-WMO-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: not a3, a3
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: or a1, a1, a3
+; RV32IA-WMO-NOZACAS-NEXT: amoand.w.aqrl a1, a1, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_and_i8_seq_cst:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: li a3, 255
-; RV32IA-TSO-NEXT: zext.b a1, a1
-; RV32IA-TSO-NEXT: sll a3, a3, a0
-; RV32IA-TSO-NEXT: not a3, a3
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: or a1, a1, a3
-; RV32IA-TSO-NEXT: amoand.w a1, a1, (a2)
-; RV32IA-TSO-NEXT: srl a0, a1, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_and_i8_seq_cst:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: li a3, 255
+; RV32IA-TSO-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: not a3, a3
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: or a1, a1, a3
+; RV32IA-TSO-NOZACAS-NEXT: amoand.w a1, a1, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_and_i8_seq_cst:
; RV64I: # %bb.0:
@@ -3437,6 +4523,34 @@ define i8 @atomicrmw_and_i8_seq_cst(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_and_i8_seq_cst:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: li a3, 255
+; RV32IA-WMO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: not a3, a3
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: or a1, a1, a3
+; RV32IA-WMO-ZACAS-NEXT: amoand.w.aqrl a1, a1, (a2)
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_and_i8_seq_cst:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: li a3, 255
+; RV32IA-TSO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: not a3, a3
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: or a1, a1, a3
+; RV32IA-TSO-ZACAS-NEXT: amoand.w a1, a1, (a2)
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_and_i8_seq_cst:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -3465,6 +4579,16 @@ define i8 @atomicrmw_and_i8_seq_cst(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_and_i8_seq_cst:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amoand.b.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_and_i8_seq_cst:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amoand.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_and_i8_seq_cst:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amoand.b.aqrl a0, a1, (a0)
@@ -3489,26 +4613,26 @@ define i8 @atomicrmw_nand_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-LABEL: atomicrmw_nand_i8_monotonic:
-; RV32IA: # %bb.0:
-; RV32IA-NEXT: andi a2, a0, -4
-; RV32IA-NEXT: slli a0, a0, 3
-; RV32IA-NEXT: li a3, 255
-; RV32IA-NEXT: zext.b a1, a1
-; RV32IA-NEXT: sll a3, a3, a0
-; RV32IA-NEXT: sll a1, a1, a0
-; RV32IA-NEXT: .LBB30_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: lr.w a4, (a2)
-; RV32IA-NEXT: and a5, a4, a1
-; RV32IA-NEXT: not a5, a5
-; RV32IA-NEXT: xor a5, a4, a5
-; RV32IA-NEXT: and a5, a5, a3
-; RV32IA-NEXT: xor a5, a4, a5
-; RV32IA-NEXT: sc.w a5, a5, (a2)
-; RV32IA-NEXT: bnez a5, .LBB30_1
-; RV32IA-NEXT: # %bb.2:
-; RV32IA-NEXT: srl a0, a4, a0
-; RV32IA-NEXT: ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_nand_i8_monotonic:
+; RV32IA-NOZACAS: # %bb.0:
+; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT: li a3, 255
+; RV32IA-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-NOZACAS-NEXT: .LBB30_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NOZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-NOZACAS-NEXT: and a5, a4, a1
+; RV32IA-NOZACAS-NEXT: not a5, a5
+; RV32IA-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-NOZACAS-NEXT: and a5, a5, a3
+; RV32IA-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-NOZACAS-NEXT: bnez a5, .LBB30_1
+; RV32IA-NOZACAS-NEXT: # %bb.2:
+; RV32IA-NOZACAS-NEXT: srl a0, a4, a0
+; RV32IA-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_nand_i8_monotonic:
; RV64I: # %bb.0:
@@ -3541,6 +4665,27 @@ define i8 @atomicrmw_nand_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV64IA-NOZACAS-NEXT: srlw a0, a4, a0
; RV64IA-NOZACAS-NEXT: ret
;
+; RV32IA-ZACAS-LABEL: atomicrmw_nand_i8_monotonic:
+; RV32IA-ZACAS: # %bb.0:
+; RV32IA-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-ZACAS-NEXT: li a3, 255
+; RV32IA-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-ZACAS-NEXT: .LBB30_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-ZACAS-NEXT: and a5, a4, a1
+; RV32IA-ZACAS-NEXT: not a5, a5
+; RV32IA-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-ZACAS-NEXT: and a5, a5, a3
+; RV32IA-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-ZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-ZACAS-NEXT: bnez a5, .LBB30_1
+; RV32IA-ZACAS-NEXT: # %bb.2:
+; RV32IA-ZACAS-NEXT: srl a0, a4, a0
+; RV32IA-ZACAS-NEXT: ret
+;
; RV64IA-ZACAS-LABEL: atomicrmw_nand_i8_monotonic:
; RV64IA-ZACAS: # %bb.0:
; RV64IA-ZACAS-NEXT: andi a2, a0, -4
@@ -3562,6 +4707,48 @@ define i8 @atomicrmw_nand_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV64IA-ZACAS-NEXT: srlw a0, a4, a0
; RV64IA-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i8_monotonic:
+; RV32IA-WMO-ZABHA-NOZACAS: # %bb.0:
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: li a3, 255
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: .LBB30_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: and a5, a4, a1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: not a5, a5
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: and a5, a5, a3
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: bnez a5, .LBB30_1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: srl a0, a4, a0
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i8_monotonic:
+; RV32IA-TSO-ZABHA-NOZACAS: # %bb.0:
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: li a3, 255
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: .LBB30_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: and a5, a4, a1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: not a5, a5
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: and a5, a5, a3
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: bnez a5, .LBB30_1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: srl a0, a4, a0
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: ret
+;
; RV64IA-WMO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i8_monotonic:
; RV64IA-WMO-ZABHA-NOZACAS: # %bb.0:
; RV64IA-WMO-ZABHA-NOZACAS-NEXT: andi a2, a0, -4
@@ -3604,6 +4791,36 @@ define i8 @atomicrmw_nand_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-ZABHA-NOZACAS-NEXT: srlw a0, a4, a0
; RV64IA-TSO-ZABHA-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i8_monotonic:
+; RV32IA-WMO-ZABHA-ZACAS: # %bb.0:
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: mv a2, a0
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: lbu a0, 0(a0)
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: .LBB30_1: # %atomicrmw.start
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: and a3, a0, a1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: not a3, a3
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: slli a4, a0, 24
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: amocas.b a0, a3, (a2)
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: srai a4, a4, 24
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: bne a0, a4, .LBB30_1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: # %bb.2: # %atomicrmw.end
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i8_monotonic:
+; RV32IA-TSO-ZABHA-ZACAS: # %bb.0:
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: mv a2, a0
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: lbu a0, 0(a0)
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: .LBB30_1: # %atomicrmw.start
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: and a3, a0, a1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: not a3, a3
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: slli a4, a0, 24
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: amocas.b a0, a3, (a2)
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: srai a4, a4, 24
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: bne a0, a4, .LBB30_1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: # %bb.2: # %atomicrmw.end
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i8_monotonic:
; RV64IA-WMO-ZABHA-ZACAS: # %bb.0:
; RV64IA-WMO-ZABHA-ZACAS-NEXT: mv a2, a0
@@ -3648,47 +4865,47 @@ define i8 @atomicrmw_nand_i8_acquire(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_nand_i8_acquire:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: li a3, 255
-; RV32IA-WMO-NEXT: zext.b a1, a1
-; RV32IA-WMO-NEXT: sll a3, a3, a0
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: .LBB31_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT: lr.w.aq a4, (a2)
-; RV32IA-WMO-NEXT: and a5, a4, a1
-; RV32IA-WMO-NEXT: not a5, a5
-; RV32IA-WMO-NEXT: xor a5, a4, a5
-; RV32IA-WMO-NEXT: and a5, a5, a3
-; RV32IA-WMO-NEXT: xor a5, a4, a5
-; RV32IA-WMO-NEXT: sc.w a5, a5, (a2)
-; RV32IA-WMO-NEXT: bnez a5, .LBB31_1
-; RV32IA-WMO-NEXT: # %bb.2:
-; RV32IA-WMO-NEXT: srl a0, a4, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_nand_i8_acquire:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: li a3, 255
+; RV32IA-WMO-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: .LBB31_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: lr.w.aq a4, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: and a5, a4, a1
+; RV32IA-WMO-NOZACAS-NEXT: not a5, a5
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-NOZACAS-NEXT: and a5, a5, a3
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: bnez a5, .LBB31_1
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a4, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_nand_i8_acquire:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: li a3, 255
-; RV32IA-TSO-NEXT: zext.b a1, a1
-; RV32IA-TSO-NEXT: sll a3, a3, a0
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: .LBB31_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT: lr.w a4, (a2)
-; RV32IA-TSO-NEXT: and a5, a4, a1
-; RV32IA-TSO-NEXT: not a5, a5
-; RV32IA-TSO-NEXT: xor a5, a4, a5
-; RV32IA-TSO-NEXT: and a5, a5, a3
-; RV32IA-TSO-NEXT: xor a5, a4, a5
-; RV32IA-TSO-NEXT: sc.w a5, a5, (a2)
-; RV32IA-TSO-NEXT: bnez a5, .LBB31_1
-; RV32IA-TSO-NEXT: # %bb.2:
-; RV32IA-TSO-NEXT: srl a0, a4, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_nand_i8_acquire:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: li a3, 255
+; RV32IA-TSO-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: .LBB31_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: and a5, a4, a1
+; RV32IA-TSO-NOZACAS-NEXT: not a5, a5
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-NOZACAS-NEXT: and a5, a5, a3
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: bnez a5, .LBB31_1
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a4, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_nand_i8_acquire:
; RV64I: # %bb.0:
@@ -3742,6 +4959,48 @@ define i8 @atomicrmw_nand_i8_acquire(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a4, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_nand_i8_acquire:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: li a3, 255
+; RV32IA-WMO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: .LBB31_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a4, (a2)
+; RV32IA-WMO-ZACAS-NEXT: and a5, a4, a1
+; RV32IA-WMO-ZACAS-NEXT: not a5, a5
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a3
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-ZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB31_1
+; RV32IA-WMO-ZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a4, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_nand_i8_acquire:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: li a3, 255
+; RV32IA-TSO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: .LBB31_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-TSO-ZACAS-NEXT: and a5, a4, a1
+; RV32IA-TSO-ZACAS-NEXT: not a5, a5
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a3
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT: bnez a5, .LBB31_1
+; RV32IA-TSO-ZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a4, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_nand_i8_acquire:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -3784,6 +5043,48 @@ define i8 @atomicrmw_nand_i8_acquire(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a4, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i8_acquire:
+; RV32IA-WMO-ZABHA-NOZACAS: # %bb.0:
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: li a3, 255
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: .LBB31_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: lr.w.aq a4, (a2)
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: and a5, a4, a1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: not a5, a5
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: and a5, a5, a3
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: bnez a5, .LBB31_1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: srl a0, a4, a0
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i8_acquire:
+; RV32IA-TSO-ZABHA-NOZACAS: # %bb.0:
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: li a3, 255
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: .LBB31_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: and a5, a4, a1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: not a5, a5
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: and a5, a5, a3
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: bnez a5, .LBB31_1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: srl a0, a4, a0
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: ret
+;
; RV64IA-WMO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i8_acquire:
; RV64IA-WMO-ZABHA-NOZACAS: # %bb.0:
; RV64IA-WMO-ZABHA-NOZACAS-NEXT: andi a2, a0, -4
@@ -3826,6 +5127,36 @@ define i8 @atomicrmw_nand_i8_acquire(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-ZABHA-NOZACAS-NEXT: srlw a0, a4, a0
; RV64IA-TSO-ZABHA-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i8_acquire:
+; RV32IA-WMO-ZABHA-ZACAS: # %bb.0:
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: mv a2, a0
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: lbu a0, 0(a0)
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: .LBB31_1: # %atomicrmw.start
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: and a3, a0, a1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: not a3, a3
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: slli a4, a0, 24
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: amocas.b.aq a0, a3, (a2)
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: srai a4, a4, 24
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: bne a0, a4, .LBB31_1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: # %bb.2: # %atomicrmw.end
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i8_acquire:
+; RV32IA-TSO-ZABHA-ZACAS: # %bb.0:
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: mv a2, a0
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: lbu a0, 0(a0)
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: .LBB31_1: # %atomicrmw.start
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: and a3, a0, a1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: not a3, a3
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: slli a4, a0, 24
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: amocas.b a0, a3, (a2)
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: srai a4, a4, 24
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: bne a0, a4, .LBB31_1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: # %bb.2: # %atomicrmw.end
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i8_acquire:
; RV64IA-WMO-ZABHA-ZACAS: # %bb.0:
; RV64IA-WMO-ZABHA-ZACAS-NEXT: mv a2, a0
@@ -3870,47 +5201,47 @@ define i8 @atomicrmw_nand_i8_release(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_nand_i8_release:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: li a3, 255
-; RV32IA-WMO-NEXT: zext.b a1, a1
-; RV32IA-WMO-NEXT: sll a3, a3, a0
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: .LBB32_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT: lr.w a4, (a2)
-; RV32IA-WMO-NEXT: and a5, a4, a1
-; RV32IA-WMO-NEXT: not a5, a5
-; RV32IA-WMO-NEXT: xor a5, a4, a5
-; RV32IA-WMO-NEXT: and a5, a5, a3
-; RV32IA-WMO-NEXT: xor a5, a4, a5
-; RV32IA-WMO-NEXT: sc.w.rl a5, a5, (a2)
-; RV32IA-WMO-NEXT: bnez a5, .LBB32_1
-; RV32IA-WMO-NEXT: # %bb.2:
-; RV32IA-WMO-NEXT: srl a0, a4, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_nand_i8_release:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: li a3, 255
+; RV32IA-WMO-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: .LBB32_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: and a5, a4, a1
+; RV32IA-WMO-NOZACAS-NEXT: not a5, a5
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-NOZACAS-NEXT: and a5, a5, a3
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-NOZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: bnez a5, .LBB32_1
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a4, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_nand_i8_release:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: li a3, 255
-; RV32IA-TSO-NEXT: zext.b a1, a1
-; RV32IA-TSO-NEXT: sll a3, a3, a0
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: .LBB32_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT: lr.w a4, (a2)
-; RV32IA-TSO-NEXT: and a5, a4, a1
-; RV32IA-TSO-NEXT: not a5, a5
-; RV32IA-TSO-NEXT: xor a5, a4, a5
-; RV32IA-TSO-NEXT: and a5, a5, a3
-; RV32IA-TSO-NEXT: xor a5, a4, a5
-; RV32IA-TSO-NEXT: sc.w a5, a5, (a2)
-; RV32IA-TSO-NEXT: bnez a5, .LBB32_1
-; RV32IA-TSO-NEXT: # %bb.2:
-; RV32IA-TSO-NEXT: srl a0, a4, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_nand_i8_release:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: li a3, 255
+; RV32IA-TSO-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: .LBB32_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: and a5, a4, a1
+; RV32IA-TSO-NOZACAS-NEXT: not a5, a5
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-NOZACAS-NEXT: and a5, a5, a3
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: bnez a5, .LBB32_1
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a4, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_nand_i8_release:
; RV64I: # %bb.0:
@@ -3964,6 +5295,48 @@ define i8 @atomicrmw_nand_i8_release(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a4, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_nand_i8_release:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: li a3, 255
+; RV32IA-WMO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: .LBB32_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-WMO-ZACAS-NEXT: and a5, a4, a1
+; RV32IA-WMO-ZACAS-NEXT: not a5, a5
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a3
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB32_1
+; RV32IA-WMO-ZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a4, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_nand_i8_release:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: li a3, 255
+; RV32IA-TSO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: .LBB32_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-TSO-ZACAS-NEXT: and a5, a4, a1
+; RV32IA-TSO-ZACAS-NEXT: not a5, a5
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a3
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT: bnez a5, .LBB32_1
+; RV32IA-TSO-ZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a4, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_nand_i8_release:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -4006,6 +5379,48 @@ define i8 @atomicrmw_nand_i8_release(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a4, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i8_release:
+; RV32IA-WMO-ZABHA-NOZACAS: # %bb.0:
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: li a3, 255
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: .LBB32_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: and a5, a4, a1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: not a5, a5
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: and a5, a5, a3
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: bnez a5, .LBB32_1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: srl a0, a4, a0
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i8_release:
+; RV32IA-TSO-ZABHA-NOZACAS: # %bb.0:
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: li a3, 255
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: .LBB32_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: and a5, a4, a1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: not a5, a5
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: and a5, a5, a3
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: bnez a5, .LBB32_1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: srl a0, a4, a0
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: ret
+;
; RV64IA-WMO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i8_release:
; RV64IA-WMO-ZABHA-NOZACAS: # %bb.0:
; RV64IA-WMO-ZABHA-NOZACAS-NEXT: andi a2, a0, -4
@@ -4048,6 +5463,36 @@ define i8 @atomicrmw_nand_i8_release(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-ZABHA-NOZACAS-NEXT: srlw a0, a4, a0
; RV64IA-TSO-ZABHA-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i8_release:
+; RV32IA-WMO-ZABHA-ZACAS: # %bb.0:
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: mv a2, a0
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: lbu a0, 0(a0)
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: .LBB32_1: # %atomicrmw.start
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: and a3, a0, a1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: not a3, a3
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: slli a4, a0, 24
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: amocas.b.rl a0, a3, (a2)
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: srai a4, a4, 24
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: bne a0, a4, .LBB32_1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: # %bb.2: # %atomicrmw.end
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i8_release:
+; RV32IA-TSO-ZABHA-ZACAS: # %bb.0:
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: mv a2, a0
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: lbu a0, 0(a0)
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: .LBB32_1: # %atomicrmw.start
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: and a3, a0, a1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: not a3, a3
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: slli a4, a0, 24
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: amocas.b a0, a3, (a2)
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: srai a4, a4, 24
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: bne a0, a4, .LBB32_1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: # %bb.2: # %atomicrmw.end
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i8_release:
; RV64IA-WMO-ZABHA-ZACAS: # %bb.0:
; RV64IA-WMO-ZABHA-ZACAS-NEXT: mv a2, a0
@@ -4092,47 +5537,47 @@ define i8 @atomicrmw_nand_i8_acq_rel(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_nand_i8_acq_rel:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: li a3, 255
-; RV32IA-WMO-NEXT: zext.b a1, a1
-; RV32IA-WMO-NEXT: sll a3, a3, a0
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: .LBB33_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT: lr.w.aq a4, (a2)
-; RV32IA-WMO-NEXT: and a5, a4, a1
-; RV32IA-WMO-NEXT: not a5, a5
-; RV32IA-WMO-NEXT: xor a5, a4, a5
-; RV32IA-WMO-NEXT: and a5, a5, a3
-; RV32IA-WMO-NEXT: xor a5, a4, a5
-; RV32IA-WMO-NEXT: sc.w.rl a5, a5, (a2)
-; RV32IA-WMO-NEXT: bnez a5, .LBB33_1
-; RV32IA-WMO-NEXT: # %bb.2:
-; RV32IA-WMO-NEXT: srl a0, a4, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_nand_i8_acq_rel:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: li a3, 255
+; RV32IA-WMO-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: .LBB33_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: lr.w.aq a4, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: and a5, a4, a1
+; RV32IA-WMO-NOZACAS-NEXT: not a5, a5
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-NOZACAS-NEXT: and a5, a5, a3
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-NOZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: bnez a5, .LBB33_1
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a4, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_nand_i8_acq_rel:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: li a3, 255
-; RV32IA-TSO-NEXT: zext.b a1, a1
-; RV32IA-TSO-NEXT: sll a3, a3, a0
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: .LBB33_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT: lr.w a4, (a2)
-; RV32IA-TSO-NEXT: and a5, a4, a1
-; RV32IA-TSO-NEXT: not a5, a5
-; RV32IA-TSO-NEXT: xor a5, a4, a5
-; RV32IA-TSO-NEXT: and a5, a5, a3
-; RV32IA-TSO-NEXT: xor a5, a4, a5
-; RV32IA-TSO-NEXT: sc.w a5, a5, (a2)
-; RV32IA-TSO-NEXT: bnez a5, .LBB33_1
-; RV32IA-TSO-NEXT: # %bb.2:
-; RV32IA-TSO-NEXT: srl a0, a4, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_nand_i8_acq_rel:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: li a3, 255
+; RV32IA-TSO-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: .LBB33_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: and a5, a4, a1
+; RV32IA-TSO-NOZACAS-NEXT: not a5, a5
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-NOZACAS-NEXT: and a5, a5, a3
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: bnez a5, .LBB33_1
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a4, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_nand_i8_acq_rel:
; RV64I: # %bb.0:
@@ -4186,6 +5631,48 @@ define i8 @atomicrmw_nand_i8_acq_rel(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a4, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_nand_i8_acq_rel:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: li a3, 255
+; RV32IA-WMO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: .LBB33_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a4, (a2)
+; RV32IA-WMO-ZACAS-NEXT: and a5, a4, a1
+; RV32IA-WMO-ZACAS-NEXT: not a5, a5
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a3
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB33_1
+; RV32IA-WMO-ZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a4, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_nand_i8_acq_rel:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: li a3, 255
+; RV32IA-TSO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: .LBB33_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-TSO-ZACAS-NEXT: and a5, a4, a1
+; RV32IA-TSO-ZACAS-NEXT: not a5, a5
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a3
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT: bnez a5, .LBB33_1
+; RV32IA-TSO-ZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a4, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_nand_i8_acq_rel:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -4228,6 +5715,48 @@ define i8 @atomicrmw_nand_i8_acq_rel(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a4, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i8_acq_rel:
+; RV32IA-WMO-ZABHA-NOZACAS: # %bb.0:
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: li a3, 255
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: .LBB33_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: lr.w.aq a4, (a2)
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: and a5, a4, a1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: not a5, a5
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: and a5, a5, a3
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: bnez a5, .LBB33_1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: srl a0, a4, a0
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i8_acq_rel:
+; RV32IA-TSO-ZABHA-NOZACAS: # %bb.0:
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: li a3, 255
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: .LBB33_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: and a5, a4, a1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: not a5, a5
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: and a5, a5, a3
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: bnez a5, .LBB33_1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: srl a0, a4, a0
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: ret
+;
; RV64IA-WMO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i8_acq_rel:
; RV64IA-WMO-ZABHA-NOZACAS: # %bb.0:
; RV64IA-WMO-ZABHA-NOZACAS-NEXT: andi a2, a0, -4
@@ -4270,6 +5799,36 @@ define i8 @atomicrmw_nand_i8_acq_rel(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-ZABHA-NOZACAS-NEXT: srlw a0, a4, a0
; RV64IA-TSO-ZABHA-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i8_acq_rel:
+; RV32IA-WMO-ZABHA-ZACAS: # %bb.0:
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: mv a2, a0
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: lbu a0, 0(a0)
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: .LBB33_1: # %atomicrmw.start
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: and a3, a0, a1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: not a3, a3
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: slli a4, a0, 24
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: amocas.b.aqrl a0, a3, (a2)
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: srai a4, a4, 24
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: bne a0, a4, .LBB33_1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: # %bb.2: # %atomicrmw.end
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i8_acq_rel:
+; RV32IA-TSO-ZABHA-ZACAS: # %bb.0:
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: mv a2, a0
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: lbu a0, 0(a0)
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: .LBB33_1: # %atomicrmw.start
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: and a3, a0, a1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: not a3, a3
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: slli a4, a0, 24
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: amocas.b a0, a3, (a2)
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: srai a4, a4, 24
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: bne a0, a4, .LBB33_1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: # %bb.2: # %atomicrmw.end
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i8_acq_rel:
; RV64IA-WMO-ZABHA-ZACAS: # %bb.0:
; RV64IA-WMO-ZABHA-ZACAS-NEXT: mv a2, a0
@@ -4314,26 +5873,26 @@ define i8 @atomicrmw_nand_i8_seq_cst(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-LABEL: atomicrmw_nand_i8_seq_cst:
-; RV32IA: # %bb.0:
-; RV32IA-NEXT: andi a2, a0, -4
-; RV32IA-NEXT: slli a0, a0, 3
-; RV32IA-NEXT: li a3, 255
-; RV32IA-NEXT: zext.b a1, a1
-; RV32IA-NEXT: sll a3, a3, a0
-; RV32IA-NEXT: sll a1, a1, a0
-; RV32IA-NEXT: .LBB34_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: lr.w.aqrl a4, (a2)
-; RV32IA-NEXT: and a5, a4, a1
-; RV32IA-NEXT: not a5, a5
-; RV32IA-NEXT: xor a5, a4, a5
-; RV32IA-NEXT: and a5, a5, a3
-; RV32IA-NEXT: xor a5, a4, a5
-; RV32IA-NEXT: sc.w.rl a5, a5, (a2)
-; RV32IA-NEXT: bnez a5, .LBB34_1
-; RV32IA-NEXT: # %bb.2:
-; RV32IA-NEXT: srl a0, a4, a0
-; RV32IA-NEXT: ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_nand_i8_seq_cst:
+; RV32IA-NOZACAS: # %bb.0:
+; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT: li a3, 255
+; RV32IA-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-NOZACAS-NEXT: .LBB34_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NOZACAS-NEXT: lr.w.aqrl a4, (a2)
+; RV32IA-NOZACAS-NEXT: and a5, a4, a1
+; RV32IA-NOZACAS-NEXT: not a5, a5
+; RV32IA-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-NOZACAS-NEXT: and a5, a5, a3
+; RV32IA-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-NOZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-NOZACAS-NEXT: bnez a5, .LBB34_1
+; RV32IA-NOZACAS-NEXT: # %bb.2:
+; RV32IA-NOZACAS-NEXT: srl a0, a4, a0
+; RV32IA-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_nand_i8_seq_cst:
; RV64I: # %bb.0:
@@ -4366,6 +5925,27 @@ define i8 @atomicrmw_nand_i8_seq_cst(ptr %a, i8 %b) nounwind {
; RV64IA-NOZACAS-NEXT: srlw a0, a4, a0
; RV64IA-NOZACAS-NEXT: ret
;
+; RV32IA-ZACAS-LABEL: atomicrmw_nand_i8_seq_cst:
+; RV32IA-ZACAS: # %bb.0:
+; RV32IA-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-ZACAS-NEXT: li a3, 255
+; RV32IA-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-ZACAS-NEXT: .LBB34_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT: lr.w.aqrl a4, (a2)
+; RV32IA-ZACAS-NEXT: and a5, a4, a1
+; RV32IA-ZACAS-NEXT: not a5, a5
+; RV32IA-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-ZACAS-NEXT: and a5, a5, a3
+; RV32IA-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-ZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-ZACAS-NEXT: bnez a5, .LBB34_1
+; RV32IA-ZACAS-NEXT: # %bb.2:
+; RV32IA-ZACAS-NEXT: srl a0, a4, a0
+; RV32IA-ZACAS-NEXT: ret
+;
; RV64IA-ZACAS-LABEL: atomicrmw_nand_i8_seq_cst:
; RV64IA-ZACAS: # %bb.0:
; RV64IA-ZACAS-NEXT: andi a2, a0, -4
@@ -4387,6 +5967,48 @@ define i8 @atomicrmw_nand_i8_seq_cst(ptr %a, i8 %b) nounwind {
; RV64IA-ZACAS-NEXT: srlw a0, a4, a0
; RV64IA-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i8_seq_cst:
+; RV32IA-WMO-ZABHA-NOZACAS: # %bb.0:
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: li a3, 255
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: .LBB34_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: lr.w.aqrl a4, (a2)
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: and a5, a4, a1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: not a5, a5
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: and a5, a5, a3
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: bnez a5, .LBB34_1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: srl a0, a4, a0
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i8_seq_cst:
+; RV32IA-TSO-ZABHA-NOZACAS: # %bb.0:
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: li a3, 255
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: .LBB34_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: lr.w.aqrl a4, (a2)
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: and a5, a4, a1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: not a5, a5
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: and a5, a5, a3
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: bnez a5, .LBB34_1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: srl a0, a4, a0
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: ret
+;
; RV64IA-WMO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i8_seq_cst:
; RV64IA-WMO-ZABHA-NOZACAS: # %bb.0:
; RV64IA-WMO-ZABHA-NOZACAS-NEXT: andi a2, a0, -4
@@ -4429,6 +6051,38 @@ define i8 @atomicrmw_nand_i8_seq_cst(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-ZABHA-NOZACAS-NEXT: srlw a0, a4, a0
; RV64IA-TSO-ZABHA-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i8_seq_cst:
+; RV32IA-WMO-ZABHA-ZACAS: # %bb.0:
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: mv a2, a0
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: lbu a0, 0(a0)
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: .LBB34_1: # %atomicrmw.start
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: and a3, a0, a1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: fence rw, rw
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: not a3, a3
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: slli a4, a0, 24
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: amocas.b.aqrl a0, a3, (a2)
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: srai a4, a4, 24
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: bne a0, a4, .LBB34_1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: # %bb.2: # %atomicrmw.end
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i8_seq_cst:
+; RV32IA-TSO-ZABHA-ZACAS: # %bb.0:
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: mv a2, a0
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: lbu a0, 0(a0)
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: .LBB34_1: # %atomicrmw.start
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: and a3, a0, a1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: fence rw, rw
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: not a3, a3
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: slli a4, a0, 24
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: amocas.b a0, a3, (a2)
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: srai a4, a4, 24
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: bne a0, a4, .LBB34_1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: # %bb.2: # %atomicrmw.end
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i8_seq_cst:
; RV64IA-WMO-ZABHA-ZACAS: # %bb.0:
; RV64IA-WMO-ZABHA-ZACAS-NEXT: mv a2, a0
@@ -4475,15 +6129,15 @@ define i8 @atomicrmw_or_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-LABEL: atomicrmw_or_i8_monotonic:
-; RV32IA: # %bb.0:
-; RV32IA-NEXT: andi a2, a0, -4
-; RV32IA-NEXT: slli a0, a0, 3
-; RV32IA-NEXT: zext.b a1, a1
-; RV32IA-NEXT: sll a1, a1, a0
-; RV32IA-NEXT: amoor.w a1, a1, (a2)
-; RV32IA-NEXT: srl a0, a1, a0
-; RV32IA-NEXT: ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_or_i8_monotonic:
+; RV32IA-NOZACAS: # %bb.0:
+; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-NOZACAS-NEXT: amoor.w a1, a1, (a2)
+; RV32IA-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_or_i8_monotonic:
; RV64I: # %bb.0:
@@ -4505,6 +6159,16 @@ define i8 @atomicrmw_or_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV64IA-NOZACAS-NEXT: srlw a0, a1, a0
; RV64IA-NOZACAS-NEXT: ret
;
+; RV32IA-ZACAS-LABEL: atomicrmw_or_i8_monotonic:
+; RV32IA-ZACAS: # %bb.0:
+; RV32IA-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-ZACAS-NEXT: amoor.w a1, a1, (a2)
+; RV32IA-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-ZACAS-NEXT: ret
+;
; RV64IA-ZACAS-LABEL: atomicrmw_or_i8_monotonic:
; RV64IA-ZACAS: # %bb.0:
; RV64IA-ZACAS-NEXT: andi a2, a0, -4
@@ -4515,6 +6179,16 @@ define i8 @atomicrmw_or_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV64IA-ZACAS-NEXT: srlw a0, a1, a0
; RV64IA-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_or_i8_monotonic:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amoor.b a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_or_i8_monotonic:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amoor.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_or_i8_monotonic:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amoor.b a0, a1, (a0)
@@ -4539,25 +6213,25 @@ define i8 @atomicrmw_or_i8_acquire(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_or_i8_acquire:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: zext.b a1, a1
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: amoor.w.aq a1, a1, (a2)
-; RV32IA-WMO-NEXT: srl a0, a1, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_or_i8_acquire:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: amoor.w.aq a1, a1, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_or_i8_acquire:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: zext.b a1, a1
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: amoor.w a1, a1, (a2)
-; RV32IA-TSO-NEXT: srl a0, a1, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_or_i8_acquire:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: amoor.w a1, a1, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_or_i8_acquire:
; RV64I: # %bb.0:
@@ -4589,6 +6263,26 @@ define i8 @atomicrmw_or_i8_acquire(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_or_i8_acquire:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: amoor.w.aq a1, a1, (a2)
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_or_i8_acquire:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: amoor.w a1, a1, (a2)
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_or_i8_acquire:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -4609,6 +6303,16 @@ define i8 @atomicrmw_or_i8_acquire(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_or_i8_acquire:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amoor.b.aq a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_or_i8_acquire:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amoor.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_or_i8_acquire:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amoor.b.aq a0, a1, (a0)
@@ -4633,25 +6337,25 @@ define i8 @atomicrmw_or_i8_release(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_or_i8_release:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: zext.b a1, a1
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: amoor.w.rl a1, a1, (a2)
-; RV32IA-WMO-NEXT: srl a0, a1, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_or_i8_release:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: amoor.w.rl a1, a1, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_or_i8_release:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: zext.b a1, a1
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: amoor.w a1, a1, (a2)
-; RV32IA-TSO-NEXT: srl a0, a1, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_or_i8_release:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: amoor.w a1, a1, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_or_i8_release:
; RV64I: # %bb.0:
@@ -4683,6 +6387,26 @@ define i8 @atomicrmw_or_i8_release(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_or_i8_release:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: amoor.w.rl a1, a1, (a2)
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_or_i8_release:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: amoor.w a1, a1, (a2)
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_or_i8_release:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -4703,6 +6427,16 @@ define i8 @atomicrmw_or_i8_release(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_or_i8_release:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amoor.b.rl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_or_i8_release:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amoor.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_or_i8_release:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amoor.b.rl a0, a1, (a0)
@@ -4727,25 +6461,25 @@ define i8 @atomicrmw_or_i8_acq_rel(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_or_i8_acq_rel:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: zext.b a1, a1
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: amoor.w.aqrl a1, a1, (a2)
-; RV32IA-WMO-NEXT: srl a0, a1, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_or_i8_acq_rel:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: amoor.w.aqrl a1, a1, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_or_i8_acq_rel:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: zext.b a1, a1
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: amoor.w a1, a1, (a2)
-; RV32IA-TSO-NEXT: srl a0, a1, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_or_i8_acq_rel:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: amoor.w a1, a1, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_or_i8_acq_rel:
; RV64I: # %bb.0:
@@ -4777,6 +6511,26 @@ define i8 @atomicrmw_or_i8_acq_rel(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_or_i8_acq_rel:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: amoor.w.aqrl a1, a1, (a2)
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_or_i8_acq_rel:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: amoor.w a1, a1, (a2)
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_or_i8_acq_rel:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -4797,6 +6551,16 @@ define i8 @atomicrmw_or_i8_acq_rel(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_or_i8_acq_rel:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amoor.b.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_or_i8_acq_rel:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amoor.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_or_i8_acq_rel:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amoor.b.aqrl a0, a1, (a0)
@@ -4821,25 +6585,25 @@ define i8 @atomicrmw_or_i8_seq_cst(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_or_i8_seq_cst:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: zext.b a1, a1
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: amoor.w.aqrl a1, a1, (a2)
-; RV32IA-WMO-NEXT: srl a0, a1, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_or_i8_seq_cst:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: amoor.w.aqrl a1, a1, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_or_i8_seq_cst:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: zext.b a1, a1
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: amoor.w a1, a1, (a2)
-; RV32IA-TSO-NEXT: srl a0, a1, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_or_i8_seq_cst:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: amoor.w a1, a1, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_or_i8_seq_cst:
; RV64I: # %bb.0:
@@ -4871,6 +6635,26 @@ define i8 @atomicrmw_or_i8_seq_cst(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_or_i8_seq_cst:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: amoor.w.aqrl a1, a1, (a2)
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_or_i8_seq_cst:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: amoor.w a1, a1, (a2)
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_or_i8_seq_cst:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -4891,6 +6675,16 @@ define i8 @atomicrmw_or_i8_seq_cst(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_or_i8_seq_cst:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amoor.b.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_or_i8_seq_cst:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amoor.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_or_i8_seq_cst:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amoor.b.aqrl a0, a1, (a0)
@@ -4915,15 +6709,15 @@ define i8 @atomicrmw_xor_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-LABEL: atomicrmw_xor_i8_monotonic:
-; RV32IA: # %bb.0:
-; RV32IA-NEXT: andi a2, a0, -4
-; RV32IA-NEXT: slli a0, a0, 3
-; RV32IA-NEXT: zext.b a1, a1
-; RV32IA-NEXT: sll a1, a1, a0
-; RV32IA-NEXT: amoxor.w a1, a1, (a2)
-; RV32IA-NEXT: srl a0, a1, a0
-; RV32IA-NEXT: ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_xor_i8_monotonic:
+; RV32IA-NOZACAS: # %bb.0:
+; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-NOZACAS-NEXT: amoxor.w a1, a1, (a2)
+; RV32IA-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_xor_i8_monotonic:
; RV64I: # %bb.0:
@@ -4945,6 +6739,16 @@ define i8 @atomicrmw_xor_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV64IA-NOZACAS-NEXT: srlw a0, a1, a0
; RV64IA-NOZACAS-NEXT: ret
;
+; RV32IA-ZACAS-LABEL: atomicrmw_xor_i8_monotonic:
+; RV32IA-ZACAS: # %bb.0:
+; RV32IA-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-ZACAS-NEXT: amoxor.w a1, a1, (a2)
+; RV32IA-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-ZACAS-NEXT: ret
+;
; RV64IA-ZACAS-LABEL: atomicrmw_xor_i8_monotonic:
; RV64IA-ZACAS: # %bb.0:
; RV64IA-ZACAS-NEXT: andi a2, a0, -4
@@ -4955,6 +6759,16 @@ define i8 @atomicrmw_xor_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV64IA-ZACAS-NEXT: srlw a0, a1, a0
; RV64IA-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xor_i8_monotonic:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amoxor.b a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_xor_i8_monotonic:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amoxor.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_xor_i8_monotonic:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amoxor.b a0, a1, (a0)
@@ -4979,25 +6793,25 @@ define i8 @atomicrmw_xor_i8_acquire(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_xor_i8_acquire:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: zext.b a1, a1
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: amoxor.w.aq a1, a1, (a2)
-; RV32IA-WMO-NEXT: srl a0, a1, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xor_i8_acquire:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: amoxor.w.aq a1, a1, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_xor_i8_acquire:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: zext.b a1, a1
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: amoxor.w a1, a1, (a2)
-; RV32IA-TSO-NEXT: srl a0, a1, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_xor_i8_acquire:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: amoxor.w a1, a1, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_xor_i8_acquire:
; RV64I: # %bb.0:
@@ -5029,6 +6843,26 @@ define i8 @atomicrmw_xor_i8_acquire(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_xor_i8_acquire:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: amoxor.w.aq a1, a1, (a2)
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_xor_i8_acquire:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: amoxor.w a1, a1, (a2)
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_xor_i8_acquire:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -5049,6 +6883,16 @@ define i8 @atomicrmw_xor_i8_acquire(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xor_i8_acquire:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amoxor.b.aq a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_xor_i8_acquire:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amoxor.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_xor_i8_acquire:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amoxor.b.aq a0, a1, (a0)
@@ -5073,25 +6917,25 @@ define i8 @atomicrmw_xor_i8_release(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_xor_i8_release:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: zext.b a1, a1
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: amoxor.w.rl a1, a1, (a2)
-; RV32IA-WMO-NEXT: srl a0, a1, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xor_i8_release:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: amoxor.w.rl a1, a1, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_xor_i8_release:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: zext.b a1, a1
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: amoxor.w a1, a1, (a2)
-; RV32IA-TSO-NEXT: srl a0, a1, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_xor_i8_release:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: amoxor.w a1, a1, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_xor_i8_release:
; RV64I: # %bb.0:
@@ -5123,6 +6967,26 @@ define i8 @atomicrmw_xor_i8_release(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_xor_i8_release:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: amoxor.w.rl a1, a1, (a2)
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_xor_i8_release:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: amoxor.w a1, a1, (a2)
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_xor_i8_release:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -5143,6 +7007,16 @@ define i8 @atomicrmw_xor_i8_release(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xor_i8_release:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amoxor.b.rl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_xor_i8_release:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amoxor.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_xor_i8_release:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amoxor.b.rl a0, a1, (a0)
@@ -5167,25 +7041,25 @@ define i8 @atomicrmw_xor_i8_acq_rel(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_xor_i8_acq_rel:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: zext.b a1, a1
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: amoxor.w.aqrl a1, a1, (a2)
-; RV32IA-WMO-NEXT: srl a0, a1, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xor_i8_acq_rel:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: amoxor.w.aqrl a1, a1, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_xor_i8_acq_rel:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: zext.b a1, a1
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: amoxor.w a1, a1, (a2)
-; RV32IA-TSO-NEXT: srl a0, a1, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_xor_i8_acq_rel:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: amoxor.w a1, a1, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_xor_i8_acq_rel:
; RV64I: # %bb.0:
@@ -5217,6 +7091,26 @@ define i8 @atomicrmw_xor_i8_acq_rel(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_xor_i8_acq_rel:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: amoxor.w.aqrl a1, a1, (a2)
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_xor_i8_acq_rel:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: amoxor.w a1, a1, (a2)
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_xor_i8_acq_rel:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -5237,6 +7131,16 @@ define i8 @atomicrmw_xor_i8_acq_rel(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xor_i8_acq_rel:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amoxor.b.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_xor_i8_acq_rel:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amoxor.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_xor_i8_acq_rel:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amoxor.b.aqrl a0, a1, (a0)
@@ -5261,25 +7165,25 @@ define i8 @atomicrmw_xor_i8_seq_cst(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_xor_i8_seq_cst:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: zext.b a1, a1
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: amoxor.w.aqrl a1, a1, (a2)
-; RV32IA-WMO-NEXT: srl a0, a1, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xor_i8_seq_cst:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: amoxor.w.aqrl a1, a1, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_xor_i8_seq_cst:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: zext.b a1, a1
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: amoxor.w a1, a1, (a2)
-; RV32IA-TSO-NEXT: srl a0, a1, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_xor_i8_seq_cst:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: amoxor.w a1, a1, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_xor_i8_seq_cst:
; RV64I: # %bb.0:
@@ -5311,6 +7215,26 @@ define i8 @atomicrmw_xor_i8_seq_cst(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_xor_i8_seq_cst:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: amoxor.w.aqrl a1, a1, (a2)
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_xor_i8_seq_cst:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: amoxor.w a1, a1, (a2)
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_xor_i8_seq_cst:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -5331,6 +7255,16 @@ define i8 @atomicrmw_xor_i8_seq_cst(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xor_i8_seq_cst:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amoxor.b.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_xor_i8_seq_cst:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amoxor.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_xor_i8_seq_cst:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amoxor.b.aqrl a0, a1, (a0)
@@ -5387,34 +7321,34 @@ define i8 @atomicrmw_max_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
-; RV32IA-LABEL: atomicrmw_max_i8_monotonic:
-; RV32IA: # %bb.0:
-; RV32IA-NEXT: andi a2, a0, -4
-; RV32IA-NEXT: slli a0, a0, 3
-; RV32IA-NEXT: li a3, 255
-; RV32IA-NEXT: slli a1, a1, 24
-; RV32IA-NEXT: andi a4, a0, 24
-; RV32IA-NEXT: sll a3, a3, a0
-; RV32IA-NEXT: srai a1, a1, 24
-; RV32IA-NEXT: sll a1, a1, a0
-; RV32IA-NEXT: xori a4, a4, 24
-; RV32IA-NEXT: .LBB45_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: lr.w a5, (a2)
-; RV32IA-NEXT: and a7, a5, a3
-; RV32IA-NEXT: mv a6, a5
-; RV32IA-NEXT: sll a7, a7, a4
-; RV32IA-NEXT: sra a7, a7, a4
-; RV32IA-NEXT: bge a7, a1, .LBB45_3
-; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB45_1 Depth=1
-; RV32IA-NEXT: xor a6, a5, a1
-; RV32IA-NEXT: and a6, a6, a3
-; RV32IA-NEXT: xor a6, a5, a6
-; RV32IA-NEXT: .LBB45_3: # in Loop: Header=BB45_1 Depth=1
-; RV32IA-NEXT: sc.w a6, a6, (a2)
-; RV32IA-NEXT: bnez a6, .LBB45_1
-; RV32IA-NEXT: # %bb.4:
-; RV32IA-NEXT: srl a0, a5, a0
-; RV32IA-NEXT: ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_max_i8_monotonic:
+; RV32IA-NOZACAS: # %bb.0:
+; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT: li a3, 255
+; RV32IA-NOZACAS-NEXT: slli a1, a1, 24
+; RV32IA-NOZACAS-NEXT: andi a4, a0, 24
+; RV32IA-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-NOZACAS-NEXT: srai a1, a1, 24
+; RV32IA-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-NOZACAS-NEXT: xori a4, a4, 24
+; RV32IA-NOZACAS-NEXT: .LBB45_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NOZACAS-NEXT: lr.w a5, (a2)
+; RV32IA-NOZACAS-NEXT: and a7, a5, a3
+; RV32IA-NOZACAS-NEXT: mv a6, a5
+; RV32IA-NOZACAS-NEXT: sll a7, a7, a4
+; RV32IA-NOZACAS-NEXT: sra a7, a7, a4
+; RV32IA-NOZACAS-NEXT: bge a7, a1, .LBB45_3
+; RV32IA-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB45_1 Depth=1
+; RV32IA-NOZACAS-NEXT: xor a6, a5, a1
+; RV32IA-NOZACAS-NEXT: and a6, a6, a3
+; RV32IA-NOZACAS-NEXT: xor a6, a5, a6
+; RV32IA-NOZACAS-NEXT: .LBB45_3: # in Loop: Header=BB45_1 Depth=1
+; RV32IA-NOZACAS-NEXT: sc.w a6, a6, (a2)
+; RV32IA-NOZACAS-NEXT: bnez a6, .LBB45_1
+; RV32IA-NOZACAS-NEXT: # %bb.4:
+; RV32IA-NOZACAS-NEXT: srl a0, a5, a0
+; RV32IA-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_max_i8_monotonic:
; RV64I: # %bb.0:
@@ -5487,6 +7421,35 @@ define i8 @atomicrmw_max_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV64IA-NOZACAS-NEXT: srlw a0, a5, a0
; RV64IA-NOZACAS-NEXT: ret
;
+; RV32IA-ZACAS-LABEL: atomicrmw_max_i8_monotonic:
+; RV32IA-ZACAS: # %bb.0:
+; RV32IA-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-ZACAS-NEXT: li a3, 255
+; RV32IA-ZACAS-NEXT: slli a1, a1, 24
+; RV32IA-ZACAS-NEXT: andi a4, a0, 24
+; RV32IA-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-ZACAS-NEXT: srai a1, a1, 24
+; RV32IA-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-ZACAS-NEXT: xori a4, a4, 24
+; RV32IA-ZACAS-NEXT: .LBB45_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT: lr.w a5, (a2)
+; RV32IA-ZACAS-NEXT: and a7, a5, a3
+; RV32IA-ZACAS-NEXT: mv a6, a5
+; RV32IA-ZACAS-NEXT: sll a7, a7, a4
+; RV32IA-ZACAS-NEXT: sra a7, a7, a4
+; RV32IA-ZACAS-NEXT: bge a7, a1, .LBB45_3
+; RV32IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB45_1 Depth=1
+; RV32IA-ZACAS-NEXT: xor a6, a5, a1
+; RV32IA-ZACAS-NEXT: and a6, a6, a3
+; RV32IA-ZACAS-NEXT: xor a6, a5, a6
+; RV32IA-ZACAS-NEXT: .LBB45_3: # in Loop: Header=BB45_1 Depth=1
+; RV32IA-ZACAS-NEXT: sc.w a6, a6, (a2)
+; RV32IA-ZACAS-NEXT: bnez a6, .LBB45_1
+; RV32IA-ZACAS-NEXT: # %bb.4:
+; RV32IA-ZACAS-NEXT: srl a0, a5, a0
+; RV32IA-ZACAS-NEXT: ret
+;
; RV64IA-ZACAS-LABEL: atomicrmw_max_i8_monotonic:
; RV64IA-ZACAS: # %bb.0:
; RV64IA-ZACAS-NEXT: andi a2, a0, -4
@@ -5516,6 +7479,16 @@ define i8 @atomicrmw_max_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV64IA-ZACAS-NEXT: srlw a0, a5, a0
; RV64IA-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_max_i8_monotonic:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amomax.b a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_max_i8_monotonic:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amomax.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_max_i8_monotonic:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amomax.b a0, a1, (a0)
@@ -5572,63 +7545,63 @@ define i8 @atomicrmw_max_i8_acquire(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_max_i8_acquire:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: li a3, 255
-; RV32IA-WMO-NEXT: slli a1, a1, 24
-; RV32IA-WMO-NEXT: andi a4, a0, 24
-; RV32IA-WMO-NEXT: sll a3, a3, a0
-; RV32IA-WMO-NEXT: srai a1, a1, 24
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: xori a4, a4, 24
-; RV32IA-WMO-NEXT: .LBB46_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT: lr.w.aq a5, (a2)
-; RV32IA-WMO-NEXT: and a7, a5, a3
-; RV32IA-WMO-NEXT: mv a6, a5
-; RV32IA-WMO-NEXT: sll a7, a7, a4
-; RV32IA-WMO-NEXT: sra a7, a7, a4
-; RV32IA-WMO-NEXT: bge a7, a1, .LBB46_3
-; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB46_1 Depth=1
-; RV32IA-WMO-NEXT: xor a6, a5, a1
-; RV32IA-WMO-NEXT: and a6, a6, a3
-; RV32IA-WMO-NEXT: xor a6, a5, a6
-; RV32IA-WMO-NEXT: .LBB46_3: # in Loop: Header=BB46_1 Depth=1
-; RV32IA-WMO-NEXT: sc.w a6, a6, (a2)
-; RV32IA-WMO-NEXT: bnez a6, .LBB46_1
-; RV32IA-WMO-NEXT: # %bb.4:
-; RV32IA-WMO-NEXT: srl a0, a5, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_max_i8_acquire:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: li a3, 255
+; RV32IA-WMO-NOZACAS-NEXT: slli a1, a1, 24
+; RV32IA-WMO-NOZACAS-NEXT: andi a4, a0, 24
+; RV32IA-WMO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: srai a1, a1, 24
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: xori a4, a4, 24
+; RV32IA-WMO-NOZACAS-NEXT: .LBB46_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: lr.w.aq a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: and a7, a5, a3
+; RV32IA-WMO-NOZACAS-NEXT: mv a6, a5
+; RV32IA-WMO-NOZACAS-NEXT: sll a7, a7, a4
+; RV32IA-WMO-NOZACAS-NEXT: sra a7, a7, a4
+; RV32IA-WMO-NOZACAS-NEXT: bge a7, a1, .LBB46_3
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB46_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: xor a6, a5, a1
+; RV32IA-WMO-NOZACAS-NEXT: and a6, a6, a3
+; RV32IA-WMO-NOZACAS-NEXT: xor a6, a5, a6
+; RV32IA-WMO-NOZACAS-NEXT: .LBB46_3: # in Loop: Header=BB46_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: sc.w a6, a6, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: bnez a6, .LBB46_1
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.4:
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a5, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_max_i8_acquire:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: li a3, 255
-; RV32IA-TSO-NEXT: slli a1, a1, 24
-; RV32IA-TSO-NEXT: andi a4, a0, 24
-; RV32IA-TSO-NEXT: sll a3, a3, a0
-; RV32IA-TSO-NEXT: srai a1, a1, 24
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: xori a4, a4, 24
-; RV32IA-TSO-NEXT: .LBB46_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT: lr.w a5, (a2)
-; RV32IA-TSO-NEXT: and a7, a5, a3
-; RV32IA-TSO-NEXT: mv a6, a5
-; RV32IA-TSO-NEXT: sll a7, a7, a4
-; RV32IA-TSO-NEXT: sra a7, a7, a4
-; RV32IA-TSO-NEXT: bge a7, a1, .LBB46_3
-; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB46_1 Depth=1
-; RV32IA-TSO-NEXT: xor a6, a5, a1
-; RV32IA-TSO-NEXT: and a6, a6, a3
-; RV32IA-TSO-NEXT: xor a6, a5, a6
-; RV32IA-TSO-NEXT: .LBB46_3: # in Loop: Header=BB46_1 Depth=1
-; RV32IA-TSO-NEXT: sc.w a6, a6, (a2)
-; RV32IA-TSO-NEXT: bnez a6, .LBB46_1
-; RV32IA-TSO-NEXT: # %bb.4:
-; RV32IA-TSO-NEXT: srl a0, a5, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_max_i8_acquire:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: li a3, 255
+; RV32IA-TSO-NOZACAS-NEXT: slli a1, a1, 24
+; RV32IA-TSO-NOZACAS-NEXT: andi a4, a0, 24
+; RV32IA-TSO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: srai a1, a1, 24
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: xori a4, a4, 24
+; RV32IA-TSO-NOZACAS-NEXT: .LBB46_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: lr.w a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: and a7, a5, a3
+; RV32IA-TSO-NOZACAS-NEXT: mv a6, a5
+; RV32IA-TSO-NOZACAS-NEXT: sll a7, a7, a4
+; RV32IA-TSO-NOZACAS-NEXT: sra a7, a7, a4
+; RV32IA-TSO-NOZACAS-NEXT: bge a7, a1, .LBB46_3
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB46_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: xor a6, a5, a1
+; RV32IA-TSO-NOZACAS-NEXT: and a6, a6, a3
+; RV32IA-TSO-NOZACAS-NEXT: xor a6, a5, a6
+; RV32IA-TSO-NOZACAS-NEXT: .LBB46_3: # in Loop: Header=BB46_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: sc.w a6, a6, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: bnez a6, .LBB46_1
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.4:
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a5, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_max_i8_acquire:
; RV64I: # %bb.0:
@@ -5730,6 +7703,64 @@ define i8 @atomicrmw_max_i8_acquire(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a5, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_max_i8_acquire:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: li a3, 255
+; RV32IA-WMO-ZACAS-NEXT: slli a1, a1, 24
+; RV32IA-WMO-ZACAS-NEXT: andi a4, a0, 24
+; RV32IA-WMO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: srai a1, a1, 24
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: xori a4, a4, 24
+; RV32IA-WMO-ZACAS-NEXT: .LBB46_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT: and a7, a5, a3
+; RV32IA-WMO-ZACAS-NEXT: mv a6, a5
+; RV32IA-WMO-ZACAS-NEXT: sll a7, a7, a4
+; RV32IA-WMO-ZACAS-NEXT: sra a7, a7, a4
+; RV32IA-WMO-ZACAS-NEXT: bge a7, a1, .LBB46_3
+; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB46_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT: xor a6, a5, a1
+; RV32IA-WMO-ZACAS-NEXT: and a6, a6, a3
+; RV32IA-WMO-ZACAS-NEXT: xor a6, a5, a6
+; RV32IA-WMO-ZACAS-NEXT: .LBB46_3: # in Loop: Header=BB46_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT: sc.w a6, a6, (a2)
+; RV32IA-WMO-ZACAS-NEXT: bnez a6, .LBB46_1
+; RV32IA-WMO-ZACAS-NEXT: # %bb.4:
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a5, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_max_i8_acquire:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: li a3, 255
+; RV32IA-TSO-ZACAS-NEXT: slli a1, a1, 24
+; RV32IA-TSO-ZACAS-NEXT: andi a4, a0, 24
+; RV32IA-TSO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: srai a1, a1, 24
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: xori a4, a4, 24
+; RV32IA-TSO-ZACAS-NEXT: .LBB46_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT: lr.w a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT: and a7, a5, a3
+; RV32IA-TSO-ZACAS-NEXT: mv a6, a5
+; RV32IA-TSO-ZACAS-NEXT: sll a7, a7, a4
+; RV32IA-TSO-ZACAS-NEXT: sra a7, a7, a4
+; RV32IA-TSO-ZACAS-NEXT: bge a7, a1, .LBB46_3
+; RV32IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB46_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT: xor a6, a5, a1
+; RV32IA-TSO-ZACAS-NEXT: and a6, a6, a3
+; RV32IA-TSO-ZACAS-NEXT: xor a6, a5, a6
+; RV32IA-TSO-ZACAS-NEXT: .LBB46_3: # in Loop: Header=BB46_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT: sc.w a6, a6, (a2)
+; RV32IA-TSO-ZACAS-NEXT: bnez a6, .LBB46_1
+; RV32IA-TSO-ZACAS-NEXT: # %bb.4:
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a5, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_max_i8_acquire:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -5788,6 +7819,16 @@ define i8 @atomicrmw_max_i8_acquire(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a5, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_max_i8_acquire:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amomax.b.aq a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_max_i8_acquire:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amomax.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_max_i8_acquire:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amomax.b.aq a0, a1, (a0)
@@ -5844,63 +7885,63 @@ define i8 @atomicrmw_max_i8_release(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_max_i8_release:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: li a3, 255
-; RV32IA-WMO-NEXT: slli a1, a1, 24
-; RV32IA-WMO-NEXT: andi a4, a0, 24
-; RV32IA-WMO-NEXT: sll a3, a3, a0
-; RV32IA-WMO-NEXT: srai a1, a1, 24
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: xori a4, a4, 24
-; RV32IA-WMO-NEXT: .LBB47_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT: lr.w a5, (a2)
-; RV32IA-WMO-NEXT: and a7, a5, a3
-; RV32IA-WMO-NEXT: mv a6, a5
-; RV32IA-WMO-NEXT: sll a7, a7, a4
-; RV32IA-WMO-NEXT: sra a7, a7, a4
-; RV32IA-WMO-NEXT: bge a7, a1, .LBB47_3
-; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB47_1 Depth=1
-; RV32IA-WMO-NEXT: xor a6, a5, a1
-; RV32IA-WMO-NEXT: and a6, a6, a3
-; RV32IA-WMO-NEXT: xor a6, a5, a6
-; RV32IA-WMO-NEXT: .LBB47_3: # in Loop: Header=BB47_1 Depth=1
-; RV32IA-WMO-NEXT: sc.w.rl a6, a6, (a2)
-; RV32IA-WMO-NEXT: bnez a6, .LBB47_1
-; RV32IA-WMO-NEXT: # %bb.4:
-; RV32IA-WMO-NEXT: srl a0, a5, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_max_i8_release:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: li a3, 255
+; RV32IA-WMO-NOZACAS-NEXT: slli a1, a1, 24
+; RV32IA-WMO-NOZACAS-NEXT: andi a4, a0, 24
+; RV32IA-WMO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: srai a1, a1, 24
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: xori a4, a4, 24
+; RV32IA-WMO-NOZACAS-NEXT: .LBB47_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: lr.w a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: and a7, a5, a3
+; RV32IA-WMO-NOZACAS-NEXT: mv a6, a5
+; RV32IA-WMO-NOZACAS-NEXT: sll a7, a7, a4
+; RV32IA-WMO-NOZACAS-NEXT: sra a7, a7, a4
+; RV32IA-WMO-NOZACAS-NEXT: bge a7, a1, .LBB47_3
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB47_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: xor a6, a5, a1
+; RV32IA-WMO-NOZACAS-NEXT: and a6, a6, a3
+; RV32IA-WMO-NOZACAS-NEXT: xor a6, a5, a6
+; RV32IA-WMO-NOZACAS-NEXT: .LBB47_3: # in Loop: Header=BB47_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: sc.w.rl a6, a6, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: bnez a6, .LBB47_1
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.4:
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a5, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_max_i8_release:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: li a3, 255
-; RV32IA-TSO-NEXT: slli a1, a1, 24
-; RV32IA-TSO-NEXT: andi a4, a0, 24
-; RV32IA-TSO-NEXT: sll a3, a3, a0
-; RV32IA-TSO-NEXT: srai a1, a1, 24
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: xori a4, a4, 24
-; RV32IA-TSO-NEXT: .LBB47_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT: lr.w a5, (a2)
-; RV32IA-TSO-NEXT: and a7, a5, a3
-; RV32IA-TSO-NEXT: mv a6, a5
-; RV32IA-TSO-NEXT: sll a7, a7, a4
-; RV32IA-TSO-NEXT: sra a7, a7, a4
-; RV32IA-TSO-NEXT: bge a7, a1, .LBB47_3
-; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB47_1 Depth=1
-; RV32IA-TSO-NEXT: xor a6, a5, a1
-; RV32IA-TSO-NEXT: and a6, a6, a3
-; RV32IA-TSO-NEXT: xor a6, a5, a6
-; RV32IA-TSO-NEXT: .LBB47_3: # in Loop: Header=BB47_1 Depth=1
-; RV32IA-TSO-NEXT: sc.w a6, a6, (a2)
-; RV32IA-TSO-NEXT: bnez a6, .LBB47_1
-; RV32IA-TSO-NEXT: # %bb.4:
-; RV32IA-TSO-NEXT: srl a0, a5, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_max_i8_release:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: li a3, 255
+; RV32IA-TSO-NOZACAS-NEXT: slli a1, a1, 24
+; RV32IA-TSO-NOZACAS-NEXT: andi a4, a0, 24
+; RV32IA-TSO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: srai a1, a1, 24
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: xori a4, a4, 24
+; RV32IA-TSO-NOZACAS-NEXT: .LBB47_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: lr.w a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: and a7, a5, a3
+; RV32IA-TSO-NOZACAS-NEXT: mv a6, a5
+; RV32IA-TSO-NOZACAS-NEXT: sll a7, a7, a4
+; RV32IA-TSO-NOZACAS-NEXT: sra a7, a7, a4
+; RV32IA-TSO-NOZACAS-NEXT: bge a7, a1, .LBB47_3
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB47_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: xor a6, a5, a1
+; RV32IA-TSO-NOZACAS-NEXT: and a6, a6, a3
+; RV32IA-TSO-NOZACAS-NEXT: xor a6, a5, a6
+; RV32IA-TSO-NOZACAS-NEXT: .LBB47_3: # in Loop: Header=BB47_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: sc.w a6, a6, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: bnez a6, .LBB47_1
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.4:
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a5, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_max_i8_release:
; RV64I: # %bb.0:
@@ -6002,6 +8043,64 @@ define i8 @atomicrmw_max_i8_release(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a5, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_max_i8_release:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: li a3, 255
+; RV32IA-WMO-ZACAS-NEXT: slli a1, a1, 24
+; RV32IA-WMO-ZACAS-NEXT: andi a4, a0, 24
+; RV32IA-WMO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: srai a1, a1, 24
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: xori a4, a4, 24
+; RV32IA-WMO-ZACAS-NEXT: .LBB47_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT: lr.w a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT: and a7, a5, a3
+; RV32IA-WMO-ZACAS-NEXT: mv a6, a5
+; RV32IA-WMO-ZACAS-NEXT: sll a7, a7, a4
+; RV32IA-WMO-ZACAS-NEXT: sra a7, a7, a4
+; RV32IA-WMO-ZACAS-NEXT: bge a7, a1, .LBB47_3
+; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB47_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT: xor a6, a5, a1
+; RV32IA-WMO-ZACAS-NEXT: and a6, a6, a3
+; RV32IA-WMO-ZACAS-NEXT: xor a6, a5, a6
+; RV32IA-WMO-ZACAS-NEXT: .LBB47_3: # in Loop: Header=BB47_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a6, a6, (a2)
+; RV32IA-WMO-ZACAS-NEXT: bnez a6, .LBB47_1
+; RV32IA-WMO-ZACAS-NEXT: # %bb.4:
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a5, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_max_i8_release:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: li a3, 255
+; RV32IA-TSO-ZACAS-NEXT: slli a1, a1, 24
+; RV32IA-TSO-ZACAS-NEXT: andi a4, a0, 24
+; RV32IA-TSO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: srai a1, a1, 24
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: xori a4, a4, 24
+; RV32IA-TSO-ZACAS-NEXT: .LBB47_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT: lr.w a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT: and a7, a5, a3
+; RV32IA-TSO-ZACAS-NEXT: mv a6, a5
+; RV32IA-TSO-ZACAS-NEXT: sll a7, a7, a4
+; RV32IA-TSO-ZACAS-NEXT: sra a7, a7, a4
+; RV32IA-TSO-ZACAS-NEXT: bge a7, a1, .LBB47_3
+; RV32IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB47_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT: xor a6, a5, a1
+; RV32IA-TSO-ZACAS-NEXT: and a6, a6, a3
+; RV32IA-TSO-ZACAS-NEXT: xor a6, a5, a6
+; RV32IA-TSO-ZACAS-NEXT: .LBB47_3: # in Loop: Header=BB47_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT: sc.w a6, a6, (a2)
+; RV32IA-TSO-ZACAS-NEXT: bnez a6, .LBB47_1
+; RV32IA-TSO-ZACAS-NEXT: # %bb.4:
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a5, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_max_i8_release:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -6060,6 +8159,16 @@ define i8 @atomicrmw_max_i8_release(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a5, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_max_i8_release:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amomax.b.rl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_max_i8_release:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amomax.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_max_i8_release:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amomax.b.rl a0, a1, (a0)
@@ -6116,63 +8225,63 @@ define i8 @atomicrmw_max_i8_acq_rel(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_max_i8_acq_rel:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: li a3, 255
-; RV32IA-WMO-NEXT: slli a1, a1, 24
-; RV32IA-WMO-NEXT: andi a4, a0, 24
-; RV32IA-WMO-NEXT: sll a3, a3, a0
-; RV32IA-WMO-NEXT: srai a1, a1, 24
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: xori a4, a4, 24
-; RV32IA-WMO-NEXT: .LBB48_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT: lr.w.aq a5, (a2)
-; RV32IA-WMO-NEXT: and a7, a5, a3
-; RV32IA-WMO-NEXT: mv a6, a5
-; RV32IA-WMO-NEXT: sll a7, a7, a4
-; RV32IA-WMO-NEXT: sra a7, a7, a4
-; RV32IA-WMO-NEXT: bge a7, a1, .LBB48_3
-; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB48_1 Depth=1
-; RV32IA-WMO-NEXT: xor a6, a5, a1
-; RV32IA-WMO-NEXT: and a6, a6, a3
-; RV32IA-WMO-NEXT: xor a6, a5, a6
-; RV32IA-WMO-NEXT: .LBB48_3: # in Loop: Header=BB48_1 Depth=1
-; RV32IA-WMO-NEXT: sc.w.rl a6, a6, (a2)
-; RV32IA-WMO-NEXT: bnez a6, .LBB48_1
-; RV32IA-WMO-NEXT: # %bb.4:
-; RV32IA-WMO-NEXT: srl a0, a5, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_max_i8_acq_rel:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: li a3, 255
+; RV32IA-WMO-NOZACAS-NEXT: slli a1, a1, 24
+; RV32IA-WMO-NOZACAS-NEXT: andi a4, a0, 24
+; RV32IA-WMO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: srai a1, a1, 24
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: xori a4, a4, 24
+; RV32IA-WMO-NOZACAS-NEXT: .LBB48_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: lr.w.aq a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: and a7, a5, a3
+; RV32IA-WMO-NOZACAS-NEXT: mv a6, a5
+; RV32IA-WMO-NOZACAS-NEXT: sll a7, a7, a4
+; RV32IA-WMO-NOZACAS-NEXT: sra a7, a7, a4
+; RV32IA-WMO-NOZACAS-NEXT: bge a7, a1, .LBB48_3
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB48_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: xor a6, a5, a1
+; RV32IA-WMO-NOZACAS-NEXT: and a6, a6, a3
+; RV32IA-WMO-NOZACAS-NEXT: xor a6, a5, a6
+; RV32IA-WMO-NOZACAS-NEXT: .LBB48_3: # in Loop: Header=BB48_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: sc.w.rl a6, a6, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: bnez a6, .LBB48_1
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.4:
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a5, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_max_i8_acq_rel:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: li a3, 255
-; RV32IA-TSO-NEXT: slli a1, a1, 24
-; RV32IA-TSO-NEXT: andi a4, a0, 24
-; RV32IA-TSO-NEXT: sll a3, a3, a0
-; RV32IA-TSO-NEXT: srai a1, a1, 24
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: xori a4, a4, 24
-; RV32IA-TSO-NEXT: .LBB48_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT: lr.w a5, (a2)
-; RV32IA-TSO-NEXT: and a7, a5, a3
-; RV32IA-TSO-NEXT: mv a6, a5
-; RV32IA-TSO-NEXT: sll a7, a7, a4
-; RV32IA-TSO-NEXT: sra a7, a7, a4
-; RV32IA-TSO-NEXT: bge a7, a1, .LBB48_3
-; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB48_1 Depth=1
-; RV32IA-TSO-NEXT: xor a6, a5, a1
-; RV32IA-TSO-NEXT: and a6, a6, a3
-; RV32IA-TSO-NEXT: xor a6, a5, a6
-; RV32IA-TSO-NEXT: .LBB48_3: # in Loop: Header=BB48_1 Depth=1
-; RV32IA-TSO-NEXT: sc.w a6, a6, (a2)
-; RV32IA-TSO-NEXT: bnez a6, .LBB48_1
-; RV32IA-TSO-NEXT: # %bb.4:
-; RV32IA-TSO-NEXT: srl a0, a5, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_max_i8_acq_rel:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: li a3, 255
+; RV32IA-TSO-NOZACAS-NEXT: slli a1, a1, 24
+; RV32IA-TSO-NOZACAS-NEXT: andi a4, a0, 24
+; RV32IA-TSO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: srai a1, a1, 24
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: xori a4, a4, 24
+; RV32IA-TSO-NOZACAS-NEXT: .LBB48_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: lr.w a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: and a7, a5, a3
+; RV32IA-TSO-NOZACAS-NEXT: mv a6, a5
+; RV32IA-TSO-NOZACAS-NEXT: sll a7, a7, a4
+; RV32IA-TSO-NOZACAS-NEXT: sra a7, a7, a4
+; RV32IA-TSO-NOZACAS-NEXT: bge a7, a1, .LBB48_3
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB48_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: xor a6, a5, a1
+; RV32IA-TSO-NOZACAS-NEXT: and a6, a6, a3
+; RV32IA-TSO-NOZACAS-NEXT: xor a6, a5, a6
+; RV32IA-TSO-NOZACAS-NEXT: .LBB48_3: # in Loop: Header=BB48_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: sc.w a6, a6, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: bnez a6, .LBB48_1
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.4:
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a5, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_max_i8_acq_rel:
; RV64I: # %bb.0:
@@ -6274,6 +8383,64 @@ define i8 @atomicrmw_max_i8_acq_rel(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a5, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_max_i8_acq_rel:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: li a3, 255
+; RV32IA-WMO-ZACAS-NEXT: slli a1, a1, 24
+; RV32IA-WMO-ZACAS-NEXT: andi a4, a0, 24
+; RV32IA-WMO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: srai a1, a1, 24
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: xori a4, a4, 24
+; RV32IA-WMO-ZACAS-NEXT: .LBB48_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT: and a7, a5, a3
+; RV32IA-WMO-ZACAS-NEXT: mv a6, a5
+; RV32IA-WMO-ZACAS-NEXT: sll a7, a7, a4
+; RV32IA-WMO-ZACAS-NEXT: sra a7, a7, a4
+; RV32IA-WMO-ZACAS-NEXT: bge a7, a1, .LBB48_3
+; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB48_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT: xor a6, a5, a1
+; RV32IA-WMO-ZACAS-NEXT: and a6, a6, a3
+; RV32IA-WMO-ZACAS-NEXT: xor a6, a5, a6
+; RV32IA-WMO-ZACAS-NEXT: .LBB48_3: # in Loop: Header=BB48_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a6, a6, (a2)
+; RV32IA-WMO-ZACAS-NEXT: bnez a6, .LBB48_1
+; RV32IA-WMO-ZACAS-NEXT: # %bb.4:
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a5, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_max_i8_acq_rel:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: li a3, 255
+; RV32IA-TSO-ZACAS-NEXT: slli a1, a1, 24
+; RV32IA-TSO-ZACAS-NEXT: andi a4, a0, 24
+; RV32IA-TSO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: srai a1, a1, 24
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: xori a4, a4, 24
+; RV32IA-TSO-ZACAS-NEXT: .LBB48_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT: lr.w a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT: and a7, a5, a3
+; RV32IA-TSO-ZACAS-NEXT: mv a6, a5
+; RV32IA-TSO-ZACAS-NEXT: sll a7, a7, a4
+; RV32IA-TSO-ZACAS-NEXT: sra a7, a7, a4
+; RV32IA-TSO-ZACAS-NEXT: bge a7, a1, .LBB48_3
+; RV32IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB48_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT: xor a6, a5, a1
+; RV32IA-TSO-ZACAS-NEXT: and a6, a6, a3
+; RV32IA-TSO-ZACAS-NEXT: xor a6, a5, a6
+; RV32IA-TSO-ZACAS-NEXT: .LBB48_3: # in Loop: Header=BB48_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT: sc.w a6, a6, (a2)
+; RV32IA-TSO-ZACAS-NEXT: bnez a6, .LBB48_1
+; RV32IA-TSO-ZACAS-NEXT: # %bb.4:
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a5, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_max_i8_acq_rel:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -6332,6 +8499,16 @@ define i8 @atomicrmw_max_i8_acq_rel(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a5, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_max_i8_acq_rel:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amomax.b.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_max_i8_acq_rel:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amomax.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_max_i8_acq_rel:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amomax.b.aqrl a0, a1, (a0)
@@ -6388,34 +8565,34 @@ define i8 @atomicrmw_max_i8_seq_cst(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
-; RV32IA-LABEL: atomicrmw_max_i8_seq_cst:
-; RV32IA: # %bb.0:
-; RV32IA-NEXT: andi a2, a0, -4
-; RV32IA-NEXT: slli a0, a0, 3
-; RV32IA-NEXT: li a3, 255
-; RV32IA-NEXT: slli a1, a1, 24
-; RV32IA-NEXT: andi a4, a0, 24
-; RV32IA-NEXT: sll a3, a3, a0
-; RV32IA-NEXT: srai a1, a1, 24
-; RV32IA-NEXT: sll a1, a1, a0
-; RV32IA-NEXT: xori a4, a4, 24
-; RV32IA-NEXT: .LBB49_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: lr.w.aqrl a5, (a2)
-; RV32IA-NEXT: and a7, a5, a3
-; RV32IA-NEXT: mv a6, a5
-; RV32IA-NEXT: sll a7, a7, a4
-; RV32IA-NEXT: sra a7, a7, a4
-; RV32IA-NEXT: bge a7, a1, .LBB49_3
-; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB49_1 Depth=1
-; RV32IA-NEXT: xor a6, a5, a1
-; RV32IA-NEXT: and a6, a6, a3
-; RV32IA-NEXT: xor a6, a5, a6
-; RV32IA-NEXT: .LBB49_3: # in Loop: Header=BB49_1 Depth=1
-; RV32IA-NEXT: sc.w.rl a6, a6, (a2)
-; RV32IA-NEXT: bnez a6, .LBB49_1
-; RV32IA-NEXT: # %bb.4:
-; RV32IA-NEXT: srl a0, a5, a0
-; RV32IA-NEXT: ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_max_i8_seq_cst:
+; RV32IA-NOZACAS: # %bb.0:
+; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT: li a3, 255
+; RV32IA-NOZACAS-NEXT: slli a1, a1, 24
+; RV32IA-NOZACAS-NEXT: andi a4, a0, 24
+; RV32IA-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-NOZACAS-NEXT: srai a1, a1, 24
+; RV32IA-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-NOZACAS-NEXT: xori a4, a4, 24
+; RV32IA-NOZACAS-NEXT: .LBB49_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NOZACAS-NEXT: lr.w.aqrl a5, (a2)
+; RV32IA-NOZACAS-NEXT: and a7, a5, a3
+; RV32IA-NOZACAS-NEXT: mv a6, a5
+; RV32IA-NOZACAS-NEXT: sll a7, a7, a4
+; RV32IA-NOZACAS-NEXT: sra a7, a7, a4
+; RV32IA-NOZACAS-NEXT: bge a7, a1, .LBB49_3
+; RV32IA-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB49_1 Depth=1
+; RV32IA-NOZACAS-NEXT: xor a6, a5, a1
+; RV32IA-NOZACAS-NEXT: and a6, a6, a3
+; RV32IA-NOZACAS-NEXT: xor a6, a5, a6
+; RV32IA-NOZACAS-NEXT: .LBB49_3: # in Loop: Header=BB49_1 Depth=1
+; RV32IA-NOZACAS-NEXT: sc.w.rl a6, a6, (a2)
+; RV32IA-NOZACAS-NEXT: bnez a6, .LBB49_1
+; RV32IA-NOZACAS-NEXT: # %bb.4:
+; RV32IA-NOZACAS-NEXT: srl a0, a5, a0
+; RV32IA-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_max_i8_seq_cst:
; RV64I: # %bb.0:
@@ -6488,6 +8665,35 @@ define i8 @atomicrmw_max_i8_seq_cst(ptr %a, i8 %b) nounwind {
; RV64IA-NOZACAS-NEXT: srlw a0, a5, a0
; RV64IA-NOZACAS-NEXT: ret
;
+; RV32IA-ZACAS-LABEL: atomicrmw_max_i8_seq_cst:
+; RV32IA-ZACAS: # %bb.0:
+; RV32IA-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-ZACAS-NEXT: li a3, 255
+; RV32IA-ZACAS-NEXT: slli a1, a1, 24
+; RV32IA-ZACAS-NEXT: andi a4, a0, 24
+; RV32IA-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-ZACAS-NEXT: srai a1, a1, 24
+; RV32IA-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-ZACAS-NEXT: xori a4, a4, 24
+; RV32IA-ZACAS-NEXT: .LBB49_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT: lr.w.aqrl a5, (a2)
+; RV32IA-ZACAS-NEXT: and a7, a5, a3
+; RV32IA-ZACAS-NEXT: mv a6, a5
+; RV32IA-ZACAS-NEXT: sll a7, a7, a4
+; RV32IA-ZACAS-NEXT: sra a7, a7, a4
+; RV32IA-ZACAS-NEXT: bge a7, a1, .LBB49_3
+; RV32IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB49_1 Depth=1
+; RV32IA-ZACAS-NEXT: xor a6, a5, a1
+; RV32IA-ZACAS-NEXT: and a6, a6, a3
+; RV32IA-ZACAS-NEXT: xor a6, a5, a6
+; RV32IA-ZACAS-NEXT: .LBB49_3: # in Loop: Header=BB49_1 Depth=1
+; RV32IA-ZACAS-NEXT: sc.w.rl a6, a6, (a2)
+; RV32IA-ZACAS-NEXT: bnez a6, .LBB49_1
+; RV32IA-ZACAS-NEXT: # %bb.4:
+; RV32IA-ZACAS-NEXT: srl a0, a5, a0
+; RV32IA-ZACAS-NEXT: ret
+;
; RV64IA-ZACAS-LABEL: atomicrmw_max_i8_seq_cst:
; RV64IA-ZACAS: # %bb.0:
; RV64IA-ZACAS-NEXT: andi a2, a0, -4
@@ -6517,6 +8723,16 @@ define i8 @atomicrmw_max_i8_seq_cst(ptr %a, i8 %b) nounwind {
; RV64IA-ZACAS-NEXT: srlw a0, a5, a0
; RV64IA-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_max_i8_seq_cst:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amomax.b.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_max_i8_seq_cst:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amomax.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_max_i8_seq_cst:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amomax.b.aqrl a0, a1, (a0)
@@ -6573,34 +8789,34 @@ define i8 @atomicrmw_min_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
-; RV32IA-LABEL: atomicrmw_min_i8_monotonic:
-; RV32IA: # %bb.0:
-; RV32IA-NEXT: andi a2, a0, -4
-; RV32IA-NEXT: slli a0, a0, 3
-; RV32IA-NEXT: li a3, 255
-; RV32IA-NEXT: slli a1, a1, 24
-; RV32IA-NEXT: andi a4, a0, 24
-; RV32IA-NEXT: sll a3, a3, a0
-; RV32IA-NEXT: srai a1, a1, 24
-; RV32IA-NEXT: sll a1, a1, a0
-; RV32IA-NEXT: xori a4, a4, 24
-; RV32IA-NEXT: .LBB50_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: lr.w a5, (a2)
-; RV32IA-NEXT: and a7, a5, a3
-; RV32IA-NEXT: mv a6, a5
-; RV32IA-NEXT: sll a7, a7, a4
-; RV32IA-NEXT: sra a7, a7, a4
-; RV32IA-NEXT: bge a1, a7, .LBB50_3
-; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB50_1 Depth=1
-; RV32IA-NEXT: xor a6, a5, a1
-; RV32IA-NEXT: and a6, a6, a3
-; RV32IA-NEXT: xor a6, a5, a6
-; RV32IA-NEXT: .LBB50_3: # in Loop: Header=BB50_1 Depth=1
-; RV32IA-NEXT: sc.w a6, a6, (a2)
-; RV32IA-NEXT: bnez a6, .LBB50_1
-; RV32IA-NEXT: # %bb.4:
-; RV32IA-NEXT: srl a0, a5, a0
-; RV32IA-NEXT: ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_min_i8_monotonic:
+; RV32IA-NOZACAS: # %bb.0:
+; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT: li a3, 255
+; RV32IA-NOZACAS-NEXT: slli a1, a1, 24
+; RV32IA-NOZACAS-NEXT: andi a4, a0, 24
+; RV32IA-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-NOZACAS-NEXT: srai a1, a1, 24
+; RV32IA-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-NOZACAS-NEXT: xori a4, a4, 24
+; RV32IA-NOZACAS-NEXT: .LBB50_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NOZACAS-NEXT: lr.w a5, (a2)
+; RV32IA-NOZACAS-NEXT: and a7, a5, a3
+; RV32IA-NOZACAS-NEXT: mv a6, a5
+; RV32IA-NOZACAS-NEXT: sll a7, a7, a4
+; RV32IA-NOZACAS-NEXT: sra a7, a7, a4
+; RV32IA-NOZACAS-NEXT: bge a1, a7, .LBB50_3
+; RV32IA-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB50_1 Depth=1
+; RV32IA-NOZACAS-NEXT: xor a6, a5, a1
+; RV32IA-NOZACAS-NEXT: and a6, a6, a3
+; RV32IA-NOZACAS-NEXT: xor a6, a5, a6
+; RV32IA-NOZACAS-NEXT: .LBB50_3: # in Loop: Header=BB50_1 Depth=1
+; RV32IA-NOZACAS-NEXT: sc.w a6, a6, (a2)
+; RV32IA-NOZACAS-NEXT: bnez a6, .LBB50_1
+; RV32IA-NOZACAS-NEXT: # %bb.4:
+; RV32IA-NOZACAS-NEXT: srl a0, a5, a0
+; RV32IA-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_min_i8_monotonic:
; RV64I: # %bb.0:
@@ -6673,6 +8889,35 @@ define i8 @atomicrmw_min_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV64IA-NOZACAS-NEXT: srlw a0, a5, a0
; RV64IA-NOZACAS-NEXT: ret
;
+; RV32IA-ZACAS-LABEL: atomicrmw_min_i8_monotonic:
+; RV32IA-ZACAS: # %bb.0:
+; RV32IA-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-ZACAS-NEXT: li a3, 255
+; RV32IA-ZACAS-NEXT: slli a1, a1, 24
+; RV32IA-ZACAS-NEXT: andi a4, a0, 24
+; RV32IA-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-ZACAS-NEXT: srai a1, a1, 24
+; RV32IA-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-ZACAS-NEXT: xori a4, a4, 24
+; RV32IA-ZACAS-NEXT: .LBB50_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT: lr.w a5, (a2)
+; RV32IA-ZACAS-NEXT: and a7, a5, a3
+; RV32IA-ZACAS-NEXT: mv a6, a5
+; RV32IA-ZACAS-NEXT: sll a7, a7, a4
+; RV32IA-ZACAS-NEXT: sra a7, a7, a4
+; RV32IA-ZACAS-NEXT: bge a1, a7, .LBB50_3
+; RV32IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB50_1 Depth=1
+; RV32IA-ZACAS-NEXT: xor a6, a5, a1
+; RV32IA-ZACAS-NEXT: and a6, a6, a3
+; RV32IA-ZACAS-NEXT: xor a6, a5, a6
+; RV32IA-ZACAS-NEXT: .LBB50_3: # in Loop: Header=BB50_1 Depth=1
+; RV32IA-ZACAS-NEXT: sc.w a6, a6, (a2)
+; RV32IA-ZACAS-NEXT: bnez a6, .LBB50_1
+; RV32IA-ZACAS-NEXT: # %bb.4:
+; RV32IA-ZACAS-NEXT: srl a0, a5, a0
+; RV32IA-ZACAS-NEXT: ret
+;
; RV64IA-ZACAS-LABEL: atomicrmw_min_i8_monotonic:
; RV64IA-ZACAS: # %bb.0:
; RV64IA-ZACAS-NEXT: andi a2, a0, -4
@@ -6702,6 +8947,16 @@ define i8 @atomicrmw_min_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV64IA-ZACAS-NEXT: srlw a0, a5, a0
; RV64IA-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_min_i8_monotonic:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amomin.b a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_min_i8_monotonic:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amomin.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_min_i8_monotonic:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amomin.b a0, a1, (a0)
@@ -6758,63 +9013,63 @@ define i8 @atomicrmw_min_i8_acquire(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_min_i8_acquire:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: li a3, 255
-; RV32IA-WMO-NEXT: slli a1, a1, 24
-; RV32IA-WMO-NEXT: andi a4, a0, 24
-; RV32IA-WMO-NEXT: sll a3, a3, a0
-; RV32IA-WMO-NEXT: srai a1, a1, 24
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: xori a4, a4, 24
-; RV32IA-WMO-NEXT: .LBB51_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT: lr.w.aq a5, (a2)
-; RV32IA-WMO-NEXT: and a7, a5, a3
-; RV32IA-WMO-NEXT: mv a6, a5
-; RV32IA-WMO-NEXT: sll a7, a7, a4
-; RV32IA-WMO-NEXT: sra a7, a7, a4
-; RV32IA-WMO-NEXT: bge a1, a7, .LBB51_3
-; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB51_1 Depth=1
-; RV32IA-WMO-NEXT: xor a6, a5, a1
-; RV32IA-WMO-NEXT: and a6, a6, a3
-; RV32IA-WMO-NEXT: xor a6, a5, a6
-; RV32IA-WMO-NEXT: .LBB51_3: # in Loop: Header=BB51_1 Depth=1
-; RV32IA-WMO-NEXT: sc.w a6, a6, (a2)
-; RV32IA-WMO-NEXT: bnez a6, .LBB51_1
-; RV32IA-WMO-NEXT: # %bb.4:
-; RV32IA-WMO-NEXT: srl a0, a5, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_min_i8_acquire:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: li a3, 255
+; RV32IA-WMO-NOZACAS-NEXT: slli a1, a1, 24
+; RV32IA-WMO-NOZACAS-NEXT: andi a4, a0, 24
+; RV32IA-WMO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: srai a1, a1, 24
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: xori a4, a4, 24
+; RV32IA-WMO-NOZACAS-NEXT: .LBB51_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: lr.w.aq a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: and a7, a5, a3
+; RV32IA-WMO-NOZACAS-NEXT: mv a6, a5
+; RV32IA-WMO-NOZACAS-NEXT: sll a7, a7, a4
+; RV32IA-WMO-NOZACAS-NEXT: sra a7, a7, a4
+; RV32IA-WMO-NOZACAS-NEXT: bge a1, a7, .LBB51_3
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB51_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: xor a6, a5, a1
+; RV32IA-WMO-NOZACAS-NEXT: and a6, a6, a3
+; RV32IA-WMO-NOZACAS-NEXT: xor a6, a5, a6
+; RV32IA-WMO-NOZACAS-NEXT: .LBB51_3: # in Loop: Header=BB51_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: sc.w a6, a6, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: bnez a6, .LBB51_1
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.4:
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a5, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_min_i8_acquire:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: li a3, 255
-; RV32IA-TSO-NEXT: slli a1, a1, 24
-; RV32IA-TSO-NEXT: andi a4, a0, 24
-; RV32IA-TSO-NEXT: sll a3, a3, a0
-; RV32IA-TSO-NEXT: srai a1, a1, 24
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: xori a4, a4, 24
-; RV32IA-TSO-NEXT: .LBB51_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT: lr.w a5, (a2)
-; RV32IA-TSO-NEXT: and a7, a5, a3
-; RV32IA-TSO-NEXT: mv a6, a5
-; RV32IA-TSO-NEXT: sll a7, a7, a4
-; RV32IA-TSO-NEXT: sra a7, a7, a4
-; RV32IA-TSO-NEXT: bge a1, a7, .LBB51_3
-; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB51_1 Depth=1
-; RV32IA-TSO-NEXT: xor a6, a5, a1
-; RV32IA-TSO-NEXT: and a6, a6, a3
-; RV32IA-TSO-NEXT: xor a6, a5, a6
-; RV32IA-TSO-NEXT: .LBB51_3: # in Loop: Header=BB51_1 Depth=1
-; RV32IA-TSO-NEXT: sc.w a6, a6, (a2)
-; RV32IA-TSO-NEXT: bnez a6, .LBB51_1
-; RV32IA-TSO-NEXT: # %bb.4:
-; RV32IA-TSO-NEXT: srl a0, a5, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_min_i8_acquire:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: li a3, 255
+; RV32IA-TSO-NOZACAS-NEXT: slli a1, a1, 24
+; RV32IA-TSO-NOZACAS-NEXT: andi a4, a0, 24
+; RV32IA-TSO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: srai a1, a1, 24
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: xori a4, a4, 24
+; RV32IA-TSO-NOZACAS-NEXT: .LBB51_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: lr.w a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: and a7, a5, a3
+; RV32IA-TSO-NOZACAS-NEXT: mv a6, a5
+; RV32IA-TSO-NOZACAS-NEXT: sll a7, a7, a4
+; RV32IA-TSO-NOZACAS-NEXT: sra a7, a7, a4
+; RV32IA-TSO-NOZACAS-NEXT: bge a1, a7, .LBB51_3
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB51_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: xor a6, a5, a1
+; RV32IA-TSO-NOZACAS-NEXT: and a6, a6, a3
+; RV32IA-TSO-NOZACAS-NEXT: xor a6, a5, a6
+; RV32IA-TSO-NOZACAS-NEXT: .LBB51_3: # in Loop: Header=BB51_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: sc.w a6, a6, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: bnez a6, .LBB51_1
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.4:
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a5, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_min_i8_acquire:
; RV64I: # %bb.0:
@@ -6916,6 +9171,64 @@ define i8 @atomicrmw_min_i8_acquire(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a5, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_min_i8_acquire:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: li a3, 255
+; RV32IA-WMO-ZACAS-NEXT: slli a1, a1, 24
+; RV32IA-WMO-ZACAS-NEXT: andi a4, a0, 24
+; RV32IA-WMO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: srai a1, a1, 24
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: xori a4, a4, 24
+; RV32IA-WMO-ZACAS-NEXT: .LBB51_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT: and a7, a5, a3
+; RV32IA-WMO-ZACAS-NEXT: mv a6, a5
+; RV32IA-WMO-ZACAS-NEXT: sll a7, a7, a4
+; RV32IA-WMO-ZACAS-NEXT: sra a7, a7, a4
+; RV32IA-WMO-ZACAS-NEXT: bge a1, a7, .LBB51_3
+; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB51_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT: xor a6, a5, a1
+; RV32IA-WMO-ZACAS-NEXT: and a6, a6, a3
+; RV32IA-WMO-ZACAS-NEXT: xor a6, a5, a6
+; RV32IA-WMO-ZACAS-NEXT: .LBB51_3: # in Loop: Header=BB51_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT: sc.w a6, a6, (a2)
+; RV32IA-WMO-ZACAS-NEXT: bnez a6, .LBB51_1
+; RV32IA-WMO-ZACAS-NEXT: # %bb.4:
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a5, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_min_i8_acquire:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: li a3, 255
+; RV32IA-TSO-ZACAS-NEXT: slli a1, a1, 24
+; RV32IA-TSO-ZACAS-NEXT: andi a4, a0, 24
+; RV32IA-TSO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: srai a1, a1, 24
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: xori a4, a4, 24
+; RV32IA-TSO-ZACAS-NEXT: .LBB51_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT: lr.w a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT: and a7, a5, a3
+; RV32IA-TSO-ZACAS-NEXT: mv a6, a5
+; RV32IA-TSO-ZACAS-NEXT: sll a7, a7, a4
+; RV32IA-TSO-ZACAS-NEXT: sra a7, a7, a4
+; RV32IA-TSO-ZACAS-NEXT: bge a1, a7, .LBB51_3
+; RV32IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB51_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT: xor a6, a5, a1
+; RV32IA-TSO-ZACAS-NEXT: and a6, a6, a3
+; RV32IA-TSO-ZACAS-NEXT: xor a6, a5, a6
+; RV32IA-TSO-ZACAS-NEXT: .LBB51_3: # in Loop: Header=BB51_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT: sc.w a6, a6, (a2)
+; RV32IA-TSO-ZACAS-NEXT: bnez a6, .LBB51_1
+; RV32IA-TSO-ZACAS-NEXT: # %bb.4:
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a5, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_min_i8_acquire:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -6974,6 +9287,16 @@ define i8 @atomicrmw_min_i8_acquire(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a5, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_min_i8_acquire:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amomin.b.aq a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_min_i8_acquire:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amomin.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_min_i8_acquire:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amomin.b.aq a0, a1, (a0)
@@ -7030,63 +9353,63 @@ define i8 @atomicrmw_min_i8_release(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_min_i8_release:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: li a3, 255
-; RV32IA-WMO-NEXT: slli a1, a1, 24
-; RV32IA-WMO-NEXT: andi a4, a0, 24
-; RV32IA-WMO-NEXT: sll a3, a3, a0
-; RV32IA-WMO-NEXT: srai a1, a1, 24
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: xori a4, a4, 24
-; RV32IA-WMO-NEXT: .LBB52_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT: lr.w a5, (a2)
-; RV32IA-WMO-NEXT: and a7, a5, a3
-; RV32IA-WMO-NEXT: mv a6, a5
-; RV32IA-WMO-NEXT: sll a7, a7, a4
-; RV32IA-WMO-NEXT: sra a7, a7, a4
-; RV32IA-WMO-NEXT: bge a1, a7, .LBB52_3
-; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB52_1 Depth=1
-; RV32IA-WMO-NEXT: xor a6, a5, a1
-; RV32IA-WMO-NEXT: and a6, a6, a3
-; RV32IA-WMO-NEXT: xor a6, a5, a6
-; RV32IA-WMO-NEXT: .LBB52_3: # in Loop: Header=BB52_1 Depth=1
-; RV32IA-WMO-NEXT: sc.w.rl a6, a6, (a2)
-; RV32IA-WMO-NEXT: bnez a6, .LBB52_1
-; RV32IA-WMO-NEXT: # %bb.4:
-; RV32IA-WMO-NEXT: srl a0, a5, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_min_i8_release:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: li a3, 255
+; RV32IA-WMO-NOZACAS-NEXT: slli a1, a1, 24
+; RV32IA-WMO-NOZACAS-NEXT: andi a4, a0, 24
+; RV32IA-WMO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: srai a1, a1, 24
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: xori a4, a4, 24
+; RV32IA-WMO-NOZACAS-NEXT: .LBB52_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: lr.w a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: and a7, a5, a3
+; RV32IA-WMO-NOZACAS-NEXT: mv a6, a5
+; RV32IA-WMO-NOZACAS-NEXT: sll a7, a7, a4
+; RV32IA-WMO-NOZACAS-NEXT: sra a7, a7, a4
+; RV32IA-WMO-NOZACAS-NEXT: bge a1, a7, .LBB52_3
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB52_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: xor a6, a5, a1
+; RV32IA-WMO-NOZACAS-NEXT: and a6, a6, a3
+; RV32IA-WMO-NOZACAS-NEXT: xor a6, a5, a6
+; RV32IA-WMO-NOZACAS-NEXT: .LBB52_3: # in Loop: Header=BB52_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: sc.w.rl a6, a6, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: bnez a6, .LBB52_1
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.4:
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a5, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_min_i8_release:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: li a3, 255
-; RV32IA-TSO-NEXT: slli a1, a1, 24
-; RV32IA-TSO-NEXT: andi a4, a0, 24
-; RV32IA-TSO-NEXT: sll a3, a3, a0
-; RV32IA-TSO-NEXT: srai a1, a1, 24
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: xori a4, a4, 24
-; RV32IA-TSO-NEXT: .LBB52_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT: lr.w a5, (a2)
-; RV32IA-TSO-NEXT: and a7, a5, a3
-; RV32IA-TSO-NEXT: mv a6, a5
-; RV32IA-TSO-NEXT: sll a7, a7, a4
-; RV32IA-TSO-NEXT: sra a7, a7, a4
-; RV32IA-TSO-NEXT: bge a1, a7, .LBB52_3
-; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB52_1 Depth=1
-; RV32IA-TSO-NEXT: xor a6, a5, a1
-; RV32IA-TSO-NEXT: and a6, a6, a3
-; RV32IA-TSO-NEXT: xor a6, a5, a6
-; RV32IA-TSO-NEXT: .LBB52_3: # in Loop: Header=BB52_1 Depth=1
-; RV32IA-TSO-NEXT: sc.w a6, a6, (a2)
-; RV32IA-TSO-NEXT: bnez a6, .LBB52_1
-; RV32IA-TSO-NEXT: # %bb.4:
-; RV32IA-TSO-NEXT: srl a0, a5, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_min_i8_release:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: li a3, 255
+; RV32IA-TSO-NOZACAS-NEXT: slli a1, a1, 24
+; RV32IA-TSO-NOZACAS-NEXT: andi a4, a0, 24
+; RV32IA-TSO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: srai a1, a1, 24
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: xori a4, a4, 24
+; RV32IA-TSO-NOZACAS-NEXT: .LBB52_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: lr.w a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: and a7, a5, a3
+; RV32IA-TSO-NOZACAS-NEXT: mv a6, a5
+; RV32IA-TSO-NOZACAS-NEXT: sll a7, a7, a4
+; RV32IA-TSO-NOZACAS-NEXT: sra a7, a7, a4
+; RV32IA-TSO-NOZACAS-NEXT: bge a1, a7, .LBB52_3
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB52_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: xor a6, a5, a1
+; RV32IA-TSO-NOZACAS-NEXT: and a6, a6, a3
+; RV32IA-TSO-NOZACAS-NEXT: xor a6, a5, a6
+; RV32IA-TSO-NOZACAS-NEXT: .LBB52_3: # in Loop: Header=BB52_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: sc.w a6, a6, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: bnez a6, .LBB52_1
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.4:
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a5, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_min_i8_release:
; RV64I: # %bb.0:
@@ -7188,6 +9511,64 @@ define i8 @atomicrmw_min_i8_release(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a5, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_min_i8_release:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: li a3, 255
+; RV32IA-WMO-ZACAS-NEXT: slli a1, a1, 24
+; RV32IA-WMO-ZACAS-NEXT: andi a4, a0, 24
+; RV32IA-WMO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: srai a1, a1, 24
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: xori a4, a4, 24
+; RV32IA-WMO-ZACAS-NEXT: .LBB52_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT: lr.w a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT: and a7, a5, a3
+; RV32IA-WMO-ZACAS-NEXT: mv a6, a5
+; RV32IA-WMO-ZACAS-NEXT: sll a7, a7, a4
+; RV32IA-WMO-ZACAS-NEXT: sra a7, a7, a4
+; RV32IA-WMO-ZACAS-NEXT: bge a1, a7, .LBB52_3
+; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB52_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT: xor a6, a5, a1
+; RV32IA-WMO-ZACAS-NEXT: and a6, a6, a3
+; RV32IA-WMO-ZACAS-NEXT: xor a6, a5, a6
+; RV32IA-WMO-ZACAS-NEXT: .LBB52_3: # in Loop: Header=BB52_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a6, a6, (a2)
+; RV32IA-WMO-ZACAS-NEXT: bnez a6, .LBB52_1
+; RV32IA-WMO-ZACAS-NEXT: # %bb.4:
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a5, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_min_i8_release:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: li a3, 255
+; RV32IA-TSO-ZACAS-NEXT: slli a1, a1, 24
+; RV32IA-TSO-ZACAS-NEXT: andi a4, a0, 24
+; RV32IA-TSO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: srai a1, a1, 24
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: xori a4, a4, 24
+; RV32IA-TSO-ZACAS-NEXT: .LBB52_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT: lr.w a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT: and a7, a5, a3
+; RV32IA-TSO-ZACAS-NEXT: mv a6, a5
+; RV32IA-TSO-ZACAS-NEXT: sll a7, a7, a4
+; RV32IA-TSO-ZACAS-NEXT: sra a7, a7, a4
+; RV32IA-TSO-ZACAS-NEXT: bge a1, a7, .LBB52_3
+; RV32IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB52_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT: xor a6, a5, a1
+; RV32IA-TSO-ZACAS-NEXT: and a6, a6, a3
+; RV32IA-TSO-ZACAS-NEXT: xor a6, a5, a6
+; RV32IA-TSO-ZACAS-NEXT: .LBB52_3: # in Loop: Header=BB52_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT: sc.w a6, a6, (a2)
+; RV32IA-TSO-ZACAS-NEXT: bnez a6, .LBB52_1
+; RV32IA-TSO-ZACAS-NEXT: # %bb.4:
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a5, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_min_i8_release:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -7246,6 +9627,16 @@ define i8 @atomicrmw_min_i8_release(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a5, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_min_i8_release:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amomin.b.rl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_min_i8_release:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amomin.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_min_i8_release:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amomin.b.rl a0, a1, (a0)
@@ -7302,63 +9693,63 @@ define i8 @atomicrmw_min_i8_acq_rel(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_min_i8_acq_rel:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: li a3, 255
-; RV32IA-WMO-NEXT: slli a1, a1, 24
-; RV32IA-WMO-NEXT: andi a4, a0, 24
-; RV32IA-WMO-NEXT: sll a3, a3, a0
-; RV32IA-WMO-NEXT: srai a1, a1, 24
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: xori a4, a4, 24
-; RV32IA-WMO-NEXT: .LBB53_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT: lr.w.aq a5, (a2)
-; RV32IA-WMO-NEXT: and a7, a5, a3
-; RV32IA-WMO-NEXT: mv a6, a5
-; RV32IA-WMO-NEXT: sll a7, a7, a4
-; RV32IA-WMO-NEXT: sra a7, a7, a4
-; RV32IA-WMO-NEXT: bge a1, a7, .LBB53_3
-; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB53_1 Depth=1
-; RV32IA-WMO-NEXT: xor a6, a5, a1
-; RV32IA-WMO-NEXT: and a6, a6, a3
-; RV32IA-WMO-NEXT: xor a6, a5, a6
-; RV32IA-WMO-NEXT: .LBB53_3: # in Loop: Header=BB53_1 Depth=1
-; RV32IA-WMO-NEXT: sc.w.rl a6, a6, (a2)
-; RV32IA-WMO-NEXT: bnez a6, .LBB53_1
-; RV32IA-WMO-NEXT: # %bb.4:
-; RV32IA-WMO-NEXT: srl a0, a5, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_min_i8_acq_rel:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: li a3, 255
+; RV32IA-WMO-NOZACAS-NEXT: slli a1, a1, 24
+; RV32IA-WMO-NOZACAS-NEXT: andi a4, a0, 24
+; RV32IA-WMO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: srai a1, a1, 24
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: xori a4, a4, 24
+; RV32IA-WMO-NOZACAS-NEXT: .LBB53_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: lr.w.aq a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: and a7, a5, a3
+; RV32IA-WMO-NOZACAS-NEXT: mv a6, a5
+; RV32IA-WMO-NOZACAS-NEXT: sll a7, a7, a4
+; RV32IA-WMO-NOZACAS-NEXT: sra a7, a7, a4
+; RV32IA-WMO-NOZACAS-NEXT: bge a1, a7, .LBB53_3
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB53_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: xor a6, a5, a1
+; RV32IA-WMO-NOZACAS-NEXT: and a6, a6, a3
+; RV32IA-WMO-NOZACAS-NEXT: xor a6, a5, a6
+; RV32IA-WMO-NOZACAS-NEXT: .LBB53_3: # in Loop: Header=BB53_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: sc.w.rl a6, a6, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: bnez a6, .LBB53_1
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.4:
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a5, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_min_i8_acq_rel:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: li a3, 255
-; RV32IA-TSO-NEXT: slli a1, a1, 24
-; RV32IA-TSO-NEXT: andi a4, a0, 24
-; RV32IA-TSO-NEXT: sll a3, a3, a0
-; RV32IA-TSO-NEXT: srai a1, a1, 24
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: xori a4, a4, 24
-; RV32IA-TSO-NEXT: .LBB53_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT: lr.w a5, (a2)
-; RV32IA-TSO-NEXT: and a7, a5, a3
-; RV32IA-TSO-NEXT: mv a6, a5
-; RV32IA-TSO-NEXT: sll a7, a7, a4
-; RV32IA-TSO-NEXT: sra a7, a7, a4
-; RV32IA-TSO-NEXT: bge a1, a7, .LBB53_3
-; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB53_1 Depth=1
-; RV32IA-TSO-NEXT: xor a6, a5, a1
-; RV32IA-TSO-NEXT: and a6, a6, a3
-; RV32IA-TSO-NEXT: xor a6, a5, a6
-; RV32IA-TSO-NEXT: .LBB53_3: # in Loop: Header=BB53_1 Depth=1
-; RV32IA-TSO-NEXT: sc.w a6, a6, (a2)
-; RV32IA-TSO-NEXT: bnez a6, .LBB53_1
-; RV32IA-TSO-NEXT: # %bb.4:
-; RV32IA-TSO-NEXT: srl a0, a5, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_min_i8_acq_rel:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: li a3, 255
+; RV32IA-TSO-NOZACAS-NEXT: slli a1, a1, 24
+; RV32IA-TSO-NOZACAS-NEXT: andi a4, a0, 24
+; RV32IA-TSO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: srai a1, a1, 24
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: xori a4, a4, 24
+; RV32IA-TSO-NOZACAS-NEXT: .LBB53_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: lr.w a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: and a7, a5, a3
+; RV32IA-TSO-NOZACAS-NEXT: mv a6, a5
+; RV32IA-TSO-NOZACAS-NEXT: sll a7, a7, a4
+; RV32IA-TSO-NOZACAS-NEXT: sra a7, a7, a4
+; RV32IA-TSO-NOZACAS-NEXT: bge a1, a7, .LBB53_3
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB53_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: xor a6, a5, a1
+; RV32IA-TSO-NOZACAS-NEXT: and a6, a6, a3
+; RV32IA-TSO-NOZACAS-NEXT: xor a6, a5, a6
+; RV32IA-TSO-NOZACAS-NEXT: .LBB53_3: # in Loop: Header=BB53_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: sc.w a6, a6, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: bnez a6, .LBB53_1
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.4:
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a5, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_min_i8_acq_rel:
; RV64I: # %bb.0:
@@ -7460,6 +9851,64 @@ define i8 @atomicrmw_min_i8_acq_rel(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a5, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_min_i8_acq_rel:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: li a3, 255
+; RV32IA-WMO-ZACAS-NEXT: slli a1, a1, 24
+; RV32IA-WMO-ZACAS-NEXT: andi a4, a0, 24
+; RV32IA-WMO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: srai a1, a1, 24
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: xori a4, a4, 24
+; RV32IA-WMO-ZACAS-NEXT: .LBB53_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT: and a7, a5, a3
+; RV32IA-WMO-ZACAS-NEXT: mv a6, a5
+; RV32IA-WMO-ZACAS-NEXT: sll a7, a7, a4
+; RV32IA-WMO-ZACAS-NEXT: sra a7, a7, a4
+; RV32IA-WMO-ZACAS-NEXT: bge a1, a7, .LBB53_3
+; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB53_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT: xor a6, a5, a1
+; RV32IA-WMO-ZACAS-NEXT: and a6, a6, a3
+; RV32IA-WMO-ZACAS-NEXT: xor a6, a5, a6
+; RV32IA-WMO-ZACAS-NEXT: .LBB53_3: # in Loop: Header=BB53_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a6, a6, (a2)
+; RV32IA-WMO-ZACAS-NEXT: bnez a6, .LBB53_1
+; RV32IA-WMO-ZACAS-NEXT: # %bb.4:
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a5, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_min_i8_acq_rel:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: li a3, 255
+; RV32IA-TSO-ZACAS-NEXT: slli a1, a1, 24
+; RV32IA-TSO-ZACAS-NEXT: andi a4, a0, 24
+; RV32IA-TSO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: srai a1, a1, 24
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: xori a4, a4, 24
+; RV32IA-TSO-ZACAS-NEXT: .LBB53_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT: lr.w a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT: and a7, a5, a3
+; RV32IA-TSO-ZACAS-NEXT: mv a6, a5
+; RV32IA-TSO-ZACAS-NEXT: sll a7, a7, a4
+; RV32IA-TSO-ZACAS-NEXT: sra a7, a7, a4
+; RV32IA-TSO-ZACAS-NEXT: bge a1, a7, .LBB53_3
+; RV32IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB53_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT: xor a6, a5, a1
+; RV32IA-TSO-ZACAS-NEXT: and a6, a6, a3
+; RV32IA-TSO-ZACAS-NEXT: xor a6, a5, a6
+; RV32IA-TSO-ZACAS-NEXT: .LBB53_3: # in Loop: Header=BB53_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT: sc.w a6, a6, (a2)
+; RV32IA-TSO-ZACAS-NEXT: bnez a6, .LBB53_1
+; RV32IA-TSO-ZACAS-NEXT: # %bb.4:
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a5, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_min_i8_acq_rel:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -7518,6 +9967,16 @@ define i8 @atomicrmw_min_i8_acq_rel(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a5, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_min_i8_acq_rel:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amomin.b.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_min_i8_acq_rel:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amomin.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_min_i8_acq_rel:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amomin.b.aqrl a0, a1, (a0)
@@ -7574,34 +10033,34 @@ define i8 @atomicrmw_min_i8_seq_cst(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
-; RV32IA-LABEL: atomicrmw_min_i8_seq_cst:
-; RV32IA: # %bb.0:
-; RV32IA-NEXT: andi a2, a0, -4
-; RV32IA-NEXT: slli a0, a0, 3
-; RV32IA-NEXT: li a3, 255
-; RV32IA-NEXT: slli a1, a1, 24
-; RV32IA-NEXT: andi a4, a0, 24
-; RV32IA-NEXT: sll a3, a3, a0
-; RV32IA-NEXT: srai a1, a1, 24
-; RV32IA-NEXT: sll a1, a1, a0
-; RV32IA-NEXT: xori a4, a4, 24
-; RV32IA-NEXT: .LBB54_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: lr.w.aqrl a5, (a2)
-; RV32IA-NEXT: and a7, a5, a3
-; RV32IA-NEXT: mv a6, a5
-; RV32IA-NEXT: sll a7, a7, a4
-; RV32IA-NEXT: sra a7, a7, a4
-; RV32IA-NEXT: bge a1, a7, .LBB54_3
-; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB54_1 Depth=1
-; RV32IA-NEXT: xor a6, a5, a1
-; RV32IA-NEXT: and a6, a6, a3
-; RV32IA-NEXT: xor a6, a5, a6
-; RV32IA-NEXT: .LBB54_3: # in Loop: Header=BB54_1 Depth=1
-; RV32IA-NEXT: sc.w.rl a6, a6, (a2)
-; RV32IA-NEXT: bnez a6, .LBB54_1
-; RV32IA-NEXT: # %bb.4:
-; RV32IA-NEXT: srl a0, a5, a0
-; RV32IA-NEXT: ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_min_i8_seq_cst:
+; RV32IA-NOZACAS: # %bb.0:
+; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT: li a3, 255
+; RV32IA-NOZACAS-NEXT: slli a1, a1, 24
+; RV32IA-NOZACAS-NEXT: andi a4, a0, 24
+; RV32IA-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-NOZACAS-NEXT: srai a1, a1, 24
+; RV32IA-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-NOZACAS-NEXT: xori a4, a4, 24
+; RV32IA-NOZACAS-NEXT: .LBB54_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NOZACAS-NEXT: lr.w.aqrl a5, (a2)
+; RV32IA-NOZACAS-NEXT: and a7, a5, a3
+; RV32IA-NOZACAS-NEXT: mv a6, a5
+; RV32IA-NOZACAS-NEXT: sll a7, a7, a4
+; RV32IA-NOZACAS-NEXT: sra a7, a7, a4
+; RV32IA-NOZACAS-NEXT: bge a1, a7, .LBB54_3
+; RV32IA-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB54_1 Depth=1
+; RV32IA-NOZACAS-NEXT: xor a6, a5, a1
+; RV32IA-NOZACAS-NEXT: and a6, a6, a3
+; RV32IA-NOZACAS-NEXT: xor a6, a5, a6
+; RV32IA-NOZACAS-NEXT: .LBB54_3: # in Loop: Header=BB54_1 Depth=1
+; RV32IA-NOZACAS-NEXT: sc.w.rl a6, a6, (a2)
+; RV32IA-NOZACAS-NEXT: bnez a6, .LBB54_1
+; RV32IA-NOZACAS-NEXT: # %bb.4:
+; RV32IA-NOZACAS-NEXT: srl a0, a5, a0
+; RV32IA-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_min_i8_seq_cst:
; RV64I: # %bb.0:
@@ -7674,6 +10133,35 @@ define i8 @atomicrmw_min_i8_seq_cst(ptr %a, i8 %b) nounwind {
; RV64IA-NOZACAS-NEXT: srlw a0, a5, a0
; RV64IA-NOZACAS-NEXT: ret
;
+; RV32IA-ZACAS-LABEL: atomicrmw_min_i8_seq_cst:
+; RV32IA-ZACAS: # %bb.0:
+; RV32IA-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-ZACAS-NEXT: li a3, 255
+; RV32IA-ZACAS-NEXT: slli a1, a1, 24
+; RV32IA-ZACAS-NEXT: andi a4, a0, 24
+; RV32IA-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-ZACAS-NEXT: srai a1, a1, 24
+; RV32IA-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-ZACAS-NEXT: xori a4, a4, 24
+; RV32IA-ZACAS-NEXT: .LBB54_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT: lr.w.aqrl a5, (a2)
+; RV32IA-ZACAS-NEXT: and a7, a5, a3
+; RV32IA-ZACAS-NEXT: mv a6, a5
+; RV32IA-ZACAS-NEXT: sll a7, a7, a4
+; RV32IA-ZACAS-NEXT: sra a7, a7, a4
+; RV32IA-ZACAS-NEXT: bge a1, a7, .LBB54_3
+; RV32IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB54_1 Depth=1
+; RV32IA-ZACAS-NEXT: xor a6, a5, a1
+; RV32IA-ZACAS-NEXT: and a6, a6, a3
+; RV32IA-ZACAS-NEXT: xor a6, a5, a6
+; RV32IA-ZACAS-NEXT: .LBB54_3: # in Loop: Header=BB54_1 Depth=1
+; RV32IA-ZACAS-NEXT: sc.w.rl a6, a6, (a2)
+; RV32IA-ZACAS-NEXT: bnez a6, .LBB54_1
+; RV32IA-ZACAS-NEXT: # %bb.4:
+; RV32IA-ZACAS-NEXT: srl a0, a5, a0
+; RV32IA-ZACAS-NEXT: ret
+;
; RV64IA-ZACAS-LABEL: atomicrmw_min_i8_seq_cst:
; RV64IA-ZACAS: # %bb.0:
; RV64IA-ZACAS-NEXT: andi a2, a0, -4
@@ -7703,6 +10191,16 @@ define i8 @atomicrmw_min_i8_seq_cst(ptr %a, i8 %b) nounwind {
; RV64IA-ZACAS-NEXT: srlw a0, a5, a0
; RV64IA-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_min_i8_seq_cst:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amomin.b.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_min_i8_seq_cst:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amomin.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_min_i8_seq_cst:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amomin.b.aqrl a0, a1, (a0)
@@ -7757,29 +10255,29 @@ define i8 @atomicrmw_umax_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
-; RV32IA-LABEL: atomicrmw_umax_i8_monotonic:
-; RV32IA: # %bb.0:
-; RV32IA-NEXT: andi a2, a0, -4
-; RV32IA-NEXT: slli a0, a0, 3
-; RV32IA-NEXT: li a3, 255
-; RV32IA-NEXT: zext.b a1, a1
-; RV32IA-NEXT: sll a3, a3, a0
-; RV32IA-NEXT: sll a1, a1, a0
-; RV32IA-NEXT: .LBB55_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: lr.w a4, (a2)
-; RV32IA-NEXT: and a6, a4, a3
-; RV32IA-NEXT: mv a5, a4
-; RV32IA-NEXT: bgeu a6, a1, .LBB55_3
-; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB55_1 Depth=1
-; RV32IA-NEXT: xor a5, a4, a1
-; RV32IA-NEXT: and a5, a5, a3
-; RV32IA-NEXT: xor a5, a4, a5
-; RV32IA-NEXT: .LBB55_3: # in Loop: Header=BB55_1 Depth=1
-; RV32IA-NEXT: sc.w a5, a5, (a2)
-; RV32IA-NEXT: bnez a5, .LBB55_1
-; RV32IA-NEXT: # %bb.4:
-; RV32IA-NEXT: srl a0, a4, a0
-; RV32IA-NEXT: ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_umax_i8_monotonic:
+; RV32IA-NOZACAS: # %bb.0:
+; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT: li a3, 255
+; RV32IA-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-NOZACAS-NEXT: .LBB55_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NOZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-NOZACAS-NEXT: and a6, a4, a3
+; RV32IA-NOZACAS-NEXT: mv a5, a4
+; RV32IA-NOZACAS-NEXT: bgeu a6, a1, .LBB55_3
+; RV32IA-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB55_1 Depth=1
+; RV32IA-NOZACAS-NEXT: xor a5, a4, a1
+; RV32IA-NOZACAS-NEXT: and a5, a5, a3
+; RV32IA-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-NOZACAS-NEXT: .LBB55_3: # in Loop: Header=BB55_1 Depth=1
+; RV32IA-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-NOZACAS-NEXT: bnez a5, .LBB55_1
+; RV32IA-NOZACAS-NEXT: # %bb.4:
+; RV32IA-NOZACAS-NEXT: srl a0, a4, a0
+; RV32IA-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_umax_i8_monotonic:
; RV64I: # %bb.0:
@@ -7845,6 +10343,30 @@ define i8 @atomicrmw_umax_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV64IA-NOZACAS-NEXT: srlw a0, a4, a0
; RV64IA-NOZACAS-NEXT: ret
;
+; RV32IA-ZACAS-LABEL: atomicrmw_umax_i8_monotonic:
+; RV32IA-ZACAS: # %bb.0:
+; RV32IA-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-ZACAS-NEXT: li a3, 255
+; RV32IA-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-ZACAS-NEXT: .LBB55_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-ZACAS-NEXT: and a6, a4, a3
+; RV32IA-ZACAS-NEXT: mv a5, a4
+; RV32IA-ZACAS-NEXT: bgeu a6, a1, .LBB55_3
+; RV32IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB55_1 Depth=1
+; RV32IA-ZACAS-NEXT: xor a5, a4, a1
+; RV32IA-ZACAS-NEXT: and a5, a5, a3
+; RV32IA-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-ZACAS-NEXT: .LBB55_3: # in Loop: Header=BB55_1 Depth=1
+; RV32IA-ZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-ZACAS-NEXT: bnez a5, .LBB55_1
+; RV32IA-ZACAS-NEXT: # %bb.4:
+; RV32IA-ZACAS-NEXT: srl a0, a4, a0
+; RV32IA-ZACAS-NEXT: ret
+;
; RV64IA-ZACAS-LABEL: atomicrmw_umax_i8_monotonic:
; RV64IA-ZACAS: # %bb.0:
; RV64IA-ZACAS-NEXT: andi a2, a0, -4
@@ -7869,6 +10391,16 @@ define i8 @atomicrmw_umax_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV64IA-ZACAS-NEXT: srlw a0, a4, a0
; RV64IA-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_umax_i8_monotonic:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amomaxu.b a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_umax_i8_monotonic:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amomaxu.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_umax_i8_monotonic:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amomaxu.b a0, a1, (a0)
@@ -7923,53 +10455,53 @@ define i8 @atomicrmw_umax_i8_acquire(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_umax_i8_acquire:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: li a3, 255
-; RV32IA-WMO-NEXT: zext.b a1, a1
-; RV32IA-WMO-NEXT: sll a3, a3, a0
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: .LBB56_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT: lr.w.aq a4, (a2)
-; RV32IA-WMO-NEXT: and a6, a4, a3
-; RV32IA-WMO-NEXT: mv a5, a4
-; RV32IA-WMO-NEXT: bgeu a6, a1, .LBB56_3
-; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB56_1 Depth=1
-; RV32IA-WMO-NEXT: xor a5, a4, a1
-; RV32IA-WMO-NEXT: and a5, a5, a3
-; RV32IA-WMO-NEXT: xor a5, a4, a5
-; RV32IA-WMO-NEXT: .LBB56_3: # in Loop: Header=BB56_1 Depth=1
-; RV32IA-WMO-NEXT: sc.w a5, a5, (a2)
-; RV32IA-WMO-NEXT: bnez a5, .LBB56_1
-; RV32IA-WMO-NEXT: # %bb.4:
-; RV32IA-WMO-NEXT: srl a0, a4, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_umax_i8_acquire:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: li a3, 255
+; RV32IA-WMO-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: .LBB56_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: lr.w.aq a4, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: and a6, a4, a3
+; RV32IA-WMO-NOZACAS-NEXT: mv a5, a4
+; RV32IA-WMO-NOZACAS-NEXT: bgeu a6, a1, .LBB56_3
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB56_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a4, a1
+; RV32IA-WMO-NOZACAS-NEXT: and a5, a5, a3
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-NOZACAS-NEXT: .LBB56_3: # in Loop: Header=BB56_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: bnez a5, .LBB56_1
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.4:
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a4, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_umax_i8_acquire:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: li a3, 255
-; RV32IA-TSO-NEXT: zext.b a1, a1
-; RV32IA-TSO-NEXT: sll a3, a3, a0
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: .LBB56_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT: lr.w a4, (a2)
-; RV32IA-TSO-NEXT: and a6, a4, a3
-; RV32IA-TSO-NEXT: mv a5, a4
-; RV32IA-TSO-NEXT: bgeu a6, a1, .LBB56_3
-; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB56_1 Depth=1
-; RV32IA-TSO-NEXT: xor a5, a4, a1
-; RV32IA-TSO-NEXT: and a5, a5, a3
-; RV32IA-TSO-NEXT: xor a5, a4, a5
-; RV32IA-TSO-NEXT: .LBB56_3: # in Loop: Header=BB56_1 Depth=1
-; RV32IA-TSO-NEXT: sc.w a5, a5, (a2)
-; RV32IA-TSO-NEXT: bnez a5, .LBB56_1
-; RV32IA-TSO-NEXT: # %bb.4:
-; RV32IA-TSO-NEXT: srl a0, a4, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_umax_i8_acquire:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: li a3, 255
+; RV32IA-TSO-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: .LBB56_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: and a6, a4, a3
+; RV32IA-TSO-NOZACAS-NEXT: mv a5, a4
+; RV32IA-TSO-NOZACAS-NEXT: bgeu a6, a1, .LBB56_3
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB56_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a4, a1
+; RV32IA-TSO-NOZACAS-NEXT: and a5, a5, a3
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-NOZACAS-NEXT: .LBB56_3: # in Loop: Header=BB56_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: bnez a5, .LBB56_1
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.4:
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a4, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_umax_i8_acquire:
; RV64I: # %bb.0:
@@ -8059,6 +10591,54 @@ define i8 @atomicrmw_umax_i8_acquire(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a4, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_umax_i8_acquire:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: li a3, 255
+; RV32IA-WMO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: .LBB56_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a4, (a2)
+; RV32IA-WMO-ZACAS-NEXT: and a6, a4, a3
+; RV32IA-WMO-ZACAS-NEXT: mv a5, a4
+; RV32IA-WMO-ZACAS-NEXT: bgeu a6, a1, .LBB56_3
+; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB56_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a4, a1
+; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a3
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-ZACAS-NEXT: .LBB56_3: # in Loop: Header=BB56_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB56_1
+; RV32IA-WMO-ZACAS-NEXT: # %bb.4:
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a4, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_umax_i8_acquire:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: li a3, 255
+; RV32IA-TSO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: .LBB56_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-TSO-ZACAS-NEXT: and a6, a4, a3
+; RV32IA-TSO-ZACAS-NEXT: mv a5, a4
+; RV32IA-TSO-ZACAS-NEXT: bgeu a6, a1, .LBB56_3
+; RV32IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB56_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a4, a1
+; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a3
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-ZACAS-NEXT: .LBB56_3: # in Loop: Header=BB56_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT: bnez a5, .LBB56_1
+; RV32IA-TSO-ZACAS-NEXT: # %bb.4:
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a4, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_umax_i8_acquire:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -8107,6 +10687,16 @@ define i8 @atomicrmw_umax_i8_acquire(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a4, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_umax_i8_acquire:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amomaxu.b.aq a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_umax_i8_acquire:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amomaxu.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_umax_i8_acquire:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amomaxu.b.aq a0, a1, (a0)
@@ -8161,53 +10751,53 @@ define i8 @atomicrmw_umax_i8_release(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_umax_i8_release:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: li a3, 255
-; RV32IA-WMO-NEXT: zext.b a1, a1
-; RV32IA-WMO-NEXT: sll a3, a3, a0
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: .LBB57_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT: lr.w a4, (a2)
-; RV32IA-WMO-NEXT: and a6, a4, a3
-; RV32IA-WMO-NEXT: mv a5, a4
-; RV32IA-WMO-NEXT: bgeu a6, a1, .LBB57_3
-; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB57_1 Depth=1
-; RV32IA-WMO-NEXT: xor a5, a4, a1
-; RV32IA-WMO-NEXT: and a5, a5, a3
-; RV32IA-WMO-NEXT: xor a5, a4, a5
-; RV32IA-WMO-NEXT: .LBB57_3: # in Loop: Header=BB57_1 Depth=1
-; RV32IA-WMO-NEXT: sc.w.rl a5, a5, (a2)
-; RV32IA-WMO-NEXT: bnez a5, .LBB57_1
-; RV32IA-WMO-NEXT: # %bb.4:
-; RV32IA-WMO-NEXT: srl a0, a4, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_umax_i8_release:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: li a3, 255
+; RV32IA-WMO-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: .LBB57_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: and a6, a4, a3
+; RV32IA-WMO-NOZACAS-NEXT: mv a5, a4
+; RV32IA-WMO-NOZACAS-NEXT: bgeu a6, a1, .LBB57_3
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB57_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a4, a1
+; RV32IA-WMO-NOZACAS-NEXT: and a5, a5, a3
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-NOZACAS-NEXT: .LBB57_3: # in Loop: Header=BB57_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: bnez a5, .LBB57_1
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.4:
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a4, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_umax_i8_release:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: li a3, 255
-; RV32IA-TSO-NEXT: zext.b a1, a1
-; RV32IA-TSO-NEXT: sll a3, a3, a0
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: .LBB57_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT: lr.w a4, (a2)
-; RV32IA-TSO-NEXT: and a6, a4, a3
-; RV32IA-TSO-NEXT: mv a5, a4
-; RV32IA-TSO-NEXT: bgeu a6, a1, .LBB57_3
-; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB57_1 Depth=1
-; RV32IA-TSO-NEXT: xor a5, a4, a1
-; RV32IA-TSO-NEXT: and a5, a5, a3
-; RV32IA-TSO-NEXT: xor a5, a4, a5
-; RV32IA-TSO-NEXT: .LBB57_3: # in Loop: Header=BB57_1 Depth=1
-; RV32IA-TSO-NEXT: sc.w a5, a5, (a2)
-; RV32IA-TSO-NEXT: bnez a5, .LBB57_1
-; RV32IA-TSO-NEXT: # %bb.4:
-; RV32IA-TSO-NEXT: srl a0, a4, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_umax_i8_release:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: li a3, 255
+; RV32IA-TSO-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: .LBB57_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: and a6, a4, a3
+; RV32IA-TSO-NOZACAS-NEXT: mv a5, a4
+; RV32IA-TSO-NOZACAS-NEXT: bgeu a6, a1, .LBB57_3
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB57_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a4, a1
+; RV32IA-TSO-NOZACAS-NEXT: and a5, a5, a3
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-NOZACAS-NEXT: .LBB57_3: # in Loop: Header=BB57_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: bnez a5, .LBB57_1
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.4:
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a4, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_umax_i8_release:
; RV64I: # %bb.0:
@@ -8297,6 +10887,54 @@ define i8 @atomicrmw_umax_i8_release(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a4, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_umax_i8_release:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: li a3, 255
+; RV32IA-WMO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: .LBB57_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-WMO-ZACAS-NEXT: and a6, a4, a3
+; RV32IA-WMO-ZACAS-NEXT: mv a5, a4
+; RV32IA-WMO-ZACAS-NEXT: bgeu a6, a1, .LBB57_3
+; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB57_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a4, a1
+; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a3
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-ZACAS-NEXT: .LBB57_3: # in Loop: Header=BB57_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB57_1
+; RV32IA-WMO-ZACAS-NEXT: # %bb.4:
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a4, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_umax_i8_release:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: li a3, 255
+; RV32IA-TSO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: .LBB57_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-TSO-ZACAS-NEXT: and a6, a4, a3
+; RV32IA-TSO-ZACAS-NEXT: mv a5, a4
+; RV32IA-TSO-ZACAS-NEXT: bgeu a6, a1, .LBB57_3
+; RV32IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB57_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a4, a1
+; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a3
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-ZACAS-NEXT: .LBB57_3: # in Loop: Header=BB57_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT: bnez a5, .LBB57_1
+; RV32IA-TSO-ZACAS-NEXT: # %bb.4:
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a4, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_umax_i8_release:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -8345,6 +10983,16 @@ define i8 @atomicrmw_umax_i8_release(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a4, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_umax_i8_release:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amomaxu.b.rl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_umax_i8_release:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amomaxu.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_umax_i8_release:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amomaxu.b.rl a0, a1, (a0)
@@ -8399,53 +11047,53 @@ define i8 @atomicrmw_umax_i8_acq_rel(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_umax_i8_acq_rel:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: li a3, 255
-; RV32IA-WMO-NEXT: zext.b a1, a1
-; RV32IA-WMO-NEXT: sll a3, a3, a0
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: .LBB58_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT: lr.w.aq a4, (a2)
-; RV32IA-WMO-NEXT: and a6, a4, a3
-; RV32IA-WMO-NEXT: mv a5, a4
-; RV32IA-WMO-NEXT: bgeu a6, a1, .LBB58_3
-; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB58_1 Depth=1
-; RV32IA-WMO-NEXT: xor a5, a4, a1
-; RV32IA-WMO-NEXT: and a5, a5, a3
-; RV32IA-WMO-NEXT: xor a5, a4, a5
-; RV32IA-WMO-NEXT: .LBB58_3: # in Loop: Header=BB58_1 Depth=1
-; RV32IA-WMO-NEXT: sc.w.rl a5, a5, (a2)
-; RV32IA-WMO-NEXT: bnez a5, .LBB58_1
-; RV32IA-WMO-NEXT: # %bb.4:
-; RV32IA-WMO-NEXT: srl a0, a4, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_umax_i8_acq_rel:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: li a3, 255
+; RV32IA-WMO-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: .LBB58_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: lr.w.aq a4, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: and a6, a4, a3
+; RV32IA-WMO-NOZACAS-NEXT: mv a5, a4
+; RV32IA-WMO-NOZACAS-NEXT: bgeu a6, a1, .LBB58_3
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB58_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a4, a1
+; RV32IA-WMO-NOZACAS-NEXT: and a5, a5, a3
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-NOZACAS-NEXT: .LBB58_3: # in Loop: Header=BB58_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: bnez a5, .LBB58_1
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.4:
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a4, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_umax_i8_acq_rel:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: li a3, 255
-; RV32IA-TSO-NEXT: zext.b a1, a1
-; RV32IA-TSO-NEXT: sll a3, a3, a0
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: .LBB58_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT: lr.w a4, (a2)
-; RV32IA-TSO-NEXT: and a6, a4, a3
-; RV32IA-TSO-NEXT: mv a5, a4
-; RV32IA-TSO-NEXT: bgeu a6, a1, .LBB58_3
-; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB58_1 Depth=1
-; RV32IA-TSO-NEXT: xor a5, a4, a1
-; RV32IA-TSO-NEXT: and a5, a5, a3
-; RV32IA-TSO-NEXT: xor a5, a4, a5
-; RV32IA-TSO-NEXT: .LBB58_3: # in Loop: Header=BB58_1 Depth=1
-; RV32IA-TSO-NEXT: sc.w a5, a5, (a2)
-; RV32IA-TSO-NEXT: bnez a5, .LBB58_1
-; RV32IA-TSO-NEXT: # %bb.4:
-; RV32IA-TSO-NEXT: srl a0, a4, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_umax_i8_acq_rel:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: li a3, 255
+; RV32IA-TSO-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: .LBB58_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: and a6, a4, a3
+; RV32IA-TSO-NOZACAS-NEXT: mv a5, a4
+; RV32IA-TSO-NOZACAS-NEXT: bgeu a6, a1, .LBB58_3
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB58_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a4, a1
+; RV32IA-TSO-NOZACAS-NEXT: and a5, a5, a3
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-NOZACAS-NEXT: .LBB58_3: # in Loop: Header=BB58_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: bnez a5, .LBB58_1
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.4:
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a4, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_umax_i8_acq_rel:
; RV64I: # %bb.0:
@@ -8535,6 +11183,54 @@ define i8 @atomicrmw_umax_i8_acq_rel(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a4, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_umax_i8_acq_rel:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: li a3, 255
+; RV32IA-WMO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: .LBB58_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a4, (a2)
+; RV32IA-WMO-ZACAS-NEXT: and a6, a4, a3
+; RV32IA-WMO-ZACAS-NEXT: mv a5, a4
+; RV32IA-WMO-ZACAS-NEXT: bgeu a6, a1, .LBB58_3
+; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB58_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a4, a1
+; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a3
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-ZACAS-NEXT: .LBB58_3: # in Loop: Header=BB58_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB58_1
+; RV32IA-WMO-ZACAS-NEXT: # %bb.4:
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a4, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_umax_i8_acq_rel:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: li a3, 255
+; RV32IA-TSO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: .LBB58_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-TSO-ZACAS-NEXT: and a6, a4, a3
+; RV32IA-TSO-ZACAS-NEXT: mv a5, a4
+; RV32IA-TSO-ZACAS-NEXT: bgeu a6, a1, .LBB58_3
+; RV32IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB58_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a4, a1
+; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a3
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-ZACAS-NEXT: .LBB58_3: # in Loop: Header=BB58_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT: bnez a5, .LBB58_1
+; RV32IA-TSO-ZACAS-NEXT: # %bb.4:
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a4, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_umax_i8_acq_rel:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -8583,6 +11279,16 @@ define i8 @atomicrmw_umax_i8_acq_rel(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a4, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_umax_i8_acq_rel:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amomaxu.b.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_umax_i8_acq_rel:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amomaxu.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_umax_i8_acq_rel:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amomaxu.b.aqrl a0, a1, (a0)
@@ -8637,29 +11343,29 @@ define i8 @atomicrmw_umax_i8_seq_cst(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
-; RV32IA-LABEL: atomicrmw_umax_i8_seq_cst:
-; RV32IA: # %bb.0:
-; RV32IA-NEXT: andi a2, a0, -4
-; RV32IA-NEXT: slli a0, a0, 3
-; RV32IA-NEXT: li a3, 255
-; RV32IA-NEXT: zext.b a1, a1
-; RV32IA-NEXT: sll a3, a3, a0
-; RV32IA-NEXT: sll a1, a1, a0
-; RV32IA-NEXT: .LBB59_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: lr.w.aqrl a4, (a2)
-; RV32IA-NEXT: and a6, a4, a3
-; RV32IA-NEXT: mv a5, a4
-; RV32IA-NEXT: bgeu a6, a1, .LBB59_3
-; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB59_1 Depth=1
-; RV32IA-NEXT: xor a5, a4, a1
-; RV32IA-NEXT: and a5, a5, a3
-; RV32IA-NEXT: xor a5, a4, a5
-; RV32IA-NEXT: .LBB59_3: # in Loop: Header=BB59_1 Depth=1
-; RV32IA-NEXT: sc.w.rl a5, a5, (a2)
-; RV32IA-NEXT: bnez a5, .LBB59_1
-; RV32IA-NEXT: # %bb.4:
-; RV32IA-NEXT: srl a0, a4, a0
-; RV32IA-NEXT: ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_umax_i8_seq_cst:
+; RV32IA-NOZACAS: # %bb.0:
+; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT: li a3, 255
+; RV32IA-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-NOZACAS-NEXT: .LBB59_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NOZACAS-NEXT: lr.w.aqrl a4, (a2)
+; RV32IA-NOZACAS-NEXT: and a6, a4, a3
+; RV32IA-NOZACAS-NEXT: mv a5, a4
+; RV32IA-NOZACAS-NEXT: bgeu a6, a1, .LBB59_3
+; RV32IA-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB59_1 Depth=1
+; RV32IA-NOZACAS-NEXT: xor a5, a4, a1
+; RV32IA-NOZACAS-NEXT: and a5, a5, a3
+; RV32IA-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-NOZACAS-NEXT: .LBB59_3: # in Loop: Header=BB59_1 Depth=1
+; RV32IA-NOZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-NOZACAS-NEXT: bnez a5, .LBB59_1
+; RV32IA-NOZACAS-NEXT: # %bb.4:
+; RV32IA-NOZACAS-NEXT: srl a0, a4, a0
+; RV32IA-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_umax_i8_seq_cst:
; RV64I: # %bb.0:
@@ -8725,6 +11431,30 @@ define i8 @atomicrmw_umax_i8_seq_cst(ptr %a, i8 %b) nounwind {
; RV64IA-NOZACAS-NEXT: srlw a0, a4, a0
; RV64IA-NOZACAS-NEXT: ret
;
+; RV32IA-ZACAS-LABEL: atomicrmw_umax_i8_seq_cst:
+; RV32IA-ZACAS: # %bb.0:
+; RV32IA-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-ZACAS-NEXT: li a3, 255
+; RV32IA-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-ZACAS-NEXT: .LBB59_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT: lr.w.aqrl a4, (a2)
+; RV32IA-ZACAS-NEXT: and a6, a4, a3
+; RV32IA-ZACAS-NEXT: mv a5, a4
+; RV32IA-ZACAS-NEXT: bgeu a6, a1, .LBB59_3
+; RV32IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB59_1 Depth=1
+; RV32IA-ZACAS-NEXT: xor a5, a4, a1
+; RV32IA-ZACAS-NEXT: and a5, a5, a3
+; RV32IA-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-ZACAS-NEXT: .LBB59_3: # in Loop: Header=BB59_1 Depth=1
+; RV32IA-ZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-ZACAS-NEXT: bnez a5, .LBB59_1
+; RV32IA-ZACAS-NEXT: # %bb.4:
+; RV32IA-ZACAS-NEXT: srl a0, a4, a0
+; RV32IA-ZACAS-NEXT: ret
+;
; RV64IA-ZACAS-LABEL: atomicrmw_umax_i8_seq_cst:
; RV64IA-ZACAS: # %bb.0:
; RV64IA-ZACAS-NEXT: andi a2, a0, -4
@@ -8749,6 +11479,16 @@ define i8 @atomicrmw_umax_i8_seq_cst(ptr %a, i8 %b) nounwind {
; RV64IA-ZACAS-NEXT: srlw a0, a4, a0
; RV64IA-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_umax_i8_seq_cst:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amomaxu.b.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_umax_i8_seq_cst:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amomaxu.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_umax_i8_seq_cst:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amomaxu.b.aqrl a0, a1, (a0)
@@ -8803,29 +11543,29 @@ define i8 @atomicrmw_umin_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
-; RV32IA-LABEL: atomicrmw_umin_i8_monotonic:
-; RV32IA: # %bb.0:
-; RV32IA-NEXT: andi a2, a0, -4
-; RV32IA-NEXT: slli a0, a0, 3
-; RV32IA-NEXT: li a3, 255
-; RV32IA-NEXT: zext.b a1, a1
-; RV32IA-NEXT: sll a3, a3, a0
-; RV32IA-NEXT: sll a1, a1, a0
-; RV32IA-NEXT: .LBB60_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: lr.w a4, (a2)
-; RV32IA-NEXT: and a6, a4, a3
-; RV32IA-NEXT: mv a5, a4
-; RV32IA-NEXT: bgeu a1, a6, .LBB60_3
-; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB60_1 Depth=1
-; RV32IA-NEXT: xor a5, a4, a1
-; RV32IA-NEXT: and a5, a5, a3
-; RV32IA-NEXT: xor a5, a4, a5
-; RV32IA-NEXT: .LBB60_3: # in Loop: Header=BB60_1 Depth=1
-; RV32IA-NEXT: sc.w a5, a5, (a2)
-; RV32IA-NEXT: bnez a5, .LBB60_1
-; RV32IA-NEXT: # %bb.4:
-; RV32IA-NEXT: srl a0, a4, a0
-; RV32IA-NEXT: ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_umin_i8_monotonic:
+; RV32IA-NOZACAS: # %bb.0:
+; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT: li a3, 255
+; RV32IA-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-NOZACAS-NEXT: .LBB60_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NOZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-NOZACAS-NEXT: and a6, a4, a3
+; RV32IA-NOZACAS-NEXT: mv a5, a4
+; RV32IA-NOZACAS-NEXT: bgeu a1, a6, .LBB60_3
+; RV32IA-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB60_1 Depth=1
+; RV32IA-NOZACAS-NEXT: xor a5, a4, a1
+; RV32IA-NOZACAS-NEXT: and a5, a5, a3
+; RV32IA-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-NOZACAS-NEXT: .LBB60_3: # in Loop: Header=BB60_1 Depth=1
+; RV32IA-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-NOZACAS-NEXT: bnez a5, .LBB60_1
+; RV32IA-NOZACAS-NEXT: # %bb.4:
+; RV32IA-NOZACAS-NEXT: srl a0, a4, a0
+; RV32IA-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_umin_i8_monotonic:
; RV64I: # %bb.0:
@@ -8891,6 +11631,30 @@ define i8 @atomicrmw_umin_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV64IA-NOZACAS-NEXT: srlw a0, a4, a0
; RV64IA-NOZACAS-NEXT: ret
;
+; RV32IA-ZACAS-LABEL: atomicrmw_umin_i8_monotonic:
+; RV32IA-ZACAS: # %bb.0:
+; RV32IA-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-ZACAS-NEXT: li a3, 255
+; RV32IA-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-ZACAS-NEXT: .LBB60_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-ZACAS-NEXT: and a6, a4, a3
+; RV32IA-ZACAS-NEXT: mv a5, a4
+; RV32IA-ZACAS-NEXT: bgeu a1, a6, .LBB60_3
+; RV32IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB60_1 Depth=1
+; RV32IA-ZACAS-NEXT: xor a5, a4, a1
+; RV32IA-ZACAS-NEXT: and a5, a5, a3
+; RV32IA-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-ZACAS-NEXT: .LBB60_3: # in Loop: Header=BB60_1 Depth=1
+; RV32IA-ZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-ZACAS-NEXT: bnez a5, .LBB60_1
+; RV32IA-ZACAS-NEXT: # %bb.4:
+; RV32IA-ZACAS-NEXT: srl a0, a4, a0
+; RV32IA-ZACAS-NEXT: ret
+;
; RV64IA-ZACAS-LABEL: atomicrmw_umin_i8_monotonic:
; RV64IA-ZACAS: # %bb.0:
; RV64IA-ZACAS-NEXT: andi a2, a0, -4
@@ -8915,6 +11679,16 @@ define i8 @atomicrmw_umin_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV64IA-ZACAS-NEXT: srlw a0, a4, a0
; RV64IA-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_umin_i8_monotonic:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amominu.b a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_umin_i8_monotonic:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amominu.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_umin_i8_monotonic:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amominu.b a0, a1, (a0)
@@ -8969,53 +11743,53 @@ define i8 @atomicrmw_umin_i8_acquire(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_umin_i8_acquire:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: li a3, 255
-; RV32IA-WMO-NEXT: zext.b a1, a1
-; RV32IA-WMO-NEXT: sll a3, a3, a0
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: .LBB61_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT: lr.w.aq a4, (a2)
-; RV32IA-WMO-NEXT: and a6, a4, a3
-; RV32IA-WMO-NEXT: mv a5, a4
-; RV32IA-WMO-NEXT: bgeu a1, a6, .LBB61_3
-; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB61_1 Depth=1
-; RV32IA-WMO-NEXT: xor a5, a4, a1
-; RV32IA-WMO-NEXT: and a5, a5, a3
-; RV32IA-WMO-NEXT: xor a5, a4, a5
-; RV32IA-WMO-NEXT: .LBB61_3: # in Loop: Header=BB61_1 Depth=1
-; RV32IA-WMO-NEXT: sc.w a5, a5, (a2)
-; RV32IA-WMO-NEXT: bnez a5, .LBB61_1
-; RV32IA-WMO-NEXT: # %bb.4:
-; RV32IA-WMO-NEXT: srl a0, a4, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_umin_i8_acquire:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: li a3, 255
+; RV32IA-WMO-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: .LBB61_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: lr.w.aq a4, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: and a6, a4, a3
+; RV32IA-WMO-NOZACAS-NEXT: mv a5, a4
+; RV32IA-WMO-NOZACAS-NEXT: bgeu a1, a6, .LBB61_3
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB61_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a4, a1
+; RV32IA-WMO-NOZACAS-NEXT: and a5, a5, a3
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-NOZACAS-NEXT: .LBB61_3: # in Loop: Header=BB61_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: bnez a5, .LBB61_1
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.4:
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a4, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_umin_i8_acquire:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: li a3, 255
-; RV32IA-TSO-NEXT: zext.b a1, a1
-; RV32IA-TSO-NEXT: sll a3, a3, a0
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: .LBB61_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT: lr.w a4, (a2)
-; RV32IA-TSO-NEXT: and a6, a4, a3
-; RV32IA-TSO-NEXT: mv a5, a4
-; RV32IA-TSO-NEXT: bgeu a1, a6, .LBB61_3
-; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB61_1 Depth=1
-; RV32IA-TSO-NEXT: xor a5, a4, a1
-; RV32IA-TSO-NEXT: and a5, a5, a3
-; RV32IA-TSO-NEXT: xor a5, a4, a5
-; RV32IA-TSO-NEXT: .LBB61_3: # in Loop: Header=BB61_1 Depth=1
-; RV32IA-TSO-NEXT: sc.w a5, a5, (a2)
-; RV32IA-TSO-NEXT: bnez a5, .LBB61_1
-; RV32IA-TSO-NEXT: # %bb.4:
-; RV32IA-TSO-NEXT: srl a0, a4, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_umin_i8_acquire:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: li a3, 255
+; RV32IA-TSO-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: .LBB61_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: and a6, a4, a3
+; RV32IA-TSO-NOZACAS-NEXT: mv a5, a4
+; RV32IA-TSO-NOZACAS-NEXT: bgeu a1, a6, .LBB61_3
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB61_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a4, a1
+; RV32IA-TSO-NOZACAS-NEXT: and a5, a5, a3
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-NOZACAS-NEXT: .LBB61_3: # in Loop: Header=BB61_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: bnez a5, .LBB61_1
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.4:
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a4, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_umin_i8_acquire:
; RV64I: # %bb.0:
@@ -9105,6 +11879,54 @@ define i8 @atomicrmw_umin_i8_acquire(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a4, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_umin_i8_acquire:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: li a3, 255
+; RV32IA-WMO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: .LBB61_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a4, (a2)
+; RV32IA-WMO-ZACAS-NEXT: and a6, a4, a3
+; RV32IA-WMO-ZACAS-NEXT: mv a5, a4
+; RV32IA-WMO-ZACAS-NEXT: bgeu a1, a6, .LBB61_3
+; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB61_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a4, a1
+; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a3
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-ZACAS-NEXT: .LBB61_3: # in Loop: Header=BB61_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB61_1
+; RV32IA-WMO-ZACAS-NEXT: # %bb.4:
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a4, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_umin_i8_acquire:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: li a3, 255
+; RV32IA-TSO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: .LBB61_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-TSO-ZACAS-NEXT: and a6, a4, a3
+; RV32IA-TSO-ZACAS-NEXT: mv a5, a4
+; RV32IA-TSO-ZACAS-NEXT: bgeu a1, a6, .LBB61_3
+; RV32IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB61_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a4, a1
+; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a3
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-ZACAS-NEXT: .LBB61_3: # in Loop: Header=BB61_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT: bnez a5, .LBB61_1
+; RV32IA-TSO-ZACAS-NEXT: # %bb.4:
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a4, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_umin_i8_acquire:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -9153,6 +11975,16 @@ define i8 @atomicrmw_umin_i8_acquire(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a4, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_umin_i8_acquire:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amominu.b.aq a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_umin_i8_acquire:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amominu.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_umin_i8_acquire:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amominu.b.aq a0, a1, (a0)
@@ -9207,53 +12039,53 @@ define i8 @atomicrmw_umin_i8_release(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_umin_i8_release:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: li a3, 255
-; RV32IA-WMO-NEXT: zext.b a1, a1
-; RV32IA-WMO-NEXT: sll a3, a3, a0
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: .LBB62_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT: lr.w a4, (a2)
-; RV32IA-WMO-NEXT: and a6, a4, a3
-; RV32IA-WMO-NEXT: mv a5, a4
-; RV32IA-WMO-NEXT: bgeu a1, a6, .LBB62_3
-; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB62_1 Depth=1
-; RV32IA-WMO-NEXT: xor a5, a4, a1
-; RV32IA-WMO-NEXT: and a5, a5, a3
-; RV32IA-WMO-NEXT: xor a5, a4, a5
-; RV32IA-WMO-NEXT: .LBB62_3: # in Loop: Header=BB62_1 Depth=1
-; RV32IA-WMO-NEXT: sc.w.rl a5, a5, (a2)
-; RV32IA-WMO-NEXT: bnez a5, .LBB62_1
-; RV32IA-WMO-NEXT: # %bb.4:
-; RV32IA-WMO-NEXT: srl a0, a4, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_umin_i8_release:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: li a3, 255
+; RV32IA-WMO-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: .LBB62_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: and a6, a4, a3
+; RV32IA-WMO-NOZACAS-NEXT: mv a5, a4
+; RV32IA-WMO-NOZACAS-NEXT: bgeu a1, a6, .LBB62_3
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB62_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a4, a1
+; RV32IA-WMO-NOZACAS-NEXT: and a5, a5, a3
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-NOZACAS-NEXT: .LBB62_3: # in Loop: Header=BB62_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: bnez a5, .LBB62_1
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.4:
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a4, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_umin_i8_release:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: li a3, 255
-; RV32IA-TSO-NEXT: zext.b a1, a1
-; RV32IA-TSO-NEXT: sll a3, a3, a0
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: .LBB62_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT: lr.w a4, (a2)
-; RV32IA-TSO-NEXT: and a6, a4, a3
-; RV32IA-TSO-NEXT: mv a5, a4
-; RV32IA-TSO-NEXT: bgeu a1, a6, .LBB62_3
-; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB62_1 Depth=1
-; RV32IA-TSO-NEXT: xor a5, a4, a1
-; RV32IA-TSO-NEXT: and a5, a5, a3
-; RV32IA-TSO-NEXT: xor a5, a4, a5
-; RV32IA-TSO-NEXT: .LBB62_3: # in Loop: Header=BB62_1 Depth=1
-; RV32IA-TSO-NEXT: sc.w a5, a5, (a2)
-; RV32IA-TSO-NEXT: bnez a5, .LBB62_1
-; RV32IA-TSO-NEXT: # %bb.4:
-; RV32IA-TSO-NEXT: srl a0, a4, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_umin_i8_release:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: li a3, 255
+; RV32IA-TSO-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: .LBB62_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: and a6, a4, a3
+; RV32IA-TSO-NOZACAS-NEXT: mv a5, a4
+; RV32IA-TSO-NOZACAS-NEXT: bgeu a1, a6, .LBB62_3
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB62_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a4, a1
+; RV32IA-TSO-NOZACAS-NEXT: and a5, a5, a3
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-NOZACAS-NEXT: .LBB62_3: # in Loop: Header=BB62_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: bnez a5, .LBB62_1
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.4:
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a4, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_umin_i8_release:
; RV64I: # %bb.0:
@@ -9343,6 +12175,54 @@ define i8 @atomicrmw_umin_i8_release(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a4, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_umin_i8_release:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: li a3, 255
+; RV32IA-WMO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: .LBB62_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-WMO-ZACAS-NEXT: and a6, a4, a3
+; RV32IA-WMO-ZACAS-NEXT: mv a5, a4
+; RV32IA-WMO-ZACAS-NEXT: bgeu a1, a6, .LBB62_3
+; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB62_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a4, a1
+; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a3
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-ZACAS-NEXT: .LBB62_3: # in Loop: Header=BB62_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB62_1
+; RV32IA-WMO-ZACAS-NEXT: # %bb.4:
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a4, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_umin_i8_release:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: li a3, 255
+; RV32IA-TSO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: .LBB62_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-TSO-ZACAS-NEXT: and a6, a4, a3
+; RV32IA-TSO-ZACAS-NEXT: mv a5, a4
+; RV32IA-TSO-ZACAS-NEXT: bgeu a1, a6, .LBB62_3
+; RV32IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB62_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a4, a1
+; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a3
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-ZACAS-NEXT: .LBB62_3: # in Loop: Header=BB62_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT: bnez a5, .LBB62_1
+; RV32IA-TSO-ZACAS-NEXT: # %bb.4:
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a4, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_umin_i8_release:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -9391,6 +12271,16 @@ define i8 @atomicrmw_umin_i8_release(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a4, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_umin_i8_release:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amominu.b.rl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_umin_i8_release:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amominu.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_umin_i8_release:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amominu.b.rl a0, a1, (a0)
@@ -9445,53 +12335,53 @@ define i8 @atomicrmw_umin_i8_acq_rel(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_umin_i8_acq_rel:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: li a3, 255
-; RV32IA-WMO-NEXT: zext.b a1, a1
-; RV32IA-WMO-NEXT: sll a3, a3, a0
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: .LBB63_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT: lr.w.aq a4, (a2)
-; RV32IA-WMO-NEXT: and a6, a4, a3
-; RV32IA-WMO-NEXT: mv a5, a4
-; RV32IA-WMO-NEXT: bgeu a1, a6, .LBB63_3
-; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB63_1 Depth=1
-; RV32IA-WMO-NEXT: xor a5, a4, a1
-; RV32IA-WMO-NEXT: and a5, a5, a3
-; RV32IA-WMO-NEXT: xor a5, a4, a5
-; RV32IA-WMO-NEXT: .LBB63_3: # in Loop: Header=BB63_1 Depth=1
-; RV32IA-WMO-NEXT: sc.w.rl a5, a5, (a2)
-; RV32IA-WMO-NEXT: bnez a5, .LBB63_1
-; RV32IA-WMO-NEXT: # %bb.4:
-; RV32IA-WMO-NEXT: srl a0, a4, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_umin_i8_acq_rel:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: li a3, 255
+; RV32IA-WMO-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: .LBB63_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: lr.w.aq a4, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: and a6, a4, a3
+; RV32IA-WMO-NOZACAS-NEXT: mv a5, a4
+; RV32IA-WMO-NOZACAS-NEXT: bgeu a1, a6, .LBB63_3
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB63_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a4, a1
+; RV32IA-WMO-NOZACAS-NEXT: and a5, a5, a3
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-NOZACAS-NEXT: .LBB63_3: # in Loop: Header=BB63_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: bnez a5, .LBB63_1
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.4:
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a4, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_umin_i8_acq_rel:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: li a3, 255
-; RV32IA-TSO-NEXT: zext.b a1, a1
-; RV32IA-TSO-NEXT: sll a3, a3, a0
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: .LBB63_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT: lr.w a4, (a2)
-; RV32IA-TSO-NEXT: and a6, a4, a3
-; RV32IA-TSO-NEXT: mv a5, a4
-; RV32IA-TSO-NEXT: bgeu a1, a6, .LBB63_3
-; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB63_1 Depth=1
-; RV32IA-TSO-NEXT: xor a5, a4, a1
-; RV32IA-TSO-NEXT: and a5, a5, a3
-; RV32IA-TSO-NEXT: xor a5, a4, a5
-; RV32IA-TSO-NEXT: .LBB63_3: # in Loop: Header=BB63_1 Depth=1
-; RV32IA-TSO-NEXT: sc.w a5, a5, (a2)
-; RV32IA-TSO-NEXT: bnez a5, .LBB63_1
-; RV32IA-TSO-NEXT: # %bb.4:
-; RV32IA-TSO-NEXT: srl a0, a4, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_umin_i8_acq_rel:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: li a3, 255
+; RV32IA-TSO-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: .LBB63_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: and a6, a4, a3
+; RV32IA-TSO-NOZACAS-NEXT: mv a5, a4
+; RV32IA-TSO-NOZACAS-NEXT: bgeu a1, a6, .LBB63_3
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB63_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a4, a1
+; RV32IA-TSO-NOZACAS-NEXT: and a5, a5, a3
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-NOZACAS-NEXT: .LBB63_3: # in Loop: Header=BB63_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: bnez a5, .LBB63_1
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.4:
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a4, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_umin_i8_acq_rel:
; RV64I: # %bb.0:
@@ -9581,6 +12471,54 @@ define i8 @atomicrmw_umin_i8_acq_rel(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a4, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_umin_i8_acq_rel:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: li a3, 255
+; RV32IA-WMO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: .LBB63_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a4, (a2)
+; RV32IA-WMO-ZACAS-NEXT: and a6, a4, a3
+; RV32IA-WMO-ZACAS-NEXT: mv a5, a4
+; RV32IA-WMO-ZACAS-NEXT: bgeu a1, a6, .LBB63_3
+; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB63_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a4, a1
+; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a3
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-ZACAS-NEXT: .LBB63_3: # in Loop: Header=BB63_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB63_1
+; RV32IA-WMO-ZACAS-NEXT: # %bb.4:
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a4, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_umin_i8_acq_rel:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: li a3, 255
+; RV32IA-TSO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: .LBB63_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-TSO-ZACAS-NEXT: and a6, a4, a3
+; RV32IA-TSO-ZACAS-NEXT: mv a5, a4
+; RV32IA-TSO-ZACAS-NEXT: bgeu a1, a6, .LBB63_3
+; RV32IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB63_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a4, a1
+; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a3
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-ZACAS-NEXT: .LBB63_3: # in Loop: Header=BB63_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT: bnez a5, .LBB63_1
+; RV32IA-TSO-ZACAS-NEXT: # %bb.4:
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a4, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_umin_i8_acq_rel:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -9629,6 +12567,16 @@ define i8 @atomicrmw_umin_i8_acq_rel(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a4, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_umin_i8_acq_rel:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amominu.b.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_umin_i8_acq_rel:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amominu.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_umin_i8_acq_rel:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amominu.b.aqrl a0, a1, (a0)
@@ -9683,29 +12631,29 @@ define i8 @atomicrmw_umin_i8_seq_cst(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
-; RV32IA-LABEL: atomicrmw_umin_i8_seq_cst:
-; RV32IA: # %bb.0:
-; RV32IA-NEXT: andi a2, a0, -4
-; RV32IA-NEXT: slli a0, a0, 3
-; RV32IA-NEXT: li a3, 255
-; RV32IA-NEXT: zext.b a1, a1
-; RV32IA-NEXT: sll a3, a3, a0
-; RV32IA-NEXT: sll a1, a1, a0
-; RV32IA-NEXT: .LBB64_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: lr.w.aqrl a4, (a2)
-; RV32IA-NEXT: and a6, a4, a3
-; RV32IA-NEXT: mv a5, a4
-; RV32IA-NEXT: bgeu a1, a6, .LBB64_3
-; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB64_1 Depth=1
-; RV32IA-NEXT: xor a5, a4, a1
-; RV32IA-NEXT: and a5, a5, a3
-; RV32IA-NEXT: xor a5, a4, a5
-; RV32IA-NEXT: .LBB64_3: # in Loop: Header=BB64_1 Depth=1
-; RV32IA-NEXT: sc.w.rl a5, a5, (a2)
-; RV32IA-NEXT: bnez a5, .LBB64_1
-; RV32IA-NEXT: # %bb.4:
-; RV32IA-NEXT: srl a0, a4, a0
-; RV32IA-NEXT: ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_umin_i8_seq_cst:
+; RV32IA-NOZACAS: # %bb.0:
+; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT: li a3, 255
+; RV32IA-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-NOZACAS-NEXT: .LBB64_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NOZACAS-NEXT: lr.w.aqrl a4, (a2)
+; RV32IA-NOZACAS-NEXT: and a6, a4, a3
+; RV32IA-NOZACAS-NEXT: mv a5, a4
+; RV32IA-NOZACAS-NEXT: bgeu a1, a6, .LBB64_3
+; RV32IA-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB64_1 Depth=1
+; RV32IA-NOZACAS-NEXT: xor a5, a4, a1
+; RV32IA-NOZACAS-NEXT: and a5, a5, a3
+; RV32IA-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-NOZACAS-NEXT: .LBB64_3: # in Loop: Header=BB64_1 Depth=1
+; RV32IA-NOZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-NOZACAS-NEXT: bnez a5, .LBB64_1
+; RV32IA-NOZACAS-NEXT: # %bb.4:
+; RV32IA-NOZACAS-NEXT: srl a0, a4, a0
+; RV32IA-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_umin_i8_seq_cst:
; RV64I: # %bb.0:
@@ -9771,6 +12719,30 @@ define i8 @atomicrmw_umin_i8_seq_cst(ptr %a, i8 %b) nounwind {
; RV64IA-NOZACAS-NEXT: srlw a0, a4, a0
; RV64IA-NOZACAS-NEXT: ret
;
+; RV32IA-ZACAS-LABEL: atomicrmw_umin_i8_seq_cst:
+; RV32IA-ZACAS: # %bb.0:
+; RV32IA-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-ZACAS-NEXT: li a3, 255
+; RV32IA-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-ZACAS-NEXT: .LBB64_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT: lr.w.aqrl a4, (a2)
+; RV32IA-ZACAS-NEXT: and a6, a4, a3
+; RV32IA-ZACAS-NEXT: mv a5, a4
+; RV32IA-ZACAS-NEXT: bgeu a1, a6, .LBB64_3
+; RV32IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB64_1 Depth=1
+; RV32IA-ZACAS-NEXT: xor a5, a4, a1
+; RV32IA-ZACAS-NEXT: and a5, a5, a3
+; RV32IA-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-ZACAS-NEXT: .LBB64_3: # in Loop: Header=BB64_1 Depth=1
+; RV32IA-ZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-ZACAS-NEXT: bnez a5, .LBB64_1
+; RV32IA-ZACAS-NEXT: # %bb.4:
+; RV32IA-ZACAS-NEXT: srl a0, a4, a0
+; RV32IA-ZACAS-NEXT: ret
+;
; RV64IA-ZACAS-LABEL: atomicrmw_umin_i8_seq_cst:
; RV64IA-ZACAS: # %bb.0:
; RV64IA-ZACAS-NEXT: andi a2, a0, -4
@@ -9795,6 +12767,16 @@ define i8 @atomicrmw_umin_i8_seq_cst(ptr %a, i8 %b) nounwind {
; RV64IA-ZACAS-NEXT: srlw a0, a4, a0
; RV64IA-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_umin_i8_seq_cst:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amominu.b.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_umin_i8_seq_cst:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amominu.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_umin_i8_seq_cst:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amominu.b.aqrl a0, a1, (a0)
@@ -9819,26 +12801,26 @@ define i16 @atomicrmw_xchg_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-LABEL: atomicrmw_xchg_i16_monotonic:
-; RV32IA: # %bb.0:
-; RV32IA-NEXT: andi a2, a0, -4
-; RV32IA-NEXT: slli a0, a0, 3
-; RV32IA-NEXT: lui a3, 16
-; RV32IA-NEXT: addi a3, a3, -1
-; RV32IA-NEXT: sll a4, a3, a0
-; RV32IA-NEXT: and a1, a1, a3
-; RV32IA-NEXT: sll a1, a1, a0
-; RV32IA-NEXT: .LBB65_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: lr.w a3, (a2)
-; RV32IA-NEXT: mv a5, a1
-; RV32IA-NEXT: xor a5, a3, a5
-; RV32IA-NEXT: and a5, a5, a4
-; RV32IA-NEXT: xor a5, a3, a5
-; RV32IA-NEXT: sc.w a5, a5, (a2)
-; RV32IA-NEXT: bnez a5, .LBB65_1
-; RV32IA-NEXT: # %bb.2:
-; RV32IA-NEXT: srl a0, a3, a0
-; RV32IA-NEXT: ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_xchg_i16_monotonic:
+; RV32IA-NOZACAS: # %bb.0:
+; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT: lui a3, 16
+; RV32IA-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-NOZACAS-NEXT: .LBB65_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NOZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-NOZACAS-NEXT: mv a5, a1
+; RV32IA-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-NOZACAS-NEXT: and a5, a5, a4
+; RV32IA-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-NOZACAS-NEXT: bnez a5, .LBB65_1
+; RV32IA-NOZACAS-NEXT: # %bb.2:
+; RV32IA-NOZACAS-NEXT: srl a0, a3, a0
+; RV32IA-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_xchg_i16_monotonic:
; RV64I: # %bb.0:
@@ -9871,6 +12853,27 @@ define i16 @atomicrmw_xchg_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV64IA-NOZACAS-NEXT: srlw a0, a3, a0
; RV64IA-NOZACAS-NEXT: ret
;
+; RV32IA-ZACAS-LABEL: atomicrmw_xchg_i16_monotonic:
+; RV32IA-ZACAS: # %bb.0:
+; RV32IA-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-ZACAS-NEXT: lui a3, 16
+; RV32IA-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-ZACAS-NEXT: sll a4, a3, a0
+; RV32IA-ZACAS-NEXT: and a1, a1, a3
+; RV32IA-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-ZACAS-NEXT: .LBB65_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-ZACAS-NEXT: mv a5, a1
+; RV32IA-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-ZACAS-NEXT: and a5, a5, a4
+; RV32IA-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-ZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-ZACAS-NEXT: bnez a5, .LBB65_1
+; RV32IA-ZACAS-NEXT: # %bb.2:
+; RV32IA-ZACAS-NEXT: srl a0, a3, a0
+; RV32IA-ZACAS-NEXT: ret
+;
; RV64IA-ZACAS-LABEL: atomicrmw_xchg_i16_monotonic:
; RV64IA-ZACAS: # %bb.0:
; RV64IA-ZACAS-NEXT: andi a2, a0, -4
@@ -9892,6 +12895,16 @@ define i16 @atomicrmw_xchg_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV64IA-ZACAS-NEXT: srlw a0, a3, a0
; RV64IA-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xchg_i16_monotonic:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amoswap.h a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_xchg_i16_monotonic:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amoswap.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_xchg_i16_monotonic:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amoswap.h a0, a1, (a0)
@@ -9916,47 +12929,47 @@ define i16 @atomicrmw_xchg_i16_acquire(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_xchg_i16_acquire:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: lui a3, 16
-; RV32IA-WMO-NEXT: addi a3, a3, -1
-; RV32IA-WMO-NEXT: sll a4, a3, a0
-; RV32IA-WMO-NEXT: and a1, a1, a3
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: .LBB66_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT: lr.w.aq a3, (a2)
-; RV32IA-WMO-NEXT: mv a5, a1
-; RV32IA-WMO-NEXT: xor a5, a3, a5
-; RV32IA-WMO-NEXT: and a5, a5, a4
-; RV32IA-WMO-NEXT: xor a5, a3, a5
-; RV32IA-WMO-NEXT: sc.w a5, a5, (a2)
-; RV32IA-WMO-NEXT: bnez a5, .LBB66_1
-; RV32IA-WMO-NEXT: # %bb.2:
-; RV32IA-WMO-NEXT: srl a0, a3, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_i16_acquire:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: .LBB66_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: lr.w.aq a3, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: mv a5, a1
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-NOZACAS-NEXT: and a5, a5, a4
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: bnez a5, .LBB66_1
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_xchg_i16_acquire:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: lui a3, 16
-; RV32IA-TSO-NEXT: addi a3, a3, -1
-; RV32IA-TSO-NEXT: sll a4, a3, a0
-; RV32IA-TSO-NEXT: and a1, a1, a3
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: .LBB66_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT: lr.w a3, (a2)
-; RV32IA-TSO-NEXT: mv a5, a1
-; RV32IA-TSO-NEXT: xor a5, a3, a5
-; RV32IA-TSO-NEXT: and a5, a5, a4
-; RV32IA-TSO-NEXT: xor a5, a3, a5
-; RV32IA-TSO-NEXT: sc.w a5, a5, (a2)
-; RV32IA-TSO-NEXT: bnez a5, .LBB66_1
-; RV32IA-TSO-NEXT: # %bb.2:
-; RV32IA-TSO-NEXT: srl a0, a3, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_xchg_i16_acquire:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: .LBB66_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: mv a5, a1
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-NOZACAS-NEXT: and a5, a5, a4
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: bnez a5, .LBB66_1
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_xchg_i16_acquire:
; RV64I: # %bb.0:
@@ -10010,6 +13023,48 @@ define i16 @atomicrmw_xchg_i16_acquire(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a3, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_xchg_i16_acquire:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-ZACAS-NEXT: sll a4, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a3
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: .LBB66_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a3, (a2)
+; RV32IA-WMO-ZACAS-NEXT: mv a5, a1
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a4
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-ZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB66_1
+; RV32IA-WMO-ZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_xchg_i16_acquire:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-ZACAS-NEXT: sll a4, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: and a1, a1, a3
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: .LBB66_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-TSO-ZACAS-NEXT: mv a5, a1
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a4
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT: bnez a5, .LBB66_1
+; RV32IA-TSO-ZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_xchg_i16_acquire:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -10052,6 +13107,16 @@ define i16 @atomicrmw_xchg_i16_acquire(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a3, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xchg_i16_acquire:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amoswap.h.aq a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_xchg_i16_acquire:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amoswap.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_xchg_i16_acquire:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amoswap.h.aq a0, a1, (a0)
@@ -10076,47 +13141,47 @@ define i16 @atomicrmw_xchg_i16_release(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_xchg_i16_release:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: lui a3, 16
-; RV32IA-WMO-NEXT: addi a3, a3, -1
-; RV32IA-WMO-NEXT: sll a4, a3, a0
-; RV32IA-WMO-NEXT: and a1, a1, a3
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: .LBB67_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT: lr.w a3, (a2)
-; RV32IA-WMO-NEXT: mv a5, a1
-; RV32IA-WMO-NEXT: xor a5, a3, a5
-; RV32IA-WMO-NEXT: and a5, a5, a4
-; RV32IA-WMO-NEXT: xor a5, a3, a5
-; RV32IA-WMO-NEXT: sc.w.rl a5, a5, (a2)
-; RV32IA-WMO-NEXT: bnez a5, .LBB67_1
-; RV32IA-WMO-NEXT: # %bb.2:
-; RV32IA-WMO-NEXT: srl a0, a3, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_i16_release:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: .LBB67_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: mv a5, a1
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-NOZACAS-NEXT: and a5, a5, a4
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-NOZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: bnez a5, .LBB67_1
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_xchg_i16_release:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: lui a3, 16
-; RV32IA-TSO-NEXT: addi a3, a3, -1
-; RV32IA-TSO-NEXT: sll a4, a3, a0
-; RV32IA-TSO-NEXT: and a1, a1, a3
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: .LBB67_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT: lr.w a3, (a2)
-; RV32IA-TSO-NEXT: mv a5, a1
-; RV32IA-TSO-NEXT: xor a5, a3, a5
-; RV32IA-TSO-NEXT: and a5, a5, a4
-; RV32IA-TSO-NEXT: xor a5, a3, a5
-; RV32IA-TSO-NEXT: sc.w a5, a5, (a2)
-; RV32IA-TSO-NEXT: bnez a5, .LBB67_1
-; RV32IA-TSO-NEXT: # %bb.2:
-; RV32IA-TSO-NEXT: srl a0, a3, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_xchg_i16_release:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: .LBB67_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: mv a5, a1
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-NOZACAS-NEXT: and a5, a5, a4
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: bnez a5, .LBB67_1
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_xchg_i16_release:
; RV64I: # %bb.0:
@@ -10170,6 +13235,48 @@ define i16 @atomicrmw_xchg_i16_release(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a3, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_xchg_i16_release:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-ZACAS-NEXT: sll a4, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a3
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: .LBB67_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-WMO-ZACAS-NEXT: mv a5, a1
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a4
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB67_1
+; RV32IA-WMO-ZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_xchg_i16_release:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-ZACAS-NEXT: sll a4, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: and a1, a1, a3
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: .LBB67_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-TSO-ZACAS-NEXT: mv a5, a1
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a4
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT: bnez a5, .LBB67_1
+; RV32IA-TSO-ZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_xchg_i16_release:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -10212,6 +13319,16 @@ define i16 @atomicrmw_xchg_i16_release(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a3, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xchg_i16_release:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amoswap.h.rl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_xchg_i16_release:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amoswap.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_xchg_i16_release:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amoswap.h.rl a0, a1, (a0)
@@ -10236,47 +13353,47 @@ define i16 @atomicrmw_xchg_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_xchg_i16_acq_rel:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: lui a3, 16
-; RV32IA-WMO-NEXT: addi a3, a3, -1
-; RV32IA-WMO-NEXT: sll a4, a3, a0
-; RV32IA-WMO-NEXT: and a1, a1, a3
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: .LBB68_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT: lr.w.aq a3, (a2)
-; RV32IA-WMO-NEXT: mv a5, a1
-; RV32IA-WMO-NEXT: xor a5, a3, a5
-; RV32IA-WMO-NEXT: and a5, a5, a4
-; RV32IA-WMO-NEXT: xor a5, a3, a5
-; RV32IA-WMO-NEXT: sc.w.rl a5, a5, (a2)
-; RV32IA-WMO-NEXT: bnez a5, .LBB68_1
-; RV32IA-WMO-NEXT: # %bb.2:
-; RV32IA-WMO-NEXT: srl a0, a3, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_i16_acq_rel:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: .LBB68_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: lr.w.aq a3, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: mv a5, a1
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-NOZACAS-NEXT: and a5, a5, a4
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-NOZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: bnez a5, .LBB68_1
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_xchg_i16_acq_rel:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: lui a3, 16
-; RV32IA-TSO-NEXT: addi a3, a3, -1
-; RV32IA-TSO-NEXT: sll a4, a3, a0
-; RV32IA-TSO-NEXT: and a1, a1, a3
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: .LBB68_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT: lr.w a3, (a2)
-; RV32IA-TSO-NEXT: mv a5, a1
-; RV32IA-TSO-NEXT: xor a5, a3, a5
-; RV32IA-TSO-NEXT: and a5, a5, a4
-; RV32IA-TSO-NEXT: xor a5, a3, a5
-; RV32IA-TSO-NEXT: sc.w a5, a5, (a2)
-; RV32IA-TSO-NEXT: bnez a5, .LBB68_1
-; RV32IA-TSO-NEXT: # %bb.2:
-; RV32IA-TSO-NEXT: srl a0, a3, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_xchg_i16_acq_rel:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: .LBB68_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: mv a5, a1
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-NOZACAS-NEXT: and a5, a5, a4
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: bnez a5, .LBB68_1
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_xchg_i16_acq_rel:
; RV64I: # %bb.0:
@@ -10330,6 +13447,48 @@ define i16 @atomicrmw_xchg_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a3, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_xchg_i16_acq_rel:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-ZACAS-NEXT: sll a4, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a3
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: .LBB68_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a3, (a2)
+; RV32IA-WMO-ZACAS-NEXT: mv a5, a1
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a4
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB68_1
+; RV32IA-WMO-ZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_xchg_i16_acq_rel:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-ZACAS-NEXT: sll a4, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: and a1, a1, a3
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: .LBB68_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-TSO-ZACAS-NEXT: mv a5, a1
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a4
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT: bnez a5, .LBB68_1
+; RV32IA-TSO-ZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_xchg_i16_acq_rel:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -10372,6 +13531,16 @@ define i16 @atomicrmw_xchg_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a3, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xchg_i16_acq_rel:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amoswap.h.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_xchg_i16_acq_rel:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amoswap.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_xchg_i16_acq_rel:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amoswap.h.aqrl a0, a1, (a0)
@@ -10396,26 +13565,26 @@ define i16 @atomicrmw_xchg_i16_seq_cst(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-LABEL: atomicrmw_xchg_i16_seq_cst:
-; RV32IA: # %bb.0:
-; RV32IA-NEXT: andi a2, a0, -4
-; RV32IA-NEXT: slli a0, a0, 3
-; RV32IA-NEXT: lui a3, 16
-; RV32IA-NEXT: addi a3, a3, -1
-; RV32IA-NEXT: sll a4, a3, a0
-; RV32IA-NEXT: and a1, a1, a3
-; RV32IA-NEXT: sll a1, a1, a0
-; RV32IA-NEXT: .LBB69_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: lr.w.aqrl a3, (a2)
-; RV32IA-NEXT: mv a5, a1
-; RV32IA-NEXT: xor a5, a3, a5
-; RV32IA-NEXT: and a5, a5, a4
-; RV32IA-NEXT: xor a5, a3, a5
-; RV32IA-NEXT: sc.w.rl a5, a5, (a2)
-; RV32IA-NEXT: bnez a5, .LBB69_1
-; RV32IA-NEXT: # %bb.2:
-; RV32IA-NEXT: srl a0, a3, a0
-; RV32IA-NEXT: ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_xchg_i16_seq_cst:
+; RV32IA-NOZACAS: # %bb.0:
+; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT: lui a3, 16
+; RV32IA-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-NOZACAS-NEXT: .LBB69_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NOZACAS-NEXT: lr.w.aqrl a3, (a2)
+; RV32IA-NOZACAS-NEXT: mv a5, a1
+; RV32IA-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-NOZACAS-NEXT: and a5, a5, a4
+; RV32IA-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-NOZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-NOZACAS-NEXT: bnez a5, .LBB69_1
+; RV32IA-NOZACAS-NEXT: # %bb.2:
+; RV32IA-NOZACAS-NEXT: srl a0, a3, a0
+; RV32IA-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_xchg_i16_seq_cst:
; RV64I: # %bb.0:
@@ -10448,6 +13617,27 @@ define i16 @atomicrmw_xchg_i16_seq_cst(ptr %a, i16 %b) nounwind {
; RV64IA-NOZACAS-NEXT: srlw a0, a3, a0
; RV64IA-NOZACAS-NEXT: ret
;
+; RV32IA-ZACAS-LABEL: atomicrmw_xchg_i16_seq_cst:
+; RV32IA-ZACAS: # %bb.0:
+; RV32IA-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-ZACAS-NEXT: lui a3, 16
+; RV32IA-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-ZACAS-NEXT: sll a4, a3, a0
+; RV32IA-ZACAS-NEXT: and a1, a1, a3
+; RV32IA-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-ZACAS-NEXT: .LBB69_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT: lr.w.aqrl a3, (a2)
+; RV32IA-ZACAS-NEXT: mv a5, a1
+; RV32IA-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-ZACAS-NEXT: and a5, a5, a4
+; RV32IA-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-ZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-ZACAS-NEXT: bnez a5, .LBB69_1
+; RV32IA-ZACAS-NEXT: # %bb.2:
+; RV32IA-ZACAS-NEXT: srl a0, a3, a0
+; RV32IA-ZACAS-NEXT: ret
+;
; RV64IA-ZACAS-LABEL: atomicrmw_xchg_i16_seq_cst:
; RV64IA-ZACAS: # %bb.0:
; RV64IA-ZACAS-NEXT: andi a2, a0, -4
@@ -10469,6 +13659,16 @@ define i16 @atomicrmw_xchg_i16_seq_cst(ptr %a, i16 %b) nounwind {
; RV64IA-ZACAS-NEXT: srlw a0, a3, a0
; RV64IA-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xchg_i16_seq_cst:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amoswap.h.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_xchg_i16_seq_cst:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amoswap.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_xchg_i16_seq_cst:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amoswap.h.aqrl a0, a1, (a0)
@@ -10497,17 +13697,17 @@ define i16 @atomicrmw_xchg_0_i16_monotonic(ptr %a) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-LABEL: atomicrmw_xchg_0_i16_monotonic:
-; RV32IA: # %bb.0:
-; RV32IA-NEXT: andi a1, a0, -4
-; RV32IA-NEXT: slli a0, a0, 3
-; RV32IA-NEXT: lui a2, 16
-; RV32IA-NEXT: addi a2, a2, -1
-; RV32IA-NEXT: sll a2, a2, a0
-; RV32IA-NEXT: not a2, a2
-; RV32IA-NEXT: amoand.w a1, a2, (a1)
-; RV32IA-NEXT: srl a0, a1, a0
-; RV32IA-NEXT: ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_xchg_0_i16_monotonic:
+; RV32IA-NOZACAS: # %bb.0:
+; RV32IA-NOZACAS-NEXT: andi a1, a0, -4
+; RV32IA-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT: lui a2, 16
+; RV32IA-NOZACAS-NEXT: addi a2, a2, -1
+; RV32IA-NOZACAS-NEXT: sll a2, a2, a0
+; RV32IA-NOZACAS-NEXT: not a2, a2
+; RV32IA-NOZACAS-NEXT: amoand.w a1, a2, (a1)
+; RV32IA-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_xchg_0_i16_monotonic:
; RV64I: # %bb.0:
@@ -10532,6 +13732,18 @@ define i16 @atomicrmw_xchg_0_i16_monotonic(ptr %a) nounwind {
; RV64IA-NOZACAS-NEXT: srlw a0, a1, a0
; RV64IA-NOZACAS-NEXT: ret
;
+; RV32IA-ZACAS-LABEL: atomicrmw_xchg_0_i16_monotonic:
+; RV32IA-ZACAS: # %bb.0:
+; RV32IA-ZACAS-NEXT: andi a1, a0, -4
+; RV32IA-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-ZACAS-NEXT: lui a2, 16
+; RV32IA-ZACAS-NEXT: addi a2, a2, -1
+; RV32IA-ZACAS-NEXT: sll a2, a2, a0
+; RV32IA-ZACAS-NEXT: not a2, a2
+; RV32IA-ZACAS-NEXT: amoand.w a1, a2, (a1)
+; RV32IA-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-ZACAS-NEXT: ret
+;
; RV64IA-ZACAS-LABEL: atomicrmw_xchg_0_i16_monotonic:
; RV64IA-ZACAS: # %bb.0:
; RV64IA-ZACAS-NEXT: andi a1, a0, -4
@@ -10544,6 +13756,16 @@ define i16 @atomicrmw_xchg_0_i16_monotonic(ptr %a) nounwind {
; RV64IA-ZACAS-NEXT: srlw a0, a1, a0
; RV64IA-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xchg_0_i16_monotonic:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amoswap.h a0, zero, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_xchg_0_i16_monotonic:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amoswap.h a0, zero, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_xchg_0_i16_monotonic:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amoswap.h a0, zero, (a0)
@@ -10569,29 +13791,29 @@ define i16 @atomicrmw_xchg_0_i16_acquire(ptr %a) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_xchg_0_i16_acquire:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a1, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: lui a2, 16
-; RV32IA-WMO-NEXT: addi a2, a2, -1
-; RV32IA-WMO-NEXT: sll a2, a2, a0
-; RV32IA-WMO-NEXT: not a2, a2
-; RV32IA-WMO-NEXT: amoand.w.aq a1, a2, (a1)
-; RV32IA-WMO-NEXT: srl a0, a1, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_0_i16_acquire:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a1, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: lui a2, 16
+; RV32IA-WMO-NOZACAS-NEXT: addi a2, a2, -1
+; RV32IA-WMO-NOZACAS-NEXT: sll a2, a2, a0
+; RV32IA-WMO-NOZACAS-NEXT: not a2, a2
+; RV32IA-WMO-NOZACAS-NEXT: amoand.w.aq a1, a2, (a1)
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_xchg_0_i16_acquire:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a1, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: lui a2, 16
-; RV32IA-TSO-NEXT: addi a2, a2, -1
-; RV32IA-TSO-NEXT: sll a2, a2, a0
-; RV32IA-TSO-NEXT: not a2, a2
-; RV32IA-TSO-NEXT: amoand.w a1, a2, (a1)
-; RV32IA-TSO-NEXT: srl a0, a1, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_xchg_0_i16_acquire:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a1, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: lui a2, 16
+; RV32IA-TSO-NOZACAS-NEXT: addi a2, a2, -1
+; RV32IA-TSO-NOZACAS-NEXT: sll a2, a2, a0
+; RV32IA-TSO-NOZACAS-NEXT: not a2, a2
+; RV32IA-TSO-NOZACAS-NEXT: amoand.w a1, a2, (a1)
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_xchg_0_i16_acquire:
; RV64I: # %bb.0:
@@ -10628,6 +13850,30 @@ define i16 @atomicrmw_xchg_0_i16_acquire(ptr %a) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_xchg_0_i16_acquire:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a1, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: lui a2, 16
+; RV32IA-WMO-ZACAS-NEXT: addi a2, a2, -1
+; RV32IA-WMO-ZACAS-NEXT: sll a2, a2, a0
+; RV32IA-WMO-ZACAS-NEXT: not a2, a2
+; RV32IA-WMO-ZACAS-NEXT: amoand.w.aq a1, a2, (a1)
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_xchg_0_i16_acquire:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a1, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: lui a2, 16
+; RV32IA-TSO-ZACAS-NEXT: addi a2, a2, -1
+; RV32IA-TSO-ZACAS-NEXT: sll a2, a2, a0
+; RV32IA-TSO-ZACAS-NEXT: not a2, a2
+; RV32IA-TSO-ZACAS-NEXT: amoand.w a1, a2, (a1)
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_xchg_0_i16_acquire:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a1, a0, -4
@@ -10652,6 +13898,16 @@ define i16 @atomicrmw_xchg_0_i16_acquire(ptr %a) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xchg_0_i16_acquire:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amoswap.h.aq a0, zero, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_xchg_0_i16_acquire:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amoswap.h a0, zero, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_xchg_0_i16_acquire:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amoswap.h.aq a0, zero, (a0)
@@ -10677,29 +13933,29 @@ define i16 @atomicrmw_xchg_0_i16_release(ptr %a) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_xchg_0_i16_release:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a1, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: lui a2, 16
-; RV32IA-WMO-NEXT: addi a2, a2, -1
-; RV32IA-WMO-NEXT: sll a2, a2, a0
-; RV32IA-WMO-NEXT: not a2, a2
-; RV32IA-WMO-NEXT: amoand.w.rl a1, a2, (a1)
-; RV32IA-WMO-NEXT: srl a0, a1, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_0_i16_release:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a1, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: lui a2, 16
+; RV32IA-WMO-NOZACAS-NEXT: addi a2, a2, -1
+; RV32IA-WMO-NOZACAS-NEXT: sll a2, a2, a0
+; RV32IA-WMO-NOZACAS-NEXT: not a2, a2
+; RV32IA-WMO-NOZACAS-NEXT: amoand.w.rl a1, a2, (a1)
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_xchg_0_i16_release:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a1, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: lui a2, 16
-; RV32IA-TSO-NEXT: addi a2, a2, -1
-; RV32IA-TSO-NEXT: sll a2, a2, a0
-; RV32IA-TSO-NEXT: not a2, a2
-; RV32IA-TSO-NEXT: amoand.w a1, a2, (a1)
-; RV32IA-TSO-NEXT: srl a0, a1, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_xchg_0_i16_release:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a1, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: lui a2, 16
+; RV32IA-TSO-NOZACAS-NEXT: addi a2, a2, -1
+; RV32IA-TSO-NOZACAS-NEXT: sll a2, a2, a0
+; RV32IA-TSO-NOZACAS-NEXT: not a2, a2
+; RV32IA-TSO-NOZACAS-NEXT: amoand.w a1, a2, (a1)
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_xchg_0_i16_release:
; RV64I: # %bb.0:
@@ -10736,6 +13992,30 @@ define i16 @atomicrmw_xchg_0_i16_release(ptr %a) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_xchg_0_i16_release:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a1, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: lui a2, 16
+; RV32IA-WMO-ZACAS-NEXT: addi a2, a2, -1
+; RV32IA-WMO-ZACAS-NEXT: sll a2, a2, a0
+; RV32IA-WMO-ZACAS-NEXT: not a2, a2
+; RV32IA-WMO-ZACAS-NEXT: amoand.w.rl a1, a2, (a1)
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_xchg_0_i16_release:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a1, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: lui a2, 16
+; RV32IA-TSO-ZACAS-NEXT: addi a2, a2, -1
+; RV32IA-TSO-ZACAS-NEXT: sll a2, a2, a0
+; RV32IA-TSO-ZACAS-NEXT: not a2, a2
+; RV32IA-TSO-ZACAS-NEXT: amoand.w a1, a2, (a1)
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_xchg_0_i16_release:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a1, a0, -4
@@ -10760,6 +14040,16 @@ define i16 @atomicrmw_xchg_0_i16_release(ptr %a) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xchg_0_i16_release:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amoswap.h.rl a0, zero, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_xchg_0_i16_release:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amoswap.h a0, zero, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_xchg_0_i16_release:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amoswap.h.rl a0, zero, (a0)
@@ -10785,29 +14075,29 @@ define i16 @atomicrmw_xchg_0_i16_acq_rel(ptr %a) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_xchg_0_i16_acq_rel:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a1, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: lui a2, 16
-; RV32IA-WMO-NEXT: addi a2, a2, -1
-; RV32IA-WMO-NEXT: sll a2, a2, a0
-; RV32IA-WMO-NEXT: not a2, a2
-; RV32IA-WMO-NEXT: amoand.w.aqrl a1, a2, (a1)
-; RV32IA-WMO-NEXT: srl a0, a1, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_0_i16_acq_rel:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a1, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: lui a2, 16
+; RV32IA-WMO-NOZACAS-NEXT: addi a2, a2, -1
+; RV32IA-WMO-NOZACAS-NEXT: sll a2, a2, a0
+; RV32IA-WMO-NOZACAS-NEXT: not a2, a2
+; RV32IA-WMO-NOZACAS-NEXT: amoand.w.aqrl a1, a2, (a1)
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_xchg_0_i16_acq_rel:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a1, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: lui a2, 16
-; RV32IA-TSO-NEXT: addi a2, a2, -1
-; RV32IA-TSO-NEXT: sll a2, a2, a0
-; RV32IA-TSO-NEXT: not a2, a2
-; RV32IA-TSO-NEXT: amoand.w a1, a2, (a1)
-; RV32IA-TSO-NEXT: srl a0, a1, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_xchg_0_i16_acq_rel:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a1, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: lui a2, 16
+; RV32IA-TSO-NOZACAS-NEXT: addi a2, a2, -1
+; RV32IA-TSO-NOZACAS-NEXT: sll a2, a2, a0
+; RV32IA-TSO-NOZACAS-NEXT: not a2, a2
+; RV32IA-TSO-NOZACAS-NEXT: amoand.w a1, a2, (a1)
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_xchg_0_i16_acq_rel:
; RV64I: # %bb.0:
@@ -10844,6 +14134,30 @@ define i16 @atomicrmw_xchg_0_i16_acq_rel(ptr %a) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_xchg_0_i16_acq_rel:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a1, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: lui a2, 16
+; RV32IA-WMO-ZACAS-NEXT: addi a2, a2, -1
+; RV32IA-WMO-ZACAS-NEXT: sll a2, a2, a0
+; RV32IA-WMO-ZACAS-NEXT: not a2, a2
+; RV32IA-WMO-ZACAS-NEXT: amoand.w.aqrl a1, a2, (a1)
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_xchg_0_i16_acq_rel:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a1, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: lui a2, 16
+; RV32IA-TSO-ZACAS-NEXT: addi a2, a2, -1
+; RV32IA-TSO-ZACAS-NEXT: sll a2, a2, a0
+; RV32IA-TSO-ZACAS-NEXT: not a2, a2
+; RV32IA-TSO-ZACAS-NEXT: amoand.w a1, a2, (a1)
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_xchg_0_i16_acq_rel:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a1, a0, -4
@@ -10868,6 +14182,16 @@ define i16 @atomicrmw_xchg_0_i16_acq_rel(ptr %a) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xchg_0_i16_acq_rel:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amoswap.h.aqrl a0, zero, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_xchg_0_i16_acq_rel:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amoswap.h a0, zero, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_xchg_0_i16_acq_rel:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amoswap.h.aqrl a0, zero, (a0)
@@ -10893,29 +14217,29 @@ define i16 @atomicrmw_xchg_0_i16_seq_cst(ptr %a) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_xchg_0_i16_seq_cst:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a1, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: lui a2, 16
-; RV32IA-WMO-NEXT: addi a2, a2, -1
-; RV32IA-WMO-NEXT: sll a2, a2, a0
-; RV32IA-WMO-NEXT: not a2, a2
-; RV32IA-WMO-NEXT: amoand.w.aqrl a1, a2, (a1)
-; RV32IA-WMO-NEXT: srl a0, a1, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_0_i16_seq_cst:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a1, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: lui a2, 16
+; RV32IA-WMO-NOZACAS-NEXT: addi a2, a2, -1
+; RV32IA-WMO-NOZACAS-NEXT: sll a2, a2, a0
+; RV32IA-WMO-NOZACAS-NEXT: not a2, a2
+; RV32IA-WMO-NOZACAS-NEXT: amoand.w.aqrl a1, a2, (a1)
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_xchg_0_i16_seq_cst:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a1, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: lui a2, 16
-; RV32IA-TSO-NEXT: addi a2, a2, -1
-; RV32IA-TSO-NEXT: sll a2, a2, a0
-; RV32IA-TSO-NEXT: not a2, a2
-; RV32IA-TSO-NEXT: amoand.w a1, a2, (a1)
-; RV32IA-TSO-NEXT: srl a0, a1, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_xchg_0_i16_seq_cst:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a1, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: lui a2, 16
+; RV32IA-TSO-NOZACAS-NEXT: addi a2, a2, -1
+; RV32IA-TSO-NOZACAS-NEXT: sll a2, a2, a0
+; RV32IA-TSO-NOZACAS-NEXT: not a2, a2
+; RV32IA-TSO-NOZACAS-NEXT: amoand.w a1, a2, (a1)
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_xchg_0_i16_seq_cst:
; RV64I: # %bb.0:
@@ -10952,6 +14276,30 @@ define i16 @atomicrmw_xchg_0_i16_seq_cst(ptr %a) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_xchg_0_i16_seq_cst:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a1, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: lui a2, 16
+; RV32IA-WMO-ZACAS-NEXT: addi a2, a2, -1
+; RV32IA-WMO-ZACAS-NEXT: sll a2, a2, a0
+; RV32IA-WMO-ZACAS-NEXT: not a2, a2
+; RV32IA-WMO-ZACAS-NEXT: amoand.w.aqrl a1, a2, (a1)
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_xchg_0_i16_seq_cst:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a1, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: lui a2, 16
+; RV32IA-TSO-ZACAS-NEXT: addi a2, a2, -1
+; RV32IA-TSO-ZACAS-NEXT: sll a2, a2, a0
+; RV32IA-TSO-ZACAS-NEXT: not a2, a2
+; RV32IA-TSO-ZACAS-NEXT: amoand.w a1, a2, (a1)
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_xchg_0_i16_seq_cst:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a1, a0, -4
@@ -10976,6 +14324,16 @@ define i16 @atomicrmw_xchg_0_i16_seq_cst(ptr %a) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xchg_0_i16_seq_cst:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amoswap.h.aqrl a0, zero, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_xchg_0_i16_seq_cst:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amoswap.h a0, zero, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_xchg_0_i16_seq_cst:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amoswap.h.aqrl a0, zero, (a0)
@@ -11002,16 +14360,16 @@ define i16 @atomicrmw_xchg_minus_1_i16_monotonic(ptr %a) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-LABEL: atomicrmw_xchg_minus_1_i16_monotonic:
-; RV32IA: # %bb.0:
-; RV32IA-NEXT: andi a1, a0, -4
-; RV32IA-NEXT: slli a0, a0, 3
-; RV32IA-NEXT: lui a2, 16
-; RV32IA-NEXT: addi a2, a2, -1
-; RV32IA-NEXT: sll a2, a2, a0
-; RV32IA-NEXT: amoor.w a1, a2, (a1)
-; RV32IA-NEXT: srl a0, a1, a0
-; RV32IA-NEXT: ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i16_monotonic:
+; RV32IA-NOZACAS: # %bb.0:
+; RV32IA-NOZACAS-NEXT: andi a1, a0, -4
+; RV32IA-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT: lui a2, 16
+; RV32IA-NOZACAS-NEXT: addi a2, a2, -1
+; RV32IA-NOZACAS-NEXT: sll a2, a2, a0
+; RV32IA-NOZACAS-NEXT: amoor.w a1, a2, (a1)
+; RV32IA-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_xchg_minus_1_i16_monotonic:
; RV64I: # %bb.0:
@@ -11036,6 +14394,17 @@ define i16 @atomicrmw_xchg_minus_1_i16_monotonic(ptr %a) nounwind {
; RV64IA-NOZACAS-NEXT: srlw a0, a1, a0
; RV64IA-NOZACAS-NEXT: ret
;
+; RV32IA-ZACAS-LABEL: atomicrmw_xchg_minus_1_i16_monotonic:
+; RV32IA-ZACAS: # %bb.0:
+; RV32IA-ZACAS-NEXT: andi a1, a0, -4
+; RV32IA-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-ZACAS-NEXT: lui a2, 16
+; RV32IA-ZACAS-NEXT: addi a2, a2, -1
+; RV32IA-ZACAS-NEXT: sll a2, a2, a0
+; RV32IA-ZACAS-NEXT: amoor.w a1, a2, (a1)
+; RV32IA-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-ZACAS-NEXT: ret
+;
; RV64IA-ZACAS-LABEL: atomicrmw_xchg_minus_1_i16_monotonic:
; RV64IA-ZACAS: # %bb.0:
; RV64IA-ZACAS-NEXT: andi a1, a0, -4
@@ -11047,6 +14416,18 @@ define i16 @atomicrmw_xchg_minus_1_i16_monotonic(ptr %a) nounwind {
; RV64IA-ZACAS-NEXT: srlw a0, a1, a0
; RV64IA-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xchg_minus_1_i16_monotonic:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: li a1, -1
+; RV32IA-WMO-ZABHA-NEXT: amoswap.h a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_xchg_minus_1_i16_monotonic:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: li a1, -1
+; RV32IA-TSO-ZABHA-NEXT: amoswap.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_xchg_minus_1_i16_monotonic:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: li a1, -1
@@ -11075,27 +14456,27 @@ define i16 @atomicrmw_xchg_minus_1_i16_acquire(ptr %a) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_xchg_minus_1_i16_acquire:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a1, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: lui a2, 16
-; RV32IA-WMO-NEXT: addi a2, a2, -1
-; RV32IA-WMO-NEXT: sll a2, a2, a0
-; RV32IA-WMO-NEXT: amoor.w.aq a1, a2, (a1)
-; RV32IA-WMO-NEXT: srl a0, a1, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i16_acquire:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a1, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: lui a2, 16
+; RV32IA-WMO-NOZACAS-NEXT: addi a2, a2, -1
+; RV32IA-WMO-NOZACAS-NEXT: sll a2, a2, a0
+; RV32IA-WMO-NOZACAS-NEXT: amoor.w.aq a1, a2, (a1)
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_xchg_minus_1_i16_acquire:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a1, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: lui a2, 16
-; RV32IA-TSO-NEXT: addi a2, a2, -1
-; RV32IA-TSO-NEXT: sll a2, a2, a0
-; RV32IA-TSO-NEXT: amoor.w a1, a2, (a1)
-; RV32IA-TSO-NEXT: srl a0, a1, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i16_acquire:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a1, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: lui a2, 16
+; RV32IA-TSO-NOZACAS-NEXT: addi a2, a2, -1
+; RV32IA-TSO-NOZACAS-NEXT: sll a2, a2, a0
+; RV32IA-TSO-NOZACAS-NEXT: amoor.w a1, a2, (a1)
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_xchg_minus_1_i16_acquire:
; RV64I: # %bb.0:
@@ -11131,6 +14512,28 @@ define i16 @atomicrmw_xchg_minus_1_i16_acquire(ptr %a) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_xchg_minus_1_i16_acquire:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a1, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: lui a2, 16
+; RV32IA-WMO-ZACAS-NEXT: addi a2, a2, -1
+; RV32IA-WMO-ZACAS-NEXT: sll a2, a2, a0
+; RV32IA-WMO-ZACAS-NEXT: amoor.w.aq a1, a2, (a1)
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_xchg_minus_1_i16_acquire:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a1, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: lui a2, 16
+; RV32IA-TSO-ZACAS-NEXT: addi a2, a2, -1
+; RV32IA-TSO-ZACAS-NEXT: sll a2, a2, a0
+; RV32IA-TSO-ZACAS-NEXT: amoor.w a1, a2, (a1)
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_xchg_minus_1_i16_acquire:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a1, a0, -4
@@ -11153,6 +14556,18 @@ define i16 @atomicrmw_xchg_minus_1_i16_acquire(ptr %a) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xchg_minus_1_i16_acquire:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: li a1, -1
+; RV32IA-WMO-ZABHA-NEXT: amoswap.h.aq a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_xchg_minus_1_i16_acquire:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: li a1, -1
+; RV32IA-TSO-ZABHA-NEXT: amoswap.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_xchg_minus_1_i16_acquire:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: li a1, -1
@@ -11181,27 +14596,27 @@ define i16 @atomicrmw_xchg_minus_1_i16_release(ptr %a) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_xchg_minus_1_i16_release:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a1, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: lui a2, 16
-; RV32IA-WMO-NEXT: addi a2, a2, -1
-; RV32IA-WMO-NEXT: sll a2, a2, a0
-; RV32IA-WMO-NEXT: amoor.w.rl a1, a2, (a1)
-; RV32IA-WMO-NEXT: srl a0, a1, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i16_release:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a1, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: lui a2, 16
+; RV32IA-WMO-NOZACAS-NEXT: addi a2, a2, -1
+; RV32IA-WMO-NOZACAS-NEXT: sll a2, a2, a0
+; RV32IA-WMO-NOZACAS-NEXT: amoor.w.rl a1, a2, (a1)
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_xchg_minus_1_i16_release:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a1, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: lui a2, 16
-; RV32IA-TSO-NEXT: addi a2, a2, -1
-; RV32IA-TSO-NEXT: sll a2, a2, a0
-; RV32IA-TSO-NEXT: amoor.w a1, a2, (a1)
-; RV32IA-TSO-NEXT: srl a0, a1, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i16_release:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a1, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: lui a2, 16
+; RV32IA-TSO-NOZACAS-NEXT: addi a2, a2, -1
+; RV32IA-TSO-NOZACAS-NEXT: sll a2, a2, a0
+; RV32IA-TSO-NOZACAS-NEXT: amoor.w a1, a2, (a1)
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_xchg_minus_1_i16_release:
; RV64I: # %bb.0:
@@ -11237,6 +14652,28 @@ define i16 @atomicrmw_xchg_minus_1_i16_release(ptr %a) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_xchg_minus_1_i16_release:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a1, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: lui a2, 16
+; RV32IA-WMO-ZACAS-NEXT: addi a2, a2, -1
+; RV32IA-WMO-ZACAS-NEXT: sll a2, a2, a0
+; RV32IA-WMO-ZACAS-NEXT: amoor.w.rl a1, a2, (a1)
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_xchg_minus_1_i16_release:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a1, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: lui a2, 16
+; RV32IA-TSO-ZACAS-NEXT: addi a2, a2, -1
+; RV32IA-TSO-ZACAS-NEXT: sll a2, a2, a0
+; RV32IA-TSO-ZACAS-NEXT: amoor.w a1, a2, (a1)
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_xchg_minus_1_i16_release:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a1, a0, -4
@@ -11259,6 +14696,18 @@ define i16 @atomicrmw_xchg_minus_1_i16_release(ptr %a) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xchg_minus_1_i16_release:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: li a1, -1
+; RV32IA-WMO-ZABHA-NEXT: amoswap.h.rl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_xchg_minus_1_i16_release:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: li a1, -1
+; RV32IA-TSO-ZABHA-NEXT: amoswap.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_xchg_minus_1_i16_release:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: li a1, -1
@@ -11287,27 +14736,27 @@ define i16 @atomicrmw_xchg_minus_1_i16_acq_rel(ptr %a) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_xchg_minus_1_i16_acq_rel:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a1, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: lui a2, 16
-; RV32IA-WMO-NEXT: addi a2, a2, -1
-; RV32IA-WMO-NEXT: sll a2, a2, a0
-; RV32IA-WMO-NEXT: amoor.w.aqrl a1, a2, (a1)
-; RV32IA-WMO-NEXT: srl a0, a1, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i16_acq_rel:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a1, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: lui a2, 16
+; RV32IA-WMO-NOZACAS-NEXT: addi a2, a2, -1
+; RV32IA-WMO-NOZACAS-NEXT: sll a2, a2, a0
+; RV32IA-WMO-NOZACAS-NEXT: amoor.w.aqrl a1, a2, (a1)
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_xchg_minus_1_i16_acq_rel:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a1, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: lui a2, 16
-; RV32IA-TSO-NEXT: addi a2, a2, -1
-; RV32IA-TSO-NEXT: sll a2, a2, a0
-; RV32IA-TSO-NEXT: amoor.w a1, a2, (a1)
-; RV32IA-TSO-NEXT: srl a0, a1, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i16_acq_rel:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a1, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: lui a2, 16
+; RV32IA-TSO-NOZACAS-NEXT: addi a2, a2, -1
+; RV32IA-TSO-NOZACAS-NEXT: sll a2, a2, a0
+; RV32IA-TSO-NOZACAS-NEXT: amoor.w a1, a2, (a1)
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_xchg_minus_1_i16_acq_rel:
; RV64I: # %bb.0:
@@ -11343,6 +14792,28 @@ define i16 @atomicrmw_xchg_minus_1_i16_acq_rel(ptr %a) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_xchg_minus_1_i16_acq_rel:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a1, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: lui a2, 16
+; RV32IA-WMO-ZACAS-NEXT: addi a2, a2, -1
+; RV32IA-WMO-ZACAS-NEXT: sll a2, a2, a0
+; RV32IA-WMO-ZACAS-NEXT: amoor.w.aqrl a1, a2, (a1)
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_xchg_minus_1_i16_acq_rel:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a1, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: lui a2, 16
+; RV32IA-TSO-ZACAS-NEXT: addi a2, a2, -1
+; RV32IA-TSO-ZACAS-NEXT: sll a2, a2, a0
+; RV32IA-TSO-ZACAS-NEXT: amoor.w a1, a2, (a1)
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_xchg_minus_1_i16_acq_rel:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a1, a0, -4
@@ -11365,6 +14836,18 @@ define i16 @atomicrmw_xchg_minus_1_i16_acq_rel(ptr %a) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xchg_minus_1_i16_acq_rel:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: li a1, -1
+; RV32IA-WMO-ZABHA-NEXT: amoswap.h.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_xchg_minus_1_i16_acq_rel:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: li a1, -1
+; RV32IA-TSO-ZABHA-NEXT: amoswap.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_xchg_minus_1_i16_acq_rel:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: li a1, -1
@@ -11393,27 +14876,27 @@ define i16 @atomicrmw_xchg_minus_1_i16_seq_cst(ptr %a) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_xchg_minus_1_i16_seq_cst:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a1, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: lui a2, 16
-; RV32IA-WMO-NEXT: addi a2, a2, -1
-; RV32IA-WMO-NEXT: sll a2, a2, a0
-; RV32IA-WMO-NEXT: amoor.w.aqrl a1, a2, (a1)
-; RV32IA-WMO-NEXT: srl a0, a1, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i16_seq_cst:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a1, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: lui a2, 16
+; RV32IA-WMO-NOZACAS-NEXT: addi a2, a2, -1
+; RV32IA-WMO-NOZACAS-NEXT: sll a2, a2, a0
+; RV32IA-WMO-NOZACAS-NEXT: amoor.w.aqrl a1, a2, (a1)
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_xchg_minus_1_i16_seq_cst:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a1, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: lui a2, 16
-; RV32IA-TSO-NEXT: addi a2, a2, -1
-; RV32IA-TSO-NEXT: sll a2, a2, a0
-; RV32IA-TSO-NEXT: amoor.w a1, a2, (a1)
-; RV32IA-TSO-NEXT: srl a0, a1, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i16_seq_cst:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a1, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: lui a2, 16
+; RV32IA-TSO-NOZACAS-NEXT: addi a2, a2, -1
+; RV32IA-TSO-NOZACAS-NEXT: sll a2, a2, a0
+; RV32IA-TSO-NOZACAS-NEXT: amoor.w a1, a2, (a1)
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_xchg_minus_1_i16_seq_cst:
; RV64I: # %bb.0:
@@ -11449,6 +14932,28 @@ define i16 @atomicrmw_xchg_minus_1_i16_seq_cst(ptr %a) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_xchg_minus_1_i16_seq_cst:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a1, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: lui a2, 16
+; RV32IA-WMO-ZACAS-NEXT: addi a2, a2, -1
+; RV32IA-WMO-ZACAS-NEXT: sll a2, a2, a0
+; RV32IA-WMO-ZACAS-NEXT: amoor.w.aqrl a1, a2, (a1)
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_xchg_minus_1_i16_seq_cst:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a1, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: lui a2, 16
+; RV32IA-TSO-ZACAS-NEXT: addi a2, a2, -1
+; RV32IA-TSO-ZACAS-NEXT: sll a2, a2, a0
+; RV32IA-TSO-ZACAS-NEXT: amoor.w a1, a2, (a1)
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_xchg_minus_1_i16_seq_cst:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a1, a0, -4
@@ -11471,6 +14976,18 @@ define i16 @atomicrmw_xchg_minus_1_i16_seq_cst(ptr %a) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xchg_minus_1_i16_seq_cst:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: li a1, -1
+; RV32IA-WMO-ZABHA-NEXT: amoswap.h.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_xchg_minus_1_i16_seq_cst:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: li a1, -1
+; RV32IA-TSO-ZABHA-NEXT: amoswap.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_xchg_minus_1_i16_seq_cst:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: li a1, -1
@@ -11497,26 +15014,26 @@ define i16 @atomicrmw_add_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-LABEL: atomicrmw_add_i16_monotonic:
-; RV32IA: # %bb.0:
-; RV32IA-NEXT: andi a2, a0, -4
-; RV32IA-NEXT: slli a0, a0, 3
-; RV32IA-NEXT: lui a3, 16
-; RV32IA-NEXT: addi a3, a3, -1
-; RV32IA-NEXT: sll a4, a3, a0
-; RV32IA-NEXT: and a1, a1, a3
-; RV32IA-NEXT: sll a1, a1, a0
-; RV32IA-NEXT: .LBB80_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: lr.w a3, (a2)
-; RV32IA-NEXT: add a5, a3, a1
-; RV32IA-NEXT: xor a5, a3, a5
-; RV32IA-NEXT: and a5, a5, a4
-; RV32IA-NEXT: xor a5, a3, a5
-; RV32IA-NEXT: sc.w a5, a5, (a2)
-; RV32IA-NEXT: bnez a5, .LBB80_1
-; RV32IA-NEXT: # %bb.2:
-; RV32IA-NEXT: srl a0, a3, a0
-; RV32IA-NEXT: ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_add_i16_monotonic:
+; RV32IA-NOZACAS: # %bb.0:
+; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT: lui a3, 16
+; RV32IA-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-NOZACAS-NEXT: .LBB80_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NOZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-NOZACAS-NEXT: add a5, a3, a1
+; RV32IA-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-NOZACAS-NEXT: and a5, a5, a4
+; RV32IA-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-NOZACAS-NEXT: bnez a5, .LBB80_1
+; RV32IA-NOZACAS-NEXT: # %bb.2:
+; RV32IA-NOZACAS-NEXT: srl a0, a3, a0
+; RV32IA-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_add_i16_monotonic:
; RV64I: # %bb.0:
@@ -11549,6 +15066,27 @@ define i16 @atomicrmw_add_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV64IA-NOZACAS-NEXT: srlw a0, a3, a0
; RV64IA-NOZACAS-NEXT: ret
;
+; RV32IA-ZACAS-LABEL: atomicrmw_add_i16_monotonic:
+; RV32IA-ZACAS: # %bb.0:
+; RV32IA-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-ZACAS-NEXT: lui a3, 16
+; RV32IA-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-ZACAS-NEXT: sll a4, a3, a0
+; RV32IA-ZACAS-NEXT: and a1, a1, a3
+; RV32IA-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-ZACAS-NEXT: .LBB80_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-ZACAS-NEXT: add a5, a3, a1
+; RV32IA-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-ZACAS-NEXT: and a5, a5, a4
+; RV32IA-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-ZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-ZACAS-NEXT: bnez a5, .LBB80_1
+; RV32IA-ZACAS-NEXT: # %bb.2:
+; RV32IA-ZACAS-NEXT: srl a0, a3, a0
+; RV32IA-ZACAS-NEXT: ret
+;
; RV64IA-ZACAS-LABEL: atomicrmw_add_i16_monotonic:
; RV64IA-ZACAS: # %bb.0:
; RV64IA-ZACAS-NEXT: andi a2, a0, -4
@@ -11570,6 +15108,16 @@ define i16 @atomicrmw_add_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV64IA-ZACAS-NEXT: srlw a0, a3, a0
; RV64IA-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_add_i16_monotonic:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amoadd.h a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_add_i16_monotonic:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amoadd.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_add_i16_monotonic:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amoadd.h a0, a1, (a0)
@@ -11594,47 +15142,47 @@ define i16 @atomicrmw_add_i16_acquire(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_add_i16_acquire:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: lui a3, 16
-; RV32IA-WMO-NEXT: addi a3, a3, -1
-; RV32IA-WMO-NEXT: sll a4, a3, a0
-; RV32IA-WMO-NEXT: and a1, a1, a3
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: .LBB81_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT: lr.w.aq a3, (a2)
-; RV32IA-WMO-NEXT: add a5, a3, a1
-; RV32IA-WMO-NEXT: xor a5, a3, a5
-; RV32IA-WMO-NEXT: and a5, a5, a4
-; RV32IA-WMO-NEXT: xor a5, a3, a5
-; RV32IA-WMO-NEXT: sc.w a5, a5, (a2)
-; RV32IA-WMO-NEXT: bnez a5, .LBB81_1
-; RV32IA-WMO-NEXT: # %bb.2:
-; RV32IA-WMO-NEXT: srl a0, a3, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_add_i16_acquire:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: .LBB81_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: lr.w.aq a3, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: add a5, a3, a1
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-NOZACAS-NEXT: and a5, a5, a4
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: bnez a5, .LBB81_1
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_add_i16_acquire:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: lui a3, 16
-; RV32IA-TSO-NEXT: addi a3, a3, -1
-; RV32IA-TSO-NEXT: sll a4, a3, a0
-; RV32IA-TSO-NEXT: and a1, a1, a3
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: .LBB81_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT: lr.w a3, (a2)
-; RV32IA-TSO-NEXT: add a5, a3, a1
-; RV32IA-TSO-NEXT: xor a5, a3, a5
-; RV32IA-TSO-NEXT: and a5, a5, a4
-; RV32IA-TSO-NEXT: xor a5, a3, a5
-; RV32IA-TSO-NEXT: sc.w a5, a5, (a2)
-; RV32IA-TSO-NEXT: bnez a5, .LBB81_1
-; RV32IA-TSO-NEXT: # %bb.2:
-; RV32IA-TSO-NEXT: srl a0, a3, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_add_i16_acquire:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: .LBB81_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: add a5, a3, a1
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-NOZACAS-NEXT: and a5, a5, a4
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: bnez a5, .LBB81_1
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_add_i16_acquire:
; RV64I: # %bb.0:
@@ -11688,6 +15236,48 @@ define i16 @atomicrmw_add_i16_acquire(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a3, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_add_i16_acquire:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-ZACAS-NEXT: sll a4, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a3
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: .LBB81_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a3, (a2)
+; RV32IA-WMO-ZACAS-NEXT: add a5, a3, a1
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a4
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-ZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB81_1
+; RV32IA-WMO-ZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_add_i16_acquire:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-ZACAS-NEXT: sll a4, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: and a1, a1, a3
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: .LBB81_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-TSO-ZACAS-NEXT: add a5, a3, a1
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a4
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT: bnez a5, .LBB81_1
+; RV32IA-TSO-ZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_add_i16_acquire:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -11730,6 +15320,16 @@ define i16 @atomicrmw_add_i16_acquire(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a3, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_add_i16_acquire:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amoadd.h.aq a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_add_i16_acquire:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amoadd.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_add_i16_acquire:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amoadd.h.aq a0, a1, (a0)
@@ -11754,47 +15354,47 @@ define i16 @atomicrmw_add_i16_release(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_add_i16_release:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: lui a3, 16
-; RV32IA-WMO-NEXT: addi a3, a3, -1
-; RV32IA-WMO-NEXT: sll a4, a3, a0
-; RV32IA-WMO-NEXT: and a1, a1, a3
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: .LBB82_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT: lr.w a3, (a2)
-; RV32IA-WMO-NEXT: add a5, a3, a1
-; RV32IA-WMO-NEXT: xor a5, a3, a5
-; RV32IA-WMO-NEXT: and a5, a5, a4
-; RV32IA-WMO-NEXT: xor a5, a3, a5
-; RV32IA-WMO-NEXT: sc.w.rl a5, a5, (a2)
-; RV32IA-WMO-NEXT: bnez a5, .LBB82_1
-; RV32IA-WMO-NEXT: # %bb.2:
-; RV32IA-WMO-NEXT: srl a0, a3, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_add_i16_release:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: .LBB82_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: add a5, a3, a1
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-NOZACAS-NEXT: and a5, a5, a4
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-NOZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: bnez a5, .LBB82_1
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_add_i16_release:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: lui a3, 16
-; RV32IA-TSO-NEXT: addi a3, a3, -1
-; RV32IA-TSO-NEXT: sll a4, a3, a0
-; RV32IA-TSO-NEXT: and a1, a1, a3
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: .LBB82_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT: lr.w a3, (a2)
-; RV32IA-TSO-NEXT: add a5, a3, a1
-; RV32IA-TSO-NEXT: xor a5, a3, a5
-; RV32IA-TSO-NEXT: and a5, a5, a4
-; RV32IA-TSO-NEXT: xor a5, a3, a5
-; RV32IA-TSO-NEXT: sc.w a5, a5, (a2)
-; RV32IA-TSO-NEXT: bnez a5, .LBB82_1
-; RV32IA-TSO-NEXT: # %bb.2:
-; RV32IA-TSO-NEXT: srl a0, a3, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_add_i16_release:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: .LBB82_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: add a5, a3, a1
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-NOZACAS-NEXT: and a5, a5, a4
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: bnez a5, .LBB82_1
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_add_i16_release:
; RV64I: # %bb.0:
@@ -11848,6 +15448,48 @@ define i16 @atomicrmw_add_i16_release(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a3, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_add_i16_release:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-ZACAS-NEXT: sll a4, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a3
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: .LBB82_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-WMO-ZACAS-NEXT: add a5, a3, a1
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a4
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB82_1
+; RV32IA-WMO-ZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_add_i16_release:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-ZACAS-NEXT: sll a4, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: and a1, a1, a3
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: .LBB82_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-TSO-ZACAS-NEXT: add a5, a3, a1
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a4
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT: bnez a5, .LBB82_1
+; RV32IA-TSO-ZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_add_i16_release:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -11890,6 +15532,16 @@ define i16 @atomicrmw_add_i16_release(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a3, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_add_i16_release:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amoadd.h.rl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_add_i16_release:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amoadd.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_add_i16_release:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amoadd.h.rl a0, a1, (a0)
@@ -11914,47 +15566,47 @@ define i16 @atomicrmw_add_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_add_i16_acq_rel:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: lui a3, 16
-; RV32IA-WMO-NEXT: addi a3, a3, -1
-; RV32IA-WMO-NEXT: sll a4, a3, a0
-; RV32IA-WMO-NEXT: and a1, a1, a3
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: .LBB83_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT: lr.w.aq a3, (a2)
-; RV32IA-WMO-NEXT: add a5, a3, a1
-; RV32IA-WMO-NEXT: xor a5, a3, a5
-; RV32IA-WMO-NEXT: and a5, a5, a4
-; RV32IA-WMO-NEXT: xor a5, a3, a5
-; RV32IA-WMO-NEXT: sc.w.rl a5, a5, (a2)
-; RV32IA-WMO-NEXT: bnez a5, .LBB83_1
-; RV32IA-WMO-NEXT: # %bb.2:
-; RV32IA-WMO-NEXT: srl a0, a3, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_add_i16_acq_rel:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: .LBB83_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: lr.w.aq a3, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: add a5, a3, a1
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-NOZACAS-NEXT: and a5, a5, a4
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-NOZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: bnez a5, .LBB83_1
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_add_i16_acq_rel:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: lui a3, 16
-; RV32IA-TSO-NEXT: addi a3, a3, -1
-; RV32IA-TSO-NEXT: sll a4, a3, a0
-; RV32IA-TSO-NEXT: and a1, a1, a3
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: .LBB83_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT: lr.w a3, (a2)
-; RV32IA-TSO-NEXT: add a5, a3, a1
-; RV32IA-TSO-NEXT: xor a5, a3, a5
-; RV32IA-TSO-NEXT: and a5, a5, a4
-; RV32IA-TSO-NEXT: xor a5, a3, a5
-; RV32IA-TSO-NEXT: sc.w a5, a5, (a2)
-; RV32IA-TSO-NEXT: bnez a5, .LBB83_1
-; RV32IA-TSO-NEXT: # %bb.2:
-; RV32IA-TSO-NEXT: srl a0, a3, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_add_i16_acq_rel:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: .LBB83_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: add a5, a3, a1
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-NOZACAS-NEXT: and a5, a5, a4
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: bnez a5, .LBB83_1
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_add_i16_acq_rel:
; RV64I: # %bb.0:
@@ -12008,6 +15660,48 @@ define i16 @atomicrmw_add_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a3, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_add_i16_acq_rel:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-ZACAS-NEXT: sll a4, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a3
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: .LBB83_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a3, (a2)
+; RV32IA-WMO-ZACAS-NEXT: add a5, a3, a1
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a4
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB83_1
+; RV32IA-WMO-ZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_add_i16_acq_rel:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-ZACAS-NEXT: sll a4, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: and a1, a1, a3
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: .LBB83_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-TSO-ZACAS-NEXT: add a5, a3, a1
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a4
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT: bnez a5, .LBB83_1
+; RV32IA-TSO-ZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_add_i16_acq_rel:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -12050,6 +15744,16 @@ define i16 @atomicrmw_add_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a3, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_add_i16_acq_rel:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amoadd.h.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_add_i16_acq_rel:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amoadd.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_add_i16_acq_rel:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amoadd.h.aqrl a0, a1, (a0)
@@ -12074,26 +15778,26 @@ define i16 @atomicrmw_add_i16_seq_cst(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-LABEL: atomicrmw_add_i16_seq_cst:
-; RV32IA: # %bb.0:
-; RV32IA-NEXT: andi a2, a0, -4
-; RV32IA-NEXT: slli a0, a0, 3
-; RV32IA-NEXT: lui a3, 16
-; RV32IA-NEXT: addi a3, a3, -1
-; RV32IA-NEXT: sll a4, a3, a0
-; RV32IA-NEXT: and a1, a1, a3
-; RV32IA-NEXT: sll a1, a1, a0
-; RV32IA-NEXT: .LBB84_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: lr.w.aqrl a3, (a2)
-; RV32IA-NEXT: add a5, a3, a1
-; RV32IA-NEXT: xor a5, a3, a5
-; RV32IA-NEXT: and a5, a5, a4
-; RV32IA-NEXT: xor a5, a3, a5
-; RV32IA-NEXT: sc.w.rl a5, a5, (a2)
-; RV32IA-NEXT: bnez a5, .LBB84_1
-; RV32IA-NEXT: # %bb.2:
-; RV32IA-NEXT: srl a0, a3, a0
-; RV32IA-NEXT: ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_add_i16_seq_cst:
+; RV32IA-NOZACAS: # %bb.0:
+; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT: lui a3, 16
+; RV32IA-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-NOZACAS-NEXT: .LBB84_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NOZACAS-NEXT: lr.w.aqrl a3, (a2)
+; RV32IA-NOZACAS-NEXT: add a5, a3, a1
+; RV32IA-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-NOZACAS-NEXT: and a5, a5, a4
+; RV32IA-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-NOZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-NOZACAS-NEXT: bnez a5, .LBB84_1
+; RV32IA-NOZACAS-NEXT: # %bb.2:
+; RV32IA-NOZACAS-NEXT: srl a0, a3, a0
+; RV32IA-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_add_i16_seq_cst:
; RV64I: # %bb.0:
@@ -12126,6 +15830,27 @@ define i16 @atomicrmw_add_i16_seq_cst(ptr %a, i16 %b) nounwind {
; RV64IA-NOZACAS-NEXT: srlw a0, a3, a0
; RV64IA-NOZACAS-NEXT: ret
;
+; RV32IA-ZACAS-LABEL: atomicrmw_add_i16_seq_cst:
+; RV32IA-ZACAS: # %bb.0:
+; RV32IA-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-ZACAS-NEXT: lui a3, 16
+; RV32IA-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-ZACAS-NEXT: sll a4, a3, a0
+; RV32IA-ZACAS-NEXT: and a1, a1, a3
+; RV32IA-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-ZACAS-NEXT: .LBB84_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT: lr.w.aqrl a3, (a2)
+; RV32IA-ZACAS-NEXT: add a5, a3, a1
+; RV32IA-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-ZACAS-NEXT: and a5, a5, a4
+; RV32IA-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-ZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-ZACAS-NEXT: bnez a5, .LBB84_1
+; RV32IA-ZACAS-NEXT: # %bb.2:
+; RV32IA-ZACAS-NEXT: srl a0, a3, a0
+; RV32IA-ZACAS-NEXT: ret
+;
; RV64IA-ZACAS-LABEL: atomicrmw_add_i16_seq_cst:
; RV64IA-ZACAS: # %bb.0:
; RV64IA-ZACAS-NEXT: andi a2, a0, -4
@@ -12147,6 +15872,16 @@ define i16 @atomicrmw_add_i16_seq_cst(ptr %a, i16 %b) nounwind {
; RV64IA-ZACAS-NEXT: srlw a0, a3, a0
; RV64IA-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_add_i16_seq_cst:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amoadd.h.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_add_i16_seq_cst:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amoadd.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_add_i16_seq_cst:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amoadd.h.aqrl a0, a1, (a0)
@@ -12171,26 +15906,26 @@ define i16 @atomicrmw_sub_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-LABEL: atomicrmw_sub_i16_monotonic:
-; RV32IA: # %bb.0:
-; RV32IA-NEXT: andi a2, a0, -4
-; RV32IA-NEXT: slli a0, a0, 3
-; RV32IA-NEXT: lui a3, 16
-; RV32IA-NEXT: addi a3, a3, -1
-; RV32IA-NEXT: sll a4, a3, a0
-; RV32IA-NEXT: and a1, a1, a3
-; RV32IA-NEXT: sll a1, a1, a0
-; RV32IA-NEXT: .LBB85_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: lr.w a3, (a2)
-; RV32IA-NEXT: sub a5, a3, a1
-; RV32IA-NEXT: xor a5, a3, a5
-; RV32IA-NEXT: and a5, a5, a4
-; RV32IA-NEXT: xor a5, a3, a5
-; RV32IA-NEXT: sc.w a5, a5, (a2)
-; RV32IA-NEXT: bnez a5, .LBB85_1
-; RV32IA-NEXT: # %bb.2:
-; RV32IA-NEXT: srl a0, a3, a0
-; RV32IA-NEXT: ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_sub_i16_monotonic:
+; RV32IA-NOZACAS: # %bb.0:
+; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT: lui a3, 16
+; RV32IA-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-NOZACAS-NEXT: .LBB85_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NOZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-NOZACAS-NEXT: sub a5, a3, a1
+; RV32IA-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-NOZACAS-NEXT: and a5, a5, a4
+; RV32IA-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-NOZACAS-NEXT: bnez a5, .LBB85_1
+; RV32IA-NOZACAS-NEXT: # %bb.2:
+; RV32IA-NOZACAS-NEXT: srl a0, a3, a0
+; RV32IA-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_sub_i16_monotonic:
; RV64I: # %bb.0:
@@ -12223,6 +15958,27 @@ define i16 @atomicrmw_sub_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV64IA-NOZACAS-NEXT: srlw a0, a3, a0
; RV64IA-NOZACAS-NEXT: ret
;
+; RV32IA-ZACAS-LABEL: atomicrmw_sub_i16_monotonic:
+; RV32IA-ZACAS: # %bb.0:
+; RV32IA-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-ZACAS-NEXT: lui a3, 16
+; RV32IA-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-ZACAS-NEXT: sll a4, a3, a0
+; RV32IA-ZACAS-NEXT: and a1, a1, a3
+; RV32IA-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-ZACAS-NEXT: .LBB85_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-ZACAS-NEXT: sub a5, a3, a1
+; RV32IA-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-ZACAS-NEXT: and a5, a5, a4
+; RV32IA-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-ZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-ZACAS-NEXT: bnez a5, .LBB85_1
+; RV32IA-ZACAS-NEXT: # %bb.2:
+; RV32IA-ZACAS-NEXT: srl a0, a3, a0
+; RV32IA-ZACAS-NEXT: ret
+;
; RV64IA-ZACAS-LABEL: atomicrmw_sub_i16_monotonic:
; RV64IA-ZACAS: # %bb.0:
; RV64IA-ZACAS-NEXT: andi a2, a0, -4
@@ -12244,6 +16000,18 @@ define i16 @atomicrmw_sub_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV64IA-ZACAS-NEXT: srlw a0, a3, a0
; RV64IA-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_sub_i16_monotonic:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: neg a1, a1
+; RV32IA-WMO-ZABHA-NEXT: amoadd.h a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_sub_i16_monotonic:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: neg a1, a1
+; RV32IA-TSO-ZABHA-NEXT: amoadd.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_sub_i16_monotonic:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: neg a1, a1
@@ -12270,47 +16038,47 @@ define i16 @atomicrmw_sub_i16_acquire(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_sub_i16_acquire:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: lui a3, 16
-; RV32IA-WMO-NEXT: addi a3, a3, -1
-; RV32IA-WMO-NEXT: sll a4, a3, a0
-; RV32IA-WMO-NEXT: and a1, a1, a3
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: .LBB86_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT: lr.w.aq a3, (a2)
-; RV32IA-WMO-NEXT: sub a5, a3, a1
-; RV32IA-WMO-NEXT: xor a5, a3, a5
-; RV32IA-WMO-NEXT: and a5, a5, a4
-; RV32IA-WMO-NEXT: xor a5, a3, a5
-; RV32IA-WMO-NEXT: sc.w a5, a5, (a2)
-; RV32IA-WMO-NEXT: bnez a5, .LBB86_1
-; RV32IA-WMO-NEXT: # %bb.2:
-; RV32IA-WMO-NEXT: srl a0, a3, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_sub_i16_acquire:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: .LBB86_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: lr.w.aq a3, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: sub a5, a3, a1
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-NOZACAS-NEXT: and a5, a5, a4
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: bnez a5, .LBB86_1
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_sub_i16_acquire:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: lui a3, 16
-; RV32IA-TSO-NEXT: addi a3, a3, -1
-; RV32IA-TSO-NEXT: sll a4, a3, a0
-; RV32IA-TSO-NEXT: and a1, a1, a3
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: .LBB86_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT: lr.w a3, (a2)
-; RV32IA-TSO-NEXT: sub a5, a3, a1
-; RV32IA-TSO-NEXT: xor a5, a3, a5
-; RV32IA-TSO-NEXT: and a5, a5, a4
-; RV32IA-TSO-NEXT: xor a5, a3, a5
-; RV32IA-TSO-NEXT: sc.w a5, a5, (a2)
-; RV32IA-TSO-NEXT: bnez a5, .LBB86_1
-; RV32IA-TSO-NEXT: # %bb.2:
-; RV32IA-TSO-NEXT: srl a0, a3, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_sub_i16_acquire:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: .LBB86_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: sub a5, a3, a1
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-NOZACAS-NEXT: and a5, a5, a4
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: bnez a5, .LBB86_1
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_sub_i16_acquire:
; RV64I: # %bb.0:
@@ -12364,6 +16132,48 @@ define i16 @atomicrmw_sub_i16_acquire(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a3, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_sub_i16_acquire:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-ZACAS-NEXT: sll a4, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a3
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: .LBB86_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a3, (a2)
+; RV32IA-WMO-ZACAS-NEXT: sub a5, a3, a1
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a4
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-ZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB86_1
+; RV32IA-WMO-ZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_sub_i16_acquire:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-ZACAS-NEXT: sll a4, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: and a1, a1, a3
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: .LBB86_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-TSO-ZACAS-NEXT: sub a5, a3, a1
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a4
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT: bnez a5, .LBB86_1
+; RV32IA-TSO-ZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_sub_i16_acquire:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -12406,6 +16216,18 @@ define i16 @atomicrmw_sub_i16_acquire(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a3, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_sub_i16_acquire:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: neg a1, a1
+; RV32IA-WMO-ZABHA-NEXT: amoadd.h.aq a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_sub_i16_acquire:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: neg a1, a1
+; RV32IA-TSO-ZABHA-NEXT: amoadd.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_sub_i16_acquire:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: neg a1, a1
@@ -12432,47 +16254,47 @@ define i16 @atomicrmw_sub_i16_release(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_sub_i16_release:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: lui a3, 16
-; RV32IA-WMO-NEXT: addi a3, a3, -1
-; RV32IA-WMO-NEXT: sll a4, a3, a0
-; RV32IA-WMO-NEXT: and a1, a1, a3
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: .LBB87_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT: lr.w a3, (a2)
-; RV32IA-WMO-NEXT: sub a5, a3, a1
-; RV32IA-WMO-NEXT: xor a5, a3, a5
-; RV32IA-WMO-NEXT: and a5, a5, a4
-; RV32IA-WMO-NEXT: xor a5, a3, a5
-; RV32IA-WMO-NEXT: sc.w.rl a5, a5, (a2)
-; RV32IA-WMO-NEXT: bnez a5, .LBB87_1
-; RV32IA-WMO-NEXT: # %bb.2:
-; RV32IA-WMO-NEXT: srl a0, a3, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_sub_i16_release:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: .LBB87_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: sub a5, a3, a1
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-NOZACAS-NEXT: and a5, a5, a4
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-NOZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: bnez a5, .LBB87_1
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_sub_i16_release:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: lui a3, 16
-; RV32IA-TSO-NEXT: addi a3, a3, -1
-; RV32IA-TSO-NEXT: sll a4, a3, a0
-; RV32IA-TSO-NEXT: and a1, a1, a3
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: .LBB87_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT: lr.w a3, (a2)
-; RV32IA-TSO-NEXT: sub a5, a3, a1
-; RV32IA-TSO-NEXT: xor a5, a3, a5
-; RV32IA-TSO-NEXT: and a5, a5, a4
-; RV32IA-TSO-NEXT: xor a5, a3, a5
-; RV32IA-TSO-NEXT: sc.w a5, a5, (a2)
-; RV32IA-TSO-NEXT: bnez a5, .LBB87_1
-; RV32IA-TSO-NEXT: # %bb.2:
-; RV32IA-TSO-NEXT: srl a0, a3, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_sub_i16_release:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: .LBB87_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: sub a5, a3, a1
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-NOZACAS-NEXT: and a5, a5, a4
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: bnez a5, .LBB87_1
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_sub_i16_release:
; RV64I: # %bb.0:
@@ -12526,6 +16348,48 @@ define i16 @atomicrmw_sub_i16_release(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a3, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_sub_i16_release:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-ZACAS-NEXT: sll a4, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a3
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: .LBB87_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-WMO-ZACAS-NEXT: sub a5, a3, a1
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a4
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB87_1
+; RV32IA-WMO-ZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_sub_i16_release:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-ZACAS-NEXT: sll a4, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: and a1, a1, a3
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: .LBB87_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-TSO-ZACAS-NEXT: sub a5, a3, a1
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a4
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT: bnez a5, .LBB87_1
+; RV32IA-TSO-ZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_sub_i16_release:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -12568,6 +16432,18 @@ define i16 @atomicrmw_sub_i16_release(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a3, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_sub_i16_release:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: neg a1, a1
+; RV32IA-WMO-ZABHA-NEXT: amoadd.h.rl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_sub_i16_release:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: neg a1, a1
+; RV32IA-TSO-ZABHA-NEXT: amoadd.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_sub_i16_release:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: neg a1, a1
@@ -12594,47 +16470,47 @@ define i16 @atomicrmw_sub_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_sub_i16_acq_rel:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: lui a3, 16
-; RV32IA-WMO-NEXT: addi a3, a3, -1
-; RV32IA-WMO-NEXT: sll a4, a3, a0
-; RV32IA-WMO-NEXT: and a1, a1, a3
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: .LBB88_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT: lr.w.aq a3, (a2)
-; RV32IA-WMO-NEXT: sub a5, a3, a1
-; RV32IA-WMO-NEXT: xor a5, a3, a5
-; RV32IA-WMO-NEXT: and a5, a5, a4
-; RV32IA-WMO-NEXT: xor a5, a3, a5
-; RV32IA-WMO-NEXT: sc.w.rl a5, a5, (a2)
-; RV32IA-WMO-NEXT: bnez a5, .LBB88_1
-; RV32IA-WMO-NEXT: # %bb.2:
-; RV32IA-WMO-NEXT: srl a0, a3, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_sub_i16_acq_rel:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: .LBB88_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: lr.w.aq a3, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: sub a5, a3, a1
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-NOZACAS-NEXT: and a5, a5, a4
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-NOZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: bnez a5, .LBB88_1
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_sub_i16_acq_rel:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: lui a3, 16
-; RV32IA-TSO-NEXT: addi a3, a3, -1
-; RV32IA-TSO-NEXT: sll a4, a3, a0
-; RV32IA-TSO-NEXT: and a1, a1, a3
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: .LBB88_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT: lr.w a3, (a2)
-; RV32IA-TSO-NEXT: sub a5, a3, a1
-; RV32IA-TSO-NEXT: xor a5, a3, a5
-; RV32IA-TSO-NEXT: and a5, a5, a4
-; RV32IA-TSO-NEXT: xor a5, a3, a5
-; RV32IA-TSO-NEXT: sc.w a5, a5, (a2)
-; RV32IA-TSO-NEXT: bnez a5, .LBB88_1
-; RV32IA-TSO-NEXT: # %bb.2:
-; RV32IA-TSO-NEXT: srl a0, a3, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_sub_i16_acq_rel:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: .LBB88_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: sub a5, a3, a1
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-NOZACAS-NEXT: and a5, a5, a4
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: bnez a5, .LBB88_1
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_sub_i16_acq_rel:
; RV64I: # %bb.0:
@@ -12688,6 +16564,48 @@ define i16 @atomicrmw_sub_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a3, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_sub_i16_acq_rel:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-ZACAS-NEXT: sll a4, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a3
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: .LBB88_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a3, (a2)
+; RV32IA-WMO-ZACAS-NEXT: sub a5, a3, a1
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a4
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB88_1
+; RV32IA-WMO-ZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_sub_i16_acq_rel:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-ZACAS-NEXT: sll a4, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: and a1, a1, a3
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: .LBB88_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-TSO-ZACAS-NEXT: sub a5, a3, a1
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a4
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT: bnez a5, .LBB88_1
+; RV32IA-TSO-ZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_sub_i16_acq_rel:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -12730,6 +16648,18 @@ define i16 @atomicrmw_sub_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a3, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_sub_i16_acq_rel:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: neg a1, a1
+; RV32IA-WMO-ZABHA-NEXT: amoadd.h.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_sub_i16_acq_rel:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: neg a1, a1
+; RV32IA-TSO-ZABHA-NEXT: amoadd.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_sub_i16_acq_rel:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: neg a1, a1
@@ -12756,26 +16686,26 @@ define i16 @atomicrmw_sub_i16_seq_cst(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-LABEL: atomicrmw_sub_i16_seq_cst:
-; RV32IA: # %bb.0:
-; RV32IA-NEXT: andi a2, a0, -4
-; RV32IA-NEXT: slli a0, a0, 3
-; RV32IA-NEXT: lui a3, 16
-; RV32IA-NEXT: addi a3, a3, -1
-; RV32IA-NEXT: sll a4, a3, a0
-; RV32IA-NEXT: and a1, a1, a3
-; RV32IA-NEXT: sll a1, a1, a0
-; RV32IA-NEXT: .LBB89_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: lr.w.aqrl a3, (a2)
-; RV32IA-NEXT: sub a5, a3, a1
-; RV32IA-NEXT: xor a5, a3, a5
-; RV32IA-NEXT: and a5, a5, a4
-; RV32IA-NEXT: xor a5, a3, a5
-; RV32IA-NEXT: sc.w.rl a5, a5, (a2)
-; RV32IA-NEXT: bnez a5, .LBB89_1
-; RV32IA-NEXT: # %bb.2:
-; RV32IA-NEXT: srl a0, a3, a0
-; RV32IA-NEXT: ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_sub_i16_seq_cst:
+; RV32IA-NOZACAS: # %bb.0:
+; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT: lui a3, 16
+; RV32IA-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-NOZACAS-NEXT: .LBB89_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NOZACAS-NEXT: lr.w.aqrl a3, (a2)
+; RV32IA-NOZACAS-NEXT: sub a5, a3, a1
+; RV32IA-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-NOZACAS-NEXT: and a5, a5, a4
+; RV32IA-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-NOZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-NOZACAS-NEXT: bnez a5, .LBB89_1
+; RV32IA-NOZACAS-NEXT: # %bb.2:
+; RV32IA-NOZACAS-NEXT: srl a0, a3, a0
+; RV32IA-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_sub_i16_seq_cst:
; RV64I: # %bb.0:
@@ -12808,6 +16738,27 @@ define i16 @atomicrmw_sub_i16_seq_cst(ptr %a, i16 %b) nounwind {
; RV64IA-NOZACAS-NEXT: srlw a0, a3, a0
; RV64IA-NOZACAS-NEXT: ret
;
+; RV32IA-ZACAS-LABEL: atomicrmw_sub_i16_seq_cst:
+; RV32IA-ZACAS: # %bb.0:
+; RV32IA-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-ZACAS-NEXT: lui a3, 16
+; RV32IA-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-ZACAS-NEXT: sll a4, a3, a0
+; RV32IA-ZACAS-NEXT: and a1, a1, a3
+; RV32IA-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-ZACAS-NEXT: .LBB89_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT: lr.w.aqrl a3, (a2)
+; RV32IA-ZACAS-NEXT: sub a5, a3, a1
+; RV32IA-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-ZACAS-NEXT: and a5, a5, a4
+; RV32IA-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-ZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-ZACAS-NEXT: bnez a5, .LBB89_1
+; RV32IA-ZACAS-NEXT: # %bb.2:
+; RV32IA-ZACAS-NEXT: srl a0, a3, a0
+; RV32IA-ZACAS-NEXT: ret
+;
; RV64IA-ZACAS-LABEL: atomicrmw_sub_i16_seq_cst:
; RV64IA-ZACAS: # %bb.0:
; RV64IA-ZACAS-NEXT: andi a2, a0, -4
@@ -12829,6 +16780,18 @@ define i16 @atomicrmw_sub_i16_seq_cst(ptr %a, i16 %b) nounwind {
; RV64IA-ZACAS-NEXT: srlw a0, a3, a0
; RV64IA-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_sub_i16_seq_cst:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: neg a1, a1
+; RV32IA-WMO-ZABHA-NEXT: amoadd.h.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_sub_i16_seq_cst:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: neg a1, a1
+; RV32IA-TSO-ZABHA-NEXT: amoadd.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_sub_i16_seq_cst:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: neg a1, a1
@@ -12855,20 +16818,20 @@ define i16 @atomicrmw_and_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-LABEL: atomicrmw_and_i16_monotonic:
-; RV32IA: # %bb.0:
-; RV32IA-NEXT: andi a2, a0, -4
-; RV32IA-NEXT: slli a0, a0, 3
-; RV32IA-NEXT: lui a3, 16
-; RV32IA-NEXT: addi a3, a3, -1
-; RV32IA-NEXT: sll a4, a3, a0
-; RV32IA-NEXT: and a1, a1, a3
-; RV32IA-NEXT: not a3, a4
-; RV32IA-NEXT: sll a1, a1, a0
-; RV32IA-NEXT: or a1, a1, a3
-; RV32IA-NEXT: amoand.w a1, a1, (a2)
-; RV32IA-NEXT: srl a0, a1, a0
-; RV32IA-NEXT: ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_and_i16_monotonic:
+; RV32IA-NOZACAS: # %bb.0:
+; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT: lui a3, 16
+; RV32IA-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-NOZACAS-NEXT: not a3, a4
+; RV32IA-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-NOZACAS-NEXT: or a1, a1, a3
+; RV32IA-NOZACAS-NEXT: amoand.w a1, a1, (a2)
+; RV32IA-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_and_i16_monotonic:
; RV64I: # %bb.0:
@@ -12895,6 +16858,21 @@ define i16 @atomicrmw_and_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV64IA-NOZACAS-NEXT: srlw a0, a1, a0
; RV64IA-NOZACAS-NEXT: ret
;
+; RV32IA-ZACAS-LABEL: atomicrmw_and_i16_monotonic:
+; RV32IA-ZACAS: # %bb.0:
+; RV32IA-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-ZACAS-NEXT: lui a3, 16
+; RV32IA-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-ZACAS-NEXT: sll a4, a3, a0
+; RV32IA-ZACAS-NEXT: and a1, a1, a3
+; RV32IA-ZACAS-NEXT: not a3, a4
+; RV32IA-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-ZACAS-NEXT: or a1, a1, a3
+; RV32IA-ZACAS-NEXT: amoand.w a1, a1, (a2)
+; RV32IA-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-ZACAS-NEXT: ret
+;
; RV64IA-ZACAS-LABEL: atomicrmw_and_i16_monotonic:
; RV64IA-ZACAS: # %bb.0:
; RV64IA-ZACAS-NEXT: andi a2, a0, -4
@@ -12910,6 +16888,16 @@ define i16 @atomicrmw_and_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV64IA-ZACAS-NEXT: srlw a0, a1, a0
; RV64IA-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_and_i16_monotonic:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amoand.h a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_and_i16_monotonic:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amoand.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_and_i16_monotonic:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amoand.h a0, a1, (a0)
@@ -12934,35 +16922,35 @@ define i16 @atomicrmw_and_i16_acquire(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_and_i16_acquire:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: lui a3, 16
-; RV32IA-WMO-NEXT: addi a3, a3, -1
-; RV32IA-WMO-NEXT: sll a4, a3, a0
-; RV32IA-WMO-NEXT: and a1, a1, a3
-; RV32IA-WMO-NEXT: not a3, a4
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: or a1, a1, a3
-; RV32IA-WMO-NEXT: amoand.w.aq a1, a1, (a2)
-; RV32IA-WMO-NEXT: srl a0, a1, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_and_i16_acquire:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-WMO-NOZACAS-NEXT: not a3, a4
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: or a1, a1, a3
+; RV32IA-WMO-NOZACAS-NEXT: amoand.w.aq a1, a1, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_and_i16_acquire:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: lui a3, 16
-; RV32IA-TSO-NEXT: addi a3, a3, -1
-; RV32IA-TSO-NEXT: sll a4, a3, a0
-; RV32IA-TSO-NEXT: and a1, a1, a3
-; RV32IA-TSO-NEXT: not a3, a4
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: or a1, a1, a3
-; RV32IA-TSO-NEXT: amoand.w a1, a1, (a2)
-; RV32IA-TSO-NEXT: srl a0, a1, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_and_i16_acquire:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-TSO-NOZACAS-NEXT: not a3, a4
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: or a1, a1, a3
+; RV32IA-TSO-NOZACAS-NEXT: amoand.w a1, a1, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_and_i16_acquire:
; RV64I: # %bb.0:
@@ -13004,6 +16992,36 @@ define i16 @atomicrmw_and_i16_acquire(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_and_i16_acquire:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-ZACAS-NEXT: sll a4, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a3
+; RV32IA-WMO-ZACAS-NEXT: not a3, a4
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: or a1, a1, a3
+; RV32IA-WMO-ZACAS-NEXT: amoand.w.aq a1, a1, (a2)
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_and_i16_acquire:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-ZACAS-NEXT: sll a4, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: and a1, a1, a3
+; RV32IA-TSO-ZACAS-NEXT: not a3, a4
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: or a1, a1, a3
+; RV32IA-TSO-ZACAS-NEXT: amoand.w a1, a1, (a2)
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_and_i16_acquire:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -13034,6 +17052,16 @@ define i16 @atomicrmw_and_i16_acquire(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_and_i16_acquire:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amoand.h.aq a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_and_i16_acquire:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amoand.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_and_i16_acquire:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amoand.h.aq a0, a1, (a0)
@@ -13058,35 +17086,35 @@ define i16 @atomicrmw_and_i16_release(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_and_i16_release:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: lui a3, 16
-; RV32IA-WMO-NEXT: addi a3, a3, -1
-; RV32IA-WMO-NEXT: sll a4, a3, a0
-; RV32IA-WMO-NEXT: and a1, a1, a3
-; RV32IA-WMO-NEXT: not a3, a4
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: or a1, a1, a3
-; RV32IA-WMO-NEXT: amoand.w.rl a1, a1, (a2)
-; RV32IA-WMO-NEXT: srl a0, a1, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_and_i16_release:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-WMO-NOZACAS-NEXT: not a3, a4
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: or a1, a1, a3
+; RV32IA-WMO-NOZACAS-NEXT: amoand.w.rl a1, a1, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_and_i16_release:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: lui a3, 16
-; RV32IA-TSO-NEXT: addi a3, a3, -1
-; RV32IA-TSO-NEXT: sll a4, a3, a0
-; RV32IA-TSO-NEXT: and a1, a1, a3
-; RV32IA-TSO-NEXT: not a3, a4
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: or a1, a1, a3
-; RV32IA-TSO-NEXT: amoand.w a1, a1, (a2)
-; RV32IA-TSO-NEXT: srl a0, a1, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_and_i16_release:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-TSO-NOZACAS-NEXT: not a3, a4
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: or a1, a1, a3
+; RV32IA-TSO-NOZACAS-NEXT: amoand.w a1, a1, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_and_i16_release:
; RV64I: # %bb.0:
@@ -13128,6 +17156,36 @@ define i16 @atomicrmw_and_i16_release(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_and_i16_release:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-ZACAS-NEXT: sll a4, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a3
+; RV32IA-WMO-ZACAS-NEXT: not a3, a4
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: or a1, a1, a3
+; RV32IA-WMO-ZACAS-NEXT: amoand.w.rl a1, a1, (a2)
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_and_i16_release:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-ZACAS-NEXT: sll a4, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: and a1, a1, a3
+; RV32IA-TSO-ZACAS-NEXT: not a3, a4
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: or a1, a1, a3
+; RV32IA-TSO-ZACAS-NEXT: amoand.w a1, a1, (a2)
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_and_i16_release:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -13158,6 +17216,16 @@ define i16 @atomicrmw_and_i16_release(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_and_i16_release:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amoand.h.rl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_and_i16_release:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amoand.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_and_i16_release:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amoand.h.rl a0, a1, (a0)
@@ -13182,35 +17250,35 @@ define i16 @atomicrmw_and_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_and_i16_acq_rel:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: lui a3, 16
-; RV32IA-WMO-NEXT: addi a3, a3, -1
-; RV32IA-WMO-NEXT: sll a4, a3, a0
-; RV32IA-WMO-NEXT: and a1, a1, a3
-; RV32IA-WMO-NEXT: not a3, a4
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: or a1, a1, a3
-; RV32IA-WMO-NEXT: amoand.w.aqrl a1, a1, (a2)
-; RV32IA-WMO-NEXT: srl a0, a1, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_and_i16_acq_rel:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-WMO-NOZACAS-NEXT: not a3, a4
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: or a1, a1, a3
+; RV32IA-WMO-NOZACAS-NEXT: amoand.w.aqrl a1, a1, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_and_i16_acq_rel:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: lui a3, 16
-; RV32IA-TSO-NEXT: addi a3, a3, -1
-; RV32IA-TSO-NEXT: sll a4, a3, a0
-; RV32IA-TSO-NEXT: and a1, a1, a3
-; RV32IA-TSO-NEXT: not a3, a4
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: or a1, a1, a3
-; RV32IA-TSO-NEXT: amoand.w a1, a1, (a2)
-; RV32IA-TSO-NEXT: srl a0, a1, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_and_i16_acq_rel:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-TSO-NOZACAS-NEXT: not a3, a4
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: or a1, a1, a3
+; RV32IA-TSO-NOZACAS-NEXT: amoand.w a1, a1, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_and_i16_acq_rel:
; RV64I: # %bb.0:
@@ -13252,6 +17320,36 @@ define i16 @atomicrmw_and_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_and_i16_acq_rel:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-ZACAS-NEXT: sll a4, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a3
+; RV32IA-WMO-ZACAS-NEXT: not a3, a4
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: or a1, a1, a3
+; RV32IA-WMO-ZACAS-NEXT: amoand.w.aqrl a1, a1, (a2)
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_and_i16_acq_rel:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-ZACAS-NEXT: sll a4, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: and a1, a1, a3
+; RV32IA-TSO-ZACAS-NEXT: not a3, a4
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: or a1, a1, a3
+; RV32IA-TSO-ZACAS-NEXT: amoand.w a1, a1, (a2)
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_and_i16_acq_rel:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -13282,6 +17380,16 @@ define i16 @atomicrmw_and_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_and_i16_acq_rel:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amoand.h.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_and_i16_acq_rel:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amoand.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_and_i16_acq_rel:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amoand.h.aqrl a0, a1, (a0)
@@ -13306,35 +17414,35 @@ define i16 @atomicrmw_and_i16_seq_cst(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_and_i16_seq_cst:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: lui a3, 16
-; RV32IA-WMO-NEXT: addi a3, a3, -1
-; RV32IA-WMO-NEXT: sll a4, a3, a0
-; RV32IA-WMO-NEXT: and a1, a1, a3
-; RV32IA-WMO-NEXT: not a3, a4
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: or a1, a1, a3
-; RV32IA-WMO-NEXT: amoand.w.aqrl a1, a1, (a2)
-; RV32IA-WMO-NEXT: srl a0, a1, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_and_i16_seq_cst:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-WMO-NOZACAS-NEXT: not a3, a4
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: or a1, a1, a3
+; RV32IA-WMO-NOZACAS-NEXT: amoand.w.aqrl a1, a1, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_and_i16_seq_cst:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: lui a3, 16
-; RV32IA-TSO-NEXT: addi a3, a3, -1
-; RV32IA-TSO-NEXT: sll a4, a3, a0
-; RV32IA-TSO-NEXT: and a1, a1, a3
-; RV32IA-TSO-NEXT: not a3, a4
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: or a1, a1, a3
-; RV32IA-TSO-NEXT: amoand.w a1, a1, (a2)
-; RV32IA-TSO-NEXT: srl a0, a1, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_and_i16_seq_cst:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-TSO-NOZACAS-NEXT: not a3, a4
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: or a1, a1, a3
+; RV32IA-TSO-NOZACAS-NEXT: amoand.w a1, a1, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_and_i16_seq_cst:
; RV64I: # %bb.0:
@@ -13376,6 +17484,36 @@ define i16 @atomicrmw_and_i16_seq_cst(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_and_i16_seq_cst:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-ZACAS-NEXT: sll a4, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a3
+; RV32IA-WMO-ZACAS-NEXT: not a3, a4
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: or a1, a1, a3
+; RV32IA-WMO-ZACAS-NEXT: amoand.w.aqrl a1, a1, (a2)
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_and_i16_seq_cst:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-ZACAS-NEXT: sll a4, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: and a1, a1, a3
+; RV32IA-TSO-ZACAS-NEXT: not a3, a4
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: or a1, a1, a3
+; RV32IA-TSO-ZACAS-NEXT: amoand.w a1, a1, (a2)
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_and_i16_seq_cst:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -13406,6 +17544,16 @@ define i16 @atomicrmw_and_i16_seq_cst(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_and_i16_seq_cst:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amoand.h.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_and_i16_seq_cst:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amoand.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_and_i16_seq_cst:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amoand.h.aqrl a0, a1, (a0)
@@ -13430,27 +17578,27 @@ define i16 @atomicrmw_nand_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-LABEL: atomicrmw_nand_i16_monotonic:
-; RV32IA: # %bb.0:
-; RV32IA-NEXT: andi a2, a0, -4
-; RV32IA-NEXT: slli a0, a0, 3
-; RV32IA-NEXT: lui a3, 16
-; RV32IA-NEXT: addi a3, a3, -1
-; RV32IA-NEXT: sll a4, a3, a0
-; RV32IA-NEXT: and a1, a1, a3
-; RV32IA-NEXT: sll a1, a1, a0
-; RV32IA-NEXT: .LBB95_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: lr.w a3, (a2)
-; RV32IA-NEXT: and a5, a3, a1
-; RV32IA-NEXT: not a5, a5
-; RV32IA-NEXT: xor a5, a3, a5
-; RV32IA-NEXT: and a5, a5, a4
-; RV32IA-NEXT: xor a5, a3, a5
-; RV32IA-NEXT: sc.w a5, a5, (a2)
-; RV32IA-NEXT: bnez a5, .LBB95_1
-; RV32IA-NEXT: # %bb.2:
-; RV32IA-NEXT: srl a0, a3, a0
-; RV32IA-NEXT: ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_nand_i16_monotonic:
+; RV32IA-NOZACAS: # %bb.0:
+; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT: lui a3, 16
+; RV32IA-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-NOZACAS-NEXT: .LBB95_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NOZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-NOZACAS-NEXT: and a5, a3, a1
+; RV32IA-NOZACAS-NEXT: not a5, a5
+; RV32IA-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-NOZACAS-NEXT: and a5, a5, a4
+; RV32IA-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-NOZACAS-NEXT: bnez a5, .LBB95_1
+; RV32IA-NOZACAS-NEXT: # %bb.2:
+; RV32IA-NOZACAS-NEXT: srl a0, a3, a0
+; RV32IA-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_nand_i16_monotonic:
; RV64I: # %bb.0:
@@ -13484,6 +17632,28 @@ define i16 @atomicrmw_nand_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV64IA-NOZACAS-NEXT: srlw a0, a3, a0
; RV64IA-NOZACAS-NEXT: ret
;
+; RV32IA-ZACAS-LABEL: atomicrmw_nand_i16_monotonic:
+; RV32IA-ZACAS: # %bb.0:
+; RV32IA-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-ZACAS-NEXT: lui a3, 16
+; RV32IA-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-ZACAS-NEXT: sll a4, a3, a0
+; RV32IA-ZACAS-NEXT: and a1, a1, a3
+; RV32IA-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-ZACAS-NEXT: .LBB95_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-ZACAS-NEXT: and a5, a3, a1
+; RV32IA-ZACAS-NEXT: not a5, a5
+; RV32IA-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-ZACAS-NEXT: and a5, a5, a4
+; RV32IA-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-ZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-ZACAS-NEXT: bnez a5, .LBB95_1
+; RV32IA-ZACAS-NEXT: # %bb.2:
+; RV32IA-ZACAS-NEXT: srl a0, a3, a0
+; RV32IA-ZACAS-NEXT: ret
+;
; RV64IA-ZACAS-LABEL: atomicrmw_nand_i16_monotonic:
; RV64IA-ZACAS: # %bb.0:
; RV64IA-ZACAS-NEXT: andi a2, a0, -4
@@ -13506,6 +17676,50 @@ define i16 @atomicrmw_nand_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV64IA-ZACAS-NEXT: srlw a0, a3, a0
; RV64IA-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i16_monotonic:
+; RV32IA-WMO-ZABHA-NOZACAS: # %bb.0:
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: .LBB95_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: and a5, a3, a1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: not a5, a5
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: and a5, a5, a4
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: bnez a5, .LBB95_1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: srl a0, a3, a0
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i16_monotonic:
+; RV32IA-TSO-ZABHA-NOZACAS: # %bb.0:
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: .LBB95_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: and a5, a3, a1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: not a5, a5
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: and a5, a5, a4
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: bnez a5, .LBB95_1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: srl a0, a3, a0
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: ret
+;
; RV64IA-WMO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i16_monotonic:
; RV64IA-WMO-ZABHA-NOZACAS: # %bb.0:
; RV64IA-WMO-ZABHA-NOZACAS-NEXT: andi a2, a0, -4
@@ -13550,6 +17764,36 @@ define i16 @atomicrmw_nand_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-ZABHA-NOZACAS-NEXT: srlw a0, a3, a0
; RV64IA-TSO-ZABHA-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i16_monotonic:
+; RV32IA-WMO-ZABHA-ZACAS: # %bb.0:
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: mv a2, a0
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: lhu a0, 0(a0)
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: .LBB95_1: # %atomicrmw.start
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: and a3, a0, a1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: not a3, a3
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: slli a4, a0, 16
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: amocas.h a0, a3, (a2)
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: srai a4, a4, 16
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: bne a0, a4, .LBB95_1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: # %bb.2: # %atomicrmw.end
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i16_monotonic:
+; RV32IA-TSO-ZABHA-ZACAS: # %bb.0:
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: mv a2, a0
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: lhu a0, 0(a0)
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: .LBB95_1: # %atomicrmw.start
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: and a3, a0, a1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: not a3, a3
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: slli a4, a0, 16
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: amocas.h a0, a3, (a2)
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: srai a4, a4, 16
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: bne a0, a4, .LBB95_1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: # %bb.2: # %atomicrmw.end
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i16_monotonic:
; RV64IA-WMO-ZABHA-ZACAS: # %bb.0:
; RV64IA-WMO-ZABHA-ZACAS-NEXT: mv a2, a0
@@ -13594,49 +17838,49 @@ define i16 @atomicrmw_nand_i16_acquire(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_nand_i16_acquire:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: lui a3, 16
-; RV32IA-WMO-NEXT: addi a3, a3, -1
-; RV32IA-WMO-NEXT: sll a4, a3, a0
-; RV32IA-WMO-NEXT: and a1, a1, a3
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: .LBB96_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT: lr.w.aq a3, (a2)
-; RV32IA-WMO-NEXT: and a5, a3, a1
-; RV32IA-WMO-NEXT: not a5, a5
-; RV32IA-WMO-NEXT: xor a5, a3, a5
-; RV32IA-WMO-NEXT: and a5, a5, a4
-; RV32IA-WMO-NEXT: xor a5, a3, a5
-; RV32IA-WMO-NEXT: sc.w a5, a5, (a2)
-; RV32IA-WMO-NEXT: bnez a5, .LBB96_1
-; RV32IA-WMO-NEXT: # %bb.2:
-; RV32IA-WMO-NEXT: srl a0, a3, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_nand_i16_acquire:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: .LBB96_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: lr.w.aq a3, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: and a5, a3, a1
+; RV32IA-WMO-NOZACAS-NEXT: not a5, a5
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-NOZACAS-NEXT: and a5, a5, a4
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: bnez a5, .LBB96_1
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_nand_i16_acquire:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: lui a3, 16
-; RV32IA-TSO-NEXT: addi a3, a3, -1
-; RV32IA-TSO-NEXT: sll a4, a3, a0
-; RV32IA-TSO-NEXT: and a1, a1, a3
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: .LBB96_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT: lr.w a3, (a2)
-; RV32IA-TSO-NEXT: and a5, a3, a1
-; RV32IA-TSO-NEXT: not a5, a5
-; RV32IA-TSO-NEXT: xor a5, a3, a5
-; RV32IA-TSO-NEXT: and a5, a5, a4
-; RV32IA-TSO-NEXT: xor a5, a3, a5
-; RV32IA-TSO-NEXT: sc.w a5, a5, (a2)
-; RV32IA-TSO-NEXT: bnez a5, .LBB96_1
-; RV32IA-TSO-NEXT: # %bb.2:
-; RV32IA-TSO-NEXT: srl a0, a3, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_nand_i16_acquire:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: .LBB96_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: and a5, a3, a1
+; RV32IA-TSO-NOZACAS-NEXT: not a5, a5
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-NOZACAS-NEXT: and a5, a5, a4
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: bnez a5, .LBB96_1
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_nand_i16_acquire:
; RV64I: # %bb.0:
@@ -13692,6 +17936,50 @@ define i16 @atomicrmw_nand_i16_acquire(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a3, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_nand_i16_acquire:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-ZACAS-NEXT: sll a4, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a3
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: .LBB96_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a3, (a2)
+; RV32IA-WMO-ZACAS-NEXT: and a5, a3, a1
+; RV32IA-WMO-ZACAS-NEXT: not a5, a5
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a4
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-ZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB96_1
+; RV32IA-WMO-ZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_nand_i16_acquire:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-ZACAS-NEXT: sll a4, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: and a1, a1, a3
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: .LBB96_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-TSO-ZACAS-NEXT: and a5, a3, a1
+; RV32IA-TSO-ZACAS-NEXT: not a5, a5
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a4
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT: bnez a5, .LBB96_1
+; RV32IA-TSO-ZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_nand_i16_acquire:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -13736,6 +18024,50 @@ define i16 @atomicrmw_nand_i16_acquire(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a3, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i16_acquire:
+; RV32IA-WMO-ZABHA-NOZACAS: # %bb.0:
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: .LBB96_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: lr.w.aq a3, (a2)
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: and a5, a3, a1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: not a5, a5
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: and a5, a5, a4
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: bnez a5, .LBB96_1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: srl a0, a3, a0
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i16_acquire:
+; RV32IA-TSO-ZABHA-NOZACAS: # %bb.0:
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: .LBB96_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: and a5, a3, a1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: not a5, a5
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: and a5, a5, a4
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: bnez a5, .LBB96_1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: srl a0, a3, a0
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: ret
+;
; RV64IA-WMO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i16_acquire:
; RV64IA-WMO-ZABHA-NOZACAS: # %bb.0:
; RV64IA-WMO-ZABHA-NOZACAS-NEXT: andi a2, a0, -4
@@ -13780,6 +18112,36 @@ define i16 @atomicrmw_nand_i16_acquire(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-ZABHA-NOZACAS-NEXT: srlw a0, a3, a0
; RV64IA-TSO-ZABHA-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i16_acquire:
+; RV32IA-WMO-ZABHA-ZACAS: # %bb.0:
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: mv a2, a0
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: lhu a0, 0(a0)
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: .LBB96_1: # %atomicrmw.start
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: and a3, a0, a1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: not a3, a3
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: slli a4, a0, 16
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: amocas.h.aq a0, a3, (a2)
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: srai a4, a4, 16
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: bne a0, a4, .LBB96_1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: # %bb.2: # %atomicrmw.end
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i16_acquire:
+; RV32IA-TSO-ZABHA-ZACAS: # %bb.0:
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: mv a2, a0
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: lhu a0, 0(a0)
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: .LBB96_1: # %atomicrmw.start
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: and a3, a0, a1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: not a3, a3
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: slli a4, a0, 16
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: amocas.h a0, a3, (a2)
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: srai a4, a4, 16
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: bne a0, a4, .LBB96_1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: # %bb.2: # %atomicrmw.end
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i16_acquire:
; RV64IA-WMO-ZABHA-ZACAS: # %bb.0:
; RV64IA-WMO-ZABHA-ZACAS-NEXT: mv a2, a0
@@ -13824,49 +18186,49 @@ define i16 @atomicrmw_nand_i16_release(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_nand_i16_release:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: lui a3, 16
-; RV32IA-WMO-NEXT: addi a3, a3, -1
-; RV32IA-WMO-NEXT: sll a4, a3, a0
-; RV32IA-WMO-NEXT: and a1, a1, a3
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: .LBB97_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT: lr.w a3, (a2)
-; RV32IA-WMO-NEXT: and a5, a3, a1
-; RV32IA-WMO-NEXT: not a5, a5
-; RV32IA-WMO-NEXT: xor a5, a3, a5
-; RV32IA-WMO-NEXT: and a5, a5, a4
-; RV32IA-WMO-NEXT: xor a5, a3, a5
-; RV32IA-WMO-NEXT: sc.w.rl a5, a5, (a2)
-; RV32IA-WMO-NEXT: bnez a5, .LBB97_1
-; RV32IA-WMO-NEXT: # %bb.2:
-; RV32IA-WMO-NEXT: srl a0, a3, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_nand_i16_release:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: .LBB97_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: and a5, a3, a1
+; RV32IA-WMO-NOZACAS-NEXT: not a5, a5
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-NOZACAS-NEXT: and a5, a5, a4
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-NOZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: bnez a5, .LBB97_1
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_nand_i16_release:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: lui a3, 16
-; RV32IA-TSO-NEXT: addi a3, a3, -1
-; RV32IA-TSO-NEXT: sll a4, a3, a0
-; RV32IA-TSO-NEXT: and a1, a1, a3
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: .LBB97_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT: lr.w a3, (a2)
-; RV32IA-TSO-NEXT: and a5, a3, a1
-; RV32IA-TSO-NEXT: not a5, a5
-; RV32IA-TSO-NEXT: xor a5, a3, a5
-; RV32IA-TSO-NEXT: and a5, a5, a4
-; RV32IA-TSO-NEXT: xor a5, a3, a5
-; RV32IA-TSO-NEXT: sc.w a5, a5, (a2)
-; RV32IA-TSO-NEXT: bnez a5, .LBB97_1
-; RV32IA-TSO-NEXT: # %bb.2:
-; RV32IA-TSO-NEXT: srl a0, a3, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_nand_i16_release:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: .LBB97_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: and a5, a3, a1
+; RV32IA-TSO-NOZACAS-NEXT: not a5, a5
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-NOZACAS-NEXT: and a5, a5, a4
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: bnez a5, .LBB97_1
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_nand_i16_release:
; RV64I: # %bb.0:
@@ -13922,6 +18284,50 @@ define i16 @atomicrmw_nand_i16_release(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a3, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_nand_i16_release:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-ZACAS-NEXT: sll a4, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a3
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: .LBB97_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-WMO-ZACAS-NEXT: and a5, a3, a1
+; RV32IA-WMO-ZACAS-NEXT: not a5, a5
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a4
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB97_1
+; RV32IA-WMO-ZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_nand_i16_release:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-ZACAS-NEXT: sll a4, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: and a1, a1, a3
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: .LBB97_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-TSO-ZACAS-NEXT: and a5, a3, a1
+; RV32IA-TSO-ZACAS-NEXT: not a5, a5
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a4
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT: bnez a5, .LBB97_1
+; RV32IA-TSO-ZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_nand_i16_release:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -13966,6 +18372,50 @@ define i16 @atomicrmw_nand_i16_release(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a3, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i16_release:
+; RV32IA-WMO-ZABHA-NOZACAS: # %bb.0:
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: .LBB97_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: and a5, a3, a1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: not a5, a5
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: and a5, a5, a4
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: bnez a5, .LBB97_1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: srl a0, a3, a0
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i16_release:
+; RV32IA-TSO-ZABHA-NOZACAS: # %bb.0:
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: .LBB97_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: and a5, a3, a1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: not a5, a5
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: and a5, a5, a4
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: bnez a5, .LBB97_1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: srl a0, a3, a0
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: ret
+;
; RV64IA-WMO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i16_release:
; RV64IA-WMO-ZABHA-NOZACAS: # %bb.0:
; RV64IA-WMO-ZABHA-NOZACAS-NEXT: andi a2, a0, -4
@@ -14010,6 +18460,36 @@ define i16 @atomicrmw_nand_i16_release(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-ZABHA-NOZACAS-NEXT: srlw a0, a3, a0
; RV64IA-TSO-ZABHA-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i16_release:
+; RV32IA-WMO-ZABHA-ZACAS: # %bb.0:
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: mv a2, a0
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: lhu a0, 0(a0)
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: .LBB97_1: # %atomicrmw.start
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: and a3, a0, a1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: not a3, a3
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: slli a4, a0, 16
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: amocas.h.rl a0, a3, (a2)
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: srai a4, a4, 16
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: bne a0, a4, .LBB97_1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: # %bb.2: # %atomicrmw.end
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i16_release:
+; RV32IA-TSO-ZABHA-ZACAS: # %bb.0:
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: mv a2, a0
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: lhu a0, 0(a0)
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: .LBB97_1: # %atomicrmw.start
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: and a3, a0, a1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: not a3, a3
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: slli a4, a0, 16
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: amocas.h a0, a3, (a2)
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: srai a4, a4, 16
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: bne a0, a4, .LBB97_1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: # %bb.2: # %atomicrmw.end
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i16_release:
; RV64IA-WMO-ZABHA-ZACAS: # %bb.0:
; RV64IA-WMO-ZABHA-ZACAS-NEXT: mv a2, a0
@@ -14054,49 +18534,49 @@ define i16 @atomicrmw_nand_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_nand_i16_acq_rel:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: lui a3, 16
-; RV32IA-WMO-NEXT: addi a3, a3, -1
-; RV32IA-WMO-NEXT: sll a4, a3, a0
-; RV32IA-WMO-NEXT: and a1, a1, a3
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: .LBB98_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT: lr.w.aq a3, (a2)
-; RV32IA-WMO-NEXT: and a5, a3, a1
-; RV32IA-WMO-NEXT: not a5, a5
-; RV32IA-WMO-NEXT: xor a5, a3, a5
-; RV32IA-WMO-NEXT: and a5, a5, a4
-; RV32IA-WMO-NEXT: xor a5, a3, a5
-; RV32IA-WMO-NEXT: sc.w.rl a5, a5, (a2)
-; RV32IA-WMO-NEXT: bnez a5, .LBB98_1
-; RV32IA-WMO-NEXT: # %bb.2:
-; RV32IA-WMO-NEXT: srl a0, a3, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_nand_i16_acq_rel:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: .LBB98_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: lr.w.aq a3, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: and a5, a3, a1
+; RV32IA-WMO-NOZACAS-NEXT: not a5, a5
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-NOZACAS-NEXT: and a5, a5, a4
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-NOZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: bnez a5, .LBB98_1
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_nand_i16_acq_rel:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: lui a3, 16
-; RV32IA-TSO-NEXT: addi a3, a3, -1
-; RV32IA-TSO-NEXT: sll a4, a3, a0
-; RV32IA-TSO-NEXT: and a1, a1, a3
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: .LBB98_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT: lr.w a3, (a2)
-; RV32IA-TSO-NEXT: and a5, a3, a1
-; RV32IA-TSO-NEXT: not a5, a5
-; RV32IA-TSO-NEXT: xor a5, a3, a5
-; RV32IA-TSO-NEXT: and a5, a5, a4
-; RV32IA-TSO-NEXT: xor a5, a3, a5
-; RV32IA-TSO-NEXT: sc.w a5, a5, (a2)
-; RV32IA-TSO-NEXT: bnez a5, .LBB98_1
-; RV32IA-TSO-NEXT: # %bb.2:
-; RV32IA-TSO-NEXT: srl a0, a3, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_nand_i16_acq_rel:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: .LBB98_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: and a5, a3, a1
+; RV32IA-TSO-NOZACAS-NEXT: not a5, a5
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-NOZACAS-NEXT: and a5, a5, a4
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: bnez a5, .LBB98_1
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_nand_i16_acq_rel:
; RV64I: # %bb.0:
@@ -14152,6 +18632,50 @@ define i16 @atomicrmw_nand_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a3, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_nand_i16_acq_rel:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-ZACAS-NEXT: sll a4, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a3
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: .LBB98_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a3, (a2)
+; RV32IA-WMO-ZACAS-NEXT: and a5, a3, a1
+; RV32IA-WMO-ZACAS-NEXT: not a5, a5
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a4
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB98_1
+; RV32IA-WMO-ZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_nand_i16_acq_rel:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-ZACAS-NEXT: sll a4, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: and a1, a1, a3
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: .LBB98_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-TSO-ZACAS-NEXT: and a5, a3, a1
+; RV32IA-TSO-ZACAS-NEXT: not a5, a5
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a4
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT: bnez a5, .LBB98_1
+; RV32IA-TSO-ZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_nand_i16_acq_rel:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -14196,6 +18720,50 @@ define i16 @atomicrmw_nand_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a3, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i16_acq_rel:
+; RV32IA-WMO-ZABHA-NOZACAS: # %bb.0:
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: .LBB98_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: lr.w.aq a3, (a2)
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: and a5, a3, a1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: not a5, a5
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: and a5, a5, a4
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: bnez a5, .LBB98_1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: srl a0, a3, a0
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i16_acq_rel:
+; RV32IA-TSO-ZABHA-NOZACAS: # %bb.0:
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: .LBB98_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: and a5, a3, a1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: not a5, a5
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: and a5, a5, a4
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: bnez a5, .LBB98_1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: srl a0, a3, a0
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: ret
+;
; RV64IA-WMO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i16_acq_rel:
; RV64IA-WMO-ZABHA-NOZACAS: # %bb.0:
; RV64IA-WMO-ZABHA-NOZACAS-NEXT: andi a2, a0, -4
@@ -14240,6 +18808,36 @@ define i16 @atomicrmw_nand_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-ZABHA-NOZACAS-NEXT: srlw a0, a3, a0
; RV64IA-TSO-ZABHA-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i16_acq_rel:
+; RV32IA-WMO-ZABHA-ZACAS: # %bb.0:
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: mv a2, a0
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: lhu a0, 0(a0)
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: .LBB98_1: # %atomicrmw.start
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: and a3, a0, a1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: not a3, a3
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: slli a4, a0, 16
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: amocas.h.aqrl a0, a3, (a2)
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: srai a4, a4, 16
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: bne a0, a4, .LBB98_1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: # %bb.2: # %atomicrmw.end
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i16_acq_rel:
+; RV32IA-TSO-ZABHA-ZACAS: # %bb.0:
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: mv a2, a0
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: lhu a0, 0(a0)
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: .LBB98_1: # %atomicrmw.start
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: and a3, a0, a1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: not a3, a3
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: slli a4, a0, 16
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: amocas.h a0, a3, (a2)
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: srai a4, a4, 16
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: bne a0, a4, .LBB98_1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: # %bb.2: # %atomicrmw.end
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i16_acq_rel:
; RV64IA-WMO-ZABHA-ZACAS: # %bb.0:
; RV64IA-WMO-ZABHA-ZACAS-NEXT: mv a2, a0
@@ -14284,27 +18882,27 @@ define i16 @atomicrmw_nand_i16_seq_cst(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-LABEL: atomicrmw_nand_i16_seq_cst:
-; RV32IA: # %bb.0:
-; RV32IA-NEXT: andi a2, a0, -4
-; RV32IA-NEXT: slli a0, a0, 3
-; RV32IA-NEXT: lui a3, 16
-; RV32IA-NEXT: addi a3, a3, -1
-; RV32IA-NEXT: sll a4, a3, a0
-; RV32IA-NEXT: and a1, a1, a3
-; RV32IA-NEXT: sll a1, a1, a0
-; RV32IA-NEXT: .LBB99_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: lr.w.aqrl a3, (a2)
-; RV32IA-NEXT: and a5, a3, a1
-; RV32IA-NEXT: not a5, a5
-; RV32IA-NEXT: xor a5, a3, a5
-; RV32IA-NEXT: and a5, a5, a4
-; RV32IA-NEXT: xor a5, a3, a5
-; RV32IA-NEXT: sc.w.rl a5, a5, (a2)
-; RV32IA-NEXT: bnez a5, .LBB99_1
-; RV32IA-NEXT: # %bb.2:
-; RV32IA-NEXT: srl a0, a3, a0
-; RV32IA-NEXT: ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_nand_i16_seq_cst:
+; RV32IA-NOZACAS: # %bb.0:
+; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT: lui a3, 16
+; RV32IA-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-NOZACAS-NEXT: .LBB99_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NOZACAS-NEXT: lr.w.aqrl a3, (a2)
+; RV32IA-NOZACAS-NEXT: and a5, a3, a1
+; RV32IA-NOZACAS-NEXT: not a5, a5
+; RV32IA-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-NOZACAS-NEXT: and a5, a5, a4
+; RV32IA-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-NOZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-NOZACAS-NEXT: bnez a5, .LBB99_1
+; RV32IA-NOZACAS-NEXT: # %bb.2:
+; RV32IA-NOZACAS-NEXT: srl a0, a3, a0
+; RV32IA-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_nand_i16_seq_cst:
; RV64I: # %bb.0:
@@ -14338,6 +18936,28 @@ define i16 @atomicrmw_nand_i16_seq_cst(ptr %a, i16 %b) nounwind {
; RV64IA-NOZACAS-NEXT: srlw a0, a3, a0
; RV64IA-NOZACAS-NEXT: ret
;
+; RV32IA-ZACAS-LABEL: atomicrmw_nand_i16_seq_cst:
+; RV32IA-ZACAS: # %bb.0:
+; RV32IA-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-ZACAS-NEXT: lui a3, 16
+; RV32IA-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-ZACAS-NEXT: sll a4, a3, a0
+; RV32IA-ZACAS-NEXT: and a1, a1, a3
+; RV32IA-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-ZACAS-NEXT: .LBB99_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT: lr.w.aqrl a3, (a2)
+; RV32IA-ZACAS-NEXT: and a5, a3, a1
+; RV32IA-ZACAS-NEXT: not a5, a5
+; RV32IA-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-ZACAS-NEXT: and a5, a5, a4
+; RV32IA-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-ZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-ZACAS-NEXT: bnez a5, .LBB99_1
+; RV32IA-ZACAS-NEXT: # %bb.2:
+; RV32IA-ZACAS-NEXT: srl a0, a3, a0
+; RV32IA-ZACAS-NEXT: ret
+;
; RV64IA-ZACAS-LABEL: atomicrmw_nand_i16_seq_cst:
; RV64IA-ZACAS: # %bb.0:
; RV64IA-ZACAS-NEXT: andi a2, a0, -4
@@ -14360,6 +18980,50 @@ define i16 @atomicrmw_nand_i16_seq_cst(ptr %a, i16 %b) nounwind {
; RV64IA-ZACAS-NEXT: srlw a0, a3, a0
; RV64IA-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i16_seq_cst:
+; RV32IA-WMO-ZABHA-NOZACAS: # %bb.0:
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: .LBB99_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: lr.w.aqrl a3, (a2)
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: and a5, a3, a1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: not a5, a5
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: and a5, a5, a4
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: bnez a5, .LBB99_1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: srl a0, a3, a0
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i16_seq_cst:
+; RV32IA-TSO-ZABHA-NOZACAS: # %bb.0:
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: .LBB99_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: lr.w.aqrl a3, (a2)
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: and a5, a3, a1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: not a5, a5
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: and a5, a5, a4
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: bnez a5, .LBB99_1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: srl a0, a3, a0
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: ret
+;
; RV64IA-WMO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i16_seq_cst:
; RV64IA-WMO-ZABHA-NOZACAS: # %bb.0:
; RV64IA-WMO-ZABHA-NOZACAS-NEXT: andi a2, a0, -4
@@ -14404,6 +19068,38 @@ define i16 @atomicrmw_nand_i16_seq_cst(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-ZABHA-NOZACAS-NEXT: srlw a0, a3, a0
; RV64IA-TSO-ZABHA-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i16_seq_cst:
+; RV32IA-WMO-ZABHA-ZACAS: # %bb.0:
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: mv a2, a0
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: lhu a0, 0(a0)
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: .LBB99_1: # %atomicrmw.start
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: and a3, a0, a1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: fence rw, rw
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: not a3, a3
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: slli a4, a0, 16
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: amocas.h.aqrl a0, a3, (a2)
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: srai a4, a4, 16
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: bne a0, a4, .LBB99_1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: # %bb.2: # %atomicrmw.end
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i16_seq_cst:
+; RV32IA-TSO-ZABHA-ZACAS: # %bb.0:
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: mv a2, a0
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: lhu a0, 0(a0)
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: .LBB99_1: # %atomicrmw.start
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: and a3, a0, a1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: fence rw, rw
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: not a3, a3
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: slli a4, a0, 16
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: amocas.h a0, a3, (a2)
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: srai a4, a4, 16
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: bne a0, a4, .LBB99_1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: # %bb.2: # %atomicrmw.end
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i16_seq_cst:
; RV64IA-WMO-ZABHA-ZACAS: # %bb.0:
; RV64IA-WMO-ZABHA-ZACAS-NEXT: mv a2, a0
@@ -14450,16 +19146,16 @@ define i16 @atomicrmw_or_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-LABEL: atomicrmw_or_i16_monotonic:
-; RV32IA: # %bb.0:
-; RV32IA-NEXT: andi a2, a0, -4
-; RV32IA-NEXT: slli a0, a0, 3
-; RV32IA-NEXT: slli a1, a1, 16
-; RV32IA-NEXT: srli a1, a1, 16
-; RV32IA-NEXT: sll a1, a1, a0
-; RV32IA-NEXT: amoor.w a1, a1, (a2)
-; RV32IA-NEXT: srl a0, a1, a0
-; RV32IA-NEXT: ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_or_i16_monotonic:
+; RV32IA-NOZACAS: # %bb.0:
+; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT: slli a1, a1, 16
+; RV32IA-NOZACAS-NEXT: srli a1, a1, 16
+; RV32IA-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-NOZACAS-NEXT: amoor.w a1, a1, (a2)
+; RV32IA-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_or_i16_monotonic:
; RV64I: # %bb.0:
@@ -14482,6 +19178,17 @@ define i16 @atomicrmw_or_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV64IA-NOZACAS-NEXT: srlw a0, a1, a0
; RV64IA-NOZACAS-NEXT: ret
;
+; RV32IA-ZACAS-LABEL: atomicrmw_or_i16_monotonic:
+; RV32IA-ZACAS: # %bb.0:
+; RV32IA-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-ZACAS-NEXT: slli a1, a1, 16
+; RV32IA-ZACAS-NEXT: srli a1, a1, 16
+; RV32IA-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-ZACAS-NEXT: amoor.w a1, a1, (a2)
+; RV32IA-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-ZACAS-NEXT: ret
+;
; RV64IA-ZACAS-LABEL: atomicrmw_or_i16_monotonic:
; RV64IA-ZACAS: # %bb.0:
; RV64IA-ZACAS-NEXT: andi a2, a0, -4
@@ -14493,6 +19200,16 @@ define i16 @atomicrmw_or_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV64IA-ZACAS-NEXT: srlw a0, a1, a0
; RV64IA-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_or_i16_monotonic:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amoor.h a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_or_i16_monotonic:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amoor.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_or_i16_monotonic:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amoor.h a0, a1, (a0)
@@ -14517,27 +19234,27 @@ define i16 @atomicrmw_or_i16_acquire(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_or_i16_acquire:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: slli a1, a1, 16
-; RV32IA-WMO-NEXT: srli a1, a1, 16
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: amoor.w.aq a1, a1, (a2)
-; RV32IA-WMO-NEXT: srl a0, a1, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_or_i16_acquire:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: slli a1, a1, 16
+; RV32IA-WMO-NOZACAS-NEXT: srli a1, a1, 16
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: amoor.w.aq a1, a1, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_or_i16_acquire:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: slli a1, a1, 16
-; RV32IA-TSO-NEXT: srli a1, a1, 16
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: amoor.w a1, a1, (a2)
-; RV32IA-TSO-NEXT: srl a0, a1, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_or_i16_acquire:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: slli a1, a1, 16
+; RV32IA-TSO-NOZACAS-NEXT: srli a1, a1, 16
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: amoor.w a1, a1, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_or_i16_acquire:
; RV64I: # %bb.0:
@@ -14571,6 +19288,28 @@ define i16 @atomicrmw_or_i16_acquire(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_or_i16_acquire:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: slli a1, a1, 16
+; RV32IA-WMO-ZACAS-NEXT: srli a1, a1, 16
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: amoor.w.aq a1, a1, (a2)
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_or_i16_acquire:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: slli a1, a1, 16
+; RV32IA-TSO-ZACAS-NEXT: srli a1, a1, 16
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: amoor.w a1, a1, (a2)
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_or_i16_acquire:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -14593,6 +19332,16 @@ define i16 @atomicrmw_or_i16_acquire(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_or_i16_acquire:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amoor.h.aq a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_or_i16_acquire:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amoor.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_or_i16_acquire:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amoor.h.aq a0, a1, (a0)
@@ -14617,27 +19366,27 @@ define i16 @atomicrmw_or_i16_release(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_or_i16_release:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: slli a1, a1, 16
-; RV32IA-WMO-NEXT: srli a1, a1, 16
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: amoor.w.rl a1, a1, (a2)
-; RV32IA-WMO-NEXT: srl a0, a1, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_or_i16_release:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: slli a1, a1, 16
+; RV32IA-WMO-NOZACAS-NEXT: srli a1, a1, 16
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: amoor.w.rl a1, a1, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_or_i16_release:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: slli a1, a1, 16
-; RV32IA-TSO-NEXT: srli a1, a1, 16
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: amoor.w a1, a1, (a2)
-; RV32IA-TSO-NEXT: srl a0, a1, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_or_i16_release:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: slli a1, a1, 16
+; RV32IA-TSO-NOZACAS-NEXT: srli a1, a1, 16
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: amoor.w a1, a1, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_or_i16_release:
; RV64I: # %bb.0:
@@ -14671,6 +19420,28 @@ define i16 @atomicrmw_or_i16_release(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_or_i16_release:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: slli a1, a1, 16
+; RV32IA-WMO-ZACAS-NEXT: srli a1, a1, 16
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: amoor.w.rl a1, a1, (a2)
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_or_i16_release:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: slli a1, a1, 16
+; RV32IA-TSO-ZACAS-NEXT: srli a1, a1, 16
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: amoor.w a1, a1, (a2)
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_or_i16_release:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -14693,6 +19464,16 @@ define i16 @atomicrmw_or_i16_release(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_or_i16_release:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amoor.h.rl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_or_i16_release:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amoor.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_or_i16_release:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amoor.h.rl a0, a1, (a0)
@@ -14717,27 +19498,27 @@ define i16 @atomicrmw_or_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_or_i16_acq_rel:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: slli a1, a1, 16
-; RV32IA-WMO-NEXT: srli a1, a1, 16
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: amoor.w.aqrl a1, a1, (a2)
-; RV32IA-WMO-NEXT: srl a0, a1, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_or_i16_acq_rel:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: slli a1, a1, 16
+; RV32IA-WMO-NOZACAS-NEXT: srli a1, a1, 16
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: amoor.w.aqrl a1, a1, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_or_i16_acq_rel:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: slli a1, a1, 16
-; RV32IA-TSO-NEXT: srli a1, a1, 16
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: amoor.w a1, a1, (a2)
-; RV32IA-TSO-NEXT: srl a0, a1, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_or_i16_acq_rel:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: slli a1, a1, 16
+; RV32IA-TSO-NOZACAS-NEXT: srli a1, a1, 16
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: amoor.w a1, a1, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_or_i16_acq_rel:
; RV64I: # %bb.0:
@@ -14771,6 +19552,28 @@ define i16 @atomicrmw_or_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_or_i16_acq_rel:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: slli a1, a1, 16
+; RV32IA-WMO-ZACAS-NEXT: srli a1, a1, 16
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: amoor.w.aqrl a1, a1, (a2)
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_or_i16_acq_rel:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: slli a1, a1, 16
+; RV32IA-TSO-ZACAS-NEXT: srli a1, a1, 16
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: amoor.w a1, a1, (a2)
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_or_i16_acq_rel:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -14793,6 +19596,16 @@ define i16 @atomicrmw_or_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_or_i16_acq_rel:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amoor.h.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_or_i16_acq_rel:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amoor.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_or_i16_acq_rel:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amoor.h.aqrl a0, a1, (a0)
@@ -14817,27 +19630,27 @@ define i16 @atomicrmw_or_i16_seq_cst(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_or_i16_seq_cst:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: slli a1, a1, 16
-; RV32IA-WMO-NEXT: srli a1, a1, 16
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: amoor.w.aqrl a1, a1, (a2)
-; RV32IA-WMO-NEXT: srl a0, a1, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_or_i16_seq_cst:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: slli a1, a1, 16
+; RV32IA-WMO-NOZACAS-NEXT: srli a1, a1, 16
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: amoor.w.aqrl a1, a1, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_or_i16_seq_cst:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: slli a1, a1, 16
-; RV32IA-TSO-NEXT: srli a1, a1, 16
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: amoor.w a1, a1, (a2)
-; RV32IA-TSO-NEXT: srl a0, a1, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_or_i16_seq_cst:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: slli a1, a1, 16
+; RV32IA-TSO-NOZACAS-NEXT: srli a1, a1, 16
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: amoor.w a1, a1, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_or_i16_seq_cst:
; RV64I: # %bb.0:
@@ -14871,6 +19684,28 @@ define i16 @atomicrmw_or_i16_seq_cst(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_or_i16_seq_cst:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: slli a1, a1, 16
+; RV32IA-WMO-ZACAS-NEXT: srli a1, a1, 16
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: amoor.w.aqrl a1, a1, (a2)
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_or_i16_seq_cst:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: slli a1, a1, 16
+; RV32IA-TSO-ZACAS-NEXT: srli a1, a1, 16
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: amoor.w a1, a1, (a2)
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_or_i16_seq_cst:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -14893,6 +19728,16 @@ define i16 @atomicrmw_or_i16_seq_cst(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_or_i16_seq_cst:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amoor.h.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_or_i16_seq_cst:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amoor.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_or_i16_seq_cst:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amoor.h.aqrl a0, a1, (a0)
@@ -14917,16 +19762,16 @@ define i16 @atomicrmw_xor_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-LABEL: atomicrmw_xor_i16_monotonic:
-; RV32IA: # %bb.0:
-; RV32IA-NEXT: andi a2, a0, -4
-; RV32IA-NEXT: slli a0, a0, 3
-; RV32IA-NEXT: slli a1, a1, 16
-; RV32IA-NEXT: srli a1, a1, 16
-; RV32IA-NEXT: sll a1, a1, a0
-; RV32IA-NEXT: amoxor.w a1, a1, (a2)
-; RV32IA-NEXT: srl a0, a1, a0
-; RV32IA-NEXT: ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_xor_i16_monotonic:
+; RV32IA-NOZACAS: # %bb.0:
+; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT: slli a1, a1, 16
+; RV32IA-NOZACAS-NEXT: srli a1, a1, 16
+; RV32IA-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-NOZACAS-NEXT: amoxor.w a1, a1, (a2)
+; RV32IA-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_xor_i16_monotonic:
; RV64I: # %bb.0:
@@ -14949,6 +19794,17 @@ define i16 @atomicrmw_xor_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV64IA-NOZACAS-NEXT: srlw a0, a1, a0
; RV64IA-NOZACAS-NEXT: ret
;
+; RV32IA-ZACAS-LABEL: atomicrmw_xor_i16_monotonic:
+; RV32IA-ZACAS: # %bb.0:
+; RV32IA-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-ZACAS-NEXT: slli a1, a1, 16
+; RV32IA-ZACAS-NEXT: srli a1, a1, 16
+; RV32IA-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-ZACAS-NEXT: amoxor.w a1, a1, (a2)
+; RV32IA-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-ZACAS-NEXT: ret
+;
; RV64IA-ZACAS-LABEL: atomicrmw_xor_i16_monotonic:
; RV64IA-ZACAS: # %bb.0:
; RV64IA-ZACAS-NEXT: andi a2, a0, -4
@@ -14960,6 +19816,16 @@ define i16 @atomicrmw_xor_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV64IA-ZACAS-NEXT: srlw a0, a1, a0
; RV64IA-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xor_i16_monotonic:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amoxor.h a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_xor_i16_monotonic:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amoxor.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_xor_i16_monotonic:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amoxor.h a0, a1, (a0)
@@ -14984,27 +19850,27 @@ define i16 @atomicrmw_xor_i16_acquire(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_xor_i16_acquire:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: slli a1, a1, 16
-; RV32IA-WMO-NEXT: srli a1, a1, 16
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: amoxor.w.aq a1, a1, (a2)
-; RV32IA-WMO-NEXT: srl a0, a1, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xor_i16_acquire:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: slli a1, a1, 16
+; RV32IA-WMO-NOZACAS-NEXT: srli a1, a1, 16
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: amoxor.w.aq a1, a1, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_xor_i16_acquire:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: slli a1, a1, 16
-; RV32IA-TSO-NEXT: srli a1, a1, 16
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: amoxor.w a1, a1, (a2)
-; RV32IA-TSO-NEXT: srl a0, a1, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_xor_i16_acquire:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: slli a1, a1, 16
+; RV32IA-TSO-NOZACAS-NEXT: srli a1, a1, 16
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: amoxor.w a1, a1, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_xor_i16_acquire:
; RV64I: # %bb.0:
@@ -15038,6 +19904,28 @@ define i16 @atomicrmw_xor_i16_acquire(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_xor_i16_acquire:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: slli a1, a1, 16
+; RV32IA-WMO-ZACAS-NEXT: srli a1, a1, 16
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: amoxor.w.aq a1, a1, (a2)
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_xor_i16_acquire:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: slli a1, a1, 16
+; RV32IA-TSO-ZACAS-NEXT: srli a1, a1, 16
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: amoxor.w a1, a1, (a2)
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_xor_i16_acquire:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -15060,6 +19948,16 @@ define i16 @atomicrmw_xor_i16_acquire(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xor_i16_acquire:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amoxor.h.aq a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_xor_i16_acquire:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amoxor.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_xor_i16_acquire:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amoxor.h.aq a0, a1, (a0)
@@ -15084,27 +19982,27 @@ define i16 @atomicrmw_xor_i16_release(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_xor_i16_release:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: slli a1, a1, 16
-; RV32IA-WMO-NEXT: srli a1, a1, 16
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: amoxor.w.rl a1, a1, (a2)
-; RV32IA-WMO-NEXT: srl a0, a1, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xor_i16_release:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: slli a1, a1, 16
+; RV32IA-WMO-NOZACAS-NEXT: srli a1, a1, 16
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: amoxor.w.rl a1, a1, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_xor_i16_release:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: slli a1, a1, 16
-; RV32IA-TSO-NEXT: srli a1, a1, 16
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: amoxor.w a1, a1, (a2)
-; RV32IA-TSO-NEXT: srl a0, a1, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_xor_i16_release:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: slli a1, a1, 16
+; RV32IA-TSO-NOZACAS-NEXT: srli a1, a1, 16
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: amoxor.w a1, a1, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_xor_i16_release:
; RV64I: # %bb.0:
@@ -15138,6 +20036,28 @@ define i16 @atomicrmw_xor_i16_release(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_xor_i16_release:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: slli a1, a1, 16
+; RV32IA-WMO-ZACAS-NEXT: srli a1, a1, 16
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: amoxor.w.rl a1, a1, (a2)
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_xor_i16_release:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: slli a1, a1, 16
+; RV32IA-TSO-ZACAS-NEXT: srli a1, a1, 16
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: amoxor.w a1, a1, (a2)
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_xor_i16_release:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -15160,6 +20080,16 @@ define i16 @atomicrmw_xor_i16_release(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xor_i16_release:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amoxor.h.rl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_xor_i16_release:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amoxor.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_xor_i16_release:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amoxor.h.rl a0, a1, (a0)
@@ -15184,27 +20114,27 @@ define i16 @atomicrmw_xor_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_xor_i16_acq_rel:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: slli a1, a1, 16
-; RV32IA-WMO-NEXT: srli a1, a1, 16
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: amoxor.w.aqrl a1, a1, (a2)
-; RV32IA-WMO-NEXT: srl a0, a1, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xor_i16_acq_rel:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: slli a1, a1, 16
+; RV32IA-WMO-NOZACAS-NEXT: srli a1, a1, 16
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: amoxor.w.aqrl a1, a1, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_xor_i16_acq_rel:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: slli a1, a1, 16
-; RV32IA-TSO-NEXT: srli a1, a1, 16
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: amoxor.w a1, a1, (a2)
-; RV32IA-TSO-NEXT: srl a0, a1, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_xor_i16_acq_rel:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: slli a1, a1, 16
+; RV32IA-TSO-NOZACAS-NEXT: srli a1, a1, 16
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: amoxor.w a1, a1, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_xor_i16_acq_rel:
; RV64I: # %bb.0:
@@ -15238,6 +20168,28 @@ define i16 @atomicrmw_xor_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_xor_i16_acq_rel:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: slli a1, a1, 16
+; RV32IA-WMO-ZACAS-NEXT: srli a1, a1, 16
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: amoxor.w.aqrl a1, a1, (a2)
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_xor_i16_acq_rel:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: slli a1, a1, 16
+; RV32IA-TSO-ZACAS-NEXT: srli a1, a1, 16
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: amoxor.w a1, a1, (a2)
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_xor_i16_acq_rel:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -15260,6 +20212,16 @@ define i16 @atomicrmw_xor_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xor_i16_acq_rel:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amoxor.h.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_xor_i16_acq_rel:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amoxor.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_xor_i16_acq_rel:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amoxor.h.aqrl a0, a1, (a0)
@@ -15284,27 +20246,27 @@ define i16 @atomicrmw_xor_i16_seq_cst(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_xor_i16_seq_cst:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: slli a1, a1, 16
-; RV32IA-WMO-NEXT: srli a1, a1, 16
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: amoxor.w.aqrl a1, a1, (a2)
-; RV32IA-WMO-NEXT: srl a0, a1, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xor_i16_seq_cst:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: slli a1, a1, 16
+; RV32IA-WMO-NOZACAS-NEXT: srli a1, a1, 16
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: amoxor.w.aqrl a1, a1, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_xor_i16_seq_cst:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: slli a1, a1, 16
-; RV32IA-TSO-NEXT: srli a1, a1, 16
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: amoxor.w a1, a1, (a2)
-; RV32IA-TSO-NEXT: srl a0, a1, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_xor_i16_seq_cst:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: slli a1, a1, 16
+; RV32IA-TSO-NOZACAS-NEXT: srli a1, a1, 16
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: amoxor.w a1, a1, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_xor_i16_seq_cst:
; RV64I: # %bb.0:
@@ -15338,6 +20300,28 @@ define i16 @atomicrmw_xor_i16_seq_cst(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_xor_i16_seq_cst:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: slli a1, a1, 16
+; RV32IA-WMO-ZACAS-NEXT: srli a1, a1, 16
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: amoxor.w.aqrl a1, a1, (a2)
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_xor_i16_seq_cst:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: slli a1, a1, 16
+; RV32IA-TSO-ZACAS-NEXT: srli a1, a1, 16
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: amoxor.w a1, a1, (a2)
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_xor_i16_seq_cst:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -15360,6 +20344,16 @@ define i16 @atomicrmw_xor_i16_seq_cst(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xor_i16_seq_cst:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amoxor.h.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_xor_i16_seq_cst:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amoxor.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_xor_i16_seq_cst:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amoxor.h.aqrl a0, a1, (a0)
@@ -15416,36 +20410,36 @@ define i16 @atomicrmw_max_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
-; RV32IA-LABEL: atomicrmw_max_i16_monotonic:
-; RV32IA: # %bb.0:
-; RV32IA-NEXT: andi a2, a0, -4
-; RV32IA-NEXT: slli a0, a0, 3
-; RV32IA-NEXT: lui a3, 16
-; RV32IA-NEXT: slli a1, a1, 16
-; RV32IA-NEXT: li a4, 16
-; RV32IA-NEXT: andi a5, a0, 24
-; RV32IA-NEXT: addi a3, a3, -1
-; RV32IA-NEXT: srai a1, a1, 16
-; RV32IA-NEXT: sll a3, a3, a0
-; RV32IA-NEXT: sll a1, a1, a0
-; RV32IA-NEXT: sub a4, a4, a5
-; RV32IA-NEXT: .LBB110_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: lr.w a5, (a2)
-; RV32IA-NEXT: and a7, a5, a3
-; RV32IA-NEXT: mv a6, a5
-; RV32IA-NEXT: sll a7, a7, a4
-; RV32IA-NEXT: sra a7, a7, a4
-; RV32IA-NEXT: bge a7, a1, .LBB110_3
-; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB110_1 Depth=1
-; RV32IA-NEXT: xor a6, a5, a1
-; RV32IA-NEXT: and a6, a6, a3
-; RV32IA-NEXT: xor a6, a5, a6
-; RV32IA-NEXT: .LBB110_3: # in Loop: Header=BB110_1 Depth=1
-; RV32IA-NEXT: sc.w a6, a6, (a2)
-; RV32IA-NEXT: bnez a6, .LBB110_1
-; RV32IA-NEXT: # %bb.4:
-; RV32IA-NEXT: srl a0, a5, a0
-; RV32IA-NEXT: ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_max_i16_monotonic:
+; RV32IA-NOZACAS: # %bb.0:
+; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT: lui a3, 16
+; RV32IA-NOZACAS-NEXT: slli a1, a1, 16
+; RV32IA-NOZACAS-NEXT: li a4, 16
+; RV32IA-NOZACAS-NEXT: andi a5, a0, 24
+; RV32IA-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-NOZACAS-NEXT: srai a1, a1, 16
+; RV32IA-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-NOZACAS-NEXT: sub a4, a4, a5
+; RV32IA-NOZACAS-NEXT: .LBB110_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NOZACAS-NEXT: lr.w a5, (a2)
+; RV32IA-NOZACAS-NEXT: and a7, a5, a3
+; RV32IA-NOZACAS-NEXT: mv a6, a5
+; RV32IA-NOZACAS-NEXT: sll a7, a7, a4
+; RV32IA-NOZACAS-NEXT: sra a7, a7, a4
+; RV32IA-NOZACAS-NEXT: bge a7, a1, .LBB110_3
+; RV32IA-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB110_1 Depth=1
+; RV32IA-NOZACAS-NEXT: xor a6, a5, a1
+; RV32IA-NOZACAS-NEXT: and a6, a6, a3
+; RV32IA-NOZACAS-NEXT: xor a6, a5, a6
+; RV32IA-NOZACAS-NEXT: .LBB110_3: # in Loop: Header=BB110_1 Depth=1
+; RV32IA-NOZACAS-NEXT: sc.w a6, a6, (a2)
+; RV32IA-NOZACAS-NEXT: bnez a6, .LBB110_1
+; RV32IA-NOZACAS-NEXT: # %bb.4:
+; RV32IA-NOZACAS-NEXT: srl a0, a5, a0
+; RV32IA-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_max_i16_monotonic:
; RV64I: # %bb.0:
@@ -15520,6 +20514,37 @@ define i16 @atomicrmw_max_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV64IA-NOZACAS-NEXT: srlw a0, a5, a0
; RV64IA-NOZACAS-NEXT: ret
;
+; RV32IA-ZACAS-LABEL: atomicrmw_max_i16_monotonic:
+; RV32IA-ZACAS: # %bb.0:
+; RV32IA-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-ZACAS-NEXT: lui a3, 16
+; RV32IA-ZACAS-NEXT: slli a1, a1, 16
+; RV32IA-ZACAS-NEXT: li a4, 16
+; RV32IA-ZACAS-NEXT: andi a5, a0, 24
+; RV32IA-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-ZACAS-NEXT: srai a1, a1, 16
+; RV32IA-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-ZACAS-NEXT: sub a4, a4, a5
+; RV32IA-ZACAS-NEXT: .LBB110_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT: lr.w a5, (a2)
+; RV32IA-ZACAS-NEXT: and a7, a5, a3
+; RV32IA-ZACAS-NEXT: mv a6, a5
+; RV32IA-ZACAS-NEXT: sll a7, a7, a4
+; RV32IA-ZACAS-NEXT: sra a7, a7, a4
+; RV32IA-ZACAS-NEXT: bge a7, a1, .LBB110_3
+; RV32IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB110_1 Depth=1
+; RV32IA-ZACAS-NEXT: xor a6, a5, a1
+; RV32IA-ZACAS-NEXT: and a6, a6, a3
+; RV32IA-ZACAS-NEXT: xor a6, a5, a6
+; RV32IA-ZACAS-NEXT: .LBB110_3: # in Loop: Header=BB110_1 Depth=1
+; RV32IA-ZACAS-NEXT: sc.w a6, a6, (a2)
+; RV32IA-ZACAS-NEXT: bnez a6, .LBB110_1
+; RV32IA-ZACAS-NEXT: # %bb.4:
+; RV32IA-ZACAS-NEXT: srl a0, a5, a0
+; RV32IA-ZACAS-NEXT: ret
+;
; RV64IA-ZACAS-LABEL: atomicrmw_max_i16_monotonic:
; RV64IA-ZACAS: # %bb.0:
; RV64IA-ZACAS-NEXT: andi a2, a0, -4
@@ -15551,6 +20576,16 @@ define i16 @atomicrmw_max_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV64IA-ZACAS-NEXT: srlw a0, a5, a0
; RV64IA-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_max_i16_monotonic:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amomax.h a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_max_i16_monotonic:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amomax.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_max_i16_monotonic:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amomax.h a0, a1, (a0)
@@ -15607,67 +20642,67 @@ define i16 @atomicrmw_max_i16_acquire(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_max_i16_acquire:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: lui a3, 16
-; RV32IA-WMO-NEXT: slli a1, a1, 16
-; RV32IA-WMO-NEXT: li a4, 16
-; RV32IA-WMO-NEXT: andi a5, a0, 24
-; RV32IA-WMO-NEXT: addi a3, a3, -1
-; RV32IA-WMO-NEXT: srai a1, a1, 16
-; RV32IA-WMO-NEXT: sll a3, a3, a0
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: sub a4, a4, a5
-; RV32IA-WMO-NEXT: .LBB111_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT: lr.w.aq a5, (a2)
-; RV32IA-WMO-NEXT: and a7, a5, a3
-; RV32IA-WMO-NEXT: mv a6, a5
-; RV32IA-WMO-NEXT: sll a7, a7, a4
-; RV32IA-WMO-NEXT: sra a7, a7, a4
-; RV32IA-WMO-NEXT: bge a7, a1, .LBB111_3
-; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB111_1 Depth=1
-; RV32IA-WMO-NEXT: xor a6, a5, a1
-; RV32IA-WMO-NEXT: and a6, a6, a3
-; RV32IA-WMO-NEXT: xor a6, a5, a6
-; RV32IA-WMO-NEXT: .LBB111_3: # in Loop: Header=BB111_1 Depth=1
-; RV32IA-WMO-NEXT: sc.w a6, a6, (a2)
-; RV32IA-WMO-NEXT: bnez a6, .LBB111_1
-; RV32IA-WMO-NEXT: # %bb.4:
-; RV32IA-WMO-NEXT: srl a0, a5, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_max_i16_acquire:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-NOZACAS-NEXT: slli a1, a1, 16
+; RV32IA-WMO-NOZACAS-NEXT: li a4, 16
+; RV32IA-WMO-NOZACAS-NEXT: andi a5, a0, 24
+; RV32IA-WMO-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-NOZACAS-NEXT: srai a1, a1, 16
+; RV32IA-WMO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: sub a4, a4, a5
+; RV32IA-WMO-NOZACAS-NEXT: .LBB111_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: lr.w.aq a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: and a7, a5, a3
+; RV32IA-WMO-NOZACAS-NEXT: mv a6, a5
+; RV32IA-WMO-NOZACAS-NEXT: sll a7, a7, a4
+; RV32IA-WMO-NOZACAS-NEXT: sra a7, a7, a4
+; RV32IA-WMO-NOZACAS-NEXT: bge a7, a1, .LBB111_3
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB111_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: xor a6, a5, a1
+; RV32IA-WMO-NOZACAS-NEXT: and a6, a6, a3
+; RV32IA-WMO-NOZACAS-NEXT: xor a6, a5, a6
+; RV32IA-WMO-NOZACAS-NEXT: .LBB111_3: # in Loop: Header=BB111_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: sc.w a6, a6, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: bnez a6, .LBB111_1
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.4:
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a5, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_max_i16_acquire:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: lui a3, 16
-; RV32IA-TSO-NEXT: slli a1, a1, 16
-; RV32IA-TSO-NEXT: li a4, 16
-; RV32IA-TSO-NEXT: andi a5, a0, 24
-; RV32IA-TSO-NEXT: addi a3, a3, -1
-; RV32IA-TSO-NEXT: srai a1, a1, 16
-; RV32IA-TSO-NEXT: sll a3, a3, a0
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: sub a4, a4, a5
-; RV32IA-TSO-NEXT: .LBB111_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT: lr.w a5, (a2)
-; RV32IA-TSO-NEXT: and a7, a5, a3
-; RV32IA-TSO-NEXT: mv a6, a5
-; RV32IA-TSO-NEXT: sll a7, a7, a4
-; RV32IA-TSO-NEXT: sra a7, a7, a4
-; RV32IA-TSO-NEXT: bge a7, a1, .LBB111_3
-; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB111_1 Depth=1
-; RV32IA-TSO-NEXT: xor a6, a5, a1
-; RV32IA-TSO-NEXT: and a6, a6, a3
-; RV32IA-TSO-NEXT: xor a6, a5, a6
-; RV32IA-TSO-NEXT: .LBB111_3: # in Loop: Header=BB111_1 Depth=1
-; RV32IA-TSO-NEXT: sc.w a6, a6, (a2)
-; RV32IA-TSO-NEXT: bnez a6, .LBB111_1
-; RV32IA-TSO-NEXT: # %bb.4:
-; RV32IA-TSO-NEXT: srl a0, a5, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_max_i16_acquire:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-NOZACAS-NEXT: slli a1, a1, 16
+; RV32IA-TSO-NOZACAS-NEXT: li a4, 16
+; RV32IA-TSO-NOZACAS-NEXT: andi a5, a0, 24
+; RV32IA-TSO-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-NOZACAS-NEXT: srai a1, a1, 16
+; RV32IA-TSO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: sub a4, a4, a5
+; RV32IA-TSO-NOZACAS-NEXT: .LBB111_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: lr.w a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: and a7, a5, a3
+; RV32IA-TSO-NOZACAS-NEXT: mv a6, a5
+; RV32IA-TSO-NOZACAS-NEXT: sll a7, a7, a4
+; RV32IA-TSO-NOZACAS-NEXT: sra a7, a7, a4
+; RV32IA-TSO-NOZACAS-NEXT: bge a7, a1, .LBB111_3
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB111_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: xor a6, a5, a1
+; RV32IA-TSO-NOZACAS-NEXT: and a6, a6, a3
+; RV32IA-TSO-NOZACAS-NEXT: xor a6, a5, a6
+; RV32IA-TSO-NOZACAS-NEXT: .LBB111_3: # in Loop: Header=BB111_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: sc.w a6, a6, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: bnez a6, .LBB111_1
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.4:
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a5, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_max_i16_acquire:
; RV64I: # %bb.0:
@@ -15773,6 +20808,68 @@ define i16 @atomicrmw_max_i16_acquire(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a5, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_max_i16_acquire:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-ZACAS-NEXT: slli a1, a1, 16
+; RV32IA-WMO-ZACAS-NEXT: li a4, 16
+; RV32IA-WMO-ZACAS-NEXT: andi a5, a0, 24
+; RV32IA-WMO-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-ZACAS-NEXT: srai a1, a1, 16
+; RV32IA-WMO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: sub a4, a4, a5
+; RV32IA-WMO-ZACAS-NEXT: .LBB111_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT: and a7, a5, a3
+; RV32IA-WMO-ZACAS-NEXT: mv a6, a5
+; RV32IA-WMO-ZACAS-NEXT: sll a7, a7, a4
+; RV32IA-WMO-ZACAS-NEXT: sra a7, a7, a4
+; RV32IA-WMO-ZACAS-NEXT: bge a7, a1, .LBB111_3
+; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB111_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT: xor a6, a5, a1
+; RV32IA-WMO-ZACAS-NEXT: and a6, a6, a3
+; RV32IA-WMO-ZACAS-NEXT: xor a6, a5, a6
+; RV32IA-WMO-ZACAS-NEXT: .LBB111_3: # in Loop: Header=BB111_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT: sc.w a6, a6, (a2)
+; RV32IA-WMO-ZACAS-NEXT: bnez a6, .LBB111_1
+; RV32IA-WMO-ZACAS-NEXT: # %bb.4:
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a5, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_max_i16_acquire:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-ZACAS-NEXT: slli a1, a1, 16
+; RV32IA-TSO-ZACAS-NEXT: li a4, 16
+; RV32IA-TSO-ZACAS-NEXT: andi a5, a0, 24
+; RV32IA-TSO-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-ZACAS-NEXT: srai a1, a1, 16
+; RV32IA-TSO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: sub a4, a4, a5
+; RV32IA-TSO-ZACAS-NEXT: .LBB111_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT: lr.w a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT: and a7, a5, a3
+; RV32IA-TSO-ZACAS-NEXT: mv a6, a5
+; RV32IA-TSO-ZACAS-NEXT: sll a7, a7, a4
+; RV32IA-TSO-ZACAS-NEXT: sra a7, a7, a4
+; RV32IA-TSO-ZACAS-NEXT: bge a7, a1, .LBB111_3
+; RV32IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB111_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT: xor a6, a5, a1
+; RV32IA-TSO-ZACAS-NEXT: and a6, a6, a3
+; RV32IA-TSO-ZACAS-NEXT: xor a6, a5, a6
+; RV32IA-TSO-ZACAS-NEXT: .LBB111_3: # in Loop: Header=BB111_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT: sc.w a6, a6, (a2)
+; RV32IA-TSO-ZACAS-NEXT: bnez a6, .LBB111_1
+; RV32IA-TSO-ZACAS-NEXT: # %bb.4:
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a5, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_max_i16_acquire:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -15835,6 +20932,16 @@ define i16 @atomicrmw_max_i16_acquire(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a5, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_max_i16_acquire:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amomax.h.aq a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_max_i16_acquire:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amomax.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_max_i16_acquire:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amomax.h.aq a0, a1, (a0)
@@ -15891,67 +20998,67 @@ define i16 @atomicrmw_max_i16_release(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_max_i16_release:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: lui a3, 16
-; RV32IA-WMO-NEXT: slli a1, a1, 16
-; RV32IA-WMO-NEXT: li a4, 16
-; RV32IA-WMO-NEXT: andi a5, a0, 24
-; RV32IA-WMO-NEXT: addi a3, a3, -1
-; RV32IA-WMO-NEXT: srai a1, a1, 16
-; RV32IA-WMO-NEXT: sll a3, a3, a0
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: sub a4, a4, a5
-; RV32IA-WMO-NEXT: .LBB112_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT: lr.w a5, (a2)
-; RV32IA-WMO-NEXT: and a7, a5, a3
-; RV32IA-WMO-NEXT: mv a6, a5
-; RV32IA-WMO-NEXT: sll a7, a7, a4
-; RV32IA-WMO-NEXT: sra a7, a7, a4
-; RV32IA-WMO-NEXT: bge a7, a1, .LBB112_3
-; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB112_1 Depth=1
-; RV32IA-WMO-NEXT: xor a6, a5, a1
-; RV32IA-WMO-NEXT: and a6, a6, a3
-; RV32IA-WMO-NEXT: xor a6, a5, a6
-; RV32IA-WMO-NEXT: .LBB112_3: # in Loop: Header=BB112_1 Depth=1
-; RV32IA-WMO-NEXT: sc.w.rl a6, a6, (a2)
-; RV32IA-WMO-NEXT: bnez a6, .LBB112_1
-; RV32IA-WMO-NEXT: # %bb.4:
-; RV32IA-WMO-NEXT: srl a0, a5, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_max_i16_release:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-NOZACAS-NEXT: slli a1, a1, 16
+; RV32IA-WMO-NOZACAS-NEXT: li a4, 16
+; RV32IA-WMO-NOZACAS-NEXT: andi a5, a0, 24
+; RV32IA-WMO-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-NOZACAS-NEXT: srai a1, a1, 16
+; RV32IA-WMO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: sub a4, a4, a5
+; RV32IA-WMO-NOZACAS-NEXT: .LBB112_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: lr.w a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: and a7, a5, a3
+; RV32IA-WMO-NOZACAS-NEXT: mv a6, a5
+; RV32IA-WMO-NOZACAS-NEXT: sll a7, a7, a4
+; RV32IA-WMO-NOZACAS-NEXT: sra a7, a7, a4
+; RV32IA-WMO-NOZACAS-NEXT: bge a7, a1, .LBB112_3
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB112_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: xor a6, a5, a1
+; RV32IA-WMO-NOZACAS-NEXT: and a6, a6, a3
+; RV32IA-WMO-NOZACAS-NEXT: xor a6, a5, a6
+; RV32IA-WMO-NOZACAS-NEXT: .LBB112_3: # in Loop: Header=BB112_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: sc.w.rl a6, a6, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: bnez a6, .LBB112_1
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.4:
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a5, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_max_i16_release:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: lui a3, 16
-; RV32IA-TSO-NEXT: slli a1, a1, 16
-; RV32IA-TSO-NEXT: li a4, 16
-; RV32IA-TSO-NEXT: andi a5, a0, 24
-; RV32IA-TSO-NEXT: addi a3, a3, -1
-; RV32IA-TSO-NEXT: srai a1, a1, 16
-; RV32IA-TSO-NEXT: sll a3, a3, a0
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: sub a4, a4, a5
-; RV32IA-TSO-NEXT: .LBB112_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT: lr.w a5, (a2)
-; RV32IA-TSO-NEXT: and a7, a5, a3
-; RV32IA-TSO-NEXT: mv a6, a5
-; RV32IA-TSO-NEXT: sll a7, a7, a4
-; RV32IA-TSO-NEXT: sra a7, a7, a4
-; RV32IA-TSO-NEXT: bge a7, a1, .LBB112_3
-; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB112_1 Depth=1
-; RV32IA-TSO-NEXT: xor a6, a5, a1
-; RV32IA-TSO-NEXT: and a6, a6, a3
-; RV32IA-TSO-NEXT: xor a6, a5, a6
-; RV32IA-TSO-NEXT: .LBB112_3: # in Loop: Header=BB112_1 Depth=1
-; RV32IA-TSO-NEXT: sc.w a6, a6, (a2)
-; RV32IA-TSO-NEXT: bnez a6, .LBB112_1
-; RV32IA-TSO-NEXT: # %bb.4:
-; RV32IA-TSO-NEXT: srl a0, a5, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_max_i16_release:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-NOZACAS-NEXT: slli a1, a1, 16
+; RV32IA-TSO-NOZACAS-NEXT: li a4, 16
+; RV32IA-TSO-NOZACAS-NEXT: andi a5, a0, 24
+; RV32IA-TSO-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-NOZACAS-NEXT: srai a1, a1, 16
+; RV32IA-TSO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: sub a4, a4, a5
+; RV32IA-TSO-NOZACAS-NEXT: .LBB112_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: lr.w a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: and a7, a5, a3
+; RV32IA-TSO-NOZACAS-NEXT: mv a6, a5
+; RV32IA-TSO-NOZACAS-NEXT: sll a7, a7, a4
+; RV32IA-TSO-NOZACAS-NEXT: sra a7, a7, a4
+; RV32IA-TSO-NOZACAS-NEXT: bge a7, a1, .LBB112_3
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB112_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: xor a6, a5, a1
+; RV32IA-TSO-NOZACAS-NEXT: and a6, a6, a3
+; RV32IA-TSO-NOZACAS-NEXT: xor a6, a5, a6
+; RV32IA-TSO-NOZACAS-NEXT: .LBB112_3: # in Loop: Header=BB112_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: sc.w a6, a6, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: bnez a6, .LBB112_1
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.4:
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a5, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_max_i16_release:
; RV64I: # %bb.0:
@@ -16057,6 +21164,68 @@ define i16 @atomicrmw_max_i16_release(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a5, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_max_i16_release:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-ZACAS-NEXT: slli a1, a1, 16
+; RV32IA-WMO-ZACAS-NEXT: li a4, 16
+; RV32IA-WMO-ZACAS-NEXT: andi a5, a0, 24
+; RV32IA-WMO-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-ZACAS-NEXT: srai a1, a1, 16
+; RV32IA-WMO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: sub a4, a4, a5
+; RV32IA-WMO-ZACAS-NEXT: .LBB112_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT: lr.w a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT: and a7, a5, a3
+; RV32IA-WMO-ZACAS-NEXT: mv a6, a5
+; RV32IA-WMO-ZACAS-NEXT: sll a7, a7, a4
+; RV32IA-WMO-ZACAS-NEXT: sra a7, a7, a4
+; RV32IA-WMO-ZACAS-NEXT: bge a7, a1, .LBB112_3
+; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB112_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT: xor a6, a5, a1
+; RV32IA-WMO-ZACAS-NEXT: and a6, a6, a3
+; RV32IA-WMO-ZACAS-NEXT: xor a6, a5, a6
+; RV32IA-WMO-ZACAS-NEXT: .LBB112_3: # in Loop: Header=BB112_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a6, a6, (a2)
+; RV32IA-WMO-ZACAS-NEXT: bnez a6, .LBB112_1
+; RV32IA-WMO-ZACAS-NEXT: # %bb.4:
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a5, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_max_i16_release:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-ZACAS-NEXT: slli a1, a1, 16
+; RV32IA-TSO-ZACAS-NEXT: li a4, 16
+; RV32IA-TSO-ZACAS-NEXT: andi a5, a0, 24
+; RV32IA-TSO-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-ZACAS-NEXT: srai a1, a1, 16
+; RV32IA-TSO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: sub a4, a4, a5
+; RV32IA-TSO-ZACAS-NEXT: .LBB112_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT: lr.w a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT: and a7, a5, a3
+; RV32IA-TSO-ZACAS-NEXT: mv a6, a5
+; RV32IA-TSO-ZACAS-NEXT: sll a7, a7, a4
+; RV32IA-TSO-ZACAS-NEXT: sra a7, a7, a4
+; RV32IA-TSO-ZACAS-NEXT: bge a7, a1, .LBB112_3
+; RV32IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB112_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT: xor a6, a5, a1
+; RV32IA-TSO-ZACAS-NEXT: and a6, a6, a3
+; RV32IA-TSO-ZACAS-NEXT: xor a6, a5, a6
+; RV32IA-TSO-ZACAS-NEXT: .LBB112_3: # in Loop: Header=BB112_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT: sc.w a6, a6, (a2)
+; RV32IA-TSO-ZACAS-NEXT: bnez a6, .LBB112_1
+; RV32IA-TSO-ZACAS-NEXT: # %bb.4:
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a5, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_max_i16_release:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -16119,6 +21288,16 @@ define i16 @atomicrmw_max_i16_release(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a5, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_max_i16_release:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amomax.h.rl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_max_i16_release:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amomax.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_max_i16_release:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amomax.h.rl a0, a1, (a0)
@@ -16175,67 +21354,67 @@ define i16 @atomicrmw_max_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_max_i16_acq_rel:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: lui a3, 16
-; RV32IA-WMO-NEXT: slli a1, a1, 16
-; RV32IA-WMO-NEXT: li a4, 16
-; RV32IA-WMO-NEXT: andi a5, a0, 24
-; RV32IA-WMO-NEXT: addi a3, a3, -1
-; RV32IA-WMO-NEXT: srai a1, a1, 16
-; RV32IA-WMO-NEXT: sll a3, a3, a0
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: sub a4, a4, a5
-; RV32IA-WMO-NEXT: .LBB113_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT: lr.w.aq a5, (a2)
-; RV32IA-WMO-NEXT: and a7, a5, a3
-; RV32IA-WMO-NEXT: mv a6, a5
-; RV32IA-WMO-NEXT: sll a7, a7, a4
-; RV32IA-WMO-NEXT: sra a7, a7, a4
-; RV32IA-WMO-NEXT: bge a7, a1, .LBB113_3
-; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB113_1 Depth=1
-; RV32IA-WMO-NEXT: xor a6, a5, a1
-; RV32IA-WMO-NEXT: and a6, a6, a3
-; RV32IA-WMO-NEXT: xor a6, a5, a6
-; RV32IA-WMO-NEXT: .LBB113_3: # in Loop: Header=BB113_1 Depth=1
-; RV32IA-WMO-NEXT: sc.w.rl a6, a6, (a2)
-; RV32IA-WMO-NEXT: bnez a6, .LBB113_1
-; RV32IA-WMO-NEXT: # %bb.4:
-; RV32IA-WMO-NEXT: srl a0, a5, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_max_i16_acq_rel:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-NOZACAS-NEXT: slli a1, a1, 16
+; RV32IA-WMO-NOZACAS-NEXT: li a4, 16
+; RV32IA-WMO-NOZACAS-NEXT: andi a5, a0, 24
+; RV32IA-WMO-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-NOZACAS-NEXT: srai a1, a1, 16
+; RV32IA-WMO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: sub a4, a4, a5
+; RV32IA-WMO-NOZACAS-NEXT: .LBB113_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: lr.w.aq a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: and a7, a5, a3
+; RV32IA-WMO-NOZACAS-NEXT: mv a6, a5
+; RV32IA-WMO-NOZACAS-NEXT: sll a7, a7, a4
+; RV32IA-WMO-NOZACAS-NEXT: sra a7, a7, a4
+; RV32IA-WMO-NOZACAS-NEXT: bge a7, a1, .LBB113_3
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB113_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: xor a6, a5, a1
+; RV32IA-WMO-NOZACAS-NEXT: and a6, a6, a3
+; RV32IA-WMO-NOZACAS-NEXT: xor a6, a5, a6
+; RV32IA-WMO-NOZACAS-NEXT: .LBB113_3: # in Loop: Header=BB113_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: sc.w.rl a6, a6, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: bnez a6, .LBB113_1
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.4:
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a5, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_max_i16_acq_rel:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: lui a3, 16
-; RV32IA-TSO-NEXT: slli a1, a1, 16
-; RV32IA-TSO-NEXT: li a4, 16
-; RV32IA-TSO-NEXT: andi a5, a0, 24
-; RV32IA-TSO-NEXT: addi a3, a3, -1
-; RV32IA-TSO-NEXT: srai a1, a1, 16
-; RV32IA-TSO-NEXT: sll a3, a3, a0
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: sub a4, a4, a5
-; RV32IA-TSO-NEXT: .LBB113_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT: lr.w a5, (a2)
-; RV32IA-TSO-NEXT: and a7, a5, a3
-; RV32IA-TSO-NEXT: mv a6, a5
-; RV32IA-TSO-NEXT: sll a7, a7, a4
-; RV32IA-TSO-NEXT: sra a7, a7, a4
-; RV32IA-TSO-NEXT: bge a7, a1, .LBB113_3
-; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB113_1 Depth=1
-; RV32IA-TSO-NEXT: xor a6, a5, a1
-; RV32IA-TSO-NEXT: and a6, a6, a3
-; RV32IA-TSO-NEXT: xor a6, a5, a6
-; RV32IA-TSO-NEXT: .LBB113_3: # in Loop: Header=BB113_1 Depth=1
-; RV32IA-TSO-NEXT: sc.w a6, a6, (a2)
-; RV32IA-TSO-NEXT: bnez a6, .LBB113_1
-; RV32IA-TSO-NEXT: # %bb.4:
-; RV32IA-TSO-NEXT: srl a0, a5, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_max_i16_acq_rel:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-NOZACAS-NEXT: slli a1, a1, 16
+; RV32IA-TSO-NOZACAS-NEXT: li a4, 16
+; RV32IA-TSO-NOZACAS-NEXT: andi a5, a0, 24
+; RV32IA-TSO-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-NOZACAS-NEXT: srai a1, a1, 16
+; RV32IA-TSO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: sub a4, a4, a5
+; RV32IA-TSO-NOZACAS-NEXT: .LBB113_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: lr.w a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: and a7, a5, a3
+; RV32IA-TSO-NOZACAS-NEXT: mv a6, a5
+; RV32IA-TSO-NOZACAS-NEXT: sll a7, a7, a4
+; RV32IA-TSO-NOZACAS-NEXT: sra a7, a7, a4
+; RV32IA-TSO-NOZACAS-NEXT: bge a7, a1, .LBB113_3
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB113_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: xor a6, a5, a1
+; RV32IA-TSO-NOZACAS-NEXT: and a6, a6, a3
+; RV32IA-TSO-NOZACAS-NEXT: xor a6, a5, a6
+; RV32IA-TSO-NOZACAS-NEXT: .LBB113_3: # in Loop: Header=BB113_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: sc.w a6, a6, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: bnez a6, .LBB113_1
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.4:
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a5, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_max_i16_acq_rel:
; RV64I: # %bb.0:
@@ -16341,6 +21520,68 @@ define i16 @atomicrmw_max_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a5, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_max_i16_acq_rel:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-ZACAS-NEXT: slli a1, a1, 16
+; RV32IA-WMO-ZACAS-NEXT: li a4, 16
+; RV32IA-WMO-ZACAS-NEXT: andi a5, a0, 24
+; RV32IA-WMO-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-ZACAS-NEXT: srai a1, a1, 16
+; RV32IA-WMO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: sub a4, a4, a5
+; RV32IA-WMO-ZACAS-NEXT: .LBB113_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT: and a7, a5, a3
+; RV32IA-WMO-ZACAS-NEXT: mv a6, a5
+; RV32IA-WMO-ZACAS-NEXT: sll a7, a7, a4
+; RV32IA-WMO-ZACAS-NEXT: sra a7, a7, a4
+; RV32IA-WMO-ZACAS-NEXT: bge a7, a1, .LBB113_3
+; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB113_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT: xor a6, a5, a1
+; RV32IA-WMO-ZACAS-NEXT: and a6, a6, a3
+; RV32IA-WMO-ZACAS-NEXT: xor a6, a5, a6
+; RV32IA-WMO-ZACAS-NEXT: .LBB113_3: # in Loop: Header=BB113_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a6, a6, (a2)
+; RV32IA-WMO-ZACAS-NEXT: bnez a6, .LBB113_1
+; RV32IA-WMO-ZACAS-NEXT: # %bb.4:
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a5, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_max_i16_acq_rel:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-ZACAS-NEXT: slli a1, a1, 16
+; RV32IA-TSO-ZACAS-NEXT: li a4, 16
+; RV32IA-TSO-ZACAS-NEXT: andi a5, a0, 24
+; RV32IA-TSO-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-ZACAS-NEXT: srai a1, a1, 16
+; RV32IA-TSO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: sub a4, a4, a5
+; RV32IA-TSO-ZACAS-NEXT: .LBB113_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT: lr.w a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT: and a7, a5, a3
+; RV32IA-TSO-ZACAS-NEXT: mv a6, a5
+; RV32IA-TSO-ZACAS-NEXT: sll a7, a7, a4
+; RV32IA-TSO-ZACAS-NEXT: sra a7, a7, a4
+; RV32IA-TSO-ZACAS-NEXT: bge a7, a1, .LBB113_3
+; RV32IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB113_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT: xor a6, a5, a1
+; RV32IA-TSO-ZACAS-NEXT: and a6, a6, a3
+; RV32IA-TSO-ZACAS-NEXT: xor a6, a5, a6
+; RV32IA-TSO-ZACAS-NEXT: .LBB113_3: # in Loop: Header=BB113_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT: sc.w a6, a6, (a2)
+; RV32IA-TSO-ZACAS-NEXT: bnez a6, .LBB113_1
+; RV32IA-TSO-ZACAS-NEXT: # %bb.4:
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a5, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_max_i16_acq_rel:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -16403,6 +21644,16 @@ define i16 @atomicrmw_max_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a5, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_max_i16_acq_rel:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amomax.h.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_max_i16_acq_rel:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amomax.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_max_i16_acq_rel:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amomax.h.aqrl a0, a1, (a0)
@@ -16459,36 +21710,36 @@ define i16 @atomicrmw_max_i16_seq_cst(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
-; RV32IA-LABEL: atomicrmw_max_i16_seq_cst:
-; RV32IA: # %bb.0:
-; RV32IA-NEXT: andi a2, a0, -4
-; RV32IA-NEXT: slli a0, a0, 3
-; RV32IA-NEXT: lui a3, 16
-; RV32IA-NEXT: slli a1, a1, 16
-; RV32IA-NEXT: li a4, 16
-; RV32IA-NEXT: andi a5, a0, 24
-; RV32IA-NEXT: addi a3, a3, -1
-; RV32IA-NEXT: srai a1, a1, 16
-; RV32IA-NEXT: sll a3, a3, a0
-; RV32IA-NEXT: sll a1, a1, a0
-; RV32IA-NEXT: sub a4, a4, a5
-; RV32IA-NEXT: .LBB114_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: lr.w.aqrl a5, (a2)
-; RV32IA-NEXT: and a7, a5, a3
-; RV32IA-NEXT: mv a6, a5
-; RV32IA-NEXT: sll a7, a7, a4
-; RV32IA-NEXT: sra a7, a7, a4
-; RV32IA-NEXT: bge a7, a1, .LBB114_3
-; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB114_1 Depth=1
-; RV32IA-NEXT: xor a6, a5, a1
-; RV32IA-NEXT: and a6, a6, a3
-; RV32IA-NEXT: xor a6, a5, a6
-; RV32IA-NEXT: .LBB114_3: # in Loop: Header=BB114_1 Depth=1
-; RV32IA-NEXT: sc.w.rl a6, a6, (a2)
-; RV32IA-NEXT: bnez a6, .LBB114_1
-; RV32IA-NEXT: # %bb.4:
-; RV32IA-NEXT: srl a0, a5, a0
-; RV32IA-NEXT: ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_max_i16_seq_cst:
+; RV32IA-NOZACAS: # %bb.0:
+; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT: lui a3, 16
+; RV32IA-NOZACAS-NEXT: slli a1, a1, 16
+; RV32IA-NOZACAS-NEXT: li a4, 16
+; RV32IA-NOZACAS-NEXT: andi a5, a0, 24
+; RV32IA-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-NOZACAS-NEXT: srai a1, a1, 16
+; RV32IA-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-NOZACAS-NEXT: sub a4, a4, a5
+; RV32IA-NOZACAS-NEXT: .LBB114_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NOZACAS-NEXT: lr.w.aqrl a5, (a2)
+; RV32IA-NOZACAS-NEXT: and a7, a5, a3
+; RV32IA-NOZACAS-NEXT: mv a6, a5
+; RV32IA-NOZACAS-NEXT: sll a7, a7, a4
+; RV32IA-NOZACAS-NEXT: sra a7, a7, a4
+; RV32IA-NOZACAS-NEXT: bge a7, a1, .LBB114_3
+; RV32IA-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB114_1 Depth=1
+; RV32IA-NOZACAS-NEXT: xor a6, a5, a1
+; RV32IA-NOZACAS-NEXT: and a6, a6, a3
+; RV32IA-NOZACAS-NEXT: xor a6, a5, a6
+; RV32IA-NOZACAS-NEXT: .LBB114_3: # in Loop: Header=BB114_1 Depth=1
+; RV32IA-NOZACAS-NEXT: sc.w.rl a6, a6, (a2)
+; RV32IA-NOZACAS-NEXT: bnez a6, .LBB114_1
+; RV32IA-NOZACAS-NEXT: # %bb.4:
+; RV32IA-NOZACAS-NEXT: srl a0, a5, a0
+; RV32IA-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_max_i16_seq_cst:
; RV64I: # %bb.0:
@@ -16563,6 +21814,37 @@ define i16 @atomicrmw_max_i16_seq_cst(ptr %a, i16 %b) nounwind {
; RV64IA-NOZACAS-NEXT: srlw a0, a5, a0
; RV64IA-NOZACAS-NEXT: ret
;
+; RV32IA-ZACAS-LABEL: atomicrmw_max_i16_seq_cst:
+; RV32IA-ZACAS: # %bb.0:
+; RV32IA-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-ZACAS-NEXT: lui a3, 16
+; RV32IA-ZACAS-NEXT: slli a1, a1, 16
+; RV32IA-ZACAS-NEXT: li a4, 16
+; RV32IA-ZACAS-NEXT: andi a5, a0, 24
+; RV32IA-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-ZACAS-NEXT: srai a1, a1, 16
+; RV32IA-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-ZACAS-NEXT: sub a4, a4, a5
+; RV32IA-ZACAS-NEXT: .LBB114_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT: lr.w.aqrl a5, (a2)
+; RV32IA-ZACAS-NEXT: and a7, a5, a3
+; RV32IA-ZACAS-NEXT: mv a6, a5
+; RV32IA-ZACAS-NEXT: sll a7, a7, a4
+; RV32IA-ZACAS-NEXT: sra a7, a7, a4
+; RV32IA-ZACAS-NEXT: bge a7, a1, .LBB114_3
+; RV32IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB114_1 Depth=1
+; RV32IA-ZACAS-NEXT: xor a6, a5, a1
+; RV32IA-ZACAS-NEXT: and a6, a6, a3
+; RV32IA-ZACAS-NEXT: xor a6, a5, a6
+; RV32IA-ZACAS-NEXT: .LBB114_3: # in Loop: Header=BB114_1 Depth=1
+; RV32IA-ZACAS-NEXT: sc.w.rl a6, a6, (a2)
+; RV32IA-ZACAS-NEXT: bnez a6, .LBB114_1
+; RV32IA-ZACAS-NEXT: # %bb.4:
+; RV32IA-ZACAS-NEXT: srl a0, a5, a0
+; RV32IA-ZACAS-NEXT: ret
+;
; RV64IA-ZACAS-LABEL: atomicrmw_max_i16_seq_cst:
; RV64IA-ZACAS: # %bb.0:
; RV64IA-ZACAS-NEXT: andi a2, a0, -4
@@ -16594,6 +21876,16 @@ define i16 @atomicrmw_max_i16_seq_cst(ptr %a, i16 %b) nounwind {
; RV64IA-ZACAS-NEXT: srlw a0, a5, a0
; RV64IA-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_max_i16_seq_cst:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amomax.h.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_max_i16_seq_cst:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amomax.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_max_i16_seq_cst:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amomax.h.aqrl a0, a1, (a0)
@@ -16650,36 +21942,36 @@ define i16 @atomicrmw_min_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
-; RV32IA-LABEL: atomicrmw_min_i16_monotonic:
-; RV32IA: # %bb.0:
-; RV32IA-NEXT: andi a2, a0, -4
-; RV32IA-NEXT: slli a0, a0, 3
-; RV32IA-NEXT: lui a3, 16
-; RV32IA-NEXT: slli a1, a1, 16
-; RV32IA-NEXT: li a4, 16
-; RV32IA-NEXT: andi a5, a0, 24
-; RV32IA-NEXT: addi a3, a3, -1
-; RV32IA-NEXT: srai a1, a1, 16
-; RV32IA-NEXT: sll a3, a3, a0
-; RV32IA-NEXT: sll a1, a1, a0
-; RV32IA-NEXT: sub a4, a4, a5
-; RV32IA-NEXT: .LBB115_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: lr.w a5, (a2)
-; RV32IA-NEXT: and a7, a5, a3
-; RV32IA-NEXT: mv a6, a5
-; RV32IA-NEXT: sll a7, a7, a4
-; RV32IA-NEXT: sra a7, a7, a4
-; RV32IA-NEXT: bge a1, a7, .LBB115_3
-; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB115_1 Depth=1
-; RV32IA-NEXT: xor a6, a5, a1
-; RV32IA-NEXT: and a6, a6, a3
-; RV32IA-NEXT: xor a6, a5, a6
-; RV32IA-NEXT: .LBB115_3: # in Loop: Header=BB115_1 Depth=1
-; RV32IA-NEXT: sc.w a6, a6, (a2)
-; RV32IA-NEXT: bnez a6, .LBB115_1
-; RV32IA-NEXT: # %bb.4:
-; RV32IA-NEXT: srl a0, a5, a0
-; RV32IA-NEXT: ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_min_i16_monotonic:
+; RV32IA-NOZACAS: # %bb.0:
+; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT: lui a3, 16
+; RV32IA-NOZACAS-NEXT: slli a1, a1, 16
+; RV32IA-NOZACAS-NEXT: li a4, 16
+; RV32IA-NOZACAS-NEXT: andi a5, a0, 24
+; RV32IA-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-NOZACAS-NEXT: srai a1, a1, 16
+; RV32IA-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-NOZACAS-NEXT: sub a4, a4, a5
+; RV32IA-NOZACAS-NEXT: .LBB115_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NOZACAS-NEXT: lr.w a5, (a2)
+; RV32IA-NOZACAS-NEXT: and a7, a5, a3
+; RV32IA-NOZACAS-NEXT: mv a6, a5
+; RV32IA-NOZACAS-NEXT: sll a7, a7, a4
+; RV32IA-NOZACAS-NEXT: sra a7, a7, a4
+; RV32IA-NOZACAS-NEXT: bge a1, a7, .LBB115_3
+; RV32IA-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB115_1 Depth=1
+; RV32IA-NOZACAS-NEXT: xor a6, a5, a1
+; RV32IA-NOZACAS-NEXT: and a6, a6, a3
+; RV32IA-NOZACAS-NEXT: xor a6, a5, a6
+; RV32IA-NOZACAS-NEXT: .LBB115_3: # in Loop: Header=BB115_1 Depth=1
+; RV32IA-NOZACAS-NEXT: sc.w a6, a6, (a2)
+; RV32IA-NOZACAS-NEXT: bnez a6, .LBB115_1
+; RV32IA-NOZACAS-NEXT: # %bb.4:
+; RV32IA-NOZACAS-NEXT: srl a0, a5, a0
+; RV32IA-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_min_i16_monotonic:
; RV64I: # %bb.0:
@@ -16754,6 +22046,37 @@ define i16 @atomicrmw_min_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV64IA-NOZACAS-NEXT: srlw a0, a5, a0
; RV64IA-NOZACAS-NEXT: ret
;
+; RV32IA-ZACAS-LABEL: atomicrmw_min_i16_monotonic:
+; RV32IA-ZACAS: # %bb.0:
+; RV32IA-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-ZACAS-NEXT: lui a3, 16
+; RV32IA-ZACAS-NEXT: slli a1, a1, 16
+; RV32IA-ZACAS-NEXT: li a4, 16
+; RV32IA-ZACAS-NEXT: andi a5, a0, 24
+; RV32IA-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-ZACAS-NEXT: srai a1, a1, 16
+; RV32IA-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-ZACAS-NEXT: sub a4, a4, a5
+; RV32IA-ZACAS-NEXT: .LBB115_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT: lr.w a5, (a2)
+; RV32IA-ZACAS-NEXT: and a7, a5, a3
+; RV32IA-ZACAS-NEXT: mv a6, a5
+; RV32IA-ZACAS-NEXT: sll a7, a7, a4
+; RV32IA-ZACAS-NEXT: sra a7, a7, a4
+; RV32IA-ZACAS-NEXT: bge a1, a7, .LBB115_3
+; RV32IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB115_1 Depth=1
+; RV32IA-ZACAS-NEXT: xor a6, a5, a1
+; RV32IA-ZACAS-NEXT: and a6, a6, a3
+; RV32IA-ZACAS-NEXT: xor a6, a5, a6
+; RV32IA-ZACAS-NEXT: .LBB115_3: # in Loop: Header=BB115_1 Depth=1
+; RV32IA-ZACAS-NEXT: sc.w a6, a6, (a2)
+; RV32IA-ZACAS-NEXT: bnez a6, .LBB115_1
+; RV32IA-ZACAS-NEXT: # %bb.4:
+; RV32IA-ZACAS-NEXT: srl a0, a5, a0
+; RV32IA-ZACAS-NEXT: ret
+;
; RV64IA-ZACAS-LABEL: atomicrmw_min_i16_monotonic:
; RV64IA-ZACAS: # %bb.0:
; RV64IA-ZACAS-NEXT: andi a2, a0, -4
@@ -16785,6 +22108,16 @@ define i16 @atomicrmw_min_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV64IA-ZACAS-NEXT: srlw a0, a5, a0
; RV64IA-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_min_i16_monotonic:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amomin.h a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_min_i16_monotonic:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amomin.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_min_i16_monotonic:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amomin.h a0, a1, (a0)
@@ -16841,67 +22174,67 @@ define i16 @atomicrmw_min_i16_acquire(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_min_i16_acquire:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: lui a3, 16
-; RV32IA-WMO-NEXT: slli a1, a1, 16
-; RV32IA-WMO-NEXT: li a4, 16
-; RV32IA-WMO-NEXT: andi a5, a0, 24
-; RV32IA-WMO-NEXT: addi a3, a3, -1
-; RV32IA-WMO-NEXT: srai a1, a1, 16
-; RV32IA-WMO-NEXT: sll a3, a3, a0
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: sub a4, a4, a5
-; RV32IA-WMO-NEXT: .LBB116_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT: lr.w.aq a5, (a2)
-; RV32IA-WMO-NEXT: and a7, a5, a3
-; RV32IA-WMO-NEXT: mv a6, a5
-; RV32IA-WMO-NEXT: sll a7, a7, a4
-; RV32IA-WMO-NEXT: sra a7, a7, a4
-; RV32IA-WMO-NEXT: bge a1, a7, .LBB116_3
-; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB116_1 Depth=1
-; RV32IA-WMO-NEXT: xor a6, a5, a1
-; RV32IA-WMO-NEXT: and a6, a6, a3
-; RV32IA-WMO-NEXT: xor a6, a5, a6
-; RV32IA-WMO-NEXT: .LBB116_3: # in Loop: Header=BB116_1 Depth=1
-; RV32IA-WMO-NEXT: sc.w a6, a6, (a2)
-; RV32IA-WMO-NEXT: bnez a6, .LBB116_1
-; RV32IA-WMO-NEXT: # %bb.4:
-; RV32IA-WMO-NEXT: srl a0, a5, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_min_i16_acquire:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-NOZACAS-NEXT: slli a1, a1, 16
+; RV32IA-WMO-NOZACAS-NEXT: li a4, 16
+; RV32IA-WMO-NOZACAS-NEXT: andi a5, a0, 24
+; RV32IA-WMO-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-NOZACAS-NEXT: srai a1, a1, 16
+; RV32IA-WMO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: sub a4, a4, a5
+; RV32IA-WMO-NOZACAS-NEXT: .LBB116_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: lr.w.aq a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: and a7, a5, a3
+; RV32IA-WMO-NOZACAS-NEXT: mv a6, a5
+; RV32IA-WMO-NOZACAS-NEXT: sll a7, a7, a4
+; RV32IA-WMO-NOZACAS-NEXT: sra a7, a7, a4
+; RV32IA-WMO-NOZACAS-NEXT: bge a1, a7, .LBB116_3
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB116_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: xor a6, a5, a1
+; RV32IA-WMO-NOZACAS-NEXT: and a6, a6, a3
+; RV32IA-WMO-NOZACAS-NEXT: xor a6, a5, a6
+; RV32IA-WMO-NOZACAS-NEXT: .LBB116_3: # in Loop: Header=BB116_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: sc.w a6, a6, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: bnez a6, .LBB116_1
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.4:
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a5, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_min_i16_acquire:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: lui a3, 16
-; RV32IA-TSO-NEXT: slli a1, a1, 16
-; RV32IA-TSO-NEXT: li a4, 16
-; RV32IA-TSO-NEXT: andi a5, a0, 24
-; RV32IA-TSO-NEXT: addi a3, a3, -1
-; RV32IA-TSO-NEXT: srai a1, a1, 16
-; RV32IA-TSO-NEXT: sll a3, a3, a0
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: sub a4, a4, a5
-; RV32IA-TSO-NEXT: .LBB116_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT: lr.w a5, (a2)
-; RV32IA-TSO-NEXT: and a7, a5, a3
-; RV32IA-TSO-NEXT: mv a6, a5
-; RV32IA-TSO-NEXT: sll a7, a7, a4
-; RV32IA-TSO-NEXT: sra a7, a7, a4
-; RV32IA-TSO-NEXT: bge a1, a7, .LBB116_3
-; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB116_1 Depth=1
-; RV32IA-TSO-NEXT: xor a6, a5, a1
-; RV32IA-TSO-NEXT: and a6, a6, a3
-; RV32IA-TSO-NEXT: xor a6, a5, a6
-; RV32IA-TSO-NEXT: .LBB116_3: # in Loop: Header=BB116_1 Depth=1
-; RV32IA-TSO-NEXT: sc.w a6, a6, (a2)
-; RV32IA-TSO-NEXT: bnez a6, .LBB116_1
-; RV32IA-TSO-NEXT: # %bb.4:
-; RV32IA-TSO-NEXT: srl a0, a5, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_min_i16_acquire:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-NOZACAS-NEXT: slli a1, a1, 16
+; RV32IA-TSO-NOZACAS-NEXT: li a4, 16
+; RV32IA-TSO-NOZACAS-NEXT: andi a5, a0, 24
+; RV32IA-TSO-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-NOZACAS-NEXT: srai a1, a1, 16
+; RV32IA-TSO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: sub a4, a4, a5
+; RV32IA-TSO-NOZACAS-NEXT: .LBB116_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: lr.w a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: and a7, a5, a3
+; RV32IA-TSO-NOZACAS-NEXT: mv a6, a5
+; RV32IA-TSO-NOZACAS-NEXT: sll a7, a7, a4
+; RV32IA-TSO-NOZACAS-NEXT: sra a7, a7, a4
+; RV32IA-TSO-NOZACAS-NEXT: bge a1, a7, .LBB116_3
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB116_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: xor a6, a5, a1
+; RV32IA-TSO-NOZACAS-NEXT: and a6, a6, a3
+; RV32IA-TSO-NOZACAS-NEXT: xor a6, a5, a6
+; RV32IA-TSO-NOZACAS-NEXT: .LBB116_3: # in Loop: Header=BB116_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: sc.w a6, a6, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: bnez a6, .LBB116_1
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.4:
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a5, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_min_i16_acquire:
; RV64I: # %bb.0:
@@ -17007,6 +22340,68 @@ define i16 @atomicrmw_min_i16_acquire(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a5, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_min_i16_acquire:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-ZACAS-NEXT: slli a1, a1, 16
+; RV32IA-WMO-ZACAS-NEXT: li a4, 16
+; RV32IA-WMO-ZACAS-NEXT: andi a5, a0, 24
+; RV32IA-WMO-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-ZACAS-NEXT: srai a1, a1, 16
+; RV32IA-WMO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: sub a4, a4, a5
+; RV32IA-WMO-ZACAS-NEXT: .LBB116_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT: and a7, a5, a3
+; RV32IA-WMO-ZACAS-NEXT: mv a6, a5
+; RV32IA-WMO-ZACAS-NEXT: sll a7, a7, a4
+; RV32IA-WMO-ZACAS-NEXT: sra a7, a7, a4
+; RV32IA-WMO-ZACAS-NEXT: bge a1, a7, .LBB116_3
+; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB116_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT: xor a6, a5, a1
+; RV32IA-WMO-ZACAS-NEXT: and a6, a6, a3
+; RV32IA-WMO-ZACAS-NEXT: xor a6, a5, a6
+; RV32IA-WMO-ZACAS-NEXT: .LBB116_3: # in Loop: Header=BB116_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT: sc.w a6, a6, (a2)
+; RV32IA-WMO-ZACAS-NEXT: bnez a6, .LBB116_1
+; RV32IA-WMO-ZACAS-NEXT: # %bb.4:
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a5, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_min_i16_acquire:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-ZACAS-NEXT: slli a1, a1, 16
+; RV32IA-TSO-ZACAS-NEXT: li a4, 16
+; RV32IA-TSO-ZACAS-NEXT: andi a5, a0, 24
+; RV32IA-TSO-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-ZACAS-NEXT: srai a1, a1, 16
+; RV32IA-TSO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: sub a4, a4, a5
+; RV32IA-TSO-ZACAS-NEXT: .LBB116_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT: lr.w a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT: and a7, a5, a3
+; RV32IA-TSO-ZACAS-NEXT: mv a6, a5
+; RV32IA-TSO-ZACAS-NEXT: sll a7, a7, a4
+; RV32IA-TSO-ZACAS-NEXT: sra a7, a7, a4
+; RV32IA-TSO-ZACAS-NEXT: bge a1, a7, .LBB116_3
+; RV32IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB116_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT: xor a6, a5, a1
+; RV32IA-TSO-ZACAS-NEXT: and a6, a6, a3
+; RV32IA-TSO-ZACAS-NEXT: xor a6, a5, a6
+; RV32IA-TSO-ZACAS-NEXT: .LBB116_3: # in Loop: Header=BB116_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT: sc.w a6, a6, (a2)
+; RV32IA-TSO-ZACAS-NEXT: bnez a6, .LBB116_1
+; RV32IA-TSO-ZACAS-NEXT: # %bb.4:
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a5, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_min_i16_acquire:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -17069,6 +22464,16 @@ define i16 @atomicrmw_min_i16_acquire(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a5, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_min_i16_acquire:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amomin.h.aq a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_min_i16_acquire:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amomin.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_min_i16_acquire:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amomin.h.aq a0, a1, (a0)
@@ -17125,67 +22530,67 @@ define i16 @atomicrmw_min_i16_release(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_min_i16_release:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: lui a3, 16
-; RV32IA-WMO-NEXT: slli a1, a1, 16
-; RV32IA-WMO-NEXT: li a4, 16
-; RV32IA-WMO-NEXT: andi a5, a0, 24
-; RV32IA-WMO-NEXT: addi a3, a3, -1
-; RV32IA-WMO-NEXT: srai a1, a1, 16
-; RV32IA-WMO-NEXT: sll a3, a3, a0
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: sub a4, a4, a5
-; RV32IA-WMO-NEXT: .LBB117_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT: lr.w a5, (a2)
-; RV32IA-WMO-NEXT: and a7, a5, a3
-; RV32IA-WMO-NEXT: mv a6, a5
-; RV32IA-WMO-NEXT: sll a7, a7, a4
-; RV32IA-WMO-NEXT: sra a7, a7, a4
-; RV32IA-WMO-NEXT: bge a1, a7, .LBB117_3
-; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB117_1 Depth=1
-; RV32IA-WMO-NEXT: xor a6, a5, a1
-; RV32IA-WMO-NEXT: and a6, a6, a3
-; RV32IA-WMO-NEXT: xor a6, a5, a6
-; RV32IA-WMO-NEXT: .LBB117_3: # in Loop: Header=BB117_1 Depth=1
-; RV32IA-WMO-NEXT: sc.w.rl a6, a6, (a2)
-; RV32IA-WMO-NEXT: bnez a6, .LBB117_1
-; RV32IA-WMO-NEXT: # %bb.4:
-; RV32IA-WMO-NEXT: srl a0, a5, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_min_i16_release:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-NOZACAS-NEXT: slli a1, a1, 16
+; RV32IA-WMO-NOZACAS-NEXT: li a4, 16
+; RV32IA-WMO-NOZACAS-NEXT: andi a5, a0, 24
+; RV32IA-WMO-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-NOZACAS-NEXT: srai a1, a1, 16
+; RV32IA-WMO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: sub a4, a4, a5
+; RV32IA-WMO-NOZACAS-NEXT: .LBB117_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: lr.w a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: and a7, a5, a3
+; RV32IA-WMO-NOZACAS-NEXT: mv a6, a5
+; RV32IA-WMO-NOZACAS-NEXT: sll a7, a7, a4
+; RV32IA-WMO-NOZACAS-NEXT: sra a7, a7, a4
+; RV32IA-WMO-NOZACAS-NEXT: bge a1, a7, .LBB117_3
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB117_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: xor a6, a5, a1
+; RV32IA-WMO-NOZACAS-NEXT: and a6, a6, a3
+; RV32IA-WMO-NOZACAS-NEXT: xor a6, a5, a6
+; RV32IA-WMO-NOZACAS-NEXT: .LBB117_3: # in Loop: Header=BB117_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: sc.w.rl a6, a6, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: bnez a6, .LBB117_1
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.4:
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a5, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_min_i16_release:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: lui a3, 16
-; RV32IA-TSO-NEXT: slli a1, a1, 16
-; RV32IA-TSO-NEXT: li a4, 16
-; RV32IA-TSO-NEXT: andi a5, a0, 24
-; RV32IA-TSO-NEXT: addi a3, a3, -1
-; RV32IA-TSO-NEXT: srai a1, a1, 16
-; RV32IA-TSO-NEXT: sll a3, a3, a0
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: sub a4, a4, a5
-; RV32IA-TSO-NEXT: .LBB117_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT: lr.w a5, (a2)
-; RV32IA-TSO-NEXT: and a7, a5, a3
-; RV32IA-TSO-NEXT: mv a6, a5
-; RV32IA-TSO-NEXT: sll a7, a7, a4
-; RV32IA-TSO-NEXT: sra a7, a7, a4
-; RV32IA-TSO-NEXT: bge a1, a7, .LBB117_3
-; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB117_1 Depth=1
-; RV32IA-TSO-NEXT: xor a6, a5, a1
-; RV32IA-TSO-NEXT: and a6, a6, a3
-; RV32IA-TSO-NEXT: xor a6, a5, a6
-; RV32IA-TSO-NEXT: .LBB117_3: # in Loop: Header=BB117_1 Depth=1
-; RV32IA-TSO-NEXT: sc.w a6, a6, (a2)
-; RV32IA-TSO-NEXT: bnez a6, .LBB117_1
-; RV32IA-TSO-NEXT: # %bb.4:
-; RV32IA-TSO-NEXT: srl a0, a5, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_min_i16_release:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-NOZACAS-NEXT: slli a1, a1, 16
+; RV32IA-TSO-NOZACAS-NEXT: li a4, 16
+; RV32IA-TSO-NOZACAS-NEXT: andi a5, a0, 24
+; RV32IA-TSO-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-NOZACAS-NEXT: srai a1, a1, 16
+; RV32IA-TSO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: sub a4, a4, a5
+; RV32IA-TSO-NOZACAS-NEXT: .LBB117_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: lr.w a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: and a7, a5, a3
+; RV32IA-TSO-NOZACAS-NEXT: mv a6, a5
+; RV32IA-TSO-NOZACAS-NEXT: sll a7, a7, a4
+; RV32IA-TSO-NOZACAS-NEXT: sra a7, a7, a4
+; RV32IA-TSO-NOZACAS-NEXT: bge a1, a7, .LBB117_3
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB117_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: xor a6, a5, a1
+; RV32IA-TSO-NOZACAS-NEXT: and a6, a6, a3
+; RV32IA-TSO-NOZACAS-NEXT: xor a6, a5, a6
+; RV32IA-TSO-NOZACAS-NEXT: .LBB117_3: # in Loop: Header=BB117_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: sc.w a6, a6, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: bnez a6, .LBB117_1
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.4:
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a5, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_min_i16_release:
; RV64I: # %bb.0:
@@ -17291,6 +22696,68 @@ define i16 @atomicrmw_min_i16_release(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a5, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_min_i16_release:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-ZACAS-NEXT: slli a1, a1, 16
+; RV32IA-WMO-ZACAS-NEXT: li a4, 16
+; RV32IA-WMO-ZACAS-NEXT: andi a5, a0, 24
+; RV32IA-WMO-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-ZACAS-NEXT: srai a1, a1, 16
+; RV32IA-WMO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: sub a4, a4, a5
+; RV32IA-WMO-ZACAS-NEXT: .LBB117_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT: lr.w a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT: and a7, a5, a3
+; RV32IA-WMO-ZACAS-NEXT: mv a6, a5
+; RV32IA-WMO-ZACAS-NEXT: sll a7, a7, a4
+; RV32IA-WMO-ZACAS-NEXT: sra a7, a7, a4
+; RV32IA-WMO-ZACAS-NEXT: bge a1, a7, .LBB117_3
+; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB117_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT: xor a6, a5, a1
+; RV32IA-WMO-ZACAS-NEXT: and a6, a6, a3
+; RV32IA-WMO-ZACAS-NEXT: xor a6, a5, a6
+; RV32IA-WMO-ZACAS-NEXT: .LBB117_3: # in Loop: Header=BB117_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a6, a6, (a2)
+; RV32IA-WMO-ZACAS-NEXT: bnez a6, .LBB117_1
+; RV32IA-WMO-ZACAS-NEXT: # %bb.4:
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a5, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_min_i16_release:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-ZACAS-NEXT: slli a1, a1, 16
+; RV32IA-TSO-ZACAS-NEXT: li a4, 16
+; RV32IA-TSO-ZACAS-NEXT: andi a5, a0, 24
+; RV32IA-TSO-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-ZACAS-NEXT: srai a1, a1, 16
+; RV32IA-TSO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: sub a4, a4, a5
+; RV32IA-TSO-ZACAS-NEXT: .LBB117_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT: lr.w a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT: and a7, a5, a3
+; RV32IA-TSO-ZACAS-NEXT: mv a6, a5
+; RV32IA-TSO-ZACAS-NEXT: sll a7, a7, a4
+; RV32IA-TSO-ZACAS-NEXT: sra a7, a7, a4
+; RV32IA-TSO-ZACAS-NEXT: bge a1, a7, .LBB117_3
+; RV32IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB117_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT: xor a6, a5, a1
+; RV32IA-TSO-ZACAS-NEXT: and a6, a6, a3
+; RV32IA-TSO-ZACAS-NEXT: xor a6, a5, a6
+; RV32IA-TSO-ZACAS-NEXT: .LBB117_3: # in Loop: Header=BB117_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT: sc.w a6, a6, (a2)
+; RV32IA-TSO-ZACAS-NEXT: bnez a6, .LBB117_1
+; RV32IA-TSO-ZACAS-NEXT: # %bb.4:
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a5, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_min_i16_release:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -17353,6 +22820,16 @@ define i16 @atomicrmw_min_i16_release(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a5, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_min_i16_release:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amomin.h.rl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_min_i16_release:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amomin.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_min_i16_release:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amomin.h.rl a0, a1, (a0)
@@ -17409,67 +22886,67 @@ define i16 @atomicrmw_min_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_min_i16_acq_rel:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: lui a3, 16
-; RV32IA-WMO-NEXT: slli a1, a1, 16
-; RV32IA-WMO-NEXT: li a4, 16
-; RV32IA-WMO-NEXT: andi a5, a0, 24
-; RV32IA-WMO-NEXT: addi a3, a3, -1
-; RV32IA-WMO-NEXT: srai a1, a1, 16
-; RV32IA-WMO-NEXT: sll a3, a3, a0
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: sub a4, a4, a5
-; RV32IA-WMO-NEXT: .LBB118_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT: lr.w.aq a5, (a2)
-; RV32IA-WMO-NEXT: and a7, a5, a3
-; RV32IA-WMO-NEXT: mv a6, a5
-; RV32IA-WMO-NEXT: sll a7, a7, a4
-; RV32IA-WMO-NEXT: sra a7, a7, a4
-; RV32IA-WMO-NEXT: bge a1, a7, .LBB118_3
-; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB118_1 Depth=1
-; RV32IA-WMO-NEXT: xor a6, a5, a1
-; RV32IA-WMO-NEXT: and a6, a6, a3
-; RV32IA-WMO-NEXT: xor a6, a5, a6
-; RV32IA-WMO-NEXT: .LBB118_3: # in Loop: Header=BB118_1 Depth=1
-; RV32IA-WMO-NEXT: sc.w.rl a6, a6, (a2)
-; RV32IA-WMO-NEXT: bnez a6, .LBB118_1
-; RV32IA-WMO-NEXT: # %bb.4:
-; RV32IA-WMO-NEXT: srl a0, a5, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_min_i16_acq_rel:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-NOZACAS-NEXT: slli a1, a1, 16
+; RV32IA-WMO-NOZACAS-NEXT: li a4, 16
+; RV32IA-WMO-NOZACAS-NEXT: andi a5, a0, 24
+; RV32IA-WMO-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-NOZACAS-NEXT: srai a1, a1, 16
+; RV32IA-WMO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: sub a4, a4, a5
+; RV32IA-WMO-NOZACAS-NEXT: .LBB118_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: lr.w.aq a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: and a7, a5, a3
+; RV32IA-WMO-NOZACAS-NEXT: mv a6, a5
+; RV32IA-WMO-NOZACAS-NEXT: sll a7, a7, a4
+; RV32IA-WMO-NOZACAS-NEXT: sra a7, a7, a4
+; RV32IA-WMO-NOZACAS-NEXT: bge a1, a7, .LBB118_3
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB118_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: xor a6, a5, a1
+; RV32IA-WMO-NOZACAS-NEXT: and a6, a6, a3
+; RV32IA-WMO-NOZACAS-NEXT: xor a6, a5, a6
+; RV32IA-WMO-NOZACAS-NEXT: .LBB118_3: # in Loop: Header=BB118_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: sc.w.rl a6, a6, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: bnez a6, .LBB118_1
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.4:
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a5, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_min_i16_acq_rel:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: lui a3, 16
-; RV32IA-TSO-NEXT: slli a1, a1, 16
-; RV32IA-TSO-NEXT: li a4, 16
-; RV32IA-TSO-NEXT: andi a5, a0, 24
-; RV32IA-TSO-NEXT: addi a3, a3, -1
-; RV32IA-TSO-NEXT: srai a1, a1, 16
-; RV32IA-TSO-NEXT: sll a3, a3, a0
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: sub a4, a4, a5
-; RV32IA-TSO-NEXT: .LBB118_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT: lr.w a5, (a2)
-; RV32IA-TSO-NEXT: and a7, a5, a3
-; RV32IA-TSO-NEXT: mv a6, a5
-; RV32IA-TSO-NEXT: sll a7, a7, a4
-; RV32IA-TSO-NEXT: sra a7, a7, a4
-; RV32IA-TSO-NEXT: bge a1, a7, .LBB118_3
-; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB118_1 Depth=1
-; RV32IA-TSO-NEXT: xor a6, a5, a1
-; RV32IA-TSO-NEXT: and a6, a6, a3
-; RV32IA-TSO-NEXT: xor a6, a5, a6
-; RV32IA-TSO-NEXT: .LBB118_3: # in Loop: Header=BB118_1 Depth=1
-; RV32IA-TSO-NEXT: sc.w a6, a6, (a2)
-; RV32IA-TSO-NEXT: bnez a6, .LBB118_1
-; RV32IA-TSO-NEXT: # %bb.4:
-; RV32IA-TSO-NEXT: srl a0, a5, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_min_i16_acq_rel:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-NOZACAS-NEXT: slli a1, a1, 16
+; RV32IA-TSO-NOZACAS-NEXT: li a4, 16
+; RV32IA-TSO-NOZACAS-NEXT: andi a5, a0, 24
+; RV32IA-TSO-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-NOZACAS-NEXT: srai a1, a1, 16
+; RV32IA-TSO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: sub a4, a4, a5
+; RV32IA-TSO-NOZACAS-NEXT: .LBB118_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: lr.w a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: and a7, a5, a3
+; RV32IA-TSO-NOZACAS-NEXT: mv a6, a5
+; RV32IA-TSO-NOZACAS-NEXT: sll a7, a7, a4
+; RV32IA-TSO-NOZACAS-NEXT: sra a7, a7, a4
+; RV32IA-TSO-NOZACAS-NEXT: bge a1, a7, .LBB118_3
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB118_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: xor a6, a5, a1
+; RV32IA-TSO-NOZACAS-NEXT: and a6, a6, a3
+; RV32IA-TSO-NOZACAS-NEXT: xor a6, a5, a6
+; RV32IA-TSO-NOZACAS-NEXT: .LBB118_3: # in Loop: Header=BB118_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: sc.w a6, a6, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: bnez a6, .LBB118_1
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.4:
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a5, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_min_i16_acq_rel:
; RV64I: # %bb.0:
@@ -17575,6 +23052,68 @@ define i16 @atomicrmw_min_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a5, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_min_i16_acq_rel:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-ZACAS-NEXT: slli a1, a1, 16
+; RV32IA-WMO-ZACAS-NEXT: li a4, 16
+; RV32IA-WMO-ZACAS-NEXT: andi a5, a0, 24
+; RV32IA-WMO-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-ZACAS-NEXT: srai a1, a1, 16
+; RV32IA-WMO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: sub a4, a4, a5
+; RV32IA-WMO-ZACAS-NEXT: .LBB118_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT: and a7, a5, a3
+; RV32IA-WMO-ZACAS-NEXT: mv a6, a5
+; RV32IA-WMO-ZACAS-NEXT: sll a7, a7, a4
+; RV32IA-WMO-ZACAS-NEXT: sra a7, a7, a4
+; RV32IA-WMO-ZACAS-NEXT: bge a1, a7, .LBB118_3
+; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB118_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT: xor a6, a5, a1
+; RV32IA-WMO-ZACAS-NEXT: and a6, a6, a3
+; RV32IA-WMO-ZACAS-NEXT: xor a6, a5, a6
+; RV32IA-WMO-ZACAS-NEXT: .LBB118_3: # in Loop: Header=BB118_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a6, a6, (a2)
+; RV32IA-WMO-ZACAS-NEXT: bnez a6, .LBB118_1
+; RV32IA-WMO-ZACAS-NEXT: # %bb.4:
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a5, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_min_i16_acq_rel:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-ZACAS-NEXT: slli a1, a1, 16
+; RV32IA-TSO-ZACAS-NEXT: li a4, 16
+; RV32IA-TSO-ZACAS-NEXT: andi a5, a0, 24
+; RV32IA-TSO-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-ZACAS-NEXT: srai a1, a1, 16
+; RV32IA-TSO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: sub a4, a4, a5
+; RV32IA-TSO-ZACAS-NEXT: .LBB118_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT: lr.w a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT: and a7, a5, a3
+; RV32IA-TSO-ZACAS-NEXT: mv a6, a5
+; RV32IA-TSO-ZACAS-NEXT: sll a7, a7, a4
+; RV32IA-TSO-ZACAS-NEXT: sra a7, a7, a4
+; RV32IA-TSO-ZACAS-NEXT: bge a1, a7, .LBB118_3
+; RV32IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB118_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT: xor a6, a5, a1
+; RV32IA-TSO-ZACAS-NEXT: and a6, a6, a3
+; RV32IA-TSO-ZACAS-NEXT: xor a6, a5, a6
+; RV32IA-TSO-ZACAS-NEXT: .LBB118_3: # in Loop: Header=BB118_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT: sc.w a6, a6, (a2)
+; RV32IA-TSO-ZACAS-NEXT: bnez a6, .LBB118_1
+; RV32IA-TSO-ZACAS-NEXT: # %bb.4:
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a5, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_min_i16_acq_rel:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -17637,6 +23176,16 @@ define i16 @atomicrmw_min_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a5, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_min_i16_acq_rel:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amomin.h.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_min_i16_acq_rel:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amomin.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_min_i16_acq_rel:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amomin.h.aqrl a0, a1, (a0)
@@ -17693,36 +23242,36 @@ define i16 @atomicrmw_min_i16_seq_cst(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
-; RV32IA-LABEL: atomicrmw_min_i16_seq_cst:
-; RV32IA: # %bb.0:
-; RV32IA-NEXT: andi a2, a0, -4
-; RV32IA-NEXT: slli a0, a0, 3
-; RV32IA-NEXT: lui a3, 16
-; RV32IA-NEXT: slli a1, a1, 16
-; RV32IA-NEXT: li a4, 16
-; RV32IA-NEXT: andi a5, a0, 24
-; RV32IA-NEXT: addi a3, a3, -1
-; RV32IA-NEXT: srai a1, a1, 16
-; RV32IA-NEXT: sll a3, a3, a0
-; RV32IA-NEXT: sll a1, a1, a0
-; RV32IA-NEXT: sub a4, a4, a5
-; RV32IA-NEXT: .LBB119_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: lr.w.aqrl a5, (a2)
-; RV32IA-NEXT: and a7, a5, a3
-; RV32IA-NEXT: mv a6, a5
-; RV32IA-NEXT: sll a7, a7, a4
-; RV32IA-NEXT: sra a7, a7, a4
-; RV32IA-NEXT: bge a1, a7, .LBB119_3
-; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB119_1 Depth=1
-; RV32IA-NEXT: xor a6, a5, a1
-; RV32IA-NEXT: and a6, a6, a3
-; RV32IA-NEXT: xor a6, a5, a6
-; RV32IA-NEXT: .LBB119_3: # in Loop: Header=BB119_1 Depth=1
-; RV32IA-NEXT: sc.w.rl a6, a6, (a2)
-; RV32IA-NEXT: bnez a6, .LBB119_1
-; RV32IA-NEXT: # %bb.4:
-; RV32IA-NEXT: srl a0, a5, a0
-; RV32IA-NEXT: ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_min_i16_seq_cst:
+; RV32IA-NOZACAS: # %bb.0:
+; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT: lui a3, 16
+; RV32IA-NOZACAS-NEXT: slli a1, a1, 16
+; RV32IA-NOZACAS-NEXT: li a4, 16
+; RV32IA-NOZACAS-NEXT: andi a5, a0, 24
+; RV32IA-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-NOZACAS-NEXT: srai a1, a1, 16
+; RV32IA-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-NOZACAS-NEXT: sub a4, a4, a5
+; RV32IA-NOZACAS-NEXT: .LBB119_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NOZACAS-NEXT: lr.w.aqrl a5, (a2)
+; RV32IA-NOZACAS-NEXT: and a7, a5, a3
+; RV32IA-NOZACAS-NEXT: mv a6, a5
+; RV32IA-NOZACAS-NEXT: sll a7, a7, a4
+; RV32IA-NOZACAS-NEXT: sra a7, a7, a4
+; RV32IA-NOZACAS-NEXT: bge a1, a7, .LBB119_3
+; RV32IA-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB119_1 Depth=1
+; RV32IA-NOZACAS-NEXT: xor a6, a5, a1
+; RV32IA-NOZACAS-NEXT: and a6, a6, a3
+; RV32IA-NOZACAS-NEXT: xor a6, a5, a6
+; RV32IA-NOZACAS-NEXT: .LBB119_3: # in Loop: Header=BB119_1 Depth=1
+; RV32IA-NOZACAS-NEXT: sc.w.rl a6, a6, (a2)
+; RV32IA-NOZACAS-NEXT: bnez a6, .LBB119_1
+; RV32IA-NOZACAS-NEXT: # %bb.4:
+; RV32IA-NOZACAS-NEXT: srl a0, a5, a0
+; RV32IA-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_min_i16_seq_cst:
; RV64I: # %bb.0:
@@ -17797,6 +23346,37 @@ define i16 @atomicrmw_min_i16_seq_cst(ptr %a, i16 %b) nounwind {
; RV64IA-NOZACAS-NEXT: srlw a0, a5, a0
; RV64IA-NOZACAS-NEXT: ret
;
+; RV32IA-ZACAS-LABEL: atomicrmw_min_i16_seq_cst:
+; RV32IA-ZACAS: # %bb.0:
+; RV32IA-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-ZACAS-NEXT: lui a3, 16
+; RV32IA-ZACAS-NEXT: slli a1, a1, 16
+; RV32IA-ZACAS-NEXT: li a4, 16
+; RV32IA-ZACAS-NEXT: andi a5, a0, 24
+; RV32IA-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-ZACAS-NEXT: srai a1, a1, 16
+; RV32IA-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-ZACAS-NEXT: sub a4, a4, a5
+; RV32IA-ZACAS-NEXT: .LBB119_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT: lr.w.aqrl a5, (a2)
+; RV32IA-ZACAS-NEXT: and a7, a5, a3
+; RV32IA-ZACAS-NEXT: mv a6, a5
+; RV32IA-ZACAS-NEXT: sll a7, a7, a4
+; RV32IA-ZACAS-NEXT: sra a7, a7, a4
+; RV32IA-ZACAS-NEXT: bge a1, a7, .LBB119_3
+; RV32IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB119_1 Depth=1
+; RV32IA-ZACAS-NEXT: xor a6, a5, a1
+; RV32IA-ZACAS-NEXT: and a6, a6, a3
+; RV32IA-ZACAS-NEXT: xor a6, a5, a6
+; RV32IA-ZACAS-NEXT: .LBB119_3: # in Loop: Header=BB119_1 Depth=1
+; RV32IA-ZACAS-NEXT: sc.w.rl a6, a6, (a2)
+; RV32IA-ZACAS-NEXT: bnez a6, .LBB119_1
+; RV32IA-ZACAS-NEXT: # %bb.4:
+; RV32IA-ZACAS-NEXT: srl a0, a5, a0
+; RV32IA-ZACAS-NEXT: ret
+;
; RV64IA-ZACAS-LABEL: atomicrmw_min_i16_seq_cst:
; RV64IA-ZACAS: # %bb.0:
; RV64IA-ZACAS-NEXT: andi a2, a0, -4
@@ -17828,6 +23408,16 @@ define i16 @atomicrmw_min_i16_seq_cst(ptr %a, i16 %b) nounwind {
; RV64IA-ZACAS-NEXT: srlw a0, a5, a0
; RV64IA-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_min_i16_seq_cst:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amomin.h.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_min_i16_seq_cst:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amomin.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_min_i16_seq_cst:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amomin.h.aqrl a0, a1, (a0)
@@ -17886,30 +23476,30 @@ define i16 @atomicrmw_umax_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
-; RV32IA-LABEL: atomicrmw_umax_i16_monotonic:
-; RV32IA: # %bb.0:
-; RV32IA-NEXT: andi a2, a0, -4
-; RV32IA-NEXT: slli a0, a0, 3
-; RV32IA-NEXT: lui a3, 16
-; RV32IA-NEXT: addi a3, a3, -1
-; RV32IA-NEXT: sll a4, a3, a0
-; RV32IA-NEXT: and a1, a1, a3
-; RV32IA-NEXT: sll a1, a1, a0
-; RV32IA-NEXT: .LBB120_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: lr.w a3, (a2)
-; RV32IA-NEXT: and a6, a3, a4
-; RV32IA-NEXT: mv a5, a3
-; RV32IA-NEXT: bgeu a6, a1, .LBB120_3
-; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB120_1 Depth=1
-; RV32IA-NEXT: xor a5, a3, a1
-; RV32IA-NEXT: and a5, a5, a4
-; RV32IA-NEXT: xor a5, a3, a5
-; RV32IA-NEXT: .LBB120_3: # in Loop: Header=BB120_1 Depth=1
-; RV32IA-NEXT: sc.w a5, a5, (a2)
-; RV32IA-NEXT: bnez a5, .LBB120_1
-; RV32IA-NEXT: # %bb.4:
-; RV32IA-NEXT: srl a0, a3, a0
-; RV32IA-NEXT: ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_umax_i16_monotonic:
+; RV32IA-NOZACAS: # %bb.0:
+; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT: lui a3, 16
+; RV32IA-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-NOZACAS-NEXT: .LBB120_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NOZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-NOZACAS-NEXT: and a6, a3, a4
+; RV32IA-NOZACAS-NEXT: mv a5, a3
+; RV32IA-NOZACAS-NEXT: bgeu a6, a1, .LBB120_3
+; RV32IA-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB120_1 Depth=1
+; RV32IA-NOZACAS-NEXT: xor a5, a3, a1
+; RV32IA-NOZACAS-NEXT: and a5, a5, a4
+; RV32IA-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-NOZACAS-NEXT: .LBB120_3: # in Loop: Header=BB120_1 Depth=1
+; RV32IA-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-NOZACAS-NEXT: bnez a5, .LBB120_1
+; RV32IA-NOZACAS-NEXT: # %bb.4:
+; RV32IA-NOZACAS-NEXT: srl a0, a3, a0
+; RV32IA-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_umax_i16_monotonic:
; RV64I: # %bb.0:
@@ -17980,6 +23570,31 @@ define i16 @atomicrmw_umax_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV64IA-NOZACAS-NEXT: srlw a0, a3, a0
; RV64IA-NOZACAS-NEXT: ret
;
+; RV32IA-ZACAS-LABEL: atomicrmw_umax_i16_monotonic:
+; RV32IA-ZACAS: # %bb.0:
+; RV32IA-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-ZACAS-NEXT: lui a3, 16
+; RV32IA-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-ZACAS-NEXT: sll a4, a3, a0
+; RV32IA-ZACAS-NEXT: and a1, a1, a3
+; RV32IA-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-ZACAS-NEXT: .LBB120_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-ZACAS-NEXT: and a6, a3, a4
+; RV32IA-ZACAS-NEXT: mv a5, a3
+; RV32IA-ZACAS-NEXT: bgeu a6, a1, .LBB120_3
+; RV32IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB120_1 Depth=1
+; RV32IA-ZACAS-NEXT: xor a5, a3, a1
+; RV32IA-ZACAS-NEXT: and a5, a5, a4
+; RV32IA-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-ZACAS-NEXT: .LBB120_3: # in Loop: Header=BB120_1 Depth=1
+; RV32IA-ZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-ZACAS-NEXT: bnez a5, .LBB120_1
+; RV32IA-ZACAS-NEXT: # %bb.4:
+; RV32IA-ZACAS-NEXT: srl a0, a3, a0
+; RV32IA-ZACAS-NEXT: ret
+;
; RV64IA-ZACAS-LABEL: atomicrmw_umax_i16_monotonic:
; RV64IA-ZACAS: # %bb.0:
; RV64IA-ZACAS-NEXT: andi a2, a0, -4
@@ -18005,6 +23620,16 @@ define i16 @atomicrmw_umax_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV64IA-ZACAS-NEXT: srlw a0, a3, a0
; RV64IA-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_umax_i16_monotonic:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amomaxu.h a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_umax_i16_monotonic:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amomaxu.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_umax_i16_monotonic:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amomaxu.h a0, a1, (a0)
@@ -18063,55 +23688,55 @@ define i16 @atomicrmw_umax_i16_acquire(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_umax_i16_acquire:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: lui a3, 16
-; RV32IA-WMO-NEXT: addi a3, a3, -1
-; RV32IA-WMO-NEXT: sll a4, a3, a0
-; RV32IA-WMO-NEXT: and a1, a1, a3
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: .LBB121_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT: lr.w.aq a3, (a2)
-; RV32IA-WMO-NEXT: and a6, a3, a4
-; RV32IA-WMO-NEXT: mv a5, a3
-; RV32IA-WMO-NEXT: bgeu a6, a1, .LBB121_3
-; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB121_1 Depth=1
-; RV32IA-WMO-NEXT: xor a5, a3, a1
-; RV32IA-WMO-NEXT: and a5, a5, a4
-; RV32IA-WMO-NEXT: xor a5, a3, a5
-; RV32IA-WMO-NEXT: .LBB121_3: # in Loop: Header=BB121_1 Depth=1
-; RV32IA-WMO-NEXT: sc.w a5, a5, (a2)
-; RV32IA-WMO-NEXT: bnez a5, .LBB121_1
-; RV32IA-WMO-NEXT: # %bb.4:
-; RV32IA-WMO-NEXT: srl a0, a3, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_umax_i16_acquire:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: .LBB121_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: lr.w.aq a3, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: and a6, a3, a4
+; RV32IA-WMO-NOZACAS-NEXT: mv a5, a3
+; RV32IA-WMO-NOZACAS-NEXT: bgeu a6, a1, .LBB121_3
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB121_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a3, a1
+; RV32IA-WMO-NOZACAS-NEXT: and a5, a5, a4
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-NOZACAS-NEXT: .LBB121_3: # in Loop: Header=BB121_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: bnez a5, .LBB121_1
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.4:
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_umax_i16_acquire:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: lui a3, 16
-; RV32IA-TSO-NEXT: addi a3, a3, -1
-; RV32IA-TSO-NEXT: sll a4, a3, a0
-; RV32IA-TSO-NEXT: and a1, a1, a3
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: .LBB121_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT: lr.w a3, (a2)
-; RV32IA-TSO-NEXT: and a6, a3, a4
-; RV32IA-TSO-NEXT: mv a5, a3
-; RV32IA-TSO-NEXT: bgeu a6, a1, .LBB121_3
-; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB121_1 Depth=1
-; RV32IA-TSO-NEXT: xor a5, a3, a1
-; RV32IA-TSO-NEXT: and a5, a5, a4
-; RV32IA-TSO-NEXT: xor a5, a3, a5
-; RV32IA-TSO-NEXT: .LBB121_3: # in Loop: Header=BB121_1 Depth=1
-; RV32IA-TSO-NEXT: sc.w a5, a5, (a2)
-; RV32IA-TSO-NEXT: bnez a5, .LBB121_1
-; RV32IA-TSO-NEXT: # %bb.4:
-; RV32IA-TSO-NEXT: srl a0, a3, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_umax_i16_acquire:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: .LBB121_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: and a6, a3, a4
+; RV32IA-TSO-NOZACAS-NEXT: mv a5, a3
+; RV32IA-TSO-NOZACAS-NEXT: bgeu a6, a1, .LBB121_3
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB121_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a3, a1
+; RV32IA-TSO-NOZACAS-NEXT: and a5, a5, a4
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-NOZACAS-NEXT: .LBB121_3: # in Loop: Header=BB121_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: bnez a5, .LBB121_1
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.4:
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_umax_i16_acquire:
; RV64I: # %bb.0:
@@ -18207,6 +23832,56 @@ define i16 @atomicrmw_umax_i16_acquire(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a3, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_umax_i16_acquire:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-ZACAS-NEXT: sll a4, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a3
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: .LBB121_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a3, (a2)
+; RV32IA-WMO-ZACAS-NEXT: and a6, a3, a4
+; RV32IA-WMO-ZACAS-NEXT: mv a5, a3
+; RV32IA-WMO-ZACAS-NEXT: bgeu a6, a1, .LBB121_3
+; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB121_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a3, a1
+; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a4
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-ZACAS-NEXT: .LBB121_3: # in Loop: Header=BB121_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB121_1
+; RV32IA-WMO-ZACAS-NEXT: # %bb.4:
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_umax_i16_acquire:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-ZACAS-NEXT: sll a4, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: and a1, a1, a3
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: .LBB121_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-TSO-ZACAS-NEXT: and a6, a3, a4
+; RV32IA-TSO-ZACAS-NEXT: mv a5, a3
+; RV32IA-TSO-ZACAS-NEXT: bgeu a6, a1, .LBB121_3
+; RV32IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB121_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a3, a1
+; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a4
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-ZACAS-NEXT: .LBB121_3: # in Loop: Header=BB121_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT: bnez a5, .LBB121_1
+; RV32IA-TSO-ZACAS-NEXT: # %bb.4:
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_umax_i16_acquire:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -18257,6 +23932,16 @@ define i16 @atomicrmw_umax_i16_acquire(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a3, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_umax_i16_acquire:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amomaxu.h.aq a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_umax_i16_acquire:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amomaxu.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_umax_i16_acquire:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amomaxu.h.aq a0, a1, (a0)
@@ -18315,55 +24000,55 @@ define i16 @atomicrmw_umax_i16_release(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_umax_i16_release:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: lui a3, 16
-; RV32IA-WMO-NEXT: addi a3, a3, -1
-; RV32IA-WMO-NEXT: sll a4, a3, a0
-; RV32IA-WMO-NEXT: and a1, a1, a3
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: .LBB122_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT: lr.w a3, (a2)
-; RV32IA-WMO-NEXT: and a6, a3, a4
-; RV32IA-WMO-NEXT: mv a5, a3
-; RV32IA-WMO-NEXT: bgeu a6, a1, .LBB122_3
-; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB122_1 Depth=1
-; RV32IA-WMO-NEXT: xor a5, a3, a1
-; RV32IA-WMO-NEXT: and a5, a5, a4
-; RV32IA-WMO-NEXT: xor a5, a3, a5
-; RV32IA-WMO-NEXT: .LBB122_3: # in Loop: Header=BB122_1 Depth=1
-; RV32IA-WMO-NEXT: sc.w.rl a5, a5, (a2)
-; RV32IA-WMO-NEXT: bnez a5, .LBB122_1
-; RV32IA-WMO-NEXT: # %bb.4:
-; RV32IA-WMO-NEXT: srl a0, a3, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_umax_i16_release:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: .LBB122_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: and a6, a3, a4
+; RV32IA-WMO-NOZACAS-NEXT: mv a5, a3
+; RV32IA-WMO-NOZACAS-NEXT: bgeu a6, a1, .LBB122_3
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB122_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a3, a1
+; RV32IA-WMO-NOZACAS-NEXT: and a5, a5, a4
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-NOZACAS-NEXT: .LBB122_3: # in Loop: Header=BB122_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: bnez a5, .LBB122_1
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.4:
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_umax_i16_release:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: lui a3, 16
-; RV32IA-TSO-NEXT: addi a3, a3, -1
-; RV32IA-TSO-NEXT: sll a4, a3, a0
-; RV32IA-TSO-NEXT: and a1, a1, a3
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: .LBB122_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT: lr.w a3, (a2)
-; RV32IA-TSO-NEXT: and a6, a3, a4
-; RV32IA-TSO-NEXT: mv a5, a3
-; RV32IA-TSO-NEXT: bgeu a6, a1, .LBB122_3
-; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB122_1 Depth=1
-; RV32IA-TSO-NEXT: xor a5, a3, a1
-; RV32IA-TSO-NEXT: and a5, a5, a4
-; RV32IA-TSO-NEXT: xor a5, a3, a5
-; RV32IA-TSO-NEXT: .LBB122_3: # in Loop: Header=BB122_1 Depth=1
-; RV32IA-TSO-NEXT: sc.w a5, a5, (a2)
-; RV32IA-TSO-NEXT: bnez a5, .LBB122_1
-; RV32IA-TSO-NEXT: # %bb.4:
-; RV32IA-TSO-NEXT: srl a0, a3, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_umax_i16_release:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: .LBB122_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: and a6, a3, a4
+; RV32IA-TSO-NOZACAS-NEXT: mv a5, a3
+; RV32IA-TSO-NOZACAS-NEXT: bgeu a6, a1, .LBB122_3
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB122_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a3, a1
+; RV32IA-TSO-NOZACAS-NEXT: and a5, a5, a4
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-NOZACAS-NEXT: .LBB122_3: # in Loop: Header=BB122_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: bnez a5, .LBB122_1
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.4:
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_umax_i16_release:
; RV64I: # %bb.0:
@@ -18459,6 +24144,56 @@ define i16 @atomicrmw_umax_i16_release(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a3, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_umax_i16_release:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-ZACAS-NEXT: sll a4, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a3
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: .LBB122_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-WMO-ZACAS-NEXT: and a6, a3, a4
+; RV32IA-WMO-ZACAS-NEXT: mv a5, a3
+; RV32IA-WMO-ZACAS-NEXT: bgeu a6, a1, .LBB122_3
+; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB122_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a3, a1
+; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a4
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-ZACAS-NEXT: .LBB122_3: # in Loop: Header=BB122_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB122_1
+; RV32IA-WMO-ZACAS-NEXT: # %bb.4:
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_umax_i16_release:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-ZACAS-NEXT: sll a4, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: and a1, a1, a3
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: .LBB122_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-TSO-ZACAS-NEXT: and a6, a3, a4
+; RV32IA-TSO-ZACAS-NEXT: mv a5, a3
+; RV32IA-TSO-ZACAS-NEXT: bgeu a6, a1, .LBB122_3
+; RV32IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB122_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a3, a1
+; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a4
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-ZACAS-NEXT: .LBB122_3: # in Loop: Header=BB122_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT: bnez a5, .LBB122_1
+; RV32IA-TSO-ZACAS-NEXT: # %bb.4:
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_umax_i16_release:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -18509,6 +24244,16 @@ define i16 @atomicrmw_umax_i16_release(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a3, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_umax_i16_release:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amomaxu.h.rl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_umax_i16_release:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amomaxu.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_umax_i16_release:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amomaxu.h.rl a0, a1, (a0)
@@ -18567,55 +24312,55 @@ define i16 @atomicrmw_umax_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_umax_i16_acq_rel:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: lui a3, 16
-; RV32IA-WMO-NEXT: addi a3, a3, -1
-; RV32IA-WMO-NEXT: sll a4, a3, a0
-; RV32IA-WMO-NEXT: and a1, a1, a3
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: .LBB123_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT: lr.w.aq a3, (a2)
-; RV32IA-WMO-NEXT: and a6, a3, a4
-; RV32IA-WMO-NEXT: mv a5, a3
-; RV32IA-WMO-NEXT: bgeu a6, a1, .LBB123_3
-; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB123_1 Depth=1
-; RV32IA-WMO-NEXT: xor a5, a3, a1
-; RV32IA-WMO-NEXT: and a5, a5, a4
-; RV32IA-WMO-NEXT: xor a5, a3, a5
-; RV32IA-WMO-NEXT: .LBB123_3: # in Loop: Header=BB123_1 Depth=1
-; RV32IA-WMO-NEXT: sc.w.rl a5, a5, (a2)
-; RV32IA-WMO-NEXT: bnez a5, .LBB123_1
-; RV32IA-WMO-NEXT: # %bb.4:
-; RV32IA-WMO-NEXT: srl a0, a3, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_umax_i16_acq_rel:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: .LBB123_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: lr.w.aq a3, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: and a6, a3, a4
+; RV32IA-WMO-NOZACAS-NEXT: mv a5, a3
+; RV32IA-WMO-NOZACAS-NEXT: bgeu a6, a1, .LBB123_3
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB123_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a3, a1
+; RV32IA-WMO-NOZACAS-NEXT: and a5, a5, a4
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-NOZACAS-NEXT: .LBB123_3: # in Loop: Header=BB123_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: bnez a5, .LBB123_1
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.4:
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_umax_i16_acq_rel:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: lui a3, 16
-; RV32IA-TSO-NEXT: addi a3, a3, -1
-; RV32IA-TSO-NEXT: sll a4, a3, a0
-; RV32IA-TSO-NEXT: and a1, a1, a3
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: .LBB123_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT: lr.w a3, (a2)
-; RV32IA-TSO-NEXT: and a6, a3, a4
-; RV32IA-TSO-NEXT: mv a5, a3
-; RV32IA-TSO-NEXT: bgeu a6, a1, .LBB123_3
-; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB123_1 Depth=1
-; RV32IA-TSO-NEXT: xor a5, a3, a1
-; RV32IA-TSO-NEXT: and a5, a5, a4
-; RV32IA-TSO-NEXT: xor a5, a3, a5
-; RV32IA-TSO-NEXT: .LBB123_3: # in Loop: Header=BB123_1 Depth=1
-; RV32IA-TSO-NEXT: sc.w a5, a5, (a2)
-; RV32IA-TSO-NEXT: bnez a5, .LBB123_1
-; RV32IA-TSO-NEXT: # %bb.4:
-; RV32IA-TSO-NEXT: srl a0, a3, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_umax_i16_acq_rel:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: .LBB123_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: and a6, a3, a4
+; RV32IA-TSO-NOZACAS-NEXT: mv a5, a3
+; RV32IA-TSO-NOZACAS-NEXT: bgeu a6, a1, .LBB123_3
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB123_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a3, a1
+; RV32IA-TSO-NOZACAS-NEXT: and a5, a5, a4
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-NOZACAS-NEXT: .LBB123_3: # in Loop: Header=BB123_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: bnez a5, .LBB123_1
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.4:
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_umax_i16_acq_rel:
; RV64I: # %bb.0:
@@ -18711,6 +24456,56 @@ define i16 @atomicrmw_umax_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a3, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_umax_i16_acq_rel:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-ZACAS-NEXT: sll a4, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a3
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: .LBB123_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a3, (a2)
+; RV32IA-WMO-ZACAS-NEXT: and a6, a3, a4
+; RV32IA-WMO-ZACAS-NEXT: mv a5, a3
+; RV32IA-WMO-ZACAS-NEXT: bgeu a6, a1, .LBB123_3
+; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB123_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a3, a1
+; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a4
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-ZACAS-NEXT: .LBB123_3: # in Loop: Header=BB123_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB123_1
+; RV32IA-WMO-ZACAS-NEXT: # %bb.4:
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_umax_i16_acq_rel:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-ZACAS-NEXT: sll a4, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: and a1, a1, a3
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: .LBB123_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-TSO-ZACAS-NEXT: and a6, a3, a4
+; RV32IA-TSO-ZACAS-NEXT: mv a5, a3
+; RV32IA-TSO-ZACAS-NEXT: bgeu a6, a1, .LBB123_3
+; RV32IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB123_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a3, a1
+; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a4
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-ZACAS-NEXT: .LBB123_3: # in Loop: Header=BB123_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT: bnez a5, .LBB123_1
+; RV32IA-TSO-ZACAS-NEXT: # %bb.4:
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_umax_i16_acq_rel:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -18761,6 +24556,16 @@ define i16 @atomicrmw_umax_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a3, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_umax_i16_acq_rel:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amomaxu.h.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_umax_i16_acq_rel:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amomaxu.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_umax_i16_acq_rel:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amomaxu.h.aqrl a0, a1, (a0)
@@ -18819,30 +24624,30 @@ define i16 @atomicrmw_umax_i16_seq_cst(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
-; RV32IA-LABEL: atomicrmw_umax_i16_seq_cst:
-; RV32IA: # %bb.0:
-; RV32IA-NEXT: andi a2, a0, -4
-; RV32IA-NEXT: slli a0, a0, 3
-; RV32IA-NEXT: lui a3, 16
-; RV32IA-NEXT: addi a3, a3, -1
-; RV32IA-NEXT: sll a4, a3, a0
-; RV32IA-NEXT: and a1, a1, a3
-; RV32IA-NEXT: sll a1, a1, a0
-; RV32IA-NEXT: .LBB124_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: lr.w.aqrl a3, (a2)
-; RV32IA-NEXT: and a6, a3, a4
-; RV32IA-NEXT: mv a5, a3
-; RV32IA-NEXT: bgeu a6, a1, .LBB124_3
-; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB124_1 Depth=1
-; RV32IA-NEXT: xor a5, a3, a1
-; RV32IA-NEXT: and a5, a5, a4
-; RV32IA-NEXT: xor a5, a3, a5
-; RV32IA-NEXT: .LBB124_3: # in Loop: Header=BB124_1 Depth=1
-; RV32IA-NEXT: sc.w.rl a5, a5, (a2)
-; RV32IA-NEXT: bnez a5, .LBB124_1
-; RV32IA-NEXT: # %bb.4:
-; RV32IA-NEXT: srl a0, a3, a0
-; RV32IA-NEXT: ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_umax_i16_seq_cst:
+; RV32IA-NOZACAS: # %bb.0:
+; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT: lui a3, 16
+; RV32IA-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-NOZACAS-NEXT: .LBB124_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NOZACAS-NEXT: lr.w.aqrl a3, (a2)
+; RV32IA-NOZACAS-NEXT: and a6, a3, a4
+; RV32IA-NOZACAS-NEXT: mv a5, a3
+; RV32IA-NOZACAS-NEXT: bgeu a6, a1, .LBB124_3
+; RV32IA-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB124_1 Depth=1
+; RV32IA-NOZACAS-NEXT: xor a5, a3, a1
+; RV32IA-NOZACAS-NEXT: and a5, a5, a4
+; RV32IA-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-NOZACAS-NEXT: .LBB124_3: # in Loop: Header=BB124_1 Depth=1
+; RV32IA-NOZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-NOZACAS-NEXT: bnez a5, .LBB124_1
+; RV32IA-NOZACAS-NEXT: # %bb.4:
+; RV32IA-NOZACAS-NEXT: srl a0, a3, a0
+; RV32IA-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_umax_i16_seq_cst:
; RV64I: # %bb.0:
@@ -18913,6 +24718,31 @@ define i16 @atomicrmw_umax_i16_seq_cst(ptr %a, i16 %b) nounwind {
; RV64IA-NOZACAS-NEXT: srlw a0, a3, a0
; RV64IA-NOZACAS-NEXT: ret
;
+; RV32IA-ZACAS-LABEL: atomicrmw_umax_i16_seq_cst:
+; RV32IA-ZACAS: # %bb.0:
+; RV32IA-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-ZACAS-NEXT: lui a3, 16
+; RV32IA-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-ZACAS-NEXT: sll a4, a3, a0
+; RV32IA-ZACAS-NEXT: and a1, a1, a3
+; RV32IA-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-ZACAS-NEXT: .LBB124_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT: lr.w.aqrl a3, (a2)
+; RV32IA-ZACAS-NEXT: and a6, a3, a4
+; RV32IA-ZACAS-NEXT: mv a5, a3
+; RV32IA-ZACAS-NEXT: bgeu a6, a1, .LBB124_3
+; RV32IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB124_1 Depth=1
+; RV32IA-ZACAS-NEXT: xor a5, a3, a1
+; RV32IA-ZACAS-NEXT: and a5, a5, a4
+; RV32IA-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-ZACAS-NEXT: .LBB124_3: # in Loop: Header=BB124_1 Depth=1
+; RV32IA-ZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-ZACAS-NEXT: bnez a5, .LBB124_1
+; RV32IA-ZACAS-NEXT: # %bb.4:
+; RV32IA-ZACAS-NEXT: srl a0, a3, a0
+; RV32IA-ZACAS-NEXT: ret
+;
; RV64IA-ZACAS-LABEL: atomicrmw_umax_i16_seq_cst:
; RV64IA-ZACAS: # %bb.0:
; RV64IA-ZACAS-NEXT: andi a2, a0, -4
@@ -18938,6 +24768,16 @@ define i16 @atomicrmw_umax_i16_seq_cst(ptr %a, i16 %b) nounwind {
; RV64IA-ZACAS-NEXT: srlw a0, a3, a0
; RV64IA-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_umax_i16_seq_cst:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amomaxu.h.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_umax_i16_seq_cst:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amomaxu.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_umax_i16_seq_cst:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amomaxu.h.aqrl a0, a1, (a0)
@@ -18996,30 +24836,30 @@ define i16 @atomicrmw_umin_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
-; RV32IA-LABEL: atomicrmw_umin_i16_monotonic:
-; RV32IA: # %bb.0:
-; RV32IA-NEXT: andi a2, a0, -4
-; RV32IA-NEXT: slli a0, a0, 3
-; RV32IA-NEXT: lui a3, 16
-; RV32IA-NEXT: addi a3, a3, -1
-; RV32IA-NEXT: sll a4, a3, a0
-; RV32IA-NEXT: and a1, a1, a3
-; RV32IA-NEXT: sll a1, a1, a0
-; RV32IA-NEXT: .LBB125_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: lr.w a3, (a2)
-; RV32IA-NEXT: and a6, a3, a4
-; RV32IA-NEXT: mv a5, a3
-; RV32IA-NEXT: bgeu a1, a6, .LBB125_3
-; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB125_1 Depth=1
-; RV32IA-NEXT: xor a5, a3, a1
-; RV32IA-NEXT: and a5, a5, a4
-; RV32IA-NEXT: xor a5, a3, a5
-; RV32IA-NEXT: .LBB125_3: # in Loop: Header=BB125_1 Depth=1
-; RV32IA-NEXT: sc.w a5, a5, (a2)
-; RV32IA-NEXT: bnez a5, .LBB125_1
-; RV32IA-NEXT: # %bb.4:
-; RV32IA-NEXT: srl a0, a3, a0
-; RV32IA-NEXT: ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_umin_i16_monotonic:
+; RV32IA-NOZACAS: # %bb.0:
+; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT: lui a3, 16
+; RV32IA-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-NOZACAS-NEXT: .LBB125_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NOZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-NOZACAS-NEXT: and a6, a3, a4
+; RV32IA-NOZACAS-NEXT: mv a5, a3
+; RV32IA-NOZACAS-NEXT: bgeu a1, a6, .LBB125_3
+; RV32IA-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB125_1 Depth=1
+; RV32IA-NOZACAS-NEXT: xor a5, a3, a1
+; RV32IA-NOZACAS-NEXT: and a5, a5, a4
+; RV32IA-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-NOZACAS-NEXT: .LBB125_3: # in Loop: Header=BB125_1 Depth=1
+; RV32IA-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-NOZACAS-NEXT: bnez a5, .LBB125_1
+; RV32IA-NOZACAS-NEXT: # %bb.4:
+; RV32IA-NOZACAS-NEXT: srl a0, a3, a0
+; RV32IA-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_umin_i16_monotonic:
; RV64I: # %bb.0:
@@ -19090,6 +24930,31 @@ define i16 @atomicrmw_umin_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV64IA-NOZACAS-NEXT: srlw a0, a3, a0
; RV64IA-NOZACAS-NEXT: ret
;
+; RV32IA-ZACAS-LABEL: atomicrmw_umin_i16_monotonic:
+; RV32IA-ZACAS: # %bb.0:
+; RV32IA-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-ZACAS-NEXT: lui a3, 16
+; RV32IA-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-ZACAS-NEXT: sll a4, a3, a0
+; RV32IA-ZACAS-NEXT: and a1, a1, a3
+; RV32IA-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-ZACAS-NEXT: .LBB125_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-ZACAS-NEXT: and a6, a3, a4
+; RV32IA-ZACAS-NEXT: mv a5, a3
+; RV32IA-ZACAS-NEXT: bgeu a1, a6, .LBB125_3
+; RV32IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB125_1 Depth=1
+; RV32IA-ZACAS-NEXT: xor a5, a3, a1
+; RV32IA-ZACAS-NEXT: and a5, a5, a4
+; RV32IA-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-ZACAS-NEXT: .LBB125_3: # in Loop: Header=BB125_1 Depth=1
+; RV32IA-ZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-ZACAS-NEXT: bnez a5, .LBB125_1
+; RV32IA-ZACAS-NEXT: # %bb.4:
+; RV32IA-ZACAS-NEXT: srl a0, a3, a0
+; RV32IA-ZACAS-NEXT: ret
+;
; RV64IA-ZACAS-LABEL: atomicrmw_umin_i16_monotonic:
; RV64IA-ZACAS: # %bb.0:
; RV64IA-ZACAS-NEXT: andi a2, a0, -4
@@ -19115,6 +24980,16 @@ define i16 @atomicrmw_umin_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV64IA-ZACAS-NEXT: srlw a0, a3, a0
; RV64IA-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_umin_i16_monotonic:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amominu.h a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_umin_i16_monotonic:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amominu.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_umin_i16_monotonic:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amominu.h a0, a1, (a0)
@@ -19173,55 +25048,55 @@ define i16 @atomicrmw_umin_i16_acquire(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_umin_i16_acquire:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: lui a3, 16
-; RV32IA-WMO-NEXT: addi a3, a3, -1
-; RV32IA-WMO-NEXT: sll a4, a3, a0
-; RV32IA-WMO-NEXT: and a1, a1, a3
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: .LBB126_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT: lr.w.aq a3, (a2)
-; RV32IA-WMO-NEXT: and a6, a3, a4
-; RV32IA-WMO-NEXT: mv a5, a3
-; RV32IA-WMO-NEXT: bgeu a1, a6, .LBB126_3
-; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB126_1 Depth=1
-; RV32IA-WMO-NEXT: xor a5, a3, a1
-; RV32IA-WMO-NEXT: and a5, a5, a4
-; RV32IA-WMO-NEXT: xor a5, a3, a5
-; RV32IA-WMO-NEXT: .LBB126_3: # in Loop: Header=BB126_1 Depth=1
-; RV32IA-WMO-NEXT: sc.w a5, a5, (a2)
-; RV32IA-WMO-NEXT: bnez a5, .LBB126_1
-; RV32IA-WMO-NEXT: # %bb.4:
-; RV32IA-WMO-NEXT: srl a0, a3, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_umin_i16_acquire:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: .LBB126_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: lr.w.aq a3, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: and a6, a3, a4
+; RV32IA-WMO-NOZACAS-NEXT: mv a5, a3
+; RV32IA-WMO-NOZACAS-NEXT: bgeu a1, a6, .LBB126_3
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB126_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a3, a1
+; RV32IA-WMO-NOZACAS-NEXT: and a5, a5, a4
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-NOZACAS-NEXT: .LBB126_3: # in Loop: Header=BB126_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: bnez a5, .LBB126_1
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.4:
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_umin_i16_acquire:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: lui a3, 16
-; RV32IA-TSO-NEXT: addi a3, a3, -1
-; RV32IA-TSO-NEXT: sll a4, a3, a0
-; RV32IA-TSO-NEXT: and a1, a1, a3
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: .LBB126_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT: lr.w a3, (a2)
-; RV32IA-TSO-NEXT: and a6, a3, a4
-; RV32IA-TSO-NEXT: mv a5, a3
-; RV32IA-TSO-NEXT: bgeu a1, a6, .LBB126_3
-; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB126_1 Depth=1
-; RV32IA-TSO-NEXT: xor a5, a3, a1
-; RV32IA-TSO-NEXT: and a5, a5, a4
-; RV32IA-TSO-NEXT: xor a5, a3, a5
-; RV32IA-TSO-NEXT: .LBB126_3: # in Loop: Header=BB126_1 Depth=1
-; RV32IA-TSO-NEXT: sc.w a5, a5, (a2)
-; RV32IA-TSO-NEXT: bnez a5, .LBB126_1
-; RV32IA-TSO-NEXT: # %bb.4:
-; RV32IA-TSO-NEXT: srl a0, a3, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_umin_i16_acquire:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: .LBB126_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: and a6, a3, a4
+; RV32IA-TSO-NOZACAS-NEXT: mv a5, a3
+; RV32IA-TSO-NOZACAS-NEXT: bgeu a1, a6, .LBB126_3
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB126_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a3, a1
+; RV32IA-TSO-NOZACAS-NEXT: and a5, a5, a4
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-NOZACAS-NEXT: .LBB126_3: # in Loop: Header=BB126_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: bnez a5, .LBB126_1
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.4:
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_umin_i16_acquire:
; RV64I: # %bb.0:
@@ -19317,6 +25192,56 @@ define i16 @atomicrmw_umin_i16_acquire(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a3, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_umin_i16_acquire:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-ZACAS-NEXT: sll a4, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a3
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: .LBB126_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a3, (a2)
+; RV32IA-WMO-ZACAS-NEXT: and a6, a3, a4
+; RV32IA-WMO-ZACAS-NEXT: mv a5, a3
+; RV32IA-WMO-ZACAS-NEXT: bgeu a1, a6, .LBB126_3
+; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB126_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a3, a1
+; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a4
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-ZACAS-NEXT: .LBB126_3: # in Loop: Header=BB126_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB126_1
+; RV32IA-WMO-ZACAS-NEXT: # %bb.4:
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_umin_i16_acquire:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-ZACAS-NEXT: sll a4, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: and a1, a1, a3
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: .LBB126_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-TSO-ZACAS-NEXT: and a6, a3, a4
+; RV32IA-TSO-ZACAS-NEXT: mv a5, a3
+; RV32IA-TSO-ZACAS-NEXT: bgeu a1, a6, .LBB126_3
+; RV32IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB126_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a3, a1
+; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a4
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-ZACAS-NEXT: .LBB126_3: # in Loop: Header=BB126_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT: bnez a5, .LBB126_1
+; RV32IA-TSO-ZACAS-NEXT: # %bb.4:
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_umin_i16_acquire:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -19367,6 +25292,16 @@ define i16 @atomicrmw_umin_i16_acquire(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a3, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_umin_i16_acquire:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amominu.h.aq a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_umin_i16_acquire:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amominu.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_umin_i16_acquire:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amominu.h.aq a0, a1, (a0)
@@ -19425,55 +25360,55 @@ define i16 @atomicrmw_umin_i16_release(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_umin_i16_release:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: lui a3, 16
-; RV32IA-WMO-NEXT: addi a3, a3, -1
-; RV32IA-WMO-NEXT: sll a4, a3, a0
-; RV32IA-WMO-NEXT: and a1, a1, a3
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: .LBB127_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT: lr.w a3, (a2)
-; RV32IA-WMO-NEXT: and a6, a3, a4
-; RV32IA-WMO-NEXT: mv a5, a3
-; RV32IA-WMO-NEXT: bgeu a1, a6, .LBB127_3
-; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB127_1 Depth=1
-; RV32IA-WMO-NEXT: xor a5, a3, a1
-; RV32IA-WMO-NEXT: and a5, a5, a4
-; RV32IA-WMO-NEXT: xor a5, a3, a5
-; RV32IA-WMO-NEXT: .LBB127_3: # in Loop: Header=BB127_1 Depth=1
-; RV32IA-WMO-NEXT: sc.w.rl a5, a5, (a2)
-; RV32IA-WMO-NEXT: bnez a5, .LBB127_1
-; RV32IA-WMO-NEXT: # %bb.4:
-; RV32IA-WMO-NEXT: srl a0, a3, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_umin_i16_release:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: .LBB127_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: and a6, a3, a4
+; RV32IA-WMO-NOZACAS-NEXT: mv a5, a3
+; RV32IA-WMO-NOZACAS-NEXT: bgeu a1, a6, .LBB127_3
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB127_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a3, a1
+; RV32IA-WMO-NOZACAS-NEXT: and a5, a5, a4
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-NOZACAS-NEXT: .LBB127_3: # in Loop: Header=BB127_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: bnez a5, .LBB127_1
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.4:
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_umin_i16_release:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: lui a3, 16
-; RV32IA-TSO-NEXT: addi a3, a3, -1
-; RV32IA-TSO-NEXT: sll a4, a3, a0
-; RV32IA-TSO-NEXT: and a1, a1, a3
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: .LBB127_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT: lr.w a3, (a2)
-; RV32IA-TSO-NEXT: and a6, a3, a4
-; RV32IA-TSO-NEXT: mv a5, a3
-; RV32IA-TSO-NEXT: bgeu a1, a6, .LBB127_3
-; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB127_1 Depth=1
-; RV32IA-TSO-NEXT: xor a5, a3, a1
-; RV32IA-TSO-NEXT: and a5, a5, a4
-; RV32IA-TSO-NEXT: xor a5, a3, a5
-; RV32IA-TSO-NEXT: .LBB127_3: # in Loop: Header=BB127_1 Depth=1
-; RV32IA-TSO-NEXT: sc.w a5, a5, (a2)
-; RV32IA-TSO-NEXT: bnez a5, .LBB127_1
-; RV32IA-TSO-NEXT: # %bb.4:
-; RV32IA-TSO-NEXT: srl a0, a3, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_umin_i16_release:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: .LBB127_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: and a6, a3, a4
+; RV32IA-TSO-NOZACAS-NEXT: mv a5, a3
+; RV32IA-TSO-NOZACAS-NEXT: bgeu a1, a6, .LBB127_3
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB127_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a3, a1
+; RV32IA-TSO-NOZACAS-NEXT: and a5, a5, a4
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-NOZACAS-NEXT: .LBB127_3: # in Loop: Header=BB127_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: bnez a5, .LBB127_1
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.4:
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_umin_i16_release:
; RV64I: # %bb.0:
@@ -19569,6 +25504,56 @@ define i16 @atomicrmw_umin_i16_release(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a3, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_umin_i16_release:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-ZACAS-NEXT: sll a4, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a3
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: .LBB127_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-WMO-ZACAS-NEXT: and a6, a3, a4
+; RV32IA-WMO-ZACAS-NEXT: mv a5, a3
+; RV32IA-WMO-ZACAS-NEXT: bgeu a1, a6, .LBB127_3
+; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB127_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a3, a1
+; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a4
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-ZACAS-NEXT: .LBB127_3: # in Loop: Header=BB127_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB127_1
+; RV32IA-WMO-ZACAS-NEXT: # %bb.4:
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_umin_i16_release:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-ZACAS-NEXT: sll a4, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: and a1, a1, a3
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: .LBB127_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-TSO-ZACAS-NEXT: and a6, a3, a4
+; RV32IA-TSO-ZACAS-NEXT: mv a5, a3
+; RV32IA-TSO-ZACAS-NEXT: bgeu a1, a6, .LBB127_3
+; RV32IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB127_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a3, a1
+; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a4
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-ZACAS-NEXT: .LBB127_3: # in Loop: Header=BB127_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT: bnez a5, .LBB127_1
+; RV32IA-TSO-ZACAS-NEXT: # %bb.4:
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_umin_i16_release:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -19619,6 +25604,16 @@ define i16 @atomicrmw_umin_i16_release(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a3, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_umin_i16_release:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amominu.h.rl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_umin_i16_release:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amominu.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_umin_i16_release:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amominu.h.rl a0, a1, (a0)
@@ -19677,55 +25672,55 @@ define i16 @atomicrmw_umin_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_umin_i16_acq_rel:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: lui a3, 16
-; RV32IA-WMO-NEXT: addi a3, a3, -1
-; RV32IA-WMO-NEXT: sll a4, a3, a0
-; RV32IA-WMO-NEXT: and a1, a1, a3
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: .LBB128_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT: lr.w.aq a3, (a2)
-; RV32IA-WMO-NEXT: and a6, a3, a4
-; RV32IA-WMO-NEXT: mv a5, a3
-; RV32IA-WMO-NEXT: bgeu a1, a6, .LBB128_3
-; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB128_1 Depth=1
-; RV32IA-WMO-NEXT: xor a5, a3, a1
-; RV32IA-WMO-NEXT: and a5, a5, a4
-; RV32IA-WMO-NEXT: xor a5, a3, a5
-; RV32IA-WMO-NEXT: .LBB128_3: # in Loop: Header=BB128_1 Depth=1
-; RV32IA-WMO-NEXT: sc.w.rl a5, a5, (a2)
-; RV32IA-WMO-NEXT: bnez a5, .LBB128_1
-; RV32IA-WMO-NEXT: # %bb.4:
-; RV32IA-WMO-NEXT: srl a0, a3, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_umin_i16_acq_rel:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: .LBB128_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: lr.w.aq a3, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: and a6, a3, a4
+; RV32IA-WMO-NOZACAS-NEXT: mv a5, a3
+; RV32IA-WMO-NOZACAS-NEXT: bgeu a1, a6, .LBB128_3
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB128_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a3, a1
+; RV32IA-WMO-NOZACAS-NEXT: and a5, a5, a4
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-NOZACAS-NEXT: .LBB128_3: # in Loop: Header=BB128_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: bnez a5, .LBB128_1
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.4:
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_umin_i16_acq_rel:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: lui a3, 16
-; RV32IA-TSO-NEXT: addi a3, a3, -1
-; RV32IA-TSO-NEXT: sll a4, a3, a0
-; RV32IA-TSO-NEXT: and a1, a1, a3
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: .LBB128_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT: lr.w a3, (a2)
-; RV32IA-TSO-NEXT: and a6, a3, a4
-; RV32IA-TSO-NEXT: mv a5, a3
-; RV32IA-TSO-NEXT: bgeu a1, a6, .LBB128_3
-; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB128_1 Depth=1
-; RV32IA-TSO-NEXT: xor a5, a3, a1
-; RV32IA-TSO-NEXT: and a5, a5, a4
-; RV32IA-TSO-NEXT: xor a5, a3, a5
-; RV32IA-TSO-NEXT: .LBB128_3: # in Loop: Header=BB128_1 Depth=1
-; RV32IA-TSO-NEXT: sc.w a5, a5, (a2)
-; RV32IA-TSO-NEXT: bnez a5, .LBB128_1
-; RV32IA-TSO-NEXT: # %bb.4:
-; RV32IA-TSO-NEXT: srl a0, a3, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_umin_i16_acq_rel:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: .LBB128_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: and a6, a3, a4
+; RV32IA-TSO-NOZACAS-NEXT: mv a5, a3
+; RV32IA-TSO-NOZACAS-NEXT: bgeu a1, a6, .LBB128_3
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB128_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a3, a1
+; RV32IA-TSO-NOZACAS-NEXT: and a5, a5, a4
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-NOZACAS-NEXT: .LBB128_3: # in Loop: Header=BB128_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: bnez a5, .LBB128_1
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.4:
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_umin_i16_acq_rel:
; RV64I: # %bb.0:
@@ -19821,6 +25816,56 @@ define i16 @atomicrmw_umin_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a3, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_umin_i16_acq_rel:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-ZACAS-NEXT: sll a4, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a3
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: .LBB128_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a3, (a2)
+; RV32IA-WMO-ZACAS-NEXT: and a6, a3, a4
+; RV32IA-WMO-ZACAS-NEXT: mv a5, a3
+; RV32IA-WMO-ZACAS-NEXT: bgeu a1, a6, .LBB128_3
+; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB128_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a3, a1
+; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a4
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-ZACAS-NEXT: .LBB128_3: # in Loop: Header=BB128_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB128_1
+; RV32IA-WMO-ZACAS-NEXT: # %bb.4:
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_umin_i16_acq_rel:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-ZACAS-NEXT: sll a4, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: and a1, a1, a3
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: .LBB128_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-TSO-ZACAS-NEXT: and a6, a3, a4
+; RV32IA-TSO-ZACAS-NEXT: mv a5, a3
+; RV32IA-TSO-ZACAS-NEXT: bgeu a1, a6, .LBB128_3
+; RV32IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB128_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a3, a1
+; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a4
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-ZACAS-NEXT: .LBB128_3: # in Loop: Header=BB128_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT: bnez a5, .LBB128_1
+; RV32IA-TSO-ZACAS-NEXT: # %bb.4:
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_umin_i16_acq_rel:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -19871,6 +25916,16 @@ define i16 @atomicrmw_umin_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a3, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_umin_i16_acq_rel:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amominu.h.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_umin_i16_acq_rel:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amominu.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_umin_i16_acq_rel:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amominu.h.aqrl a0, a1, (a0)
@@ -19929,30 +25984,30 @@ define i16 @atomicrmw_umin_i16_seq_cst(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
-; RV32IA-LABEL: atomicrmw_umin_i16_seq_cst:
-; RV32IA: # %bb.0:
-; RV32IA-NEXT: andi a2, a0, -4
-; RV32IA-NEXT: slli a0, a0, 3
-; RV32IA-NEXT: lui a3, 16
-; RV32IA-NEXT: addi a3, a3, -1
-; RV32IA-NEXT: sll a4, a3, a0
-; RV32IA-NEXT: and a1, a1, a3
-; RV32IA-NEXT: sll a1, a1, a0
-; RV32IA-NEXT: .LBB129_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: lr.w.aqrl a3, (a2)
-; RV32IA-NEXT: and a6, a3, a4
-; RV32IA-NEXT: mv a5, a3
-; RV32IA-NEXT: bgeu a1, a6, .LBB129_3
-; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB129_1 Depth=1
-; RV32IA-NEXT: xor a5, a3, a1
-; RV32IA-NEXT: and a5, a5, a4
-; RV32IA-NEXT: xor a5, a3, a5
-; RV32IA-NEXT: .LBB129_3: # in Loop: Header=BB129_1 Depth=1
-; RV32IA-NEXT: sc.w.rl a5, a5, (a2)
-; RV32IA-NEXT: bnez a5, .LBB129_1
-; RV32IA-NEXT: # %bb.4:
-; RV32IA-NEXT: srl a0, a3, a0
-; RV32IA-NEXT: ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_umin_i16_seq_cst:
+; RV32IA-NOZACAS: # %bb.0:
+; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT: lui a3, 16
+; RV32IA-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-NOZACAS-NEXT: .LBB129_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NOZACAS-NEXT: lr.w.aqrl a3, (a2)
+; RV32IA-NOZACAS-NEXT: and a6, a3, a4
+; RV32IA-NOZACAS-NEXT: mv a5, a3
+; RV32IA-NOZACAS-NEXT: bgeu a1, a6, .LBB129_3
+; RV32IA-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB129_1 Depth=1
+; RV32IA-NOZACAS-NEXT: xor a5, a3, a1
+; RV32IA-NOZACAS-NEXT: and a5, a5, a4
+; RV32IA-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-NOZACAS-NEXT: .LBB129_3: # in Loop: Header=BB129_1 Depth=1
+; RV32IA-NOZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-NOZACAS-NEXT: bnez a5, .LBB129_1
+; RV32IA-NOZACAS-NEXT: # %bb.4:
+; RV32IA-NOZACAS-NEXT: srl a0, a3, a0
+; RV32IA-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_umin_i16_seq_cst:
; RV64I: # %bb.0:
@@ -20023,6 +26078,31 @@ define i16 @atomicrmw_umin_i16_seq_cst(ptr %a, i16 %b) nounwind {
; RV64IA-NOZACAS-NEXT: srlw a0, a3, a0
; RV64IA-NOZACAS-NEXT: ret
;
+; RV32IA-ZACAS-LABEL: atomicrmw_umin_i16_seq_cst:
+; RV32IA-ZACAS: # %bb.0:
+; RV32IA-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-ZACAS-NEXT: lui a3, 16
+; RV32IA-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-ZACAS-NEXT: sll a4, a3, a0
+; RV32IA-ZACAS-NEXT: and a1, a1, a3
+; RV32IA-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-ZACAS-NEXT: .LBB129_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT: lr.w.aqrl a3, (a2)
+; RV32IA-ZACAS-NEXT: and a6, a3, a4
+; RV32IA-ZACAS-NEXT: mv a5, a3
+; RV32IA-ZACAS-NEXT: bgeu a1, a6, .LBB129_3
+; RV32IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB129_1 Depth=1
+; RV32IA-ZACAS-NEXT: xor a5, a3, a1
+; RV32IA-ZACAS-NEXT: and a5, a5, a4
+; RV32IA-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-ZACAS-NEXT: .LBB129_3: # in Loop: Header=BB129_1 Depth=1
+; RV32IA-ZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-ZACAS-NEXT: bnez a5, .LBB129_1
+; RV32IA-ZACAS-NEXT: # %bb.4:
+; RV32IA-ZACAS-NEXT: srl a0, a3, a0
+; RV32IA-ZACAS-NEXT: ret
+;
; RV64IA-ZACAS-LABEL: atomicrmw_umin_i16_seq_cst:
; RV64IA-ZACAS: # %bb.0:
; RV64IA-ZACAS-NEXT: andi a2, a0, -4
@@ -20048,6 +26128,16 @@ define i16 @atomicrmw_umin_i16_seq_cst(ptr %a, i16 %b) nounwind {
; RV64IA-ZACAS-NEXT: srlw a0, a3, a0
; RV64IA-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_umin_i16_seq_cst:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amominu.h.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_umin_i16_seq_cst:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amominu.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_umin_i16_seq_cst:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amominu.h.aqrl a0, a1, (a0)
@@ -20992,6 +27082,30 @@ define i32 @atomicrmw_nand_i32_monotonic(ptr %a, i32 %b) nounwind {
; RV64IA-ZACAS-NEXT: # %bb.2: # %atomicrmw.end
; RV64IA-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i32_monotonic:
+; RV32IA-WMO-ZABHA-NOZACAS: # %bb.0:
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: .LBB150_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: lr.w a2, (a0)
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: and a3, a2, a1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: not a3, a3
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: sc.w a3, a3, (a0)
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: bnez a3, .LBB150_1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: mv a0, a2
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i32_monotonic:
+; RV32IA-TSO-ZABHA-NOZACAS: # %bb.0:
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: .LBB150_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: lr.w a2, (a0)
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: and a3, a2, a1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: not a3, a3
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: sc.w a3, a3, (a0)
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: bnez a3, .LBB150_1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: mv a0, a2
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: ret
+;
; RV64IA-WMO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i32_monotonic:
; RV64IA-WMO-ZABHA-NOZACAS: # %bb.0:
; RV64IA-WMO-ZABHA-NOZACAS-NEXT: .LBB150_1: # =>This Inner Loop Header: Depth=1
@@ -21016,6 +27130,34 @@ define i32 @atomicrmw_nand_i32_monotonic(ptr %a, i32 %b) nounwind {
; RV64IA-TSO-ZABHA-NOZACAS-NEXT: mv a0, a2
; RV64IA-TSO-ZABHA-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i32_monotonic:
+; RV32IA-WMO-ZABHA-ZACAS: # %bb.0:
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: mv a2, a0
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: lw a0, 0(a0)
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: .LBB150_1: # %atomicrmw.start
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: mv a3, a0
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: and a4, a0, a1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: not a4, a4
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: amocas.w a0, a4, (a2)
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: bne a0, a3, .LBB150_1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: # %bb.2: # %atomicrmw.end
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i32_monotonic:
+; RV32IA-TSO-ZABHA-ZACAS: # %bb.0:
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: mv a2, a0
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: lw a0, 0(a0)
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: .LBB150_1: # %atomicrmw.start
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: mv a3, a0
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: and a4, a0, a1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: not a4, a4
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: amocas.w a0, a4, (a2)
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: bne a0, a3, .LBB150_1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: # %bb.2: # %atomicrmw.end
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i32_monotonic:
; RV64IA-WMO-ZABHA-ZACAS: # %bb.0:
; RV64IA-WMO-ZABHA-ZACAS-NEXT: mv a2, a0
@@ -21172,6 +27314,30 @@ define i32 @atomicrmw_nand_i32_acquire(ptr %a, i32 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: # %bb.2: # %atomicrmw.end
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i32_acquire:
+; RV32IA-WMO-ZABHA-NOZACAS: # %bb.0:
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: .LBB151_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: lr.w.aq a2, (a0)
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: and a3, a2, a1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: not a3, a3
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: sc.w a3, a3, (a0)
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: bnez a3, .LBB151_1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: mv a0, a2
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i32_acquire:
+; RV32IA-TSO-ZABHA-NOZACAS: # %bb.0:
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: .LBB151_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: lr.w a2, (a0)
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: and a3, a2, a1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: not a3, a3
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: sc.w a3, a3, (a0)
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: bnez a3, .LBB151_1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: mv a0, a2
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: ret
+;
; RV64IA-WMO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i32_acquire:
; RV64IA-WMO-ZABHA-NOZACAS: # %bb.0:
; RV64IA-WMO-ZABHA-NOZACAS-NEXT: .LBB151_1: # =>This Inner Loop Header: Depth=1
@@ -21196,6 +27362,34 @@ define i32 @atomicrmw_nand_i32_acquire(ptr %a, i32 %b) nounwind {
; RV64IA-TSO-ZABHA-NOZACAS-NEXT: mv a0, a2
; RV64IA-TSO-ZABHA-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i32_acquire:
+; RV32IA-WMO-ZABHA-ZACAS: # %bb.0:
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: mv a2, a0
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: lw a0, 0(a0)
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: .LBB151_1: # %atomicrmw.start
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: mv a3, a0
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: and a4, a0, a1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: not a4, a4
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: amocas.w.aq a0, a4, (a2)
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: bne a0, a3, .LBB151_1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: # %bb.2: # %atomicrmw.end
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i32_acquire:
+; RV32IA-TSO-ZABHA-ZACAS: # %bb.0:
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: mv a2, a0
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: lw a0, 0(a0)
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: .LBB151_1: # %atomicrmw.start
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: mv a3, a0
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: and a4, a0, a1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: not a4, a4
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: amocas.w a0, a4, (a2)
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: bne a0, a3, .LBB151_1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: # %bb.2: # %atomicrmw.end
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i32_acquire:
; RV64IA-WMO-ZABHA-ZACAS: # %bb.0:
; RV64IA-WMO-ZABHA-ZACAS-NEXT: mv a2, a0
@@ -21352,6 +27546,30 @@ define i32 @atomicrmw_nand_i32_release(ptr %a, i32 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: # %bb.2: # %atomicrmw.end
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i32_release:
+; RV32IA-WMO-ZABHA-NOZACAS: # %bb.0:
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: .LBB152_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: lr.w a2, (a0)
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: and a3, a2, a1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: not a3, a3
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: sc.w.rl a3, a3, (a0)
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: bnez a3, .LBB152_1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: mv a0, a2
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i32_release:
+; RV32IA-TSO-ZABHA-NOZACAS: # %bb.0:
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: .LBB152_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: lr.w a2, (a0)
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: and a3, a2, a1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: not a3, a3
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: sc.w a3, a3, (a0)
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: bnez a3, .LBB152_1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: mv a0, a2
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: ret
+;
; RV64IA-WMO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i32_release:
; RV64IA-WMO-ZABHA-NOZACAS: # %bb.0:
; RV64IA-WMO-ZABHA-NOZACAS-NEXT: .LBB152_1: # =>This Inner Loop Header: Depth=1
@@ -21376,6 +27594,34 @@ define i32 @atomicrmw_nand_i32_release(ptr %a, i32 %b) nounwind {
; RV64IA-TSO-ZABHA-NOZACAS-NEXT: mv a0, a2
; RV64IA-TSO-ZABHA-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i32_release:
+; RV32IA-WMO-ZABHA-ZACAS: # %bb.0:
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: mv a2, a0
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: lw a0, 0(a0)
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: .LBB152_1: # %atomicrmw.start
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: mv a3, a0
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: and a4, a0, a1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: not a4, a4
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: amocas.w.rl a0, a4, (a2)
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: bne a0, a3, .LBB152_1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: # %bb.2: # %atomicrmw.end
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i32_release:
+; RV32IA-TSO-ZABHA-ZACAS: # %bb.0:
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: mv a2, a0
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: lw a0, 0(a0)
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: .LBB152_1: # %atomicrmw.start
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: mv a3, a0
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: and a4, a0, a1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: not a4, a4
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: amocas.w a0, a4, (a2)
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: bne a0, a3, .LBB152_1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: # %bb.2: # %atomicrmw.end
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i32_release:
; RV64IA-WMO-ZABHA-ZACAS: # %bb.0:
; RV64IA-WMO-ZABHA-ZACAS-NEXT: mv a2, a0
@@ -21532,6 +27778,30 @@ define i32 @atomicrmw_nand_i32_acq_rel(ptr %a, i32 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: # %bb.2: # %atomicrmw.end
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i32_acq_rel:
+; RV32IA-WMO-ZABHA-NOZACAS: # %bb.0:
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: .LBB153_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: lr.w.aq a2, (a0)
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: and a3, a2, a1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: not a3, a3
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: sc.w.rl a3, a3, (a0)
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: bnez a3, .LBB153_1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: mv a0, a2
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i32_acq_rel:
+; RV32IA-TSO-ZABHA-NOZACAS: # %bb.0:
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: .LBB153_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: lr.w a2, (a0)
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: and a3, a2, a1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: not a3, a3
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: sc.w a3, a3, (a0)
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: bnez a3, .LBB153_1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: mv a0, a2
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: ret
+;
; RV64IA-WMO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i32_acq_rel:
; RV64IA-WMO-ZABHA-NOZACAS: # %bb.0:
; RV64IA-WMO-ZABHA-NOZACAS-NEXT: .LBB153_1: # =>This Inner Loop Header: Depth=1
@@ -21556,6 +27826,34 @@ define i32 @atomicrmw_nand_i32_acq_rel(ptr %a, i32 %b) nounwind {
; RV64IA-TSO-ZABHA-NOZACAS-NEXT: mv a0, a2
; RV64IA-TSO-ZABHA-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i32_acq_rel:
+; RV32IA-WMO-ZABHA-ZACAS: # %bb.0:
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: mv a2, a0
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: lw a0, 0(a0)
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: .LBB153_1: # %atomicrmw.start
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: mv a3, a0
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: and a4, a0, a1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: not a4, a4
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: amocas.w.aqrl a0, a4, (a2)
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: bne a0, a3, .LBB153_1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: # %bb.2: # %atomicrmw.end
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i32_acq_rel:
+; RV32IA-TSO-ZABHA-ZACAS: # %bb.0:
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: mv a2, a0
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: lw a0, 0(a0)
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: .LBB153_1: # %atomicrmw.start
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: mv a3, a0
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: and a4, a0, a1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: not a4, a4
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: amocas.w a0, a4, (a2)
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: bne a0, a3, .LBB153_1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: # %bb.2: # %atomicrmw.end
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i32_acq_rel:
; RV64IA-WMO-ZABHA-ZACAS: # %bb.0:
; RV64IA-WMO-ZABHA-ZACAS-NEXT: mv a2, a0
@@ -21692,6 +27990,30 @@ define i32 @atomicrmw_nand_i32_seq_cst(ptr %a, i32 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: # %bb.2: # %atomicrmw.end
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i32_seq_cst:
+; RV32IA-WMO-ZABHA-NOZACAS: # %bb.0:
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: .LBB154_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: lr.w.aqrl a2, (a0)
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: and a3, a2, a1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: not a3, a3
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: sc.w.rl a3, a3, (a0)
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: bnez a3, .LBB154_1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: mv a0, a2
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i32_seq_cst:
+; RV32IA-TSO-ZABHA-NOZACAS: # %bb.0:
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: .LBB154_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: lr.w.aqrl a2, (a0)
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: and a3, a2, a1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: not a3, a3
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: sc.w.rl a3, a3, (a0)
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: bnez a3, .LBB154_1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: mv a0, a2
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: ret
+;
; RV64IA-WMO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i32_seq_cst:
; RV64IA-WMO-ZABHA-NOZACAS: # %bb.0:
; RV64IA-WMO-ZABHA-NOZACAS-NEXT: .LBB154_1: # =>This Inner Loop Header: Depth=1
@@ -21716,6 +28038,36 @@ define i32 @atomicrmw_nand_i32_seq_cst(ptr %a, i32 %b) nounwind {
; RV64IA-TSO-ZABHA-NOZACAS-NEXT: mv a0, a2
; RV64IA-TSO-ZABHA-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i32_seq_cst:
+; RV32IA-WMO-ZABHA-ZACAS: # %bb.0:
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: mv a2, a0
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: lw a0, 0(a0)
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: .LBB154_1: # %atomicrmw.start
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: mv a3, a0
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: and a4, a0, a1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: not a4, a4
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: fence rw, rw
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: amocas.w.aqrl a0, a4, (a2)
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: bne a0, a3, .LBB154_1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: # %bb.2: # %atomicrmw.end
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i32_seq_cst:
+; RV32IA-TSO-ZABHA-ZACAS: # %bb.0:
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: mv a2, a0
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: lw a0, 0(a0)
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: .LBB154_1: # %atomicrmw.start
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: mv a3, a0
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: and a4, a0, a1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: not a4, a4
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: fence rw, rw
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: amocas.w a0, a4, (a2)
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: bne a0, a3, .LBB154_1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: # %bb.2: # %atomicrmw.end
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i32_seq_cst:
; RV64IA-WMO-ZABHA-ZACAS: # %bb.0:
; RV64IA-WMO-ZABHA-ZACAS-NEXT: mv a2, a0
diff --git a/llvm/test/CodeGen/RISCV/attributes.ll b/llvm/test/CodeGen/RISCV/attributes.ll
index ead255b..f3529b1 100644
--- a/llvm/test/CodeGen/RISCV/attributes.ll
+++ b/llvm/test/CodeGen/RISCV/attributes.ll
@@ -443,7 +443,7 @@
; RV32ZVFBFWMA: .attribute 5, "rv32i2p1_f2p2_zicsr2p0_zfbfmin1p0_zve32f1p0_zve32x1p0_zvfbfmin1p0_zvfbfwma1p0_zvl32b1p0"
; RV32ZVFOFP8MIN: .attribute 5, "rv32i2p1_f2p2_zicsr2p0_zve32f1p0_zve32x1p0_zvfofp8min0p2_zvl32b1p0"
; RV32ZACAS: .attribute 5, "rv32i2p1_zaamo1p0_zacas1p0"
-; RV32ZALASR: .attribute 5, "rv32i2p1_zalasr0p1"
+; RV32ZALASR: .attribute 5, "rv32i2p1_zalasr0p9"
; RV32ZAMA16B: .attribute 5, "rv32i2p1_zama16b1p0"
; RV32ZICFILP: .attribute 5, "rv32i2p1_zicfilp1p0_zicsr2p0"
; RV32ZABHA: .attribute 5, "rv32i2p1_zaamo1p0_zabha1p0"
@@ -590,8 +590,8 @@
; RV64ZVFBFWMA: .attribute 5, "rv64i2p1_f2p2_zicsr2p0_zfbfmin1p0_zve32f1p0_zve32x1p0_zvfbfmin1p0_zvfbfwma1p0_zvl32b1p0"
; RV64ZVFOFP8MIN: .attribute 5, "rv64i2p1_f2p2_zicsr2p0_zve32f1p0_zve32x1p0_zvfofp8min0p2_zvl32b1p0"
; RV64ZACAS: .attribute 5, "rv64i2p1_zaamo1p0_zacas1p0"
-; RV64ZALASR: .attribute 5, "rv64i2p1_zalasr0p1"
-; RV64ZALASRA: .attribute 5, "rv64i2p1_a2p1_zaamo1p0_zalasr0p1_zalrsc1p0"
+; RV64ZALASR: .attribute 5, "rv64i2p1_zalasr0p9"
+; RV64ZALASRA: .attribute 5, "rv64i2p1_a2p1_zaamo1p0_zalasr0p9_zalrsc1p0"
; RV64ZICFILP: .attribute 5, "rv64i2p1_zicfilp1p0_zicsr2p0"
; RV64ZABHA: .attribute 5, "rv64i2p1_zaamo1p0_zabha1p0"
; RV64ZVBC32E: .attribute 5, "rv64i2p1_zicsr2p0_zvbc32e0p7_zve32x1p0_zvl32b1p0"
diff --git a/llvm/test/CodeGen/SPARC/atomicrmw-uinc-udec-wrap.ll b/llvm/test/CodeGen/SPARC/atomicrmw-uinc-udec-wrap.ll
index 380a4a0..d1f1c46 100644
--- a/llvm/test/CodeGen/SPARC/atomicrmw-uinc-udec-wrap.ll
+++ b/llvm/test/CodeGen/SPARC/atomicrmw-uinc-udec-wrap.ll
@@ -5,7 +5,7 @@ define i8 @atomicrmw_uinc_wrap_i8(ptr %ptr, i8 %val) {
; CHECK-LABEL: atomicrmw_uinc_wrap_i8:
; CHECK: .cfi_startproc
; CHECK-NEXT: ! %bb.0:
-; CHECK-NEXT: membar #LoadLoad | #StoreLoad | #LoadStore | #StoreStore
+; CHECK-NEXT: membar #LoadStore | #StoreStore
; CHECK-NEXT: and %o0, -4, %o2
; CHECK-NEXT: mov 3, %o3
; CHECK-NEXT: andn %o3, %o0, %o0
@@ -36,7 +36,7 @@ define i8 @atomicrmw_uinc_wrap_i8(ptr %ptr, i8 %val) {
; CHECK-NEXT: nop
; CHECK-NEXT: ! %bb.2: ! %atomicrmw.end
; CHECK-NEXT: srl %o4, %o0, %o0
-; CHECK-NEXT: membar #LoadLoad | #StoreLoad | #LoadStore | #StoreStore
+; CHECK-NEXT: membar #LoadLoad | #LoadStore
; CHECK-NEXT: retl
; CHECK-NEXT: nop
%result = atomicrmw uinc_wrap ptr %ptr, i8 %val seq_cst
@@ -47,7 +47,7 @@ define i16 @atomicrmw_uinc_wrap_i16(ptr %ptr, i16 %val) {
; CHECK-LABEL: atomicrmw_uinc_wrap_i16:
; CHECK: .cfi_startproc
; CHECK-NEXT: ! %bb.0:
-; CHECK-NEXT: membar #LoadLoad | #StoreLoad | #LoadStore | #StoreStore
+; CHECK-NEXT: membar #LoadStore | #StoreStore
; CHECK-NEXT: and %o0, -4, %o2
; CHECK-NEXT: and %o0, 3, %o0
; CHECK-NEXT: xor %o0, 2, %o0
@@ -79,7 +79,7 @@ define i16 @atomicrmw_uinc_wrap_i16(ptr %ptr, i16 %val) {
; CHECK-NEXT: nop
; CHECK-NEXT: ! %bb.2: ! %atomicrmw.end
; CHECK-NEXT: srl %o5, %o0, %o0
-; CHECK-NEXT: membar #LoadLoad | #StoreLoad | #LoadStore | #StoreStore
+; CHECK-NEXT: membar #LoadLoad | #LoadStore
; CHECK-NEXT: retl
; CHECK-NEXT: nop
%result = atomicrmw uinc_wrap ptr %ptr, i16 %val seq_cst
@@ -90,7 +90,7 @@ define i32 @atomicrmw_uinc_wrap_i32(ptr %ptr, i32 %val) {
; CHECK-LABEL: atomicrmw_uinc_wrap_i32:
; CHECK: .cfi_startproc
; CHECK-NEXT: ! %bb.0:
-; CHECK-NEXT: membar #LoadLoad | #StoreLoad | #LoadStore | #StoreStore
+; CHECK-NEXT: membar #LoadStore | #StoreStore
; CHECK-NEXT: ld [%o0], %o2
; CHECK-NEXT: .LBB2_1: ! %atomicrmw.start
; CHECK-NEXT: ! =>This Inner Loop Header: Depth=1
@@ -106,7 +106,7 @@ define i32 @atomicrmw_uinc_wrap_i32(ptr %ptr, i32 %val) {
; CHECK-NEXT: bne %icc, .LBB2_1
; CHECK-NEXT: nop
; CHECK-NEXT: ! %bb.2: ! %atomicrmw.end
-; CHECK-NEXT: membar #LoadLoad | #StoreLoad | #LoadStore | #StoreStore
+; CHECK-NEXT: membar #LoadLoad | #LoadStore
; CHECK-NEXT: retl
; CHECK-NEXT: mov %o2, %o0
%result = atomicrmw uinc_wrap ptr %ptr, i32 %val seq_cst
@@ -160,7 +160,7 @@ define i8 @atomicrmw_udec_wrap_i8(ptr %ptr, i8 %val) {
; CHECK-LABEL: atomicrmw_udec_wrap_i8:
; CHECK: .cfi_startproc
; CHECK-NEXT: ! %bb.0:
-; CHECK-NEXT: membar #LoadLoad | #StoreLoad | #LoadStore | #StoreStore
+; CHECK-NEXT: membar #LoadStore | #StoreStore
; CHECK-NEXT: and %o0, -4, %o2
; CHECK-NEXT: mov 3, %o3
; CHECK-NEXT: andn %o3, %o0, %o0
@@ -193,7 +193,7 @@ define i8 @atomicrmw_udec_wrap_i8(ptr %ptr, i8 %val) {
; CHECK-NEXT: nop
; CHECK-NEXT: ! %bb.2: ! %atomicrmw.end
; CHECK-NEXT: srl %o5, %o0, %o0
-; CHECK-NEXT: membar #LoadLoad | #StoreLoad | #LoadStore | #StoreStore
+; CHECK-NEXT: membar #LoadLoad | #LoadStore
; CHECK-NEXT: retl
; CHECK-NEXT: nop
%result = atomicrmw udec_wrap ptr %ptr, i8 %val seq_cst
@@ -204,7 +204,7 @@ define i16 @atomicrmw_udec_wrap_i16(ptr %ptr, i16 %val) {
; CHECK-LABEL: atomicrmw_udec_wrap_i16:
; CHECK: .cfi_startproc
; CHECK-NEXT: ! %bb.0:
-; CHECK-NEXT: membar #LoadLoad | #StoreLoad | #LoadStore | #StoreStore
+; CHECK-NEXT: membar #LoadStore | #StoreStore
; CHECK-NEXT: and %o0, -4, %o2
; CHECK-NEXT: and %o0, 3, %o0
; CHECK-NEXT: xor %o0, 2, %o0
@@ -238,7 +238,7 @@ define i16 @atomicrmw_udec_wrap_i16(ptr %ptr, i16 %val) {
; CHECK-NEXT: nop
; CHECK-NEXT: ! %bb.2: ! %atomicrmw.end
; CHECK-NEXT: srl %g2, %o0, %o0
-; CHECK-NEXT: membar #LoadLoad | #StoreLoad | #LoadStore | #StoreStore
+; CHECK-NEXT: membar #LoadLoad | #LoadStore
; CHECK-NEXT: retl
; CHECK-NEXT: nop
%result = atomicrmw udec_wrap ptr %ptr, i16 %val seq_cst
@@ -249,7 +249,7 @@ define i32 @atomicrmw_udec_wrap_i32(ptr %ptr, i32 %val) {
; CHECK-LABEL: atomicrmw_udec_wrap_i32:
; CHECK: .cfi_startproc
; CHECK-NEXT: ! %bb.0:
-; CHECK-NEXT: membar #LoadLoad | #StoreLoad | #LoadStore | #StoreStore
+; CHECK-NEXT: membar #LoadStore | #StoreStore
; CHECK-NEXT: ld [%o0], %o2
; CHECK-NEXT: .LBB6_1: ! %atomicrmw.start
; CHECK-NEXT: ! =>This Inner Loop Header: Depth=1
@@ -267,7 +267,7 @@ define i32 @atomicrmw_udec_wrap_i32(ptr %ptr, i32 %val) {
; CHECK-NEXT: bne %icc, .LBB6_1
; CHECK-NEXT: nop
; CHECK-NEXT: ! %bb.2: ! %atomicrmw.end
-; CHECK-NEXT: membar #LoadLoad | #StoreLoad | #LoadStore | #StoreStore
+; CHECK-NEXT: membar #LoadLoad | #LoadStore
; CHECK-NEXT: retl
; CHECK-NEXT: mov %o2, %o0
%result = atomicrmw udec_wrap ptr %ptr, i32 %val seq_cst
diff --git a/llvm/test/CodeGen/SPARC/atomics-ordering.ll b/llvm/test/CodeGen/SPARC/atomics-ordering.ll
new file mode 100644
index 0000000..7c13ac2
--- /dev/null
+++ b/llvm/test/CodeGen/SPARC/atomics-ordering.ll
@@ -0,0 +1,446 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=sparc -verify-machineinstrs | FileCheck %s --check-prefixes=SPARC32
+; RUN: llc < %s -mtriple=sparc -mcpu=leon4 -verify-machineinstrs | FileCheck %s --check-prefixes=SPARC32-LEON4
+; RUN: llc < %s -mtriple=sparc -mcpu=v9 -verify-machineinstrs | FileCheck %s --check-prefixes=SPARC32-V9
+; RUN: llc < %s -mtriple=sparcv9 -verify-machineinstrs | FileCheck %s --check-prefixes=SPARC64
+
+define i32 @load_acq(ptr %0) nounwind {
+; SPARC32-LABEL: load_acq:
+; SPARC32: ! %bb.0:
+; SPARC32-NEXT: save %sp, -96, %sp
+; SPARC32-NEXT: mov %i0, %o0
+; SPARC32-NEXT: call __atomic_load_4
+; SPARC32-NEXT: mov 2, %o1
+; SPARC32-NEXT: ret
+; SPARC32-NEXT: restore %g0, %o0, %o0
+;
+; SPARC32-LEON4-LABEL: load_acq:
+; SPARC32-LEON4: ! %bb.0:
+; SPARC32-LEON4-NEXT: retl
+; SPARC32-LEON4-NEXT: ld [%o0], %o0
+;
+; SPARC32-V9-LABEL: load_acq:
+; SPARC32-V9: ! %bb.0:
+; SPARC32-V9-NEXT: ld [%o0], %o0
+; SPARC32-V9-NEXT: membar #LoadLoad | #LoadStore
+; SPARC32-V9-NEXT: retl
+; SPARC32-V9-NEXT: nop
+;
+; SPARC64-LABEL: load_acq:
+; SPARC64: ! %bb.0:
+; SPARC64-NEXT: ld [%o0], %o0
+; SPARC64-NEXT: membar #LoadLoad | #LoadStore
+; SPARC64-NEXT: retl
+; SPARC64-NEXT: nop
+ %2 = load atomic i32, ptr %0 acquire, align 4
+ ret i32 %2
+}
+
+define i32 @load_sc(ptr %0) nounwind {
+; SPARC32-LABEL: load_sc:
+; SPARC32: ! %bb.0:
+; SPARC32-NEXT: save %sp, -96, %sp
+; SPARC32-NEXT: mov %i0, %o0
+; SPARC32-NEXT: call __atomic_load_4
+; SPARC32-NEXT: mov 5, %o1
+; SPARC32-NEXT: ret
+; SPARC32-NEXT: restore %g0, %o0, %o0
+;
+; SPARC32-LEON4-LABEL: load_sc:
+; SPARC32-LEON4: ! %bb.0:
+; SPARC32-LEON4-NEXT: retl
+; SPARC32-LEON4-NEXT: ld [%o0], %o0
+;
+; SPARC32-V9-LABEL: load_sc:
+; SPARC32-V9: ! %bb.0:
+; SPARC32-V9-NEXT: ld [%o0], %o0
+; SPARC32-V9-NEXT: membar #LoadLoad | #LoadStore
+; SPARC32-V9-NEXT: retl
+; SPARC32-V9-NEXT: nop
+;
+; SPARC64-LABEL: load_sc:
+; SPARC64: ! %bb.0:
+; SPARC64-NEXT: ld [%o0], %o0
+; SPARC64-NEXT: membar #LoadLoad | #LoadStore
+; SPARC64-NEXT: retl
+; SPARC64-NEXT: nop
+ %2 = load atomic i32, ptr %0 seq_cst, align 4
+ ret i32 %2
+}
+
+define void @store_rel(ptr %0, i32 %1) nounwind {
+; SPARC32-LABEL: store_rel:
+; SPARC32: ! %bb.0:
+; SPARC32-NEXT: save %sp, -96, %sp
+; SPARC32-NEXT: mov %i1, %o1
+; SPARC32-NEXT: mov %i0, %o0
+; SPARC32-NEXT: call __atomic_store_4
+; SPARC32-NEXT: mov 3, %o2
+; SPARC32-NEXT: ret
+; SPARC32-NEXT: restore
+;
+; SPARC32-LEON4-LABEL: store_rel:
+; SPARC32-LEON4: ! %bb.0:
+; SPARC32-LEON4-NEXT: stbar
+; SPARC32-LEON4-NEXT: retl
+; SPARC32-LEON4-NEXT: st %o1, [%o0]
+;
+; SPARC32-V9-LABEL: store_rel:
+; SPARC32-V9: ! %bb.0:
+; SPARC32-V9-NEXT: membar #LoadStore | #StoreStore
+; SPARC32-V9-NEXT: retl
+; SPARC32-V9-NEXT: st %o1, [%o0]
+;
+; SPARC64-LABEL: store_rel:
+; SPARC64: ! %bb.0:
+; SPARC64-NEXT: membar #LoadStore | #StoreStore
+; SPARC64-NEXT: retl
+; SPARC64-NEXT: st %o1, [%o0]
+ store atomic i32 %1, ptr %0 release, align 4
+ ret void
+}
+
+define void @store_sc(ptr %0, i32 %1) nounwind {
+; SPARC32-LABEL: store_sc:
+; SPARC32: ! %bb.0:
+; SPARC32-NEXT: save %sp, -96, %sp
+; SPARC32-NEXT: mov %i1, %o1
+; SPARC32-NEXT: mov %i0, %o0
+; SPARC32-NEXT: call __atomic_store_4
+; SPARC32-NEXT: mov 5, %o2
+; SPARC32-NEXT: ret
+; SPARC32-NEXT: restore
+;
+; SPARC32-LEON4-LABEL: store_sc:
+; SPARC32-LEON4: ! %bb.0:
+; SPARC32-LEON4-NEXT: stbar
+; SPARC32-LEON4-NEXT: st %o1, [%o0]
+; SPARC32-LEON4-NEXT: stbar
+; SPARC32-LEON4-NEXT: ldstub [%sp+-1], %g0
+; SPARC32-LEON4-NEXT: retl
+; SPARC32-LEON4-NEXT: nop
+;
+; SPARC32-V9-LABEL: store_sc:
+; SPARC32-V9: ! %bb.0:
+; SPARC32-V9-NEXT: membar #LoadStore | #StoreStore
+; SPARC32-V9-NEXT: st %o1, [%o0]
+; SPARC32-V9-NEXT: membar #LoadLoad | #StoreLoad | #LoadStore | #StoreStore
+; SPARC32-V9-NEXT: retl
+; SPARC32-V9-NEXT: nop
+;
+; SPARC64-LABEL: store_sc:
+; SPARC64: ! %bb.0:
+; SPARC64-NEXT: membar #LoadStore | #StoreStore
+; SPARC64-NEXT: st %o1, [%o0]
+; SPARC64-NEXT: membar #LoadLoad | #StoreLoad | #LoadStore | #StoreStore
+; SPARC64-NEXT: retl
+; SPARC64-NEXT: nop
+ store atomic i32 %1, ptr %0 seq_cst, align 4
+ ret void
+}
+
+define i32 @rmw_acq(ptr %0, i32 %1) nounwind {
+; SPARC32-LABEL: rmw_acq:
+; SPARC32: ! %bb.0:
+; SPARC32-NEXT: save %sp, -96, %sp
+; SPARC32-NEXT: mov %i1, %o1
+; SPARC32-NEXT: mov %i0, %o0
+; SPARC32-NEXT: call __atomic_exchange_4
+; SPARC32-NEXT: mov 2, %o2
+; SPARC32-NEXT: ret
+; SPARC32-NEXT: restore %g0, %o0, %o0
+;
+; SPARC32-LEON4-LABEL: rmw_acq:
+; SPARC32-LEON4: ! %bb.0:
+; SPARC32-LEON4-NEXT: swap [%o0], %o1
+; SPARC32-LEON4-NEXT: retl
+; SPARC32-LEON4-NEXT: mov %o1, %o0
+;
+; SPARC32-V9-LABEL: rmw_acq:
+; SPARC32-V9: ! %bb.0:
+; SPARC32-V9-NEXT: swap [%o0], %o1
+; SPARC32-V9-NEXT: membar #LoadLoad | #LoadStore
+; SPARC32-V9-NEXT: retl
+; SPARC32-V9-NEXT: mov %o1, %o0
+;
+; SPARC64-LABEL: rmw_acq:
+; SPARC64: ! %bb.0:
+; SPARC64-NEXT: swap [%o0], %o1
+; SPARC64-NEXT: membar #LoadLoad | #LoadStore
+; SPARC64-NEXT: retl
+; SPARC64-NEXT: mov %o1, %o0
+ %3 = atomicrmw xchg ptr %0, i32 %1 acquire, align 4
+ ret i32 %3
+}
+
+define i32 @rmw_rel(ptr %0, i32 %1) nounwind {
+; SPARC32-LABEL: rmw_rel:
+; SPARC32: ! %bb.0:
+; SPARC32-NEXT: save %sp, -96, %sp
+; SPARC32-NEXT: mov %i1, %o1
+; SPARC32-NEXT: mov %i0, %o0
+; SPARC32-NEXT: call __atomic_exchange_4
+; SPARC32-NEXT: mov 3, %o2
+; SPARC32-NEXT: ret
+; SPARC32-NEXT: restore %g0, %o0, %o0
+;
+; SPARC32-LEON4-LABEL: rmw_rel:
+; SPARC32-LEON4: ! %bb.0:
+; SPARC32-LEON4-NEXT: stbar
+; SPARC32-LEON4-NEXT: swap [%o0], %o1
+; SPARC32-LEON4-NEXT: retl
+; SPARC32-LEON4-NEXT: mov %o1, %o0
+;
+; SPARC32-V9-LABEL: rmw_rel:
+; SPARC32-V9: ! %bb.0:
+; SPARC32-V9-NEXT: membar #LoadStore | #StoreStore
+; SPARC32-V9-NEXT: swap [%o0], %o1
+; SPARC32-V9-NEXT: retl
+; SPARC32-V9-NEXT: mov %o1, %o0
+;
+; SPARC64-LABEL: rmw_rel:
+; SPARC64: ! %bb.0:
+; SPARC64-NEXT: membar #LoadStore | #StoreStore
+; SPARC64-NEXT: swap [%o0], %o1
+; SPARC64-NEXT: retl
+; SPARC64-NEXT: mov %o1, %o0
+ %3 = atomicrmw xchg ptr %0, i32 %1 release, align 4
+ ret i32 %3
+}
+
+define i32 @rmw_acq_rel(ptr %0, i32 %1) nounwind {
+; SPARC32-LABEL: rmw_acq_rel:
+; SPARC32: ! %bb.0:
+; SPARC32-NEXT: save %sp, -96, %sp
+; SPARC32-NEXT: mov %i1, %o1
+; SPARC32-NEXT: mov %i0, %o0
+; SPARC32-NEXT: call __atomic_exchange_4
+; SPARC32-NEXT: mov 4, %o2
+; SPARC32-NEXT: ret
+; SPARC32-NEXT: restore %g0, %o0, %o0
+;
+; SPARC32-LEON4-LABEL: rmw_acq_rel:
+; SPARC32-LEON4: ! %bb.0:
+; SPARC32-LEON4-NEXT: stbar
+; SPARC32-LEON4-NEXT: swap [%o0], %o1
+; SPARC32-LEON4-NEXT: retl
+; SPARC32-LEON4-NEXT: mov %o1, %o0
+;
+; SPARC32-V9-LABEL: rmw_acq_rel:
+; SPARC32-V9: ! %bb.0:
+; SPARC32-V9-NEXT: membar #LoadStore | #StoreStore
+; SPARC32-V9-NEXT: swap [%o0], %o1
+; SPARC32-V9-NEXT: membar #LoadLoad | #LoadStore
+; SPARC32-V9-NEXT: retl
+; SPARC32-V9-NEXT: mov %o1, %o0
+;
+; SPARC64-LABEL: rmw_acq_rel:
+; SPARC64: ! %bb.0:
+; SPARC64-NEXT: membar #LoadStore | #StoreStore
+; SPARC64-NEXT: swap [%o0], %o1
+; SPARC64-NEXT: membar #LoadLoad | #LoadStore
+; SPARC64-NEXT: retl
+; SPARC64-NEXT: mov %o1, %o0
+ %3 = atomicrmw xchg ptr %0, i32 %1 acq_rel, align 4
+ ret i32 %3
+}
+
+define i32 @rmw_sc(ptr %0, i32 %1) nounwind {
+; SPARC32-LABEL: rmw_sc:
+; SPARC32: ! %bb.0:
+; SPARC32-NEXT: save %sp, -96, %sp
+; SPARC32-NEXT: mov %i1, %o1
+; SPARC32-NEXT: mov %i0, %o0
+; SPARC32-NEXT: call __atomic_exchange_4
+; SPARC32-NEXT: mov 5, %o2
+; SPARC32-NEXT: ret
+; SPARC32-NEXT: restore %g0, %o0, %o0
+;
+; SPARC32-LEON4-LABEL: rmw_sc:
+; SPARC32-LEON4: ! %bb.0:
+; SPARC32-LEON4-NEXT: stbar
+; SPARC32-LEON4-NEXT: swap [%o0], %o1
+; SPARC32-LEON4-NEXT: retl
+; SPARC32-LEON4-NEXT: mov %o1, %o0
+;
+; SPARC32-V9-LABEL: rmw_sc:
+; SPARC32-V9: ! %bb.0:
+; SPARC32-V9-NEXT: membar #LoadStore | #StoreStore
+; SPARC32-V9-NEXT: swap [%o0], %o1
+; SPARC32-V9-NEXT: membar #LoadLoad | #LoadStore
+; SPARC32-V9-NEXT: retl
+; SPARC32-V9-NEXT: mov %o1, %o0
+;
+; SPARC64-LABEL: rmw_sc:
+; SPARC64: ! %bb.0:
+; SPARC64-NEXT: membar #LoadStore | #StoreStore
+; SPARC64-NEXT: swap [%o0], %o1
+; SPARC64-NEXT: membar #LoadLoad | #LoadStore
+; SPARC64-NEXT: retl
+; SPARC64-NEXT: mov %o1, %o0
+ %3 = atomicrmw xchg ptr %0, i32 %1 seq_cst, align 4
+ ret i32 %3
+}
+
+define i32 @cas_acq(ptr %0, i32 %1, i32 %2) nounwind {
+; SPARC32-LABEL: cas_acq:
+; SPARC32: ! %bb.0:
+; SPARC32-NEXT: save %sp, -96, %sp
+; SPARC32-NEXT: mov %i2, %o2
+; SPARC32-NEXT: mov %i0, %o0
+; SPARC32-NEXT: st %i1, [%fp+-4]
+; SPARC32-NEXT: add %fp, -4, %o1
+; SPARC32-NEXT: mov 2, %o3
+; SPARC32-NEXT: call __atomic_compare_exchange_4
+; SPARC32-NEXT: mov %o3, %o4
+; SPARC32-NEXT: ld [%fp+-4], %i0
+; SPARC32-NEXT: ret
+; SPARC32-NEXT: restore
+;
+; SPARC32-LEON4-LABEL: cas_acq:
+; SPARC32-LEON4: ! %bb.0:
+; SPARC32-LEON4-NEXT: casa [%o0] 10, %o1, %o2
+; SPARC32-LEON4-NEXT: retl
+; SPARC32-LEON4-NEXT: mov %o2, %o0
+;
+; SPARC32-V9-LABEL: cas_acq:
+; SPARC32-V9: ! %bb.0:
+; SPARC32-V9-NEXT: cas [%o0], %o1, %o2
+; SPARC32-V9-NEXT: membar #LoadLoad | #LoadStore
+; SPARC32-V9-NEXT: retl
+; SPARC32-V9-NEXT: mov %o2, %o0
+;
+; SPARC64-LABEL: cas_acq:
+; SPARC64: ! %bb.0:
+; SPARC64-NEXT: cas [%o0], %o1, %o2
+; SPARC64-NEXT: membar #LoadLoad | #LoadStore
+; SPARC64-NEXT: retl
+; SPARC64-NEXT: mov %o2, %o0
+ %4 = cmpxchg ptr %0, i32 %1, i32 %2 acquire acquire, align 4
+ %5 = extractvalue { i32, i1 } %4, 0
+ ret i32 %5
+}
+
+define i32 @cas_rel(ptr %0, i32 %1, i32 %2) nounwind {
+; SPARC32-LABEL: cas_rel:
+; SPARC32: ! %bb.0:
+; SPARC32-NEXT: save %sp, -96, %sp
+; SPARC32-NEXT: mov %i2, %o2
+; SPARC32-NEXT: mov %i0, %o0
+; SPARC32-NEXT: st %i1, [%fp+-4]
+; SPARC32-NEXT: add %fp, -4, %o1
+; SPARC32-NEXT: mov 3, %o3
+; SPARC32-NEXT: call __atomic_compare_exchange_4
+; SPARC32-NEXT: mov %g0, %o4
+; SPARC32-NEXT: ld [%fp+-4], %i0
+; SPARC32-NEXT: ret
+; SPARC32-NEXT: restore
+;
+; SPARC32-LEON4-LABEL: cas_rel:
+; SPARC32-LEON4: ! %bb.0:
+; SPARC32-LEON4-NEXT: stbar
+; SPARC32-LEON4-NEXT: casa [%o0] 10, %o1, %o2
+; SPARC32-LEON4-NEXT: retl
+; SPARC32-LEON4-NEXT: mov %o2, %o0
+;
+; SPARC32-V9-LABEL: cas_rel:
+; SPARC32-V9: ! %bb.0:
+; SPARC32-V9-NEXT: membar #LoadStore | #StoreStore
+; SPARC32-V9-NEXT: cas [%o0], %o1, %o2
+; SPARC32-V9-NEXT: retl
+; SPARC32-V9-NEXT: mov %o2, %o0
+;
+; SPARC64-LABEL: cas_rel:
+; SPARC64: ! %bb.0:
+; SPARC64-NEXT: membar #LoadStore | #StoreStore
+; SPARC64-NEXT: cas [%o0], %o1, %o2
+; SPARC64-NEXT: retl
+; SPARC64-NEXT: mov %o2, %o0
+ %4 = cmpxchg ptr %0, i32 %1, i32 %2 release monotonic, align 4
+ %5 = extractvalue { i32, i1 } %4, 0
+ ret i32 %5
+}
+
+define i32 @cas_acq_rel(ptr %0, i32 %1, i32 %2) nounwind {
+; SPARC32-LABEL: cas_acq_rel:
+; SPARC32: ! %bb.0:
+; SPARC32-NEXT: save %sp, -96, %sp
+; SPARC32-NEXT: mov %i2, %o2
+; SPARC32-NEXT: mov %i0, %o0
+; SPARC32-NEXT: st %i1, [%fp+-4]
+; SPARC32-NEXT: add %fp, -4, %o1
+; SPARC32-NEXT: mov 4, %o3
+; SPARC32-NEXT: call __atomic_compare_exchange_4
+; SPARC32-NEXT: mov 2, %o4
+; SPARC32-NEXT: ld [%fp+-4], %i0
+; SPARC32-NEXT: ret
+; SPARC32-NEXT: restore
+;
+; SPARC32-LEON4-LABEL: cas_acq_rel:
+; SPARC32-LEON4: ! %bb.0:
+; SPARC32-LEON4-NEXT: stbar
+; SPARC32-LEON4-NEXT: casa [%o0] 10, %o1, %o2
+; SPARC32-LEON4-NEXT: retl
+; SPARC32-LEON4-NEXT: mov %o2, %o0
+;
+; SPARC32-V9-LABEL: cas_acq_rel:
+; SPARC32-V9: ! %bb.0:
+; SPARC32-V9-NEXT: membar #LoadStore | #StoreStore
+; SPARC32-V9-NEXT: cas [%o0], %o1, %o2
+; SPARC32-V9-NEXT: membar #LoadLoad | #LoadStore
+; SPARC32-V9-NEXT: retl
+; SPARC32-V9-NEXT: mov %o2, %o0
+;
+; SPARC64-LABEL: cas_acq_rel:
+; SPARC64: ! %bb.0:
+; SPARC64-NEXT: membar #LoadStore | #StoreStore
+; SPARC64-NEXT: cas [%o0], %o1, %o2
+; SPARC64-NEXT: membar #LoadLoad | #LoadStore
+; SPARC64-NEXT: retl
+; SPARC64-NEXT: mov %o2, %o0
+ %4 = cmpxchg ptr %0, i32 %1, i32 %2 acq_rel acquire, align 4
+ %5 = extractvalue { i32, i1 } %4, 0
+ ret i32 %5
+}
+
+define i32 @cas_sc(ptr %0, i32 %1, i32 %2) nounwind {
+; SPARC32-LABEL: cas_sc:
+; SPARC32: ! %bb.0:
+; SPARC32-NEXT: save %sp, -96, %sp
+; SPARC32-NEXT: mov %i2, %o2
+; SPARC32-NEXT: mov %i0, %o0
+; SPARC32-NEXT: st %i1, [%fp+-4]
+; SPARC32-NEXT: add %fp, -4, %o1
+; SPARC32-NEXT: mov 5, %o3
+; SPARC32-NEXT: call __atomic_compare_exchange_4
+; SPARC32-NEXT: mov %o3, %o4
+; SPARC32-NEXT: ld [%fp+-4], %i0
+; SPARC32-NEXT: ret
+; SPARC32-NEXT: restore
+;
+; SPARC32-LEON4-LABEL: cas_sc:
+; SPARC32-LEON4: ! %bb.0:
+; SPARC32-LEON4-NEXT: stbar
+; SPARC32-LEON4-NEXT: casa [%o0] 10, %o1, %o2
+; SPARC32-LEON4-NEXT: retl
+; SPARC32-LEON4-NEXT: mov %o2, %o0
+;
+; SPARC32-V9-LABEL: cas_sc:
+; SPARC32-V9: ! %bb.0:
+; SPARC32-V9-NEXT: membar #LoadStore | #StoreStore
+; SPARC32-V9-NEXT: cas [%o0], %o1, %o2
+; SPARC32-V9-NEXT: membar #LoadLoad | #LoadStore
+; SPARC32-V9-NEXT: retl
+; SPARC32-V9-NEXT: mov %o2, %o0
+;
+; SPARC64-LABEL: cas_sc:
+; SPARC64: ! %bb.0:
+; SPARC64-NEXT: membar #LoadStore | #StoreStore
+; SPARC64-NEXT: cas [%o0], %o1, %o2
+; SPARC64-NEXT: membar #LoadLoad | #LoadStore
+; SPARC64-NEXT: retl
+; SPARC64-NEXT: mov %o2, %o0
+ %4 = cmpxchg ptr %0, i32 %1, i32 %2 seq_cst seq_cst, align 4
+ %5 = extractvalue { i32, i1 } %4, 0
+ ret i32 %5
+}
diff --git a/llvm/test/CodeGen/SPIRV/llc-pipeline.ll b/llvm/test/CodeGen/SPIRV/llc-pipeline.ll
new file mode 100644
index 0000000..3fff2a8
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/llc-pipeline.ll
@@ -0,0 +1,214 @@
+; UNSUPPORTED:expensive_checks
+; RUN:llc -O0 -mtriple=spirv-- -disable-verify -debug-pass=Structure < %s 2>&1 \
+; RUN: | FileCheck -match-full-lines -strict-whitespace -check-prefix=SPIRV-O0 %s
+; RUN:llc -O1 -mtriple=spirv-- -disable-verify -debug-pass=Structure < %s 2>&1 \
+; RUN: | FileCheck -match-full-lines -strict-whitespace -check-prefix=SPIRV-Opt %s
+; RUN:llc -O2 -mtriple=spirv-- -disable-verify -debug-pass=Structure < %s 2>&1 \
+; RUN: | FileCheck -match-full-lines -strict-whitespace -check-prefix=SPIRV-Opt %s
+; RUN:llc -O3 -mtriple=spirv-- -disable-verify -debug-pass=Structure < %s 2>&1 \
+; RUN: | FileCheck -match-full-lines -strict-whitespace -check-prefix=SPIRV-Opt %s
+;
+; REQUIRES:asserts
+
+; SPIRV-O0:Target Library Information
+; SPIRV-O0-NEXT:Target Pass Configuration
+; SPIRV-O0-NEXT:Machine Module Information
+; SPIRV-O0-NEXT:Target Transform Information
+; SPIRV-O0-NEXT:Create Garbage Collector Module Metadata
+; SPIRV-O0-NEXT:Assumption Cache Tracker
+; SPIRV-O0-NEXT:Profile summary info
+; SPIRV-O0-NEXT:Machine Branch Probability Analysis
+; SPIRV-O0-NEXT: ModulePass Manager
+; SPIRV-O0-NEXT: Pre-ISel Intrinsic Lowering
+; SPIRV-O0-NEXT: FunctionPass Manager
+; SPIRV-O0-NEXT: Expand large div/rem
+; SPIRV-O0-NEXT: Expand fp
+; SPIRV-O0-NEXT: Lower Garbage Collection Instructions
+; SPIRV-O0-NEXT: Shadow Stack GC Lowering
+; SPIRV-O0-NEXT: Remove unreachable blocks from the CFG
+; SPIRV-O0-NEXT: Instrument function entry/exit with calls to e.g. mcount() (post inlining)
+; SPIRV-O0-NEXT: Scalarize Masked Memory Intrinsics
+; SPIRV-O0-NEXT: Expand reduction intrinsics
+; SPIRV-O0-NEXT: SPIR-V Regularizer
+; SPIRV-O0-NEXT: SPIRV prepare functions
+; SPIRV-O0-NEXT: FunctionPass Manager
+; SPIRV-O0-NEXT: Lower invoke and unwind, for unwindless code generators
+; SPIRV-O0-NEXT: Remove unreachable blocks from the CFG
+; SPIRV-O0-NEXT: SPIRV strip convergent intrinsics
+; SPIRV-O0-NEXT: SPIRV Legalize Implicit Binding
+; SPIRV-O0-NEXT: SPIRV CBuffer Access
+; SPIRV-O0-NEXT: SPIRV emit intrinsics
+; SPIRV-O0-NEXT: FunctionPass Manager
+; SPIRV-O0-NEXT: SPIRV legalize bitcast pass
+; SPIRV-O0-NEXT: Prepare callbr
+; SPIRV-O0-NEXT: Safe Stack instrumentation pass
+; SPIRV-O0-NEXT: Insert stack protectors
+; SPIRV-O0-NEXT: Analysis containing CSE Info
+; SPIRV-O0-NEXT: IRTranslator
+; SPIRV-O0-NEXT: Analysis for ComputingKnownBits
+; SPIRV-O0-NEXT: MachineDominator Tree Construction
+; SPIRV-O0-NEXT: SPIRVPreLegalizerCombiner
+; SPIRV-O0-NEXT: SPIRV pre legalizer
+; SPIRV-O0-NEXT: Analysis containing CSE Info
+; SPIRV-O0-NEXT: Legalizer
+; SPIRV-O0-NEXT: SPIRV post legalizer
+; SPIRV-O0-NEXT: Analysis for ComputingKnownBits
+; SPIRV-O0-NEXT: Dominator Tree Construction
+; SPIRV-O0-NEXT: Natural Loop Information
+; SPIRV-O0-NEXT: Lazy Branch Probability Analysis
+; SPIRV-O0-NEXT: Lazy Block Frequency Analysis
+; SPIRV-O0-NEXT: InstructionSelect
+; SPIRV-O0-NEXT: ResetMachineFunction
+; SPIRV-O0-NEXT: Finalize ISel and expand pseudo-instructions
+; SPIRV-O0-NEXT: Local Stack Slot Allocation
+; SPIRV-O0-NEXT: Remove Redundant DEBUG_VALUE analysis
+; SPIRV-O0-NEXT: Fixup Statepoint Caller Saved
+; SPIRV-O0-NEXT: Lazy Machine Block Frequency Analysis
+; SPIRV-O0-NEXT: Machine Optimization Remark Emitter
+; SPIRV-O0-NEXT: Prologue/Epilogue Insertion & Frame Finalization
+; SPIRV-O0-NEXT: Post-RA pseudo instruction expansion pass
+; SPIRV-O0-NEXT: Analyze Machine Code For Garbage Collection
+; SPIRV-O0-NEXT: Insert fentry calls
+; SPIRV-O0-NEXT: Insert XRay ops
+; SPIRV-O0-NEXT: Machine Sanitizer Binary Metadata
+; SPIRV-O0-NEXT: Lazy Machine Block Frequency Analysis
+; SPIRV-O0-NEXT: Machine Optimization Remark Emitter
+; SPIRV-O0-NEXT: Stack Frame Layout Analysis
+; SPIRV-O0-NEXT: SPIRV module analysis
+; SPIRV-O0-NEXT: FunctionPass Manager
+; SPIRV-O0-NEXT: Lazy Machine Block Frequency Analysis
+; SPIRV-O0-NEXT: Machine Optimization Remark Emitter
+; SPIRV-O0-NEXT: SPIRV Assembly Printer
+; SPIRV-O0-NEXT: Free MachineFunction
+
+; SPIRV-Opt:Target Library Information
+; SPIRV-Opt-NEXT:Target Pass Configuration
+; SPIRV-Opt-NEXT:Machine Module Information
+; SPIRV-Opt-NEXT:Target Transform Information
+; SPIRV-Opt-NEXT:Assumption Cache Tracker
+; SPIRV-Opt-NEXT:Type-Based Alias Analysis
+; SPIRV-Opt-NEXT:Scoped NoAlias Alias Analysis
+; SPIRV-Opt-NEXT:Profile summary info
+; SPIRV-Opt-NEXT:Create Garbage Collector Module Metadata
+; SPIRV-Opt-NEXT:Machine Branch Probability Analysis
+; SPIRV-Opt-NEXT: ModulePass Manager
+; SPIRV-Opt-NEXT: Pre-ISel Intrinsic Lowering
+; SPIRV-Opt-NEXT: FunctionPass Manager
+; SPIRV-Opt-NEXT: Expand large div/rem
+; SPIRV-Opt-NEXT: Expand fp
+; SPIRV-Opt-NEXT: Dominator Tree Construction
+; SPIRV-Opt-NEXT: Basic Alias Analysis (stateless AA impl)
+; SPIRV-Opt-NEXT: Natural Loop Information
+; SPIRV-Opt-NEXT: Canonicalize natural loops
+; SPIRV-Opt-NEXT: Scalar Evolution Analysis
+; SPIRV-Opt-NEXT: Loop Pass Manager
+; SPIRV-Opt-NEXT: Canonicalize Freeze Instructions in Loops
+; SPIRV-Opt-NEXT: Induction Variable Users
+; SPIRV-Opt-NEXT: Loop Strength Reduction
+; SPIRV-Opt-NEXT: Basic Alias Analysis (stateless AA impl)
+; SPIRV-Opt-NEXT: Function Alias Analysis Results
+; SPIRV-Opt-NEXT: Merge contiguous icmps into a memcmp
+; SPIRV-Opt-NEXT: Natural Loop Information
+; SPIRV-Opt-NEXT: Lazy Branch Probability Analysis
+; SPIRV-Opt-NEXT: Lazy Block Frequency Analysis
+; SPIRV-Opt-NEXT: Expand memcmp() to load/stores
+; SPIRV-Opt-NEXT: Lower Garbage Collection Instructions
+; SPIRV-Opt-NEXT: Shadow Stack GC Lowering
+; SPIRV-Opt-NEXT: Remove unreachable blocks from the CFG
+; SPIRV-Opt-NEXT: Natural Loop Information
+; SPIRV-Opt-NEXT: Post-Dominator Tree Construction
+; SPIRV-Opt-NEXT: Branch Probability Analysis
+; SPIRV-Opt-NEXT: Block Frequency Analysis
+; SPIRV-Opt-NEXT: Constant Hoisting
+; SPIRV-Opt-NEXT: Replace intrinsics with calls to vector library
+; SPIRV-Opt-NEXT: Lazy Branch Probability Analysis
+; SPIRV-Opt-NEXT: Lazy Block Frequency Analysis
+; SPIRV-Opt-NEXT: Optimization Remark Emitter
+; SPIRV-Opt-NEXT: Partially inline calls to library functions
+; SPIRV-Opt-NEXT: Instrument function entry/exit with calls to e.g. mcount() (post inlining)
+; SPIRV-Opt-NEXT: Scalarize Masked Memory Intrinsics
+; SPIRV-Opt-NEXT: Expand reduction intrinsics
+; SPIRV-Opt-NEXT: SPIR-V Regularizer
+; SPIRV-Opt-NEXT: SPIRV prepare functions
+; SPIRV-Opt-NEXT: FunctionPass Manager
+; SPIRV-Opt-NEXT: Dominator Tree Construction
+; SPIRV-Opt-NEXT: Natural Loop Information
+; SPIRV-Opt-NEXT: CodeGen Prepare
+; SPIRV-Opt-NEXT: Lower invoke and unwind, for unwindless code generators
+; SPIRV-Opt-NEXT: Remove unreachable blocks from the CFG
+; SPIRV-Opt-NEXT: SPIRV strip convergent intrinsics
+; SPIRV-Opt-NEXT: SPIRV Legalize Implicit Binding
+; SPIRV-Opt-NEXT: SPIRV CBuffer Access
+; SPIRV-Opt-NEXT: SPIRV emit intrinsics
+; SPIRV-Opt-NEXT: FunctionPass Manager
+; SPIRV-Opt-NEXT: SPIRV legalize bitcast pass
+; SPIRV-Opt-NEXT: Dominator Tree Construction
+; SPIRV-Opt-NEXT: Basic Alias Analysis (stateless AA impl)
+; SPIRV-Opt-NEXT: Function Alias Analysis Results
+; SPIRV-Opt-NEXT: ObjC ARC contraction
+; SPIRV-Opt-NEXT: Prepare callbr
+; SPIRV-Opt-NEXT: Safe Stack instrumentation pass
+; SPIRV-Opt-NEXT: Insert stack protectors
+; SPIRV-Opt-NEXT: Analysis containing CSE Info
+; SPIRV-Opt-NEXT: Natural Loop Information
+; SPIRV-Opt-NEXT: Post-Dominator Tree Construction
+; SPIRV-Opt-NEXT: Branch Probability Analysis
+; SPIRV-Opt-NEXT: Basic Alias Analysis (stateless AA impl)
+; SPIRV-Opt-NEXT: Function Alias Analysis Results
+; SPIRV-Opt-NEXT: IRTranslator
+; SPIRV-Opt-NEXT: Analysis for ComputingKnownBits
+; SPIRV-Opt-NEXT: MachineDominator Tree Construction
+; SPIRV-Opt-NEXT: SPIRVPreLegalizerCombiner
+; SPIRV-Opt-NEXT: SPIRV pre legalizer
+; SPIRV-Opt-NEXT: Analysis containing CSE Info
+; SPIRV-Opt-NEXT: Legalizer
+; SPIRV-Opt-NEXT: SPIRV post legalizer
+; SPIRV-Opt-NEXT: Analysis for ComputingKnownBits
+; SPIRV-Opt-NEXT: Lazy Branch Probability Analysis
+; SPIRV-Opt-NEXT: Lazy Block Frequency Analysis
+; SPIRV-Opt-NEXT: InstructionSelect
+; SPIRV-Opt-NEXT: ResetMachineFunction
+; SPIRV-Opt-NEXT: Finalize ISel and expand pseudo-instructions
+; SPIRV-Opt-NEXT: Lazy Machine Block Frequency Analysis
+; SPIRV-Opt-NEXT: Early Tail Duplication
+; SPIRV-Opt-NEXT: Optimize machine instruction PHIs
+; SPIRV-Opt-NEXT: Slot index numbering
+; SPIRV-Opt-NEXT: Merge disjoint stack slots
+; SPIRV-Opt-NEXT: Local Stack Slot Allocation
+; SPIRV-Opt-NEXT: Remove dead machine instructions
+; SPIRV-Opt-NEXT: MachineDominator Tree Construction
+; SPIRV-Opt-NEXT: Machine Natural Loop Construction
+; SPIRV-Opt-NEXT: Machine Block Frequency Analysis
+; SPIRV-Opt-NEXT: Early Machine Loop Invariant Code Motion
+; SPIRV-Opt-NEXT: MachineDominator Tree Construction
+; SPIRV-Opt-NEXT: Machine Block Frequency Analysis
+; SPIRV-Opt-NEXT: Machine Common Subexpression Elimination
+; SPIRV-Opt-NEXT: MachinePostDominator Tree Construction
+; SPIRV-Opt-NEXT: Machine Cycle Info Analysis
+; SPIRV-Opt-NEXT: Machine code sinking
+; SPIRV-Opt-NEXT: Peephole Optimizations
+; SPIRV-Opt-NEXT: Remove dead machine instructions
+; SPIRV-Opt-NEXT: Remove Redundant DEBUG_VALUE analysis
+; SPIRV-Opt-NEXT: Fixup Statepoint Caller Saved
+; SPIRV-Opt-NEXT: Lazy Machine Block Frequency Analysis
+; SPIRV-Opt-NEXT: Machine Optimization Remark Emitter
+; SPIRV-Opt-NEXT: Prologue/Epilogue Insertion & Frame Finalization
+; SPIRV-Opt-NEXT: Tail Duplication
+; SPIRV-Opt-NEXT: Post-RA pseudo instruction expansion pass
+; SPIRV-Opt-NEXT: Analyze Machine Code For Garbage Collection
+; SPIRV-Opt-NEXT: Insert fentry calls
+; SPIRV-Opt-NEXT: Insert XRay ops
+; SPIRV-Opt-NEXT: Machine Sanitizer Binary Metadata
+; SPIRV-Opt-NEXT: Lazy Machine Block Frequency Analysis
+; SPIRV-Opt-NEXT: Machine Optimization Remark Emitter
+; SPIRV-Opt-NEXT: Stack Frame Layout Analysis
+; SPIRV-Opt-NEXT: SPIRV module analysis
+; SPIRV-Opt-NEXT: FunctionPass Manager
+; SPIRV-Opt-NEXT: Lazy Machine Block Frequency Analysis
+; SPIRV-Opt-NEXT: Machine Optimization Remark Emitter
+; SPIRV-Opt-NEXT: SPIRV Assembly Printer
+; SPIRV-Opt-NEXT: Free MachineFunction
+
+define void @empty() {
+ ret void
+}
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/emptyblock.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/emptyblock.mir
index 021cb4c..8abe5c5 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/emptyblock.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/emptyblock.mir
@@ -8,7 +8,7 @@
--- |
%struct.DCT_InstanceTypeDef = type { ptr, i32, i32 }
-
+
; Function Attrs: nofree nounwind
define hidden arm_aapcs_vfpcc void @test(ptr nocapture readonly %S, ptr %pIn, ptr nocapture %pOut) {
entry:
@@ -41,7 +41,7 @@
%13 = call i32 @llvm.loop.decrement.reg.i32(i32 %8, i32 1)
%14 = icmp ne i32 %13, 0
br i1 %14, label %do.body, label %do.end
-
+
do.end: ; preds = %do.body
%15 = extractelement <4 x float> %11, i32 0
%16 = extractelement <4 x float> %11, i32 1
@@ -56,7 +56,7 @@
%sub4 = add i32 %1, -4
%cmp5201 = icmp ugt i32 %sub4, 1
br i1 %cmp5201, label %for.body.lr.ph, label %for.cond54.preheader
-
+
for.body.lr.ph: ; preds = %do.end
%scevgep = getelementptr float, ptr %pIn, i32 4
%20 = add i32 %0, 4
@@ -161,7 +161,7 @@
%63 = call i32 @llvm.loop.decrement.reg.i32(i32 %53, i32 1)
%64 = icmp ne i32 %63, 0
br i1 %64, label %do.body24, label %do.end33
-
+
do.end33: ; preds = %do.body24
%65 = bitcast ptr %lsr.iv27 to ptr
%66 = bitcast ptr %lsr.iv20 to ptr
@@ -254,7 +254,7 @@
%inc = add nuw i32 %k.1200, 1
%exitcond.not = icmp eq i32 %inc, %1
br i1 %exitcond.not, label %for.end72, label %for.body56
-
+
for.end72: ; preds = %do.end66, %for.cond54.preheader
ret void
}
@@ -428,28 +428,28 @@ body: |
renamable $lr = t2LoopDec killed renamable $lr, 1
t2LoopEnd renamable $lr, %bb.1, implicit-def dead $cpsr
tB %bb.2, 14 /* CC::al */, $noreg
-
+
bb.2.do.end:
successors: %bb.3(0x40000000), %bb.7(0x40000000)
liveins: $q0, $r2, $r3, $r4, $r5, $r11
-
- renamable $s4 = nnan ninf nsz arcp contract afn reassoc VADDS renamable $s0, renamable $s1, 14 /* CC::al */, $noreg
+
+ renamable $s4 = nnan ninf nsz arcp contract afn reassoc VADDS renamable $s0, renamable $s1, 14 /* CC::al */, $noreg, implicit $fpscr_rm
renamable $r0, dead $cpsr = tSUBi3 renamable $r3, 4, 14 /* CC::al */, $noreg
tSTRspi killed renamable $r3, $sp, 1, 14 /* CC::al */, $noreg :: (store (s32) into %stack.8)
- renamable $s4 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s4, renamable $s2, 14 /* CC::al */, $noreg
+ renamable $s4 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s4, renamable $s2, 14 /* CC::al */, $noreg, implicit $fpscr_rm
tSTRspi renamable $r0, $sp, 8, 14 /* CC::al */, $noreg :: (store (s32) into %stack.1)
- renamable $s0 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s4, killed renamable $s3, 14 /* CC::al */, $noreg, implicit $q0
+ renamable $s0 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s4, killed renamable $s3, 14 /* CC::al */, $noreg, implicit $q0, implicit $fpscr_rm
renamable $s2 = VLDRS renamable $r11, 0, 14 /* CC::al */, $noreg :: (load (s32) from %ir.2)
tCMPi8 killed renamable $r0, 2, 14 /* CC::al */, $noreg, implicit-def $cpsr
renamable $r0 = t2MOVi 1, 14 /* CC::al */, $noreg, $noreg
- renamable $s0 = nnan ninf nsz arcp contract afn reassoc VMULS killed renamable $s2, killed renamable $s0, 14 /* CC::al */, $noreg
+ renamable $s0 = nnan ninf nsz arcp contract afn reassoc VMULS killed renamable $s2, killed renamable $s0, 14 /* CC::al */, $noreg, implicit $fpscr_rm
VSTRS killed renamable $s0, renamable $r2, 0, 14 /* CC::al */, $noreg :: (store (s32) into %ir.pOut)
t2Bcc %bb.7, 3 /* CC::lo */, killed $cpsr
-
+
bb.3.for.body.lr.ph:
successors: %bb.4(0x80000000)
liveins: $r0, $r2, $r4, $r5, $r11
-
+
renamable $r6 = t2ADDri renamable $r5, 16, 14 /* CC::al */, $noreg, $noreg
renamable $r1, dead $cpsr = tSUBi3 renamable $r4, 4, 14 /* CC::al */, $noreg
tSTRspi killed renamable $r6, $sp, 4, 14 /* CC::al */, $noreg :: (store (s32) into %stack.5)
@@ -523,26 +523,26 @@ body: |
renamable $lr = t2LoopDec killed renamable $lr, 1
t2LoopEnd renamable $lr, %bb.5, implicit-def dead $cpsr
tB %bb.6, 14 /* CC::al */, $noreg
-
+
bb.6.do.end33:
successors: %bb.4(0x7c000000), %bb.7(0x04000000)
liveins: $q0, $q1, $q2, $q3, $r0, $r1, $r2, $r6, $r8, $r9, $r10, $r12
-
- renamable $s16 = nnan ninf nsz arcp contract afn reassoc VADDS renamable $s12, renamable $s13, 14 /* CC::al */, $noreg
- renamable $s18 = nnan ninf nsz arcp contract afn reassoc VADDS renamable $s8, renamable $s9, 14 /* CC::al */, $noreg
- renamable $s16 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s16, renamable $s14, 14 /* CC::al */, $noreg
- renamable $s18 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s18, renamable $s10, 14 /* CC::al */, $noreg
- renamable $s12 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s16, killed renamable $s15, 14 /* CC::al */, $noreg, implicit $q3
- renamable $s8 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s18, killed renamable $s11, 14 /* CC::al */, $noreg, implicit $q2
- renamable $s10 = nnan ninf nsz arcp contract afn reassoc VADDS renamable $s4, renamable $s5, 14 /* CC::al */, $noreg
- renamable $s14 = nnan ninf nsz arcp contract afn reassoc VADDS renamable $s0, renamable $s1, 14 /* CC::al */, $noreg
+
+ renamable $s16 = nnan ninf nsz arcp contract afn reassoc VADDS renamable $s12, renamable $s13, 14 /* CC::al */, $noreg, implicit $fpscr_rm
+ renamable $s18 = nnan ninf nsz arcp contract afn reassoc VADDS renamable $s8, renamable $s9, 14 /* CC::al */, $noreg, implicit $fpscr_rm
+ renamable $s16 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s16, renamable $s14, 14 /* CC::al */, $noreg, implicit $fpscr_rm
+ renamable $s18 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s18, renamable $s10, 14 /* CC::al */, $noreg, implicit $fpscr_rm
+ renamable $s12 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s16, killed renamable $s15, 14 /* CC::al */, $noreg, implicit $q3, implicit $fpscr_rm
+ renamable $s8 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s18, killed renamable $s11, 14 /* CC::al */, $noreg, implicit $q2, implicit $fpscr_rm
+ renamable $s10 = nnan ninf nsz arcp contract afn reassoc VADDS renamable $s4, renamable $s5, 14 /* CC::al */, $noreg, implicit $fpscr_rm
+ renamable $s14 = nnan ninf nsz arcp contract afn reassoc VADDS renamable $s0, renamable $s1, 14 /* CC::al */, $noreg, implicit $fpscr_rm
renamable $r7 = tLDRspi $sp, 9, 14 /* CC::al */, $noreg :: (load (s32) from %stack.0)
- renamable $s10 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s10, renamable $s6, 14 /* CC::al */, $noreg
- renamable $s14 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s14, renamable $s2, 14 /* CC::al */, $noreg
+ renamable $s10 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s10, renamable $s6, 14 /* CC::al */, $noreg, implicit $fpscr_rm
+ renamable $s14 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s14, renamable $s2, 14 /* CC::al */, $noreg, implicit $fpscr_rm
renamable $r3 = t2ADDrs renamable $r2, renamable $r0, 18, 14 /* CC::al */, $noreg, $noreg
renamable $r7 = t2ADDrs renamable $r2, killed renamable $r7, 18, 14 /* CC::al */, $noreg, $noreg
- renamable $s4 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s10, killed renamable $s7, 14 /* CC::al */, $noreg, implicit $q1
- renamable $s0 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s14, killed renamable $s3, 14 /* CC::al */, $noreg, implicit $q0
+ renamable $s4 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s10, killed renamable $s7, 14 /* CC::al */, $noreg, implicit $q1, implicit $fpscr_rm
+ renamable $s0 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s14, killed renamable $s3, 14 /* CC::al */, $noreg, implicit $q0, implicit $fpscr_rm
VSTRS killed renamable $s12, killed renamable $r3, 0, 14 /* CC::al */, $noreg :: (store (s32) into %ir.arrayidx37)
VSTRS killed renamable $s8, killed renamable $r7, 0, 14 /* CC::al */, $noreg :: (store (s32) into %ir.arrayidx42)
renamable $r3 = t2ADDrs renamable $r2, killed renamable $r8, 18, 14 /* CC::al */, $noreg, $noreg
@@ -597,7 +597,7 @@ body: |
bb.13:
successors: %bb.10(0x80000000)
liveins: $lr, $q0, $r0, $r1, $r2, $r3, $r4, $r5, $r6, $r7, $r11, $r12
-
+
bb.10.do.body59 (align 4):
successors: %bb.10(0x7c000000), %bb.11(0x04000000)
liveins: $lr, $q0, $r0, $r1, $r2, $r3, $r4, $r5, $r6, $r7, $r11, $r12
@@ -611,20 +611,20 @@ body: |
renamable $lr = t2LoopDec killed renamable $lr, 1
t2LoopEnd renamable $lr, %bb.10, implicit-def dead $cpsr
tB %bb.11, 14 /* CC::al */, $noreg
-
+
bb.11.do.end66:
successors: %bb.12(0x04000000), %bb.9(0x7c000000)
liveins: $q0, $r0, $r2, $r3, $r4, $r5, $r11, $r12
-
- renamable $s4 = nnan ninf nsz arcp contract afn reassoc VADDS renamable $s0, renamable $s1, 14 /* CC::al */, $noreg
+
+ renamable $s4 = nnan ninf nsz arcp contract afn reassoc VADDS renamable $s0, renamable $s1, 14 /* CC::al */, $noreg, implicit $fpscr_rm
renamable $r1 = t2ADDrs renamable $r2, renamable $r0, 18, 14 /* CC::al */, $noreg, $noreg
- renamable $s4 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s4, renamable $s2, 14 /* CC::al */, $noreg
+ renamable $s4 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s4, renamable $s2, 14 /* CC::al */, $noreg, implicit $fpscr_rm
renamable $r0, dead $cpsr = nuw tADDi8 killed renamable $r0, 1, 14 /* CC::al */, $noreg
- renamable $s0 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s4, killed renamable $s3, 14 /* CC::al */, $noreg, implicit $q0
+ renamable $s0 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s4, killed renamable $s3, 14 /* CC::al */, $noreg, implicit $q0, implicit $fpscr_rm
tCMPhir renamable $r0, renamable $r12, 14 /* CC::al */, $noreg, implicit-def $cpsr
VSTRS killed renamable $s0, killed renamable $r1, 0, 14 /* CC::al */, $noreg :: (store (s32) into %ir.arrayidx70)
tBcc %bb.9, 1 /* CC::ne */, killed $cpsr
-
+
bb.12.for.end72:
$sp = frame-destroy tADDspi $sp, 10, 14 /* CC::al */, $noreg
$sp = frame-destroy VLDMDIA_UPD $sp, 14 /* CC::al */, $noreg, def $d8, def $d9, def $d10, def $d11
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/it-block-mov.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/it-block-mov.mir
index 31e88ea..85b826a 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/it-block-mov.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/it-block-mov.mir
@@ -185,15 +185,15 @@ body: |
successors: %bb.5(0x80000000)
liveins: $q0, $r0, $r1, $r2, $r4
- renamable $s4 = nnan ninf nsz VADDS renamable $s0, renamable $s1, 14, $noreg
+ renamable $s4 = nnan ninf nsz VADDS renamable $s0, renamable $s1, 14, $noreg, implicit $fpscr_rm
$lr = tMOVr $r4, 14, $noreg
$r3 = tMOVr $r1, 14, $noreg
- renamable $s4 = nnan ninf nsz VADDS renamable $s2, killed renamable $s4, 14, $noreg
- renamable $s0 = nnan ninf nsz VADDS killed renamable $s3, killed renamable $s4, 14, $noreg, implicit $q0
+ renamable $s4 = nnan ninf nsz VADDS renamable $s2, killed renamable $s4, 14, $noreg, implicit $fpscr_rm
+ renamable $s0 = nnan ninf nsz VADDS killed renamable $s3, killed renamable $s4, 14, $noreg, implicit $q0, implicit $fpscr_rm
$s2 = VMOVSR $r1, 14, $noreg
renamable $s2 = VUITOS killed renamable $s2, 14, $noreg
$lr = t2DoLoopStart killed $r4
- renamable $s4 = nnan ninf nsz VDIVS killed renamable $s0, killed renamable $s2, 14, $noreg
+ renamable $s4 = nnan ninf nsz VDIVS killed renamable $s0, killed renamable $s2, 14, $noreg, implicit $fpscr_rm
renamable $q0 = MVE_VMOVimmi32 0, 0, $noreg, $noreg, undef renamable $q0
bb.5:
@@ -215,13 +215,13 @@ body: |
bb.6:
liveins: $q0, $r1, $r2
- renamable $s4 = nnan ninf nsz VADDS renamable $s0, renamable $s1, 14, $noreg
+ renamable $s4 = nnan ninf nsz VADDS renamable $s0, renamable $s1, 14, $noreg, implicit $fpscr_rm
renamable $r0, dead $cpsr = tSUBi3 killed renamable $r1, 1, 14, $noreg
- renamable $s4 = nnan ninf nsz VADDS renamable $s2, killed renamable $s4, 14, $noreg
- renamable $s0 = nnan ninf nsz VADDS killed renamable $s3, killed renamable $s4, 14, $noreg, implicit $q0
+ renamable $s4 = nnan ninf nsz VADDS renamable $s2, killed renamable $s4, 14, $noreg, implicit $fpscr_rm
+ renamable $s0 = nnan ninf nsz VADDS killed renamable $s3, killed renamable $s4, 14, $noreg, implicit $q0, implicit $fpscr_rm
$s2 = VMOVSR killed $r0, 14, $noreg
renamable $s2 = VUITOS killed renamable $s2, 14, $noreg
- renamable $s0 = nnan ninf nsz VDIVS killed renamable $s0, killed renamable $s2, 14, $noreg
+ renamable $s0 = nnan ninf nsz VDIVS killed renamable $s0, killed renamable $s2, 14, $noreg, implicit $fpscr_rm
VSTRS killed renamable $s0, killed renamable $r2, 0, 14, $noreg
tPOP_RET 14, $noreg, def $r4, def $pc
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/lstp-insertion-position.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/lstp-insertion-position.mir
index f5da7ac..780831c 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/lstp-insertion-position.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/lstp-insertion-position.mir
@@ -232,9 +232,9 @@ body: |
bb.3.middle.block:
liveins: $q1
- renamable $s0 = nnan ninf nsz arcp contract afn reassoc VADDS renamable $s6, renamable $s7, 14 /* CC::al */, $noreg
- renamable $s2 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s4, killed renamable $s5, 14 /* CC::al */, $noreg, implicit $q1
- renamable $s0 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s2, killed renamable $s0, 14 /* CC::al */, $noreg
+ renamable $s0 = nnan ninf nsz arcp contract afn reassoc VADDS renamable $s6, renamable $s7, 14 /* CC::al */, $noreg, implicit $fpscr_rm
+ renamable $s2 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s4, killed renamable $s5, 14 /* CC::al */, $noreg, implicit $q1, implicit $fpscr_rm
+ renamable $s0 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s2, killed renamable $s0, 14 /* CC::al */, $noreg, implicit $fpscr_rm
$sp = frame-destroy t2LDMIA_UPD $sp, 14 /* CC::al */, $noreg, def $r7, def $lr
tBX_RET 14 /* CC::al */, $noreg, implicit killed $s0
@@ -376,9 +376,9 @@ body: |
bb.3.middle.block:
liveins: $q1
- renamable $s0 = nnan ninf nsz arcp contract afn reassoc VADDS renamable $s6, renamable $s7, 14 /* CC::al */, $noreg
- renamable $s2 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s4, killed renamable $s5, 14 /* CC::al */, $noreg, implicit $q1
- renamable $s0 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s2, killed renamable $s0, 14 /* CC::al */, $noreg
+ renamable $s0 = nnan ninf nsz arcp contract afn reassoc VADDS renamable $s6, renamable $s7, 14 /* CC::al */, $noreg, implicit $fpscr_rm
+ renamable $s2 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s4, killed renamable $s5, 14 /* CC::al */, $noreg, implicit $q1, implicit $fpscr_rm
+ renamable $s0 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s2, killed renamable $s0, 14 /* CC::al */, $noreg, implicit $fpscr_rm
$sp = frame-destroy t2LDMIA_UPD $sp, 14 /* CC::al */, $noreg, def $r7, def $lr
tBX_RET 14 /* CC::al */, $noreg, implicit killed $s0
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-after-dlstp.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-after-dlstp.mir
index c331612..5dcd0a1 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-after-dlstp.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-after-dlstp.mir
@@ -240,10 +240,10 @@ body: |
$s4 = VMOVSR $r1, 14 /* CC::al */, $noreg
$lr = tMOVr $r4, 14 /* CC::al */, $noreg
- renamable $s0 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s3, renamable $s3, 14 /* CC::al */, $noreg, implicit $q0
+ renamable $s0 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s3, renamable $s3, 14 /* CC::al */, $noreg, implicit $q0, implicit $fpscr_rm
$lr = t2DoLoopStart killed $r4
renamable $s4 = VUITOS killed renamable $s4, 14 /* CC::al */, $noreg
- renamable $s0 = nnan ninf nsz arcp contract afn reassoc VDIVS killed renamable $s0, killed renamable $s4, 14 /* CC::al */, $noreg
+ renamable $s0 = nnan ninf nsz arcp contract afn reassoc VDIVS killed renamable $s0, killed renamable $s4, 14 /* CC::al */, $noreg, implicit $fpscr_rm
renamable $r3 = VMOVRS killed renamable $s0, 14 /* CC::al */, $noreg
renamable $q0 = MVE_VMOVimmi32 0, 0, $noreg, $noreg, undef renamable $q0
renamable $q1 = MVE_VDUP32 killed renamable $r3, 0, $noreg, $noreg, undef renamable $q1
@@ -267,10 +267,10 @@ body: |
liveins: $q0, $r1, $r2
renamable $r0, dead $cpsr = tSUBi3 killed renamable $r1, 1, 14 /* CC::al */, $noreg
- renamable $s0 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s3, renamable $s3, 14 /* CC::al */, $noreg, implicit $q0
+ renamable $s0 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s3, renamable $s3, 14 /* CC::al */, $noreg, implicit $q0, implicit $fpscr_rm
$s2 = VMOVSR killed $r0, 14 /* CC::al */, $noreg
renamable $s2 = VUITOS killed renamable $s2, 14 /* CC::al */, $noreg
- renamable $s0 = nnan ninf nsz arcp contract afn reassoc VDIVS killed renamable $s0, killed renamable $s2, 14 /* CC::al */, $noreg
+ renamable $s0 = nnan ninf nsz arcp contract afn reassoc VDIVS killed renamable $s0, killed renamable $s2, 14 /* CC::al */, $noreg, implicit $fpscr_rm
VSTRS killed renamable $s0, killed renamable $r2, 0, 14 /* CC::al */, $noreg :: (store (s32) into %ir.pResult)
frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r4, def $pc
diff --git a/llvm/test/CodeGen/Thumb2/pipeliner-inlineasm.mir b/llvm/test/CodeGen/Thumb2/pipeliner-inlineasm.mir
index 5221205..d9d2f25 100644
--- a/llvm/test/CodeGen/Thumb2/pipeliner-inlineasm.mir
+++ b/llvm/test/CodeGen/Thumb2/pipeliner-inlineasm.mir
@@ -96,7 +96,7 @@ body: |
; CHECK-NEXT: bb.6.for.body:
; CHECK-NEXT: successors: %bb.7(0x80000000), %bb.8(0x00000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[VMULS:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VMULS [[VLDRS3]], %30, 14 /* CC::al */, $noreg
+ ; CHECK-NEXT: [[VMULS:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VMULS [[VLDRS3]], %30, 14 /* CC::al */, $noreg, implicit $fpscr_rm
; CHECK-NEXT: [[t2ADDri2:%[0-9]+]]:rgpr = t2ADDri [[COPY7]], 4, 14 /* CC::al */, $noreg, $noreg
; CHECK-NEXT: [[VLDRS4:%[0-9]+]]:spr = VLDRS [[COPY7]], 1, 14 /* CC::al */, $noreg :: (load unknown-size from %ir.scevgep7, align 4)
; CHECK-NEXT: [[t2ADDri3:%[0-9]+]]:rgpr = t2ADDri [[COPY6]], 4, 14 /* CC::al */, $noreg, $noreg
@@ -119,13 +119,13 @@ body: |
; CHECK-NEXT: [[PHI4:%[0-9]+]]:spr = PHI [[VLDRS5]], %bb.6, %47, %bb.7
; CHECK-NEXT: [[PHI5:%[0-9]+]]:spr = PHI %40, %bb.6, %55, %bb.7
; CHECK-NEXT: [[PHI6:%[0-9]+]]:spr = PHI [[VMULS]], %bb.6, %45, %bb.7
- ; CHECK-NEXT: [[VMULS1:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VMULS [[PHI4]], [[PHI5]], 14 /* CC::al */, $noreg
+ ; CHECK-NEXT: [[VMULS1:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VMULS [[PHI4]], [[PHI5]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
; CHECK-NEXT: [[t2SUBri4:%[0-9]+]]:rgpr = t2SUBri [[PHI2]], 1, 14 /* CC::al */, $noreg, def $cpsr
; CHECK-NEXT: [[VLDRS6:%[0-9]+]]:spr = VLDRS [[PHI1]], 1, 14 /* CC::al */, $noreg :: (load unknown-size from %ir.scevgep3, align 4)
; CHECK-NEXT: [[VLDRS7:%[0-9]+]]:spr = VLDRS [[PHI]], 1, 14 /* CC::al */, $noreg :: (load unknown-size from %ir.scevgep7, align 4)
; CHECK-NEXT: [[t2ADDri4:%[0-9]+]]:rgpr = t2ADDri [[PHI]], 4, 14 /* CC::al */, $noreg, $noreg
; CHECK-NEXT: [[t2ADDri5:%[0-9]+]]:rgpr = t2ADDri [[PHI1]], 4, 14 /* CC::al */, $noreg, $noreg
- ; CHECK-NEXT: [[VADDS:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VADDS [[PHI6]], [[PHI3]], 14 /* CC::al */, $noreg
+ ; CHECK-NEXT: [[VADDS:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VADDS [[PHI6]], [[PHI3]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
; CHECK-NEXT: [[COPY11:%[0-9]+]]:gpr = COPY [[t2ADDri4]]
; CHECK-NEXT: [[COPY12:%[0-9]+]]:gpr = COPY [[t2ADDri5]]
; CHECK-NEXT: [[COPY13:%[0-9]+]]:gpr = COPY [[t2SUBri4]]
@@ -140,7 +140,7 @@ body: |
; CHECK-NEXT: [[PHI8:%[0-9]+]]:spr = PHI [[VLDRS5]], %bb.6, [[VLDRS6]], %bb.7
; CHECK-NEXT: [[PHI9:%[0-9]+]]:spr = PHI %40, %bb.6, %55, %bb.7
; CHECK-NEXT: [[PHI10:%[0-9]+]]:spr = PHI [[VMULS]], %bb.6, [[VMULS1]], %bb.7
- ; CHECK-NEXT: [[VADDS1:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VADDS [[PHI10]], [[PHI7]], 14 /* CC::al */, $noreg
+ ; CHECK-NEXT: [[VADDS1:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VADDS [[PHI10]], [[PHI7]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.9:
; CHECK-NEXT: successors: %bb.4(0x80000000)
@@ -148,8 +148,8 @@ body: |
; CHECK-NEXT: [[PHI11:%[0-9]+]]:spr = PHI [[VLDRS1]], %bb.5, [[VADDS1]], %bb.8
; CHECK-NEXT: [[PHI12:%[0-9]+]]:spr = PHI [[VLDRS3]], %bb.5, [[PHI8]], %bb.8
; CHECK-NEXT: [[PHI13:%[0-9]+]]:spr = PHI %30, %bb.5, [[PHI9]], %bb.8
- ; CHECK-NEXT: [[VMULS2:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VMULS [[PHI12]], [[PHI13]], 14 /* CC::al */, $noreg
- ; CHECK-NEXT: [[VADDS2:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VADDS [[VMULS2]], [[PHI11]], 14 /* CC::al */, $noreg
+ ; CHECK-NEXT: [[VMULS2:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VMULS [[PHI12]], [[PHI13]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
+ ; CHECK-NEXT: [[VADDS2:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VADDS [[VMULS2]], [[PHI11]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
; CHECK-NEXT: t2B %bb.4, 14 /* CC::al */, $noreg
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.4.for.end:
@@ -194,8 +194,8 @@ body: |
%20:rgpr = t2ADDri %3, 4, 14 /* CC::al */, $noreg, $noreg
%21:spr = VLDRS %3, 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep3)
INLINEASM &nop, 0 /* attdialect */, 196618 /* regdef:SPR */, def %25, 2147483657 /* reguse tiedto:$0 */, %19(tied-def 3)
- %22:spr = nnan ninf nsz arcp contract afn reassoc VMULS killed %21, killed %25, 14 /* CC::al */, $noreg
- %6:spr = nnan ninf nsz arcp contract afn reassoc VADDS killed %22, %5, 14 /* CC::al */, $noreg
+ %22:spr = nnan ninf nsz arcp contract afn reassoc VMULS killed %21, killed %25, 14 /* CC::al */, $noreg, implicit $fpscr_rm
+ %6:spr = nnan ninf nsz arcp contract afn reassoc VADDS killed %22, %5, 14 /* CC::al */, $noreg, implicit $fpscr_rm
%23:rgpr = t2SUBri %4, 1, 14 /* CC::al */, $noreg, def $cpsr
%7:gpr = COPY %23
%8:gpr = COPY %20
diff --git a/llvm/test/CodeGen/Thumb2/scavenge-lr.mir b/llvm/test/CodeGen/Thumb2/scavenge-lr.mir
index 5513bed..bfe55a5 100644
--- a/llvm/test/CodeGen/Thumb2/scavenge-lr.mir
+++ b/llvm/test/CodeGen/Thumb2/scavenge-lr.mir
@@ -147,10 +147,10 @@ body: |
$q5 = VLD1q64 $r3, 16, 14 /* CC::al */, $noreg :: (load (s128) from %ir.zzz..sroa_cast241, align 32)
$q1 = VMLAfq killed $q1, $q5, killed $q8, 14 /* CC::al */, $noreg
$s8 = VLDRS %const.0, 0, 14 /* CC::al */, $noreg :: (load (s32) from constant-pool)
- $s3 = VDIVS $s8, $s7, 14 /* CC::al */, $noreg, implicit-def $q0
- $s2 = VDIVS $s8, $s6, 14 /* CC::al */, $noreg, implicit killed $q0, implicit-def $q0
- $s1 = VDIVS $s8, $s5, 14 /* CC::al */, $noreg, implicit killed $q0, implicit-def $q0
- $s0 = VDIVS $s8, $s4, 14 /* CC::al */, $noreg, implicit killed $q1, implicit killed $q0, implicit-def $q0
+ $s3 = VDIVS $s8, $s7, 14 /* CC::al */, $noreg, implicit-def $q0, implicit $fpscr_rm
+ $s2 = VDIVS $s8, $s6, 14 /* CC::al */, $noreg, implicit killed $q0, implicit-def $q0, implicit $fpscr_rm
+ $s1 = VDIVS $s8, $s5, 14 /* CC::al */, $noreg, implicit killed $q0, implicit-def $q0, implicit $fpscr_rm
+ $s0 = VDIVS $s8, $s4, 14 /* CC::al */, $noreg, implicit killed $q1, implicit killed $q0, implicit-def $q0, implicit $fpscr_rm
$r7 = t2SUBri $r0, 64, 14 /* CC::al */, $noreg, $noreg
$q8 = VLD1q64 $r7, 16, 14 /* CC::al */, $noreg :: (load (s128) from %ir.yyy..sroa_cast244, align 32)
VSTMQIA $q8, %stack.1, 14 /* CC::al */, $noreg :: (store (s128) into %stack.1)
@@ -185,10 +185,10 @@ body: |
$r3 = VST1q32wb_fixed killed $r3, 16, killed $q10, 14 /* CC::al */, $noreg :: (store (s128) into %ir.zzz..sroa_cast241, align 32)
$q10 = VLD1q64 $r3, 16, 14 /* CC::al */, $noreg :: (load (s128) from %ir.zzz..sroa_cast241 + 16, basealign 32)
$q1 = VMLAfq killed $q1, $q10, killed $q8, 14 /* CC::al */, $noreg
- $s23 = VDIVS $s8, $s7, 14 /* CC::al */, $noreg, implicit-def $q5
- $s22 = VDIVS $s8, $s6, 14 /* CC::al */, $noreg, implicit killed $q5, implicit-def $q5
- $s21 = VDIVS $s8, $s5, 14 /* CC::al */, $noreg, implicit killed $q5, implicit-def $q5
- $s20 = VDIVS killed $s8, $s4, 14 /* CC::al */, $noreg, implicit killed $q1, implicit killed $q5, implicit-def $q5
+ $s23 = VDIVS $s8, $s7, 14 /* CC::al */, $noreg, implicit-def $q5, implicit $fpscr_rm
+ $s22 = VDIVS $s8, $s6, 14 /* CC::al */, $noreg, implicit killed $q5, implicit-def $q5, implicit $fpscr_rm
+ $s21 = VDIVS $s8, $s5, 14 /* CC::al */, $noreg, implicit killed $q5, implicit-def $q5, implicit $fpscr_rm
+ $s20 = VDIVS killed $s8, $s4, 14 /* CC::al */, $noreg, implicit killed $q1, implicit killed $q5, implicit-def $q5, implicit $fpscr_rm
VST1q64 killed $r5, 16, $q5, 14 /* CC::al */, $noreg :: (store (s128) into %ir.xxx..sroa_cast248 + 16, basealign 32)
VST1q64 killed $r6, 16, $q5, 14 /* CC::al */, $noreg :: (store (s128) into %ir.vvv..sroa_cast230 + 16, basealign 32)
$q8 = VLDMQIA %stack.0, 14 /* CC::al */, $noreg :: (load (s128) from %stack.0)
diff --git a/llvm/test/CodeGen/Thumb2/swp-exitbranchdir.mir b/llvm/test/CodeGen/Thumb2/swp-exitbranchdir.mir
index ba10045..20f044a 100644
--- a/llvm/test/CodeGen/Thumb2/swp-exitbranchdir.mir
+++ b/llvm/test/CodeGen/Thumb2/swp-exitbranchdir.mir
@@ -83,7 +83,7 @@ body: |
; CHECK-NEXT: [[VLDRS2:%[0-9]+]]:spr = VLDRS [[COPY4]], 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep7)
; CHECK-NEXT: [[t2ADDri1:%[0-9]+]]:rgpr = t2ADDri [[COPY3]], 4, 14 /* CC::al */, $noreg, $noreg
; CHECK-NEXT: [[VLDRS3:%[0-9]+]]:spr = VLDRS [[COPY3]], 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep3)
- ; CHECK-NEXT: [[VMULS:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VMULS [[VLDRS3]], [[VLDRS2]], 14 /* CC::al */, $noreg
+ ; CHECK-NEXT: [[VMULS:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VMULS [[VLDRS3]], [[VLDRS2]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
; CHECK-NEXT: [[t2SUBri2:%[0-9]+]]:rgpr = t2SUBri [[COPY]], 1, 14 /* CC::al */, $noreg, def $cpsr
; CHECK-NEXT: [[COPY5:%[0-9]+]]:gprnopc = COPY [[t2SUBri2]]
; CHECK-NEXT: [[COPY6:%[0-9]+]]:gprnopc = COPY [[t2ADDri1]]
@@ -98,7 +98,7 @@ body: |
; CHECK-NEXT: [[VLDRS4:%[0-9]+]]:spr = VLDRS [[COPY7]], 1, 14 /* CC::al */, $noreg :: (load unknown-size from %ir.scevgep7, align 4)
; CHECK-NEXT: [[t2ADDri3:%[0-9]+]]:rgpr = t2ADDri [[COPY6]], 4, 14 /* CC::al */, $noreg, $noreg
; CHECK-NEXT: [[VLDRS5:%[0-9]+]]:spr = VLDRS [[COPY6]], 1, 14 /* CC::al */, $noreg :: (load unknown-size from %ir.scevgep3, align 4)
- ; CHECK-NEXT: [[VMULS1:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VMULS [[VLDRS5]], [[VLDRS4]], 14 /* CC::al */, $noreg
+ ; CHECK-NEXT: [[VMULS1:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VMULS [[VLDRS5]], [[VLDRS4]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
; CHECK-NEXT: [[t2SUBri3:%[0-9]+]]:rgpr = t2SUBri [[COPY5]], 1, 14 /* CC::al */, $noreg, def $cpsr
; CHECK-NEXT: [[COPY8:%[0-9]+]]:gpr = COPY [[t2SUBri3]]
; CHECK-NEXT: [[COPY9:%[0-9]+]]:gpr = COPY [[t2ADDri3]]
@@ -115,7 +115,7 @@ body: |
; CHECK-NEXT: [[PHI3:%[0-9]+]]:spr = PHI [[VLDRS1]], %bb.6, %43, %bb.7
; CHECK-NEXT: [[PHI4:%[0-9]+]]:spr = PHI [[VMULS1]], %bb.6, %52, %bb.7
; CHECK-NEXT: [[PHI5:%[0-9]+]]:spr = PHI [[VMULS]], %bb.6, [[PHI4]], %bb.7
- ; CHECK-NEXT: [[VADDS:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VADDS [[PHI5]], [[PHI3]], 14 /* CC::al */, $noreg
+ ; CHECK-NEXT: [[VADDS:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VADDS [[PHI5]], [[PHI3]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
; CHECK-NEXT: [[t2SUBri4:%[0-9]+]]:rgpr = t2SUBri [[PHI2]], 1, 14 /* CC::al */, $noreg, def $cpsr
; CHECK-NEXT: [[VLDRS6:%[0-9]+]]:spr = VLDRS [[PHI1]], 1, 14 /* CC::al */, $noreg :: (load unknown-size from %ir.scevgep3, align 4)
; CHECK-NEXT: [[VLDRS7:%[0-9]+]]:spr = VLDRS [[PHI]], 1, 14 /* CC::al */, $noreg :: (load unknown-size from %ir.scevgep7, align 4)
@@ -124,7 +124,7 @@ body: |
; CHECK-NEXT: [[COPY11:%[0-9]+]]:gpr = COPY [[t2ADDri4]]
; CHECK-NEXT: [[COPY12:%[0-9]+]]:gpr = COPY [[t2ADDri5]]
; CHECK-NEXT: [[COPY13:%[0-9]+]]:gpr = COPY [[t2SUBri4]]
- ; CHECK-NEXT: [[VMULS2:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VMULS [[VLDRS6]], [[VLDRS7]], 14 /* CC::al */, $noreg
+ ; CHECK-NEXT: [[VMULS2:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VMULS [[VLDRS6]], [[VLDRS7]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
; CHECK-NEXT: t2Bcc %bb.8, 0 /* CC::eq */, $cpsr
; CHECK-NEXT: t2B %bb.7, 14 /* CC::al */, $noreg
; CHECK-NEXT: {{ $}}
@@ -134,14 +134,14 @@ body: |
; CHECK-NEXT: [[PHI6:%[0-9]+]]:spr = PHI [[VLDRS1]], %bb.6, [[VADDS]], %bb.7
; CHECK-NEXT: [[PHI7:%[0-9]+]]:spr = PHI [[VMULS1]], %bb.6, [[VMULS2]], %bb.7
; CHECK-NEXT: [[PHI8:%[0-9]+]]:spr = PHI [[VMULS]], %bb.6, [[PHI4]], %bb.7
- ; CHECK-NEXT: [[VADDS1:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VADDS [[PHI8]], [[PHI6]], 14 /* CC::al */, $noreg
+ ; CHECK-NEXT: [[VADDS1:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VADDS [[PHI8]], [[PHI6]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.9:
; CHECK-NEXT: successors: %bb.4(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[PHI9:%[0-9]+]]:spr = PHI [[VLDRS1]], %bb.5, [[VADDS1]], %bb.8
; CHECK-NEXT: [[PHI10:%[0-9]+]]:spr = PHI [[VMULS]], %bb.5, [[PHI7]], %bb.8
- ; CHECK-NEXT: [[VADDS2:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VADDS [[PHI10]], [[PHI9]], 14 /* CC::al */, $noreg
+ ; CHECK-NEXT: [[VADDS2:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VADDS [[PHI10]], [[PHI9]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
; CHECK-NEXT: t2B %bb.4, 14 /* CC::al */, $noreg
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.4.for.end:
@@ -185,8 +185,8 @@ body: |
%19:spr = VLDRS %2, 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep7)
%20:rgpr = t2ADDri %3, 4, 14 /* CC::al */, $noreg, $noreg
%21:spr = VLDRS %3, 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep3)
- %22:spr = nnan ninf nsz arcp contract afn reassoc VMULS killed %21, killed %19, 14 /* CC::al */, $noreg
- %6:spr = nnan ninf nsz arcp contract afn reassoc VADDS killed %22, %5, 14 /* CC::al */, $noreg
+ %22:spr = nnan ninf nsz arcp contract afn reassoc VMULS killed %21, killed %19, 14 /* CC::al */, $noreg, implicit $fpscr_rm
+ %6:spr = nnan ninf nsz arcp contract afn reassoc VADDS killed %22, %5, 14 /* CC::al */, $noreg, implicit $fpscr_rm
%23:rgpr = t2SUBri %4, 1, 14 /* CC::al */, $noreg, def $cpsr
%7:gpr = COPY %23
%8:gpr = COPY %20
diff --git a/llvm/test/CodeGen/Thumb2/swp-fixedii-le.mir b/llvm/test/CodeGen/Thumb2/swp-fixedii-le.mir
index 854c5b8..177c94e 100644
--- a/llvm/test/CodeGen/Thumb2/swp-fixedii-le.mir
+++ b/llvm/test/CodeGen/Thumb2/swp-fixedii-le.mir
@@ -84,7 +84,7 @@ body: |
; CHECK-NEXT: [[VLDRS2:%[0-9]+]]:spr = VLDRS [[COPY4]], 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep7)
; CHECK-NEXT: [[t2ADDri1:%[0-9]+]]:rgpr = t2ADDri [[COPY3]], 4, 14 /* CC::al */, $noreg, $noreg
; CHECK-NEXT: [[VLDRS3:%[0-9]+]]:spr = VLDRS [[COPY3]], 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep3)
- ; CHECK-NEXT: [[VMULS:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VMULS [[VLDRS3]], [[VLDRS2]], 14 /* CC::al */, $noreg
+ ; CHECK-NEXT: [[VMULS:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VMULS [[VLDRS3]], [[VLDRS2]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
; CHECK-NEXT: [[COPY5:%[0-9]+]]:gprlr = COPY [[t2DoLoopStart]]
; CHECK-NEXT: [[t2LoopDec:%[0-9]+]]:gprlr = t2LoopDec [[COPY5]], 1
; CHECK-NEXT: [[COPY6:%[0-9]+]]:gpr = COPY [[t2LoopDec]]
@@ -110,8 +110,8 @@ body: |
; CHECK-NEXT: [[t2ADDri3:%[0-9]+]]:rgpr = t2ADDri [[PHI1]], 4, 14 /* CC::al */, $noreg, $noreg
; CHECK-NEXT: [[COPY10:%[0-9]+]]:gpr = COPY [[t2ADDri2]]
; CHECK-NEXT: [[COPY11:%[0-9]+]]:gpr = COPY [[t2ADDri3]]
- ; CHECK-NEXT: [[VMULS1:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VMULS [[VLDRS4]], [[VLDRS5]], 14 /* CC::al */, $noreg
- ; CHECK-NEXT: [[VADDS:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VADDS [[PHI4]], [[PHI3]], 14 /* CC::al */, $noreg
+ ; CHECK-NEXT: [[VMULS1:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VMULS [[VLDRS4]], [[VLDRS5]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
+ ; CHECK-NEXT: [[VADDS:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VADDS [[PHI4]], [[PHI3]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
; CHECK-NEXT: [[COPY12:%[0-9]+]]:gpr = COPY [[t2LoopDec1]]
; CHECK-NEXT: t2LoopEnd [[t2LoopDec1]], %bb.6, implicit-def $cpsr
; CHECK-NEXT: t2B %bb.7, 14 /* CC::al */, $noreg
@@ -121,7 +121,7 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[PHI5:%[0-9]+]]:spr = PHI [[VLDRS1]], %bb.5, [[VADDS]], %bb.6
; CHECK-NEXT: [[PHI6:%[0-9]+]]:spr = PHI [[VMULS]], %bb.5, [[VMULS1]], %bb.6
- ; CHECK-NEXT: [[VADDS1:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VADDS [[PHI6]], [[PHI5]], 14 /* CC::al */, $noreg
+ ; CHECK-NEXT: [[VADDS1:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VADDS [[PHI6]], [[PHI5]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
; CHECK-NEXT: t2B %bb.4, 14 /* CC::al */, $noreg
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.4.for.end:
@@ -166,8 +166,8 @@ body: |
%19:spr = VLDRS %2, 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep7)
%20:rgpr = t2ADDri %3, 4, 14 /* CC::al */, $noreg, $noreg
%21:spr = VLDRS %3, 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep3)
- %22:spr = nnan ninf nsz arcp contract afn reassoc VMULS killed %21, killed %19, 14 /* CC::al */, $noreg
- %6:spr = nnan ninf nsz arcp contract afn reassoc VADDS killed %22, %5, 14 /* CC::al */, $noreg
+ %22:spr = nnan ninf nsz arcp contract afn reassoc VMULS killed %21, killed %19, 14 /* CC::al */, $noreg, implicit $fpscr_rm
+ %6:spr = nnan ninf nsz arcp contract afn reassoc VADDS killed %22, %5, 14 /* CC::al */, $noreg, implicit $fpscr_rm
%42:gprlr = COPY %4
%23:gprlr = t2LoopDec %42:gprlr, 1
%7:gpr = COPY %23
diff --git a/llvm/test/CodeGen/Thumb2/swp-fixedii.mir b/llvm/test/CodeGen/Thumb2/swp-fixedii.mir
index dd02703..7939717 100644
--- a/llvm/test/CodeGen/Thumb2/swp-fixedii.mir
+++ b/llvm/test/CodeGen/Thumb2/swp-fixedii.mir
@@ -83,7 +83,7 @@ body: |
; CHECK-NEXT: [[VLDRS2:%[0-9]+]]:spr = VLDRS [[COPY4]], 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep7)
; CHECK-NEXT: [[t2ADDri1:%[0-9]+]]:rgpr = t2ADDri [[COPY3]], 4, 14 /* CC::al */, $noreg, $noreg
; CHECK-NEXT: [[VLDRS3:%[0-9]+]]:spr = VLDRS [[COPY3]], 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep3)
- ; CHECK-NEXT: [[VMULS:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VMULS [[VLDRS3]], [[VLDRS2]], 14 /* CC::al */, $noreg
+ ; CHECK-NEXT: [[VMULS:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VMULS [[VLDRS3]], [[VLDRS2]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
; CHECK-NEXT: [[t2SUBri2:%[0-9]+]]:rgpr = t2SUBri [[COPY]], 1, 14 /* CC::al */, $noreg, def $cpsr
; CHECK-NEXT: [[COPY5:%[0-9]+]]:gprnopc = COPY [[t2SUBri2]]
; CHECK-NEXT: [[COPY6:%[0-9]+]]:gprnopc = COPY [[t2ADDri1]]
@@ -98,7 +98,7 @@ body: |
; CHECK-NEXT: [[VLDRS4:%[0-9]+]]:spr = VLDRS [[COPY7]], 1, 14 /* CC::al */, $noreg :: (load unknown-size from %ir.scevgep7, align 4)
; CHECK-NEXT: [[t2ADDri3:%[0-9]+]]:rgpr = t2ADDri [[COPY6]], 4, 14 /* CC::al */, $noreg, $noreg
; CHECK-NEXT: [[VLDRS5:%[0-9]+]]:spr = VLDRS [[COPY6]], 1, 14 /* CC::al */, $noreg :: (load unknown-size from %ir.scevgep3, align 4)
- ; CHECK-NEXT: [[VMULS1:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VMULS [[VLDRS5]], [[VLDRS4]], 14 /* CC::al */, $noreg
+ ; CHECK-NEXT: [[VMULS1:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VMULS [[VLDRS5]], [[VLDRS4]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
; CHECK-NEXT: [[t2SUBri3:%[0-9]+]]:rgpr = t2SUBri [[COPY5]], 1, 14 /* CC::al */, $noreg, def $cpsr
; CHECK-NEXT: [[COPY8:%[0-9]+]]:gpr = COPY [[t2SUBri3]]
; CHECK-NEXT: [[COPY9:%[0-9]+]]:gpr = COPY [[t2ADDri3]]
@@ -115,7 +115,7 @@ body: |
; CHECK-NEXT: [[PHI3:%[0-9]+]]:spr = PHI [[VLDRS1]], %bb.6, %43, %bb.7
; CHECK-NEXT: [[PHI4:%[0-9]+]]:spr = PHI [[VMULS1]], %bb.6, %52, %bb.7
; CHECK-NEXT: [[PHI5:%[0-9]+]]:spr = PHI [[VMULS]], %bb.6, [[PHI4]], %bb.7
- ; CHECK-NEXT: [[VADDS:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VADDS [[PHI5]], [[PHI3]], 14 /* CC::al */, $noreg
+ ; CHECK-NEXT: [[VADDS:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VADDS [[PHI5]], [[PHI3]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
; CHECK-NEXT: [[t2SUBri4:%[0-9]+]]:rgpr = t2SUBri [[PHI2]], 1, 14 /* CC::al */, $noreg, def $cpsr
; CHECK-NEXT: [[VLDRS6:%[0-9]+]]:spr = VLDRS [[PHI1]], 1, 14 /* CC::al */, $noreg :: (load unknown-size from %ir.scevgep3, align 4)
; CHECK-NEXT: [[VLDRS7:%[0-9]+]]:spr = VLDRS [[PHI]], 1, 14 /* CC::al */, $noreg :: (load unknown-size from %ir.scevgep7, align 4)
@@ -124,7 +124,7 @@ body: |
; CHECK-NEXT: [[COPY11:%[0-9]+]]:gpr = COPY [[t2ADDri4]]
; CHECK-NEXT: [[COPY12:%[0-9]+]]:gpr = COPY [[t2ADDri5]]
; CHECK-NEXT: [[COPY13:%[0-9]+]]:gpr = COPY [[t2SUBri4]]
- ; CHECK-NEXT: [[VMULS2:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VMULS [[VLDRS6]], [[VLDRS7]], 14 /* CC::al */, $noreg
+ ; CHECK-NEXT: [[VMULS2:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VMULS [[VLDRS6]], [[VLDRS7]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
; CHECK-NEXT: t2Bcc %bb.7, 1 /* CC::ne */, $cpsr
; CHECK-NEXT: t2B %bb.8, 14 /* CC::al */, $noreg
; CHECK-NEXT: {{ $}}
@@ -134,14 +134,14 @@ body: |
; CHECK-NEXT: [[PHI6:%[0-9]+]]:spr = PHI [[VLDRS1]], %bb.6, [[VADDS]], %bb.7
; CHECK-NEXT: [[PHI7:%[0-9]+]]:spr = PHI [[VMULS1]], %bb.6, [[VMULS2]], %bb.7
; CHECK-NEXT: [[PHI8:%[0-9]+]]:spr = PHI [[VMULS]], %bb.6, [[PHI4]], %bb.7
- ; CHECK-NEXT: [[VADDS1:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VADDS [[PHI8]], [[PHI6]], 14 /* CC::al */, $noreg
+ ; CHECK-NEXT: [[VADDS1:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VADDS [[PHI8]], [[PHI6]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.9:
; CHECK-NEXT: successors: %bb.4(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[PHI9:%[0-9]+]]:spr = PHI [[VLDRS1]], %bb.5, [[VADDS1]], %bb.8
; CHECK-NEXT: [[PHI10:%[0-9]+]]:spr = PHI [[VMULS]], %bb.5, [[PHI7]], %bb.8
- ; CHECK-NEXT: [[VADDS2:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VADDS [[PHI10]], [[PHI9]], 14 /* CC::al */, $noreg
+ ; CHECK-NEXT: [[VADDS2:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VADDS [[PHI10]], [[PHI9]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
; CHECK-NEXT: t2B %bb.4, 14 /* CC::al */, $noreg
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.4.for.end:
@@ -185,8 +185,8 @@ body: |
%19:spr = VLDRS %2, 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep7)
%20:rgpr = t2ADDri %3, 4, 14 /* CC::al */, $noreg, $noreg
%21:spr = VLDRS %3, 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep3)
- %22:spr = nnan ninf nsz arcp contract afn reassoc VMULS killed %21, killed %19, 14 /* CC::al */, $noreg
- %6:spr = nnan ninf nsz arcp contract afn reassoc VADDS killed %22, %5, 14 /* CC::al */, $noreg
+ %22:spr = nnan ninf nsz arcp contract afn reassoc VMULS killed %21, killed %19, 14 /* CC::al */, $noreg, implicit $fpscr_rm
+ %6:spr = nnan ninf nsz arcp contract afn reassoc VADDS killed %22, %5, 14 /* CC::al */, $noreg, implicit $fpscr_rm
%23:rgpr = t2SUBri %4, 1, 14 /* CC::al */, $noreg, def $cpsr
%7:gpr = COPY %23
%8:gpr = COPY %20
diff --git a/llvm/test/CodeGen/Thumb2/swp-regpressure.mir b/llvm/test/CodeGen/Thumb2/swp-regpressure.mir
index 2bcb0c9..955b53df 100644
--- a/llvm/test/CodeGen/Thumb2/swp-regpressure.mir
+++ b/llvm/test/CodeGen/Thumb2/swp-regpressure.mir
@@ -148,8 +148,8 @@ body: |
; CHECK-NEXT: [[VLDRS2:%[0-9]+]]:spr = VLDRS [[PHI]], 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep7)
; CHECK-NEXT: [[t2ADDri1:%[0-9]+]]:rgpr = t2ADDri [[PHI1]], 4, 14 /* CC::al */, $noreg, $noreg
; CHECK-NEXT: [[VLDRS3:%[0-9]+]]:spr = VLDRS [[PHI1]], 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep3)
- ; CHECK-NEXT: [[VMULS:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VMULS [[VLDRS3]], [[VLDRS2]], 14 /* CC::al */, $noreg
- ; CHECK-NEXT: [[VADDS:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VADDS [[VMULS]], [[PHI3]], 14 /* CC::al */, $noreg
+ ; CHECK-NEXT: [[VMULS:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VMULS [[VLDRS3]], [[VLDRS2]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
+ ; CHECK-NEXT: [[VADDS:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VADDS [[VMULS]], [[PHI3]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
; CHECK-NEXT: [[t2SUBri2:%[0-9]+]]:rgpr = t2SUBri [[PHI2]], 1, 14 /* CC::al */, $noreg, def $cpsr
; CHECK-NEXT: [[COPY5:%[0-9]+]]:gpr = COPY [[t2SUBri2]]
; CHECK-NEXT: [[COPY6:%[0-9]+]]:gpr = COPY [[t2ADDri1]]
@@ -236,8 +236,8 @@ body: |
%19:spr = VLDRS %2, 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep7)
%20:rgpr = t2ADDri %3, 4, 14 /* CC::al */, $noreg, $noreg
%21:spr = VLDRS %3, 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep3)
- %22:spr = nnan ninf nsz arcp contract afn reassoc VMULS killed %21, killed %19, 14 /* CC::al */, $noreg
- %6:spr = nnan ninf nsz arcp contract afn reassoc VADDS killed %22, %5, 14 /* CC::al */, $noreg
+ %22:spr = nnan ninf nsz arcp contract afn reassoc VMULS killed %21, killed %19, 14 /* CC::al */, $noreg, implicit $fpscr_rm
+ %6:spr = nnan ninf nsz arcp contract afn reassoc VADDS killed %22, %5, 14 /* CC::al */, $noreg, implicit $fpscr_rm
%23:rgpr = t2SUBri %4, 1, 14 /* CC::al */, $noreg, def $cpsr
%7:gpr = COPY %23
%8:gpr = COPY %20
@@ -314,24 +314,24 @@ body: |
; CHECK-NEXT: [[t2SUBri2:%[0-9]+]]:rgpr = t2SUBri [[COPY]], 1, 14 /* CC::al */, $noreg, def $cpsr
; CHECK-NEXT: [[COPY5:%[0-9]+]]:gprnopc = COPY [[t2SUBri2]]
; CHECK-NEXT: [[COPY6:%[0-9]+]]:rgpr = COPY [[COPY4]]
- ; CHECK-NEXT: dead %66:rgpr = COPY [[COPY4]]
- ; CHECK-NEXT: dead %67:rgpr = COPY [[COPY4]]
- ; CHECK-NEXT: dead %68:rgpr = COPY [[COPY4]]
- ; CHECK-NEXT: dead %69:rgpr = COPY [[COPY4]]
- ; CHECK-NEXT: dead %70:rgpr = COPY [[COPY4]]
- ; CHECK-NEXT: dead %71:rgpr = COPY [[COPY4]]
- ; CHECK-NEXT: dead %72:rgpr = COPY [[COPY4]]
- ; CHECK-NEXT: dead %73:rgpr = COPY [[COPY4]]
- ; CHECK-NEXT: dead %74:rgpr = COPY [[COPY4]]
- ; CHECK-NEXT: dead %75:rgpr = COPY [[COPY4]]
- ; CHECK-NEXT: dead %76:rgpr = COPY [[COPY4]]
- ; CHECK-NEXT: dead %77:rgpr = COPY [[COPY4]]
- ; CHECK-NEXT: dead %78:rgpr = COPY [[COPY4]]
- ; CHECK-NEXT: dead %79:rgpr = COPY [[COPY4]]
- ; CHECK-NEXT: dead %80:rgpr = COPY [[COPY4]]
- ; CHECK-NEXT: dead %81:rgpr = COPY [[COPY4]]
- ; CHECK-NEXT: dead %82:rgpr = COPY [[COPY4]]
- ; CHECK-NEXT: dead %83:rgpr = COPY [[COPY4]]
+ ; CHECK-NEXT: dead [[COPY7:%[0-9]+]]:rgpr = COPY [[COPY4]]
+ ; CHECK-NEXT: dead [[COPY8:%[0-9]+]]:rgpr = COPY [[COPY4]]
+ ; CHECK-NEXT: dead [[COPY9:%[0-9]+]]:rgpr = COPY [[COPY4]]
+ ; CHECK-NEXT: dead [[COPY10:%[0-9]+]]:rgpr = COPY [[COPY4]]
+ ; CHECK-NEXT: dead [[COPY11:%[0-9]+]]:rgpr = COPY [[COPY4]]
+ ; CHECK-NEXT: dead [[COPY12:%[0-9]+]]:rgpr = COPY [[COPY4]]
+ ; CHECK-NEXT: dead [[COPY13:%[0-9]+]]:rgpr = COPY [[COPY4]]
+ ; CHECK-NEXT: dead [[COPY14:%[0-9]+]]:rgpr = COPY [[COPY4]]
+ ; CHECK-NEXT: dead [[COPY15:%[0-9]+]]:rgpr = COPY [[COPY4]]
+ ; CHECK-NEXT: dead [[COPY16:%[0-9]+]]:rgpr = COPY [[COPY4]]
+ ; CHECK-NEXT: dead [[COPY17:%[0-9]+]]:rgpr = COPY [[COPY4]]
+ ; CHECK-NEXT: dead [[COPY18:%[0-9]+]]:rgpr = COPY [[COPY4]]
+ ; CHECK-NEXT: dead [[COPY19:%[0-9]+]]:rgpr = COPY [[COPY4]]
+ ; CHECK-NEXT: dead [[COPY20:%[0-9]+]]:rgpr = COPY [[COPY4]]
+ ; CHECK-NEXT: dead [[COPY21:%[0-9]+]]:rgpr = COPY [[COPY4]]
+ ; CHECK-NEXT: dead [[COPY22:%[0-9]+]]:rgpr = COPY [[COPY4]]
+ ; CHECK-NEXT: dead [[COPY23:%[0-9]+]]:rgpr = COPY [[COPY4]]
+ ; CHECK-NEXT: dead [[COPY24:%[0-9]+]]:rgpr = COPY [[COPY4]]
; CHECK-NEXT: t2Bcc %bb.9, 0 /* CC::eq */, $cpsr
; CHECK-NEXT: t2B %bb.6, 14 /* CC::al */, $noreg
; CHECK-NEXT: {{ $}}
@@ -342,82 +342,82 @@ body: |
; CHECK-NEXT: [[VLDRS2:%[0-9]+]]:spr = VLDRS [[COPY4]], 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep7)
; CHECK-NEXT: [[t2ADDri1:%[0-9]+]]:rgpr = t2ADDri [[COPY3]], 4, 14 /* CC::al */, $noreg, $noreg
; CHECK-NEXT: [[VLDRS3:%[0-9]+]]:spr = VLDRS [[COPY3]], 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep3)
- ; CHECK-NEXT: [[VMULS:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VMULS [[VLDRS3]], [[VLDRS2]], 14 /* CC::al */, $noreg
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:gpr = COPY [[t2ADDri1]]
- ; CHECK-NEXT: [[COPY8:%[0-9]+]]:gpr = COPY [[t2ADDri]]
+ ; CHECK-NEXT: [[VMULS:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VMULS [[VLDRS3]], [[VLDRS2]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
+ ; CHECK-NEXT: [[COPY25:%[0-9]+]]:gpr = COPY [[t2ADDri1]]
+ ; CHECK-NEXT: [[COPY26:%[0-9]+]]:gpr = COPY [[t2ADDri]]
; CHECK-NEXT: [[t2SUBri3:%[0-9]+]]:rgpr = t2SUBri [[COPY5]], 1, 14 /* CC::al */, $noreg, def $cpsr
- ; CHECK-NEXT: [[COPY9:%[0-9]+]]:gpr = COPY [[t2SUBri3]]
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:rgpr = COPY [[COPY6]]
- ; CHECK-NEXT: dead %94:rgpr = COPY [[COPY6]]
- ; CHECK-NEXT: dead %95:rgpr = COPY [[COPY6]]
- ; CHECK-NEXT: dead %96:rgpr = COPY [[COPY6]]
- ; CHECK-NEXT: dead %97:rgpr = COPY [[COPY6]]
- ; CHECK-NEXT: dead %98:rgpr = COPY [[COPY6]]
- ; CHECK-NEXT: dead %99:rgpr = COPY [[COPY6]]
- ; CHECK-NEXT: dead %100:rgpr = COPY [[COPY6]]
- ; CHECK-NEXT: dead %101:rgpr = COPY [[COPY6]]
- ; CHECK-NEXT: dead %102:rgpr = COPY [[COPY6]]
- ; CHECK-NEXT: dead %103:rgpr = COPY [[COPY6]]
- ; CHECK-NEXT: dead %104:rgpr = COPY [[COPY6]]
- ; CHECK-NEXT: dead %105:rgpr = COPY [[COPY6]]
- ; CHECK-NEXT: dead %106:rgpr = COPY [[COPY6]]
- ; CHECK-NEXT: dead %107:rgpr = COPY [[COPY6]]
- ; CHECK-NEXT: dead %108:rgpr = COPY [[COPY6]]
- ; CHECK-NEXT: dead %109:rgpr = COPY [[COPY6]]
- ; CHECK-NEXT: dead %110:rgpr = COPY [[COPY6]]
- ; CHECK-NEXT: dead %111:rgpr = COPY [[COPY6]]
+ ; CHECK-NEXT: [[COPY27:%[0-9]+]]:gpr = COPY [[t2SUBri3]]
+ ; CHECK-NEXT: [[COPY28:%[0-9]+]]:rgpr = COPY [[COPY6]]
+ ; CHECK-NEXT: dead [[COPY29:%[0-9]+]]:rgpr = COPY [[COPY6]]
+ ; CHECK-NEXT: dead [[COPY30:%[0-9]+]]:rgpr = COPY [[COPY6]]
+ ; CHECK-NEXT: dead [[COPY31:%[0-9]+]]:rgpr = COPY [[COPY6]]
+ ; CHECK-NEXT: dead [[COPY32:%[0-9]+]]:rgpr = COPY [[COPY6]]
+ ; CHECK-NEXT: dead [[COPY33:%[0-9]+]]:rgpr = COPY [[COPY6]]
+ ; CHECK-NEXT: dead [[COPY34:%[0-9]+]]:rgpr = COPY [[COPY6]]
+ ; CHECK-NEXT: dead [[COPY35:%[0-9]+]]:rgpr = COPY [[COPY6]]
+ ; CHECK-NEXT: dead [[COPY36:%[0-9]+]]:rgpr = COPY [[COPY6]]
+ ; CHECK-NEXT: dead [[COPY37:%[0-9]+]]:rgpr = COPY [[COPY6]]
+ ; CHECK-NEXT: dead [[COPY38:%[0-9]+]]:rgpr = COPY [[COPY6]]
+ ; CHECK-NEXT: dead [[COPY39:%[0-9]+]]:rgpr = COPY [[COPY6]]
+ ; CHECK-NEXT: dead [[COPY40:%[0-9]+]]:rgpr = COPY [[COPY6]]
+ ; CHECK-NEXT: dead [[COPY41:%[0-9]+]]:rgpr = COPY [[COPY6]]
+ ; CHECK-NEXT: dead [[COPY42:%[0-9]+]]:rgpr = COPY [[COPY6]]
+ ; CHECK-NEXT: dead [[COPY43:%[0-9]+]]:rgpr = COPY [[COPY6]]
+ ; CHECK-NEXT: dead [[COPY44:%[0-9]+]]:rgpr = COPY [[COPY6]]
+ ; CHECK-NEXT: dead [[COPY45:%[0-9]+]]:rgpr = COPY [[COPY6]]
+ ; CHECK-NEXT: dead [[COPY46:%[0-9]+]]:rgpr = COPY [[COPY6]]
; CHECK-NEXT: t2Bcc %bb.8, 0 /* CC::eq */, $cpsr
; CHECK-NEXT: t2B %bb.7, 14 /* CC::al */, $noreg
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.7.for.body:
; CHECK-NEXT: successors: %bb.8(0x04000000), %bb.7(0x7c000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[PHI:%[0-9]+]]:gprnopc = PHI [[COPY8]], %bb.6, %116, %bb.7
- ; CHECK-NEXT: [[PHI1:%[0-9]+]]:gprnopc = PHI [[COPY7]], %bb.6, %117, %bb.7
- ; CHECK-NEXT: [[PHI2:%[0-9]+]]:gprnopc = PHI [[COPY9]], %bb.6, %140, %bb.7
+ ; CHECK-NEXT: [[PHI:%[0-9]+]]:gprnopc = PHI [[COPY26]], %bb.6, %116, %bb.7
+ ; CHECK-NEXT: [[PHI1:%[0-9]+]]:gprnopc = PHI [[COPY25]], %bb.6, %117, %bb.7
+ ; CHECK-NEXT: [[PHI2:%[0-9]+]]:gprnopc = PHI [[COPY27]], %bb.6, %140, %bb.7
; CHECK-NEXT: [[PHI3:%[0-9]+]]:spr = PHI [[VLDRS1]], %bb.6, %137, %bb.7
- ; CHECK-NEXT: [[PHI4:%[0-9]+]]:gprnopc = PHI [[COPY10]], %bb.6, %139, %bb.7
+ ; CHECK-NEXT: [[PHI4:%[0-9]+]]:gprnopc = PHI [[COPY28]], %bb.6, %139, %bb.7
; CHECK-NEXT: [[PHI5:%[0-9]+]]:spr = PHI [[VMULS]], %bb.6, %118, %bb.7
; CHECK-NEXT: [[VLDRS4:%[0-9]+]]:spr = VLDRS [[PHI1]], 1, 14 /* CC::al */, $noreg :: (load unknown-size from %ir.scevgep3, align 4)
; CHECK-NEXT: [[VLDRS5:%[0-9]+]]:spr = VLDRS [[PHI]], 1, 14 /* CC::al */, $noreg :: (load unknown-size from %ir.scevgep7, align 4)
; CHECK-NEXT: [[t2ADDri2:%[0-9]+]]:rgpr = t2ADDri [[PHI]], 4, 14 /* CC::al */, $noreg, $noreg
; CHECK-NEXT: [[t2ADDri3:%[0-9]+]]:rgpr = t2ADDri [[PHI1]], 4, 14 /* CC::al */, $noreg, $noreg
- ; CHECK-NEXT: [[COPY11:%[0-9]+]]:gpr = COPY [[t2ADDri2]]
- ; CHECK-NEXT: [[COPY12:%[0-9]+]]:gpr = COPY [[t2ADDri3]]
- ; CHECK-NEXT: [[VMULS1:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VMULS [[VLDRS4]], [[VLDRS5]], 14 /* CC::al */, $noreg
- ; CHECK-NEXT: dead %119:rgpr = COPY [[PHI4]]
- ; CHECK-NEXT: dead %120:rgpr = COPY [[PHI4]]
- ; CHECK-NEXT: dead %121:rgpr = COPY [[PHI4]]
- ; CHECK-NEXT: dead %122:rgpr = COPY [[PHI4]]
- ; CHECK-NEXT: dead %123:rgpr = COPY [[PHI4]]
- ; CHECK-NEXT: dead %124:rgpr = COPY [[PHI4]]
- ; CHECK-NEXT: dead %125:rgpr = COPY [[PHI4]]
- ; CHECK-NEXT: dead %126:rgpr = COPY [[PHI4]]
- ; CHECK-NEXT: dead %127:rgpr = COPY [[PHI4]]
- ; CHECK-NEXT: dead %128:rgpr = COPY [[PHI4]]
- ; CHECK-NEXT: dead %129:rgpr = COPY [[PHI4]]
- ; CHECK-NEXT: dead %130:rgpr = COPY [[PHI4]]
- ; CHECK-NEXT: dead %131:rgpr = COPY [[PHI4]]
- ; CHECK-NEXT: dead %132:rgpr = COPY [[PHI4]]
- ; CHECK-NEXT: dead %133:rgpr = COPY [[PHI4]]
- ; CHECK-NEXT: dead %134:rgpr = COPY [[PHI4]]
- ; CHECK-NEXT: dead %135:rgpr = COPY [[PHI4]]
- ; CHECK-NEXT: dead %136:rgpr = COPY [[PHI4]]
- ; CHECK-NEXT: [[VADDS:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VADDS [[PHI5]], [[PHI3]], 14 /* CC::al */, $noreg
+ ; CHECK-NEXT: [[COPY47:%[0-9]+]]:gpr = COPY [[t2ADDri2]]
+ ; CHECK-NEXT: [[COPY48:%[0-9]+]]:gpr = COPY [[t2ADDri3]]
+ ; CHECK-NEXT: [[VMULS1:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VMULS [[VLDRS4]], [[VLDRS5]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
+ ; CHECK-NEXT: dead [[COPY49:%[0-9]+]]:rgpr = COPY [[PHI4]]
+ ; CHECK-NEXT: dead [[COPY50:%[0-9]+]]:rgpr = COPY [[PHI4]]
+ ; CHECK-NEXT: dead [[COPY51:%[0-9]+]]:rgpr = COPY [[PHI4]]
+ ; CHECK-NEXT: dead [[COPY52:%[0-9]+]]:rgpr = COPY [[PHI4]]
+ ; CHECK-NEXT: dead [[COPY53:%[0-9]+]]:rgpr = COPY [[PHI4]]
+ ; CHECK-NEXT: dead [[COPY54:%[0-9]+]]:rgpr = COPY [[PHI4]]
+ ; CHECK-NEXT: dead [[COPY55:%[0-9]+]]:rgpr = COPY [[PHI4]]
+ ; CHECK-NEXT: dead [[COPY56:%[0-9]+]]:rgpr = COPY [[PHI4]]
+ ; CHECK-NEXT: dead [[COPY57:%[0-9]+]]:rgpr = COPY [[PHI4]]
+ ; CHECK-NEXT: dead [[COPY58:%[0-9]+]]:rgpr = COPY [[PHI4]]
+ ; CHECK-NEXT: dead [[COPY59:%[0-9]+]]:rgpr = COPY [[PHI4]]
+ ; CHECK-NEXT: dead [[COPY60:%[0-9]+]]:rgpr = COPY [[PHI4]]
+ ; CHECK-NEXT: dead [[COPY61:%[0-9]+]]:rgpr = COPY [[PHI4]]
+ ; CHECK-NEXT: dead [[COPY62:%[0-9]+]]:rgpr = COPY [[PHI4]]
+ ; CHECK-NEXT: dead [[COPY63:%[0-9]+]]:rgpr = COPY [[PHI4]]
+ ; CHECK-NEXT: dead [[COPY64:%[0-9]+]]:rgpr = COPY [[PHI4]]
+ ; CHECK-NEXT: dead [[COPY65:%[0-9]+]]:rgpr = COPY [[PHI4]]
+ ; CHECK-NEXT: dead [[COPY66:%[0-9]+]]:rgpr = COPY [[PHI4]]
+ ; CHECK-NEXT: [[VADDS:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VADDS [[PHI5]], [[PHI3]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
; CHECK-NEXT: [[t2SUBri4:%[0-9]+]]:rgpr = t2SUBri [[PHI2]], 1, 14 /* CC::al */, $noreg, def $cpsr
- ; CHECK-NEXT: [[COPY13:%[0-9]+]]:rgpr = COPY [[PHI4]]
- ; CHECK-NEXT: [[COPY14:%[0-9]+]]:gpr = COPY [[t2SUBri4]]
+ ; CHECK-NEXT: [[COPY67:%[0-9]+]]:rgpr = COPY [[PHI4]]
+ ; CHECK-NEXT: [[COPY68:%[0-9]+]]:gpr = COPY [[t2SUBri4]]
; CHECK-NEXT: t2Bcc %bb.7, 1 /* CC::ne */, $cpsr
; CHECK-NEXT: t2B %bb.8, 14 /* CC::al */, $noreg
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.8:
; CHECK-NEXT: successors: %bb.9(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[PHI6:%[0-9]+]]:gprnopc = PHI [[COPY8]], %bb.6, [[COPY11]], %bb.7
- ; CHECK-NEXT: [[PHI7:%[0-9]+]]:gprnopc = PHI [[COPY7]], %bb.6, [[COPY12]], %bb.7
+ ; CHECK-NEXT: [[PHI6:%[0-9]+]]:gprnopc = PHI [[COPY26]], %bb.6, [[COPY47]], %bb.7
+ ; CHECK-NEXT: [[PHI7:%[0-9]+]]:gprnopc = PHI [[COPY25]], %bb.6, [[COPY48]], %bb.7
; CHECK-NEXT: [[PHI8:%[0-9]+]]:spr = PHI [[VLDRS1]], %bb.6, [[VADDS]], %bb.7
; CHECK-NEXT: [[PHI9:%[0-9]+]]:spr = PHI [[VMULS]], %bb.6, [[VMULS1]], %bb.7
- ; CHECK-NEXT: [[VADDS1:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VADDS [[PHI9]], [[PHI8]], 14 /* CC::al */, $noreg
+ ; CHECK-NEXT: [[VADDS1:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VADDS [[PHI9]], [[PHI8]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.9:
; CHECK-NEXT: successors: %bb.4(0x80000000)
@@ -427,8 +427,8 @@ body: |
; CHECK-NEXT: [[PHI12:%[0-9]+]]:spr = PHI [[VLDRS1]], %bb.5, [[VADDS1]], %bb.8
; CHECK-NEXT: [[VLDRS6:%[0-9]+]]:spr = VLDRS [[PHI10]], 1, 14 /* CC::al */, $noreg :: (load unknown-size from %ir.scevgep7, align 4)
; CHECK-NEXT: [[VLDRS7:%[0-9]+]]:spr = VLDRS [[PHI11]], 1, 14 /* CC::al */, $noreg :: (load unknown-size from %ir.scevgep3, align 4)
- ; CHECK-NEXT: [[VMULS2:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VMULS [[VLDRS7]], [[VLDRS6]], 14 /* CC::al */, $noreg
- ; CHECK-NEXT: [[VADDS2:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VADDS [[VMULS2]], [[PHI12]], 14 /* CC::al */, $noreg
+ ; CHECK-NEXT: [[VMULS2:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VMULS [[VLDRS7]], [[VLDRS6]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
+ ; CHECK-NEXT: [[VADDS2:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VADDS [[VMULS2]], [[PHI12]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
; CHECK-NEXT: t2B %bb.4, 14 /* CC::al */, $noreg
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.4.for.end:
@@ -491,8 +491,8 @@ body: |
%19:spr = VLDRS %2, 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep7)
%20:rgpr = t2ADDri %3, 4, 14 /* CC::al */, $noreg, $noreg
%21:spr = VLDRS %3, 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep3)
- %22:spr = nnan ninf nsz arcp contract afn reassoc VMULS killed %21, killed %19, 14 /* CC::al */, $noreg
- %6:spr = nnan ninf nsz arcp contract afn reassoc VADDS killed %22, %5, 14 /* CC::al */, $noreg
+ %22:spr = nnan ninf nsz arcp contract afn reassoc VMULS killed %21, killed %19, 14 /* CC::al */, $noreg, implicit $fpscr_rm
+ %6:spr = nnan ninf nsz arcp contract afn reassoc VADDS killed %22, %5, 14 /* CC::al */, $noreg, implicit $fpscr_rm
%23:rgpr = t2SUBri %4, 1, 14 /* CC::al */, $noreg, def $cpsr
%7:gpr = COPY %23
%8:gpr = COPY %20
diff --git a/llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll b/llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll
index 172ff53..e562c4a 100644
--- a/llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll
@@ -132,4 +132,17 @@ define i32 @all_true_2_4_i32(<4 x i32> %v) {
ret i32 %conv3
}
+; Regression test for the intrinsic pattern matcher with nullary intrinsics
+define i64 @other_intrinsic() #0 {
+; CHECK-LABEL: other_intrinsic:
+; CHECK: .functype other_intrinsic () -> (i64)
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: global.get $push0=, __tls_align
+; CHECK-NEXT: return $pop0
+entry:
+ %0 = call i64 @llvm.wasm.tls.align.i64()
+ ret i64 %0
+}
+
+attributes #0 = { "target-features"="+atomics" }