aboutsummaryrefslogtreecommitdiff
path: root/llvm/test/CodeGen
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/CodeGen')
-rw-r--r--llvm/test/CodeGen/AArch64/GlobalISel/combine-fconstant.mir21
-rw-r--r--llvm/test/CodeGen/AArch64/GlobalISel/legalize-constant.mir5
-rw-r--r--llvm/test/CodeGen/AArch64/GlobalISel/legalize-fp16-fconstant.mir6
-rw-r--r--llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll13
-rw-r--r--llvm/test/CodeGen/AArch64/arm64-saddlp1d-uaddlp1d.mir50
-rw-r--r--llvm/test/CodeGen/AArch64/arm64-vadd.ll5
-rw-r--r--llvm/test/CodeGen/AArch64/f16-instructions.ll21
-rw-r--r--llvm/test/CodeGen/AArch64/fcvt-fixed.ll112
-rw-r--r--llvm/test/CodeGen/AArch64/fdiv-combine.ll152
-rw-r--r--llvm/test/CodeGen/AArch64/vecreduce-fadd-strict.ll20
-rw-r--r--llvm/test/CodeGen/AArch64/vecreduce-fadd.ll42
-rw-r--r--llvm/test/CodeGen/AArch64/vecreduce-fmul-strict.ll12
-rw-r--r--llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll4
-rw-r--r--llvm/test/CodeGen/AMDGPU/amdgpu-attributor-min-agpr-alloc.ll1066
-rw-r--r--llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll264
-rw-r--r--llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll40
-rw-r--r--llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll26
-rw-r--r--llvm/test/CodeGen/AMDGPU/annotate-kernel-features.ll18
-rw-r--r--llvm/test/CodeGen/AMDGPU/atomic_load_local.ll132
-rw-r--r--llvm/test/CodeGen/AMDGPU/atomic_store_local.ll196
-rw-r--r--llvm/test/CodeGen/AMDGPU/attributor-flatscratchinit-undefined-behavior.ll4
-rw-r--r--llvm/test/CodeGen/AMDGPU/attributor-flatscratchinit.ll38
-rw-r--r--llvm/test/CodeGen/AMDGPU/bf16.ll3693
-rw-r--r--llvm/test/CodeGen/AMDGPU/calling-conventions.ll6
-rw-r--r--llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll2
-rw-r--r--llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll2
-rw-r--r--llvm/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll6
-rw-r--r--llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll10
-rw-r--r--llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll738
-rw-r--r--llvm/test/CodeGen/AMDGPU/fsub.ll38
-rw-r--r--llvm/test/CodeGen/AMDGPU/implicitarg-offset-attributes.ll26
-rw-r--r--llvm/test/CodeGen/AMDGPU/indirect-call-set-from-other-function.ll2
-rw-r--r--llvm/test/CodeGen/AMDGPU/issue120256-annotate-constexpr-addrspacecast.ll4
-rw-r--r--llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.f16.ll24
-rw-r--r--llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rcp.bf16.ll14
-rw-r--r--llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.bf16.ll6
-rw-r--r--llvm/test/CodeGen/AMDGPU/llvm.amdgcn.tanh.ll12
-rw-r--r--llvm/test/CodeGen/AMDGPU/mad_int24.ll313
-rw-r--r--llvm/test/CodeGen/AMDGPU/mad_uint24.ll1492
-rw-r--r--llvm/test/CodeGen/AMDGPU/min.ll92
-rw-r--r--llvm/test/CodeGen/AMDGPU/minmax.ll20
-rw-r--r--llvm/test/CodeGen/AMDGPU/propagate-flat-work-group-size.ll18
-rw-r--r--llvm/test/CodeGen/AMDGPU/propagate-waves-per-eu.ll42
-rw-r--r--llvm/test/CodeGen/AMDGPU/recursive_global_initializer.ll2
-rw-r--r--llvm/test/CodeGen/AMDGPU/remove-no-kernel-id-attribute.ll10
-rw-r--r--llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.v2f16.ll4064
-rw-r--r--llvm/test/CodeGen/AMDGPU/simple-indirect-call-2.ll12
-rw-r--r--llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll2
-rw-r--r--llvm/test/CodeGen/AMDGPU/uniform-work-group-attribute-missing.ll2
-rw-r--r--llvm/test/CodeGen/AMDGPU/uniform-work-group-multistep.ll4
-rw-r--r--llvm/test/CodeGen/AMDGPU/uniform-work-group-nested-function-calls.ll4
-rw-r--r--llvm/test/CodeGen/AMDGPU/uniform-work-group-prevent-attribute-propagation.ll4
-rw-r--r--llvm/test/CodeGen/AMDGPU/uniform-work-group-propagate-attribute.ll4
-rw-r--r--llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll6
-rw-r--r--llvm/test/CodeGen/AMDGPU/uniform-work-group-test.ll2
-rw-r--r--llvm/test/CodeGen/ARM/GlobalISel/arm-instruction-select-combos.mir16
-rw-r--r--llvm/test/CodeGen/ARM/GlobalISel/arm-select-copy_to_regclass-of-fptosi.mir2
-rw-r--r--llvm/test/CodeGen/ARM/GlobalISel/select-fp.mir388
-rw-r--r--llvm/test/CodeGen/ARM/GlobalISel/select-pr35926.mir2
-rw-r--r--llvm/test/CodeGen/ARM/bf16_fast_math.ll18
-rw-r--r--llvm/test/CodeGen/ARM/cortex-m7-wideops.mir17
-rw-r--r--llvm/test/CodeGen/ARM/fp16-litpool-arm.mir2
-rw-r--r--llvm/test/CodeGen/ARM/fp16-litpool-thumb.mir2
-rw-r--r--llvm/test/CodeGen/ARM/fp16-litpool2-arm.mir2
-rw-r--r--llvm/test/CodeGen/ARM/fp16-litpool3-arm.mir2
-rw-r--r--llvm/test/CodeGen/ARM/fp16_fast_math.ll86
-rw-r--r--llvm/test/CodeGen/ARM/ipra-reg-usage.ll2
-rw-r--r--llvm/test/CodeGen/ARM/llrint-conv.ll11
-rw-r--r--llvm/test/CodeGen/ARM/lrint-conv.ll48
-rw-r--r--llvm/test/CodeGen/ARM/misched-prevent-erase-history-of-subunits.mir4
-rw-r--r--llvm/test/CodeGen/ARM/vector-lrint.ll1301
-rw-r--r--llvm/test/CodeGen/ARM/vlldm-vlstm-uops.mir4
-rw-r--r--llvm/test/CodeGen/BPF/addr-space-memintrinsic-gep.ll60
-rw-r--r--llvm/test/CodeGen/BPF/addr-space-memintrinsic-no-gep.ll49
-rw-r--r--llvm/test/CodeGen/MIR2Vec/Inputs/mir2vec_dummy_2D_vocab.json11
-rw-r--r--llvm/test/CodeGen/MIR2Vec/Inputs/mir2vec_inconsistent_dims.json7
-rw-r--r--llvm/test/CodeGen/MIR2Vec/Inputs/mir2vec_invalid_vocab.json5
-rw-r--r--llvm/test/CodeGen/MIR2Vec/Inputs/mir2vec_zero_vocab.json12
-rw-r--r--llvm/test/CodeGen/MIR2Vec/Inputs/reference_x86_vocab_print.txt6882
-rw-r--r--llvm/test/CodeGen/MIR2Vec/Inputs/reference_x86_vocab_wo=0.5_print.txt6882
-rw-r--r--llvm/test/CodeGen/MIR2Vec/vocab-basic.ll14
-rw-r--r--llvm/test/CodeGen/MIR2Vec/vocab-error-handling.ll15
-rw-r--r--llvm/test/CodeGen/RISCV/GlobalISel/atomic-load-store-fp.ll118
-rw-r--r--llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomic-cmpxchg-rv32.mir8
-rw-r--r--llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomic-cmpxchg-rv64.mir10
-rw-r--r--llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomicrmw-add-sub-rv32.mir12
-rw-r--r--llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomicrmw-add-sub-rv64.mir16
-rw-r--r--llvm/test/CodeGen/RISCV/atomic-rmw.ll14130
-rw-r--r--llvm/test/CodeGen/RISCV/attributes.ll6
-rw-r--r--llvm/test/CodeGen/RISCV/float-imm.ll8
-rw-r--r--llvm/test/CodeGen/RISCV/half-imm.ll76
-rw-r--r--llvm/test/CodeGen/SPARC/atomicrmw-uinc-udec-wrap.ll24
-rw-r--r--llvm/test/CodeGen/SPARC/atomics-ordering.ll446
-rw-r--r--llvm/test/CodeGen/SPIRV/llc-pipeline.ll214
-rw-r--r--llvm/test/CodeGen/Thumb2/LowOverheadLoops/emptyblock.mir68
-rw-r--r--llvm/test/CodeGen/Thumb2/LowOverheadLoops/it-block-mov.mir16
-rw-r--r--llvm/test/CodeGen/Thumb2/LowOverheadLoops/lstp-insertion-position.mir12
-rw-r--r--llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-after-dlstp.mir8
-rw-r--r--llvm/test/CodeGen/Thumb2/pipeliner-inlineasm.mir16
-rw-r--r--llvm/test/CodeGen/Thumb2/scavenge-lr.mir16
-rw-r--r--llvm/test/CodeGen/Thumb2/swp-exitbranchdir.mir16
-rw-r--r--llvm/test/CodeGen/Thumb2/swp-fixedii-le.mir12
-rw-r--r--llvm/test/CodeGen/Thumb2/swp-fixedii.mir16
-rw-r--r--llvm/test/CodeGen/Thumb2/swp-regpressure.mir160
-rw-r--r--llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll48
-rw-r--r--llvm/test/CodeGen/WebAssembly/saturating-truncation.ll87
-rw-r--r--llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll13
-rw-r--r--llvm/test/CodeGen/X86/ptrtoaddr-fast-isel.ll11
108 files changed, 35134 insertions, 9200 deletions
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-fconstant.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-fconstant.mir
index 6362ed6..9381f0f4 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-fconstant.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-fconstant.mir
@@ -1,11 +1,12 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs -mtriple aarch64-unknown-unknown %s -o - | FileCheck %s
-# RUN: llc -debugify-and-strip-all-safe -run-pass=aarch64-prelegalizer-combiner -verify-machineinstrs -mtriple aarch64-unknown-unknown %s -o - | FileCheck %s
+# RUN: llc -run-pass=aarch64-postlegalizer-lowering -verify-machineinstrs -mtriple aarch64-unknown-unknown %s -o - | FileCheck %s
+# RUN: llc -debugify-and-strip-all-safe -run-pass=aarch64-postlegalizer-lowering -verify-machineinstrs -mtriple aarch64-unknown-unknown %s -o - | FileCheck %s
...
---
name: fconstant_to_constant_s32
alignment: 4
tracksRegLiveness: true
+legalized: true
frameInfo:
maxAlignment: 1
machineFunctionInfo: {}
@@ -24,16 +25,17 @@ body: |
; CHECK-NEXT: G_STORE [[C]](s32), [[PTR_ADD]](p0) :: (store (s32))
; CHECK-NEXT: RET_ReallyLR
%0:_(p0) = COPY $x0
- %3:_(s32) = G_FCONSTANT float 0x3FA99999A0000000
- %1:_(s64) = G_CONSTANT i64 524
- %2:_(p0) = G_PTR_ADD %0, %1(s64)
- G_STORE %3(s32), %2(p0) :: (store (s32))
+ %1:_(s32) = G_FCONSTANT float 0x3FA99999A0000000
+ %2:_(s64) = G_CONSTANT i64 524
+ %3:_(p0) = G_PTR_ADD %0, %2(s64)
+ G_STORE %1(s32), %3(p0) :: (store (s32))
RET_ReallyLR
...
---
name: fconstant_to_constant_s64
alignment: 4
tracksRegLiveness: true
+legalized: true
frameInfo:
maxAlignment: 1
machineFunctionInfo: {}
@@ -48,7 +50,7 @@ body: |
; CHECK-NEXT: G_STORE %c(s64), %ptr(p0) :: (store (s64))
; CHECK-NEXT: RET_ReallyLR
%ptr:_(p0) = COPY $x0
- %c:_(s64) = G_FCONSTANT double 0.0
+ %c:_(s64) = G_FCONSTANT double 0.000000e+00
G_STORE %c(s64), %ptr(p0) :: (store (s64))
RET_ReallyLR
...
@@ -56,6 +58,7 @@ body: |
name: no_store_means_no_combine
alignment: 4
tracksRegLiveness: true
+legalized: true
frameInfo:
maxAlignment: 1
machineFunctionInfo: {}
@@ -71,7 +74,7 @@ body: |
; CHECK-NEXT: %add:_(s64) = G_FADD %v, %c
; CHECK-NEXT: RET_ReallyLR implicit %add(s64)
%v:_(s64) = COPY $x0
- %c:_(s64) = G_FCONSTANT double 0.0
+ %c:_(s64) = G_FCONSTANT double 0.000000e+00
%add:_(s64) = G_FADD %v, %c
- RET_ReallyLR implicit %add
+ RET_ReallyLR implicit %add(s64)
...
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-constant.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-constant.mir
index c301e76..c00ce22 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-constant.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-constant.mir
@@ -48,8 +48,9 @@ body: |
; CHECK-NEXT: $w0 = COPY [[C]](s32)
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 2.000000e+00
; CHECK-NEXT: $x0 = COPY [[C1]](s64)
- ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; CHECK-NEXT: $w0 = COPY [[C2]](s32)
+ ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s16) = G_FCONSTANT half 0xH0000
+ ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[C2]](s16)
+ ; CHECK-NEXT: $w0 = COPY [[ANYEXT]](s32)
%0:_(s32) = G_FCONSTANT float 1.0
$w0 = COPY %0
%1:_(s64) = G_FCONSTANT double 2.0
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fp16-fconstant.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fp16-fconstant.mir
index ddf219d..c6df345 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fp16-fconstant.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalize-fp16-fconstant.mir
@@ -8,7 +8,7 @@ tracksRegLiveness: true
body: |
bb.0:
; NO-FP16-LABEL: name: fp16
- ; NO-FP16: %cst:_(s16) = G_CONSTANT i16 0
+ ; NO-FP16: %cst:_(s16) = G_FCONSTANT half 0xH0000
; NO-FP16-NEXT: $h0 = COPY %cst(s16)
; NO-FP16-NEXT: RET_ReallyLR implicit $h0
;
@@ -26,7 +26,7 @@ tracksRegLiveness: true
body: |
bb.0:
; NO-FP16-LABEL: name: fp16_non_zero
- ; NO-FP16: %cst:_(s16) = G_CONSTANT i16 16384
+ ; NO-FP16: %cst:_(s16) = G_FCONSTANT half 0xH4000
; NO-FP16-NEXT: $h0 = COPY %cst(s16)
; NO-FP16-NEXT: RET_ReallyLR implicit $h0
;
@@ -44,7 +44,7 @@ tracksRegLiveness: true
body: |
bb.1.entry:
; NO-FP16-LABEL: name: nan
- ; NO-FP16: %cst:_(s16) = G_CONSTANT i16 31745
+ ; NO-FP16: %cst:_(s16) = G_FCONSTANT half 0xH7C01
; NO-FP16-NEXT: %ext:_(s32) = G_FPEXT %cst(s16)
; NO-FP16-NEXT: $w0 = COPY %ext(s32)
; NO-FP16-NEXT: RET_ReallyLR implicit $w0
diff --git a/llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll b/llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll
index cb5df07..322a96a 100644
--- a/llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-indexed-memory.ll
@@ -739,15 +739,14 @@ define ptr @postidx32_shalf(ptr %src, ptr %out, half %a) {
;
; GISEL-LABEL: postidx32_shalf:
; GISEL: ; %bb.0:
-; GISEL-NEXT: mov w8, #0 ; =0x0
-; GISEL-NEXT: ldr h1, [x0], #4
-; GISEL-NEXT: fmov s2, w8
+; GISEL-NEXT: movi d1, #0000000000000000
+; GISEL-NEXT: ldr h2, [x0], #4
; GISEL-NEXT: ; kill: def $h0 killed $h0 def $s0
; GISEL-NEXT: fmov w9, s0
-; GISEL-NEXT: fcvt s3, h1
-; GISEL-NEXT: fmov w8, s1
-; GISEL-NEXT: fcvt s2, h2
-; GISEL-NEXT: fcmp s3, s2
+; GISEL-NEXT: fcvt s3, h2
+; GISEL-NEXT: fmov w8, s2
+; GISEL-NEXT: fcvt s1, h1
+; GISEL-NEXT: fcmp s3, s1
; GISEL-NEXT: csel w8, w8, w9, mi
; GISEL-NEXT: strh w8, [x1]
; GISEL-NEXT: ret
diff --git a/llvm/test/CodeGen/AArch64/arm64-saddlp1d-uaddlp1d.mir b/llvm/test/CodeGen/AArch64/arm64-saddlp1d-uaddlp1d.mir
new file mode 100644
index 0000000..074f75a
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/arm64-saddlp1d-uaddlp1d.mir
@@ -0,0 +1,50 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 6
+# RUN: llc -mtriple=aarch64 -run-pass=regbankselect -verify-machineinstrs %s -o - | FileCheck %s
+
+---
+name: saddlp1d
+legalized: true
+regBankSelected: false
+tracksRegLiveness: true
+body: |
+ bb.1.entry:
+ liveins: $x0
+
+ ; CHECK-LABEL: name: saddlp1d
+ ; CHECK: liveins: $x0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr(p0) = COPY $x0
+ ; CHECK-NEXT: [[LOAD:%[0-9]+]]:fpr(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load (<2 x s32>))
+ ; CHECK-NEXT: [[SADDLP:%[0-9]+]]:fpr(s64) = G_SADDLP [[LOAD]]
+ ; CHECK-NEXT: $d0 = COPY [[SADDLP]](s64)
+ ; CHECK-NEXT: RET_ReallyLR implicit $d0
+ %0:_(p0) = COPY $x0
+ %1:_(<2 x s32>) = G_LOAD %0(p0) :: (load (<2 x s32>))
+ %2:_(s64) = G_SADDLP %1
+ $d0 = COPY %2(s64)
+ RET_ReallyLR implicit $d0
+...
+---
+name: uaddlp1d
+legalized: true
+regBankSelected: false
+failedISel: false
+tracksRegLiveness: true
+body: |
+ bb.1.entry:
+ liveins: $x0
+
+ ; CHECK-LABEL: name: uaddlp1d
+ ; CHECK: liveins: $x0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr(p0) = COPY $x0
+ ; CHECK-NEXT: [[LOAD:%[0-9]+]]:fpr(<2 x s32>) = G_LOAD [[COPY]](p0) :: (load (<2 x s32>))
+ ; CHECK-NEXT: [[UADDLP:%[0-9]+]]:fpr(s64) = G_UADDLP [[LOAD]]
+ ; CHECK-NEXT: $d0 = COPY [[UADDLP]](s64)
+ ; CHECK-NEXT: RET_ReallyLR implicit $d0
+ %0:_(p0) = COPY $x0
+ %1:_(<2 x s32>) = G_LOAD %0(p0) :: (load (<2 x s32>))
+ %2:_(s64) = G_UADDLP %1
+ $d0 = COPY %2(s64)
+ RET_ReallyLR implicit $d0
+...
diff --git a/llvm/test/CodeGen/AArch64/arm64-vadd.ll b/llvm/test/CodeGen/AArch64/arm64-vadd.ll
index 938712a..3cf0115 100644
--- a/llvm/test/CodeGen/AArch64/arm64-vadd.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-vadd.ll
@@ -1,9 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
; RUN: llc < %s -mtriple=arm64-eabi | FileCheck %s --check-prefixes=CHECK,CHECK-SD
-; RUN: llc < %s -mtriple=arm64-eabi -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
-
-; CHECK-GI: warning: Instruction selection used fallback path for saddlp1d
-; CHECK-GI-NEXT: warning: Instruction selection used fallback path for uaddlp1d
+; RUN: llc < %s -mtriple=arm64-eabi -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI
define <8 x i8> @addhn8b(ptr %A, ptr %B) nounwind {
; CHECK-LABEL: addhn8b:
diff --git a/llvm/test/CodeGen/AArch64/f16-instructions.ll b/llvm/test/CodeGen/AArch64/f16-instructions.ll
index adc536d..b234ef7 100644
--- a/llvm/test/CodeGen/AArch64/f16-instructions.ll
+++ b/llvm/test/CodeGen/AArch64/f16-instructions.ll
@@ -782,18 +782,19 @@ define void @test_fccmp(half %in, ptr %out) {
;
; CHECK-CVT-GI-LABEL: test_fccmp:
; CHECK-CVT-GI: // %bb.0:
-; CHECK-CVT-GI-NEXT: mov w8, #17664 // =0x4500
-; CHECK-CVT-GI-NEXT: mov w9, #18432 // =0x4800
+; CHECK-CVT-GI-NEXT: adrp x8, .LCPI29_0
; CHECK-CVT-GI-NEXT: // kill: def $h0 killed $h0 def $s0
; CHECK-CVT-GI-NEXT: fcvt s2, h0
-; CHECK-CVT-GI-NEXT: fmov s1, w8
-; CHECK-CVT-GI-NEXT: fmov s3, w9
-; CHECK-CVT-GI-NEXT: fmov w9, s0
-; CHECK-CVT-GI-NEXT: fcvt s1, h1
-; CHECK-CVT-GI-NEXT: fcvt s3, h3
-; CHECK-CVT-GI-NEXT: fcmp s2, s1
-; CHECK-CVT-GI-NEXT: fccmp s2, s3, #4, mi
-; CHECK-CVT-GI-NEXT: csel w8, w9, w8, gt
+; CHECK-CVT-GI-NEXT: ldr h1, [x8, :lo12:.LCPI29_0]
+; CHECK-CVT-GI-NEXT: adrp x8, .LCPI29_1
+; CHECK-CVT-GI-NEXT: ldr h4, [x8, :lo12:.LCPI29_1]
+; CHECK-CVT-GI-NEXT: fmov w8, s0
+; CHECK-CVT-GI-NEXT: fcvt s3, h1
+; CHECK-CVT-GI-NEXT: fmov w9, s1
+; CHECK-CVT-GI-NEXT: fcvt s4, h4
+; CHECK-CVT-GI-NEXT: fcmp s2, s3
+; CHECK-CVT-GI-NEXT: fccmp s2, s4, #4, mi
+; CHECK-CVT-GI-NEXT: csel w8, w8, w9, gt
; CHECK-CVT-GI-NEXT: strh w8, [x0]
; CHECK-CVT-GI-NEXT: ret
;
diff --git a/llvm/test/CodeGen/AArch64/fcvt-fixed.ll b/llvm/test/CodeGen/AArch64/fcvt-fixed.ll
index 51aad4fe..7409bfb 100644
--- a/llvm/test/CodeGen/AArch64/fcvt-fixed.ll
+++ b/llvm/test/CodeGen/AArch64/fcvt-fixed.ll
@@ -166,9 +166,9 @@ define i32 @fcvtzs_f16_i32_7(half %flt) {
;
; CHECK-GI-NO16-LABEL: fcvtzs_f16_i32_7:
; CHECK-GI-NO16: // %bb.0:
-; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800
+; CHECK-GI-NO16-NEXT: adrp x8, .LCPI8_0
; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fmov s1, w8
+; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI8_0]
; CHECK-GI-NO16-NEXT: fcvt s1, h1
; CHECK-GI-NO16-NEXT: fmul s0, s0, s1
; CHECK-GI-NO16-NEXT: fcvt h0, s0
@@ -206,9 +206,9 @@ define i32 @fcvtzs_f16_i32_15(half %flt) {
;
; CHECK-GI-NO16-LABEL: fcvtzs_f16_i32_15:
; CHECK-GI-NO16: // %bb.0:
-; CHECK-GI-NO16-NEXT: mov w8, #30720 // =0x7800
+; CHECK-GI-NO16-NEXT: adrp x8, .LCPI9_0
; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fmov s1, w8
+; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI9_0]
; CHECK-GI-NO16-NEXT: fcvt s1, h1
; CHECK-GI-NO16-NEXT: fmul s0, s0, s1
; CHECK-GI-NO16-NEXT: fcvt h0, s0
@@ -246,9 +246,9 @@ define i64 @fcvtzs_f16_i64_7(half %flt) {
;
; CHECK-GI-NO16-LABEL: fcvtzs_f16_i64_7:
; CHECK-GI-NO16: // %bb.0:
-; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800
+; CHECK-GI-NO16-NEXT: adrp x8, .LCPI10_0
; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fmov s1, w8
+; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI10_0]
; CHECK-GI-NO16-NEXT: fcvt s1, h1
; CHECK-GI-NO16-NEXT: fmul s0, s0, s1
; CHECK-GI-NO16-NEXT: fcvt h0, s0
@@ -286,9 +286,9 @@ define i64 @fcvtzs_f16_i64_15(half %flt) {
;
; CHECK-GI-NO16-LABEL: fcvtzs_f16_i64_15:
; CHECK-GI-NO16: // %bb.0:
-; CHECK-GI-NO16-NEXT: mov w8, #30720 // =0x7800
+; CHECK-GI-NO16-NEXT: adrp x8, .LCPI11_0
; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fmov s1, w8
+; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI11_0]
; CHECK-GI-NO16-NEXT: fcvt s1, h1
; CHECK-GI-NO16-NEXT: fmul s0, s0, s1
; CHECK-GI-NO16-NEXT: fcvt h0, s0
@@ -470,9 +470,9 @@ define i32 @fcvtzu_f16_i32_7(half %flt) {
;
; CHECK-GI-NO16-LABEL: fcvtzu_f16_i32_7:
; CHECK-GI-NO16: // %bb.0:
-; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800
+; CHECK-GI-NO16-NEXT: adrp x8, .LCPI20_0
; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fmov s1, w8
+; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI20_0]
; CHECK-GI-NO16-NEXT: fcvt s1, h1
; CHECK-GI-NO16-NEXT: fmul s0, s0, s1
; CHECK-GI-NO16-NEXT: fcvt h0, s0
@@ -510,9 +510,9 @@ define i32 @fcvtzu_f16_i32_15(half %flt) {
;
; CHECK-GI-NO16-LABEL: fcvtzu_f16_i32_15:
; CHECK-GI-NO16: // %bb.0:
-; CHECK-GI-NO16-NEXT: mov w8, #30720 // =0x7800
+; CHECK-GI-NO16-NEXT: adrp x8, .LCPI21_0
; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fmov s1, w8
+; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI21_0]
; CHECK-GI-NO16-NEXT: fcvt s1, h1
; CHECK-GI-NO16-NEXT: fmul s0, s0, s1
; CHECK-GI-NO16-NEXT: fcvt h0, s0
@@ -550,9 +550,9 @@ define i64 @fcvtzu_f16_i64_7(half %flt) {
;
; CHECK-GI-NO16-LABEL: fcvtzu_f16_i64_7:
; CHECK-GI-NO16: // %bb.0:
-; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800
+; CHECK-GI-NO16-NEXT: adrp x8, .LCPI22_0
; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fmov s1, w8
+; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI22_0]
; CHECK-GI-NO16-NEXT: fcvt s1, h1
; CHECK-GI-NO16-NEXT: fmul s0, s0, s1
; CHECK-GI-NO16-NEXT: fcvt h0, s0
@@ -590,9 +590,9 @@ define i64 @fcvtzu_f16_i64_15(half %flt) {
;
; CHECK-GI-NO16-LABEL: fcvtzu_f16_i64_15:
; CHECK-GI-NO16: // %bb.0:
-; CHECK-GI-NO16-NEXT: mov w8, #30720 // =0x7800
+; CHECK-GI-NO16-NEXT: adrp x8, .LCPI23_0
; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fmov s1, w8
+; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI23_0]
; CHECK-GI-NO16-NEXT: fcvt s1, h1
; CHECK-GI-NO16-NEXT: fmul s0, s0, s1
; CHECK-GI-NO16-NEXT: fcvt h0, s0
@@ -775,10 +775,10 @@ define half @scvtf_f16_i32_7(i32 %int) {
; CHECK-GI-NO16-LABEL: scvtf_f16_i32_7:
; CHECK-GI-NO16: // %bb.0:
; CHECK-GI-NO16-NEXT: scvtf s0, w0
-; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800
-; CHECK-GI-NO16-NEXT: fmov s1, w8
-; CHECK-GI-NO16-NEXT: fcvt h0, s0
+; CHECK-GI-NO16-NEXT: adrp x8, .LCPI32_0
+; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI32_0]
; CHECK-GI-NO16-NEXT: fcvt s1, h1
+; CHECK-GI-NO16-NEXT: fcvt h0, s0
; CHECK-GI-NO16-NEXT: fcvt s0, h0
; CHECK-GI-NO16-NEXT: fdiv s0, s0, s1
; CHECK-GI-NO16-NEXT: fcvt h0, s0
@@ -815,10 +815,10 @@ define half @scvtf_f16_i32_15(i32 %int) {
; CHECK-GI-NO16-LABEL: scvtf_f16_i32_15:
; CHECK-GI-NO16: // %bb.0:
; CHECK-GI-NO16-NEXT: scvtf s0, w0
-; CHECK-GI-NO16-NEXT: mov w8, #30720 // =0x7800
-; CHECK-GI-NO16-NEXT: fmov s1, w8
-; CHECK-GI-NO16-NEXT: fcvt h0, s0
+; CHECK-GI-NO16-NEXT: adrp x8, .LCPI33_0
+; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI33_0]
; CHECK-GI-NO16-NEXT: fcvt s1, h1
+; CHECK-GI-NO16-NEXT: fcvt h0, s0
; CHECK-GI-NO16-NEXT: fcvt s0, h0
; CHECK-GI-NO16-NEXT: fdiv s0, s0, s1
; CHECK-GI-NO16-NEXT: fcvt h0, s0
@@ -855,10 +855,10 @@ define half @scvtf_f16_i64_7(i64 %long) {
; CHECK-GI-NO16-LABEL: scvtf_f16_i64_7:
; CHECK-GI-NO16: // %bb.0:
; CHECK-GI-NO16-NEXT: scvtf s0, x0
-; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800
-; CHECK-GI-NO16-NEXT: fmov s1, w8
-; CHECK-GI-NO16-NEXT: fcvt h0, s0
+; CHECK-GI-NO16-NEXT: adrp x8, .LCPI34_0
+; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI34_0]
; CHECK-GI-NO16-NEXT: fcvt s1, h1
+; CHECK-GI-NO16-NEXT: fcvt h0, s0
; CHECK-GI-NO16-NEXT: fcvt s0, h0
; CHECK-GI-NO16-NEXT: fdiv s0, s0, s1
; CHECK-GI-NO16-NEXT: fcvt h0, s0
@@ -895,10 +895,10 @@ define half @scvtf_f16_i64_15(i64 %long) {
; CHECK-GI-NO16-LABEL: scvtf_f16_i64_15:
; CHECK-GI-NO16: // %bb.0:
; CHECK-GI-NO16-NEXT: scvtf s0, x0
-; CHECK-GI-NO16-NEXT: mov w8, #30720 // =0x7800
-; CHECK-GI-NO16-NEXT: fmov s1, w8
-; CHECK-GI-NO16-NEXT: fcvt h0, s0
+; CHECK-GI-NO16-NEXT: adrp x8, .LCPI35_0
+; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI35_0]
; CHECK-GI-NO16-NEXT: fcvt s1, h1
+; CHECK-GI-NO16-NEXT: fcvt h0, s0
; CHECK-GI-NO16-NEXT: fcvt s0, h0
; CHECK-GI-NO16-NEXT: fdiv s0, s0, s1
; CHECK-GI-NO16-NEXT: fcvt h0, s0
@@ -1079,10 +1079,10 @@ define half @ucvtf_f16_i32_7(i32 %int) {
; CHECK-GI-NO16-LABEL: ucvtf_f16_i32_7:
; CHECK-GI-NO16: // %bb.0:
; CHECK-GI-NO16-NEXT: ucvtf s0, w0
-; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800
-; CHECK-GI-NO16-NEXT: fmov s1, w8
-; CHECK-GI-NO16-NEXT: fcvt h0, s0
+; CHECK-GI-NO16-NEXT: adrp x8, .LCPI44_0
+; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI44_0]
; CHECK-GI-NO16-NEXT: fcvt s1, h1
+; CHECK-GI-NO16-NEXT: fcvt h0, s0
; CHECK-GI-NO16-NEXT: fcvt s0, h0
; CHECK-GI-NO16-NEXT: fdiv s0, s0, s1
; CHECK-GI-NO16-NEXT: fcvt h0, s0
@@ -1119,10 +1119,10 @@ define half @ucvtf_f16_i32_15(i32 %int) {
; CHECK-GI-NO16-LABEL: ucvtf_f16_i32_15:
; CHECK-GI-NO16: // %bb.0:
; CHECK-GI-NO16-NEXT: ucvtf s0, w0
-; CHECK-GI-NO16-NEXT: mov w8, #30720 // =0x7800
-; CHECK-GI-NO16-NEXT: fmov s1, w8
-; CHECK-GI-NO16-NEXT: fcvt h0, s0
+; CHECK-GI-NO16-NEXT: adrp x8, .LCPI45_0
+; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI45_0]
; CHECK-GI-NO16-NEXT: fcvt s1, h1
+; CHECK-GI-NO16-NEXT: fcvt h0, s0
; CHECK-GI-NO16-NEXT: fcvt s0, h0
; CHECK-GI-NO16-NEXT: fdiv s0, s0, s1
; CHECK-GI-NO16-NEXT: fcvt h0, s0
@@ -1159,10 +1159,10 @@ define half @ucvtf_f16_i64_7(i64 %long) {
; CHECK-GI-NO16-LABEL: ucvtf_f16_i64_7:
; CHECK-GI-NO16: // %bb.0:
; CHECK-GI-NO16-NEXT: ucvtf s0, x0
-; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800
-; CHECK-GI-NO16-NEXT: fmov s1, w8
-; CHECK-GI-NO16-NEXT: fcvt h0, s0
+; CHECK-GI-NO16-NEXT: adrp x8, .LCPI46_0
+; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI46_0]
; CHECK-GI-NO16-NEXT: fcvt s1, h1
+; CHECK-GI-NO16-NEXT: fcvt h0, s0
; CHECK-GI-NO16-NEXT: fcvt s0, h0
; CHECK-GI-NO16-NEXT: fdiv s0, s0, s1
; CHECK-GI-NO16-NEXT: fcvt h0, s0
@@ -1199,10 +1199,10 @@ define half @ucvtf_f16_i64_15(i64 %long) {
; CHECK-GI-NO16-LABEL: ucvtf_f16_i64_15:
; CHECK-GI-NO16: // %bb.0:
; CHECK-GI-NO16-NEXT: ucvtf s0, x0
-; CHECK-GI-NO16-NEXT: mov w8, #30720 // =0x7800
-; CHECK-GI-NO16-NEXT: fmov s1, w8
-; CHECK-GI-NO16-NEXT: fcvt h0, s0
+; CHECK-GI-NO16-NEXT: adrp x8, .LCPI47_0
+; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI47_0]
; CHECK-GI-NO16-NEXT: fcvt s1, h1
+; CHECK-GI-NO16-NEXT: fcvt h0, s0
; CHECK-GI-NO16-NEXT: fcvt s0, h0
; CHECK-GI-NO16-NEXT: fdiv s0, s0, s1
; CHECK-GI-NO16-NEXT: fcvt h0, s0
@@ -1373,9 +1373,9 @@ define i32 @fcvtzs_sat_f16_i32_7(half %dbl) {
;
; CHECK-GI-NO16-LABEL: fcvtzs_sat_f16_i32_7:
; CHECK-GI-NO16: // %bb.0:
-; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800
+; CHECK-GI-NO16-NEXT: adrp x8, .LCPI55_0
; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fmov s1, w8
+; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI55_0]
; CHECK-GI-NO16-NEXT: fcvt s1, h1
; CHECK-GI-NO16-NEXT: fmul s0, s0, s1
; CHECK-GI-NO16-NEXT: fcvt h0, s0
@@ -1413,9 +1413,9 @@ define i32 @fcvtzs_sat_f16_i32_15(half %dbl) {
;
; CHECK-GI-NO16-LABEL: fcvtzs_sat_f16_i32_15:
; CHECK-GI-NO16: // %bb.0:
-; CHECK-GI-NO16-NEXT: mov w8, #30720 // =0x7800
+; CHECK-GI-NO16-NEXT: adrp x8, .LCPI56_0
; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fmov s1, w8
+; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI56_0]
; CHECK-GI-NO16-NEXT: fcvt s1, h1
; CHECK-GI-NO16-NEXT: fmul s0, s0, s1
; CHECK-GI-NO16-NEXT: fcvt h0, s0
@@ -1453,9 +1453,9 @@ define i64 @fcvtzs_sat_f16_i64_7(half %dbl) {
;
; CHECK-GI-NO16-LABEL: fcvtzs_sat_f16_i64_7:
; CHECK-GI-NO16: // %bb.0:
-; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800
+; CHECK-GI-NO16-NEXT: adrp x8, .LCPI57_0
; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fmov s1, w8
+; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI57_0]
; CHECK-GI-NO16-NEXT: fcvt s1, h1
; CHECK-GI-NO16-NEXT: fmul s0, s0, s1
; CHECK-GI-NO16-NEXT: fcvt h0, s0
@@ -1493,9 +1493,9 @@ define i64 @fcvtzs_sat_f16_i64_15(half %dbl) {
;
; CHECK-GI-NO16-LABEL: fcvtzs_sat_f16_i64_15:
; CHECK-GI-NO16: // %bb.0:
-; CHECK-GI-NO16-NEXT: mov w8, #30720 // =0x7800
+; CHECK-GI-NO16-NEXT: adrp x8, .LCPI58_0
; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fmov s1, w8
+; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI58_0]
; CHECK-GI-NO16-NEXT: fcvt s1, h1
; CHECK-GI-NO16-NEXT: fmul s0, s0, s1
; CHECK-GI-NO16-NEXT: fcvt h0, s0
@@ -1667,9 +1667,9 @@ define i32 @fcvtzu_sat_f16_i32_7(half %dbl) {
;
; CHECK-GI-NO16-LABEL: fcvtzu_sat_f16_i32_7:
; CHECK-GI-NO16: // %bb.0:
-; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800
+; CHECK-GI-NO16-NEXT: adrp x8, .LCPI66_0
; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fmov s1, w8
+; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI66_0]
; CHECK-GI-NO16-NEXT: fcvt s1, h1
; CHECK-GI-NO16-NEXT: fmul s0, s0, s1
; CHECK-GI-NO16-NEXT: fcvt h0, s0
@@ -1707,9 +1707,9 @@ define i32 @fcvtzu_sat_f16_i32_15(half %dbl) {
;
; CHECK-GI-NO16-LABEL: fcvtzu_sat_f16_i32_15:
; CHECK-GI-NO16: // %bb.0:
-; CHECK-GI-NO16-NEXT: mov w8, #30720 // =0x7800
+; CHECK-GI-NO16-NEXT: adrp x8, .LCPI67_0
; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fmov s1, w8
+; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI67_0]
; CHECK-GI-NO16-NEXT: fcvt s1, h1
; CHECK-GI-NO16-NEXT: fmul s0, s0, s1
; CHECK-GI-NO16-NEXT: fcvt h0, s0
@@ -1747,9 +1747,9 @@ define i64 @fcvtzu_sat_f16_i64_7(half %dbl) {
;
; CHECK-GI-NO16-LABEL: fcvtzu_sat_f16_i64_7:
; CHECK-GI-NO16: // %bb.0:
-; CHECK-GI-NO16-NEXT: mov w8, #22528 // =0x5800
+; CHECK-GI-NO16-NEXT: adrp x8, .LCPI68_0
; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fmov s1, w8
+; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI68_0]
; CHECK-GI-NO16-NEXT: fcvt s1, h1
; CHECK-GI-NO16-NEXT: fmul s0, s0, s1
; CHECK-GI-NO16-NEXT: fcvt h0, s0
@@ -1787,9 +1787,9 @@ define i64 @fcvtzu_sat_f16_i64_15(half %dbl) {
;
; CHECK-GI-NO16-LABEL: fcvtzu_sat_f16_i64_15:
; CHECK-GI-NO16: // %bb.0:
-; CHECK-GI-NO16-NEXT: mov w8, #30720 // =0x7800
+; CHECK-GI-NO16-NEXT: adrp x8, .LCPI69_0
; CHECK-GI-NO16-NEXT: fcvt s0, h0
-; CHECK-GI-NO16-NEXT: fmov s1, w8
+; CHECK-GI-NO16-NEXT: ldr h1, [x8, :lo12:.LCPI69_0]
; CHECK-GI-NO16-NEXT: fcvt s1, h1
; CHECK-GI-NO16-NEXT: fmul s0, s0, s1
; CHECK-GI-NO16-NEXT: fcvt h0, s0
diff --git a/llvm/test/CodeGen/AArch64/fdiv-combine.ll b/llvm/test/CodeGen/AArch64/fdiv-combine.ll
index 91bb8ac..9eacb61 100644
--- a/llvm/test/CodeGen/AArch64/fdiv-combine.ll
+++ b/llvm/test/CodeGen/AArch64/fdiv-combine.ll
@@ -12,22 +12,14 @@
; =>
; recip = 1.0 / D; a * recip; b * recip; c * recip;
define void @three_fdiv_float(float %D, float %a, float %b, float %c) {
-; CHECK-SD-LABEL: three_fdiv_float:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: fmov s4, #1.00000000
-; CHECK-SD-NEXT: fdiv s4, s4, s0
-; CHECK-SD-NEXT: fmul s0, s1, s4
-; CHECK-SD-NEXT: fmul s1, s2, s4
-; CHECK-SD-NEXT: fmul s2, s3, s4
-; CHECK-SD-NEXT: b foo_3f
-;
-; CHECK-GI-LABEL: three_fdiv_float:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: fdiv s4, s1, s0
-; CHECK-GI-NEXT: fdiv s1, s2, s0
-; CHECK-GI-NEXT: fdiv s2, s3, s0
-; CHECK-GI-NEXT: fmov s0, s4
-; CHECK-GI-NEXT: b foo_3f
+; CHECK-LABEL: three_fdiv_float:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fmov s4, #1.00000000
+; CHECK-NEXT: fdiv s4, s4, s0
+; CHECK-NEXT: fmul s0, s1, s4
+; CHECK-NEXT: fmul s1, s2, s4
+; CHECK-NEXT: fmul s2, s3, s4
+; CHECK-NEXT: b foo_3f
%div = fdiv arcp float %a, %D
%div1 = fdiv arcp float %b, %D
%div2 = fdiv arcp float %c, %D
@@ -36,22 +28,14 @@ define void @three_fdiv_float(float %D, float %a, float %b, float %c) {
}
define void @three_fdiv_double(double %D, double %a, double %b, double %c) {
-; CHECK-SD-LABEL: three_fdiv_double:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: fmov d4, #1.00000000
-; CHECK-SD-NEXT: fdiv d4, d4, d0
-; CHECK-SD-NEXT: fmul d0, d1, d4
-; CHECK-SD-NEXT: fmul d1, d2, d4
-; CHECK-SD-NEXT: fmul d2, d3, d4
-; CHECK-SD-NEXT: b foo_3d
-;
-; CHECK-GI-LABEL: three_fdiv_double:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: fdiv d4, d1, d0
-; CHECK-GI-NEXT: fdiv d1, d2, d0
-; CHECK-GI-NEXT: fdiv d2, d3, d0
-; CHECK-GI-NEXT: fmov d0, d4
-; CHECK-GI-NEXT: b foo_3d
+; CHECK-LABEL: three_fdiv_double:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fmov d4, #1.00000000
+; CHECK-NEXT: fdiv d4, d4, d0
+; CHECK-NEXT: fmul d0, d1, d4
+; CHECK-NEXT: fmul d1, d2, d4
+; CHECK-NEXT: fmul d2, d3, d4
+; CHECK-NEXT: b foo_3d
%div = fdiv arcp double %a, %D
%div1 = fdiv arcp double %b, %D
%div2 = fdiv arcp double %c, %D
@@ -60,22 +44,14 @@ define void @three_fdiv_double(double %D, double %a, double %b, double %c) {
}
define void @three_fdiv_4xfloat(<4 x float> %D, <4 x float> %a, <4 x float> %b, <4 x float> %c) {
-; CHECK-SD-LABEL: three_fdiv_4xfloat:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: fmov v4.4s, #1.00000000
-; CHECK-SD-NEXT: fdiv v4.4s, v4.4s, v0.4s
-; CHECK-SD-NEXT: fmul v0.4s, v1.4s, v4.4s
-; CHECK-SD-NEXT: fmul v1.4s, v2.4s, v4.4s
-; CHECK-SD-NEXT: fmul v2.4s, v3.4s, v4.4s
-; CHECK-SD-NEXT: b foo_3_4xf
-;
-; CHECK-GI-LABEL: three_fdiv_4xfloat:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: fdiv v4.4s, v1.4s, v0.4s
-; CHECK-GI-NEXT: fdiv v1.4s, v2.4s, v0.4s
-; CHECK-GI-NEXT: fdiv v2.4s, v3.4s, v0.4s
-; CHECK-GI-NEXT: mov v0.16b, v4.16b
-; CHECK-GI-NEXT: b foo_3_4xf
+; CHECK-LABEL: three_fdiv_4xfloat:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fmov v4.4s, #1.00000000
+; CHECK-NEXT: fdiv v4.4s, v4.4s, v0.4s
+; CHECK-NEXT: fmul v0.4s, v1.4s, v4.4s
+; CHECK-NEXT: fmul v1.4s, v2.4s, v4.4s
+; CHECK-NEXT: fmul v2.4s, v3.4s, v4.4s
+; CHECK-NEXT: b foo_3_4xf
%div = fdiv arcp <4 x float> %a, %D
%div1 = fdiv arcp <4 x float> %b, %D
%div2 = fdiv arcp <4 x float> %c, %D
@@ -84,22 +60,14 @@ define void @three_fdiv_4xfloat(<4 x float> %D, <4 x float> %a, <4 x float> %b,
}
define void @three_fdiv_2xdouble(<2 x double> %D, <2 x double> %a, <2 x double> %b, <2 x double> %c) {
-; CHECK-SD-LABEL: three_fdiv_2xdouble:
-; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: fmov v4.2d, #1.00000000
-; CHECK-SD-NEXT: fdiv v4.2d, v4.2d, v0.2d
-; CHECK-SD-NEXT: fmul v0.2d, v1.2d, v4.2d
-; CHECK-SD-NEXT: fmul v1.2d, v2.2d, v4.2d
-; CHECK-SD-NEXT: fmul v2.2d, v3.2d, v4.2d
-; CHECK-SD-NEXT: b foo_3_2xd
-;
-; CHECK-GI-LABEL: three_fdiv_2xdouble:
-; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: fdiv v4.2d, v1.2d, v0.2d
-; CHECK-GI-NEXT: fdiv v1.2d, v2.2d, v0.2d
-; CHECK-GI-NEXT: fdiv v2.2d, v3.2d, v0.2d
-; CHECK-GI-NEXT: mov v0.16b, v4.16b
-; CHECK-GI-NEXT: b foo_3_2xd
+; CHECK-LABEL: three_fdiv_2xdouble:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fmov v4.2d, #1.00000000
+; CHECK-NEXT: fdiv v4.2d, v4.2d, v0.2d
+; CHECK-NEXT: fmul v0.2d, v1.2d, v4.2d
+; CHECK-NEXT: fmul v1.2d, v2.2d, v4.2d
+; CHECK-NEXT: fmul v2.2d, v3.2d, v4.2d
+; CHECK-NEXT: b foo_3_2xd
%div = fdiv arcp <2 x double> %a, %D
%div1 = fdiv arcp <2 x double> %b, %D
%div2 = fdiv arcp <2 x double> %c, %D
@@ -135,26 +103,47 @@ define void @two_fdiv_double(double %D, double %a, double %b) {
ret void
}
-define void @splat_three_fdiv_4xfloat(float %D, <4 x float> %a, <4 x float> %b, <4 x float> %c) {
-; CHECK-SD-LABEL: splat_three_fdiv_4xfloat:
+define void @four_fdiv_multi_float(float %D, float %a, float %b, float %c) #0 {
+; CHECK-SD-LABEL: four_fdiv_multi_float:
; CHECK-SD: // %bb.0:
-; CHECK-SD-NEXT: // kill: def $s0 killed $s0 def $q0
-; CHECK-SD-NEXT: fmov v4.4s, #1.00000000
-; CHECK-SD-NEXT: dup v0.4s, v0.s[0]
-; CHECK-SD-NEXT: fdiv v4.4s, v4.4s, v0.4s
-; CHECK-SD-NEXT: fmul v0.4s, v1.4s, v4.4s
-; CHECK-SD-NEXT: fmul v1.4s, v2.4s, v4.4s
-; CHECK-SD-NEXT: fmul v2.4s, v3.4s, v4.4s
-; CHECK-SD-NEXT: b foo_3_4xf
+; CHECK-SD-NEXT: fmov s4, #1.00000000
+; CHECK-SD-NEXT: fdiv s5, s4, s0
+; CHECK-SD-NEXT: fmul s4, s1, s5
+; CHECK-SD-NEXT: fmul s1, s2, s5
+; CHECK-SD-NEXT: fmul s2, s3, s5
+; CHECK-SD-NEXT: fmul s3, s0, s5
+; CHECK-SD-NEXT: fmov s0, s4
+; CHECK-SD-NEXT: b foo_4f
;
-; CHECK-GI-LABEL: splat_three_fdiv_4xfloat:
+; CHECK-GI-LABEL: four_fdiv_multi_float:
; CHECK-GI: // %bb.0:
-; CHECK-GI-NEXT: // kill: def $s0 killed $s0 def $q0
-; CHECK-GI-NEXT: dup v4.4s, v0.s[0]
-; CHECK-GI-NEXT: fdiv v0.4s, v1.4s, v4.4s
-; CHECK-GI-NEXT: fdiv v1.4s, v2.4s, v4.4s
-; CHECK-GI-NEXT: fdiv v2.4s, v3.4s, v4.4s
-; CHECK-GI-NEXT: b foo_3_4xf
+; CHECK-GI-NEXT: fmov s4, #1.00000000
+; CHECK-GI-NEXT: fdiv s5, s4, s0
+; CHECK-GI-NEXT: fdiv s4, s0, s0
+; CHECK-GI-NEXT: fmul s0, s1, s5
+; CHECK-GI-NEXT: fmul s1, s2, s5
+; CHECK-GI-NEXT: fmul s2, s3, s5
+; CHECK-GI-NEXT: fmov s3, s4
+; CHECK-GI-NEXT: b foo_4f
+ %div = fdiv arcp float %a, %D
+ %div1 = fdiv arcp float %b, %D
+ %div2 = fdiv arcp float %c, %D
+ %div3 = fdiv arcp float %D, %D
+ tail call void @foo_4f(float %div, float %div1, float %div2, float %div3)
+ ret void
+}
+
+define void @splat_three_fdiv_4xfloat(float %D, <4 x float> %a, <4 x float> %b, <4 x float> %c) {
+; CHECK-LABEL: splat_three_fdiv_4xfloat:
+; CHECK: // %bb.0:
+; CHECK-NEXT: // kill: def $s0 killed $s0 def $q0
+; CHECK-NEXT: fmov v4.4s, #1.00000000
+; CHECK-NEXT: dup v0.4s, v0.s[0]
+; CHECK-NEXT: fdiv v4.4s, v4.4s, v0.4s
+; CHECK-NEXT: fmul v0.4s, v1.4s, v4.4s
+; CHECK-NEXT: fmul v1.4s, v2.4s, v4.4s
+; CHECK-NEXT: fmul v2.4s, v3.4s, v4.4s
+; CHECK-NEXT: b foo_3_4xf
%D.ins = insertelement <4 x float> poison, float %D, i64 0
%splat = shufflevector <4 x float> %D.ins, <4 x float> poison, <4 x i32> zeroinitializer
%div = fdiv arcp <4 x float> %a, %splat
@@ -256,6 +245,7 @@ entry:
}
declare void @foo_3f(float, float, float)
+declare void @foo_4f(float, float, float, float)
declare void @foo_3d(double, double, double)
declare void @foo_3_4xf(<4 x float>, <4 x float>, <4 x float>)
declare void @foo_3_2xd(<2 x double>, <2 x double>, <2 x double>)
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fadd-strict.ll b/llvm/test/CodeGen/AArch64/vecreduce-fadd-strict.ll
index 594a3ab..be07978 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-fadd-strict.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-fadd-strict.ll
@@ -38,10 +38,10 @@ define half @add_v2HalfH(<2 x half> %bin.rdx) {
;
; CHECK-GI-NOFP16-LABEL: add_v2HalfH:
; CHECK-GI-NOFP16: // %bb.0:
-; CHECK-GI-NOFP16-NEXT: mov w8, #32768 // =0x8000
+; CHECK-GI-NOFP16-NEXT: adrp x8, .LCPI1_0
; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NOFP16-NEXT: fcvt s2, h0
-; CHECK-GI-NOFP16-NEXT: fmov s1, w8
+; CHECK-GI-NOFP16-NEXT: ldr h1, [x8, :lo12:.LCPI1_0]
; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[1]
; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
@@ -88,10 +88,10 @@ define half @add_v3HalfH(<3 x half> %bin.rdx) {
;
; CHECK-GI-NOFP16-LABEL: add_v3HalfH:
; CHECK-GI-NOFP16: // %bb.0:
-; CHECK-GI-NOFP16-NEXT: mov w8, #32768 // =0x8000
+; CHECK-GI-NOFP16-NEXT: adrp x8, .LCPI2_0
; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NOFP16-NEXT: fcvt s2, h0
-; CHECK-GI-NOFP16-NEXT: fmov s1, w8
+; CHECK-GI-NOFP16-NEXT: ldr h1, [x8, :lo12:.LCPI2_0]
; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
; CHECK-GI-NOFP16-NEXT: fadd s1, s1, s2
; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[1]
@@ -152,10 +152,10 @@ define half @add_HalfH(<4 x half> %bin.rdx) {
;
; CHECK-GI-NOFP16-LABEL: add_HalfH:
; CHECK-GI-NOFP16: // %bb.0:
-; CHECK-GI-NOFP16-NEXT: mov w8, #32768 // =0x8000
+; CHECK-GI-NOFP16-NEXT: adrp x8, .LCPI3_0
; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NOFP16-NEXT: fcvt s2, h0
-; CHECK-GI-NOFP16-NEXT: fmov s1, w8
+; CHECK-GI-NOFP16-NEXT: ldr h1, [x8, :lo12:.LCPI3_0]
; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
; CHECK-GI-NOFP16-NEXT: fadd s1, s1, s2
; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[1]
@@ -250,9 +250,9 @@ define half @add_H(<8 x half> %bin.rdx) {
;
; CHECK-GI-NOFP16-LABEL: add_H:
; CHECK-GI-NOFP16: // %bb.0:
-; CHECK-GI-NOFP16-NEXT: mov w8, #32768 // =0x8000
+; CHECK-GI-NOFP16-NEXT: adrp x8, .LCPI4_0
; CHECK-GI-NOFP16-NEXT: fcvt s2, h0
-; CHECK-GI-NOFP16-NEXT: fmov s1, w8
+; CHECK-GI-NOFP16-NEXT: ldr h1, [x8, :lo12:.LCPI4_0]
; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
; CHECK-GI-NOFP16-NEXT: fadd s1, s1, s2
; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[1]
@@ -448,9 +448,9 @@ define half @add_2H(<16 x half> %bin.rdx) {
;
; CHECK-GI-NOFP16-LABEL: add_2H:
; CHECK-GI-NOFP16: // %bb.0:
-; CHECK-GI-NOFP16-NEXT: mov w8, #32768 // =0x8000
+; CHECK-GI-NOFP16-NEXT: adrp x8, .LCPI7_0
; CHECK-GI-NOFP16-NEXT: fcvt s3, h0
-; CHECK-GI-NOFP16-NEXT: fmov s2, w8
+; CHECK-GI-NOFP16-NEXT: ldr h2, [x8, :lo12:.LCPI7_0]
; CHECK-GI-NOFP16-NEXT: fcvt s2, h2
; CHECK-GI-NOFP16-NEXT: fadd s2, s2, s3
; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[1]
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fadd.ll b/llvm/test/CodeGen/AArch64/vecreduce-fadd.ll
index 18f463c..40925da 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-fadd.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-fadd.ll
@@ -405,26 +405,23 @@ define half @fadd_reduction_v4f16_in_loop(ptr %ptr.start) {
;
; CHECK-GI-NOFP16-LABEL: fadd_reduction_v4f16_in_loop:
; CHECK-GI-NOFP16: // %bb.0: // %entry
+; CHECK-GI-NOFP16-NEXT: movi d0, #0000000000000000
; CHECK-GI-NOFP16-NEXT: mov x8, xzr
-; CHECK-GI-NOFP16-NEXT: mov w9, #0 // =0x0
; CHECK-GI-NOFP16-NEXT: .LBB13_1: // %loop
; CHECK-GI-NOFP16-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-GI-NOFP16-NEXT: ldr d0, [x0, x8]
-; CHECK-GI-NOFP16-NEXT: fmov s1, w9
+; CHECK-GI-NOFP16-NEXT: ldr d1, [x0, x8]
+; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
; CHECK-GI-NOFP16-NEXT: add x8, x8, #8
; CHECK-GI-NOFP16-NEXT: cmp w8, #56
-; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h
+; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h
+; CHECK-GI-NOFP16-NEXT: faddp v1.4s, v1.4s, v1.4s
+; CHECK-GI-NOFP16-NEXT: faddp s1, v1.2s
+; CHECK-GI-NOFP16-NEXT: fcvt h1, s1
; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
-; CHECK-GI-NOFP16-NEXT: faddp v0.4s, v0.4s, v0.4s
-; CHECK-GI-NOFP16-NEXT: faddp s0, v0.2s
-; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
-; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
-; CHECK-GI-NOFP16-NEXT: fadd s0, s0, s1
+; CHECK-GI-NOFP16-NEXT: fadd s0, s1, s0
; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
-; CHECK-GI-NOFP16-NEXT: fmov w9, s0
; CHECK-GI-NOFP16-NEXT: b.ne .LBB13_1
; CHECK-GI-NOFP16-NEXT: // %bb.2: // %exit
-; CHECK-GI-NOFP16-NEXT: // kill: def $h0 killed $h0 killed $s0
; CHECK-GI-NOFP16-NEXT: ret
;
; CHECK-GI-FP16-LABEL: fadd_reduction_v4f16_in_loop:
@@ -521,28 +518,25 @@ define half @fadd_reduction_v8f16_in_loop(ptr %ptr.start) {
;
; CHECK-GI-NOFP16-LABEL: fadd_reduction_v8f16_in_loop:
; CHECK-GI-NOFP16: // %bb.0: // %entry
+; CHECK-GI-NOFP16-NEXT: movi d0, #0000000000000000
; CHECK-GI-NOFP16-NEXT: mov x8, xzr
-; CHECK-GI-NOFP16-NEXT: mov w9, #0 // =0x0
; CHECK-GI-NOFP16-NEXT: .LBB14_1: // %loop
; CHECK-GI-NOFP16-NEXT: // =>This Inner Loop Header: Depth=1
-; CHECK-GI-NOFP16-NEXT: ldr q0, [x0, x8]
+; CHECK-GI-NOFP16-NEXT: ldr q1, [x0, x8]
+; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
; CHECK-GI-NOFP16-NEXT: add x8, x8, #8
; CHECK-GI-NOFP16-NEXT: cmp w8, #56
-; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v0.4h
-; CHECK-GI-NOFP16-NEXT: fcvtl2 v0.4s, v0.8h
-; CHECK-GI-NOFP16-NEXT: fadd v0.4s, v1.4s, v0.4s
-; CHECK-GI-NOFP16-NEXT: fmov s1, w9
+; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v1.4h
+; CHECK-GI-NOFP16-NEXT: fcvtl2 v1.4s, v1.8h
+; CHECK-GI-NOFP16-NEXT: fadd v1.4s, v2.4s, v1.4s
+; CHECK-GI-NOFP16-NEXT: faddp v1.4s, v1.4s, v1.4s
+; CHECK-GI-NOFP16-NEXT: faddp s1, v1.2s
+; CHECK-GI-NOFP16-NEXT: fcvt h1, s1
; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
-; CHECK-GI-NOFP16-NEXT: faddp v0.4s, v0.4s, v0.4s
-; CHECK-GI-NOFP16-NEXT: faddp s0, v0.2s
-; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
-; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
-; CHECK-GI-NOFP16-NEXT: fadd s0, s0, s1
+; CHECK-GI-NOFP16-NEXT: fadd s0, s1, s0
; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
-; CHECK-GI-NOFP16-NEXT: fmov w9, s0
; CHECK-GI-NOFP16-NEXT: b.ne .LBB14_1
; CHECK-GI-NOFP16-NEXT: // %bb.2: // %exit
-; CHECK-GI-NOFP16-NEXT: // kill: def $h0 killed $h0 killed $s0
; CHECK-GI-NOFP16-NEXT: ret
;
; CHECK-GI-FP16-LABEL: fadd_reduction_v8f16_in_loop:
diff --git a/llvm/test/CodeGen/AArch64/vecreduce-fmul-strict.ll b/llvm/test/CodeGen/AArch64/vecreduce-fmul-strict.ll
index e1b2170..c10d6e9 100644
--- a/llvm/test/CodeGen/AArch64/vecreduce-fmul-strict.ll
+++ b/llvm/test/CodeGen/AArch64/vecreduce-fmul-strict.ll
@@ -52,10 +52,10 @@ define half @mul_HalfH(<4 x half> %bin.rdx) {
;
; CHECK-GI-NOFP16-LABEL: mul_HalfH:
; CHECK-GI-NOFP16: // %bb.0:
-; CHECK-GI-NOFP16-NEXT: mov w8, #15360 // =0x3c00
+; CHECK-GI-NOFP16-NEXT: adrp x8, .LCPI1_0
; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 def $q0
; CHECK-GI-NOFP16-NEXT: fcvt s2, h0
-; CHECK-GI-NOFP16-NEXT: fmov s1, w8
+; CHECK-GI-NOFP16-NEXT: ldr h1, [x8, :lo12:.LCPI1_0]
; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
; CHECK-GI-NOFP16-NEXT: fmul s1, s1, s2
; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[1]
@@ -144,9 +144,9 @@ define half @mul_H(<8 x half> %bin.rdx) {
;
; CHECK-GI-NOFP16-LABEL: mul_H:
; CHECK-GI-NOFP16: // %bb.0:
-; CHECK-GI-NOFP16-NEXT: mov w8, #15360 // =0x3c00
+; CHECK-GI-NOFP16-NEXT: adrp x8, .LCPI2_0
; CHECK-GI-NOFP16-NEXT: fcvt s2, h0
-; CHECK-GI-NOFP16-NEXT: fmov s1, w8
+; CHECK-GI-NOFP16-NEXT: ldr h1, [x8, :lo12:.LCPI2_0]
; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
; CHECK-GI-NOFP16-NEXT: fmul s1, s1, s2
; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[1]
@@ -321,9 +321,9 @@ define half @mul_2H(<16 x half> %bin.rdx) {
;
; CHECK-GI-NOFP16-LABEL: mul_2H:
; CHECK-GI-NOFP16: // %bb.0:
-; CHECK-GI-NOFP16-NEXT: mov w8, #15360 // =0x3c00
+; CHECK-GI-NOFP16-NEXT: adrp x8, .LCPI5_0
; CHECK-GI-NOFP16-NEXT: fcvt s3, h0
-; CHECK-GI-NOFP16-NEXT: fmov s2, w8
+; CHECK-GI-NOFP16-NEXT: ldr h2, [x8, :lo12:.LCPI5_0]
; CHECK-GI-NOFP16-NEXT: fcvt s2, h2
; CHECK-GI-NOFP16-NEXT: fmul s2, s2, s3
; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[1]
diff --git a/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll b/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll
index 2d7ef2c..98fbbe1 100644
--- a/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll
+++ b/llvm/test/CodeGen/AMDGPU/addrspacecast-constantexpr.ll
@@ -169,6 +169,6 @@ attributes #1 = { nounwind }
;.
; HSA: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
-; HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
;.
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-min-agpr-alloc.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-min-agpr-alloc.ll
new file mode 100644
index 0000000..f730199
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-min-agpr-alloc.ll
@@ -0,0 +1,1066 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --check-globals all --version 4
+; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a -passes=amdgpu-attributor %s | FileCheck %s
+
+; Shrink result attribute list by preventing use of most attributes.
+define internal void @use_most() {
+; CHECK-LABEL: define internal void @use_most(
+; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[ALLOCA:%.*]] = alloca [256 x i8], align 1, addrspace(5)
+; CHECK-NEXT: [[ALLOCA_CAST:%.*]] = addrspacecast ptr addrspace(5) [[ALLOCA]] to ptr
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.amdgcn.workitem.id.y()
+; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.amdgcn.workitem.id.z()
+; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.amdgcn.workgroup.id.x()
+; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.amdgcn.workgroup.id.y()
+; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.amdgcn.workgroup.id.z()
+; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.amdgcn.cluster.id.x()
+; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.amdgcn.cluster.id.y()
+; CHECK-NEXT: [[TMP12:%.*]] = call i32 @llvm.amdgcn.cluster.id.z()
+; CHECK-NEXT: [[TMP7:%.*]] = call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
+; CHECK-NEXT: [[TMP8:%.*]] = call ptr addrspace(4) @llvm.amdgcn.queue.ptr()
+; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.amdgcn.dispatch.id()
+; CHECK-NEXT: [[TMP13:%.*]] = call i32 @llvm.amdgcn.lds.kernel.id()
+; CHECK-NEXT: [[IMPLICIT_ARG_PTR:%.*]] = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
+; CHECK-NEXT: call void @llvm.memcpy.p0.p4.i64(ptr [[ALLOCA_CAST]], ptr addrspace(4) [[IMPLICIT_ARG_PTR]], i64 256, i1 false)
+; CHECK-NEXT: ret void
+;
+ %alloca = alloca [256 x i8], addrspace(5)
+ %alloca.cast = addrspacecast ptr addrspace(5) %alloca to ptr
+ call i32 @llvm.amdgcn.workitem.id.x()
+ call i32 @llvm.amdgcn.workitem.id.y()
+ call i32 @llvm.amdgcn.workitem.id.z()
+ call i32 @llvm.amdgcn.workgroup.id.x()
+ call i32 @llvm.amdgcn.workgroup.id.y()
+ call i32 @llvm.amdgcn.workgroup.id.z()
+ call i32 @llvm.amdgcn.cluster.id.x()
+ call i32 @llvm.amdgcn.cluster.id.y()
+ call i32 @llvm.amdgcn.cluster.id.z()
+ call ptr addrspace(4) @llvm.amdgcn.dispatch.ptr()
+ call ptr addrspace(4) @llvm.amdgcn.queue.ptr()
+ call i64 @llvm.amdgcn.dispatch.id()
+ call i32 @llvm.amdgcn.lds.kernel.id()
+ %implicit.arg.ptr = call ptr addrspace(4) @llvm.amdgcn.implicitarg.ptr()
+ call void @llvm.memcpy.p0.p4(ptr %alloca.cast, ptr addrspace(4) %implicit.arg.ptr, i64 256, i1 false)
+ ret void
+}
+
+define amdgpu_kernel void @kernel_uses_asm_virtreg() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg(
+; CHECK-SAME: ) #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ call void asm sideeffect "; use $0", "a"(i32 poison)
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @kernel_uses_asm_virtreg_def() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_def(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT: [[DEF:%.*]] = call i32 asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ %def = call i32 asm sideeffect "; def $0", "=a"()
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @kernel_uses_asm_physreg_def_tuple() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_physreg_def_tuple(
+; CHECK-SAME: ) #[[ATTR2:[0-9]+]] {
+; CHECK-NEXT: [[DEF:%.*]] = call i64 asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ %def = call i64 asm sideeffect "; def $0", "={a[0:1]}"()
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @kernel_uses_asm_virtreg_second_arg() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_second_arg(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ call void asm sideeffect "; use $0", "v,a"(i32 poison, i32 poison)
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @kernel_uses_non_agpr_asm() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_non_agpr_asm(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ call void asm sideeffect "; use $0", "v"(i32 poison)
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @kernel_uses_asm_physreg() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_physreg(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ call void asm sideeffect "; use $0", "{a0}"(i32 poison)
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @kernel_uses_asm_physreg_tuple() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_physreg_tuple(
+; CHECK-SAME: ) #[[ATTR2]] {
+; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ call void asm sideeffect "; use $0", "{a[0:1]}"(i64 poison)
+ call void @use_most()
+ ret void
+}
+
+define void @func_uses_asm_virtreg_agpr() {
+; CHECK-LABEL: define void @func_uses_asm_virtreg_agpr(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ call void asm sideeffect "; use $0", "a"(i32 poison)
+ call void @use_most()
+ ret void
+}
+
+define void @func_uses_asm_physreg_agpr() {
+; CHECK-LABEL: define void @func_uses_asm_physreg_agpr(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ call void asm sideeffect "; use $0", "{a0}"(i32 poison)
+ call void @use_most()
+ ret void
+}
+
+define void @func_uses_asm_physreg_agpr_tuple() {
+; CHECK-LABEL: define void @func_uses_asm_physreg_agpr_tuple(
+; CHECK-SAME: ) #[[ATTR2]] {
+; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ call void asm sideeffect "; use $0", "{a[0:1]}"(i64 poison)
+ call void @use_most()
+ ret void
+}
+
+declare void @unknown()
+
+define amdgpu_kernel void @kernel_calls_extern() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_extern(
+; CHECK-SAME: ) #[[ATTR3:[0-9]+]] {
+; CHECK-NEXT: call void @unknown()
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ call void @unknown()
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @kernel_calls_extern_marked_callsite() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_extern_marked_callsite(
+; CHECK-SAME: ) #[[ATTR3]] {
+; CHECK-NEXT: call void @unknown() #[[ATTR29:[0-9]+]]
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ call void @unknown() #0
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @kernel_calls_indirect(ptr %indirect) {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_indirect(
+; CHECK-SAME: ptr [[INDIRECT:%.*]]) #[[ATTR3]] {
+; CHECK-NEXT: call void [[INDIRECT]]()
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ call void %indirect()
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @kernel_calls_indirect_marked_callsite(ptr %indirect) {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_indirect_marked_callsite(
+; CHECK-SAME: ptr [[INDIRECT:%.*]]) #[[ATTR3]] {
+; CHECK-NEXT: call void [[INDIRECT]]() #[[ATTR29]]
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ call void %indirect() #0
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @kernel_transitively_uses_agpr_asm() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_transitively_uses_agpr_asm(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT: call void @func_uses_asm_physreg_agpr()
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ call void @func_uses_asm_physreg_agpr()
+ call void @use_most()
+ ret void
+}
+
+define void @empty() {
+; CHECK-LABEL: define void @empty(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ call void @use_most()
+ ret void
+}
+
+define void @also_empty() {
+; CHECK-LABEL: define void @also_empty(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @kernel_calls_empty() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_empty(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT: call void @empty()
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ call void @empty()
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @kernel_calls_non_agpr_and_agpr() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_non_agpr_and_agpr(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT: call void @empty()
+; CHECK-NEXT: call void @func_uses_asm_physreg_agpr()
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ call void @empty()
+ call void @func_uses_asm_physreg_agpr()
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @kernel_calls_generic_intrinsic(ptr %ptr0, ptr %ptr1, i64 %size) {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_generic_intrinsic(
+; CHECK-SAME: ptr [[PTR0:%.*]], ptr [[PTR1:%.*]], i64 [[SIZE:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[PTR0]], ptr [[PTR1]], i64 [[SIZE]], i1 false)
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ call void @llvm.memcpy.p0.p0.i64(ptr %ptr0, ptr %ptr1, i64 %size, i1 false)
+ call void @use_most()
+ ret void
+}
+
+declare <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float, float, <32 x float>, i32 immarg, i32 immarg, i32 immarg)
+
+define amdgpu_kernel void @kernel_calls_mfma.f32.32x32x1f32(ptr addrspace(1) %out, float %a, float %b, <32 x float> %c) {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_mfma.f32.32x32x1f32(
+; CHECK-SAME: ptr addrspace(1) [[OUT:%.*]], float [[A:%.*]], float [[B:%.*]], <32 x float> [[C:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[RESULT:%.*]] = call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float [[A]], float [[B]], <32 x float> [[C]], i32 0, i32 0, i32 0)
+; CHECK-NEXT: store <32 x float> [[RESULT]], ptr addrspace(1) [[OUT]], align 128
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ %result = call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float %a, float %b, <32 x float> %c, i32 0, i32 0, i32 0)
+ store <32 x float> %result, ptr addrspace(1) %out
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @kernel_calls_workitem_id_x(ptr addrspace(1) %out) {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_workitem_id_x(
+; CHECK-SAME: ptr addrspace(1) [[OUT:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[RESULT:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
+; CHECK-NEXT: store i32 [[RESULT]], ptr addrspace(1) [[OUT]], align 4
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ %result = call i32 @llvm.amdgcn.workitem.id.x()
+ store i32 %result, ptr addrspace(1) %out
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @indirect_calls_none_agpr(i1 %cond) {
+; CHECK-LABEL: define amdgpu_kernel void @indirect_calls_none_agpr(
+; CHECK-SAME: i1 [[COND:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[FPTR:%.*]] = select i1 [[COND]], ptr @empty, ptr @also_empty
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq ptr [[FPTR]], @also_empty
+; CHECK-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP3:%.*]]
+; CHECK: 2:
+; CHECK-NEXT: call void @also_empty()
+; CHECK-NEXT: br label [[TMP6:%.*]]
+; CHECK: 3:
+; CHECK-NEXT: br i1 true, label [[TMP4:%.*]], label [[TMP5:%.*]]
+; CHECK: 4:
+; CHECK-NEXT: call void @empty()
+; CHECK-NEXT: br label [[TMP6]]
+; CHECK: 5:
+; CHECK-NEXT: unreachable
+; CHECK: 6:
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ %fptr = select i1 %cond, ptr @empty, ptr @also_empty
+ call void %fptr()
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @kernel_uses_asm_virtreg_def_struct_0() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_def_struct_0(
+; CHECK-SAME: ) #[[ATTR2]] {
+; CHECK-NEXT: [[DEF:%.*]] = call { i32, i32 } asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ %def = call {i32, i32} asm sideeffect "; def $0", "=a,=a"()
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @kernel_uses_asm_virtreg_use_struct_1() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_use_struct_1(
+; CHECK-SAME: ) #[[ATTR5:[0-9]+]] {
+; CHECK-NEXT: [[DEF:%.*]] = call { i32, <2 x i32> } asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ %def = call {i32, <2 x i32>} asm sideeffect "; def $0", "=a,=a"()
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @kernel_uses_asm_virtreg_use_struct_2() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_use_struct_2(
+; CHECK-SAME: ) #[[ATTR1]] {
+; CHECK-NEXT: [[DEF:%.*]] = call { i32, <2 x i32> } asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ %def = call {i32, <2 x i32>} asm sideeffect "; def $0", "=a,=v"()
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @kernel_uses_asm_virtreg_ptr_ty() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_ptr_ty(
+; CHECK-SAME: ) #[[ATTR2]] {
+; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ call void asm sideeffect "; use $0", "a"(ptr poison)
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @kernel_uses_asm_virtreg_def_ptr_ty() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_def_ptr_ty(
+; CHECK-SAME: ) #[[ATTR2]] {
+; CHECK-NEXT: [[DEF:%.*]] = call ptr asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ %def = call ptr asm sideeffect "; def $0", "=a"()
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @kernel_uses_asm_virtreg_def_vector_ptr_ty() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_def_vector_ptr_ty(
+; CHECK-SAME: ) #[[ATTR5]] {
+; CHECK-NEXT: [[DEF:%.*]] = call <2 x ptr> asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ %def = call <2 x ptr> asm sideeffect "; def $0", "=a"()
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @kernel_uses_asm_physreg_def_struct_0() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_physreg_def_struct_0(
+; CHECK-SAME: ) #[[ATTR6:[0-9]+]] {
+; CHECK-NEXT: [[DEF:%.*]] = call { i32, i32 } asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ %def = call {i32, i32} asm sideeffect "; def $0", "={a0},={a[4:5]}"()
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @kernel_uses_asm_clobber() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_clobber(
+; CHECK-SAME: ) #[[ATTR7:[0-9]+]] {
+; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ call void asm sideeffect "; clobber $0", "~{a4}"()
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @kernel_uses_asm_clobber_tuple() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_clobber_tuple(
+; CHECK-SAME: ) #[[ATTR8:[0-9]+]] {
+; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ call void asm sideeffect "; clobber $0", "~{a[10:13]}"()
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @kernel_uses_asm_clobber_oob() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_clobber_oob(
+; CHECK-SAME: ) #[[ATTR9:[0-9]+]] {
+; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ call void asm sideeffect "; clobber $0", "~{a256}"()
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @kernel_uses_asm_clobber_max() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_clobber_max(
+; CHECK-SAME: ) #[[ATTR9]] {
+; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ call void asm sideeffect "; clobber $0", "~{a255}"()
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @kernel_uses_asm_physreg_oob() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_physreg_oob(
+; CHECK-SAME: ) #[[ATTR9]] {
+; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ call void asm sideeffect "; use $0", "{a256}"(i32 poison)
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @kernel_uses_asm_virtreg_def_max_ty() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_def_max_ty(
+; CHECK-SAME: ) #[[ATTR10:[0-9]+]] {
+; CHECK-NEXT: [[DEF:%.*]] = call <32 x i32> asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ %def = call <32 x i32> asm sideeffect "; def $0", "=a"()
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @kernel_uses_asm_virtreg_use_max_ty() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_use_max_ty(
+; CHECK-SAME: ) #[[ATTR10]] {
+; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ call void asm sideeffect "; use $0", "a"(<32 x i32> poison)
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @kernel_uses_asm_virtreg_use_def_max_ty() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_use_def_max_ty(
+; CHECK-SAME: ) #[[ATTR10]] {
+; CHECK-NEXT: [[DEF:%.*]] = call <32 x i32> asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ %def = call <32 x i32> asm sideeffect "; use $0", "=a,a"(<32 x i32> poison)
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @vreg_use_exceeds_register_file() {
+; CHECK-LABEL: define amdgpu_kernel void @vreg_use_exceeds_register_file(
+; CHECK-SAME: ) #[[ATTR9]] {
+; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ call void asm sideeffect "; use $0", "a"(<257 x i32> poison)
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @vreg_def_exceeds_register_file() {
+; CHECK-LABEL: define amdgpu_kernel void @vreg_def_exceeds_register_file(
+; CHECK-SAME: ) #[[ATTR9]] {
+; CHECK-NEXT: [[DEF:%.*]] = call <257 x i32> asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ %def = call <257 x i32> asm sideeffect "; def $0", "=a"()
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @multiple() {
+; CHECK-LABEL: define amdgpu_kernel void @multiple(
+; CHECK-SAME: ) #[[ATTR10]] {
+; CHECK-NEXT: [[DEF:%.*]] = call { <16 x i32>, <8 x i32>, <8 x i32> } asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ %def = call {<16 x i32>, <8 x i32>, <8 x i32>} asm sideeffect "; def $0", "=a,=a,=a,a,a,a"(<4 x i32> splat (i32 0), <8 x i32> splat (i32 1), i64 999)
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @earlyclobber_0() {
+; CHECK-LABEL: define amdgpu_kernel void @earlyclobber_0(
+; CHECK-SAME: ) #[[ATTR11:[0-9]+]] {
+; CHECK-NEXT: [[DEF:%.*]] = call <8 x i32> asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ %def = call <8 x i32> asm sideeffect "; def $0", "=&a,a"(i32 0)
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @earlyclobber_1() {
+; CHECK-LABEL: define amdgpu_kernel void @earlyclobber_1(
+; CHECK-SAME: ) #[[ATTR12:[0-9]+]] {
+; CHECK-NEXT: [[DEF:%.*]] = call { <8 x i32>, <16 x i32> } asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ %def = call { <8 x i32>, <16 x i32 > } asm sideeffect "; def $0, $1", "=&a,=&a,a,a"(i32 0, <16 x i32> splat (i32 1))
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @physreg_a32__vreg_a256__vreg_a512() {
+; CHECK-LABEL: define amdgpu_kernel void @physreg_a32__vreg_a256__vreg_a512(
+; CHECK-SAME: ) #[[ATTR13:[0-9]+]] {
+; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ call void asm sideeffect "; use $0, $1, $2", "{a16},a,a"(i32 poison, <8 x i32> poison, <16 x i32> poison)
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @physreg_def_a32__def_vreg_a256__def_vreg_a512() {
+; CHECK-LABEL: define amdgpu_kernel void @physreg_def_a32__def_vreg_a256__def_vreg_a512(
+; CHECK-SAME: ) #[[ATTR13]] {
+; CHECK-NEXT: [[TMP1:%.*]] = call { i32, <8 x i32>, <16 x i32> } asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ call {i32, <8 x i32>, <16 x i32>} asm sideeffect "; def $0, $1, $2", "={a16},=a,=a"()
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @physreg_def_a32___def_vreg_a512_use_vreg_a256() {
+; CHECK-LABEL: define amdgpu_kernel void @physreg_def_a32___def_vreg_a512_use_vreg_a256(
+; CHECK-SAME: ) #[[ATTR14:[0-9]+]] {
+; CHECK-NEXT: [[TMP1:%.*]] = call { i32, <16 x i32> } asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ call {i32, <16 x i32>} asm sideeffect "; def $0, $1, $2", "={a16},=a,a"(<8 x i32> poison)
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @mixed_physreg_vreg_tuples_0() {
+; CHECK-LABEL: define amdgpu_kernel void @mixed_physreg_vreg_tuples_0(
+; CHECK-SAME: ) #[[ATTR11]] {
+; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ call void asm sideeffect "; use $0, $1", "{a[1:4]},a"(<4 x i32> poison, <4 x i32> poison)
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @mixed_physreg_vreg_tuples_1() {
+; CHECK-LABEL: define amdgpu_kernel void @mixed_physreg_vreg_tuples_1(
+; CHECK-SAME: ) #[[ATTR15:[0-9]+]] {
+; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ call void asm sideeffect "; use $0, $1", "a,{a[0:3]}"(<4 x i32> poison, <4 x i32> poison)
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @physreg_raises_limit() {
+; CHECK-LABEL: define amdgpu_kernel void @physreg_raises_limit(
+; CHECK-SAME: ) #[[ATTR16:[0-9]+]] {
+; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ call void asm sideeffect "; use $0, $1", "a,{a[5:8]}"(<4 x i32> poison, <4 x i32> poison)
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @physreg_tuple_alignment_raises_limit() {
+; CHECK-LABEL: define amdgpu_kernel void @physreg_tuple_alignment_raises_limit(
+; CHECK-SAME: ) #[[ATTR11]] {
+; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ call void asm sideeffect "; use $0, $1", "a,{a[1:4]}"(<4 x i32> poison, <4 x i32> poison)
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @align3_virtreg() {
+; CHECK-LABEL: define amdgpu_kernel void @align3_virtreg(
+; CHECK-SAME: ) #[[ATTR6]] {
+; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ call void asm sideeffect "; use $0, $1", "a,a"(<3 x i32> poison, <3 x i32> poison)
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @align3_align4_virtreg() {
+; CHECK-LABEL: define amdgpu_kernel void @align3_align4_virtreg(
+; CHECK-SAME: ) #[[ATTR15]] {
+; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ call void asm sideeffect "; use $0, $1", "a,a"(<3 x i32> poison, <4 x i32> poison)
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @align2_align4_virtreg() {
+; CHECK-LABEL: define amdgpu_kernel void @align2_align4_virtreg(
+; CHECK-SAME: ) #[[ATTR15]] {
+; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ call void asm sideeffect "; use $0, $1", "a,a"(<2 x i32> poison, <4 x i32> poison)
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @kernel_uses_write_register_a55() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_write_register_a55(
+; CHECK-SAME: ) #[[ATTR17:[0-9]+]] {
+; CHECK-NEXT: call void @llvm.write_register.i32(metadata [[META0:![0-9]+]], i32 0)
+; CHECK-NEXT: ret void
+;
+ call void @llvm.write_register.i64(metadata !0, i32 0)
+ ret void
+}
+
+define amdgpu_kernel void @kernel_uses_write_register_v55() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_write_register_v55(
+; CHECK-SAME: ) #[[ATTR0]] {
+; CHECK-NEXT: call void @llvm.write_register.i32(metadata [[META1:![0-9]+]], i32 0)
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ call void @llvm.write_register.i64(metadata !1, i32 0)
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @kernel_uses_write_register_a55_57() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_write_register_a55_57(
+; CHECK-SAME: ) #[[ATTR18:[0-9]+]] {
+; CHECK-NEXT: call void @llvm.write_register.i96(metadata [[META2:![0-9]+]], i96 0)
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ call void @llvm.write_register.i64(metadata !2, i96 0)
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @kernel_uses_read_register_a55(ptr addrspace(1) %ptr) {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_read_register_a55(
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR19:[0-9]+]] {
+; CHECK-NEXT: [[REG:%.*]] = call i32 @llvm.read_register.i32(metadata [[META0]])
+; CHECK-NEXT: store i32 [[REG]], ptr addrspace(1) [[PTR]], align 4
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ %reg = call i32 @llvm.read_register.i64(metadata !0)
+ store i32 %reg, ptr addrspace(1) %ptr
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @kernel_uses_read_volatile_register_a55(ptr addrspace(1) %ptr) {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_read_volatile_register_a55(
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR19]] {
+; CHECK-NEXT: [[REG:%.*]] = call i32 @llvm.read_volatile_register.i32(metadata [[META0]])
+; CHECK-NEXT: store i32 [[REG]], ptr addrspace(1) [[PTR]], align 4
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ %reg = call i32 @llvm.read_volatile_register.i64(metadata !0)
+ store i32 %reg, ptr addrspace(1) %ptr
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @kernel_uses_read_register_a56_59(ptr addrspace(1) %ptr) {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_read_register_a56_59(
+; CHECK-SAME: ptr addrspace(1) [[PTR:%.*]]) #[[ATTR20:[0-9]+]] {
+; CHECK-NEXT: [[REG:%.*]] = call i128 @llvm.read_register.i128(metadata [[META3:![0-9]+]])
+; CHECK-NEXT: store i128 [[REG]], ptr addrspace(1) [[PTR]], align 8
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ %reg = call i128 @llvm.read_register.i64(metadata !3)
+ store i128 %reg, ptr addrspace(1) %ptr
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @kernel_uses_write_register_out_of_bounds_a256() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_write_register_out_of_bounds_a256(
+; CHECK-SAME: ) #[[ATTR9]] {
+; CHECK-NEXT: call void @llvm.write_register.i32(metadata [[META4:![0-9]+]], i32 0)
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ call void @llvm.write_register.i64(metadata !4, i32 0)
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @kernel_multiple_uses() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_multiple_uses(
+; CHECK-SAME: ) #[[ATTR5]] {
+; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ call void asm sideeffect "; use $0", "a"(i64 poison)
+ call void asm sideeffect "; use $0", "a"(i32 poison)
+ call void asm sideeffect "; use $0", "a"(i128 poison)
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @kernel_multiple_defs() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_multiple_defs(
+; CHECK-SAME: ) #[[ATTR5]] {
+; CHECK-NEXT: [[TMP1:%.*]] = call i64 asm sideeffect "
+; CHECK-NEXT: [[TMP2:%.*]] = call i32 asm sideeffect "
+; CHECK-NEXT: [[TMP3:%.*]] = call i128 asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ call i64 asm sideeffect "; def $0", "=a"()
+ call i32 asm sideeffect "; def $0", "=a"()
+ call i128 asm sideeffect "; def $0", "=a"()
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @kernel_multiple_use_defs() {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_multiple_use_defs(
+; CHECK-SAME: ) #[[ATTR5]] {
+; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: [[TMP1:%.*]] = call i128 asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ call void asm sideeffect "; use $0", "a"(i32 poison)
+ call i128 asm sideeffect "; def $0", "=a"()
+ call void @use_most()
+ ret void
+}
+
+define void @callgraph_b() {
+; CHECK-LABEL: define void @callgraph_b(
+; CHECK-SAME: ) #[[ATTR15]] {
+; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> asm sideeffect "
+; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ call <4 x i32> asm sideeffect "; def $0", "=a"()
+ call void asm sideeffect "; use $0", "a"(<8 x i32> poison)
+ call void @use_most()
+ ret void
+}
+
+define void @callgraph_c() {
+; CHECK-LABEL: define void @callgraph_c(
+; CHECK-SAME: ) #[[ATTR2]] {
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 asm sideeffect "
+; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ call i32 asm sideeffect "; def $0", "=a"()
+ call void asm sideeffect "; use $0", "a"(<2 x i32> poison)
+ call void @use_most()
+ ret void
+}
+
+define void @callgraph_a(i1 %cond) {
+; CHECK-LABEL: define void @callgraph_a(
+; CHECK-SAME: i1 [[COND:%.*]]) #[[ATTR15]] {
+; CHECK-NEXT: br i1 [[COND]], label [[A:%.*]], label [[B:%.*]]
+; CHECK: a:
+; CHECK-NEXT: call void @callgraph_b()
+; CHECK-NEXT: ret void
+; CHECK: b:
+; CHECK-NEXT: call void @callgraph_c()
+; CHECK-NEXT: ret void
+;
+ br i1 %cond, label %a, label %b
+
+a:
+ call void @callgraph_b()
+ ret void
+
+b:
+ call void @callgraph_c()
+ ret void
+}
+
+
+define void @kernel_max_callgraph(i1 %cond) {
+; CHECK-LABEL: define void @kernel_max_callgraph(
+; CHECK-SAME: i1 [[COND:%.*]]) #[[ATTR15]] {
+; CHECK-NEXT: call void @callgraph_a(i1 [[COND]])
+; CHECK-NEXT: ret void
+;
+ call void @callgraph_a(i1 %cond)
+ ret void
+}
+
+define amdgpu_kernel void @kernel_uses_all_virtregs() #1 {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_all_virtregs(
+; CHECK-SAME: ) #[[ATTR21:[0-9]+]] {
+; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ call void asm sideeffect "; use $0", "a,a,a,a,a,a,a,a"(<32 x i32> poison, <32 x i32> poison, <32 x i32> poison, <32 x i32> poison, <32 x i32> poison, <32 x i32> poison, <32 x i32> poison, <32 x i32> poison)
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @kernel_uses_all_virtregs_plus_1() #1 {
+; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_all_virtregs_plus_1(
+; CHECK-SAME: ) #[[ATTR21]] {
+; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ call void asm sideeffect "; use $0", "a,a,a,a,a,a,a,a,a"(<32 x i32> poison, <32 x i32> poison, <32 x i32> poison, <32 x i32> poison, <32 x i32> poison, <32 x i32> poison, <32 x i32> poison, <32 x i32> poison, i32 poison)
+ call void @use_most()
+ ret void
+}
+
+define void @recursive() {
+; CHECK-LABEL: define void @recursive(
+; CHECK-SAME: ) #[[ATTR22:[0-9]+]] {
+; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: call void @recursive()
+; CHECK-NEXT: ret void
+;
+ call void asm sideeffect "; use $0", "a"(<7 x i32> poison)
+ call void @use_most()
+ call void @recursive()
+ ret void
+}
+
+define void @indirect_0() {
+; CHECK-LABEL: define void @indirect_0(
+; CHECK-SAME: ) #[[ATTR22]] {
+; CHECK-NEXT: call void asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ call void asm sideeffect "; use $0", "a"(<7 x i32> poison)
+ call void @use_most()
+ ret void
+}
+
+define void @indirect_1() {
+; CHECK-LABEL: define void @indirect_1(
+; CHECK-SAME: ) #[[ATTR23:[0-9]+]] {
+; CHECK-NEXT: [[TMP1:%.*]] = call <3 x i32> asm sideeffect "
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ call <3 x i32> asm sideeffect "; def $0", "=a"()
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @knowable_indirect_call(i1 %cond) {
+; CHECK-LABEL: define amdgpu_kernel void @knowable_indirect_call(
+; CHECK-SAME: i1 [[COND:%.*]]) #[[ATTR22]] {
+; CHECK-NEXT: [[FPTR:%.*]] = select i1 [[COND]], ptr @indirect_0, ptr @indirect_1
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq ptr [[FPTR]], @indirect_1
+; CHECK-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP3:%.*]]
+; CHECK: 2:
+; CHECK-NEXT: call void @indirect_1()
+; CHECK-NEXT: br label [[TMP6:%.*]]
+; CHECK: 3:
+; CHECK-NEXT: br i1 true, label [[TMP4:%.*]], label [[TMP5:%.*]]
+; CHECK: 4:
+; CHECK-NEXT: call void @indirect_0()
+; CHECK-NEXT: br label [[TMP6]]
+; CHECK: 5:
+; CHECK-NEXT: unreachable
+; CHECK: 6:
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ %fptr = select i1 %cond, ptr @indirect_0, ptr @indirect_1
+ call void %fptr()
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @calls_poison(i1 %cond) {
+; CHECK-LABEL: define amdgpu_kernel void @calls_poison(
+; CHECK-SAME: i1 [[COND:%.*]]) #[[ATTR3]] {
+; CHECK-NEXT: call void poison()
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ call void poison()
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @calls_null(i1 %cond) {
+; CHECK-LABEL: define amdgpu_kernel void @calls_null(
+; CHECK-SAME: i1 [[COND:%.*]]) #[[ATTR3]] {
+; CHECK-NEXT: call void null()
+; CHECK-NEXT: call void @use_most()
+; CHECK-NEXT: ret void
+;
+ call void null()
+ call void @use_most()
+ ret void
+}
+
+define amdgpu_kernel void @indirect_unknown(ptr %fptr) {
+; CHECK-LABEL: define amdgpu_kernel void @indirect_unknown(
+; CHECK-SAME: ptr [[FPTR:%.*]]) #[[ATTR3]] {
+; CHECK-NEXT: call void [[FPTR]]()
+; CHECK-NEXT: ret void
+;
+ call void %fptr()
+ ret void
+}
+
+attributes #0 = { "amdgpu-agpr-alloc"="0" }
+attributes #1 = { "amdgpu-waves-per-eu"="1,1" }
+
+!0 = !{!"a55"}
+!1 = !{!"v55"}
+!2 = !{!"a[55:57]"}
+!3 = !{!"a[56:59]"}
+!4 = !{!"a256"}
+
+;.
+; CHECK: attributes #[[ATTR0]] = { "amdgpu-agpr-alloc"="0" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR1]] = { "amdgpu-agpr-alloc"="1" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR2]] = { "amdgpu-agpr-alloc"="2" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR3]] = { "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR4:[0-9]+]] = { convergent nocallback nofree nosync nounwind willreturn memory(none) "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR5]] = { "amdgpu-agpr-alloc"="4" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR6]] = { "amdgpu-agpr-alloc"="6" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR7]] = { "amdgpu-agpr-alloc"="5" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR8]] = { "amdgpu-agpr-alloc"="14" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR9]] = { "amdgpu-agpr-alloc"="256" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR10]] = { "amdgpu-agpr-alloc"="32" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR11]] = { "amdgpu-agpr-alloc"="9" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR12]] = { "amdgpu-agpr-alloc"="64" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR13]] = { "amdgpu-agpr-alloc"="49" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR14]] = { "amdgpu-agpr-alloc"="33" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR15]] = { "amdgpu-agpr-alloc"="8" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR16]] = { "amdgpu-agpr-alloc"="13" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR17]] = { "amdgpu-agpr-alloc"="56" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR18]] = { "amdgpu-agpr-alloc"="58" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR19]] = { "amdgpu-agpr-alloc"="56" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR20]] = { "amdgpu-agpr-alloc"="60" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR21]] = { "amdgpu-agpr-alloc"="256" "amdgpu-waves-per-eu"="1,1" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR22]] = { "amdgpu-agpr-alloc"="7" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR23]] = { "amdgpu-agpr-alloc"="3" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR24:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR25:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR26:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(read) "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR27:[0-9]+]] = { nounwind "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR28:[0-9]+]] = { nocallback nounwind "target-cpu"="gfx90a" }
+; CHECK: attributes #[[ATTR29]] = { "amdgpu-agpr-alloc"="0" }
+;.
+; CHECK: [[META0]] = !{!"a55"}
+; CHECK: [[META1]] = !{!"v55"}
+; CHECK: [[META2]] = !{!"a[55:57]"}
+; CHECK: [[META3]] = !{!"a[56:59]"}
+; CHECK: [[META4]] = !{!"a256"}
+;.
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll
deleted file mode 100644
index 664dfa2..0000000
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-attributor-no-agpr.ll
+++ /dev/null
@@ -1,264 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --check-globals all --version 4
-; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a -passes=amdgpu-attributor %s | FileCheck %s
-
-define amdgpu_kernel void @kernel_uses_asm_virtreg() {
-; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg(
-; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
-; CHECK-NEXT: call void asm sideeffect "
-; CHECK-NEXT: ret void
-;
- call void asm sideeffect "; use $0", "a"(i32 poison)
- ret void
-}
-
-define amdgpu_kernel void @kernel_uses_asm_virtreg_def() {
-; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_def(
-; CHECK-SAME: ) #[[ATTR0]] {
-; CHECK-NEXT: [[DEF:%.*]] = call i32 asm sideeffect "
-; CHECK-NEXT: ret void
-;
- %def = call i32 asm sideeffect "; def $0", "=a"()
- ret void
-}
-
-define amdgpu_kernel void @kernel_uses_asm_physreg_def_tuple() {
-; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_physreg_def_tuple(
-; CHECK-SAME: ) #[[ATTR0]] {
-; CHECK-NEXT: [[DEF:%.*]] = call i64 asm sideeffect "
-; CHECK-NEXT: ret void
-;
- %def = call i64 asm sideeffect "; def $0", "={a[0:1]}"()
- ret void
-}
-
-define amdgpu_kernel void @kernel_uses_asm_virtreg_second_arg() {
-; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_virtreg_second_arg(
-; CHECK-SAME: ) #[[ATTR0]] {
-; CHECK-NEXT: call void asm sideeffect "
-; CHECK-NEXT: ret void
-;
- call void asm sideeffect "; use $0", "v,a"(i32 poison, i32 poison)
- ret void
-}
-
-define amdgpu_kernel void @kernel_uses_non_agpr_asm() {
-; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_non_agpr_asm(
-; CHECK-SAME: ) #[[ATTR1:[0-9]+]] {
-; CHECK-NEXT: call void asm sideeffect "
-; CHECK-NEXT: ret void
-;
- call void asm sideeffect "; use $0", "v"(i32 poison)
- ret void
-}
-
-define amdgpu_kernel void @kernel_uses_asm_physreg() {
-; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_physreg(
-; CHECK-SAME: ) #[[ATTR0]] {
-; CHECK-NEXT: call void asm sideeffect "
-; CHECK-NEXT: ret void
-;
- call void asm sideeffect "; use $0", "{a0}"(i32 poison)
- ret void
-}
-
-define amdgpu_kernel void @kernel_uses_asm_physreg_tuple() {
-; CHECK-LABEL: define amdgpu_kernel void @kernel_uses_asm_physreg_tuple(
-; CHECK-SAME: ) #[[ATTR0]] {
-; CHECK-NEXT: call void asm sideeffect "
-; CHECK-NEXT: ret void
-;
- call void asm sideeffect "; use $0", "{a[0:1]}"(i64 poison)
- ret void
-}
-
-define void @func_uses_asm_virtreg_agpr() {
-; CHECK-LABEL: define void @func_uses_asm_virtreg_agpr(
-; CHECK-SAME: ) #[[ATTR0]] {
-; CHECK-NEXT: call void asm sideeffect "
-; CHECK-NEXT: ret void
-;
- call void asm sideeffect "; use $0", "a"(i32 poison)
- ret void
-}
-
-define void @func_uses_asm_physreg_agpr() {
-; CHECK-LABEL: define void @func_uses_asm_physreg_agpr(
-; CHECK-SAME: ) #[[ATTR0]] {
-; CHECK-NEXT: call void asm sideeffect "
-; CHECK-NEXT: ret void
-;
- call void asm sideeffect "; use $0", "{a0}"(i32 poison)
- ret void
-}
-
-define void @func_uses_asm_physreg_agpr_tuple() {
-; CHECK-LABEL: define void @func_uses_asm_physreg_agpr_tuple(
-; CHECK-SAME: ) #[[ATTR0]] {
-; CHECK-NEXT: call void asm sideeffect "
-; CHECK-NEXT: ret void
-;
- call void asm sideeffect "; use $0", "{a[0:1]}"(i64 poison)
- ret void
-}
-
-declare void @unknown()
-
-define amdgpu_kernel void @kernel_calls_extern() {
-; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_extern(
-; CHECK-SAME: ) #[[ATTR2:[0-9]+]] {
-; CHECK-NEXT: call void @unknown()
-; CHECK-NEXT: ret void
-;
- call void @unknown()
- ret void
-}
-
-define amdgpu_kernel void @kernel_calls_extern_marked_callsite() {
-; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_extern_marked_callsite(
-; CHECK-SAME: ) #[[ATTR2]] {
-; CHECK-NEXT: call void @unknown() #[[ATTR6:[0-9]+]]
-; CHECK-NEXT: ret void
-;
- call void @unknown() #0
- ret void
-}
-
-define amdgpu_kernel void @kernel_calls_indirect(ptr %indirect) {
-; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_indirect(
-; CHECK-SAME: ptr [[INDIRECT:%.*]]) #[[ATTR2]] {
-; CHECK-NEXT: call void [[INDIRECT]]()
-; CHECK-NEXT: ret void
-;
- call void %indirect()
- ret void
-}
-
-define amdgpu_kernel void @kernel_calls_indirect_marked_callsite(ptr %indirect) {
-; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_indirect_marked_callsite(
-; CHECK-SAME: ptr [[INDIRECT:%.*]]) #[[ATTR2]] {
-; CHECK-NEXT: call void [[INDIRECT]]() #[[ATTR6]]
-; CHECK-NEXT: ret void
-;
- call void %indirect() #0
- ret void
-}
-
-define amdgpu_kernel void @kernel_transitively_uses_agpr_asm() {
-; CHECK-LABEL: define amdgpu_kernel void @kernel_transitively_uses_agpr_asm(
-; CHECK-SAME: ) #[[ATTR0]] {
-; CHECK-NEXT: call void @func_uses_asm_physreg_agpr()
-; CHECK-NEXT: ret void
-;
- call void @func_uses_asm_physreg_agpr()
- ret void
-}
-
-define void @empty() {
-; CHECK-LABEL: define void @empty(
-; CHECK-SAME: ) #[[ATTR1]] {
-; CHECK-NEXT: ret void
-;
- ret void
-}
-
-define void @also_empty() {
-; CHECK-LABEL: define void @also_empty(
-; CHECK-SAME: ) #[[ATTR1]] {
-; CHECK-NEXT: ret void
-;
- ret void
-}
-
-define amdgpu_kernel void @kernel_calls_empty() {
-; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_empty(
-; CHECK-SAME: ) #[[ATTR1]] {
-; CHECK-NEXT: call void @empty()
-; CHECK-NEXT: ret void
-;
- call void @empty()
- ret void
-}
-
-define amdgpu_kernel void @kernel_calls_non_agpr_and_agpr() {
-; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_non_agpr_and_agpr(
-; CHECK-SAME: ) #[[ATTR0]] {
-; CHECK-NEXT: call void @empty()
-; CHECK-NEXT: call void @func_uses_asm_physreg_agpr()
-; CHECK-NEXT: ret void
-;
- call void @empty()
- call void @func_uses_asm_physreg_agpr()
- ret void
-}
-
-define amdgpu_kernel void @kernel_calls_generic_intrinsic(ptr %ptr0, ptr %ptr1, i64 %size) {
-; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_generic_intrinsic(
-; CHECK-SAME: ptr [[PTR0:%.*]], ptr [[PTR1:%.*]], i64 [[SIZE:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr [[PTR0]], ptr [[PTR1]], i64 [[SIZE]], i1 false)
-; CHECK-NEXT: ret void
-;
- call void @llvm.memcpy.p0.p0.i64(ptr %ptr0, ptr %ptr1, i64 %size, i1 false)
- ret void
-}
-
-declare <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float, float, <32 x float>, i32 immarg, i32 immarg, i32 immarg)
-
-define amdgpu_kernel void @kernel_calls_mfma.f32.32x32x1f32(ptr addrspace(1) %out, float %a, float %b, <32 x float> %c) {
-; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_mfma.f32.32x32x1f32(
-; CHECK-SAME: ptr addrspace(1) [[OUT:%.*]], float [[A:%.*]], float [[B:%.*]], <32 x float> [[C:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[RESULT:%.*]] = call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float [[A]], float [[B]], <32 x float> [[C]], i32 0, i32 0, i32 0)
-; CHECK-NEXT: store <32 x float> [[RESULT]], ptr addrspace(1) [[OUT]], align 128
-; CHECK-NEXT: ret void
-;
- %result = call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float %a, float %b, <32 x float> %c, i32 0, i32 0, i32 0)
- store <32 x float> %result, ptr addrspace(1) %out
- ret void
-}
-
-define amdgpu_kernel void @kernel_calls_workitem_id_x(ptr addrspace(1) %out) {
-; CHECK-LABEL: define amdgpu_kernel void @kernel_calls_workitem_id_x(
-; CHECK-SAME: ptr addrspace(1) [[OUT:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[RESULT:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
-; CHECK-NEXT: store i32 [[RESULT]], ptr addrspace(1) [[OUT]], align 4
-; CHECK-NEXT: ret void
-;
- %result = call i32 @llvm.amdgcn.workitem.id.x()
- store i32 %result, ptr addrspace(1) %out
- ret void
-}
-
-define amdgpu_kernel void @indirect_calls_none_agpr(i1 %cond) {
-; CHECK-LABEL: define amdgpu_kernel void @indirect_calls_none_agpr(
-; CHECK-SAME: i1 [[COND:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[FPTR:%.*]] = select i1 [[COND]], ptr @empty, ptr @also_empty
-; CHECK-NEXT: [[TMP1:%.*]] = icmp eq ptr [[FPTR]], @also_empty
-; CHECK-NEXT: br i1 [[TMP1]], label [[TMP2:%.*]], label [[TMP3:%.*]]
-; CHECK: 2:
-; CHECK-NEXT: call void @also_empty()
-; CHECK-NEXT: br label [[TMP6:%.*]]
-; CHECK: 3:
-; CHECK-NEXT: br i1 true, label [[TMP4:%.*]], label [[TMP5:%.*]]
-; CHECK: 4:
-; CHECK-NEXT: call void @empty()
-; CHECK-NEXT: br label [[TMP6]]
-; CHECK: 5:
-; CHECK-NEXT: unreachable
-; CHECK: 6:
-; CHECK-NEXT: ret void
-;
- %fptr = select i1 %cond, ptr @empty, ptr @also_empty
- call void %fptr()
- ret void
-}
-
-
-attributes #0 = { "amdgpu-agpr-alloc"="0" }
-;.
-; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR1]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR2:[0-9]+]] = { "target-cpu"="gfx90a" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR3:[0-9]+]] = { convergent nocallback nofree nosync nounwind willreturn memory(none) "target-cpu"="gfx90a" }
-; CHECK: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) "target-cpu"="gfx90a" }
-; CHECK: attributes #[[ATTR5:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) "target-cpu"="gfx90a" }
-; CHECK: attributes #[[ATTR6]] = { "amdgpu-agpr-alloc"="0" }
-;.
diff --git a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll
index fb566e5..9283bd5 100644
--- a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa-call.ll
@@ -691,29 +691,29 @@ attributes #6 = { "enqueued-block" }
;.
; ATTRIBUTOR_HSA: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
-; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR3]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "target-cpu"="fiji" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR4]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR5]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR6]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR7]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR8]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR9]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR10]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR11]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR12]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR13]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx900" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR14]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx900" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR15:[0-9]+]] = { nounwind "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR16]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR17]] = { nounwind sanitize_address "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR18]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR3]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR4]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR5]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR6]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR7]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR8]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR9]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR10]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR11]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR12]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="fiji" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR13]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx900" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR14]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx900" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR15]] = { nounwind "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR16]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR17]] = { nounwind sanitize_address "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR18]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR19:[0-9]+]] = { nounwind sanitize_address "amdgpu-no-implicitarg-ptr" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR20:[0-9]+]] = { "enqueued-block" "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR21]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "enqueued-block" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR21]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "enqueued-block" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR22]] = { "uniform-work-group-size"="false" }
-; ATTRIBUTOR_HSA: attributes #[[ATTR23]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_HSA: attributes #[[ATTR23]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; ATTRIBUTOR_HSA: attributes #[[ATTR24]] = { nounwind }
; ATTRIBUTOR_HSA: attributes #[[ATTR25]] = { "enqueued-block" }
;.
diff --git a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll
index 484ff77..8554485 100644
--- a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll
+++ b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features-hsa.ll
@@ -474,19 +474,19 @@ attributes #1 = { nounwind }
; AKF_HSA: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 500}
;.
; HSA: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
-; HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; HSA: attributes #[[ATTR3]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; HSA: attributes #[[ATTR4]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; HSA: attributes #[[ATTR5]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; HSA: attributes #[[ATTR6]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "uniform-work-group-size"="false" }
-; HSA: attributes #[[ATTR7]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; HSA: attributes #[[ATTR8]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "uniform-work-group-size"="false" }
-; HSA: attributes #[[ATTR9]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "uniform-work-group-size"="false" }
-; HSA: attributes #[[ATTR10]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; HSA: attributes #[[ATTR11]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; HSA: attributes #[[ATTR12]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; HSA: attributes #[[ATTR13]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; HSA: attributes #[[ATTR1]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; HSA: attributes #[[ATTR2]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; HSA: attributes #[[ATTR3]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; HSA: attributes #[[ATTR4]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; HSA: attributes #[[ATTR5]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; HSA: attributes #[[ATTR6]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "uniform-work-group-size"="false" }
+; HSA: attributes #[[ATTR7]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; HSA: attributes #[[ATTR8]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "uniform-work-group-size"="false" }
+; HSA: attributes #[[ATTR9]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "uniform-work-group-size"="false" }
+; HSA: attributes #[[ATTR10]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; HSA: attributes #[[ATTR11]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; HSA: attributes #[[ATTR12]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; HSA: attributes #[[ATTR13]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
;.
; HSA: [[META0]] = !{i32 1, i32 3, i32 4, i32 10}
; HSA: [[META1]] = !{i32 1, i32 5, i32 6, i32 10}
diff --git a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features.ll b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features.ll
index 2efe024..e2a2deb 100644
--- a/llvm/test/CodeGen/AMDGPU/annotate-kernel-features.ll
+++ b/llvm/test/CodeGen/AMDGPU/annotate-kernel-features.ll
@@ -294,13 +294,13 @@ attributes #1 = { nounwind }
;.
; CHECK: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
-; CHECK: attributes #[[ATTR1]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR2]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR3]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR4]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR5]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR6]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR7]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR8]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR9]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR1]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR2]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR3]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR4]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR5]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR6]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR7]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR8]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR9]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workitem-id-x" "uniform-work-group-size"="false" }
;.
diff --git a/llvm/test/CodeGen/AMDGPU/atomic_load_local.ll b/llvm/test/CodeGen/AMDGPU/atomic_load_local.ll
index aaedb85..e67d7fdb 100644
--- a/llvm/test/CodeGen/AMDGPU/atomic_load_local.ll
+++ b/llvm/test/CodeGen/AMDGPU/atomic_load_local.ll
@@ -3,6 +3,8 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-TRUE16 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-FAKE16 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX1250,GFX1250-TRUE16 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX1250,GFX1250-FAKE16 %s
define i8 @atomic_load_monotonic_i8(ptr addrspace(3) %ptr) {
; CI-LABEL: atomic_load_monotonic_i8:
@@ -33,6 +35,14 @@ define i8 @atomic_load_monotonic_i8(ptr addrspace(3) %ptr) {
; GFX11-FAKE16-NEXT: ds_load_u8 v0, v0
; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: atomic_load_monotonic_i8:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: ds_load_u8 v0, v0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_set_pc_i64 s[30:31]
%load = load atomic i8, ptr addrspace(3) %ptr monotonic, align 1
ret i8 %load
}
@@ -66,6 +76,14 @@ define i8 @atomic_load_monotonic_i8_offset(ptr addrspace(3) %ptr) {
; GFX11-FAKE16-NEXT: ds_load_u8 v0, v0 offset:16
; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: atomic_load_monotonic_i8_offset:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: ds_load_u8 v0, v0 offset:16
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_set_pc_i64 s[30:31]
%gep = getelementptr inbounds i8, ptr addrspace(3) %ptr, i8 16
%load = load atomic i8, ptr addrspace(3) %gep monotonic, align 1
ret i8 %load
@@ -100,6 +118,14 @@ define i16 @atomic_load_monotonic_i16(ptr addrspace(3) %ptr) {
; GFX11-FAKE16-NEXT: ds_load_u16 v0, v0
; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: atomic_load_monotonic_i16:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: ds_load_u16 v0, v0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_set_pc_i64 s[30:31]
%load = load atomic i16, ptr addrspace(3) %ptr monotonic, align 2
ret i16 %load
}
@@ -133,6 +159,14 @@ define i16 @atomic_load_monotonic_i16_offset(ptr addrspace(3) %ptr) {
; GFX11-FAKE16-NEXT: ds_load_u16 v0, v0 offset:32
; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: atomic_load_monotonic_i16_offset:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: ds_load_u16 v0, v0 offset:32
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_set_pc_i64 s[30:31]
%gep = getelementptr inbounds i16, ptr addrspace(3) %ptr, i16 16
%load = load atomic i16, ptr addrspace(3) %gep monotonic, align 2
ret i16 %load
@@ -160,6 +194,14 @@ define i32 @atomic_load_monotonic_i32(ptr addrspace(3) %ptr) {
; GFX11-NEXT: ds_load_b32 v0, v0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: atomic_load_monotonic_i32:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: ds_load_b32 v0, v0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_set_pc_i64 s[30:31]
%load = load atomic i32, ptr addrspace(3) %ptr monotonic, align 4
ret i32 %load
}
@@ -186,6 +228,14 @@ define i32 @atomic_load_monotonic_i32_offset(ptr addrspace(3) %ptr) {
; GFX11-NEXT: ds_load_b32 v0, v0 offset:64
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: atomic_load_monotonic_i32_offset:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: ds_load_b32 v0, v0 offset:64
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_set_pc_i64 s[30:31]
%gep = getelementptr inbounds i32, ptr addrspace(3) %ptr, i32 16
%load = load atomic i32, ptr addrspace(3) %gep monotonic, align 4
ret i32 %load
@@ -213,6 +263,14 @@ define i64 @atomic_load_monotonic_i64(ptr addrspace(3) %ptr) {
; GFX11-NEXT: ds_load_b64 v[0:1], v0
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: atomic_load_monotonic_i64:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: ds_load_b64 v[0:1], v0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_set_pc_i64 s[30:31]
%load = load atomic i64, ptr addrspace(3) %ptr monotonic, align 8
ret i64 %load
}
@@ -239,6 +297,14 @@ define i64 @atomic_load_monotonic_i64_offset(ptr addrspace(3) %ptr) {
; GFX11-NEXT: ds_load_b64 v[0:1], v0 offset:128
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: atomic_load_monotonic_i64_offset:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: ds_load_b64 v[0:1], v0 offset:128
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_set_pc_i64 s[30:31]
%gep = getelementptr inbounds i64, ptr addrspace(3) %ptr, i32 16
%load = load atomic i64, ptr addrspace(3) %gep monotonic, align 8
ret i64 %load
@@ -266,6 +332,14 @@ define float @atomic_load_monotonic_f32_offset(ptr addrspace(3) %ptr) {
; GFX11-NEXT: ds_load_b32 v0, v0 offset:64
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: atomic_load_monotonic_f32_offset:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: ds_load_b32 v0, v0 offset:64
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_set_pc_i64 s[30:31]
%gep = getelementptr inbounds float, ptr addrspace(3) %ptr, i32 16
%load = load atomic float, ptr addrspace(3) %gep monotonic, align 4
ret float %load
@@ -293,6 +367,14 @@ define double @atomic_load_monotonic_f64_offset(ptr addrspace(3) %ptr) {
; GFX11-NEXT: ds_load_b64 v[0:1], v0 offset:128
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: atomic_load_monotonic_f64_offset:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: ds_load_b64 v[0:1], v0 offset:128
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_set_pc_i64 s[30:31]
%gep = getelementptr inbounds double, ptr addrspace(3) %ptr, i32 16
%load = load atomic double, ptr addrspace(3) %gep monotonic, align 8
ret double %load
@@ -320,6 +402,14 @@ define ptr @atomic_load_monotonic_p0i8_offset(ptr addrspace(3) %ptr) {
; GFX11-NEXT: ds_load_b64 v[0:1], v0 offset:128
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: atomic_load_monotonic_p0i8_offset:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: ds_load_b64 v[0:1], v0 offset:128
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_set_pc_i64 s[30:31]
%gep = getelementptr inbounds ptr, ptr addrspace(3) %ptr, i32 16
%load = load atomic ptr, ptr addrspace(3) %gep monotonic, align 8
ret ptr %load
@@ -347,6 +437,14 @@ define ptr addrspace(3) @atomic_load_monotonic_p3i8_offset(ptr addrspace(3) %ptr
; GFX11-NEXT: ds_load_b32 v0, v0 offset:64
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: atomic_load_monotonic_p3i8_offset:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: ds_load_b32 v0, v0 offset:64
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_set_pc_i64 s[30:31]
%gep = getelementptr inbounds ptr addrspace(3), ptr addrspace(3) %ptr, i32 16
%load = load atomic ptr addrspace(3), ptr addrspace(3) %gep monotonic, align 4
ret ptr addrspace(3) %load
@@ -381,6 +479,14 @@ define i16 @atomic_load_monotonic_f16(ptr addrspace(3) %ptr) {
; GFX11-FAKE16-NEXT: ds_load_u16 v0, v0
; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: atomic_load_monotonic_f16:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: ds_load_u16 v0, v0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_set_pc_i64 s[30:31]
%load = load atomic half, ptr addrspace(3) %ptr monotonic, align 2
%ret = bitcast half %load to i16
ret i16 %ret
@@ -415,6 +521,14 @@ define i16 @atomic_load_monotonic_f16_offset(ptr addrspace(3) %ptr) {
; GFX11-FAKE16-NEXT: ds_load_u16 v0, v0 offset:32
; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: atomic_load_monotonic_f16_offset:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: ds_load_u16 v0, v0 offset:32
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_set_pc_i64 s[30:31]
%gep = getelementptr inbounds half, ptr addrspace(3) %ptr, i32 16
%load = load atomic half, ptr addrspace(3) %gep monotonic, align 2
%ret = bitcast half %load to i16
@@ -450,6 +564,14 @@ define i16 @atomic_load_monotonic_bf16(ptr addrspace(3) %ptr) {
; GFX11-FAKE16-NEXT: ds_load_u16 v0, v0
; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: atomic_load_monotonic_bf16:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: ds_load_u16 v0, v0
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_set_pc_i64 s[30:31]
%load = load atomic bfloat, ptr addrspace(3) %ptr monotonic, align 2
%ret = bitcast bfloat %load to i16
ret i16 %ret
@@ -484,6 +606,14 @@ define i16 @atomic_load_monotonic_bf16_offset(ptr addrspace(3) %ptr) {
; GFX11-FAKE16-NEXT: ds_load_u16 v0, v0 offset:32
; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: atomic_load_monotonic_bf16_offset:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: ds_load_u16 v0, v0 offset:32
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_set_pc_i64 s[30:31]
%gep = getelementptr inbounds bfloat, ptr addrspace(3) %ptr, i32 16
%load = load atomic bfloat, ptr addrspace(3) %gep monotonic, align 2
%ret = bitcast bfloat %load to i16
@@ -491,3 +621,5 @@ define i16 @atomic_load_monotonic_bf16_offset(ptr addrspace(3) %ptr) {
}
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GCN: {{.*}}
+; GFX1250-FAKE16: {{.*}}
+; GFX1250-TRUE16: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/atomic_store_local.ll b/llvm/test/CodeGen/AMDGPU/atomic_store_local.ll
index c2bb4f00..31065f2 100644
--- a/llvm/test/CodeGen/AMDGPU/atomic_store_local.ll
+++ b/llvm/test/CodeGen/AMDGPU/atomic_store_local.ll
@@ -3,6 +3,8 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-TRUE16 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-FAKE16 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX1250,GFX1250-TRUE16 %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX1250,GFX1250-FAKE16 %s
define void @atomic_store_monotonic_i8(ptr addrspace(3) %ptr, i8 %val) {
; CI-LABEL: atomic_store_monotonic_i8:
@@ -41,6 +43,26 @@ define void @atomic_store_monotonic_i8(ptr addrspace(3) %ptr, i8 %val) {
; GFX11-FAKE16-NEXT: ds_store_b8 v0, v2
; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-TRUE16-LABEL: atomic_store_monotonic_i8:
+; GFX1250-TRUE16: ; %bb.0:
+; GFX1250-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-TRUE16-NEXT: v_add_nc_u16 v1.h, v1.l, 2
+; GFX1250-TRUE16-NEXT: ds_store_b8 v0, v1
+; GFX1250-TRUE16-NEXT: ds_store_b8_d16_hi v0, v1
+; GFX1250-TRUE16-NEXT: s_wait_dscnt 0x0
+; GFX1250-TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250-FAKE16-LABEL: atomic_store_monotonic_i8:
+; GFX1250-FAKE16: ; %bb.0:
+; GFX1250-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-FAKE16-NEXT: v_add_nc_u16 v2, v1, 2
+; GFX1250-FAKE16-NEXT: ds_store_b8 v0, v1
+; GFX1250-FAKE16-NEXT: ds_store_b8 v0, v2
+; GFX1250-FAKE16-NEXT: s_wait_dscnt 0x0
+; GFX1250-FAKE16-NEXT: s_set_pc_i64 s[30:31]
%val1 = add i8 %val, 2
store atomic i8 %val, ptr addrspace(3) %ptr monotonic, align 1
store atomic i8 %val1, ptr addrspace(3) %ptr monotonic, align 1
@@ -84,6 +106,26 @@ define void @atomic_store_monotonic_offset_i8(ptr addrspace(3) %ptr, i8 %val) {
; GFX11-FAKE16-NEXT: ds_store_b8 v0, v2 offset:16
; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-TRUE16-LABEL: atomic_store_monotonic_offset_i8:
+; GFX1250-TRUE16: ; %bb.0:
+; GFX1250-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-TRUE16-NEXT: v_add_nc_u16 v1.h, v1.l, 2
+; GFX1250-TRUE16-NEXT: ds_store_b8 v0, v1 offset:8
+; GFX1250-TRUE16-NEXT: ds_store_b8_d16_hi v0, v1 offset:16
+; GFX1250-TRUE16-NEXT: s_wait_dscnt 0x0
+; GFX1250-TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250-FAKE16-LABEL: atomic_store_monotonic_offset_i8:
+; GFX1250-FAKE16: ; %bb.0:
+; GFX1250-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-FAKE16-NEXT: v_add_nc_u16 v2, v1, 2
+; GFX1250-FAKE16-NEXT: ds_store_b8 v0, v1 offset:8
+; GFX1250-FAKE16-NEXT: ds_store_b8 v0, v2 offset:16
+; GFX1250-FAKE16-NEXT: s_wait_dscnt 0x0
+; GFX1250-FAKE16-NEXT: s_set_pc_i64 s[30:31]
%val1 = add i8 %val, 2
%gep_1 = getelementptr inbounds i8, ptr addrspace(3) %ptr, i8 8
%gep_2 = getelementptr inbounds i8, ptr addrspace(3) %ptr, i8 16
@@ -129,6 +171,26 @@ define void @atomic_store_monotonic_i16(ptr addrspace(3) %ptr, i16 %val) {
; GFX11-FAKE16-NEXT: ds_store_b16 v0, v2
; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-TRUE16-LABEL: atomic_store_monotonic_i16:
+; GFX1250-TRUE16: ; %bb.0:
+; GFX1250-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-TRUE16-NEXT: v_add_nc_u16 v1.h, v1.l, 2
+; GFX1250-TRUE16-NEXT: ds_store_b16 v0, v1
+; GFX1250-TRUE16-NEXT: ds_store_b16_d16_hi v0, v1
+; GFX1250-TRUE16-NEXT: s_wait_dscnt 0x0
+; GFX1250-TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250-FAKE16-LABEL: atomic_store_monotonic_i16:
+; GFX1250-FAKE16: ; %bb.0:
+; GFX1250-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-FAKE16-NEXT: v_add_nc_u16 v2, v1, 2
+; GFX1250-FAKE16-NEXT: ds_store_b16 v0, v1
+; GFX1250-FAKE16-NEXT: ds_store_b16 v0, v2
+; GFX1250-FAKE16-NEXT: s_wait_dscnt 0x0
+; GFX1250-FAKE16-NEXT: s_set_pc_i64 s[30:31]
%val1 = add i16 %val, 2
store atomic i16 %val, ptr addrspace(3) %ptr monotonic, align 2
store atomic i16 %val1, ptr addrspace(3) %ptr monotonic, align 2
@@ -172,6 +234,26 @@ define void @atomic_store_monotonic_offset_i16(ptr addrspace(3) %ptr, i16 %val)
; GFX11-FAKE16-NEXT: ds_store_b16 v0, v2 offset:32
; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-TRUE16-LABEL: atomic_store_monotonic_offset_i16:
+; GFX1250-TRUE16: ; %bb.0:
+; GFX1250-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-TRUE16-NEXT: v_add_nc_u16 v1.h, v1.l, 2
+; GFX1250-TRUE16-NEXT: ds_store_b16 v0, v1 offset:32
+; GFX1250-TRUE16-NEXT: ds_store_b16_d16_hi v0, v1 offset:32
+; GFX1250-TRUE16-NEXT: s_wait_dscnt 0x0
+; GFX1250-TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250-FAKE16-LABEL: atomic_store_monotonic_offset_i16:
+; GFX1250-FAKE16: ; %bb.0:
+; GFX1250-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-FAKE16-NEXT: v_add_nc_u16 v2, v1, 2
+; GFX1250-FAKE16-NEXT: ds_store_b16 v0, v1 offset:32
+; GFX1250-FAKE16-NEXT: ds_store_b16 v0, v2 offset:32
+; GFX1250-FAKE16-NEXT: s_wait_dscnt 0x0
+; GFX1250-FAKE16-NEXT: s_set_pc_i64 s[30:31]
%val1 = add i16 %val, 2
%gep = getelementptr inbounds i16, ptr addrspace(3) %ptr, i16 16
store atomic i16 %val, ptr addrspace(3) %gep monotonic, align 2
@@ -201,6 +283,14 @@ define void @atomic_store_monotonic_i32(ptr addrspace(3) %ptr, i32 %val) {
; GFX11-NEXT: ds_store_b32 v0, v1
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: atomic_store_monotonic_i32:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_set_pc_i64 s[30:31]
store atomic i32 %val, ptr addrspace(3) %ptr monotonic, align 4
ret void
}
@@ -227,6 +317,14 @@ define void @atomic_store_monotonic_offset_i32(ptr addrspace(3) %ptr, i32 %val)
; GFX11-NEXT: ds_store_b32 v0, v1 offset:64
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: atomic_store_monotonic_offset_i32:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: ds_store_b32 v0, v1 offset:64
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_set_pc_i64 s[30:31]
%gep = getelementptr inbounds i32, ptr addrspace(3) %ptr, i32 16
store atomic i32 %val, ptr addrspace(3) %gep monotonic, align 4
ret void
@@ -254,6 +352,15 @@ define void @atomic_store_monotonic_i64(ptr addrspace(3) %ptr, i64 %val) {
; GFX11-NEXT: ds_store_b64 v0, v[1:2]
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: atomic_store_monotonic_i64:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v2, v1
+; GFX1250-NEXT: ds_store_b64 v0, v[2:3]
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_set_pc_i64 s[30:31]
store atomic i64 %val, ptr addrspace(3) %ptr monotonic, align 8
ret void
}
@@ -280,6 +387,15 @@ define void @atomic_store_monotonic_offset_i64(ptr addrspace(3) %ptr, i64 %val)
; GFX11-NEXT: ds_store_b64 v0, v[1:2] offset:128
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: atomic_store_monotonic_offset_i64:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v2, v1
+; GFX1250-NEXT: ds_store_b64 v0, v[2:3] offset:128
+; GFX1250-NEXT: s_wait_dscnt 0x0
+; GFX1250-NEXT: s_set_pc_i64 s[30:31]
%gep = getelementptr inbounds i64, ptr addrspace(3) %ptr, i64 16
store atomic i64 %val, ptr addrspace(3) %gep monotonic, align 8
ret void
@@ -322,6 +438,26 @@ define void @atomic_store_monotonic_f16(ptr addrspace(3) %ptr, i16 %arg.val) {
; GFX11-FAKE16-NEXT: ds_store_b16 v0, v2
; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-TRUE16-LABEL: atomic_store_monotonic_f16:
+; GFX1250-TRUE16: ; %bb.0:
+; GFX1250-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-TRUE16-NEXT: v_add_nc_u16 v1.h, v1.l, 2
+; GFX1250-TRUE16-NEXT: ds_store_b16 v0, v1
+; GFX1250-TRUE16-NEXT: ds_store_b16_d16_hi v0, v1
+; GFX1250-TRUE16-NEXT: s_wait_dscnt 0x0
+; GFX1250-TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250-FAKE16-LABEL: atomic_store_monotonic_f16:
+; GFX1250-FAKE16: ; %bb.0:
+; GFX1250-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-FAKE16-NEXT: v_add_nc_u16 v2, v1, 2
+; GFX1250-FAKE16-NEXT: ds_store_b16 v0, v1
+; GFX1250-FAKE16-NEXT: ds_store_b16 v0, v2
+; GFX1250-FAKE16-NEXT: s_wait_dscnt 0x0
+; GFX1250-FAKE16-NEXT: s_set_pc_i64 s[30:31]
%arg.val1 = add i16 %arg.val, 2
%val = bitcast i16 %arg.val to half
%val1 = bitcast i16 %arg.val1 to half
@@ -367,6 +503,26 @@ define void @atomic_store_monotonic_offset_f16(ptr addrspace(3) %ptr, i16 %arg.v
; GFX11-FAKE16-NEXT: ds_store_b16 v0, v2 offset:32
; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-TRUE16-LABEL: atomic_store_monotonic_offset_f16:
+; GFX1250-TRUE16: ; %bb.0:
+; GFX1250-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-TRUE16-NEXT: v_add_nc_u16 v1.h, v1.l, 2
+; GFX1250-TRUE16-NEXT: ds_store_b16 v0, v1 offset:32
+; GFX1250-TRUE16-NEXT: ds_store_b16_d16_hi v0, v1 offset:32
+; GFX1250-TRUE16-NEXT: s_wait_dscnt 0x0
+; GFX1250-TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250-FAKE16-LABEL: atomic_store_monotonic_offset_f16:
+; GFX1250-FAKE16: ; %bb.0:
+; GFX1250-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-FAKE16-NEXT: v_add_nc_u16 v2, v1, 2
+; GFX1250-FAKE16-NEXT: ds_store_b16 v0, v1 offset:32
+; GFX1250-FAKE16-NEXT: ds_store_b16 v0, v2 offset:32
+; GFX1250-FAKE16-NEXT: s_wait_dscnt 0x0
+; GFX1250-FAKE16-NEXT: s_set_pc_i64 s[30:31]
%arg.val1 = add i16 %arg.val, 2
%val1 = bitcast i16 %arg.val1 to half
%val = bitcast i16 %arg.val to half
@@ -413,6 +569,26 @@ define void @atomic_store_monotonic_bf16(ptr addrspace(3) %ptr, i16 %arg.val) {
; GFX11-FAKE16-NEXT: ds_store_b16 v0, v2
; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-TRUE16-LABEL: atomic_store_monotonic_bf16:
+; GFX1250-TRUE16: ; %bb.0:
+; GFX1250-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-TRUE16-NEXT: v_add_nc_u16 v1.h, v1.l, 2
+; GFX1250-TRUE16-NEXT: ds_store_b16 v0, v1
+; GFX1250-TRUE16-NEXT: ds_store_b16_d16_hi v0, v1
+; GFX1250-TRUE16-NEXT: s_wait_dscnt 0x0
+; GFX1250-TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250-FAKE16-LABEL: atomic_store_monotonic_bf16:
+; GFX1250-FAKE16: ; %bb.0:
+; GFX1250-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-FAKE16-NEXT: v_add_nc_u16 v2, v1, 2
+; GFX1250-FAKE16-NEXT: ds_store_b16 v0, v1
+; GFX1250-FAKE16-NEXT: ds_store_b16 v0, v2
+; GFX1250-FAKE16-NEXT: s_wait_dscnt 0x0
+; GFX1250-FAKE16-NEXT: s_set_pc_i64 s[30:31]
%arg.val1 = add i16 %arg.val, 2
%val1 = bitcast i16 %arg.val1 to bfloat
%val = bitcast i16 %arg.val to bfloat
@@ -458,6 +634,26 @@ define void @atomic_store_monotonic_offset_bf16(ptr addrspace(3) %ptr, i16 %arg.
; GFX11-FAKE16-NEXT: ds_store_b16 v0, v2 offset:32
; GFX11-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-TRUE16-LABEL: atomic_store_monotonic_offset_bf16:
+; GFX1250-TRUE16: ; %bb.0:
+; GFX1250-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-TRUE16-NEXT: v_add_nc_u16 v1.h, v1.l, 2
+; GFX1250-TRUE16-NEXT: ds_store_b16 v0, v1 offset:32
+; GFX1250-TRUE16-NEXT: ds_store_b16_d16_hi v0, v1 offset:32
+; GFX1250-TRUE16-NEXT: s_wait_dscnt 0x0
+; GFX1250-TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250-FAKE16-LABEL: atomic_store_monotonic_offset_bf16:
+; GFX1250-FAKE16: ; %bb.0:
+; GFX1250-FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-FAKE16-NEXT: v_add_nc_u16 v2, v1, 2
+; GFX1250-FAKE16-NEXT: ds_store_b16 v0, v1 offset:32
+; GFX1250-FAKE16-NEXT: ds_store_b16 v0, v2 offset:32
+; GFX1250-FAKE16-NEXT: s_wait_dscnt 0x0
+; GFX1250-FAKE16-NEXT: s_set_pc_i64 s[30:31]
%arg.val1 = add i16 %arg.val, 2
%val1 = bitcast i16 %arg.val1 to bfloat
%val = bitcast i16 %arg.val to bfloat
diff --git a/llvm/test/CodeGen/AMDGPU/attributor-flatscratchinit-undefined-behavior.ll b/llvm/test/CodeGen/AMDGPU/attributor-flatscratchinit-undefined-behavior.ll
index f63dd6e..c90611f 100644
--- a/llvm/test/CodeGen/AMDGPU/attributor-flatscratchinit-undefined-behavior.ll
+++ b/llvm/test/CodeGen/AMDGPU/attributor-flatscratchinit-undefined-behavior.ll
@@ -147,10 +147,10 @@ define amdgpu_kernel void @call_calls_intrin_ascast_cc_kernel(ptr addrspace(3) %
attributes #0 = { "amdgpu-no-flat-scratch-init" }
;.
-; GFX9: attributes #[[ATTR0]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx900" "uniform-work-group-size"="false" }
+; GFX9: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx900" "uniform-work-group-size"="false" }
; GFX9: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) "target-cpu"="gfx900" }
;.
-; GFX10: attributes #[[ATTR0]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx1010" "uniform-work-group-size"="false" }
+; GFX10: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx1010" "uniform-work-group-size"="false" }
; GFX10: attributes #[[ATTR1:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) "target-cpu"="gfx1010" }
;.
; GFX9: [[META0]] = !{i32 1, i32 5, i32 6, i32 10}
diff --git a/llvm/test/CodeGen/AMDGPU/attributor-flatscratchinit.ll b/llvm/test/CodeGen/AMDGPU/attributor-flatscratchinit.ll
index 60cd252..c005695a 100644
--- a/llvm/test/CodeGen/AMDGPU/attributor-flatscratchinit.ll
+++ b/llvm/test/CodeGen/AMDGPU/attributor-flatscratchinit.ll
@@ -723,7 +723,7 @@ define void @also_empty() {
define amdgpu_kernel void @indirect_call_known_callees(i1 %cond) {
; GFX9-LABEL: define amdgpu_kernel void @indirect_call_known_callees(
-; GFX9-SAME: i1 [[COND:%.*]]) #[[ATTR3:[0-9]+]] {
+; GFX9-SAME: i1 [[COND:%.*]]) #[[ATTR0]] {
; GFX9-NEXT: [[FPTR:%.*]] = select i1 [[COND]], ptr @empty, ptr @also_empty
; GFX9-NEXT: [[TMP1:%.*]] = icmp eq ptr [[FPTR]], @also_empty
; GFX9-NEXT: br i1 [[TMP1]], label %[[BB2:.*]], label %[[BB3:.*]]
@@ -741,7 +741,7 @@ define amdgpu_kernel void @indirect_call_known_callees(i1 %cond) {
; GFX9-NEXT: ret void
;
; GFX10-LABEL: define amdgpu_kernel void @indirect_call_known_callees(
-; GFX10-SAME: i1 [[COND:%.*]]) #[[ATTR3:[0-9]+]] {
+; GFX10-SAME: i1 [[COND:%.*]]) #[[ATTR0]] {
; GFX10-NEXT: [[FPTR:%.*]] = select i1 [[COND]], ptr @empty, ptr @also_empty
; GFX10-NEXT: [[TMP1:%.*]] = icmp eq ptr [[FPTR]], @also_empty
; GFX10-NEXT: br i1 [[TMP1]], label %[[BB2:.*]], label %[[BB3:.*]]
@@ -767,13 +767,13 @@ declare i32 @llvm.amdgcn.workgroup.id.x()
define void @use_intrinsic_workitem_id_x() {
; GFX9-LABEL: define void @use_intrinsic_workitem_id_x(
-; GFX9-SAME: ) #[[ATTR5:[0-9]+]] {
+; GFX9-SAME: ) #[[ATTR4:[0-9]+]] {
; GFX9-NEXT: [[VAL:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; GFX9-NEXT: store volatile i32 [[VAL]], ptr addrspace(1) null, align 4
; GFX9-NEXT: ret void
;
; GFX10-LABEL: define void @use_intrinsic_workitem_id_x(
-; GFX10-SAME: ) #[[ATTR5:[0-9]+]] {
+; GFX10-SAME: ) #[[ATTR4:[0-9]+]] {
; GFX10-NEXT: [[VAL:%.*]] = call i32 @llvm.amdgcn.workitem.id.x()
; GFX10-NEXT: store volatile i32 [[VAL]], ptr addrspace(1) null, align 4
; GFX10-NEXT: ret void
@@ -803,12 +803,12 @@ define amdgpu_kernel void @use_intrinsic_workitem_id_x_cc_kernel() {
define void @call_use_intrinsic_workitem_id_x() {
; GFX9-LABEL: define void @call_use_intrinsic_workitem_id_x(
-; GFX9-SAME: ) #[[ATTR5]] {
+; GFX9-SAME: ) #[[ATTR4]] {
; GFX9-NEXT: call void @use_intrinsic_workitem_id_x()
; GFX9-NEXT: ret void
;
; GFX10-LABEL: define void @call_use_intrinsic_workitem_id_x(
-; GFX10-SAME: ) #[[ATTR5]] {
+; GFX10-SAME: ) #[[ATTR4]] {
; GFX10-NEXT: call void @use_intrinsic_workitem_id_x()
; GFX10-NEXT: ret void
;
@@ -818,12 +818,12 @@ define void @call_use_intrinsic_workitem_id_x() {
define amdgpu_kernel void @call_use_intrinsic_workitem_id_x_cc_kernel() {
; GFX9-LABEL: define amdgpu_kernel void @call_use_intrinsic_workitem_id_x_cc_kernel(
-; GFX9-SAME: ) #[[ATTR5]] {
+; GFX9-SAME: ) #[[ATTR4]] {
; GFX9-NEXT: call void @use_intrinsic_workitem_id_x()
; GFX9-NEXT: ret void
;
; GFX10-LABEL: define amdgpu_kernel void @call_use_intrinsic_workitem_id_x_cc_kernel(
-; GFX10-SAME: ) #[[ATTR5]] {
+; GFX10-SAME: ) #[[ATTR4]] {
; GFX10-NEXT: call void @use_intrinsic_workitem_id_x()
; GFX10-NEXT: ret void
;
@@ -851,12 +851,12 @@ define amdgpu_kernel void @calls_intrin_ascast_cc_kernel(ptr addrspace(3) %ptr)
define amdgpu_kernel void @with_inline_asm() {
; GFX9-LABEL: define amdgpu_kernel void @with_inline_asm(
-; GFX9-SAME: ) #[[ATTR3]] {
+; GFX9-SAME: ) #[[ATTR0]] {
; GFX9-NEXT: call void asm sideeffect "
; GFX9-NEXT: ret void
;
; GFX10-LABEL: define amdgpu_kernel void @with_inline_asm(
-; GFX10-SAME: ) #[[ATTR3]] {
+; GFX10-SAME: ) #[[ATTR0]] {
; GFX10-NEXT: call void asm sideeffect "
; GFX10-NEXT: ret void
;
@@ -865,19 +865,17 @@ define amdgpu_kernel void @with_inline_asm() {
}
;.
-; GFX9: attributes #[[ATTR0]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx900" "uniform-work-group-size"="false" }
-; GFX9: attributes #[[ATTR1]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx900" "uniform-work-group-size"="false" }
+; GFX9: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx900" "uniform-work-group-size"="false" }
+; GFX9: attributes #[[ATTR1]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx900" "uniform-work-group-size"="false" }
; GFX9: attributes #[[ATTR2]] = { "target-cpu"="gfx900" "uniform-work-group-size"="false" }
-; GFX9: attributes #[[ATTR3]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx900" "uniform-work-group-size"="false" }
-; GFX9: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) "target-cpu"="gfx900" }
-; GFX9: attributes #[[ATTR5]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx900" "uniform-work-group-size"="false" }
+; GFX9: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) "target-cpu"="gfx900" }
+; GFX9: attributes #[[ATTR4]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx900" "uniform-work-group-size"="false" }
;.
-; GFX10: attributes #[[ATTR0]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx1010" "uniform-work-group-size"="false" }
-; GFX10: attributes #[[ATTR1]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx1010" "uniform-work-group-size"="false" }
+; GFX10: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx1010" "uniform-work-group-size"="false" }
+; GFX10: attributes #[[ATTR1]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx1010" "uniform-work-group-size"="false" }
; GFX10: attributes #[[ATTR2]] = { "target-cpu"="gfx1010" "uniform-work-group-size"="false" }
-; GFX10: attributes #[[ATTR3]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx1010" "uniform-work-group-size"="false" }
-; GFX10: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) "target-cpu"="gfx1010" }
-; GFX10: attributes #[[ATTR5]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx1010" "uniform-work-group-size"="false" }
+; GFX10: attributes #[[ATTR3:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) "target-cpu"="gfx1010" }
+; GFX10: attributes #[[ATTR4]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx1010" "uniform-work-group-size"="false" }
;.
; GFX9: [[META0]] = !{i32 2, i32 10}
; GFX9: [[META1]] = !{i32 1, i32 2, i32 3, i32 10}
diff --git a/llvm/test/CodeGen/AMDGPU/bf16.ll b/llvm/test/CodeGen/AMDGPU/bf16.ll
index 6b5647e..4b14dc6 100644
--- a/llvm/test/CodeGen/AMDGPU/bf16.ll
+++ b/llvm/test/CodeGen/AMDGPU/bf16.ll
@@ -7,11 +7,9 @@
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1010 | FileCheck %s -check-prefixes=GFX10
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 | FileCheck %s -check-prefixes=GFX11,GFX11TRUE16
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 | FileCheck %s -check-prefixes=GFX11,GFX11FAKE16
-; xUN: llc < %s -mtriple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 | FileCheck %s -check-prefixes=GFX1250,GFX1250TRUE16
+; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1250 -mattr=+real-true16 | FileCheck %s -check-prefixes=GFX1250,GFX1250TRUE16
; RUN: llc < %s -mtriple=amdgcn -mcpu=gfx1250 -mattr=-real-true16 | FileCheck %s -check-prefixes=GFX1250,GFX1250FAKE16
-; FIXME: real-true16 version of gfx1250 test fails
-
define void @test_load_store(ptr addrspace(1) %in, ptr addrspace(1) %out) {
; GCN-LABEL: test_load_store:
; GCN: ; %bb.0:
@@ -2393,15 +2391,25 @@ define void @test_store_fpimm(ptr addrspace(1) %ptr0, ptr addrspace(1) %ptr1) {
; GFX11FAKE16-NEXT: global_store_b16 v[2:3], v5, off
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: test_store_fpimm:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_mov_b32_e32 v4, 0x3f80
-; GFX1250-NEXT: v_mov_b32_e32 v5, 0x4228
-; GFX1250-NEXT: global_store_b16 v[0:1], v4, off
-; GFX1250-NEXT: global_store_b16 v[2:3], v5, off
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: test_store_fpimm:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v4.l, 0x3f80
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v4.h, 0x4228
+; GFX1250TRUE16-NEXT: global_store_b16 v[0:1], v4, off
+; GFX1250TRUE16-NEXT: global_store_d16_hi_b16 v[2:3], v4, off
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: test_store_fpimm:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_mov_b32_e32 v4, 0x3f80
+; GFX1250FAKE16-NEXT: v_mov_b32_e32 v5, 0x4228
+; GFX1250FAKE16-NEXT: global_store_b16 v[0:1], v4, off
+; GFX1250FAKE16-NEXT: global_store_b16 v[2:3], v5, off
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
store bfloat 1.0, ptr addrspace(1) %ptr0
store bfloat 42.0, ptr addrspace(1) %ptr1
ret void
@@ -3796,13 +3804,21 @@ define amdgpu_gfx void @test_inreg_arg_store(bfloat inreg %in, ptr addrspace(1)
; GFX11FAKE16-NEXT: global_store_b16 v[0:1], v2, off
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: test_inreg_arg_store:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_mov_b32_e32 v2, s4
-; GFX1250-NEXT: global_store_b16 v[0:1], v2, off
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: test_inreg_arg_store:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v2.l, s4
+; GFX1250TRUE16-NEXT: global_store_b16 v[0:1], v2, off
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: test_inreg_arg_store:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_mov_b32_e32 v2, s4
+; GFX1250FAKE16-NEXT: global_store_b16 v[0:1], v2, off
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
store bfloat %in, ptr addrspace(1) %out
ret void
}
@@ -3866,12 +3882,20 @@ define bfloat @test_byval(ptr addrspace(5) byval(bfloat) %bv, bfloat %val) {
; GFX11FAKE16-NEXT: scratch_store_b16 off, v0, s32
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: test_byval:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: scratch_store_b16 off, v0, s32
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: test_byval:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.l, v0.l
+; GFX1250TRUE16-NEXT: scratch_store_b16 off, v1, s32
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: test_byval:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: scratch_store_b16 off, v0, s32
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
store bfloat %val, ptr addrspace(5) %bv
%retval = load bfloat, ptr addrspace(5) %bv
ret bfloat %retval
@@ -6708,27 +6732,50 @@ define { <32 x i32>, bfloat } @test_overflow_stack(bfloat %a, <32 x i32> %b) {
; GFX11FAKE16-NEXT: scratch_store_b16 v0, v1, off offset:128
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: test_overflow_stack:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: s_clause 0x2
-; GFX1250-NEXT: scratch_load_b32 v33, off, s32 offset:8
-; GFX1250-NEXT: scratch_load_b32 v32, off, s32 offset:4
-; GFX1250-NEXT: scratch_load_b32 v31, off, s32
-; GFX1250-NEXT: s_clause 0x5
-; GFX1250-NEXT: scratch_store_b128 v0, v[22:25], off offset:80
-; GFX1250-NEXT: scratch_store_b128 v0, v[18:21], off offset:64
-; GFX1250-NEXT: scratch_store_b128 v0, v[14:17], off offset:48
-; GFX1250-NEXT: scratch_store_b128 v0, v[10:13], off offset:32
-; GFX1250-NEXT: scratch_store_b128 v0, v[6:9], off offset:16
-; GFX1250-NEXT: scratch_store_b128 v0, v[2:5], off
-; GFX1250-NEXT: s_wait_loadcnt 0x0
-; GFX1250-NEXT: s_clause 0x2
-; GFX1250-NEXT: scratch_store_b128 v0, v[30:33], off offset:112
-; GFX1250-NEXT: scratch_store_b128 v0, v[26:29], off offset:96
-; GFX1250-NEXT: scratch_store_b16 v0, v1, off offset:128
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: test_overflow_stack:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: s_clause 0x2
+; GFX1250TRUE16-NEXT: scratch_load_b32 v33, off, s32 offset:8
+; GFX1250TRUE16-NEXT: scratch_load_b32 v32, off, s32 offset:4
+; GFX1250TRUE16-NEXT: scratch_load_b32 v31, off, s32
+; GFX1250TRUE16-NEXT: s_clause 0x3
+; GFX1250TRUE16-NEXT: scratch_store_b128 v0, v[22:25], off offset:80
+; GFX1250TRUE16-NEXT: scratch_store_b128 v0, v[18:21], off offset:64
+; GFX1250TRUE16-NEXT: scratch_store_b128 v0, v[14:17], off offset:48
+; GFX1250TRUE16-NEXT: scratch_store_b128 v0, v[10:13], off offset:32
+; GFX1250TRUE16-NEXT: s_clause 0x1
+; GFX1250TRUE16-NEXT: scratch_store_b128 v0, v[6:9], off offset:16
+; GFX1250TRUE16-NEXT: scratch_store_b128 v0, v[2:5], off
+; GFX1250TRUE16-NEXT: s_wait_loadcnt 0x0
+; GFX1250TRUE16-NEXT: s_clause 0x2
+; GFX1250TRUE16-NEXT: scratch_store_b128 v0, v[30:33], off offset:112
+; GFX1250TRUE16-NEXT: scratch_store_b128 v0, v[26:29], off offset:96
+; GFX1250TRUE16-NEXT: scratch_store_b16 v0, v1, off offset:128
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: test_overflow_stack:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: s_clause 0x2
+; GFX1250FAKE16-NEXT: scratch_load_b32 v33, off, s32 offset:8
+; GFX1250FAKE16-NEXT: scratch_load_b32 v32, off, s32 offset:4
+; GFX1250FAKE16-NEXT: scratch_load_b32 v31, off, s32
+; GFX1250FAKE16-NEXT: s_clause 0x5
+; GFX1250FAKE16-NEXT: scratch_store_b128 v0, v[22:25], off offset:80
+; GFX1250FAKE16-NEXT: scratch_store_b128 v0, v[18:21], off offset:64
+; GFX1250FAKE16-NEXT: scratch_store_b128 v0, v[14:17], off offset:48
+; GFX1250FAKE16-NEXT: scratch_store_b128 v0, v[10:13], off offset:32
+; GFX1250FAKE16-NEXT: scratch_store_b128 v0, v[6:9], off offset:16
+; GFX1250FAKE16-NEXT: scratch_store_b128 v0, v[2:5], off
+; GFX1250FAKE16-NEXT: s_wait_loadcnt 0x0
+; GFX1250FAKE16-NEXT: s_clause 0x2
+; GFX1250FAKE16-NEXT: scratch_store_b128 v0, v[30:33], off offset:112
+; GFX1250FAKE16-NEXT: scratch_store_b128 v0, v[26:29], off offset:96
+; GFX1250FAKE16-NEXT: scratch_store_b16 v0, v1, off offset:128
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%ins.0 = insertvalue { <32 x i32>, bfloat } poison, <32 x i32> %b, 0
%ins.1 = insertvalue { <32 x i32>, bfloat } %ins.0 ,bfloat %a, 1
ret { <32 x i32>, bfloat } %ins.1
@@ -10726,15 +10773,29 @@ define bfloat @v_fadd_bf16(bfloat %a, bfloat %b) {
; GFX11FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_fadd_bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_dual_lshlrev_b32 v1, 16, v1 :: v_dual_lshlrev_b32 v0, 16, v0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT: v_add_f32_e32 v0, v0, v1
-; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_fadd_bf16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v2.l, 0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v2.h, v1.l
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.h, v0.l
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l
+; GFX1250TRUE16-NEXT: v_add_f32_e32 v0, v1, v2
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_fadd_bf16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_dual_lshlrev_b32 v1, 16, v1 :: v_dual_lshlrev_b32 v0, 16, v0
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_add_f32_e32 v0, v0, v1
+; GFX1250FAKE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = fadd bfloat %a, %b
ret bfloat %op
}
@@ -15268,15 +15329,26 @@ define bfloat @v_fadd_bf16_fpimm_0(bfloat %arg0) {
; GFX11FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_fadd_bf16_fpimm_0:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_lshlrev_b32_e32 v0, 16, v0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT: v_add_f32_e32 v0, 1.0, v0
-; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_fadd_bf16_fpimm_0:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.l, 0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.h, v0.l
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_add_f32_e32 v0, 1.0, v1
+; GFX1250TRUE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_fadd_bf16_fpimm_0:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_add_f32_e32 v0, 1.0, v0
+; GFX1250FAKE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%add = fadd bfloat %arg0, 1.0
ret bfloat %add
}
@@ -15382,15 +15454,26 @@ define bfloat @v_fadd_bf16_fpimm_1(bfloat %arg0) {
; GFX11FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_fadd_bf16_fpimm_1:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_lshlrev_b32_e32 v0, 16, v0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT: v_add_f32_e32 v0, 0x42280000, v0
-; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_fadd_bf16_fpimm_1:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.l, 0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.h, v0.l
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_add_f32_e32 v0, 0x42280000, v1
+; GFX1250TRUE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_fadd_bf16_fpimm_1:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_add_f32_e32 v0, 0x42280000, v0
+; GFX1250FAKE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%add = fadd bfloat %arg0, 42.0
ret bfloat %add
}
@@ -15507,15 +15590,29 @@ define bfloat @v_fsub_bf16(bfloat %a, bfloat %b) {
; GFX11FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_fsub_bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_dual_lshlrev_b32 v1, 16, v1 :: v_dual_lshlrev_b32 v0, 16, v0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_fsub_bf16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v2.l, 0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v2.h, v1.l
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.h, v0.l
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l
+; GFX1250TRUE16-NEXT: v_sub_f32_e32 v0, v1, v2
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_fsub_bf16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_dual_lshlrev_b32 v1, 16, v1 :: v_dual_lshlrev_b32 v0, 16, v0
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX1250FAKE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = fsub bfloat %a, %b
ret bfloat %op
}
@@ -15931,21 +16028,37 @@ define <3 x bfloat> @v_fsub_v3bf16(<3 x bfloat> %a, <3 x bfloat> %b) {
; GFX11FAKE16-NEXT: v_alignbit_b32 v1, s0, v1, 16
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_fsub_v3bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_lshlrev_b32_e32 v3, 16, v3
-; GFX1250-NEXT: v_and_b32_e32 v4, 0xffff0000, v2
-; GFX1250-NEXT: v_and_b32_e32 v5, 0xffff0000, v0
-; GFX1250-NEXT: v_dual_lshlrev_b32 v2, 16, v2 :: v_dual_lshlrev_b32 v0, 16, v0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT: v_dual_sub_f32 v4, v5, v4 :: v_dual_lshlrev_b32 v1, 16, v1
-; GFX1250-NEXT: v_dual_sub_f32 v0, v0, v2 :: v_dual_sub_f32 v1, v1, v3
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, v0, v4
-; GFX1250-NEXT: v_cvt_pk_bf16_f32 v1, v1, s0
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_fsub_v3bf16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_dual_lshlrev_b32 v3, 16, v3 :: v_dual_lshlrev_b32 v1, 16, v1
+; GFX1250TRUE16-NEXT: v_and_b32_e32 v4, 0xffff0000, v2
+; GFX1250TRUE16-NEXT: v_and_b32_e32 v5, 0xffff0000, v0
+; GFX1250TRUE16-NEXT: v_dual_lshlrev_b32 v2, 16, v2 :: v_dual_lshlrev_b32 v0, 16, v0
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250TRUE16-NEXT: v_sub_f32_e32 v1, v1, v3
+; GFX1250TRUE16-NEXT: v_dual_sub_f32 v3, v5, v4 :: v_dual_sub_f32 v0, v0, v2
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250TRUE16-NEXT: v_cvt_pk_bf16_f32 v1, v1, s0
+; GFX1250TRUE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, v3
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_fsub_v3bf16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_lshlrev_b32_e32 v3, 16, v3
+; GFX1250FAKE16-NEXT: v_and_b32_e32 v4, 0xffff0000, v2
+; GFX1250FAKE16-NEXT: v_and_b32_e32 v5, 0xffff0000, v0
+; GFX1250FAKE16-NEXT: v_dual_lshlrev_b32 v2, 16, v2 :: v_dual_lshlrev_b32 v0, 16, v0
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_dual_sub_f32 v4, v5, v4 :: v_dual_lshlrev_b32 v1, 16, v1
+; GFX1250FAKE16-NEXT: v_dual_sub_f32 v0, v0, v2 :: v_dual_sub_f32 v1, v1, v3
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250FAKE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, v4
+; GFX1250FAKE16-NEXT: v_cvt_pk_bf16_f32 v1, v1, s0
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = fsub <3 x bfloat> %a, %b
ret <3 x bfloat> %op
}
@@ -16371,12 +16484,26 @@ define bfloat @v_fmul_bf16(bfloat %a, bfloat %b) {
; GFX11FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_fmul_bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_fma_mixlo_bf16 v0, v0, v1, 0 op_sel_hi:[1,1,0]
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_fmul_bf16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v2.l, 0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v2.h, v1.l
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.h, v0.l
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l
+; GFX1250TRUE16-NEXT: v_mul_f32_e32 v0, v1, v2
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_fmul_bf16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_fma_mixlo_bf16 v0, v0, v1, 0 op_sel_hi:[1,1,0]
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = fmul bfloat %a, %b
ret bfloat %op
}
@@ -21012,31 +21139,60 @@ define bfloat @v_fdiv_bf16(bfloat %a, bfloat %b) {
; GFX11FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_fdiv_bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_dual_lshlrev_b32 v0, 16, v0 :: v_dual_lshlrev_b32 v1, 16, v1
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT: v_div_scale_f32 v2, null, v1, v1, v0
-; GFX1250-NEXT: v_rcp_f32_e32 v3, v2
-; GFX1250-NEXT: v_nop
-; GFX1250-NEXT: s_delay_alu instid0(TRANS32_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT: v_fma_f32 v4, -v2, v3, 1.0
-; GFX1250-NEXT: v_fmac_f32_e32 v3, v4, v3
-; GFX1250-NEXT: v_div_scale_f32 v4, vcc_lo, v0, v1, v0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT: v_mul_f32_e32 v5, v4, v3
-; GFX1250-NEXT: v_fma_f32 v6, -v2, v5, v4
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT: v_fmac_f32_e32 v5, v6, v3
-; GFX1250-NEXT: v_fma_f32 v2, -v2, v5, v4
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT: v_div_fmas_f32 v2, v2, v3, v5
-; GFX1250-NEXT: v_div_fixup_f32 v0, v2, v1, v0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_fdiv_bf16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v2.l, 0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v2.h, v0.l
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v0.l, v2.l
+; GFX1250TRUE16-NEXT: v_div_scale_f32 v1, null, v0, v0, v2
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(TRANS32_DEP_1)
+; GFX1250TRUE16-NEXT: v_rcp_f32_e32 v3, v1
+; GFX1250TRUE16-NEXT: v_nop
+; GFX1250TRUE16-NEXT: v_fma_f32 v4, -v1, v3, 1.0
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_fmac_f32_e32 v3, v4, v3
+; GFX1250TRUE16-NEXT: v_div_scale_f32 v4, vcc_lo, v2, v0, v2
+; GFX1250TRUE16-NEXT: v_mul_f32_e32 v5, v4, v3
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_fma_f32 v6, -v1, v5, v4
+; GFX1250TRUE16-NEXT: v_fmac_f32_e32 v5, v6, v3
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_fma_f32 v1, -v1, v5, v4
+; GFX1250TRUE16-NEXT: v_div_fmas_f32 v1, v1, v3, v5
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_div_fixup_f32 v0, v1, v0, v2
+; GFX1250TRUE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_fdiv_bf16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_dual_lshlrev_b32 v0, 16, v0 :: v_dual_lshlrev_b32 v1, 16, v1
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_div_scale_f32 v2, null, v1, v1, v0
+; GFX1250FAKE16-NEXT: v_rcp_f32_e32 v3, v2
+; GFX1250FAKE16-NEXT: v_nop
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(TRANS32_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_fma_f32 v4, -v2, v3, 1.0
+; GFX1250FAKE16-NEXT: v_fmac_f32_e32 v3, v4, v3
+; GFX1250FAKE16-NEXT: v_div_scale_f32 v4, vcc_lo, v0, v1, v0
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_mul_f32_e32 v5, v4, v3
+; GFX1250FAKE16-NEXT: v_fma_f32 v6, -v2, v5, v4
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_fmac_f32_e32 v5, v6, v3
+; GFX1250FAKE16-NEXT: v_fma_f32 v2, -v2, v5, v4
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_div_fmas_f32 v2, v2, v3, v5
+; GFX1250FAKE16-NEXT: v_div_fixup_f32 v0, v2, v1, v0
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = fdiv bfloat %a, %b
ret bfloat %op
}
@@ -21092,12 +21248,19 @@ define bfloat @v_fabs_bf16(bfloat %a) {
; GFX11FAKE16-NEXT: v_and_b32_e32 v0, 0x7fff, v0
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_fabs_bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_and_b32_e32 v0, 0x7fff, v0
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_fabs_bf16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_and_b16 v0.l, 0x7fff, v0.l
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_fabs_bf16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_and_b32_e32 v0, 0x7fff, v0
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = call bfloat @llvm.fabs.bf16(bfloat %a)
ret bfloat %op
}
@@ -21198,12 +21361,19 @@ define bfloat @v_fneg_bf16(bfloat %a) {
; GFX11FAKE16-NEXT: v_xor_b32_e32 v0, 0x8000, v0
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_fneg_bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_xor_b32_e32 v0, 0x8000, v0
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_fneg_bf16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_xor_b16 v0.l, 0x8000, v0.l
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_fneg_bf16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_xor_b32_e32 v0, 0x8000, v0
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = fneg bfloat %a
ret bfloat %op
}
@@ -21317,12 +21487,19 @@ define bfloat @v_fneg_fabs_bf16(bfloat %a) {
; GFX11FAKE16-NEXT: v_or_b32_e32 v0, 0x8000, v0
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_fneg_fabs_bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_or_b32_e32 v0, 0x8000, v0
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_fneg_fabs_bf16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_or_b16 v0.l, 0x8000, v0.l
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_fneg_fabs_bf16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_or_b32_e32 v0, 0x8000, v0
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%fabs = call bfloat @llvm.fabs.bf16(bfloat %a)
%op = fneg bfloat %fabs
ret bfloat %op
@@ -21511,15 +21688,29 @@ define bfloat @v_minnum_bf16(bfloat %a, bfloat %b) {
; GFX11FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_minnum_bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_dual_lshlrev_b32 v1, 16, v1 :: v_dual_lshlrev_b32 v0, 16, v0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT: v_min_num_f32_e32 v0, v0, v1
-; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_minnum_bf16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v2.l, 0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v2.h, v1.l
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.h, v0.l
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l
+; GFX1250TRUE16-NEXT: v_min_num_f32_e32 v0, v1, v2
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_minnum_bf16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_dual_lshlrev_b32 v1, 16, v1 :: v_dual_lshlrev_b32 v0, 16, v0
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_min_num_f32_e32 v0, v0, v1
+; GFX1250FAKE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = call bfloat @llvm.minnum.bf16(bfloat %a, bfloat %b)
ret bfloat %op
}
@@ -26073,15 +26264,29 @@ define bfloat @v_maxnum_bf16(bfloat %a, bfloat %b) {
; GFX11FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_maxnum_bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_dual_lshlrev_b32 v1, 16, v1 :: v_dual_lshlrev_b32 v0, 16, v0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT: v_max_num_f32_e32 v0, v0, v1
-; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_maxnum_bf16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v2.l, 0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v2.h, v1.l
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.h, v0.l
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l
+; GFX1250TRUE16-NEXT: v_max_num_f32_e32 v0, v1, v2
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_maxnum_bf16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_dual_lshlrev_b32 v1, 16, v1 :: v_dual_lshlrev_b32 v0, 16, v0
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_max_num_f32_e32 v0, v0, v1
+; GFX1250FAKE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = call bfloat @llvm.maxnum.bf16(bfloat %a, bfloat %b)
ret bfloat %op
}
@@ -30764,12 +30969,19 @@ define bfloat @v_sqrt_bf16(bfloat %a) {
; GFX11FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_sqrt_bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_sqrt_bf16_e32 v0, v0
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_sqrt_bf16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_sqrt_bf16_e32 v0.l, v0.l
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_sqrt_bf16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_sqrt_bf16_e32 v0, v0
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = call bfloat @llvm.sqrt.bf16(bfloat %a)
ret bfloat %op
}
@@ -30877,15 +31089,26 @@ define bfloat @v_ldexp_bf16_i32(bfloat %a, i32 %b) {
; GFX11FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_ldexp_bf16_i32:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_lshlrev_b32_e32 v0, 16, v0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT: v_ldexp_f32 v0, v0, v1
-; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_ldexp_bf16_i32:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v2.l, 0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v2.h, v0.l
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_ldexp_f32 v0, v2, v1
+; GFX1250TRUE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_ldexp_bf16_i32:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_ldexp_f32 v0, v0, v1
+; GFX1250FAKE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = call bfloat @llvm.ldexp.bf16.i32(bfloat %a, i32 %b)
ret bfloat %op
}
@@ -31005,16 +31228,28 @@ define { bfloat, i16 } @v_frexp_bf16_i16(bfloat %a) {
; GFX11FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_frexp_bf16_i16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_lshlrev_b32_e32 v1, 16, v0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX1250-NEXT: v_frexp_mant_f32_e32 v0, v1
-; GFX1250-NEXT: v_frexp_exp_i32_f32_e32 v1, v1
-; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_frexp_bf16_i16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.l, 0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.h, v0.l
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1250TRUE16-NEXT: v_frexp_mant_f32_e32 v0, v1
+; GFX1250TRUE16-NEXT: v_frexp_exp_i32_f32_e32 v1, v1
+; GFX1250TRUE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_frexp_bf16_i16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v0
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1250FAKE16-NEXT: v_frexp_mant_f32_e32 v0, v1
+; GFX1250FAKE16-NEXT: v_frexp_exp_i32_f32_e32 v1, v1
+; GFX1250FAKE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = call { bfloat, i16 } @llvm.frexp.bf16.i16(bfloat %a)
ret { bfloat, i16 } %op
}
@@ -31254,31 +31489,58 @@ define bfloat @v_log_bf16(bfloat %a) {
; GFX11FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_log_bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_lshlrev_b32_e32 v0, 16, v0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX1250-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX1250-NEXT: v_cndmask_b32_e64 v1, 0, 32, vcc_lo
-; GFX1250-NEXT: v_ldexp_f32 v0, v0, v1
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(TRANS32_DEP_1)
-; GFX1250-NEXT: v_log_f32_e32 v0, v0
-; GFX1250-NEXT: v_nop
-; GFX1250-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
-; GFX1250-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1
-; GFX1250-NEXT: v_fmamk_f32 v2, v0, 0x3377d1cf, v2
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT: v_add_f32_e32 v1, v1, v2
-; GFX1250-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0
-; GFX1250-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, vcc_lo
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_log_bf16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.l, 0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.h, v0.l
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v1
+; GFX1250TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 32, vcc_lo
+; GFX1250TRUE16-NEXT: v_ldexp_f32 v0, v1, v0
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(TRANS32_DEP_1)
+; GFX1250TRUE16-NEXT: v_log_f32_e32 v0, v0
+; GFX1250TRUE16-NEXT: v_nop
+; GFX1250TRUE16-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
+; GFX1250TRUE16-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1
+; GFX1250TRUE16-NEXT: v_fmamk_f32 v2, v0, 0x3377d1cf, v2
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX1250TRUE16-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0
+; GFX1250TRUE16-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, vcc_lo
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX1250TRUE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_log_bf16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e64 v1, 0, 32, vcc_lo
+; GFX1250FAKE16-NEXT: v_ldexp_f32 v0, v0, v1
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(TRANS32_DEP_1)
+; GFX1250FAKE16-NEXT: v_log_f32_e32 v0, v0
+; GFX1250FAKE16-NEXT: v_nop
+; GFX1250FAKE16-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
+; GFX1250FAKE16-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1
+; GFX1250FAKE16-NEXT: v_fmamk_f32 v2, v0, 0x3377d1cf, v2
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, vcc_lo
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX1250FAKE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = call bfloat @llvm.log.bf16(bfloat %a)
ret bfloat %op
}
@@ -31439,12 +31701,19 @@ define bfloat @v_log2_bf16(bfloat %a) {
; GFX11FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_log2_bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_log_bf16_e32 v0, v0
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_log2_bf16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_log_bf16_e32 v0.l, v0.l
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_log2_bf16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_log_bf16_e32 v0, v0
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = call bfloat @llvm.log2.bf16(bfloat %a)
ret bfloat %op
}
@@ -31679,31 +31948,58 @@ define bfloat @v_log10_bf16(bfloat %a) {
; GFX11FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_log10_bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_lshlrev_b32_e32 v0, 16, v0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX1250-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
-; GFX1250-NEXT: v_cndmask_b32_e64 v1, 0, 32, vcc_lo
-; GFX1250-NEXT: v_ldexp_f32 v0, v0, v1
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(TRANS32_DEP_1)
-; GFX1250-NEXT: v_log_f32_e32 v0, v0
-; GFX1250-NEXT: v_nop
-; GFX1250-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
-; GFX1250-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1
-; GFX1250-NEXT: v_fmamk_f32 v2, v0, 0x3284fbcf, v2
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT: v_add_f32_e32 v1, v1, v2
-; GFX1250-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0
-; GFX1250-NEXT: v_cndmask_b32_e64 v1, 0, 0x411a209b, vcc_lo
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT: v_sub_f32_e32 v0, v0, v1
-; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_log10_bf16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.l, 0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.h, v0.l
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v1
+; GFX1250TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 32, vcc_lo
+; GFX1250TRUE16-NEXT: v_ldexp_f32 v0, v1, v0
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(TRANS32_DEP_1)
+; GFX1250TRUE16-NEXT: v_log_f32_e32 v0, v0
+; GFX1250TRUE16-NEXT: v_nop
+; GFX1250TRUE16-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
+; GFX1250TRUE16-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1
+; GFX1250TRUE16-NEXT: v_fmamk_f32 v2, v0, 0x3284fbcf, v2
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX1250TRUE16-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0
+; GFX1250TRUE16-NEXT: v_cndmask_b32_e64 v1, 0, 0x411a209b, vcc_lo
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX1250TRUE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_log10_bf16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_cmp_gt_f32_e32 vcc_lo, 0x800000, v0
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e64 v1, 0, 32, vcc_lo
+; GFX1250FAKE16-NEXT: v_ldexp_f32 v0, v0, v1
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(TRANS32_DEP_1)
+; GFX1250FAKE16-NEXT: v_log_f32_e32 v0, v0
+; GFX1250FAKE16-NEXT: v_nop
+; GFX1250FAKE16-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
+; GFX1250FAKE16-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1
+; GFX1250FAKE16-NEXT: v_fmamk_f32 v2, v0, 0x3284fbcf, v2
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_add_f32_e32 v1, v1, v2
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e64 v1, 0, 0x411a209b, vcc_lo
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_sub_f32_e32 v0, v0, v1
+; GFX1250FAKE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = call bfloat @llvm.log10.bf16(bfloat %a)
ret bfloat %op
}
@@ -31946,34 +32242,65 @@ define bfloat @v_exp_bf16(bfloat %a) {
; GFX11FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_exp_bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_lshlrev_b32_e32 v1, 16, v0
-; GFX1250-NEXT: s_mov_b32 s0, 0x3fb8aa3b
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v1
-; GFX1250-NEXT: v_rndne_f32_e32 v3, v2
-; GFX1250-NEXT: v_fma_mix_f32_bf16 v4, v0, s0, -v2 op_sel_hi:[1,0,0]
-; GFX1250-NEXT: s_mov_b32 s0, 0x32a5705f
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1250-NEXT: v_sub_f32_e32 v2, v2, v3
-; GFX1250-NEXT: v_fma_mix_f32_bf16 v0, v0, s0, v4 op_sel_hi:[1,0,0]
-; GFX1250-NEXT: v_cmp_ngt_f32_e32 vcc_lo, 0xc2ce8ed0, v1
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX1250-NEXT: v_add_f32_e32 v0, v2, v0
-; GFX1250-NEXT: v_cvt_i32_f32_e32 v2, v3
-; GFX1250-NEXT: v_exp_f32_e32 v0, v0
-; GFX1250-NEXT: v_nop
-; GFX1250-NEXT: s_delay_alu instid0(TRANS32_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT: v_ldexp_f32 v0, v0, v2
-; GFX1250-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc_lo
-; GFX1250-NEXT: v_cmp_nlt_f32_e32 vcc_lo, 0x42b17218, v1
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT: v_cndmask_b32_e32 v0, 0x7f800000, v0, vcc_lo
-; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_exp_bf16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.l, 0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.h, v0.l
+; GFX1250TRUE16-NEXT: s_mov_b32 s0, 0x3fb8aa3b
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1250TRUE16-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v1
+; GFX1250TRUE16-NEXT: v_cmp_ngt_f32_e32 vcc_lo, 0xc2ce8ed0, v1
+; GFX1250TRUE16-NEXT: v_fma_mix_f32_bf16 v3, v0, s0, -v2 op_sel_hi:[1,0,0]
+; GFX1250TRUE16-NEXT: v_rndne_f32_e32 v4, v2
+; GFX1250TRUE16-NEXT: s_mov_b32 s0, 0x32a5705f
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instid1(SALU_CYCLE_1)
+; GFX1250TRUE16-NEXT: v_fma_mix_f32_bf16 v0, v0, s0, v3 op_sel_hi:[1,0,0]
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_sub_f32_e32 v2, v2, v4
+; GFX1250TRUE16-NEXT: v_add_f32_e32 v0, v2, v0
+; GFX1250TRUE16-NEXT: v_cvt_i32_f32_e32 v2, v4
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(TRANS32_DEP_1)
+; GFX1250TRUE16-NEXT: v_exp_f32_e32 v0, v0
+; GFX1250TRUE16-NEXT: v_nop
+; GFX1250TRUE16-NEXT: v_ldexp_f32 v0, v0, v2
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1250TRUE16-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc_lo
+; GFX1250TRUE16-NEXT: v_cmp_nlt_f32_e32 vcc_lo, 0x42b17218, v1
+; GFX1250TRUE16-NEXT: v_cndmask_b32_e32 v0, 0x7f800000, v0, vcc_lo
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_exp_bf16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v0
+; GFX1250FAKE16-NEXT: s_mov_b32 s0, 0x3fb8aa3b
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_mul_f32_e32 v2, 0x3fb8aa3b, v1
+; GFX1250FAKE16-NEXT: v_rndne_f32_e32 v3, v2
+; GFX1250FAKE16-NEXT: v_fma_mix_f32_bf16 v4, v0, s0, -v2 op_sel_hi:[1,0,0]
+; GFX1250FAKE16-NEXT: s_mov_b32 s0, 0x32a5705f
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250FAKE16-NEXT: v_sub_f32_e32 v2, v2, v3
+; GFX1250FAKE16-NEXT: v_fma_mix_f32_bf16 v0, v0, s0, v4 op_sel_hi:[1,0,0]
+; GFX1250FAKE16-NEXT: v_cmp_ngt_f32_e32 vcc_lo, 0xc2ce8ed0, v1
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1250FAKE16-NEXT: v_add_f32_e32 v0, v2, v0
+; GFX1250FAKE16-NEXT: v_cvt_i32_f32_e32 v2, v3
+; GFX1250FAKE16-NEXT: v_exp_f32_e32 v0, v0
+; GFX1250FAKE16-NEXT: v_nop
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(TRANS32_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_ldexp_f32 v0, v0, v2
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc_lo
+; GFX1250FAKE16-NEXT: v_cmp_nlt_f32_e32 vcc_lo, 0x42b17218, v1
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x7f800000, v0, vcc_lo
+; GFX1250FAKE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = call bfloat @llvm.exp.bf16(bfloat %a)
ret bfloat %op
}
@@ -32138,12 +32465,19 @@ define bfloat @v_exp2_bf16(bfloat %a) {
; GFX11FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_exp2_bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_exp_bf16_e32 v0, v0
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_exp2_bf16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_exp_bf16_e32 v0.l, v0.l
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_exp2_bf16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_exp_bf16_e32 v0, v0
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = call bfloat @llvm.exp2.bf16(bfloat %a)
ret bfloat %op
}
@@ -32382,34 +32716,65 @@ define bfloat @v_exp10_bf16(bfloat %a) {
; GFX11FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_exp10_bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_lshlrev_b32_e32 v1, 16, v0
-; GFX1250-NEXT: s_mov_b32 s0, 0x40549a78
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT: v_mul_f32_e32 v2, 0x40549a78, v1
-; GFX1250-NEXT: v_rndne_f32_e32 v3, v2
-; GFX1250-NEXT: v_fma_mix_f32_bf16 v4, v0, s0, -v2 op_sel_hi:[1,0,0]
-; GFX1250-NEXT: s_mov_b32 s0, 0x33979a37
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1250-NEXT: v_sub_f32_e32 v2, v2, v3
-; GFX1250-NEXT: v_fma_mix_f32_bf16 v0, v0, s0, v4 op_sel_hi:[1,0,0]
-; GFX1250-NEXT: v_cmp_ngt_f32_e32 vcc_lo, 0xc23369f4, v1
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX1250-NEXT: v_add_f32_e32 v0, v2, v0
-; GFX1250-NEXT: v_cvt_i32_f32_e32 v2, v3
-; GFX1250-NEXT: v_exp_f32_e32 v0, v0
-; GFX1250-NEXT: v_nop
-; GFX1250-NEXT: s_delay_alu instid0(TRANS32_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT: v_ldexp_f32 v0, v0, v2
-; GFX1250-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc_lo
-; GFX1250-NEXT: v_cmp_nlt_f32_e32 vcc_lo, 0x421a209b, v1
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT: v_cndmask_b32_e32 v0, 0x7f800000, v0, vcc_lo
-; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_exp10_bf16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.l, 0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.h, v0.l
+; GFX1250TRUE16-NEXT: s_mov_b32 s0, 0x40549a78
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1250TRUE16-NEXT: v_mul_f32_e32 v2, 0x40549a78, v1
+; GFX1250TRUE16-NEXT: v_cmp_ngt_f32_e32 vcc_lo, 0xc23369f4, v1
+; GFX1250TRUE16-NEXT: v_fma_mix_f32_bf16 v3, v0, s0, -v2 op_sel_hi:[1,0,0]
+; GFX1250TRUE16-NEXT: v_rndne_f32_e32 v4, v2
+; GFX1250TRUE16-NEXT: s_mov_b32 s0, 0x33979a37
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instid1(SALU_CYCLE_1)
+; GFX1250TRUE16-NEXT: v_fma_mix_f32_bf16 v0, v0, s0, v3 op_sel_hi:[1,0,0]
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_sub_f32_e32 v2, v2, v4
+; GFX1250TRUE16-NEXT: v_add_f32_e32 v0, v2, v0
+; GFX1250TRUE16-NEXT: v_cvt_i32_f32_e32 v2, v4
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(TRANS32_DEP_1)
+; GFX1250TRUE16-NEXT: v_exp_f32_e32 v0, v0
+; GFX1250TRUE16-NEXT: v_nop
+; GFX1250TRUE16-NEXT: v_ldexp_f32 v0, v0, v2
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1250TRUE16-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc_lo
+; GFX1250TRUE16-NEXT: v_cmp_nlt_f32_e32 vcc_lo, 0x421a209b, v1
+; GFX1250TRUE16-NEXT: v_cndmask_b32_e32 v0, 0x7f800000, v0, vcc_lo
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_exp10_bf16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v0
+; GFX1250FAKE16-NEXT: s_mov_b32 s0, 0x40549a78
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_mul_f32_e32 v2, 0x40549a78, v1
+; GFX1250FAKE16-NEXT: v_rndne_f32_e32 v3, v2
+; GFX1250FAKE16-NEXT: v_fma_mix_f32_bf16 v4, v0, s0, -v2 op_sel_hi:[1,0,0]
+; GFX1250FAKE16-NEXT: s_mov_b32 s0, 0x33979a37
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250FAKE16-NEXT: v_sub_f32_e32 v2, v2, v3
+; GFX1250FAKE16-NEXT: v_fma_mix_f32_bf16 v0, v0, s0, v4 op_sel_hi:[1,0,0]
+; GFX1250FAKE16-NEXT: v_cmp_ngt_f32_e32 vcc_lo, 0xc23369f4, v1
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1250FAKE16-NEXT: v_add_f32_e32 v0, v2, v0
+; GFX1250FAKE16-NEXT: v_cvt_i32_f32_e32 v2, v3
+; GFX1250FAKE16-NEXT: v_exp_f32_e32 v0, v0
+; GFX1250FAKE16-NEXT: v_nop
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(TRANS32_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_ldexp_f32 v0, v0, v2
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc_lo
+; GFX1250FAKE16-NEXT: v_cmp_nlt_f32_e32 vcc_lo, 0x421a209b, v1
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x7f800000, v0, vcc_lo
+; GFX1250FAKE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = call bfloat @llvm.exp10.bf16(bfloat %a)
ret bfloat %op
}
@@ -32517,15 +32882,26 @@ define bfloat @v_ceil_bf16(bfloat %a) {
; GFX11FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_ceil_bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_lshlrev_b32_e32 v0, 16, v0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT: v_ceil_f32_e32 v0, v0
-; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_ceil_bf16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.l, 0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.h, v0.l
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_ceil_f32_e32 v0, v1
+; GFX1250TRUE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_ceil_bf16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_ceil_f32_e32 v0, v0
+; GFX1250FAKE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = call bfloat @llvm.ceil.bf16(bfloat %a)
ret bfloat %op
}
@@ -32633,15 +33009,26 @@ define bfloat @v_trunc_bf16(bfloat %a) {
; GFX11FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_trunc_bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_lshlrev_b32_e32 v0, 16, v0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT: v_trunc_f32_e32 v0, v0
-; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_trunc_bf16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.l, 0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.h, v0.l
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_trunc_f32_e32 v0, v1
+; GFX1250TRUE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_trunc_bf16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_trunc_f32_e32 v0, v0
+; GFX1250FAKE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = call bfloat @llvm.trunc.bf16(bfloat %a)
ret bfloat %op
}
@@ -32749,15 +33136,26 @@ define bfloat @v_rint_bf16(bfloat %a) {
; GFX11FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_rint_bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_lshlrev_b32_e32 v0, 16, v0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT: v_rndne_f32_e32 v0, v0
-; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_rint_bf16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.l, 0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.h, v0.l
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_rndne_f32_e32 v0, v1
+; GFX1250TRUE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_rint_bf16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_rndne_f32_e32 v0, v0
+; GFX1250FAKE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = call bfloat @llvm.rint.bf16(bfloat %a)
ret bfloat %op
}
@@ -32865,15 +33263,26 @@ define bfloat @v_nearbyint_bf16(bfloat %a) {
; GFX11FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_nearbyint_bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_lshlrev_b32_e32 v0, 16, v0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT: v_rndne_f32_e32 v0, v0
-; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_nearbyint_bf16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.l, 0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.h, v0.l
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_rndne_f32_e32 v0, v1
+; GFX1250TRUE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_nearbyint_bf16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_rndne_f32_e32 v0, v0
+; GFX1250FAKE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = call bfloat @llvm.nearbyint.bf16(bfloat %a)
ret bfloat %op
}
@@ -33031,23 +33440,42 @@ define bfloat @v_round_bf16(bfloat %a) {
; GFX11FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_round_bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_lshlrev_b32_e32 v0, 16, v0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT: v_trunc_f32_e32 v1, v0
-; GFX1250-NEXT: v_sub_f32_e32 v2, v0, v1
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT: v_cmp_ge_f32_e64 s0, |v2|, 0.5
-; GFX1250-NEXT: v_cndmask_b32_e64 v2, 0, 1.0, s0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT: v_bfi_b32 v0, 0x7fffffff, v2, v0
-; GFX1250-NEXT: v_add_f32_e32 v0, v1, v0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_round_bf16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.l, 0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.h, v0.l
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_trunc_f32_e32 v0, v1
+; GFX1250TRUE16-NEXT: v_sub_f32_e32 v2, v1, v0
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_cmp_ge_f32_e64 s0, |v2|, 0.5
+; GFX1250TRUE16-NEXT: v_cndmask_b32_e64 v2, 0, 1.0, s0
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_bfi_b32 v1, 0x7fffffff, v2, v1
+; GFX1250TRUE16-NEXT: v_add_f32_e32 v0, v0, v1
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_round_bf16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_trunc_f32_e32 v1, v0
+; GFX1250FAKE16-NEXT: v_sub_f32_e32 v2, v0, v1
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_cmp_ge_f32_e64 s0, |v2|, 0.5
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e64 v2, 0, 1.0, s0
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_bfi_b32 v0, 0x7fffffff, v2, v0
+; GFX1250FAKE16-NEXT: v_add_f32_e32 v0, v1, v0
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = call bfloat @llvm.round.bf16(bfloat %a)
ret bfloat %op
}
@@ -33155,15 +33583,26 @@ define bfloat @v_roundeven_bf16(bfloat %a) {
; GFX11FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_roundeven_bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_lshlrev_b32_e32 v0, 16, v0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT: v_rndne_f32_e32 v0, v0
-; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_roundeven_bf16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.l, 0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.h, v0.l
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_rndne_f32_e32 v0, v1
+; GFX1250TRUE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_roundeven_bf16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_rndne_f32_e32 v0, v0
+; GFX1250FAKE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = call bfloat @llvm.roundeven.bf16(bfloat %a)
ret bfloat %op
}
@@ -33271,15 +33710,26 @@ define bfloat @v_floor_bf16(bfloat %a) {
; GFX11FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_floor_bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_lshlrev_b32_e32 v0, 16, v0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT: v_floor_f32_e32 v0, v0
-; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_floor_bf16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.l, 0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.h, v0.l
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_floor_f32_e32 v0, v1
+; GFX1250TRUE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_floor_bf16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_floor_f32_e32 v0, v0
+; GFX1250FAKE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = call bfloat @llvm.floor.bf16(bfloat %a)
ret bfloat %op
}
@@ -33385,15 +33835,26 @@ define bfloat @v_canonicalize_bf16(bfloat %a) {
; GFX11FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_canonicalize_bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_lshlrev_b32_e32 v0, 16, v0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT: v_max_num_f32_e32 v0, v0, v0
-; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_canonicalize_bf16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.l, 0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.h, v0.l
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_max_num_f32_e32 v0, v1, v1
+; GFX1250TRUE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_canonicalize_bf16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_max_num_f32_e32 v0, v0, v0
+; GFX1250FAKE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = call bfloat @llvm.canonicalize.bf16(bfloat %a)
ret bfloat %op
}
@@ -33535,15 +33996,28 @@ define i1 @v_fcmp_oeq_bf16(bfloat %a, bfloat %b) {
; GFX11FAKE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_fcmp_oeq_bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_dual_lshlrev_b32 v1, 16, v1 :: v_dual_lshlrev_b32 v0, 16, v0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1250-NEXT: v_cmp_eq_f32_e32 vcc_lo, v0, v1
-; GFX1250-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_fcmp_oeq_bf16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v2.l, 0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v2.h, v1.l
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.h, v0.l
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l
+; GFX1250TRUE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, v1, v2
+; GFX1250TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_fcmp_oeq_bf16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_dual_lshlrev_b32 v1, 16, v1 :: v_dual_lshlrev_b32 v0, 16, v0
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_cmp_eq_f32_e32 vcc_lo, v0, v1
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = fcmp oeq bfloat %a, %b
ret i1 %op
}
@@ -33630,15 +34104,28 @@ define i1 @v_fcmp_ogt_bf16(bfloat %a, bfloat %b) {
; GFX11FAKE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_fcmp_ogt_bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_dual_lshlrev_b32 v1, 16, v1 :: v_dual_lshlrev_b32 v0, 16, v0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1250-NEXT: v_cmp_gt_f32_e32 vcc_lo, v0, v1
-; GFX1250-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_fcmp_ogt_bf16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v2.l, 0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v2.h, v1.l
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.h, v0.l
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l
+; GFX1250TRUE16-NEXT: v_cmp_gt_f32_e32 vcc_lo, v1, v2
+; GFX1250TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_fcmp_ogt_bf16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_dual_lshlrev_b32 v1, 16, v1 :: v_dual_lshlrev_b32 v0, 16, v0
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_cmp_gt_f32_e32 vcc_lo, v0, v1
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = fcmp ogt bfloat %a, %b
ret i1 %op
}
@@ -33725,15 +34212,28 @@ define i1 @v_fcmp_oge_bf16(bfloat %a, bfloat %b) {
; GFX11FAKE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_fcmp_oge_bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_dual_lshlrev_b32 v1, 16, v1 :: v_dual_lshlrev_b32 v0, 16, v0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1250-NEXT: v_cmp_ge_f32_e32 vcc_lo, v0, v1
-; GFX1250-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_fcmp_oge_bf16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v2.l, 0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v2.h, v1.l
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.h, v0.l
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l
+; GFX1250TRUE16-NEXT: v_cmp_ge_f32_e32 vcc_lo, v1, v2
+; GFX1250TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_fcmp_oge_bf16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_dual_lshlrev_b32 v1, 16, v1 :: v_dual_lshlrev_b32 v0, 16, v0
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_cmp_ge_f32_e32 vcc_lo, v0, v1
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = fcmp oge bfloat %a, %b
ret i1 %op
}
@@ -33820,15 +34320,28 @@ define i1 @v_fcmp_olt_bf16(bfloat %a, bfloat %b) {
; GFX11FAKE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_fcmp_olt_bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_dual_lshlrev_b32 v1, 16, v1 :: v_dual_lshlrev_b32 v0, 16, v0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1250-NEXT: v_cmp_lt_f32_e32 vcc_lo, v0, v1
-; GFX1250-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_fcmp_olt_bf16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v2.l, 0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v2.h, v1.l
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.h, v0.l
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l
+; GFX1250TRUE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v1, v2
+; GFX1250TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_fcmp_olt_bf16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_dual_lshlrev_b32 v1, 16, v1 :: v_dual_lshlrev_b32 v0, 16, v0
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_cmp_lt_f32_e32 vcc_lo, v0, v1
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = fcmp olt bfloat %a, %b
ret i1 %op
}
@@ -33915,15 +34428,28 @@ define i1 @v_fcmp_ole_bf16(bfloat %a, bfloat %b) {
; GFX11FAKE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_fcmp_ole_bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_dual_lshlrev_b32 v1, 16, v1 :: v_dual_lshlrev_b32 v0, 16, v0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1250-NEXT: v_cmp_le_f32_e32 vcc_lo, v0, v1
-; GFX1250-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_fcmp_ole_bf16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v2.l, 0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v2.h, v1.l
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.h, v0.l
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l
+; GFX1250TRUE16-NEXT: v_cmp_le_f32_e32 vcc_lo, v1, v2
+; GFX1250TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_fcmp_ole_bf16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_dual_lshlrev_b32 v1, 16, v1 :: v_dual_lshlrev_b32 v0, 16, v0
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_cmp_le_f32_e32 vcc_lo, v0, v1
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = fcmp ole bfloat %a, %b
ret i1 %op
}
@@ -34010,15 +34536,28 @@ define i1 @v_fcmp_one_bf16(bfloat %a, bfloat %b) {
; GFX11FAKE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_fcmp_one_bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_dual_lshlrev_b32 v1, 16, v1 :: v_dual_lshlrev_b32 v0, 16, v0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1250-NEXT: v_cmp_lg_f32_e32 vcc_lo, v0, v1
-; GFX1250-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_fcmp_one_bf16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v2.l, 0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v2.h, v1.l
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.h, v0.l
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l
+; GFX1250TRUE16-NEXT: v_cmp_lg_f32_e32 vcc_lo, v1, v2
+; GFX1250TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_fcmp_one_bf16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_dual_lshlrev_b32 v1, 16, v1 :: v_dual_lshlrev_b32 v0, 16, v0
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_cmp_lg_f32_e32 vcc_lo, v0, v1
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = fcmp one bfloat %a, %b
ret i1 %op
}
@@ -34105,15 +34644,28 @@ define i1 @v_fcmp_uno_bf16(bfloat %a, bfloat %b) {
; GFX11FAKE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_fcmp_uno_bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_dual_lshlrev_b32 v1, 16, v1 :: v_dual_lshlrev_b32 v0, 16, v0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1250-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v1
-; GFX1250-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_fcmp_uno_bf16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v2.l, 0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v2.h, v1.l
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.h, v0.l
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l
+; GFX1250TRUE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v1, v2
+; GFX1250TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_fcmp_uno_bf16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_dual_lshlrev_b32 v1, 16, v1 :: v_dual_lshlrev_b32 v0, 16, v0
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_cmp_u_f32_e32 vcc_lo, v0, v1
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = fcmp uno bfloat %a, %b
ret i1 %op
}
@@ -34200,15 +34752,28 @@ define i1 @v_fcmp_ueq_bf16(bfloat %a, bfloat %b) {
; GFX11FAKE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_fcmp_ueq_bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_dual_lshlrev_b32 v1, 16, v1 :: v_dual_lshlrev_b32 v0, 16, v0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1250-NEXT: v_cmp_nlg_f32_e32 vcc_lo, v0, v1
-; GFX1250-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_fcmp_ueq_bf16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v2.l, 0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v2.h, v1.l
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.h, v0.l
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l
+; GFX1250TRUE16-NEXT: v_cmp_nlg_f32_e32 vcc_lo, v1, v2
+; GFX1250TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_fcmp_ueq_bf16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_dual_lshlrev_b32 v1, 16, v1 :: v_dual_lshlrev_b32 v0, 16, v0
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_cmp_nlg_f32_e32 vcc_lo, v0, v1
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = fcmp ueq bfloat %a, %b
ret i1 %op
}
@@ -34295,15 +34860,28 @@ define i1 @v_fcmp_ugt_bf16(bfloat %a, bfloat %b) {
; GFX11FAKE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_fcmp_ugt_bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_dual_lshlrev_b32 v1, 16, v1 :: v_dual_lshlrev_b32 v0, 16, v0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1250-NEXT: v_cmp_nle_f32_e32 vcc_lo, v0, v1
-; GFX1250-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_fcmp_ugt_bf16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v2.l, 0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v2.h, v1.l
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.h, v0.l
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l
+; GFX1250TRUE16-NEXT: v_cmp_nle_f32_e32 vcc_lo, v1, v2
+; GFX1250TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_fcmp_ugt_bf16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_dual_lshlrev_b32 v1, 16, v1 :: v_dual_lshlrev_b32 v0, 16, v0
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_cmp_nle_f32_e32 vcc_lo, v0, v1
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = fcmp ugt bfloat %a, %b
ret i1 %op
}
@@ -34390,15 +34968,28 @@ define i1 @v_fcmp_uge_bf16(bfloat %a, bfloat %b) {
; GFX11FAKE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_fcmp_uge_bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_dual_lshlrev_b32 v1, 16, v1 :: v_dual_lshlrev_b32 v0, 16, v0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1250-NEXT: v_cmp_nlt_f32_e32 vcc_lo, v0, v1
-; GFX1250-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_fcmp_uge_bf16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v2.l, 0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v2.h, v1.l
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.h, v0.l
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l
+; GFX1250TRUE16-NEXT: v_cmp_nlt_f32_e32 vcc_lo, v1, v2
+; GFX1250TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_fcmp_uge_bf16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_dual_lshlrev_b32 v1, 16, v1 :: v_dual_lshlrev_b32 v0, 16, v0
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_cmp_nlt_f32_e32 vcc_lo, v0, v1
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = fcmp uge bfloat %a, %b
ret i1 %op
}
@@ -34485,15 +35076,28 @@ define i1 @v_fcmp_ult_bf16(bfloat %a, bfloat %b) {
; GFX11FAKE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_fcmp_ult_bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_dual_lshlrev_b32 v1, 16, v1 :: v_dual_lshlrev_b32 v0, 16, v0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1250-NEXT: v_cmp_nge_f32_e32 vcc_lo, v0, v1
-; GFX1250-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_fcmp_ult_bf16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v2.l, 0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v2.h, v1.l
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.h, v0.l
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l
+; GFX1250TRUE16-NEXT: v_cmp_nge_f32_e32 vcc_lo, v1, v2
+; GFX1250TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_fcmp_ult_bf16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_dual_lshlrev_b32 v1, 16, v1 :: v_dual_lshlrev_b32 v0, 16, v0
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_cmp_nge_f32_e32 vcc_lo, v0, v1
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = fcmp ult bfloat %a, %b
ret i1 %op
}
@@ -34580,15 +35184,28 @@ define i1 @v_fcmp_ule_bf16(bfloat %a, bfloat %b) {
; GFX11FAKE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_fcmp_ule_bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_dual_lshlrev_b32 v1, 16, v1 :: v_dual_lshlrev_b32 v0, 16, v0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1250-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v0, v1
-; GFX1250-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_fcmp_ule_bf16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v2.l, 0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v2.h, v1.l
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.h, v0.l
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l
+; GFX1250TRUE16-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v1, v2
+; GFX1250TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_fcmp_ule_bf16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_dual_lshlrev_b32 v1, 16, v1 :: v_dual_lshlrev_b32 v0, 16, v0
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_cmp_ngt_f32_e32 vcc_lo, v0, v1
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = fcmp ule bfloat %a, %b
ret i1 %op
}
@@ -34675,15 +35292,28 @@ define i1 @v_fcmp_une_bf16(bfloat %a, bfloat %b) {
; GFX11FAKE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_fcmp_une_bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_dual_lshlrev_b32 v1, 16, v1 :: v_dual_lshlrev_b32 v0, 16, v0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1250-NEXT: v_cmp_neq_f32_e32 vcc_lo, v0, v1
-; GFX1250-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_fcmp_une_bf16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v2.l, 0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v2.h, v1.l
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.h, v0.l
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l
+; GFX1250TRUE16-NEXT: v_cmp_neq_f32_e32 vcc_lo, v1, v2
+; GFX1250TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_fcmp_une_bf16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_dual_lshlrev_b32 v1, 16, v1 :: v_dual_lshlrev_b32 v0, 16, v0
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_cmp_neq_f32_e32 vcc_lo, v0, v1
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = fcmp une bfloat %a, %b
ret i1 %op
}
@@ -34790,14 +35420,24 @@ define i16 @v_fptosi_bf16_to_i16(bfloat %x) {
; GFX11FAKE16-NEXT: v_cvt_i32_f32_e32 v0, v0
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_fptosi_bf16_to_i16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_lshlrev_b32_e32 v0, 16, v0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1250-NEXT: v_cvt_i32_f32_e32 v0, v0
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_fptosi_bf16_to_i16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.l, 0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.h, v0.l
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_cvt_i32_f32_e32 v0, v1
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_fptosi_bf16_to_i16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_cvt_i32_f32_e32 v0, v0
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = fptosi bfloat %x to i16
ret i16 %op
}
@@ -34899,18 +35539,31 @@ define <2 x i16> @v_fptosi_v2bf16_to_v2i16(<2 x bfloat> %x) {
; GFX11FAKE16-NEXT: v_perm_b32 v0, v0, v1, 0x5040100
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_fptosi_v2bf16_to_v2i16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_lshlrev_b32_e32 v1, 16, v0
-; GFX1250-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1250-NEXT: v_cvt_i32_f32_e32 v1, v1
-; GFX1250-NEXT: v_cvt_i32_f32_e32 v0, v0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1250-NEXT: v_perm_b32 v0, v0, v1, 0x5040100
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_fptosi_v2bf16_to_v2i16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_and_b32_e32 v1, 0xffff0000, v0
+; GFX1250TRUE16-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250TRUE16-NEXT: v_cvt_i32_f32_e32 v1, v1
+; GFX1250TRUE16-NEXT: v_cvt_i32_f32_e32 v0, v0
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v0.h, v1.l
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_fptosi_v2bf16_to_v2i16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_lshlrev_b32_e32 v1, 16, v0
+; GFX1250FAKE16-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250FAKE16-NEXT: v_cvt_i32_f32_e32 v1, v1
+; GFX1250FAKE16-NEXT: v_cvt_i32_f32_e32 v0, v0
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_perm_b32 v0, v0, v1, 0x5040100
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = fptosi <2 x bfloat> %x to <2 x i16>
ret <2 x i16> %op
}
@@ -35032,19 +35685,33 @@ define <3 x i16> @v_fptosi_v3bf16_to_v3i16(<3 x bfloat> %x) {
; GFX11FAKE16-NEXT: v_perm_b32 v0, v0, v2, 0x5040100
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_fptosi_v3bf16_to_v3i16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_dual_lshlrev_b32 v2, 16, v0 :: v_dual_lshlrev_b32 v1, 16, v1
-; GFX1250-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX1250-NEXT: v_cvt_i32_f32_e32 v2, v2
-; GFX1250-NEXT: v_cvt_i32_f32_e32 v1, v1
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT: v_cvt_i32_f32_e32 v0, v0
-; GFX1250-NEXT: v_perm_b32 v0, v0, v2, 0x5040100
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_fptosi_v3bf16_to_v3i16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_and_b32_e32 v2, 0xffff0000, v0
+; GFX1250TRUE16-NEXT: v_dual_lshlrev_b32 v0, 16, v0 :: v_dual_lshlrev_b32 v1, 16, v1
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250TRUE16-NEXT: v_cvt_i32_f32_e32 v2, v2
+; GFX1250TRUE16-NEXT: v_cvt_i32_f32_e32 v0, v0
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1250TRUE16-NEXT: v_cvt_i32_f32_e32 v1, v1
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v0.h, v2.l
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_fptosi_v3bf16_to_v3i16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_dual_lshlrev_b32 v2, 16, v0 :: v_dual_lshlrev_b32 v1, 16, v1
+; GFX1250FAKE16-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1250FAKE16-NEXT: v_cvt_i32_f32_e32 v2, v2
+; GFX1250FAKE16-NEXT: v_cvt_i32_f32_e32 v1, v1
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_cvt_i32_f32_e32 v0, v0
+; GFX1250FAKE16-NEXT: v_perm_b32 v0, v0, v2, 0x5040100
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = fptosi <3 x bfloat> %x to <3 x i16>
ret <3 x i16> %op
}
@@ -35198,23 +35865,41 @@ define <4 x i16> @v_fptosi_v4bf16_to_v4i16(<4 x bfloat> %x) {
; GFX11FAKE16-NEXT: v_perm_b32 v1, v1, v2, 0x5040100
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_fptosi_v4bf16_to_v4i16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_dual_lshlrev_b32 v2, 16, v1 :: v_dual_lshlrev_b32 v3, 16, v0
-; GFX1250-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
-; GFX1250-NEXT: v_and_b32_e32 v1, 0xffff0000, v1
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX1250-NEXT: v_cvt_i32_f32_e32 v2, v2
-; GFX1250-NEXT: v_cvt_i32_f32_e32 v3, v3
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX1250-NEXT: v_cvt_i32_f32_e32 v0, v0
-; GFX1250-NEXT: v_cvt_i32_f32_e32 v1, v1
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1250-NEXT: v_perm_b32 v0, v0, v3, 0x5040100
-; GFX1250-NEXT: v_perm_b32 v1, v1, v2, 0x5040100
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_fptosi_v4bf16_to_v4i16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_and_b32_e32 v2, 0xffff0000, v0
+; GFX1250TRUE16-NEXT: v_and_b32_e32 v3, 0xffff0000, v1
+; GFX1250TRUE16-NEXT: v_dual_lshlrev_b32 v1, 16, v1 :: v_dual_lshlrev_b32 v0, 16, v0
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1250TRUE16-NEXT: v_cvt_i32_f32_e32 v2, v2
+; GFX1250TRUE16-NEXT: v_cvt_i32_f32_e32 v3, v3
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX1250TRUE16-NEXT: v_cvt_i32_f32_e32 v1, v1
+; GFX1250TRUE16-NEXT: v_cvt_i32_f32_e32 v0, v0
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v0.h, v2.l
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.h, v3.l
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_fptosi_v4bf16_to_v4i16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_dual_lshlrev_b32 v2, 16, v1 :: v_dual_lshlrev_b32 v3, 16, v0
+; GFX1250FAKE16-NEXT: v_and_b32_e32 v0, 0xffff0000, v0
+; GFX1250FAKE16-NEXT: v_and_b32_e32 v1, 0xffff0000, v1
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX1250FAKE16-NEXT: v_cvt_i32_f32_e32 v2, v2
+; GFX1250FAKE16-NEXT: v_cvt_i32_f32_e32 v3, v3
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX1250FAKE16-NEXT: v_cvt_i32_f32_e32 v0, v0
+; GFX1250FAKE16-NEXT: v_cvt_i32_f32_e32 v1, v1
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250FAKE16-NEXT: v_perm_b32 v0, v0, v3, 0x5040100
+; GFX1250FAKE16-NEXT: v_perm_b32 v1, v1, v2, 0x5040100
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = fptosi <4 x bfloat> %x to <4 x i16>
ret <4 x i16> %op
}
@@ -35274,14 +35959,24 @@ define i32 @v_fptosi_bf16_to_i32(bfloat %x) {
; GFX11FAKE16-NEXT: v_cvt_i32_f32_e32 v0, v0
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_fptosi_bf16_to_i32:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_lshlrev_b32_e32 v0, 16, v0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1250-NEXT: v_cvt_i32_f32_e32 v0, v0
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_fptosi_bf16_to_i32:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.l, 0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.h, v0.l
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_cvt_i32_f32_e32 v0, v1
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_fptosi_bf16_to_i32:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_cvt_i32_f32_e32 v0, v0
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = fptosi bfloat %x to i32
ret i32 %op
}
@@ -35729,26 +36424,48 @@ define i64 @v_fptosi_bf16_to_i64(bfloat %x) {
; GFX11FAKE16-NEXT: v_sub_co_ci_u32_e64 v1, null, v1, v3, vcc_lo
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_fptosi_bf16_to_i64:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_lshlrev_b32_e32 v0, 16, v0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT: v_trunc_f32_e32 v0, v0
-; GFX1250-NEXT: v_mul_f32_e64 v1, 0x2f800000, |v0|
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT: v_floor_f32_e32 v1, v1
-; GFX1250-NEXT: v_fma_f32 v2, 0xcf800000, v1, |v0|
-; GFX1250-NEXT: v_ashrrev_i32_e32 v0, 31, v0
-; GFX1250-NEXT: v_cvt_u32_f32_e32 v3, v1
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1250-NEXT: v_cvt_u32_f32_e32 v2, v2
-; GFX1250-NEXT: v_dual_mov_b32 v1, v0 :: v_dual_bitop2_b32 v3, v3, v0 bitop3:0x14
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT: v_xor_b32_e32 v2, v2, v0
-; GFX1250-NEXT: v_sub_nc_u64_e32 v[0:1], v[2:3], v[0:1]
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_fptosi_bf16_to_i64:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.l, 0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.h, v0.l
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_trunc_f32_e32 v0, v1
+; GFX1250TRUE16-NEXT: v_mul_f32_e64 v1, 0x2f800000, |v0|
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_floor_f32_e32 v1, v1
+; GFX1250TRUE16-NEXT: v_fma_f32 v2, 0xcf800000, v1, |v0|
+; GFX1250TRUE16-NEXT: v_ashrrev_i32_e32 v0, 31, v0
+; GFX1250TRUE16-NEXT: v_cvt_u32_f32_e32 v3, v1
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250TRUE16-NEXT: v_cvt_u32_f32_e32 v2, v2
+; GFX1250TRUE16-NEXT: v_dual_mov_b32 v1, v0 :: v_dual_bitop2_b32 v3, v3, v0 bitop3:0x14
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_xor_b32_e32 v2, v2, v0
+; GFX1250TRUE16-NEXT: v_sub_nc_u64_e32 v[0:1], v[2:3], v[0:1]
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_fptosi_bf16_to_i64:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_lshlrev_b32_e32 v0, 16, v0
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_trunc_f32_e32 v0, v0
+; GFX1250FAKE16-NEXT: v_mul_f32_e64 v1, 0x2f800000, |v0|
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_floor_f32_e32 v1, v1
+; GFX1250FAKE16-NEXT: v_fma_f32 v2, 0xcf800000, v1, |v0|
+; GFX1250FAKE16-NEXT: v_ashrrev_i32_e32 v0, 31, v0
+; GFX1250FAKE16-NEXT: v_cvt_u32_f32_e32 v3, v1
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250FAKE16-NEXT: v_cvt_u32_f32_e32 v2, v2
+; GFX1250FAKE16-NEXT: v_dual_mov_b32 v1, v0 :: v_dual_bitop2_b32 v3, v3, v0 bitop3:0x14
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_xor_b32_e32 v2, v2, v0
+; GFX1250FAKE16-NEXT: v_sub_nc_u64_e32 v[0:1], v[2:3], v[0:1]
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = fptosi bfloat %x to i64
ret i64 %op
}
@@ -37293,22 +38010,39 @@ define <3 x bfloat> @v_sitofp_v3i16_to_v3bf16(<3 x i16> %x) {
; GFX11FAKE16-NEXT: v_alignbit_b32 v1, s0, v1, 16
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_sitofp_v3i16_to_v3bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_ashrrev_i32_e32 v2, 16, v0
-; GFX1250-NEXT: v_bfe_i32 v0, v0, 0, 16
-; GFX1250-NEXT: v_bfe_i32 v1, v1, 0, 16
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX1250-NEXT: v_cvt_f32_i32_e32 v2, v2
-; GFX1250-NEXT: v_cvt_f32_i32_e32 v0, v0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1250-NEXT: v_cvt_f32_i32_e32 v1, v1
-; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, v0, v2
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX1250-NEXT: v_cvt_pk_bf16_f32 v1, v1, s0
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_sitofp_v3i16_to_v3bf16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_bfe_i32 v1, v1, 0, 16
+; GFX1250TRUE16-NEXT: v_ashrrev_i32_e32 v2, 16, v0
+; GFX1250TRUE16-NEXT: v_bfe_i32 v0, v0, 0, 16
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1250TRUE16-NEXT: v_cvt_f32_i32_e32 v1, v1
+; GFX1250TRUE16-NEXT: v_cvt_f32_i32_e32 v2, v2
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1250TRUE16-NEXT: v_cvt_f32_i32_e32 v0, v0
+; GFX1250TRUE16-NEXT: v_cvt_pk_bf16_f32 v1, v1, s0
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX1250TRUE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, v2
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_sitofp_v3i16_to_v3bf16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_ashrrev_i32_e32 v2, 16, v0
+; GFX1250FAKE16-NEXT: v_bfe_i32 v0, v0, 0, 16
+; GFX1250FAKE16-NEXT: v_bfe_i32 v1, v1, 0, 16
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1250FAKE16-NEXT: v_cvt_f32_i32_e32 v2, v2
+; GFX1250FAKE16-NEXT: v_cvt_f32_i32_e32 v0, v0
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250FAKE16-NEXT: v_cvt_f32_i32_e32 v1, v1
+; GFX1250FAKE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, v2
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX1250FAKE16-NEXT: v_cvt_pk_bf16_f32 v1, v1, s0
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = sitofp <3 x i16> %x to <3 x bfloat>
ret <3 x bfloat> %op
}
@@ -37972,17 +38706,31 @@ define <3 x bfloat> @v_sitofp_v3i32_to_v3bf16(<3 x i32> %x) {
; GFX11FAKE16-NEXT: v_alignbit_b32 v1, s0, v2, 16
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_sitofp_v3i32_to_v3bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_cvt_f32_i32_e32 v1, v1
-; GFX1250-NEXT: v_cvt_f32_i32_e32 v0, v0
-; GFX1250-NEXT: v_cvt_f32_i32_e32 v2, v2
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, v0, v1
-; GFX1250-NEXT: v_cvt_pk_bf16_f32 v1, v2, s0
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_sitofp_v3i32_to_v3bf16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_cvt_f32_i32_e32 v2, v2
+; GFX1250TRUE16-NEXT: v_cvt_f32_i32_e32 v1, v1
+; GFX1250TRUE16-NEXT: v_cvt_f32_i32_e32 v0, v0
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250TRUE16-NEXT: v_cvt_pk_bf16_f32 v2, v2, s0
+; GFX1250TRUE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, v1
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_sitofp_v3i32_to_v3bf16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_cvt_f32_i32_e32 v1, v1
+; GFX1250FAKE16-NEXT: v_cvt_f32_i32_e32 v0, v0
+; GFX1250FAKE16-NEXT: v_cvt_f32_i32_e32 v2, v2
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250FAKE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, v1
+; GFX1250FAKE16-NEXT: v_cvt_pk_bf16_f32 v1, v2, s0
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = sitofp <3 x i32> %x to <3 x bfloat>
ret <3 x bfloat> %op
}
@@ -39232,52 +39980,101 @@ define <3 x bfloat> @v_sitofp_v3i64_to_v3bf16(<3 x i64> %x) {
; GFX11FAKE16-NEXT: v_alignbit_b32 v1, s0, v1, 16
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_sitofp_v3i64_to_v3bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_xor_b32_e32 v8, v4, v5
-; GFX1250-NEXT: v_xor_b32_e32 v6, v2, v3
-; GFX1250-NEXT: v_cls_i32_e32 v10, v3
-; GFX1250-NEXT: v_cls_i32_e32 v9, v5
-; GFX1250-NEXT: v_cls_i32_e32 v11, v1
-; GFX1250-NEXT: v_dual_ashrrev_i32 v8, 31, v8 :: v_dual_bitop2_b32 v7, v0, v1 bitop3:0x14
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT: v_dual_ashrrev_i32 v6, 31, v6 :: v_dual_ashrrev_i32 v7, 31, v7
-; GFX1250-NEXT: v_dual_add_nc_u32 v6, 32, v6 :: v_dual_add_nc_u32 v7, 32, v7
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1250-NEXT: v_add_min_u32_e64 v6, v10, -1, v6
-; GFX1250-NEXT: v_add_min_u32_e64 v7, v11, -1, v7
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1250-NEXT: v_lshlrev_b64_e32 v[2:3], v6, v[2:3]
-; GFX1250-NEXT: v_lshlrev_b64_e32 v[0:1], v7, v[0:1]
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
-; GFX1250-NEXT: v_min_u32_e32 v2, 1, v2
-; GFX1250-NEXT: v_add_nc_u32_e32 v8, 32, v8
-; GFX1250-NEXT: v_min_u32_e32 v0, 1, v0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX1250-NEXT: v_or_b32_e32 v2, v3, v2
-; GFX1250-NEXT: v_add_min_u32_e64 v8, v9, -1, v8
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX1250-NEXT: v_dual_sub_nc_u32 v3, 32, v6 :: v_dual_bitop2_b32 v0, v1, v0 bitop3:0x54
-; GFX1250-NEXT: v_cvt_f32_i32_e32 v2, v2
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
-; GFX1250-NEXT: v_lshlrev_b64_e32 v[4:5], v8, v[4:5]
-; GFX1250-NEXT: v_sub_nc_u32_e32 v8, 32, v8
-; GFX1250-NEXT: v_ldexp_f32 v2, v2, v3
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT: v_min_u32_e32 v4, 1, v4
-; GFX1250-NEXT: v_dual_sub_nc_u32 v4, 32, v7 :: v_dual_bitop2_b32 v1, v5, v4 bitop3:0x54
-; GFX1250-NEXT: v_cvt_f32_i32_e32 v0, v0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1250-NEXT: v_cvt_f32_i32_e32 v1, v1
-; GFX1250-NEXT: v_ldexp_f32 v0, v0, v4
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1250-NEXT: v_ldexp_f32 v1, v1, v8
-; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, v0, v2
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX1250-NEXT: v_cvt_pk_bf16_f32 v1, v1, s0
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_sitofp_v3i64_to_v3bf16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_xor_b32_e32 v7, v2, v3
+; GFX1250TRUE16-NEXT: v_xor_b32_e32 v6, v4, v5
+; GFX1250TRUE16-NEXT: v_cls_i32_e32 v10, v3
+; GFX1250TRUE16-NEXT: v_cls_i32_e32 v9, v5
+; GFX1250TRUE16-NEXT: v_cls_i32_e32 v11, v1
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1250TRUE16-NEXT: v_dual_ashrrev_i32 v7, 31, v7 :: v_dual_ashrrev_i32 v6, 31, v6
+; GFX1250TRUE16-NEXT: v_xor_b32_e32 v8, v0, v1
+; GFX1250TRUE16-NEXT: v_dual_add_nc_u32 v7, 32, v7 :: v_dual_add_nc_u32 v6, 32, v6
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250TRUE16-NEXT: v_ashrrev_i32_e32 v8, 31, v8
+; GFX1250TRUE16-NEXT: v_add_min_u32_e64 v7, v10, -1, v7
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250TRUE16-NEXT: v_add_min_u32_e64 v6, v9, -1, v6
+; GFX1250TRUE16-NEXT: v_lshlrev_b64_e32 v[2:3], v7, v[2:3]
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250TRUE16-NEXT: v_lshlrev_b64_e32 v[4:5], v6, v[4:5]
+; GFX1250TRUE16-NEXT: v_min_u32_e32 v2, 1, v2
+; GFX1250TRUE16-NEXT: v_add_nc_u32_e32 v8, 32, v8
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1250TRUE16-NEXT: v_min_u32_e32 v4, 1, v4
+; GFX1250TRUE16-NEXT: v_or_b32_e32 v2, v3, v2
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1250TRUE16-NEXT: v_add_min_u32_e64 v8, v11, -1, v8
+; GFX1250TRUE16-NEXT: v_dual_sub_nc_u32 v3, 32, v6 :: v_dual_bitop2_b32 v4, v5, v4 bitop3:0x54
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1250TRUE16-NEXT: v_cvt_f32_i32_e32 v2, v2
+; GFX1250TRUE16-NEXT: v_lshlrev_b64_e32 v[0:1], v8, v[0:1]
+; GFX1250TRUE16-NEXT: v_sub_nc_u32_e32 v5, 32, v8
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_min_u32_e32 v0, 1, v0
+; GFX1250TRUE16-NEXT: v_or_b32_e32 v0, v1, v0
+; GFX1250TRUE16-NEXT: v_cvt_f32_i32_e32 v1, v4
+; GFX1250TRUE16-NEXT: v_sub_nc_u32_e32 v4, 32, v7
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1250TRUE16-NEXT: v_cvt_f32_i32_e32 v0, v0
+; GFX1250TRUE16-NEXT: v_ldexp_f32 v1, v1, v3
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1250TRUE16-NEXT: v_ldexp_f32 v2, v2, v4
+; GFX1250TRUE16-NEXT: v_ldexp_f32 v0, v0, v5
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250TRUE16-NEXT: v_cvt_pk_bf16_f32 v1, v1, s0
+; GFX1250TRUE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, v2
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_sitofp_v3i64_to_v3bf16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_xor_b32_e32 v8, v4, v5
+; GFX1250FAKE16-NEXT: v_xor_b32_e32 v6, v2, v3
+; GFX1250FAKE16-NEXT: v_cls_i32_e32 v10, v3
+; GFX1250FAKE16-NEXT: v_cls_i32_e32 v9, v5
+; GFX1250FAKE16-NEXT: v_cls_i32_e32 v11, v1
+; GFX1250FAKE16-NEXT: v_dual_ashrrev_i32 v8, 31, v8 :: v_dual_bitop2_b32 v7, v0, v1 bitop3:0x14
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_dual_ashrrev_i32 v6, 31, v6 :: v_dual_ashrrev_i32 v7, 31, v7
+; GFX1250FAKE16-NEXT: v_dual_add_nc_u32 v6, 32, v6 :: v_dual_add_nc_u32 v7, 32, v7
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250FAKE16-NEXT: v_add_min_u32_e64 v6, v10, -1, v6
+; GFX1250FAKE16-NEXT: v_add_min_u32_e64 v7, v11, -1, v7
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250FAKE16-NEXT: v_lshlrev_b64_e32 v[2:3], v6, v[2:3]
+; GFX1250FAKE16-NEXT: v_lshlrev_b64_e32 v[0:1], v7, v[0:1]
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
+; GFX1250FAKE16-NEXT: v_min_u32_e32 v2, 1, v2
+; GFX1250FAKE16-NEXT: v_add_nc_u32_e32 v8, 32, v8
+; GFX1250FAKE16-NEXT: v_min_u32_e32 v0, 1, v0
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1250FAKE16-NEXT: v_or_b32_e32 v2, v3, v2
+; GFX1250FAKE16-NEXT: v_add_min_u32_e64 v8, v9, -1, v8
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1250FAKE16-NEXT: v_dual_sub_nc_u32 v3, 32, v6 :: v_dual_bitop2_b32 v0, v1, v0 bitop3:0x54
+; GFX1250FAKE16-NEXT: v_cvt_f32_i32_e32 v2, v2
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
+; GFX1250FAKE16-NEXT: v_lshlrev_b64_e32 v[4:5], v8, v[4:5]
+; GFX1250FAKE16-NEXT: v_sub_nc_u32_e32 v8, 32, v8
+; GFX1250FAKE16-NEXT: v_ldexp_f32 v2, v2, v3
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_min_u32_e32 v4, 1, v4
+; GFX1250FAKE16-NEXT: v_dual_sub_nc_u32 v4, 32, v7 :: v_dual_bitop2_b32 v1, v5, v4 bitop3:0x54
+; GFX1250FAKE16-NEXT: v_cvt_f32_i32_e32 v0, v0
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250FAKE16-NEXT: v_cvt_f32_i32_e32 v1, v1
+; GFX1250FAKE16-NEXT: v_ldexp_f32 v0, v0, v4
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250FAKE16-NEXT: v_ldexp_f32 v1, v1, v8
+; GFX1250FAKE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, v2
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX1250FAKE16-NEXT: v_cvt_pk_bf16_f32 v1, v1, s0
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = sitofp <3 x i64> %x to <3 x bfloat>
ret <3 x bfloat> %op
}
@@ -40015,15 +40812,26 @@ define bfloat @v_uitofp_i16_to_bf16(i16 %x) {
; GFX11FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_uitofp_i16_to_bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT: v_cvt_f32_u32_e32 v0, v0
-; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_uitofp_i16_to_bf16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.h, 0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.l, v0.l
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_cvt_f32_u32_e32 v0, v1
+; GFX1250TRUE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_uitofp_i16_to_bf16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_cvt_f32_u32_e32 v0, v0
+; GFX1250FAKE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = uitofp i16 %x to bfloat
ret bfloat %op
}
@@ -40167,18 +40975,32 @@ define <2 x bfloat> @v_uitofp_v2i16_to_v2bf16(<2 x i16> %x) {
; GFX11FAKE16-NEXT: v_perm_b32 v0, v0, v1, 0x7060302
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_uitofp_v2i16_to_v2bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_lshrrev_b32_e32 v1, 16, v0
-; GFX1250-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1250-NEXT: v_cvt_f32_u32_e32 v1, v1
-; GFX1250-NEXT: v_cvt_f32_u32_e32 v0, v0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, v0, v1
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_uitofp_v2i16_to_v2bf16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.h, 0
+; GFX1250TRUE16-NEXT: v_and_b32_e32 v2, 0xffff, v0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.l, v0.h
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250TRUE16-NEXT: v_cvt_f32_u32_e32 v0, v2
+; GFX1250TRUE16-NEXT: v_cvt_f32_u32_e32 v1, v1
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, v1
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_uitofp_v2i16_to_v2bf16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+; GFX1250FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250FAKE16-NEXT: v_cvt_f32_u32_e32 v1, v1
+; GFX1250FAKE16-NEXT: v_cvt_f32_u32_e32 v0, v0
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, v1
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = uitofp <2 x i16> %x to <2 x bfloat>
ret <2 x bfloat> %op
}
@@ -40373,22 +41195,41 @@ define <3 x bfloat> @v_uitofp_v3i16_to_v3bf16(<3 x i16> %x) {
; GFX11FAKE16-NEXT: v_alignbit_b32 v1, s0, v1, 16
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_uitofp_v3i16_to_v3bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_lshrrev_b32_e32 v2, 16, v0
-; GFX1250-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX1250-NEXT: v_and_b32_e32 v1, 0xffff, v1
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX1250-NEXT: v_cvt_f32_u32_e32 v2, v2
-; GFX1250-NEXT: v_cvt_f32_u32_e32 v0, v0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1250-NEXT: v_cvt_f32_u32_e32 v1, v1
-; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, v0, v2
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX1250-NEXT: v_cvt_pk_bf16_f32 v1, v1, s0
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_uitofp_v3i16_to_v3bf16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; GFX1250TRUE16-NEXT: v_and_b32_e32 v2, 0xffff, v0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v3.h, 0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v3.l, v0.h
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX1250TRUE16-NEXT: v_cvt_f32_u32_e32 v0, v1
+; GFX1250TRUE16-NEXT: v_cvt_f32_u32_e32 v1, v2
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1250TRUE16-NEXT: v_cvt_f32_u32_e32 v2, v3
+; GFX1250TRUE16-NEXT: v_cvt_pk_bf16_f32 v3, v0, s0
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250TRUE16-NEXT: v_cvt_pk_bf16_f32 v0, v1, v2
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.l, v3.l
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_uitofp_v3i16_to_v3bf16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_lshrrev_b32_e32 v2, 16, v0
+; GFX1250FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX1250FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1250FAKE16-NEXT: v_cvt_f32_u32_e32 v2, v2
+; GFX1250FAKE16-NEXT: v_cvt_f32_u32_e32 v0, v0
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250FAKE16-NEXT: v_cvt_f32_u32_e32 v1, v1
+; GFX1250FAKE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, v2
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX1250FAKE16-NEXT: v_cvt_pk_bf16_f32 v1, v1, s0
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = uitofp <3 x i16> %x to <3 x bfloat>
ret <3 x bfloat> %op
}
@@ -40626,23 +41467,43 @@ define <4 x bfloat> @v_uitofp_v4i16_to_v4bf16(<4 x i16> %x) {
; GFX11FAKE16-NEXT: v_perm_b32 v1, v1, v2, 0x7060302
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_uitofp_v4i16_to_v4bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_dual_lshrrev_b32 v2, 16, v1 :: v_dual_lshrrev_b32 v3, 16, v0
-; GFX1250-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX1250-NEXT: v_and_b32_e32 v1, 0xffff, v1
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX1250-NEXT: v_cvt_f32_u32_e32 v2, v2
-; GFX1250-NEXT: v_cvt_f32_u32_e32 v3, v3
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
-; GFX1250-NEXT: v_cvt_f32_u32_e32 v0, v0
-; GFX1250-NEXT: v_cvt_f32_u32_e32 v1, v1
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, v0, v3
-; GFX1250-NEXT: v_cvt_pk_bf16_f32 v1, v1, v2
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_uitofp_v4i16_to_v4bf16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v2.h, 0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v2.l, v1.h
+; GFX1250TRUE16-NEXT: v_and_b32_e32 v3, 0xffff, v0
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_4)
+; GFX1250TRUE16-NEXT: v_cvt_f32_u32_e32 v4, v2
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v2.l, v0.h
+; GFX1250TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v1
+; GFX1250TRUE16-NEXT: v_cvt_f32_u32_e32 v1, v3
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1250TRUE16-NEXT: v_cvt_f32_u32_e32 v2, v2
+; GFX1250TRUE16-NEXT: v_cvt_f32_u32_e32 v3, v0
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250TRUE16-NEXT: v_cvt_pk_bf16_f32 v0, v1, v2
+; GFX1250TRUE16-NEXT: v_cvt_pk_bf16_f32 v1, v3, v4
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_uitofp_v4i16_to_v4bf16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_dual_lshrrev_b32 v2, 16, v1 :: v_dual_lshrrev_b32 v3, 16, v0
+; GFX1250FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX1250FAKE16-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX1250FAKE16-NEXT: v_cvt_f32_u32_e32 v2, v2
+; GFX1250FAKE16-NEXT: v_cvt_f32_u32_e32 v3, v3
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX1250FAKE16-NEXT: v_cvt_f32_u32_e32 v0, v0
+; GFX1250FAKE16-NEXT: v_cvt_f32_u32_e32 v1, v1
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250FAKE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, v3
+; GFX1250FAKE16-NEXT: v_cvt_pk_bf16_f32 v1, v1, v2
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = uitofp <4 x i16> %x to <4 x bfloat>
ret <4 x bfloat> %op
}
@@ -41058,17 +41919,31 @@ define <3 x bfloat> @v_uitofp_v3i32_to_v3bf16(<3 x i32> %x) {
; GFX11FAKE16-NEXT: v_alignbit_b32 v1, s0, v2, 16
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_uitofp_v3i32_to_v3bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_cvt_f32_u32_e32 v1, v1
-; GFX1250-NEXT: v_cvt_f32_u32_e32 v0, v0
-; GFX1250-NEXT: v_cvt_f32_u32_e32 v2, v2
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, v0, v1
-; GFX1250-NEXT: v_cvt_pk_bf16_f32 v1, v2, s0
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_uitofp_v3i32_to_v3bf16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_cvt_f32_u32_e32 v2, v2
+; GFX1250TRUE16-NEXT: v_cvt_f32_u32_e32 v1, v1
+; GFX1250TRUE16-NEXT: v_cvt_f32_u32_e32 v0, v0
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250TRUE16-NEXT: v_cvt_pk_bf16_f32 v2, v2, s0
+; GFX1250TRUE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, v1
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.l, v2.l
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_uitofp_v3i32_to_v3bf16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_cvt_f32_u32_e32 v1, v1
+; GFX1250FAKE16-NEXT: v_cvt_f32_u32_e32 v0, v0
+; GFX1250FAKE16-NEXT: v_cvt_f32_u32_e32 v2, v2
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250FAKE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, v1
+; GFX1250FAKE16-NEXT: v_cvt_pk_bf16_f32 v1, v2, s0
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = uitofp <3 x i32> %x to <3 x bfloat>
ret <3 x bfloat> %op
}
@@ -42105,44 +42980,84 @@ define <3 x bfloat> @v_uitofp_v3i64_to_v3bf16(<3 x i64> %x) {
; GFX11FAKE16-NEXT: v_alignbit_b32 v1, s0, v1, 16
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_uitofp_v3i64_to_v3bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_clz_i32_u32_e32 v6, v3
-; GFX1250-NEXT: v_clz_i32_u32_e32 v7, v1
-; GFX1250-NEXT: v_clz_i32_u32_e32 v8, v5
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX1250-NEXT: v_min_u32_e32 v6, 32, v6
-; GFX1250-NEXT: v_min_u32_e32 v7, 32, v7
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX1250-NEXT: v_min_u32_e32 v8, 32, v8
-; GFX1250-NEXT: v_lshlrev_b64_e32 v[2:3], v6, v[2:3]
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX1250-NEXT: v_lshlrev_b64_e32 v[0:1], v7, v[0:1]
-; GFX1250-NEXT: v_lshlrev_b64_e32 v[4:5], v8, v[4:5]
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX1250-NEXT: v_min_u32_e32 v2, 1, v2
-; GFX1250-NEXT: v_min_u32_e32 v0, 1, v0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX1250-NEXT: v_min_u32_e32 v4, 1, v4
-; GFX1250-NEXT: v_dual_sub_nc_u32 v8, 32, v8 :: v_dual_bitop2_b32 v2, v3, v2 bitop3:0x54
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX1250-NEXT: v_dual_sub_nc_u32 v3, 32, v6 :: v_dual_bitop2_b32 v0, v1, v0 bitop3:0x54
-; GFX1250-NEXT: v_dual_sub_nc_u32 v4, 32, v7 :: v_dual_bitop2_b32 v1, v5, v4 bitop3:0x54
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX1250-NEXT: v_cvt_f32_u32_e32 v2, v2
-; GFX1250-NEXT: v_cvt_f32_u32_e32 v0, v0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX1250-NEXT: v_cvt_f32_u32_e32 v1, v1
-; GFX1250-NEXT: v_ldexp_f32 v2, v2, v3
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX1250-NEXT: v_ldexp_f32 v0, v0, v4
-; GFX1250-NEXT: v_ldexp_f32 v1, v1, v8
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1250-NEXT: v_cvt_pk_bf16_f32 v0, v0, v2
-; GFX1250-NEXT: v_cvt_pk_bf16_f32 v1, v1, s0
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_uitofp_v3i64_to_v3bf16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_clz_i32_u32_e32 v6, v5
+; GFX1250TRUE16-NEXT: v_clz_i32_u32_e32 v7, v3
+; GFX1250TRUE16-NEXT: v_clz_i32_u32_e32 v8, v1
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1250TRUE16-NEXT: v_min_u32_e32 v6, 32, v6
+; GFX1250TRUE16-NEXT: v_min_u32_e32 v7, 32, v7
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1250TRUE16-NEXT: v_min_u32_e32 v8, 32, v8
+; GFX1250TRUE16-NEXT: v_lshlrev_b64_e32 v[4:5], v6, v[4:5]
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1250TRUE16-NEXT: v_lshlrev_b64_e32 v[2:3], v7, v[2:3]
+; GFX1250TRUE16-NEXT: v_lshlrev_b64_e32 v[0:1], v8, v[0:1]
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1250TRUE16-NEXT: v_min_u32_e32 v4, 1, v4
+; GFX1250TRUE16-NEXT: v_min_u32_e32 v2, 1, v2
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1250TRUE16-NEXT: v_min_u32_e32 v0, 1, v0
+; GFX1250TRUE16-NEXT: v_or_b32_e32 v4, v5, v4
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1250TRUE16-NEXT: v_or_b32_e32 v2, v3, v2
+; GFX1250TRUE16-NEXT: v_dual_sub_nc_u32 v3, 32, v6 :: v_dual_bitop2_b32 v0, v1, v0 bitop3:0x54
+; GFX1250TRUE16-NEXT: v_sub_nc_u32_e32 v5, 32, v8
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_3) | instid1(VALU_DEP_4)
+; GFX1250TRUE16-NEXT: v_cvt_f32_u32_e32 v1, v4
+; GFX1250TRUE16-NEXT: v_sub_nc_u32_e32 v4, 32, v7
+; GFX1250TRUE16-NEXT: v_cvt_f32_u32_e32 v2, v2
+; GFX1250TRUE16-NEXT: v_cvt_f32_u32_e32 v0, v0
+; GFX1250TRUE16-NEXT: v_ldexp_f32 v1, v1, v3
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1250TRUE16-NEXT: v_ldexp_f32 v2, v2, v4
+; GFX1250TRUE16-NEXT: v_ldexp_f32 v0, v0, v5
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250TRUE16-NEXT: v_cvt_pk_bf16_f32 v1, v1, s0
+; GFX1250TRUE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, v2
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_uitofp_v3i64_to_v3bf16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_clz_i32_u32_e32 v6, v3
+; GFX1250FAKE16-NEXT: v_clz_i32_u32_e32 v7, v1
+; GFX1250FAKE16-NEXT: v_clz_i32_u32_e32 v8, v5
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1250FAKE16-NEXT: v_min_u32_e32 v6, 32, v6
+; GFX1250FAKE16-NEXT: v_min_u32_e32 v7, 32, v7
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1250FAKE16-NEXT: v_min_u32_e32 v8, 32, v8
+; GFX1250FAKE16-NEXT: v_lshlrev_b64_e32 v[2:3], v6, v[2:3]
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1250FAKE16-NEXT: v_lshlrev_b64_e32 v[0:1], v7, v[0:1]
+; GFX1250FAKE16-NEXT: v_lshlrev_b64_e32 v[4:5], v8, v[4:5]
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1250FAKE16-NEXT: v_min_u32_e32 v2, 1, v2
+; GFX1250FAKE16-NEXT: v_min_u32_e32 v0, 1, v0
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1250FAKE16-NEXT: v_min_u32_e32 v4, 1, v4
+; GFX1250FAKE16-NEXT: v_dual_sub_nc_u32 v8, 32, v8 :: v_dual_bitop2_b32 v2, v3, v2 bitop3:0x54
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1250FAKE16-NEXT: v_dual_sub_nc_u32 v3, 32, v6 :: v_dual_bitop2_b32 v0, v1, v0 bitop3:0x54
+; GFX1250FAKE16-NEXT: v_dual_sub_nc_u32 v4, 32, v7 :: v_dual_bitop2_b32 v1, v5, v4 bitop3:0x54
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1250FAKE16-NEXT: v_cvt_f32_u32_e32 v2, v2
+; GFX1250FAKE16-NEXT: v_cvt_f32_u32_e32 v0, v0
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1250FAKE16-NEXT: v_cvt_f32_u32_e32 v1, v1
+; GFX1250FAKE16-NEXT: v_ldexp_f32 v2, v2, v3
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX1250FAKE16-NEXT: v_ldexp_f32 v0, v0, v4
+; GFX1250FAKE16-NEXT: v_ldexp_f32 v1, v1, v8
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250FAKE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, v2
+; GFX1250FAKE16-NEXT: v_cvt_pk_bf16_f32 v1, v1, s0
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = uitofp <3 x i64> %x to <3 x bfloat>
ret <3 x bfloat> %op
}
@@ -42717,15 +43632,25 @@ define bfloat @v_select_bf16(i1 %cond, bfloat %a, bfloat %b) {
; GFX11FAKE16-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc_lo
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_select_bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1250-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc_lo
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_select_bf16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_and_b32_e32 v0, 1, v0
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v1.l, vcc_lo
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_select_bf16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_and_b32_e32 v0, 1, v0
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc_lo
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = select i1 %cond, bfloat %a, bfloat %b
ret bfloat %op
}
@@ -42810,16 +43735,27 @@ define bfloat @v_select_fneg_lhs_bf16(i1 %cond, bfloat %a, bfloat %b) {
; GFX11FAKE16-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc_lo
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_select_fneg_lhs_bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1250-NEXT: v_xor_b32_e32 v1, 0x8000, v1
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1250-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc_lo
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_select_fneg_lhs_bf16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_and_b32_e32 v0, 1, v0
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
+; GFX1250TRUE16-NEXT: v_xor_b16 v0.l, 0x8000, v1.l
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v0.l, vcc_lo
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_select_fneg_lhs_bf16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_and_b32_e32 v0, 1, v0
+; GFX1250FAKE16-NEXT: v_xor_b32_e32 v1, 0x8000, v1
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc_lo
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%neg.a = fneg bfloat %a
%op = select i1 %cond, bfloat %neg.a, bfloat %b
ret bfloat %op
@@ -42905,16 +43841,27 @@ define bfloat @v_select_fneg_rhs_bf16(i1 %cond, bfloat %a, bfloat %b) {
; GFX11FAKE16-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc_lo
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_select_fneg_rhs_bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_and_b32_e32 v0, 1, v0
-; GFX1250-NEXT: v_xor_b32_e32 v2, 0x8000, v2
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1250-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc_lo
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_select_fneg_rhs_bf16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_and_b32_e32 v0, 1, v0
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
+; GFX1250TRUE16-NEXT: v_xor_b16 v0.l, 0x8000, v2.l
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v0.l, v0.l, v1.l, vcc_lo
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_select_fneg_rhs_bf16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_and_b32_e32 v0, 1, v0
+; GFX1250FAKE16-NEXT: v_xor_b32_e32 v2, 0x8000, v2
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e32 v0, v2, v1, vcc_lo
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%neg.b = fneg bfloat %b
%op = select i1 %cond, bfloat %a, bfloat %neg.b
ret bfloat %op
@@ -43025,18 +43972,29 @@ define <2 x bfloat> @v_select_v2bf16(i1 %cond, <2 x bfloat> %a, <2 x bfloat> %b)
; GFX11FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_select_v2bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_dual_lshrrev_b32 v3, 16, v1 :: v_dual_bitop2_b32 v0, 1, v0 bitop3:0x40
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1250-NEXT: v_dual_lshrrev_b32 v4, 16, v2 :: v_dual_cndmask_b32 v0, v2, v1, vcc_lo
-; GFX1250-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc_lo
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1250-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_select_v2bf16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_and_b32_e32 v0, 1, v0
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v1.l, vcc_lo
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v0.h, v2.h, v1.h, vcc_lo
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_select_v2bf16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_dual_lshrrev_b32 v3, 16, v1 :: v_dual_bitop2_b32 v0, 1, v0 bitop3:0x40
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
+; GFX1250FAKE16-NEXT: v_dual_lshrrev_b32 v4, 16, v2 :: v_dual_cndmask_b32 v0, v2, v1, vcc_lo
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e32 v1, v4, v3, vcc_lo
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = select i1 %cond, <2 x bfloat> %a, <2 x bfloat> %b
ret <2 x bfloat> %op
}
@@ -43155,20 +44113,34 @@ define <2 x bfloat> @v_vselect_v2bf16(<2 x i1> %cond, <2 x bfloat> %a, <2 x bflo
; GFX11FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_vselect_v2bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_dual_lshrrev_b32 v4, 16, v2 :: v_dual_bitop2_b32 v0, 1, v0 bitop3:0x40
-; GFX1250-NEXT: v_dual_lshrrev_b32 v5, 16, v3 :: v_dual_bitop2_b32 v1, 1, v1 bitop3:0x40
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1250-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc_lo
-; GFX1250-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_vselect_v2bf16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_and_b16 v0.l, 1, v0.l
+; GFX1250TRUE16-NEXT: v_and_b16 v0.h, 1, v1.l
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250TRUE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 1, v0.l
+; GFX1250TRUE16-NEXT: v_cmp_eq_u16_e64 s0, 1, v0.h
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, v2.l, vcc_lo
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v0.h, v3.h, v2.h, s0
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_vselect_v2bf16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_dual_lshrrev_b32 v4, 16, v2 :: v_dual_bitop2_b32 v0, 1, v0 bitop3:0x40
+; GFX1250FAKE16-NEXT: v_dual_lshrrev_b32 v5, 16, v3 :: v_dual_bitop2_b32 v1, 1, v1 bitop3:0x40
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_3)
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e32 v1, v5, v4, vcc_lo
+; GFX1250FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = select <2 x i1> %cond, <2 x bfloat> %a, <2 x bfloat> %b
ret <2 x bfloat> %op
}
@@ -43256,16 +44228,26 @@ define amdgpu_ps i32 @s_select_bf16(bfloat inreg %a, bfloat inreg %b, i32 %c) {
; GFX11FAKE16-NEXT: v_readfirstlane_b32 s0, v0
; GFX11FAKE16-NEXT: ; return to shader part epilog
;
-; GFX1250-LABEL: s_select_bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: v_mov_b32_e32 v1, s0
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT: v_cndmask_b32_e32 v0, s1, v1, vcc_lo
-; GFX1250-NEXT: v_and_b32_e32 v0, 0xffff, v0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1250-NEXT: v_readfirstlane_b32 s0, v0
-; GFX1250-NEXT: ; return to shader part epilog
+; GFX1250TRUE16-LABEL: s_select_bf16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.l, s0
+; GFX1250TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v0.h, 0
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v0.l, s1, v1.l, vcc_lo
+; GFX1250TRUE16-NEXT: v_readfirstlane_b32 s0, v0
+; GFX1250TRUE16-NEXT: ; return to shader part epilog
+;
+; GFX1250FAKE16-LABEL: s_select_bf16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: v_mov_b32_e32 v1, s0
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e32 v0, s1, v1, vcc_lo
+; GFX1250FAKE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_readfirstlane_b32 s0, v0
+; GFX1250FAKE16-NEXT: ; return to shader part epilog
%cond = icmp eq i32 %c, 0
%op = select i1 %cond, bfloat %a, bfloat %b
%cast = bitcast bfloat %op to i16
@@ -43402,20 +44384,34 @@ define amdgpu_ps i32 @s_select_v2bf16(<2 x bfloat> inreg %a, <2 x bfloat> inreg
; GFX11FAKE16-NEXT: v_readfirstlane_b32 s0, v0
; GFX11FAKE16-NEXT: ; return to shader part epilog
;
-; GFX1250-LABEL: s_select_v2bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_lshr_b32 s2, s0, 16
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX1250-NEXT: v_dual_mov_b32 v1, s2 :: v_dual_mov_b32 v2, s0
-; GFX1250-NEXT: s_lshr_b32 s3, s1, 16
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
-; GFX1250-NEXT: v_cndmask_b32_e32 v0, s3, v1, vcc_lo
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT: v_cndmask_b32_e32 v1, s1, v2, vcc_lo
-; GFX1250-NEXT: v_perm_b32 v0, v0, v1, 0x5040100
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1250-NEXT: v_readfirstlane_b32 s0, v0
-; GFX1250-NEXT: ; return to shader part epilog
+; GFX1250TRUE16-LABEL: s_select_v2bf16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_lshr_b32 s2, s0, 16
+; GFX1250TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.l, s2
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v0.l, s0
+; GFX1250TRUE16-NEXT: s_lshr_b32 s0, s1, 16
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instid1(SALU_CYCLE_1)
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v0.h, s0, v1.l, vcc_lo
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v0.l, s1, v0.l, vcc_lo
+; GFX1250TRUE16-NEXT: v_readfirstlane_b32 s0, v0
+; GFX1250TRUE16-NEXT: ; return to shader part epilog
+;
+; GFX1250FAKE16-LABEL: s_select_v2bf16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_lshr_b32 s2, s0, 16
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX1250FAKE16-NEXT: v_dual_mov_b32 v1, s2 :: v_dual_mov_b32 v2, s0
+; GFX1250FAKE16-NEXT: s_lshr_b32 s3, s1, 16
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e32 v0, s3, v1, vcc_lo
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e32 v1, s1, v2, vcc_lo
+; GFX1250FAKE16-NEXT: v_perm_b32 v0, v0, v1, 0x5040100
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_readfirstlane_b32 s0, v0
+; GFX1250FAKE16-NEXT: ; return to shader part epilog
%cond = icmp eq i32 %c, 0
%op = select i1 %cond, <2 x bfloat> %a, <2 x bfloat> %b
%cast = bitcast <2 x bfloat> %op to i32
@@ -43554,21 +44550,36 @@ define amdgpu_ps i32 @s_vselect_v2bf16(<2 x bfloat> inreg %a, <2 x bfloat> inreg
; GFX11FAKE16-NEXT: v_readfirstlane_b32 s0, v0
; GFX11FAKE16-NEXT: ; return to shader part epilog
;
-; GFX1250-LABEL: s_vselect_v2bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_lshr_b32 s2, s0, 16
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX1250-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s0
-; GFX1250-NEXT: s_lshr_b32 s0, s1, 16
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
-; GFX1250-NEXT: v_cndmask_b32_e32 v1, s0, v2, vcc_lo
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT: v_cndmask_b32_e32 v0, s1, v3, vcc_lo
-; GFX1250-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1250-NEXT: v_readfirstlane_b32 s0, v0
-; GFX1250-NEXT: ; return to shader part epilog
+; GFX1250TRUE16-LABEL: s_vselect_v2bf16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_lshr_b32 s3, s0, 16
+; GFX1250TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX1250TRUE16-NEXT: v_cmp_eq_u32_e64 s2, 0, v1
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v0.l, s3
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v0.h, s0
+; GFX1250TRUE16-NEXT: s_lshr_b32 s0, s1, 16
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instid1(SALU_CYCLE_1)
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v1.h, s0, v0.l, s2
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v1.l, s1, v0.h, vcc_lo
+; GFX1250TRUE16-NEXT: v_readfirstlane_b32 s0, v1
+; GFX1250TRUE16-NEXT: ; return to shader part epilog
+;
+; GFX1250FAKE16-LABEL: s_vselect_v2bf16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_lshr_b32 s2, s0, 16
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX1250FAKE16-NEXT: v_dual_mov_b32 v2, s2 :: v_dual_mov_b32 v3, s0
+; GFX1250FAKE16-NEXT: s_lshr_b32 s0, s1, 16
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instid1(SALU_CYCLE_1)
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e32 v1, s0, v2, vcc_lo
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e32 v0, s1, v3, vcc_lo
+; GFX1250FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_readfirstlane_b32 s0, v0
+; GFX1250FAKE16-NEXT: ; return to shader part epilog
%cond = icmp eq <2 x i32> %c, zeroinitializer
%op = select <2 x i1> %cond, <2 x bfloat> %a, <2 x bfloat> %b
%cast = bitcast <2 x bfloat> %op to i32
@@ -45557,32 +46568,55 @@ define amdgpu_ps <2 x i32> @s_vselect_v4bf16(<4 x bfloat> inreg %a, <4 x bfloat>
; GFX11FAKE16-NEXT: v_readfirstlane_b32 s1, v1
; GFX11FAKE16-NEXT: ; return to shader part epilog
;
-; GFX1250-LABEL: s_vselect_v4bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_lshr_b32 s4, s1, 16
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3
-; GFX1250-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v5, s1
-; GFX1250-NEXT: s_lshr_b32 s4, s3, 16
-; GFX1250-NEXT: s_lshr_b32 s5, s0, 16
-; GFX1250-NEXT: v_mov_b32_e32 v6, s0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX1250-NEXT: v_cndmask_b32_e32 v3, s4, v4, vcc_lo
-; GFX1250-NEXT: v_mov_b32_e32 v4, s5
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX1250-NEXT: s_lshr_b32 s0, s2, 16
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instid1(SALU_CYCLE_1)
-; GFX1250-NEXT: v_cndmask_b32_e32 v1, s0, v4, vcc_lo
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX1250-NEXT: v_cndmask_b32_e32 v0, s2, v6, vcc_lo
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v2
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX1250-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX1250-NEXT: v_cndmask_b32_e32 v2, s3, v5, vcc_lo
-; GFX1250-NEXT: v_readfirstlane_b32 s0, v0
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1250-NEXT: v_perm_b32 v1, v3, v2, 0x5040100
-; GFX1250-NEXT: v_readfirstlane_b32 s1, v1
-; GFX1250-NEXT: ; return to shader part epilog
+; GFX1250TRUE16-LABEL: s_vselect_v4bf16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_lshr_b32 s7, s1, 16
+; GFX1250TRUE16-NEXT: s_lshr_b32 s9, s0, 16
+; GFX1250TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX1250TRUE16-NEXT: v_cmp_eq_u32_e64 s4, 0, v1
+; GFX1250TRUE16-NEXT: v_cmp_eq_u32_e64 s5, 0, v2
+; GFX1250TRUE16-NEXT: v_cmp_eq_u32_e64 s6, 0, v3
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v0.l, s7
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v0.h, s9
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.l, s0
+; GFX1250TRUE16-NEXT: v_mov_b16_e32 v1.h, s1
+; GFX1250TRUE16-NEXT: s_lshr_b32 s8, s3, 16
+; GFX1250TRUE16-NEXT: s_lshr_b32 s0, s2, 16
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v2.h, s8, v0.l, s6
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v0.h, s0, v0.h, s4
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v0.l, s2, v1.l, vcc_lo
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v2.l, s3, v1.h, s5
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250TRUE16-NEXT: v_readfirstlane_b32 s0, v0
+; GFX1250TRUE16-NEXT: v_readfirstlane_b32 s1, v2
+; GFX1250TRUE16-NEXT: ; return to shader part epilog
+;
+; GFX1250FAKE16-LABEL: s_vselect_v4bf16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_lshr_b32 s4, s1, 16
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v3
+; GFX1250FAKE16-NEXT: v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v5, s1
+; GFX1250FAKE16-NEXT: s_lshr_b32 s4, s3, 16
+; GFX1250FAKE16-NEXT: s_lshr_b32 s5, s0, 16
+; GFX1250FAKE16-NEXT: v_mov_b32_e32 v6, s0
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e32 v3, s4, v4, vcc_lo
+; GFX1250FAKE16-NEXT: v_mov_b32_e32 v4, s5
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX1250FAKE16-NEXT: s_lshr_b32 s0, s2, 16
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instid1(SALU_CYCLE_1)
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e32 v1, s0, v4, vcc_lo
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e32 v0, s2, v6, vcc_lo
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v2
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1250FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e32 v2, s3, v5, vcc_lo
+; GFX1250FAKE16-NEXT: v_readfirstlane_b32 s0, v0
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_perm_b32 v1, v3, v2, 0x5040100
+; GFX1250FAKE16-NEXT: v_readfirstlane_b32 s1, v1
+; GFX1250FAKE16-NEXT: ; return to shader part epilog
%cond = icmp eq <4 x i32> %c, zeroinitializer
%op = select <4 x i1> %cond, <4 x bfloat> %a, <4 x bfloat> %b
%cast = bitcast <4 x bfloat> %op to <2 x i32>
@@ -45787,27 +46821,49 @@ define <4 x bfloat> @v_vselect_v4bf16(<4 x i1> %cond, <4 x bfloat> %a, <4 x bflo
; GFX11FAKE16-NEXT: v_perm_b32 v1, v3, v2, 0x5040100
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_vselect_v4bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_and_b32_e32 v2, 1, v2
-; GFX1250-NEXT: v_dual_lshrrev_b32 v8, 16, v4 :: v_dual_bitop2_b32 v1, 1, v1 bitop3:0x40
-; GFX1250-NEXT: v_dual_lshrrev_b32 v9, 16, v6 :: v_dual_bitop2_b32 v3, 1, v3 bitop3:0x40
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v2
-; GFX1250-NEXT: v_dual_cndmask_b32 v2, v7, v5, vcc_lo :: v_dual_bitop2_b32 v0, 1, v0 bitop3:0x40
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1250-NEXT: v_dual_lshrrev_b32 v7, 16, v7 :: v_dual_lshrrev_b32 v5, 16, v5
-; GFX1250-NEXT: v_cndmask_b32_e32 v0, v6, v4, vcc_lo
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1
-; GFX1250-NEXT: v_cndmask_b32_e32 v1, v9, v8, vcc_lo
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v3
-; GFX1250-NEXT: v_cndmask_b32_e32 v3, v7, v5, vcc_lo
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1250-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX1250-NEXT: v_perm_b32 v1, v3, v2, 0x5040100
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_vselect_v4bf16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_and_b16 v0.h, 1, v2.l
+; GFX1250TRUE16-NEXT: v_and_b16 v0.l, 1, v0.l
+; GFX1250TRUE16-NEXT: v_and_b16 v1.l, 1, v1.l
+; GFX1250TRUE16-NEXT: v_and_b16 v1.h, 1, v3.l
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX1250TRUE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 1, v0.h
+; GFX1250TRUE16-NEXT: v_cmp_eq_u16_e64 s0, 1, v0.l
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX1250TRUE16-NEXT: v_cmp_eq_u16_e64 s1, 1, v1.l
+; GFX1250TRUE16-NEXT: v_cmp_eq_u16_e64 s2, 1, v1.h
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v1.l, v7.l, v5.l, vcc_lo
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v0.l, v6.l, v4.l, s0
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v0.h, v6.h, v4.h, s1
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4)
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v1.h, v7.h, v5.h, s2
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_vselect_v4bf16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_and_b32_e32 v2, 1, v2
+; GFX1250FAKE16-NEXT: v_dual_lshrrev_b32 v8, 16, v4 :: v_dual_bitop2_b32 v1, 1, v1 bitop3:0x40
+; GFX1250FAKE16-NEXT: v_dual_lshrrev_b32 v9, 16, v6 :: v_dual_bitop2_b32 v3, 1, v3 bitop3:0x40
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v2
+; GFX1250FAKE16-NEXT: v_dual_cndmask_b32 v2, v7, v5, vcc_lo :: v_dual_bitop2_b32 v0, 1, v0 bitop3:0x40
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
+; GFX1250FAKE16-NEXT: v_dual_lshrrev_b32 v7, 16, v7 :: v_dual_lshrrev_b32 v5, 16, v5
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e32 v0, v6, v4, vcc_lo
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e32 v1, v9, v8, vcc_lo
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v3
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e32 v3, v7, v5, vcc_lo
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX1250FAKE16-NEXT: v_perm_b32 v1, v3, v2, 0x5040100
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = select <4 x i1> %cond, <4 x bfloat> %a, <4 x bfloat> %b
ret <4 x bfloat> %op
}
@@ -46161,45 +47217,77 @@ define <8 x bfloat> @v_vselect_v8bf16(<8 x i1> %cond, <8 x bfloat> %a, <8 x bflo
; GFX11FAKE16-NEXT: v_perm_b32 v3, v7, v6, 0x5040100
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_vselect_v8bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_and_b32_e32 v6, 1, v6
-; GFX1250-NEXT: v_and_b32_e32 v4, 1, v4
-; GFX1250-NEXT: v_dual_lshrrev_b32 v17, 16, v14 :: v_dual_bitop2_b32 v5, 1, v5 bitop3:0x40
-; GFX1250-NEXT: v_dual_lshrrev_b32 v16, 16, v10 :: v_dual_bitop2_b32 v3, 1, v3 bitop3:0x40
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_4)
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v6
-; GFX1250-NEXT: v_and_b32_e32 v1, 1, v1
-; GFX1250-NEXT: v_dual_cndmask_b32 v6, v15, v11, vcc_lo :: v_dual_bitop2_b32 v0, 1, v0 bitop3:0x40
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v4
-; GFX1250-NEXT: v_and_b32_e32 v7, 1, v7
-; GFX1250-NEXT: v_lshrrev_b32_e32 v11, 16, v11
-; GFX1250-NEXT: v_dual_cndmask_b32 v4, v14, v10 :: v_dual_lshrrev_b32 v15, 16, v15
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v5
-; GFX1250-NEXT: v_dual_lshrrev_b32 v14, 16, v12 :: v_dual_bitop2_b32 v2, 1, v2 bitop3:0x40
-; GFX1250-NEXT: v_lshrrev_b32_e32 v10, 16, v8
-; GFX1250-NEXT: v_cndmask_b32_e32 v5, v17, v16, vcc_lo
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3)
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v2
-; GFX1250-NEXT: v_cndmask_b32_e32 v2, v13, v9, vcc_lo
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1250-NEXT: v_lshrrev_b32_e32 v9, 16, v9
-; GFX1250-NEXT: v_dual_cndmask_b32 v0, v12, v8 :: v_dual_lshrrev_b32 v13, 16, v13
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1
-; GFX1250-NEXT: v_cndmask_b32_e32 v1, v14, v10, vcc_lo
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v3
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_3)
-; GFX1250-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX1250-NEXT: v_cndmask_b32_e32 v3, v13, v9, vcc_lo
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v7
-; GFX1250-NEXT: v_cndmask_b32_e32 v7, v15, v11, vcc_lo
-; GFX1250-NEXT: v_perm_b32 v1, v3, v2, 0x5040100
-; GFX1250-NEXT: v_perm_b32 v2, v5, v4, 0x5040100
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3)
-; GFX1250-NEXT: v_perm_b32 v3, v7, v6, 0x5040100
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_vselect_v8bf16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_and_b16 v0.h, 1, v1.l
+; GFX1250TRUE16-NEXT: v_and_b16 v0.l, 1, v0.l
+; GFX1250TRUE16-NEXT: v_and_b16 v1.l, 1, v3.l
+; GFX1250TRUE16-NEXT: v_and_b16 v1.h, 1, v5.l
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(NEXT) | instid1(VALU_DEP_4)
+; GFX1250TRUE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 1, v0.h
+; GFX1250TRUE16-NEXT: v_cmp_eq_u16_e64 s0, 1, v0.l
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_4)
+; GFX1250TRUE16-NEXT: v_cmp_eq_u16_e64 s1, 1, v1.l
+; GFX1250TRUE16-NEXT: v_and_b16 v0.l, 1, v6.l
+; GFX1250TRUE16-NEXT: v_and_b16 v0.h, 1, v4.l
+; GFX1250TRUE16-NEXT: v_and_b16 v1.l, 1, v2.l
+; GFX1250TRUE16-NEXT: v_and_b16 v2.l, 1, v7.l
+; GFX1250TRUE16-NEXT: v_cmp_eq_u16_e64 s5, 1, v1.h
+; GFX1250TRUE16-NEXT: v_cmp_eq_u16_e64 s2, 1, v0.l
+; GFX1250TRUE16-NEXT: v_cmp_eq_u16_e64 s3, 1, v0.h
+; GFX1250TRUE16-NEXT: v_cmp_eq_u16_e64 s4, 1, v1.l
+; GFX1250TRUE16-NEXT: v_cmp_eq_u16_e64 s6, 1, v2.l
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v0.l, v12.l, v8.l, s0
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v3.l, v15.l, v11.l, s2
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v2.l, v14.l, v10.l, s3
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v1.l, v13.l, v9.l, s4
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v0.h, v12.h, v8.h, vcc_lo
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v1.h, v13.h, v9.h, s1
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v2.h, v14.h, v10.h, s5
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v3.h, v15.h, v11.h, s6
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_vselect_v8bf16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_and_b32_e32 v6, 1, v6
+; GFX1250FAKE16-NEXT: v_and_b32_e32 v4, 1, v4
+; GFX1250FAKE16-NEXT: v_dual_lshrrev_b32 v17, 16, v14 :: v_dual_bitop2_b32 v5, 1, v5 bitop3:0x40
+; GFX1250FAKE16-NEXT: v_dual_lshrrev_b32 v16, 16, v10 :: v_dual_bitop2_b32 v3, 1, v3 bitop3:0x40
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4)
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v6
+; GFX1250FAKE16-NEXT: v_and_b32_e32 v1, 1, v1
+; GFX1250FAKE16-NEXT: v_dual_cndmask_b32 v6, v15, v11, vcc_lo :: v_dual_bitop2_b32 v0, 1, v0 bitop3:0x40
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v4
+; GFX1250FAKE16-NEXT: v_and_b32_e32 v7, 1, v7
+; GFX1250FAKE16-NEXT: v_lshrrev_b32_e32 v11, 16, v11
+; GFX1250FAKE16-NEXT: v_dual_cndmask_b32 v4, v14, v10 :: v_dual_lshrrev_b32 v15, 16, v15
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v5
+; GFX1250FAKE16-NEXT: v_dual_lshrrev_b32 v14, 16, v12 :: v_dual_bitop2_b32 v2, 1, v2 bitop3:0x40
+; GFX1250FAKE16-NEXT: v_lshrrev_b32_e32 v10, 16, v8
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e32 v5, v17, v16, vcc_lo
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3)
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v2
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e32 v2, v13, v9, vcc_lo
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
+; GFX1250FAKE16-NEXT: v_lshrrev_b32_e32 v9, 16, v9
+; GFX1250FAKE16-NEXT: v_dual_cndmask_b32 v0, v12, v8 :: v_dual_lshrrev_b32 v13, 16, v13
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e32 v1, v14, v10, vcc_lo
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v3
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_3) | instid1(VALU_DEP_3)
+; GFX1250FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e32 v3, v13, v9, vcc_lo
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v7
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e32 v7, v15, v11, vcc_lo
+; GFX1250FAKE16-NEXT: v_perm_b32 v1, v3, v2, 0x5040100
+; GFX1250FAKE16-NEXT: v_perm_b32 v2, v5, v4, 0x5040100
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3)
+; GFX1250FAKE16-NEXT: v_perm_b32 v3, v7, v6, 0x5040100
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = select <8 x i1> %cond, <8 x bfloat> %a, <8 x bfloat> %b
ret <8 x bfloat> %op
}
@@ -46939,73 +48027,129 @@ define <16 x bfloat> @v_vselect_v16bf16(<16 x i1> %cond, <16 x bfloat> %a, <16 x
; GFX11FAKE16-NEXT: v_perm_b32 v7, v15, v14, 0x5040100
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_vselect_v16bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: scratch_load_b32 v31, off, s32
-; GFX1250-NEXT: v_dual_lshrrev_b32 v52, 16, v25 :: v_dual_bitop2_b32 v12, 1, v12 bitop3:0x40
-; GFX1250-NEXT: v_dual_lshrrev_b32 v53, 16, v16 :: v_dual_bitop2_b32 v13, 1, v13 bitop3:0x40
-; GFX1250-NEXT: v_dual_lshrrev_b32 v33, 16, v22 :: v_dual_bitop2_b32 v0, 1, v0 bitop3:0x40
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3)
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v12
-; GFX1250-NEXT: v_dual_lshrrev_b32 v34, 16, v30 :: v_dual_bitop2_b32 v3, 1, v3 bitop3:0x40
-; GFX1250-NEXT: v_dual_lshrrev_b32 v51, 16, v17 :: v_dual_bitop2_b32 v10, 1, v10 bitop3:0x40
-; GFX1250-NEXT: v_cndmask_b32_e32 v12, v30, v22, vcc_lo
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v13
-; GFX1250-NEXT: v_dual_lshrrev_b32 v50, 16, v26 :: v_dual_bitop2_b32 v11, 1, v11 bitop3:0x40
-; GFX1250-NEXT: v_and_b32_e32 v14, 1, v14
-; GFX1250-NEXT: v_dual_lshrrev_b32 v35, 16, v21 :: v_dual_bitop2_b32 v2, 1, v2 bitop3:0x40
-; GFX1250-NEXT: v_cndmask_b32_e32 v13, v34, v33, vcc_lo
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v10
-; GFX1250-NEXT: v_dual_lshrrev_b32 v36, 16, v29 :: v_dual_bitop2_b32 v4, 1, v4 bitop3:0x40
-; GFX1250-NEXT: v_dual_lshrrev_b32 v49, 16, v18 :: v_dual_bitop2_b32 v8, 1, v8 bitop3:0x40
-; GFX1250-NEXT: v_cndmask_b32_e32 v10, v29, v21, vcc_lo
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v11
-; GFX1250-NEXT: v_dual_lshrrev_b32 v37, 16, v20 :: v_dual_bitop2_b32 v5, 1, v5 bitop3:0x40
-; GFX1250-NEXT: v_dual_lshrrev_b32 v38, 16, v28 :: v_dual_bitop2_b32 v7, 1, v7 bitop3:0x40
-; GFX1250-NEXT: v_dual_lshrrev_b32 v48, 16, v27 :: v_dual_bitop2_b32 v9, 1, v9 bitop3:0x40
-; GFX1250-NEXT: v_cndmask_b32_e32 v11, v36, v35, vcc_lo
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v8
-; GFX1250-NEXT: v_dual_lshrrev_b32 v39, 16, v19 :: v_dual_bitop2_b32 v6, 1, v6 bitop3:0x40
-; GFX1250-NEXT: v_dual_lshrrev_b32 v32, 16, v23 :: v_dual_bitop2_b32 v1, 1, v1 bitop3:0x40
-; GFX1250-NEXT: v_cndmask_b32_e32 v8, v28, v20, vcc_lo
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v9
-; GFX1250-NEXT: v_dual_lshrrev_b32 v54, 16, v24 :: v_dual_bitop2_b32 v15, 1, v15 bitop3:0x40
-; GFX1250-NEXT: v_cndmask_b32_e32 v9, v38, v37, vcc_lo
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v6
-; GFX1250-NEXT: v_cndmask_b32_e32 v6, v27, v19, vcc_lo
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v4
-; GFX1250-NEXT: v_cndmask_b32_e32 v4, v26, v18, vcc_lo
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v2
-; GFX1250-NEXT: v_cndmask_b32_e32 v2, v25, v17, vcc_lo
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v3
-; GFX1250-NEXT: v_cndmask_b32_e32 v3, v52, v51, vcc_lo
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1250-NEXT: v_cndmask_b32_e32 v0, v24, v16, vcc_lo
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1
-; GFX1250-NEXT: v_cndmask_b32_e32 v1, v54, v53, vcc_lo
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v5
-; GFX1250-NEXT: v_cndmask_b32_e32 v5, v50, v49, vcc_lo
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v7
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
-; GFX1250-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX1250-NEXT: v_perm_b32 v1, v3, v2, 0x5040100
-; GFX1250-NEXT: v_perm_b32 v2, v5, v4, 0x5040100
-; GFX1250-NEXT: v_perm_b32 v4, v9, v8, 0x5040100
-; GFX1250-NEXT: v_perm_b32 v5, v11, v10, 0x5040100
-; GFX1250-NEXT: s_wait_loadcnt 0x0
-; GFX1250-NEXT: v_lshrrev_b32_e32 v3, 16, v31
-; GFX1250-NEXT: v_cndmask_b32_e32 v7, v48, v39, vcc_lo
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v14
-; GFX1250-NEXT: v_cndmask_b32_e32 v14, v31, v23, vcc_lo
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v15
-; GFX1250-NEXT: v_cndmask_b32_e32 v15, v3, v32, vcc_lo
-; GFX1250-NEXT: v_perm_b32 v3, v7, v6, 0x5040100
-; GFX1250-NEXT: v_perm_b32 v6, v13, v12, 0x5040100
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_3)
-; GFX1250-NEXT: v_perm_b32 v7, v15, v14, 0x5040100
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_vselect_v16bf16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: scratch_load_b32 v31, off, s32
+; GFX1250TRUE16-NEXT: v_and_b16 v0.h, 1, v1.l
+; GFX1250TRUE16-NEXT: v_and_b16 v0.l, 1, v0.l
+; GFX1250TRUE16-NEXT: v_and_b16 v1.l, 1, v3.l
+; GFX1250TRUE16-NEXT: v_and_b16 v1.h, 1, v2.l
+; GFX1250TRUE16-NEXT: v_and_b16 v2.l, 1, v5.l
+; GFX1250TRUE16-NEXT: v_and_b16 v2.h, 1, v4.l
+; GFX1250TRUE16-NEXT: v_and_b16 v3.l, 1, v7.l
+; GFX1250TRUE16-NEXT: v_and_b16 v3.h, 1, v6.l
+; GFX1250TRUE16-NEXT: v_and_b16 v4.l, 1, v9.l
+; GFX1250TRUE16-NEXT: v_and_b16 v4.h, 1, v8.l
+; GFX1250TRUE16-NEXT: v_and_b16 v5.l, 1, v11.l
+; GFX1250TRUE16-NEXT: v_and_b16 v5.h, 1, v10.l
+; GFX1250TRUE16-NEXT: v_and_b16 v6.l, 1, v13.l
+; GFX1250TRUE16-NEXT: v_and_b16 v6.h, 1, v12.l
+; GFX1250TRUE16-NEXT: v_and_b16 v7.l, 1, v15.l
+; GFX1250TRUE16-NEXT: v_and_b16 v7.h, 1, v14.l
+; GFX1250TRUE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 1, v0.h
+; GFX1250TRUE16-NEXT: v_cmp_eq_u16_e64 s0, 1, v0.l
+; GFX1250TRUE16-NEXT: v_cmp_eq_u16_e64 s1, 1, v1.l
+; GFX1250TRUE16-NEXT: v_cmp_eq_u16_e64 s2, 1, v1.h
+; GFX1250TRUE16-NEXT: v_cmp_eq_u16_e64 s3, 1, v2.l
+; GFX1250TRUE16-NEXT: v_cmp_eq_u16_e64 s4, 1, v2.h
+; GFX1250TRUE16-NEXT: v_cmp_eq_u16_e64 s5, 1, v3.l
+; GFX1250TRUE16-NEXT: v_cmp_eq_u16_e64 s6, 1, v3.h
+; GFX1250TRUE16-NEXT: v_cmp_eq_u16_e64 s7, 1, v4.l
+; GFX1250TRUE16-NEXT: v_cmp_eq_u16_e64 s8, 1, v4.h
+; GFX1250TRUE16-NEXT: v_cmp_eq_u16_e64 s9, 1, v5.l
+; GFX1250TRUE16-NEXT: v_cmp_eq_u16_e64 s10, 1, v6.h
+; GFX1250TRUE16-NEXT: v_cmp_eq_u16_e64 s11, 1, v6.l
+; GFX1250TRUE16-NEXT: v_cmp_eq_u16_e64 s12, 1, v5.h
+; GFX1250TRUE16-NEXT: v_cmp_eq_u16_e64 s13, 1, v7.l
+; GFX1250TRUE16-NEXT: v_cmp_eq_u16_e64 s14, 1, v7.h
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v6.l, v30.l, v22.l, s10
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v6.h, v30.h, v22.h, s11
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v5.l, v29.l, v21.l, s12
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v5.h, v29.h, v21.h, s9
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v4.l, v28.l, v20.l, s8
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v4.h, v28.h, v20.h, s7
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v3.l, v27.l, v19.l, s6
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v3.h, v27.h, v19.h, s5
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v2.l, v26.l, v18.l, s4
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v1.l, v25.l, v17.l, s2
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v0.l, v24.l, v16.l, s0
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v0.h, v24.h, v16.h, vcc_lo
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v1.h, v25.h, v17.h, s1
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v2.h, v26.h, v18.h, s3
+; GFX1250TRUE16-NEXT: s_wait_loadcnt 0x0
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v7.l, v31.l, v23.l, s14
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v7.h, v31.h, v23.h, s13
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_vselect_v16bf16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: scratch_load_b32 v31, off, s32
+; GFX1250FAKE16-NEXT: v_dual_lshrrev_b32 v52, 16, v25 :: v_dual_bitop2_b32 v12, 1, v12 bitop3:0x40
+; GFX1250FAKE16-NEXT: v_dual_lshrrev_b32 v53, 16, v16 :: v_dual_bitop2_b32 v13, 1, v13 bitop3:0x40
+; GFX1250FAKE16-NEXT: v_dual_lshrrev_b32 v33, 16, v22 :: v_dual_bitop2_b32 v0, 1, v0 bitop3:0x40
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3)
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v12
+; GFX1250FAKE16-NEXT: v_dual_lshrrev_b32 v34, 16, v30 :: v_dual_bitop2_b32 v3, 1, v3 bitop3:0x40
+; GFX1250FAKE16-NEXT: v_dual_lshrrev_b32 v51, 16, v17 :: v_dual_bitop2_b32 v10, 1, v10 bitop3:0x40
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e32 v12, v30, v22, vcc_lo
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v13
+; GFX1250FAKE16-NEXT: v_dual_lshrrev_b32 v50, 16, v26 :: v_dual_bitop2_b32 v11, 1, v11 bitop3:0x40
+; GFX1250FAKE16-NEXT: v_and_b32_e32 v14, 1, v14
+; GFX1250FAKE16-NEXT: v_dual_lshrrev_b32 v35, 16, v21 :: v_dual_bitop2_b32 v2, 1, v2 bitop3:0x40
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e32 v13, v34, v33, vcc_lo
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v10
+; GFX1250FAKE16-NEXT: v_dual_lshrrev_b32 v36, 16, v29 :: v_dual_bitop2_b32 v4, 1, v4 bitop3:0x40
+; GFX1250FAKE16-NEXT: v_dual_lshrrev_b32 v49, 16, v18 :: v_dual_bitop2_b32 v8, 1, v8 bitop3:0x40
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e32 v10, v29, v21, vcc_lo
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v11
+; GFX1250FAKE16-NEXT: v_dual_lshrrev_b32 v37, 16, v20 :: v_dual_bitop2_b32 v5, 1, v5 bitop3:0x40
+; GFX1250FAKE16-NEXT: v_dual_lshrrev_b32 v38, 16, v28 :: v_dual_bitop2_b32 v7, 1, v7 bitop3:0x40
+; GFX1250FAKE16-NEXT: v_dual_lshrrev_b32 v48, 16, v27 :: v_dual_bitop2_b32 v9, 1, v9 bitop3:0x40
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e32 v11, v36, v35, vcc_lo
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v8
+; GFX1250FAKE16-NEXT: v_dual_lshrrev_b32 v39, 16, v19 :: v_dual_bitop2_b32 v6, 1, v6 bitop3:0x40
+; GFX1250FAKE16-NEXT: v_dual_lshrrev_b32 v32, 16, v23 :: v_dual_bitop2_b32 v1, 1, v1 bitop3:0x40
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e32 v8, v28, v20, vcc_lo
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v9
+; GFX1250FAKE16-NEXT: v_dual_lshrrev_b32 v54, 16, v24 :: v_dual_bitop2_b32 v15, 1, v15 bitop3:0x40
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e32 v9, v38, v37, vcc_lo
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v6
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e32 v6, v27, v19, vcc_lo
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v4
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e32 v4, v26, v18, vcc_lo
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v2
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e32 v2, v25, v17, vcc_lo
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v3
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e32 v3, v52, v51, vcc_lo
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e32 v0, v24, v16, vcc_lo
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e32 v1, v54, v53, vcc_lo
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v5
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e32 v5, v50, v49, vcc_lo
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v7
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_4) | instskip(SKIP_1) | instid1(VALU_DEP_4)
+; GFX1250FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX1250FAKE16-NEXT: v_perm_b32 v1, v3, v2, 0x5040100
+; GFX1250FAKE16-NEXT: v_perm_b32 v2, v5, v4, 0x5040100
+; GFX1250FAKE16-NEXT: v_perm_b32 v4, v9, v8, 0x5040100
+; GFX1250FAKE16-NEXT: v_perm_b32 v5, v11, v10, 0x5040100
+; GFX1250FAKE16-NEXT: s_wait_loadcnt 0x0
+; GFX1250FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v31
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e32 v7, v48, v39, vcc_lo
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v14
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e32 v14, v31, v23, vcc_lo
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v15
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e32 v15, v3, v32, vcc_lo
+; GFX1250FAKE16-NEXT: v_perm_b32 v3, v7, v6, 0x5040100
+; GFX1250FAKE16-NEXT: v_perm_b32 v6, v13, v12, 0x5040100
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3)
+; GFX1250FAKE16-NEXT: v_perm_b32 v7, v15, v14, 0x5040100
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = select <16 x i1> %cond, <16 x bfloat> %a, <16 x bfloat> %b
ret <16 x bfloat> %op
}
@@ -48861,177 +50005,330 @@ define <32 x bfloat> @v_vselect_v32bf16(<32 x i1> %cond, <32 x bfloat> %a, <32 x
; GFX11FAKE16-NEXT: v_perm_b32 v15, v31, v30, 0x5040100
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_vselect_v32bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: s_clause 0x1b
-; GFX1250-NEXT: scratch_load_b32 v31, off, s32 offset:60
-; GFX1250-NEXT: scratch_load_b32 v32, off, s32 offset:124
-; GFX1250-NEXT: scratch_load_u16 v33, off, s32
-; GFX1250-NEXT: scratch_load_b32 v34, off, s32 offset:128
-; GFX1250-NEXT: scratch_load_b32 v35, off, s32 offset:64
-; GFX1250-NEXT: scratch_load_b32 v36, off, s32 offset:120
-; GFX1250-NEXT: scratch_load_b32 v37, off, s32 offset:56
-; GFX1250-NEXT: scratch_load_b32 v38, off, s32 offset:116
-; GFX1250-NEXT: scratch_load_b32 v39, off, s32 offset:52
-; GFX1250-NEXT: scratch_load_b32 v48, off, s32 offset:112
-; GFX1250-NEXT: scratch_load_b32 v49, off, s32 offset:48
-; GFX1250-NEXT: scratch_load_b32 v50, off, s32 offset:108
-; GFX1250-NEXT: scratch_load_b32 v51, off, s32 offset:44
-; GFX1250-NEXT: scratch_load_b32 v52, off, s32 offset:104
-; GFX1250-NEXT: scratch_load_b32 v53, off, s32 offset:40
-; GFX1250-NEXT: scratch_load_b32 v54, off, s32 offset:100
-; GFX1250-NEXT: scratch_load_b32 v55, off, s32 offset:36
-; GFX1250-NEXT: scratch_load_b32 v64, off, s32 offset:76
-; GFX1250-NEXT: scratch_load_b32 v65, off, s32 offset:12
-; GFX1250-NEXT: scratch_load_b32 v66, off, s32 offset:96
-; GFX1250-NEXT: scratch_load_b32 v67, off, s32 offset:32
-; GFX1250-NEXT: scratch_load_b32 v68, off, s32 offset:80
-; GFX1250-NEXT: scratch_load_b32 v69, off, s32 offset:84
-; GFX1250-NEXT: scratch_load_b32 v70, off, s32 offset:92
-; GFX1250-NEXT: scratch_load_b32 v71, off, s32 offset:28
-; GFX1250-NEXT: scratch_load_b32 v80, off, s32 offset:20
-; GFX1250-NEXT: scratch_load_b32 v81, off, s32 offset:88
-; GFX1250-NEXT: scratch_load_b32 v82, off, s32 offset:24
-; GFX1250-NEXT: v_and_b32_e32 v30, 1, v30
-; GFX1250-NEXT: v_and_b32_e32 v29, 1, v29
-; GFX1250-NEXT: v_and_b32_e32 v26, 1, v26
-; GFX1250-NEXT: v_and_b32_e32 v24, 1, v24
-; GFX1250-NEXT: v_and_b32_e32 v22, 1, v22
-; GFX1250-NEXT: v_and_b32_e32 v20, 1, v20
-; GFX1250-NEXT: v_and_b32_e32 v18, 1, v18
-; GFX1250-NEXT: v_and_b32_e32 v16, 1, v16
-; GFX1250-NEXT: v_and_b32_e32 v10, 1, v10
-; GFX1250-NEXT: v_and_b32_e32 v6, 1, v6
-; GFX1250-NEXT: v_and_b32_e32 v4, 1, v4
-; GFX1250-NEXT: v_and_b32_e32 v1, 1, v1
-; GFX1250-NEXT: v_and_b32_e32 v3, 1, v3
-; GFX1250-NEXT: v_and_b32_e32 v5, 1, v5
-; GFX1250-NEXT: v_and_b32_e32 v23, 1, v23
-; GFX1250-NEXT: v_and_b32_e32 v9, 1, v9
-; GFX1250-NEXT: v_and_b32_e32 v13, 1, v13
-; GFX1250-NEXT: v_and_b32_e32 v15, 1, v15
-; GFX1250-NEXT: v_and_b32_e32 v21, 1, v21
-; GFX1250-NEXT: v_and_b32_e32 v11, 1, v11
-; GFX1250-NEXT: v_and_b32_e32 v19, 1, v19
-; GFX1250-NEXT: s_wait_loadcnt 0x1a
-; GFX1250-NEXT: v_dual_lshrrev_b32 v83, 16, v32 :: v_dual_bitop2_b32 v17, 1, v17 bitop3:0x40
-; GFX1250-NEXT: v_cmp_eq_u32_e64 s1, 1, v30
-; GFX1250-NEXT: v_and_b32_e32 v28, 1, v28
-; GFX1250-NEXT: s_wait_loadcnt 0x17
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1250-NEXT: v_dual_cndmask_b32 v30, v34, v35, s1 :: v_dual_bitop2_b32 v33, 1, v33 bitop3:0x40
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v28
-; GFX1250-NEXT: v_lshrrev_b32_e32 v28, 16, v31
-; GFX1250-NEXT: v_cmp_eq_u32_e64 s0, 1, v29
-; GFX1250-NEXT: scratch_load_b32 v29, off, s32 offset:16
-; GFX1250-NEXT: v_dual_lshrrev_b32 v35, 16, v35 :: v_dual_lshrrev_b32 v34, 16, v34
-; GFX1250-NEXT: v_cndmask_b32_e32 v31, v32, v31, vcc_lo
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v33
-; GFX1250-NEXT: scratch_load_b32 v32, off, s32 offset:72
-; GFX1250-NEXT: v_cndmask_b32_e64 v28, v83, v28, s0
-; GFX1250-NEXT: scratch_load_b32 v83, off, s32 offset:4
-; GFX1250-NEXT: v_cndmask_b32_e32 v34, v34, v35, vcc_lo
-; GFX1250-NEXT: s_clause 0x1
-; GFX1250-NEXT: scratch_load_b32 v35, off, s32 offset:68
-; GFX1250-NEXT: scratch_load_b32 v33, off, s32 offset:8
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v26
-; GFX1250-NEXT: s_wait_loadcnt 0x1a
-; GFX1250-NEXT: v_dual_cndmask_b32 v26, v36, v37, vcc_lo :: v_dual_bitop2_b32 v0, 1, v0 bitop3:0x40
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v24
-; GFX1250-NEXT: v_dual_lshrrev_b32 v37, 16, v37 :: v_dual_bitop2_b32 v2, 1, v2 bitop3:0x40
-; GFX1250-NEXT: s_wait_loadcnt 0x18
-; GFX1250-NEXT: v_dual_lshrrev_b32 v36, 16, v36 :: v_dual_cndmask_b32 v24, v38, v39, vcc_lo
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v22
-; GFX1250-NEXT: v_dual_lshrrev_b32 v38, 16, v38 :: v_dual_bitop2_b32 v7, 1, v7 bitop3:0x40
-; GFX1250-NEXT: s_wait_loadcnt 0x16
-; GFX1250-NEXT: v_dual_cndmask_b32 v22, v48, v49 :: v_dual_lshrrev_b32 v39, 16, v39
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v20
-; GFX1250-NEXT: v_dual_lshrrev_b32 v49, 16, v49 :: v_dual_bitop2_b32 v8, 1, v8 bitop3:0x40
-; GFX1250-NEXT: s_wait_loadcnt 0x14
-; GFX1250-NEXT: v_dual_lshrrev_b32 v48, 16, v48 :: v_dual_cndmask_b32 v20, v50, v51, vcc_lo
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v18
-; GFX1250-NEXT: v_dual_lshrrev_b32 v51, 16, v51 :: v_dual_bitop2_b32 v12, 1, v12 bitop3:0x40
-; GFX1250-NEXT: s_wait_loadcnt 0x12
-; GFX1250-NEXT: v_dual_lshrrev_b32 v50, 16, v50 :: v_dual_cndmask_b32 v18, v52, v53, vcc_lo
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v16
-; GFX1250-NEXT: v_dual_lshrrev_b32 v53, 16, v53 :: v_dual_bitop2_b32 v14, 1, v14 bitop3:0x40
-; GFX1250-NEXT: s_wait_loadcnt 0x10
-; GFX1250-NEXT: v_dual_lshrrev_b32 v52, 16, v52 :: v_dual_cndmask_b32 v16, v54, v55, vcc_lo
-; GFX1250-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v14
-; GFX1250-NEXT: v_dual_lshrrev_b32 v55, 16, v55 :: v_dual_lshrrev_b32 v54, 16, v54
-; GFX1250-NEXT: s_wait_loadcnt 0xc
-; GFX1250-NEXT: v_cndmask_b32_e32 v14, v66, v67, vcc_lo
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v12
-; GFX1250-NEXT: v_dual_lshrrev_b32 v67, 16, v67 :: v_dual_lshrrev_b32 v66, 16, v66
-; GFX1250-NEXT: s_wait_loadcnt 0x8
-; GFX1250-NEXT: v_cndmask_b32_e32 v12, v70, v71, vcc_lo
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v10
-; GFX1250-NEXT: v_dual_lshrrev_b32 v70, 16, v70 :: v_dual_bitop2_b32 v25, 1, v25 bitop3:0x40
-; GFX1250-NEXT: s_wait_loadcnt 0x5
-; GFX1250-NEXT: v_dual_cndmask_b32 v10, v81, v82 :: v_dual_lshrrev_b32 v71, 16, v71
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v8
-; GFX1250-NEXT: v_dual_lshrrev_b32 v82, 16, v82 :: v_dual_bitop2_b32 v27, 1, v27 bitop3:0x40
-; GFX1250-NEXT: v_dual_cndmask_b32 v8, v69, v80 :: v_dual_lshrrev_b32 v81, 16, v81
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v6
-; GFX1250-NEXT: v_dual_lshrrev_b32 v80, 16, v80 :: v_dual_lshrrev_b32 v69, 16, v69
-; GFX1250-NEXT: s_wait_loadcnt 0x4
-; GFX1250-NEXT: v_dual_cndmask_b32 v6, v68, v29 :: v_dual_lshrrev_b32 v29, 16, v29
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v4
-; GFX1250-NEXT: v_dual_lshrrev_b32 v68, 16, v68 :: v_dual_cndmask_b32 v4, v64, v65, vcc_lo
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v2
-; GFX1250-NEXT: v_dual_lshrrev_b32 v65, 16, v65 :: v_dual_lshrrev_b32 v64, 16, v64
-; GFX1250-NEXT: s_wait_loadcnt 0x0
-; GFX1250-NEXT: v_dual_cndmask_b32 v2, v32, v33 :: v_dual_lshrrev_b32 v33, 16, v33
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
-; GFX1250-NEXT: v_dual_lshrrev_b32 v32, 16, v32 :: v_dual_cndmask_b32 v0, v35, v83, vcc_lo
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v27
-; GFX1250-NEXT: v_dual_lshrrev_b32 v83, 16, v83 :: v_dual_cndmask_b32 v27, v36, v37, vcc_lo
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v25
-; GFX1250-NEXT: v_cndmask_b32_e32 v25, v38, v39, vcc_lo
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v23
-; GFX1250-NEXT: v_dual_lshrrev_b32 v35, 16, v35 :: v_dual_cndmask_b32 v23, v48, v49, vcc_lo
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v21
-; GFX1250-NEXT: v_cndmask_b32_e32 v21, v50, v51, vcc_lo
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v19
-; GFX1250-NEXT: v_cndmask_b32_e32 v19, v52, v53, vcc_lo
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v17
-; GFX1250-NEXT: v_cndmask_b32_e32 v17, v54, v55, vcc_lo
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v15
-; GFX1250-NEXT: v_cndmask_b32_e32 v15, v66, v67, vcc_lo
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v13
-; GFX1250-NEXT: v_cndmask_b32_e32 v13, v70, v71, vcc_lo
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v11
-; GFX1250-NEXT: v_cndmask_b32_e32 v11, v81, v82, vcc_lo
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v7
-; GFX1250-NEXT: v_cndmask_b32_e32 v7, v68, v29, vcc_lo
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v3
-; GFX1250-NEXT: v_cndmask_b32_e32 v3, v32, v33, vcc_lo
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1
-; GFX1250-NEXT: v_cndmask_b32_e32 v1, v35, v83, vcc_lo
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v5
-; GFX1250-NEXT: v_cndmask_b32_e32 v5, v64, v65, vcc_lo
-; GFX1250-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v9
-; GFX1250-NEXT: v_cndmask_b32_e32 v9, v69, v80, vcc_lo
-; GFX1250-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX1250-NEXT: v_perm_b32 v1, v3, v2, 0x5040100
-; GFX1250-NEXT: v_perm_b32 v2, v5, v4, 0x5040100
-; GFX1250-NEXT: v_perm_b32 v3, v7, v6, 0x5040100
-; GFX1250-NEXT: v_perm_b32 v4, v9, v8, 0x5040100
-; GFX1250-NEXT: v_perm_b32 v5, v11, v10, 0x5040100
-; GFX1250-NEXT: v_perm_b32 v6, v13, v12, 0x5040100
-; GFX1250-NEXT: v_perm_b32 v7, v15, v14, 0x5040100
-; GFX1250-NEXT: v_perm_b32 v8, v17, v16, 0x5040100
-; GFX1250-NEXT: v_perm_b32 v9, v19, v18, 0x5040100
-; GFX1250-NEXT: v_perm_b32 v10, v21, v20, 0x5040100
-; GFX1250-NEXT: v_perm_b32 v11, v23, v22, 0x5040100
-; GFX1250-NEXT: v_perm_b32 v12, v25, v24, 0x5040100
-; GFX1250-NEXT: v_perm_b32 v13, v27, v26, 0x5040100
-; GFX1250-NEXT: v_perm_b32 v14, v28, v31, 0x5040100
-; GFX1250-NEXT: v_perm_b32 v15, v34, v30, 0x5040100
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_vselect_v32bf16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: s_clause 0x20
+; GFX1250TRUE16-NEXT: scratch_load_u16 v31, off, s32
+; GFX1250TRUE16-NEXT: scratch_load_b32 v32, off, s32 offset:68
+; GFX1250TRUE16-NEXT: scratch_load_b32 v33, off, s32 offset:72
+; GFX1250TRUE16-NEXT: scratch_load_b32 v34, off, s32 offset:76
+; GFX1250TRUE16-NEXT: scratch_load_b32 v35, off, s32 offset:124
+; GFX1250TRUE16-NEXT: scratch_load_b32 v36, off, s32 offset:128
+; GFX1250TRUE16-NEXT: scratch_load_b32 v37, off, s32 offset:64
+; GFX1250TRUE16-NEXT: scratch_load_b32 v38, off, s32 offset:60
+; GFX1250TRUE16-NEXT: scratch_load_b32 v39, off, s32 offset:120
+; GFX1250TRUE16-NEXT: scratch_load_b32 v48, off, s32 offset:56
+; GFX1250TRUE16-NEXT: scratch_load_b32 v49, off, s32 offset:116
+; GFX1250TRUE16-NEXT: scratch_load_b32 v50, off, s32 offset:52
+; GFX1250TRUE16-NEXT: scratch_load_b32 v51, off, s32 offset:112
+; GFX1250TRUE16-NEXT: scratch_load_b32 v52, off, s32 offset:48
+; GFX1250TRUE16-NEXT: scratch_load_b32 v53, off, s32 offset:108
+; GFX1250TRUE16-NEXT: scratch_load_b32 v54, off, s32 offset:44
+; GFX1250TRUE16-NEXT: scratch_load_b32 v55, off, s32 offset:104
+; GFX1250TRUE16-NEXT: scratch_load_b32 v64, off, s32 offset:40
+; GFX1250TRUE16-NEXT: scratch_load_b32 v65, off, s32 offset:100
+; GFX1250TRUE16-NEXT: scratch_load_b32 v66, off, s32 offset:36
+; GFX1250TRUE16-NEXT: scratch_load_b32 v67, off, s32 offset:96
+; GFX1250TRUE16-NEXT: scratch_load_b32 v68, off, s32 offset:32
+; GFX1250TRUE16-NEXT: scratch_load_b32 v69, off, s32 offset:92
+; GFX1250TRUE16-NEXT: scratch_load_b32 v70, off, s32 offset:28
+; GFX1250TRUE16-NEXT: scratch_load_b32 v71, off, s32 offset:88
+; GFX1250TRUE16-NEXT: scratch_load_b32 v80, off, s32 offset:24
+; GFX1250TRUE16-NEXT: scratch_load_b32 v81, off, s32 offset:84
+; GFX1250TRUE16-NEXT: scratch_load_b32 v82, off, s32 offset:20
+; GFX1250TRUE16-NEXT: scratch_load_b32 v83, off, s32 offset:80
+; GFX1250TRUE16-NEXT: scratch_load_b32 v84, off, s32 offset:16
+; GFX1250TRUE16-NEXT: scratch_load_b32 v85, off, s32 offset:12
+; GFX1250TRUE16-NEXT: scratch_load_b32 v86, off, s32 offset:8
+; GFX1250TRUE16-NEXT: scratch_load_b32 v87, off, s32 offset:4
+; GFX1250TRUE16-NEXT: v_and_b16 v0.h, 1, v1.l
+; GFX1250TRUE16-NEXT: v_and_b16 v0.l, 1, v0.l
+; GFX1250TRUE16-NEXT: v_and_b16 v1.l, 1, v3.l
+; GFX1250TRUE16-NEXT: v_and_b16 v1.h, 1, v2.l
+; GFX1250TRUE16-NEXT: v_and_b16 v2.l, 1, v9.l
+; GFX1250TRUE16-NEXT: v_cmp_eq_u16_e32 vcc_lo, 1, v0.h
+; GFX1250TRUE16-NEXT: v_and_b16 v0.h, 1, v4.l
+; GFX1250TRUE16-NEXT: v_cmp_eq_u16_e64 s1, 1, v0.l
+; GFX1250TRUE16-NEXT: v_cmp_eq_u16_e64 s0, 1, v1.l
+; GFX1250TRUE16-NEXT: v_cmp_eq_u16_e64 s2, 1, v1.h
+; GFX1250TRUE16-NEXT: v_and_b16 v0.l, 1, v5.l
+; GFX1250TRUE16-NEXT: v_and_b16 v1.l, 1, v7.l
+; GFX1250TRUE16-NEXT: v_and_b16 v1.h, 1, v6.l
+; GFX1250TRUE16-NEXT: v_and_b16 v2.h, 1, v8.l
+; GFX1250TRUE16-NEXT: v_and_b16 v3.l, 1, v11.l
+; GFX1250TRUE16-NEXT: v_and_b16 v3.h, 1, v10.l
+; GFX1250TRUE16-NEXT: v_and_b16 v4.l, 1, v13.l
+; GFX1250TRUE16-NEXT: v_and_b16 v4.h, 1, v12.l
+; GFX1250TRUE16-NEXT: v_and_b16 v5.l, 1, v15.l
+; GFX1250TRUE16-NEXT: v_and_b16 v5.h, 1, v14.l
+; GFX1250TRUE16-NEXT: v_and_b16 v6.l, 1, v17.l
+; GFX1250TRUE16-NEXT: v_and_b16 v6.h, 1, v16.l
+; GFX1250TRUE16-NEXT: v_and_b16 v7.l, 1, v19.l
+; GFX1250TRUE16-NEXT: v_and_b16 v7.h, 1, v18.l
+; GFX1250TRUE16-NEXT: v_and_b16 v8.l, 1, v21.l
+; GFX1250TRUE16-NEXT: v_and_b16 v8.h, 1, v20.l
+; GFX1250TRUE16-NEXT: v_and_b16 v9.l, 1, v23.l
+; GFX1250TRUE16-NEXT: v_and_b16 v9.h, 1, v22.l
+; GFX1250TRUE16-NEXT: v_and_b16 v10.l, 1, v25.l
+; GFX1250TRUE16-NEXT: v_and_b16 v10.h, 1, v24.l
+; GFX1250TRUE16-NEXT: v_and_b16 v11.l, 1, v27.l
+; GFX1250TRUE16-NEXT: v_and_b16 v11.h, 1, v26.l
+; GFX1250TRUE16-NEXT: v_and_b16 v12.l, 1, v29.l
+; GFX1250TRUE16-NEXT: v_and_b16 v12.h, 1, v28.l
+; GFX1250TRUE16-NEXT: v_and_b16 v13.l, 1, v30.l
+; GFX1250TRUE16-NEXT: v_cmp_eq_u16_e64 s4, 1, v0.h
+; GFX1250TRUE16-NEXT: v_cmp_eq_u16_e64 s3, 1, v0.l
+; GFX1250TRUE16-NEXT: v_cmp_eq_u16_e64 s5, 1, v1.l
+; GFX1250TRUE16-NEXT: v_cmp_eq_u16_e64 s6, 1, v1.h
+; GFX1250TRUE16-NEXT: v_cmp_eq_u16_e64 s7, 1, v2.l
+; GFX1250TRUE16-NEXT: v_cmp_eq_u16_e64 s8, 1, v2.h
+; GFX1250TRUE16-NEXT: v_cmp_eq_u16_e64 s9, 1, v3.l
+; GFX1250TRUE16-NEXT: v_cmp_eq_u16_e64 s10, 1, v3.h
+; GFX1250TRUE16-NEXT: v_cmp_eq_u16_e64 s11, 1, v4.l
+; GFX1250TRUE16-NEXT: v_cmp_eq_u16_e64 s12, 1, v4.h
+; GFX1250TRUE16-NEXT: v_cmp_eq_u16_e64 s13, 1, v5.l
+; GFX1250TRUE16-NEXT: v_cmp_eq_u16_e64 s14, 1, v5.h
+; GFX1250TRUE16-NEXT: v_cmp_eq_u16_e64 s15, 1, v6.l
+; GFX1250TRUE16-NEXT: v_cmp_eq_u16_e64 s16, 1, v6.h
+; GFX1250TRUE16-NEXT: v_cmp_eq_u16_e64 s17, 1, v7.l
+; GFX1250TRUE16-NEXT: v_cmp_eq_u16_e64 s18, 1, v7.h
+; GFX1250TRUE16-NEXT: v_cmp_eq_u16_e64 s19, 1, v8.l
+; GFX1250TRUE16-NEXT: v_cmp_eq_u16_e64 s20, 1, v8.h
+; GFX1250TRUE16-NEXT: v_cmp_eq_u16_e64 s21, 1, v9.l
+; GFX1250TRUE16-NEXT: v_cmp_eq_u16_e64 s22, 1, v9.h
+; GFX1250TRUE16-NEXT: v_cmp_eq_u16_e64 s23, 1, v10.l
+; GFX1250TRUE16-NEXT: v_cmp_eq_u16_e64 s24, 1, v10.h
+; GFX1250TRUE16-NEXT: v_cmp_eq_u16_e64 s25, 1, v11.l
+; GFX1250TRUE16-NEXT: v_cmp_eq_u16_e64 s26, 1, v13.l
+; GFX1250TRUE16-NEXT: v_cmp_eq_u16_e64 s27, 1, v12.h
+; GFX1250TRUE16-NEXT: v_cmp_eq_u16_e64 s28, 1, v12.l
+; GFX1250TRUE16-NEXT: v_cmp_eq_u16_e64 s29, 1, v11.h
+; GFX1250TRUE16-NEXT: s_wait_loadcnt 0x20
+; GFX1250TRUE16-NEXT: v_and_b16 v0.h, 1, v31.l
+; GFX1250TRUE16-NEXT: s_wait_loadcnt 0x1a
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v15.l, v36.l, v37.l, s26
+; GFX1250TRUE16-NEXT: s_wait_loadcnt 0x19
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v14.l, v35.l, v38.l, s27
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v14.h, v35.h, v38.h, s28
+; GFX1250TRUE16-NEXT: s_wait_loadcnt 0x17
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v13.l, v39.l, v48.l, s29
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v13.h, v39.h, v48.h, s25
+; GFX1250TRUE16-NEXT: s_wait_loadcnt 0x15
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v12.l, v49.l, v50.l, s24
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v12.h, v49.h, v50.h, s23
+; GFX1250TRUE16-NEXT: s_wait_loadcnt 0x13
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v11.l, v51.l, v52.l, s22
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v11.h, v51.h, v52.h, s21
+; GFX1250TRUE16-NEXT: s_wait_loadcnt 0x11
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v10.l, v53.l, v54.l, s20
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v10.h, v53.h, v54.h, s19
+; GFX1250TRUE16-NEXT: s_wait_loadcnt 0xf
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v9.l, v55.l, v64.l, s18
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v9.h, v55.h, v64.h, s17
+; GFX1250TRUE16-NEXT: s_wait_loadcnt 0xd
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v8.l, v65.l, v66.l, s16
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v8.h, v65.h, v66.h, s15
+; GFX1250TRUE16-NEXT: s_wait_loadcnt 0xb
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v7.l, v67.l, v68.l, s14
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v7.h, v67.h, v68.h, s13
+; GFX1250TRUE16-NEXT: s_wait_loadcnt 0x9
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v6.l, v69.l, v70.l, s12
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v6.h, v69.h, v70.h, s11
+; GFX1250TRUE16-NEXT: s_wait_loadcnt 0x7
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v5.l, v71.l, v80.l, s10
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v5.h, v71.h, v80.h, s9
+; GFX1250TRUE16-NEXT: s_wait_loadcnt 0x5
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v4.l, v81.l, v82.l, s8
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v4.h, v81.h, v82.h, s7
+; GFX1250TRUE16-NEXT: s_wait_loadcnt 0x3
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v3.l, v83.l, v84.l, s6
+; GFX1250TRUE16-NEXT: s_wait_loadcnt 0x2
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v2.l, v34.l, v85.l, s4
+; GFX1250TRUE16-NEXT: s_wait_loadcnt 0x1
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v1.l, v33.l, v86.l, s2
+; GFX1250TRUE16-NEXT: s_wait_loadcnt 0x0
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v0.l, v32.l, v87.l, s1
+; GFX1250TRUE16-NEXT: v_cmp_eq_u16_e64 s1, 1, v0.h
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v0.h, v32.h, v87.h, vcc_lo
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v1.h, v33.h, v86.h, s0
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v2.h, v34.h, v85.h, s3
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v3.h, v83.h, v84.h, s5
+; GFX1250TRUE16-NEXT: v_cndmask_b16 v15.h, v36.h, v37.h, s1
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_vselect_v32bf16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: s_clause 0x1b
+; GFX1250FAKE16-NEXT: scratch_load_b32 v31, off, s32 offset:60
+; GFX1250FAKE16-NEXT: scratch_load_b32 v32, off, s32 offset:124
+; GFX1250FAKE16-NEXT: scratch_load_u16 v33, off, s32
+; GFX1250FAKE16-NEXT: scratch_load_b32 v34, off, s32 offset:128
+; GFX1250FAKE16-NEXT: scratch_load_b32 v35, off, s32 offset:64
+; GFX1250FAKE16-NEXT: scratch_load_b32 v36, off, s32 offset:120
+; GFX1250FAKE16-NEXT: scratch_load_b32 v37, off, s32 offset:56
+; GFX1250FAKE16-NEXT: scratch_load_b32 v38, off, s32 offset:116
+; GFX1250FAKE16-NEXT: scratch_load_b32 v39, off, s32 offset:52
+; GFX1250FAKE16-NEXT: scratch_load_b32 v48, off, s32 offset:112
+; GFX1250FAKE16-NEXT: scratch_load_b32 v49, off, s32 offset:48
+; GFX1250FAKE16-NEXT: scratch_load_b32 v50, off, s32 offset:108
+; GFX1250FAKE16-NEXT: scratch_load_b32 v51, off, s32 offset:44
+; GFX1250FAKE16-NEXT: scratch_load_b32 v52, off, s32 offset:104
+; GFX1250FAKE16-NEXT: scratch_load_b32 v53, off, s32 offset:40
+; GFX1250FAKE16-NEXT: scratch_load_b32 v54, off, s32 offset:100
+; GFX1250FAKE16-NEXT: scratch_load_b32 v55, off, s32 offset:36
+; GFX1250FAKE16-NEXT: scratch_load_b32 v64, off, s32 offset:76
+; GFX1250FAKE16-NEXT: scratch_load_b32 v65, off, s32 offset:12
+; GFX1250FAKE16-NEXT: scratch_load_b32 v66, off, s32 offset:96
+; GFX1250FAKE16-NEXT: scratch_load_b32 v67, off, s32 offset:32
+; GFX1250FAKE16-NEXT: scratch_load_b32 v68, off, s32 offset:80
+; GFX1250FAKE16-NEXT: scratch_load_b32 v69, off, s32 offset:84
+; GFX1250FAKE16-NEXT: scratch_load_b32 v70, off, s32 offset:92
+; GFX1250FAKE16-NEXT: scratch_load_b32 v71, off, s32 offset:28
+; GFX1250FAKE16-NEXT: scratch_load_b32 v80, off, s32 offset:20
+; GFX1250FAKE16-NEXT: scratch_load_b32 v81, off, s32 offset:88
+; GFX1250FAKE16-NEXT: scratch_load_b32 v82, off, s32 offset:24
+; GFX1250FAKE16-NEXT: v_and_b32_e32 v30, 1, v30
+; GFX1250FAKE16-NEXT: v_and_b32_e32 v29, 1, v29
+; GFX1250FAKE16-NEXT: v_and_b32_e32 v26, 1, v26
+; GFX1250FAKE16-NEXT: v_and_b32_e32 v24, 1, v24
+; GFX1250FAKE16-NEXT: v_and_b32_e32 v22, 1, v22
+; GFX1250FAKE16-NEXT: v_and_b32_e32 v20, 1, v20
+; GFX1250FAKE16-NEXT: v_and_b32_e32 v18, 1, v18
+; GFX1250FAKE16-NEXT: v_and_b32_e32 v16, 1, v16
+; GFX1250FAKE16-NEXT: v_and_b32_e32 v10, 1, v10
+; GFX1250FAKE16-NEXT: v_and_b32_e32 v6, 1, v6
+; GFX1250FAKE16-NEXT: v_and_b32_e32 v4, 1, v4
+; GFX1250FAKE16-NEXT: v_and_b32_e32 v1, 1, v1
+; GFX1250FAKE16-NEXT: v_and_b32_e32 v3, 1, v3
+; GFX1250FAKE16-NEXT: v_and_b32_e32 v5, 1, v5
+; GFX1250FAKE16-NEXT: v_and_b32_e32 v23, 1, v23
+; GFX1250FAKE16-NEXT: v_and_b32_e32 v9, 1, v9
+; GFX1250FAKE16-NEXT: v_and_b32_e32 v13, 1, v13
+; GFX1250FAKE16-NEXT: v_and_b32_e32 v15, 1, v15
+; GFX1250FAKE16-NEXT: v_and_b32_e32 v21, 1, v21
+; GFX1250FAKE16-NEXT: v_and_b32_e32 v11, 1, v11
+; GFX1250FAKE16-NEXT: v_and_b32_e32 v19, 1, v19
+; GFX1250FAKE16-NEXT: s_wait_loadcnt 0x1a
+; GFX1250FAKE16-NEXT: v_dual_lshrrev_b32 v83, 16, v32 :: v_dual_bitop2_b32 v17, 1, v17 bitop3:0x40
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e64 s1, 1, v30
+; GFX1250FAKE16-NEXT: v_and_b32_e32 v28, 1, v28
+; GFX1250FAKE16-NEXT: s_wait_loadcnt 0x17
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250FAKE16-NEXT: v_dual_cndmask_b32 v30, v34, v35, s1 :: v_dual_bitop2_b32 v33, 1, v33 bitop3:0x40
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v28
+; GFX1250FAKE16-NEXT: v_lshrrev_b32_e32 v28, 16, v31
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e64 s0, 1, v29
+; GFX1250FAKE16-NEXT: scratch_load_b32 v29, off, s32 offset:16
+; GFX1250FAKE16-NEXT: v_dual_lshrrev_b32 v35, 16, v35 :: v_dual_lshrrev_b32 v34, 16, v34
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e32 v31, v32, v31, vcc_lo
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v33
+; GFX1250FAKE16-NEXT: scratch_load_b32 v32, off, s32 offset:72
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e64 v28, v83, v28, s0
+; GFX1250FAKE16-NEXT: scratch_load_b32 v83, off, s32 offset:4
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e32 v34, v34, v35, vcc_lo
+; GFX1250FAKE16-NEXT: s_clause 0x1
+; GFX1250FAKE16-NEXT: scratch_load_b32 v35, off, s32 offset:68
+; GFX1250FAKE16-NEXT: scratch_load_b32 v33, off, s32 offset:8
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v26
+; GFX1250FAKE16-NEXT: s_wait_loadcnt 0x1a
+; GFX1250FAKE16-NEXT: v_dual_cndmask_b32 v26, v36, v37, vcc_lo :: v_dual_bitop2_b32 v0, 1, v0 bitop3:0x40
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v24
+; GFX1250FAKE16-NEXT: v_dual_lshrrev_b32 v37, 16, v37 :: v_dual_bitop2_b32 v2, 1, v2 bitop3:0x40
+; GFX1250FAKE16-NEXT: s_wait_loadcnt 0x18
+; GFX1250FAKE16-NEXT: v_dual_lshrrev_b32 v36, 16, v36 :: v_dual_cndmask_b32 v24, v38, v39, vcc_lo
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v22
+; GFX1250FAKE16-NEXT: v_dual_lshrrev_b32 v38, 16, v38 :: v_dual_bitop2_b32 v7, 1, v7 bitop3:0x40
+; GFX1250FAKE16-NEXT: s_wait_loadcnt 0x16
+; GFX1250FAKE16-NEXT: v_dual_cndmask_b32 v22, v48, v49 :: v_dual_lshrrev_b32 v39, 16, v39
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v20
+; GFX1250FAKE16-NEXT: v_dual_lshrrev_b32 v49, 16, v49 :: v_dual_bitop2_b32 v8, 1, v8 bitop3:0x40
+; GFX1250FAKE16-NEXT: s_wait_loadcnt 0x14
+; GFX1250FAKE16-NEXT: v_dual_lshrrev_b32 v48, 16, v48 :: v_dual_cndmask_b32 v20, v50, v51, vcc_lo
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v18
+; GFX1250FAKE16-NEXT: v_dual_lshrrev_b32 v51, 16, v51 :: v_dual_bitop2_b32 v12, 1, v12 bitop3:0x40
+; GFX1250FAKE16-NEXT: s_wait_loadcnt 0x12
+; GFX1250FAKE16-NEXT: v_dual_lshrrev_b32 v50, 16, v50 :: v_dual_cndmask_b32 v18, v52, v53, vcc_lo
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v16
+; GFX1250FAKE16-NEXT: v_dual_lshrrev_b32 v53, 16, v53 :: v_dual_bitop2_b32 v14, 1, v14 bitop3:0x40
+; GFX1250FAKE16-NEXT: s_wait_loadcnt 0x10
+; GFX1250FAKE16-NEXT: v_dual_lshrrev_b32 v52, 16, v52 :: v_dual_cndmask_b32 v16, v54, v55, vcc_lo
+; GFX1250FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v14
+; GFX1250FAKE16-NEXT: v_dual_lshrrev_b32 v55, 16, v55 :: v_dual_lshrrev_b32 v54, 16, v54
+; GFX1250FAKE16-NEXT: s_wait_loadcnt 0xc
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e32 v14, v66, v67, vcc_lo
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v12
+; GFX1250FAKE16-NEXT: v_dual_lshrrev_b32 v67, 16, v67 :: v_dual_lshrrev_b32 v66, 16, v66
+; GFX1250FAKE16-NEXT: s_wait_loadcnt 0x8
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e32 v12, v70, v71, vcc_lo
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v10
+; GFX1250FAKE16-NEXT: v_dual_lshrrev_b32 v70, 16, v70 :: v_dual_bitop2_b32 v25, 1, v25 bitop3:0x40
+; GFX1250FAKE16-NEXT: s_wait_loadcnt 0x5
+; GFX1250FAKE16-NEXT: v_dual_cndmask_b32 v10, v81, v82 :: v_dual_lshrrev_b32 v71, 16, v71
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v8
+; GFX1250FAKE16-NEXT: v_dual_lshrrev_b32 v82, 16, v82 :: v_dual_bitop2_b32 v27, 1, v27 bitop3:0x40
+; GFX1250FAKE16-NEXT: v_dual_cndmask_b32 v8, v69, v80 :: v_dual_lshrrev_b32 v81, 16, v81
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v6
+; GFX1250FAKE16-NEXT: v_dual_lshrrev_b32 v80, 16, v80 :: v_dual_lshrrev_b32 v69, 16, v69
+; GFX1250FAKE16-NEXT: s_wait_loadcnt 0x4
+; GFX1250FAKE16-NEXT: v_dual_cndmask_b32 v6, v68, v29 :: v_dual_lshrrev_b32 v29, 16, v29
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v4
+; GFX1250FAKE16-NEXT: v_dual_lshrrev_b32 v68, 16, v68 :: v_dual_cndmask_b32 v4, v64, v65, vcc_lo
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v2
+; GFX1250FAKE16-NEXT: v_dual_lshrrev_b32 v65, 16, v65 :: v_dual_lshrrev_b32 v64, 16, v64
+; GFX1250FAKE16-NEXT: s_wait_loadcnt 0x0
+; GFX1250FAKE16-NEXT: v_dual_cndmask_b32 v2, v32, v33 :: v_dual_lshrrev_b32 v33, 16, v33
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v0
+; GFX1250FAKE16-NEXT: v_dual_lshrrev_b32 v32, 16, v32 :: v_dual_cndmask_b32 v0, v35, v83, vcc_lo
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v27
+; GFX1250FAKE16-NEXT: v_dual_lshrrev_b32 v83, 16, v83 :: v_dual_cndmask_b32 v27, v36, v37, vcc_lo
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v25
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e32 v25, v38, v39, vcc_lo
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v23
+; GFX1250FAKE16-NEXT: v_dual_lshrrev_b32 v35, 16, v35 :: v_dual_cndmask_b32 v23, v48, v49, vcc_lo
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v21
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e32 v21, v50, v51, vcc_lo
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v19
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e32 v19, v52, v53, vcc_lo
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v17
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e32 v17, v54, v55, vcc_lo
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v15
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e32 v15, v66, v67, vcc_lo
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v13
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e32 v13, v70, v71, vcc_lo
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v11
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e32 v11, v81, v82, vcc_lo
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v7
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e32 v7, v68, v29, vcc_lo
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v3
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e32 v3, v32, v33, vcc_lo
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v1
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e32 v1, v35, v83, vcc_lo
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v5
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e32 v5, v64, v65, vcc_lo
+; GFX1250FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v9
+; GFX1250FAKE16-NEXT: v_cndmask_b32_e32 v9, v69, v80, vcc_lo
+; GFX1250FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX1250FAKE16-NEXT: v_perm_b32 v1, v3, v2, 0x5040100
+; GFX1250FAKE16-NEXT: v_perm_b32 v2, v5, v4, 0x5040100
+; GFX1250FAKE16-NEXT: v_perm_b32 v3, v7, v6, 0x5040100
+; GFX1250FAKE16-NEXT: v_perm_b32 v4, v9, v8, 0x5040100
+; GFX1250FAKE16-NEXT: v_perm_b32 v5, v11, v10, 0x5040100
+; GFX1250FAKE16-NEXT: v_perm_b32 v6, v13, v12, 0x5040100
+; GFX1250FAKE16-NEXT: v_perm_b32 v7, v15, v14, 0x5040100
+; GFX1250FAKE16-NEXT: v_perm_b32 v8, v17, v16, 0x5040100
+; GFX1250FAKE16-NEXT: v_perm_b32 v9, v19, v18, 0x5040100
+; GFX1250FAKE16-NEXT: v_perm_b32 v10, v21, v20, 0x5040100
+; GFX1250FAKE16-NEXT: v_perm_b32 v11, v23, v22, 0x5040100
+; GFX1250FAKE16-NEXT: v_perm_b32 v12, v25, v24, 0x5040100
+; GFX1250FAKE16-NEXT: v_perm_b32 v13, v27, v26, 0x5040100
+; GFX1250FAKE16-NEXT: v_perm_b32 v14, v28, v31, 0x5040100
+; GFX1250FAKE16-NEXT: v_perm_b32 v15, v34, v30, 0x5040100
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = select <32 x i1> %cond, <32 x bfloat> %a, <32 x bfloat> %b
ret <32 x bfloat> %op
}
@@ -49167,12 +50464,21 @@ define bfloat @v_fma_bf16(bfloat %a, bfloat %b, bfloat %c) {
; GFX11FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_fma_bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_fma_mixlo_bf16 v0, v0, v1, v2 op_sel_hi:[1,1,1]
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_fma_bf16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_fma_mix_f32_bf16 v0, v0, v1, v2 op_sel_hi:[1,1,1]
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_fma_bf16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_fma_mixlo_bf16 v0, v0, v1, v2 op_sel_hi:[1,1,1]
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = call bfloat @llvm.fma.bf16(bfloat %a, bfloat %b, bfloat %c)
ret bfloat %op
}
@@ -54791,12 +56097,21 @@ define bfloat @v_fmuladd_bf16(bfloat %a, bfloat %b, bfloat %c) {
; GFX11FAKE16-NEXT: v_lshrrev_b32_e32 v0, 16, v0
; GFX11FAKE16-NEXT: s_setpc_b64 s[30:31]
;
-; GFX1250-LABEL: v_fmuladd_bf16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: v_fma_mixlo_bf16 v0, v0, v1, v2 op_sel_hi:[1,1,1]
-; GFX1250-NEXT: s_set_pc_i64 s[30:31]
+; GFX1250TRUE16-LABEL: v_fmuladd_bf16:
+; GFX1250TRUE16: ; %bb.0:
+; GFX1250TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250TRUE16-NEXT: v_fma_mix_f32_bf16 v0, v0, v1, v2 op_sel_hi:[1,1,1]
+; GFX1250TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250TRUE16-NEXT: v_cvt_pk_bf16_f32 v0, v0, s0
+; GFX1250TRUE16-NEXT: s_set_pc_i64 s[30:31]
+;
+; GFX1250FAKE16-LABEL: v_fmuladd_bf16:
+; GFX1250FAKE16: ; %bb.0:
+; GFX1250FAKE16-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250FAKE16-NEXT: v_fma_mixlo_bf16 v0, v0, v1, v2 op_sel_hi:[1,1,1]
+; GFX1250FAKE16-NEXT: s_set_pc_i64 s[30:31]
%op = call bfloat @llvm.fmuladd.bf16(bfloat %a, bfloat %b, bfloat %c)
ret bfloat %op
}
@@ -55652,5 +56967,3 @@ define <4 x bfloat> @v_fmuladd_v4bf16(<4 x bfloat> %a, <4 x bfloat> %b, <4 x bfl
%op = call <4 x bfloat> @llvm.fmuladd.v4bf16(<4 x bfloat> %a, <4 x bfloat> %b, <4 x bfloat> %c)
ret <4 x bfloat> %op
}
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; GFX1250FAKE16: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/calling-conventions.ll b/llvm/test/CodeGen/AMDGPU/calling-conventions.ll
index 363a248..cbf6b66 100644
--- a/llvm/test/CodeGen/AMDGPU/calling-conventions.ll
+++ b/llvm/test/CodeGen/AMDGPU/calling-conventions.ll
@@ -1262,7 +1262,7 @@ define amdgpu_ps void @ps_mesa_i16(i16 %arg0) {
; GFX1250-TRUE16-LABEL: ps_mesa_i16:
; GFX1250-TRUE16: ; %bb.0:
; GFX1250-TRUE16-NEXT: v_add_nc_u16 v0.l, v0.l, v0.l
-; GFX1250-TRUE16-NEXT: flat_store_b16 v[0:1], v0
+; GFX1250-TRUE16-NEXT: global_store_b16 v[0:1], v0, off
; GFX1250-TRUE16-NEXT: s_endpgm
;
; GFX1250-FAKE16-LABEL: ps_mesa_i16:
@@ -3013,7 +3013,7 @@ define amdgpu_cs void @amdgpu_cs_v8i1(<8 x i1> %arg0) {
; GFX1250-TRUE16-NEXT: v_lshlrev_b16 v0.h, 4, v0.h
; GFX1250-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-TRUE16-NEXT: v_bitop3_b16 v0.l, v0.l, v0.h, 15 bitop3:0xec
-; GFX1250-TRUE16-NEXT: flat_store_b8 v[0:1], v0
+; GFX1250-TRUE16-NEXT: global_store_b8 v[0:1], v0, off
; GFX1250-TRUE16-NEXT: s_endpgm
;
; GFX1250-FAKE16-LABEL: amdgpu_cs_v8i1:
@@ -3297,7 +3297,7 @@ define amdgpu_cs void @amdgpu_cs_v16i1(<16 x i1> %arg0) {
; GFX1250-TRUE16-NEXT: v_or_b16 v0.h, v1.h, v1.l
; GFX1250-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1250-TRUE16-NEXT: v_bitop3_b16 v0.l, v0.l, v0.h, 0xff bitop3:0xec
-; GFX1250-TRUE16-NEXT: flat_store_b16 v[0:1], v0
+; GFX1250-TRUE16-NEXT: global_store_b16 v[0:1], v0, off
; GFX1250-TRUE16-NEXT: s_endpgm
;
; GFX1250-FAKE16-LABEL: amdgpu_cs_v16i1:
diff --git a/llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll b/llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll
index f706f53..eb40e5c 100644
--- a/llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/direct-indirect-call.ll
@@ -35,6 +35,6 @@ define amdgpu_kernel void @test_direct_indirect_call() {
ret void
}
;.
-; CHECK: attributes #[[ATTR0]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; CHECK: attributes #[[ATTR1]] = { "uniform-work-group-size"="false" }
;.
diff --git a/llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll b/llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll
index 8da204b..c02ff28 100644
--- a/llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll
+++ b/llvm/test/CodeGen/AMDGPU/duplicate-attribute-indirect.ll
@@ -28,6 +28,6 @@ define amdgpu_kernel void @test_simple_indirect_call() #0 {
attributes #0 = { "amdgpu-no-dispatch-id" }
;.
-; ATTRIBUTOR_GCN: attributes #[[ATTR0]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_GCN: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; ATTRIBUTOR_GCN: attributes #[[ATTR1]] = { "amdgpu-no-dispatch-id" "uniform-work-group-size"="false" }
;.
diff --git a/llvm/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll b/llvm/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll
index ab51693..05d3e9c3 100644
--- a/llvm/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll
+++ b/llvm/test/CodeGen/AMDGPU/fcanonicalize-elimination.ll
@@ -497,12 +497,10 @@ define amdgpu_kernel void @test_fold_canonicalize_minnum_value_f32(ptr addrspace
ret void
}
-; FIXME: Should there be more checks here? minnum with NaN operand is simplified away.
+; FIXME: Should there be more checks here? minnum with sNaN operand is simplified to qNaN.
; GCN-LABEL: test_fold_canonicalize_sNaN_value_f32:
-; GCN: {{flat|global}}_load_dword [[LOAD:v[0-9]+]]
-; VI: v_mul_f32_e32 v{{[0-9]+}}, 1.0, [[LOAD]]
-; GFX9: v_max_f32_e32 v{{[0-9]+}}, [[LOAD]], [[LOAD]]
+; GCN: v_mov_b32_e32 v{{.+}}, 0x7fc00000
define amdgpu_kernel void @test_fold_canonicalize_sNaN_value_f32(ptr addrspace(1) %arg) {
%id = tail call i32 @llvm.amdgcn.workitem.id.x()
%gep = getelementptr inbounds float, ptr addrspace(1) %arg, i32 %id
diff --git a/llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll b/llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll
index 3de6df2..833be20 100644
--- a/llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll
+++ b/llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll
@@ -1949,8 +1949,7 @@ define float @v_fneg_self_minimumnum_f32_ieee(float %a) #0 {
; GCN-LABEL: v_fneg_self_minimumnum_f32_ieee:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_mul_f32_e32 v0, -1.0, v0
-; GCN-NEXT: v_max_f32_e32 v0, v0, v0
+; GCN-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
; GCN-NEXT: s_setpc_b64 s[30:31]
%min = call float @llvm.minimumnum.f32(float %a, float %a)
%min.fneg = fneg float %min
@@ -1961,7 +1960,7 @@ define float @v_fneg_self_minimumnum_f32_no_ieee(float %a) #4 {
; GCN-LABEL: v_fneg_self_minimumnum_f32_no_ieee:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_max_f32_e64 v0, -v0, -v0
+; GCN-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
; GCN-NEXT: s_setpc_b64 s[30:31]
%min = call float @llvm.minimumnum.f32(float %a, float %a)
%min.fneg = fneg float %min
@@ -2285,8 +2284,7 @@ define float @v_fneg_self_maximumnum_f32_ieee(float %a) #0 {
; GCN-LABEL: v_fneg_self_maximumnum_f32_ieee:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_mul_f32_e32 v0, -1.0, v0
-; GCN-NEXT: v_min_f32_e32 v0, v0, v0
+; GCN-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
; GCN-NEXT: s_setpc_b64 s[30:31]
%max = call float @llvm.maximumnum.f32(float %a, float %a)
%max.fneg = fneg float %max
@@ -2297,7 +2295,7 @@ define float @v_fneg_self_maximumnum_f32_no_ieee(float %a) #4 {
; GCN-LABEL: v_fneg_self_maximumnum_f32_no_ieee:
; GCN: ; %bb.0:
; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT: v_min_f32_e64 v0, -v0, -v0
+; GCN-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
; GCN-NEXT: s_setpc_b64 s[30:31]
%max = call float @llvm.maximumnum.f32(float %a, float %a)
%max.fneg = fneg float %max
diff --git a/llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll b/llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll
index 40d2765..b0dd187 100644
--- a/llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll
@@ -11,9 +11,9 @@
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -global-isel=0 -mattr=-flat-for-global,-real-true16 -denormal-fp-math=preserve-sign < %s | FileCheck -enable-var-scope -check-prefixes=GFX11-SDAG-FAKE16 %s
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -global-isel=1 -mattr=-flat-for-global,+real-true16 -denormal-fp-math=preserve-sign < %s | FileCheck -enable-var-scope -check-prefixes=GFX11-GISEL-TRUE16 %s
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -global-isel=1 -mattr=-flat-for-global,-real-true16 -denormal-fp-math=preserve-sign < %s | FileCheck -enable-var-scope -check-prefixes=GFX11-GISEL-FAKE16 %s
-; TODO: FIXME-TRUE16 llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1250 -global-isel=0 -mattr=-flat-for-global,+real-true16 -denormal-fp-math=preserve-sign < %s | FileCheck -enable-var-scope -check-prefixes=GFX1250-SDAG-TRUE16 %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1250 -global-isel=0 -mattr=-flat-for-global,+real-true16 -denormal-fp-math=preserve-sign < %s | FileCheck -enable-var-scope -check-prefixes=GFX1250-SDAG-TRUE16 %s
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1250 -global-isel=0 -mattr=-flat-for-global,-real-true16 -denormal-fp-math=preserve-sign < %s | FileCheck -enable-var-scope -check-prefixes=GFX1250-SDAG-FAKE16 %s
-; TODO: FIXME-TRUE16 llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1250 -global-isel=1 -mattr=-flat-for-global,+real-true16 -denormal-fp-math=preserve-sign < %s | FileCheck -enable-var-scope -check-prefixes=GFX1250-GISEL-TRUE16 %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1250 -global-isel=1 -mattr=-flat-for-global,+real-true16 -denormal-fp-math=preserve-sign < %s | FileCheck -enable-var-scope -check-prefixes=GFX1250-GISEL-TRUE16 %s
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1250 -global-isel=1 -mattr=-flat-for-global,-real-true16 -denormal-fp-math=preserve-sign < %s | FileCheck -enable-var-scope -check-prefixes=GFX1250-GISEL-FAKE16 %s
define amdgpu_kernel void @fptrunc_f32_to_f16(
@@ -197,6 +197,24 @@ define amdgpu_kernel void @fptrunc_f32_to_f16(
; GFX11-GISEL-FAKE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0
; GFX11-GISEL-FAKE16-NEXT: s_endpgm
;
+; GFX1250-SDAG-TRUE16-LABEL: fptrunc_f32_to_f16:
+; GFX1250-SDAG-TRUE16: ; %bb.0: ; %entry
+; GFX1250-SDAG-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s6, -1
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s7, 0x31016000
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s10, s6
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s11, s7
+; GFX1250-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s8, s2
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s9, s3
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s4, s0
+; GFX1250-SDAG-TRUE16-NEXT: buffer_load_b32 v0, off, s[8:11], null
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s5, s1
+; GFX1250-SDAG-TRUE16-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.l, v0
+; GFX1250-SDAG-TRUE16-NEXT: buffer_store_b16 v0, off, s[4:7], null
+; GFX1250-SDAG-TRUE16-NEXT: s_endpgm
+;
; GFX1250-SDAG-FAKE16-LABEL: fptrunc_f32_to_f16:
; GFX1250-SDAG-FAKE16: ; %bb.0: ; %entry
; GFX1250-SDAG-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
@@ -215,6 +233,21 @@ define amdgpu_kernel void @fptrunc_f32_to_f16(
; GFX1250-SDAG-FAKE16-NEXT: buffer_store_b16 v0, off, s[4:7], null
; GFX1250-SDAG-FAKE16-NEXT: s_endpgm
;
+; GFX1250-GISEL-TRUE16-LABEL: fptrunc_f32_to_f16:
+; GFX1250-GISEL-TRUE16: ; %bb.0: ; %entry
+; GFX1250-GISEL-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-GISEL-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-GISEL-TRUE16-NEXT: s_load_b32 s2, s[2:3], 0x0
+; GFX1250-GISEL-TRUE16-NEXT: s_wait_xcnt 0x0
+; GFX1250-GISEL-TRUE16-NEXT: s_mov_b32 s3, 0x31016000
+; GFX1250-GISEL-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-GISEL-TRUE16-NEXT: s_cvt_f16_f32 s2, s2
+; GFX1250-GISEL-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
+; GFX1250-GISEL-TRUE16-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-GISEL-TRUE16-NEXT: s_mov_b32 s2, -1
+; GFX1250-GISEL-TRUE16-NEXT: buffer_store_b16 v0, off, s[0:3], null
+; GFX1250-GISEL-TRUE16-NEXT: s_endpgm
+;
; GFX1250-GISEL-FAKE16-LABEL: fptrunc_f32_to_f16:
; GFX1250-GISEL-FAKE16: ; %bb.0: ; %entry
; GFX1250-GISEL-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
@@ -419,6 +452,24 @@ define amdgpu_kernel void @fptrunc_f32_to_f16_afn(ptr addrspace(1) %r,
; GFX11-GISEL-FAKE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0
; GFX11-GISEL-FAKE16-NEXT: s_endpgm
;
+; GFX1250-SDAG-TRUE16-LABEL: fptrunc_f32_to_f16_afn:
+; GFX1250-SDAG-TRUE16: ; %bb.0: ; %entry
+; GFX1250-SDAG-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s6, -1
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s7, 0x31016000
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s10, s6
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s11, s7
+; GFX1250-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s8, s2
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s9, s3
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s4, s0
+; GFX1250-SDAG-TRUE16-NEXT: buffer_load_b32 v0, off, s[8:11], null
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s5, s1
+; GFX1250-SDAG-TRUE16-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.l, v0
+; GFX1250-SDAG-TRUE16-NEXT: buffer_store_b16 v0, off, s[4:7], null
+; GFX1250-SDAG-TRUE16-NEXT: s_endpgm
+;
; GFX1250-SDAG-FAKE16-LABEL: fptrunc_f32_to_f16_afn:
; GFX1250-SDAG-FAKE16: ; %bb.0: ; %entry
; GFX1250-SDAG-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
@@ -437,6 +488,21 @@ define amdgpu_kernel void @fptrunc_f32_to_f16_afn(ptr addrspace(1) %r,
; GFX1250-SDAG-FAKE16-NEXT: buffer_store_b16 v0, off, s[4:7], null
; GFX1250-SDAG-FAKE16-NEXT: s_endpgm
;
+; GFX1250-GISEL-TRUE16-LABEL: fptrunc_f32_to_f16_afn:
+; GFX1250-GISEL-TRUE16: ; %bb.0: ; %entry
+; GFX1250-GISEL-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-GISEL-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-GISEL-TRUE16-NEXT: s_load_b32 s2, s[2:3], 0x0
+; GFX1250-GISEL-TRUE16-NEXT: s_wait_xcnt 0x0
+; GFX1250-GISEL-TRUE16-NEXT: s_mov_b32 s3, 0x31016000
+; GFX1250-GISEL-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-GISEL-TRUE16-NEXT: s_cvt_f16_f32 s2, s2
+; GFX1250-GISEL-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_3)
+; GFX1250-GISEL-TRUE16-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-GISEL-TRUE16-NEXT: s_mov_b32 s2, -1
+; GFX1250-GISEL-TRUE16-NEXT: buffer_store_b16 v0, off, s[0:3], null
+; GFX1250-GISEL-TRUE16-NEXT: s_endpgm
+;
; GFX1250-GISEL-FAKE16-LABEL: fptrunc_f32_to_f16_afn:
; GFX1250-GISEL-FAKE16: ; %bb.0: ; %entry
; GFX1250-GISEL-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
@@ -1160,6 +1226,73 @@ define amdgpu_kernel void @fptrunc_f64_to_f16(
; GFX11-GISEL-FAKE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0
; GFX11-GISEL-FAKE16-NEXT: s_endpgm
;
+; GFX1250-SDAG-TRUE16-LABEL: fptrunc_f64_to_f16:
+; GFX1250-SDAG-TRUE16: ; %bb.0: ; %entry
+; GFX1250-SDAG-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s6, -1
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s7, 0x31016000
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s10, s6
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s11, s7
+; GFX1250-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s8, s2
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s9, s3
+; GFX1250-SDAG-TRUE16-NEXT: buffer_load_b64 v[0:1], off, s[8:11], null
+; GFX1250-SDAG-TRUE16-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-TRUE16-NEXT: v_readfirstlane_b32 s2, v1
+; GFX1250-SDAG-TRUE16-NEXT: s_and_b32 s3, s2, 0x1ff
+; GFX1250-SDAG-TRUE16-NEXT: s_lshr_b32 s5, s2, 8
+; GFX1250-SDAG-TRUE16-NEXT: v_or_b32_e32 v0, s3, v0
+; GFX1250-SDAG-TRUE16-NEXT: s_bfe_u32 s3, s2, 0xb0014
+; GFX1250-SDAG-TRUE16-NEXT: s_and_b32 s5, s5, 0xffe
+; GFX1250-SDAG-TRUE16-NEXT: s_sub_co_i32 s4, 0x3f1, s3
+; GFX1250-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
+; GFX1250-SDAG-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX1250-SDAG-TRUE16-NEXT: v_med3_i32 v1, s4, 0, 13
+; GFX1250-SDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX1250-SDAG-TRUE16-NEXT: v_readfirstlane_b32 s8, v1
+; GFX1250-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
+; GFX1250-SDAG-TRUE16-NEXT: v_readfirstlane_b32 s4, v0
+; GFX1250-SDAG-TRUE16-NEXT: s_or_b32 s4, s5, s4
+; GFX1250-SDAG-TRUE16-NEXT: s_or_b32 s5, s4, 0x1000
+; GFX1250-SDAG-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX1250-SDAG-TRUE16-NEXT: s_lshr_b32 s9, s5, s8
+; GFX1250-SDAG-TRUE16-NEXT: s_lshl_b32 s8, s9, s8
+; GFX1250-SDAG-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_4) | instid1(SALU_CYCLE_1)
+; GFX1250-SDAG-TRUE16-NEXT: s_cmp_lg_u32 s8, s5
+; GFX1250-SDAG-TRUE16-NEXT: s_cselect_b32 s5, 1, 0
+; GFX1250-SDAG-TRUE16-NEXT: s_addk_co_i32 s3, 0xfc10
+; GFX1250-SDAG-TRUE16-NEXT: s_or_b32 s5, s9, s5
+; GFX1250-SDAG-TRUE16-NEXT: s_lshl_b32 s8, s3, 12
+; GFX1250-SDAG-TRUE16-NEXT: s_or_b32 s8, s4, s8
+; GFX1250-SDAG-TRUE16-NEXT: s_cmp_lt_i32 s3, 1
+; GFX1250-SDAG-TRUE16-NEXT: s_cselect_b32 s5, s5, s8
+; GFX1250-SDAG-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX1250-SDAG-TRUE16-NEXT: s_and_b32 s8, s5, 7
+; GFX1250-SDAG-TRUE16-NEXT: s_cmp_gt_i32 s8, 5
+; GFX1250-SDAG-TRUE16-NEXT: s_cselect_b32 s9, 1, 0
+; GFX1250-SDAG-TRUE16-NEXT: s_cmp_eq_u32 s8, 3
+; GFX1250-SDAG-TRUE16-NEXT: s_cselect_b32 s8, 1, 0
+; GFX1250-SDAG-TRUE16-NEXT: s_lshr_b32 s5, s5, 2
+; GFX1250-SDAG-TRUE16-NEXT: s_or_b32 s8, s8, s9
+; GFX1250-SDAG-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-SDAG-TRUE16-NEXT: s_add_co_i32 s5, s5, s8
+; GFX1250-SDAG-TRUE16-NEXT: s_cmp_lt_i32 s3, 31
+; GFX1250-SDAG-TRUE16-NEXT: s_movk_i32 s8, 0x7e00
+; GFX1250-SDAG-TRUE16-NEXT: s_cselect_b32 s5, s5, 0x7c00
+; GFX1250-SDAG-TRUE16-NEXT: s_cmp_lg_u32 s4, 0
+; GFX1250-SDAG-TRUE16-NEXT: s_cselect_b32 s4, s8, 0x7c00
+; GFX1250-SDAG-TRUE16-NEXT: s_cmp_eq_u32 s3, 0x40f
+; GFX1250-SDAG-TRUE16-NEXT: s_cselect_b32 s3, s4, s5
+; GFX1250-SDAG-TRUE16-NEXT: s_lshr_b32 s2, s2, 16
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s4, s0
+; GFX1250-SDAG-TRUE16-NEXT: s_and_b32 s2, s2, 0x8000
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s5, s1
+; GFX1250-SDAG-TRUE16-NEXT: s_or_b32 s2, s2, s3
+; GFX1250-SDAG-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-SDAG-TRUE16-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-SDAG-TRUE16-NEXT: buffer_store_b16 v0, off, s[4:7], null
+; GFX1250-SDAG-TRUE16-NEXT: s_endpgm
+;
; GFX1250-SDAG-FAKE16-LABEL: fptrunc_f64_to_f16:
; GFX1250-SDAG-FAKE16: ; %bb.0: ; %entry
; GFX1250-SDAG-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
@@ -1227,6 +1360,63 @@ define amdgpu_kernel void @fptrunc_f64_to_f16(
; GFX1250-SDAG-FAKE16-NEXT: buffer_store_b16 v0, off, s[4:7], null
; GFX1250-SDAG-FAKE16-NEXT: s_endpgm
;
+; GFX1250-GISEL-TRUE16-LABEL: fptrunc_f64_to_f16:
+; GFX1250-GISEL-TRUE16: ; %bb.0: ; %entry
+; GFX1250-GISEL-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-GISEL-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-GISEL-TRUE16-NEXT: s_load_b64 s[2:3], s[2:3], 0x0
+; GFX1250-GISEL-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-GISEL-TRUE16-NEXT: s_and_b32 s6, s3, 0x1ff
+; GFX1250-GISEL-TRUE16-NEXT: s_bfe_u32 s4, s3, 0xb0014
+; GFX1250-GISEL-TRUE16-NEXT: s_lshr_b32 s5, s3, 8
+; GFX1250-GISEL-TRUE16-NEXT: s_or_b32 s2, s6, s2
+; GFX1250-GISEL-TRUE16-NEXT: s_addk_co_i32 s4, 0xfc10
+; GFX1250-GISEL-TRUE16-NEXT: s_and_b32 s5, s5, 0xffe
+; GFX1250-GISEL-TRUE16-NEXT: s_cmp_lg_u32 s2, 0
+; GFX1250-GISEL-TRUE16-NEXT: s_cselect_b32 s2, 1, 0
+; GFX1250-GISEL-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX1250-GISEL-TRUE16-NEXT: s_or_b32 s2, s5, s2
+; GFX1250-GISEL-TRUE16-NEXT: s_cmp_lg_u32 s2, 0
+; GFX1250-GISEL-TRUE16-NEXT: s_cselect_b32 s5, 1, 0
+; GFX1250-GISEL-TRUE16-NEXT: s_sub_co_i32 s6, 1, s4
+; GFX1250-GISEL-TRUE16-NEXT: s_or_b32 s8, s2, 0x1000
+; GFX1250-GISEL-TRUE16-NEXT: s_max_i32 s6, s6, 0
+; GFX1250-GISEL-TRUE16-NEXT: s_lshl_b32 s7, s4, 12
+; GFX1250-GISEL-TRUE16-NEXT: s_min_i32 s6, s6, 13
+; GFX1250-GISEL-TRUE16-NEXT: s_lshl_b32 s5, s5, 9
+; GFX1250-GISEL-TRUE16-NEXT: s_lshr_b32 s9, s8, s6
+; GFX1250-GISEL-TRUE16-NEXT: s_or_b32 s2, s2, s7
+; GFX1250-GISEL-TRUE16-NEXT: s_lshl_b32 s6, s9, s6
+; GFX1250-GISEL-TRUE16-NEXT: s_or_b32 s5, s5, 0x7c00
+; GFX1250-GISEL-TRUE16-NEXT: s_cmp_lg_u32 s6, s8
+; GFX1250-GISEL-TRUE16-NEXT: s_cselect_b32 s6, 1, 0
+; GFX1250-GISEL-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
+; GFX1250-GISEL-TRUE16-NEXT: s_or_b32 s6, s9, s6
+; GFX1250-GISEL-TRUE16-NEXT: s_cmp_lt_i32 s4, 1
+; GFX1250-GISEL-TRUE16-NEXT: s_cselect_b32 s2, s6, s2
+; GFX1250-GISEL-TRUE16-NEXT: s_and_b32 s6, s2, 7
+; GFX1250-GISEL-TRUE16-NEXT: s_lshr_b32 s2, s2, 2
+; GFX1250-GISEL-TRUE16-NEXT: s_cmp_eq_u32 s6, 3
+; GFX1250-GISEL-TRUE16-NEXT: s_cselect_b32 s7, 1, 0
+; GFX1250-GISEL-TRUE16-NEXT: s_cmp_gt_i32 s6, 5
+; GFX1250-GISEL-TRUE16-NEXT: s_cselect_b32 s6, 1, 0
+; GFX1250-GISEL-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX1250-GISEL-TRUE16-NEXT: s_or_b32 s6, s7, s6
+; GFX1250-GISEL-TRUE16-NEXT: s_add_co_i32 s2, s2, s6
+; GFX1250-GISEL-TRUE16-NEXT: s_cmp_gt_i32 s4, 30
+; GFX1250-GISEL-TRUE16-NEXT: s_cselect_b32 s2, 0x7c00, s2
+; GFX1250-GISEL-TRUE16-NEXT: s_cmp_eq_u32 s4, 0x40f
+; GFX1250-GISEL-TRUE16-NEXT: s_cselect_b32 s2, s5, s2
+; GFX1250-GISEL-TRUE16-NEXT: s_lshr_b32 s3, s3, 16
+; GFX1250-GISEL-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX1250-GISEL-TRUE16-NEXT: s_and_b32 s3, s3, 0x8000
+; GFX1250-GISEL-TRUE16-NEXT: s_or_b32 s2, s3, s2
+; GFX1250-GISEL-TRUE16-NEXT: s_mov_b32 s3, 0x31016000
+; GFX1250-GISEL-TRUE16-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-GISEL-TRUE16-NEXT: s_mov_b32 s2, -1
+; GFX1250-GISEL-TRUE16-NEXT: buffer_store_b16 v0, off, s[0:3], null
+; GFX1250-GISEL-TRUE16-NEXT: s_endpgm
+;
; GFX1250-GISEL-FAKE16-LABEL: fptrunc_f64_to_f16:
; GFX1250-GISEL-FAKE16: ; %bb.0: ; %entry
; GFX1250-GISEL-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
@@ -1489,6 +1679,26 @@ define amdgpu_kernel void @fptrunc_f64_to_f16_afn(
; GFX11-GISEL-FAKE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0
; GFX11-GISEL-FAKE16-NEXT: s_endpgm
;
+; GFX1250-SDAG-TRUE16-LABEL: fptrunc_f64_to_f16_afn:
+; GFX1250-SDAG-TRUE16: ; %bb.0: ; %entry
+; GFX1250-SDAG-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s6, -1
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s7, 0x31016000
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s10, s6
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s11, s7
+; GFX1250-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s8, s2
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s9, s3
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s4, s0
+; GFX1250-SDAG-TRUE16-NEXT: buffer_load_b64 v[0:1], off, s[8:11], null
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s5, s1
+; GFX1250-SDAG-TRUE16-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-TRUE16-NEXT: v_cvt_f32_f64_e32 v0, v[0:1]
+; GFX1250-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.l, v0
+; GFX1250-SDAG-TRUE16-NEXT: buffer_store_b16 v0, off, s[4:7], null
+; GFX1250-SDAG-TRUE16-NEXT: s_endpgm
+;
; GFX1250-SDAG-FAKE16-LABEL: fptrunc_f64_to_f16_afn:
; GFX1250-SDAG-FAKE16: ; %bb.0: ; %entry
; GFX1250-SDAG-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
@@ -1509,6 +1719,20 @@ define amdgpu_kernel void @fptrunc_f64_to_f16_afn(
; GFX1250-SDAG-FAKE16-NEXT: buffer_store_b16 v0, off, s[4:7], null
; GFX1250-SDAG-FAKE16-NEXT: s_endpgm
;
+; GFX1250-GISEL-TRUE16-LABEL: fptrunc_f64_to_f16_afn:
+; GFX1250-GISEL-TRUE16: ; %bb.0: ; %entry
+; GFX1250-GISEL-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-GISEL-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-GISEL-TRUE16-NEXT: s_load_b64 s[2:3], s[2:3], 0x0
+; GFX1250-GISEL-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-GISEL-TRUE16-NEXT: v_cvt_f32_f64_e32 v0, s[2:3]
+; GFX1250-GISEL-TRUE16-NEXT: s_mov_b32 s2, -1
+; GFX1250-GISEL-TRUE16-NEXT: s_mov_b32 s3, 0x31016000
+; GFX1250-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-GISEL-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.l, v0
+; GFX1250-GISEL-TRUE16-NEXT: buffer_store_b16 v0, off, s[0:3], null
+; GFX1250-GISEL-TRUE16-NEXT: s_endpgm
+;
; GFX1250-GISEL-FAKE16-LABEL: fptrunc_f64_to_f16_afn:
; GFX1250-GISEL-FAKE16: ; %bb.0: ; %entry
; GFX1250-GISEL-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
@@ -1740,6 +1964,24 @@ define amdgpu_kernel void @fptrunc_v2f32_to_v2f16(
; GFX11-GISEL-FAKE16-NEXT: buffer_store_b32 v0, off, s[0:3], 0
; GFX11-GISEL-FAKE16-NEXT: s_endpgm
;
+; GFX1250-SDAG-TRUE16-LABEL: fptrunc_v2f32_to_v2f16:
+; GFX1250-SDAG-TRUE16: ; %bb.0: ; %entry
+; GFX1250-SDAG-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s6, -1
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s7, 0x31016000
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s10, s6
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s11, s7
+; GFX1250-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s8, s2
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s9, s3
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s4, s0
+; GFX1250-SDAG-TRUE16-NEXT: buffer_load_b64 v[0:1], off, s[8:11], null
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s5, s1
+; GFX1250-SDAG-TRUE16-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-TRUE16-NEXT: v_cvt_pk_f16_f32 v0, v0, v1
+; GFX1250-SDAG-TRUE16-NEXT: buffer_store_b32 v0, off, s[4:7], null
+; GFX1250-SDAG-TRUE16-NEXT: s_endpgm
+;
; GFX1250-SDAG-FAKE16-LABEL: fptrunc_v2f32_to_v2f16:
; GFX1250-SDAG-FAKE16: ; %bb.0: ; %entry
; GFX1250-SDAG-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
@@ -1758,6 +2000,20 @@ define amdgpu_kernel void @fptrunc_v2f32_to_v2f16(
; GFX1250-SDAG-FAKE16-NEXT: buffer_store_b32 v0, off, s[4:7], null
; GFX1250-SDAG-FAKE16-NEXT: s_endpgm
;
+; GFX1250-GISEL-TRUE16-LABEL: fptrunc_v2f32_to_v2f16:
+; GFX1250-GISEL-TRUE16: ; %bb.0: ; %entry
+; GFX1250-GISEL-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-GISEL-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-GISEL-TRUE16-NEXT: s_load_b64 s[2:3], s[2:3], 0x0
+; GFX1250-GISEL-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-GISEL-TRUE16-NEXT: v_mov_b64_e32 v[0:1], s[2:3]
+; GFX1250-GISEL-TRUE16-NEXT: s_mov_b32 s2, -1
+; GFX1250-GISEL-TRUE16-NEXT: s_mov_b32 s3, 0x31016000
+; GFX1250-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-GISEL-TRUE16-NEXT: v_cvt_pk_f16_f32 v0, v0, v1
+; GFX1250-GISEL-TRUE16-NEXT: buffer_store_b32 v0, off, s[0:3], null
+; GFX1250-GISEL-TRUE16-NEXT: s_endpgm
+;
; GFX1250-GISEL-FAKE16-LABEL: fptrunc_v2f32_to_v2f16:
; GFX1250-GISEL-FAKE16: ; %bb.0: ; %entry
; GFX1250-GISEL-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
@@ -3017,6 +3273,122 @@ define amdgpu_kernel void @fptrunc_v2f64_to_v2f16(
; GFX11-GISEL-FAKE16-NEXT: buffer_store_b32 v0, off, s[0:3], 0
; GFX11-GISEL-FAKE16-NEXT: s_endpgm
;
+; GFX1250-SDAG-TRUE16-LABEL: fptrunc_v2f64_to_v2f16:
+; GFX1250-SDAG-TRUE16: ; %bb.0: ; %entry
+; GFX1250-SDAG-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s6, -1
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s7, 0x31016000
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s10, s6
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s11, s7
+; GFX1250-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s8, s2
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s9, s3
+; GFX1250-SDAG-TRUE16-NEXT: buffer_load_b128 v[0:3], off, s[8:11], null
+; GFX1250-SDAG-TRUE16-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-TRUE16-NEXT: v_readfirstlane_b32 s2, v3
+; GFX1250-SDAG-TRUE16-NEXT: s_and_b32 s3, s2, 0x1ff
+; GFX1250-SDAG-TRUE16-NEXT: s_lshr_b32 s5, s2, 8
+; GFX1250-SDAG-TRUE16-NEXT: v_or_b32_e32 v2, s3, v2
+; GFX1250-SDAG-TRUE16-NEXT: s_bfe_u32 s3, s2, 0xb0014
+; GFX1250-SDAG-TRUE16-NEXT: s_and_b32 s5, s5, 0xffe
+; GFX1250-SDAG-TRUE16-NEXT: s_sub_co_i32 s4, 0x3f1, s3
+; GFX1250-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
+; GFX1250-SDAG-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v2
+; GFX1250-SDAG-TRUE16-NEXT: v_med3_i32 v3, s4, 0, 13
+; GFX1250-SDAG-TRUE16-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo
+; GFX1250-SDAG-TRUE16-NEXT: v_readfirstlane_b32 s8, v3
+; GFX1250-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
+; GFX1250-SDAG-TRUE16-NEXT: v_readfirstlane_b32 s4, v2
+; GFX1250-SDAG-TRUE16-NEXT: s_or_b32 s4, s5, s4
+; GFX1250-SDAG-TRUE16-NEXT: s_or_b32 s5, s4, 0x1000
+; GFX1250-SDAG-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX1250-SDAG-TRUE16-NEXT: s_lshr_b32 s9, s5, s8
+; GFX1250-SDAG-TRUE16-NEXT: s_lshl_b32 s8, s9, s8
+; GFX1250-SDAG-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_4) | instid1(SALU_CYCLE_1)
+; GFX1250-SDAG-TRUE16-NEXT: s_cmp_lg_u32 s8, s5
+; GFX1250-SDAG-TRUE16-NEXT: s_cselect_b32 s5, 1, 0
+; GFX1250-SDAG-TRUE16-NEXT: s_addk_co_i32 s3, 0xfc10
+; GFX1250-SDAG-TRUE16-NEXT: s_or_b32 s5, s9, s5
+; GFX1250-SDAG-TRUE16-NEXT: s_lshl_b32 s8, s3, 12
+; GFX1250-SDAG-TRUE16-NEXT: s_or_b32 s8, s4, s8
+; GFX1250-SDAG-TRUE16-NEXT: s_cmp_lt_i32 s3, 1
+; GFX1250-SDAG-TRUE16-NEXT: s_cselect_b32 s5, s5, s8
+; GFX1250-SDAG-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX1250-SDAG-TRUE16-NEXT: s_and_b32 s8, s5, 7
+; GFX1250-SDAG-TRUE16-NEXT: s_cmp_gt_i32 s8, 5
+; GFX1250-SDAG-TRUE16-NEXT: s_cselect_b32 s9, 1, 0
+; GFX1250-SDAG-TRUE16-NEXT: s_cmp_eq_u32 s8, 3
+; GFX1250-SDAG-TRUE16-NEXT: s_cselect_b32 s8, 1, 0
+; GFX1250-SDAG-TRUE16-NEXT: s_lshr_b32 s5, s5, 2
+; GFX1250-SDAG-TRUE16-NEXT: s_or_b32 s8, s8, s9
+; GFX1250-SDAG-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-SDAG-TRUE16-NEXT: s_add_co_i32 s5, s5, s8
+; GFX1250-SDAG-TRUE16-NEXT: s_cmp_lt_i32 s3, 31
+; GFX1250-SDAG-TRUE16-NEXT: s_movk_i32 s8, 0x7e00
+; GFX1250-SDAG-TRUE16-NEXT: s_cselect_b32 s5, s5, 0x7c00
+; GFX1250-SDAG-TRUE16-NEXT: s_cmp_lg_u32 s4, 0
+; GFX1250-SDAG-TRUE16-NEXT: v_readfirstlane_b32 s4, v1
+; GFX1250-SDAG-TRUE16-NEXT: s_cselect_b32 s9, s8, 0x7c00
+; GFX1250-SDAG-TRUE16-NEXT: s_cmp_eq_u32 s3, 0x40f
+; GFX1250-SDAG-TRUE16-NEXT: s_cselect_b32 s3, s9, s5
+; GFX1250-SDAG-TRUE16-NEXT: s_and_b32 s5, s4, 0x1ff
+; GFX1250-SDAG-TRUE16-NEXT: s_lshr_b32 s10, s4, 8
+; GFX1250-SDAG-TRUE16-NEXT: v_or_b32_e32 v0, s5, v0
+; GFX1250-SDAG-TRUE16-NEXT: s_bfe_u32 s5, s4, 0xb0014
+; GFX1250-SDAG-TRUE16-NEXT: s_and_b32 s10, s10, 0xffe
+; GFX1250-SDAG-TRUE16-NEXT: s_sub_co_i32 s9, 0x3f1, s5
+; GFX1250-SDAG-TRUE16-NEXT: s_lshr_b32 s2, s2, 16
+; GFX1250-SDAG-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX1250-SDAG-TRUE16-NEXT: v_med3_i32 v1, s9, 0, 13
+; GFX1250-SDAG-TRUE16-NEXT: s_and_b32 s2, s2, 0x8000
+; GFX1250-SDAG-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX1250-SDAG-TRUE16-NEXT: s_or_b32 s2, s2, s3
+; GFX1250-SDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX1250-SDAG-TRUE16-NEXT: v_readfirstlane_b32 s11, v1
+; GFX1250-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
+; GFX1250-SDAG-TRUE16-NEXT: v_readfirstlane_b32 s9, v0
+; GFX1250-SDAG-TRUE16-NEXT: s_or_b32 s9, s10, s9
+; GFX1250-SDAG-TRUE16-NEXT: s_or_b32 s10, s9, 0x1000
+; GFX1250-SDAG-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX1250-SDAG-TRUE16-NEXT: s_lshr_b32 s12, s10, s11
+; GFX1250-SDAG-TRUE16-NEXT: s_lshl_b32 s11, s12, s11
+; GFX1250-SDAG-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_4) | instid1(SALU_CYCLE_1)
+; GFX1250-SDAG-TRUE16-NEXT: s_cmp_lg_u32 s11, s10
+; GFX1250-SDAG-TRUE16-NEXT: s_cselect_b32 s3, 1, 0
+; GFX1250-SDAG-TRUE16-NEXT: s_addk_co_i32 s5, 0xfc10
+; GFX1250-SDAG-TRUE16-NEXT: s_or_b32 s3, s12, s3
+; GFX1250-SDAG-TRUE16-NEXT: s_lshl_b32 s10, s5, 12
+; GFX1250-SDAG-TRUE16-NEXT: s_or_b32 s10, s9, s10
+; GFX1250-SDAG-TRUE16-NEXT: s_cmp_lt_i32 s5, 1
+; GFX1250-SDAG-TRUE16-NEXT: s_cselect_b32 s3, s3, s10
+; GFX1250-SDAG-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX1250-SDAG-TRUE16-NEXT: s_and_b32 s10, s3, 7
+; GFX1250-SDAG-TRUE16-NEXT: s_cmp_gt_i32 s10, 5
+; GFX1250-SDAG-TRUE16-NEXT: s_cselect_b32 s11, 1, 0
+; GFX1250-SDAG-TRUE16-NEXT: s_cmp_eq_u32 s10, 3
+; GFX1250-SDAG-TRUE16-NEXT: s_cselect_b32 s10, 1, 0
+; GFX1250-SDAG-TRUE16-NEXT: s_lshr_b32 s3, s3, 2
+; GFX1250-SDAG-TRUE16-NEXT: s_or_b32 s10, s10, s11
+; GFX1250-SDAG-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-SDAG-TRUE16-NEXT: s_add_co_i32 s3, s3, s10
+; GFX1250-SDAG-TRUE16-NEXT: s_cmp_lt_i32 s5, 31
+; GFX1250-SDAG-TRUE16-NEXT: s_cselect_b32 s3, s3, 0x7c00
+; GFX1250-SDAG-TRUE16-NEXT: s_cmp_lg_u32 s9, 0
+; GFX1250-SDAG-TRUE16-NEXT: s_cselect_b32 s8, s8, 0x7c00
+; GFX1250-SDAG-TRUE16-NEXT: s_cmp_eq_u32 s5, 0x40f
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s5, s1
+; GFX1250-SDAG-TRUE16-NEXT: s_cselect_b32 s3, s8, s3
+; GFX1250-SDAG-TRUE16-NEXT: s_lshr_b32 s4, s4, 16
+; GFX1250-SDAG-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX1250-SDAG-TRUE16-NEXT: s_and_b32 s4, s4, 0x8000
+; GFX1250-SDAG-TRUE16-NEXT: s_or_b32 s3, s4, s3
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s4, s0
+; GFX1250-SDAG-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s3, s2
+; GFX1250-SDAG-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-SDAG-TRUE16-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-SDAG-TRUE16-NEXT: buffer_store_b32 v0, off, s[4:7], null
+; GFX1250-SDAG-TRUE16-NEXT: s_endpgm
+;
; GFX1250-SDAG-FAKE16-LABEL: fptrunc_v2f64_to_v2f16:
; GFX1250-SDAG-FAKE16: ; %bb.0: ; %entry
; GFX1250-SDAG-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
@@ -3133,6 +3505,109 @@ define amdgpu_kernel void @fptrunc_v2f64_to_v2f16(
; GFX1250-SDAG-FAKE16-NEXT: buffer_store_b32 v0, off, s[4:7], null
; GFX1250-SDAG-FAKE16-NEXT: s_endpgm
;
+; GFX1250-GISEL-TRUE16-LABEL: fptrunc_v2f64_to_v2f16:
+; GFX1250-GISEL-TRUE16: ; %bb.0: ; %entry
+; GFX1250-GISEL-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-GISEL-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-GISEL-TRUE16-NEXT: s_load_b128 s[4:7], s[2:3], 0x0
+; GFX1250-GISEL-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-GISEL-TRUE16-NEXT: s_and_b32 s8, s5, 0x1ff
+; GFX1250-GISEL-TRUE16-NEXT: s_bfe_u32 s2, s5, 0xb0014
+; GFX1250-GISEL-TRUE16-NEXT: s_lshr_b32 s3, s5, 8
+; GFX1250-GISEL-TRUE16-NEXT: s_or_b32 s4, s8, s4
+; GFX1250-GISEL-TRUE16-NEXT: s_addk_co_i32 s2, 0xfc10
+; GFX1250-GISEL-TRUE16-NEXT: s_and_b32 s3, s3, 0xffe
+; GFX1250-GISEL-TRUE16-NEXT: s_cmp_lg_u32 s4, 0
+; GFX1250-GISEL-TRUE16-NEXT: s_cselect_b32 s4, 1, 0
+; GFX1250-GISEL-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX1250-GISEL-TRUE16-NEXT: s_or_b32 s3, s3, s4
+; GFX1250-GISEL-TRUE16-NEXT: s_cmp_lg_u32 s3, 0
+; GFX1250-GISEL-TRUE16-NEXT: s_cselect_b32 s4, 1, 0
+; GFX1250-GISEL-TRUE16-NEXT: s_sub_co_i32 s8, 1, s2
+; GFX1250-GISEL-TRUE16-NEXT: s_or_b32 s10, s3, 0x1000
+; GFX1250-GISEL-TRUE16-NEXT: s_max_i32 s8, s8, 0
+; GFX1250-GISEL-TRUE16-NEXT: s_lshl_b32 s9, s2, 12
+; GFX1250-GISEL-TRUE16-NEXT: s_min_i32 s8, s8, 13
+; GFX1250-GISEL-TRUE16-NEXT: s_lshl_b32 s4, s4, 9
+; GFX1250-GISEL-TRUE16-NEXT: s_lshr_b32 s11, s10, s8
+; GFX1250-GISEL-TRUE16-NEXT: s_or_b32 s3, s3, s9
+; GFX1250-GISEL-TRUE16-NEXT: s_lshl_b32 s8, s11, s8
+; GFX1250-GISEL-TRUE16-NEXT: s_or_b32 s4, s4, 0x7c00
+; GFX1250-GISEL-TRUE16-NEXT: s_cmp_lg_u32 s8, s10
+; GFX1250-GISEL-TRUE16-NEXT: s_cselect_b32 s8, 1, 0
+; GFX1250-GISEL-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
+; GFX1250-GISEL-TRUE16-NEXT: s_or_b32 s8, s11, s8
+; GFX1250-GISEL-TRUE16-NEXT: s_cmp_lt_i32 s2, 1
+; GFX1250-GISEL-TRUE16-NEXT: s_cselect_b32 s3, s8, s3
+; GFX1250-GISEL-TRUE16-NEXT: s_and_b32 s8, s3, 7
+; GFX1250-GISEL-TRUE16-NEXT: s_lshr_b32 s3, s3, 2
+; GFX1250-GISEL-TRUE16-NEXT: s_cmp_eq_u32 s8, 3
+; GFX1250-GISEL-TRUE16-NEXT: s_cselect_b32 s9, 1, 0
+; GFX1250-GISEL-TRUE16-NEXT: s_cmp_gt_i32 s8, 5
+; GFX1250-GISEL-TRUE16-NEXT: s_cselect_b32 s8, 1, 0
+; GFX1250-GISEL-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX1250-GISEL-TRUE16-NEXT: s_or_b32 s8, s9, s8
+; GFX1250-GISEL-TRUE16-NEXT: s_add_co_i32 s3, s3, s8
+; GFX1250-GISEL-TRUE16-NEXT: s_cmp_gt_i32 s2, 30
+; GFX1250-GISEL-TRUE16-NEXT: s_cselect_b32 s3, 0x7c00, s3
+; GFX1250-GISEL-TRUE16-NEXT: s_cmp_eq_u32 s2, 0x40f
+; GFX1250-GISEL-TRUE16-NEXT: s_cselect_b32 s2, s4, s3
+; GFX1250-GISEL-TRUE16-NEXT: s_lshr_b32 s3, s5, 16
+; GFX1250-GISEL-TRUE16-NEXT: s_and_b32 s8, s7, 0x1ff
+; GFX1250-GISEL-TRUE16-NEXT: s_bfe_u32 s4, s7, 0xb0014
+; GFX1250-GISEL-TRUE16-NEXT: s_lshr_b32 s5, s7, 8
+; GFX1250-GISEL-TRUE16-NEXT: s_and_b32 s3, s3, 0x8000
+; GFX1250-GISEL-TRUE16-NEXT: s_or_b32 s6, s8, s6
+; GFX1250-GISEL-TRUE16-NEXT: s_addk_co_i32 s4, 0xfc10
+; GFX1250-GISEL-TRUE16-NEXT: s_and_b32 s5, s5, 0xffe
+; GFX1250-GISEL-TRUE16-NEXT: s_or_b32 s2, s3, s2
+; GFX1250-GISEL-TRUE16-NEXT: s_cmp_lg_u32 s6, 0
+; GFX1250-GISEL-TRUE16-NEXT: s_cselect_b32 s3, 1, 0
+; GFX1250-GISEL-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX1250-GISEL-TRUE16-NEXT: s_or_b32 s3, s5, s3
+; GFX1250-GISEL-TRUE16-NEXT: s_cmp_lg_u32 s3, 0
+; GFX1250-GISEL-TRUE16-NEXT: s_cselect_b32 s5, 1, 0
+; GFX1250-GISEL-TRUE16-NEXT: s_sub_co_i32 s6, 1, s4
+; GFX1250-GISEL-TRUE16-NEXT: s_or_b32 s9, s3, 0x1000
+; GFX1250-GISEL-TRUE16-NEXT: s_max_i32 s6, s6, 0
+; GFX1250-GISEL-TRUE16-NEXT: s_lshl_b32 s8, s4, 12
+; GFX1250-GISEL-TRUE16-NEXT: s_min_i32 s6, s6, 13
+; GFX1250-GISEL-TRUE16-NEXT: s_lshl_b32 s5, s5, 9
+; GFX1250-GISEL-TRUE16-NEXT: s_lshr_b32 s10, s9, s6
+; GFX1250-GISEL-TRUE16-NEXT: s_or_b32 s3, s3, s8
+; GFX1250-GISEL-TRUE16-NEXT: s_lshl_b32 s6, s10, s6
+; GFX1250-GISEL-TRUE16-NEXT: s_or_b32 s5, s5, 0x7c00
+; GFX1250-GISEL-TRUE16-NEXT: s_cmp_lg_u32 s6, s9
+; GFX1250-GISEL-TRUE16-NEXT: s_cselect_b32 s6, 1, 0
+; GFX1250-GISEL-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
+; GFX1250-GISEL-TRUE16-NEXT: s_or_b32 s6, s10, s6
+; GFX1250-GISEL-TRUE16-NEXT: s_cmp_lt_i32 s4, 1
+; GFX1250-GISEL-TRUE16-NEXT: s_cselect_b32 s3, s6, s3
+; GFX1250-GISEL-TRUE16-NEXT: s_and_b32 s6, s3, 7
+; GFX1250-GISEL-TRUE16-NEXT: s_lshr_b32 s3, s3, 2
+; GFX1250-GISEL-TRUE16-NEXT: s_cmp_eq_u32 s6, 3
+; GFX1250-GISEL-TRUE16-NEXT: s_cselect_b32 s8, 1, 0
+; GFX1250-GISEL-TRUE16-NEXT: s_cmp_gt_i32 s6, 5
+; GFX1250-GISEL-TRUE16-NEXT: s_cselect_b32 s6, 1, 0
+; GFX1250-GISEL-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX1250-GISEL-TRUE16-NEXT: s_or_b32 s6, s8, s6
+; GFX1250-GISEL-TRUE16-NEXT: s_add_co_i32 s3, s3, s6
+; GFX1250-GISEL-TRUE16-NEXT: s_cmp_gt_i32 s4, 30
+; GFX1250-GISEL-TRUE16-NEXT: s_cselect_b32 s3, 0x7c00, s3
+; GFX1250-GISEL-TRUE16-NEXT: s_cmp_eq_u32 s4, 0x40f
+; GFX1250-GISEL-TRUE16-NEXT: s_cselect_b32 s3, s5, s3
+; GFX1250-GISEL-TRUE16-NEXT: s_lshr_b32 s4, s7, 16
+; GFX1250-GISEL-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX1250-GISEL-TRUE16-NEXT: s_and_b32 s4, s4, 0x8000
+; GFX1250-GISEL-TRUE16-NEXT: s_or_b32 s3, s4, s3
+; GFX1250-GISEL-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-GISEL-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s2, s3
+; GFX1250-GISEL-TRUE16-NEXT: s_mov_b32 s3, 0x31016000
+; GFX1250-GISEL-TRUE16-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-GISEL-TRUE16-NEXT: s_mov_b32 s2, -1
+; GFX1250-GISEL-TRUE16-NEXT: buffer_store_b32 v0, off, s[0:3], null
+; GFX1250-GISEL-TRUE16-NEXT: s_endpgm
+;
; GFX1250-GISEL-FAKE16-LABEL: fptrunc_v2f64_to_v2f16:
; GFX1250-GISEL-FAKE16: ; %bb.0: ; %entry
; GFX1250-GISEL-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
@@ -3481,6 +3956,27 @@ define amdgpu_kernel void @fptrunc_v2f64_to_v2f16_afn(
; GFX11-GISEL-FAKE16-NEXT: buffer_store_b32 v0, off, s[0:3], 0
; GFX11-GISEL-FAKE16-NEXT: s_endpgm
;
+; GFX1250-SDAG-TRUE16-LABEL: fptrunc_v2f64_to_v2f16_afn:
+; GFX1250-SDAG-TRUE16: ; %bb.0: ; %entry
+; GFX1250-SDAG-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s6, -1
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s7, 0x31016000
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s10, s6
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s11, s7
+; GFX1250-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s8, s2
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s9, s3
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s4, s0
+; GFX1250-SDAG-TRUE16-NEXT: buffer_load_b128 v[0:3], off, s[8:11], null
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s5, s1
+; GFX1250-SDAG-TRUE16-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-TRUE16-NEXT: v_cvt_f32_f64_e32 v2, v[2:3]
+; GFX1250-SDAG-TRUE16-NEXT: v_cvt_f32_f64_e32 v0, v[0:1]
+; GFX1250-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-TRUE16-NEXT: v_cvt_pk_f16_f32 v0, v0, v2
+; GFX1250-SDAG-TRUE16-NEXT: buffer_store_b32 v0, off, s[4:7], null
+; GFX1250-SDAG-TRUE16-NEXT: s_endpgm
+;
; GFX1250-SDAG-FAKE16-LABEL: fptrunc_v2f64_to_v2f16_afn:
; GFX1250-SDAG-FAKE16: ; %bb.0: ; %entry
; GFX1250-SDAG-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
@@ -3502,6 +3998,25 @@ define amdgpu_kernel void @fptrunc_v2f64_to_v2f16_afn(
; GFX1250-SDAG-FAKE16-NEXT: buffer_store_b32 v0, off, s[4:7], null
; GFX1250-SDAG-FAKE16-NEXT: s_endpgm
;
+; GFX1250-GISEL-TRUE16-LABEL: fptrunc_v2f64_to_v2f16_afn:
+; GFX1250-GISEL-TRUE16: ; %bb.0: ; %entry
+; GFX1250-GISEL-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-GISEL-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-GISEL-TRUE16-NEXT: s_load_b128 s[4:7], s[2:3], 0x0
+; GFX1250-GISEL-TRUE16-NEXT: s_wait_xcnt 0x0
+; GFX1250-GISEL-TRUE16-NEXT: s_mov_b32 s2, -1
+; GFX1250-GISEL-TRUE16-NEXT: s_mov_b32 s3, 0x31016000
+; GFX1250-GISEL-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-GISEL-TRUE16-NEXT: v_cvt_f32_f64_e32 v0, s[4:5]
+; GFX1250-GISEL-TRUE16-NEXT: v_cvt_f32_f64_e32 v1, s[6:7]
+; GFX1250-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX1250-GISEL-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.l, v0
+; GFX1250-GISEL-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.h, v1
+; GFX1250-GISEL-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-GISEL-TRUE16-NEXT: v_pack_b32_f16 v0, v0.l, v0.h
+; GFX1250-GISEL-TRUE16-NEXT: buffer_store_b32 v0, off, s[0:3], null
+; GFX1250-GISEL-TRUE16-NEXT: s_endpgm
+;
; GFX1250-GISEL-FAKE16-LABEL: fptrunc_v2f64_to_v2f16_afn:
; GFX1250-GISEL-FAKE16: ; %bb.0: ; %entry
; GFX1250-GISEL-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
@@ -3710,6 +4225,26 @@ define amdgpu_kernel void @fneg_fptrunc_f32_to_f16(
; GFX11-GISEL-FAKE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0
; GFX11-GISEL-FAKE16-NEXT: s_endpgm
;
+; GFX1250-SDAG-TRUE16-LABEL: fneg_fptrunc_f32_to_f16:
+; GFX1250-SDAG-TRUE16: ; %bb.0: ; %entry
+; GFX1250-SDAG-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s6, -1
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s7, 0x31016000
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s10, s6
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s11, s7
+; GFX1250-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s8, s2
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s9, s3
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s4, s0
+; GFX1250-SDAG-TRUE16-NEXT: buffer_load_b32 v0, off, s[8:11], null
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s5, s1
+; GFX1250-SDAG-TRUE16-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-TRUE16-NEXT: v_xor_b32_e32 v0, 0x80000000, v0
+; GFX1250-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.l, v0
+; GFX1250-SDAG-TRUE16-NEXT: buffer_store_b16 v0, off, s[4:7], null
+; GFX1250-SDAG-TRUE16-NEXT: s_endpgm
+;
; GFX1250-SDAG-FAKE16-LABEL: fneg_fptrunc_f32_to_f16:
; GFX1250-SDAG-FAKE16: ; %bb.0: ; %entry
; GFX1250-SDAG-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
@@ -3730,6 +4265,22 @@ define amdgpu_kernel void @fneg_fptrunc_f32_to_f16(
; GFX1250-SDAG-FAKE16-NEXT: buffer_store_b16 v0, off, s[4:7], null
; GFX1250-SDAG-FAKE16-NEXT: s_endpgm
;
+; GFX1250-GISEL-TRUE16-LABEL: fneg_fptrunc_f32_to_f16:
+; GFX1250-GISEL-TRUE16: ; %bb.0: ; %entry
+; GFX1250-GISEL-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-GISEL-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-GISEL-TRUE16-NEXT: s_load_b32 s2, s[2:3], 0x0
+; GFX1250-GISEL-TRUE16-NEXT: s_wait_xcnt 0x0
+; GFX1250-GISEL-TRUE16-NEXT: s_mov_b32 s3, 0x31016000
+; GFX1250-GISEL-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-GISEL-TRUE16-NEXT: s_xor_b32 s2, s2, 0x80000000
+; GFX1250-GISEL-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_3)
+; GFX1250-GISEL-TRUE16-NEXT: s_cvt_f16_f32 s2, s2
+; GFX1250-GISEL-TRUE16-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-GISEL-TRUE16-NEXT: s_mov_b32 s2, -1
+; GFX1250-GISEL-TRUE16-NEXT: buffer_store_b16 v0, off, s[0:3], null
+; GFX1250-GISEL-TRUE16-NEXT: s_endpgm
+;
; GFX1250-GISEL-FAKE16-LABEL: fneg_fptrunc_f32_to_f16:
; GFX1250-GISEL-FAKE16: ; %bb.0: ; %entry
; GFX1250-GISEL-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
@@ -3936,6 +4487,26 @@ define amdgpu_kernel void @fabs_fptrunc_f32_to_f16(
; GFX11-GISEL-FAKE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0
; GFX11-GISEL-FAKE16-NEXT: s_endpgm
;
+; GFX1250-SDAG-TRUE16-LABEL: fabs_fptrunc_f32_to_f16:
+; GFX1250-SDAG-TRUE16: ; %bb.0: ; %entry
+; GFX1250-SDAG-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s6, -1
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s7, 0x31016000
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s10, s6
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s11, s7
+; GFX1250-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s8, s2
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s9, s3
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s4, s0
+; GFX1250-SDAG-TRUE16-NEXT: buffer_load_b32 v0, off, s[8:11], null
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s5, s1
+; GFX1250-SDAG-TRUE16-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fffffff, v0
+; GFX1250-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.l, v0
+; GFX1250-SDAG-TRUE16-NEXT: buffer_store_b16 v0, off, s[4:7], null
+; GFX1250-SDAG-TRUE16-NEXT: s_endpgm
+;
; GFX1250-SDAG-FAKE16-LABEL: fabs_fptrunc_f32_to_f16:
; GFX1250-SDAG-FAKE16: ; %bb.0: ; %entry
; GFX1250-SDAG-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
@@ -3956,6 +4527,22 @@ define amdgpu_kernel void @fabs_fptrunc_f32_to_f16(
; GFX1250-SDAG-FAKE16-NEXT: buffer_store_b16 v0, off, s[4:7], null
; GFX1250-SDAG-FAKE16-NEXT: s_endpgm
;
+; GFX1250-GISEL-TRUE16-LABEL: fabs_fptrunc_f32_to_f16:
+; GFX1250-GISEL-TRUE16: ; %bb.0: ; %entry
+; GFX1250-GISEL-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-GISEL-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-GISEL-TRUE16-NEXT: s_load_b32 s2, s[2:3], 0x0
+; GFX1250-GISEL-TRUE16-NEXT: s_wait_xcnt 0x0
+; GFX1250-GISEL-TRUE16-NEXT: s_mov_b32 s3, 0x31016000
+; GFX1250-GISEL-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-GISEL-TRUE16-NEXT: s_bitset0_b32 s2, 31
+; GFX1250-GISEL-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_3)
+; GFX1250-GISEL-TRUE16-NEXT: s_cvt_f16_f32 s2, s2
+; GFX1250-GISEL-TRUE16-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-GISEL-TRUE16-NEXT: s_mov_b32 s2, -1
+; GFX1250-GISEL-TRUE16-NEXT: buffer_store_b16 v0, off, s[0:3], null
+; GFX1250-GISEL-TRUE16-NEXT: s_endpgm
+;
; GFX1250-GISEL-FAKE16-LABEL: fabs_fptrunc_f32_to_f16:
; GFX1250-GISEL-FAKE16: ; %bb.0: ; %entry
; GFX1250-GISEL-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
@@ -4162,6 +4749,26 @@ define amdgpu_kernel void @fneg_fabs_fptrunc_f32_to_f16(
; GFX11-GISEL-FAKE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0
; GFX11-GISEL-FAKE16-NEXT: s_endpgm
;
+; GFX1250-SDAG-TRUE16-LABEL: fneg_fabs_fptrunc_f32_to_f16:
+; GFX1250-SDAG-TRUE16: ; %bb.0: ; %entry
+; GFX1250-SDAG-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s6, -1
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s7, 0x31016000
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s10, s6
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s11, s7
+; GFX1250-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s8, s2
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s9, s3
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s4, s0
+; GFX1250-SDAG-TRUE16-NEXT: buffer_load_b32 v0, off, s[8:11], null
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s5, s1
+; GFX1250-SDAG-TRUE16-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-TRUE16-NEXT: v_or_b32_e32 v0, 0x80000000, v0
+; GFX1250-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.l, v0
+; GFX1250-SDAG-TRUE16-NEXT: buffer_store_b16 v0, off, s[4:7], null
+; GFX1250-SDAG-TRUE16-NEXT: s_endpgm
+;
; GFX1250-SDAG-FAKE16-LABEL: fneg_fabs_fptrunc_f32_to_f16:
; GFX1250-SDAG-FAKE16: ; %bb.0: ; %entry
; GFX1250-SDAG-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
@@ -4182,6 +4789,22 @@ define amdgpu_kernel void @fneg_fabs_fptrunc_f32_to_f16(
; GFX1250-SDAG-FAKE16-NEXT: buffer_store_b16 v0, off, s[4:7], null
; GFX1250-SDAG-FAKE16-NEXT: s_endpgm
;
+; GFX1250-GISEL-TRUE16-LABEL: fneg_fabs_fptrunc_f32_to_f16:
+; GFX1250-GISEL-TRUE16: ; %bb.0: ; %entry
+; GFX1250-GISEL-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-GISEL-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-GISEL-TRUE16-NEXT: s_load_b32 s2, s[2:3], 0x0
+; GFX1250-GISEL-TRUE16-NEXT: s_wait_xcnt 0x0
+; GFX1250-GISEL-TRUE16-NEXT: s_mov_b32 s3, 0x31016000
+; GFX1250-GISEL-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-GISEL-TRUE16-NEXT: s_bitset1_b32 s2, 31
+; GFX1250-GISEL-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_3)
+; GFX1250-GISEL-TRUE16-NEXT: s_cvt_f16_f32 s2, s2
+; GFX1250-GISEL-TRUE16-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-GISEL-TRUE16-NEXT: s_mov_b32 s2, -1
+; GFX1250-GISEL-TRUE16-NEXT: buffer_store_b16 v0, off, s[0:3], null
+; GFX1250-GISEL-TRUE16-NEXT: s_endpgm
+;
; GFX1250-GISEL-FAKE16-LABEL: fneg_fabs_fptrunc_f32_to_f16:
; GFX1250-GISEL-FAKE16: ; %bb.0: ; %entry
; GFX1250-GISEL-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
@@ -4396,6 +5019,26 @@ define amdgpu_kernel void @fptrunc_f32_to_f16_zext_i32(
; GFX11-GISEL-FAKE16-NEXT: buffer_store_b32 v0, off, s[0:3], 0
; GFX11-GISEL-FAKE16-NEXT: s_endpgm
;
+; GFX1250-SDAG-TRUE16-LABEL: fptrunc_f32_to_f16_zext_i32:
+; GFX1250-SDAG-TRUE16: ; %bb.0: ; %entry
+; GFX1250-SDAG-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s6, -1
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s7, 0x31016000
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s10, s6
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s11, s7
+; GFX1250-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s8, s2
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s9, s3
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s4, s0
+; GFX1250-SDAG-TRUE16-NEXT: buffer_load_b32 v0, off, s[8:11], null
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s5, s1
+; GFX1250-SDAG-TRUE16-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.l, v0
+; GFX1250-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX1250-SDAG-TRUE16-NEXT: buffer_store_b32 v0, off, s[4:7], null
+; GFX1250-SDAG-TRUE16-NEXT: s_endpgm
+;
; GFX1250-SDAG-FAKE16-LABEL: fptrunc_f32_to_f16_zext_i32:
; GFX1250-SDAG-FAKE16: ; %bb.0: ; %entry
; GFX1250-SDAG-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
@@ -4416,6 +5059,22 @@ define amdgpu_kernel void @fptrunc_f32_to_f16_zext_i32(
; GFX1250-SDAG-FAKE16-NEXT: buffer_store_b32 v0, off, s[4:7], null
; GFX1250-SDAG-FAKE16-NEXT: s_endpgm
;
+; GFX1250-GISEL-TRUE16-LABEL: fptrunc_f32_to_f16_zext_i32:
+; GFX1250-GISEL-TRUE16: ; %bb.0: ; %entry
+; GFX1250-GISEL-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-GISEL-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-GISEL-TRUE16-NEXT: s_load_b32 s2, s[2:3], 0x0
+; GFX1250-GISEL-TRUE16-NEXT: s_wait_xcnt 0x0
+; GFX1250-GISEL-TRUE16-NEXT: s_mov_b32 s3, 0x31016000
+; GFX1250-GISEL-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-GISEL-TRUE16-NEXT: s_cvt_f16_f32 s2, s2
+; GFX1250-GISEL-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_3) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX1250-GISEL-TRUE16-NEXT: s_and_b32 s2, 0xffff, s2
+; GFX1250-GISEL-TRUE16-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-GISEL-TRUE16-NEXT: s_mov_b32 s2, -1
+; GFX1250-GISEL-TRUE16-NEXT: buffer_store_b32 v0, off, s[0:3], null
+; GFX1250-GISEL-TRUE16-NEXT: s_endpgm
+;
; GFX1250-GISEL-FAKE16-LABEL: fptrunc_f32_to_f16_zext_i32:
; GFX1250-GISEL-FAKE16: ; %bb.0: ; %entry
; GFX1250-GISEL-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
@@ -4630,6 +5289,27 @@ define amdgpu_kernel void @fptrunc_fabs_f32_to_f16_zext_i32(
; GFX11-GISEL-FAKE16-NEXT: buffer_store_b32 v0, off, s[0:3], 0
; GFX11-GISEL-FAKE16-NEXT: s_endpgm
;
+; GFX1250-SDAG-TRUE16-LABEL: fptrunc_fabs_f32_to_f16_zext_i32:
+; GFX1250-SDAG-TRUE16: ; %bb.0: ; %entry
+; GFX1250-SDAG-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s6, -1
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s7, 0x31016000
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s10, s6
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s11, s7
+; GFX1250-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s8, s2
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s9, s3
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s4, s0
+; GFX1250-SDAG-TRUE16-NEXT: buffer_load_b32 v0, off, s[8:11], null
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s5, s1
+; GFX1250-SDAG-TRUE16-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fffffff, v0
+; GFX1250-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX1250-SDAG-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.l, v0
+; GFX1250-SDAG-TRUE16-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; GFX1250-SDAG-TRUE16-NEXT: buffer_store_b32 v0, off, s[4:7], null
+; GFX1250-SDAG-TRUE16-NEXT: s_endpgm
+;
; GFX1250-SDAG-FAKE16-LABEL: fptrunc_fabs_f32_to_f16_zext_i32:
; GFX1250-SDAG-FAKE16: ; %bb.0: ; %entry
; GFX1250-SDAG-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
@@ -4651,6 +5331,24 @@ define amdgpu_kernel void @fptrunc_fabs_f32_to_f16_zext_i32(
; GFX1250-SDAG-FAKE16-NEXT: buffer_store_b32 v0, off, s[4:7], null
; GFX1250-SDAG-FAKE16-NEXT: s_endpgm
;
+; GFX1250-GISEL-TRUE16-LABEL: fptrunc_fabs_f32_to_f16_zext_i32:
+; GFX1250-GISEL-TRUE16: ; %bb.0: ; %entry
+; GFX1250-GISEL-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-GISEL-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-GISEL-TRUE16-NEXT: s_load_b32 s2, s[2:3], 0x0
+; GFX1250-GISEL-TRUE16-NEXT: s_wait_xcnt 0x0
+; GFX1250-GISEL-TRUE16-NEXT: s_mov_b32 s3, 0x31016000
+; GFX1250-GISEL-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-GISEL-TRUE16-NEXT: s_bitset0_b32 s2, 31
+; GFX1250-GISEL-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_3)
+; GFX1250-GISEL-TRUE16-NEXT: s_cvt_f16_f32 s2, s2
+; GFX1250-GISEL-TRUE16-NEXT: s_and_b32 s2, 0xffff, s2
+; GFX1250-GISEL-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX1250-GISEL-TRUE16-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-GISEL-TRUE16-NEXT: s_mov_b32 s2, -1
+; GFX1250-GISEL-TRUE16-NEXT: buffer_store_b32 v0, off, s[0:3], null
+; GFX1250-GISEL-TRUE16-NEXT: s_endpgm
+;
; GFX1250-GISEL-FAKE16-LABEL: fptrunc_fabs_f32_to_f16_zext_i32:
; GFX1250-GISEL-FAKE16: ; %bb.0: ; %entry
; GFX1250-GISEL-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
@@ -4877,6 +5575,26 @@ define amdgpu_kernel void @fptrunc_f32_to_f16_sext_i32(
; GFX11-GISEL-FAKE16-NEXT: buffer_store_b32 v0, off, s[0:3], 0
; GFX11-GISEL-FAKE16-NEXT: s_endpgm
;
+; GFX1250-SDAG-TRUE16-LABEL: fptrunc_f32_to_f16_sext_i32:
+; GFX1250-SDAG-TRUE16: ; %bb.0: ; %entry
+; GFX1250-SDAG-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s6, -1
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s7, 0x31016000
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s10, s6
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s11, s7
+; GFX1250-SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s8, s2
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s9, s3
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s4, s0
+; GFX1250-SDAG-TRUE16-NEXT: buffer_load_b32 v0, off, s[8:11], null
+; GFX1250-SDAG-TRUE16-NEXT: s_mov_b32 s5, s1
+; GFX1250-SDAG-TRUE16-NEXT: s_wait_loadcnt 0x0
+; GFX1250-SDAG-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.l, v0
+; GFX1250-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX1250-SDAG-TRUE16-NEXT: v_bfe_i32 v0, v0, 0, 16
+; GFX1250-SDAG-TRUE16-NEXT: buffer_store_b32 v0, off, s[4:7], null
+; GFX1250-SDAG-TRUE16-NEXT: s_endpgm
+;
; GFX1250-SDAG-FAKE16-LABEL: fptrunc_f32_to_f16_sext_i32:
; GFX1250-SDAG-FAKE16: ; %bb.0: ; %entry
; GFX1250-SDAG-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
@@ -4897,6 +5615,22 @@ define amdgpu_kernel void @fptrunc_f32_to_f16_sext_i32(
; GFX1250-SDAG-FAKE16-NEXT: buffer_store_b32 v0, off, s[4:7], null
; GFX1250-SDAG-FAKE16-NEXT: s_endpgm
;
+; GFX1250-GISEL-TRUE16-LABEL: fptrunc_f32_to_f16_sext_i32:
+; GFX1250-GISEL-TRUE16: ; %bb.0: ; %entry
+; GFX1250-GISEL-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX1250-GISEL-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-GISEL-TRUE16-NEXT: s_load_b32 s2, s[2:3], 0x0
+; GFX1250-GISEL-TRUE16-NEXT: s_wait_xcnt 0x0
+; GFX1250-GISEL-TRUE16-NEXT: s_mov_b32 s3, 0x31016000
+; GFX1250-GISEL-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-GISEL-TRUE16-NEXT: s_cvt_f16_f32 s2, s2
+; GFX1250-GISEL-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_3) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX1250-GISEL-TRUE16-NEXT: s_sext_i32_i16 s2, s2
+; GFX1250-GISEL-TRUE16-NEXT: v_mov_b32_e32 v0, s2
+; GFX1250-GISEL-TRUE16-NEXT: s_mov_b32 s2, -1
+; GFX1250-GISEL-TRUE16-NEXT: buffer_store_b32 v0, off, s[0:3], null
+; GFX1250-GISEL-TRUE16-NEXT: s_endpgm
+;
; GFX1250-GISEL-FAKE16-LABEL: fptrunc_f32_to_f16_sext_i32:
; GFX1250-GISEL-FAKE16: ; %bb.0: ; %entry
; GFX1250-GISEL-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
diff --git a/llvm/test/CodeGen/AMDGPU/fsub.ll b/llvm/test/CodeGen/AMDGPU/fsub.ll
index 743431c..d6a9cb1 100644
--- a/llvm/test/CodeGen/AMDGPU/fsub.ll
+++ b/llvm/test/CodeGen/AMDGPU/fsub.ll
@@ -92,43 +92,11 @@ define amdgpu_kernel void @v_fneg_fsub_nsz_f32(ptr addrspace(1) %out, ptr addrsp
ret void
}
-; FUNC-LABEL: {{^}}v_fneg_fsub_nsz_attribute_f32:
-; SI: v_sub_f32_e32 [[SUB:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}
-; SI-NOT: xor
-define amdgpu_kernel void @v_fneg_fsub_nsz_attribute_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
- %b_ptr = getelementptr float, ptr addrspace(1) %in, i32 1
- %a = load float, ptr addrspace(1) %in, align 4
- %b = load float, ptr addrspace(1) %b_ptr, align 4
- %result = fsub float %a, %b
- %neg.result = fsub float -0.0, %result
- store float %neg.result, ptr addrspace(1) %out, align 4
- ret void
-}
-
-; For some reason the attribute has a string "true" or "false", so
-; make sure it is disabled and the fneg is not folded if it is not
-; "true".
-; FUNC-LABEL: {{^}}v_fneg_fsub_nsz_false_attribute_f32:
-; SI: v_sub_f32_e32 [[SUB:v[0-9]+]], {{v[0-9]+}}, {{v[0-9]+}}
-; SI: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[SUB]]
-define amdgpu_kernel void @v_fneg_fsub_nsz_false_attribute_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #1 {
- %b_ptr = getelementptr float, ptr addrspace(1) %in, i32 1
- %a = load float, ptr addrspace(1) %in, align 4
- %b = load float, ptr addrspace(1) %b_ptr, align 4
- %result = fsub float %a, %b
- %neg.result = fsub float -0.0, %result
- store float %neg.result, ptr addrspace(1) %out, align 4
- ret void
-}
-
-; FUNC-LABEL: {{^}}v_fsub_0_nsz_attribute_f32:
+; FUNC-LABEL: {{^}}v_fsub_0_nsz_flag_f32:
; SI-NOT: v_sub
-define amdgpu_kernel void @v_fsub_0_nsz_attribute_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) #0 {
+define amdgpu_kernel void @v_fsub_0_nsz_flag_f32(ptr addrspace(1) %out, ptr addrspace(1) %in) {
%a = load float, ptr addrspace(1) %in, align 4
- %result = fsub float %a, 0.0
+ %result = fsub nsz float %a, 0.0
store float %result, ptr addrspace(1) %out, align 4
ret void
}
-
-attributes #0 = { nounwind "no-signed-zeros-fp-math"="true" }
-attributes #1 = { nounwind "no-signed-zeros-fp-math"="false" }
diff --git a/llvm/test/CodeGen/AMDGPU/implicitarg-offset-attributes.ll b/llvm/test/CodeGen/AMDGPU/implicitarg-offset-attributes.ll
index 3089054..32f7d6b 100644
--- a/llvm/test/CodeGen/AMDGPU/implicitarg-offset-attributes.ll
+++ b/llvm/test/CodeGen/AMDGPU/implicitarg-offset-attributes.ll
@@ -276,23 +276,23 @@ attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memo
;.
; V4: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
-; V4: attributes #[[ATTR1]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
-; V4: attributes #[[ATTR2]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
-; V4: attributes #[[ATTR3]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
-; V4: attributes #[[ATTR4]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-default-queue" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
-; V4: attributes #[[ATTR5]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
+; V4: attributes #[[ATTR1]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
+; V4: attributes #[[ATTR2]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
+; V4: attributes #[[ATTR3]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
+; V4: attributes #[[ATTR4]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-default-queue" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
+; V4: attributes #[[ATTR5]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
;.
; V5: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
-; V5: attributes #[[ATTR1]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
-; V5: attributes #[[ATTR2]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
-; V5: attributes #[[ATTR3]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-default-queue" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
-; V5: attributes #[[ATTR4]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
+; V5: attributes #[[ATTR1]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
+; V5: attributes #[[ATTR2]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
+; V5: attributes #[[ATTR3]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-default-queue" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
+; V5: attributes #[[ATTR4]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
;.
; V6: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
-; V6: attributes #[[ATTR1]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
-; V6: attributes #[[ATTR2]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
-; V6: attributes #[[ATTR3]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-default-queue" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
-; V6: attributes #[[ATTR4]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
+; V6: attributes #[[ATTR1]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
+; V6: attributes #[[ATTR2]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
+; V6: attributes #[[ATTR3]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-default-queue" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
+; V6: attributes #[[ATTR4]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-multigrid-sync-arg" "uniform-work-group-size"="false" }
;.
; V4: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 400}
;.
diff --git a/llvm/test/CodeGen/AMDGPU/indirect-call-set-from-other-function.ll b/llvm/test/CodeGen/AMDGPU/indirect-call-set-from-other-function.ll
index d3ef1b7..a0f5d2f 100644
--- a/llvm/test/CodeGen/AMDGPU/indirect-call-set-from-other-function.ll
+++ b/llvm/test/CodeGen/AMDGPU/indirect-call-set-from-other-function.ll
@@ -68,6 +68,6 @@ if.end:
ret void
}
;.
-; CHECK: attributes #[[ATTR0]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; CHECK: attributes #[[ATTR1]] = { "uniform-work-group-size"="false" }
;.
diff --git a/llvm/test/CodeGen/AMDGPU/issue120256-annotate-constexpr-addrspacecast.ll b/llvm/test/CodeGen/AMDGPU/issue120256-annotate-constexpr-addrspacecast.ll
index 71a330e..4e952b6 100644
--- a/llvm/test/CodeGen/AMDGPU/issue120256-annotate-constexpr-addrspacecast.ll
+++ b/llvm/test/CodeGen/AMDGPU/issue120256-annotate-constexpr-addrspacecast.ll
@@ -55,8 +55,8 @@ define amdgpu_kernel void @issue120256_private(ptr addrspace(1) %out) {
; FIXME: Inference of amdgpu-no-queue-ptr should not depend on code object version.
!0 = !{i32 1, !"amdhsa_code_object_version", i32 400}
;.
-; CHECK: attributes #[[ATTR0]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx803" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR1]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx803" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx803" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "target-cpu"="gfx803" "uniform-work-group-size"="false" }
;.
; CHECK: [[META0:![0-9]+]] = !{i32 1, !"amdhsa_code_object_version", i32 400}
;.
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.f16.ll
index 6ccfad7..ff47563 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.cvt.fp8.f16.ll
@@ -14,7 +14,7 @@ define amdgpu_ps void @test_cvt_pk_bf8_f16_v(<2 x half> %a, ptr addrspace(1) %ou
; GFX1250-SDAG-REAL16: ; %bb.0:
; GFX1250-SDAG-REAL16-NEXT: v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v2, v1
; GFX1250-SDAG-REAL16-NEXT: v_cvt_pk_bf8_f16 v0.l, v0
-; GFX1250-SDAG-REAL16-NEXT: flat_store_b16 v[2:3], v0
+; GFX1250-SDAG-REAL16-NEXT: global_store_b16 v[2:3], v0, off
; GFX1250-SDAG-REAL16-NEXT: s_endpgm
;
; GFX1250-SDAG-FAKE16-LABEL: test_cvt_pk_bf8_f16_v:
@@ -28,7 +28,7 @@ define amdgpu_ps void @test_cvt_pk_bf8_f16_v(<2 x half> %a, ptr addrspace(1) %ou
; GFX1250-GISEL-REAL16: ; %bb.0:
; GFX1250-GISEL-REAL16-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
; GFX1250-GISEL-REAL16-NEXT: v_cvt_pk_bf8_f16 v0.l, v0
-; GFX1250-GISEL-REAL16-NEXT: flat_store_b16 v[4:5], v0
+; GFX1250-GISEL-REAL16-NEXT: global_store_b16 v[4:5], v0, off
; GFX1250-GISEL-REAL16-NEXT: s_endpgm
;
; GFX1250-GISEL-FAKE16-LABEL: test_cvt_pk_bf8_f16_v:
@@ -46,7 +46,7 @@ define amdgpu_ps void @test_cvt_pk_bf8_f16_s(<2 x half> inreg %a, ptr addrspace(
; GFX1250-SDAG-REAL16-LABEL: test_cvt_pk_bf8_f16_s:
; GFX1250-SDAG-REAL16: ; %bb.0:
; GFX1250-SDAG-REAL16-NEXT: v_cvt_pk_bf8_f16 v2.l, s0
-; GFX1250-SDAG-REAL16-NEXT: flat_store_b16 v[0:1], v2
+; GFX1250-SDAG-REAL16-NEXT: global_store_b16 v[0:1], v2, off
; GFX1250-SDAG-REAL16-NEXT: s_endpgm
;
; GFX1250-SDAG-FAKE16-LABEL: test_cvt_pk_bf8_f16_s:
@@ -58,7 +58,7 @@ define amdgpu_ps void @test_cvt_pk_bf8_f16_s(<2 x half> inreg %a, ptr addrspace(
; GFX1250-GISEL-REAL16-LABEL: test_cvt_pk_bf8_f16_s:
; GFX1250-GISEL-REAL16: ; %bb.0:
; GFX1250-GISEL-REAL16-NEXT: v_cvt_pk_bf8_f16 v2.l, s0
-; GFX1250-GISEL-REAL16-NEXT: flat_store_b16 v[0:1], v2
+; GFX1250-GISEL-REAL16-NEXT: global_store_b16 v[0:1], v2, off
; GFX1250-GISEL-REAL16-NEXT: s_endpgm
;
; GFX1250-GISEL-FAKE16-LABEL: test_cvt_pk_bf8_f16_s:
@@ -75,7 +75,7 @@ define amdgpu_ps void @test_cvt_pk_bf8_f16_l(ptr addrspace(1) %out) {
; GFX1250-SDAG-REAL16-LABEL: test_cvt_pk_bf8_f16_l:
; GFX1250-SDAG-REAL16: ; %bb.0:
; GFX1250-SDAG-REAL16-NEXT: v_cvt_pk_bf8_f16 v2.l, 0x56400000
-; GFX1250-SDAG-REAL16-NEXT: flat_store_b16 v[0:1], v2
+; GFX1250-SDAG-REAL16-NEXT: global_store_b16 v[0:1], v2, off
; GFX1250-SDAG-REAL16-NEXT: s_endpgm
;
; GFX1250-SDAG-FAKE16-LABEL: test_cvt_pk_bf8_f16_l:
@@ -87,7 +87,7 @@ define amdgpu_ps void @test_cvt_pk_bf8_f16_l(ptr addrspace(1) %out) {
; GFX1250-GISEL-REAL16-LABEL: test_cvt_pk_bf8_f16_l:
; GFX1250-GISEL-REAL16: ; %bb.0:
; GFX1250-GISEL-REAL16-NEXT: v_cvt_pk_bf8_f16 v2.l, 0x56400000
-; GFX1250-GISEL-REAL16-NEXT: flat_store_b16 v[0:1], v2
+; GFX1250-GISEL-REAL16-NEXT: global_store_b16 v[0:1], v2, off
; GFX1250-GISEL-REAL16-NEXT: s_endpgm
;
; GFX1250-GISEL-FAKE16-LABEL: test_cvt_pk_bf8_f16_l:
@@ -105,7 +105,7 @@ define amdgpu_ps void @test_cvt_pk_fp8_f16_v(<2 x half> %a, ptr addrspace(1) %ou
; GFX1250-SDAG-REAL16: ; %bb.0:
; GFX1250-SDAG-REAL16-NEXT: v_dual_mov_b32 v3, v2 :: v_dual_mov_b32 v2, v1
; GFX1250-SDAG-REAL16-NEXT: v_cvt_pk_fp8_f16 v0.l, v0
-; GFX1250-SDAG-REAL16-NEXT: flat_store_b16 v[2:3], v0
+; GFX1250-SDAG-REAL16-NEXT: global_store_b16 v[2:3], v0, off
; GFX1250-SDAG-REAL16-NEXT: s_endpgm
;
; GFX1250-SDAG-FAKE16-LABEL: test_cvt_pk_fp8_f16_v:
@@ -119,7 +119,7 @@ define amdgpu_ps void @test_cvt_pk_fp8_f16_v(<2 x half> %a, ptr addrspace(1) %ou
; GFX1250-GISEL-REAL16: ; %bb.0:
; GFX1250-GISEL-REAL16-NEXT: v_dual_mov_b32 v4, v1 :: v_dual_mov_b32 v5, v2
; GFX1250-GISEL-REAL16-NEXT: v_cvt_pk_fp8_f16 v0.l, v0
-; GFX1250-GISEL-REAL16-NEXT: flat_store_b16 v[4:5], v0
+; GFX1250-GISEL-REAL16-NEXT: global_store_b16 v[4:5], v0, off
; GFX1250-GISEL-REAL16-NEXT: s_endpgm
;
; GFX1250-GISEL-FAKE16-LABEL: test_cvt_pk_fp8_f16_v:
@@ -137,7 +137,7 @@ define amdgpu_ps void @test_cvt_pk_fp8_f16_s(<2 x half> inreg %a, ptr addrspace(
; GFX1250-SDAG-REAL16-LABEL: test_cvt_pk_fp8_f16_s:
; GFX1250-SDAG-REAL16: ; %bb.0:
; GFX1250-SDAG-REAL16-NEXT: v_cvt_pk_fp8_f16 v2.l, s0
-; GFX1250-SDAG-REAL16-NEXT: flat_store_b16 v[0:1], v2
+; GFX1250-SDAG-REAL16-NEXT: global_store_b16 v[0:1], v2, off
; GFX1250-SDAG-REAL16-NEXT: s_endpgm
;
; GFX1250-SDAG-FAKE16-LABEL: test_cvt_pk_fp8_f16_s:
@@ -149,7 +149,7 @@ define amdgpu_ps void @test_cvt_pk_fp8_f16_s(<2 x half> inreg %a, ptr addrspace(
; GFX1250-GISEL-REAL16-LABEL: test_cvt_pk_fp8_f16_s:
; GFX1250-GISEL-REAL16: ; %bb.0:
; GFX1250-GISEL-REAL16-NEXT: v_cvt_pk_fp8_f16 v2.l, s0
-; GFX1250-GISEL-REAL16-NEXT: flat_store_b16 v[0:1], v2
+; GFX1250-GISEL-REAL16-NEXT: global_store_b16 v[0:1], v2, off
; GFX1250-GISEL-REAL16-NEXT: s_endpgm
;
; GFX1250-GISEL-FAKE16-LABEL: test_cvt_pk_fp8_f16_s:
@@ -166,7 +166,7 @@ define amdgpu_ps void @test_cvt_pk_fp8_f16_l(ptr addrspace(1) %out) {
; GFX1250-SDAG-REAL16-LABEL: test_cvt_pk_fp8_f16_l:
; GFX1250-SDAG-REAL16: ; %bb.0:
; GFX1250-SDAG-REAL16-NEXT: v_cvt_pk_fp8_f16 v2.l, 0x56400000
-; GFX1250-SDAG-REAL16-NEXT: flat_store_b16 v[0:1], v2
+; GFX1250-SDAG-REAL16-NEXT: global_store_b16 v[0:1], v2, off
; GFX1250-SDAG-REAL16-NEXT: s_endpgm
;
; GFX1250-SDAG-FAKE16-LABEL: test_cvt_pk_fp8_f16_l:
@@ -178,7 +178,7 @@ define amdgpu_ps void @test_cvt_pk_fp8_f16_l(ptr addrspace(1) %out) {
; GFX1250-GISEL-REAL16-LABEL: test_cvt_pk_fp8_f16_l:
; GFX1250-GISEL-REAL16: ; %bb.0:
; GFX1250-GISEL-REAL16-NEXT: v_cvt_pk_fp8_f16 v2.l, 0x56400000
-; GFX1250-GISEL-REAL16-NEXT: flat_store_b16 v[0:1], v2
+; GFX1250-GISEL-REAL16-NEXT: global_store_b16 v[0:1], v2, off
; GFX1250-GISEL-REAL16-NEXT: s_endpgm
;
; GFX1250-GISEL-FAKE16-LABEL: test_cvt_pk_fp8_f16_l:
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rcp.bf16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rcp.bf16.ll
index 1e44a09..dbea832 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rcp.bf16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rcp.bf16.ll
@@ -15,7 +15,7 @@ define amdgpu_kernel void @rcp_bf16(ptr addrspace(1) %out, bfloat %src) #1 {
; SDAG-TRUE16-NEXT: v_mov_b32_e32 v1, 0
; SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
; SDAG-TRUE16-NEXT: v_rcp_bf16_e32 v0.l, s2
-; SDAG-TRUE16-NEXT: flat_store_b16 v1, v0, s[0:1]
+; SDAG-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1]
; SDAG-TRUE16-NEXT: s_endpgm
;
; SDAG-FAKE16-LABEL: rcp_bf16:
@@ -35,10 +35,10 @@ define amdgpu_kernel void @rcp_bf16_constant_4(ptr addrspace(1) %out) #1 {
; SDAG-TRUE16-LABEL: rcp_bf16_constant_4:
; SDAG-TRUE16: ; %bb.0:
; SDAG-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, 0x3e80
; SDAG-TRUE16-NEXT: v_mov_b32_e32 v1, 0
+; SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, 0x3e80
; SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
-; SDAG-TRUE16-NEXT: flat_store_b16 v1, v0, s[0:1]
+; SDAG-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1]
; SDAG-TRUE16-NEXT: s_endpgm
;
; SDAG-FAKE16-LABEL: rcp_bf16_constant_4:
@@ -57,10 +57,10 @@ define amdgpu_kernel void @rcp_bf16_constant_100(ptr addrspace(1) %out) #1 {
; SDAG-TRUE16-LABEL: rcp_bf16_constant_100:
; SDAG-TRUE16: ; %bb.0:
; SDAG-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, 0x3c24
; SDAG-TRUE16-NEXT: v_mov_b32_e32 v1, 0
+; SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, 0x3c24
; SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
-; SDAG-TRUE16-NEXT: flat_store_b16 v1, v0, s[0:1]
+; SDAG-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1]
; SDAG-TRUE16-NEXT: s_endpgm
;
; SDAG-FAKE16-LABEL: rcp_bf16_constant_100:
@@ -79,10 +79,10 @@ define amdgpu_kernel void @rcp_undef_bf16(ptr addrspace(1) %out) #1 {
; SDAG-TRUE16-LABEL: rcp_undef_bf16:
; SDAG-TRUE16: ; %bb.0:
; SDAG-TRUE16-NEXT: s_load_b64 s[0:1], s[4:5], 0x0
-; SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, 0x7fc0
; SDAG-TRUE16-NEXT: v_mov_b32_e32 v1, 0
+; SDAG-TRUE16-NEXT: v_mov_b16_e32 v0.l, 0x7fc0
; SDAG-TRUE16-NEXT: s_wait_kmcnt 0x0
-; SDAG-TRUE16-NEXT: flat_store_b16 v1, v0, s[0:1]
+; SDAG-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1]
; SDAG-TRUE16-NEXT: s_endpgm
;
; SDAG-FAKE16-LABEL: rcp_undef_bf16:
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.bf16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.bf16.ll
index 42d12fd..662dc613 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.bf16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.rsq.bf16.ll
@@ -15,7 +15,7 @@ define amdgpu_kernel void @rsq_bf16(ptr addrspace(1) %out, bfloat %src) #1 {
; SDAG-REAL16-NEXT: v_mov_b32_e32 v1, 0
; SDAG-REAL16-NEXT: s_wait_kmcnt 0x0
; SDAG-REAL16-NEXT: v_rsq_bf16_e32 v0.l, s2
-; SDAG-REAL16-NEXT: flat_store_b16 v1, v0, s[0:1]
+; SDAG-REAL16-NEXT: global_store_b16 v1, v0, s[0:1]
; SDAG-REAL16-NEXT: s_endpgm
;
; SDAG-FAKE16-LABEL: rsq_bf16:
@@ -38,7 +38,7 @@ define amdgpu_kernel void @rsq_bf16_constant_4(ptr addrspace(1) %out) #1 {
; SDAG-REAL16-NEXT: v_rsq_bf16_e32 v0.l, 4.0
; SDAG-REAL16-NEXT: v_mov_b32_e32 v1, 0
; SDAG-REAL16-NEXT: s_wait_kmcnt 0x0
-; SDAG-REAL16-NEXT: flat_store_b16 v1, v0, s[0:1]
+; SDAG-REAL16-NEXT: global_store_b16 v1, v0, s[0:1]
; SDAG-REAL16-NEXT: s_endpgm
;
; SDAG-FAKE16-LABEL: rsq_bf16_constant_4:
@@ -61,7 +61,7 @@ define amdgpu_kernel void @rsq_bf16_constant_100(ptr addrspace(1) %out) #1 {
; SDAG-REAL16-NEXT: v_rsq_bf16_e32 v0.l, 0x42c8
; SDAG-REAL16-NEXT: v_mov_b32_e32 v1, 0
; SDAG-REAL16-NEXT: s_wait_kmcnt 0x0
-; SDAG-REAL16-NEXT: flat_store_b16 v1, v0, s[0:1]
+; SDAG-REAL16-NEXT: global_store_b16 v1, v0, s[0:1]
; SDAG-REAL16-NEXT: s_endpgm
;
; SDAG-FAKE16-LABEL: rsq_bf16_constant_100:
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.tanh.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.tanh.ll
index dd89f80..ba769ef 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.tanh.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.tanh.ll
@@ -100,7 +100,7 @@ define amdgpu_kernel void @tanh_f16(ptr addrspace(1) %out, half %src) #1 {
; SDAG-REAL16-NEXT: v_mov_b32_e32 v1, 0
; SDAG-REAL16-NEXT: s_wait_kmcnt 0x0
; SDAG-REAL16-NEXT: v_tanh_f16_e32 v0.l, s2
-; SDAG-REAL16-NEXT: flat_store_b16 v1, v0, s[0:1]
+; SDAG-REAL16-NEXT: global_store_b16 v1, v0, s[0:1]
; SDAG-REAL16-NEXT: s_endpgm
;
; SDAG-FAKE16-LABEL: tanh_f16:
@@ -123,7 +123,7 @@ define amdgpu_kernel void @tanh_f16_constant_4.0(ptr addrspace(1) %out) #1 {
; SDAG-REAL16-NEXT: v_tanh_f16_e32 v0.l, 4.0
; SDAG-REAL16-NEXT: v_mov_b32_e32 v1, 0
; SDAG-REAL16-NEXT: s_wait_kmcnt 0x0
-; SDAG-REAL16-NEXT: flat_store_b16 v1, v0, s[0:1]
+; SDAG-REAL16-NEXT: global_store_b16 v1, v0, s[0:1]
; SDAG-REAL16-NEXT: s_endpgm
;
; SDAG-FAKE16-LABEL: tanh_f16_constant_4.0:
@@ -146,7 +146,7 @@ define amdgpu_kernel void @tanh_f16_constant_100.0(ptr addrspace(1) %out) #1 {
; SDAG-REAL16-NEXT: v_tanh_f16_e32 v0.l, 0x5640
; SDAG-REAL16-NEXT: v_mov_b32_e32 v1, 0
; SDAG-REAL16-NEXT: s_wait_kmcnt 0x0
-; SDAG-REAL16-NEXT: flat_store_b16 v1, v0, s[0:1]
+; SDAG-REAL16-NEXT: global_store_b16 v1, v0, s[0:1]
; SDAG-REAL16-NEXT: s_endpgm
;
; SDAG-FAKE16-LABEL: tanh_f16_constant_100.0:
@@ -182,7 +182,7 @@ define amdgpu_kernel void @tanh_bf16(ptr addrspace(1) %out, bfloat %src) #1 {
; SDAG-REAL16-NEXT: v_mov_b32_e32 v1, 0
; SDAG-REAL16-NEXT: s_wait_kmcnt 0x0
; SDAG-REAL16-NEXT: v_tanh_bf16_e32 v0.l, s2
-; SDAG-REAL16-NEXT: flat_store_b16 v1, v0, s[0:1]
+; SDAG-REAL16-NEXT: global_store_b16 v1, v0, s[0:1]
; SDAG-REAL16-NEXT: s_endpgm
;
; SDAG-FAKE16-LABEL: tanh_bf16:
@@ -205,7 +205,7 @@ define amdgpu_kernel void @tanh_bf16_constant_4(ptr addrspace(1) %out) #1 {
; SDAG-REAL16-NEXT: v_tanh_bf16_e32 v0.l, 4.0
; SDAG-REAL16-NEXT: v_mov_b32_e32 v1, 0
; SDAG-REAL16-NEXT: s_wait_kmcnt 0x0
-; SDAG-REAL16-NEXT: flat_store_b16 v1, v0, s[0:1]
+; SDAG-REAL16-NEXT: global_store_b16 v1, v0, s[0:1]
; SDAG-REAL16-NEXT: s_endpgm
;
; SDAG-FAKE16-LABEL: tanh_bf16_constant_4:
@@ -228,7 +228,7 @@ define amdgpu_kernel void @tanh_bf16_constant_100(ptr addrspace(1) %out) #1 {
; SDAG-REAL16-NEXT: v_tanh_bf16_e32 v0.l, 0x42c8
; SDAG-REAL16-NEXT: v_mov_b32_e32 v1, 0
; SDAG-REAL16-NEXT: s_wait_kmcnt 0x0
-; SDAG-REAL16-NEXT: flat_store_b16 v1, v0, s[0:1]
+; SDAG-REAL16-NEXT: global_store_b16 v1, v0, s[0:1]
; SDAG-REAL16-NEXT: s_endpgm
;
; SDAG-FAKE16-LABEL: tanh_bf16_constant_100:
diff --git a/llvm/test/CodeGen/AMDGPU/mad_int24.ll b/llvm/test/CodeGen/AMDGPU/mad_int24.ll
index 93fda94..dd88310 100644
--- a/llvm/test/CodeGen/AMDGPU/mad_int24.ll
+++ b/llvm/test/CodeGen/AMDGPU/mad_int24.ll
@@ -1,17 +1,79 @@
-; RUN: llc < %s -mtriple=amdgcn | FileCheck %s --check-prefix=GCN --check-prefix=FUNC
-; RUN: llc < %s -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global | FileCheck %s --check-prefix=GCN --check-prefix=FUNC
-; RUN: llc < %s -mtriple=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG --check-prefix=FUNC
-; RUN: llc < %s -mtriple=r600 -mcpu=cayman | FileCheck %s --check-prefix=CM --check-prefix=FUNC
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc < %s -mtriple=amdgcn| FileCheck %s --check-prefixes=GCN
+; RUN: llc < %s -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global | FileCheck %s --check-prefixes=VI
+; RUN: llc < %s -mtriple=r600 -mcpu=redwood | FileCheck %s --check-prefixes=EG,R600,RW
+; RUN: llc < %s -mtriple=r600 -mcpu=cayman | FileCheck %s --check-prefixes=EG,R600,CM
-; FUNC-LABEL: {{^}}i32_mad24:
; Signed 24-bit multiply is not supported on pre-Cayman GPUs.
-; EG: MULLO_INT
-; CM: MULLO_INT
-; GCN: s_bfe_i32
-; GCN: s_bfe_i32
-; GCN: s_mul_i32
-; GCN: s_add_i32
define amdgpu_kernel void @i32_mad24(ptr addrspace(1) %out, i32 %a, i32 %b, i32 %c) {
+; GCN-LABEL: i32_mad24:
+; GCN: ; %bb.0: ; %entry
+; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0xb
+; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x9
+; GCN-NEXT: s_mov_b32 s7, 0xf000
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_bfe_i32 s0, s0, 0x180000
+; GCN-NEXT: s_bfe_i32 s1, s1, 0x180000
+; GCN-NEXT: s_mul_i32 s0, s0, s1
+; GCN-NEXT: s_add_i32 s0, s0, s2
+; GCN-NEXT: s_mov_b32 s6, -1
+; GCN-NEXT: v_mov_b32_e32 v0, s0
+; GCN-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; GCN-NEXT: s_endpgm
+;
+; VI-LABEL: i32_mad24:
+; VI: ; %bb.0: ; %entry
+; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x2c
+; VI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x24
+; VI-NEXT: s_mov_b32 s7, 0xf000
+; VI-NEXT: s_mov_b32 s6, -1
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: s_bfe_i32 s0, s0, 0x180000
+; VI-NEXT: s_bfe_i32 s1, s1, 0x180000
+; VI-NEXT: s_mul_i32 s0, s0, s1
+; VI-NEXT: s_add_i32 s0, s0, s2
+; VI-NEXT: v_mov_b32_e32 v0, s0
+; VI-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; VI-NEXT: s_endpgm
+;
+; RW-LABEL: i32_mad24:
+; RW: ; %bb.0: ; %entry
+; RW-NEXT: ALU 9, @4, KC0[CB0:0-32], KC1[]
+; RW-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
+; RW-NEXT: CF_END
+; RW-NEXT: PAD
+; RW-NEXT: ALU clause starting at 4:
+; RW-NEXT: LSHL T0.W, KC0[2].Z, literal.x,
+; RW-NEXT: LSHL * T1.W, KC0[2].W, literal.x,
+; RW-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; RW-NEXT: ASHR T1.W, PS, literal.x,
+; RW-NEXT: ASHR * T0.W, PV.W, literal.x,
+; RW-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; RW-NEXT: MULLO_INT * T0.X, PS, PV.W,
+; RW-NEXT: ADD_INT T0.X, PS, KC0[3].X,
+; RW-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
+; RW-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+;
+; CM-LABEL: i32_mad24:
+; CM: ; %bb.0: ; %entry
+; CM-NEXT: ALU 12, @4, KC0[CB0:0-32], KC1[]
+; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X
+; CM-NEXT: CF_END
+; CM-NEXT: PAD
+; CM-NEXT: ALU clause starting at 4:
+; CM-NEXT: LSHL T0.Z, KC0[2].Z, literal.x,
+; CM-NEXT: LSHL * T0.W, KC0[2].W, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: ASHR T1.Z, PV.W, literal.x,
+; CM-NEXT: ASHR * T0.W, PV.Z, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: MULLO_INT T0.X, T0.W, T1.Z,
+; CM-NEXT: MULLO_INT T0.Y (MASKED), T0.W, T1.Z,
+; CM-NEXT: MULLO_INT T0.Z (MASKED), T0.W, T1.Z,
+; CM-NEXT: MULLO_INT * T0.W (MASKED), T0.W, T1.Z,
+; CM-NEXT: ADD_INT * T0.X, PV.X, KC0[3].X,
+; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
+; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
entry:
%0 = shl i32 %a, 8
%a_24 = ashr i32 %0, 8
@@ -23,13 +85,25 @@ entry:
ret void
}
-; GCN-LABEL: {{^}}mad24_known_bits_destroyed:
-; GCN: s_waitcnt
-; GCN-NEXT: v_mad_i32_i24
-; GCN-NEXT: v_mul_i32_i24
-; GCN-NEXT: s_setpc_b64
define i32 @mad24_known_bits_destroyed(i32 %a, i32 %b, i32 %c) {
-
+; GCN-LABEL: mad24_known_bits_destroyed:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_mad_i32_i24 v1, v0, v1, v2
+; GCN-NEXT: v_mul_i32_i24_e32 v0, v1, v0
+; GCN-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-LABEL: mad24_known_bits_destroyed:
+; VI: ; %bb.0:
+; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT: v_mad_i32_i24 v1, v0, v1, v2
+; VI-NEXT: v_mul_i32_i24_e32 v0, v1, v0
+; VI-NEXT: s_setpc_b64 s[30:31]
+;
+; EG-LABEL: mad24_known_bits_destroyed:
+; EG: ; %bb.0:
+; EG-NEXT: CF_END
+; EG-NEXT: PAD
%shl.0 = shl i32 %a, 8
%sra.0 = ashr i32 %shl.0, 8
%shl.1 = shl i32 %b, 8
@@ -48,12 +122,25 @@ define i32 @mad24_known_bits_destroyed(i32 %a, i32 %b, i32 %c) {
ret i32 %mul1
}
-; GCN-LABEL: {{^}}mad24_intrin_known_bits_destroyed:
-; GCN: s_waitcnt
-; GCN-NEXT: v_mad_i32_i24
-; GCN-NEXT: v_mul_i32_i24
-; GCN-NEXT: s_setpc_b64
define i32 @mad24_intrin_known_bits_destroyed(i32 %a, i32 %b, i32 %c) {
+; GCN-LABEL: mad24_intrin_known_bits_destroyed:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_mad_i32_i24 v1, v0, v1, v2
+; GCN-NEXT: v_mul_i32_i24_e32 v0, v1, v0
+; GCN-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-LABEL: mad24_intrin_known_bits_destroyed:
+; VI: ; %bb.0:
+; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT: v_mad_i32_i24 v1, v0, v1, v2
+; VI-NEXT: v_mul_i32_i24_e32 v0, v1, v0
+; VI-NEXT: s_setpc_b64 s[30:31]
+;
+; EG-LABEL: mad24_intrin_known_bits_destroyed:
+; EG: ; %bb.0:
+; EG-NEXT: CF_END
+; EG-NEXT: PAD
%shl.0 = shl i32 %a, 8
%sra.0 = ashr i32 %shl.0, 8
%shl.1 = shl i32 %b, 8
@@ -73,17 +160,177 @@ define i32 @mad24_intrin_known_bits_destroyed(i32 %a, i32 %b, i32 %c) {
}
; Make sure no unnecessary BFEs are emitted in the loop.
-; GCN-LABEL: {{^}}mad24_destroyed_knownbits_2:
-; GCN-NOT: v_bfe
-; GCN: v_mad_i32_i24
-; GCN-NOT: v_bfe
-; GCN: v_mad_i32_i24
-; GCN-NOT: v_bfe
-; GCN: v_mad_i32_i24
-; GCN-NOT: v_bfe
-; GCN: v_mad_i32_i24
-; GCN-NOT: v_bfe
define void @mad24_destroyed_knownbits_2(i32 %arg, i32 %arg1, i32 %arg2, ptr addrspace(1) %arg3) {
+; GCN-LABEL: mad24_destroyed_knownbits_2:
+; GCN: ; %bb.0: ; %bb
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_mov_b32_e32 v5, 1
+; GCN-NEXT: s_mov_b64 s[4:5], 0
+; GCN-NEXT: .LBB3_1: ; %bb6
+; GCN-NEXT: ; =>This Inner Loop Header: Depth=1
+; GCN-NEXT: v_mad_i32_i24 v0, v0, v5, v5
+; GCN-NEXT: v_add_i32_e32 v1, vcc, -1, v1
+; GCN-NEXT: v_mad_i32_i24 v5, v0, v5, v0
+; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
+; GCN-NEXT: v_mad_i32_i24 v0, v5, v0, v5
+; GCN-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
+; GCN-NEXT: v_mad_i32_i24 v0, v0, v5, v0
+; GCN-NEXT: v_mov_b32_e32 v5, v2
+; GCN-NEXT: s_andn2_b64 exec, exec, s[4:5]
+; GCN-NEXT: s_cbranch_execnz .LBB3_1
+; GCN-NEXT: ; %bb.2: ; %bb5
+; GCN-NEXT: s_or_b64 exec, exec, s[4:5]
+; GCN-NEXT: s_mov_b32 s6, 0
+; GCN-NEXT: s_mov_b32 s7, 0xf000
+; GCN-NEXT: s_mov_b32 s4, s6
+; GCN-NEXT: s_mov_b32 s5, s6
+; GCN-NEXT: buffer_store_dword v0, v[3:4], s[4:7], 0 addr64
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GCN-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-LABEL: mad24_destroyed_knownbits_2:
+; VI: ; %bb.0: ; %bb
+; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT: v_mov_b32_e32 v5, 1
+; VI-NEXT: s_mov_b64 s[4:5], 0
+; VI-NEXT: .LBB3_1: ; %bb6
+; VI-NEXT: ; =>This Inner Loop Header: Depth=1
+; VI-NEXT: v_mad_i32_i24 v0, v0, v5, v5
+; VI-NEXT: v_mad_i32_i24 v5, v0, v5, v0
+; VI-NEXT: v_add_u32_e32 v1, vcc, -1, v1
+; VI-NEXT: v_mad_i32_i24 v0, v5, v0, v5
+; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
+; VI-NEXT: v_mad_i32_i24 v0, v0, v5, v0
+; VI-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
+; VI-NEXT: v_mov_b32_e32 v5, v2
+; VI-NEXT: s_andn2_b64 exec, exec, s[4:5]
+; VI-NEXT: s_cbranch_execnz .LBB3_1
+; VI-NEXT: ; %bb.2: ; %bb5
+; VI-NEXT: s_or_b64 exec, exec, s[4:5]
+; VI-NEXT: flat_store_dword v[3:4], v0
+; VI-NEXT: s_waitcnt vmcnt(0)
+; VI-NEXT: s_setpc_b64 s[30:31]
+;
+; RW-LABEL: mad24_destroyed_knownbits_2:
+; RW: ; %bb.0: ; %bb
+; RW-NEXT: ALU 5, @10, KC0[CB0:0-32], KC1[]
+; RW-NEXT: LOOP_START_DX10 @7
+; RW-NEXT: ALU_PUSH_BEFORE 30, @16, KC0[], KC1[]
+; RW-NEXT: JUMP @6 POP:1
+; RW-NEXT: LOOP_BREAK @6
+; RW-NEXT: POP @6 POP:1
+; RW-NEXT: END_LOOP @2
+; RW-NEXT: ALU 1, @47, KC0[], KC1[]
+; RW-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
+; RW-NEXT: CF_END
+; RW-NEXT: ALU clause starting at 10:
+; RW-NEXT: MOV T0.X, KC0[2].Y,
+; RW-NEXT: MOV T0.Y, KC0[2].Z,
+; RW-NEXT: MOV * T0.Z, KC0[2].W,
+; RW-NEXT: MOV T0.W, KC0[3].X,
+; RW-NEXT: MOV * T1.W, literal.x,
+; RW-NEXT: 1(1.401298e-45), 0(0.000000e+00)
+; RW-NEXT: ALU clause starting at 16:
+; RW-NEXT: LSHL T2.W, T1.W, literal.x,
+; RW-NEXT: LSHL * T3.W, T0.X, literal.x,
+; RW-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; RW-NEXT: ASHR T3.W, PS, literal.x,
+; RW-NEXT: ASHR * T2.W, PV.W, literal.x,
+; RW-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; RW-NEXT: MULLO_INT * T0.X, PV.W, PS,
+; RW-NEXT: ADD_INT * T1.W, PS, T1.W,
+; RW-NEXT: LSHL * T3.W, PV.W, literal.x,
+; RW-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; RW-NEXT: ASHR * T3.W, PV.W, literal.x,
+; RW-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; RW-NEXT: MULLO_INT * T0.X, PV.W, T2.W,
+; RW-NEXT: ADD_INT * T1.W, PS, T1.W,
+; RW-NEXT: LSHL * T2.W, PV.W, literal.x,
+; RW-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; RW-NEXT: ASHR * T2.W, PV.W, literal.x,
+; RW-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; RW-NEXT: MULLO_INT * T0.X, PV.W, T3.W,
+; RW-NEXT: ADD_INT * T1.W, PS, T1.W,
+; RW-NEXT: LSHL * T3.W, PV.W, literal.x,
+; RW-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; RW-NEXT: ASHR * T3.W, PV.W, literal.x,
+; RW-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; RW-NEXT: ADD_INT T0.Y, T0.Y, literal.x,
+; RW-NEXT: MULLO_INT * T0.X, PV.W, T2.W,
+; RW-NEXT: -1(nan), 0(0.000000e+00)
+; RW-NEXT: ADD_INT T0.X, PS, T1.W,
+; RW-NEXT: SETE_INT T2.W, PV.Y, 0.0,
+; RW-NEXT: MOV * T1.W, T0.Z,
+; RW-NEXT: PRED_SETNE_INT * ExecMask,PredicateBit (MASKED), PV.W, 0.0,
+; RW-NEXT: ALU clause starting at 47:
+; RW-NEXT: LSHR * T1.X, T0.W, literal.x,
+; RW-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+;
+; CM-LABEL: mad24_destroyed_knownbits_2:
+; CM: ; %bb.0: ; %bb
+; CM-NEXT: ALU 5, @10, KC0[CB0:0-32], KC1[]
+; CM-NEXT: LOOP_START_DX10 @7
+; CM-NEXT: ALU_PUSH_BEFORE 41, @16, KC0[], KC1[]
+; CM-NEXT: JUMP @6 POP:1
+; CM-NEXT: LOOP_BREAK @6
+; CM-NEXT: POP @6 POP:1
+; CM-NEXT: END_LOOP @2
+; CM-NEXT: ALU 1, @58, KC0[], KC1[]
+; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T1.X, T0.X
+; CM-NEXT: CF_END
+; CM-NEXT: ALU clause starting at 10:
+; CM-NEXT: MOV * T1.X, KC0[2].Y,
+; CM-NEXT: MOV T0.X, KC0[2].Z,
+; CM-NEXT: MOV T0.Y, KC0[2].W,
+; CM-NEXT: MOV T0.Z, KC0[3].X,
+; CM-NEXT: MOV * T0.W, literal.x,
+; CM-NEXT: 1(1.401298e-45), 0(0.000000e+00)
+; CM-NEXT: ALU clause starting at 16:
+; CM-NEXT: LSHL T1.Z, T0.W, literal.x,
+; CM-NEXT: LSHL * T1.W, T1.X, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: ASHR T2.Z, PV.W, literal.x,
+; CM-NEXT: ASHR * T1.W, PV.Z, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: MULLO_INT T1.X, T2.Z, T1.W,
+; CM-NEXT: MULLO_INT T1.Y (MASKED), T2.Z, T1.W,
+; CM-NEXT: MULLO_INT T1.Z (MASKED), T2.Z, T1.W,
+; CM-NEXT: MULLO_INT * T1.W (MASKED), T2.Z, T1.W,
+; CM-NEXT: ADD_INT * T0.W, PV.X, T0.W,
+; CM-NEXT: LSHL * T2.W, PV.W, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: ASHR * T2.W, PV.W, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: MULLO_INT T1.X, T2.W, T1.W,
+; CM-NEXT: MULLO_INT T1.Y (MASKED), T2.W, T1.W,
+; CM-NEXT: MULLO_INT T1.Z (MASKED), T2.W, T1.W,
+; CM-NEXT: MULLO_INT * T1.W (MASKED), T2.W, T1.W,
+; CM-NEXT: ADD_INT * T0.W, PV.X, T0.W,
+; CM-NEXT: LSHL * T1.W, PV.W, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: ASHR * T1.W, PV.W, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: MULLO_INT T1.X, T1.W, T2.W,
+; CM-NEXT: MULLO_INT T1.Y (MASKED), T1.W, T2.W,
+; CM-NEXT: MULLO_INT T1.Z (MASKED), T1.W, T2.W,
+; CM-NEXT: MULLO_INT * T1.W (MASKED), T1.W, T2.W,
+; CM-NEXT: ADD_INT * T0.W, PV.X, T0.W,
+; CM-NEXT: LSHL * T2.W, PV.W, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: ADD_INT T0.X, T0.X, literal.x,
+; CM-NEXT: ASHR * T2.W, PV.W, literal.y,
+; CM-NEXT: -1(nan), 8(1.121039e-44)
+; CM-NEXT: MULLO_INT T1.X, T2.W, T1.W,
+; CM-NEXT: MULLO_INT T1.Y (MASKED), T2.W, T1.W,
+; CM-NEXT: MULLO_INT T1.Z (MASKED), T2.W, T1.W,
+; CM-NEXT: MULLO_INT * T1.W (MASKED), T2.W, T1.W,
+; CM-NEXT: ADD_INT T1.X, PV.X, T0.W,
+; CM-NEXT: SETE_INT T1.Z, T0.X, 0.0,
+; CM-NEXT: MOV * T0.W, T0.Y,
+; CM-NEXT: PRED_SETNE_INT * ExecMask,PredicateBit (MASKED), PV.Z, 0.0,
+; CM-NEXT: ALU clause starting at 58:
+; CM-NEXT: LSHR * T0.X, T0.Z, literal.x,
+; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
bb:
br label %bb6
@@ -119,3 +366,5 @@ bb6: ; preds = %bb6, %bb
}
declare i32 @llvm.amdgcn.mul.i24(i32, i32)
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; R600: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/mad_uint24.ll b/llvm/test/CodeGen/AMDGPU/mad_uint24.ll
index a6d458e..46b8df4 100644
--- a/llvm/test/CodeGen/AMDGPU/mad_uint24.ll
+++ b/llvm/test/CodeGen/AMDGPU/mad_uint24.ll
@@ -1,19 +1,75 @@
-; RUN: llc < %s -mtriple=r600 -mcpu=redwood | FileCheck %s --check-prefix=EG --check-prefix=FUNC
-; RUN: llc < %s -mtriple=r600 -mcpu=cayman | FileCheck %s --check-prefix=EG --check-prefix=FUNC
-; RUN: llc < %s -mtriple=amdgcn | FileCheck %s --check-prefix=SI --check-prefix=FUNC --check-prefix=GCN
-; RUN: llc < %s -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global | FileCheck %s --check-prefix=VI --check-prefix=FUNC --check-prefix=GCN --check-prefix=GCN2
-; RUN: llc < %s -mtriple=amdgcn -mcpu=fiji -mattr=-flat-for-global | FileCheck %s --check-prefix=VI --check-prefix=FUNC --check-prefix=GCN --check-prefix=GCN2
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc < %s -mtriple=r600 -mcpu=redwood | FileCheck %s --check-prefixes=EG
+; RUN: llc < %s -mtriple=r600 -mcpu=cayman | FileCheck %s --check-prefixes=CM
+; RUN: llc < %s -mtriple=amdgcn | FileCheck %s --check-prefixes=GCN
+; RUN: llc < %s -mtriple=amdgcn -mcpu=tonga -mattr=-flat-for-global | FileCheck %s --check-prefixes=GFX8,SI
+; RUN: llc < %s -mtriple=amdgcn -mcpu=fiji -mattr=-flat-for-global | FileCheck %s --check-prefixes=GFX8,VI
declare i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
-; FUNC-LABEL: {{^}}u32_mad24:
-; EG: MULLO_INT
-; SI: s_mul_i32
-; SI: s_add_i32
-; VI: s_mul_{{[iu]}}32
-; VI: s_add_{{[iu]}}32
-
define amdgpu_kernel void @u32_mad24(ptr addrspace(1) %out, i32 %a, i32 %b, i32 %c) {
+; EG-LABEL: u32_mad24:
+; EG: ; %bb.0: ; %entry
+; EG-NEXT: ALU 6, @4, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
+; EG-NEXT: CF_END
+; EG-NEXT: PAD
+; EG-NEXT: ALU clause starting at 4:
+; EG-NEXT: AND_INT T0.W, KC0[2].W, literal.x,
+; EG-NEXT: AND_INT * T1.W, KC0[2].Z, literal.x,
+; EG-NEXT: 16777215(2.350989e-38), 0(0.000000e+00)
+; EG-NEXT: MULLO_INT * T0.X, PS, PV.W,
+; EG-NEXT: ADD_INT T0.X, PS, KC0[3].X,
+; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
+; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+;
+; CM-LABEL: u32_mad24:
+; CM: ; %bb.0: ; %entry
+; CM-NEXT: ALU 9, @4, KC0[CB0:0-32], KC1[]
+; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X
+; CM-NEXT: CF_END
+; CM-NEXT: PAD
+; CM-NEXT: ALU clause starting at 4:
+; CM-NEXT: AND_INT T0.Z, KC0[2].W, literal.x,
+; CM-NEXT: AND_INT * T0.W, KC0[2].Z, literal.x,
+; CM-NEXT: 16777215(2.350989e-38), 0(0.000000e+00)
+; CM-NEXT: MULLO_INT T0.X, T0.W, T0.Z,
+; CM-NEXT: MULLO_INT T0.Y (MASKED), T0.W, T0.Z,
+; CM-NEXT: MULLO_INT T0.Z (MASKED), T0.W, T0.Z,
+; CM-NEXT: MULLO_INT * T0.W (MASKED), T0.W, T0.Z,
+; CM-NEXT: ADD_INT * T0.X, PV.X, KC0[3].X,
+; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
+; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+;
+; GCN-LABEL: u32_mad24:
+; GCN: ; %bb.0: ; %entry
+; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0xb
+; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x9
+; GCN-NEXT: s_mov_b32 s7, 0xf000
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_and_b32 s0, s0, 0xffffff
+; GCN-NEXT: s_and_b32 s1, s1, 0xffffff
+; GCN-NEXT: s_mul_i32 s0, s0, s1
+; GCN-NEXT: s_add_i32 s0, s0, s2
+; GCN-NEXT: s_mov_b32 s6, -1
+; GCN-NEXT: v_mov_b32_e32 v0, s0
+; GCN-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; GCN-NEXT: s_endpgm
+;
+; GFX8-LABEL: u32_mad24:
+; GFX8: ; %bb.0: ; %entry
+; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x2c
+; GFX8-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x24
+; GFX8-NEXT: s_mov_b32 s7, 0xf000
+; GFX8-NEXT: s_mov_b32 s6, -1
+; GFX8-NEXT: s_waitcnt lgkmcnt(0)
+; GFX8-NEXT: s_and_b32 s0, s0, 0xffffff
+; GFX8-NEXT: s_and_b32 s1, s1, 0xffffff
+; GFX8-NEXT: s_mul_i32 s0, s0, s1
+; GFX8-NEXT: s_add_i32 s0, s0, s2
+; GFX8-NEXT: v_mov_b32_e32 v0, s0
+; GFX8-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; GFX8-NEXT: s_endpgm
entry:
%0 = shl i32 %a, 8
%a_24 = lshr i32 %0, 8
@@ -25,18 +81,88 @@ entry:
ret void
}
-; FUNC-LABEL: {{^}}i16_mad24:
; The order of A and B does not matter.
-; EG: MULLO_INT {{[* ]*}}T{{[0-9]}}.[[MAD_CHAN:[XYZW]]]
-; EG: ADD_INT {{[* ]*}}T{{[0-9]}}.[[MAD_CHAN:[XYZW]]]
; The result must be sign-extended
-; EG: BFE_INT {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[MAD_CHAN]], 0.0, literal.x
-; EG: 16
-; GCN: s_mul_i32 [[MUL:s[0-9]]], {{[s][0-9], [s][0-9]}}
-; GCN: s_add_i32 [[MAD:s[0-9]]], [[MUL]], s{{[0-9]}}
-; GCN: s_sext_i32_i16 [[EXT:s[0-9]]], [[MAD]]
-; GCN: v_mov_b32_e32 v0, [[EXT]]
define amdgpu_kernel void @i16_mad24(ptr addrspace(1) %out, i16 %a, i16 %b, i16 %c) {
+; EG-LABEL: i16_mad24:
+; EG: ; %bb.0: ; %entry
+; EG-NEXT: ALU 0, @12, KC0[], KC1[]
+; EG-NEXT: TEX 2 @6
+; EG-NEXT: ALU 4, @13, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
+; EG-NEXT: CF_END
+; EG-NEXT: PAD
+; EG-NEXT: Fetch clause starting at 6:
+; EG-NEXT: VTX_READ_16 T1.X, T0.X, 40, #3
+; EG-NEXT: VTX_READ_16 T2.X, T0.X, 42, #3
+; EG-NEXT: VTX_READ_16 T0.X, T0.X, 44, #3
+; EG-NEXT: ALU clause starting at 12:
+; EG-NEXT: MOV * T0.X, 0.0,
+; EG-NEXT: ALU clause starting at 13:
+; EG-NEXT: MULLO_INT * T0.Y, T1.X, T2.X,
+; EG-NEXT: ADD_INT * T0.W, PS, T0.X,
+; EG-NEXT: BFE_INT T0.X, PV.W, 0.0, literal.x,
+; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y,
+; EG-NEXT: 16(2.242078e-44), 2(2.802597e-45)
+;
+; CM-LABEL: i16_mad24:
+; CM: ; %bb.0: ; %entry
+; CM-NEXT: ALU 0, @12, KC0[], KC1[]
+; CM-NEXT: TEX 2 @6
+; CM-NEXT: ALU 8, @13, KC0[CB0:0-32], KC1[]
+; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X
+; CM-NEXT: CF_END
+; CM-NEXT: PAD
+; CM-NEXT: Fetch clause starting at 6:
+; CM-NEXT: VTX_READ_16 T1.X, T0.X, 40, #3
+; CM-NEXT: VTX_READ_16 T2.X, T0.X, 42, #3
+; CM-NEXT: VTX_READ_16 T0.X, T0.X, 44, #3
+; CM-NEXT: ALU clause starting at 12:
+; CM-NEXT: MOV * T0.X, 0.0,
+; CM-NEXT: ALU clause starting at 13:
+; CM-NEXT: MULLO_INT T0.X (MASKED), T1.X, T2.X,
+; CM-NEXT: MULLO_INT T0.Y, T1.X, T2.X,
+; CM-NEXT: MULLO_INT T0.Z (MASKED), T1.X, T2.X,
+; CM-NEXT: MULLO_INT * T0.W (MASKED), T1.X, T2.X,
+; CM-NEXT: ADD_INT * T0.W, PV.Y, T0.X,
+; CM-NEXT: BFE_INT * T0.X, PV.W, 0.0, literal.x,
+; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
+; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+;
+; GCN-LABEL: i16_mad24:
+; GCN: ; %bb.0: ; %entry
+; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
+; GCN-NEXT: s_load_dword s4, s[4:5], 0xb
+; GCN-NEXT: s_mov_b32 s7, 0xf000
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_lshr_b32 s2, s2, 16
+; GCN-NEXT: s_mul_i32 s2, s4, s2
+; GCN-NEXT: s_add_i32 s2, s2, s3
+; GCN-NEXT: s_sext_i32_i16 s2, s2
+; GCN-NEXT: s_mov_b32 s6, -1
+; GCN-NEXT: s_mov_b32 s4, s0
+; GCN-NEXT: s_mov_b32 s5, s1
+; GCN-NEXT: v_mov_b32_e32 v0, s2
+; GCN-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; GCN-NEXT: s_endpgm
+;
+; GFX8-LABEL: i16_mad24:
+; GFX8: ; %bb.0: ; %entry
+; GFX8-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX8-NEXT: s_load_dword s8, s[4:5], 0x2c
+; GFX8-NEXT: s_mov_b32 s7, 0xf000
+; GFX8-NEXT: s_mov_b32 s6, -1
+; GFX8-NEXT: s_waitcnt lgkmcnt(0)
+; GFX8-NEXT: s_mov_b32 s4, s0
+; GFX8-NEXT: s_lshr_b32 s0, s2, 16
+; GFX8-NEXT: s_mul_i32 s0, s8, s0
+; GFX8-NEXT: s_add_i32 s0, s0, s3
+; GFX8-NEXT: s_sext_i32_i16 s0, s0
+; GFX8-NEXT: s_mov_b32 s5, s1
+; GFX8-NEXT: v_mov_b32_e32 v0, s0
+; GFX8-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; GFX8-NEXT: s_endpgm
entry:
%0 = mul i16 %a, %b
%1 = add i16 %0, %c
@@ -46,17 +172,85 @@ entry:
}
; FIXME: Need to handle non-uniform case for function below (load without gep).
-; FUNC-LABEL: {{^}}i8_mad24:
-; EG: MULLO_INT {{[* ]*}}T{{[0-9]}}.[[MAD_CHAN:[XYZW]]]
-; EG: ADD_INT {{[* ]*}}T{{[0-9]}}.[[MAD_CHAN:[XYZW]]]
; The result must be sign-extended
-; EG: BFE_INT {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[MAD_CHAN]], 0.0, literal.x
-; EG: 8
-; GCN: s_mul_i32 [[MUL:s[0-9]]], {{[s][0-9], [s][0-9]}}
-; GCN: s_add_i32 [[MAD:s[0-9]]], [[MUL]], s{{[0-9]}}
-; GCN: s_sext_i32_i8 [[EXT:s[0-9]]], [[MAD]]
-; GCN: v_mov_b32_e32 v0, [[EXT]]
define amdgpu_kernel void @i8_mad24(ptr addrspace(1) %out, i8 %a, i8 %b, i8 %c) {
+; EG-LABEL: i8_mad24:
+; EG: ; %bb.0: ; %entry
+; EG-NEXT: ALU 0, @12, KC0[], KC1[]
+; EG-NEXT: TEX 2 @6
+; EG-NEXT: ALU 4, @13, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
+; EG-NEXT: CF_END
+; EG-NEXT: PAD
+; EG-NEXT: Fetch clause starting at 6:
+; EG-NEXT: VTX_READ_8 T1.X, T0.X, 40, #3
+; EG-NEXT: VTX_READ_8 T2.X, T0.X, 41, #3
+; EG-NEXT: VTX_READ_8 T0.X, T0.X, 42, #3
+; EG-NEXT: ALU clause starting at 12:
+; EG-NEXT: MOV * T0.X, 0.0,
+; EG-NEXT: ALU clause starting at 13:
+; EG-NEXT: MULLO_INT * T0.Y, T1.X, T2.X,
+; EG-NEXT: ADD_INT * T0.W, PS, T0.X,
+; EG-NEXT: BFE_INT T0.X, PV.W, 0.0, literal.x,
+; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y,
+; EG-NEXT: 8(1.121039e-44), 2(2.802597e-45)
+;
+; CM-LABEL: i8_mad24:
+; CM: ; %bb.0: ; %entry
+; CM-NEXT: ALU 0, @12, KC0[], KC1[]
+; CM-NEXT: TEX 2 @6
+; CM-NEXT: ALU 8, @13, KC0[CB0:0-32], KC1[]
+; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X
+; CM-NEXT: CF_END
+; CM-NEXT: PAD
+; CM-NEXT: Fetch clause starting at 6:
+; CM-NEXT: VTX_READ_8 T1.X, T0.X, 40, #3
+; CM-NEXT: VTX_READ_8 T2.X, T0.X, 41, #3
+; CM-NEXT: VTX_READ_8 T0.X, T0.X, 42, #3
+; CM-NEXT: ALU clause starting at 12:
+; CM-NEXT: MOV * T0.X, 0.0,
+; CM-NEXT: ALU clause starting at 13:
+; CM-NEXT: MULLO_INT T0.X (MASKED), T1.X, T2.X,
+; CM-NEXT: MULLO_INT T0.Y, T1.X, T2.X,
+; CM-NEXT: MULLO_INT T0.Z (MASKED), T1.X, T2.X,
+; CM-NEXT: MULLO_INT * T0.W (MASKED), T1.X, T2.X,
+; CM-NEXT: ADD_INT * T0.W, PV.Y, T0.X,
+; CM-NEXT: BFE_INT * T0.X, PV.W, 0.0, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
+; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+;
+; GCN-LABEL: i8_mad24:
+; GCN: ; %bb.0: ; %entry
+; GCN-NEXT: s_load_dword s2, s[4:5], 0xb
+; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; GCN-NEXT: s_mov_b32 s3, 0xf000
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_lshr_b32 s4, s2, 8
+; GCN-NEXT: s_lshr_b32 s5, s2, 16
+; GCN-NEXT: s_mul_i32 s2, s2, s4
+; GCN-NEXT: s_add_i32 s2, s2, s5
+; GCN-NEXT: s_sext_i32_i8 s4, s2
+; GCN-NEXT: s_mov_b32 s2, -1
+; GCN-NEXT: v_mov_b32_e32 v0, s4
+; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GCN-NEXT: s_endpgm
+;
+; GFX8-LABEL: i8_mad24:
+; GFX8: ; %bb.0: ; %entry
+; GFX8-NEXT: s_load_dword s6, s[4:5], 0x2c
+; GFX8-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX8-NEXT: s_mov_b32 s3, 0xf000
+; GFX8-NEXT: s_mov_b32 s2, -1
+; GFX8-NEXT: s_waitcnt lgkmcnt(0)
+; GFX8-NEXT: s_lshr_b32 s4, s6, 8
+; GFX8-NEXT: s_lshr_b32 s5, s6, 16
+; GFX8-NEXT: s_mul_i32 s4, s6, s4
+; GFX8-NEXT: s_add_i32 s4, s4, s5
+; GFX8-NEXT: s_sext_i32_i8 s4, s4
+; GFX8-NEXT: v_mov_b32_e32 v0, s4
+; GFX8-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GFX8-NEXT: s_endpgm
entry:
%0 = mul i8 %a, %b
%1 = add i8 %0, %c
@@ -72,11 +266,75 @@ entry:
; 24-bit mad pattern wasn't being matched.
; Check that the select instruction is not deleted.
-; FUNC-LABEL: {{^}}i24_i32_i32_mad:
-; EG: CNDE_INT
-; SI: s_cselect
-; GCN2: s_cselect
define amdgpu_kernel void @i24_i32_i32_mad(ptr addrspace(1) %out, i32 %a, i32 %b, i32 %c, i32 %d) {
+; EG-LABEL: i24_i32_i32_mad:
+; EG: ; %bb.0: ; %entry
+; EG-NEXT: ALU 7, @4, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
+; EG-NEXT: CF_END
+; EG-NEXT: PAD
+; EG-NEXT: ALU clause starting at 4:
+; EG-NEXT: ASHR * T0.W, KC0[2].Z, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: CNDE_INT * T0.W, KC0[3].X, literal.x, PV.W,
+; EG-NEXT: 34(4.764415e-44), 0(0.000000e+00)
+; EG-NEXT: MULLO_INT * T0.X, PV.W, KC0[3].X,
+; EG-NEXT: ADD_INT T0.X, PS, KC0[3].Y,
+; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
+; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+;
+; CM-LABEL: i24_i32_i32_mad:
+; CM: ; %bb.0: ; %entry
+; CM-NEXT: ALU 10, @4, KC0[CB0:0-32], KC1[]
+; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X
+; CM-NEXT: CF_END
+; CM-NEXT: PAD
+; CM-NEXT: ALU clause starting at 4:
+; CM-NEXT: ASHR * T0.W, KC0[2].Z, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: CNDE_INT * T0.W, KC0[3].X, literal.x, PV.W,
+; CM-NEXT: 34(4.764415e-44), 0(0.000000e+00)
+; CM-NEXT: MULLO_INT T0.X, T0.W, KC0[3].X,
+; CM-NEXT: MULLO_INT T0.Y (MASKED), T0.W, KC0[3].X,
+; CM-NEXT: MULLO_INT T0.Z (MASKED), T0.W, KC0[3].X,
+; CM-NEXT: MULLO_INT * T0.W (MASKED), T0.W, KC0[3].X,
+; CM-NEXT: ADD_INT * T0.X, PV.X, KC0[3].Y,
+; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
+; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+;
+; GCN-LABEL: i24_i32_i32_mad:
+; GCN: ; %bb.0: ; %entry
+; GCN-NEXT: s_load_dword s2, s[4:5], 0xb
+; GCN-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0xd
+; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; GCN-NEXT: s_mov_b32 s3, 0xf000
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_ashr_i32 s2, s2, 8
+; GCN-NEXT: s_cmp_lg_u32 s6, 0
+; GCN-NEXT: s_cselect_b32 s2, s2, 34
+; GCN-NEXT: s_mul_i32 s2, s2, s6
+; GCN-NEXT: s_add_i32 s4, s2, s7
+; GCN-NEXT: s_mov_b32 s2, -1
+; GCN-NEXT: v_mov_b32_e32 v0, s4
+; GCN-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GCN-NEXT: s_endpgm
+;
+; GFX8-LABEL: i24_i32_i32_mad:
+; GFX8: ; %bb.0: ; %entry
+; GFX8-NEXT: s_load_dword s8, s[4:5], 0x2c
+; GFX8-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
+; GFX8-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX8-NEXT: s_mov_b32 s3, 0xf000
+; GFX8-NEXT: s_mov_b32 s2, -1
+; GFX8-NEXT: s_waitcnt lgkmcnt(0)
+; GFX8-NEXT: s_ashr_i32 s4, s8, 8
+; GFX8-NEXT: s_cmp_lg_u32 s6, 0
+; GFX8-NEXT: s_cselect_b32 s4, s4, 34
+; GFX8-NEXT: s_mul_i32 s4, s4, s6
+; GFX8-NEXT: s_add_i32 s4, s4, s7
+; GFX8-NEXT: v_mov_b32_e32 v0, s4
+; GFX8-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GFX8-NEXT: s_endpgm
entry:
%0 = ashr i32 %a, 8
%1 = icmp ne i32 %c, 0
@@ -87,13 +345,139 @@ entry:
ret void
}
-; FUNC-LABEL: {{^}}extra_and:
-; SI-NOT: v_and
-; SI: s_mul_i32
-; SI: s_mul_i32
-; SI: s_add_i32
-; SI: s_add_i32
define amdgpu_kernel void @extra_and(ptr addrspace(1) %arg, i32 %arg2, i32 %arg3) {
+; EG-LABEL: extra_and:
+; EG: ; %bb.0: ; %bb
+; EG-NEXT: ALU 5, @10, KC0[CB0:0-32], KC1[]
+; EG-NEXT: LOOP_START_DX10 @7
+; EG-NEXT: ALU_PUSH_BEFORE 12, @16, KC0[], KC1[]
+; EG-NEXT: JUMP @6 POP:1
+; EG-NEXT: LOOP_BREAK @6
+; EG-NEXT: POP @6 POP:1
+; EG-NEXT: END_LOOP @2
+; EG-NEXT: ALU 1, @29, KC0[], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
+; EG-NEXT: CF_END
+; EG-NEXT: ALU clause starting at 10:
+; EG-NEXT: MOV * T1.W, literal.x,
+; EG-NEXT: 0(0.000000e+00), 0(0.000000e+00)
+; EG-NEXT: MOV * T3.W, PV.W,
+; EG-NEXT: MOV T0.Z, KC0[2].Y,
+; EG-NEXT: MOV T0.W, KC0[2].Z,
+; EG-NEXT: MOV * T2.W, KC0[2].W,
+; EG-NEXT: ALU clause starting at 16:
+; EG-NEXT: AND_INT T1.W, T1.W, literal.x,
+; EG-NEXT: AND_INT * T4.W, T3.W, literal.x,
+; EG-NEXT: 16777215(2.350989e-38), 0(0.000000e+00)
+; EG-NEXT: AND_INT T3.W, T3.W, literal.x,
+; EG-NEXT: MULLO_INT * T0.X, PS, PV.W,
+; EG-NEXT: 16777215(2.350989e-38), 0(0.000000e+00)
+; EG-NEXT: MULLO_INT * T0.Y, PV.W, T1.W,
+; EG-NEXT: ADD_INT T3.W, T2.W, PS,
+; EG-NEXT: ADD_INT * T1.W, T0.W, T0.X,
+; EG-NEXT: ADD_INT * T0.X, PS, PV.W,
+; EG-NEXT: SETNE_INT * T4.W, PV.X, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: PRED_SETE_INT * ExecMask,PredicateBit (MASKED), PV.W, 0.0,
+; EG-NEXT: ALU clause starting at 29:
+; EG-NEXT: LSHR * T1.X, T0.Z, literal.x,
+; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+;
+; CM-LABEL: extra_and:
+; CM: ; %bb.0: ; %bb
+; CM-NEXT: ALU 5, @10, KC0[CB0:0-32], KC1[]
+; CM-NEXT: LOOP_START_DX10 @7
+; CM-NEXT: ALU_PUSH_BEFORE 17, @16, KC0[], KC1[]
+; CM-NEXT: JUMP @6 POP:1
+; CM-NEXT: LOOP_BREAK @6
+; CM-NEXT: POP @6 POP:1
+; CM-NEXT: END_LOOP @2
+; CM-NEXT: ALU 1, @34, KC0[], KC1[]
+; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X
+; CM-NEXT: CF_END
+; CM-NEXT: ALU clause starting at 10:
+; CM-NEXT: MOV * T0.W, literal.x,
+; CM-NEXT: 0(0.000000e+00), 0(0.000000e+00)
+; CM-NEXT: MOV * T1.Z, PV.W,
+; CM-NEXT: MOV T0.Y, KC0[2].Y,
+; CM-NEXT: MOV T0.Z, KC0[2].Z,
+; CM-NEXT: MOV * T1.W, KC0[2].W,
+; CM-NEXT: ALU clause starting at 16:
+; CM-NEXT: AND_INT T1.Y, T1.Z, literal.x,
+; CM-NEXT: AND_INT T2.Z, T0.W, literal.x,
+; CM-NEXT: AND_INT * T0.W, T1.Z, literal.x,
+; CM-NEXT: 16777215(2.350989e-38), 0(0.000000e+00)
+; CM-NEXT: MULLO_INT T0.X, T0.W, T2.Z,
+; CM-NEXT: MULLO_INT T0.Y (MASKED), T0.W, T2.Z,
+; CM-NEXT: MULLO_INT T0.Z (MASKED), T0.W, T2.Z,
+; CM-NEXT: MULLO_INT * T0.W (MASKED), T0.W, T2.Z,
+; CM-NEXT: MULLO_INT T0.X (MASKED), T1.Y, T2.Z,
+; CM-NEXT: MULLO_INT T0.Y (MASKED), T1.Y, T2.Z,
+; CM-NEXT: MULLO_INT T0.Z (MASKED), T1.Y, T2.Z,
+; CM-NEXT: MULLO_INT * T0.W, T1.Y, T2.Z,
+; CM-NEXT: ADD_INT T1.Z, T1.W, PV.W,
+; CM-NEXT: ADD_INT * T0.W, T0.Z, T0.X,
+; CM-NEXT: ADD_INT * T0.X, PV.W, PV.Z,
+; CM-NEXT: SETNE_INT * T2.W, PV.X, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: PRED_SETE_INT * ExecMask,PredicateBit (MASKED), PV.W, 0.0,
+; CM-NEXT: ALU clause starting at 34:
+; CM-NEXT: LSHR * T1.X, T0.Y, literal.x,
+; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+;
+; GCN-LABEL: extra_and:
+; GCN: ; %bb.0: ; %bb
+; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xb
+; GCN-NEXT: s_mov_b32 s2, 0
+; GCN-NEXT: s_mov_b32 s6, 0
+; GCN-NEXT: .LBB4_1: ; %bb4
+; GCN-NEXT: ; =>This Inner Loop Header: Depth=1
+; GCN-NEXT: s_and_b32 s3, s6, 0xffffff
+; GCN-NEXT: s_and_b32 s6, s6, 0xffffff
+; GCN-NEXT: s_and_b32 s2, s2, 0xffffff
+; GCN-NEXT: s_mul_i32 s3, s3, s2
+; GCN-NEXT: s_mul_i32 s6, s6, s2
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_add_i32 s2, s0, s3
+; GCN-NEXT: s_add_i32 s6, s1, s6
+; GCN-NEXT: s_add_i32 s3, s2, s6
+; GCN-NEXT: s_cmp_lg_u32 s3, 8
+; GCN-NEXT: s_cbranch_scc1 .LBB4_1
+; GCN-NEXT: ; %bb.2: ; %bb18
+; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x9
+; GCN-NEXT: s_mov_b32 s7, 0xf000
+; GCN-NEXT: s_mov_b32 s6, -1
+; GCN-NEXT: v_mov_b32_e32 v0, s3
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; GCN-NEXT: s_endpgm
+;
+; GFX8-LABEL: extra_and:
+; GFX8: ; %bb.0: ; %bb
+; GFX8-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x2c
+; GFX8-NEXT: s_mov_b32 s2, 0
+; GFX8-NEXT: s_mov_b32 s6, 0
+; GFX8-NEXT: .LBB4_1: ; %bb4
+; GFX8-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX8-NEXT: s_and_b32 s3, s6, 0xffffff
+; GFX8-NEXT: s_and_b32 s6, s6, 0xffffff
+; GFX8-NEXT: s_and_b32 s2, s2, 0xffffff
+; GFX8-NEXT: s_mul_i32 s3, s3, s2
+; GFX8-NEXT: s_mul_i32 s6, s6, s2
+; GFX8-NEXT: s_waitcnt lgkmcnt(0)
+; GFX8-NEXT: s_add_i32 s2, s0, s3
+; GFX8-NEXT: s_add_i32 s6, s1, s6
+; GFX8-NEXT: s_add_i32 s3, s2, s6
+; GFX8-NEXT: s_cmp_lg_u32 s3, 8
+; GFX8-NEXT: s_cbranch_scc1 .LBB4_1
+; GFX8-NEXT: ; %bb.2: ; %bb18
+; GFX8-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x24
+; GFX8-NEXT: s_mov_b32 s7, 0xf000
+; GFX8-NEXT: s_mov_b32 s6, -1
+; GFX8-NEXT: v_mov_b32_e32 v0, s3
+; GFX8-NEXT: s_waitcnt lgkmcnt(0)
+; GFX8-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; GFX8-NEXT: s_endpgm
bb:
br label %bb4
@@ -119,13 +503,139 @@ bb18: ; preds = %bb4
ret void
}
-; FUNC-LABEL: {{^}}dont_remove_shift
-; SI: s_lshr
-; SI: s_mul_i32
-; SI: s_mul_i32
-; SI: s_add_i32
-; SI: s_add_i32
define amdgpu_kernel void @dont_remove_shift(ptr addrspace(1) %arg, i32 %arg2, i32 %arg3) {
+; EG-LABEL: dont_remove_shift:
+; EG: ; %bb.0: ; %bb
+; EG-NEXT: ALU 5, @10, KC0[CB0:0-32], KC1[]
+; EG-NEXT: LOOP_START_DX10 @7
+; EG-NEXT: ALU_PUSH_BEFORE 12, @16, KC0[], KC1[]
+; EG-NEXT: JUMP @6 POP:1
+; EG-NEXT: LOOP_BREAK @6
+; EG-NEXT: POP @6 POP:1
+; EG-NEXT: END_LOOP @2
+; EG-NEXT: ALU 1, @29, KC0[], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
+; EG-NEXT: CF_END
+; EG-NEXT: ALU clause starting at 10:
+; EG-NEXT: MOV * T1.W, literal.x,
+; EG-NEXT: 0(0.000000e+00), 0(0.000000e+00)
+; EG-NEXT: MOV * T3.W, PV.W,
+; EG-NEXT: MOV T0.Z, KC0[2].Y,
+; EG-NEXT: MOV T0.W, KC0[2].Z,
+; EG-NEXT: MOV * T2.W, KC0[2].W,
+; EG-NEXT: ALU clause starting at 16:
+; EG-NEXT: LSHR T1.W, T1.W, literal.x,
+; EG-NEXT: LSHR * T4.W, T3.W, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: LSHR T3.W, T3.W, literal.x,
+; EG-NEXT: MULLO_INT * T0.X, PS, PV.W,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: MULLO_INT * T0.Y, PV.W, T1.W,
+; EG-NEXT: ADD_INT T3.W, T2.W, PS,
+; EG-NEXT: ADD_INT * T1.W, T0.W, T0.X,
+; EG-NEXT: ADD_INT * T0.X, PS, PV.W,
+; EG-NEXT: SETNE_INT * T4.W, PV.X, literal.x,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: PRED_SETE_INT * ExecMask,PredicateBit (MASKED), PV.W, 0.0,
+; EG-NEXT: ALU clause starting at 29:
+; EG-NEXT: LSHR * T1.X, T0.Z, literal.x,
+; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+;
+; CM-LABEL: dont_remove_shift:
+; CM: ; %bb.0: ; %bb
+; CM-NEXT: ALU 5, @10, KC0[CB0:0-32], KC1[]
+; CM-NEXT: LOOP_START_DX10 @7
+; CM-NEXT: ALU_PUSH_BEFORE 17, @16, KC0[], KC1[]
+; CM-NEXT: JUMP @6 POP:1
+; CM-NEXT: LOOP_BREAK @6
+; CM-NEXT: POP @6 POP:1
+; CM-NEXT: END_LOOP @2
+; CM-NEXT: ALU 1, @34, KC0[], KC1[]
+; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X
+; CM-NEXT: CF_END
+; CM-NEXT: ALU clause starting at 10:
+; CM-NEXT: MOV * T0.W, literal.x,
+; CM-NEXT: 0(0.000000e+00), 0(0.000000e+00)
+; CM-NEXT: MOV * T1.Z, PV.W,
+; CM-NEXT: MOV T0.Y, KC0[2].Y,
+; CM-NEXT: MOV T0.Z, KC0[2].Z,
+; CM-NEXT: MOV * T1.W, KC0[2].W,
+; CM-NEXT: ALU clause starting at 16:
+; CM-NEXT: LSHR T1.Y, T1.Z, literal.x,
+; CM-NEXT: LSHR T2.Z, T0.W, literal.x,
+; CM-NEXT: LSHR * T0.W, T1.Z, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: MULLO_INT T0.X, T0.W, T2.Z,
+; CM-NEXT: MULLO_INT T0.Y (MASKED), T0.W, T2.Z,
+; CM-NEXT: MULLO_INT T0.Z (MASKED), T0.W, T2.Z,
+; CM-NEXT: MULLO_INT * T0.W (MASKED), T0.W, T2.Z,
+; CM-NEXT: MULLO_INT T0.X (MASKED), T1.Y, T2.Z,
+; CM-NEXT: MULLO_INT T0.Y (MASKED), T1.Y, T2.Z,
+; CM-NEXT: MULLO_INT T0.Z (MASKED), T1.Y, T2.Z,
+; CM-NEXT: MULLO_INT * T0.W, T1.Y, T2.Z,
+; CM-NEXT: ADD_INT T1.Z, T1.W, PV.W,
+; CM-NEXT: ADD_INT * T0.W, T0.Z, T0.X,
+; CM-NEXT: ADD_INT * T0.X, PV.W, PV.Z,
+; CM-NEXT: SETNE_INT * T2.W, PV.X, literal.x,
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: PRED_SETE_INT * ExecMask,PredicateBit (MASKED), PV.W, 0.0,
+; CM-NEXT: ALU clause starting at 34:
+; CM-NEXT: LSHR * T1.X, T0.Y, literal.x,
+; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+;
+; GCN-LABEL: dont_remove_shift:
+; GCN: ; %bb.0: ; %bb
+; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xb
+; GCN-NEXT: s_mov_b32 s2, 0
+; GCN-NEXT: s_mov_b32 s6, 0
+; GCN-NEXT: .LBB5_1: ; %bb4
+; GCN-NEXT: ; =>This Inner Loop Header: Depth=1
+; GCN-NEXT: s_lshr_b32 s3, s6, 8
+; GCN-NEXT: s_lshr_b32 s6, s6, 8
+; GCN-NEXT: s_lshr_b32 s2, s2, 8
+; GCN-NEXT: s_mul_i32 s3, s3, s2
+; GCN-NEXT: s_mul_i32 s6, s6, s2
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_add_i32 s2, s0, s3
+; GCN-NEXT: s_add_i32 s6, s1, s6
+; GCN-NEXT: s_add_i32 s3, s2, s6
+; GCN-NEXT: s_cmp_lg_u32 s3, 8
+; GCN-NEXT: s_cbranch_scc1 .LBB5_1
+; GCN-NEXT: ; %bb.2: ; %bb18
+; GCN-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x9
+; GCN-NEXT: s_mov_b32 s7, 0xf000
+; GCN-NEXT: s_mov_b32 s6, -1
+; GCN-NEXT: v_mov_b32_e32 v0, s3
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; GCN-NEXT: s_endpgm
+;
+; GFX8-LABEL: dont_remove_shift:
+; GFX8: ; %bb.0: ; %bb
+; GFX8-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x2c
+; GFX8-NEXT: s_mov_b32 s2, 0
+; GFX8-NEXT: s_mov_b32 s6, 0
+; GFX8-NEXT: .LBB5_1: ; %bb4
+; GFX8-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX8-NEXT: s_lshr_b32 s3, s6, 8
+; GFX8-NEXT: s_lshr_b32 s6, s6, 8
+; GFX8-NEXT: s_lshr_b32 s2, s2, 8
+; GFX8-NEXT: s_mul_i32 s3, s3, s2
+; GFX8-NEXT: s_mul_i32 s6, s6, s2
+; GFX8-NEXT: s_waitcnt lgkmcnt(0)
+; GFX8-NEXT: s_add_i32 s2, s0, s3
+; GFX8-NEXT: s_add_i32 s6, s1, s6
+; GFX8-NEXT: s_add_i32 s3, s2, s6
+; GFX8-NEXT: s_cmp_lg_u32 s3, 8
+; GFX8-NEXT: s_cbranch_scc1 .LBB5_1
+; GFX8-NEXT: ; %bb.2: ; %bb18
+; GFX8-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x24
+; GFX8-NEXT: s_mov_b32 s7, 0xf000
+; GFX8-NEXT: s_mov_b32 s6, -1
+; GFX8-NEXT: v_mov_b32_e32 v0, s3
+; GFX8-NEXT: s_waitcnt lgkmcnt(0)
+; GFX8-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; GFX8-NEXT: s_endpgm
bb:
br label %bb4
@@ -151,19 +661,234 @@ bb18: ; preds = %bb4
ret void
}
-; FUNC-LABEL: {{^}}i8_mad_sat_16:
-; EG: MULLO_INT {{[* ]*}}T{{[0-9]}}.[[MAD_CHAN:[XYZW]]]
-; EG: ADD_INT {{[* ]*}}T{{[0-9]}}.[[MAD_CHAN:[XYZW]]]
-; The result must be sign-extended
-; EG: BFE_INT {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[MAD_CHAN]], 0.0, literal.x
-; EG: 8
-; SI: v_mad_u32_u24 [[MAD:v[0-9]]], {{[sv][0-9], [sv][0-9]}}
-; SI: v_bfe_i32 [[EXT:v[0-9]]], [[MAD]], 0, 16
-; SI: v_med3_i32 v{{[0-9]}}, [[EXT]],
-; VI: v_mad_u16 [[MAD:v[0-9]]], {{[sv][0-9], [sv][0-9]}}
-; VI: v_max_i16_e32 [[MAX:v[0-9]]], 0xff80, [[MAD]]
-; VI: v_min_i16_e32 {{v[0-9]}}, 0x7f, [[MAX]]
define amdgpu_kernel void @i8_mad_sat_16(ptr addrspace(1) %out, ptr addrspace(1) %in0, ptr addrspace(1) %in1, ptr addrspace(1) %in2, ptr addrspace(5) %idx) {
+; EG-LABEL: i8_mad_sat_16:
+; EG: ; %bb.0: ; %entry
+; EG-NEXT: ALU 4, @14, KC0[CB0:0-32], KC1[]
+; EG-NEXT: TEX 0 @8
+; EG-NEXT: ALU 1, @19, KC0[CB0:0-32], KC1[]
+; EG-NEXT: TEX 1 @10
+; EG-NEXT: ALU 24, @21, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT MSKOR T0.XW, T1.X
+; EG-NEXT: CF_END
+; EG-NEXT: PAD
+; EG-NEXT: Fetch clause starting at 8:
+; EG-NEXT: VTX_READ_8 T1.X, T1.X, 0, #1
+; EG-NEXT: Fetch clause starting at 10:
+; EG-NEXT: VTX_READ_8 T3.X, T3.X, 0, #1
+; EG-NEXT: VTX_READ_8 T2.X, T2.X, 0, #1
+; EG-NEXT: ALU clause starting at 14:
+; EG-NEXT: LSHR * T0.W, KC0[3].Y, literal.x,
+; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; EG-NEXT: MOVA_INT * AR.x (MASKED), PV.W,
+; EG-NEXT: MOV * T0.X, T(0 + AR.x).X+,
+; EG-NEXT: ADD_INT * T1.X, KC0[2].W, PV.X,
+; EG-NEXT: ALU clause starting at 19:
+; EG-NEXT: ADD_INT T2.X, KC0[2].Z, T0.X,
+; EG-NEXT: ADD_INT * T3.X, KC0[3].X, T0.X,
+; EG-NEXT: ALU clause starting at 21:
+; EG-NEXT: BFE_INT T0.Z, T1.X, 0.0, literal.x,
+; EG-NEXT: BFE_INT * T0.W, T2.X, 0.0, literal.x, BS:VEC_120/SCL_212
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: BFE_INT T1.W, T3.X, 0.0, literal.x,
+; EG-NEXT: MULLO_INT * T0.Y, PV.Z, PV.W,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: ADD_INT * T0.W, PS, PV.W,
+; EG-NEXT: BFE_INT * T0.W, PV.W, 0.0, literal.x,
+; EG-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; EG-NEXT: MAX_INT T0.W, PV.W, literal.x,
+; EG-NEXT: ADD_INT * T1.W, KC0[2].Y, T0.X,
+; EG-NEXT: -128(nan), 0(0.000000e+00)
+; EG-NEXT: AND_INT T2.W, PS, literal.x,
+; EG-NEXT: MIN_INT * T0.W, PV.W, literal.y,
+; EG-NEXT: 3(4.203895e-45), 127(1.779649e-43)
+; EG-NEXT: AND_INT T0.W, PS, literal.x,
+; EG-NEXT: LSHL * T2.W, PV.W, literal.y,
+; EG-NEXT: 255(3.573311e-43), 3(4.203895e-45)
+; EG-NEXT: LSHL T0.X, PV.W, PS,
+; EG-NEXT: LSHL * T0.W, literal.x, PS,
+; EG-NEXT: 255(3.573311e-43), 0(0.000000e+00)
+; EG-NEXT: MOV T0.Y, 0.0,
+; EG-NEXT: MOV * T0.Z, 0.0,
+; EG-NEXT: LSHR * T1.X, T1.W, literal.x,
+; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+;
+; CM-LABEL: i8_mad_sat_16:
+; CM: ; %bb.0: ; %entry
+; CM-NEXT: ALU 4, @14, KC0[CB0:0-32], KC1[]
+; CM-NEXT: TEX 0 @8
+; CM-NEXT: ALU 1, @19, KC0[CB0:0-32], KC1[]
+; CM-NEXT: TEX 1 @10
+; CM-NEXT: ALU 26, @21, KC0[CB0:0-32], KC1[]
+; CM-NEXT: MEM_RAT MSKOR T1.XW, T0.X
+; CM-NEXT: CF_END
+; CM-NEXT: PAD
+; CM-NEXT: Fetch clause starting at 8:
+; CM-NEXT: VTX_READ_8 T1.X, T1.X, 0, #1
+; CM-NEXT: Fetch clause starting at 10:
+; CM-NEXT: VTX_READ_8 T3.X, T3.X, 0, #1
+; CM-NEXT: VTX_READ_8 T2.X, T2.X, 0, #1
+; CM-NEXT: ALU clause starting at 14:
+; CM-NEXT: LSHR * T0.W, KC0[3].Y, literal.x,
+; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; CM-NEXT: MOVA_INT * AR.x (MASKED), PV.W,
+; CM-NEXT: MOV * T0.X, T(0 + AR.x).X+,
+; CM-NEXT: ADD_INT * T1.X, KC0[3].X, PV.X,
+; CM-NEXT: ALU clause starting at 19:
+; CM-NEXT: ADD_INT * T2.X, KC0[2].W, T0.X,
+; CM-NEXT: ADD_INT * T3.X, KC0[2].Z, T0.X,
+; CM-NEXT: ALU clause starting at 21:
+; CM-NEXT: BFE_INT T0.Y, T1.X, 0.0, literal.x,
+; CM-NEXT: BFE_INT T0.Z, T2.X, 0.0, literal.x, BS:VEC_120/SCL_212
+; CM-NEXT: BFE_INT * T0.W, T3.X, 0.0, literal.x, BS:VEC_201
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: MULLO_INT T0.X (MASKED), T0.Z, T0.W,
+; CM-NEXT: MULLO_INT T0.Y (MASKED), T0.Z, T0.W,
+; CM-NEXT: MULLO_INT T0.Z, T0.Z, T0.W,
+; CM-NEXT: MULLO_INT * T0.W (MASKED), T0.Z, T0.W,
+; CM-NEXT: ADD_INT * T0.W, PV.Z, T0.Y,
+; CM-NEXT: BFE_INT * T0.W, PV.W, 0.0, literal.x,
+; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; CM-NEXT: MAX_INT T0.Z, PV.W, literal.x,
+; CM-NEXT: ADD_INT * T0.W, KC0[2].Y, T0.X,
+; CM-NEXT: -128(nan), 0(0.000000e+00)
+; CM-NEXT: AND_INT T1.Z, PV.W, literal.x,
+; CM-NEXT: MIN_INT * T1.W, PV.Z, literal.y,
+; CM-NEXT: 3(4.203895e-45), 127(1.779649e-43)
+; CM-NEXT: AND_INT T0.Z, PV.W, literal.x,
+; CM-NEXT: LSHL * T1.W, PV.Z, literal.y,
+; CM-NEXT: 255(3.573311e-43), 3(4.203895e-45)
+; CM-NEXT: LSHL T1.X, PV.Z, PV.W,
+; CM-NEXT: LSHL * T1.W, literal.x, PV.W,
+; CM-NEXT: 255(3.573311e-43), 0(0.000000e+00)
+; CM-NEXT: MOV T1.Y, 0.0,
+; CM-NEXT: MOV * T1.Z, 0.0,
+; CM-NEXT: LSHR * T0.X, T0.W, literal.x,
+; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+;
+; GCN-LABEL: i8_mad_sat_16:
+; GCN: ; %bb.0: ; %entry
+; GCN-NEXT: s_mov_b32 s20, SCRATCH_RSRC_DWORD0
+; GCN-NEXT: s_mov_b32 s21, SCRATCH_RSRC_DWORD1
+; GCN-NEXT: s_mov_b32 s22, -1
+; GCN-NEXT: s_mov_b32 s23, 0xe8f000
+; GCN-NEXT: s_add_u32 s20, s20, s11
+; GCN-NEXT: s_addc_u32 s21, s21, 0
+; GCN-NEXT: s_load_dword s8, s[4:5], 0x11
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_add_i32 s9, s8, 4
+; GCN-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9
+; GCN-NEXT: v_mov_b32_e32 v0, s8
+; GCN-NEXT: v_mov_b32_e32 v1, s9
+; GCN-NEXT: buffer_load_dword v1, v1, s[20:23], 0 offen
+; GCN-NEXT: buffer_load_dword v0, v0, s[20:23], 0 offen
+; GCN-NEXT: s_mov_b32 s11, 0xf000
+; GCN-NEXT: s_mov_b32 s10, 0
+; GCN-NEXT: s_mov_b64 s[14:15], s[10:11]
+; GCN-NEXT: s_mov_b64 s[18:19], s[10:11]
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_mov_b64 s[8:9], s[2:3]
+; GCN-NEXT: s_mov_b64 s[12:13], s[4:5]
+; GCN-NEXT: s_mov_b64 s[16:17], s[6:7]
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: buffer_load_sbyte v2, v[0:1], s[12:15], 0 addr64
+; GCN-NEXT: buffer_load_sbyte v3, v[0:1], s[8:11], 0 addr64
+; GCN-NEXT: buffer_load_sbyte v4, v[0:1], s[16:19], 0 addr64
+; GCN-NEXT: s_movk_i32 s2, 0xff80
+; GCN-NEXT: s_waitcnt vmcnt(2)
+; GCN-NEXT: v_and_b32_e32 v2, 0xffff, v2
+; GCN-NEXT: s_waitcnt vmcnt(1)
+; GCN-NEXT: v_and_b32_e32 v3, 0xffff, v3
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: v_mad_u32_u24 v2, v2, v3, v4
+; GCN-NEXT: v_bfe_i32 v2, v2, 0, 16
+; GCN-NEXT: v_mov_b32_e32 v3, 0x7f
+; GCN-NEXT: v_med3_i32 v2, v2, s2, v3
+; GCN-NEXT: s_mov_b64 s[2:3], s[10:11]
+; GCN-NEXT: buffer_store_byte v2, v[0:1], s[0:3], 0 addr64
+; GCN-NEXT: s_endpgm
+;
+; SI-LABEL: i8_mad_sat_16:
+; SI: ; %bb.0: ; %entry
+; SI-NEXT: s_mov_b32 s88, SCRATCH_RSRC_DWORD0
+; SI-NEXT: s_load_dword s0, s[4:5], 0x44
+; SI-NEXT: s_mov_b32 s89, SCRATCH_RSRC_DWORD1
+; SI-NEXT: s_mov_b32 s90, -1
+; SI-NEXT: s_mov_b32 s91, 0xe80000
+; SI-NEXT: s_add_u32 s88, s88, s11
+; SI-NEXT: s_addc_u32 s89, s89, 0
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: s_add_i32 s1, s0, 4
+; SI-NEXT: v_mov_b32_e32 v0, s0
+; SI-NEXT: buffer_load_dword v6, v0, s[88:91], 0 offen
+; SI-NEXT: v_mov_b32_e32 v0, s1
+; SI-NEXT: buffer_load_dword v7, v0, s[88:91], 0 offen
+; SI-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x24
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: v_mov_b32_e32 v1, s3
+; SI-NEXT: v_mov_b32_e32 v3, s5
+; SI-NEXT: v_mov_b32_e32 v5, s7
+; SI-NEXT: s_waitcnt vmcnt(1)
+; SI-NEXT: v_add_u32_e32 v0, vcc, s2, v6
+; SI-NEXT: s_waitcnt vmcnt(0)
+; SI-NEXT: v_addc_u32_e32 v1, vcc, v1, v7, vcc
+; SI-NEXT: v_add_u32_e32 v2, vcc, s4, v6
+; SI-NEXT: v_addc_u32_e32 v3, vcc, v3, v7, vcc
+; SI-NEXT: v_add_u32_e32 v4, vcc, s6, v6
+; SI-NEXT: v_addc_u32_e32 v5, vcc, v5, v7, vcc
+; SI-NEXT: flat_load_sbyte v0, v[0:1]
+; SI-NEXT: flat_load_sbyte v1, v[2:3]
+; SI-NEXT: flat_load_sbyte v2, v[4:5]
+; SI-NEXT: v_mov_b32_e32 v3, s1
+; SI-NEXT: s_waitcnt vmcnt(0)
+; SI-NEXT: v_mad_u16 v0, v1, v0, v2
+; SI-NEXT: v_max_i16_e32 v0, 0xff80, v0
+; SI-NEXT: v_min_i16_e32 v2, 0x7f, v0
+; SI-NEXT: v_add_u32_e32 v0, vcc, s0, v6
+; SI-NEXT: v_addc_u32_e32 v1, vcc, v3, v7, vcc
+; SI-NEXT: flat_store_byte v[0:1], v2
+; SI-NEXT: s_endpgm
+;
+; VI-LABEL: i8_mad_sat_16:
+; VI: ; %bb.0: ; %entry
+; VI-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
+; VI-NEXT: s_load_dword s0, s[4:5], 0x44
+; VI-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
+; VI-NEXT: s_mov_b32 s14, -1
+; VI-NEXT: s_mov_b32 s15, 0xe80000
+; VI-NEXT: s_add_u32 s12, s12, s11
+; VI-NEXT: s_addc_u32 s13, s13, 0
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: s_add_i32 s1, s0, 4
+; VI-NEXT: v_mov_b32_e32 v0, s0
+; VI-NEXT: buffer_load_dword v6, v0, s[12:15], 0 offen
+; VI-NEXT: v_mov_b32_e32 v0, s1
+; VI-NEXT: buffer_load_dword v7, v0, s[12:15], 0 offen
+; VI-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x24
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: v_mov_b32_e32 v1, s3
+; VI-NEXT: v_mov_b32_e32 v3, s5
+; VI-NEXT: v_mov_b32_e32 v5, s7
+; VI-NEXT: s_waitcnt vmcnt(1)
+; VI-NEXT: v_add_u32_e32 v0, vcc, s2, v6
+; VI-NEXT: s_waitcnt vmcnt(0)
+; VI-NEXT: v_addc_u32_e32 v1, vcc, v1, v7, vcc
+; VI-NEXT: v_add_u32_e32 v2, vcc, s4, v6
+; VI-NEXT: v_addc_u32_e32 v3, vcc, v3, v7, vcc
+; VI-NEXT: v_add_u32_e32 v4, vcc, s6, v6
+; VI-NEXT: v_addc_u32_e32 v5, vcc, v5, v7, vcc
+; VI-NEXT: flat_load_sbyte v0, v[0:1]
+; VI-NEXT: flat_load_sbyte v1, v[2:3]
+; VI-NEXT: flat_load_sbyte v2, v[4:5]
+; VI-NEXT: v_mov_b32_e32 v3, s1
+; VI-NEXT: s_waitcnt vmcnt(0)
+; VI-NEXT: v_mad_u16 v0, v1, v0, v2
+; VI-NEXT: v_max_i16_e32 v0, 0xff80, v0
+; VI-NEXT: v_min_i16_e32 v2, 0x7f, v0
+; VI-NEXT: v_add_u32_e32 v0, vcc, s0, v6
+; VI-NEXT: v_addc_u32_e32 v1, vcc, v3, v7, vcc
+; VI-NEXT: flat_store_byte v[0:1], v2
+; VI-NEXT: s_endpgm
entry:
%retval.0.i = load i64, ptr addrspace(5) %idx
%arrayidx = getelementptr inbounds i8, ptr addrspace(1) %in0, i64 %retval.0.i
@@ -187,16 +912,201 @@ entry:
ret void
}
-; FUNC-LABEL: {{^}}i8_mad_32:
-; EG: MULLO_INT {{[* ]*}}T{{[0-9]}}.[[MAD_CHAN:[XYZW]]]
-; EG: ADD_INT {{[* ]*}}T{{[0-9]}}.[[MAD_CHAN:[XYZW]]]
-; The result must be sign-extended
-; EG: BFE_INT {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[MAD_CHAN]], 0.0, literal.x
-; EG: 8
-; SI: v_mad_u32_u24 [[MAD:v[0-9]]], {{[sv][0-9], [sv][0-9]}}
-; VI: v_mad_u16 [[MAD:v[0-9]]], {{[sv][0-9], [sv][0-9]}}
-; GCN: v_bfe_i32 [[EXT:v[0-9]]], [[MAD]], 0, 16
define amdgpu_kernel void @i8_mad_32(ptr addrspace(1) %out, ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %c, ptr addrspace(5) %idx) {
+; EG-LABEL: i8_mad_32:
+; EG: ; %bb.0: ; %entry
+; EG-NEXT: ALU 4, @14, KC0[CB0:0-32], KC1[]
+; EG-NEXT: TEX 0 @8
+; EG-NEXT: ALU 1, @19, KC0[CB0:0-32], KC1[]
+; EG-NEXT: TEX 1 @10
+; EG-NEXT: ALU 9, @21, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T1.X, 1
+; EG-NEXT: CF_END
+; EG-NEXT: PAD
+; EG-NEXT: Fetch clause starting at 8:
+; EG-NEXT: VTX_READ_8 T1.X, T1.X, 0, #1
+; EG-NEXT: Fetch clause starting at 10:
+; EG-NEXT: VTX_READ_8 T0.X, T0.X, 0, #1
+; EG-NEXT: VTX_READ_8 T2.X, T2.X, 0, #1
+; EG-NEXT: ALU clause starting at 14:
+; EG-NEXT: LSHR * T0.W, KC0[3].Y, literal.x,
+; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; EG-NEXT: MOVA_INT * AR.x (MASKED), PV.W,
+; EG-NEXT: MOV * T0.X, T(0 + AR.x).X+,
+; EG-NEXT: ADD_INT * T1.X, KC0[2].W, PV.X,
+; EG-NEXT: ALU clause starting at 19:
+; EG-NEXT: ADD_INT T2.X, KC0[2].Z, T0.X,
+; EG-NEXT: ADD_INT * T0.X, KC0[3].X, T0.X,
+; EG-NEXT: ALU clause starting at 21:
+; EG-NEXT: BFE_INT T0.Z, T1.X, 0.0, literal.x,
+; EG-NEXT: BFE_INT * T0.W, T2.X, 0.0, literal.x, BS:VEC_120/SCL_212
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: BFE_INT T1.W, T0.X, 0.0, literal.x,
+; EG-NEXT: MULLO_INT * T0.X, PV.W, PV.Z,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: ADD_INT * T0.W, PS, PV.W,
+; EG-NEXT: BFE_INT T0.X, PV.W, 0.0, literal.x,
+; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y,
+; EG-NEXT: 16(2.242078e-44), 2(2.802597e-45)
+;
+; CM-LABEL: i8_mad_32:
+; CM: ; %bb.0: ; %entry
+; CM-NEXT: ALU 4, @14, KC0[CB0:0-32], KC1[]
+; CM-NEXT: TEX 0 @8
+; CM-NEXT: ALU 1, @19, KC0[CB0:0-32], KC1[]
+; CM-NEXT: TEX 1 @10
+; CM-NEXT: ALU 12, @21, KC0[CB0:0-32], KC1[]
+; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0.X, T1.X
+; CM-NEXT: CF_END
+; CM-NEXT: PAD
+; CM-NEXT: Fetch clause starting at 8:
+; CM-NEXT: VTX_READ_8 T1.X, T1.X, 0, #1
+; CM-NEXT: Fetch clause starting at 10:
+; CM-NEXT: VTX_READ_8 T0.X, T0.X, 0, #1
+; CM-NEXT: VTX_READ_8 T2.X, T2.X, 0, #1
+; CM-NEXT: ALU clause starting at 14:
+; CM-NEXT: LSHR * T0.W, KC0[3].Y, literal.x,
+; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; CM-NEXT: MOVA_INT * AR.x (MASKED), PV.W,
+; CM-NEXT: MOV * T0.X, T(0 + AR.x).X+,
+; CM-NEXT: ADD_INT * T1.X, KC0[3].X, PV.X,
+; CM-NEXT: ALU clause starting at 19:
+; CM-NEXT: ADD_INT * T2.X, KC0[2].W, T0.X,
+; CM-NEXT: ADD_INT * T0.X, KC0[2].Z, T0.X,
+; CM-NEXT: ALU clause starting at 21:
+; CM-NEXT: BFE_INT T0.Y, T1.X, 0.0, literal.x,
+; CM-NEXT: BFE_INT T0.Z, T2.X, 0.0, literal.x, BS:VEC_120/SCL_212
+; CM-NEXT: BFE_INT * T0.W, T0.X, 0.0, literal.x, BS:VEC_201
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: MULLO_INT T0.X, T0.W, T0.Z,
+; CM-NEXT: MULLO_INT T0.Y (MASKED), T0.W, T0.Z,
+; CM-NEXT: MULLO_INT T0.Z (MASKED), T0.W, T0.Z,
+; CM-NEXT: MULLO_INT * T0.W (MASKED), T0.W, T0.Z,
+; CM-NEXT: ADD_INT * T0.W, PV.X, T0.Y,
+; CM-NEXT: BFE_INT * T0.X, PV.W, 0.0, literal.x,
+; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; CM-NEXT: LSHR * T1.X, KC0[2].Y, literal.x,
+; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+;
+; GCN-LABEL: i8_mad_32:
+; GCN: ; %bb.0: ; %entry
+; GCN-NEXT: s_mov_b32 s24, SCRATCH_RSRC_DWORD0
+; GCN-NEXT: s_mov_b32 s25, SCRATCH_RSRC_DWORD1
+; GCN-NEXT: s_mov_b32 s26, -1
+; GCN-NEXT: s_mov_b32 s27, 0xe8f000
+; GCN-NEXT: s_add_u32 s24, s24, s11
+; GCN-NEXT: s_addc_u32 s25, s25, 0
+; GCN-NEXT: s_load_dword s8, s[4:5], 0x11
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_add_i32 s9, s8, 4
+; GCN-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9
+; GCN-NEXT: v_mov_b32_e32 v0, s8
+; GCN-NEXT: v_mov_b32_e32 v1, s9
+; GCN-NEXT: buffer_load_dword v1, v1, s[24:27], 0 offen
+; GCN-NEXT: buffer_load_dword v0, v0, s[24:27], 0 offen
+; GCN-NEXT: s_mov_b32 s11, 0xf000
+; GCN-NEXT: s_mov_b32 s14, 0
+; GCN-NEXT: s_mov_b32 s15, s11
+; GCN-NEXT: s_mov_b64 s[18:19], s[14:15]
+; GCN-NEXT: s_mov_b64 s[22:23], s[14:15]
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GCN-NEXT: s_mov_b64 s[16:17], s[4:5]
+; GCN-NEXT: s_mov_b64 s[20:21], s[6:7]
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: buffer_load_sbyte v2, v[0:1], s[12:15], 0 addr64
+; GCN-NEXT: buffer_load_sbyte v3, v[0:1], s[16:19], 0 addr64
+; GCN-NEXT: buffer_load_sbyte v0, v[0:1], s[20:23], 0 addr64
+; GCN-NEXT: s_mov_b32 s10, -1
+; GCN-NEXT: s_mov_b32 s8, s0
+; GCN-NEXT: s_mov_b32 s9, s1
+; GCN-NEXT: s_waitcnt vmcnt(2)
+; GCN-NEXT: v_and_b32_e32 v1, 0xffff, v2
+; GCN-NEXT: s_waitcnt vmcnt(1)
+; GCN-NEXT: v_and_b32_e32 v2, 0xffff, v3
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: v_mad_u32_u24 v0, v1, v2, v0
+; GCN-NEXT: v_bfe_i32 v0, v0, 0, 16
+; GCN-NEXT: buffer_store_dword v0, off, s[8:11], 0
+; GCN-NEXT: s_endpgm
+;
+; SI-LABEL: i8_mad_32:
+; SI: ; %bb.0: ; %entry
+; SI-NEXT: s_mov_b32 s88, SCRATCH_RSRC_DWORD0
+; SI-NEXT: s_load_dword s0, s[4:5], 0x44
+; SI-NEXT: s_mov_b32 s89, SCRATCH_RSRC_DWORD1
+; SI-NEXT: s_mov_b32 s90, -1
+; SI-NEXT: s_mov_b32 s91, 0xe80000
+; SI-NEXT: s_add_u32 s88, s88, s11
+; SI-NEXT: s_addc_u32 s89, s89, 0
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: s_add_i32 s1, s0, 4
+; SI-NEXT: v_mov_b32_e32 v0, s0
+; SI-NEXT: buffer_load_dword v4, v0, s[88:91], 0 offen
+; SI-NEXT: v_mov_b32_e32 v0, s1
+; SI-NEXT: buffer_load_dword v5, v0, s[88:91], 0 offen
+; SI-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x24
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: v_mov_b32_e32 v1, s3
+; SI-NEXT: v_mov_b32_e32 v3, s5
+; SI-NEXT: v_mov_b32_e32 v6, s7
+; SI-NEXT: s_mov_b32 s3, 0xf000
+; SI-NEXT: s_waitcnt vmcnt(1)
+; SI-NEXT: v_add_u32_e32 v0, vcc, s2, v4
+; SI-NEXT: s_waitcnt vmcnt(0)
+; SI-NEXT: v_addc_u32_e32 v1, vcc, v1, v5, vcc
+; SI-NEXT: v_add_u32_e32 v2, vcc, s4, v4
+; SI-NEXT: v_addc_u32_e32 v3, vcc, v3, v5, vcc
+; SI-NEXT: v_add_u32_e32 v4, vcc, s6, v4
+; SI-NEXT: v_addc_u32_e32 v5, vcc, v6, v5, vcc
+; SI-NEXT: flat_load_sbyte v0, v[0:1]
+; SI-NEXT: flat_load_sbyte v1, v[2:3]
+; SI-NEXT: flat_load_sbyte v2, v[4:5]
+; SI-NEXT: s_mov_b32 s2, -1
+; SI-NEXT: s_waitcnt vmcnt(0)
+; SI-NEXT: v_mad_u16 v0, v0, v1, v2
+; SI-NEXT: v_bfe_i32 v0, v0, 0, 16
+; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; SI-NEXT: s_endpgm
+;
+; VI-LABEL: i8_mad_32:
+; VI: ; %bb.0: ; %entry
+; VI-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
+; VI-NEXT: s_load_dword s0, s[4:5], 0x44
+; VI-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
+; VI-NEXT: s_mov_b32 s14, -1
+; VI-NEXT: s_mov_b32 s15, 0xe80000
+; VI-NEXT: s_add_u32 s12, s12, s11
+; VI-NEXT: s_addc_u32 s13, s13, 0
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: s_add_i32 s1, s0, 4
+; VI-NEXT: v_mov_b32_e32 v0, s0
+; VI-NEXT: buffer_load_dword v4, v0, s[12:15], 0 offen
+; VI-NEXT: v_mov_b32_e32 v0, s1
+; VI-NEXT: buffer_load_dword v5, v0, s[12:15], 0 offen
+; VI-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x24
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: v_mov_b32_e32 v1, s3
+; VI-NEXT: v_mov_b32_e32 v3, s5
+; VI-NEXT: v_mov_b32_e32 v6, s7
+; VI-NEXT: s_mov_b32 s3, 0xf000
+; VI-NEXT: s_waitcnt vmcnt(1)
+; VI-NEXT: v_add_u32_e32 v0, vcc, s2, v4
+; VI-NEXT: s_waitcnt vmcnt(0)
+; VI-NEXT: v_addc_u32_e32 v1, vcc, v1, v5, vcc
+; VI-NEXT: v_add_u32_e32 v2, vcc, s4, v4
+; VI-NEXT: v_addc_u32_e32 v3, vcc, v3, v5, vcc
+; VI-NEXT: v_add_u32_e32 v4, vcc, s6, v4
+; VI-NEXT: v_addc_u32_e32 v5, vcc, v6, v5, vcc
+; VI-NEXT: flat_load_sbyte v0, v[0:1]
+; VI-NEXT: flat_load_sbyte v1, v[2:3]
+; VI-NEXT: flat_load_sbyte v2, v[4:5]
+; VI-NEXT: s_mov_b32 s2, -1
+; VI-NEXT: s_waitcnt vmcnt(0)
+; VI-NEXT: v_mad_u16 v0, v0, v1, v2
+; VI-NEXT: v_bfe_i32 v0, v0, 0, 16
+; VI-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; VI-NEXT: s_endpgm
entry:
%retval.0.i = load i64, ptr addrspace(5) %idx
%arrayidx = getelementptr inbounds i8, ptr addrspace(1) %a, i64 %retval.0.i
@@ -215,16 +1125,207 @@ entry:
ret void
}
-; FUNC-LABEL: {{^}}i8_mad_64:
-; EG: MULLO_INT {{[* ]*}}T{{[0-9]}}.[[MAD_CHAN:[XYZW]]]
-; EG: ADD_INT {{[* ]*}}T{{[0-9]}}.[[MAD_CHAN:[XYZW]]]
-; The result must be sign-extended
-; EG: BFE_INT {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[MAD_CHAN]], 0.0, literal.x
-; EG: 8
-; SI: v_mad_u32_u24 [[MAD:v[0-9]]], {{[sv][0-9], [sv][0-9]}}
-; VI: v_mad_u16 [[MAD:v[0-9]]], {{[sv][0-9], [sv][0-9]}}
-; GCN: v_bfe_i32 [[EXT:v[0-9]]], [[MAD]], 0, 16
define amdgpu_kernel void @i8_mad_64(ptr addrspace(1) %out, ptr addrspace(1) %a, ptr addrspace(1) %b, ptr addrspace(1) %c, ptr addrspace(5) %idx) {
+; EG-LABEL: i8_mad_64:
+; EG: ; %bb.0: ; %entry
+; EG-NEXT: ALU 4, @14, KC0[CB0:0-32], KC1[]
+; EG-NEXT: TEX 0 @8
+; EG-NEXT: ALU 1, @19, KC0[CB0:0-32], KC1[]
+; EG-NEXT: TEX 1 @10
+; EG-NEXT: ALU 11, @21, KC0[CB0:0-32], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XY, T1.X, 1
+; EG-NEXT: CF_END
+; EG-NEXT: PAD
+; EG-NEXT: Fetch clause starting at 8:
+; EG-NEXT: VTX_READ_8 T1.X, T1.X, 0, #1
+; EG-NEXT: Fetch clause starting at 10:
+; EG-NEXT: VTX_READ_8 T0.X, T0.X, 0, #1
+; EG-NEXT: VTX_READ_8 T2.X, T2.X, 0, #1
+; EG-NEXT: ALU clause starting at 14:
+; EG-NEXT: LSHR * T0.W, KC0[3].Y, literal.x,
+; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; EG-NEXT: MOVA_INT * AR.x (MASKED), PV.W,
+; EG-NEXT: MOV * T0.X, T(0 + AR.x).X+,
+; EG-NEXT: ADD_INT * T1.X, KC0[2].W, PV.X,
+; EG-NEXT: ALU clause starting at 19:
+; EG-NEXT: ADD_INT T2.X, KC0[2].Z, T0.X,
+; EG-NEXT: ADD_INT * T0.X, KC0[3].X, T0.X,
+; EG-NEXT: ALU clause starting at 21:
+; EG-NEXT: BFE_INT T0.Z, T1.X, 0.0, literal.x,
+; EG-NEXT: BFE_INT * T0.W, T2.X, 0.0, literal.x, BS:VEC_120/SCL_212
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: BFE_INT T1.W, T0.X, 0.0, literal.x,
+; EG-NEXT: MULLO_INT * T0.X, PV.W, PV.Z,
+; EG-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; EG-NEXT: ADD_INT * T0.W, PS, PV.W,
+; EG-NEXT: BFE_INT T0.X, PV.W, 0.0, literal.x,
+; EG-NEXT: LSHR * T1.X, KC0[2].Y, literal.y,
+; EG-NEXT: 16(2.242078e-44), 2(2.802597e-45)
+; EG-NEXT: ASHR * T0.Y, PV.X, literal.x,
+; EG-NEXT: 31(4.344025e-44), 0(0.000000e+00)
+;
+; CM-LABEL: i8_mad_64:
+; CM: ; %bb.0: ; %entry
+; CM-NEXT: ALU 4, @14, KC0[CB0:0-32], KC1[]
+; CM-NEXT: TEX 0 @8
+; CM-NEXT: ALU 1, @19, KC0[CB0:0-32], KC1[]
+; CM-NEXT: TEX 1 @10
+; CM-NEXT: ALU 13, @21, KC0[CB0:0-32], KC1[]
+; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0, T1.X
+; CM-NEXT: CF_END
+; CM-NEXT: PAD
+; CM-NEXT: Fetch clause starting at 8:
+; CM-NEXT: VTX_READ_8 T1.X, T1.X, 0, #1
+; CM-NEXT: Fetch clause starting at 10:
+; CM-NEXT: VTX_READ_8 T0.X, T0.X, 0, #1
+; CM-NEXT: VTX_READ_8 T2.X, T2.X, 0, #1
+; CM-NEXT: ALU clause starting at 14:
+; CM-NEXT: LSHR * T0.W, KC0[3].Y, literal.x,
+; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; CM-NEXT: MOVA_INT * AR.x (MASKED), PV.W,
+; CM-NEXT: MOV * T0.X, T(0 + AR.x).X+,
+; CM-NEXT: ADD_INT * T1.X, KC0[3].X, PV.X,
+; CM-NEXT: ALU clause starting at 19:
+; CM-NEXT: ADD_INT * T2.X, KC0[2].W, T0.X,
+; CM-NEXT: ADD_INT * T0.X, KC0[2].Z, T0.X,
+; CM-NEXT: ALU clause starting at 21:
+; CM-NEXT: BFE_INT T0.Y, T1.X, 0.0, literal.x,
+; CM-NEXT: BFE_INT T0.Z, T2.X, 0.0, literal.x, BS:VEC_120/SCL_212
+; CM-NEXT: BFE_INT * T0.W, T0.X, 0.0, literal.x, BS:VEC_201
+; CM-NEXT: 8(1.121039e-44), 0(0.000000e+00)
+; CM-NEXT: MULLO_INT T0.X, T0.W, T0.Z,
+; CM-NEXT: MULLO_INT T0.Y (MASKED), T0.W, T0.Z,
+; CM-NEXT: MULLO_INT T0.Z (MASKED), T0.W, T0.Z,
+; CM-NEXT: MULLO_INT * T0.W (MASKED), T0.W, T0.Z,
+; CM-NEXT: ADD_INT * T0.W, PV.X, T0.Y,
+; CM-NEXT: BFE_INT * T0.X, PV.W, 0.0, literal.x,
+; CM-NEXT: 16(2.242078e-44), 0(0.000000e+00)
+; CM-NEXT: LSHR T1.X, KC0[2].Y, literal.x,
+; CM-NEXT: ASHR * T0.Y, PV.X, literal.y,
+; CM-NEXT: 2(2.802597e-45), 31(4.344025e-44)
+;
+; GCN-LABEL: i8_mad_64:
+; GCN: ; %bb.0: ; %entry
+; GCN-NEXT: s_mov_b32 s24, SCRATCH_RSRC_DWORD0
+; GCN-NEXT: s_mov_b32 s25, SCRATCH_RSRC_DWORD1
+; GCN-NEXT: s_mov_b32 s26, -1
+; GCN-NEXT: s_mov_b32 s27, 0xe8f000
+; GCN-NEXT: s_add_u32 s24, s24, s11
+; GCN-NEXT: s_addc_u32 s25, s25, 0
+; GCN-NEXT: s_load_dword s8, s[4:5], 0x11
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_add_i32 s9, s8, 4
+; GCN-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9
+; GCN-NEXT: v_mov_b32_e32 v0, s8
+; GCN-NEXT: v_mov_b32_e32 v1, s9
+; GCN-NEXT: buffer_load_dword v1, v1, s[24:27], 0 offen
+; GCN-NEXT: buffer_load_dword v0, v0, s[24:27], 0 offen
+; GCN-NEXT: s_mov_b32 s11, 0xf000
+; GCN-NEXT: s_mov_b32 s14, 0
+; GCN-NEXT: s_mov_b32 s15, s11
+; GCN-NEXT: s_mov_b64 s[18:19], s[14:15]
+; GCN-NEXT: s_mov_b64 s[22:23], s[14:15]
+; GCN-NEXT: s_waitcnt lgkmcnt(0)
+; GCN-NEXT: s_mov_b64 s[12:13], s[2:3]
+; GCN-NEXT: s_mov_b64 s[16:17], s[4:5]
+; GCN-NEXT: s_mov_b64 s[20:21], s[6:7]
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: buffer_load_sbyte v2, v[0:1], s[12:15], 0 addr64
+; GCN-NEXT: buffer_load_sbyte v3, v[0:1], s[16:19], 0 addr64
+; GCN-NEXT: buffer_load_sbyte v0, v[0:1], s[20:23], 0 addr64
+; GCN-NEXT: s_mov_b32 s10, -1
+; GCN-NEXT: s_mov_b32 s8, s0
+; GCN-NEXT: s_mov_b32 s9, s1
+; GCN-NEXT: s_waitcnt vmcnt(2)
+; GCN-NEXT: v_and_b32_e32 v1, 0xffff, v2
+; GCN-NEXT: s_waitcnt vmcnt(1)
+; GCN-NEXT: v_and_b32_e32 v2, 0xffff, v3
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: v_mad_u32_u24 v0, v1, v2, v0
+; GCN-NEXT: v_bfe_i32 v0, v0, 0, 16
+; GCN-NEXT: v_ashrrev_i32_e32 v1, 31, v0
+; GCN-NEXT: buffer_store_dwordx2 v[0:1], off, s[8:11], 0
+; GCN-NEXT: s_endpgm
+;
+; SI-LABEL: i8_mad_64:
+; SI: ; %bb.0: ; %entry
+; SI-NEXT: s_mov_b32 s88, SCRATCH_RSRC_DWORD0
+; SI-NEXT: s_load_dword s0, s[4:5], 0x44
+; SI-NEXT: s_mov_b32 s89, SCRATCH_RSRC_DWORD1
+; SI-NEXT: s_mov_b32 s90, -1
+; SI-NEXT: s_mov_b32 s91, 0xe80000
+; SI-NEXT: s_add_u32 s88, s88, s11
+; SI-NEXT: s_addc_u32 s89, s89, 0
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: s_add_i32 s1, s0, 4
+; SI-NEXT: v_mov_b32_e32 v0, s0
+; SI-NEXT: buffer_load_dword v4, v0, s[88:91], 0 offen
+; SI-NEXT: v_mov_b32_e32 v0, s1
+; SI-NEXT: buffer_load_dword v5, v0, s[88:91], 0 offen
+; SI-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x24
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: v_mov_b32_e32 v1, s3
+; SI-NEXT: v_mov_b32_e32 v3, s5
+; SI-NEXT: v_mov_b32_e32 v6, s7
+; SI-NEXT: s_mov_b32 s3, 0xf000
+; SI-NEXT: s_waitcnt vmcnt(1)
+; SI-NEXT: v_add_u32_e32 v0, vcc, s2, v4
+; SI-NEXT: s_waitcnt vmcnt(0)
+; SI-NEXT: v_addc_u32_e32 v1, vcc, v1, v5, vcc
+; SI-NEXT: v_add_u32_e32 v2, vcc, s4, v4
+; SI-NEXT: v_addc_u32_e32 v3, vcc, v3, v5, vcc
+; SI-NEXT: v_add_u32_e32 v4, vcc, s6, v4
+; SI-NEXT: v_addc_u32_e32 v5, vcc, v6, v5, vcc
+; SI-NEXT: flat_load_sbyte v0, v[0:1]
+; SI-NEXT: flat_load_sbyte v1, v[2:3]
+; SI-NEXT: flat_load_sbyte v2, v[4:5]
+; SI-NEXT: s_mov_b32 s2, -1
+; SI-NEXT: s_waitcnt vmcnt(0)
+; SI-NEXT: v_mad_u16 v0, v0, v1, v2
+; SI-NEXT: v_bfe_i32 v0, v0, 0, 16
+; SI-NEXT: v_ashrrev_i32_e32 v1, 31, v0
+; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
+; SI-NEXT: s_endpgm
+;
+; VI-LABEL: i8_mad_64:
+; VI: ; %bb.0: ; %entry
+; VI-NEXT: s_mov_b32 s12, SCRATCH_RSRC_DWORD0
+; VI-NEXT: s_load_dword s0, s[4:5], 0x44
+; VI-NEXT: s_mov_b32 s13, SCRATCH_RSRC_DWORD1
+; VI-NEXT: s_mov_b32 s14, -1
+; VI-NEXT: s_mov_b32 s15, 0xe80000
+; VI-NEXT: s_add_u32 s12, s12, s11
+; VI-NEXT: s_addc_u32 s13, s13, 0
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: s_add_i32 s1, s0, 4
+; VI-NEXT: v_mov_b32_e32 v0, s0
+; VI-NEXT: buffer_load_dword v4, v0, s[12:15], 0 offen
+; VI-NEXT: v_mov_b32_e32 v0, s1
+; VI-NEXT: buffer_load_dword v5, v0, s[12:15], 0 offen
+; VI-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x24
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: v_mov_b32_e32 v1, s3
+; VI-NEXT: v_mov_b32_e32 v3, s5
+; VI-NEXT: v_mov_b32_e32 v6, s7
+; VI-NEXT: s_mov_b32 s3, 0xf000
+; VI-NEXT: s_waitcnt vmcnt(1)
+; VI-NEXT: v_add_u32_e32 v0, vcc, s2, v4
+; VI-NEXT: s_waitcnt vmcnt(0)
+; VI-NEXT: v_addc_u32_e32 v1, vcc, v1, v5, vcc
+; VI-NEXT: v_add_u32_e32 v2, vcc, s4, v4
+; VI-NEXT: v_addc_u32_e32 v3, vcc, v3, v5, vcc
+; VI-NEXT: v_add_u32_e32 v4, vcc, s6, v4
+; VI-NEXT: v_addc_u32_e32 v5, vcc, v6, v5, vcc
+; VI-NEXT: flat_load_sbyte v0, v[0:1]
+; VI-NEXT: flat_load_sbyte v1, v[2:3]
+; VI-NEXT: flat_load_sbyte v2, v[4:5]
+; VI-NEXT: s_mov_b32 s2, -1
+; VI-NEXT: s_waitcnt vmcnt(0)
+; VI-NEXT: v_mad_u16 v0, v0, v1, v2
+; VI-NEXT: v_bfe_i32 v0, v0, 0, 16
+; VI-NEXT: v_ashrrev_i32_e32 v1, 31, v0
+; VI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
+; VI-NEXT: s_endpgm
entry:
%retval.0.i = load i64, ptr addrspace(5) %idx
%arrayidx = getelementptr inbounds i8, ptr addrspace(1) %a, i64 %retval.0.i
@@ -248,17 +1349,236 @@ entry:
; had a chance to form mul24. The mul combine would then see
; extractelement with no known bits and fail. All of the mul/add
; combos in this loop should form v_mad_u32_u24.
-
-; FUNC-LABEL: {{^}}mad24_known_bits_destroyed:
-; GCN: v_mad_u32_u24
-; GCN: v_mad_u32_u24
-; GCN: v_mad_u32_u24
-; GCN: v_mad_u32_u24
-; GCN: v_mad_u32_u24
-; GCN: v_mad_u32_u24
-; GCN: v_mad_u32_u24
-; GCN: v_mad_u32_u24
define void @mad24_known_bits_destroyed(i32 %arg, <4 x i32> %arg1, <4 x i32> %arg2, <4 x i32> %arg3, i32 %arg4, i32 %arg5, i32 %arg6, ptr addrspace(1) %arg7, ptr addrspace(1) %arg8) #0 {
+; EG-LABEL: mad24_known_bits_destroyed:
+; EG: ; %bb.0: ; %bb
+; EG-NEXT: ALU 21, @12, KC0[CB0:0-32], KC1[]
+; EG-NEXT: LOOP_START_DX10 @11
+; EG-NEXT: ALU 8, @34, KC0[], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.X, T2.X, 0
+; EG-NEXT: ALU 14, @43, KC0[], KC1[]
+; EG-NEXT: MEM_RAT_CACHELESS STORE_RAW T0.XYZW, T1.X, 0
+; EG-NEXT: ALU_PUSH_BEFORE 3, @58, KC0[], KC1[]
+; EG-NEXT: JUMP @10 POP:1
+; EG-NEXT: LOOP_BREAK @10
+; EG-NEXT: POP @10 POP:1
+; EG-NEXT: END_LOOP @2
+; EG-NEXT: CF_END
+; EG-NEXT: ALU clause starting at 12:
+; EG-NEXT: MOV * T0.W, KC0[5].X,
+; EG-NEXT: MOV * T0.Z, KC0[4].W,
+; EG-NEXT: MOV * T0.Y, KC0[4].Z,
+; EG-NEXT: MOV T0.X, KC0[2].Y,
+; EG-NEXT: AND_INT * T1.Y, KC0[4].X, literal.x,
+; EG-NEXT: 16777215(2.350989e-38), 0(0.000000e+00)
+; EG-NEXT: AND_INT T1.Z, KC0[3].W, literal.x,
+; EG-NEXT: AND_INT T1.W, KC0[3].Z, literal.x,
+; EG-NEXT: MOV * T2.W, KC0[7].Y,
+; EG-NEXT: 16777215(2.350989e-38), 0(0.000000e+00)
+; EG-NEXT: LSHR T1.X, PS, literal.x,
+; EG-NEXT: AND_INT T2.Y, KC0[6].Y, literal.y,
+; EG-NEXT: MOV T2.Z, KC0[6].X,
+; EG-NEXT: MOV * T2.W, KC0[5].W,
+; EG-NEXT: 2(2.802597e-45), 16777215(2.350989e-38)
+; EG-NEXT: MOV * T3.W, KC0[7].X,
+; EG-NEXT: LSHR T2.X, PV.W, literal.x,
+; EG-NEXT: MOV T3.Y, KC0[5].Z,
+; EG-NEXT: MOV T3.Z, KC0[6].Z,
+; EG-NEXT: MOV * T3.W, KC0[6].W,
+; EG-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; EG-NEXT: MOV * T4.W, KC0[4].Y,
+; EG-NEXT: ALU clause starting at 34:
+; EG-NEXT: MULLO_INT * T0.X, T0.X, T2.Y,
+; EG-NEXT: ADD_INT * T4.W, PS, T3.Z,
+; EG-NEXT: AND_INT * T4.W, PV.W, literal.x,
+; EG-NEXT: 16777215(2.350989e-38), 0(0.000000e+00)
+; EG-NEXT: MULLO_INT * T0.X, PV.W, T2.Y,
+; EG-NEXT: MULLO_INT * T0.W, T0.W, T1.Y,
+; EG-NEXT: MULLO_INT * T0.Z, T0.Z, T1.Z,
+; EG-NEXT: MULLO_INT * T0.Y, T0.Y, T1.W,
+; EG-NEXT: ADD_INT * T0.X, T0.X, T3.Z,
+; EG-NEXT: ALU clause starting at 43:
+; EG-NEXT: ADD_INT * T4.W, T0.Y, T3.Y,
+; EG-NEXT: AND_INT T4.W, PV.W, literal.x,
+; EG-NEXT: ADD_INT * T5.W, T0.Z, T2.W,
+; EG-NEXT: 16777215(2.350989e-38), 0(0.000000e+00)
+; EG-NEXT: AND_INT T0.Z, PS, literal.x,
+; EG-NEXT: ADD_INT T0.W, T0.W, T2.Z,
+; EG-NEXT: MULLO_INT * T0.Y, PV.W, T1.W,
+; EG-NEXT: 16777215(2.350989e-38), 0(0.000000e+00)
+; EG-NEXT: ADD_INT T0.Y, PS, T3.Y,
+; EG-NEXT: AND_INT T0.W, PV.W, literal.x,
+; EG-NEXT: MULLO_INT * T0.Z, PV.Z, T1.Z,
+; EG-NEXT: 16777215(2.350989e-38), 0(0.000000e+00)
+; EG-NEXT: ADD_INT T0.Z, PS, T2.W,
+; EG-NEXT: MULLO_INT * T0.W, PV.W, T1.Y,
+; EG-NEXT: ADD_INT * T0.W, PS, T2.Z,
+; EG-NEXT: ALU clause starting at 58:
+; EG-NEXT: ADD_INT * T3.W, T3.W, literal.x,
+; EG-NEXT: -1(nan), 0(0.000000e+00)
+; EG-NEXT: SETE_INT * T4.W, PV.W, 0.0,
+; EG-NEXT: PRED_SETNE_INT * ExecMask,PredicateBit (MASKED), PV.W, 0.0,
+;
+; CM-LABEL: mad24_known_bits_destroyed:
+; CM: ; %bb.0: ; %bb
+; CM-NEXT: ALU 22, @12, KC0[CB0:0-32], KC1[]
+; CM-NEXT: LOOP_START_DX10 @11
+; CM-NEXT: ALU 23, @35, KC0[], KC1[]
+; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0.X, T2.X
+; CM-NEXT: ALU 23, @59, KC0[], KC1[]
+; CM-NEXT: MEM_RAT_CACHELESS STORE_DWORD T0, T1.X
+; CM-NEXT: ALU_PUSH_BEFORE 3, @83, KC0[], KC1[]
+; CM-NEXT: JUMP @10 POP:1
+; CM-NEXT: LOOP_BREAK @10
+; CM-NEXT: POP @10 POP:1
+; CM-NEXT: END_LOOP @2
+; CM-NEXT: CF_END
+; CM-NEXT: ALU clause starting at 12:
+; CM-NEXT: MOV * T0.W, KC0[5].X,
+; CM-NEXT: MOV * T0.Z, KC0[4].W,
+; CM-NEXT: MOV * T0.Y, KC0[4].Z,
+; CM-NEXT: MOV T0.X, KC0[2].Y,
+; CM-NEXT: AND_INT * T1.Y, KC0[4].X, literal.x,
+; CM-NEXT: 16777215(2.350989e-38), 0(0.000000e+00)
+; CM-NEXT: AND_INT T1.Z, KC0[3].W, literal.x,
+; CM-NEXT: AND_INT * T1.W, KC0[3].Z, literal.x,
+; CM-NEXT: 16777215(2.350989e-38), 0(0.000000e+00)
+; CM-NEXT: AND_INT T2.Y, KC0[6].Y, literal.x,
+; CM-NEXT: MOV T2.Z, KC0[6].X,
+; CM-NEXT: MOV * T2.W, KC0[7].Y,
+; CM-NEXT: 16777215(2.350989e-38), 0(0.000000e+00)
+; CM-NEXT: LSHR T1.X, PV.W, literal.x,
+; CM-NEXT: MOV T3.Y, KC0[5].W,
+; CM-NEXT: MOV T3.Z, KC0[5].Z,
+; CM-NEXT: MOV * T2.W, KC0[7].X,
+; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; CM-NEXT: LSHR T2.X, PV.W, literal.x,
+; CM-NEXT: MOV T4.Y, KC0[6].Z,
+; CM-NEXT: MOV T4.Z, KC0[6].W,
+; CM-NEXT: MOV * T2.W, KC0[4].Y,
+; CM-NEXT: 2(2.802597e-45), 0(0.000000e+00)
+; CM-NEXT: ALU clause starting at 35:
+; CM-NEXT: MULLO_INT T0.X, T0.X, T2.Y,
+; CM-NEXT: MULLO_INT T0.Y (MASKED), T0.X, T2.Y,
+; CM-NEXT: MULLO_INT T0.Z (MASKED), T0.X, T2.Y,
+; CM-NEXT: MULLO_INT * T0.W (MASKED), T0.X, T2.Y,
+; CM-NEXT: ADD_INT * T2.W, PV.X, T4.Y,
+; CM-NEXT: AND_INT * T2.W, PV.W, literal.x,
+; CM-NEXT: 16777215(2.350989e-38), 0(0.000000e+00)
+; CM-NEXT: MULLO_INT T0.X, T2.W, T2.Y,
+; CM-NEXT: MULLO_INT T0.Y (MASKED), T2.W, T2.Y,
+; CM-NEXT: MULLO_INT T0.Z (MASKED), T2.W, T2.Y,
+; CM-NEXT: MULLO_INT * T0.W (MASKED), T2.W, T2.Y,
+; CM-NEXT: MULLO_INT T0.X (MASKED), T0.W, T1.Y,
+; CM-NEXT: MULLO_INT T0.Y (MASKED), T0.W, T1.Y,
+; CM-NEXT: MULLO_INT T0.Z (MASKED), T0.W, T1.Y,
+; CM-NEXT: MULLO_INT * T0.W, T0.W, T1.Y,
+; CM-NEXT: MULLO_INT T0.X (MASKED), T0.Z, T1.Z,
+; CM-NEXT: MULLO_INT T0.Y (MASKED), T0.Z, T1.Z,
+; CM-NEXT: MULLO_INT T0.Z, T0.Z, T1.Z,
+; CM-NEXT: MULLO_INT * T0.W (MASKED), T0.Z, T1.Z,
+; CM-NEXT: MULLO_INT T0.X (MASKED), T0.Y, T1.W,
+; CM-NEXT: MULLO_INT T0.Y, T0.Y, T1.W,
+; CM-NEXT: MULLO_INT T0.Z (MASKED), T0.Y, T1.W,
+; CM-NEXT: MULLO_INT * T0.W (MASKED), T0.Y, T1.W,
+; CM-NEXT: ADD_INT * T0.X, T0.X, T4.Y,
+; CM-NEXT: ALU clause starting at 59:
+; CM-NEXT: ADD_INT * T2.W, T0.Y, T3.Z,
+; CM-NEXT: ADD_INT T0.Z, T0.Z, T3.Y,
+; CM-NEXT: AND_INT * T2.W, PV.W, literal.x,
+; CM-NEXT: 16777215(2.350989e-38), 0(0.000000e+00)
+; CM-NEXT: MULLO_INT T0.X (MASKED), T2.W, T1.W,
+; CM-NEXT: MULLO_INT T0.Y, T2.W, T1.W,
+; CM-NEXT: MULLO_INT T0.Z (MASKED), T2.W, T1.W,
+; CM-NEXT: MULLO_INT * T0.W (MASKED), T2.W, T1.W,
+; CM-NEXT: ADD_INT T0.Y, PV.Y, T3.Z,
+; CM-NEXT: ADD_INT T5.Z, T0.W, T2.Z, BS:VEC_021/SCL_122
+; CM-NEXT: AND_INT * T0.W, T0.Z, literal.x,
+; CM-NEXT: 16777215(2.350989e-38), 0(0.000000e+00)
+; CM-NEXT: MULLO_INT T0.X (MASKED), T0.W, T1.Z,
+; CM-NEXT: MULLO_INT T0.Y (MASKED), T0.W, T1.Z,
+; CM-NEXT: MULLO_INT T0.Z, T0.W, T1.Z,
+; CM-NEXT: MULLO_INT * T0.W (MASKED), T0.W, T1.Z,
+; CM-NEXT: ADD_INT T0.Z, PV.Z, T3.Y,
+; CM-NEXT: AND_INT * T0.W, T5.Z, literal.x,
+; CM-NEXT: 16777215(2.350989e-38), 0(0.000000e+00)
+; CM-NEXT: MULLO_INT T0.X (MASKED), T0.W, T1.Y,
+; CM-NEXT: MULLO_INT T0.Y (MASKED), T0.W, T1.Y,
+; CM-NEXT: MULLO_INT T0.Z (MASKED), T0.W, T1.Y,
+; CM-NEXT: MULLO_INT * T0.W, T0.W, T1.Y,
+; CM-NEXT: ADD_INT * T0.W, PV.W, T2.Z,
+; CM-NEXT: ALU clause starting at 83:
+; CM-NEXT: ADD_INT * T4.Z, T4.Z, literal.x,
+; CM-NEXT: -1(nan), 0(0.000000e+00)
+; CM-NEXT: SETE_INT * T2.W, PV.Z, 0.0,
+; CM-NEXT: PRED_SETNE_INT * ExecMask,PredicateBit (MASKED), PV.W, 0.0,
+;
+; GCN-LABEL: mad24_known_bits_destroyed:
+; GCN: ; %bb.0: ; %bb
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_mov_b32_e32 v5, v0
+; GCN-NEXT: v_and_b32_e32 v0, 0xffffff, v13
+; GCN-NEXT: v_and_b32_e32 v1, 0xffffff, v2
+; GCN-NEXT: v_and_b32_e32 v2, 0xffffff, v3
+; GCN-NEXT: v_and_b32_e32 v3, 0xffffff, v4
+; GCN-NEXT: s_mov_b64 s[8:9], 0
+; GCN-NEXT: s_mov_b32 s6, 0
+; GCN-NEXT: s_mov_b32 s7, 0xf000
+; GCN-NEXT: s_mov_b32 s4, s6
+; GCN-NEXT: s_mov_b32 s5, s6
+; GCN-NEXT: .LBB9_1: ; %bb19
+; GCN-NEXT: ; =>This Inner Loop Header: Depth=1
+; GCN-NEXT: v_mad_u32_u24 v4, v5, v0, v14
+; GCN-NEXT: s_waitcnt expcnt(0)
+; GCN-NEXT: v_mad_u32_u24 v6, v6, v1, v10
+; GCN-NEXT: v_mad_u32_u24 v7, v7, v2, v11
+; GCN-NEXT: v_mad_u32_u24 v8, v8, v3, v12
+; GCN-NEXT: v_add_i32_e32 v15, vcc, -1, v15
+; GCN-NEXT: v_mad_u32_u24 v5, v4, v0, v14
+; GCN-NEXT: v_mad_u32_u24 v6, v6, v1, v10
+; GCN-NEXT: v_mad_u32_u24 v7, v7, v2, v11
+; GCN-NEXT: v_mad_u32_u24 v8, v8, v3, v12
+; GCN-NEXT: v_cmp_eq_u32_e32 vcc, 0, v15
+; GCN-NEXT: buffer_store_dword v5, v[16:17], s[4:7], 0 addr64
+; GCN-NEXT: s_or_b64 s[8:9], vcc, s[8:9]
+; GCN-NEXT: buffer_store_dwordx4 v[5:8], v[18:19], s[4:7], 0 addr64
+; GCN-NEXT: s_andn2_b64 exec, exec, s[8:9]
+; GCN-NEXT: s_cbranch_execnz .LBB9_1
+; GCN-NEXT: ; %bb.2: ; %bb18
+; GCN-NEXT: s_or_b64 exec, exec, s[8:9]
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0)
+; GCN-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: mad24_known_bits_destroyed:
+; GFX8: ; %bb.0: ; %bb
+; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT: v_mov_b32_e32 v5, v0
+; GFX8-NEXT: v_and_b32_e32 v0, 0xffffff, v13
+; GFX8-NEXT: v_and_b32_e32 v1, 0xffffff, v2
+; GFX8-NEXT: v_and_b32_e32 v2, 0xffffff, v3
+; GFX8-NEXT: v_and_b32_e32 v3, 0xffffff, v4
+; GFX8-NEXT: s_mov_b64 s[4:5], 0
+; GFX8-NEXT: .LBB9_1: ; %bb19
+; GFX8-NEXT: ; =>This Inner Loop Header: Depth=1
+; GFX8-NEXT: v_add_u32_e32 v15, vcc, -1, v15
+; GFX8-NEXT: v_mad_u32_u24 v4, v5, v0, v14
+; GFX8-NEXT: v_mad_u32_u24 v6, v6, v1, v10
+; GFX8-NEXT: v_mad_u32_u24 v7, v7, v2, v11
+; GFX8-NEXT: v_mad_u32_u24 v8, v8, v3, v12
+; GFX8-NEXT: v_cmp_eq_u32_e32 vcc, 0, v15
+; GFX8-NEXT: v_mad_u32_u24 v5, v4, v0, v14
+; GFX8-NEXT: v_mad_u32_u24 v6, v6, v1, v10
+; GFX8-NEXT: v_mad_u32_u24 v7, v7, v2, v11
+; GFX8-NEXT: v_mad_u32_u24 v8, v8, v3, v12
+; GFX8-NEXT: s_or_b64 s[4:5], vcc, s[4:5]
+; GFX8-NEXT: flat_store_dword v[16:17], v5
+; GFX8-NEXT: flat_store_dwordx4 v[18:19], v[5:8]
+; GFX8-NEXT: s_andn2_b64 exec, exec, s[4:5]
+; GFX8-NEXT: s_cbranch_execnz .LBB9_1
+; GFX8-NEXT: ; %bb.2: ; %bb18
+; GFX8-NEXT: s_or_b64 exec, exec, s[4:5]
+; GFX8-NEXT: s_waitcnt vmcnt(0)
+; GFX8-NEXT: s_setpc_b64 s[30:31]
bb:
%tmp = and i32 %arg4, 16777215
%tmp9 = extractelement <4 x i32> %arg1, i64 1
diff --git a/llvm/test/CodeGen/AMDGPU/min.ll b/llvm/test/CodeGen/AMDGPU/min.ll
index 6a3d31f..0458a64 100644
--- a/llvm/test/CodeGen/AMDGPU/min.ll
+++ b/llvm/test/CodeGen/AMDGPU/min.ll
@@ -6,9 +6,7 @@
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 < %s | FileCheck --check-prefix=GFX10 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=+real-true16 -amdgpu-enable-vopd=0 < %s | FileCheck --check-prefixes=GFX11,GFX11-TRUE16 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -mattr=-real-true16 -amdgpu-enable-vopd=0 < %s | FileCheck --check-prefixes=GFX11,GFX11-FAKE16 %s
-; TODO: FIXME-TRUE16 - Enable this llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -mattr=+real-true16 -amdgpu-enable-vopd=0 < %s | FileCheck --check-prefixes=GFX1250,GFX1250-TRUE16 %s
-; Crashing on v_test_imin_slt_i16
-; LLVM ERROR: Cannot select: 0x5f895f65b050: i16,ch = load<(load (s16) from %ir.b.gep, addrspace 1)>
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -mattr=+real-true16 -amdgpu-enable-vopd=0 < %s | FileCheck --check-prefixes=GFX1250,GFX1250-TRUE16 %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 -mattr=-real-true16 -amdgpu-enable-vopd=0 < %s | FileCheck --check-prefixes=GFX1250,GFX1250-FAKE16 %s
define amdgpu_kernel void @v_test_imin_sle_i32(ptr addrspace(1) %out, ptr addrspace(1) %a.ptr, ptr addrspace(1) %b.ptr) #0 {
@@ -1482,20 +1480,35 @@ define amdgpu_kernel void @v_test_imin_slt_i16(ptr addrspace(1) %out, ptr addrsp
; GFX11-FAKE16-NEXT: global_store_b16 v0, v1, s[0:1]
; GFX11-FAKE16-NEXT: s_endpgm
;
-; GFX1250-LABEL: v_test_imin_slt_i16:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_clause 0x1
-; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
-; GFX1250-NEXT: s_load_b64 s[6:7], s[4:5], 0x10
-; GFX1250-NEXT: v_and_b32_e32 v0, 0x3ff, v0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: s_clause 0x1
-; GFX1250-NEXT: global_load_u16 v1, v0, s[2:3] scale_offset
-; GFX1250-NEXT: global_load_u16 v2, v0, s[6:7] scale_offset
-; GFX1250-NEXT: s_wait_loadcnt 0x0
-; GFX1250-NEXT: v_min_i16 v1, v1, v2
-; GFX1250-NEXT: global_store_b16 v0, v1, s[0:1] scale_offset
-; GFX1250-NEXT: s_endpgm
+; GFX1250-TRUE16-LABEL: v_test_imin_slt_i16:
+; GFX1250-TRUE16: ; %bb.0:
+; GFX1250-TRUE16-NEXT: s_clause 0x1
+; GFX1250-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
+; GFX1250-TRUE16-NEXT: s_load_b64 s[6:7], s[4:5], 0x10
+; GFX1250-TRUE16-NEXT: v_and_b32_e32 v1, 0x3ff, v0
+; GFX1250-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-TRUE16-NEXT: s_clause 0x1
+; GFX1250-TRUE16-NEXT: global_load_u16 v0, v1, s[2:3] scale_offset
+; GFX1250-TRUE16-NEXT: global_load_u16 v2, v1, s[6:7] scale_offset
+; GFX1250-TRUE16-NEXT: s_wait_loadcnt 0x0
+; GFX1250-TRUE16-NEXT: v_min_i16 v0.l, v0.l, v2.l
+; GFX1250-TRUE16-NEXT: global_store_b16 v1, v0, s[0:1] scale_offset
+; GFX1250-TRUE16-NEXT: s_endpgm
+;
+; GFX1250-FAKE16-LABEL: v_test_imin_slt_i16:
+; GFX1250-FAKE16: ; %bb.0:
+; GFX1250-FAKE16-NEXT: s_clause 0x1
+; GFX1250-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
+; GFX1250-FAKE16-NEXT: s_load_b64 s[6:7], s[4:5], 0x10
+; GFX1250-FAKE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; GFX1250-FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-FAKE16-NEXT: s_clause 0x1
+; GFX1250-FAKE16-NEXT: global_load_u16 v1, v0, s[2:3] scale_offset
+; GFX1250-FAKE16-NEXT: global_load_u16 v2, v0, s[6:7] scale_offset
+; GFX1250-FAKE16-NEXT: s_wait_loadcnt 0x0
+; GFX1250-FAKE16-NEXT: v_min_i16 v1, v1, v2
+; GFX1250-FAKE16-NEXT: global_store_b16 v0, v1, s[0:1] scale_offset
+; GFX1250-FAKE16-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%a.gep = getelementptr inbounds i16, ptr addrspace(1) %aptr, i32 %tid
%b.gep = getelementptr inbounds i16, ptr addrspace(1) %bptr, i32 %tid
@@ -2769,20 +2782,35 @@ define amdgpu_kernel void @v_test_umin_ult_i8(ptr addrspace(1) %out, ptr addrspa
; GFX11-FAKE16-NEXT: global_store_b8 v0, v1, s[0:1]
; GFX11-FAKE16-NEXT: s_endpgm
;
-; GFX1250-LABEL: v_test_umin_ult_i8:
-; GFX1250: ; %bb.0:
-; GFX1250-NEXT: s_clause 0x1
-; GFX1250-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
-; GFX1250-NEXT: s_load_b64 s[6:7], s[4:5], 0x10
-; GFX1250-NEXT: v_and_b32_e32 v0, 0x3ff, v0
-; GFX1250-NEXT: s_wait_kmcnt 0x0
-; GFX1250-NEXT: s_clause 0x1
-; GFX1250-NEXT: global_load_u8 v1, v0, s[2:3]
-; GFX1250-NEXT: global_load_u8 v2, v0, s[6:7]
-; GFX1250-NEXT: s_wait_loadcnt 0x0
-; GFX1250-NEXT: v_min_u16 v1, v1, v2
-; GFX1250-NEXT: global_store_b8 v0, v1, s[0:1]
-; GFX1250-NEXT: s_endpgm
+; GFX1250-TRUE16-LABEL: v_test_umin_ult_i8:
+; GFX1250-TRUE16: ; %bb.0:
+; GFX1250-TRUE16-NEXT: s_clause 0x1
+; GFX1250-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
+; GFX1250-TRUE16-NEXT: s_load_b64 s[6:7], s[4:5], 0x10
+; GFX1250-TRUE16-NEXT: v_and_b32_e32 v1, 0x3ff, v0
+; GFX1250-TRUE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-TRUE16-NEXT: s_clause 0x1
+; GFX1250-TRUE16-NEXT: global_load_u8 v0, v1, s[2:3]
+; GFX1250-TRUE16-NEXT: global_load_u8 v2, v1, s[6:7]
+; GFX1250-TRUE16-NEXT: s_wait_loadcnt 0x0
+; GFX1250-TRUE16-NEXT: v_min_u16 v0.l, v0.l, v2.l
+; GFX1250-TRUE16-NEXT: global_store_b8 v1, v0, s[0:1]
+; GFX1250-TRUE16-NEXT: s_endpgm
+;
+; GFX1250-FAKE16-LABEL: v_test_umin_ult_i8:
+; GFX1250-FAKE16: ; %bb.0:
+; GFX1250-FAKE16-NEXT: s_clause 0x1
+; GFX1250-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x0
+; GFX1250-FAKE16-NEXT: s_load_b64 s[6:7], s[4:5], 0x10
+; GFX1250-FAKE16-NEXT: v_and_b32_e32 v0, 0x3ff, v0
+; GFX1250-FAKE16-NEXT: s_wait_kmcnt 0x0
+; GFX1250-FAKE16-NEXT: s_clause 0x1
+; GFX1250-FAKE16-NEXT: global_load_u8 v1, v0, s[2:3]
+; GFX1250-FAKE16-NEXT: global_load_u8 v2, v0, s[6:7]
+; GFX1250-FAKE16-NEXT: s_wait_loadcnt 0x0
+; GFX1250-FAKE16-NEXT: v_min_u16 v1, v1, v2
+; GFX1250-FAKE16-NEXT: global_store_b8 v0, v1, s[0:1]
+; GFX1250-FAKE16-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%a.gep = getelementptr inbounds i8, ptr addrspace(1) %a.ptr, i32 %tid
%b.gep = getelementptr inbounds i8, ptr addrspace(1) %b.ptr, i32 %tid
@@ -5069,5 +5097,3 @@ declare i32 @llvm.amdgcn.workitem.id.x() #1
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; GFX1250-FAKE16: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/minmax.ll b/llvm/test/CodeGen/AMDGPU/minmax.ll
index 57e6943..56f9c5d 100644
--- a/llvm/test/CodeGen/AMDGPU/minmax.ll
+++ b/llvm/test/CodeGen/AMDGPU/minmax.ll
@@ -638,6 +638,14 @@ define void @test_med3_minimumnum_maximumnum_f32(ptr addrspace(1) %arg, float %x
; GFX12-NEXT: v_med3_num_f32 v2, v2, v3, v4
; GFX12-NEXT: global_store_b32 v[0:1], v2, off
; GFX12-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX1250-LABEL: test_med3_minimumnum_maximumnum_f32:
+; GFX1250: ; %bb.0:
+; GFX1250-NEXT: s_wait_loadcnt_dscnt 0x0
+; GFX1250-NEXT: s_wait_kmcnt 0x0
+; GFX1250-NEXT: v_med3_num_f32 v2, v2, v3, v4
+; GFX1250-NEXT: global_store_b32 v[0:1], v2, off
+; GFX1250-NEXT: s_set_pc_i64 s[30:31]
%tmp0 = call float @llvm.minimumnum.f32(float %x, float %y)
%tmp1 = call float @llvm.maximumnum.f32(float %x, float %y)
%tmp2 = call float @llvm.minimumnum.f32(float %tmp1, float %z)
@@ -798,7 +806,7 @@ define amdgpu_ps void @s_test_minmax_f16_ieee_false(half inreg %a, half inreg %b
; SDAG-GFX1250-TRUE16-NEXT: s_mov_b32 s5, s4
; SDAG-GFX1250-TRUE16-NEXT: s_mov_b32 s4, s3
; SDAG-GFX1250-TRUE16-NEXT: v_maxmin_num_f16 v0.l, s0, s1, v0.l
-; SDAG-GFX1250-TRUE16-NEXT: flat_store_b16 v1, v0, s[4:5]
+; SDAG-GFX1250-TRUE16-NEXT: global_store_b16 v1, v0, s[4:5]
; SDAG-GFX1250-TRUE16-NEXT: s_endpgm
;
; SDAG-GFX1250-FAKE16-LABEL: s_test_minmax_f16_ieee_false:
@@ -813,12 +821,12 @@ define amdgpu_ps void @s_test_minmax_f16_ieee_false(half inreg %a, half inreg %b
; GISEL-GFX1250-TRUE16-LABEL: s_test_minmax_f16_ieee_false:
; GISEL-GFX1250-TRUE16: ; %bb.0:
; GISEL-GFX1250-TRUE16-NEXT: s_max_num_f16 s0, s0, s1
+; GISEL-GFX1250-TRUE16-NEXT: v_mov_b32_e32 v1, 0
; GISEL-GFX1250-TRUE16-NEXT: s_mov_b32 s6, s3
; GISEL-GFX1250-TRUE16-NEXT: s_mov_b32 s7, s4
-; GISEL-GFX1250-TRUE16-NEXT: v_mov_b32_e32 v1, 0
; GISEL-GFX1250-TRUE16-NEXT: s_min_num_f16 s0, s0, s2
-; GISEL-GFX1250-TRUE16-NEXT: v_mov_b32_e32 v0, s0
-; GISEL-GFX1250-TRUE16-NEXT: flat_store_b16 v1, v0, s[6:7]
+; GISEL-GFX1250-TRUE16-NEXT: v_mov_b16_e32 v0.l, s0
+; GISEL-GFX1250-TRUE16-NEXT: global_store_b16 v1, v0, s[6:7]
; GISEL-GFX1250-TRUE16-NEXT: s_endpgm
;
; GISEL-GFX1250-FAKE16-LABEL: s_test_minmax_f16_ieee_false:
@@ -1246,7 +1254,7 @@ define void @test_med3_f16(ptr addrspace(1) %arg, half %x, half %y, half %z) #0
; SDAG-GFX1250-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
; SDAG-GFX1250-TRUE16-NEXT: s_wait_kmcnt 0x0
; SDAG-GFX1250-TRUE16-NEXT: v_med3_num_f16 v2.l, v2.l, v3.l, v4.l
-; SDAG-GFX1250-TRUE16-NEXT: flat_store_b16 v[0:1], v2
+; SDAG-GFX1250-TRUE16-NEXT: global_store_b16 v[0:1], v2, off
; SDAG-GFX1250-TRUE16-NEXT: s_set_pc_i64 s[30:31]
;
; SDAG-GFX1250-FAKE16-LABEL: test_med3_f16:
@@ -1262,7 +1270,7 @@ define void @test_med3_f16(ptr addrspace(1) %arg, half %x, half %y, half %z) #0
; GISEL-GFX1250-TRUE16-NEXT: s_wait_loadcnt_dscnt 0x0
; GISEL-GFX1250-TRUE16-NEXT: s_wait_kmcnt 0x0
; GISEL-GFX1250-TRUE16-NEXT: v_med3_num_f16 v2.l, v2.l, v3.l, v4.l
-; GISEL-GFX1250-TRUE16-NEXT: flat_store_b16 v[0:1], v2
+; GISEL-GFX1250-TRUE16-NEXT: global_store_b16 v[0:1], v2, off
; GISEL-GFX1250-TRUE16-NEXT: s_set_pc_i64 s[30:31]
;
; GISEL-GFX1250-FAKE16-LABEL: test_med3_f16:
diff --git a/llvm/test/CodeGen/AMDGPU/propagate-flat-work-group-size.ll b/llvm/test/CodeGen/AMDGPU/propagate-flat-work-group-size.ll
index 42469c8..23e90b3 100644
--- a/llvm/test/CodeGen/AMDGPU/propagate-flat-work-group-size.ll
+++ b/llvm/test/CodeGen/AMDGPU/propagate-flat-work-group-size.ll
@@ -202,13 +202,13 @@ attributes #5 = { "amdgpu-flat-work-group-size"="128,512" }
attributes #6 = { "amdgpu-flat-work-group-size"="512,512" }
attributes #7 = { "amdgpu-flat-work-group-size"="64,256" }
;.
-; CHECK: attributes #[[ATTR0]] = { "amdgpu-agpr-alloc"="0" "amdgpu-flat-work-group-size"="1,256" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR1]] = { "amdgpu-agpr-alloc"="0" "amdgpu-flat-work-group-size"="64,128" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR2]] = { "amdgpu-agpr-alloc"="0" "amdgpu-flat-work-group-size"="128,512" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR3]] = { "amdgpu-agpr-alloc"="0" "amdgpu-flat-work-group-size"="64,64" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR4]] = { "amdgpu-agpr-alloc"="0" "amdgpu-flat-work-group-size"="128,256" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR5]] = { "amdgpu-agpr-alloc"="0" "amdgpu-flat-work-group-size"="512,1024" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR6]] = { "amdgpu-agpr-alloc"="0" "amdgpu-flat-work-group-size"="512,512" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR7]] = { "amdgpu-agpr-alloc"="0" "amdgpu-flat-work-group-size"="64,256" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR8]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR0]] = { "amdgpu-flat-work-group-size"="1,256" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR1]] = { "amdgpu-flat-work-group-size"="64,128" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR2]] = { "amdgpu-flat-work-group-size"="128,512" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR3]] = { "amdgpu-flat-work-group-size"="64,64" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR4]] = { "amdgpu-flat-work-group-size"="128,256" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR5]] = { "amdgpu-flat-work-group-size"="512,1024" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR6]] = { "amdgpu-flat-work-group-size"="512,512" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR7]] = { "amdgpu-flat-work-group-size"="64,256" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR8]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
;.
diff --git a/llvm/test/CodeGen/AMDGPU/propagate-waves-per-eu.ll b/llvm/test/CodeGen/AMDGPU/propagate-waves-per-eu.ll
index 06533b4..0be3147 100644
--- a/llvm/test/CodeGen/AMDGPU/propagate-waves-per-eu.ll
+++ b/llvm/test/CodeGen/AMDGPU/propagate-waves-per-eu.ll
@@ -399,25 +399,25 @@ attributes #17 = { "amdgpu-waves-per-eu"="5,8" }
attributes #18 = { "amdgpu-waves-per-eu"="9,10" }
attributes #19 = { "amdgpu-waves-per-eu"="8,9" }
;.
-; CHECK: attributes #[[ATTR0]] = { "amdgpu-agpr-alloc"="0" "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="2,8" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR1]] = { "amdgpu-agpr-alloc"="0" "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="1,8" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR2]] = { "amdgpu-agpr-alloc"="0" "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="1,2" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR3]] = { "amdgpu-agpr-alloc"="0" "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="1,4" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR4]] = { "amdgpu-agpr-alloc"="0" "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="9,9" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR5]] = { "amdgpu-agpr-alloc"="0" "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="1,1" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR6]] = { "amdgpu-agpr-alloc"="0" "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="9,10" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR7]] = { "amdgpu-agpr-alloc"="0" "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="2,9" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR8]] = { "amdgpu-agpr-alloc"="0" "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="3,8" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR9]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR10]] = { "amdgpu-agpr-alloc"="0" "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR11]] = { "amdgpu-agpr-alloc"="0" "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="1,123" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR12]] = { "amdgpu-agpr-alloc"="0" "amdgpu-flat-work-group-size"="1,512" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR13]] = { "amdgpu-agpr-alloc"="0" "amdgpu-flat-work-group-size"="1,512" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="3,6" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR14]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="3,6" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR15]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,8" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR16]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="6,8" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR17]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="5,5" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR18]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="5,8" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR19]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="9,10" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR20]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="8,9" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR0]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="2,8" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR1]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="1,8" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR2]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="1,2" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR3]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="1,4" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR4]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="9,9" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR5]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="1,1" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR6]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="9,10" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR7]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="2,9" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR8]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="3,8" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR9]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR10]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR11]] = { "amdgpu-flat-work-group-size"="1,64" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="1,123" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR12]] = { "amdgpu-flat-work-group-size"="1,512" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR13]] = { "amdgpu-flat-work-group-size"="1,512" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="3,6" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR14]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="3,6" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR15]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="4,8" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR16]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="6,8" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR17]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="5,5" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR18]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="5,8" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR19]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="9,10" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR20]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "amdgpu-waves-per-eu"="8,9" "uniform-work-group-size"="false" }
;.
diff --git a/llvm/test/CodeGen/AMDGPU/recursive_global_initializer.ll b/llvm/test/CodeGen/AMDGPU/recursive_global_initializer.ll
index 8930626..33da671 100644
--- a/llvm/test/CodeGen/AMDGPU/recursive_global_initializer.ll
+++ b/llvm/test/CodeGen/AMDGPU/recursive_global_initializer.ll
@@ -19,5 +19,5 @@ define void @hoge() {
ret void
}
;.
-; CHECK: attributes #[[ATTR0]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
;.
diff --git a/llvm/test/CodeGen/AMDGPU/remove-no-kernel-id-attribute.ll b/llvm/test/CodeGen/AMDGPU/remove-no-kernel-id-attribute.ll
index 3dfb0e1..f847d66 100644
--- a/llvm/test/CodeGen/AMDGPU/remove-no-kernel-id-attribute.ll
+++ b/llvm/test/CodeGen/AMDGPU/remove-no-kernel-id-attribute.ll
@@ -191,12 +191,12 @@ define amdgpu_kernel void @kernel_lds_recursion() {
!1 = !{i32 1, !"amdhsa_code_object_version", i32 400}
;.
-; CHECK: attributes #[[ATTR0]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR1]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR2]] = { "amdgpu-agpr-alloc"="0" "amdgpu-lds-size"="2" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR2]] = { "amdgpu-lds-size"="2" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; CHECK: attributes #[[ATTR3]] = { "amdgpu-lds-size"="4" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR4]] = { "amdgpu-agpr-alloc"="0" "amdgpu-lds-size"="2" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR5]] = { "amdgpu-agpr-alloc"="0" "amdgpu-lds-size"="4" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR4]] = { "amdgpu-lds-size"="2" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR5]] = { "amdgpu-lds-size"="4" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; CHECK: attributes #[[ATTR6:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(none) }
; CHECK: attributes #[[ATTR7:[0-9]+]] = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
;.
diff --git a/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.v2f16.ll b/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.v2f16.ll
index bb22144..9814ed8 100644
--- a/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.v2f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/select-fabs-fneg-extract.v2f16.ll
@@ -1,15 +1,9 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=amdgcn -mcpu=hawaii < %s | FileCheck -check-prefixes=CI,CI-SAFE %s
-; RUN: llc -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefixes=VI,VI-SAFE %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx906 < %s | FileCheck -check-prefixes=GFX9,GFX9-SAFE %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-SAFE,GFX11-SAFE-TRUE16 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-SAFE,GFX11-SAFE-FAKE16 %s
-
-; RUN: llc -mtriple=amdgcn -mcpu=hawaii -enable-no-signed-zeros-fp-math < %s | FileCheck -check-prefixes=CI,CI-NSZ %s
-; RUN: llc -mtriple=amdgcn -mcpu=fiji -enable-no-signed-zeros-fp-math < %s | FileCheck -check-prefixes=VI,VI-NSZ %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx906 -enable-no-signed-zeros-fp-math < %s | FileCheck -check-prefixes=GFX9,GFX9-NSZ %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 -enable-no-signed-zeros-fp-math < %s | FileCheck -check-prefixes=GFX11,GFX11-NSZ,GFX11-NSZ-TRUE16 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 -enable-no-signed-zeros-fp-math < %s | FileCheck -check-prefixes=GFX11,GFX11-NSZ,GFX11-NSZ-FAKE16 %s
+; RUN: llc -mtriple=amdgcn -mcpu=hawaii < %s | FileCheck -check-prefixes=CI %s
+; RUN: llc -mtriple=amdgcn -mcpu=fiji < %s | FileCheck -check-prefixes=VI %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx906 < %s | FileCheck -check-prefixes=GFX9 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=+real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-TRUE16 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -mattr=-real-true16 < %s | FileCheck -check-prefixes=GFX11,GFX11-FAKE16 %s
define <2 x half> @add_select_fabs_fabs_v2f16(<2 x i32> %c, <2 x half> %x, <2 x half> %y, <2 x half> %z) {
; CI-LABEL: add_select_fabs_fabs_v2f16:
@@ -63,69 +57,37 @@ define <2 x half> @add_select_fabs_fabs_v2f16(<2 x i32> %c, <2 x half> %x, <2 x
; GFX9-NEXT: v_pk_add_f16 v0, v0, v4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SAFE-TRUE16-LABEL: add_select_fabs_fabs_v2f16:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v2
-; GFX11-SAFE-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v3
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v0.l, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, v2.h, v0.h, s0
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_fabs_fabs_v2f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3
-; GFX11-SAFE-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_fabs_fabs_v2f16:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v2
-; GFX11-NSZ-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v3
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v0.l, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, v2.h, v0.h, s0
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_fabs_fabs_v2f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3
-; GFX11-NSZ-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_fabs_fabs_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v2
+; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v3
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v0.l, vcc_lo
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v2.h, v0.h, s0
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_fabs_fabs_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3
+; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo
+; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%fabs.x = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
%fabs.y = call <2 x half> @llvm.fabs.v2f16(<2 x half> %y)
@@ -198,73 +160,39 @@ define { <2 x half>, <2 x half> } @add_select_multi_use_lhs_fabs_fabs_v2f16(<2 x
; GFX9-NEXT: v_pk_add_f16 v1, v1, v4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SAFE-TRUE16-LABEL: add_select_multi_use_lhs_fabs_fabs_v2f16:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-SAFE-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v3
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v1, v2, v4
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, v0.l, v2.l, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, v0.h, v2.h, s0
-; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v0, v5
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_multi_use_lhs_fabs_fabs_v2f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-SAFE-FAKE16-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v7, 16, v3
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v1, v2, v4
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v0, v5
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_multi_use_lhs_fabs_fabs_v2f16:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-NSZ-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v3
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v1, v2, v4
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, v0.l, v2.l, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, v0.h, v2.h, s0
-; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v0, v5
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_multi_use_lhs_fabs_fabs_v2f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-NSZ-FAKE16-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v7, 16, v3
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v1, v2, v4
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v0, v5
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_multi_use_lhs_fabs_fabs_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v3
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-TRUE16-NEXT: v_pk_add_f16 v1, v2, v4
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v0.l, v2.l, vcc_lo
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v0.h, v2.h, s0
+; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v0, v5
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_multi_use_lhs_fabs_fabs_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v7, 16, v3
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc_lo
+; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT: v_pk_add_f16 v1, v2, v4
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v0, v5
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%fabs.x = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
%fabs.y = call <2 x half> @llvm.fabs.v2f16(<2 x half> %y)
@@ -328,73 +256,39 @@ define { <2 x half>, <2 x half> } @add_select_multi_store_use_lhs_fabs_fabs_v2f1
; GFX9-NEXT: v_pk_add_f16 v0, v0, v4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SAFE-TRUE16-LABEL: add_select_multi_store_use_lhs_fabs_fabs_v2f16:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-SAFE-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v3
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-SAFE-TRUE16-NEXT: v_mov_b32_e32 v1, v2
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, v0.l, v2.l, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, v0.h, v2.h, s0
-; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_multi_store_use_lhs_fabs_fabs_v2f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3
-; GFX11-SAFE-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT: v_mov_b32_e32 v1, v2
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_multi_store_use_lhs_fabs_fabs_v2f16:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-NSZ-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v3
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NSZ-TRUE16-NEXT: v_mov_b32_e32 v1, v2
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, v0.l, v2.l, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, v0.h, v2.h, s0
-; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_multi_store_use_lhs_fabs_fabs_v2f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3
-; GFX11-NSZ-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT: v_mov_b32_e32 v1, v2
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_multi_store_use_lhs_fabs_fabs_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v3
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-TRUE16-NEXT: v_mov_b32_e32 v1, v2
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v0.l, v2.l, vcc_lo
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v0.h, v2.h, s0
+; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_multi_store_use_lhs_fabs_fabs_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3
+; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo
+; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT: v_mov_b32_e32 v1, v2
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%fabs.x = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
%fabs.y = call <2 x half> @llvm.fabs.v2f16(<2 x half> %y)
@@ -469,73 +363,39 @@ define { <2 x half>, <2 x half> } @add_select_multi_use_rhs_fabs_fabs_v2f16(<2 x
; GFX9-NEXT: v_pk_add_f16 v1, v2, v5
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SAFE-TRUE16-LABEL: add_select_multi_use_rhs_fabs_fabs_v2f16:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v2
-; GFX11-SAFE-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v3
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v0.l, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, v2.h, v0.h, s0
-; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v1, v2, v5
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_multi_use_rhs_fabs_fabs_v2f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-SAFE-FAKE16-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v7, 16, v3
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v1, v3, v5
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_multi_use_rhs_fabs_fabs_v2f16:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v2
-; GFX11-NSZ-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v3
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v0.l, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, v2.h, v0.h, s0
-; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v1, v2, v5
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_multi_use_rhs_fabs_fabs_v2f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-NSZ-FAKE16-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v7, 16, v3
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v1, v3, v5
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_multi_use_rhs_fabs_fabs_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v2
+; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v3
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v0.l, vcc_lo
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v2.h, v0.h, s0
+; GFX11-TRUE16-NEXT: v_pk_add_f16 v1, v2, v5
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_multi_use_rhs_fabs_fabs_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v7, 16, v3
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc_lo
+; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT: v_pk_add_f16 v1, v3, v5
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%fabs.x = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
%fabs.y = call <2 x half> @llvm.fabs.v2f16(<2 x half> %y)
@@ -597,63 +457,34 @@ define <2 x half> @add_select_fabs_var_v2f16(<2 x i32> %c, <2 x half> %x, <2 x h
; GFX9-NEXT: v_pk_add_f16 v0, v0, v4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SAFE-TRUE16-LABEL: add_select_fabs_var_v2f16:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: v_and_b32_e32 v1, 0x7fff7fff, v2
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.h, v1.h, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, v1.l, s0
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_fabs_var_v2f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v3
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_fabs_var_v2f16:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: v_and_b32_e32 v1, 0x7fff7fff, v2
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.h, v1.h, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, v1.l, s0
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_fabs_var_v2f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v3
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_fabs_var_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0x7fff7fff, v2
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.h, v1.h, vcc_lo
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, v1.l, s0
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_fabs_var_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v3
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%fabs.x = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
%select = select <2 x i1> %cmp, <2 x half> %fabs.x, <2 x half> %y
@@ -709,61 +540,33 @@ define <2 x half> @add_select_fabs_negk_v2f16(<2 x i32> %c, <2 x half> %x, <2 x
; GFX9-NEXT: v_pk_add_f16 v0, v0, v3
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SAFE-TRUE16-LABEL: add_select_fabs_negk_v2f16:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v2
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0xbc00, v0.l, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0xbc00, v0.h, s0
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v0, v3
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_fabs_negk_v2f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xbc00, v4, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v0, v3
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_fabs_negk_v2f16:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v2
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0xbc00, v0.l, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, 0xbc00, v0.h, s0
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v0, v3
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_fabs_negk_v2f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xbc00, v4, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v0, v3
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_fabs_negk_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v2
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0xbc00, v0.l, vcc_lo
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0xbc00, v0.h, s0
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v0, v3
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_fabs_negk_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v2, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xbc00, v4, vcc_lo
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v0, v3
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
%select = select <2 x i1> %cmp, <2 x half> %fabs, <2 x half> <half -1.0, half -1.0>
@@ -815,61 +618,33 @@ define <2 x half> @add_select_fabs_negk_negk_v2f16(<2 x i32> %c, <2 x half> %x)
; GFX9-NEXT: v_pk_add_f16 v0, v2, v0 neg_lo:[0,1] neg_hi:[0,1]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SAFE-TRUE16-LABEL: add_select_fabs_negk_negk_v2f16:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_mov_b16_e32 v3.l, 0xbc00
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, 0xc000, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.l, 0xc000, s0
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT: v_pack_b32_f16 v0, v0.h, v0.l
-; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v2, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_fabs_negk_negk_v2f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_mov_b32_e32 v3, 0xc000
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xbc00, v3, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v3, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v2, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_fabs_negk_negk_v2f16:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_mov_b16_e32 v3.l, 0xbc00
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, 0xc000, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.l, 0xc000, s0
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT: v_pack_b32_f16 v0, v0.h, v0.l
-; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v2, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_fabs_negk_negk_v2f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_mov_b32_e32 v3, 0xc000
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xbc00, v3, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v3, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v2, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_fabs_negk_negk_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, 0xbc00
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, 0xc000, vcc_lo
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.l, 0xc000, s0
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_pack_b32_f16 v0, v0.h, v0.l
+; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v2, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_fabs_negk_negk_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_mov_b32_e32 v3, 0xc000
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xbc00, v3, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v3, vcc_lo
+; GFX11-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v2, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%select = select <2 x i1> %cmp, <2 x half> <half -2.0, half -2.0>, <2 x half> <half -1.0, half -1.0>
%fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %select)
@@ -920,61 +695,33 @@ define <2 x half> @add_select_posk_posk_v2f16(<2 x i32> %c, <2 x half> %x) {
; GFX9-NEXT: v_pk_add_f16 v0, v0, v2
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SAFE-TRUE16-LABEL: add_select_posk_posk_v2f16:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_mov_b16_e32 v3.l, 0x3c00
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, 0x4000, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.l, 0x4000, s0
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT: v_pack_b32_f16 v0, v0.h, v0.l
-; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v0, v2
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_posk_posk_v2f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_mov_b32_e32 v3, 0x4000
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v3, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x3c00, v3, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v0, v2
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_posk_posk_v2f16:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_mov_b16_e32 v3.l, 0x3c00
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, 0x4000, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.l, 0x4000, s0
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT: v_pack_b32_f16 v0, v0.h, v0.l
-; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v0, v2
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_posk_posk_v2f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_mov_b32_e32 v3, 0x4000
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v3, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x3c00, v3, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v0, v2
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_posk_posk_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, 0x3c00
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, 0x4000, vcc_lo
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.l, 0x4000, s0
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_pack_b32_f16 v0, v0.h, v0.l
+; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v0, v2
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_posk_posk_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_mov_b32_e32 v3, 0x4000
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v3, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x3c00, v3, vcc_lo
+; GFX11-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v0, v2
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%select = select <2 x i1> %cmp, <2 x half> <half 2.0, half 2.0>, <2 x half> <half 1.0, half 1.0>
%add = fadd <2 x half> %select, %x
@@ -1029,61 +776,33 @@ define <2 x half> @add_select_negk_fabs_v2f16(<2 x i32> %c, <2 x half> %x, <2 x
; GFX9-NEXT: v_pk_add_f16 v0, v0, v3
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SAFE-TRUE16-LABEL: add_select_negk_fabs_v2f16:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_ne_u32_e64 s0, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0xbc00, v2.l, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0xbc00, v2.h, s0
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v0, v3
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_negk_fabs_v2f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xbc00, v4, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v0, v3
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_negk_fabs_v2f16:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_ne_u32_e64 s0, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0xbc00, v2.l, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, 0xbc00, v2.h, s0
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v0, v3
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_negk_fabs_v2f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xbc00, v4, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v0, v3
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_negk_fabs_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e64 s0, 0, v1
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0xbc00, v2.l, vcc_lo
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0xbc00, v2.h, s0
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v0, v3
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_negk_fabs_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX11-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v2, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xbc00, v4, vcc_lo
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v0, v3
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
%select = select <2 x i1> %cmp, <2 x half> <half -1.0, half -1.0>, <2 x half> %fabs
@@ -1140,61 +859,33 @@ define <2 x half> @add_select_negliteralk_fabs_v2f16(<2 x i32> %c, <2 x half> %x
; GFX9-NEXT: v_pk_add_f16 v0, v0, v3
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SAFE-TRUE16-LABEL: add_select_negliteralk_fabs_v2f16:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_ne_u32_e64 s0, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0xe400, v2.l, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0xe400, v2.h, s0
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v0, v3
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_negliteralk_fabs_v2f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xe400, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xe400, v4, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v0, v3
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_negliteralk_fabs_v2f16:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_ne_u32_e64 s0, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0xe400, v2.l, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, 0xe400, v2.h, s0
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v0, v3
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_negliteralk_fabs_v2f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xe400, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xe400, v4, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v0, v3
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_negliteralk_fabs_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e64 s0, 0, v1
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0xe400, v2.l, vcc_lo
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0xe400, v2.h, s0
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v0, v3
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_negliteralk_fabs_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX11-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xe400, v2, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xe400, v4, vcc_lo
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v0, v3
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
%select = select <2 x i1> %cmp, <2 x half> <half -1024.0, half -1024.0>, <2 x half> %fabs
@@ -1250,61 +941,33 @@ define <2 x half> @add_select_fabs_posk_v2f16(<2 x i32> %c, <2 x half> %x, <2 x
; GFX9-NEXT: v_pk_add_f16 v0, v0, v3
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SAFE-TRUE16-LABEL: add_select_fabs_posk_v2f16:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v2
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x3c00, v0.l, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x3c00, v0.h, s0
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v0, v3
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_fabs_posk_v2f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x3c00, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v4, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v0, v3
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_fabs_posk_v2f16:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v2
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x3c00, v0.l, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x3c00, v0.h, s0
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v0, v3
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_fabs_posk_v2f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x3c00, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v4, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v0, v3
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_fabs_posk_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v2
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x3c00, v0.l, vcc_lo
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x3c00, v0.h, s0
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v0, v3
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_fabs_posk_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x3c00, v2, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v4, vcc_lo
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v0, v3
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
%select = select <2 x i1> %cmp, <2 x half> %fabs, <2 x half> <half 1.0, half 1.0>
@@ -1360,61 +1023,33 @@ define <2 x half> @add_select_posk_fabs_v2f16(<2 x i32> %c, <2 x half> %x, <2 x
; GFX9-NEXT: v_pk_add_f16 v0, v0, v3
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SAFE-TRUE16-LABEL: add_select_posk_fabs_v2f16:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_ne_u32_e64 s0, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x3c00, v2.l, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x3c00, v2.h, s0
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v0, v3
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_posk_fabs_v2f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x3c00, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v4, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v0, v3
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_posk_fabs_v2f16:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_ne_u32_e64 s0, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x3c00, v2.l, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x3c00, v2.h, s0
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v0, v3
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_posk_fabs_v2f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x3c00, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v4, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v0, v3
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_posk_fabs_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e64 s0, 0, v1
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x3c00, v2.l, vcc_lo
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x3c00, v2.h, s0
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v0, v3
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_posk_fabs_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX11-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x3c00, v2, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v4, vcc_lo
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v0, v3
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%fabs = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
%select = select <2 x i1> %cmp, <2 x half> <half 1.0, half 1.0>, <2 x half> %fabs
@@ -1470,57 +1105,31 @@ define <2 x half> @add_select_fneg_fneg_v2f16(<2 x i32> %c, <2 x half> %x, <2 x
; GFX9-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SAFE-TRUE16-LABEL: add_select_fneg_fneg_v2f16:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.h, v2.h, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, v2.l, s0
-; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_fneg_fneg_v2f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_fneg_fneg_v2f16:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.h, v2.h, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, v2.l, s0
-; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_fneg_fneg_v2f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_fneg_fneg_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.h, v2.h, vcc_lo
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, v2.l, s0
+; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_fneg_fneg_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
+; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%fneg.x = fneg <2 x half> %x
%fneg.y = fneg <2 x half> %y
@@ -1587,61 +1196,33 @@ define { <2 x half>, <2 x half> } @add_select_multi_use_lhs_fneg_fneg_v2f16(<2 x
; GFX9-NEXT: v_pk_add_f16 v1, v5, v2 neg_lo:[0,1] neg_hi:[0,1]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SAFE-TRUE16-LABEL: add_select_multi_use_lhs_fneg_fneg_v2f16:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v1, v5, v2 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.h, v2.h, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, v2.l, s0
-; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_multi_use_lhs_fneg_fneg_v2f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v7, 16, v3
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v1, v5, v2 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_multi_use_lhs_fneg_fneg_v2f16:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v1, v5, v2 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.h, v2.h, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, v2.l, s0
-; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_multi_use_lhs_fneg_fneg_v2f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v7, 16, v3
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v1, v5, v2 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_multi_use_lhs_fneg_fneg_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-TRUE16-NEXT: v_pk_add_f16 v1, v5, v2 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.h, v2.h, vcc_lo
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, v2.l, s0
+; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_multi_use_lhs_fneg_fneg_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v7, 16, v3
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc_lo
+; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT: v_pk_add_f16 v1, v5, v2 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%fneg.x = fneg <2 x half> %x
%fneg.y = fneg <2 x half> %y
@@ -1705,61 +1286,33 @@ define { <2 x half>, <2 x half> } @add_select_multi_store_use_lhs_fneg_fneg_v2f1
; GFX9-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SAFE-TRUE16-LABEL: add_select_multi_store_use_lhs_fneg_fneg_v2f16:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: v_xor_b32_e32 v1, 0x80008000, v2
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.h, v2.h, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, v2.l, s0
-; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_multi_store_use_lhs_fneg_fneg_v2f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT: v_xor_b32_e32 v1, 0x80008000, v2
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_multi_store_use_lhs_fneg_fneg_v2f16:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: v_xor_b32_e32 v1, 0x80008000, v2
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.h, v2.h, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, v2.l, s0
-; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_multi_store_use_lhs_fneg_fneg_v2f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT: v_xor_b32_e32 v1, 0x80008000, v2
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_multi_store_use_lhs_fneg_fneg_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-TRUE16-NEXT: v_xor_b32_e32 v1, 0x80008000, v2
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.h, v2.h, vcc_lo
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, v2.l, s0
+; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_multi_store_use_lhs_fneg_fneg_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo
+; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT: v_xor_b32_e32 v1, 0x80008000, v2
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%fneg.x = fneg <2 x half> %x
%fneg.y = fneg <2 x half> %y
@@ -1828,61 +1381,33 @@ define { <2 x half>, <2 x half> } @add_select_multi_use_rhs_fneg_fneg_v2f16(<2 x
; GFX9-NEXT: v_pk_add_f16 v1, v5, v3 neg_lo:[0,1] neg_hi:[0,1]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SAFE-TRUE16-LABEL: add_select_multi_use_rhs_fneg_fneg_v2f16:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v1, v5, v3 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.h, v2.h, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, v2.l, s0
-; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_multi_use_rhs_fneg_fneg_v2f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v7, 16, v3
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v1, v5, v3 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_multi_use_rhs_fneg_fneg_v2f16:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v1, v5, v3 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.h, v2.h, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, v2.l, s0
-; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_multi_use_rhs_fneg_fneg_v2f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v7, 16, v3
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v1, v5, v3 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_multi_use_rhs_fneg_fneg_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-TRUE16-NEXT: v_pk_add_f16 v1, v5, v3 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.h, v2.h, vcc_lo
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, v2.l, s0
+; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_multi_use_rhs_fneg_fneg_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v7, 16, v3
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, v7, v6, vcc_lo
+; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT: v_pk_add_f16 v1, v5, v3 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%fneg.x = fneg <2 x half> %x
%fneg.y = fneg <2 x half> %y
@@ -1948,63 +1473,34 @@ define <2 x half> @add_select_fneg_var_v2f16(<2 x i32> %c, <2 x half> %x, <2 x h
; GFX9-NEXT: v_pk_add_f16 v0, v0, v4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SAFE-TRUE16-LABEL: add_select_fneg_var_v2f16:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: v_xor_b32_e32 v1, 0x80008000, v2
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.h, v1.h, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, v1.l, s0
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_fneg_var_v2f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v3
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_fneg_var_v2f16:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: v_xor_b32_e32 v1, 0x80008000, v2
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.h, v1.h, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, v1.l, s0
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_fneg_var_v2f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v3
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_fneg_var_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-TRUE16-NEXT: v_xor_b32_e32 v1, 0x80008000, v2
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.h, v1.h, vcc_lo
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, v1.l, s0
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_fneg_var_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v3
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, v5, v6, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%fneg.x = fneg <2 x half> %x
%select = select <2 x i1> %cmp, <2 x half> %fneg.x, <2 x half> %y
@@ -2058,55 +1554,30 @@ define <2 x half> @add_select_fneg_negk_v2f16(<2 x i32> %c, <2 x half> %x, <2 x
; GFX9-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SAFE-TRUE16-LABEL: add_select_fneg_negk_v2f16:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x3c00, v2.h, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x3c00, v2.l, s0
-; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_fneg_negk_v2f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v4, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x3c00, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_fneg_negk_v2f16:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x3c00, v2.h, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x3c00, v2.l, s0
-; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_fneg_negk_v2f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v4, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x3c00, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_fneg_negk_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x3c00, v2.h, vcc_lo
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x3c00, v2.l, s0
+; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_fneg_negk_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v4, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x3c00, v2, vcc_lo
+; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%fneg.x = fneg <2 x half> %x
%select = select <2 x i1> %cmp, <2 x half> %fneg.x, <2 x half> <half -1.0, half -1.0>
@@ -2161,55 +1632,30 @@ define <2 x half> @add_select_fneg_inv2pi_v2f16(<2 x i32> %c, <2 x half> %x, <2
; GFX9-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SAFE-TRUE16-LABEL: add_select_fneg_inv2pi_v2f16:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0xb118, v2.h, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0xb118, v2.l, s0
-; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_fneg_inv2pi_v2f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xb118, v4, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xb118, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_fneg_inv2pi_v2f16:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, 0xb118, v2.h, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0xb118, v2.l, s0
-; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_fneg_inv2pi_v2f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xb118, v4, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xb118, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_fneg_inv2pi_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0xb118, v2.h, vcc_lo
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0xb118, v2.l, s0
+; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_fneg_inv2pi_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xb118, v4, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xb118, v2, vcc_lo
+; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%fneg.x = fneg <2 x half> %x
%select = select <2 x i1> %cmp, <2 x half> %fneg.x, <2 x half> <half 0xH3118, half 0xH3118>
@@ -2264,55 +1710,30 @@ define <2 x half> @add_select_fneg_neginv2pi_v2f16(<2 x i32> %c, <2 x half> %x,
; GFX9-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SAFE-TRUE16-LABEL: add_select_fneg_neginv2pi_v2f16:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x3118, v2.h, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x3118, v2.l, s0
-; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_fneg_neginv2pi_v2f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x3118, v4, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x3118, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_fneg_neginv2pi_v2f16:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x3118, v2.h, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x3118, v2.l, s0
-; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_fneg_neginv2pi_v2f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x3118, v4, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x3118, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_fneg_neginv2pi_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x3118, v2.h, vcc_lo
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x3118, v2.l, s0
+; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_fneg_neginv2pi_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x3118, v4, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x3118, v2, vcc_lo
+; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%fneg.x = fneg <2 x half> %x
%select = select <2 x i1> %cmp, <2 x half> %fneg.x, <2 x half> <half 0xHB118, half 0xHB118>
@@ -2363,61 +1784,33 @@ define <2 x half> @add_select_negk_negk_v2f16(<2 x i32> %c, <2 x half> %x) {
; GFX9-NEXT: v_pk_add_f16 v0, v0, v2
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SAFE-TRUE16-LABEL: add_select_negk_negk_v2f16:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_mov_b16_e32 v3.l, 0xbc00
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, 0xc000, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.l, 0xc000, s0
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT: v_pack_b32_f16 v0, v0.h, v0.l
-; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v0, v2
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_negk_negk_v2f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_mov_b32_e32 v3, 0xc000
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xbc00, v3, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v3, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v0, v2
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_negk_negk_v2f16:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_mov_b16_e32 v3.l, 0xbc00
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, 0xc000, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.l, 0xc000, s0
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT: v_pack_b32_f16 v0, v0.h, v0.l
-; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v0, v2
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_negk_negk_v2f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_mov_b32_e32 v3, 0xc000
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xbc00, v3, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v3, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v0, v2
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_negk_negk_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, 0xbc00
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, 0xc000, vcc_lo
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.l, 0xc000, s0
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_pack_b32_f16 v0, v0.h, v0.l
+; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v0, v2
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_negk_negk_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_mov_b32_e32 v3, 0xc000
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xbc00, v3, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v3, vcc_lo
+; GFX11-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v0, v2
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%select = select <2 x i1> %cmp, <2 x half> <half -2.0, half -2.0>, <2 x half> <half -1.0, half -1.0>
%add = fadd <2 x half> %select, %x
@@ -2469,61 +1862,33 @@ define <2 x half> @add_select_negliteralk_negliteralk_v2f16(<2 x i32> %c, <2 x h
; GFX9-NEXT: v_pk_add_f16 v0, v0, v2
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SAFE-TRUE16-LABEL: add_select_negliteralk_negliteralk_v2f16:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_mov_b16_e32 v3.l, 0xec00
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, 0xe800, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.l, 0xe800, s0
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT: v_pack_b32_f16 v0, v0.h, v0.l
-; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v0, v2
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_negliteralk_negliteralk_v2f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_mov_b32_e32 v3, 0xe800
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xec00, v3, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xec00, v3, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v0, v2
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_negliteralk_negliteralk_v2f16:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_mov_b16_e32 v3.l, 0xec00
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, 0xe800, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.l, 0xe800, s0
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT: v_pack_b32_f16 v0, v0.h, v0.l
-; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v0, v2
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_negliteralk_negliteralk_v2f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_mov_b32_e32 v3, 0xe800
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xec00, v3, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xec00, v3, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v0, v2
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_negliteralk_negliteralk_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, 0xec00
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, 0xe800, vcc_lo
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.l, 0xe800, s0
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_pack_b32_f16 v0, v0.h, v0.l
+; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v0, v2
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_negliteralk_negliteralk_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_mov_b32_e32 v3, 0xe800
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xec00, v3, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xec00, v3, vcc_lo
+; GFX11-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v0, v2
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%select = select <2 x i1> %cmp, <2 x half> <half -2048.0, half -2048.0>, <2 x half> <half -4096.0, half -4096.0>
%add = fadd <2 x half> %select, %x
@@ -2573,61 +1938,33 @@ define <2 x half> @add_select_fneg_negk_negk_v2f16(<2 x i32> %c, <2 x half> %x)
; GFX9-NEXT: v_pk_add_f16 v0, v2, v0 neg_lo:[0,1] neg_hi:[0,1]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SAFE-TRUE16-LABEL: add_select_fneg_negk_negk_v2f16:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_mov_b16_e32 v3.l, 0xbc00
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, 0xc000, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.l, 0xc000, s0
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT: v_pack_b32_f16 v0, v0.h, v0.l
-; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v2, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_fneg_negk_negk_v2f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_mov_b32_e32 v3, 0xc000
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xbc00, v3, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v3, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v2, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_fneg_negk_negk_v2f16:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_mov_b16_e32 v3.l, 0xbc00
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, 0xc000, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.l, 0xc000, s0
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT: v_pack_b32_f16 v0, v0.h, v0.l
-; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v2, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_fneg_negk_negk_v2f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_mov_b32_e32 v3, 0xc000
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xbc00, v3, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v3, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v2, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_fneg_negk_negk_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_mov_b16_e32 v3.l, 0xbc00
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v3.l, 0xc000, vcc_lo
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v3.l, 0xc000, s0
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_pack_b32_f16 v0, v0.h, v0.l
+; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v2, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_fneg_negk_negk_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_mov_b32_e32 v3, 0xc000
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xbc00, v3, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v3, vcc_lo
+; GFX11-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v2, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%select = select <2 x i1> %cmp, <2 x half> <half -2.0, half -2.0>, <2 x half> <half -1.0, half -1.0>
%fneg.x = fneg <2 x half> %select
@@ -2681,55 +2018,30 @@ define <2 x half> @add_select_negk_fneg_v2f16(<2 x i32> %c, <2 x half> %x, <2 x
; GFX9-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SAFE-TRUE16-LABEL: add_select_negk_fneg_v2f16:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_ne_u32_e64 s0, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x3c00, v2.h, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x3c00, v2.l, s0
-; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_negk_fneg_v2f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v4, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x3c00, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_negk_fneg_v2f16:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_ne_u32_e64 s0, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x3c00, v2.h, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x3c00, v2.l, s0
-; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_negk_fneg_v2f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v4, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x3c00, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_negk_fneg_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1
+; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e64 s0, 0, v0
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x3c00, v2.h, vcc_lo
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x3c00, v2.l, s0
+; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_negk_fneg_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
+; GFX11-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x3c00, v4, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x3c00, v2, vcc_lo
+; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%fneg.x = fneg <2 x half> %x
%select = select <2 x i1> %cmp, <2 x half> <half -1.0, half -1.0>, <2 x half> %fneg.x
@@ -2783,55 +2095,30 @@ define <2 x half> @add_select_fneg_posk_v2f16(<2 x i32> %c, <2 x half> %x, <2 x
; GFX9-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SAFE-TRUE16-LABEL: add_select_fneg_posk_v2f16:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0xbc00, v2.h, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0xbc00, v2.l, s0
-; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_fneg_posk_v2f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xbc00, v4, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_fneg_posk_v2f16:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, 0xbc00, v2.h, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0xbc00, v2.l, s0
-; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_fneg_posk_v2f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xbc00, v4, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_fneg_posk_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0xbc00, v2.h, vcc_lo
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0xbc00, v2.l, s0
+; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_fneg_posk_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xbc00, v4, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v2, vcc_lo
+; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%fneg.x = fneg <2 x half> %x
%select = select <2 x i1> %cmp, <2 x half> %fneg.x, <2 x half> <half 1.0, half 1.0>
@@ -2885,55 +2172,30 @@ define <2 x half> @add_select_posk_fneg_v2f16(<2 x i32> %c, <2 x half> %x, <2 x
; GFX9-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SAFE-TRUE16-LABEL: add_select_posk_fneg_v2f16:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_ne_u32_e64 s0, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0xbc00, v2.h, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0xbc00, v2.l, s0
-; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_posk_fneg_v2f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xbc00, v4, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_posk_fneg_v2f16:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_ne_u32_e64 s0, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, 0xbc00, v2.h, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0xbc00, v2.l, s0
-; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_posk_fneg_v2f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xbc00, v4, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_posk_fneg_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1
+; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e64 s0, 0, v0
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0xbc00, v2.h, vcc_lo
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0xbc00, v2.l, s0
+; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_posk_fneg_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
+; GFX11-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xbc00, v4, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xbc00, v2, vcc_lo
+; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v3, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%fneg.x = fneg <2 x half> %x
%select = select <2 x i1> %cmp, <2 x half> <half 1.0, half 1.0>, <2 x half> %fneg.x
@@ -2997,69 +2259,37 @@ define <2 x half> @add_select_negfabs_fabs_v2f16(<2 x i32> %c, <2 x half> %x, <2
; GFX9-NEXT: v_pk_add_f16 v0, v0, v4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SAFE-TRUE16-LABEL: add_select_negfabs_fabs_v2f16:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: v_or_b32_e32 v0, 0x80008000, v2
-; GFX11-SAFE-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v3
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v0.l, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, v2.h, v0.h, s0
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_negfabs_fabs_v2f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_or_b32_e32 v2, 0x80008000, v2
-; GFX11-SAFE-FAKE16-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_negfabs_fabs_v2f16:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: v_or_b32_e32 v0, 0x80008000, v2
-; GFX11-NSZ-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v3
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v0.l, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, v2.h, v0.h, s0
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_negfabs_fabs_v2f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_or_b32_e32 v2, 0x80008000, v2
-; GFX11-NSZ-FAKE16-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_negfabs_fabs_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, 0x80008000, v2
+; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v3
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v0.l, vcc_lo
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v2.h, v0.h, s0
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_negfabs_fabs_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, 0x80008000, v2
+; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo
+; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%fabs.x = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
%fneg.fabs.x = fneg <2 x half> %fabs.x
@@ -3125,69 +2355,37 @@ define <2 x half> @add_select_fabs_negfabs_v2f16(<2 x i32> %c, <2 x half> %x, <2
; GFX9-NEXT: v_pk_add_f16 v0, v0, v4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SAFE-TRUE16-LABEL: add_select_fabs_negfabs_v2f16:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v2
-; GFX11-SAFE-TRUE16-NEXT: v_or_b32_e32 v2, 0x80008000, v3
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v0.l, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, v2.h, v0.h, s0
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_fabs_negfabs_v2f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_or_b32_e32 v3, 0x80008000, v3
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3
-; GFX11-SAFE-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_fabs_negfabs_v2f16:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v2
-; GFX11-NSZ-TRUE16-NEXT: v_or_b32_e32 v2, 0x80008000, v3
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v0.l, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, v2.h, v0.h, s0
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_fabs_negfabs_v2f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_or_b32_e32 v3, 0x80008000, v3
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3
-; GFX11-NSZ-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_fabs_negfabs_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v2
+; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x80008000, v3
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v0.l, vcc_lo
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v2.h, v0.h, s0
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_fabs_negfabs_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_or_b32_e32 v3, 0x80008000, v3
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3
+; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo
+; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%fabs.x = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
%fabs.y = call <2 x half> @llvm.fabs.v2f16(<2 x half> %y)
@@ -3253,69 +2451,37 @@ define <2 x half> @add_select_neg_fabs_v2f16(<2 x i32> %c, <2 x half> %x, <2 x h
; GFX9-NEXT: v_pk_add_f16 v0, v0, v4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SAFE-TRUE16-LABEL: add_select_neg_fabs_v2f16:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: v_xor_b32_e32 v0, 0x80008000, v2
-; GFX11-SAFE-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v3
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v0.l, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, v2.h, v0.h, s0
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_neg_fabs_v2f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
-; GFX11-SAFE-FAKE16-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_neg_fabs_v2f16:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: v_xor_b32_e32 v0, 0x80008000, v2
-; GFX11-NSZ-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v3
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v0.l, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, v2.h, v0.h, s0
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_neg_fabs_v2f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
-; GFX11-NSZ-FAKE16-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_neg_fabs_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-TRUE16-NEXT: v_xor_b32_e32 v0, 0x80008000, v2
+; GFX11-TRUE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v3
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v0.l, vcc_lo
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v2.h, v0.h, s0
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_neg_fabs_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
+; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo
+; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%fneg.x = fneg <2 x half> %x
%fabs.y = call <2 x half> @llvm.fabs.v2f16(<2 x half> %y)
@@ -3380,69 +2546,37 @@ define <2 x half> @add_select_fabs_neg_v2f16(<2 x i32> %c, <2 x half> %x, <2 x h
; GFX9-NEXT: v_pk_add_f16 v0, v0, v4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SAFE-TRUE16-LABEL: add_select_fabs_neg_v2f16:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v2
-; GFX11-SAFE-TRUE16-NEXT: v_xor_b32_e32 v2, 0x80008000, v3
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v0.l, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, v2.h, v0.h, s0
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_fabs_neg_v2f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_xor_b32_e32 v3, 0x80008000, v3
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3
-; GFX11-SAFE-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_fabs_neg_v2f16:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v2
-; GFX11-NSZ-TRUE16-NEXT: v_xor_b32_e32 v2, 0x80008000, v3
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v0.l, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, v2.h, v0.h, s0
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_fabs_neg_v2f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_xor_b32_e32 v3, 0x80008000, v3
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3
-; GFX11-NSZ-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_fabs_neg_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-TRUE16-NEXT: v_and_b32_e32 v0, 0x7fff7fff, v2
+; GFX11-TRUE16-NEXT: v_xor_b32_e32 v2, 0x80008000, v3
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v2.l, v0.l, vcc_lo
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v2.h, v0.h, s0
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v0, v4
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_fabs_neg_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_xor_b32_e32 v3, 0x80008000, v3
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3
+; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo
+; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v0, v4
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%fabs.x = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
%fneg.y = fneg <2 x half> %y
@@ -3501,63 +2635,34 @@ define <2 x half> @add_select_neg_negfabs_v2f16(<2 x i32> %c, <2 x half> %x, <2
; GFX9-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SAFE-TRUE16-LABEL: add_select_neg_negfabs_v2f16:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: v_and_b32_e32 v1, 0x7fff7fff, v3
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, v1.h, v2.h, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, v1.l, v2.l, s0
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_neg_negfabs_v2f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_neg_negfabs_v2f16:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: v_and_b32_e32 v1, 0x7fff7fff, v3
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, v1.h, v2.h, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, v1.l, v2.l, s0
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_neg_negfabs_v2f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_neg_negfabs_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0x7fff7fff, v3
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v1.h, v2.h, vcc_lo
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v1.l, v2.l, s0
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_neg_negfabs_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_and_b32_e32 v3, 0x7fff7fff, v3
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v2
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v3
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc_lo
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%fneg.x = fneg <2 x half> %x
%fabs.y = call <2 x half> @llvm.fabs.v2f16(<2 x half> %y)
@@ -3617,63 +2722,34 @@ define <2 x half> @add_select_negfabs_neg_v2f16(<2 x i32> %c, <2 x half> %x, <2
; GFX9-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SAFE-TRUE16-LABEL: add_select_negfabs_neg_v2f16:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: v_and_b32_e32 v1, 0x7fff7fff, v2
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, v1.h, v3.h, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, v1.l, v3.l, s0
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: add_select_negfabs_neg_v2f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v3
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: add_select_negfabs_neg_v2f16:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: v_and_b32_e32 v1, 0x7fff7fff, v2
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, v1.h, v3.h, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, v1.l, v3.l, s0
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: add_select_negfabs_neg_v2f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v3
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: add_select_negfabs_neg_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-TRUE16-NEXT: v_and_b32_e32 v1, 0x7fff7fff, v2
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v0
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, v1.h, v3.h, vcc_lo
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, v1.l, v3.l, s0
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: add_select_negfabs_neg_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v5, 16, v3
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: v_and_b32_e32 v2, 0x7fff7fff, v2
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v6, 16, v2
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, v6, v5, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, v2, v3, vcc_lo
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT: v_pk_add_f16 v0, v4, v0 neg_lo:[0,1] neg_hi:[0,1]
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%fabs.x = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
%fneg.fabs.x = fneg <2 x half> %fabs.x
@@ -3735,61 +2811,33 @@ define <2 x half> @mul_select_negfabs_posk_v2f16(<2 x i32> %c, <2 x half> %x, <2
; GFX9-NEXT: v_pk_mul_f16 v0, v0, v3
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SAFE-TRUE16-LABEL: mul_select_negfabs_posk_v2f16:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: v_or_b32_e32 v0, 0x80008000, v2
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4400, v0.l, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4400, v0.h, s0
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT: v_pk_mul_f16 v0, v0, v3
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: mul_select_negfabs_posk_v2f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_or_b32_e32 v2, 0x80008000, v2
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4400, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4400, v4, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT: v_pk_mul_f16 v0, v0, v3
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: mul_select_negfabs_posk_v2f16:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: v_or_b32_e32 v0, 0x80008000, v2
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4400, v0.l, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4400, v0.h, s0
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT: v_pk_mul_f16 v0, v0, v3
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: mul_select_negfabs_posk_v2f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_or_b32_e32 v2, 0x80008000, v2
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4400, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4400, v4, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT: v_pk_mul_f16 v0, v0, v3
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: mul_select_negfabs_posk_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, 0x80008000, v2
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4400, v0.l, vcc_lo
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4400, v0.h, s0
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_pk_mul_f16 v0, v0, v3
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: mul_select_negfabs_posk_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, 0x80008000, v2
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4400, v2, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4400, v4, vcc_lo
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT: v_pk_mul_f16 v0, v0, v3
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%fabs.x = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
%fneg.fabs.x = fneg <2 x half> %fabs.x
@@ -3850,61 +2898,33 @@ define <2 x half> @mul_select_posk_negfabs_v2f16(<2 x i32> %c, <2 x half> %x, <2
; GFX9-NEXT: v_pk_mul_f16 v0, v0, v3
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SAFE-TRUE16-LABEL: mul_select_posk_negfabs_v2f16:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_or_b32_e32 v2, 0x80008000, v2
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_ne_u32_e64 s0, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4400, v2.l, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4400, v2.h, s0
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT: v_pk_mul_f16 v0, v0, v3
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: mul_select_posk_negfabs_v2f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_or_b32_e32 v2, 0x80008000, v2
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4400, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4400, v4, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT: v_pk_mul_f16 v0, v0, v3
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: mul_select_posk_negfabs_v2f16:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_or_b32_e32 v2, 0x80008000, v2
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_ne_u32_e64 s0, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4400, v2.l, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4400, v2.h, s0
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT: v_pk_mul_f16 v0, v0, v3
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: mul_select_posk_negfabs_v2f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_or_b32_e32 v2, 0x80008000, v2
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4400, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4400, v4, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT: v_pk_mul_f16 v0, v0, v3
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: mul_select_posk_negfabs_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x80008000, v2
+; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e64 s0, 0, v1
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4400, v2.l, vcc_lo
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4400, v2.h, s0
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_pk_mul_f16 v0, v0, v3
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: mul_select_posk_negfabs_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, 0x80008000, v2
+; GFX11-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4400, v2, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4400, v4, vcc_lo
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT: v_pk_mul_f16 v0, v0, v3
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%fabs.x = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
%fneg.fabs.x = fneg <2 x half> %fabs.x
@@ -3965,61 +2985,33 @@ define <2 x half> @mul_select_negfabs_negk_v2f16(<2 x i32> %c, <2 x half> %x, <2
; GFX9-NEXT: v_pk_mul_f16 v0, v0, v3
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SAFE-TRUE16-LABEL: mul_select_negfabs_negk_v2f16:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: v_or_b32_e32 v0, 0x80008000, v2
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0xc400, v0.l, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0xc400, v0.h, s0
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT: v_pk_mul_f16 v0, v0, v3
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: mul_select_negfabs_negk_v2f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_or_b32_e32 v2, 0x80008000, v2
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xc400, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xc400, v4, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT: v_pk_mul_f16 v0, v0, v3
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: mul_select_negfabs_negk_v2f16:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: v_or_b32_e32 v0, 0x80008000, v2
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0xc400, v0.l, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, 0xc400, v0.h, s0
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT: v_pk_mul_f16 v0, v0, v3
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: mul_select_negfabs_negk_v2f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_or_b32_e32 v2, 0x80008000, v2
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xc400, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xc400, v4, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT: v_pk_mul_f16 v0, v0, v3
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: mul_select_negfabs_negk_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-TRUE16-NEXT: v_or_b32_e32 v0, 0x80008000, v2
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0xc400, v0.l, vcc_lo
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0xc400, v0.h, s0
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_pk_mul_f16 v0, v0, v3
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: mul_select_negfabs_negk_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, 0x80008000, v2
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xc400, v2, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xc400, v4, vcc_lo
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT: v_pk_mul_f16 v0, v0, v3
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%fabs.x = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
%fneg.fabs.x = fneg <2 x half> %fabs.x
@@ -4080,61 +3072,33 @@ define <2 x half> @mul_select_negk_negfabs_v2f16(<2 x i32> %c, <2 x half> %x, <2
; GFX9-NEXT: v_pk_mul_f16 v0, v0, v3
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SAFE-TRUE16-LABEL: mul_select_negk_negfabs_v2f16:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_or_b32_e32 v2, 0x80008000, v2
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_ne_u32_e64 s0, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0xc400, v2.l, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0xc400, v2.h, s0
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT: v_pk_mul_f16 v0, v0, v3
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: mul_select_negk_negfabs_v2f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_or_b32_e32 v2, 0x80008000, v2
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xc400, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xc400, v4, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT: v_pk_mul_f16 v0, v0, v3
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: mul_select_negk_negfabs_v2f16:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_or_b32_e32 v2, 0x80008000, v2
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_ne_u32_e64 s0, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0xc400, v2.l, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, 0xc400, v2.h, s0
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-TRUE16-NEXT: v_pk_mul_f16 v0, v0, v3
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: mul_select_negk_negfabs_v2f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_or_b32_e32 v2, 0x80008000, v2
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xc400, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xc400, v4, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT: v_pk_mul_f16 v0, v0, v3
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: mul_select_negk_negfabs_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_or_b32_e32 v2, 0x80008000, v2
+; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-TRUE16-NEXT: v_cmp_ne_u32_e64 s0, 0, v1
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0xc400, v2.l, vcc_lo
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0xc400, v2.h, s0
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_pk_mul_f16 v0, v0, v3
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: mul_select_negk_negfabs_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_or_b32_e32 v2, 0x80008000, v2
+; GFX11-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v4, 16, v2
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0xc400, v2, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0xc400, v4, vcc_lo
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT: v_pk_mul_f16 v0, v0, v3
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%fabs.x = call <2 x half> @llvm.fabs.v2f16(<2 x half> %x)
%fneg.fabs.x = fneg <2 x half> %fabs.x
@@ -4171,115 +3135,63 @@ define <2 x half> @select_fneg_posk_src_add_v2f16(<2 x i32> %c, <2 x half> %x, <
; CI-NEXT: v_cndmask_b32_e32 v1, 2.0, v2, vcc
; CI-NEXT: s_setpc_b64 s[30:31]
;
-; VI-SAFE-LABEL: select_fneg_posk_src_add_v2f16:
-; VI-SAFE: ; %bb.0:
-; VI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-SAFE-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
-; VI-SAFE-NEXT: v_mov_b32_e32 v1, 0x4400
-; VI-SAFE-NEXT: v_add_f16_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; VI-SAFE-NEXT: v_add_f16_e32 v2, 4.0, v2
-; VI-SAFE-NEXT: v_or_b32_e32 v1, v2, v1
-; VI-SAFE-NEXT: v_xor_b32_e32 v1, 0x80008000, v1
-; VI-SAFE-NEXT: v_mov_b32_e32 v2, 0x4000
-; VI-SAFE-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
-; VI-SAFE-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[4:5]
-; VI-SAFE-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; VI-SAFE-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; VI-SAFE-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-SAFE-LABEL: select_fneg_posk_src_add_v2f16:
-; GFX9-SAFE: ; %bb.0:
-; GFX9-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SAFE-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
-; GFX9-SAFE-NEXT: v_pk_add_f16 v1, v2, 4.0 op_sel_hi:[1,0]
-; GFX9-SAFE-NEXT: v_xor_b32_e32 v1, 0x80008000, v1
-; GFX9-SAFE-NEXT: v_mov_b32_e32 v2, 0x4000
-; GFX9-SAFE-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
-; GFX9-SAFE-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[4:5]
-; GFX9-SAFE-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX9-SAFE-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-SAFE-NEXT: v_perm_b32 v0, v1, v0, s4
-; GFX9-SAFE-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-TRUE16-LABEL: select_fneg_posk_src_add_v2f16:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v2, v2, 4.0 op_sel_hi:[1,0]
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT: v_xor_b32_e32 v0, 0x80008000, v2
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: select_fneg_posk_src_add_v2f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v2, v2, 4.0 op_sel_hi:[1,0]
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; VI-NSZ-LABEL: select_fneg_posk_src_add_v2f16:
-; VI-NSZ: ; %bb.0:
-; VI-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-NSZ-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
-; VI-NSZ-NEXT: v_mov_b32_e32 v1, 0xc400
-; VI-NSZ-NEXT: v_sub_f16_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; VI-NSZ-NEXT: v_sub_f16_e32 v2, -4.0, v2
-; VI-NSZ-NEXT: v_mov_b32_e32 v3, 0x4000
-; VI-NSZ-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
-; VI-NSZ-NEXT: v_cndmask_b32_e64 v0, v3, v2, s[4:5]
-; VI-NSZ-NEXT: v_cndmask_b32_sdwa v1, v3, v1, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; VI-NSZ-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; VI-NSZ-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-NSZ-LABEL: select_fneg_posk_src_add_v2f16:
-; GFX9-NSZ: ; %bb.0:
-; GFX9-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NSZ-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
-; GFX9-NSZ-NEXT: v_pk_add_f16 v1, v2, -4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0]
-; GFX9-NSZ-NEXT: v_mov_b32_e32 v2, 0x4000
-; GFX9-NSZ-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
-; GFX9-NSZ-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[4:5]
-; GFX9-NSZ-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX9-NSZ-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NSZ-NEXT: v_perm_b32 v0, v1, v0, s4
-; GFX9-NSZ-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: select_fneg_posk_src_add_v2f16:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v2, -4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0]
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: select_fneg_posk_src_add_v2f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v2, v2, -4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0]
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; VI-LABEL: select_fneg_posk_src_add_v2f16:
+; VI: ; %bb.0:
+; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
+; VI-NEXT: v_mov_b32_e32 v1, 0x4400
+; VI-NEXT: v_add_f16_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; VI-NEXT: v_add_f16_e32 v2, 4.0, v2
+; VI-NEXT: v_or_b32_e32 v1, v2, v1
+; VI-NEXT: v_xor_b32_e32 v1, 0x80008000, v1
+; VI-NEXT: v_mov_b32_e32 v2, 0x4000
+; VI-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
+; VI-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[4:5]
+; VI-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; VI-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; VI-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: select_fneg_posk_src_add_v2f16:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
+; GFX9-NEXT: v_pk_add_f16 v1, v2, 4.0 op_sel_hi:[1,0]
+; GFX9-NEXT: v_xor_b32_e32 v1, 0x80008000, v1
+; GFX9-NEXT: v_mov_b32_e32 v2, 0x4000
+; GFX9-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
+; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[4:5]
+; GFX9-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT: s_mov_b32 s4, 0x5040100
+; GFX9-NEXT: v_perm_b32 v0, v1, v0, s4
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-LABEL: select_fneg_posk_src_add_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_pk_add_f16 v2, v2, 4.0 op_sel_hi:[1,0]
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_xor_b32_e32 v0, 0x80008000, v2
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: select_fneg_posk_src_add_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_pk_add_f16 v2, v2, 4.0 op_sel_hi:[1,0]
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
+; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%add = fadd <2 x half> %x, <half 4.0, half 4.0>
%fneg = fneg <2 x half> %add
@@ -4330,55 +3242,30 @@ define <2 x half> @select_fneg_posk_src_add_v2f16_nsz(<2 x i32> %c, <2 x half> %
; GFX9-NEXT: v_perm_b32 v0, v1, v0, s4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SAFE-TRUE16-LABEL: select_fneg_posk_src_add_v2f16_nsz:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v0, v2, -4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0]
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: select_fneg_posk_src_add_v2f16_nsz:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v2, v2, -4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0]
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: select_fneg_posk_src_add_v2f16_nsz:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v2, -4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0]
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: select_fneg_posk_src_add_v2f16_nsz:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v2, v2, -4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0]
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: select_fneg_posk_src_add_v2f16_nsz:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v2, -4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0]
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: select_fneg_posk_src_add_v2f16_nsz:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_pk_add_f16 v2, v2, -4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0]
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%add = fadd nsz <2 x half> %x, <half 4.0, half 4.0>
%fneg = fneg <2 x half> %add
@@ -4387,153 +3274,86 @@ define <2 x half> @select_fneg_posk_src_add_v2f16_nsz(<2 x i32> %c, <2 x half> %
}
define <2 x half> @select_fneg_posk_src_sub_v2f16(<2 x i32> %c, <2 x half> %x) {
-; CI-SAFE-LABEL: select_fneg_posk_src_sub_v2f16:
-; CI-SAFE: ; %bb.0:
-; CI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CI-SAFE-NEXT: v_cvt_f16_f32_e32 v3, v3
-; CI-SAFE-NEXT: v_cvt_f16_f32_e32 v2, v2
-; CI-SAFE-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; CI-SAFE-NEXT: v_cvt_f32_f16_e32 v3, v3
-; CI-SAFE-NEXT: v_cvt_f32_f16_e32 v2, v2
-; CI-SAFE-NEXT: v_add_f32_e32 v3, -4.0, v3
-; CI-SAFE-NEXT: v_add_f32_e32 v2, -4.0, v2
-; CI-SAFE-NEXT: v_cvt_f16_f32_e32 v3, v3
-; CI-SAFE-NEXT: v_cvt_f16_f32_e32 v2, v2
-; CI-SAFE-NEXT: v_lshlrev_b32_e32 v3, 16, v3
-; CI-SAFE-NEXT: v_or_b32_e32 v2, v2, v3
-; CI-SAFE-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
-; CI-SAFE-NEXT: v_cvt_f32_f16_e32 v3, v2
-; CI-SAFE-NEXT: v_lshrrev_b32_e32 v2, 16, v2
-; CI-SAFE-NEXT: v_cvt_f32_f16_e32 v2, v2
-; CI-SAFE-NEXT: v_cndmask_b32_e32 v0, 2.0, v3, vcc
-; CI-SAFE-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
-; CI-SAFE-NEXT: v_cndmask_b32_e32 v1, 2.0, v2, vcc
-; CI-SAFE-NEXT: s_setpc_b64 s[30:31]
-;
-; VI-SAFE-LABEL: select_fneg_posk_src_sub_v2f16:
-; VI-SAFE: ; %bb.0:
-; VI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-SAFE-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
-; VI-SAFE-NEXT: v_mov_b32_e32 v1, 0xc400
-; VI-SAFE-NEXT: v_add_f16_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
-; VI-SAFE-NEXT: v_add_f16_e32 v2, -4.0, v2
-; VI-SAFE-NEXT: v_or_b32_e32 v1, v2, v1
-; VI-SAFE-NEXT: v_xor_b32_e32 v1, 0x80008000, v1
-; VI-SAFE-NEXT: v_mov_b32_e32 v2, 0x4000
-; VI-SAFE-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
-; VI-SAFE-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[4:5]
-; VI-SAFE-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; VI-SAFE-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; VI-SAFE-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-SAFE-LABEL: select_fneg_posk_src_sub_v2f16:
-; GFX9-SAFE: ; %bb.0:
-; GFX9-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SAFE-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
-; GFX9-SAFE-NEXT: v_pk_add_f16 v1, v2, -4.0 op_sel_hi:[1,0]
-; GFX9-SAFE-NEXT: v_xor_b32_e32 v1, 0x80008000, v1
-; GFX9-SAFE-NEXT: v_mov_b32_e32 v2, 0x4000
-; GFX9-SAFE-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
-; GFX9-SAFE-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[4:5]
-; GFX9-SAFE-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX9-SAFE-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-SAFE-NEXT: v_perm_b32 v0, v1, v0, s4
-; GFX9-SAFE-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-TRUE16-LABEL: select_fneg_posk_src_sub_v2f16:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_pk_add_f16 v2, v2, -4.0 op_sel_hi:[1,0]
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT: v_xor_b32_e32 v0, 0x80008000, v2
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: select_fneg_posk_src_sub_v2f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_pk_add_f16 v2, v2, -4.0 op_sel_hi:[1,0]
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; CI-NSZ-LABEL: select_fneg_posk_src_sub_v2f16:
-; CI-NSZ: ; %bb.0:
-; CI-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CI-NSZ-NEXT: v_cvt_f16_f32_e32 v2, v2
-; CI-NSZ-NEXT: v_cvt_f16_f32_e32 v3, v3
-; CI-NSZ-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; CI-NSZ-NEXT: v_cvt_f32_f16_e32 v2, v2
-; CI-NSZ-NEXT: v_cvt_f32_f16_e32 v3, v3
-; CI-NSZ-NEXT: v_sub_f32_e32 v2, 4.0, v2
-; CI-NSZ-NEXT: v_sub_f32_e32 v3, 4.0, v3
-; CI-NSZ-NEXT: v_cndmask_b32_e32 v0, 2.0, v2, vcc
-; CI-NSZ-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
-; CI-NSZ-NEXT: v_cndmask_b32_e32 v1, 2.0, v3, vcc
-; CI-NSZ-NEXT: s_setpc_b64 s[30:31]
-;
-; VI-NSZ-LABEL: select_fneg_posk_src_sub_v2f16:
-; VI-NSZ: ; %bb.0:
-; VI-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-NSZ-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
-; VI-NSZ-NEXT: v_mov_b32_e32 v1, 0x4400
-; VI-NSZ-NEXT: v_sub_f16_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; VI-NSZ-NEXT: v_sub_f16_e32 v2, 4.0, v2
-; VI-NSZ-NEXT: v_mov_b32_e32 v3, 0x4000
-; VI-NSZ-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
-; VI-NSZ-NEXT: v_cndmask_b32_e64 v0, v3, v2, s[4:5]
-; VI-NSZ-NEXT: v_cndmask_b32_sdwa v1, v3, v1, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; VI-NSZ-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; VI-NSZ-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-NSZ-LABEL: select_fneg_posk_src_sub_v2f16:
-; GFX9-NSZ: ; %bb.0:
-; GFX9-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NSZ-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
-; GFX9-NSZ-NEXT: v_pk_add_f16 v1, v2, 4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0]
-; GFX9-NSZ-NEXT: v_mov_b32_e32 v2, 0x4000
-; GFX9-NSZ-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
-; GFX9-NSZ-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[4:5]
-; GFX9-NSZ-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX9-NSZ-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NSZ-NEXT: v_perm_b32 v0, v1, v0, s4
-; GFX9-NSZ-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: select_fneg_posk_src_sub_v2f16:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: v_pk_add_f16 v0, v2, 4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0]
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: select_fneg_posk_src_sub_v2f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_pk_add_f16 v2, v2, 4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0]
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; CI-LABEL: select_fneg_posk_src_sub_v2f16:
+; CI: ; %bb.0:
+; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CI-NEXT: v_cvt_f16_f32_e32 v3, v3
+; CI-NEXT: v_cvt_f16_f32_e32 v2, v2
+; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
+; CI-NEXT: v_cvt_f32_f16_e32 v3, v3
+; CI-NEXT: v_cvt_f32_f16_e32 v2, v2
+; CI-NEXT: v_add_f32_e32 v3, -4.0, v3
+; CI-NEXT: v_add_f32_e32 v2, -4.0, v2
+; CI-NEXT: v_cvt_f16_f32_e32 v3, v3
+; CI-NEXT: v_cvt_f16_f32_e32 v2, v2
+; CI-NEXT: v_lshlrev_b32_e32 v3, 16, v3
+; CI-NEXT: v_or_b32_e32 v2, v2, v3
+; CI-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
+; CI-NEXT: v_cvt_f32_f16_e32 v3, v2
+; CI-NEXT: v_lshrrev_b32_e32 v2, 16, v2
+; CI-NEXT: v_cvt_f32_f16_e32 v2, v2
+; CI-NEXT: v_cndmask_b32_e32 v0, 2.0, v3, vcc
+; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
+; CI-NEXT: v_cndmask_b32_e32 v1, 2.0, v2, vcc
+; CI-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-LABEL: select_fneg_posk_src_sub_v2f16:
+; VI: ; %bb.0:
+; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
+; VI-NEXT: v_mov_b32_e32 v1, 0xc400
+; VI-NEXT: v_add_f16_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
+; VI-NEXT: v_add_f16_e32 v2, -4.0, v2
+; VI-NEXT: v_or_b32_e32 v1, v2, v1
+; VI-NEXT: v_xor_b32_e32 v1, 0x80008000, v1
+; VI-NEXT: v_mov_b32_e32 v2, 0x4000
+; VI-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
+; VI-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[4:5]
+; VI-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; VI-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; VI-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: select_fneg_posk_src_sub_v2f16:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
+; GFX9-NEXT: v_pk_add_f16 v1, v2, -4.0 op_sel_hi:[1,0]
+; GFX9-NEXT: v_xor_b32_e32 v1, 0x80008000, v1
+; GFX9-NEXT: v_mov_b32_e32 v2, 0x4000
+; GFX9-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
+; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[4:5]
+; GFX9-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT: s_mov_b32 s4, 0x5040100
+; GFX9-NEXT: v_perm_b32 v0, v1, v0, s4
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-LABEL: select_fneg_posk_src_sub_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_pk_add_f16 v2, v2, -4.0 op_sel_hi:[1,0]
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_xor_b32_e32 v0, 0x80008000, v2
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: select_fneg_posk_src_sub_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_pk_add_f16 v2, v2, -4.0 op_sel_hi:[1,0]
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
+; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%add = fsub <2 x half> %x, <half 4.0, half 4.0>
%fneg = fneg <2 x half> %add
@@ -4541,6 +3361,80 @@ define <2 x half> @select_fneg_posk_src_sub_v2f16(<2 x i32> %c, <2 x half> %x) {
ret <2 x half> %select
}
+define <2 x half> @select_fneg_posk_src_sub_v2f16_nsz(<2 x i32> %c, <2 x half> %x) {
+; CI-LABEL: select_fneg_posk_src_sub_v2f16_nsz:
+; CI: ; %bb.0:
+; CI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CI-NEXT: v_cvt_f16_f32_e32 v2, v2
+; CI-NEXT: v_cvt_f16_f32_e32 v3, v3
+; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
+; CI-NEXT: v_cvt_f32_f16_e32 v2, v2
+; CI-NEXT: v_cvt_f32_f16_e32 v3, v3
+; CI-NEXT: v_sub_f32_e32 v2, 4.0, v2
+; CI-NEXT: v_sub_f32_e32 v3, 4.0, v3
+; CI-NEXT: v_cndmask_b32_e32 v0, 2.0, v2, vcc
+; CI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
+; CI-NEXT: v_cndmask_b32_e32 v1, 2.0, v3, vcc
+; CI-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-LABEL: select_fneg_posk_src_sub_v2f16_nsz:
+; VI: ; %bb.0:
+; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
+; VI-NEXT: v_mov_b32_e32 v1, 0x4400
+; VI-NEXT: v_sub_f16_sdwa v1, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; VI-NEXT: v_sub_f16_e32 v2, 4.0, v2
+; VI-NEXT: v_mov_b32_e32 v3, 0x4000
+; VI-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
+; VI-NEXT: v_cndmask_b32_e64 v0, v3, v2, s[4:5]
+; VI-NEXT: v_cndmask_b32_sdwa v1, v3, v1, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; VI-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; VI-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: select_fneg_posk_src_sub_v2f16_nsz:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
+; GFX9-NEXT: v_pk_add_f16 v1, v2, 4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0]
+; GFX9-NEXT: v_mov_b32_e32 v2, 0x4000
+; GFX9-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
+; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[4:5]
+; GFX9-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT: s_mov_b32 s4, 0x5040100
+; GFX9-NEXT: v_perm_b32 v0, v1, v0, s4
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-LABEL: select_fneg_posk_src_sub_v2f16_nsz:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-TRUE16-NEXT: v_pk_add_f16 v0, v2, 4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0]
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: select_fneg_posk_src_sub_v2f16_nsz:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_pk_add_f16 v2, v2, 4.0 op_sel_hi:[1,0] neg_lo:[1,0] neg_hi:[1,0]
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
+ %cmp = icmp eq <2 x i32> %c, zeroinitializer
+ %add = fsub <2 x half> %x, <half 4.0, half 4.0>
+ %fneg = fneg nsz <2 x half> %add
+ %select = select <2 x i1> %cmp, <2 x half> %fneg, <2 x half> <half 2.0, half 2.0>
+ ret <2 x half> %select
+}
+
define <2 x half> @select_fneg_posk_src_mul_v2f16(<2 x i32> %c, <2 x half> %x) {
; CI-LABEL: select_fneg_posk_src_mul_v2f16:
; CI: ; %bb.0:
@@ -4584,55 +3478,30 @@ define <2 x half> @select_fneg_posk_src_mul_v2f16(<2 x i32> %c, <2 x half> %x) {
; GFX9-NEXT: v_perm_b32 v0, v1, v0, s4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SAFE-TRUE16-LABEL: select_fneg_posk_src_mul_v2f16:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: v_pk_mul_f16 v0, v2, -4.0 op_sel_hi:[1,0]
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: select_fneg_posk_src_mul_v2f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_pk_mul_f16 v2, v2, -4.0 op_sel_hi:[1,0]
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: select_fneg_posk_src_mul_v2f16:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: v_pk_mul_f16 v0, v2, -4.0 op_sel_hi:[1,0]
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: select_fneg_posk_src_mul_v2f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_pk_mul_f16 v2, v2, -4.0 op_sel_hi:[1,0]
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: select_fneg_posk_src_mul_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-TRUE16-NEXT: v_pk_mul_f16 v0, v2, -4.0 op_sel_hi:[1,0]
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: select_fneg_posk_src_mul_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_pk_mul_f16 v2, v2, -4.0 op_sel_hi:[1,0]
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%mul = fmul <2 x half> %x, <half 4.0, half 4.0>
%fneg = fneg <2 x half> %mul
@@ -4668,118 +3537,65 @@ define <2 x half> @select_fneg_posk_src_fma_v2f16(<2 x i32> %c, <2 x half> %x, <
; CI-NEXT: v_cndmask_b32_e32 v1, 2.0, v2, vcc
; CI-NEXT: s_setpc_b64 s[30:31]
;
-; VI-SAFE-LABEL: select_fneg_posk_src_fma_v2f16:
-; VI-SAFE: ; %bb.0:
-; VI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-SAFE-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
-; VI-SAFE-NEXT: v_lshrrev_b32_e32 v1, 16, v3
-; VI-SAFE-NEXT: v_lshrrev_b32_e32 v4, 16, v2
-; VI-SAFE-NEXT: v_fma_f16 v1, v4, 4.0, v1
-; VI-SAFE-NEXT: v_lshlrev_b32_e32 v1, 16, v1
-; VI-SAFE-NEXT: v_fma_f16 v2, v2, 4.0, v3
-; VI-SAFE-NEXT: v_or_b32_e32 v1, v2, v1
-; VI-SAFE-NEXT: v_xor_b32_e32 v1, 0x80008000, v1
-; VI-SAFE-NEXT: v_mov_b32_e32 v2, 0x4000
-; VI-SAFE-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
-; VI-SAFE-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[4:5]
-; VI-SAFE-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; VI-SAFE-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; VI-SAFE-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-SAFE-LABEL: select_fneg_posk_src_fma_v2f16:
-; GFX9-SAFE: ; %bb.0:
-; GFX9-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SAFE-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
-; GFX9-SAFE-NEXT: v_pk_fma_f16 v1, v2, 4.0, v3 op_sel_hi:[1,0,1]
-; GFX9-SAFE-NEXT: v_xor_b32_e32 v1, 0x80008000, v1
-; GFX9-SAFE-NEXT: v_mov_b32_e32 v2, 0x4000
-; GFX9-SAFE-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
-; GFX9-SAFE-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[4:5]
-; GFX9-SAFE-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX9-SAFE-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-SAFE-NEXT: v_perm_b32 v0, v1, v0, s4
-; GFX9-SAFE-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-TRUE16-LABEL: select_fneg_posk_src_fma_v2f16:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_pk_fma_f16 v2, v2, 4.0, v3 op_sel_hi:[1,0,1]
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT: v_xor_b32_e32 v0, 0x80008000, v2
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: select_fneg_posk_src_fma_v2f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_pk_fma_f16 v2, v2, 4.0, v3 op_sel_hi:[1,0,1]
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; VI-NSZ-LABEL: select_fneg_posk_src_fma_v2f16:
-; VI-NSZ: ; %bb.0:
-; VI-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-NSZ-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
-; VI-NSZ-NEXT: v_lshrrev_b32_e32 v1, 16, v3
-; VI-NSZ-NEXT: v_lshrrev_b32_e32 v4, 16, v2
-; VI-NSZ-NEXT: v_fma_f16 v1, v4, -4.0, -v1
-; VI-NSZ-NEXT: v_fma_f16 v2, v2, -4.0, -v3
-; VI-NSZ-NEXT: v_mov_b32_e32 v3, 0x4000
-; VI-NSZ-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
-; VI-NSZ-NEXT: v_cndmask_b32_e64 v0, v3, v2, s[4:5]
-; VI-NSZ-NEXT: v_cndmask_b32_sdwa v1, v3, v1, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; VI-NSZ-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; VI-NSZ-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-NSZ-LABEL: select_fneg_posk_src_fma_v2f16:
-; GFX9-NSZ: ; %bb.0:
-; GFX9-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NSZ-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
-; GFX9-NSZ-NEXT: v_pk_fma_f16 v1, v2, -4.0, v3 op_sel_hi:[1,0,1] neg_lo:[0,0,1] neg_hi:[0,0,1]
-; GFX9-NSZ-NEXT: v_mov_b32_e32 v2, 0x4000
-; GFX9-NSZ-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
-; GFX9-NSZ-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[4:5]
-; GFX9-NSZ-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX9-NSZ-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NSZ-NEXT: v_perm_b32 v0, v1, v0, s4
-; GFX9-NSZ-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: select_fneg_posk_src_fma_v2f16:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: v_pk_fma_f16 v0, v2, -4.0, v3 op_sel_hi:[1,0,1] neg_lo:[0,0,1] neg_hi:[0,0,1]
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: select_fneg_posk_src_fma_v2f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_pk_fma_f16 v2, v2, -4.0, v3 op_sel_hi:[1,0,1] neg_lo:[0,0,1] neg_hi:[0,0,1]
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; VI-LABEL: select_fneg_posk_src_fma_v2f16:
+; VI: ; %bb.0:
+; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
+; VI-NEXT: v_lshrrev_b32_e32 v1, 16, v3
+; VI-NEXT: v_lshrrev_b32_e32 v4, 16, v2
+; VI-NEXT: v_fma_f16 v1, v4, 4.0, v1
+; VI-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; VI-NEXT: v_fma_f16 v2, v2, 4.0, v3
+; VI-NEXT: v_or_b32_e32 v1, v2, v1
+; VI-NEXT: v_xor_b32_e32 v1, 0x80008000, v1
+; VI-NEXT: v_mov_b32_e32 v2, 0x4000
+; VI-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
+; VI-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[4:5]
+; VI-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; VI-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; VI-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: select_fneg_posk_src_fma_v2f16:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
+; GFX9-NEXT: v_pk_fma_f16 v1, v2, 4.0, v3 op_sel_hi:[1,0,1]
+; GFX9-NEXT: v_xor_b32_e32 v1, 0x80008000, v1
+; GFX9-NEXT: v_mov_b32_e32 v2, 0x4000
+; GFX9-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
+; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[4:5]
+; GFX9-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT: s_mov_b32 s4, 0x5040100
+; GFX9-NEXT: v_perm_b32 v0, v1, v0, s4
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-LABEL: select_fneg_posk_src_fma_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_pk_fma_f16 v2, v2, 4.0, v3 op_sel_hi:[1,0,1]
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_xor_b32_e32 v0, 0x80008000, v2
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: select_fneg_posk_src_fma_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_pk_fma_f16 v2, v2, 4.0, v3 op_sel_hi:[1,0,1]
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
+; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%fma = call <2 x half> @llvm.fma.v2f16(<2 x half> %x, <2 x half> <half 4.0, half 4.0>, <2 x half> %z)
%fneg = fneg <2 x half> %fma
@@ -4817,118 +3633,65 @@ define <2 x half> @select_fneg_posk_src_fmad_v2f16(<2 x i32> %c, <2 x half> %x,
; CI-NEXT: v_cndmask_b32_e32 v1, 2.0, v2, vcc
; CI-NEXT: s_setpc_b64 s[30:31]
;
-; VI-SAFE-LABEL: select_fneg_posk_src_fmad_v2f16:
-; VI-SAFE: ; %bb.0:
-; VI-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-SAFE-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
-; VI-SAFE-NEXT: v_lshrrev_b32_e32 v1, 16, v3
-; VI-SAFE-NEXT: v_lshrrev_b32_e32 v4, 16, v2
-; VI-SAFE-NEXT: v_fma_f16 v1, v4, 4.0, v1
-; VI-SAFE-NEXT: v_lshlrev_b32_e32 v1, 16, v1
-; VI-SAFE-NEXT: v_fma_f16 v2, v2, 4.0, v3
-; VI-SAFE-NEXT: v_or_b32_e32 v1, v2, v1
-; VI-SAFE-NEXT: v_xor_b32_e32 v1, 0x80008000, v1
-; VI-SAFE-NEXT: v_mov_b32_e32 v2, 0x4000
-; VI-SAFE-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
-; VI-SAFE-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[4:5]
-; VI-SAFE-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; VI-SAFE-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; VI-SAFE-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-SAFE-LABEL: select_fneg_posk_src_fmad_v2f16:
-; GFX9-SAFE: ; %bb.0:
-; GFX9-SAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SAFE-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
-; GFX9-SAFE-NEXT: v_pk_fma_f16 v1, v2, 4.0, v3 op_sel_hi:[1,0,1]
-; GFX9-SAFE-NEXT: v_xor_b32_e32 v1, 0x80008000, v1
-; GFX9-SAFE-NEXT: v_mov_b32_e32 v2, 0x4000
-; GFX9-SAFE-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
-; GFX9-SAFE-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[4:5]
-; GFX9-SAFE-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX9-SAFE-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-SAFE-NEXT: v_perm_b32 v0, v1, v0, s4
-; GFX9-SAFE-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-TRUE16-LABEL: select_fneg_posk_src_fmad_v2f16:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_pk_fma_f16 v2, v2, 4.0, v3 op_sel_hi:[1,0,1]
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-TRUE16-NEXT: v_xor_b32_e32 v0, 0x80008000, v2
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: select_fneg_posk_src_fmad_v2f16:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_pk_fma_f16 v2, v2, 4.0, v3 op_sel_hi:[1,0,1]
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; VI-NSZ-LABEL: select_fneg_posk_src_fmad_v2f16:
-; VI-NSZ: ; %bb.0:
-; VI-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; VI-NSZ-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
-; VI-NSZ-NEXT: v_lshrrev_b32_e32 v1, 16, v3
-; VI-NSZ-NEXT: v_lshrrev_b32_e32 v4, 16, v2
-; VI-NSZ-NEXT: v_fma_f16 v1, v4, -4.0, -v1
-; VI-NSZ-NEXT: v_fma_f16 v2, v2, -4.0, -v3
-; VI-NSZ-NEXT: v_mov_b32_e32 v3, 0x4000
-; VI-NSZ-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
-; VI-NSZ-NEXT: v_cndmask_b32_e64 v0, v3, v2, s[4:5]
-; VI-NSZ-NEXT: v_cndmask_b32_sdwa v1, v3, v1, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
-; VI-NSZ-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
-; VI-NSZ-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-NSZ-LABEL: select_fneg_posk_src_fmad_v2f16:
-; GFX9-NSZ: ; %bb.0:
-; GFX9-NSZ-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-NSZ-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
-; GFX9-NSZ-NEXT: v_pk_fma_f16 v1, v2, -4.0, v3 op_sel_hi:[1,0,1] neg_lo:[0,0,1] neg_hi:[0,0,1]
-; GFX9-NSZ-NEXT: v_mov_b32_e32 v2, 0x4000
-; GFX9-NSZ-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
-; GFX9-NSZ-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[4:5]
-; GFX9-NSZ-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
-; GFX9-NSZ-NEXT: s_mov_b32 s4, 0x5040100
-; GFX9-NSZ-NEXT: v_perm_b32 v0, v1, v0, s4
-; GFX9-NSZ-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: select_fneg_posk_src_fmad_v2f16:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: v_pk_fma_f16 v0, v2, -4.0, v3 op_sel_hi:[1,0,1] neg_lo:[0,0,1] neg_hi:[0,0,1]
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: select_fneg_posk_src_fmad_v2f16:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_pk_fma_f16 v2, v2, -4.0, v3 op_sel_hi:[1,0,1] neg_lo:[0,0,1] neg_hi:[0,0,1]
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; VI-LABEL: select_fneg_posk_src_fmad_v2f16:
+; VI: ; %bb.0:
+; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
+; VI-NEXT: v_lshrrev_b32_e32 v1, 16, v3
+; VI-NEXT: v_lshrrev_b32_e32 v4, 16, v2
+; VI-NEXT: v_fma_f16 v1, v4, 4.0, v1
+; VI-NEXT: v_lshlrev_b32_e32 v1, 16, v1
+; VI-NEXT: v_fma_f16 v2, v2, 4.0, v3
+; VI-NEXT: v_or_b32_e32 v1, v2, v1
+; VI-NEXT: v_xor_b32_e32 v1, 0x80008000, v1
+; VI-NEXT: v_mov_b32_e32 v2, 0x4000
+; VI-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
+; VI-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[4:5]
+; VI-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; VI-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+; VI-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: select_fneg_posk_src_fmad_v2f16:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v1
+; GFX9-NEXT: v_pk_fma_f16 v1, v2, 4.0, v3 op_sel_hi:[1,0,1]
+; GFX9-NEXT: v_xor_b32_e32 v1, 0x80008000, v1
+; GFX9-NEXT: v_mov_b32_e32 v2, 0x4000
+; GFX9-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v0
+; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, v1, s[4:5]
+; GFX9-NEXT: v_cndmask_b32_sdwa v1, v2, v1, vcc dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_1
+; GFX9-NEXT: s_mov_b32 s4, 0x5040100
+; GFX9-NEXT: v_perm_b32 v0, v1, v0, s4
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-TRUE16-LABEL: select_fneg_posk_src_fmad_v2f16:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_pk_fma_f16 v2, v2, 4.0, v3 op_sel_hi:[1,0,1]
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-TRUE16-NEXT: v_xor_b32_e32 v0, 0x80008000, v2
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: select_fneg_posk_src_fmad_v2f16:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_pk_fma_f16 v2, v2, 4.0, v3 op_sel_hi:[1,0,1]
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_xor_b32_e32 v2, 0x80008000, v2
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
+; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%fmad = call <2 x half> @llvm.fmuladd.v2f16(<2 x half> %x, <2 x half> <half 4.0, half 4.0>, <2 x half> %z)
%fneg = fneg <2 x half> %fmad
@@ -4986,55 +3749,30 @@ define <2 x half> @select_fneg_posk_src_fmad_v2f16_nsz(<2 x i32> %c, <2 x half>
; GFX9-NEXT: v_perm_b32 v0, v1, v0, s4
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SAFE-TRUE16-LABEL: select_fneg_posk_src_fmad_v2f16_nsz:
-; GFX11-SAFE-TRUE16: ; %bb.0:
-; GFX11-SAFE-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-TRUE16-NEXT: v_pk_fma_f16 v0, v2, -4.0, v3 op_sel_hi:[1,0,1] neg_lo:[0,0,1] neg_hi:[0,0,1]
-; GFX11-SAFE-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-SAFE-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo
-; GFX11-SAFE-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0
-; GFX11-SAFE-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-SAFE-FAKE16-LABEL: select_fneg_posk_src_fmad_v2f16_nsz:
-; GFX11-SAFE-FAKE16: ; %bb.0:
-; GFX11-SAFE-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SAFE-FAKE16-NEXT: v_pk_fma_f16 v2, v2, -4.0, v3 op_sel_hi:[1,0,1] neg_lo:[0,0,1] neg_hi:[0,0,1]
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-SAFE-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-SAFE-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
-; GFX11-SAFE-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SAFE-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-SAFE-FAKE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-TRUE16-LABEL: select_fneg_posk_src_fmad_v2f16_nsz:
-; GFX11-NSZ-TRUE16: ; %bb.0:
-; GFX11-NSZ-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-TRUE16-NEXT: v_pk_fma_f16 v0, v2, -4.0, v3 op_sel_hi:[1,0,1] neg_lo:[0,0,1] neg_hi:[0,0,1]
-; GFX11-NSZ-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
-; GFX11-NSZ-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo
-; GFX11-NSZ-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0
-; GFX11-NSZ-TRUE16-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX11-NSZ-FAKE16-LABEL: select_fneg_posk_src_fmad_v2f16_nsz:
-; GFX11-NSZ-FAKE16: ; %bb.0:
-; GFX11-NSZ-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-NSZ-FAKE16-NEXT: v_pk_fma_f16 v2, v2, -4.0, v3 op_sel_hi:[1,0,1] neg_lo:[0,0,1] neg_hi:[0,0,1]
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
-; GFX11-NSZ-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
-; GFX11-NSZ-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
-; GFX11-NSZ-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-NSZ-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
-; GFX11-NSZ-FAKE16-NEXT: s_setpc_b64 s[30:31]
+; GFX11-TRUE16-LABEL: select_fneg_posk_src_fmad_v2f16_nsz:
+; GFX11-TRUE16: ; %bb.0:
+; GFX11-TRUE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-TRUE16-NEXT: v_pk_fma_f16 v0, v2, -4.0, v3 op_sel_hi:[1,0,1] neg_lo:[0,0,1] neg_hi:[0,0,1]
+; GFX11-TRUE16-NEXT: v_cmp_eq_u32_e64 s0, 0, v1
+; GFX11-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.l, 0x4000, v0.l, vcc_lo
+; GFX11-TRUE16-NEXT: v_cndmask_b16 v0.h, 0x4000, v0.h, s0
+; GFX11-TRUE16-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-FAKE16-LABEL: select_fneg_posk_src_fmad_v2f16_nsz:
+; GFX11-FAKE16: ; %bb.0:
+; GFX11-FAKE16-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-FAKE16-NEXT: v_pk_fma_f16 v2, v2, -4.0, v3 op_sel_hi:[1,0,1] neg_lo:[0,0,1] neg_hi:[0,0,1]
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_2) | instid1(VALU_DEP_3)
+; GFX11-FAKE16-NEXT: v_lshrrev_b32_e32 v3, 16, v2
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v0, 0x4000, v2, vcc_lo
+; GFX11-FAKE16-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v1
+; GFX11-FAKE16-NEXT: v_cndmask_b32_e32 v1, 0x4000, v3, vcc_lo
+; GFX11-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-FAKE16-NEXT: v_perm_b32 v0, v1, v0, 0x5040100
+; GFX11-FAKE16-NEXT: s_setpc_b64 s[30:31]
%cmp = icmp eq <2 x i32> %c, zeroinitializer
%fmad = call nsz <2 x half> @llvm.fmuladd.v2f16(<2 x half> %x, <2 x half> <half 4.0, half 4.0>, <2 x half> %z)
%fneg = fneg <2 x half> %fmad
@@ -5049,5 +3787,3 @@ declare <2 x half> @llvm.fmuladd.v2f16(<2 x half>, <2 x half>, <2 x half>) #0
attributes #0 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
; GFX11: {{.*}}
-; GFX11-NSZ: {{.*}}
-; GFX11-SAFE: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/simple-indirect-call-2.ll b/llvm/test/CodeGen/AMDGPU/simple-indirect-call-2.ll
index f1cadea..0868148 100644
--- a/llvm/test/CodeGen/AMDGPU/simple-indirect-call-2.ll
+++ b/llvm/test/CodeGen/AMDGPU/simple-indirect-call-2.ll
@@ -63,7 +63,7 @@ define amdgpu_kernel void @foo(ptr noundef %fp) {
; OW-NEXT: ret void
;
; CW-LABEL: define {{[^@]+}}@foo
-; CW-SAME: (ptr noundef [[FP:%.*]]) #[[ATTR1:[0-9]+]] {
+; CW-SAME: (ptr noundef [[FP:%.*]]) #[[ATTR0]] {
; CW-NEXT: entry:
; CW-NEXT: [[FP_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; CW-NEXT: store ptr [[FP]], ptr addrspace(5) [[FP_ADDR]], align 8
@@ -84,7 +84,7 @@ define amdgpu_kernel void @foo(ptr noundef %fp) {
; CW-NEXT: ret void
;
; NO-LABEL: define {{[^@]+}}@foo
-; NO-SAME: (ptr noundef [[FP:%.*]]) #[[ATTR1:[0-9]+]] {
+; NO-SAME: (ptr noundef [[FP:%.*]]) #[[ATTR0]] {
; NO-NEXT: entry:
; NO-NEXT: [[FP_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
; NO-NEXT: store ptr [[FP]], ptr addrspace(5) [[FP_ADDR]], align 8
@@ -101,14 +101,12 @@ entry:
}
;.
-; NO: attributes #[[ATTR0]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; NO: attributes #[[ATTR1]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; NO: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
;.
-; OW: attributes #[[ATTR0]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; OW: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; OW: attributes #[[ATTR1]] = { "uniform-work-group-size"="false" }
;.
-; CW: attributes #[[ATTR0]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; CW: attributes #[[ATTR1]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CW: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
;.
; NO: [[META0]] = !{ptr @bar1, ptr @bar2}
;.
diff --git a/llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll b/llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll
index 775d2f9..8fcaf5e 100644
--- a/llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll
+++ b/llvm/test/CodeGen/AMDGPU/simple-indirect-call.ll
@@ -58,7 +58,7 @@ define amdgpu_kernel void @test_simple_indirect_call() {
;.
-; ATTRIBUTOR_GCN: attributes #[[ATTR0]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; ATTRIBUTOR_GCN: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; ATTRIBUTOR_GCN: attributes #[[ATTR1]] = { "uniform-work-group-size"="false" }
;.
; ATTRIBUTOR_GCN: [[META0]] = !{i32 1, i32 5, i32 6, i32 10}
diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-attribute-missing.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-attribute-missing.ll
index a1557418..8dfd3b7 100644
--- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-attribute-missing.ll
+++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-attribute-missing.ll
@@ -31,5 +31,5 @@ define amdgpu_kernel void @kernel1() #1 {
attributes #0 = { "uniform-work-group-size"="true" }
;.
-; CHECK: attributes #[[ATTR0]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
;.
diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-multistep.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-multistep.ll
index fb225a9..fa01ee9 100644
--- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-multistep.ll
+++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-multistep.ll
@@ -98,7 +98,7 @@ define amdgpu_kernel void @kernel2() #0 {
attributes #0 = { "uniform-work-group-size"="true" }
;.
; CHECK: attributes #[[ATTR0]] = { "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR1]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; CHECK: attributes #[[ATTR2]] = { "uniform-work-group-size"="true" }
-; CHECK: attributes #[[ATTR3]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" }
+; CHECK: attributes #[[ATTR3]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" }
;.
diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-nested-function-calls.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-nested-function-calls.ll
index cfede0c..09001ca 100644
--- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-nested-function-calls.ll
+++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-nested-function-calls.ll
@@ -41,6 +41,6 @@ define amdgpu_kernel void @kernel3() #2 {
attributes #2 = { "uniform-work-group-size"="true" }
;.
-; CHECK: attributes #[[ATTR0]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR1]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" }
+; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" }
;.
diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-prevent-attribute-propagation.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-prevent-attribute-propagation.ll
index 854b724..4dede21 100644
--- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-prevent-attribute-propagation.ll
+++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-prevent-attribute-propagation.ll
@@ -41,6 +41,6 @@ define amdgpu_kernel void @kernel2() #2 {
attributes #1 = { "uniform-work-group-size"="true" }
;.
-; CHECK: attributes #[[ATTR0]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR1]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" }
+; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" }
;.
diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-propagate-attribute.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-propagate-attribute.ll
index c4e0a60..08e1556 100644
--- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-propagate-attribute.ll
+++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-propagate-attribute.ll
@@ -52,8 +52,8 @@ attributes #0 = { nounwind }
attributes #1 = { "uniform-work-group-size"="false" }
attributes #2 = { "uniform-work-group-size"="true" }
;.
-; CHECK: attributes #[[ATTR0]] = { nounwind "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR1]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR0]] = { nounwind "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR1]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
; CHECK: attributes #[[ATTR2]] = { nounwind "uniform-work-group-size"="false" }
; CHECK: attributes #[[ATTR3]] = { "uniform-work-group-size"="true" }
;.
diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll
index 05af74d..9090d605 100644
--- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll
+++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-recursion-test.ll
@@ -101,7 +101,7 @@ define amdgpu_kernel void @kernel(ptr addrspace(1) %m) #1 {
attributes #0 = { nounwind readnone }
attributes #1 = { "uniform-work-group-size"="true" }
;.
-; CHECK: attributes #[[ATTR0]] = { nounwind memory(none) "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
-; CHECK: attributes #[[ATTR1]] = { nounwind memory(none) "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" }
-; CHECK: attributes #[[ATTR2]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" }
+; CHECK: attributes #[[ATTR0]] = { nounwind memory(none) "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR1]] = { nounwind memory(none) "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" }
+; CHECK: attributes #[[ATTR2]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="true" }
;.
diff --git a/llvm/test/CodeGen/AMDGPU/uniform-work-group-test.ll b/llvm/test/CodeGen/AMDGPU/uniform-work-group-test.ll
index cdbca7f..5e109f4 100644
--- a/llvm/test/CodeGen/AMDGPU/uniform-work-group-test.ll
+++ b/llvm/test/CodeGen/AMDGPU/uniform-work-group-test.ll
@@ -61,5 +61,5 @@ define amdgpu_kernel void @kernel3() #0 {
attributes #0 = { "uniform-work-group-size"="false" }
;.
-; CHECK: attributes #[[ATTR0]] = { "amdgpu-agpr-alloc"="0" "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
+; CHECK: attributes #[[ATTR0]] = { "amdgpu-no-cluster-id-x" "amdgpu-no-cluster-id-y" "amdgpu-no-cluster-id-z" "amdgpu-no-completion-action" "amdgpu-no-default-queue" "amdgpu-no-dispatch-id" "amdgpu-no-dispatch-ptr" "amdgpu-no-flat-scratch-init" "amdgpu-no-heap-ptr" "amdgpu-no-hostcall-ptr" "amdgpu-no-implicitarg-ptr" "amdgpu-no-lds-kernel-id" "amdgpu-no-multigrid-sync-arg" "amdgpu-no-queue-ptr" "amdgpu-no-workgroup-id-x" "amdgpu-no-workgroup-id-y" "amdgpu-no-workgroup-id-z" "amdgpu-no-workitem-id-x" "amdgpu-no-workitem-id-y" "amdgpu-no-workitem-id-z" "uniform-work-group-size"="false" }
;.
diff --git a/llvm/test/CodeGen/ARM/GlobalISel/arm-instruction-select-combos.mir b/llvm/test/CodeGen/ARM/GlobalISel/arm-instruction-select-combos.mir
index 77eeb34..4dd8af0 100644
--- a/llvm/test/CodeGen/ARM/GlobalISel/arm-instruction-select-combos.mir
+++ b/llvm/test/CodeGen/ARM/GlobalISel/arm-instruction-select-combos.mir
@@ -447,7 +447,7 @@ body: |
; CHECK-LABEL: name: test_vnmuls
; CHECK: [[COPY:%[0-9]+]]:spr = COPY $s0
; CHECK: [[COPY1:%[0-9]+]]:spr = COPY $s1
- ; CHECK: [[VNMULS:%[0-9]+]]:spr = VNMULS [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg
+ ; CHECK: [[VNMULS:%[0-9]+]]:spr = nofpexcept VNMULS [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg, implicit $fpscr
; CHECK: $s0 = COPY [[VNMULS]]
; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $s0
%0(s32) = COPY $s0
@@ -477,7 +477,7 @@ body: |
; CHECK-LABEL: name: test_vnmuls_reassociate
; CHECK: [[COPY:%[0-9]+]]:spr = COPY $s0
; CHECK: [[COPY1:%[0-9]+]]:spr = COPY $s1
- ; CHECK: [[VNMULS:%[0-9]+]]:spr = VNMULS [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg
+ ; CHECK: [[VNMULS:%[0-9]+]]:spr = nofpexcept VNMULS [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg, implicit $fpscr
; CHECK: $s0 = COPY [[VNMULS]]
; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $s0
%0(s32) = COPY $s0
@@ -507,7 +507,7 @@ body: |
; CHECK-LABEL: name: test_vnmuld
; CHECK: [[COPY:%[0-9]+]]:dpr = COPY $d0
; CHECK: [[COPY1:%[0-9]+]]:dpr = COPY $d1
- ; CHECK: [[VNMULD:%[0-9]+]]:dpr = VNMULD [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg
+ ; CHECK: [[VNMULD:%[0-9]+]]:dpr = nofpexcept VNMULD [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg, implicit $fpscr
; CHECK: $d0 = COPY [[VNMULD]]
; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $d0
%0(s64) = COPY $d0
@@ -539,7 +539,7 @@ body: |
; CHECK: [[COPY:%[0-9]+]]:spr = COPY $s0
; CHECK: [[COPY1:%[0-9]+]]:spr = COPY $s1
; CHECK: [[COPY2:%[0-9]+]]:spr = COPY $s2
- ; CHECK: [[VFNMAS:%[0-9]+]]:spr = VFNMAS [[COPY2]], [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg
+ ; CHECK: [[VFNMAS:%[0-9]+]]:spr = nofpexcept VFNMAS [[COPY2]], [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg, implicit $fpscr
; CHECK: $s0 = COPY [[VFNMAS]]
; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $s0
%0(s32) = COPY $s0
@@ -573,7 +573,7 @@ body: |
; CHECK: [[COPY:%[0-9]+]]:dpr = COPY $d0
; CHECK: [[COPY1:%[0-9]+]]:dpr = COPY $d1
; CHECK: [[COPY2:%[0-9]+]]:dpr = COPY $d2
- ; CHECK: [[VFNMAD:%[0-9]+]]:dpr = VFNMAD [[COPY2]], [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg
+ ; CHECK: [[VFNMAD:%[0-9]+]]:dpr = nofpexcept VFNMAD [[COPY2]], [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg, implicit $fpscr
; CHECK: $d0 = COPY [[VFNMAD]]
; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $d0
%0(s64) = COPY $d0
@@ -607,7 +607,7 @@ body: |
; CHECK: [[COPY:%[0-9]+]]:spr = COPY $s0
; CHECK: [[COPY1:%[0-9]+]]:spr = COPY $s1
; CHECK: [[COPY2:%[0-9]+]]:spr = COPY $s2
- ; CHECK: [[VFMSS:%[0-9]+]]:spr = VFMSS [[COPY2]], [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg
+ ; CHECK: [[VFMSS:%[0-9]+]]:spr = nofpexcept VFMSS [[COPY2]], [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg, implicit $fpscr
; CHECK: $s0 = COPY [[VFMSS]]
; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $s0
%0(s32) = COPY $s0
@@ -640,7 +640,7 @@ body: |
; CHECK: [[COPY:%[0-9]+]]:dpr = COPY $d0
; CHECK: [[COPY1:%[0-9]+]]:dpr = COPY $d1
; CHECK: [[COPY2:%[0-9]+]]:dpr = COPY $d2
- ; CHECK: [[VFMSD:%[0-9]+]]:dpr = VFMSD [[COPY2]], [[COPY1]], [[COPY]], 14 /* CC::al */, $noreg
+ ; CHECK: [[VFMSD:%[0-9]+]]:dpr = nofpexcept VFMSD [[COPY2]], [[COPY1]], [[COPY]], 14 /* CC::al */, $noreg, implicit $fpscr
; CHECK: $d0 = COPY [[VFMSD]]
; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $d0
%0(s64) = COPY $d0
@@ -673,7 +673,7 @@ body: |
; CHECK: [[COPY:%[0-9]+]]:spr = COPY $s0
; CHECK: [[COPY1:%[0-9]+]]:spr = COPY $s1
; CHECK: [[COPY2:%[0-9]+]]:spr = COPY $s2
- ; CHECK: [[VFNMSS:%[0-9]+]]:spr = VFNMSS [[COPY2]], [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg
+ ; CHECK: [[VFNMSS:%[0-9]+]]:spr = nofpexcept VFNMSS [[COPY2]], [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg, implicit $fpscr
; CHECK: $s0 = COPY [[VFNMSS]]
; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $s0
%0(s32) = COPY $s0
diff --git a/llvm/test/CodeGen/ARM/GlobalISel/arm-select-copy_to_regclass-of-fptosi.mir b/llvm/test/CodeGen/ARM/GlobalISel/arm-select-copy_to_regclass-of-fptosi.mir
index 45a846b..4cded13 100644
--- a/llvm/test/CodeGen/ARM/GlobalISel/arm-select-copy_to_regclass-of-fptosi.mir
+++ b/llvm/test/CodeGen/ARM/GlobalISel/arm-select-copy_to_regclass-of-fptosi.mir
@@ -19,7 +19,7 @@ body: |
bb.1:
; CHECK-LABEL: name: test_fptosi
; CHECK: [[COPY:%[0-9]+]]:spr = COPY $s0
- ; CHECK: [[VTOSIZS:%[0-9]+]]:spr = VTOSIZS [[COPY]], 14 /* CC::al */, $noreg
+ ; CHECK: [[VTOSIZS:%[0-9]+]]:spr = nofpexcept VTOSIZS [[COPY]], 14 /* CC::al */, $noreg
; CHECK: [[COPY1:%[0-9]+]]:gpr = COPY [[VTOSIZS]]
; CHECK: $r0 = COPY [[COPY1]]
; CHECK: MOVPCLR 14 /* CC::al */, $noreg, implicit $r0
diff --git a/llvm/test/CodeGen/ARM/GlobalISel/select-fp.mir b/llvm/test/CodeGen/ARM/GlobalISel/select-fp.mir
index ec834f1..4517fe6 100644
--- a/llvm/test/CodeGen/ARM/GlobalISel/select-fp.mir
+++ b/llvm/test/CodeGen/ARM/GlobalISel/select-fp.mir
@@ -1,3 +1,4 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
# RUN: llc -O0 -mtriple arm-- -mattr=+vfp4,-neonfp -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
# RUN: llc -O0 -mtriple thumb-- -mattr=+v6t2,+vfp4,-neonfp -run-pass=instruction-select -verify-machineinstrs %s -o - | FileCheck %s
--- |
@@ -76,11 +77,9 @@ body: |
...
---
name: test_fadd_s32
-# CHECK-LABEL: name: test_fadd_s32
legalized: true
regBankSelected: true
selected: false
-# CHECK: selected: true
registers:
- { id: 0, class: fprb }
- { id: 1, class: fprb }
@@ -89,28 +88,29 @@ body: |
bb.0:
liveins: $s0, $s1
+ ; CHECK-LABEL: name: test_fadd_s32
+ ; CHECK: liveins: $s0, $s1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:spr = COPY $s0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:spr = COPY $s1
+ ; CHECK-NEXT: [[VADDS:%[0-9]+]]:spr = nofpexcept VADDS [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
+ ; CHECK-NEXT: $s0 = COPY [[VADDS]]
+ ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $s0
%0(s32) = COPY $s0
- ; CHECK: [[VREGX:%[0-9]+]]:spr = COPY $s0
%1(s32) = COPY $s1
- ; CHECK: [[VREGY:%[0-9]+]]:spr = COPY $s1
%2(s32) = G_FADD %0, %1
- ; CHECK: [[VREGSUM:%[0-9]+]]:spr = VADDS [[VREGX]], [[VREGY]], 14 /* CC::al */, $noreg
$s0 = COPY %2(s32)
- ; CHECK: $s0 = COPY [[VREGSUM]]
BX_RET 14, $noreg, implicit $s0
- ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $s0
...
---
name: test_fadd_s64
-# CHECK-LABEL: name: test_fadd_s64
legalized: true
regBankSelected: true
selected: false
-# CHECK: selected: true
registers:
- { id: 0, class: fprb }
- { id: 1, class: fprb }
@@ -119,28 +119,29 @@ body: |
bb.0:
liveins: $d0, $d1
+ ; CHECK-LABEL: name: test_fadd_s64
+ ; CHECK: liveins: $d0, $d1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:dpr = COPY $d0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:dpr = COPY $d1
+ ; CHECK-NEXT: [[VADDD:%[0-9]+]]:dpr = nofpexcept VADDD [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
+ ; CHECK-NEXT: $d0 = COPY [[VADDD]]
+ ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $d0
%0(s64) = COPY $d0
- ; CHECK: [[VREGX:%[0-9]+]]:dpr = COPY $d0
%1(s64) = COPY $d1
- ; CHECK: [[VREGY:%[0-9]+]]:dpr = COPY $d1
%2(s64) = G_FADD %0, %1
- ; CHECK: [[VREGSUM:%[0-9]+]]:dpr = VADDD [[VREGX]], [[VREGY]], 14 /* CC::al */, $noreg
$d0 = COPY %2(s64)
- ; CHECK: $d0 = COPY [[VREGSUM]]
BX_RET 14, $noreg, implicit $d0
- ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $d0
...
---
name: test_fsub_s32
-# CHECK-LABEL: name: test_fsub_s32
legalized: true
regBankSelected: true
selected: false
-# CHECK: selected: true
registers:
- { id: 0, class: fprb }
- { id: 1, class: fprb }
@@ -149,28 +150,29 @@ body: |
bb.0:
liveins: $s0, $s1
+ ; CHECK-LABEL: name: test_fsub_s32
+ ; CHECK: liveins: $s0, $s1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:spr = COPY $s0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:spr = COPY $s1
+ ; CHECK-NEXT: [[VSUBS:%[0-9]+]]:spr = nofpexcept VSUBS [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
+ ; CHECK-NEXT: $s0 = COPY [[VSUBS]]
+ ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $s0
%0(s32) = COPY $s0
- ; CHECK: [[VREGX:%[0-9]+]]:spr = COPY $s0
%1(s32) = COPY $s1
- ; CHECK: [[VREGY:%[0-9]+]]:spr = COPY $s1
%2(s32) = G_FSUB %0, %1
- ; CHECK: [[VREGSUM:%[0-9]+]]:spr = VSUBS [[VREGX]], [[VREGY]], 14 /* CC::al */, $noreg
$s0 = COPY %2(s32)
- ; CHECK: $s0 = COPY [[VREGSUM]]
BX_RET 14, $noreg, implicit $s0
- ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $s0
...
---
name: test_fsub_s64
-# CHECK-LABEL: name: test_fsub_s64
legalized: true
regBankSelected: true
selected: false
-# CHECK: selected: true
registers:
- { id: 0, class: fprb }
- { id: 1, class: fprb }
@@ -179,28 +181,29 @@ body: |
bb.0:
liveins: $d0, $d1
+ ; CHECK-LABEL: name: test_fsub_s64
+ ; CHECK: liveins: $d0, $d1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:dpr = COPY $d0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:dpr = COPY $d1
+ ; CHECK-NEXT: [[VSUBD:%[0-9]+]]:dpr = nofpexcept VSUBD [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
+ ; CHECK-NEXT: $d0 = COPY [[VSUBD]]
+ ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $d0
%0(s64) = COPY $d0
- ; CHECK: [[VREGX:%[0-9]+]]:dpr = COPY $d0
%1(s64) = COPY $d1
- ; CHECK: [[VREGY:%[0-9]+]]:dpr = COPY $d1
%2(s64) = G_FSUB %0, %1
- ; CHECK: [[VREGSUM:%[0-9]+]]:dpr = VSUBD [[VREGX]], [[VREGY]], 14 /* CC::al */, $noreg
$d0 = COPY %2(s64)
- ; CHECK: $d0 = COPY [[VREGSUM]]
BX_RET 14, $noreg, implicit $d0
- ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $d0
...
---
name: test_fmul_s32
-# CHECK-LABEL: name: test_fmul_s32
legalized: true
regBankSelected: true
selected: false
-# CHECK: selected: true
registers:
- { id: 0, class: fprb }
- { id: 1, class: fprb }
@@ -209,28 +212,29 @@ body: |
bb.0:
liveins: $s0, $s1
+ ; CHECK-LABEL: name: test_fmul_s32
+ ; CHECK: liveins: $s0, $s1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:spr = COPY $s0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:spr = COPY $s1
+ ; CHECK-NEXT: [[VMULS:%[0-9]+]]:spr = nofpexcept VMULS [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
+ ; CHECK-NEXT: $s0 = COPY [[VMULS]]
+ ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $s0
%0(s32) = COPY $s0
- ; CHECK: [[VREGX:%[0-9]+]]:spr = COPY $s0
%1(s32) = COPY $s1
- ; CHECK: [[VREGY:%[0-9]+]]:spr = COPY $s1
%2(s32) = G_FMUL %0, %1
- ; CHECK: [[VREGSUM:%[0-9]+]]:spr = VMULS [[VREGX]], [[VREGY]], 14 /* CC::al */, $noreg
$s0 = COPY %2(s32)
- ; CHECK: $s0 = COPY [[VREGSUM]]
BX_RET 14, $noreg, implicit $s0
- ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $s0
...
---
name: test_fmul_s64
-# CHECK-LABEL: name: test_fmul_s64
legalized: true
regBankSelected: true
selected: false
-# CHECK: selected: true
registers:
- { id: 0, class: fprb }
- { id: 1, class: fprb }
@@ -239,28 +243,29 @@ body: |
bb.0:
liveins: $d0, $d1
+ ; CHECK-LABEL: name: test_fmul_s64
+ ; CHECK: liveins: $d0, $d1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:dpr = COPY $d0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:dpr = COPY $d1
+ ; CHECK-NEXT: [[VMULD:%[0-9]+]]:dpr = nofpexcept VMULD [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
+ ; CHECK-NEXT: $d0 = COPY [[VMULD]]
+ ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $d0
%0(s64) = COPY $d0
- ; CHECK: [[VREGX:%[0-9]+]]:dpr = COPY $d0
%1(s64) = COPY $d1
- ; CHECK: [[VREGY:%[0-9]+]]:dpr = COPY $d1
%2(s64) = G_FMUL %0, %1
- ; CHECK: [[VREGSUM:%[0-9]+]]:dpr = VMULD [[VREGX]], [[VREGY]], 14 /* CC::al */, $noreg
$d0 = COPY %2(s64)
- ; CHECK: $d0 = COPY [[VREGSUM]]
BX_RET 14, $noreg, implicit $d0
- ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $d0
...
---
name: test_fdiv_s32
-# CHECK-LABEL: name: test_fdiv_s32
legalized: true
regBankSelected: true
selected: false
-# CHECK: selected: true
registers:
- { id: 0, class: fprb }
- { id: 1, class: fprb }
@@ -269,28 +274,29 @@ body: |
bb.0:
liveins: $s0, $s1
+ ; CHECK-LABEL: name: test_fdiv_s32
+ ; CHECK: liveins: $s0, $s1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:spr = COPY $s0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:spr = COPY $s1
+ ; CHECK-NEXT: [[VDIVS:%[0-9]+]]:spr = nofpexcept VDIVS [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
+ ; CHECK-NEXT: $s0 = COPY [[VDIVS]]
+ ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $s0
%0(s32) = COPY $s0
- ; CHECK: [[VREGX:%[0-9]+]]:spr = COPY $s0
%1(s32) = COPY $s1
- ; CHECK: [[VREGY:%[0-9]+]]:spr = COPY $s1
%2(s32) = G_FDIV %0, %1
- ; CHECK: [[VREGSUM:%[0-9]+]]:spr = VDIVS [[VREGX]], [[VREGY]], 14 /* CC::al */, $noreg
$s0 = COPY %2(s32)
- ; CHECK: $s0 = COPY [[VREGSUM]]
BX_RET 14, $noreg, implicit $s0
- ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $s0
...
---
name: test_fdiv_s64
-# CHECK-LABEL: name: test_fdiv_s64
legalized: true
regBankSelected: true
selected: false
-# CHECK: selected: true
registers:
- { id: 0, class: fprb }
- { id: 1, class: fprb }
@@ -299,28 +305,29 @@ body: |
bb.0:
liveins: $d0, $d1
+ ; CHECK-LABEL: name: test_fdiv_s64
+ ; CHECK: liveins: $d0, $d1
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:dpr = COPY $d0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:dpr = COPY $d1
+ ; CHECK-NEXT: [[VDIVD:%[0-9]+]]:dpr = nofpexcept VDIVD [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
+ ; CHECK-NEXT: $d0 = COPY [[VDIVD]]
+ ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $d0
%0(s64) = COPY $d0
- ; CHECK: [[VREGX:%[0-9]+]]:dpr = COPY $d0
%1(s64) = COPY $d1
- ; CHECK: [[VREGY:%[0-9]+]]:dpr = COPY $d1
%2(s64) = G_FDIV %0, %1
- ; CHECK: [[VREGSUM:%[0-9]+]]:dpr = VDIVD [[VREGX]], [[VREGY]], 14 /* CC::al */, $noreg
$d0 = COPY %2(s64)
- ; CHECK: $d0 = COPY [[VREGSUM]]
BX_RET 14, $noreg, implicit $d0
- ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $d0
...
---
name: test_fneg_s32
-# CHECK-LABEL: name: test_fneg_s32
legalized: true
regBankSelected: true
selected: false
-# CHECK: selected: true
registers:
- { id: 0, class: fprb }
- { id: 1, class: fprb }
@@ -328,25 +335,26 @@ body: |
bb.0:
liveins: $s0
+ ; CHECK-LABEL: name: test_fneg_s32
+ ; CHECK: liveins: $s0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:spr = COPY $s0
+ ; CHECK-NEXT: [[VNEGS:%[0-9]+]]:spr = VNEGS [[COPY]], 14 /* CC::al */, $noreg
+ ; CHECK-NEXT: $s0 = COPY [[VNEGS]]
+ ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $s0
%0(s32) = COPY $s0
- ; CHECK: [[VREGX:%[0-9]+]]:spr = COPY $s0
%1(s32) = G_FNEG %0
- ; CHECK: [[VREGSUM:%[0-9]+]]:spr = VNEGS [[VREGX]], 14 /* CC::al */, $noreg
$s0 = COPY %1(s32)
- ; CHECK: $s0 = COPY [[VREGSUM]]
BX_RET 14, $noreg, implicit $s0
- ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $s0
...
---
name: test_fneg_s64
-# CHECK-LABEL: name: test_fneg_s64
legalized: true
regBankSelected: true
selected: false
-# CHECK: selected: true
registers:
- { id: 0, class: fprb }
- { id: 1, class: fprb }
@@ -355,25 +363,26 @@ body: |
bb.0:
liveins: $d0
+ ; CHECK-LABEL: name: test_fneg_s64
+ ; CHECK: liveins: $d0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:dpr = COPY $d0
+ ; CHECK-NEXT: [[VNEGD:%[0-9]+]]:dpr = VNEGD [[COPY]], 14 /* CC::al */, $noreg
+ ; CHECK-NEXT: $d0 = COPY [[VNEGD]]
+ ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $d0
%0(s64) = COPY $d0
- ; CHECK: [[VREGX:%[0-9]+]]:dpr = COPY $d0
%1(s64) = G_FNEG %0
- ; CHECK: [[VREGSUM:%[0-9]+]]:dpr = VNEGD [[VREGX]], 14 /* CC::al */, $noreg
$d0 = COPY %1(s64)
- ; CHECK: $d0 = COPY [[VREGSUM]]
BX_RET 14, $noreg, implicit $d0
- ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $d0
...
---
name: test_fma_s32
-# CHECK-LABEL: name: test_fma_s32
legalized: true
regBankSelected: true
selected: false
-# CHECK: selected: true
registers:
- { id: 0, class: fprb }
- { id: 1, class: fprb }
@@ -383,31 +392,32 @@ body: |
bb.0:
liveins: $s0, $s1, $s2
+ ; CHECK-LABEL: name: test_fma_s32
+ ; CHECK: liveins: $s0, $s1, $s2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:spr = COPY $s0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:spr = COPY $s1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:spr = COPY $s2
+ ; CHECK-NEXT: [[VFMAS:%[0-9]+]]:spr = nofpexcept VFMAS [[COPY2]], [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
+ ; CHECK-NEXT: $s0 = COPY [[VFMAS]]
+ ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $s0
%0(s32) = COPY $s0
- ; CHECK: [[VREGX:%[0-9]+]]:spr = COPY $s0
%1(s32) = COPY $s1
- ; CHECK: [[VREGY:%[0-9]+]]:spr = COPY $s1
%2(s32) = COPY $s2
- ; CHECK: [[VREGZ:%[0-9]+]]:spr = COPY $s2
%3(s32) = G_FMA %0, %1, %2
- ; CHECK: [[VREGR:%[0-9]+]]:spr = VFMAS [[VREGZ]], [[VREGX]], [[VREGY]], 14 /* CC::al */, $noreg
$s0 = COPY %3(s32)
- ; CHECK: $s0 = COPY [[VREGR]]
BX_RET 14, $noreg, implicit $s0
- ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $s0
...
---
name: test_fma_s64
-# CHECK-LABEL: name: test_fma_s64
legalized: true
regBankSelected: true
selected: false
-# CHECK: selected: true
registers:
- { id: 0, class: fprb }
- { id: 1, class: fprb }
@@ -417,31 +427,32 @@ body: |
bb.0:
liveins: $d0, $d1, $d2
+ ; CHECK-LABEL: name: test_fma_s64
+ ; CHECK: liveins: $d0, $d1, $d2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:dpr = COPY $d0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:dpr = COPY $d1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:dpr = COPY $d2
+ ; CHECK-NEXT: [[VFMAD:%[0-9]+]]:dpr = nofpexcept VFMAD [[COPY2]], [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
+ ; CHECK-NEXT: $d0 = COPY [[VFMAD]]
+ ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $d0
%0(s64) = COPY $d0
- ; CHECK: [[VREGX:%[0-9]+]]:dpr = COPY $d0
%1(s64) = COPY $d1
- ; CHECK: [[VREGY:%[0-9]+]]:dpr = COPY $d1
%2(s64) = COPY $d2
- ; CHECK: [[VREGZ:%[0-9]+]]:dpr = COPY $d2
%3(s64) = G_FMA %0, %1, %2
- ; CHECK: [[VREGR:%[0-9]+]]:dpr = VFMAD [[VREGZ]], [[VREGX]], [[VREGY]], 14 /* CC::al */, $noreg
$d0 = COPY %3(s64)
- ; CHECK: $d0 = COPY [[VREGR]]
BX_RET 14, $noreg, implicit $d0
- ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $d0
...
---
name: test_fpext_s32_to_s64
-# CHECK-LABEL: name: test_fpext_s32_to_s64
legalized: true
regBankSelected: true
selected: false
-# CHECK: selected: true
registers:
- { id: 0, class: fprb }
- { id: 1, class: fprb }
@@ -449,25 +460,26 @@ body: |
bb.0:
liveins: $s0
+ ; CHECK-LABEL: name: test_fpext_s32_to_s64
+ ; CHECK: liveins: $s0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:spr = COPY $s0
+ ; CHECK-NEXT: [[VCVTDS:%[0-9]+]]:dpr = nofpexcept VCVTDS [[COPY]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
+ ; CHECK-NEXT: $d0 = COPY [[VCVTDS]]
+ ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $d0
%0(s32) = COPY $s0
- ; CHECK: [[VREGX:%[0-9]+]]:spr = COPY $s0
%1(s64) = G_FPEXT %0(s32)
- ; CHECK: [[VREGR:%[0-9]+]]:dpr = VCVTDS [[VREGX]], 14 /* CC::al */, $noreg
$d0 = COPY %1(s64)
- ; CHECK: $d0 = COPY [[VREGR]]
BX_RET 14, $noreg, implicit $d0
- ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $d0
...
---
name: test_fptrunc_s64_to_s32
-# CHECK-LABEL: name: test_fptrunc_s64_to_s32
legalized: true
regBankSelected: true
selected: false
-# CHECK: selected: true
registers:
- { id: 0, class: fprb }
- { id: 1, class: fprb }
@@ -475,25 +487,26 @@ body: |
bb.0:
liveins: $d0
+ ; CHECK-LABEL: name: test_fptrunc_s64_to_s32
+ ; CHECK: liveins: $d0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:dpr = COPY $d0
+ ; CHECK-NEXT: [[VCVTSD:%[0-9]+]]:spr = nofpexcept VCVTSD [[COPY]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
+ ; CHECK-NEXT: $s0 = COPY [[VCVTSD]]
+ ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $s0
%0(s64) = COPY $d0
- ; CHECK: [[VREGX:%[0-9]+]]:dpr = COPY $d0
%1(s32) = G_FPTRUNC %0(s64)
- ; CHECK: [[VREGR:%[0-9]+]]:spr = VCVTSD [[VREGX]], 14 /* CC::al */, $noreg
$s0 = COPY %1(s32)
- ; CHECK: $s0 = COPY [[VREGR]]
BX_RET 14, $noreg, implicit $s0
- ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $s0
...
---
name: test_fptosi_s32
-# CHECK-LABEL: name: test_fptosi_s32
legalized: true
regBankSelected: true
selected: false
-# CHECK: selected: true
registers:
- { id: 0, class: fprb }
- { id: 1, class: gprb }
@@ -501,26 +514,27 @@ body: |
bb.0:
liveins: $s0
+ ; CHECK-LABEL: name: test_fptosi_s32
+ ; CHECK: liveins: $s0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:spr = COPY $s0
+ ; CHECK-NEXT: [[VTOSIZS:%[0-9]+]]:spr = nofpexcept VTOSIZS [[COPY]], 14 /* CC::al */, $noreg
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY [[VTOSIZS]]
+ ; CHECK-NEXT: $r0 = COPY [[COPY1]]
+ ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $r0
%0(s32) = COPY $s0
- ; CHECK: [[VREGX:%[0-9]+]]:spr = COPY $s0
%1(s32) = G_FPTOSI %0(s32)
- ; CHECK: [[VREGI:%[0-9]+]]:spr = VTOSIZS [[VREGX]], 14 /* CC::al */, $noreg
- ; CHECK: [[VREGR:%[0-9]+]]:gpr = COPY [[VREGI]]
$r0 = COPY %1(s32)
- ; CHECK: $r0 = COPY [[VREGR]]
BX_RET 14, $noreg, implicit $r0
- ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $r0
...
---
name: test_fptosi_s64
-# CHECK-LABEL: name: test_fptosi_s64
legalized: true
regBankSelected: true
selected: false
-# CHECK: selected: true
registers:
- { id: 0, class: fprb }
- { id: 1, class: gprb }
@@ -528,26 +542,27 @@ body: |
bb.0:
liveins: $d0
+ ; CHECK-LABEL: name: test_fptosi_s64
+ ; CHECK: liveins: $d0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:dpr = COPY $d0
+ ; CHECK-NEXT: [[VTOSIZD:%[0-9]+]]:spr = nofpexcept VTOSIZD [[COPY]], 14 /* CC::al */, $noreg
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY [[VTOSIZD]]
+ ; CHECK-NEXT: $r0 = COPY [[COPY1]]
+ ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $r0
%0(s64) = COPY $d0
- ; CHECK: [[VREGX:%[0-9]+]]:dpr = COPY $d0
%1(s32) = G_FPTOSI %0(s64)
- ; CHECK: [[VREGI:%[0-9]+]]:spr = VTOSIZD [[VREGX]], 14 /* CC::al */, $noreg
- ; CHECK: [[VREGR:%[0-9]+]]:gpr = COPY [[VREGI]]
$r0 = COPY %1(s32)
- ; CHECK: $r0 = COPY [[VREGR]]
BX_RET 14, $noreg, implicit $r0
- ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $r0
...
---
name: test_fptoui_s32
-# CHECK-LABEL: name: test_fptoui_s32
legalized: true
regBankSelected: true
selected: false
-# CHECK: selected: true
registers:
- { id: 0, class: fprb }
- { id: 1, class: gprb }
@@ -555,26 +570,27 @@ body: |
bb.0:
liveins: $s0
+ ; CHECK-LABEL: name: test_fptoui_s32
+ ; CHECK: liveins: $s0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:spr = COPY $s0
+ ; CHECK-NEXT: [[VTOUIZS:%[0-9]+]]:spr = nofpexcept VTOUIZS [[COPY]], 14 /* CC::al */, $noreg
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY [[VTOUIZS]]
+ ; CHECK-NEXT: $r0 = COPY [[COPY1]]
+ ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $r0
%0(s32) = COPY $s0
- ; CHECK: [[VREGX:%[0-9]+]]:spr = COPY $s0
%1(s32) = G_FPTOUI %0(s32)
- ; CHECK: [[VREGI:%[0-9]+]]:spr = VTOUIZS [[VREGX]], 14 /* CC::al */, $noreg
- ; CHECK: [[VREGR:%[0-9]+]]:gpr = COPY [[VREGI]]
$r0 = COPY %1(s32)
- ; CHECK: $r0 = COPY [[VREGR]]
BX_RET 14, $noreg, implicit $r0
- ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $r0
...
---
name: test_fptoui_s64
-# CHECK-LABEL: name: test_fptoui_s64
legalized: true
regBankSelected: true
selected: false
-# CHECK: selected: true
registers:
- { id: 0, class: fprb }
- { id: 1, class: gprb }
@@ -582,26 +598,27 @@ body: |
bb.0:
liveins: $d0
+ ; CHECK-LABEL: name: test_fptoui_s64
+ ; CHECK: liveins: $d0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:dpr = COPY $d0
+ ; CHECK-NEXT: [[VTOUIZD:%[0-9]+]]:spr = nofpexcept VTOUIZD [[COPY]], 14 /* CC::al */, $noreg
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY [[VTOUIZD]]
+ ; CHECK-NEXT: $r0 = COPY [[COPY1]]
+ ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $r0
%0(s64) = COPY $d0
- ; CHECK: [[VREGX:%[0-9]+]]:dpr = COPY $d0
%1(s32) = G_FPTOUI %0(s64)
- ; CHECK: [[VREGI:%[0-9]+]]:spr = VTOUIZD [[VREGX]], 14 /* CC::al */, $noreg
- ; CHECK: [[VREGR:%[0-9]+]]:gpr = COPY [[VREGI]]
$r0 = COPY %1(s32)
- ; CHECK: $r0 = COPY [[VREGR]]
BX_RET 14, $noreg, implicit $r0
- ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $r0
...
---
name: test_sitofp_s32
-# CHECK-LABEL: name: test_sitofp_s32
legalized: true
regBankSelected: true
selected: false
-# CHECK: selected: true
registers:
- { id: 0, class: gprb }
- { id: 1, class: fprb }
@@ -609,26 +626,27 @@ body: |
bb.0:
liveins: $r0
+ ; CHECK-LABEL: name: test_sitofp_s32
+ ; CHECK: liveins: $r0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $r0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:spr = COPY [[COPY]]
+ ; CHECK-NEXT: [[VSITOS:%[0-9]+]]:spr = nofpexcept VSITOS [[COPY1]], 14 /* CC::al */, $noreg
+ ; CHECK-NEXT: $s0 = COPY [[VSITOS]]
+ ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $s0
%0(s32) = COPY $r0
- ; CHECK: [[VREGX:%[0-9]+]]:gpr = COPY $r0
%1(s32) = G_SITOFP %0(s32)
- ; CHECK: [[VREGF:%[0-9]+]]:spr = COPY [[VREGX]]
- ; CHECK: [[VREGR:%[0-9]+]]:spr = VSITOS [[VREGF]], 14 /* CC::al */, $noreg
$s0 = COPY %1(s32)
- ; CHECK: $s0 = COPY [[VREGR]]
BX_RET 14, $noreg, implicit $s0
- ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $s0
...
---
name: test_sitofp_s64
-# CHECK-LABEL: name: test_sitofp_s64
legalized: true
regBankSelected: true
selected: false
-# CHECK: selected: true
registers:
- { id: 0, class: gprb }
- { id: 1, class: fprb }
@@ -636,26 +654,27 @@ body: |
bb.0:
liveins: $r0
+ ; CHECK-LABEL: name: test_sitofp_s64
+ ; CHECK: liveins: $r0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $r0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:spr = COPY [[COPY]]
+ ; CHECK-NEXT: [[VSITOD:%[0-9]+]]:dpr = nofpexcept VSITOD [[COPY1]], 14 /* CC::al */, $noreg
+ ; CHECK-NEXT: $d0 = COPY [[VSITOD]]
+ ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $d0
%0(s32) = COPY $r0
- ; CHECK: [[VREGX:%[0-9]+]]:gpr = COPY $r0
%1(s64) = G_SITOFP %0(s32)
- ; CHECK: [[VREGF:%[0-9]+]]:spr = COPY [[VREGX]]
- ; CHECK: [[VREGR:%[0-9]+]]:dpr = VSITOD [[VREGF]], 14 /* CC::al */, $noreg
$d0 = COPY %1(s64)
- ; CHECK: $d0 = COPY [[VREGR]]
BX_RET 14, $noreg, implicit $d0
- ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $d0
...
---
name: test_uitofp_s32
-# CHECK-LABEL: name: test_uitofp_s32
legalized: true
regBankSelected: true
selected: false
-# CHECK: selected: true
registers:
- { id: 0, class: gprb }
- { id: 1, class: fprb }
@@ -663,26 +682,27 @@ body: |
bb.0:
liveins: $r0
+ ; CHECK-LABEL: name: test_uitofp_s32
+ ; CHECK: liveins: $r0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $r0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:spr = COPY [[COPY]]
+ ; CHECK-NEXT: [[VUITOS:%[0-9]+]]:spr = nofpexcept VUITOS [[COPY1]], 14 /* CC::al */, $noreg
+ ; CHECK-NEXT: $s0 = COPY [[VUITOS]]
+ ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $s0
%0(s32) = COPY $r0
- ; CHECK: [[VREGX:%[0-9]+]]:gpr = COPY $r0
%1(s32) = G_UITOFP %0(s32)
- ; CHECK: [[VREGF:%[0-9]+]]:spr = COPY [[VREGX]]
- ; CHECK: [[VREGR:%[0-9]+]]:spr = VUITOS [[VREGF]], 14 /* CC::al */, $noreg
$s0 = COPY %1(s32)
- ; CHECK: $s0 = COPY [[VREGR]]
BX_RET 14, $noreg, implicit $s0
- ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $s0
...
---
name: test_uitofp_s64
-# CHECK-LABEL: name: test_uitofp_s64
legalized: true
regBankSelected: true
selected: false
-# CHECK: selected: true
registers:
- { id: 0, class: gprb }
- { id: 1, class: fprb }
@@ -690,26 +710,27 @@ body: |
bb.0:
liveins: $r0
+ ; CHECK-LABEL: name: test_uitofp_s64
+ ; CHECK: liveins: $r0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $r0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:spr = COPY [[COPY]]
+ ; CHECK-NEXT: [[VUITOD:%[0-9]+]]:dpr = nofpexcept VUITOD [[COPY1]], 14 /* CC::al */, $noreg
+ ; CHECK-NEXT: $d0 = COPY [[VUITOD]]
+ ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $d0
%0(s32) = COPY $r0
- ; CHECK: [[VREGX:%[0-9]+]]:gpr = COPY $r0
%1(s64) = G_UITOFP %0(s32)
- ; CHECK: [[VREGF:%[0-9]+]]:spr = COPY [[VREGX]]
- ; CHECK: [[VREGR:%[0-9]+]]:dpr = VUITOD [[VREGF]], 14 /* CC::al */, $noreg
$d0 = COPY %1(s64)
- ; CHECK: $d0 = COPY [[VREGR]]
BX_RET 14, $noreg, implicit $d0
- ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $d0
...
---
name: test_load_f32
-# CHECK-LABEL: name: test_load_f32
legalized: true
regBankSelected: true
selected: false
-# CHECK: selected: true
registers:
- { id: 0, class: gprb }
- { id: 1, class: fprb }
@@ -717,25 +738,26 @@ body: |
bb.0:
liveins: $r0
+ ; CHECK-LABEL: name: test_load_f32
+ ; CHECK: liveins: $r0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $r0
+ ; CHECK-NEXT: [[VLDRS:%[0-9]+]]:spr = VLDRS [[COPY]], 0, 14 /* CC::al */, $noreg :: (load (s32))
+ ; CHECK-NEXT: $s0 = COPY [[VLDRS]]
+ ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $s0
%0(p0) = COPY $r0
- ; CHECK: %[[P:[0-9]+]]:gpr = COPY $r0
%1(s32) = G_LOAD %0(p0) :: (load (s32))
- ; CHECK: %[[V:[0-9]+]]:spr = VLDRS %[[P]], 0, 14 /* CC::al */, $noreg
$s0 = COPY %1
- ; CHECK: $s0 = COPY %[[V]]
BX_RET 14, $noreg, implicit $s0
- ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $s0
...
---
name: test_load_f64
-# CHECK-LABEL: name: test_load_f64
legalized: true
regBankSelected: true
selected: false
-# CHECK: selected: true
registers:
- { id: 0, class: gprb }
- { id: 1, class: fprb }
@@ -743,45 +765,50 @@ body: |
bb.0:
liveins: $r0
+ ; CHECK-LABEL: name: test_load_f64
+ ; CHECK: liveins: $r0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $r0
+ ; CHECK-NEXT: [[VLDRD:%[0-9]+]]:dpr = VLDRD [[COPY]], 0, 14 /* CC::al */, $noreg :: (load (s64))
+ ; CHECK-NEXT: $d0 = COPY [[VLDRD]]
+ ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $d0
%0(p0) = COPY $r0
- ; CHECK: %[[P:[0-9]+]]:gpr = COPY $r0
%1(s64) = G_LOAD %0(p0) :: (load (s64))
- ; CHECK: %[[V:[0-9]+]]:dpr = VLDRD %[[P]], 0, 14 /* CC::al */, $noreg
$d0 = COPY %1
- ; CHECK: $d0 = COPY %[[V]]
BX_RET 14, $noreg, implicit $d0
- ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $d0
...
---
name: test_stores
-# CHECK-LABEL: name: test_stores
legalized: true
regBankSelected: true
selected: false
-# CHECK: selected: true
registers:
- { id: 0, class: gprb }
- { id: 1, class: fprb }
- { id: 2, class: fprb }
-# CHECK: id: [[P:[0-9]+]], class: gpr
-# CHECK: id: [[F32:[0-9]+]], class: spr
-# CHECK: id: [[F64:[0-9]+]], class: dpr
body: |
bb.0:
liveins: $r0, $s0, $d0
+ ; CHECK-LABEL: name: test_stores
+ ; CHECK: liveins: $r0, $s0, $d0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $r0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:spr = COPY $s0
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:dpr = COPY $d2
+ ; CHECK-NEXT: VSTRS [[COPY1]], [[COPY]], 0, 14 /* CC::al */, $noreg :: (store (s32))
+ ; CHECK-NEXT: VSTRD [[COPY2]], [[COPY]], 0, 14 /* CC::al */, $noreg :: (store (s64))
+ ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg
%0(p0) = COPY $r0
%1(s32) = COPY $s0
%2(s64) = COPY $d2
G_STORE %1(s32), %0(p0) :: (store (s32))
- ; CHECK: VSTRS %[[F32]], %[[P]], 0, 14 /* CC::al */, $noreg
G_STORE %2(s64), %0(p0) :: (store (s64))
- ; CHECK: VSTRD %[[F64]], %[[P]], 0, 14 /* CC::al */, $noreg
BX_RET 14, $noreg
...
@@ -833,11 +860,9 @@ body: |
...
---
name: test_soft_fp_double
-# CHECK-LABEL: name: test_soft_fp_double
legalized: true
regBankSelected: true
selected: false
-# CHECK: selected: true
registers:
- { id: 0, class: gprb }
- { id: 1, class: gprb }
@@ -848,24 +873,27 @@ body: |
bb.0:
liveins: $r0, $r1, $r2, $r3
+ ; CHECK-LABEL: name: test_soft_fp_double
+ ; CHECK: liveins: $r0, $r1, $r2, $r3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $r2
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $r3
+ ; CHECK-NEXT: [[VMOVDRR:%[0-9]+]]:dpr = VMOVDRR [[COPY]], [[COPY1]], 14 /* CC::al */, $noreg
+ ; CHECK-NEXT: [[VMOVRRD:%[0-9]+]]:gpr, [[VMOVRRD1:%[0-9]+]]:gpr = VMOVRRD [[VMOVDRR]], 14 /* CC::al */, $noreg
+ ; CHECK-NEXT: $r0 = COPY [[VMOVRRD]]
+ ; CHECK-NEXT: $r1 = COPY [[VMOVRRD1]]
+ ; CHECK-NEXT: BX_RET 14 /* CC::al */, $noreg, implicit $r0, implicit $r1
%0(s32) = COPY $r2
- ; CHECK: [[IN1:%[0-9]+]]:gpr = COPY $r2
%1(s32) = COPY $r3
- ; CHECK: [[IN2:%[0-9]+]]:gpr = COPY $r3
%2(s64) = G_MERGE_VALUES %0(s32), %1(s32)
- ; CHECK: %[[DREG:[0-9]+]]:dpr = VMOVDRR [[IN1]], [[IN2]]
%3(s32), %4(s32) = G_UNMERGE_VALUES %2(s64)
- ; CHECK: [[OUT1:%[0-9]+]]:gpr, [[OUT2:%[0-9]+]]:gpr = VMOVRRD %[[DREG]]
$r0 = COPY %3
- ; CHECK: $r0 = COPY [[OUT1]]
$r1 = COPY %4
- ; CHECK: $r1 = COPY [[OUT2]]
BX_RET 14, $noreg, implicit $r0, implicit $r1
- ; CHECK: BX_RET 14 /* CC::al */, $noreg, implicit $r0, implicit $r1
...
diff --git a/llvm/test/CodeGen/ARM/GlobalISel/select-pr35926.mir b/llvm/test/CodeGen/ARM/GlobalISel/select-pr35926.mir
index a6fc4da..fa982d8 100644
--- a/llvm/test/CodeGen/ARM/GlobalISel/select-pr35926.mir
+++ b/llvm/test/CodeGen/ARM/GlobalISel/select-pr35926.mir
@@ -31,7 +31,7 @@ body: |
; CHECK: [[COPY:%[0-9]+]]:dpr = COPY $d0
; CHECK: [[COPY1:%[0-9]+]]:dpr = COPY $d1
; CHECK: [[COPY2:%[0-9]+]]:dpr = COPY $d2
- ; CHECK: [[VFNMSD:%[0-9]+]]:dpr = VFNMSD [[COPY2]], [[COPY1]], [[COPY]], 14 /* CC::al */, $noreg
+ ; CHECK: [[VFNMSD:%[0-9]+]]:dpr = nofpexcept VFNMSD [[COPY2]], [[COPY1]], [[COPY]], 14 /* CC::al */, $noreg, implicit $fpscr
; CHECK: $d0 = COPY [[VFNMSD]]
; CHECK: MOVPCLR 14 /* CC::al */, $noreg, implicit $d0
%0:fprb(s64) = COPY $d0
diff --git a/llvm/test/CodeGen/ARM/bf16_fast_math.ll b/llvm/test/CodeGen/ARM/bf16_fast_math.ll
index 1b18ea6..5f7e1e6 100644
--- a/llvm/test/CodeGen/ARM/bf16_fast_math.ll
+++ b/llvm/test/CodeGen/ARM/bf16_fast_math.ll
@@ -17,7 +17,7 @@ define bfloat @normal_fadd(bfloat %x, bfloat %y) {
; CHECK-NOBF16-NEXT: [[VMOVSR:%[0-9]+]]:spr = VMOVSR killed [[MOVsi]], 14 /* CC::al */, $noreg
; CHECK-NOBF16-NEXT: [[MOVsi1:%[0-9]+]]:gpr = MOVsi [[COPY1]], 130, 14 /* CC::al */, $noreg, $noreg
; CHECK-NOBF16-NEXT: [[VMOVSR1:%[0-9]+]]:spr = VMOVSR killed [[MOVsi1]], 14 /* CC::al */, $noreg
- ; CHECK-NOBF16-NEXT: [[VADDS:%[0-9]+]]:spr = VADDS killed [[VMOVSR1]], killed [[VMOVSR]], 14 /* CC::al */, $noreg
+ ; CHECK-NOBF16-NEXT: [[VADDS:%[0-9]+]]:spr = nofpexcept VADDS killed [[VMOVSR1]], killed [[VMOVSR]], 14 /* CC::al */, $noreg, implicit $fpscr
; CHECK-NOBF16-NEXT: [[VMOVRS:%[0-9]+]]:gpr = VMOVRS killed [[VADDS]], 14 /* CC::al */, $noreg
; CHECK-NOBF16-NEXT: ADJCALLSTACKDOWN 0, 0, 14 /* CC::al */, $noreg, implicit-def dead $sp, implicit $sp
; CHECK-NOBF16-NEXT: $r0 = COPY [[VMOVRS]]
@@ -44,7 +44,7 @@ define bfloat @fast_fadd(bfloat %x, bfloat %y) {
; CHECK-NOBF16-NEXT: [[VMOVSR:%[0-9]+]]:spr = VMOVSR killed [[MOVsi]], 14 /* CC::al */, $noreg
; CHECK-NOBF16-NEXT: [[MOVsi1:%[0-9]+]]:gpr = MOVsi [[COPY1]], 130, 14 /* CC::al */, $noreg, $noreg
; CHECK-NOBF16-NEXT: [[VMOVSR1:%[0-9]+]]:spr = VMOVSR killed [[MOVsi1]], 14 /* CC::al */, $noreg
- ; CHECK-NOBF16-NEXT: [[VADDS:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VADDS killed [[VMOVSR1]], killed [[VMOVSR]], 14 /* CC::al */, $noreg
+ ; CHECK-NOBF16-NEXT: [[VADDS:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc nofpexcept VADDS killed [[VMOVSR1]], killed [[VMOVSR]], 14 /* CC::al */, $noreg, implicit $fpscr
; CHECK-NOBF16-NEXT: [[VMOVRS:%[0-9]+]]:gpr = VMOVRS killed [[VADDS]], 14 /* CC::al */, $noreg
; CHECK-NOBF16-NEXT: ADJCALLSTACKDOWN 0, 0, 14 /* CC::al */, $noreg, implicit-def dead $sp, implicit $sp
; CHECK-NOBF16-NEXT: $r0 = COPY [[VMOVRS]]
@@ -71,7 +71,7 @@ define bfloat @ninf_fadd(bfloat %x, bfloat %y) {
; CHECK-NOBF16-NEXT: [[VMOVSR:%[0-9]+]]:spr = VMOVSR killed [[MOVsi]], 14 /* CC::al */, $noreg
; CHECK-NOBF16-NEXT: [[MOVsi1:%[0-9]+]]:gpr = MOVsi [[COPY1]], 130, 14 /* CC::al */, $noreg, $noreg
; CHECK-NOBF16-NEXT: [[VMOVSR1:%[0-9]+]]:spr = VMOVSR killed [[MOVsi1]], 14 /* CC::al */, $noreg
- ; CHECK-NOBF16-NEXT: [[VADDS:%[0-9]+]]:spr = ninf VADDS killed [[VMOVSR1]], killed [[VMOVSR]], 14 /* CC::al */, $noreg
+ ; CHECK-NOBF16-NEXT: [[VADDS:%[0-9]+]]:spr = ninf nofpexcept VADDS killed [[VMOVSR1]], killed [[VMOVSR]], 14 /* CC::al */, $noreg, implicit $fpscr
; CHECK-NOBF16-NEXT: [[VMOVRS:%[0-9]+]]:gpr = VMOVRS killed [[VADDS]], 14 /* CC::al */, $noreg
; CHECK-NOBF16-NEXT: ADJCALLSTACKDOWN 0, 0, 14 /* CC::al */, $noreg, implicit-def dead $sp, implicit $sp
; CHECK-NOBF16-NEXT: $r0 = COPY [[VMOVRS]]
@@ -102,7 +102,7 @@ define bfloat @normal_fadd_sequence(bfloat %x, bfloat %y, bfloat %z) {
; CHECK-NOBF16-NEXT: [[VMOVSR:%[0-9]+]]:spr = VMOVSR killed [[MOVsi]], 14 /* CC::al */, $noreg
; CHECK-NOBF16-NEXT: [[MOVsi1:%[0-9]+]]:gpr = MOVsi [[COPY2]], 130, 14 /* CC::al */, $noreg, $noreg
; CHECK-NOBF16-NEXT: [[VMOVSR1:%[0-9]+]]:spr = VMOVSR killed [[MOVsi1]], 14 /* CC::al */, $noreg
- ; CHECK-NOBF16-NEXT: [[VADDS:%[0-9]+]]:spr = VADDS killed [[VMOVSR1]], killed [[VMOVSR]], 14 /* CC::al */, $noreg
+ ; CHECK-NOBF16-NEXT: [[VADDS:%[0-9]+]]:spr = nofpexcept VADDS killed [[VMOVSR1]], killed [[VMOVSR]], 14 /* CC::al */, $noreg, implicit $fpscr
; CHECK-NOBF16-NEXT: [[VMOVRS:%[0-9]+]]:gpr = VMOVRS killed [[VADDS]], 14 /* CC::al */, $noreg
; CHECK-NOBF16-NEXT: ADJCALLSTACKDOWN 0, 0, 14 /* CC::al */, $noreg, implicit-def dead $sp, implicit $sp
; CHECK-NOBF16-NEXT: $r0 = COPY [[VMOVRS]]
@@ -113,7 +113,7 @@ define bfloat @normal_fadd_sequence(bfloat %x, bfloat %y, bfloat %z) {
; CHECK-NOBF16-NEXT: [[VMOVSR2:%[0-9]+]]:spr = VMOVSR killed [[MOVsi2]], 14 /* CC::al */, $noreg
; CHECK-NOBF16-NEXT: [[MOVsi3:%[0-9]+]]:gpr = MOVsi [[COPY3]], 130, 14 /* CC::al */, $noreg, $noreg
; CHECK-NOBF16-NEXT: [[VMOVSR3:%[0-9]+]]:spr = VMOVSR killed [[MOVsi3]], 14 /* CC::al */, $noreg
- ; CHECK-NOBF16-NEXT: [[VADDS1:%[0-9]+]]:spr = VADDS killed [[VMOVSR3]], killed [[VMOVSR2]], 14 /* CC::al */, $noreg
+ ; CHECK-NOBF16-NEXT: [[VADDS1:%[0-9]+]]:spr = nofpexcept VADDS killed [[VMOVSR3]], killed [[VMOVSR2]], 14 /* CC::al */, $noreg, implicit $fpscr
; CHECK-NOBF16-NEXT: [[VMOVRS1:%[0-9]+]]:gpr = VMOVRS killed [[VADDS1]], 14 /* CC::al */, $noreg
; CHECK-NOBF16-NEXT: ADJCALLSTACKDOWN 0, 0, 14 /* CC::al */, $noreg, implicit-def dead $sp, implicit $sp
; CHECK-NOBF16-NEXT: $r0 = COPY [[VMOVRS1]]
@@ -142,10 +142,10 @@ define bfloat @nnan_ninf_contract_fadd_sequence(bfloat %x, bfloat %y, bfloat %z)
; CHECK-NOBF16-NEXT: [[VMOVSR:%[0-9]+]]:spr = VMOVSR killed [[MOVsi]], 14 /* CC::al */, $noreg
; CHECK-NOBF16-NEXT: [[MOVsi1:%[0-9]+]]:gpr = MOVsi [[COPY2]], 130, 14 /* CC::al */, $noreg, $noreg
; CHECK-NOBF16-NEXT: [[VMOVSR1:%[0-9]+]]:spr = VMOVSR killed [[MOVsi1]], 14 /* CC::al */, $noreg
- ; CHECK-NOBF16-NEXT: [[VADDS:%[0-9]+]]:spr = nnan ninf contract VADDS killed [[VMOVSR1]], killed [[VMOVSR]], 14 /* CC::al */, $noreg
+ ; CHECK-NOBF16-NEXT: [[VADDS:%[0-9]+]]:spr = nnan ninf contract nofpexcept VADDS killed [[VMOVSR1]], killed [[VMOVSR]], 14 /* CC::al */, $noreg, implicit $fpscr
; CHECK-NOBF16-NEXT: [[MOVsi2:%[0-9]+]]:gpr = MOVsi [[COPY]], 130, 14 /* CC::al */, $noreg, $noreg
; CHECK-NOBF16-NEXT: [[VMOVSR2:%[0-9]+]]:spr = VMOVSR killed [[MOVsi2]], 14 /* CC::al */, $noreg
- ; CHECK-NOBF16-NEXT: [[VADDS1:%[0-9]+]]:spr = nnan ninf contract VADDS killed [[VADDS]], killed [[VMOVSR2]], 14 /* CC::al */, $noreg
+ ; CHECK-NOBF16-NEXT: [[VADDS1:%[0-9]+]]:spr = nnan ninf contract nofpexcept VADDS killed [[VADDS]], killed [[VMOVSR2]], 14 /* CC::al */, $noreg, implicit $fpscr
; CHECK-NOBF16-NEXT: [[VMOVRS:%[0-9]+]]:gpr = VMOVRS killed [[VADDS1]], 14 /* CC::al */, $noreg
; CHECK-NOBF16-NEXT: ADJCALLSTACKDOWN 0, 0, 14 /* CC::al */, $noreg, implicit-def dead $sp, implicit $sp
; CHECK-NOBF16-NEXT: $r0 = COPY [[VMOVRS]]
@@ -174,7 +174,7 @@ define bfloat @ninf_fadd_sequence(bfloat %x, bfloat %y, bfloat %z) {
; CHECK-NOBF16-NEXT: [[VMOVSR:%[0-9]+]]:spr = VMOVSR killed [[MOVsi]], 14 /* CC::al */, $noreg
; CHECK-NOBF16-NEXT: [[MOVsi1:%[0-9]+]]:gpr = MOVsi [[COPY2]], 130, 14 /* CC::al */, $noreg, $noreg
; CHECK-NOBF16-NEXT: [[VMOVSR1:%[0-9]+]]:spr = VMOVSR killed [[MOVsi1]], 14 /* CC::al */, $noreg
- ; CHECK-NOBF16-NEXT: [[VADDS:%[0-9]+]]:spr = ninf VADDS killed [[VMOVSR1]], killed [[VMOVSR]], 14 /* CC::al */, $noreg
+ ; CHECK-NOBF16-NEXT: [[VADDS:%[0-9]+]]:spr = ninf nofpexcept VADDS killed [[VMOVSR1]], killed [[VMOVSR]], 14 /* CC::al */, $noreg, implicit $fpscr
; CHECK-NOBF16-NEXT: [[VMOVRS:%[0-9]+]]:gpr = VMOVRS killed [[VADDS]], 14 /* CC::al */, $noreg
; CHECK-NOBF16-NEXT: ADJCALLSTACKDOWN 0, 0, 14 /* CC::al */, $noreg, implicit-def dead $sp, implicit $sp
; CHECK-NOBF16-NEXT: $r0 = COPY [[VMOVRS]]
@@ -185,7 +185,7 @@ define bfloat @ninf_fadd_sequence(bfloat %x, bfloat %y, bfloat %z) {
; CHECK-NOBF16-NEXT: [[VMOVSR2:%[0-9]+]]:spr = VMOVSR killed [[MOVsi2]], 14 /* CC::al */, $noreg
; CHECK-NOBF16-NEXT: [[MOVsi3:%[0-9]+]]:gpr = MOVsi [[COPY3]], 130, 14 /* CC::al */, $noreg, $noreg
; CHECK-NOBF16-NEXT: [[VMOVSR3:%[0-9]+]]:spr = VMOVSR killed [[MOVsi3]], 14 /* CC::al */, $noreg
- ; CHECK-NOBF16-NEXT: [[VADDS1:%[0-9]+]]:spr = ninf VADDS killed [[VMOVSR3]], killed [[VMOVSR2]], 14 /* CC::al */, $noreg
+ ; CHECK-NOBF16-NEXT: [[VADDS1:%[0-9]+]]:spr = ninf nofpexcept VADDS killed [[VMOVSR3]], killed [[VMOVSR2]], 14 /* CC::al */, $noreg, implicit $fpscr
; CHECK-NOBF16-NEXT: [[VMOVRS1:%[0-9]+]]:gpr = VMOVRS killed [[VADDS1]], 14 /* CC::al */, $noreg
; CHECK-NOBF16-NEXT: ADJCALLSTACKDOWN 0, 0, 14 /* CC::al */, $noreg, implicit-def dead $sp, implicit $sp
; CHECK-NOBF16-NEXT: $r0 = COPY [[VMOVRS1]]
diff --git a/llvm/test/CodeGen/ARM/cortex-m7-wideops.mir b/llvm/test/CodeGen/ARM/cortex-m7-wideops.mir
index 1bee32f..fe23e85 100644
--- a/llvm/test/CodeGen/ARM/cortex-m7-wideops.mir
+++ b/llvm/test/CodeGen/ARM/cortex-m7-wideops.mir
@@ -22,15 +22,16 @@ body: |
; CHECK-LABEL: name: test_groups
; CHECK: liveins: $d0, $r0, $r1, $r2, $r3, $r4
- ; CHECK: renamable $d0 = VADDD killed renamable $d0, renamable $d0, 14 /* CC::al */, $noreg
- ; CHECK: renamable $r3 = t2ADDrr killed renamable $r3, renamable $r3, 14 /* CC::al */, $noreg, $noreg
- ; CHECK: renamable $s2 = VLDRS killed renamable $r0, 0, 14 /* CC::al */, $noreg
- ; CHECK: VSTRS killed renamable $s2, killed renamable $r1, 0, 14 /* CC::al */, $noreg
- ; CHECK: t2STRi12 killed renamable $r3, killed renamable $r2, 0, 14 /* CC::al */, $noreg
- ; CHECK: renamable $r4 = t2ADDrr killed renamable $r4, renamable $r4, 14 /* CC::al */, $noreg, $noreg
- ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit killed $d0
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: renamable $s2 = VLDRS killed renamable $r0, 0, 14 /* CC::al */, $noreg
+ ; CHECK-NEXT: renamable $r3 = t2ADDrr killed renamable $r3, renamable $r3, 14 /* CC::al */, $noreg, $noreg
+ ; CHECK-NEXT: renamable $d0 = VADDD killed renamable $d0, renamable $d0, 14 /* CC::al */, $noreg, implicit $fpscr_rm
+ ; CHECK-NEXT: renamable $r4 = t2ADDrr killed renamable $r4, renamable $r4, 14 /* CC::al */, $noreg, $noreg
+ ; CHECK-NEXT: VSTRS killed renamable $s2, killed renamable $r1, 0, 14 /* CC::al */, $noreg
+ ; CHECK-NEXT: t2STRi12 killed renamable $r3, killed renamable $r2, 0, 14 /* CC::al */, $noreg
+ ; CHECK-NEXT: tBX_RET 14 /* CC::al */, $noreg, implicit killed $d0
renamable $s2 = VLDRS killed renamable $r0, 0, 14 /* CC::al */, $noreg
- renamable $d0 = VADDD killed renamable $d0, renamable $d0, 14 /* CC::al */, $noreg
+ renamable $d0 = VADDD killed renamable $d0, renamable $d0, 14 /* CC::al */, $noreg, implicit $fpscr_rm
VSTRS killed renamable $s2, killed renamable $r1, 0, 14 /* CC::al */, $noreg
renamable $r3 = t2ADDrr killed renamable $r3, renamable $r3, 14 /* CC::al */, $noreg, $noreg
t2STRi12 killed renamable $r3, killed renamable $r2, 0, 14 /* CC::al */, $noreg
diff --git a/llvm/test/CodeGen/ARM/fp16-litpool-arm.mir b/llvm/test/CodeGen/ARM/fp16-litpool-arm.mir
index 8e671c9..f5b2e98 100644
--- a/llvm/test/CodeGen/ARM/fp16-litpool-arm.mir
+++ b/llvm/test/CodeGen/ARM/fp16-litpool-arm.mir
@@ -81,7 +81,7 @@ body: |
STRi12 killed renamable $r1, killed renamable $r0, 0, 14, $noreg :: (volatile store (s32) into %ir.LL, align 8)
dead renamable $r0 = SPACE 8920, undef renamable $r0
renamable $s2 = VLDRH $sp, 1, 14, $noreg :: (volatile dereferenceable load (s16) from %ir.S)
- renamable $s0 = VADDH killed renamable $s2, killed renamable $s0, 14, $noreg
+ renamable $s0 = VADDH killed renamable $s2, killed renamable $s0, 14, $noreg, implicit $fpscr_rm
VSTRH renamable $s0, $sp, 1, 14, $noreg :: (volatile store (s16) into %ir.S)
renamable $r0 = VMOVRH killed renamable $s0, 14, $noreg
dead renamable $r1 = SPACE 1350, undef renamable $r0
diff --git a/llvm/test/CodeGen/ARM/fp16-litpool-thumb.mir b/llvm/test/CodeGen/ARM/fp16-litpool-thumb.mir
index 03ddd80..4b66476 100644
--- a/llvm/test/CodeGen/ARM/fp16-litpool-thumb.mir
+++ b/llvm/test/CodeGen/ARM/fp16-litpool-thumb.mir
@@ -72,7 +72,7 @@ body: |
renamable $s2 = VLDRH $sp, 1, 14, $noreg :: (volatile dereferenceable load (s16) from %ir.S)
renamable $s0 = VLDRH %const.1, 0, 14, $noreg :: (load (s16) from constant-pool)
dead renamable $r0 = SPACE 1230, undef renamable $r0
- renamable $s0 = VADDH killed renamable $s2, killed renamable $s0, 14, $noreg
+ renamable $s0 = VADDH killed renamable $s2, killed renamable $s0, 14, $noreg, implicit $fpscr_rm
VSTRH renamable $s0, $sp, 1, 14, $noreg :: (volatile store (s16) into %ir.S)
renamable $r0 = VMOVRH killed renamable $s0, 14, $noreg
dead renamable $r1 = SPACE 1330, undef renamable $r0
diff --git a/llvm/test/CodeGen/ARM/fp16-litpool2-arm.mir b/llvm/test/CodeGen/ARM/fp16-litpool2-arm.mir
index 46f028b..c16a62a 100644
--- a/llvm/test/CodeGen/ARM/fp16-litpool2-arm.mir
+++ b/llvm/test/CodeGen/ARM/fp16-litpool2-arm.mir
@@ -89,7 +89,7 @@ body: |
$sp = frame-setup SUBri $sp, 4, 14, $noreg, $noreg
frame-setup CFI_INSTRUCTION def_cfa_offset 4
renamable $s0 = VLDRH %const.0, 0, 14, $noreg :: (load (s16) from constant-pool)
- VCMPZH renamable $s0, 14, $noreg, implicit-def $fpscr_nzcv
+ VCMPZH renamable $s0, 14, $noreg, implicit-def $fpscr_nzcv, implicit $fpscr_rm
VSTRH killed renamable $s0, $sp, 1, 14, $noreg :: (store (s16) into %ir.res)
FMSTAT 14, $noreg, implicit-def $cpsr, implicit killed $fpscr_nzcv
Bcc %bb.2, 0, killed $cpsr
diff --git a/llvm/test/CodeGen/ARM/fp16-litpool3-arm.mir b/llvm/test/CodeGen/ARM/fp16-litpool3-arm.mir
index 5a03fcd..049b7d9 100644
--- a/llvm/test/CodeGen/ARM/fp16-litpool3-arm.mir
+++ b/llvm/test/CodeGen/ARM/fp16-litpool3-arm.mir
@@ -95,7 +95,7 @@ body: |
$sp = frame-setup SUBri $sp, 4, 14, $noreg, $noreg
frame-setup CFI_INSTRUCTION def_cfa_offset 4
renamable $s0 = VLDRH %const.0, 0, 14, $noreg :: (load (s16) from constant-pool)
- VCMPZH renamable $s0, 14, $noreg, implicit-def $fpscr_nzcv
+ VCMPZH renamable $s0, 14, $noreg, implicit-def $fpscr_nzcv, implicit $fpscr_rm
VSTRH killed renamable $s0, $sp, 1, 14, $noreg :: (store (s16) into %ir.res)
FMSTAT 14, $noreg, implicit-def $cpsr, implicit killed $fpscr_nzcv
Bcc %bb.2, 0, killed $cpsr
diff --git a/llvm/test/CodeGen/ARM/fp16_fast_math.ll b/llvm/test/CodeGen/ARM/fp16_fast_math.ll
index 165eb4b..47e1f84f 100644
--- a/llvm/test/CodeGen/ARM/fp16_fast_math.ll
+++ b/llvm/test/CodeGen/ARM/fp16_fast_math.ll
@@ -16,11 +16,11 @@ define half @normal_fadd(half %x, half %y) {
; CHECK-CVT-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $r0
; CHECK-CVT-NEXT: [[COPY2:%[0-9]+]]:spr = COPY [[COPY1]]
; CHECK-CVT-NEXT: [[COPY3:%[0-9]+]]:spr = COPY [[COPY]]
- ; CHECK-CVT-NEXT: [[VCVTBHS:%[0-9]+]]:spr = VCVTBHS killed [[COPY3]], 14 /* CC::al */, $noreg
- ; CHECK-CVT-NEXT: [[VCVTBHS1:%[0-9]+]]:spr = VCVTBHS killed [[COPY2]], 14 /* CC::al */, $noreg
- ; CHECK-CVT-NEXT: [[VADDS:%[0-9]+]]:spr = VADDS killed [[VCVTBHS1]], killed [[VCVTBHS]], 14 /* CC::al */, $noreg
+ ; CHECK-CVT-NEXT: [[VCVTBHS:%[0-9]+]]:spr = nofpexcept VCVTBHS killed [[COPY3]], 14 /* CC::al */, $noreg, implicit $fpscr
+ ; CHECK-CVT-NEXT: [[VCVTBHS1:%[0-9]+]]:spr = nofpexcept VCVTBHS killed [[COPY2]], 14 /* CC::al */, $noreg, implicit $fpscr
+ ; CHECK-CVT-NEXT: [[VADDS:%[0-9]+]]:spr = nofpexcept VADDS killed [[VCVTBHS1]], killed [[VCVTBHS]], 14 /* CC::al */, $noreg, implicit $fpscr
; CHECK-CVT-NEXT: [[DEF:%[0-9]+]]:spr = IMPLICIT_DEF
- ; CHECK-CVT-NEXT: [[VCVTBSH:%[0-9]+]]:spr = VCVTBSH [[DEF]], killed [[VADDS]], 14 /* CC::al */, $noreg
+ ; CHECK-CVT-NEXT: [[VCVTBSH:%[0-9]+]]:spr = nofpexcept VCVTBSH [[DEF]], killed [[VADDS]], 14 /* CC::al */, $noreg, implicit $fpscr
; CHECK-CVT-NEXT: [[COPY4:%[0-9]+]]:gpr = COPY killed [[VCVTBSH]]
; CHECK-CVT-NEXT: $r0 = COPY [[COPY4]]
; CHECK-CVT-NEXT: MOVPCLR 14 /* CC::al */, $noreg, implicit $r0
@@ -33,7 +33,7 @@ define half @normal_fadd(half %x, half %y) {
; CHECK-FP16-NEXT: [[COPY1:%[0-9]+]]:rgpr = COPY $r0
; CHECK-FP16-NEXT: [[VMOVHR:%[0-9]+]]:hpr = VMOVHR [[COPY]], 14, $noreg
; CHECK-FP16-NEXT: [[VMOVHR1:%[0-9]+]]:hpr = VMOVHR [[COPY1]], 14, $noreg
- ; CHECK-FP16-NEXT: [[VADDH:%[0-9]+]]:hpr = VADDH killed [[VMOVHR1]], killed [[VMOVHR]], 14, $noreg
+ ; CHECK-FP16-NEXT: [[VADDH:%[0-9]+]]:hpr = nofpexcept VADDH killed [[VMOVHR1]], killed [[VMOVHR]], 14, $noreg, implicit $fpscr
; CHECK-FP16-NEXT: $r0 = COPY [[VADDH]]
; CHECK-FP16-NEXT: MOVPCLR 14 /* CC::al */, $noreg, implicit $r0
entry:
@@ -50,11 +50,11 @@ define half @fast_fadd(half %x, half %y) {
; CHECK-CVT-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $r0
; CHECK-CVT-NEXT: [[COPY2:%[0-9]+]]:spr = COPY [[COPY1]]
; CHECK-CVT-NEXT: [[COPY3:%[0-9]+]]:spr = COPY [[COPY]]
- ; CHECK-CVT-NEXT: [[VCVTBHS:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VCVTBHS killed [[COPY3]], 14 /* CC::al */, $noreg
- ; CHECK-CVT-NEXT: [[VCVTBHS1:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VCVTBHS killed [[COPY2]], 14 /* CC::al */, $noreg
- ; CHECK-CVT-NEXT: [[VADDS:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VADDS killed [[VCVTBHS1]], killed [[VCVTBHS]], 14 /* CC::al */, $noreg
+ ; CHECK-CVT-NEXT: [[VCVTBHS:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc nofpexcept VCVTBHS killed [[COPY3]], 14 /* CC::al */, $noreg, implicit $fpscr
+ ; CHECK-CVT-NEXT: [[VCVTBHS1:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc nofpexcept VCVTBHS killed [[COPY2]], 14 /* CC::al */, $noreg, implicit $fpscr
+ ; CHECK-CVT-NEXT: [[VADDS:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc nofpexcept VADDS killed [[VCVTBHS1]], killed [[VCVTBHS]], 14 /* CC::al */, $noreg, implicit $fpscr
; CHECK-CVT-NEXT: [[DEF:%[0-9]+]]:spr = IMPLICIT_DEF
- ; CHECK-CVT-NEXT: [[VCVTBSH:%[0-9]+]]:spr = VCVTBSH [[DEF]], killed [[VADDS]], 14 /* CC::al */, $noreg
+ ; CHECK-CVT-NEXT: [[VCVTBSH:%[0-9]+]]:spr = nofpexcept VCVTBSH [[DEF]], killed [[VADDS]], 14 /* CC::al */, $noreg, implicit $fpscr
; CHECK-CVT-NEXT: [[COPY4:%[0-9]+]]:gpr = COPY killed [[VCVTBSH]]
; CHECK-CVT-NEXT: $r0 = COPY [[COPY4]]
; CHECK-CVT-NEXT: MOVPCLR 14 /* CC::al */, $noreg, implicit $r0
@@ -67,7 +67,7 @@ define half @fast_fadd(half %x, half %y) {
; CHECK-FP16-NEXT: [[COPY1:%[0-9]+]]:rgpr = COPY $r0
; CHECK-FP16-NEXT: [[VMOVHR:%[0-9]+]]:hpr = VMOVHR [[COPY]], 14, $noreg
; CHECK-FP16-NEXT: [[VMOVHR1:%[0-9]+]]:hpr = VMOVHR [[COPY1]], 14, $noreg
- ; CHECK-FP16-NEXT: [[VADDH:%[0-9]+]]:hpr = nnan ninf nsz arcp contract afn reassoc VADDH killed [[VMOVHR1]], killed [[VMOVHR]], 14, $noreg
+ ; CHECK-FP16-NEXT: [[VADDH:%[0-9]+]]:hpr = nnan ninf nsz arcp contract afn reassoc nofpexcept VADDH killed [[VMOVHR1]], killed [[VMOVHR]], 14, $noreg, implicit $fpscr
; CHECK-FP16-NEXT: $r0 = COPY [[VADDH]]
; CHECK-FP16-NEXT: MOVPCLR 14 /* CC::al */, $noreg, implicit $r0
entry:
@@ -84,11 +84,11 @@ define half @ninf_fadd(half %x, half %y) {
; CHECK-CVT-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $r0
; CHECK-CVT-NEXT: [[COPY2:%[0-9]+]]:spr = COPY [[COPY1]]
; CHECK-CVT-NEXT: [[COPY3:%[0-9]+]]:spr = COPY [[COPY]]
- ; CHECK-CVT-NEXT: [[VCVTBHS:%[0-9]+]]:spr = ninf VCVTBHS killed [[COPY3]], 14 /* CC::al */, $noreg
- ; CHECK-CVT-NEXT: [[VCVTBHS1:%[0-9]+]]:spr = ninf VCVTBHS killed [[COPY2]], 14 /* CC::al */, $noreg
- ; CHECK-CVT-NEXT: [[VADDS:%[0-9]+]]:spr = ninf VADDS killed [[VCVTBHS1]], killed [[VCVTBHS]], 14 /* CC::al */, $noreg
+ ; CHECK-CVT-NEXT: [[VCVTBHS:%[0-9]+]]:spr = ninf nofpexcept VCVTBHS killed [[COPY3]], 14 /* CC::al */, $noreg, implicit $fpscr
+ ; CHECK-CVT-NEXT: [[VCVTBHS1:%[0-9]+]]:spr = ninf nofpexcept VCVTBHS killed [[COPY2]], 14 /* CC::al */, $noreg, implicit $fpscr
+ ; CHECK-CVT-NEXT: [[VADDS:%[0-9]+]]:spr = ninf nofpexcept VADDS killed [[VCVTBHS1]], killed [[VCVTBHS]], 14 /* CC::al */, $noreg, implicit $fpscr
; CHECK-CVT-NEXT: [[DEF:%[0-9]+]]:spr = IMPLICIT_DEF
- ; CHECK-CVT-NEXT: [[VCVTBSH:%[0-9]+]]:spr = VCVTBSH [[DEF]], killed [[VADDS]], 14 /* CC::al */, $noreg
+ ; CHECK-CVT-NEXT: [[VCVTBSH:%[0-9]+]]:spr = nofpexcept VCVTBSH [[DEF]], killed [[VADDS]], 14 /* CC::al */, $noreg, implicit $fpscr
; CHECK-CVT-NEXT: [[COPY4:%[0-9]+]]:gpr = COPY killed [[VCVTBSH]]
; CHECK-CVT-NEXT: $r0 = COPY [[COPY4]]
; CHECK-CVT-NEXT: MOVPCLR 14 /* CC::al */, $noreg, implicit $r0
@@ -101,7 +101,7 @@ define half @ninf_fadd(half %x, half %y) {
; CHECK-FP16-NEXT: [[COPY1:%[0-9]+]]:rgpr = COPY $r0
; CHECK-FP16-NEXT: [[VMOVHR:%[0-9]+]]:hpr = VMOVHR [[COPY]], 14, $noreg
; CHECK-FP16-NEXT: [[VMOVHR1:%[0-9]+]]:hpr = VMOVHR [[COPY1]], 14, $noreg
- ; CHECK-FP16-NEXT: [[VADDH:%[0-9]+]]:hpr = ninf VADDH killed [[VMOVHR1]], killed [[VMOVHR]], 14, $noreg
+ ; CHECK-FP16-NEXT: [[VADDH:%[0-9]+]]:hpr = ninf nofpexcept VADDH killed [[VMOVHR1]], killed [[VMOVHR]], 14, $noreg, implicit $fpscr
; CHECK-FP16-NEXT: $r0 = COPY [[VADDH]]
; CHECK-FP16-NEXT: MOVPCLR 14 /* CC::al */, $noreg, implicit $r0
entry:
@@ -122,19 +122,19 @@ define half @normal_fadd_sequence(half %x, half %y, half %z) {
; CHECK-CVT-NEXT: [[COPY2:%[0-9]+]]:gpr = COPY $r0
; CHECK-CVT-NEXT: [[COPY3:%[0-9]+]]:spr = COPY [[COPY2]]
; CHECK-CVT-NEXT: [[COPY4:%[0-9]+]]:spr = COPY [[COPY1]]
- ; CHECK-CVT-NEXT: [[VCVTBHS:%[0-9]+]]:spr = VCVTBHS killed [[COPY4]], 14 /* CC::al */, $noreg
- ; CHECK-CVT-NEXT: [[VCVTBHS1:%[0-9]+]]:spr = VCVTBHS killed [[COPY3]], 14 /* CC::al */, $noreg
- ; CHECK-CVT-NEXT: [[VADDS:%[0-9]+]]:spr = VADDS killed [[VCVTBHS1]], killed [[VCVTBHS]], 14 /* CC::al */, $noreg
+ ; CHECK-CVT-NEXT: [[VCVTBHS:%[0-9]+]]:spr = nofpexcept VCVTBHS killed [[COPY4]], 14 /* CC::al */, $noreg, implicit $fpscr
+ ; CHECK-CVT-NEXT: [[VCVTBHS1:%[0-9]+]]:spr = nofpexcept VCVTBHS killed [[COPY3]], 14 /* CC::al */, $noreg, implicit $fpscr
+ ; CHECK-CVT-NEXT: [[VADDS:%[0-9]+]]:spr = nofpexcept VADDS killed [[VCVTBHS1]], killed [[VCVTBHS]], 14 /* CC::al */, $noreg, implicit $fpscr
; CHECK-CVT-NEXT: [[COPY5:%[0-9]+]]:spr = COPY [[COPY]]
- ; CHECK-CVT-NEXT: [[VCVTBHS2:%[0-9]+]]:spr = VCVTBHS killed [[COPY5]], 14 /* CC::al */, $noreg
+ ; CHECK-CVT-NEXT: [[VCVTBHS2:%[0-9]+]]:spr = nofpexcept VCVTBHS killed [[COPY5]], 14 /* CC::al */, $noreg, implicit $fpscr
; CHECK-CVT-NEXT: [[DEF:%[0-9]+]]:spr = IMPLICIT_DEF
- ; CHECK-CVT-NEXT: [[VCVTBSH:%[0-9]+]]:spr = VCVTBSH [[DEF]], killed [[VADDS]], 14 /* CC::al */, $noreg
+ ; CHECK-CVT-NEXT: [[VCVTBSH:%[0-9]+]]:spr = nofpexcept VCVTBSH [[DEF]], killed [[VADDS]], 14 /* CC::al */, $noreg, implicit $fpscr
; CHECK-CVT-NEXT: [[COPY6:%[0-9]+]]:gpr = COPY killed [[VCVTBSH]]
; CHECK-CVT-NEXT: [[COPY7:%[0-9]+]]:spr = COPY killed [[COPY6]]
- ; CHECK-CVT-NEXT: [[VCVTBHS3:%[0-9]+]]:spr = VCVTBHS killed [[COPY7]], 14 /* CC::al */, $noreg
- ; CHECK-CVT-NEXT: [[VADDS1:%[0-9]+]]:spr = VADDS killed [[VCVTBHS3]], killed [[VCVTBHS2]], 14 /* CC::al */, $noreg
+ ; CHECK-CVT-NEXT: [[VCVTBHS3:%[0-9]+]]:spr = nofpexcept VCVTBHS killed [[COPY7]], 14 /* CC::al */, $noreg, implicit $fpscr
+ ; CHECK-CVT-NEXT: [[VADDS1:%[0-9]+]]:spr = nofpexcept VADDS killed [[VCVTBHS3]], killed [[VCVTBHS2]], 14 /* CC::al */, $noreg, implicit $fpscr
; CHECK-CVT-NEXT: [[DEF1:%[0-9]+]]:spr = IMPLICIT_DEF
- ; CHECK-CVT-NEXT: [[VCVTBSH1:%[0-9]+]]:spr = VCVTBSH [[DEF1]], killed [[VADDS1]], 14 /* CC::al */, $noreg
+ ; CHECK-CVT-NEXT: [[VCVTBSH1:%[0-9]+]]:spr = nofpexcept VCVTBSH [[DEF1]], killed [[VADDS1]], 14 /* CC::al */, $noreg, implicit $fpscr
; CHECK-CVT-NEXT: [[COPY8:%[0-9]+]]:gpr = COPY killed [[VCVTBSH1]]
; CHECK-CVT-NEXT: $r0 = COPY [[COPY8]]
; CHECK-CVT-NEXT: MOVPCLR 14 /* CC::al */, $noreg, implicit $r0
@@ -148,9 +148,9 @@ define half @normal_fadd_sequence(half %x, half %y, half %z) {
; CHECK-FP16-NEXT: [[COPY2:%[0-9]+]]:rgpr = COPY $r0
; CHECK-FP16-NEXT: [[VMOVHR:%[0-9]+]]:hpr = VMOVHR [[COPY1]], 14, $noreg
; CHECK-FP16-NEXT: [[VMOVHR1:%[0-9]+]]:hpr = VMOVHR [[COPY2]], 14, $noreg
- ; CHECK-FP16-NEXT: [[VADDH:%[0-9]+]]:hpr = VADDH killed [[VMOVHR1]], killed [[VMOVHR]], 14, $noreg
+ ; CHECK-FP16-NEXT: [[VADDH:%[0-9]+]]:hpr = nofpexcept VADDH killed [[VMOVHR1]], killed [[VMOVHR]], 14, $noreg, implicit $fpscr
; CHECK-FP16-NEXT: [[VMOVHR2:%[0-9]+]]:hpr = VMOVHR [[COPY]], 14, $noreg
- ; CHECK-FP16-NEXT: [[VADDH1:%[0-9]+]]:hpr = VADDH killed [[VADDH]], killed [[VMOVHR2]], 14, $noreg
+ ; CHECK-FP16-NEXT: [[VADDH1:%[0-9]+]]:hpr = nofpexcept VADDH killed [[VADDH]], killed [[VMOVHR2]], 14, $noreg, implicit $fpscr
; CHECK-FP16-NEXT: $r0 = COPY [[VADDH1]]
; CHECK-FP16-NEXT: MOVPCLR 14 /* CC::al */, $noreg, implicit $r0
entry:
@@ -169,14 +169,14 @@ define half @nnan_ninf_contract_fadd_sequence(half %x, half %y, half %z) {
; CHECK-CVT-NEXT: [[COPY2:%[0-9]+]]:gpr = COPY $r0
; CHECK-CVT-NEXT: [[COPY3:%[0-9]+]]:spr = COPY [[COPY2]]
; CHECK-CVT-NEXT: [[COPY4:%[0-9]+]]:spr = COPY [[COPY1]]
- ; CHECK-CVT-NEXT: [[VCVTBHS:%[0-9]+]]:spr = nnan ninf contract VCVTBHS killed [[COPY4]], 14 /* CC::al */, $noreg
- ; CHECK-CVT-NEXT: [[VCVTBHS1:%[0-9]+]]:spr = nnan ninf contract VCVTBHS killed [[COPY3]], 14 /* CC::al */, $noreg
- ; CHECK-CVT-NEXT: [[VADDS:%[0-9]+]]:spr = nnan ninf contract VADDS killed [[VCVTBHS1]], killed [[VCVTBHS]], 14 /* CC::al */, $noreg
+ ; CHECK-CVT-NEXT: [[VCVTBHS:%[0-9]+]]:spr = nnan ninf contract nofpexcept VCVTBHS killed [[COPY4]], 14 /* CC::al */, $noreg, implicit $fpscr
+ ; CHECK-CVT-NEXT: [[VCVTBHS1:%[0-9]+]]:spr = nnan ninf contract nofpexcept VCVTBHS killed [[COPY3]], 14 /* CC::al */, $noreg, implicit $fpscr
+ ; CHECK-CVT-NEXT: [[VADDS:%[0-9]+]]:spr = nnan ninf contract nofpexcept VADDS killed [[VCVTBHS1]], killed [[VCVTBHS]], 14 /* CC::al */, $noreg, implicit $fpscr
; CHECK-CVT-NEXT: [[COPY5:%[0-9]+]]:spr = COPY [[COPY]]
- ; CHECK-CVT-NEXT: [[VCVTBHS2:%[0-9]+]]:spr = nnan ninf contract VCVTBHS killed [[COPY5]], 14 /* CC::al */, $noreg
- ; CHECK-CVT-NEXT: [[VADDS1:%[0-9]+]]:spr = nnan ninf contract VADDS killed [[VADDS]], killed [[VCVTBHS2]], 14 /* CC::al */, $noreg
+ ; CHECK-CVT-NEXT: [[VCVTBHS2:%[0-9]+]]:spr = nnan ninf contract nofpexcept VCVTBHS killed [[COPY5]], 14 /* CC::al */, $noreg, implicit $fpscr
+ ; CHECK-CVT-NEXT: [[VADDS1:%[0-9]+]]:spr = nnan ninf contract nofpexcept VADDS killed [[VADDS]], killed [[VCVTBHS2]], 14 /* CC::al */, $noreg, implicit $fpscr
; CHECK-CVT-NEXT: [[DEF:%[0-9]+]]:spr = IMPLICIT_DEF
- ; CHECK-CVT-NEXT: [[VCVTBSH:%[0-9]+]]:spr = VCVTBSH [[DEF]], killed [[VADDS1]], 14 /* CC::al */, $noreg
+ ; CHECK-CVT-NEXT: [[VCVTBSH:%[0-9]+]]:spr = nofpexcept VCVTBSH [[DEF]], killed [[VADDS1]], 14 /* CC::al */, $noreg, implicit $fpscr
; CHECK-CVT-NEXT: [[COPY6:%[0-9]+]]:gpr = COPY killed [[VCVTBSH]]
; CHECK-CVT-NEXT: $r0 = COPY [[COPY6]]
; CHECK-CVT-NEXT: MOVPCLR 14 /* CC::al */, $noreg, implicit $r0
@@ -190,9 +190,9 @@ define half @nnan_ninf_contract_fadd_sequence(half %x, half %y, half %z) {
; CHECK-FP16-NEXT: [[COPY2:%[0-9]+]]:rgpr = COPY $r0
; CHECK-FP16-NEXT: [[VMOVHR:%[0-9]+]]:hpr = VMOVHR [[COPY1]], 14, $noreg
; CHECK-FP16-NEXT: [[VMOVHR1:%[0-9]+]]:hpr = VMOVHR [[COPY2]], 14, $noreg
- ; CHECK-FP16-NEXT: [[VADDH:%[0-9]+]]:hpr = nnan ninf contract VADDH killed [[VMOVHR1]], killed [[VMOVHR]], 14, $noreg
+ ; CHECK-FP16-NEXT: [[VADDH:%[0-9]+]]:hpr = nnan ninf contract nofpexcept VADDH killed [[VMOVHR1]], killed [[VMOVHR]], 14, $noreg, implicit $fpscr
; CHECK-FP16-NEXT: [[VMOVHR2:%[0-9]+]]:hpr = VMOVHR [[COPY]], 14, $noreg
- ; CHECK-FP16-NEXT: [[VADDH1:%[0-9]+]]:hpr = nnan ninf contract VADDH killed [[VADDH]], killed [[VMOVHR2]], 14, $noreg
+ ; CHECK-FP16-NEXT: [[VADDH1:%[0-9]+]]:hpr = nnan ninf contract nofpexcept VADDH killed [[VADDH]], killed [[VMOVHR2]], 14, $noreg, implicit $fpscr
; CHECK-FP16-NEXT: $r0 = COPY [[VADDH1]]
; CHECK-FP16-NEXT: MOVPCLR 14 /* CC::al */, $noreg, implicit $r0
entry:
@@ -211,19 +211,19 @@ define half @ninf_fadd_sequence(half %x, half %y, half %z) {
; CHECK-CVT-NEXT: [[COPY2:%[0-9]+]]:gpr = COPY $r0
; CHECK-CVT-NEXT: [[COPY3:%[0-9]+]]:spr = COPY [[COPY2]]
; CHECK-CVT-NEXT: [[COPY4:%[0-9]+]]:spr = COPY [[COPY1]]
- ; CHECK-CVT-NEXT: [[VCVTBHS:%[0-9]+]]:spr = ninf VCVTBHS killed [[COPY4]], 14 /* CC::al */, $noreg
- ; CHECK-CVT-NEXT: [[VCVTBHS1:%[0-9]+]]:spr = ninf VCVTBHS killed [[COPY3]], 14 /* CC::al */, $noreg
- ; CHECK-CVT-NEXT: [[VADDS:%[0-9]+]]:spr = ninf VADDS killed [[VCVTBHS1]], killed [[VCVTBHS]], 14 /* CC::al */, $noreg
+ ; CHECK-CVT-NEXT: [[VCVTBHS:%[0-9]+]]:spr = ninf nofpexcept VCVTBHS killed [[COPY4]], 14 /* CC::al */, $noreg, implicit $fpscr
+ ; CHECK-CVT-NEXT: [[VCVTBHS1:%[0-9]+]]:spr = ninf nofpexcept VCVTBHS killed [[COPY3]], 14 /* CC::al */, $noreg, implicit $fpscr
+ ; CHECK-CVT-NEXT: [[VADDS:%[0-9]+]]:spr = ninf nofpexcept VADDS killed [[VCVTBHS1]], killed [[VCVTBHS]], 14 /* CC::al */, $noreg, implicit $fpscr
; CHECK-CVT-NEXT: [[COPY5:%[0-9]+]]:spr = COPY [[COPY]]
- ; CHECK-CVT-NEXT: [[VCVTBHS2:%[0-9]+]]:spr = ninf VCVTBHS killed [[COPY5]], 14 /* CC::al */, $noreg
+ ; CHECK-CVT-NEXT: [[VCVTBHS2:%[0-9]+]]:spr = ninf nofpexcept VCVTBHS killed [[COPY5]], 14 /* CC::al */, $noreg, implicit $fpscr
; CHECK-CVT-NEXT: [[DEF:%[0-9]+]]:spr = IMPLICIT_DEF
- ; CHECK-CVT-NEXT: [[VCVTBSH:%[0-9]+]]:spr = VCVTBSH [[DEF]], killed [[VADDS]], 14 /* CC::al */, $noreg
+ ; CHECK-CVT-NEXT: [[VCVTBSH:%[0-9]+]]:spr = nofpexcept VCVTBSH [[DEF]], killed [[VADDS]], 14 /* CC::al */, $noreg, implicit $fpscr
; CHECK-CVT-NEXT: [[COPY6:%[0-9]+]]:gpr = COPY killed [[VCVTBSH]]
; CHECK-CVT-NEXT: [[COPY7:%[0-9]+]]:spr = COPY killed [[COPY6]]
- ; CHECK-CVT-NEXT: [[VCVTBHS3:%[0-9]+]]:spr = ninf VCVTBHS killed [[COPY7]], 14 /* CC::al */, $noreg
- ; CHECK-CVT-NEXT: [[VADDS1:%[0-9]+]]:spr = ninf VADDS killed [[VCVTBHS3]], killed [[VCVTBHS2]], 14 /* CC::al */, $noreg
+ ; CHECK-CVT-NEXT: [[VCVTBHS3:%[0-9]+]]:spr = ninf nofpexcept VCVTBHS killed [[COPY7]], 14 /* CC::al */, $noreg, implicit $fpscr
+ ; CHECK-CVT-NEXT: [[VADDS1:%[0-9]+]]:spr = ninf nofpexcept VADDS killed [[VCVTBHS3]], killed [[VCVTBHS2]], 14 /* CC::al */, $noreg, implicit $fpscr
; CHECK-CVT-NEXT: [[DEF1:%[0-9]+]]:spr = IMPLICIT_DEF
- ; CHECK-CVT-NEXT: [[VCVTBSH1:%[0-9]+]]:spr = VCVTBSH [[DEF1]], killed [[VADDS1]], 14 /* CC::al */, $noreg
+ ; CHECK-CVT-NEXT: [[VCVTBSH1:%[0-9]+]]:spr = nofpexcept VCVTBSH [[DEF1]], killed [[VADDS1]], 14 /* CC::al */, $noreg, implicit $fpscr
; CHECK-CVT-NEXT: [[COPY8:%[0-9]+]]:gpr = COPY killed [[VCVTBSH1]]
; CHECK-CVT-NEXT: $r0 = COPY [[COPY8]]
; CHECK-CVT-NEXT: MOVPCLR 14 /* CC::al */, $noreg, implicit $r0
@@ -237,9 +237,9 @@ define half @ninf_fadd_sequence(half %x, half %y, half %z) {
; CHECK-FP16-NEXT: [[COPY2:%[0-9]+]]:rgpr = COPY $r0
; CHECK-FP16-NEXT: [[VMOVHR:%[0-9]+]]:hpr = VMOVHR [[COPY1]], 14, $noreg
; CHECK-FP16-NEXT: [[VMOVHR1:%[0-9]+]]:hpr = VMOVHR [[COPY2]], 14, $noreg
- ; CHECK-FP16-NEXT: [[VADDH:%[0-9]+]]:hpr = ninf VADDH killed [[VMOVHR1]], killed [[VMOVHR]], 14, $noreg
+ ; CHECK-FP16-NEXT: [[VADDH:%[0-9]+]]:hpr = ninf nofpexcept VADDH killed [[VMOVHR1]], killed [[VMOVHR]], 14, $noreg, implicit $fpscr
; CHECK-FP16-NEXT: [[VMOVHR2:%[0-9]+]]:hpr = VMOVHR [[COPY]], 14, $noreg
- ; CHECK-FP16-NEXT: [[VADDH1:%[0-9]+]]:hpr = ninf VADDH killed [[VADDH]], killed [[VMOVHR2]], 14, $noreg
+ ; CHECK-FP16-NEXT: [[VADDH1:%[0-9]+]]:hpr = ninf nofpexcept VADDH killed [[VADDH]], killed [[VMOVHR2]], 14, $noreg, implicit $fpscr
; CHECK-FP16-NEXT: $r0 = COPY [[VADDH1]]
; CHECK-FP16-NEXT: MOVPCLR 14 /* CC::al */, $noreg, implicit $r0
entry:
diff --git a/llvm/test/CodeGen/ARM/ipra-reg-usage.ll b/llvm/test/CodeGen/ARM/ipra-reg-usage.ll
index c928390..90142cb 100644
--- a/llvm/test/CodeGen/ARM/ipra-reg-usage.ll
+++ b/llvm/test/CodeGen/ARM/ipra-reg-usage.ll
@@ -6,7 +6,7 @@ target triple = "armv7-eabi"
declare void @bar1()
define void @foo()#0 {
-; CHECK: foo Clobbered Registers: $apsr $apsr_nzcv $cpsr $fpcxtns $fpcxts $fpexc $fpinst $fpscr $fpscr_nzcv $fpscr_nzcvqc $fpsid $itstate $pc $ra_auth_code $sp $spsr $vpr $zr $d0 $d1 $d2 $d3 $d4 $d5 $d6 $d7 $d16 $d17 $d18 $d19 $d20 $d21 $d22 $d23 $d24 $d25 $d26 $d27 $d28 $d29 $d30 $d31 $fpinst2 $mvfr0 $mvfr1 $mvfr2 $p0 $q0 $q1 $q2 $q3 $q8 $q9 $q10 $q11 $q12 $q13 $q14 $q15 $r0 $r1 $r2 $r3 $r12 $s0 $s1 $s2 $s3 $s4 $s5 $s6 $s7 $s8 $s9 $s10 $s11 $s12 $s13 $s14 $s15 $d0_d2 $d1_d3 $d2_d4 $d3_d5 $d4_d6 $d5_d7 $d6_d8 $d7_d9 $d14_d16 $d15_d17 $d16_d18 $d17_d19 $d18_d20 $d19_d21 $d20_d22 $d21_d23 $d22_d24 $d23_d25 $d24_d26 $d25_d27 $d26_d28 $d27_d29 $d28_d30 $d29_d31 $q0_q1 $q1_q2 $q2_q3 $q3_q4 $q7_q8 $q8_q9 $q9_q10 $q10_q11 $q11_q12 $q12_q13 $q13_q14 $q14_q15 $q0_q1_q2_q3 $q1_q2_q3_q4 $q2_q3_q4_q5 $q3_q4_q5_q6 $q5_q6_q7_q8 $q6_q7_q8_q9 $q7_q8_q9_q10 $q8_q9_q10_q11 $q9_q10_q11_q12 $q10_q11_q12_q13 $q11_q12_q13_q14 $q12_q13_q14_q15 $r0_r1 $r2_r3 $r12_sp $d0_d1_d2 $d1_d2_d3 $d2_d3_d4 $d3_d4_d5 $d4_d5_d6 $d5_d6_d7 $d6_d7_d8 $d7_d8_d9 $d14_d15_d16 $d15_d16_d17 $d16_d17_d18 $d17_d18_d19 $d18_d19_d20 $d19_d20_d21 $d20_d21_d22 $d21_d22_d23 $d22_d23_d24 $d23_d24_d25 $d24_d25_d26 $d25_d26_d27 $d26_d27_d28 $d27_d28_d29 $d28_d29_d30 $d29_d30_d31 $d0_d2_d4 $d1_d3_d5 $d2_d4_d6 $d3_d5_d7 $d4_d6_d8 $d5_d7_d9 $d6_d8_d10 $d7_d9_d11 $d12_d14_d16 $d13_d15_d17 $d14_d16_d18 $d15_d17_d19 $d16_d18_d20 $d17_d19_d21 $d18_d20_d22 $d19_d21_d23 $d20_d22_d24 $d21_d23_d25 $d22_d24_d26 $d23_d25_d27 $d24_d26_d28 $d25_d27_d29 $d26_d28_d30 $d27_d29_d31 $d0_d2_d4_d6 $d1_d3_d5_d7 $d2_d4_d6_d8 $d3_d5_d7_d9 $d4_d6_d8_d10 $d5_d7_d9_d11 $d6_d8_d10_d12 $d7_d9_d11_d13 $d10_d12_d14_d16 $d11_d13_d15_d17 $d12_d14_d16_d18 $d13_d15_d17_d19 $d14_d16_d18_d20 $d15_d17_d19_d21 $d16_d18_d20_d22 $d17_d19_d21_d23 $d18_d20_d22_d24 $d19_d21_d23_d25 $d20_d22_d24_d26 $d21_d23_d25_d27 $d22_d24_d26_d28 $d23_d25_d27_d29 $d24_d26_d28_d30 $d25_d27_d29_d31 $d1_d2 $d3_d4 $d5_d6 $d7_d8 $d15_d16 $d17_d18 $d19_d20 $d21_d22 $d23_d24 $d25_d26 
$d27_d28 $d29_d30 $d1_d2_d3_d4 $d3_d4_d5_d6 $d5_d6_d7_d8 $d7_d8_d9_d10 $d13_d14_d15_d16 $d15_d16_d17_d18 $d17_d18_d19_d20 $d19_d20_d21_d22 $d21_d22_d23_d24 $d23_d24_d25_d26 $d25_d26_d27_d28 $d27_d28_d29_d30
+; CHECK: foo Clobbered Registers: $apsr $apsr_nzcv $cpsr $fpcxtns $fpcxts $fpexc $fpinst $fpscr $fpscr_nzcv $fpscr_nzcvqc $fpscr_rm $fpsid $itstate $pc $ra_auth_code $sp $spsr $vpr $zr $d0 $d1 $d2 $d3 $d4 $d5 $d6 $d7 $d16 $d17 $d18 $d19 $d20 $d21 $d22 $d23 $d24 $d25 $d26 $d27 $d28 $d29 $d30 $d31 $fpinst2 $mvfr0 $mvfr1 $mvfr2 $p0 $q0 $q1 $q2 $q3 $q8 $q9 $q10 $q11 $q12 $q13 $q14 $q15 $r0 $r1 $r2 $r3 $r12 $s0 $s1 $s2 $s3 $s4 $s5 $s6 $s7 $s8 $s9 $s10 $s11 $s12 $s13 $s14 $s15 $d0_d2 $d1_d3 $d2_d4 $d3_d5 $d4_d6 $d5_d7 $d6_d8 $d7_d9 $d14_d16 $d15_d17 $d16_d18 $d17_d19 $d18_d20 $d19_d21 $d20_d22 $d21_d23 $d22_d24 $d23_d25 $d24_d26 $d25_d27 $d26_d28 $d27_d29 $d28_d30 $d29_d31 $q0_q1 $q1_q2 $q2_q3 $q3_q4 $q7_q8 $q8_q9 $q9_q10 $q10_q11 $q11_q12 $q12_q13 $q13_q14 $q14_q15 $q0_q1_q2_q3 $q1_q2_q3_q4 $q2_q3_q4_q5 $q3_q4_q5_q6 $q5_q6_q7_q8 $q6_q7_q8_q9 $q7_q8_q9_q10 $q8_q9_q10_q11 $q9_q10_q11_q12 $q10_q11_q12_q13 $q11_q12_q13_q14 $q12_q13_q14_q15 $r0_r1 $r2_r3 $r12_sp $d0_d1_d2 $d1_d2_d3 $d2_d3_d4 $d3_d4_d5 $d4_d5_d6 $d5_d6_d7 $d6_d7_d8 $d7_d8_d9 $d14_d15_d16 $d15_d16_d17 $d16_d17_d18 $d17_d18_d19 $d18_d19_d20 $d19_d20_d21 $d20_d21_d22 $d21_d22_d23 $d22_d23_d24 $d23_d24_d25 $d24_d25_d26 $d25_d26_d27 $d26_d27_d28 $d27_d28_d29 $d28_d29_d30 $d29_d30_d31 $d0_d2_d4 $d1_d3_d5 $d2_d4_d6 $d3_d5_d7 $d4_d6_d8 $d5_d7_d9 $d6_d8_d10 $d7_d9_d11 $d12_d14_d16 $d13_d15_d17 $d14_d16_d18 $d15_d17_d19 $d16_d18_d20 $d17_d19_d21 $d18_d20_d22 $d19_d21_d23 $d20_d22_d24 $d21_d23_d25 $d22_d24_d26 $d23_d25_d27 $d24_d26_d28 $d25_d27_d29 $d26_d28_d30 $d27_d29_d31 $d0_d2_d4_d6 $d1_d3_d5_d7 $d2_d4_d6_d8 $d3_d5_d7_d9 $d4_d6_d8_d10 $d5_d7_d9_d11 $d6_d8_d10_d12 $d7_d9_d11_d13 $d10_d12_d14_d16 $d11_d13_d15_d17 $d12_d14_d16_d18 $d13_d15_d17_d19 $d14_d16_d18_d20 $d15_d17_d19_d21 $d16_d18_d20_d22 $d17_d19_d21_d23 $d18_d20_d22_d24 $d19_d21_d23_d25 $d20_d22_d24_d26 $d21_d23_d25_d27 $d22_d24_d26_d28 $d23_d25_d27_d29 $d24_d26_d28_d30 $d25_d27_d29_d31 $d1_d2 $d3_d4 $d5_d6 $d7_d8 $d15_d16 $d17_d18 $d19_d20 $d21_d22 
$d23_d24 $d25_d26 $d27_d28 $d29_d30 $d1_d2_d3_d4 $d3_d4_d5_d6 $d5_d6_d7_d8 $d7_d8_d9_d10 $d13_d14_d15_d16 $d15_d16_d17_d18 $d17_d18_d19_d20 $d19_d20_d21_d22 $d21_d22_d23_d24 $d23_d24_d25_d26 $d25_d26_d27_d28 $d27_d28_d29_d30
call void @bar1()
call void @bar2()
ret void
diff --git a/llvm/test/CodeGen/ARM/llrint-conv.ll b/llvm/test/CodeGen/ARM/llrint-conv.ll
index a1a04db..7274a8b 100644
--- a/llvm/test/CodeGen/ARM/llrint-conv.ll
+++ b/llvm/test/CodeGen/ARM/llrint-conv.ll
@@ -1,7 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=armv7-none-eabi -float-abi=soft | FileCheck %s --check-prefixes=CHECK,CHECK-SOFT
; RUN: llc < %s -mtriple=armv7-none-eabihf -mattr=+vfp2 -float-abi=hard | FileCheck %s --check-prefixes=CHECK,CHECK-NOFP16
-; RUN: llc < %s -mtriple=armv7-none-eabihf -mattr=+vfp2,+fullfp16 -float-abi=hard | FileCheck %s --check-prefixes=CHECK,CHECK-FP16
+; RUN: llc < %s -mtriple=armv8-none-eabihf -mattr=+fp-armv8 -float-abi=hard | FileCheck %s --check-prefixes=CHECK,CHECK-FPv8
+; RUN: llc < %s -mtriple=armv8-none-eabihf -mattr=+fp-armv8,+fullfp16 -float-abi=hard | FileCheck %s --check-prefixes=CHECK,CHECK-FP16
define i64 @testmsxh_builtin(half %x) {
; CHECK-SOFT-LABEL: testmsxh_builtin:
@@ -22,6 +23,14 @@ define i64 @testmsxh_builtin(half %x) {
; CHECK-NOFP16-NEXT: bl llrintf
; CHECK-NOFP16-NEXT: pop {r11, pc}
;
+; CHECK-FPv8-LABEL: testmsxh_builtin:
+; CHECK-FPv8: @ %bb.0: @ %entry
+; CHECK-FPv8-NEXT: .save {r11, lr}
+; CHECK-FPv8-NEXT: push {r11, lr}
+; CHECK-FPv8-NEXT: vcvtb.f32.f16 s0, s0
+; CHECK-FPv8-NEXT: bl llrintf
+; CHECK-FPv8-NEXT: pop {r11, pc}
+;
; CHECK-FP16-LABEL: testmsxh_builtin:
; CHECK-FP16: @ %bb.0: @ %entry
; CHECK-FP16-NEXT: .save {r11, lr}
diff --git a/llvm/test/CodeGen/ARM/lrint-conv.ll b/llvm/test/CodeGen/ARM/lrint-conv.ll
index 23a2685..2de2349 100644
--- a/llvm/test/CodeGen/ARM/lrint-conv.ll
+++ b/llvm/test/CodeGen/ARM/lrint-conv.ll
@@ -1,14 +1,43 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=armv7-none-eabi -float-abi=soft | FileCheck %s --check-prefixes=CHECK,CHECK-SOFT
; RUN: llc < %s -mtriple=armv7-none-eabihf -mattr=+vfp2 -float-abi=hard | FileCheck %s --check-prefixes=CHECK,CHECK-NOFP16
-; RUN: llc < %s -mtriple=armv7-none-eabihf -mattr=+vfp2,+fullfp16 -float-abi=hard | FileCheck %s --check-prefixes=CHECK,CHECK-FP16
+; RUN: llc < %s -mtriple=armv8-none-eabihf -mattr=+fp-armv8 -float-abi=hard | FileCheck %s --check-prefixes=CHECK,CHECK-FPv8
+; RUN: llc < %s -mtriple=armv8-none-eabihf -mattr=+fp-armv8,+fullfp16 -float-abi=hard | FileCheck %s --check-prefixes=CHECK,CHECK-FP16
-; FIXME: crash
-; define i32 @testmswh_builtin(half %x) {
-; entry:
-; %0 = tail call i32 @llvm.lrint.i32.f16(half %x)
-; ret i32 %0
-; }
+define i32 @testmswh_builtin(half %x) {
+; CHECK-SOFT-LABEL: testmswh_builtin:
+; CHECK-SOFT: @ %bb.0: @ %entry
+; CHECK-SOFT-NEXT: .save {r11, lr}
+; CHECK-SOFT-NEXT: push {r11, lr}
+; CHECK-SOFT-NEXT: bl __aeabi_h2f
+; CHECK-SOFT-NEXT: pop {r11, lr}
+; CHECK-SOFT-NEXT: b lrintf
+;
+; CHECK-NOFP16-LABEL: testmswh_builtin:
+; CHECK-NOFP16: @ %bb.0: @ %entry
+; CHECK-NOFP16-NEXT: .save {r11, lr}
+; CHECK-NOFP16-NEXT: push {r11, lr}
+; CHECK-NOFP16-NEXT: vmov r0, s0
+; CHECK-NOFP16-NEXT: bl __aeabi_h2f
+; CHECK-NOFP16-NEXT: vmov s0, r0
+; CHECK-NOFP16-NEXT: pop {r11, lr}
+; CHECK-NOFP16-NEXT: b lrintf
+;
+; CHECK-FPv8-LABEL: testmswh_builtin:
+; CHECK-FPv8: @ %bb.0: @ %entry
+; CHECK-FPv8-NEXT: vcvtb.f32.f16 s0, s0
+; CHECK-FPv8-NEXT: b lrintf
+;
+; CHECK-FP16-LABEL: testmswh_builtin:
+; CHECK-FP16: @ %bb.0: @ %entry
+; CHECK-FP16-NEXT: vrintx.f16 s0, s0
+; CHECK-FP16-NEXT: vcvt.s32.f16 s0, s0
+; CHECK-FP16-NEXT: vmov r0, s0
+; CHECK-FP16-NEXT: bx lr
+entry:
+ %0 = tail call i32 @llvm.lrint.i32.f16(half %x)
+ ret i32 %0
+}
define i32 @testmsws_builtin(float %x) {
; CHECK-LABEL: testmsws_builtin:
@@ -39,8 +68,3 @@ entry:
%0 = tail call i32 @llvm.lrint.i32.f128(fp128 %x)
ret i32 %0
}
-
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; CHECK-FP16: {{.*}}
-; CHECK-NOFP16: {{.*}}
-; CHECK-SOFT: {{.*}}
diff --git a/llvm/test/CodeGen/ARM/misched-prevent-erase-history-of-subunits.mir b/llvm/test/CodeGen/ARM/misched-prevent-erase-history-of-subunits.mir
index 46f3e4b..17d6619 100644
--- a/llvm/test/CodeGen/ARM/misched-prevent-erase-history-of-subunits.mir
+++ b/llvm/test/CodeGen/ARM/misched-prevent-erase-history-of-subunits.mir
@@ -14,7 +14,7 @@
# CHECK: SU(1): %1:dpr = VABSD %0:dpr, 14, $noreg
# CHECK: SU(2): %2:dpr = VLDRD %const.0, 0, 14, $noreg :: (load (s64) from constant-pool)
# CHECK: SU(4): %3:rgpr = t2MOVi 0, 14, $noreg, $noreg
-# CHECK: SU(3): VCMPD %1:dpr, %2:dpr, 14, $noreg, implicit-def $fpscr_nzcv
+# CHECK: SU(3): VCMPD %1:dpr, %2:dpr, 14, $noreg, implicit-def $fpscr_nzcv, implicit $fpscr_rm
# CHECK: SU(5): $r0 = COPY %3:rgpr
---
name: test
@@ -29,7 +29,7 @@ body: |
%0:dpr = COPY $d0
%1:dpr = VABSD %0, 14 /* CC::al */, $noreg
%2:dpr = VLDRD %const.0, 0, 14 /* CC::al */, $noreg :: (load (s64) from constant-pool)
- VCMPD %1, %2, 14 /* CC::al */, $noreg, implicit-def $fpscr_nzcv
+ VCMPD %1, %2, 14 /* CC::al */, $noreg, implicit-def $fpscr_nzcv, implicit $fpscr_rm
%4:rgpr = t2MOVi 0, 14 /* CC::al */, $noreg, $noreg
$r0 = COPY %4
tBX_RET 14 /* CC::al */, $noreg, implicit killed $r0
diff --git a/llvm/test/CodeGen/ARM/vector-lrint.ll b/llvm/test/CodeGen/ARM/vector-lrint.ll
index c1159da..c3c8884 100644
--- a/llvm/test/CodeGen/ARM/vector-lrint.ll
+++ b/llvm/test/CodeGen/ARM/vector-lrint.ll
@@ -9,31 +9,1290 @@
; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=armebv7-unknown-none-eabihf -mattr=+neon | FileCheck %s --check-prefixes=BE-I32
; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=armebv7-unknown-none-eabihf -mattr=+neon | FileCheck %s --check-prefixes=BE-I64
-; FIXME: crash "Do not know how to soft promote this operator's operand!"
-; define <1 x iXLen> @lrint_v1f16(<1 x half> %x) {
-; %a = call <1 x iXLen> @llvm.lrint.v1iXLen.v1f16(<1 x half> %x)
-; ret <1 x iXLen> %a
-; }
+define <1 x iXLen> @lrint_v1f16(<1 x half> %x) {
+; LE-I32-LABEL: lrint_v1f16:
+; LE-I32: @ %bb.0:
+; LE-I32-NEXT: .save {r11, lr}
+; LE-I32-NEXT: push {r11, lr}
+; LE-I32-NEXT: vmov r0, s0
+; LE-I32-NEXT: bl __aeabi_f2h
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: pop {r11, pc}
+;
+; LE-I64-LABEL: lrint_v1f16:
+; LE-I64: @ %bb.0:
+; LE-I64-NEXT: .save {r11, lr}
+; LE-I64-NEXT: push {r11, lr}
+; LE-I64-NEXT: vmov r0, s0
+; LE-I64-NEXT: bl __aeabi_f2h
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov s0, r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.32 d0[0], r0
+; LE-I64-NEXT: vmov.32 d0[1], r1
+; LE-I64-NEXT: pop {r11, pc}
+;
+; BE-I32-LABEL: lrint_v1f16:
+; BE-I32: @ %bb.0:
+; BE-I32-NEXT: .save {r11, lr}
+; BE-I32-NEXT: push {r11, lr}
+; BE-I32-NEXT: vmov r0, s0
+; BE-I32-NEXT: bl __aeabi_f2h
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: pop {r11, pc}
+;
+; BE-I64-LABEL: lrint_v1f16:
+; BE-I64: @ %bb.0:
+; BE-I64-NEXT: .save {r11, lr}
+; BE-I64-NEXT: push {r11, lr}
+; BE-I64-NEXT: vmov r0, s0
+; BE-I64-NEXT: bl __aeabi_f2h
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vmov s0, r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.32 d16[0], r0
+; BE-I64-NEXT: vmov.32 d16[1], r1
+; BE-I64-NEXT: vrev64.32 d0, d16
+; BE-I64-NEXT: pop {r11, pc}
+ %a = call <1 x iXLen> @llvm.lrint.v1iXLen.v1f16(<1 x half> %x)
+ ret <1 x iXLen> %a
+}
-; define <2 x iXLen> @lrint_v2f16(<2 x half> %x) {
-; %a = call <2 x iXLen> @llvm.lrint.v2iXLen.v2f16(<2 x half> %x)
-; ret <2 x iXLen> %a
-; }
+define <2 x iXLen> @lrint_v2f16(<2 x half> %x) {
+; LE-I32-LABEL: lrint_v2f16:
+; LE-I32: @ %bb.0:
+; LE-I32-NEXT: .save {r11, lr}
+; LE-I32-NEXT: push {r11, lr}
+; LE-I32-NEXT: .vsave {d8}
+; LE-I32-NEXT: vpush {d8}
+; LE-I32-NEXT: vmov r0, s0
+; LE-I32-NEXT: vmov.f32 s16, s1
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov r1, s16
+; LE-I32-NEXT: vmov.32 d8[0], r0
+; LE-I32-NEXT: mov r0, r1
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov.32 d8[1], r0
+; LE-I32-NEXT: vorr d0, d8, d8
+; LE-I32-NEXT: vpop {d8}
+; LE-I32-NEXT: pop {r11, pc}
+;
+; LE-I64-LABEL: lrint_v2f16:
+; LE-I64: @ %bb.0:
+; LE-I64-NEXT: .save {r4, r5, r11, lr}
+; LE-I64-NEXT: push {r4, r5, r11, lr}
+; LE-I64-NEXT: .vsave {d8, d9}
+; LE-I64-NEXT: vpush {d8, d9}
+; LE-I64-NEXT: vmov r0, s1
+; LE-I64-NEXT: vmov.f32 s16, s0
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov s0, r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: mov r4, r0
+; LE-I64-NEXT: vmov r0, s16
+; LE-I64-NEXT: mov r5, r1
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov s0, r0
+; LE-I64-NEXT: vmov.32 d9[0], r4
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.32 d8[0], r0
+; LE-I64-NEXT: vmov.32 d9[1], r5
+; LE-I64-NEXT: vmov.32 d8[1], r1
+; LE-I64-NEXT: vorr q0, q4, q4
+; LE-I64-NEXT: vpop {d8, d9}
+; LE-I64-NEXT: pop {r4, r5, r11, pc}
+;
+; BE-I32-LABEL: lrint_v2f16:
+; BE-I32: @ %bb.0:
+; BE-I32-NEXT: .save {r11, lr}
+; BE-I32-NEXT: push {r11, lr}
+; BE-I32-NEXT: .vsave {d8}
+; BE-I32-NEXT: vpush {d8}
+; BE-I32-NEXT: vmov r0, s0
+; BE-I32-NEXT: vmov.f32 s16, s1
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov r1, s16
+; BE-I32-NEXT: vmov.32 d8[0], r0
+; BE-I32-NEXT: mov r0, r1
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov.32 d8[1], r0
+; BE-I32-NEXT: vrev64.32 d0, d8
+; BE-I32-NEXT: vpop {d8}
+; BE-I32-NEXT: pop {r11, pc}
+;
+; BE-I64-LABEL: lrint_v2f16:
+; BE-I64: @ %bb.0:
+; BE-I64-NEXT: .save {r4, r5, r11, lr}
+; BE-I64-NEXT: push {r4, r5, r11, lr}
+; BE-I64-NEXT: .vsave {d8}
+; BE-I64-NEXT: vpush {d8}
+; BE-I64-NEXT: vmov r0, s1
+; BE-I64-NEXT: vmov.f32 s16, s0
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vmov s0, r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: mov r4, r0
+; BE-I64-NEXT: vmov r0, s16
+; BE-I64-NEXT: mov r5, r1
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vmov s0, r0
+; BE-I64-NEXT: vmov.32 d8[0], r4
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.32 d16[0], r0
+; BE-I64-NEXT: vmov.32 d8[1], r5
+; BE-I64-NEXT: vmov.32 d16[1], r1
+; BE-I64-NEXT: vrev64.32 d1, d8
+; BE-I64-NEXT: vrev64.32 d0, d16
+; BE-I64-NEXT: vpop {d8}
+; BE-I64-NEXT: pop {r4, r5, r11, pc}
+ %a = call <2 x iXLen> @llvm.lrint.v2iXLen.v2f16(<2 x half> %x)
+ ret <2 x iXLen> %a
+}
-; define <4 x iXLen> @lrint_v4f16(<4 x half> %x) {
-; %a = call <4 x iXLen> @llvm.lrint.v4iXLen.v4f16(<4 x half> %x)
-; ret <4 x iXLen> %a
-; }
+define <4 x iXLen> @lrint_v4f16(<4 x half> %x) {
+; LE-I32-LABEL: lrint_v4f16:
+; LE-I32: @ %bb.0:
+; LE-I32-NEXT: .save {r4, r5, r11, lr}
+; LE-I32-NEXT: push {r4, r5, r11, lr}
+; LE-I32-NEXT: .vsave {d8, d9, d10, d11}
+; LE-I32-NEXT: vpush {d8, d9, d10, d11}
+; LE-I32-NEXT: vmov r0, s3
+; LE-I32-NEXT: vmov.f32 s16, s2
+; LE-I32-NEXT: vmov.f32 s18, s1
+; LE-I32-NEXT: vmov.f32 s20, s0
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: mov r4, r0
+; LE-I32-NEXT: vmov r0, s16
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: mov r5, r0
+; LE-I32-NEXT: vmov r0, s20
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov s0, r5
+; LE-I32-NEXT: vmov.32 d10[0], r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov.32 d11[0], r0
+; LE-I32-NEXT: vmov r0, s18
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: vmov.32 d11[1], r4
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov.32 d10[1], r0
+; LE-I32-NEXT: vorr q0, q5, q5
+; LE-I32-NEXT: vpop {d8, d9, d10, d11}
+; LE-I32-NEXT: pop {r4, r5, r11, pc}
+;
+; LE-I64-LABEL: lrint_v4f16:
+; LE-I64: @ %bb.0:
+; LE-I64-NEXT: .save {r4, r5, r6, r7, r11, lr}
+; LE-I64-NEXT: push {r4, r5, r6, r7, r11, lr}
+; LE-I64-NEXT: .vsave {d12, d13}
+; LE-I64-NEXT: vpush {d12, d13}
+; LE-I64-NEXT: .vsave {d8, d9, d10}
+; LE-I64-NEXT: vpush {d8, d9, d10}
+; LE-I64-NEXT: vmov r0, s1
+; LE-I64-NEXT: vmov.f32 s16, s3
+; LE-I64-NEXT: vmov.f32 s20, s2
+; LE-I64-NEXT: vmov.f32 s18, s0
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov s0, r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: mov r5, r0
+; LE-I64-NEXT: vmov r0, s18
+; LE-I64-NEXT: mov r4, r1
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: mov r7, r0
+; LE-I64-NEXT: vmov r0, s16
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov s0, r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov s0, r7
+; LE-I64-NEXT: mov r6, r1
+; LE-I64-NEXT: vmov.32 d9[0], r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.32 d12[0], r0
+; LE-I64-NEXT: vmov r0, s20
+; LE-I64-NEXT: mov r7, r1
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov s0, r0
+; LE-I64-NEXT: vmov.32 d13[0], r5
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.32 d8[0], r0
+; LE-I64-NEXT: vmov.32 d13[1], r4
+; LE-I64-NEXT: vmov.32 d9[1], r6
+; LE-I64-NEXT: vmov.32 d12[1], r7
+; LE-I64-NEXT: vmov.32 d8[1], r1
+; LE-I64-NEXT: vorr q0, q6, q6
+; LE-I64-NEXT: vorr q1, q4, q4
+; LE-I64-NEXT: vpop {d8, d9, d10}
+; LE-I64-NEXT: vpop {d12, d13}
+; LE-I64-NEXT: pop {r4, r5, r6, r7, r11, pc}
+;
+; BE-I32-LABEL: lrint_v4f16:
+; BE-I32: @ %bb.0:
+; BE-I32-NEXT: .save {r4, r5, r11, lr}
+; BE-I32-NEXT: push {r4, r5, r11, lr}
+; BE-I32-NEXT: .vsave {d8, d9, d10, d11}
+; BE-I32-NEXT: vpush {d8, d9, d10, d11}
+; BE-I32-NEXT: vmov r0, s3
+; BE-I32-NEXT: vmov.f32 s16, s2
+; BE-I32-NEXT: vmov.f32 s18, s1
+; BE-I32-NEXT: vmov.f32 s20, s0
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: mov r4, r0
+; BE-I32-NEXT: vmov r0, s16
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: mov r5, r0
+; BE-I32-NEXT: vmov r0, s20
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov s0, r5
+; BE-I32-NEXT: vmov.32 d10[0], r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov.32 d11[0], r0
+; BE-I32-NEXT: vmov r0, s18
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: vmov.32 d11[1], r4
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov.32 d10[1], r0
+; BE-I32-NEXT: vrev64.32 q0, q5
+; BE-I32-NEXT: vpop {d8, d9, d10, d11}
+; BE-I32-NEXT: pop {r4, r5, r11, pc}
+;
+; BE-I64-LABEL: lrint_v4f16:
+; BE-I64: @ %bb.0:
+; BE-I64-NEXT: .save {r4, r5, r6, r7, r11, lr}
+; BE-I64-NEXT: push {r4, r5, r6, r7, r11, lr}
+; BE-I64-NEXT: .vsave {d8, d9, d10}
+; BE-I64-NEXT: vpush {d8, d9, d10}
+; BE-I64-NEXT: vmov r0, s1
+; BE-I64-NEXT: vmov.f32 s16, s3
+; BE-I64-NEXT: vmov.f32 s18, s2
+; BE-I64-NEXT: vmov.f32 s20, s0
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vmov s0, r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: mov r5, r0
+; BE-I64-NEXT: vmov r0, s20
+; BE-I64-NEXT: mov r4, r1
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: mov r7, r0
+; BE-I64-NEXT: vmov r0, s16
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vmov s0, r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov s0, r7
+; BE-I64-NEXT: mov r6, r1
+; BE-I64-NEXT: vmov.32 d8[0], r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.32 d10[0], r0
+; BE-I64-NEXT: vmov r0, s18
+; BE-I64-NEXT: mov r7, r1
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vmov s0, r0
+; BE-I64-NEXT: vmov.32 d9[0], r5
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.32 d16[0], r0
+; BE-I64-NEXT: vmov.32 d9[1], r4
+; BE-I64-NEXT: vmov.32 d8[1], r6
+; BE-I64-NEXT: vmov.32 d10[1], r7
+; BE-I64-NEXT: vmov.32 d16[1], r1
+; BE-I64-NEXT: vrev64.32 d1, d9
+; BE-I64-NEXT: vrev64.32 d3, d8
+; BE-I64-NEXT: vrev64.32 d0, d10
+; BE-I64-NEXT: vrev64.32 d2, d16
+; BE-I64-NEXT: vpop {d8, d9, d10}
+; BE-I64-NEXT: pop {r4, r5, r6, r7, r11, pc}
+ %a = call <4 x iXLen> @llvm.lrint.v4iXLen.v4f16(<4 x half> %x)
+ ret <4 x iXLen> %a
+}
-; define <8 x iXLen> @lrint_v8f16(<8 x half> %x) {
-; %a = call <8 x iXLen> @llvm.lrint.v8iXLen.v8f16(<8 x half> %x)
-; ret <8 x iXLen> %a
-; }
+define <8 x iXLen> @lrint_v8f16(<8 x half> %x) {
+; LE-I32-LABEL: lrint_v8f16:
+; LE-I32: @ %bb.0:
+; LE-I32-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr}
+; LE-I32-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr}
+; LE-I32-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14}
+; LE-I32-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14}
+; LE-I32-NEXT: vmov r0, s7
+; LE-I32-NEXT: vmov.f32 s18, s6
+; LE-I32-NEXT: vmov.f32 s16, s5
+; LE-I32-NEXT: vmov.f32 s20, s4
+; LE-I32-NEXT: vmov.f32 s22, s3
+; LE-I32-NEXT: vmov.f32 s24, s2
+; LE-I32-NEXT: vmov.f32 s26, s1
+; LE-I32-NEXT: vmov.f32 s28, s0
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: mov r8, r0
+; LE-I32-NEXT: vmov r0, s26
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: mov r9, r0
+; LE-I32-NEXT: vmov r0, s22
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: mov r6, r0
+; LE-I32-NEXT: vmov r0, s28
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: mov r7, r0
+; LE-I32-NEXT: vmov r0, s24
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: mov r4, r0
+; LE-I32-NEXT: vmov r0, s18
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: mov r5, r0
+; LE-I32-NEXT: vmov r0, s20
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov s0, r5
+; LE-I32-NEXT: vmov.32 d10[0], r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov s0, r4
+; LE-I32-NEXT: vmov.32 d11[0], r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov s0, r7
+; LE-I32-NEXT: vmov.32 d13[0], r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov s0, r6
+; LE-I32-NEXT: vmov.32 d12[0], r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov s0, r9
+; LE-I32-NEXT: vmov.32 d13[1], r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov.32 d12[1], r0
+; LE-I32-NEXT: vmov r0, s16
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: vmov.32 d11[1], r8
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov.32 d10[1], r0
+; LE-I32-NEXT: vorr q0, q6, q6
+; LE-I32-NEXT: vorr q1, q5, q5
+; LE-I32-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14}
+; LE-I32-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc}
+;
+; LE-I64-LABEL: lrint_v8f16:
+; LE-I64: @ %bb.0:
+; LE-I64-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-I64-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-I64-NEXT: .pad #4
+; LE-I64-NEXT: sub sp, sp, #4
+; LE-I64-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEXT: .pad #8
+; LE-I64-NEXT: sub sp, sp, #8
+; LE-I64-NEXT: vmov r0, s1
+; LE-I64-NEXT: vstr s6, [sp, #4] @ 4-byte Spill
+; LE-I64-NEXT: vmov.f32 s16, s7
+; LE-I64-NEXT: vmov.f32 s18, s5
+; LE-I64-NEXT: vmov.f32 s20, s4
+; LE-I64-NEXT: vmov.f32 s22, s3
+; LE-I64-NEXT: vmov.f32 s24, s2
+; LE-I64-NEXT: vmov.f32 s26, s0
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov s0, r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: mov r9, r0
+; LE-I64-NEXT: vmov r0, s26
+; LE-I64-NEXT: str r1, [sp] @ 4-byte Spill
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: mov r10, r0
+; LE-I64-NEXT: vmov r0, s22
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: mov r5, r0
+; LE-I64-NEXT: vmov r0, s24
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: mov r7, r0
+; LE-I64-NEXT: vmov r0, s18
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: mov r6, r0
+; LE-I64-NEXT: vmov r0, s20
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: mov r4, r0
+; LE-I64-NEXT: vmov r0, s16
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov s0, r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov s0, r4
+; LE-I64-NEXT: mov r11, r1
+; LE-I64-NEXT: vmov.32 d11[0], r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov s0, r6
+; LE-I64-NEXT: mov r8, r1
+; LE-I64-NEXT: vmov.32 d12[0], r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov s0, r7
+; LE-I64-NEXT: mov r6, r1
+; LE-I64-NEXT: vmov.32 d13[0], r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov s0, r5
+; LE-I64-NEXT: mov r7, r1
+; LE-I64-NEXT: vmov.32 d14[0], r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov s0, r10
+; LE-I64-NEXT: mov r5, r1
+; LE-I64-NEXT: vmov.32 d15[0], r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vldr s0, [sp, #4] @ 4-byte Reload
+; LE-I64-NEXT: mov r4, r1
+; LE-I64-NEXT: vmov.32 d8[0], r0
+; LE-I64-NEXT: vmov r0, s0
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov s0, r0
+; LE-I64-NEXT: vmov.32 d9[0], r9
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.32 d10[0], r0
+; LE-I64-NEXT: ldr r0, [sp] @ 4-byte Reload
+; LE-I64-NEXT: vmov.32 d15[1], r5
+; LE-I64-NEXT: vmov.32 d9[1], r0
+; LE-I64-NEXT: vmov.32 d13[1], r6
+; LE-I64-NEXT: vmov.32 d11[1], r11
+; LE-I64-NEXT: vmov.32 d8[1], r4
+; LE-I64-NEXT: vmov.32 d14[1], r7
+; LE-I64-NEXT: vorr q0, q4, q4
+; LE-I64-NEXT: vmov.32 d12[1], r8
+; LE-I64-NEXT: vorr q1, q7, q7
+; LE-I64-NEXT: vmov.32 d10[1], r1
+; LE-I64-NEXT: vorr q2, q6, q6
+; LE-I64-NEXT: vorr q3, q5, q5
+; LE-I64-NEXT: add sp, sp, #8
+; LE-I64-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEXT: add sp, sp, #4
+; LE-I64-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; BE-I32-LABEL: lrint_v8f16:
+; BE-I32: @ %bb.0:
+; BE-I32-NEXT: .save {r4, r5, r6, r7, r8, r9, r11, lr}
+; BE-I32-NEXT: push {r4, r5, r6, r7, r8, r9, r11, lr}
+; BE-I32-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14}
+; BE-I32-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14}
+; BE-I32-NEXT: vmov r0, s1
+; BE-I32-NEXT: vmov.f32 s18, s7
+; BE-I32-NEXT: vmov.f32 s20, s6
+; BE-I32-NEXT: vmov.f32 s16, s5
+; BE-I32-NEXT: vmov.f32 s22, s4
+; BE-I32-NEXT: vmov.f32 s24, s3
+; BE-I32-NEXT: vmov.f32 s26, s2
+; BE-I32-NEXT: vmov.f32 s28, s0
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: mov r8, r0
+; BE-I32-NEXT: vmov r0, s24
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: mov r9, r0
+; BE-I32-NEXT: vmov r0, s18
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: mov r6, r0
+; BE-I32-NEXT: vmov r0, s26
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: mov r7, r0
+; BE-I32-NEXT: vmov r0, s20
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: mov r4, r0
+; BE-I32-NEXT: vmov r0, s28
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: mov r5, r0
+; BE-I32-NEXT: vmov r0, s22
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov s0, r5
+; BE-I32-NEXT: vmov.32 d10[0], r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov s0, r4
+; BE-I32-NEXT: vmov.32 d12[0], r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov s0, r7
+; BE-I32-NEXT: vmov.32 d11[0], r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov s0, r6
+; BE-I32-NEXT: vmov.32 d13[0], r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov s0, r9
+; BE-I32-NEXT: vmov.32 d11[1], r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov.32 d13[1], r0
+; BE-I32-NEXT: vmov r0, s16
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: vmov.32 d12[1], r8
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov.32 d10[1], r0
+; BE-I32-NEXT: vrev64.32 q0, q6
+; BE-I32-NEXT: vrev64.32 q1, q5
+; BE-I32-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14}
+; BE-I32-NEXT: pop {r4, r5, r6, r7, r8, r9, r11, pc}
+;
+; BE-I64-LABEL: lrint_v8f16:
+; BE-I64: @ %bb.0:
+; BE-I64-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-I64-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-I64-NEXT: .pad #4
+; BE-I64-NEXT: sub sp, sp, #4
+; BE-I64-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14}
+; BE-I64-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14}
+; BE-I64-NEXT: .pad #8
+; BE-I64-NEXT: sub sp, sp, #8
+; BE-I64-NEXT: vmov r0, s1
+; BE-I64-NEXT: vmov.f32 s18, s7
+; BE-I64-NEXT: vmov.f32 s16, s6
+; BE-I64-NEXT: vmov.f32 s20, s5
+; BE-I64-NEXT: vmov.f32 s22, s4
+; BE-I64-NEXT: vmov.f32 s24, s3
+; BE-I64-NEXT: vmov.f32 s26, s2
+; BE-I64-NEXT: vmov.f32 s28, s0
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vmov s0, r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: mov r9, r0
+; BE-I64-NEXT: vmov r0, s28
+; BE-I64-NEXT: str r1, [sp, #4] @ 4-byte Spill
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: mov r10, r0
+; BE-I64-NEXT: vmov r0, s24
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: mov r5, r0
+; BE-I64-NEXT: vmov r0, s26
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: mov r7, r0
+; BE-I64-NEXT: vmov r0, s20
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: mov r6, r0
+; BE-I64-NEXT: vmov r0, s22
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: mov r4, r0
+; BE-I64-NEXT: vmov r0, s18
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vmov s0, r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov s0, r4
+; BE-I64-NEXT: mov r11, r1
+; BE-I64-NEXT: vmov.32 d9[0], r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov s0, r6
+; BE-I64-NEXT: mov r8, r1
+; BE-I64-NEXT: vmov.32 d10[0], r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov s0, r7
+; BE-I64-NEXT: mov r6, r1
+; BE-I64-NEXT: vmov.32 d11[0], r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov s0, r5
+; BE-I64-NEXT: mov r7, r1
+; BE-I64-NEXT: vmov.32 d12[0], r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov s0, r10
+; BE-I64-NEXT: mov r5, r1
+; BE-I64-NEXT: vmov.32 d13[0], r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.32 d14[0], r0
+; BE-I64-NEXT: vmov r0, s16
+; BE-I64-NEXT: mov r4, r1
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vmov s0, r0
+; BE-I64-NEXT: vmov.32 d8[0], r9
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.32 d16[0], r0
+; BE-I64-NEXT: ldr r0, [sp, #4] @ 4-byte Reload
+; BE-I64-NEXT: vmov.32 d13[1], r5
+; BE-I64-NEXT: vmov.32 d8[1], r0
+; BE-I64-NEXT: vmov.32 d11[1], r6
+; BE-I64-NEXT: vmov.32 d9[1], r11
+; BE-I64-NEXT: vmov.32 d14[1], r4
+; BE-I64-NEXT: vmov.32 d12[1], r7
+; BE-I64-NEXT: vmov.32 d10[1], r8
+; BE-I64-NEXT: vmov.32 d16[1], r1
+; BE-I64-NEXT: vrev64.32 d1, d8
+; BE-I64-NEXT: vrev64.32 d3, d13
+; BE-I64-NEXT: vrev64.32 d5, d11
+; BE-I64-NEXT: vrev64.32 d7, d9
+; BE-I64-NEXT: vrev64.32 d0, d14
+; BE-I64-NEXT: vrev64.32 d2, d12
+; BE-I64-NEXT: vrev64.32 d4, d10
+; BE-I64-NEXT: vrev64.32 d6, d16
+; BE-I64-NEXT: add sp, sp, #8
+; BE-I64-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14}
+; BE-I64-NEXT: add sp, sp, #4
+; BE-I64-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+ %a = call <8 x iXLen> @llvm.lrint.v8iXLen.v8f16(<8 x half> %x)
+ ret <8 x iXLen> %a
+}
-; define <16 x iXLen> @lrint_v16f16(<16 x half> %x) {
-; %a = call <16 x iXLen> @llvm.lrint.v16iXLen.v16f16(<16 x half> %x)
-; ret <16 x iXLen> %a
-; }
+define <16 x iXLen> @lrint_v16f16(<16 x half> %x) {
+; LE-I32-LABEL: lrint_v16f16:
+; LE-I32: @ %bb.0:
+; LE-I32-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr}
+; LE-I32-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr}
+; LE-I32-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEXT: .pad #8
+; LE-I32-NEXT: sub sp, sp, #8
+; LE-I32-NEXT: vmov r0, s15
+; LE-I32-NEXT: vstr s13, [sp, #4] @ 4-byte Spill
+; LE-I32-NEXT: vmov.f32 s26, s14
+; LE-I32-NEXT: vstr s0, [sp] @ 4-byte Spill
+; LE-I32-NEXT: vmov.f32 s20, s12
+; LE-I32-NEXT: vmov.f32 s22, s11
+; LE-I32-NEXT: vmov.f32 s18, s10
+; LE-I32-NEXT: vmov.f32 s17, s9
+; LE-I32-NEXT: vmov.f32 s24, s8
+; LE-I32-NEXT: vmov.f32 s19, s7
+; LE-I32-NEXT: vmov.f32 s30, s6
+; LE-I32-NEXT: vmov.f32 s21, s5
+; LE-I32-NEXT: vmov.f32 s16, s4
+; LE-I32-NEXT: vmov.f32 s23, s3
+; LE-I32-NEXT: vmov.f32 s28, s2
+; LE-I32-NEXT: vmov.f32 s25, s1
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: mov r8, r0
+; LE-I32-NEXT: vmov r0, s17
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: mov r9, r0
+; LE-I32-NEXT: vmov r0, s22
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: mov r10, r0
+; LE-I32-NEXT: vmov r0, s21
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: mov r7, r0
+; LE-I32-NEXT: vmov r0, s19
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: mov r4, r0
+; LE-I32-NEXT: vmov r0, s25
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: mov r5, r0
+; LE-I32-NEXT: vmov r0, s23
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: mov r6, r0
+; LE-I32-NEXT: vmov r0, s20
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov.32 d10[0], r0
+; LE-I32-NEXT: vmov r0, s26
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov.32 d11[0], r0
+; LE-I32-NEXT: vmov r0, s24
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov.32 d12[0], r0
+; LE-I32-NEXT: vmov r0, s18
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov.32 d13[0], r0
+; LE-I32-NEXT: vmov r0, s16
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov.32 d8[0], r0
+; LE-I32-NEXT: vmov r0, s30
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov.32 d9[0], r0
+; LE-I32-NEXT: vmov r0, s28
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vldr s0, [sp] @ 4-byte Reload
+; LE-I32-NEXT: vmov.32 d15[0], r0
+; LE-I32-NEXT: vmov r0, s0
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov s0, r6
+; LE-I32-NEXT: vmov.32 d14[0], r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov s0, r5
+; LE-I32-NEXT: vmov.32 d15[1], r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov s0, r4
+; LE-I32-NEXT: vmov.32 d14[1], r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov s0, r7
+; LE-I32-NEXT: vmov.32 d9[1], r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov s0, r10
+; LE-I32-NEXT: vmov.32 d8[1], r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov s0, r9
+; LE-I32-NEXT: vmov.32 d13[1], r0
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vldr s0, [sp, #4] @ 4-byte Reload
+; LE-I32-NEXT: vmov.32 d12[1], r0
+; LE-I32-NEXT: vmov r0, s0
+; LE-I32-NEXT: bl __aeabi_h2f
+; LE-I32-NEXT: vmov s0, r0
+; LE-I32-NEXT: vmov.32 d11[1], r8
+; LE-I32-NEXT: bl lrintf
+; LE-I32-NEXT: vmov.32 d10[1], r0
+; LE-I32-NEXT: vorr q0, q7, q7
+; LE-I32-NEXT: vorr q1, q4, q4
+; LE-I32-NEXT: vorr q2, q6, q6
+; LE-I32-NEXT: vorr q3, q5, q5
+; LE-I32-NEXT: add sp, sp, #8
+; LE-I32-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I32-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc}
+;
+; LE-I64-LABEL: lrint_v16f16:
+; LE-I64: @ %bb.0:
+; LE-I64-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-I64-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; LE-I64-NEXT: .pad #4
+; LE-I64-NEXT: sub sp, sp, #4
+; LE-I64-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEXT: .pad #120
+; LE-I64-NEXT: sub sp, sp, #120
+; LE-I64-NEXT: mov r11, r0
+; LE-I64-NEXT: vmov r0, s7
+; LE-I64-NEXT: vstr s15, [sp, #24] @ 4-byte Spill
+; LE-I64-NEXT: vmov.f32 s23, s13
+; LE-I64-NEXT: vstr s14, [sp, #100] @ 4-byte Spill
+; LE-I64-NEXT: vmov.f32 s25, s12
+; LE-I64-NEXT: vmov.f32 s27, s11
+; LE-I64-NEXT: vstr s10, [sp, #104] @ 4-byte Spill
+; LE-I64-NEXT: vstr s9, [sp, #108] @ 4-byte Spill
+; LE-I64-NEXT: vmov.f32 s24, s8
+; LE-I64-NEXT: vmov.f32 s19, s6
+; LE-I64-NEXT: vmov.f32 s29, s5
+; LE-I64-NEXT: vmov.f32 s17, s4
+; LE-I64-NEXT: vmov.f32 s16, s3
+; LE-I64-NEXT: vmov.f32 s21, s2
+; LE-I64-NEXT: vmov.f32 s26, s1
+; LE-I64-NEXT: vmov.f32 s18, s0
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov s0, r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: mov r7, r0
+; LE-I64-NEXT: vmov r0, s25
+; LE-I64-NEXT: str r1, [sp, #56] @ 4-byte Spill
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov s0, r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: mov r5, r0
+; LE-I64-NEXT: vmov r0, s27
+; LE-I64-NEXT: str r1, [sp, #116] @ 4-byte Spill
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov s0, r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: mov r6, r0
+; LE-I64-NEXT: vmov r0, s29
+; LE-I64-NEXT: str r1, [sp, #112] @ 4-byte Spill
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov s0, r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.32 d15[0], r0
+; LE-I64-NEXT: vmov r0, s23
+; LE-I64-NEXT: mov r4, r1
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov s0, r0
+; LE-I64-NEXT: add lr, sp, #80
+; LE-I64-NEXT: vmov.32 d17[0], r6
+; LE-I64-NEXT: vstmia lr, {d16, d17} @ 16-byte Spill
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: mov r6, r0
+; LE-I64-NEXT: vmov r0, s17
+; LE-I64-NEXT: vmov r8, s21
+; LE-I64-NEXT: str r1, [sp, #76] @ 4-byte Spill
+; LE-I64-NEXT: vmov r10, s19
+; LE-I64-NEXT: vmov.32 d10[0], r5
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov s0, r0
+; LE-I64-NEXT: add lr, sp, #40
+; LE-I64-NEXT: vmov.32 d11[0], r6
+; LE-I64-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.32 d14[0], r0
+; LE-I64-NEXT: mov r0, r10
+; LE-I64-NEXT: mov r9, r1
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov s0, r0
+; LE-I64-NEXT: vmov.32 d11[0], r7
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.32 d10[0], r0
+; LE-I64-NEXT: mov r0, r8
+; LE-I64-NEXT: mov r7, r1
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: mov r6, r0
+; LE-I64-NEXT: ldr r0, [sp, #56] @ 4-byte Reload
+; LE-I64-NEXT: vmov.32 d11[1], r0
+; LE-I64-NEXT: vmov r0, s18
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: mov r5, r0
+; LE-I64-NEXT: vmov r0, s16
+; LE-I64-NEXT: vmov.32 d10[1], r7
+; LE-I64-NEXT: add lr, sp, #56
+; LE-I64-NEXT: vstmia lr, {d10, d11} @ 16-byte Spill
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov s0, r0
+; LE-I64-NEXT: vmov.32 d15[1], r4
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.32 d9[0], r0
+; LE-I64-NEXT: vmov r0, s26
+; LE-I64-NEXT: add lr, sp, #24
+; LE-I64-NEXT: vmov r8, s24
+; LE-I64-NEXT: vmov.32 d14[1], r9
+; LE-I64-NEXT: mov r10, r1
+; LE-I64-NEXT: vmov s24, r5
+; LE-I64-NEXT: vldr s0, [sp, #24] @ 4-byte Reload
+; LE-I64-NEXT: vstmia lr, {d14, d15} @ 16-byte Spill
+; LE-I64-NEXT: vmov r7, s0
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov.f32 s0, s24
+; LE-I64-NEXT: vmov s22, r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.f32 s0, s22
+; LE-I64-NEXT: mov r5, r1
+; LE-I64-NEXT: vmov.32 d14[0], r0
+; LE-I64-NEXT: vmov s24, r6
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.32 d15[0], r0
+; LE-I64-NEXT: mov r0, r7
+; LE-I64-NEXT: mov r6, r1
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov.f32 s0, s24
+; LE-I64-NEXT: vmov s22, r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.f32 s0, s22
+; LE-I64-NEXT: vmov.32 d8[0], r0
+; LE-I64-NEXT: add lr, sp, #8
+; LE-I64-NEXT: mov r9, r1
+; LE-I64-NEXT: vmov.32 d15[1], r6
+; LE-I64-NEXT: vstmia lr, {d8, d9} @ 16-byte Spill
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.32 d13[0], r0
+; LE-I64-NEXT: mov r0, r8
+; LE-I64-NEXT: mov r6, r1
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vldr s0, [sp, #100] @ 4-byte Reload
+; LE-I64-NEXT: mov r7, r0
+; LE-I64-NEXT: vmov.32 d14[1], r5
+; LE-I64-NEXT: vmov r0, s0
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vldr s0, [sp, #104] @ 4-byte Reload
+; LE-I64-NEXT: vmov s20, r0
+; LE-I64-NEXT: vmov.32 d13[1], r6
+; LE-I64-NEXT: vmov r4, s0
+; LE-I64-NEXT: vldr s0, [sp, #108] @ 4-byte Reload
+; LE-I64-NEXT: vmov r0, s0
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov.f32 s0, s20
+; LE-I64-NEXT: vmov s16, r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.f32 s0, s16
+; LE-I64-NEXT: mov r5, r1
+; LE-I64-NEXT: vmov.32 d12[0], r0
+; LE-I64-NEXT: vmov s18, r7
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.32 d11[0], r0
+; LE-I64-NEXT: mov r0, r4
+; LE-I64-NEXT: mov r6, r1
+; LE-I64-NEXT: bl __aeabi_h2f
+; LE-I64-NEXT: vmov.f32 s0, s18
+; LE-I64-NEXT: vmov s16, r0
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: vmov.f32 s0, s16
+; LE-I64-NEXT: vmov.32 d10[0], r0
+; LE-I64-NEXT: mov r4, r1
+; LE-I64-NEXT: vmov.32 d11[1], r6
+; LE-I64-NEXT: bl lrintf
+; LE-I64-NEXT: add lr, sp, #80
+; LE-I64-NEXT: vmov.32 d10[1], r4
+; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT: add lr, sp, #40
+; LE-I64-NEXT: vldmia lr, {d18, d19} @ 16-byte Reload
+; LE-I64-NEXT: add lr, sp, #8
+; LE-I64-NEXT: vmov.32 d16[0], r0
+; LE-I64-NEXT: ldr r0, [sp, #76] @ 4-byte Reload
+; LE-I64-NEXT: vldmia lr, {d20, d21} @ 16-byte Reload
+; LE-I64-NEXT: add lr, sp, #24
+; LE-I64-NEXT: vmov.32 d19[1], r0
+; LE-I64-NEXT: ldr r0, [sp, #116] @ 4-byte Reload
+; LE-I64-NEXT: vmov.32 d21[1], r10
+; LE-I64-NEXT: vmov.32 d18[1], r0
+; LE-I64-NEXT: ldr r0, [sp, #112] @ 4-byte Reload
+; LE-I64-NEXT: vmov.32 d12[1], r5
+; LE-I64-NEXT: vmov.32 d17[1], r0
+; LE-I64-NEXT: add r0, r11, #64
+; LE-I64-NEXT: vmov.32 d16[1], r1
+; LE-I64-NEXT: vst1.64 {d10, d11}, [r0:128]!
+; LE-I64-NEXT: vst1.64 {d16, d17}, [r0:128]!
+; LE-I64-NEXT: vst1.64 {d18, d19}, [r0:128]!
+; LE-I64-NEXT: vmov.32 d20[1], r9
+; LE-I64-NEXT: vst1.64 {d12, d13}, [r0:128]
+; LE-I64-NEXT: vst1.64 {d14, d15}, [r11:128]!
+; LE-I64-NEXT: vst1.64 {d20, d21}, [r11:128]!
+; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT: add lr, sp, #56
+; LE-I64-NEXT: vst1.64 {d16, d17}, [r11:128]!
+; LE-I64-NEXT: vldmia lr, {d16, d17} @ 16-byte Reload
+; LE-I64-NEXT: vst1.64 {d16, d17}, [r11:128]
+; LE-I64-NEXT: add sp, sp, #120
+; LE-I64-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; LE-I64-NEXT: add sp, sp, #4
+; LE-I64-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+;
+; BE-I32-LABEL: lrint_v16f16:
+; BE-I32: @ %bb.0:
+; BE-I32-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, lr}
+; BE-I32-NEXT: push {r4, r5, r6, r7, r8, r9, r10, lr}
+; BE-I32-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEXT: .pad #16
+; BE-I32-NEXT: sub sp, sp, #16
+; BE-I32-NEXT: vmov r0, s1
+; BE-I32-NEXT: vstr s14, [sp, #4] @ 4-byte Spill
+; BE-I32-NEXT: vmov.f32 s30, s15
+; BE-I32-NEXT: vstr s13, [sp, #12] @ 4-byte Spill
+; BE-I32-NEXT: vmov.f32 s17, s12
+; BE-I32-NEXT: vstr s10, [sp, #8] @ 4-byte Spill
+; BE-I32-NEXT: vmov.f32 s19, s11
+; BE-I32-NEXT: vstr s8, [sp] @ 4-byte Spill
+; BE-I32-NEXT: vmov.f32 s21, s9
+; BE-I32-NEXT: vmov.f32 s23, s7
+; BE-I32-NEXT: vmov.f32 s24, s6
+; BE-I32-NEXT: vmov.f32 s25, s5
+; BE-I32-NEXT: vmov.f32 s26, s4
+; BE-I32-NEXT: vmov.f32 s27, s3
+; BE-I32-NEXT: vmov.f32 s28, s2
+; BE-I32-NEXT: vmov.f32 s29, s0
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: mov r8, r0
+; BE-I32-NEXT: vmov r0, s27
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: mov r9, r0
+; BE-I32-NEXT: vmov r0, s25
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: mov r10, r0
+; BE-I32-NEXT: vmov r0, s23
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: mov r7, r0
+; BE-I32-NEXT: vmov r0, s21
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: mov r4, r0
+; BE-I32-NEXT: vmov r0, s19
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: mov r5, r0
+; BE-I32-NEXT: vmov r0, s30
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: mov r6, r0
+; BE-I32-NEXT: vmov r0, s17
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov.32 d8[0], r0
+; BE-I32-NEXT: vmov r0, s29
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov.32 d10[0], r0
+; BE-I32-NEXT: vmov r0, s28
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov.32 d11[0], r0
+; BE-I32-NEXT: vmov r0, s26
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov.32 d14[0], r0
+; BE-I32-NEXT: vmov r0, s24
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vldr s0, [sp] @ 4-byte Reload
+; BE-I32-NEXT: vmov.32 d15[0], r0
+; BE-I32-NEXT: vmov r0, s0
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vldr s0, [sp, #4] @ 4-byte Reload
+; BE-I32-NEXT: vmov.32 d12[0], r0
+; BE-I32-NEXT: vmov r0, s0
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vldr s0, [sp, #8] @ 4-byte Reload
+; BE-I32-NEXT: vmov.32 d9[0], r0
+; BE-I32-NEXT: vmov r0, s0
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov s0, r6
+; BE-I32-NEXT: vmov.32 d13[0], r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov s0, r5
+; BE-I32-NEXT: vmov.32 d9[1], r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov s0, r4
+; BE-I32-NEXT: vmov.32 d13[1], r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov s0, r7
+; BE-I32-NEXT: vmov.32 d12[1], r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov s0, r10
+; BE-I32-NEXT: vmov.32 d15[1], r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov s0, r9
+; BE-I32-NEXT: vmov.32 d14[1], r0
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vldr s0, [sp, #12] @ 4-byte Reload
+; BE-I32-NEXT: vmov.32 d11[1], r0
+; BE-I32-NEXT: vmov r0, s0
+; BE-I32-NEXT: bl __aeabi_h2f
+; BE-I32-NEXT: vmov s0, r0
+; BE-I32-NEXT: vmov.32 d10[1], r8
+; BE-I32-NEXT: bl lrintf
+; BE-I32-NEXT: vmov.32 d8[1], r0
+; BE-I32-NEXT: vrev64.32 q0, q5
+; BE-I32-NEXT: vrev64.32 q1, q7
+; BE-I32-NEXT: vrev64.32 q2, q6
+; BE-I32-NEXT: vrev64.32 q3, q4
+; BE-I32-NEXT: add sp, sp, #16
+; BE-I32-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I32-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, pc}
+;
+; BE-I64-LABEL: lrint_v16f16:
+; BE-I64: @ %bb.0:
+; BE-I64-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-I64-NEXT: push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; BE-I64-NEXT: .pad #4
+; BE-I64-NEXT: sub sp, sp, #4
+; BE-I64-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEXT: .pad #112
+; BE-I64-NEXT: sub sp, sp, #112
+; BE-I64-NEXT: mov r11, r0
+; BE-I64-NEXT: vmov r0, s14
+; BE-I64-NEXT: vmov.f32 s17, s15
+; BE-I64-NEXT: vstr s13, [sp, #52] @ 4-byte Spill
+; BE-I64-NEXT: vmov.f32 s21, s12
+; BE-I64-NEXT: vstr s10, [sp, #68] @ 4-byte Spill
+; BE-I64-NEXT: vmov.f32 s23, s11
+; BE-I64-NEXT: vstr s7, [sp, #72] @ 4-byte Spill
+; BE-I64-NEXT: vmov.f32 s19, s9
+; BE-I64-NEXT: vstr s4, [sp, #28] @ 4-byte Spill
+; BE-I64-NEXT: vmov.f32 s26, s8
+; BE-I64-NEXT: vmov.f32 s24, s6
+; BE-I64-NEXT: vmov.f32 s18, s5
+; BE-I64-NEXT: vmov.f32 s25, s3
+; BE-I64-NEXT: vmov.f32 s16, s2
+; BE-I64-NEXT: vmov.f32 s27, s1
+; BE-I64-NEXT: vmov.f32 s29, s0
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vmov s0, r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: mov r8, r0
+; BE-I64-NEXT: vmov r0, s29
+; BE-I64-NEXT: mov r4, r1
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: mov r9, r0
+; BE-I64-NEXT: vmov r0, s27
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: mov r7, r0
+; BE-I64-NEXT: vmov r0, s21
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: mov r6, r0
+; BE-I64-NEXT: vmov r0, s25
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: mov r5, r0
+; BE-I64-NEXT: vmov r0, s23
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vmov s0, r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.32 d16[0], r0
+; BE-I64-NEXT: vmov s0, r5
+; BE-I64-NEXT: str r1, [sp, #108] @ 4-byte Spill
+; BE-I64-NEXT: vstr d16, [sp, #96] @ 8-byte Spill
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.32 d16[0], r0
+; BE-I64-NEXT: vmov s0, r6
+; BE-I64-NEXT: str r1, [sp, #92] @ 4-byte Spill
+; BE-I64-NEXT: vstr d16, [sp, #80] @ 8-byte Spill
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.32 d16[0], r0
+; BE-I64-NEXT: vmov s0, r7
+; BE-I64-NEXT: str r1, [sp, #76] @ 4-byte Spill
+; BE-I64-NEXT: vstr d16, [sp, #56] @ 8-byte Spill
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov s0, r9
+; BE-I64-NEXT: mov r10, r1
+; BE-I64-NEXT: vmov.32 d14[0], r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.32 d15[0], r0
+; BE-I64-NEXT: vmov r0, s17
+; BE-I64-NEXT: mov r5, r1
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vmov s0, r0
+; BE-I64-NEXT: vmov.32 d10[0], r8
+; BE-I64-NEXT: vmov r6, s19
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.32 d11[0], r0
+; BE-I64-NEXT: mov r0, r6
+; BE-I64-NEXT: mov r7, r1
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: mov r6, r0
+; BE-I64-NEXT: vmov r0, s18
+; BE-I64-NEXT: vmov.32 d10[1], r4
+; BE-I64-NEXT: vstr d10, [sp, #40] @ 8-byte Spill
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: mov r4, r0
+; BE-I64-NEXT: vmov r0, s16
+; BE-I64-NEXT: vmov.32 d11[1], r7
+; BE-I64-NEXT: vstr d11, [sp, #32] @ 8-byte Spill
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vmov.32 d15[1], r5
+; BE-I64-NEXT: vmov s0, r0
+; BE-I64-NEXT: vstr d15, [sp, #16] @ 8-byte Spill
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vldr s0, [sp, #28] @ 4-byte Reload
+; BE-I64-NEXT: vmov r5, s26
+; BE-I64-NEXT: vmov.32 d16[0], r0
+; BE-I64-NEXT: vmov s26, r4
+; BE-I64-NEXT: vmov r0, s0
+; BE-I64-NEXT: mov r8, r1
+; BE-I64-NEXT: vmov.32 d14[1], r10
+; BE-I64-NEXT: vmov r4, s24
+; BE-I64-NEXT: vstr d16, [sp] @ 8-byte Spill
+; BE-I64-NEXT: vstr d14, [sp, #8] @ 8-byte Spill
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vmov.f32 s0, s26
+; BE-I64-NEXT: vmov s22, r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.f32 s0, s22
+; BE-I64-NEXT: mov r7, r1
+; BE-I64-NEXT: vmov.32 d13[0], r0
+; BE-I64-NEXT: vmov s24, r6
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.32 d14[0], r0
+; BE-I64-NEXT: mov r0, r4
+; BE-I64-NEXT: mov r6, r1
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vmov.f32 s0, s24
+; BE-I64-NEXT: vmov s22, r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.f32 s0, s22
+; BE-I64-NEXT: mov r9, r1
+; BE-I64-NEXT: vmov.32 d12[0], r0
+; BE-I64-NEXT: vmov.32 d14[1], r6
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.32 d11[0], r0
+; BE-I64-NEXT: mov r0, r5
+; BE-I64-NEXT: mov r6, r1
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vldr s0, [sp, #52] @ 4-byte Reload
+; BE-I64-NEXT: mov r4, r0
+; BE-I64-NEXT: vmov.32 d13[1], r7
+; BE-I64-NEXT: vmov r0, s0
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vldr s0, [sp, #68] @ 4-byte Reload
+; BE-I64-NEXT: vmov s20, r0
+; BE-I64-NEXT: vmov.32 d11[1], r6
+; BE-I64-NEXT: vmov r7, s0
+; BE-I64-NEXT: vldr s0, [sp, #72] @ 4-byte Reload
+; BE-I64-NEXT: vmov r0, s0
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vmov.f32 s0, s20
+; BE-I64-NEXT: vmov s16, r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.f32 s0, s16
+; BE-I64-NEXT: mov r5, r1
+; BE-I64-NEXT: vmov.32 d10[0], r0
+; BE-I64-NEXT: vmov s18, r4
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.32 d15[0], r0
+; BE-I64-NEXT: mov r0, r7
+; BE-I64-NEXT: mov r4, r1
+; BE-I64-NEXT: bl __aeabi_h2f
+; BE-I64-NEXT: vmov.f32 s0, s18
+; BE-I64-NEXT: vmov s16, r0
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.f32 s0, s16
+; BE-I64-NEXT: mov r6, r1
+; BE-I64-NEXT: vmov.32 d9[0], r0
+; BE-I64-NEXT: vmov.32 d15[1], r4
+; BE-I64-NEXT: bl lrintf
+; BE-I64-NEXT: vmov.32 d24[0], r0
+; BE-I64-NEXT: ldr r0, [sp, #76] @ 4-byte Reload
+; BE-I64-NEXT: vldr d23, [sp, #56] @ 8-byte Reload
+; BE-I64-NEXT: vldr d20, [sp, #8] @ 8-byte Reload
+; BE-I64-NEXT: vmov.32 d23[1], r0
+; BE-I64-NEXT: ldr r0, [sp, #92] @ 4-byte Reload
+; BE-I64-NEXT: vldr d22, [sp, #80] @ 8-byte Reload
+; BE-I64-NEXT: vldr d26, [sp, #16] @ 8-byte Reload
+; BE-I64-NEXT: vrev64.32 d21, d20
+; BE-I64-NEXT: vmov.32 d22[1], r0
+; BE-I64-NEXT: ldr r0, [sp, #108] @ 4-byte Reload
+; BE-I64-NEXT: vldr d30, [sp] @ 8-byte Reload
+; BE-I64-NEXT: vldr d25, [sp, #96] @ 8-byte Reload
+; BE-I64-NEXT: vrev64.32 d20, d26
+; BE-I64-NEXT: vldr d26, [sp, #32] @ 8-byte Reload
+; BE-I64-NEXT: vmov.32 d10[1], r5
+; BE-I64-NEXT: vmov.32 d12[1], r9
+; BE-I64-NEXT: vldr d28, [sp, #40] @ 8-byte Reload
+; BE-I64-NEXT: vrev64.32 d27, d26
+; BE-I64-NEXT: vmov.32 d25[1], r0
+; BE-I64-NEXT: add r0, r11, #64
+; BE-I64-NEXT: vmov.32 d30[1], r8
+; BE-I64-NEXT: vmov.32 d9[1], r6
+; BE-I64-NEXT: vrev64.32 d26, d28
+; BE-I64-NEXT: vrev64.32 d29, d10
+; BE-I64-NEXT: vmov.32 d24[1], r1
+; BE-I64-NEXT: vrev64.32 d1, d12
+; BE-I64-NEXT: vrev64.32 d28, d23
+; BE-I64-NEXT: vrev64.32 d23, d22
+; BE-I64-NEXT: vrev64.32 d22, d30
+; BE-I64-NEXT: vrev64.32 d31, d25
+; BE-I64-NEXT: vrev64.32 d0, d9
+; BE-I64-NEXT: vrev64.32 d30, d24
+; BE-I64-NEXT: vst1.64 {d0, d1}, [r0:128]!
+; BE-I64-NEXT: vst1.64 {d30, d31}, [r0:128]!
+; BE-I64-NEXT: vst1.64 {d28, d29}, [r0:128]!
+; BE-I64-NEXT: vrev64.32 d19, d13
+; BE-I64-NEXT: vst1.64 {d26, d27}, [r0:128]
+; BE-I64-NEXT: vst1.64 {d20, d21}, [r11:128]!
+; BE-I64-NEXT: vrev64.32 d18, d14
+; BE-I64-NEXT: vst1.64 {d22, d23}, [r11:128]!
+; BE-I64-NEXT: vrev64.32 d17, d15
+; BE-I64-NEXT: vrev64.32 d16, d11
+; BE-I64-NEXT: vst1.64 {d18, d19}, [r11:128]!
+; BE-I64-NEXT: vst1.64 {d16, d17}, [r11:128]
+; BE-I64-NEXT: add sp, sp, #112
+; BE-I64-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15}
+; BE-I64-NEXT: add sp, sp, #4
+; BE-I64-NEXT: pop {r4, r5, r6, r7, r8, r9, r10, r11, pc}
+ %a = call <16 x iXLen> @llvm.lrint.v16iXLen.v16f16(<16 x half> %x)
+ ret <16 x iXLen> %a
+}
define <1 x iXLen> @lrint_v1f32(<1 x float> %x) {
; LE-I32-LABEL: lrint_v1f32:
diff --git a/llvm/test/CodeGen/ARM/vlldm-vlstm-uops.mir b/llvm/test/CodeGen/ARM/vlldm-vlstm-uops.mir
index 8fa9337..03cb8e3 100644
--- a/llvm/test/CodeGen/ARM/vlldm-vlstm-uops.mir
+++ b/llvm/test/CodeGen/ARM/vlldm-vlstm-uops.mir
@@ -60,9 +60,9 @@ body: |
$sp = t2STMDB_UPD $sp, 14, $noreg, $r4, killed $r5, killed $r6, killed $r7, killed $r8, killed $r9, killed $r10, killed $r11
$r4 = t2BICri $r4, 1, 14, $noreg, $noreg
$sp = tSUBspi $sp, 34, 14, $noreg
- VLSTM $sp, 14 /* CC::al */, $noreg, 0, implicit-def $vpr, implicit-def $fpscr, implicit-def $fpscr_nzcv, implicit undef $vpr, implicit undef $fpscr, implicit undef $fpscr_nzcv, implicit undef $d0, implicit undef $d1, implicit undef $d2, implicit undef $d3, implicit undef $d4, implicit undef $d5, implicit undef $d6, implicit undef $d7, implicit $d8, implicit $d9, implicit $d10, implicit $d11, implicit $d12, implicit $d13, implicit $d14, implicit $d15
+ VLSTM $sp, 14 /* CC::al */, $noreg, 0, implicit-def $vpr, implicit-def $fpscr, implicit-def $fpscr_nzcv, implicit-def $fpscr_rm, implicit undef $vpr, implicit undef $fpscr, implicit undef $fpscr_nzcv, implicit undef $fpscr_rm, implicit undef $d0, implicit undef $d1, implicit undef $d2, implicit undef $d3, implicit undef $d4, implicit undef $d5, implicit undef $d6, implicit undef $d7, implicit $d8, implicit $d9, implicit $d10, implicit $d11, implicit $d12, implicit $d13, implicit $d14, implicit $d15
tBLXNSr 14, $noreg, killed $r4, csr_aapcs, implicit-def $lr, implicit $sp, implicit-def dead $lr, implicit $sp, implicit-def $sp, implicit-def $q0, implicit-def $q1, implicit-def $q2, implicit-def $q3, implicit-def $q4, implicit-def $q5, implicit-def $q6, implicit-def $q7
- VLLDM $sp, 14 /* CC::al */, $noreg, 0, implicit-def $vpr, implicit-def $fpscr, implicit-def $fpscr_nzcv, implicit-def $d0, implicit-def $d1, implicit-def $d2, implicit-def $d3, implicit-def $d4, implicit-def $d5, implicit-def $d6, implicit-def $d7, implicit-def $d8, implicit-def $d9, implicit-def $d10, implicit-def $d11, implicit-def $d12, implicit-def $d13, implicit-def $d14, implicit-def $d15
+ VLLDM $sp, 14 /* CC::al */, $noreg, 0, implicit-def $vpr, implicit-def $fpscr, implicit-def $fpscr_nzcv, implicit-def $fpscr_rm, implicit-def $d0, implicit-def $d1, implicit-def $d2, implicit-def $d3, implicit-def $d4, implicit-def $d5, implicit-def $d6, implicit-def $d7, implicit-def $d8, implicit-def $d9, implicit-def $d10, implicit-def $d11, implicit-def $d12, implicit-def $d13, implicit-def $d14, implicit-def $d15
$sp = tADDspi $sp, 34, 14, $noreg
$sp = t2LDMIA_UPD $sp, 14, $noreg, def $r4, def $r5, def $r6, def $r7, def $r8, def $r9, def $r10, def $r11
$sp = t2LDMIA_RET $sp, 14, $noreg, def $r4, def $pc
diff --git a/llvm/test/CodeGen/BPF/addr-space-memintrinsic-gep.ll b/llvm/test/CodeGen/BPF/addr-space-memintrinsic-gep.ll
new file mode 100644
index 0000000..1db8391
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/addr-space-memintrinsic-gep.ll
@@ -0,0 +1,60 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt --bpf-check-and-opt-ir -S -mtriple=bpf-pc-linux < %s | FileCheck %s
+
+@page1 = dso_local local_unnamed_addr addrspace(1) global [10 x ptr] zeroinitializer, align 8
+@page2 = dso_local local_unnamed_addr addrspace(1) global [10 x ptr] zeroinitializer, align 8
+
+define dso_local void @test_memset() local_unnamed_addr {
+; CHECK-LABEL: define dso_local void @test_memset() local_unnamed_addr {
+; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 addrspacecast (ptr addrspace(1) getelementptr inbounds nuw (i8, ptr addrspace(1) @page1, i64 16) to ptr), i8 0, i64 16, i1 false)
+; CHECK-NEXT: ret void
+;
+ tail call void @llvm.memset.p1.i64(ptr addrspace(1) noundef nonnull align 8 dereferenceable(16) getelementptr inbounds nuw (i8, ptr addrspace(1) @page1, i64 16), i8 0, i64 16, i1 false)
+ ret void
+}
+
+declare void @llvm.memset.p1.i64(ptr addrspace(1) writeonly captures(none), i8, i64, i1 immarg)
+
+define dso_local void @test_memcpy() local_unnamed_addr {
+; CHECK-LABEL: define dso_local void @test_memcpy() local_unnamed_addr {
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 addrspacecast (ptr addrspace(1) getelementptr inbounds nuw (i8, ptr addrspace(1) @page2, i64 8) to ptr), ptr align 8 addrspacecast (ptr addrspace(1) getelementptr inbounds nuw (i8, ptr addrspace(1) @page1, i64 8) to ptr), i64 16, i1 false)
+; CHECK-NEXT: ret void
+;
+ tail call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) noundef nonnull align 8 dereferenceable(16) getelementptr inbounds nuw (i8, ptr addrspace(1) @page2, i64 8), ptr addrspace(1) noundef nonnull align 8 dereferenceable(16) getelementptr inbounds nuw (i8, ptr addrspace(1) @page1, i64 8), i64 16, i1 false)
+ ret void
+}
+
+declare void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) noalias writeonly captures(none), ptr addrspace(1) noalias readonly captures(none), i64, i1 immarg)
+
+define dso_local void @test_memmove() local_unnamed_addr {
+; CHECK-LABEL: define dso_local void @test_memmove() local_unnamed_addr {
+; CHECK-NEXT: call void @llvm.memmove.p0.p0.i64(ptr align 8 addrspacecast (ptr addrspace(1) getelementptr inbounds nuw (i8, ptr addrspace(1) @page2, i64 16) to ptr), ptr align 8 addrspacecast (ptr addrspace(1) getelementptr inbounds nuw (i8, ptr addrspace(1) @page2, i64 8) to ptr), i64 16, i1 false)
+; CHECK-NEXT: ret void
+;
+ tail call void @llvm.memmove.p1.p1.i64(ptr addrspace(1) noundef nonnull align 8 dereferenceable(16) getelementptr inbounds nuw (i8, ptr addrspace(1) @page2, i64 16), ptr addrspace(1) noundef nonnull align 8 dereferenceable(16) getelementptr inbounds nuw (i8, ptr addrspace(1) @page2, i64 8), i64 16, i1 false)
+ ret void
+}
+
+declare void @llvm.memmove.p1.p1.i64(ptr addrspace(1) writeonly captures(none), ptr addrspace(1) readonly captures(none), i64, i1 immarg)
+
+define dso_local void @test_memset_inline() local_unnamed_addr {
+; CHECK-LABEL: define dso_local void @test_memset_inline() local_unnamed_addr {
+; CHECK-NEXT: call void @llvm.memset.inline.p0.i64(ptr align 8 addrspacecast (ptr addrspace(1) getelementptr inbounds nuw (i8, ptr addrspace(1) @page1, i64 16) to ptr), i8 0, i64 16, i1 false)
+; CHECK-NEXT: ret void
+;
+ tail call void @llvm.memset.inline.p1.i64(ptr addrspace(1) nonnull align 8 getelementptr inbounds nuw (i8, ptr addrspace(1) @page1, i64 16), i8 0, i64 16, i1 false)
+ ret void
+}
+
+declare void @llvm.memset.inline.p1.i64(ptr addrspace(1) writeonly captures(none), i8, i64, i1 immarg)
+
+define dso_local void @test_memcpy_inline() local_unnamed_addr {
+; CHECK-LABEL: define dso_local void @test_memcpy_inline() local_unnamed_addr {
+; CHECK-NEXT: call void @llvm.memcpy.inline.p0.p0.i64(ptr align 8 addrspacecast (ptr addrspace(1) getelementptr inbounds nuw (i8, ptr addrspace(1) @page2, i64 8) to ptr), ptr align 8 addrspacecast (ptr addrspace(1) getelementptr inbounds nuw (i8, ptr addrspace(1) @page1, i64 8) to ptr), i64 16, i1 false)
+; CHECK-NEXT: ret void
+;
+ tail call void @llvm.memcpy.inline.p1.p1.i64(ptr addrspace(1) nonnull align 8 getelementptr inbounds nuw (i8, ptr addrspace(1) @page2, i64 8), ptr addrspace(1) nonnull align 8 getelementptr inbounds nuw (i8, ptr addrspace(1) @page1, i64 8), i64 16, i1 false)
+ ret void
+}
+
+declare void @llvm.memcpy.inline.p1.p1.i64(ptr addrspace(1) noalias writeonly captures(none), ptr addrspace(1) noalias readonly captures(none), i64, i1 immarg)
diff --git a/llvm/test/CodeGen/BPF/addr-space-memintrinsic-no-gep.ll b/llvm/test/CodeGen/BPF/addr-space-memintrinsic-no-gep.ll
new file mode 100644
index 0000000..62fa2e4
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/addr-space-memintrinsic-no-gep.ll
@@ -0,0 +1,49 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt --bpf-check-and-opt-ir -S -mtriple=bpf-pc-linux < %s | FileCheck %s
+
+@page1 = dso_local local_unnamed_addr addrspace(1) global [10 x ptr] zeroinitializer, align 8
+@page2 = dso_local local_unnamed_addr addrspace(1) global [10 x ptr] zeroinitializer, align 8
+
+define dso_local void @test_memset() local_unnamed_addr {
+; CHECK-LABEL: define dso_local void @test_memset() local_unnamed_addr {
+; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 8 addrspacecast (ptr addrspace(1) @page1 to ptr), i8 0, i64 16, i1 false)
+; CHECK-NEXT: ret void
+;
+ tail call void @llvm.memset.p1.i64(ptr addrspace(1) noundef align 8 dereferenceable(16) @page1, i8 0, i64 16, i1 false)
+ ret void
+}
+
+declare void @llvm.memset.p1.i64(ptr addrspace(1) writeonly captures(none), i8, i64, i1 immarg)
+
+define dso_local void @test_memcpy() local_unnamed_addr {
+; CHECK-LABEL: define dso_local void @test_memcpy() local_unnamed_addr {
+; CHECK-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 8 addrspacecast (ptr addrspace(1) @page2 to ptr), ptr align 8 addrspacecast (ptr addrspace(1) @page1 to ptr), i64 16, i1 false)
+; CHECK-NEXT: ret void
+;
+ tail call void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) noundef align 8 dereferenceable(16) @page2, ptr addrspace(1) noundef align 8 dereferenceable(16) @page1, i64 16, i1 false)
+ ret void
+}
+
+declare void @llvm.memcpy.p1.p1.i64(ptr addrspace(1) noalias writeonly captures(none), ptr addrspace(1) noalias readonly captures(none), i64, i1 immarg)
+
+define dso_local void @test_memset_inline() local_unnamed_addr {
+; CHECK-LABEL: define dso_local void @test_memset_inline() local_unnamed_addr {
+; CHECK-NEXT: call void @llvm.memset.inline.p0.i64(ptr align 8 addrspacecast (ptr addrspace(1) @page1 to ptr), i8 0, i64 16, i1 false)
+; CHECK-NEXT: ret void
+;
+ tail call void @llvm.memset.inline.p1.i64(ptr addrspace(1) align 8 @page1, i8 0, i64 16, i1 false)
+ ret void
+}
+
+declare void @llvm.memset.inline.p1.i64(ptr addrspace(1) writeonly captures(none), i8, i64, i1 immarg)
+
+define dso_local void @test_memcpy_inline() local_unnamed_addr {
+; CHECK-LABEL: define dso_local void @test_memcpy_inline() local_unnamed_addr {
+; CHECK-NEXT: call void @llvm.memcpy.inline.p0.p0.i64(ptr align 8 addrspacecast (ptr addrspace(1) @page2 to ptr), ptr align 8 addrspacecast (ptr addrspace(1) @page1 to ptr), i64 16, i1 false)
+; CHECK-NEXT: ret void
+;
+ tail call void @llvm.memcpy.inline.p1.p1.i64(ptr addrspace(1) align 8 @page2, ptr addrspace(1) align 8 @page1, i64 16, i1 false)
+ ret void
+}
+
+declare void @llvm.memcpy.inline.p1.p1.i64(ptr addrspace(1) noalias writeonly captures(none), ptr addrspace(1) noalias readonly captures(none), i64, i1 immarg)
diff --git a/llvm/test/CodeGen/MIR2Vec/Inputs/mir2vec_dummy_2D_vocab.json b/llvm/test/CodeGen/MIR2Vec/Inputs/mir2vec_dummy_2D_vocab.json
new file mode 100644
index 0000000..2894fff
--- /dev/null
+++ b/llvm/test/CodeGen/MIR2Vec/Inputs/mir2vec_dummy_2D_vocab.json
@@ -0,0 +1,11 @@
+{
+ "entities" : {
+ "ABS_Fp":[1, 2],
+ "ADC":[3, 4],
+ "ADD":[5, 6],
+ "ADDPDrm":[7, 8],
+ "ADDPDrr":[9, 10],
+ "ADDPSrr":[11, 12],
+ "ADDSDrm":[13, 14]
+ }
+} \ No newline at end of file
diff --git a/llvm/test/CodeGen/MIR2Vec/Inputs/mir2vec_inconsistent_dims.json b/llvm/test/CodeGen/MIR2Vec/Inputs/mir2vec_inconsistent_dims.json
new file mode 100644
index 0000000..bf04163
--- /dev/null
+++ b/llvm/test/CodeGen/MIR2Vec/Inputs/mir2vec_inconsistent_dims.json
@@ -0,0 +1,7 @@
+{
+ "entities": {
+ "ADD": [1.0, 2.0, 3.0],
+ "SUB": [1.5],
+ "MUL": [2.0, 3.0]
+ }
+}
diff --git a/llvm/test/CodeGen/MIR2Vec/Inputs/mir2vec_invalid_vocab.json b/llvm/test/CodeGen/MIR2Vec/Inputs/mir2vec_invalid_vocab.json
new file mode 100644
index 0000000..585a85e
--- /dev/null
+++ b/llvm/test/CodeGen/MIR2Vec/Inputs/mir2vec_invalid_vocab.json
@@ -0,0 +1,5 @@
+{
+ "invalid_structure": {
+ "ADD": [ 1, 2, 3]
+ }
+} \ No newline at end of file
diff --git a/llvm/test/CodeGen/MIR2Vec/Inputs/mir2vec_zero_vocab.json b/llvm/test/CodeGen/MIR2Vec/Inputs/mir2vec_zero_vocab.json
new file mode 100644
index 0000000..63e8ccbd
--- /dev/null
+++ b/llvm/test/CodeGen/MIR2Vec/Inputs/mir2vec_zero_vocab.json
@@ -0,0 +1,12 @@
+{
+ "entities": {
+ "ADD": [],
+ "SUB": [],
+ "MUL": [],
+ "MOV": [],
+ "CMP": [],
+ "JMP": [],
+ "CALL": [],
+ "RET": []
+ }
+} \ No newline at end of file
diff --git a/llvm/test/CodeGen/MIR2Vec/Inputs/reference_x86_vocab_print.txt b/llvm/test/CodeGen/MIR2Vec/Inputs/reference_x86_vocab_print.txt
new file mode 100644
index 0000000..6327cff
--- /dev/null
+++ b/llvm/test/CodeGen/MIR2Vec/Inputs/reference_x86_vocab_print.txt
@@ -0,0 +1,6882 @@
+Key: AAA: [ 0.00 0.00 ]
+Key: AAD: [ 0.00 0.00 ]
+Key: AADD: [ 0.00 0.00 ]
+Key: AAM: [ 0.00 0.00 ]
+Key: AAND: [ 0.00 0.00 ]
+Key: AAS: [ 0.00 0.00 ]
+Key: ABS_F: [ 0.00 0.00 ]
+Key: ABS_Fp: [ 1.00 2.00 ]
+Key: ADC: [ 3.00 4.00 ]
+Key: ADCX: [ 0.00 0.00 ]
+Key: ADD: [ 5.00 6.00 ]
+Key: ADDPDrm: [ 7.00 8.00 ]
+Key: ADDPDrr: [ 9.00 10.00 ]
+Key: ADDPSrm: [ 0.00 0.00 ]
+Key: ADDPSrr: [ 11.00 12.00 ]
+Key: ADDR: [ 0.00 0.00 ]
+Key: ADDSDrm: [ 13.00 14.00 ]
+Key: ADDSDrm_Int: [ 0.00 0.00 ]
+Key: ADDSDrr: [ 0.00 0.00 ]
+Key: ADDSDrr_Int: [ 0.00 0.00 ]
+Key: ADDSSrm: [ 0.00 0.00 ]
+Key: ADDSSrm_Int: [ 0.00 0.00 ]
+Key: ADDSSrr: [ 0.00 0.00 ]
+Key: ADDSSrr_Int: [ 0.00 0.00 ]
+Key: ADDSUBPDrm: [ 0.00 0.00 ]
+Key: ADDSUBPDrr: [ 0.00 0.00 ]
+Key: ADDSUBPSrm: [ 0.00 0.00 ]
+Key: ADDSUBPSrr: [ 0.00 0.00 ]
+Key: ADD_F: [ 0.00 0.00 ]
+Key: ADD_FI: [ 0.00 0.00 ]
+Key: ADD_FPrST: [ 0.00 0.00 ]
+Key: ADD_FST: [ 0.00 0.00 ]
+Key: ADD_Fp: [ 0.00 0.00 ]
+Key: ADD_FpI: [ 0.00 0.00 ]
+Key: ADD_FrST: [ 0.00 0.00 ]
+Key: ADJCALLSTACKDOWN: [ 0.00 0.00 ]
+Key: ADJCALLSTACKUP: [ 0.00 0.00 ]
+Key: ADOX: [ 0.00 0.00 ]
+Key: AESDEC: [ 0.00 0.00 ]
+Key: AESDECLASTrm: [ 0.00 0.00 ]
+Key: AESDECLASTrr: [ 0.00 0.00 ]
+Key: AESDECWIDE: [ 0.00 0.00 ]
+Key: AESDECrm: [ 0.00 0.00 ]
+Key: AESDECrr: [ 0.00 0.00 ]
+Key: AESENC: [ 0.00 0.00 ]
+Key: AESENCLASTrm: [ 0.00 0.00 ]
+Key: AESENCLASTrr: [ 0.00 0.00 ]
+Key: AESENCWIDE: [ 0.00 0.00 ]
+Key: AESENCrm: [ 0.00 0.00 ]
+Key: AESENCrr: [ 0.00 0.00 ]
+Key: AESIMCrm: [ 0.00 0.00 ]
+Key: AESIMCrr: [ 0.00 0.00 ]
+Key: AESKEYGENASSISTrmi: [ 0.00 0.00 ]
+Key: AESKEYGENASSISTrri: [ 0.00 0.00 ]
+Key: AND: [ 0.00 0.00 ]
+Key: ANDN: [ 0.00 0.00 ]
+Key: ANDNPDrm: [ 0.00 0.00 ]
+Key: ANDNPDrr: [ 0.00 0.00 ]
+Key: ANDNPSrm: [ 0.00 0.00 ]
+Key: ANDNPSrr: [ 0.00 0.00 ]
+Key: ANDPDrm: [ 0.00 0.00 ]
+Key: ANDPDrr: [ 0.00 0.00 ]
+Key: ANDPSrm: [ 0.00 0.00 ]
+Key: ANDPSrr: [ 0.00 0.00 ]
+Key: ANNOTATION_LABEL: [ 0.00 0.00 ]
+Key: AOR: [ 0.00 0.00 ]
+Key: ARITH_FENCE: [ 0.00 0.00 ]
+Key: ARPL: [ 0.00 0.00 ]
+Key: ASAN_CHECK_MEMACCESS: [ 0.00 0.00 ]
+Key: AVX: [ 0.00 0.00 ]
+Key: AVX_SET: [ 0.00 0.00 ]
+Key: AXOR: [ 0.00 0.00 ]
+Key: BEXTR: [ 0.00 0.00 ]
+Key: BEXTRI: [ 0.00 0.00 ]
+Key: BLCFILL: [ 0.00 0.00 ]
+Key: BLCI: [ 0.00 0.00 ]
+Key: BLCIC: [ 0.00 0.00 ]
+Key: BLCMSK: [ 0.00 0.00 ]
+Key: BLCS: [ 0.00 0.00 ]
+Key: BLENDPDrmi: [ 0.00 0.00 ]
+Key: BLENDPDrri: [ 0.00 0.00 ]
+Key: BLENDPSrmi: [ 0.00 0.00 ]
+Key: BLENDPSrri: [ 0.00 0.00 ]
+Key: BLENDVPDrm: [ 0.00 0.00 ]
+Key: BLENDVPDrr: [ 0.00 0.00 ]
+Key: BLENDVPSrm: [ 0.00 0.00 ]
+Key: BLENDVPSrr: [ 0.00 0.00 ]
+Key: BLSFILL: [ 0.00 0.00 ]
+Key: BLSI: [ 0.00 0.00 ]
+Key: BLSIC: [ 0.00 0.00 ]
+Key: BLSMSK: [ 0.00 0.00 ]
+Key: BLSR: [ 0.00 0.00 ]
+Key: BOUNDS: [ 0.00 0.00 ]
+Key: BSF: [ 0.00 0.00 ]
+Key: BSR: [ 0.00 0.00 ]
+Key: BSWAP: [ 0.00 0.00 ]
+Key: BT: [ 0.00 0.00 ]
+Key: BTC: [ 0.00 0.00 ]
+Key: BTR: [ 0.00 0.00 ]
+Key: BTS: [ 0.00 0.00 ]
+Key: BUNDLE: [ 0.00 0.00 ]
+Key: BZHI: [ 0.00 0.00 ]
+Key: CALL: [ 0.00 0.00 ]
+Key: CALLpcrel: [ 0.00 0.00 ]
+Key: CATCHRET: [ 0.00 0.00 ]
+Key: CBW: [ 0.00 0.00 ]
+Key: CCMP: [ 0.00 0.00 ]
+Key: CDQ: [ 0.00 0.00 ]
+Key: CDQE: [ 0.00 0.00 ]
+Key: CFCMOV: [ 0.00 0.00 ]
+Key: CFI_INSTRUCTION: [ 0.00 0.00 ]
+Key: CHS_F: [ 0.00 0.00 ]
+Key: CHS_Fp: [ 0.00 0.00 ]
+Key: CLAC: [ 0.00 0.00 ]
+Key: CLC: [ 0.00 0.00 ]
+Key: CLD: [ 0.00 0.00 ]
+Key: CLDEMOTE: [ 0.00 0.00 ]
+Key: CLEANUPRET: [ 0.00 0.00 ]
+Key: CLFLUSH: [ 0.00 0.00 ]
+Key: CLFLUSHOPT: [ 0.00 0.00 ]
+Key: CLGI: [ 0.00 0.00 ]
+Key: CLI: [ 0.00 0.00 ]
+Key: CLRSSBSY: [ 0.00 0.00 ]
+Key: CLTS: [ 0.00 0.00 ]
+Key: CLUI: [ 0.00 0.00 ]
+Key: CLWB: [ 0.00 0.00 ]
+Key: CLZERO: [ 0.00 0.00 ]
+Key: CMC: [ 0.00 0.00 ]
+Key: CMOV: [ 0.00 0.00 ]
+Key: CMOVBE_F: [ 0.00 0.00 ]
+Key: CMOVBE_Fp: [ 0.00 0.00 ]
+Key: CMOVB_F: [ 0.00 0.00 ]
+Key: CMOVB_Fp: [ 0.00 0.00 ]
+Key: CMOVE_F: [ 0.00 0.00 ]
+Key: CMOVE_Fp: [ 0.00 0.00 ]
+Key: CMOVNBE_F: [ 0.00 0.00 ]
+Key: CMOVNBE_Fp: [ 0.00 0.00 ]
+Key: CMOVNB_F: [ 0.00 0.00 ]
+Key: CMOVNB_Fp: [ 0.00 0.00 ]
+Key: CMOVNE_F: [ 0.00 0.00 ]
+Key: CMOVNE_Fp: [ 0.00 0.00 ]
+Key: CMOVNP_F: [ 0.00 0.00 ]
+Key: CMOVNP_Fp: [ 0.00 0.00 ]
+Key: CMOVP_F: [ 0.00 0.00 ]
+Key: CMOVP_Fp: [ 0.00 0.00 ]
+Key: CMOV_FR: [ 0.00 0.00 ]
+Key: CMOV_GR: [ 0.00 0.00 ]
+Key: CMOV_RFP: [ 0.00 0.00 ]
+Key: CMOV_VK: [ 0.00 0.00 ]
+Key: CMOV_VR: [ 0.00 0.00 ]
+Key: CMP: [ 0.00 0.00 ]
+Key: CMPCCXADDmr: [ 0.00 0.00 ]
+Key: CMPPDrmi: [ 0.00 0.00 ]
+Key: CMPPDrri: [ 0.00 0.00 ]
+Key: CMPPSrmi: [ 0.00 0.00 ]
+Key: CMPPSrri: [ 0.00 0.00 ]
+Key: CMPSB: [ 0.00 0.00 ]
+Key: CMPSDrmi: [ 0.00 0.00 ]
+Key: CMPSDrmi_Int: [ 0.00 0.00 ]
+Key: CMPSDrri: [ 0.00 0.00 ]
+Key: CMPSDrri_Int: [ 0.00 0.00 ]
+Key: CMPSL: [ 0.00 0.00 ]
+Key: CMPSQ: [ 0.00 0.00 ]
+Key: CMPSSrmi: [ 0.00 0.00 ]
+Key: CMPSSrmi_Int: [ 0.00 0.00 ]
+Key: CMPSSrri: [ 0.00 0.00 ]
+Key: CMPSSrri_Int: [ 0.00 0.00 ]
+Key: CMPSW: [ 0.00 0.00 ]
+Key: CMPXCHG: [ 0.00 0.00 ]
+Key: COMISDrm: [ 0.00 0.00 ]
+Key: COMISDrm_Int: [ 0.00 0.00 ]
+Key: COMISDrr: [ 0.00 0.00 ]
+Key: COMISDrr_Int: [ 0.00 0.00 ]
+Key: COMISSrm: [ 0.00 0.00 ]
+Key: COMISSrm_Int: [ 0.00 0.00 ]
+Key: COMISSrr: [ 0.00 0.00 ]
+Key: COMISSrr_Int: [ 0.00 0.00 ]
+Key: COMP_FST: [ 0.00 0.00 ]
+Key: COM_FIPr: [ 0.00 0.00 ]
+Key: COM_FIr: [ 0.00 0.00 ]
+Key: COM_FST: [ 0.00 0.00 ]
+Key: COM_FpIr: [ 0.00 0.00 ]
+Key: COM_Fpr: [ 0.00 0.00 ]
+Key: CONVERGENCECTRL_ANCHOR: [ 0.00 0.00 ]
+Key: CONVERGENCECTRL_ENTRY: [ 0.00 0.00 ]
+Key: CONVERGENCECTRL_GLUE: [ 0.00 0.00 ]
+Key: CONVERGENCECTRL_LOOP: [ 0.00 0.00 ]
+Key: COPY: [ 0.00 0.00 ]
+Key: COPY_TO_REGCLASS: [ 0.00 0.00 ]
+Key: CPUID: [ 0.00 0.00 ]
+Key: CQO: [ 0.00 0.00 ]
+Key: CRC: [ 0.00 0.00 ]
+Key: CS_PREFIX: [ 0.00 0.00 ]
+Key: CTEST: [ 0.00 0.00 ]
+Key: CVTDQ: [ 0.00 0.00 ]
+Key: CVTPD: [ 0.00 0.00 ]
+Key: CVTPS: [ 0.00 0.00 ]
+Key: CVTSD: [ 0.00 0.00 ]
+Key: CVTSI: [ 0.00 0.00 ]
+Key: CVTSS: [ 0.00 0.00 ]
+Key: CVTTPD: [ 0.00 0.00 ]
+Key: CVTTPS: [ 0.00 0.00 ]
+Key: CVTTSD: [ 0.00 0.00 ]
+Key: CVTTSS: [ 0.00 0.00 ]
+Key: CWD: [ 0.00 0.00 ]
+Key: CWDE: [ 0.00 0.00 ]
+Key: DAA: [ 0.00 0.00 ]
+Key: DAS: [ 0.00 0.00 ]
+Key: DATA: [ 0.00 0.00 ]
+Key: DBG_INSTR_REF: [ 0.00 0.00 ]
+Key: DBG_LABEL: [ 0.00 0.00 ]
+Key: DBG_PHI: [ 0.00 0.00 ]
+Key: DBG_VALUE: [ 0.00 0.00 ]
+Key: DBG_VALUE_LIST: [ 0.00 0.00 ]
+Key: DEC: [ 0.00 0.00 ]
+Key: DIV: [ 0.00 0.00 ]
+Key: DIVPDrm: [ 0.00 0.00 ]
+Key: DIVPDrr: [ 0.00 0.00 ]
+Key: DIVPSrm: [ 0.00 0.00 ]
+Key: DIVPSrr: [ 0.00 0.00 ]
+Key: DIVR_F: [ 0.00 0.00 ]
+Key: DIVR_FI: [ 0.00 0.00 ]
+Key: DIVR_FPrST: [ 0.00 0.00 ]
+Key: DIVR_FST: [ 0.00 0.00 ]
+Key: DIVR_Fp: [ 0.00 0.00 ]
+Key: DIVR_FpI: [ 0.00 0.00 ]
+Key: DIVR_FrST: [ 0.00 0.00 ]
+Key: DIVSDrm: [ 0.00 0.00 ]
+Key: DIVSDrm_Int: [ 0.00 0.00 ]
+Key: DIVSDrr: [ 0.00 0.00 ]
+Key: DIVSDrr_Int: [ 0.00 0.00 ]
+Key: DIVSSrm: [ 0.00 0.00 ]
+Key: DIVSSrm_Int: [ 0.00 0.00 ]
+Key: DIVSSrr: [ 0.00 0.00 ]
+Key: DIVSSrr_Int: [ 0.00 0.00 ]
+Key: DIV_F: [ 0.00 0.00 ]
+Key: DIV_FI: [ 0.00 0.00 ]
+Key: DIV_FPrST: [ 0.00 0.00 ]
+Key: DIV_FST: [ 0.00 0.00 ]
+Key: DIV_Fp: [ 0.00 0.00 ]
+Key: DIV_FpI: [ 0.00 0.00 ]
+Key: DIV_FrST: [ 0.00 0.00 ]
+Key: DPPDrmi: [ 0.00 0.00 ]
+Key: DPPDrri: [ 0.00 0.00 ]
+Key: DPPSrmi: [ 0.00 0.00 ]
+Key: DPPSrri: [ 0.00 0.00 ]
+Key: DS_PREFIX: [ 0.00 0.00 ]
+Key: DYN_ALLOCA: [ 0.00 0.00 ]
+Key: EH_LABEL: [ 0.00 0.00 ]
+Key: EH_RETURN: [ 0.00 0.00 ]
+Key: EH_SjLj_LongJmp: [ 0.00 0.00 ]
+Key: EH_SjLj_SetJmp: [ 0.00 0.00 ]
+Key: EH_SjLj_Setup: [ 0.00 0.00 ]
+Key: ENCLS: [ 0.00 0.00 ]
+Key: ENCLU: [ 0.00 0.00 ]
+Key: ENCLV: [ 0.00 0.00 ]
+Key: ENCODEKEY: [ 0.00 0.00 ]
+Key: ENDBR: [ 0.00 0.00 ]
+Key: ENQCMD: [ 0.00 0.00 ]
+Key: ENQCMDS: [ 0.00 0.00 ]
+Key: ENTER: [ 0.00 0.00 ]
+Key: ERETS: [ 0.00 0.00 ]
+Key: ERETU: [ 0.00 0.00 ]
+Key: ES_PREFIX: [ 0.00 0.00 ]
+Key: EXTRACTPSmri: [ 0.00 0.00 ]
+Key: EXTRACTPSrri: [ 0.00 0.00 ]
+Key: EXTRACT_SUBREG: [ 0.00 0.00 ]
+Key: EXTRQ: [ 0.00 0.00 ]
+Key: EXTRQI: [ 0.00 0.00 ]
+Key: F: [ 0.00 0.00 ]
+Key: FAKE_USE: [ 0.00 0.00 ]
+Key: FARCALL: [ 0.00 0.00 ]
+Key: FARJMP: [ 0.00 0.00 ]
+Key: FAULTING_OP: [ 0.00 0.00 ]
+Key: FBLDm: [ 0.00 0.00 ]
+Key: FBSTPm: [ 0.00 0.00 ]
+Key: FCOM: [ 0.00 0.00 ]
+Key: FCOMP: [ 0.00 0.00 ]
+Key: FCOMPP: [ 0.00 0.00 ]
+Key: FCOS: [ 0.00 0.00 ]
+Key: FDECSTP: [ 0.00 0.00 ]
+Key: FEMMS: [ 0.00 0.00 ]
+Key: FENTRY_CALL: [ 0.00 0.00 ]
+Key: FFREE: [ 0.00 0.00 ]
+Key: FFREEP: [ 0.00 0.00 ]
+Key: FICOM: [ 0.00 0.00 ]
+Key: FICOMP: [ 0.00 0.00 ]
+Key: FINCSTP: [ 0.00 0.00 ]
+Key: FLDCW: [ 0.00 0.00 ]
+Key: FLDENVm: [ 0.00 0.00 ]
+Key: FLDL: [ 0.00 0.00 ]
+Key: FLDLG: [ 0.00 0.00 ]
+Key: FLDLN: [ 0.00 0.00 ]
+Key: FLDPI: [ 0.00 0.00 ]
+Key: FNCLEX: [ 0.00 0.00 ]
+Key: FNINIT: [ 0.00 0.00 ]
+Key: FNOP: [ 0.00 0.00 ]
+Key: FNSTCW: [ 0.00 0.00 ]
+Key: FNSTSW: [ 0.00 0.00 ]
+Key: FNSTSWm: [ 0.00 0.00 ]
+Key: FP: [ 0.00 0.00 ]
+Key: FPATAN: [ 0.00 0.00 ]
+Key: FPREM: [ 0.00 0.00 ]
+Key: FPTAN: [ 0.00 0.00 ]
+Key: FRNDINT: [ 0.00 0.00 ]
+Key: FRSTORm: [ 0.00 0.00 ]
+Key: FSAVEm: [ 0.00 0.00 ]
+Key: FSCALE: [ 0.00 0.00 ]
+Key: FSIN: [ 0.00 0.00 ]
+Key: FSINCOS: [ 0.00 0.00 ]
+Key: FSTENVm: [ 0.00 0.00 ]
+Key: FS_PREFIX: [ 0.00 0.00 ]
+Key: FXRSTOR: [ 0.00 0.00 ]
+Key: FXSAVE: [ 0.00 0.00 ]
+Key: FXTRACT: [ 0.00 0.00 ]
+Key: FYL: [ 0.00 0.00 ]
+Key: FsFLD: [ 0.00 0.00 ]
+Key: GC_LABEL: [ 0.00 0.00 ]
+Key: GETSEC: [ 0.00 0.00 ]
+Key: GF: [ 0.00 0.00 ]
+Key: GS_PREFIX: [ 0.00 0.00 ]
+Key: G_ABDS: [ 0.00 0.00 ]
+Key: G_ABDU: [ 0.00 0.00 ]
+Key: G_ABS: [ 0.00 0.00 ]
+Key: G_ADD: [ 0.00 0.00 ]
+Key: G_ADDRSPACE_CAST: [ 0.00 0.00 ]
+Key: G_AND: [ 0.00 0.00 ]
+Key: G_ANYEXT: [ 0.00 0.00 ]
+Key: G_ASHR: [ 0.00 0.00 ]
+Key: G_ASSERT_ALIGN: [ 0.00 0.00 ]
+Key: G_ASSERT_SEXT: [ 0.00 0.00 ]
+Key: G_ASSERT_ZEXT: [ 0.00 0.00 ]
+Key: G_ATOMICRMW_ADD: [ 0.00 0.00 ]
+Key: G_ATOMICRMW_AND: [ 0.00 0.00 ]
+Key: G_ATOMICRMW_FADD: [ 0.00 0.00 ]
+Key: G_ATOMICRMW_FMAX: [ 0.00 0.00 ]
+Key: G_ATOMICRMW_FMAXIMUM: [ 0.00 0.00 ]
+Key: G_ATOMICRMW_FMIN: [ 0.00 0.00 ]
+Key: G_ATOMICRMW_FMINIMUM: [ 0.00 0.00 ]
+Key: G_ATOMICRMW_FSUB: [ 0.00 0.00 ]
+Key: G_ATOMICRMW_MAX: [ 0.00 0.00 ]
+Key: G_ATOMICRMW_MIN: [ 0.00 0.00 ]
+Key: G_ATOMICRMW_NAND: [ 0.00 0.00 ]
+Key: G_ATOMICRMW_OR: [ 0.00 0.00 ]
+Key: G_ATOMICRMW_SUB: [ 0.00 0.00 ]
+Key: G_ATOMICRMW_UDEC_WRAP: [ 0.00 0.00 ]
+Key: G_ATOMICRMW_UINC_WRAP: [ 0.00 0.00 ]
+Key: G_ATOMICRMW_UMAX: [ 0.00 0.00 ]
+Key: G_ATOMICRMW_UMIN: [ 0.00 0.00 ]
+Key: G_ATOMICRMW_USUB_COND: [ 0.00 0.00 ]
+Key: G_ATOMICRMW_USUB_SAT: [ 0.00 0.00 ]
+Key: G_ATOMICRMW_XCHG: [ 0.00 0.00 ]
+Key: G_ATOMICRMW_XOR: [ 0.00 0.00 ]
+Key: G_ATOMIC_CMPXCHG: [ 0.00 0.00 ]
+Key: G_ATOMIC_CMPXCHG_WITH_SUCCESS: [ 0.00 0.00 ]
+Key: G_BITCAST: [ 0.00 0.00 ]
+Key: G_BITREVERSE: [ 0.00 0.00 ]
+Key: G_BLOCK_ADDR: [ 0.00 0.00 ]
+Key: G_BR: [ 0.00 0.00 ]
+Key: G_BRCOND: [ 0.00 0.00 ]
+Key: G_BRINDIRECT: [ 0.00 0.00 ]
+Key: G_BRJT: [ 0.00 0.00 ]
+Key: G_BSWAP: [ 0.00 0.00 ]
+Key: G_BUILD_VECTOR: [ 0.00 0.00 ]
+Key: G_BUILD_VECTOR_TRUNC: [ 0.00 0.00 ]
+Key: G_BZERO: [ 0.00 0.00 ]
+Key: G_CONCAT_VECTORS: [ 0.00 0.00 ]
+Key: G_CONSTANT: [ 0.00 0.00 ]
+Key: G_CONSTANT_FOLD_BARRIER: [ 0.00 0.00 ]
+Key: G_CONSTANT_POOL: [ 0.00 0.00 ]
+Key: G_CTLZ: [ 0.00 0.00 ]
+Key: G_CTLZ_ZERO_UNDEF: [ 0.00 0.00 ]
+Key: G_CTPOP: [ 0.00 0.00 ]
+Key: G_CTTZ: [ 0.00 0.00 ]
+Key: G_CTTZ_ZERO_UNDEF: [ 0.00 0.00 ]
+Key: G_DEBUGTRAP: [ 0.00 0.00 ]
+Key: G_DYN_STACKALLOC: [ 0.00 0.00 ]
+Key: G_EXTRACT: [ 0.00 0.00 ]
+Key: G_EXTRACT_SUBVECTOR: [ 0.00 0.00 ]
+Key: G_EXTRACT_VECTOR_ELT: [ 0.00 0.00 ]
+Key: G_FABS: [ 0.00 0.00 ]
+Key: G_FACOS: [ 0.00 0.00 ]
+Key: G_FADD: [ 0.00 0.00 ]
+Key: G_FASIN: [ 0.00 0.00 ]
+Key: G_FATAN: [ 0.00 0.00 ]
+Key: G_FCANONICALIZE: [ 0.00 0.00 ]
+Key: G_FCEIL: [ 0.00 0.00 ]
+Key: G_FCMP: [ 0.00 0.00 ]
+Key: G_FCONSTANT: [ 0.00 0.00 ]
+Key: G_FCOPYSIGN: [ 0.00 0.00 ]
+Key: G_FCOS: [ 0.00 0.00 ]
+Key: G_FCOSH: [ 0.00 0.00 ]
+Key: G_FDIV: [ 0.00 0.00 ]
+Key: G_FENCE: [ 0.00 0.00 ]
+Key: G_FEXP: [ 0.00 0.00 ]
+Key: G_FFLOOR: [ 0.00 0.00 ]
+Key: G_FFREXP: [ 0.00 0.00 ]
+Key: G_FILD: [ 0.00 0.00 ]
+Key: G_FIST: [ 0.00 0.00 ]
+Key: G_FLDCW: [ 0.00 0.00 ]
+Key: G_FLDEXP: [ 0.00 0.00 ]
+Key: G_FLOG: [ 0.00 0.00 ]
+Key: G_FMA: [ 0.00 0.00 ]
+Key: G_FMAD: [ 0.00 0.00 ]
+Key: G_FMAXIMUM: [ 0.00 0.00 ]
+Key: G_FMAXIMUMNUM: [ 0.00 0.00 ]
+Key: G_FMAXNUM: [ 0.00 0.00 ]
+Key: G_FMAXNUM_IEEE: [ 0.00 0.00 ]
+Key: G_FMINIMUM: [ 0.00 0.00 ]
+Key: G_FMINIMUMNUM: [ 0.00 0.00 ]
+Key: G_FMINNUM: [ 0.00 0.00 ]
+Key: G_FMINNUM_IEEE: [ 0.00 0.00 ]
+Key: G_FMODF: [ 0.00 0.00 ]
+Key: G_FMUL: [ 0.00 0.00 ]
+Key: G_FNEARBYINT: [ 0.00 0.00 ]
+Key: G_FNEG: [ 0.00 0.00 ]
+Key: G_FNSTCW: [ 0.00 0.00 ]
+Key: G_FPEXT: [ 0.00 0.00 ]
+Key: G_FPOW: [ 0.00 0.00 ]
+Key: G_FPOWI: [ 0.00 0.00 ]
+Key: G_FPTOSI: [ 0.00 0.00 ]
+Key: G_FPTOSI_SAT: [ 0.00 0.00 ]
+Key: G_FPTOUI: [ 0.00 0.00 ]
+Key: G_FPTOUI_SAT: [ 0.00 0.00 ]
+Key: G_FPTRUNC: [ 0.00 0.00 ]
+Key: G_FRAME_INDEX: [ 0.00 0.00 ]
+Key: G_FREEZE: [ 0.00 0.00 ]
+Key: G_FREM: [ 0.00 0.00 ]
+Key: G_FRINT: [ 0.00 0.00 ]
+Key: G_FSHL: [ 0.00 0.00 ]
+Key: G_FSHR: [ 0.00 0.00 ]
+Key: G_FSIN: [ 0.00 0.00 ]
+Key: G_FSINCOS: [ 0.00 0.00 ]
+Key: G_FSINH: [ 0.00 0.00 ]
+Key: G_FSQRT: [ 0.00 0.00 ]
+Key: G_FSUB: [ 0.00 0.00 ]
+Key: G_FTAN: [ 0.00 0.00 ]
+Key: G_FTANH: [ 0.00 0.00 ]
+Key: G_GET_FPENV: [ 0.00 0.00 ]
+Key: G_GET_FPMODE: [ 0.00 0.00 ]
+Key: G_GET_ROUNDING: [ 0.00 0.00 ]
+Key: G_GLOBAL_VALUE: [ 0.00 0.00 ]
+Key: G_ICMP: [ 0.00 0.00 ]
+Key: G_IMPLICIT_DEF: [ 0.00 0.00 ]
+Key: G_INDEXED_LOAD: [ 0.00 0.00 ]
+Key: G_INDEXED_SEXTLOAD: [ 0.00 0.00 ]
+Key: G_INDEXED_STORE: [ 0.00 0.00 ]
+Key: G_INDEXED_ZEXTLOAD: [ 0.00 0.00 ]
+Key: G_INSERT: [ 0.00 0.00 ]
+Key: G_INSERT_SUBVECTOR: [ 0.00 0.00 ]
+Key: G_INSERT_VECTOR_ELT: [ 0.00 0.00 ]
+Key: G_INTRINSIC: [ 0.00 0.00 ]
+Key: G_INTRINSIC_CONVERGENT: [ 0.00 0.00 ]
+Key: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS: [ 0.00 0.00 ]
+Key: G_INTRINSIC_FPTRUNC_ROUND: [ 0.00 0.00 ]
+Key: G_INTRINSIC_LLRINT: [ 0.00 0.00 ]
+Key: G_INTRINSIC_LRINT: [ 0.00 0.00 ]
+Key: G_INTRINSIC_ROUND: [ 0.00 0.00 ]
+Key: G_INTRINSIC_ROUNDEVEN: [ 0.00 0.00 ]
+Key: G_INTRINSIC_TRUNC: [ 0.00 0.00 ]
+Key: G_INTRINSIC_W_SIDE_EFFECTS: [ 0.00 0.00 ]
+Key: G_INTTOPTR: [ 0.00 0.00 ]
+Key: G_INVOKE_REGION_START: [ 0.00 0.00 ]
+Key: G_IS_FPCLASS: [ 0.00 0.00 ]
+Key: G_JUMP_TABLE: [ 0.00 0.00 ]
+Key: G_LLROUND: [ 0.00 0.00 ]
+Key: G_LOAD: [ 0.00 0.00 ]
+Key: G_LROUND: [ 0.00 0.00 ]
+Key: G_LSHR: [ 0.00 0.00 ]
+Key: G_MEMCPY: [ 0.00 0.00 ]
+Key: G_MEMCPY_INLINE: [ 0.00 0.00 ]
+Key: G_MEMMOVE: [ 0.00 0.00 ]
+Key: G_MEMSET: [ 0.00 0.00 ]
+Key: G_MERGE_VALUES: [ 0.00 0.00 ]
+Key: G_MUL: [ 0.00 0.00 ]
+Key: G_OR: [ 0.00 0.00 ]
+Key: G_PHI: [ 0.00 0.00 ]
+Key: G_PREFETCH: [ 0.00 0.00 ]
+Key: G_PTRAUTH_GLOBAL_VALUE: [ 0.00 0.00 ]
+Key: G_PTRMASK: [ 0.00 0.00 ]
+Key: G_PTRTOINT: [ 0.00 0.00 ]
+Key: G_PTR_ADD: [ 0.00 0.00 ]
+Key: G_READCYCLECOUNTER: [ 0.00 0.00 ]
+Key: G_READSTEADYCOUNTER: [ 0.00 0.00 ]
+Key: G_READ_REGISTER: [ 0.00 0.00 ]
+Key: G_RESET_FPENV: [ 0.00 0.00 ]
+Key: G_RESET_FPMODE: [ 0.00 0.00 ]
+Key: G_ROTL: [ 0.00 0.00 ]
+Key: G_ROTR: [ 0.00 0.00 ]
+Key: G_SADDE: [ 0.00 0.00 ]
+Key: G_SADDO: [ 0.00 0.00 ]
+Key: G_SADDSAT: [ 0.00 0.00 ]
+Key: G_SBFX: [ 0.00 0.00 ]
+Key: G_SCMP: [ 0.00 0.00 ]
+Key: G_SDIV: [ 0.00 0.00 ]
+Key: G_SDIVFIX: [ 0.00 0.00 ]
+Key: G_SDIVFIXSAT: [ 0.00 0.00 ]
+Key: G_SDIVREM: [ 0.00 0.00 ]
+Key: G_SELECT: [ 0.00 0.00 ]
+Key: G_SET_FPENV: [ 0.00 0.00 ]
+Key: G_SET_FPMODE: [ 0.00 0.00 ]
+Key: G_SET_ROUNDING: [ 0.00 0.00 ]
+Key: G_SEXT: [ 0.00 0.00 ]
+Key: G_SEXTLOAD: [ 0.00 0.00 ]
+Key: G_SEXT_INREG: [ 0.00 0.00 ]
+Key: G_SHL: [ 0.00 0.00 ]
+Key: G_SHUFFLE_VECTOR: [ 0.00 0.00 ]
+Key: G_SITOFP: [ 0.00 0.00 ]
+Key: G_SMAX: [ 0.00 0.00 ]
+Key: G_SMIN: [ 0.00 0.00 ]
+Key: G_SMULFIX: [ 0.00 0.00 ]
+Key: G_SMULFIXSAT: [ 0.00 0.00 ]
+Key: G_SMULH: [ 0.00 0.00 ]
+Key: G_SMULO: [ 0.00 0.00 ]
+Key: G_SPLAT_VECTOR: [ 0.00 0.00 ]
+Key: G_SREM: [ 0.00 0.00 ]
+Key: G_SSHLSAT: [ 0.00 0.00 ]
+Key: G_SSUBE: [ 0.00 0.00 ]
+Key: G_SSUBO: [ 0.00 0.00 ]
+Key: G_SSUBSAT: [ 0.00 0.00 ]
+Key: G_STACKRESTORE: [ 0.00 0.00 ]
+Key: G_STACKSAVE: [ 0.00 0.00 ]
+Key: G_STEP_VECTOR: [ 0.00 0.00 ]
+Key: G_STORE: [ 0.00 0.00 ]
+Key: G_STRICT_FADD: [ 0.00 0.00 ]
+Key: G_STRICT_FDIV: [ 0.00 0.00 ]
+Key: G_STRICT_FLDEXP: [ 0.00 0.00 ]
+Key: G_STRICT_FMA: [ 0.00 0.00 ]
+Key: G_STRICT_FMUL: [ 0.00 0.00 ]
+Key: G_STRICT_FREM: [ 0.00 0.00 ]
+Key: G_STRICT_FSQRT: [ 0.00 0.00 ]
+Key: G_STRICT_FSUB: [ 0.00 0.00 ]
+Key: G_SUB: [ 0.00 0.00 ]
+Key: G_TRAP: [ 0.00 0.00 ]
+Key: G_TRUNC: [ 0.00 0.00 ]
+Key: G_TRUNC_SSAT_S: [ 0.00 0.00 ]
+Key: G_TRUNC_SSAT_U: [ 0.00 0.00 ]
+Key: G_TRUNC_USAT_U: [ 0.00 0.00 ]
+Key: G_UADDE: [ 0.00 0.00 ]
+Key: G_UADDO: [ 0.00 0.00 ]
+Key: G_UADDSAT: [ 0.00 0.00 ]
+Key: G_UBFX: [ 0.00 0.00 ]
+Key: G_UBSANTRAP: [ 0.00 0.00 ]
+Key: G_UCMP: [ 0.00 0.00 ]
+Key: G_UDIV: [ 0.00 0.00 ]
+Key: G_UDIVFIX: [ 0.00 0.00 ]
+Key: G_UDIVFIXSAT: [ 0.00 0.00 ]
+Key: G_UDIVREM: [ 0.00 0.00 ]
+Key: G_UITOFP: [ 0.00 0.00 ]
+Key: G_UMAX: [ 0.00 0.00 ]
+Key: G_UMIN: [ 0.00 0.00 ]
+Key: G_UMULFIX: [ 0.00 0.00 ]
+Key: G_UMULFIXSAT: [ 0.00 0.00 ]
+Key: G_UMULH: [ 0.00 0.00 ]
+Key: G_UMULO: [ 0.00 0.00 ]
+Key: G_UNMERGE_VALUES: [ 0.00 0.00 ]
+Key: G_UREM: [ 0.00 0.00 ]
+Key: G_USHLSAT: [ 0.00 0.00 ]
+Key: G_USUBE: [ 0.00 0.00 ]
+Key: G_USUBO: [ 0.00 0.00 ]
+Key: G_USUBSAT: [ 0.00 0.00 ]
+Key: G_VAARG: [ 0.00 0.00 ]
+Key: G_VASTART: [ 0.00 0.00 ]
+Key: G_VECREDUCE_ADD: [ 0.00 0.00 ]
+Key: G_VECREDUCE_AND: [ 0.00 0.00 ]
+Key: G_VECREDUCE_FADD: [ 0.00 0.00 ]
+Key: G_VECREDUCE_FMAX: [ 0.00 0.00 ]
+Key: G_VECREDUCE_FMAXIMUM: [ 0.00 0.00 ]
+Key: G_VECREDUCE_FMIN: [ 0.00 0.00 ]
+Key: G_VECREDUCE_FMINIMUM: [ 0.00 0.00 ]
+Key: G_VECREDUCE_FMUL: [ 0.00 0.00 ]
+Key: G_VECREDUCE_MUL: [ 0.00 0.00 ]
+Key: G_VECREDUCE_OR: [ 0.00 0.00 ]
+Key: G_VECREDUCE_SEQ_FADD: [ 0.00 0.00 ]
+Key: G_VECREDUCE_SEQ_FMUL: [ 0.00 0.00 ]
+Key: G_VECREDUCE_SMAX: [ 0.00 0.00 ]
+Key: G_VECREDUCE_SMIN: [ 0.00 0.00 ]
+Key: G_VECREDUCE_UMAX: [ 0.00 0.00 ]
+Key: G_VECREDUCE_UMIN: [ 0.00 0.00 ]
+Key: G_VECREDUCE_XOR: [ 0.00 0.00 ]
+Key: G_VECTOR_COMPRESS: [ 0.00 0.00 ]
+Key: G_VSCALE: [ 0.00 0.00 ]
+Key: G_WRITE_REGISTER: [ 0.00 0.00 ]
+Key: G_XOR: [ 0.00 0.00 ]
+Key: G_ZEXT: [ 0.00 0.00 ]
+Key: G_ZEXTLOAD: [ 0.00 0.00 ]
+Key: HADDPDrm: [ 0.00 0.00 ]
+Key: HADDPDrr: [ 0.00 0.00 ]
+Key: HADDPSrm: [ 0.00 0.00 ]
+Key: HADDPSrr: [ 0.00 0.00 ]
+Key: HLT: [ 0.00 0.00 ]
+Key: HRESET: [ 0.00 0.00 ]
+Key: HSUBPDrm: [ 0.00 0.00 ]
+Key: HSUBPDrr: [ 0.00 0.00 ]
+Key: HSUBPSrm: [ 0.00 0.00 ]
+Key: HSUBPSrr: [ 0.00 0.00 ]
+Key: ICALL_BRANCH_FUNNEL: [ 0.00 0.00 ]
+Key: IDIV: [ 0.00 0.00 ]
+Key: ILD_F: [ 0.00 0.00 ]
+Key: ILD_Fp: [ 0.00 0.00 ]
+Key: IMPLICIT_DEF: [ 0.00 0.00 ]
+Key: IMUL: [ 0.00 0.00 ]
+Key: IMULZU: [ 0.00 0.00 ]
+Key: IN: [ 0.00 0.00 ]
+Key: INC: [ 0.00 0.00 ]
+Key: INCSSPD: [ 0.00 0.00 ]
+Key: INCSSPQ: [ 0.00 0.00 ]
+Key: INDIRECT_THUNK_CALL: [ 0.00 0.00 ]
+Key: INDIRECT_THUNK_TCRETURN: [ 0.00 0.00 ]
+Key: INIT_UNDEF: [ 0.00 0.00 ]
+Key: INLINEASM: [ 0.00 0.00 ]
+Key: INLINEASM_BR: [ 0.00 0.00 ]
+Key: INSB: [ 0.00 0.00 ]
+Key: INSERTPSrmi: [ 0.00 0.00 ]
+Key: INSERTPSrri: [ 0.00 0.00 ]
+Key: INSERTQ: [ 0.00 0.00 ]
+Key: INSERTQI: [ 0.00 0.00 ]
+Key: INSERT_SUBREG: [ 0.00 0.00 ]
+Key: INSL: [ 0.00 0.00 ]
+Key: INSW: [ 0.00 0.00 ]
+Key: INT: [ 0.00 0.00 ]
+Key: INTO: [ 0.00 0.00 ]
+Key: INVD: [ 0.00 0.00 ]
+Key: INVEPT: [ 0.00 0.00 ]
+Key: INVLPG: [ 0.00 0.00 ]
+Key: INVLPGA: [ 0.00 0.00 ]
+Key: INVLPGB: [ 0.00 0.00 ]
+Key: INVPCID: [ 0.00 0.00 ]
+Key: INVVPID: [ 0.00 0.00 ]
+Key: IRET: [ 0.00 0.00 ]
+Key: ISTT_FP: [ 0.00 0.00 ]
+Key: ISTT_Fp: [ 0.00 0.00 ]
+Key: IST_F: [ 0.00 0.00 ]
+Key: IST_FP: [ 0.00 0.00 ]
+Key: IST_Fp: [ 0.00 0.00 ]
+Key: Int_eh_sjlj_setup_dispatch: [ 0.00 0.00 ]
+Key: JCC: [ 0.00 0.00 ]
+Key: JCXZ: [ 0.00 0.00 ]
+Key: JECXZ: [ 0.00 0.00 ]
+Key: JMP: [ 0.00 0.00 ]
+Key: JMPABS: [ 0.00 0.00 ]
+Key: JRCXZ: [ 0.00 0.00 ]
+Key: JUMP_TABLE_DEBUG_INFO: [ 0.00 0.00 ]
+Key: KADDBkk: [ 0.00 0.00 ]
+Key: KADDDkk: [ 0.00 0.00 ]
+Key: KADDQkk: [ 0.00 0.00 ]
+Key: KADDWkk: [ 0.00 0.00 ]
+Key: KANDBkk: [ 0.00 0.00 ]
+Key: KANDDkk: [ 0.00 0.00 ]
+Key: KANDNBkk: [ 0.00 0.00 ]
+Key: KANDNDkk: [ 0.00 0.00 ]
+Key: KANDNQkk: [ 0.00 0.00 ]
+Key: KANDNWkk: [ 0.00 0.00 ]
+Key: KANDQkk: [ 0.00 0.00 ]
+Key: KANDWkk: [ 0.00 0.00 ]
+Key: KCFI_CHECK: [ 0.00 0.00 ]
+Key: KILL: [ 0.00 0.00 ]
+Key: KMOVBkk: [ 0.00 0.00 ]
+Key: KMOVBkk_EVEX: [ 0.00 0.00 ]
+Key: KMOVBkm: [ 0.00 0.00 ]
+Key: KMOVBkm_EVEX: [ 0.00 0.00 ]
+Key: KMOVBkr: [ 0.00 0.00 ]
+Key: KMOVBkr_EVEX: [ 0.00 0.00 ]
+Key: KMOVBmk: [ 0.00 0.00 ]
+Key: KMOVBmk_EVEX: [ 0.00 0.00 ]
+Key: KMOVBrk: [ 0.00 0.00 ]
+Key: KMOVBrk_EVEX: [ 0.00 0.00 ]
+Key: KMOVDkk: [ 0.00 0.00 ]
+Key: KMOVDkk_EVEX: [ 0.00 0.00 ]
+Key: KMOVDkm: [ 0.00 0.00 ]
+Key: KMOVDkm_EVEX: [ 0.00 0.00 ]
+Key: KMOVDkr: [ 0.00 0.00 ]
+Key: KMOVDkr_EVEX: [ 0.00 0.00 ]
+Key: KMOVDmk: [ 0.00 0.00 ]
+Key: KMOVDmk_EVEX: [ 0.00 0.00 ]
+Key: KMOVDrk: [ 0.00 0.00 ]
+Key: KMOVDrk_EVEX: [ 0.00 0.00 ]
+Key: KMOVQkk: [ 0.00 0.00 ]
+Key: KMOVQkk_EVEX: [ 0.00 0.00 ]
+Key: KMOVQkm: [ 0.00 0.00 ]
+Key: KMOVQkm_EVEX: [ 0.00 0.00 ]
+Key: KMOVQkr: [ 0.00 0.00 ]
+Key: KMOVQkr_EVEX: [ 0.00 0.00 ]
+Key: KMOVQmk: [ 0.00 0.00 ]
+Key: KMOVQmk_EVEX: [ 0.00 0.00 ]
+Key: KMOVQrk: [ 0.00 0.00 ]
+Key: KMOVQrk_EVEX: [ 0.00 0.00 ]
+Key: KMOVWkk: [ 0.00 0.00 ]
+Key: KMOVWkk_EVEX: [ 0.00 0.00 ]
+Key: KMOVWkm: [ 0.00 0.00 ]
+Key: KMOVWkm_EVEX: [ 0.00 0.00 ]
+Key: KMOVWkr: [ 0.00 0.00 ]
+Key: KMOVWkr_EVEX: [ 0.00 0.00 ]
+Key: KMOVWmk: [ 0.00 0.00 ]
+Key: KMOVWmk_EVEX: [ 0.00 0.00 ]
+Key: KMOVWrk: [ 0.00 0.00 ]
+Key: KMOVWrk_EVEX: [ 0.00 0.00 ]
+Key: KNOTBkk: [ 0.00 0.00 ]
+Key: KNOTDkk: [ 0.00 0.00 ]
+Key: KNOTQkk: [ 0.00 0.00 ]
+Key: KNOTWkk: [ 0.00 0.00 ]
+Key: KORBkk: [ 0.00 0.00 ]
+Key: KORDkk: [ 0.00 0.00 ]
+Key: KORQkk: [ 0.00 0.00 ]
+Key: KORTESTBkk: [ 0.00 0.00 ]
+Key: KORTESTDkk: [ 0.00 0.00 ]
+Key: KORTESTQkk: [ 0.00 0.00 ]
+Key: KORTESTWkk: [ 0.00 0.00 ]
+Key: KORWkk: [ 0.00 0.00 ]
+Key: KSET: [ 0.00 0.00 ]
+Key: KSHIFTLBki: [ 0.00 0.00 ]
+Key: KSHIFTLDki: [ 0.00 0.00 ]
+Key: KSHIFTLQki: [ 0.00 0.00 ]
+Key: KSHIFTLWki: [ 0.00 0.00 ]
+Key: KSHIFTRBki: [ 0.00 0.00 ]
+Key: KSHIFTRDki: [ 0.00 0.00 ]
+Key: KSHIFTRQki: [ 0.00 0.00 ]
+Key: KSHIFTRWki: [ 0.00 0.00 ]
+Key: KTESTBkk: [ 0.00 0.00 ]
+Key: KTESTDkk: [ 0.00 0.00 ]
+Key: KTESTQkk: [ 0.00 0.00 ]
+Key: KTESTWkk: [ 0.00 0.00 ]
+Key: KUNPCKBWkk: [ 0.00 0.00 ]
+Key: KUNPCKDQkk: [ 0.00 0.00 ]
+Key: KUNPCKWDkk: [ 0.00 0.00 ]
+Key: KXNORBkk: [ 0.00 0.00 ]
+Key: KXNORDkk: [ 0.00 0.00 ]
+Key: KXNORQkk: [ 0.00 0.00 ]
+Key: KXNORWkk: [ 0.00 0.00 ]
+Key: KXORBkk: [ 0.00 0.00 ]
+Key: KXORDkk: [ 0.00 0.00 ]
+Key: KXORQkk: [ 0.00 0.00 ]
+Key: KXORWkk: [ 0.00 0.00 ]
+Key: LAHF: [ 0.00 0.00 ]
+Key: LAR: [ 0.00 0.00 ]
+Key: LCMPXCHG: [ 0.00 0.00 ]
+Key: LDDQUrm: [ 0.00 0.00 ]
+Key: LDMXCSR: [ 0.00 0.00 ]
+Key: LDS: [ 0.00 0.00 ]
+Key: LDTILECFG: [ 0.00 0.00 ]
+Key: LDTILECFG_EVEX: [ 0.00 0.00 ]
+Key: LD_F: [ 0.00 0.00 ]
+Key: LD_Fp: [ 0.00 0.00 ]
+Key: LD_Frr: [ 0.00 0.00 ]
+Key: LEA: [ 0.00 0.00 ]
+Key: LEAVE: [ 0.00 0.00 ]
+Key: LES: [ 0.00 0.00 ]
+Key: LFENCE: [ 0.00 0.00 ]
+Key: LFS: [ 0.00 0.00 ]
+Key: LGDT: [ 0.00 0.00 ]
+Key: LGS: [ 0.00 0.00 ]
+Key: LIDT: [ 0.00 0.00 ]
+Key: LIFETIME_END: [ 0.00 0.00 ]
+Key: LIFETIME_START: [ 0.00 0.00 ]
+Key: LKGS: [ 0.00 0.00 ]
+Key: LLDT: [ 0.00 0.00 ]
+Key: LLWPCB: [ 0.00 0.00 ]
+Key: LMSW: [ 0.00 0.00 ]
+Key: LOADIWKEY: [ 0.00 0.00 ]
+Key: LOAD_STACK_GUARD: [ 0.00 0.00 ]
+Key: LOCAL_ESCAPE: [ 0.00 0.00 ]
+Key: LOCK_ADD: [ 0.00 0.00 ]
+Key: LOCK_AND: [ 0.00 0.00 ]
+Key: LOCK_BTC: [ 0.00 0.00 ]
+Key: LOCK_BTC_RM: [ 0.00 0.00 ]
+Key: LOCK_BTR: [ 0.00 0.00 ]
+Key: LOCK_BTR_RM: [ 0.00 0.00 ]
+Key: LOCK_BTS: [ 0.00 0.00 ]
+Key: LOCK_BTS_RM: [ 0.00 0.00 ]
+Key: LOCK_DEC: [ 0.00 0.00 ]
+Key: LOCK_INC: [ 0.00 0.00 ]
+Key: LOCK_OR: [ 0.00 0.00 ]
+Key: LOCK_PREFIX: [ 0.00 0.00 ]
+Key: LOCK_SUB: [ 0.00 0.00 ]
+Key: LOCK_XOR: [ 0.00 0.00 ]
+Key: LODSB: [ 0.00 0.00 ]
+Key: LODSL: [ 0.00 0.00 ]
+Key: LODSQ: [ 0.00 0.00 ]
+Key: LODSW: [ 0.00 0.00 ]
+Key: LOOP: [ 0.00 0.00 ]
+Key: LOOPE: [ 0.00 0.00 ]
+Key: LOOPNE: [ 0.00 0.00 ]
+Key: LRET: [ 0.00 0.00 ]
+Key: LRETI: [ 0.00 0.00 ]
+Key: LSL: [ 0.00 0.00 ]
+Key: LSS: [ 0.00 0.00 ]
+Key: LTRm: [ 0.00 0.00 ]
+Key: LTRr: [ 0.00 0.00 ]
+Key: LWPINS: [ 0.00 0.00 ]
+Key: LWPVAL: [ 0.00 0.00 ]
+Key: LXADD: [ 0.00 0.00 ]
+Key: LZCNT: [ 0.00 0.00 ]
+Key: MASKMOVDQU: [ 0.00 0.00 ]
+Key: MASKPAIR: [ 0.00 0.00 ]
+Key: MAXCPDrm: [ 0.00 0.00 ]
+Key: MAXCPDrr: [ 0.00 0.00 ]
+Key: MAXCPSrm: [ 0.00 0.00 ]
+Key: MAXCPSrr: [ 0.00 0.00 ]
+Key: MAXCSDrm: [ 0.00 0.00 ]
+Key: MAXCSDrr: [ 0.00 0.00 ]
+Key: MAXCSSrm: [ 0.00 0.00 ]
+Key: MAXCSSrr: [ 0.00 0.00 ]
+Key: MAXPDrm: [ 0.00 0.00 ]
+Key: MAXPDrr: [ 0.00 0.00 ]
+Key: MAXPSrm: [ 0.00 0.00 ]
+Key: MAXPSrr: [ 0.00 0.00 ]
+Key: MAXSDrm: [ 0.00 0.00 ]
+Key: MAXSDrm_Int: [ 0.00 0.00 ]
+Key: MAXSDrr: [ 0.00 0.00 ]
+Key: MAXSDrr_Int: [ 0.00 0.00 ]
+Key: MAXSSrm: [ 0.00 0.00 ]
+Key: MAXSSrm_Int: [ 0.00 0.00 ]
+Key: MAXSSrr: [ 0.00 0.00 ]
+Key: MAXSSrr_Int: [ 0.00 0.00 ]
+Key: MEMBARRIER: [ 0.00 0.00 ]
+Key: MFENCE: [ 0.00 0.00 ]
+Key: MINCPDrm: [ 0.00 0.00 ]
+Key: MINCPDrr: [ 0.00 0.00 ]
+Key: MINCPSrm: [ 0.00 0.00 ]
+Key: MINCPSrr: [ 0.00 0.00 ]
+Key: MINCSDrm: [ 0.00 0.00 ]
+Key: MINCSDrr: [ 0.00 0.00 ]
+Key: MINCSSrm: [ 0.00 0.00 ]
+Key: MINCSSrr: [ 0.00 0.00 ]
+Key: MINPDrm: [ 0.00 0.00 ]
+Key: MINPDrr: [ 0.00 0.00 ]
+Key: MINPSrm: [ 0.00 0.00 ]
+Key: MINPSrr: [ 0.00 0.00 ]
+Key: MINSDrm: [ 0.00 0.00 ]
+Key: MINSDrm_Int: [ 0.00 0.00 ]
+Key: MINSDrr: [ 0.00 0.00 ]
+Key: MINSDrr_Int: [ 0.00 0.00 ]
+Key: MINSSrm: [ 0.00 0.00 ]
+Key: MINSSrm_Int: [ 0.00 0.00 ]
+Key: MINSSrr: [ 0.00 0.00 ]
+Key: MINSSrr_Int: [ 0.00 0.00 ]
+Key: MMX_CVTPD: [ 0.00 0.00 ]
+Key: MMX_CVTPI: [ 0.00 0.00 ]
+Key: MMX_CVTPS: [ 0.00 0.00 ]
+Key: MMX_CVTTPD: [ 0.00 0.00 ]
+Key: MMX_CVTTPS: [ 0.00 0.00 ]
+Key: MMX_EMMS: [ 0.00 0.00 ]
+Key: MMX_MASKMOVQ: [ 0.00 0.00 ]
+Key: MMX_MOVD: [ 0.00 0.00 ]
+Key: MMX_MOVDQ: [ 0.00 0.00 ]
+Key: MMX_MOVFR: [ 0.00 0.00 ]
+Key: MMX_MOVNTQmr: [ 0.00 0.00 ]
+Key: MMX_MOVQ: [ 0.00 0.00 ]
+Key: MMX_PABSBrm: [ 0.00 0.00 ]
+Key: MMX_PABSBrr: [ 0.00 0.00 ]
+Key: MMX_PABSDrm: [ 0.00 0.00 ]
+Key: MMX_PABSDrr: [ 0.00 0.00 ]
+Key: MMX_PABSWrm: [ 0.00 0.00 ]
+Key: MMX_PABSWrr: [ 0.00 0.00 ]
+Key: MMX_PACKSSDWrm: [ 0.00 0.00 ]
+Key: MMX_PACKSSDWrr: [ 0.00 0.00 ]
+Key: MMX_PACKSSWBrm: [ 0.00 0.00 ]
+Key: MMX_PACKSSWBrr: [ 0.00 0.00 ]
+Key: MMX_PACKUSWBrm: [ 0.00 0.00 ]
+Key: MMX_PACKUSWBrr: [ 0.00 0.00 ]
+Key: MMX_PADDBrm: [ 0.00 0.00 ]
+Key: MMX_PADDBrr: [ 0.00 0.00 ]
+Key: MMX_PADDDrm: [ 0.00 0.00 ]
+Key: MMX_PADDDrr: [ 0.00 0.00 ]
+Key: MMX_PADDQrm: [ 0.00 0.00 ]
+Key: MMX_PADDQrr: [ 0.00 0.00 ]
+Key: MMX_PADDSBrm: [ 0.00 0.00 ]
+Key: MMX_PADDSBrr: [ 0.00 0.00 ]
+Key: MMX_PADDSWrm: [ 0.00 0.00 ]
+Key: MMX_PADDSWrr: [ 0.00 0.00 ]
+Key: MMX_PADDUSBrm: [ 0.00 0.00 ]
+Key: MMX_PADDUSBrr: [ 0.00 0.00 ]
+Key: MMX_PADDUSWrm: [ 0.00 0.00 ]
+Key: MMX_PADDUSWrr: [ 0.00 0.00 ]
+Key: MMX_PADDWrm: [ 0.00 0.00 ]
+Key: MMX_PADDWrr: [ 0.00 0.00 ]
+Key: MMX_PALIGNRrmi: [ 0.00 0.00 ]
+Key: MMX_PALIGNRrri: [ 0.00 0.00 ]
+Key: MMX_PANDNrm: [ 0.00 0.00 ]
+Key: MMX_PANDNrr: [ 0.00 0.00 ]
+Key: MMX_PANDrm: [ 0.00 0.00 ]
+Key: MMX_PANDrr: [ 0.00 0.00 ]
+Key: MMX_PAVGBrm: [ 0.00 0.00 ]
+Key: MMX_PAVGBrr: [ 0.00 0.00 ]
+Key: MMX_PAVGWrm: [ 0.00 0.00 ]
+Key: MMX_PAVGWrr: [ 0.00 0.00 ]
+Key: MMX_PCMPEQBrm: [ 0.00 0.00 ]
+Key: MMX_PCMPEQBrr: [ 0.00 0.00 ]
+Key: MMX_PCMPEQDrm: [ 0.00 0.00 ]
+Key: MMX_PCMPEQDrr: [ 0.00 0.00 ]
+Key: MMX_PCMPEQWrm: [ 0.00 0.00 ]
+Key: MMX_PCMPEQWrr: [ 0.00 0.00 ]
+Key: MMX_PCMPGTBrm: [ 0.00 0.00 ]
+Key: MMX_PCMPGTBrr: [ 0.00 0.00 ]
+Key: MMX_PCMPGTDrm: [ 0.00 0.00 ]
+Key: MMX_PCMPGTDrr: [ 0.00 0.00 ]
+Key: MMX_PCMPGTWrm: [ 0.00 0.00 ]
+Key: MMX_PCMPGTWrr: [ 0.00 0.00 ]
+Key: MMX_PEXTRWrri: [ 0.00 0.00 ]
+Key: MMX_PHADDDrm: [ 0.00 0.00 ]
+Key: MMX_PHADDDrr: [ 0.00 0.00 ]
+Key: MMX_PHADDSWrm: [ 0.00 0.00 ]
+Key: MMX_PHADDSWrr: [ 0.00 0.00 ]
+Key: MMX_PHADDWrm: [ 0.00 0.00 ]
+Key: MMX_PHADDWrr: [ 0.00 0.00 ]
+Key: MMX_PHSUBDrm: [ 0.00 0.00 ]
+Key: MMX_PHSUBDrr: [ 0.00 0.00 ]
+Key: MMX_PHSUBSWrm: [ 0.00 0.00 ]
+Key: MMX_PHSUBSWrr: [ 0.00 0.00 ]
+Key: MMX_PHSUBWrm: [ 0.00 0.00 ]
+Key: MMX_PHSUBWrr: [ 0.00 0.00 ]
+Key: MMX_PINSRWrmi: [ 0.00 0.00 ]
+Key: MMX_PINSRWrri: [ 0.00 0.00 ]
+Key: MMX_PMADDUBSWrm: [ 0.00 0.00 ]
+Key: MMX_PMADDUBSWrr: [ 0.00 0.00 ]
+Key: MMX_PMADDWDrm: [ 0.00 0.00 ]
+Key: MMX_PMADDWDrr: [ 0.00 0.00 ]
+Key: MMX_PMAXSWrm: [ 0.00 0.00 ]
+Key: MMX_PMAXSWrr: [ 0.00 0.00 ]
+Key: MMX_PMAXUBrm: [ 0.00 0.00 ]
+Key: MMX_PMAXUBrr: [ 0.00 0.00 ]
+Key: MMX_PMINSWrm: [ 0.00 0.00 ]
+Key: MMX_PMINSWrr: [ 0.00 0.00 ]
+Key: MMX_PMINUBrm: [ 0.00 0.00 ]
+Key: MMX_PMINUBrr: [ 0.00 0.00 ]
+Key: MMX_PMOVMSKBrr: [ 0.00 0.00 ]
+Key: MMX_PMULHRSWrm: [ 0.00 0.00 ]
+Key: MMX_PMULHRSWrr: [ 0.00 0.00 ]
+Key: MMX_PMULHUWrm: [ 0.00 0.00 ]
+Key: MMX_PMULHUWrr: [ 0.00 0.00 ]
+Key: MMX_PMULHWrm: [ 0.00 0.00 ]
+Key: MMX_PMULHWrr: [ 0.00 0.00 ]
+Key: MMX_PMULLWrm: [ 0.00 0.00 ]
+Key: MMX_PMULLWrr: [ 0.00 0.00 ]
+Key: MMX_PMULUDQrm: [ 0.00 0.00 ]
+Key: MMX_PMULUDQrr: [ 0.00 0.00 ]
+Key: MMX_PORrm: [ 0.00 0.00 ]
+Key: MMX_PORrr: [ 0.00 0.00 ]
+Key: MMX_PSADBWrm: [ 0.00 0.00 ]
+Key: MMX_PSADBWrr: [ 0.00 0.00 ]
+Key: MMX_PSHUFBrm: [ 0.00 0.00 ]
+Key: MMX_PSHUFBrr: [ 0.00 0.00 ]
+Key: MMX_PSHUFWmi: [ 0.00 0.00 ]
+Key: MMX_PSHUFWri: [ 0.00 0.00 ]
+Key: MMX_PSIGNBrm: [ 0.00 0.00 ]
+Key: MMX_PSIGNBrr: [ 0.00 0.00 ]
+Key: MMX_PSIGNDrm: [ 0.00 0.00 ]
+Key: MMX_PSIGNDrr: [ 0.00 0.00 ]
+Key: MMX_PSIGNWrm: [ 0.00 0.00 ]
+Key: MMX_PSIGNWrr: [ 0.00 0.00 ]
+Key: MMX_PSLLDri: [ 0.00 0.00 ]
+Key: MMX_PSLLDrm: [ 0.00 0.00 ]
+Key: MMX_PSLLDrr: [ 0.00 0.00 ]
+Key: MMX_PSLLQri: [ 0.00 0.00 ]
+Key: MMX_PSLLQrm: [ 0.00 0.00 ]
+Key: MMX_PSLLQrr: [ 0.00 0.00 ]
+Key: MMX_PSLLWri: [ 0.00 0.00 ]
+Key: MMX_PSLLWrm: [ 0.00 0.00 ]
+Key: MMX_PSLLWrr: [ 0.00 0.00 ]
+Key: MMX_PSRADri: [ 0.00 0.00 ]
+Key: MMX_PSRADrm: [ 0.00 0.00 ]
+Key: MMX_PSRADrr: [ 0.00 0.00 ]
+Key: MMX_PSRAWri: [ 0.00 0.00 ]
+Key: MMX_PSRAWrm: [ 0.00 0.00 ]
+Key: MMX_PSRAWrr: [ 0.00 0.00 ]
+Key: MMX_PSRLDri: [ 0.00 0.00 ]
+Key: MMX_PSRLDrm: [ 0.00 0.00 ]
+Key: MMX_PSRLDrr: [ 0.00 0.00 ]
+Key: MMX_PSRLQri: [ 0.00 0.00 ]
+Key: MMX_PSRLQrm: [ 0.00 0.00 ]
+Key: MMX_PSRLQrr: [ 0.00 0.00 ]
+Key: MMX_PSRLWri: [ 0.00 0.00 ]
+Key: MMX_PSRLWrm: [ 0.00 0.00 ]
+Key: MMX_PSRLWrr: [ 0.00 0.00 ]
+Key: MMX_PSUBBrm: [ 0.00 0.00 ]
+Key: MMX_PSUBBrr: [ 0.00 0.00 ]
+Key: MMX_PSUBDrm: [ 0.00 0.00 ]
+Key: MMX_PSUBDrr: [ 0.00 0.00 ]
+Key: MMX_PSUBQrm: [ 0.00 0.00 ]
+Key: MMX_PSUBQrr: [ 0.00 0.00 ]
+Key: MMX_PSUBSBrm: [ 0.00 0.00 ]
+Key: MMX_PSUBSBrr: [ 0.00 0.00 ]
+Key: MMX_PSUBSWrm: [ 0.00 0.00 ]
+Key: MMX_PSUBSWrr: [ 0.00 0.00 ]
+Key: MMX_PSUBUSBrm: [ 0.00 0.00 ]
+Key: MMX_PSUBUSBrr: [ 0.00 0.00 ]
+Key: MMX_PSUBUSWrm: [ 0.00 0.00 ]
+Key: MMX_PSUBUSWrr: [ 0.00 0.00 ]
+Key: MMX_PSUBWrm: [ 0.00 0.00 ]
+Key: MMX_PSUBWrr: [ 0.00 0.00 ]
+Key: MMX_PUNPCKHBWrm: [ 0.00 0.00 ]
+Key: MMX_PUNPCKHBWrr: [ 0.00 0.00 ]
+Key: MMX_PUNPCKHDQrm: [ 0.00 0.00 ]
+Key: MMX_PUNPCKHDQrr: [ 0.00 0.00 ]
+Key: MMX_PUNPCKHWDrm: [ 0.00 0.00 ]
+Key: MMX_PUNPCKHWDrr: [ 0.00 0.00 ]
+Key: MMX_PUNPCKLBWrm: [ 0.00 0.00 ]
+Key: MMX_PUNPCKLBWrr: [ 0.00 0.00 ]
+Key: MMX_PUNPCKLDQrm: [ 0.00 0.00 ]
+Key: MMX_PUNPCKLDQrr: [ 0.00 0.00 ]
+Key: MMX_PUNPCKLWDrm: [ 0.00 0.00 ]
+Key: MMX_PUNPCKLWDrr: [ 0.00 0.00 ]
+Key: MMX_PXORrm: [ 0.00 0.00 ]
+Key: MMX_PXORrr: [ 0.00 0.00 ]
+Key: MMX_SET: [ 0.00 0.00 ]
+Key: MONITOR: [ 0.00 0.00 ]
+Key: MONITORX: [ 0.00 0.00 ]
+Key: MONTMUL: [ 0.00 0.00 ]
+Key: MORESTACK_RET: [ 0.00 0.00 ]
+Key: MORESTACK_RET_RESTORE_R: [ 0.00 0.00 ]
+Key: MOV: [ 0.00 0.00 ]
+Key: MOVAPDmr: [ 0.00 0.00 ]
+Key: MOVAPDrm: [ 0.00 0.00 ]
+Key: MOVAPDrr: [ 0.00 0.00 ]
+Key: MOVAPDrr_REV: [ 0.00 0.00 ]
+Key: MOVAPSmr: [ 0.00 0.00 ]
+Key: MOVAPSrm: [ 0.00 0.00 ]
+Key: MOVAPSrr: [ 0.00 0.00 ]
+Key: MOVAPSrr_REV: [ 0.00 0.00 ]
+Key: MOVBE: [ 0.00 0.00 ]
+Key: MOVDDUPrm: [ 0.00 0.00 ]
+Key: MOVDDUPrr: [ 0.00 0.00 ]
+Key: MOVDI: [ 0.00 0.00 ]
+Key: MOVDIR: [ 0.00 0.00 ]
+Key: MOVDIRI: [ 0.00 0.00 ]
+Key: MOVDQAmr: [ 0.00 0.00 ]
+Key: MOVDQArm: [ 0.00 0.00 ]
+Key: MOVDQArr: [ 0.00 0.00 ]
+Key: MOVDQArr_REV: [ 0.00 0.00 ]
+Key: MOVDQUmr: [ 0.00 0.00 ]
+Key: MOVDQUrm: [ 0.00 0.00 ]
+Key: MOVDQUrr: [ 0.00 0.00 ]
+Key: MOVDQUrr_REV: [ 0.00 0.00 ]
+Key: MOVHLPSrr: [ 0.00 0.00 ]
+Key: MOVHPDmr: [ 0.00 0.00 ]
+Key: MOVHPDrm: [ 0.00 0.00 ]
+Key: MOVHPSmr: [ 0.00 0.00 ]
+Key: MOVHPSrm: [ 0.00 0.00 ]
+Key: MOVLHPSrr: [ 0.00 0.00 ]
+Key: MOVLPDmr: [ 0.00 0.00 ]
+Key: MOVLPDrm: [ 0.00 0.00 ]
+Key: MOVLPSmr: [ 0.00 0.00 ]
+Key: MOVLPSrm: [ 0.00 0.00 ]
+Key: MOVMSKPDrr: [ 0.00 0.00 ]
+Key: MOVMSKPSrr: [ 0.00 0.00 ]
+Key: MOVNTDQArm: [ 0.00 0.00 ]
+Key: MOVNTDQmr: [ 0.00 0.00 ]
+Key: MOVNTI: [ 0.00 0.00 ]
+Key: MOVNTImr: [ 0.00 0.00 ]
+Key: MOVNTPDmr: [ 0.00 0.00 ]
+Key: MOVNTPSmr: [ 0.00 0.00 ]
+Key: MOVNTSD: [ 0.00 0.00 ]
+Key: MOVNTSS: [ 0.00 0.00 ]
+Key: MOVPC: [ 0.00 0.00 ]
+Key: MOVPDI: [ 0.00 0.00 ]
+Key: MOVPQI: [ 0.00 0.00 ]
+Key: MOVPQIto: [ 0.00 0.00 ]
+Key: MOVQI: [ 0.00 0.00 ]
+Key: MOVRS: [ 0.00 0.00 ]
+Key: MOVSB: [ 0.00 0.00 ]
+Key: MOVSDmr: [ 0.00 0.00 ]
+Key: MOVSDrm: [ 0.00 0.00 ]
+Key: MOVSDrm_alt: [ 0.00 0.00 ]
+Key: MOVSDrr: [ 0.00 0.00 ]
+Key: MOVSDrr_REV: [ 0.00 0.00 ]
+Key: MOVSDto: [ 0.00 0.00 ]
+Key: MOVSHDUPrm: [ 0.00 0.00 ]
+Key: MOVSHDUPrr: [ 0.00 0.00 ]
+Key: MOVSHPmr: [ 0.00 0.00 ]
+Key: MOVSHPrm: [ 0.00 0.00 ]
+Key: MOVSL: [ 0.00 0.00 ]
+Key: MOVSLDUPrm: [ 0.00 0.00 ]
+Key: MOVSLDUPrr: [ 0.00 0.00 ]
+Key: MOVSQ: [ 0.00 0.00 ]
+Key: MOVSS: [ 0.00 0.00 ]
+Key: MOVSSmr: [ 0.00 0.00 ]
+Key: MOVSSrm: [ 0.00 0.00 ]
+Key: MOVSSrm_alt: [ 0.00 0.00 ]
+Key: MOVSSrr: [ 0.00 0.00 ]
+Key: MOVSSrr_REV: [ 0.00 0.00 ]
+Key: MOVSW: [ 0.00 0.00 ]
+Key: MOVSX: [ 0.00 0.00 ]
+Key: MOVUPDmr: [ 0.00 0.00 ]
+Key: MOVUPDrm: [ 0.00 0.00 ]
+Key: MOVUPDrr: [ 0.00 0.00 ]
+Key: MOVUPDrr_REV: [ 0.00 0.00 ]
+Key: MOVUPSmr: [ 0.00 0.00 ]
+Key: MOVUPSrm: [ 0.00 0.00 ]
+Key: MOVUPSrr: [ 0.00 0.00 ]
+Key: MOVUPSrr_REV: [ 0.00 0.00 ]
+Key: MOVZPQILo: [ 0.00 0.00 ]
+Key: MOVZX: [ 0.00 0.00 ]
+Key: MPSADBWrmi: [ 0.00 0.00 ]
+Key: MPSADBWrri: [ 0.00 0.00 ]
+Key: MUL: [ 0.00 0.00 ]
+Key: MULPDrm: [ 0.00 0.00 ]
+Key: MULPDrr: [ 0.00 0.00 ]
+Key: MULPSrm: [ 0.00 0.00 ]
+Key: MULPSrr: [ 0.00 0.00 ]
+Key: MULSDrm: [ 0.00 0.00 ]
+Key: MULSDrm_Int: [ 0.00 0.00 ]
+Key: MULSDrr: [ 0.00 0.00 ]
+Key: MULSDrr_Int: [ 0.00 0.00 ]
+Key: MULSSrm: [ 0.00 0.00 ]
+Key: MULSSrm_Int: [ 0.00 0.00 ]
+Key: MULSSrr: [ 0.00 0.00 ]
+Key: MULSSrr_Int: [ 0.00 0.00 ]
+Key: MULX: [ 0.00 0.00 ]
+Key: MUL_F: [ 0.00 0.00 ]
+Key: MUL_FI: [ 0.00 0.00 ]
+Key: MUL_FPrST: [ 0.00 0.00 ]
+Key: MUL_FST: [ 0.00 0.00 ]
+Key: MUL_Fp: [ 0.00 0.00 ]
+Key: MUL_FpI: [ 0.00 0.00 ]
+Key: MUL_FrST: [ 0.00 0.00 ]
+Key: MWAITX: [ 0.00 0.00 ]
+Key: MWAITX_SAVE_RBX: [ 0.00 0.00 ]
+Key: MWAITXrrr: [ 0.00 0.00 ]
+Key: MWAITrr: [ 0.00 0.00 ]
+Key: NEG: [ 0.00 0.00 ]
+Key: NOOP: [ 0.00 0.00 ]
+Key: NOOPL: [ 0.00 0.00 ]
+Key: NOOPLr: [ 0.00 0.00 ]
+Key: NOOPQ: [ 0.00 0.00 ]
+Key: NOOPQr: [ 0.00 0.00 ]
+Key: NOOPW: [ 0.00 0.00 ]
+Key: NOOPWr: [ 0.00 0.00 ]
+Key: NOT: [ 0.00 0.00 ]
+Key: OR: [ 0.00 0.00 ]
+Key: ORPDrm: [ 0.00 0.00 ]
+Key: ORPDrr: [ 0.00 0.00 ]
+Key: ORPSrm: [ 0.00 0.00 ]
+Key: ORPSrr: [ 0.00 0.00 ]
+Key: OUT: [ 0.00 0.00 ]
+Key: OUTSB: [ 0.00 0.00 ]
+Key: OUTSL: [ 0.00 0.00 ]
+Key: OUTSW: [ 0.00 0.00 ]
+Key: PABSBrm: [ 0.00 0.00 ]
+Key: PABSBrr: [ 0.00 0.00 ]
+Key: PABSDrm: [ 0.00 0.00 ]
+Key: PABSDrr: [ 0.00 0.00 ]
+Key: PABSWrm: [ 0.00 0.00 ]
+Key: PABSWrr: [ 0.00 0.00 ]
+Key: PACKSSDWrm: [ 0.00 0.00 ]
+Key: PACKSSDWrr: [ 0.00 0.00 ]
+Key: PACKSSWBrm: [ 0.00 0.00 ]
+Key: PACKSSWBrr: [ 0.00 0.00 ]
+Key: PACKUSDWrm: [ 0.00 0.00 ]
+Key: PACKUSDWrr: [ 0.00 0.00 ]
+Key: PACKUSWBrm: [ 0.00 0.00 ]
+Key: PACKUSWBrr: [ 0.00 0.00 ]
+Key: PADDBrm: [ 0.00 0.00 ]
+Key: PADDBrr: [ 0.00 0.00 ]
+Key: PADDDrm: [ 0.00 0.00 ]
+Key: PADDDrr: [ 0.00 0.00 ]
+Key: PADDQrm: [ 0.00 0.00 ]
+Key: PADDQrr: [ 0.00 0.00 ]
+Key: PADDSBrm: [ 0.00 0.00 ]
+Key: PADDSBrr: [ 0.00 0.00 ]
+Key: PADDSWrm: [ 0.00 0.00 ]
+Key: PADDSWrr: [ 0.00 0.00 ]
+Key: PADDUSBrm: [ 0.00 0.00 ]
+Key: PADDUSBrr: [ 0.00 0.00 ]
+Key: PADDUSWrm: [ 0.00 0.00 ]
+Key: PADDUSWrr: [ 0.00 0.00 ]
+Key: PADDWrm: [ 0.00 0.00 ]
+Key: PADDWrr: [ 0.00 0.00 ]
+Key: PALIGNRrmi: [ 0.00 0.00 ]
+Key: PALIGNRrri: [ 0.00 0.00 ]
+Key: PANDNrm: [ 0.00 0.00 ]
+Key: PANDNrr: [ 0.00 0.00 ]
+Key: PANDrm: [ 0.00 0.00 ]
+Key: PANDrr: [ 0.00 0.00 ]
+Key: PATCHABLE_EVENT_CALL: [ 0.00 0.00 ]
+Key: PATCHABLE_FUNCTION_ENTER: [ 0.00 0.00 ]
+Key: PATCHABLE_FUNCTION_EXIT: [ 0.00 0.00 ]
+Key: PATCHABLE_OP: [ 0.00 0.00 ]
+Key: PATCHABLE_RET: [ 0.00 0.00 ]
+Key: PATCHABLE_TAIL_CALL: [ 0.00 0.00 ]
+Key: PATCHABLE_TYPED_EVENT_CALL: [ 0.00 0.00 ]
+Key: PATCHPOINT: [ 0.00 0.00 ]
+Key: PAUSE: [ 0.00 0.00 ]
+Key: PAVGBrm: [ 0.00 0.00 ]
+Key: PAVGBrr: [ 0.00 0.00 ]
+Key: PAVGUSBrm: [ 0.00 0.00 ]
+Key: PAVGUSBrr: [ 0.00 0.00 ]
+Key: PAVGWrm: [ 0.00 0.00 ]
+Key: PAVGWrr: [ 0.00 0.00 ]
+Key: PBLENDVBrm: [ 0.00 0.00 ]
+Key: PBLENDVBrr: [ 0.00 0.00 ]
+Key: PBLENDWrmi: [ 0.00 0.00 ]
+Key: PBLENDWrri: [ 0.00 0.00 ]
+Key: PBNDKB: [ 0.00 0.00 ]
+Key: PCLMULQDQrmi: [ 0.00 0.00 ]
+Key: PCLMULQDQrri: [ 0.00 0.00 ]
+Key: PCMPEQBrm: [ 0.00 0.00 ]
+Key: PCMPEQBrr: [ 0.00 0.00 ]
+Key: PCMPEQDrm: [ 0.00 0.00 ]
+Key: PCMPEQDrr: [ 0.00 0.00 ]
+Key: PCMPEQQrm: [ 0.00 0.00 ]
+Key: PCMPEQQrr: [ 0.00 0.00 ]
+Key: PCMPEQWrm: [ 0.00 0.00 ]
+Key: PCMPEQWrr: [ 0.00 0.00 ]
+Key: PCMPESTRIrmi: [ 0.00 0.00 ]
+Key: PCMPESTRIrri: [ 0.00 0.00 ]
+Key: PCMPESTRMrmi: [ 0.00 0.00 ]
+Key: PCMPESTRMrri: [ 0.00 0.00 ]
+Key: PCMPGTBrm: [ 0.00 0.00 ]
+Key: PCMPGTBrr: [ 0.00 0.00 ]
+Key: PCMPGTDrm: [ 0.00 0.00 ]
+Key: PCMPGTDrr: [ 0.00 0.00 ]
+Key: PCMPGTQrm: [ 0.00 0.00 ]
+Key: PCMPGTQrr: [ 0.00 0.00 ]
+Key: PCMPGTWrm: [ 0.00 0.00 ]
+Key: PCMPGTWrr: [ 0.00 0.00 ]
+Key: PCMPISTRIrmi: [ 0.00 0.00 ]
+Key: PCMPISTRIrri: [ 0.00 0.00 ]
+Key: PCMPISTRMrmi: [ 0.00 0.00 ]
+Key: PCMPISTRMrri: [ 0.00 0.00 ]
+Key: PCONFIG: [ 0.00 0.00 ]
+Key: PDEP: [ 0.00 0.00 ]
+Key: PEXT: [ 0.00 0.00 ]
+Key: PEXTRBmri: [ 0.00 0.00 ]
+Key: PEXTRBrri: [ 0.00 0.00 ]
+Key: PEXTRDmri: [ 0.00 0.00 ]
+Key: PEXTRDrri: [ 0.00 0.00 ]
+Key: PEXTRQmri: [ 0.00 0.00 ]
+Key: PEXTRQrri: [ 0.00 0.00 ]
+Key: PEXTRWmri: [ 0.00 0.00 ]
+Key: PEXTRWrri: [ 0.00 0.00 ]
+Key: PEXTRWrri_REV: [ 0.00 0.00 ]
+Key: PF: [ 0.00 0.00 ]
+Key: PFACCrm: [ 0.00 0.00 ]
+Key: PFACCrr: [ 0.00 0.00 ]
+Key: PFADDrm: [ 0.00 0.00 ]
+Key: PFADDrr: [ 0.00 0.00 ]
+Key: PFCMPEQrm: [ 0.00 0.00 ]
+Key: PFCMPEQrr: [ 0.00 0.00 ]
+Key: PFCMPGErm: [ 0.00 0.00 ]
+Key: PFCMPGErr: [ 0.00 0.00 ]
+Key: PFCMPGTrm: [ 0.00 0.00 ]
+Key: PFCMPGTrr: [ 0.00 0.00 ]
+Key: PFMAXrm: [ 0.00 0.00 ]
+Key: PFMAXrr: [ 0.00 0.00 ]
+Key: PFMINrm: [ 0.00 0.00 ]
+Key: PFMINrr: [ 0.00 0.00 ]
+Key: PFMULrm: [ 0.00 0.00 ]
+Key: PFMULrr: [ 0.00 0.00 ]
+Key: PFNACCrm: [ 0.00 0.00 ]
+Key: PFNACCrr: [ 0.00 0.00 ]
+Key: PFPNACCrm: [ 0.00 0.00 ]
+Key: PFPNACCrr: [ 0.00 0.00 ]
+Key: PFRCPIT: [ 0.00 0.00 ]
+Key: PFRCPrm: [ 0.00 0.00 ]
+Key: PFRCPrr: [ 0.00 0.00 ]
+Key: PFRSQIT: [ 0.00 0.00 ]
+Key: PFRSQRTrm: [ 0.00 0.00 ]
+Key: PFRSQRTrr: [ 0.00 0.00 ]
+Key: PFSUBRrm: [ 0.00 0.00 ]
+Key: PFSUBRrr: [ 0.00 0.00 ]
+Key: PFSUBrm: [ 0.00 0.00 ]
+Key: PFSUBrr: [ 0.00 0.00 ]
+Key: PHADDDrm: [ 0.00 0.00 ]
+Key: PHADDDrr: [ 0.00 0.00 ]
+Key: PHADDSWrm: [ 0.00 0.00 ]
+Key: PHADDSWrr: [ 0.00 0.00 ]
+Key: PHADDWrm: [ 0.00 0.00 ]
+Key: PHADDWrr: [ 0.00 0.00 ]
+Key: PHI: [ 0.00 0.00 ]
+Key: PHMINPOSUWrm: [ 0.00 0.00 ]
+Key: PHMINPOSUWrr: [ 0.00 0.00 ]
+Key: PHSUBDrm: [ 0.00 0.00 ]
+Key: PHSUBDrr: [ 0.00 0.00 ]
+Key: PHSUBSWrm: [ 0.00 0.00 ]
+Key: PHSUBSWrr: [ 0.00 0.00 ]
+Key: PHSUBWrm: [ 0.00 0.00 ]
+Key: PHSUBWrr: [ 0.00 0.00 ]
+Key: PI: [ 0.00 0.00 ]
+Key: PINSRBrmi: [ 0.00 0.00 ]
+Key: PINSRBrri: [ 0.00 0.00 ]
+Key: PINSRDrmi: [ 0.00 0.00 ]
+Key: PINSRDrri: [ 0.00 0.00 ]
+Key: PINSRQrmi: [ 0.00 0.00 ]
+Key: PINSRQrri: [ 0.00 0.00 ]
+Key: PINSRWrmi: [ 0.00 0.00 ]
+Key: PINSRWrri: [ 0.00 0.00 ]
+Key: PLDTILECFGV: [ 0.00 0.00 ]
+Key: PLEA: [ 0.00 0.00 ]
+Key: PMADDUBSWrm: [ 0.00 0.00 ]
+Key: PMADDUBSWrr: [ 0.00 0.00 ]
+Key: PMADDWDrm: [ 0.00 0.00 ]
+Key: PMADDWDrr: [ 0.00 0.00 ]
+Key: PMAXSBrm: [ 0.00 0.00 ]
+Key: PMAXSBrr: [ 0.00 0.00 ]
+Key: PMAXSDrm: [ 0.00 0.00 ]
+Key: PMAXSDrr: [ 0.00 0.00 ]
+Key: PMAXSWrm: [ 0.00 0.00 ]
+Key: PMAXSWrr: [ 0.00 0.00 ]
+Key: PMAXUBrm: [ 0.00 0.00 ]
+Key: PMAXUBrr: [ 0.00 0.00 ]
+Key: PMAXUDrm: [ 0.00 0.00 ]
+Key: PMAXUDrr: [ 0.00 0.00 ]
+Key: PMAXUWrm: [ 0.00 0.00 ]
+Key: PMAXUWrr: [ 0.00 0.00 ]
+Key: PMINSBrm: [ 0.00 0.00 ]
+Key: PMINSBrr: [ 0.00 0.00 ]
+Key: PMINSDrm: [ 0.00 0.00 ]
+Key: PMINSDrr: [ 0.00 0.00 ]
+Key: PMINSWrm: [ 0.00 0.00 ]
+Key: PMINSWrr: [ 0.00 0.00 ]
+Key: PMINUBrm: [ 0.00 0.00 ]
+Key: PMINUBrr: [ 0.00 0.00 ]
+Key: PMINUDrm: [ 0.00 0.00 ]
+Key: PMINUDrr: [ 0.00 0.00 ]
+Key: PMINUWrm: [ 0.00 0.00 ]
+Key: PMINUWrr: [ 0.00 0.00 ]
+Key: PMOVMSKBrr: [ 0.00 0.00 ]
+Key: PMOVSXBDrm: [ 0.00 0.00 ]
+Key: PMOVSXBDrr: [ 0.00 0.00 ]
+Key: PMOVSXBQrm: [ 0.00 0.00 ]
+Key: PMOVSXBQrr: [ 0.00 0.00 ]
+Key: PMOVSXBWrm: [ 0.00 0.00 ]
+Key: PMOVSXBWrr: [ 0.00 0.00 ]
+Key: PMOVSXDQrm: [ 0.00 0.00 ]
+Key: PMOVSXDQrr: [ 0.00 0.00 ]
+Key: PMOVSXWDrm: [ 0.00 0.00 ]
+Key: PMOVSXWDrr: [ 0.00 0.00 ]
+Key: PMOVSXWQrm: [ 0.00 0.00 ]
+Key: PMOVSXWQrr: [ 0.00 0.00 ]
+Key: PMOVZXBDrm: [ 0.00 0.00 ]
+Key: PMOVZXBDrr: [ 0.00 0.00 ]
+Key: PMOVZXBQrm: [ 0.00 0.00 ]
+Key: PMOVZXBQrr: [ 0.00 0.00 ]
+Key: PMOVZXBWrm: [ 0.00 0.00 ]
+Key: PMOVZXBWrr: [ 0.00 0.00 ]
+Key: PMOVZXDQrm: [ 0.00 0.00 ]
+Key: PMOVZXDQrr: [ 0.00 0.00 ]
+Key: PMOVZXWDrm: [ 0.00 0.00 ]
+Key: PMOVZXWDrr: [ 0.00 0.00 ]
+Key: PMOVZXWQrm: [ 0.00 0.00 ]
+Key: PMOVZXWQrr: [ 0.00 0.00 ]
+Key: PMULDQrm: [ 0.00 0.00 ]
+Key: PMULDQrr: [ 0.00 0.00 ]
+Key: PMULHRSWrm: [ 0.00 0.00 ]
+Key: PMULHRSWrr: [ 0.00 0.00 ]
+Key: PMULHRWrm: [ 0.00 0.00 ]
+Key: PMULHRWrr: [ 0.00 0.00 ]
+Key: PMULHUWrm: [ 0.00 0.00 ]
+Key: PMULHUWrr: [ 0.00 0.00 ]
+Key: PMULHWrm: [ 0.00 0.00 ]
+Key: PMULHWrr: [ 0.00 0.00 ]
+Key: PMULLDrm: [ 0.00 0.00 ]
+Key: PMULLDrr: [ 0.00 0.00 ]
+Key: PMULLWrm: [ 0.00 0.00 ]
+Key: PMULLWrr: [ 0.00 0.00 ]
+Key: PMULUDQrm: [ 0.00 0.00 ]
+Key: PMULUDQrr: [ 0.00 0.00 ]
+Key: POP: [ 0.00 0.00 ]
+Key: POPA: [ 0.00 0.00 ]
+Key: POPCNT: [ 0.00 0.00 ]
+Key: POPDS: [ 0.00 0.00 ]
+Key: POPES: [ 0.00 0.00 ]
+Key: POPF: [ 0.00 0.00 ]
+Key: POPFS: [ 0.00 0.00 ]
+Key: POPGS: [ 0.00 0.00 ]
+Key: POPP: [ 0.00 0.00 ]
+Key: POPSS: [ 0.00 0.00 ]
+Key: PORrm: [ 0.00 0.00 ]
+Key: PORrr: [ 0.00 0.00 ]
+Key: PREALLOCATED_ARG: [ 0.00 0.00 ]
+Key: PREALLOCATED_SETUP: [ 0.00 0.00 ]
+Key: PREFETCH: [ 0.00 0.00 ]
+Key: PREFETCHIT: [ 0.00 0.00 ]
+Key: PREFETCHNTA: [ 0.00 0.00 ]
+Key: PREFETCHRST: [ 0.00 0.00 ]
+Key: PREFETCHT: [ 0.00 0.00 ]
+Key: PREFETCHW: [ 0.00 0.00 ]
+Key: PREFETCHWT: [ 0.00 0.00 ]
+Key: PROBED_ALLOCA: [ 0.00 0.00 ]
+Key: PSADBWrm: [ 0.00 0.00 ]
+Key: PSADBWrr: [ 0.00 0.00 ]
+Key: PSEUDO_PROBE: [ 0.00 0.00 ]
+Key: PSHUFBrm: [ 0.00 0.00 ]
+Key: PSHUFBrr: [ 0.00 0.00 ]
+Key: PSHUFDmi: [ 0.00 0.00 ]
+Key: PSHUFDri: [ 0.00 0.00 ]
+Key: PSHUFHWmi: [ 0.00 0.00 ]
+Key: PSHUFHWri: [ 0.00 0.00 ]
+Key: PSHUFLWmi: [ 0.00 0.00 ]
+Key: PSHUFLWri: [ 0.00 0.00 ]
+Key: PSIGNBrm: [ 0.00 0.00 ]
+Key: PSIGNBrr: [ 0.00 0.00 ]
+Key: PSIGNDrm: [ 0.00 0.00 ]
+Key: PSIGNDrr: [ 0.00 0.00 ]
+Key: PSIGNWrm: [ 0.00 0.00 ]
+Key: PSIGNWrr: [ 0.00 0.00 ]
+Key: PSLLDQri: [ 0.00 0.00 ]
+Key: PSLLDri: [ 0.00 0.00 ]
+Key: PSLLDrm: [ 0.00 0.00 ]
+Key: PSLLDrr: [ 0.00 0.00 ]
+Key: PSLLQri: [ 0.00 0.00 ]
+Key: PSLLQrm: [ 0.00 0.00 ]
+Key: PSLLQrr: [ 0.00 0.00 ]
+Key: PSLLWri: [ 0.00 0.00 ]
+Key: PSLLWrm: [ 0.00 0.00 ]
+Key: PSLLWrr: [ 0.00 0.00 ]
+Key: PSMASH: [ 0.00 0.00 ]
+Key: PSRADri: [ 0.00 0.00 ]
+Key: PSRADrm: [ 0.00 0.00 ]
+Key: PSRADrr: [ 0.00 0.00 ]
+Key: PSRAWri: [ 0.00 0.00 ]
+Key: PSRAWrm: [ 0.00 0.00 ]
+Key: PSRAWrr: [ 0.00 0.00 ]
+Key: PSRLDQri: [ 0.00 0.00 ]
+Key: PSRLDri: [ 0.00 0.00 ]
+Key: PSRLDrm: [ 0.00 0.00 ]
+Key: PSRLDrr: [ 0.00 0.00 ]
+Key: PSRLQri: [ 0.00 0.00 ]
+Key: PSRLQrm: [ 0.00 0.00 ]
+Key: PSRLQrr: [ 0.00 0.00 ]
+Key: PSRLWri: [ 0.00 0.00 ]
+Key: PSRLWrm: [ 0.00 0.00 ]
+Key: PSRLWrr: [ 0.00 0.00 ]
+Key: PSUBBrm: [ 0.00 0.00 ]
+Key: PSUBBrr: [ 0.00 0.00 ]
+Key: PSUBDrm: [ 0.00 0.00 ]
+Key: PSUBDrr: [ 0.00 0.00 ]
+Key: PSUBQrm: [ 0.00 0.00 ]
+Key: PSUBQrr: [ 0.00 0.00 ]
+Key: PSUBSBrm: [ 0.00 0.00 ]
+Key: PSUBSBrr: [ 0.00 0.00 ]
+Key: PSUBSWrm: [ 0.00 0.00 ]
+Key: PSUBSWrr: [ 0.00 0.00 ]
+Key: PSUBUSBrm: [ 0.00 0.00 ]
+Key: PSUBUSBrr: [ 0.00 0.00 ]
+Key: PSUBUSWrm: [ 0.00 0.00 ]
+Key: PSUBUSWrr: [ 0.00 0.00 ]
+Key: PSUBWrm: [ 0.00 0.00 ]
+Key: PSUBWrr: [ 0.00 0.00 ]
+Key: PSWAPDrm: [ 0.00 0.00 ]
+Key: PSWAPDrr: [ 0.00 0.00 ]
+Key: PT: [ 0.00 0.00 ]
+Key: PTCMMIMFP: [ 0.00 0.00 ]
+Key: PTCMMRLFP: [ 0.00 0.00 ]
+Key: PTCONJTCMMIMFP: [ 0.00 0.00 ]
+Key: PTCONJTFP: [ 0.00 0.00 ]
+Key: PTCVTROWD: [ 0.00 0.00 ]
+Key: PTCVTROWPS: [ 0.00 0.00 ]
+Key: PTDPBF: [ 0.00 0.00 ]
+Key: PTDPBHF: [ 0.00 0.00 ]
+Key: PTDPBSSD: [ 0.00 0.00 ]
+Key: PTDPBSSDV: [ 0.00 0.00 ]
+Key: PTDPBSUD: [ 0.00 0.00 ]
+Key: PTDPBSUDV: [ 0.00 0.00 ]
+Key: PTDPBUSD: [ 0.00 0.00 ]
+Key: PTDPBUSDV: [ 0.00 0.00 ]
+Key: PTDPBUUD: [ 0.00 0.00 ]
+Key: PTDPBUUDV: [ 0.00 0.00 ]
+Key: PTDPFP: [ 0.00 0.00 ]
+Key: PTDPHBF: [ 0.00 0.00 ]
+Key: PTDPHF: [ 0.00 0.00 ]
+Key: PTESTrm: [ 0.00 0.00 ]
+Key: PTESTrr: [ 0.00 0.00 ]
+Key: PTILELOADD: [ 0.00 0.00 ]
+Key: PTILELOADDRS: [ 0.00 0.00 ]
+Key: PTILELOADDRST: [ 0.00 0.00 ]
+Key: PTILELOADDRSV: [ 0.00 0.00 ]
+Key: PTILELOADDT: [ 0.00 0.00 ]
+Key: PTILELOADDV: [ 0.00 0.00 ]
+Key: PTILEMOVROWrre: [ 0.00 0.00 ]
+Key: PTILEMOVROWrreV: [ 0.00 0.00 ]
+Key: PTILEMOVROWrri: [ 0.00 0.00 ]
+Key: PTILEMOVROWrriV: [ 0.00 0.00 ]
+Key: PTILEPAIRLOAD: [ 0.00 0.00 ]
+Key: PTILEPAIRSTORE: [ 0.00 0.00 ]
+Key: PTILESTORED: [ 0.00 0.00 ]
+Key: PTILESTOREDV: [ 0.00 0.00 ]
+Key: PTILEZERO: [ 0.00 0.00 ]
+Key: PTILEZEROV: [ 0.00 0.00 ]
+Key: PTMMULTF: [ 0.00 0.00 ]
+Key: PTTCMMIMFP: [ 0.00 0.00 ]
+Key: PTTCMMRLFP: [ 0.00 0.00 ]
+Key: PTTDPBF: [ 0.00 0.00 ]
+Key: PTTDPFP: [ 0.00 0.00 ]
+Key: PTTMMULTF: [ 0.00 0.00 ]
+Key: PTTRANSPOSED: [ 0.00 0.00 ]
+Key: PTTRANSPOSEDV: [ 0.00 0.00 ]
+Key: PTWRITE: [ 0.00 0.00 ]
+Key: PTWRITEm: [ 0.00 0.00 ]
+Key: PTWRITEr: [ 0.00 0.00 ]
+Key: PUNPCKHBWrm: [ 0.00 0.00 ]
+Key: PUNPCKHBWrr: [ 0.00 0.00 ]
+Key: PUNPCKHDQrm: [ 0.00 0.00 ]
+Key: PUNPCKHDQrr: [ 0.00 0.00 ]
+Key: PUNPCKHQDQrm: [ 0.00 0.00 ]
+Key: PUNPCKHQDQrr: [ 0.00 0.00 ]
+Key: PUNPCKHWDrm: [ 0.00 0.00 ]
+Key: PUNPCKHWDrr: [ 0.00 0.00 ]
+Key: PUNPCKLBWrm: [ 0.00 0.00 ]
+Key: PUNPCKLBWrr: [ 0.00 0.00 ]
+Key: PUNPCKLDQrm: [ 0.00 0.00 ]
+Key: PUNPCKLDQrr: [ 0.00 0.00 ]
+Key: PUNPCKLQDQrm: [ 0.00 0.00 ]
+Key: PUNPCKLQDQrr: [ 0.00 0.00 ]
+Key: PUNPCKLWDrm: [ 0.00 0.00 ]
+Key: PUNPCKLWDrr: [ 0.00 0.00 ]
+Key: PUSH: [ 0.00 0.00 ]
+Key: PUSHA: [ 0.00 0.00 ]
+Key: PUSHCS: [ 0.00 0.00 ]
+Key: PUSHDS: [ 0.00 0.00 ]
+Key: PUSHES: [ 0.00 0.00 ]
+Key: PUSHF: [ 0.00 0.00 ]
+Key: PUSHFS: [ 0.00 0.00 ]
+Key: PUSHGS: [ 0.00 0.00 ]
+Key: PUSHP: [ 0.00 0.00 ]
+Key: PUSHSS: [ 0.00 0.00 ]
+Key: PVALIDATE: [ 0.00 0.00 ]
+Key: PXORrm: [ 0.00 0.00 ]
+Key: PXORrr: [ 0.00 0.00 ]
+Key: RCL: [ 0.00 0.00 ]
+Key: RCPPSm: [ 0.00 0.00 ]
+Key: RCPPSr: [ 0.00 0.00 ]
+Key: RCPSSm: [ 0.00 0.00 ]
+Key: RCPSSm_Int: [ 0.00 0.00 ]
+Key: RCPSSr: [ 0.00 0.00 ]
+Key: RCPSSr_Int: [ 0.00 0.00 ]
+Key: RCR: [ 0.00 0.00 ]
+Key: RDFLAGS: [ 0.00 0.00 ]
+Key: RDFSBASE: [ 0.00 0.00 ]
+Key: RDGSBASE: [ 0.00 0.00 ]
+Key: RDMSR: [ 0.00 0.00 ]
+Key: RDMSRLIST: [ 0.00 0.00 ]
+Key: RDMSRri: [ 0.00 0.00 ]
+Key: RDMSRri_EVEX: [ 0.00 0.00 ]
+Key: RDPID: [ 0.00 0.00 ]
+Key: RDPKRUr: [ 0.00 0.00 ]
+Key: RDPMC: [ 0.00 0.00 ]
+Key: RDPRU: [ 0.00 0.00 ]
+Key: RDRAND: [ 0.00 0.00 ]
+Key: RDSEED: [ 0.00 0.00 ]
+Key: RDSSPD: [ 0.00 0.00 ]
+Key: RDSSPQ: [ 0.00 0.00 ]
+Key: RDTSC: [ 0.00 0.00 ]
+Key: RDTSCP: [ 0.00 0.00 ]
+Key: REG_SEQUENCE: [ 0.00 0.00 ]
+Key: REPNE_PREFIX: [ 0.00 0.00 ]
+Key: REP_MOVSB: [ 0.00 0.00 ]
+Key: REP_MOVSD: [ 0.00 0.00 ]
+Key: REP_MOVSQ: [ 0.00 0.00 ]
+Key: REP_MOVSW: [ 0.00 0.00 ]
+Key: REP_PREFIX: [ 0.00 0.00 ]
+Key: REP_STOSB: [ 0.00 0.00 ]
+Key: REP_STOSD: [ 0.00 0.00 ]
+Key: REP_STOSQ: [ 0.00 0.00 ]
+Key: REP_STOSW: [ 0.00 0.00 ]
+Key: RET: [ 0.00 0.00 ]
+Key: RETI: [ 0.00 0.00 ]
+Key: REX: [ 0.00 0.00 ]
+Key: RMPADJUST: [ 0.00 0.00 ]
+Key: RMPQUERY: [ 0.00 0.00 ]
+Key: RMPUPDATE: [ 0.00 0.00 ]
+Key: ROL: [ 0.00 0.00 ]
+Key: ROR: [ 0.00 0.00 ]
+Key: RORX: [ 0.00 0.00 ]
+Key: ROUNDPDmi: [ 0.00 0.00 ]
+Key: ROUNDPDri: [ 0.00 0.00 ]
+Key: ROUNDPSmi: [ 0.00 0.00 ]
+Key: ROUNDPSri: [ 0.00 0.00 ]
+Key: ROUNDSDmi: [ 0.00 0.00 ]
+Key: ROUNDSDmi_Int: [ 0.00 0.00 ]
+Key: ROUNDSDri: [ 0.00 0.00 ]
+Key: ROUNDSDri_Int: [ 0.00 0.00 ]
+Key: ROUNDSSmi: [ 0.00 0.00 ]
+Key: ROUNDSSmi_Int: [ 0.00 0.00 ]
+Key: ROUNDSSri: [ 0.00 0.00 ]
+Key: ROUNDSSri_Int: [ 0.00 0.00 ]
+Key: RSM: [ 0.00 0.00 ]
+Key: RSQRTPSm: [ 0.00 0.00 ]
+Key: RSQRTPSr: [ 0.00 0.00 ]
+Key: RSQRTSSm: [ 0.00 0.00 ]
+Key: RSQRTSSm_Int: [ 0.00 0.00 ]
+Key: RSQRTSSr: [ 0.00 0.00 ]
+Key: RSQRTSSr_Int: [ 0.00 0.00 ]
+Key: RSTORSSP: [ 0.00 0.00 ]
+Key: SAHF: [ 0.00 0.00 ]
+Key: SALC: [ 0.00 0.00 ]
+Key: SAR: [ 0.00 0.00 ]
+Key: SARX: [ 0.00 0.00 ]
+Key: SAVEPREVSSP: [ 0.00 0.00 ]
+Key: SBB: [ 0.00 0.00 ]
+Key: SCASB: [ 0.00 0.00 ]
+Key: SCASL: [ 0.00 0.00 ]
+Key: SCASQ: [ 0.00 0.00 ]
+Key: SCASW: [ 0.00 0.00 ]
+Key: SEAMCALL: [ 0.00 0.00 ]
+Key: SEAMOPS: [ 0.00 0.00 ]
+Key: SEAMRET: [ 0.00 0.00 ]
+Key: SEG_ALLOCA: [ 0.00 0.00 ]
+Key: SEH_BeginEpilogue: [ 0.00 0.00 ]
+Key: SEH_EndEpilogue: [ 0.00 0.00 ]
+Key: SEH_EndPrologue: [ 0.00 0.00 ]
+Key: SEH_PushFrame: [ 0.00 0.00 ]
+Key: SEH_PushReg: [ 0.00 0.00 ]
+Key: SEH_SaveReg: [ 0.00 0.00 ]
+Key: SEH_SaveXMM: [ 0.00 0.00 ]
+Key: SEH_SetFrame: [ 0.00 0.00 ]
+Key: SEH_StackAlign: [ 0.00 0.00 ]
+Key: SEH_StackAlloc: [ 0.00 0.00 ]
+Key: SEH_UnwindV: [ 0.00 0.00 ]
+Key: SEH_UnwindVersion: [ 0.00 0.00 ]
+Key: SENDUIPI: [ 0.00 0.00 ]
+Key: SERIALIZE: [ 0.00 0.00 ]
+Key: SETB_C: [ 0.00 0.00 ]
+Key: SETCCm: [ 0.00 0.00 ]
+Key: SETCCm_EVEX: [ 0.00 0.00 ]
+Key: SETCCr: [ 0.00 0.00 ]
+Key: SETCCr_EVEX: [ 0.00 0.00 ]
+Key: SETSSBSY: [ 0.00 0.00 ]
+Key: SETZUCCm: [ 0.00 0.00 ]
+Key: SETZUCCr: [ 0.00 0.00 ]
+Key: SFENCE: [ 0.00 0.00 ]
+Key: SGDT: [ 0.00 0.00 ]
+Key: SHA: [ 0.00 0.00 ]
+Key: SHL: [ 0.00 0.00 ]
+Key: SHLD: [ 0.00 0.00 ]
+Key: SHLDROT: [ 0.00 0.00 ]
+Key: SHLX: [ 0.00 0.00 ]
+Key: SHR: [ 0.00 0.00 ]
+Key: SHRD: [ 0.00 0.00 ]
+Key: SHRDROT: [ 0.00 0.00 ]
+Key: SHRX: [ 0.00 0.00 ]
+Key: SHUFPDrmi: [ 0.00 0.00 ]
+Key: SHUFPDrri: [ 0.00 0.00 ]
+Key: SHUFPSrmi: [ 0.00 0.00 ]
+Key: SHUFPSrri: [ 0.00 0.00 ]
+Key: SIDT: [ 0.00 0.00 ]
+Key: SKINIT: [ 0.00 0.00 ]
+Key: SLDT: [ 0.00 0.00 ]
+Key: SLWPCB: [ 0.00 0.00 ]
+Key: SMSW: [ 0.00 0.00 ]
+Key: SQRTPDm: [ 0.00 0.00 ]
+Key: SQRTPDr: [ 0.00 0.00 ]
+Key: SQRTPSm: [ 0.00 0.00 ]
+Key: SQRTPSr: [ 0.00 0.00 ]
+Key: SQRTSDm: [ 0.00 0.00 ]
+Key: SQRTSDm_Int: [ 0.00 0.00 ]
+Key: SQRTSDr: [ 0.00 0.00 ]
+Key: SQRTSDr_Int: [ 0.00 0.00 ]
+Key: SQRTSSm: [ 0.00 0.00 ]
+Key: SQRTSSm_Int: [ 0.00 0.00 ]
+Key: SQRTSSr: [ 0.00 0.00 ]
+Key: SQRTSSr_Int: [ 0.00 0.00 ]
+Key: SQRT_F: [ 0.00 0.00 ]
+Key: SQRT_Fp: [ 0.00 0.00 ]
+Key: SS_PREFIX: [ 0.00 0.00 ]
+Key: STAC: [ 0.00 0.00 ]
+Key: STACKALLOC_W_PROBING: [ 0.00 0.00 ]
+Key: STACKMAP: [ 0.00 0.00 ]
+Key: STATEPOINT: [ 0.00 0.00 ]
+Key: STC: [ 0.00 0.00 ]
+Key: STD: [ 0.00 0.00 ]
+Key: STGI: [ 0.00 0.00 ]
+Key: STI: [ 0.00 0.00 ]
+Key: STMXCSR: [ 0.00 0.00 ]
+Key: STOSB: [ 0.00 0.00 ]
+Key: STOSL: [ 0.00 0.00 ]
+Key: STOSQ: [ 0.00 0.00 ]
+Key: STOSW: [ 0.00 0.00 ]
+Key: STR: [ 0.00 0.00 ]
+Key: STRm: [ 0.00 0.00 ]
+Key: STTILECFG: [ 0.00 0.00 ]
+Key: STTILECFG_EVEX: [ 0.00 0.00 ]
+Key: STUI: [ 0.00 0.00 ]
+Key: ST_F: [ 0.00 0.00 ]
+Key: ST_FP: [ 0.00 0.00 ]
+Key: ST_FPrr: [ 0.00 0.00 ]
+Key: ST_Fp: [ 0.00 0.00 ]
+Key: ST_FpP: [ 0.00 0.00 ]
+Key: ST_Frr: [ 0.00 0.00 ]
+Key: SUB: [ 0.00 0.00 ]
+Key: SUBPDrm: [ 0.00 0.00 ]
+Key: SUBPDrr: [ 0.00 0.00 ]
+Key: SUBPSrm: [ 0.00 0.00 ]
+Key: SUBPSrr: [ 0.00 0.00 ]
+Key: SUBREG_TO_REG: [ 0.00 0.00 ]
+Key: SUBR_F: [ 0.00 0.00 ]
+Key: SUBR_FI: [ 0.00 0.00 ]
+Key: SUBR_FPrST: [ 0.00 0.00 ]
+Key: SUBR_FST: [ 0.00 0.00 ]
+Key: SUBR_Fp: [ 0.00 0.00 ]
+Key: SUBR_FpI: [ 0.00 0.00 ]
+Key: SUBR_FrST: [ 0.00 0.00 ]
+Key: SUBSDrm: [ 0.00 0.00 ]
+Key: SUBSDrm_Int: [ 0.00 0.00 ]
+Key: SUBSDrr: [ 0.00 0.00 ]
+Key: SUBSDrr_Int: [ 0.00 0.00 ]
+Key: SUBSSrm: [ 0.00 0.00 ]
+Key: SUBSSrm_Int: [ 0.00 0.00 ]
+Key: SUBSSrr: [ 0.00 0.00 ]
+Key: SUBSSrr_Int: [ 0.00 0.00 ]
+Key: SUB_F: [ 0.00 0.00 ]
+Key: SUB_FI: [ 0.00 0.00 ]
+Key: SUB_FPrST: [ 0.00 0.00 ]
+Key: SUB_FST: [ 0.00 0.00 ]
+Key: SUB_Fp: [ 0.00 0.00 ]
+Key: SUB_FpI: [ 0.00 0.00 ]
+Key: SUB_FrST: [ 0.00 0.00 ]
+Key: SWAPGS: [ 0.00 0.00 ]
+Key: SYSCALL: [ 0.00 0.00 ]
+Key: SYSENTER: [ 0.00 0.00 ]
+Key: SYSEXIT: [ 0.00 0.00 ]
+Key: SYSRET: [ 0.00 0.00 ]
+Key: T: [ 0.00 0.00 ]
+Key: TAILJMPd: [ 0.00 0.00 ]
+Key: TAILJMPd_CC: [ 0.00 0.00 ]
+Key: TAILJMPm: [ 0.00 0.00 ]
+Key: TAILJMPr: [ 0.00 0.00 ]
+Key: TCMMIMFP: [ 0.00 0.00 ]
+Key: TCMMRLFP: [ 0.00 0.00 ]
+Key: TCONJTCMMIMFP: [ 0.00 0.00 ]
+Key: TCONJTFP: [ 0.00 0.00 ]
+Key: TCRETURN_HIPE: [ 0.00 0.00 ]
+Key: TCRETURN_WIN: [ 0.00 0.00 ]
+Key: TCRETURN_WINmi: [ 0.00 0.00 ]
+Key: TCRETURNdi: [ 0.00 0.00 ]
+Key: TCRETURNdicc: [ 0.00 0.00 ]
+Key: TCRETURNmi: [ 0.00 0.00 ]
+Key: TCRETURNri: [ 0.00 0.00 ]
+Key: TCVTROWD: [ 0.00 0.00 ]
+Key: TCVTROWPS: [ 0.00 0.00 ]
+Key: TDCALL: [ 0.00 0.00 ]
+Key: TDPBF: [ 0.00 0.00 ]
+Key: TDPBHF: [ 0.00 0.00 ]
+Key: TDPBSSD: [ 0.00 0.00 ]
+Key: TDPBSUD: [ 0.00 0.00 ]
+Key: TDPBUSD: [ 0.00 0.00 ]
+Key: TDPBUUD: [ 0.00 0.00 ]
+Key: TDPFP: [ 0.00 0.00 ]
+Key: TDPHBF: [ 0.00 0.00 ]
+Key: TDPHF: [ 0.00 0.00 ]
+Key: TEST: [ 0.00 0.00 ]
+Key: TESTUI: [ 0.00 0.00 ]
+Key: TILELOADD: [ 0.00 0.00 ]
+Key: TILELOADDRS: [ 0.00 0.00 ]
+Key: TILELOADDRST: [ 0.00 0.00 ]
+Key: TILELOADDRS_EVEX: [ 0.00 0.00 ]
+Key: TILELOADDT: [ 0.00 0.00 ]
+Key: TILELOADD_EVEX: [ 0.00 0.00 ]
+Key: TILEMOVROWrre: [ 0.00 0.00 ]
+Key: TILEMOVROWrri: [ 0.00 0.00 ]
+Key: TILERELEASE: [ 0.00 0.00 ]
+Key: TILESTORED: [ 0.00 0.00 ]
+Key: TILESTORED_EVEX: [ 0.00 0.00 ]
+Key: TILEZERO: [ 0.00 0.00 ]
+Key: TLBSYNC: [ 0.00 0.00 ]
+Key: TLSCall: [ 0.00 0.00 ]
+Key: TLS_addr: [ 0.00 0.00 ]
+Key: TLS_addrX: [ 0.00 0.00 ]
+Key: TLS_base_addr: [ 0.00 0.00 ]
+Key: TLS_base_addrX: [ 0.00 0.00 ]
+Key: TLS_desc: [ 0.00 0.00 ]
+Key: TMMULTF: [ 0.00 0.00 ]
+Key: TPAUSE: [ 0.00 0.00 ]
+Key: TRAP: [ 0.00 0.00 ]
+Key: TST_F: [ 0.00 0.00 ]
+Key: TST_Fp: [ 0.00 0.00 ]
+Key: TTCMMIMFP: [ 0.00 0.00 ]
+Key: TTCMMRLFP: [ 0.00 0.00 ]
+Key: TTDPBF: [ 0.00 0.00 ]
+Key: TTDPFP: [ 0.00 0.00 ]
+Key: TTMMULTF: [ 0.00 0.00 ]
+Key: TTRANSPOSED: [ 0.00 0.00 ]
+Key: TZCNT: [ 0.00 0.00 ]
+Key: TZMSK: [ 0.00 0.00 ]
+Key: UBSAN_UD: [ 0.00 0.00 ]
+Key: UCOMISDrm: [ 0.00 0.00 ]
+Key: UCOMISDrm_Int: [ 0.00 0.00 ]
+Key: UCOMISDrr: [ 0.00 0.00 ]
+Key: UCOMISDrr_Int: [ 0.00 0.00 ]
+Key: UCOMISSrm: [ 0.00 0.00 ]
+Key: UCOMISSrm_Int: [ 0.00 0.00 ]
+Key: UCOMISSrr: [ 0.00 0.00 ]
+Key: UCOMISSrr_Int: [ 0.00 0.00 ]
+Key: UCOM_FIPr: [ 0.00 0.00 ]
+Key: UCOM_FIr: [ 0.00 0.00 ]
+Key: UCOM_FPPr: [ 0.00 0.00 ]
+Key: UCOM_FPr: [ 0.00 0.00 ]
+Key: UCOM_FpIr: [ 0.00 0.00 ]
+Key: UCOM_Fpr: [ 0.00 0.00 ]
+Key: UCOM_Fr: [ 0.00 0.00 ]
+Key: UD: [ 0.00 0.00 ]
+Key: UIRET: [ 0.00 0.00 ]
+Key: UMONITOR: [ 0.00 0.00 ]
+Key: UMWAIT: [ 0.00 0.00 ]
+Key: UNPCKHPDrm: [ 0.00 0.00 ]
+Key: UNPCKHPDrr: [ 0.00 0.00 ]
+Key: UNPCKHPSrm: [ 0.00 0.00 ]
+Key: UNPCKHPSrr: [ 0.00 0.00 ]
+Key: UNPCKLPDrm: [ 0.00 0.00 ]
+Key: UNPCKLPDrr: [ 0.00 0.00 ]
+Key: UNPCKLPSrm: [ 0.00 0.00 ]
+Key: UNPCKLPSrr: [ 0.00 0.00 ]
+Key: URDMSRri: [ 0.00 0.00 ]
+Key: URDMSRri_EVEX: [ 0.00 0.00 ]
+Key: URDMSRrr: [ 0.00 0.00 ]
+Key: URDMSRrr_EVEX: [ 0.00 0.00 ]
+Key: UWRMSRir: [ 0.00 0.00 ]
+Key: UWRMSRir_EVEX: [ 0.00 0.00 ]
+Key: UWRMSRrr: [ 0.00 0.00 ]
+Key: UWRMSRrr_EVEX: [ 0.00 0.00 ]
+Key: V: [ 0.00 0.00 ]
+Key: VAARG: [ 0.00 0.00 ]
+Key: VAARG_X: [ 0.00 0.00 ]
+Key: VADDBF: [ 0.00 0.00 ]
+Key: VADDPDYrm: [ 0.00 0.00 ]
+Key: VADDPDYrr: [ 0.00 0.00 ]
+Key: VADDPDZ: [ 0.00 0.00 ]
+Key: VADDPDZrm: [ 0.00 0.00 ]
+Key: VADDPDZrmb: [ 0.00 0.00 ]
+Key: VADDPDZrmbk: [ 0.00 0.00 ]
+Key: VADDPDZrmbkz: [ 0.00 0.00 ]
+Key: VADDPDZrmk: [ 0.00 0.00 ]
+Key: VADDPDZrmkz: [ 0.00 0.00 ]
+Key: VADDPDZrr: [ 0.00 0.00 ]
+Key: VADDPDZrrb: [ 0.00 0.00 ]
+Key: VADDPDZrrbk: [ 0.00 0.00 ]
+Key: VADDPDZrrbkz: [ 0.00 0.00 ]
+Key: VADDPDZrrk: [ 0.00 0.00 ]
+Key: VADDPDZrrkz: [ 0.00 0.00 ]
+Key: VADDPDrm: [ 0.00 0.00 ]
+Key: VADDPDrr: [ 0.00 0.00 ]
+Key: VADDPHZ: [ 0.00 0.00 ]
+Key: VADDPHZrm: [ 0.00 0.00 ]
+Key: VADDPHZrmb: [ 0.00 0.00 ]
+Key: VADDPHZrmbk: [ 0.00 0.00 ]
+Key: VADDPHZrmbkz: [ 0.00 0.00 ]
+Key: VADDPHZrmk: [ 0.00 0.00 ]
+Key: VADDPHZrmkz: [ 0.00 0.00 ]
+Key: VADDPHZrr: [ 0.00 0.00 ]
+Key: VADDPHZrrb: [ 0.00 0.00 ]
+Key: VADDPHZrrbk: [ 0.00 0.00 ]
+Key: VADDPHZrrbkz: [ 0.00 0.00 ]
+Key: VADDPHZrrk: [ 0.00 0.00 ]
+Key: VADDPHZrrkz: [ 0.00 0.00 ]
+Key: VADDPSYrm: [ 0.00 0.00 ]
+Key: VADDPSYrr: [ 0.00 0.00 ]
+Key: VADDPSZ: [ 0.00 0.00 ]
+Key: VADDPSZrm: [ 0.00 0.00 ]
+Key: VADDPSZrmb: [ 0.00 0.00 ]
+Key: VADDPSZrmbk: [ 0.00 0.00 ]
+Key: VADDPSZrmbkz: [ 0.00 0.00 ]
+Key: VADDPSZrmk: [ 0.00 0.00 ]
+Key: VADDPSZrmkz: [ 0.00 0.00 ]
+Key: VADDPSZrr: [ 0.00 0.00 ]
+Key: VADDPSZrrb: [ 0.00 0.00 ]
+Key: VADDPSZrrbk: [ 0.00 0.00 ]
+Key: VADDPSZrrbkz: [ 0.00 0.00 ]
+Key: VADDPSZrrk: [ 0.00 0.00 ]
+Key: VADDPSZrrkz: [ 0.00 0.00 ]
+Key: VADDPSrm: [ 0.00 0.00 ]
+Key: VADDPSrr: [ 0.00 0.00 ]
+Key: VADDSDZrm: [ 0.00 0.00 ]
+Key: VADDSDZrm_Int: [ 0.00 0.00 ]
+Key: VADDSDZrmk_Int: [ 0.00 0.00 ]
+Key: VADDSDZrmkz_Int: [ 0.00 0.00 ]
+Key: VADDSDZrr: [ 0.00 0.00 ]
+Key: VADDSDZrr_Int: [ 0.00 0.00 ]
+Key: VADDSDZrrb_Int: [ 0.00 0.00 ]
+Key: VADDSDZrrbk_Int: [ 0.00 0.00 ]
+Key: VADDSDZrrbkz_Int: [ 0.00 0.00 ]
+Key: VADDSDZrrk_Int: [ 0.00 0.00 ]
+Key: VADDSDZrrkz_Int: [ 0.00 0.00 ]
+Key: VADDSDrm: [ 0.00 0.00 ]
+Key: VADDSDrm_Int: [ 0.00 0.00 ]
+Key: VADDSDrr: [ 0.00 0.00 ]
+Key: VADDSDrr_Int: [ 0.00 0.00 ]
+Key: VADDSHZrm: [ 0.00 0.00 ]
+Key: VADDSHZrm_Int: [ 0.00 0.00 ]
+Key: VADDSHZrmk_Int: [ 0.00 0.00 ]
+Key: VADDSHZrmkz_Int: [ 0.00 0.00 ]
+Key: VADDSHZrr: [ 0.00 0.00 ]
+Key: VADDSHZrr_Int: [ 0.00 0.00 ]
+Key: VADDSHZrrb_Int: [ 0.00 0.00 ]
+Key: VADDSHZrrbk_Int: [ 0.00 0.00 ]
+Key: VADDSHZrrbkz_Int: [ 0.00 0.00 ]
+Key: VADDSHZrrk_Int: [ 0.00 0.00 ]
+Key: VADDSHZrrkz_Int: [ 0.00 0.00 ]
+Key: VADDSSZrm: [ 0.00 0.00 ]
+Key: VADDSSZrm_Int: [ 0.00 0.00 ]
+Key: VADDSSZrmk_Int: [ 0.00 0.00 ]
+Key: VADDSSZrmkz_Int: [ 0.00 0.00 ]
+Key: VADDSSZrr: [ 0.00 0.00 ]
+Key: VADDSSZrr_Int: [ 0.00 0.00 ]
+Key: VADDSSZrrb_Int: [ 0.00 0.00 ]
+Key: VADDSSZrrbk_Int: [ 0.00 0.00 ]
+Key: VADDSSZrrbkz_Int: [ 0.00 0.00 ]
+Key: VADDSSZrrk_Int: [ 0.00 0.00 ]
+Key: VADDSSZrrkz_Int: [ 0.00 0.00 ]
+Key: VADDSSrm: [ 0.00 0.00 ]
+Key: VADDSSrm_Int: [ 0.00 0.00 ]
+Key: VADDSSrr: [ 0.00 0.00 ]
+Key: VADDSSrr_Int: [ 0.00 0.00 ]
+Key: VADDSUBPDYrm: [ 0.00 0.00 ]
+Key: VADDSUBPDYrr: [ 0.00 0.00 ]
+Key: VADDSUBPDrm: [ 0.00 0.00 ]
+Key: VADDSUBPDrr: [ 0.00 0.00 ]
+Key: VADDSUBPSYrm: [ 0.00 0.00 ]
+Key: VADDSUBPSYrr: [ 0.00 0.00 ]
+Key: VADDSUBPSrm: [ 0.00 0.00 ]
+Key: VADDSUBPSrr: [ 0.00 0.00 ]
+Key: VAESDECLASTYrm: [ 0.00 0.00 ]
+Key: VAESDECLASTYrr: [ 0.00 0.00 ]
+Key: VAESDECLASTZ: [ 0.00 0.00 ]
+Key: VAESDECLASTZrm: [ 0.00 0.00 ]
+Key: VAESDECLASTZrr: [ 0.00 0.00 ]
+Key: VAESDECLASTrm: [ 0.00 0.00 ]
+Key: VAESDECLASTrr: [ 0.00 0.00 ]
+Key: VAESDECYrm: [ 0.00 0.00 ]
+Key: VAESDECYrr: [ 0.00 0.00 ]
+Key: VAESDECZ: [ 0.00 0.00 ]
+Key: VAESDECZrm: [ 0.00 0.00 ]
+Key: VAESDECZrr: [ 0.00 0.00 ]
+Key: VAESDECrm: [ 0.00 0.00 ]
+Key: VAESDECrr: [ 0.00 0.00 ]
+Key: VAESENCLASTYrm: [ 0.00 0.00 ]
+Key: VAESENCLASTYrr: [ 0.00 0.00 ]
+Key: VAESENCLASTZ: [ 0.00 0.00 ]
+Key: VAESENCLASTZrm: [ 0.00 0.00 ]
+Key: VAESENCLASTZrr: [ 0.00 0.00 ]
+Key: VAESENCLASTrm: [ 0.00 0.00 ]
+Key: VAESENCLASTrr: [ 0.00 0.00 ]
+Key: VAESENCYrm: [ 0.00 0.00 ]
+Key: VAESENCYrr: [ 0.00 0.00 ]
+Key: VAESENCZ: [ 0.00 0.00 ]
+Key: VAESENCZrm: [ 0.00 0.00 ]
+Key: VAESENCZrr: [ 0.00 0.00 ]
+Key: VAESENCrm: [ 0.00 0.00 ]
+Key: VAESENCrr: [ 0.00 0.00 ]
+Key: VAESIMCrm: [ 0.00 0.00 ]
+Key: VAESIMCrr: [ 0.00 0.00 ]
+Key: VAESKEYGENASSISTrmi: [ 0.00 0.00 ]
+Key: VAESKEYGENASSISTrri: [ 0.00 0.00 ]
+Key: VALIGNDZ: [ 0.00 0.00 ]
+Key: VALIGNDZrmbi: [ 0.00 0.00 ]
+Key: VALIGNDZrmbik: [ 0.00 0.00 ]
+Key: VALIGNDZrmbikz: [ 0.00 0.00 ]
+Key: VALIGNDZrmi: [ 0.00 0.00 ]
+Key: VALIGNDZrmik: [ 0.00 0.00 ]
+Key: VALIGNDZrmikz: [ 0.00 0.00 ]
+Key: VALIGNDZrri: [ 0.00 0.00 ]
+Key: VALIGNDZrrik: [ 0.00 0.00 ]
+Key: VALIGNDZrrikz: [ 0.00 0.00 ]
+Key: VALIGNQZ: [ 0.00 0.00 ]
+Key: VALIGNQZrmbi: [ 0.00 0.00 ]
+Key: VALIGNQZrmbik: [ 0.00 0.00 ]
+Key: VALIGNQZrmbikz: [ 0.00 0.00 ]
+Key: VALIGNQZrmi: [ 0.00 0.00 ]
+Key: VALIGNQZrmik: [ 0.00 0.00 ]
+Key: VALIGNQZrmikz: [ 0.00 0.00 ]
+Key: VALIGNQZrri: [ 0.00 0.00 ]
+Key: VALIGNQZrrik: [ 0.00 0.00 ]
+Key: VALIGNQZrrikz: [ 0.00 0.00 ]
+Key: VANDNPDYrm: [ 0.00 0.00 ]
+Key: VANDNPDYrr: [ 0.00 0.00 ]
+Key: VANDNPDZ: [ 0.00 0.00 ]
+Key: VANDNPDZrm: [ 0.00 0.00 ]
+Key: VANDNPDZrmb: [ 0.00 0.00 ]
+Key: VANDNPDZrmbk: [ 0.00 0.00 ]
+Key: VANDNPDZrmbkz: [ 0.00 0.00 ]
+Key: VANDNPDZrmk: [ 0.00 0.00 ]
+Key: VANDNPDZrmkz: [ 0.00 0.00 ]
+Key: VANDNPDZrr: [ 0.00 0.00 ]
+Key: VANDNPDZrrk: [ 0.00 0.00 ]
+Key: VANDNPDZrrkz: [ 0.00 0.00 ]
+Key: VANDNPDrm: [ 0.00 0.00 ]
+Key: VANDNPDrr: [ 0.00 0.00 ]
+Key: VANDNPSYrm: [ 0.00 0.00 ]
+Key: VANDNPSYrr: [ 0.00 0.00 ]
+Key: VANDNPSZ: [ 0.00 0.00 ]
+Key: VANDNPSZrm: [ 0.00 0.00 ]
+Key: VANDNPSZrmb: [ 0.00 0.00 ]
+Key: VANDNPSZrmbk: [ 0.00 0.00 ]
+Key: VANDNPSZrmbkz: [ 0.00 0.00 ]
+Key: VANDNPSZrmk: [ 0.00 0.00 ]
+Key: VANDNPSZrmkz: [ 0.00 0.00 ]
+Key: VANDNPSZrr: [ 0.00 0.00 ]
+Key: VANDNPSZrrk: [ 0.00 0.00 ]
+Key: VANDNPSZrrkz: [ 0.00 0.00 ]
+Key: VANDNPSrm: [ 0.00 0.00 ]
+Key: VANDNPSrr: [ 0.00 0.00 ]
+Key: VANDPDYrm: [ 0.00 0.00 ]
+Key: VANDPDYrr: [ 0.00 0.00 ]
+Key: VANDPDZ: [ 0.00 0.00 ]
+Key: VANDPDZrm: [ 0.00 0.00 ]
+Key: VANDPDZrmb: [ 0.00 0.00 ]
+Key: VANDPDZrmbk: [ 0.00 0.00 ]
+Key: VANDPDZrmbkz: [ 0.00 0.00 ]
+Key: VANDPDZrmk: [ 0.00 0.00 ]
+Key: VANDPDZrmkz: [ 0.00 0.00 ]
+Key: VANDPDZrr: [ 0.00 0.00 ]
+Key: VANDPDZrrk: [ 0.00 0.00 ]
+Key: VANDPDZrrkz: [ 0.00 0.00 ]
+Key: VANDPDrm: [ 0.00 0.00 ]
+Key: VANDPDrr: [ 0.00 0.00 ]
+Key: VANDPSYrm: [ 0.00 0.00 ]
+Key: VANDPSYrr: [ 0.00 0.00 ]
+Key: VANDPSZ: [ 0.00 0.00 ]
+Key: VANDPSZrm: [ 0.00 0.00 ]
+Key: VANDPSZrmb: [ 0.00 0.00 ]
+Key: VANDPSZrmbk: [ 0.00 0.00 ]
+Key: VANDPSZrmbkz: [ 0.00 0.00 ]
+Key: VANDPSZrmk: [ 0.00 0.00 ]
+Key: VANDPSZrmkz: [ 0.00 0.00 ]
+Key: VANDPSZrr: [ 0.00 0.00 ]
+Key: VANDPSZrrk: [ 0.00 0.00 ]
+Key: VANDPSZrrkz: [ 0.00 0.00 ]
+Key: VANDPSrm: [ 0.00 0.00 ]
+Key: VANDPSrr: [ 0.00 0.00 ]
+Key: VASTART_SAVE_XMM_REGS: [ 0.00 0.00 ]
+Key: VBCSTNEBF: [ 0.00 0.00 ]
+Key: VBCSTNESH: [ 0.00 0.00 ]
+Key: VBLENDMPDZ: [ 0.00 0.00 ]
+Key: VBLENDMPDZrm: [ 0.00 0.00 ]
+Key: VBLENDMPDZrmb: [ 0.00 0.00 ]
+Key: VBLENDMPDZrmbk: [ 0.00 0.00 ]
+Key: VBLENDMPDZrmbkz: [ 0.00 0.00 ]
+Key: VBLENDMPDZrmk: [ 0.00 0.00 ]
+Key: VBLENDMPDZrmkz: [ 0.00 0.00 ]
+Key: VBLENDMPDZrr: [ 0.00 0.00 ]
+Key: VBLENDMPDZrrk: [ 0.00 0.00 ]
+Key: VBLENDMPDZrrkz: [ 0.00 0.00 ]
+Key: VBLENDMPSZ: [ 0.00 0.00 ]
+Key: VBLENDMPSZrm: [ 0.00 0.00 ]
+Key: VBLENDMPSZrmb: [ 0.00 0.00 ]
+Key: VBLENDMPSZrmbk: [ 0.00 0.00 ]
+Key: VBLENDMPSZrmbkz: [ 0.00 0.00 ]
+Key: VBLENDMPSZrmk: [ 0.00 0.00 ]
+Key: VBLENDMPSZrmkz: [ 0.00 0.00 ]
+Key: VBLENDMPSZrr: [ 0.00 0.00 ]
+Key: VBLENDMPSZrrk: [ 0.00 0.00 ]
+Key: VBLENDMPSZrrkz: [ 0.00 0.00 ]
+Key: VBLENDPDYrmi: [ 0.00 0.00 ]
+Key: VBLENDPDYrri: [ 0.00 0.00 ]
+Key: VBLENDPDrmi: [ 0.00 0.00 ]
+Key: VBLENDPDrri: [ 0.00 0.00 ]
+Key: VBLENDPSYrmi: [ 0.00 0.00 ]
+Key: VBLENDPSYrri: [ 0.00 0.00 ]
+Key: VBLENDPSrmi: [ 0.00 0.00 ]
+Key: VBLENDPSrri: [ 0.00 0.00 ]
+Key: VBLENDVPDYrmr: [ 0.00 0.00 ]
+Key: VBLENDVPDYrrr: [ 0.00 0.00 ]
+Key: VBLENDVPDrmr: [ 0.00 0.00 ]
+Key: VBLENDVPDrrr: [ 0.00 0.00 ]
+Key: VBLENDVPSYrmr: [ 0.00 0.00 ]
+Key: VBLENDVPSYrrr: [ 0.00 0.00 ]
+Key: VBLENDVPSrmr: [ 0.00 0.00 ]
+Key: VBLENDVPSrrr: [ 0.00 0.00 ]
+Key: VBROADCASTF: [ 0.00 0.00 ]
+Key: VBROADCASTI: [ 0.00 0.00 ]
+Key: VBROADCASTSDYrm: [ 0.00 0.00 ]
+Key: VBROADCASTSDYrr: [ 0.00 0.00 ]
+Key: VBROADCASTSDZ: [ 0.00 0.00 ]
+Key: VBROADCASTSDZrm: [ 0.00 0.00 ]
+Key: VBROADCASTSDZrmk: [ 0.00 0.00 ]
+Key: VBROADCASTSDZrmkz: [ 0.00 0.00 ]
+Key: VBROADCASTSDZrr: [ 0.00 0.00 ]
+Key: VBROADCASTSDZrrk: [ 0.00 0.00 ]
+Key: VBROADCASTSDZrrkz: [ 0.00 0.00 ]
+Key: VBROADCASTSSYrm: [ 0.00 0.00 ]
+Key: VBROADCASTSSYrr: [ 0.00 0.00 ]
+Key: VBROADCASTSSZ: [ 0.00 0.00 ]
+Key: VBROADCASTSSZrm: [ 0.00 0.00 ]
+Key: VBROADCASTSSZrmk: [ 0.00 0.00 ]
+Key: VBROADCASTSSZrmkz: [ 0.00 0.00 ]
+Key: VBROADCASTSSZrr: [ 0.00 0.00 ]
+Key: VBROADCASTSSZrrk: [ 0.00 0.00 ]
+Key: VBROADCASTSSZrrkz: [ 0.00 0.00 ]
+Key: VBROADCASTSSrm: [ 0.00 0.00 ]
+Key: VBROADCASTSSrr: [ 0.00 0.00 ]
+Key: VCMPBF: [ 0.00 0.00 ]
+Key: VCMPPDYrmi: [ 0.00 0.00 ]
+Key: VCMPPDYrri: [ 0.00 0.00 ]
+Key: VCMPPDZ: [ 0.00 0.00 ]
+Key: VCMPPDZrmbi: [ 0.00 0.00 ]
+Key: VCMPPDZrmbik: [ 0.00 0.00 ]
+Key: VCMPPDZrmi: [ 0.00 0.00 ]
+Key: VCMPPDZrmik: [ 0.00 0.00 ]
+Key: VCMPPDZrri: [ 0.00 0.00 ]
+Key: VCMPPDZrrib: [ 0.00 0.00 ]
+Key: VCMPPDZrribk: [ 0.00 0.00 ]
+Key: VCMPPDZrrik: [ 0.00 0.00 ]
+Key: VCMPPDrmi: [ 0.00 0.00 ]
+Key: VCMPPDrri: [ 0.00 0.00 ]
+Key: VCMPPHZ: [ 0.00 0.00 ]
+Key: VCMPPHZrmbi: [ 0.00 0.00 ]
+Key: VCMPPHZrmbik: [ 0.00 0.00 ]
+Key: VCMPPHZrmi: [ 0.00 0.00 ]
+Key: VCMPPHZrmik: [ 0.00 0.00 ]
+Key: VCMPPHZrri: [ 0.00 0.00 ]
+Key: VCMPPHZrrib: [ 0.00 0.00 ]
+Key: VCMPPHZrribk: [ 0.00 0.00 ]
+Key: VCMPPHZrrik: [ 0.00 0.00 ]
+Key: VCMPPSYrmi: [ 0.00 0.00 ]
+Key: VCMPPSYrri: [ 0.00 0.00 ]
+Key: VCMPPSZ: [ 0.00 0.00 ]
+Key: VCMPPSZrmbi: [ 0.00 0.00 ]
+Key: VCMPPSZrmbik: [ 0.00 0.00 ]
+Key: VCMPPSZrmi: [ 0.00 0.00 ]
+Key: VCMPPSZrmik: [ 0.00 0.00 ]
+Key: VCMPPSZrri: [ 0.00 0.00 ]
+Key: VCMPPSZrrib: [ 0.00 0.00 ]
+Key: VCMPPSZrribk: [ 0.00 0.00 ]
+Key: VCMPPSZrrik: [ 0.00 0.00 ]
+Key: VCMPPSrmi: [ 0.00 0.00 ]
+Key: VCMPPSrri: [ 0.00 0.00 ]
+Key: VCMPSDZrmi: [ 0.00 0.00 ]
+Key: VCMPSDZrmi_Int: [ 0.00 0.00 ]
+Key: VCMPSDZrmik_Int: [ 0.00 0.00 ]
+Key: VCMPSDZrri: [ 0.00 0.00 ]
+Key: VCMPSDZrri_Int: [ 0.00 0.00 ]
+Key: VCMPSDZrrib_Int: [ 0.00 0.00 ]
+Key: VCMPSDZrribk_Int: [ 0.00 0.00 ]
+Key: VCMPSDZrrik_Int: [ 0.00 0.00 ]
+Key: VCMPSDrmi: [ 0.00 0.00 ]
+Key: VCMPSDrmi_Int: [ 0.00 0.00 ]
+Key: VCMPSDrri: [ 0.00 0.00 ]
+Key: VCMPSDrri_Int: [ 0.00 0.00 ]
+Key: VCMPSHZrmi: [ 0.00 0.00 ]
+Key: VCMPSHZrmi_Int: [ 0.00 0.00 ]
+Key: VCMPSHZrmik_Int: [ 0.00 0.00 ]
+Key: VCMPSHZrri: [ 0.00 0.00 ]
+Key: VCMPSHZrri_Int: [ 0.00 0.00 ]
+Key: VCMPSHZrrib_Int: [ 0.00 0.00 ]
+Key: VCMPSHZrribk_Int: [ 0.00 0.00 ]
+Key: VCMPSHZrrik_Int: [ 0.00 0.00 ]
+Key: VCMPSSZrmi: [ 0.00 0.00 ]
+Key: VCMPSSZrmi_Int: [ 0.00 0.00 ]
+Key: VCMPSSZrmik_Int: [ 0.00 0.00 ]
+Key: VCMPSSZrri: [ 0.00 0.00 ]
+Key: VCMPSSZrri_Int: [ 0.00 0.00 ]
+Key: VCMPSSZrrib_Int: [ 0.00 0.00 ]
+Key: VCMPSSZrribk_Int: [ 0.00 0.00 ]
+Key: VCMPSSZrrik_Int: [ 0.00 0.00 ]
+Key: VCMPSSrmi: [ 0.00 0.00 ]
+Key: VCMPSSrmi_Int: [ 0.00 0.00 ]
+Key: VCMPSSrri: [ 0.00 0.00 ]
+Key: VCMPSSrri_Int: [ 0.00 0.00 ]
+Key: VCOMISBF: [ 0.00 0.00 ]
+Key: VCOMISDZrm: [ 0.00 0.00 ]
+Key: VCOMISDZrm_Int: [ 0.00 0.00 ]
+Key: VCOMISDZrr: [ 0.00 0.00 ]
+Key: VCOMISDZrr_Int: [ 0.00 0.00 ]
+Key: VCOMISDZrrb: [ 0.00 0.00 ]
+Key: VCOMISDrm: [ 0.00 0.00 ]
+Key: VCOMISDrm_Int: [ 0.00 0.00 ]
+Key: VCOMISDrr: [ 0.00 0.00 ]
+Key: VCOMISDrr_Int: [ 0.00 0.00 ]
+Key: VCOMISHZrm: [ 0.00 0.00 ]
+Key: VCOMISHZrm_Int: [ 0.00 0.00 ]
+Key: VCOMISHZrr: [ 0.00 0.00 ]
+Key: VCOMISHZrr_Int: [ 0.00 0.00 ]
+Key: VCOMISHZrrb: [ 0.00 0.00 ]
+Key: VCOMISSZrm: [ 0.00 0.00 ]
+Key: VCOMISSZrm_Int: [ 0.00 0.00 ]
+Key: VCOMISSZrr: [ 0.00 0.00 ]
+Key: VCOMISSZrr_Int: [ 0.00 0.00 ]
+Key: VCOMISSZrrb: [ 0.00 0.00 ]
+Key: VCOMISSrm: [ 0.00 0.00 ]
+Key: VCOMISSrm_Int: [ 0.00 0.00 ]
+Key: VCOMISSrr: [ 0.00 0.00 ]
+Key: VCOMISSrr_Int: [ 0.00 0.00 ]
+Key: VCOMPRESSPDZ: [ 0.00 0.00 ]
+Key: VCOMPRESSPDZmr: [ 0.00 0.00 ]
+Key: VCOMPRESSPDZmrk: [ 0.00 0.00 ]
+Key: VCOMPRESSPDZrr: [ 0.00 0.00 ]
+Key: VCOMPRESSPDZrrk: [ 0.00 0.00 ]
+Key: VCOMPRESSPDZrrkz: [ 0.00 0.00 ]
+Key: VCOMPRESSPSZ: [ 0.00 0.00 ]
+Key: VCOMPRESSPSZmr: [ 0.00 0.00 ]
+Key: VCOMPRESSPSZmrk: [ 0.00 0.00 ]
+Key: VCOMPRESSPSZrr: [ 0.00 0.00 ]
+Key: VCOMPRESSPSZrrk: [ 0.00 0.00 ]
+Key: VCOMPRESSPSZrrkz: [ 0.00 0.00 ]
+Key: VCOMXSDZrm_Int: [ 0.00 0.00 ]
+Key: VCOMXSDZrr_Int: [ 0.00 0.00 ]
+Key: VCOMXSDZrrb_Int: [ 0.00 0.00 ]
+Key: VCOMXSHZrm_Int: [ 0.00 0.00 ]
+Key: VCOMXSHZrr_Int: [ 0.00 0.00 ]
+Key: VCOMXSHZrrb_Int: [ 0.00 0.00 ]
+Key: VCOMXSSZrm_Int: [ 0.00 0.00 ]
+Key: VCOMXSSZrr_Int: [ 0.00 0.00 ]
+Key: VCOMXSSZrrb_Int: [ 0.00 0.00 ]
+Key: VCVT: [ 0.00 0.00 ]
+Key: VCVTBF: [ 0.00 0.00 ]
+Key: VCVTBIASPH: [ 0.00 0.00 ]
+Key: VCVTDQ: [ 0.00 0.00 ]
+Key: VCVTHF: [ 0.00 0.00 ]
+Key: VCVTNE: [ 0.00 0.00 ]
+Key: VCVTNEEBF: [ 0.00 0.00 ]
+Key: VCVTNEEPH: [ 0.00 0.00 ]
+Key: VCVTNEOBF: [ 0.00 0.00 ]
+Key: VCVTNEOPH: [ 0.00 0.00 ]
+Key: VCVTNEPS: [ 0.00 0.00 ]
+Key: VCVTPD: [ 0.00 0.00 ]
+Key: VCVTPH: [ 0.00 0.00 ]
+Key: VCVTPS: [ 0.00 0.00 ]
+Key: VCVTQQ: [ 0.00 0.00 ]
+Key: VCVTSD: [ 0.00 0.00 ]
+Key: VCVTSH: [ 0.00 0.00 ]
+Key: VCVTSI: [ 0.00 0.00 ]
+Key: VCVTSS: [ 0.00 0.00 ]
+Key: VCVTTBF: [ 0.00 0.00 ]
+Key: VCVTTPD: [ 0.00 0.00 ]
+Key: VCVTTPH: [ 0.00 0.00 ]
+Key: VCVTTPS: [ 0.00 0.00 ]
+Key: VCVTTSD: [ 0.00 0.00 ]
+Key: VCVTTSH: [ 0.00 0.00 ]
+Key: VCVTTSS: [ 0.00 0.00 ]
+Key: VCVTUDQ: [ 0.00 0.00 ]
+Key: VCVTUQQ: [ 0.00 0.00 ]
+Key: VCVTUSI: [ 0.00 0.00 ]
+Key: VCVTUW: [ 0.00 0.00 ]
+Key: VCVTW: [ 0.00 0.00 ]
+Key: VDBPSADBWZ: [ 0.00 0.00 ]
+Key: VDBPSADBWZrmi: [ 0.00 0.00 ]
+Key: VDBPSADBWZrmik: [ 0.00 0.00 ]
+Key: VDBPSADBWZrmikz: [ 0.00 0.00 ]
+Key: VDBPSADBWZrri: [ 0.00 0.00 ]
+Key: VDBPSADBWZrrik: [ 0.00 0.00 ]
+Key: VDBPSADBWZrrikz: [ 0.00 0.00 ]
+Key: VDIVBF: [ 0.00 0.00 ]
+Key: VDIVPDYrm: [ 0.00 0.00 ]
+Key: VDIVPDYrr: [ 0.00 0.00 ]
+Key: VDIVPDZ: [ 0.00 0.00 ]
+Key: VDIVPDZrm: [ 0.00 0.00 ]
+Key: VDIVPDZrmb: [ 0.00 0.00 ]
+Key: VDIVPDZrmbk: [ 0.00 0.00 ]
+Key: VDIVPDZrmbkz: [ 0.00 0.00 ]
+Key: VDIVPDZrmk: [ 0.00 0.00 ]
+Key: VDIVPDZrmkz: [ 0.00 0.00 ]
+Key: VDIVPDZrr: [ 0.00 0.00 ]
+Key: VDIVPDZrrb: [ 0.00 0.00 ]
+Key: VDIVPDZrrbk: [ 0.00 0.00 ]
+Key: VDIVPDZrrbkz: [ 0.00 0.00 ]
+Key: VDIVPDZrrk: [ 0.00 0.00 ]
+Key: VDIVPDZrrkz: [ 0.00 0.00 ]
+Key: VDIVPDrm: [ 0.00 0.00 ]
+Key: VDIVPDrr: [ 0.00 0.00 ]
+Key: VDIVPHZ: [ 0.00 0.00 ]
+Key: VDIVPHZrm: [ 0.00 0.00 ]
+Key: VDIVPHZrmb: [ 0.00 0.00 ]
+Key: VDIVPHZrmbk: [ 0.00 0.00 ]
+Key: VDIVPHZrmbkz: [ 0.00 0.00 ]
+Key: VDIVPHZrmk: [ 0.00 0.00 ]
+Key: VDIVPHZrmkz: [ 0.00 0.00 ]
+Key: VDIVPHZrr: [ 0.00 0.00 ]
+Key: VDIVPHZrrb: [ 0.00 0.00 ]
+Key: VDIVPHZrrbk: [ 0.00 0.00 ]
+Key: VDIVPHZrrbkz: [ 0.00 0.00 ]
+Key: VDIVPHZrrk: [ 0.00 0.00 ]
+Key: VDIVPHZrrkz: [ 0.00 0.00 ]
+Key: VDIVPSYrm: [ 0.00 0.00 ]
+Key: VDIVPSYrr: [ 0.00 0.00 ]
+Key: VDIVPSZ: [ 0.00 0.00 ]
+Key: VDIVPSZrm: [ 0.00 0.00 ]
+Key: VDIVPSZrmb: [ 0.00 0.00 ]
+Key: VDIVPSZrmbk: [ 0.00 0.00 ]
+Key: VDIVPSZrmbkz: [ 0.00 0.00 ]
+Key: VDIVPSZrmk: [ 0.00 0.00 ]
+Key: VDIVPSZrmkz: [ 0.00 0.00 ]
+Key: VDIVPSZrr: [ 0.00 0.00 ]
+Key: VDIVPSZrrb: [ 0.00 0.00 ]
+Key: VDIVPSZrrbk: [ 0.00 0.00 ]
+Key: VDIVPSZrrbkz: [ 0.00 0.00 ]
+Key: VDIVPSZrrk: [ 0.00 0.00 ]
+Key: VDIVPSZrrkz: [ 0.00 0.00 ]
+Key: VDIVPSrm: [ 0.00 0.00 ]
+Key: VDIVPSrr: [ 0.00 0.00 ]
+Key: VDIVSDZrm: [ 0.00 0.00 ]
+Key: VDIVSDZrm_Int: [ 0.00 0.00 ]
+Key: VDIVSDZrmk_Int: [ 0.00 0.00 ]
+Key: VDIVSDZrmkz_Int: [ 0.00 0.00 ]
+Key: VDIVSDZrr: [ 0.00 0.00 ]
+Key: VDIVSDZrr_Int: [ 0.00 0.00 ]
+Key: VDIVSDZrrb_Int: [ 0.00 0.00 ]
+Key: VDIVSDZrrbk_Int: [ 0.00 0.00 ]
+Key: VDIVSDZrrbkz_Int: [ 0.00 0.00 ]
+Key: VDIVSDZrrk_Int: [ 0.00 0.00 ]
+Key: VDIVSDZrrkz_Int: [ 0.00 0.00 ]
+Key: VDIVSDrm: [ 0.00 0.00 ]
+Key: VDIVSDrm_Int: [ 0.00 0.00 ]
+Key: VDIVSDrr: [ 0.00 0.00 ]
+Key: VDIVSDrr_Int: [ 0.00 0.00 ]
+Key: VDIVSHZrm: [ 0.00 0.00 ]
+Key: VDIVSHZrm_Int: [ 0.00 0.00 ]
+Key: VDIVSHZrmk_Int: [ 0.00 0.00 ]
+Key: VDIVSHZrmkz_Int: [ 0.00 0.00 ]
+Key: VDIVSHZrr: [ 0.00 0.00 ]
+Key: VDIVSHZrr_Int: [ 0.00 0.00 ]
+Key: VDIVSHZrrb_Int: [ 0.00 0.00 ]
+Key: VDIVSHZrrbk_Int: [ 0.00 0.00 ]
+Key: VDIVSHZrrbkz_Int: [ 0.00 0.00 ]
+Key: VDIVSHZrrk_Int: [ 0.00 0.00 ]
+Key: VDIVSHZrrkz_Int: [ 0.00 0.00 ]
+Key: VDIVSSZrm: [ 0.00 0.00 ]
+Key: VDIVSSZrm_Int: [ 0.00 0.00 ]
+Key: VDIVSSZrmk_Int: [ 0.00 0.00 ]
+Key: VDIVSSZrmkz_Int: [ 0.00 0.00 ]
+Key: VDIVSSZrr: [ 0.00 0.00 ]
+Key: VDIVSSZrr_Int: [ 0.00 0.00 ]
+Key: VDIVSSZrrb_Int: [ 0.00 0.00 ]
+Key: VDIVSSZrrbk_Int: [ 0.00 0.00 ]
+Key: VDIVSSZrrbkz_Int: [ 0.00 0.00 ]
+Key: VDIVSSZrrk_Int: [ 0.00 0.00 ]
+Key: VDIVSSZrrkz_Int: [ 0.00 0.00 ]
+Key: VDIVSSrm: [ 0.00 0.00 ]
+Key: VDIVSSrm_Int: [ 0.00 0.00 ]
+Key: VDIVSSrr: [ 0.00 0.00 ]
+Key: VDIVSSrr_Int: [ 0.00 0.00 ]
+Key: VDPBF: [ 0.00 0.00 ]
+Key: VDPPDrmi: [ 0.00 0.00 ]
+Key: VDPPDrri: [ 0.00 0.00 ]
+Key: VDPPHPSZ: [ 0.00 0.00 ]
+Key: VDPPHPSZm: [ 0.00 0.00 ]
+Key: VDPPHPSZmb: [ 0.00 0.00 ]
+Key: VDPPHPSZmbk: [ 0.00 0.00 ]
+Key: VDPPHPSZmbkz: [ 0.00 0.00 ]
+Key: VDPPHPSZmk: [ 0.00 0.00 ]
+Key: VDPPHPSZmkz: [ 0.00 0.00 ]
+Key: VDPPHPSZr: [ 0.00 0.00 ]
+Key: VDPPHPSZrk: [ 0.00 0.00 ]
+Key: VDPPHPSZrkz: [ 0.00 0.00 ]
+Key: VDPPSYrmi: [ 0.00 0.00 ]
+Key: VDPPSYrri: [ 0.00 0.00 ]
+Key: VDPPSrmi: [ 0.00 0.00 ]
+Key: VDPPSrri: [ 0.00 0.00 ]
+Key: VERRm: [ 0.00 0.00 ]
+Key: VERRr: [ 0.00 0.00 ]
+Key: VERWm: [ 0.00 0.00 ]
+Key: VERWr: [ 0.00 0.00 ]
+Key: VEXP: [ 0.00 0.00 ]
+Key: VEXPANDPDZ: [ 0.00 0.00 ]
+Key: VEXPANDPDZrm: [ 0.00 0.00 ]
+Key: VEXPANDPDZrmk: [ 0.00 0.00 ]
+Key: VEXPANDPDZrmkz: [ 0.00 0.00 ]
+Key: VEXPANDPDZrr: [ 0.00 0.00 ]
+Key: VEXPANDPDZrrk: [ 0.00 0.00 ]
+Key: VEXPANDPDZrrkz: [ 0.00 0.00 ]
+Key: VEXPANDPSZ: [ 0.00 0.00 ]
+Key: VEXPANDPSZrm: [ 0.00 0.00 ]
+Key: VEXPANDPSZrmk: [ 0.00 0.00 ]
+Key: VEXPANDPSZrmkz: [ 0.00 0.00 ]
+Key: VEXPANDPSZrr: [ 0.00 0.00 ]
+Key: VEXPANDPSZrrk: [ 0.00 0.00 ]
+Key: VEXPANDPSZrrkz: [ 0.00 0.00 ]
+Key: VEXTRACTF: [ 0.00 0.00 ]
+Key: VEXTRACTI: [ 0.00 0.00 ]
+Key: VEXTRACTPSZmri: [ 0.00 0.00 ]
+Key: VEXTRACTPSZrri: [ 0.00 0.00 ]
+Key: VEXTRACTPSmri: [ 0.00 0.00 ]
+Key: VEXTRACTPSrri: [ 0.00 0.00 ]
+Key: VFCMADDCPHZ: [ 0.00 0.00 ]
+Key: VFCMADDCPHZm: [ 0.00 0.00 ]
+Key: VFCMADDCPHZmb: [ 0.00 0.00 ]
+Key: VFCMADDCPHZmbk: [ 0.00 0.00 ]
+Key: VFCMADDCPHZmbkz: [ 0.00 0.00 ]
+Key: VFCMADDCPHZmk: [ 0.00 0.00 ]
+Key: VFCMADDCPHZmkz: [ 0.00 0.00 ]
+Key: VFCMADDCPHZr: [ 0.00 0.00 ]
+Key: VFCMADDCPHZrb: [ 0.00 0.00 ]
+Key: VFCMADDCPHZrbk: [ 0.00 0.00 ]
+Key: VFCMADDCPHZrbkz: [ 0.00 0.00 ]
+Key: VFCMADDCPHZrk: [ 0.00 0.00 ]
+Key: VFCMADDCPHZrkz: [ 0.00 0.00 ]
+Key: VFCMADDCSHZm: [ 0.00 0.00 ]
+Key: VFCMADDCSHZmk: [ 0.00 0.00 ]
+Key: VFCMADDCSHZmkz: [ 0.00 0.00 ]
+Key: VFCMADDCSHZr: [ 0.00 0.00 ]
+Key: VFCMADDCSHZrb: [ 0.00 0.00 ]
+Key: VFCMADDCSHZrbk: [ 0.00 0.00 ]
+Key: VFCMADDCSHZrbkz: [ 0.00 0.00 ]
+Key: VFCMADDCSHZrk: [ 0.00 0.00 ]
+Key: VFCMADDCSHZrkz: [ 0.00 0.00 ]
+Key: VFCMULCPHZ: [ 0.00 0.00 ]
+Key: VFCMULCPHZrm: [ 0.00 0.00 ]
+Key: VFCMULCPHZrmb: [ 0.00 0.00 ]
+Key: VFCMULCPHZrmbk: [ 0.00 0.00 ]
+Key: VFCMULCPHZrmbkz: [ 0.00 0.00 ]
+Key: VFCMULCPHZrmk: [ 0.00 0.00 ]
+Key: VFCMULCPHZrmkz: [ 0.00 0.00 ]
+Key: VFCMULCPHZrr: [ 0.00 0.00 ]
+Key: VFCMULCPHZrrb: [ 0.00 0.00 ]
+Key: VFCMULCPHZrrbk: [ 0.00 0.00 ]
+Key: VFCMULCPHZrrbkz: [ 0.00 0.00 ]
+Key: VFCMULCPHZrrk: [ 0.00 0.00 ]
+Key: VFCMULCPHZrrkz: [ 0.00 0.00 ]
+Key: VFCMULCSHZrm: [ 0.00 0.00 ]
+Key: VFCMULCSHZrmk: [ 0.00 0.00 ]
+Key: VFCMULCSHZrmkz: [ 0.00 0.00 ]
+Key: VFCMULCSHZrr: [ 0.00 0.00 ]
+Key: VFCMULCSHZrrb: [ 0.00 0.00 ]
+Key: VFCMULCSHZrrbk: [ 0.00 0.00 ]
+Key: VFCMULCSHZrrbkz: [ 0.00 0.00 ]
+Key: VFCMULCSHZrrk: [ 0.00 0.00 ]
+Key: VFCMULCSHZrrkz: [ 0.00 0.00 ]
+Key: VFIXUPIMMPDZ: [ 0.00 0.00 ]
+Key: VFIXUPIMMPDZrmbi: [ 0.00 0.00 ]
+Key: VFIXUPIMMPDZrmbik: [ 0.00 0.00 ]
+Key: VFIXUPIMMPDZrmbikz: [ 0.00 0.00 ]
+Key: VFIXUPIMMPDZrmi: [ 0.00 0.00 ]
+Key: VFIXUPIMMPDZrmik: [ 0.00 0.00 ]
+Key: VFIXUPIMMPDZrmikz: [ 0.00 0.00 ]
+Key: VFIXUPIMMPDZrri: [ 0.00 0.00 ]
+Key: VFIXUPIMMPDZrrib: [ 0.00 0.00 ]
+Key: VFIXUPIMMPDZrribk: [ 0.00 0.00 ]
+Key: VFIXUPIMMPDZrribkz: [ 0.00 0.00 ]
+Key: VFIXUPIMMPDZrrik: [ 0.00 0.00 ]
+Key: VFIXUPIMMPDZrrikz: [ 0.00 0.00 ]
+Key: VFIXUPIMMPSZ: [ 0.00 0.00 ]
+Key: VFIXUPIMMPSZrmbi: [ 0.00 0.00 ]
+Key: VFIXUPIMMPSZrmbik: [ 0.00 0.00 ]
+Key: VFIXUPIMMPSZrmbikz: [ 0.00 0.00 ]
+Key: VFIXUPIMMPSZrmi: [ 0.00 0.00 ]
+Key: VFIXUPIMMPSZrmik: [ 0.00 0.00 ]
+Key: VFIXUPIMMPSZrmikz: [ 0.00 0.00 ]
+Key: VFIXUPIMMPSZrri: [ 0.00 0.00 ]
+Key: VFIXUPIMMPSZrrib: [ 0.00 0.00 ]
+Key: VFIXUPIMMPSZrribk: [ 0.00 0.00 ]
+Key: VFIXUPIMMPSZrribkz: [ 0.00 0.00 ]
+Key: VFIXUPIMMPSZrrik: [ 0.00 0.00 ]
+Key: VFIXUPIMMPSZrrikz: [ 0.00 0.00 ]
+Key: VFIXUPIMMSDZrmi: [ 0.00 0.00 ]
+Key: VFIXUPIMMSDZrmik: [ 0.00 0.00 ]
+Key: VFIXUPIMMSDZrmikz: [ 0.00 0.00 ]
+Key: VFIXUPIMMSDZrri: [ 0.00 0.00 ]
+Key: VFIXUPIMMSDZrrib: [ 0.00 0.00 ]
+Key: VFIXUPIMMSDZrribk: [ 0.00 0.00 ]
+Key: VFIXUPIMMSDZrribkz: [ 0.00 0.00 ]
+Key: VFIXUPIMMSDZrrik: [ 0.00 0.00 ]
+Key: VFIXUPIMMSDZrrikz: [ 0.00 0.00 ]
+Key: VFIXUPIMMSSZrmi: [ 0.00 0.00 ]
+Key: VFIXUPIMMSSZrmik: [ 0.00 0.00 ]
+Key: VFIXUPIMMSSZrmikz: [ 0.00 0.00 ]
+Key: VFIXUPIMMSSZrri: [ 0.00 0.00 ]
+Key: VFIXUPIMMSSZrrib: [ 0.00 0.00 ]
+Key: VFIXUPIMMSSZrribk: [ 0.00 0.00 ]
+Key: VFIXUPIMMSSZrribkz: [ 0.00 0.00 ]
+Key: VFIXUPIMMSSZrrik: [ 0.00 0.00 ]
+Key: VFIXUPIMMSSZrrikz: [ 0.00 0.00 ]
+Key: VFMADD: [ 0.00 0.00 ]
+Key: VFMADDCPHZ: [ 0.00 0.00 ]
+Key: VFMADDCPHZm: [ 0.00 0.00 ]
+Key: VFMADDCPHZmb: [ 0.00 0.00 ]
+Key: VFMADDCPHZmbk: [ 0.00 0.00 ]
+Key: VFMADDCPHZmbkz: [ 0.00 0.00 ]
+Key: VFMADDCPHZmk: [ 0.00 0.00 ]
+Key: VFMADDCPHZmkz: [ 0.00 0.00 ]
+Key: VFMADDCPHZr: [ 0.00 0.00 ]
+Key: VFMADDCPHZrb: [ 0.00 0.00 ]
+Key: VFMADDCPHZrbk: [ 0.00 0.00 ]
+Key: VFMADDCPHZrbkz: [ 0.00 0.00 ]
+Key: VFMADDCPHZrk: [ 0.00 0.00 ]
+Key: VFMADDCPHZrkz: [ 0.00 0.00 ]
+Key: VFMADDCSHZm: [ 0.00 0.00 ]
+Key: VFMADDCSHZmk: [ 0.00 0.00 ]
+Key: VFMADDCSHZmkz: [ 0.00 0.00 ]
+Key: VFMADDCSHZr: [ 0.00 0.00 ]
+Key: VFMADDCSHZrb: [ 0.00 0.00 ]
+Key: VFMADDCSHZrbk: [ 0.00 0.00 ]
+Key: VFMADDCSHZrbkz: [ 0.00 0.00 ]
+Key: VFMADDCSHZrk: [ 0.00 0.00 ]
+Key: VFMADDCSHZrkz: [ 0.00 0.00 ]
+Key: VFMADDPD: [ 0.00 0.00 ]
+Key: VFMADDPS: [ 0.00 0.00 ]
+Key: VFMADDSD: [ 0.00 0.00 ]
+Key: VFMADDSS: [ 0.00 0.00 ]
+Key: VFMADDSUB: [ 0.00 0.00 ]
+Key: VFMADDSUBPD: [ 0.00 0.00 ]
+Key: VFMADDSUBPS: [ 0.00 0.00 ]
+Key: VFMSUB: [ 0.00 0.00 ]
+Key: VFMSUBADD: [ 0.00 0.00 ]
+Key: VFMSUBADDPD: [ 0.00 0.00 ]
+Key: VFMSUBADDPS: [ 0.00 0.00 ]
+Key: VFMSUBPD: [ 0.00 0.00 ]
+Key: VFMSUBPS: [ 0.00 0.00 ]
+Key: VFMSUBSD: [ 0.00 0.00 ]
+Key: VFMSUBSS: [ 0.00 0.00 ]
+Key: VFMULCPHZ: [ 0.00 0.00 ]
+Key: VFMULCPHZrm: [ 0.00 0.00 ]
+Key: VFMULCPHZrmb: [ 0.00 0.00 ]
+Key: VFMULCPHZrmbk: [ 0.00 0.00 ]
+Key: VFMULCPHZrmbkz: [ 0.00 0.00 ]
+Key: VFMULCPHZrmk: [ 0.00 0.00 ]
+Key: VFMULCPHZrmkz: [ 0.00 0.00 ]
+Key: VFMULCPHZrr: [ 0.00 0.00 ]
+Key: VFMULCPHZrrb: [ 0.00 0.00 ]
+Key: VFMULCPHZrrbk: [ 0.00 0.00 ]
+Key: VFMULCPHZrrbkz: [ 0.00 0.00 ]
+Key: VFMULCPHZrrk: [ 0.00 0.00 ]
+Key: VFMULCPHZrrkz: [ 0.00 0.00 ]
+Key: VFMULCSHZrm: [ 0.00 0.00 ]
+Key: VFMULCSHZrmk: [ 0.00 0.00 ]
+Key: VFMULCSHZrmkz: [ 0.00 0.00 ]
+Key: VFMULCSHZrr: [ 0.00 0.00 ]
+Key: VFMULCSHZrrb: [ 0.00 0.00 ]
+Key: VFMULCSHZrrbk: [ 0.00 0.00 ]
+Key: VFMULCSHZrrbkz: [ 0.00 0.00 ]
+Key: VFMULCSHZrrk: [ 0.00 0.00 ]
+Key: VFMULCSHZrrkz: [ 0.00 0.00 ]
+Key: VFNMADD: [ 0.00 0.00 ]
+Key: VFNMADDPD: [ 0.00 0.00 ]
+Key: VFNMADDPS: [ 0.00 0.00 ]
+Key: VFNMADDSD: [ 0.00 0.00 ]
+Key: VFNMADDSS: [ 0.00 0.00 ]
+Key: VFNMSUB: [ 0.00 0.00 ]
+Key: VFNMSUBPD: [ 0.00 0.00 ]
+Key: VFNMSUBPS: [ 0.00 0.00 ]
+Key: VFNMSUBSD: [ 0.00 0.00 ]
+Key: VFNMSUBSS: [ 0.00 0.00 ]
+Key: VFPCLASSBF: [ 0.00 0.00 ]
+Key: VFPCLASSPDZ: [ 0.00 0.00 ]
+Key: VFPCLASSPDZmbi: [ 0.00 0.00 ]
+Key: VFPCLASSPDZmbik: [ 0.00 0.00 ]
+Key: VFPCLASSPDZmi: [ 0.00 0.00 ]
+Key: VFPCLASSPDZmik: [ 0.00 0.00 ]
+Key: VFPCLASSPDZri: [ 0.00 0.00 ]
+Key: VFPCLASSPDZrik: [ 0.00 0.00 ]
+Key: VFPCLASSPHZ: [ 0.00 0.00 ]
+Key: VFPCLASSPHZmbi: [ 0.00 0.00 ]
+Key: VFPCLASSPHZmbik: [ 0.00 0.00 ]
+Key: VFPCLASSPHZmi: [ 0.00 0.00 ]
+Key: VFPCLASSPHZmik: [ 0.00 0.00 ]
+Key: VFPCLASSPHZri: [ 0.00 0.00 ]
+Key: VFPCLASSPHZrik: [ 0.00 0.00 ]
+Key: VFPCLASSPSZ: [ 0.00 0.00 ]
+Key: VFPCLASSPSZmbi: [ 0.00 0.00 ]
+Key: VFPCLASSPSZmbik: [ 0.00 0.00 ]
+Key: VFPCLASSPSZmi: [ 0.00 0.00 ]
+Key: VFPCLASSPSZmik: [ 0.00 0.00 ]
+Key: VFPCLASSPSZri: [ 0.00 0.00 ]
+Key: VFPCLASSPSZrik: [ 0.00 0.00 ]
+Key: VFPCLASSSDZmi: [ 0.00 0.00 ]
+Key: VFPCLASSSDZmik: [ 0.00 0.00 ]
+Key: VFPCLASSSDZri: [ 0.00 0.00 ]
+Key: VFPCLASSSDZrik: [ 0.00 0.00 ]
+Key: VFPCLASSSHZmi: [ 0.00 0.00 ]
+Key: VFPCLASSSHZmik: [ 0.00 0.00 ]
+Key: VFPCLASSSHZri: [ 0.00 0.00 ]
+Key: VFPCLASSSHZrik: [ 0.00 0.00 ]
+Key: VFPCLASSSSZmi: [ 0.00 0.00 ]
+Key: VFPCLASSSSZmik: [ 0.00 0.00 ]
+Key: VFPCLASSSSZri: [ 0.00 0.00 ]
+Key: VFPCLASSSSZrik: [ 0.00 0.00 ]
+Key: VFRCZPDYrm: [ 0.00 0.00 ]
+Key: VFRCZPDYrr: [ 0.00 0.00 ]
+Key: VFRCZPDrm: [ 0.00 0.00 ]
+Key: VFRCZPDrr: [ 0.00 0.00 ]
+Key: VFRCZPSYrm: [ 0.00 0.00 ]
+Key: VFRCZPSYrr: [ 0.00 0.00 ]
+Key: VFRCZPSrm: [ 0.00 0.00 ]
+Key: VFRCZPSrr: [ 0.00 0.00 ]
+Key: VFRCZSDrm: [ 0.00 0.00 ]
+Key: VFRCZSDrr: [ 0.00 0.00 ]
+Key: VFRCZSSrm: [ 0.00 0.00 ]
+Key: VFRCZSSrr: [ 0.00 0.00 ]
+Key: VGATHERDPDYrm: [ 0.00 0.00 ]
+Key: VGATHERDPDZ: [ 0.00 0.00 ]
+Key: VGATHERDPDZrm: [ 0.00 0.00 ]
+Key: VGATHERDPDrm: [ 0.00 0.00 ]
+Key: VGATHERDPSYrm: [ 0.00 0.00 ]
+Key: VGATHERDPSZ: [ 0.00 0.00 ]
+Key: VGATHERDPSZrm: [ 0.00 0.00 ]
+Key: VGATHERDPSrm: [ 0.00 0.00 ]
+Key: VGATHERPF: [ 0.00 0.00 ]
+Key: VGATHERQPDYrm: [ 0.00 0.00 ]
+Key: VGATHERQPDZ: [ 0.00 0.00 ]
+Key: VGATHERQPDZrm: [ 0.00 0.00 ]
+Key: VGATHERQPDrm: [ 0.00 0.00 ]
+Key: VGATHERQPSYrm: [ 0.00 0.00 ]
+Key: VGATHERQPSZ: [ 0.00 0.00 ]
+Key: VGATHERQPSZrm: [ 0.00 0.00 ]
+Key: VGATHERQPSrm: [ 0.00 0.00 ]
+Key: VGETEXPBF: [ 0.00 0.00 ]
+Key: VGETEXPPDZ: [ 0.00 0.00 ]
+Key: VGETEXPPDZm: [ 0.00 0.00 ]
+Key: VGETEXPPDZmb: [ 0.00 0.00 ]
+Key: VGETEXPPDZmbk: [ 0.00 0.00 ]
+Key: VGETEXPPDZmbkz: [ 0.00 0.00 ]
+Key: VGETEXPPDZmk: [ 0.00 0.00 ]
+Key: VGETEXPPDZmkz: [ 0.00 0.00 ]
+Key: VGETEXPPDZr: [ 0.00 0.00 ]
+Key: VGETEXPPDZrb: [ 0.00 0.00 ]
+Key: VGETEXPPDZrbk: [ 0.00 0.00 ]
+Key: VGETEXPPDZrbkz: [ 0.00 0.00 ]
+Key: VGETEXPPDZrk: [ 0.00 0.00 ]
+Key: VGETEXPPDZrkz: [ 0.00 0.00 ]
+Key: VGETEXPPHZ: [ 0.00 0.00 ]
+Key: VGETEXPPHZm: [ 0.00 0.00 ]
+Key: VGETEXPPHZmb: [ 0.00 0.00 ]
+Key: VGETEXPPHZmbk: [ 0.00 0.00 ]
+Key: VGETEXPPHZmbkz: [ 0.00 0.00 ]
+Key: VGETEXPPHZmk: [ 0.00 0.00 ]
+Key: VGETEXPPHZmkz: [ 0.00 0.00 ]
+Key: VGETEXPPHZr: [ 0.00 0.00 ]
+Key: VGETEXPPHZrb: [ 0.00 0.00 ]
+Key: VGETEXPPHZrbk: [ 0.00 0.00 ]
+Key: VGETEXPPHZrbkz: [ 0.00 0.00 ]
+Key: VGETEXPPHZrk: [ 0.00 0.00 ]
+Key: VGETEXPPHZrkz: [ 0.00 0.00 ]
+Key: VGETEXPPSZ: [ 0.00 0.00 ]
+Key: VGETEXPPSZm: [ 0.00 0.00 ]
+Key: VGETEXPPSZmb: [ 0.00 0.00 ]
+Key: VGETEXPPSZmbk: [ 0.00 0.00 ]
+Key: VGETEXPPSZmbkz: [ 0.00 0.00 ]
+Key: VGETEXPPSZmk: [ 0.00 0.00 ]
+Key: VGETEXPPSZmkz: [ 0.00 0.00 ]
+Key: VGETEXPPSZr: [ 0.00 0.00 ]
+Key: VGETEXPPSZrb: [ 0.00 0.00 ]
+Key: VGETEXPPSZrbk: [ 0.00 0.00 ]
+Key: VGETEXPPSZrbkz: [ 0.00 0.00 ]
+Key: VGETEXPPSZrk: [ 0.00 0.00 ]
+Key: VGETEXPPSZrkz: [ 0.00 0.00 ]
+Key: VGETEXPSDZm: [ 0.00 0.00 ]
+Key: VGETEXPSDZmk: [ 0.00 0.00 ]
+Key: VGETEXPSDZmkz: [ 0.00 0.00 ]
+Key: VGETEXPSDZr: [ 0.00 0.00 ]
+Key: VGETEXPSDZrb: [ 0.00 0.00 ]
+Key: VGETEXPSDZrbk: [ 0.00 0.00 ]
+Key: VGETEXPSDZrbkz: [ 0.00 0.00 ]
+Key: VGETEXPSDZrk: [ 0.00 0.00 ]
+Key: VGETEXPSDZrkz: [ 0.00 0.00 ]
+Key: VGETEXPSHZm: [ 0.00 0.00 ]
+Key: VGETEXPSHZmk: [ 0.00 0.00 ]
+Key: VGETEXPSHZmkz: [ 0.00 0.00 ]
+Key: VGETEXPSHZr: [ 0.00 0.00 ]
+Key: VGETEXPSHZrb: [ 0.00 0.00 ]
+Key: VGETEXPSHZrbk: [ 0.00 0.00 ]
+Key: VGETEXPSHZrbkz: [ 0.00 0.00 ]
+Key: VGETEXPSHZrk: [ 0.00 0.00 ]
+Key: VGETEXPSHZrkz: [ 0.00 0.00 ]
+Key: VGETEXPSSZm: [ 0.00 0.00 ]
+Key: VGETEXPSSZmk: [ 0.00 0.00 ]
+Key: VGETEXPSSZmkz: [ 0.00 0.00 ]
+Key: VGETEXPSSZr: [ 0.00 0.00 ]
+Key: VGETEXPSSZrb: [ 0.00 0.00 ]
+Key: VGETEXPSSZrbk: [ 0.00 0.00 ]
+Key: VGETEXPSSZrbkz: [ 0.00 0.00 ]
+Key: VGETEXPSSZrk: [ 0.00 0.00 ]
+Key: VGETEXPSSZrkz: [ 0.00 0.00 ]
+Key: VGETMANTBF: [ 0.00 0.00 ]
+Key: VGETMANTPDZ: [ 0.00 0.00 ]
+Key: VGETMANTPDZrmbi: [ 0.00 0.00 ]
+Key: VGETMANTPDZrmbik: [ 0.00 0.00 ]
+Key: VGETMANTPDZrmbikz: [ 0.00 0.00 ]
+Key: VGETMANTPDZrmi: [ 0.00 0.00 ]
+Key: VGETMANTPDZrmik: [ 0.00 0.00 ]
+Key: VGETMANTPDZrmikz: [ 0.00 0.00 ]
+Key: VGETMANTPDZrri: [ 0.00 0.00 ]
+Key: VGETMANTPDZrrib: [ 0.00 0.00 ]
+Key: VGETMANTPDZrribk: [ 0.00 0.00 ]
+Key: VGETMANTPDZrribkz: [ 0.00 0.00 ]
+Key: VGETMANTPDZrrik: [ 0.00 0.00 ]
+Key: VGETMANTPDZrrikz: [ 0.00 0.00 ]
+Key: VGETMANTPHZ: [ 0.00 0.00 ]
+Key: VGETMANTPHZrmbi: [ 0.00 0.00 ]
+Key: VGETMANTPHZrmbik: [ 0.00 0.00 ]
+Key: VGETMANTPHZrmbikz: [ 0.00 0.00 ]
+Key: VGETMANTPHZrmi: [ 0.00 0.00 ]
+Key: VGETMANTPHZrmik: [ 0.00 0.00 ]
+Key: VGETMANTPHZrmikz: [ 0.00 0.00 ]
+Key: VGETMANTPHZrri: [ 0.00 0.00 ]
+Key: VGETMANTPHZrrib: [ 0.00 0.00 ]
+Key: VGETMANTPHZrribk: [ 0.00 0.00 ]
+Key: VGETMANTPHZrribkz: [ 0.00 0.00 ]
+Key: VGETMANTPHZrrik: [ 0.00 0.00 ]
+Key: VGETMANTPHZrrikz: [ 0.00 0.00 ]
+Key: VGETMANTPSZ: [ 0.00 0.00 ]
+Key: VGETMANTPSZrmbi: [ 0.00 0.00 ]
+Key: VGETMANTPSZrmbik: [ 0.00 0.00 ]
+Key: VGETMANTPSZrmbikz: [ 0.00 0.00 ]
+Key: VGETMANTPSZrmi: [ 0.00 0.00 ]
+Key: VGETMANTPSZrmik: [ 0.00 0.00 ]
+Key: VGETMANTPSZrmikz: [ 0.00 0.00 ]
+Key: VGETMANTPSZrri: [ 0.00 0.00 ]
+Key: VGETMANTPSZrrib: [ 0.00 0.00 ]
+Key: VGETMANTPSZrribk: [ 0.00 0.00 ]
+Key: VGETMANTPSZrribkz: [ 0.00 0.00 ]
+Key: VGETMANTPSZrrik: [ 0.00 0.00 ]
+Key: VGETMANTPSZrrikz: [ 0.00 0.00 ]
+Key: VGETMANTSDZrmi: [ 0.00 0.00 ]
+Key: VGETMANTSDZrmik: [ 0.00 0.00 ]
+Key: VGETMANTSDZrmikz: [ 0.00 0.00 ]
+Key: VGETMANTSDZrri: [ 0.00 0.00 ]
+Key: VGETMANTSDZrrib: [ 0.00 0.00 ]
+Key: VGETMANTSDZrribk: [ 0.00 0.00 ]
+Key: VGETMANTSDZrribkz: [ 0.00 0.00 ]
+Key: VGETMANTSDZrrik: [ 0.00 0.00 ]
+Key: VGETMANTSDZrrikz: [ 0.00 0.00 ]
+Key: VGETMANTSHZrmi: [ 0.00 0.00 ]
+Key: VGETMANTSHZrmik: [ 0.00 0.00 ]
+Key: VGETMANTSHZrmikz: [ 0.00 0.00 ]
+Key: VGETMANTSHZrri: [ 0.00 0.00 ]
+Key: VGETMANTSHZrrib: [ 0.00 0.00 ]
+Key: VGETMANTSHZrribk: [ 0.00 0.00 ]
+Key: VGETMANTSHZrribkz: [ 0.00 0.00 ]
+Key: VGETMANTSHZrrik: [ 0.00 0.00 ]
+Key: VGETMANTSHZrrikz: [ 0.00 0.00 ]
+Key: VGETMANTSSZrmi: [ 0.00 0.00 ]
+Key: VGETMANTSSZrmik: [ 0.00 0.00 ]
+Key: VGETMANTSSZrmikz: [ 0.00 0.00 ]
+Key: VGETMANTSSZrri: [ 0.00 0.00 ]
+Key: VGETMANTSSZrrib: [ 0.00 0.00 ]
+Key: VGETMANTSSZrribk: [ 0.00 0.00 ]
+Key: VGETMANTSSZrribkz: [ 0.00 0.00 ]
+Key: VGETMANTSSZrrik: [ 0.00 0.00 ]
+Key: VGETMANTSSZrrikz: [ 0.00 0.00 ]
+Key: VGF: [ 0.00 0.00 ]
+Key: VHADDPDYrm: [ 0.00 0.00 ]
+Key: VHADDPDYrr: [ 0.00 0.00 ]
+Key: VHADDPDrm: [ 0.00 0.00 ]
+Key: VHADDPDrr: [ 0.00 0.00 ]
+Key: VHADDPSYrm: [ 0.00 0.00 ]
+Key: VHADDPSYrr: [ 0.00 0.00 ]
+Key: VHADDPSrm: [ 0.00 0.00 ]
+Key: VHADDPSrr: [ 0.00 0.00 ]
+Key: VHSUBPDYrm: [ 0.00 0.00 ]
+Key: VHSUBPDYrr: [ 0.00 0.00 ]
+Key: VHSUBPDrm: [ 0.00 0.00 ]
+Key: VHSUBPDrr: [ 0.00 0.00 ]
+Key: VHSUBPSYrm: [ 0.00 0.00 ]
+Key: VHSUBPSYrr: [ 0.00 0.00 ]
+Key: VHSUBPSrm: [ 0.00 0.00 ]
+Key: VHSUBPSrr: [ 0.00 0.00 ]
+Key: VINSERTF: [ 0.00 0.00 ]
+Key: VINSERTI: [ 0.00 0.00 ]
+Key: VINSERTPSZrmi: [ 0.00 0.00 ]
+Key: VINSERTPSZrri: [ 0.00 0.00 ]
+Key: VINSERTPSrmi: [ 0.00 0.00 ]
+Key: VINSERTPSrri: [ 0.00 0.00 ]
+Key: VLDDQUYrm: [ 0.00 0.00 ]
+Key: VLDDQUrm: [ 0.00 0.00 ]
+Key: VLDMXCSR: [ 0.00 0.00 ]
+Key: VMASKMOVDQU: [ 0.00 0.00 ]
+Key: VMASKMOVPDYmr: [ 0.00 0.00 ]
+Key: VMASKMOVPDYrm: [ 0.00 0.00 ]
+Key: VMASKMOVPDmr: [ 0.00 0.00 ]
+Key: VMASKMOVPDrm: [ 0.00 0.00 ]
+Key: VMASKMOVPSYmr: [ 0.00 0.00 ]
+Key: VMASKMOVPSYrm: [ 0.00 0.00 ]
+Key: VMASKMOVPSmr: [ 0.00 0.00 ]
+Key: VMASKMOVPSrm: [ 0.00 0.00 ]
+Key: VMAXBF: [ 0.00 0.00 ]
+Key: VMAXCPDYrm: [ 0.00 0.00 ]
+Key: VMAXCPDYrr: [ 0.00 0.00 ]
+Key: VMAXCPDZ: [ 0.00 0.00 ]
+Key: VMAXCPDZrm: [ 0.00 0.00 ]
+Key: VMAXCPDZrmb: [ 0.00 0.00 ]
+Key: VMAXCPDZrmbk: [ 0.00 0.00 ]
+Key: VMAXCPDZrmbkz: [ 0.00 0.00 ]
+Key: VMAXCPDZrmk: [ 0.00 0.00 ]
+Key: VMAXCPDZrmkz: [ 0.00 0.00 ]
+Key: VMAXCPDZrr: [ 0.00 0.00 ]
+Key: VMAXCPDZrrk: [ 0.00 0.00 ]
+Key: VMAXCPDZrrkz: [ 0.00 0.00 ]
+Key: VMAXCPDrm: [ 0.00 0.00 ]
+Key: VMAXCPDrr: [ 0.00 0.00 ]
+Key: VMAXCPHZ: [ 0.00 0.00 ]
+Key: VMAXCPHZrm: [ 0.00 0.00 ]
+Key: VMAXCPHZrmb: [ 0.00 0.00 ]
+Key: VMAXCPHZrmbk: [ 0.00 0.00 ]
+Key: VMAXCPHZrmbkz: [ 0.00 0.00 ]
+Key: VMAXCPHZrmk: [ 0.00 0.00 ]
+Key: VMAXCPHZrmkz: [ 0.00 0.00 ]
+Key: VMAXCPHZrr: [ 0.00 0.00 ]
+Key: VMAXCPHZrrk: [ 0.00 0.00 ]
+Key: VMAXCPHZrrkz: [ 0.00 0.00 ]
+Key: VMAXCPSYrm: [ 0.00 0.00 ]
+Key: VMAXCPSYrr: [ 0.00 0.00 ]
+Key: VMAXCPSZ: [ 0.00 0.00 ]
+Key: VMAXCPSZrm: [ 0.00 0.00 ]
+Key: VMAXCPSZrmb: [ 0.00 0.00 ]
+Key: VMAXCPSZrmbk: [ 0.00 0.00 ]
+Key: VMAXCPSZrmbkz: [ 0.00 0.00 ]
+Key: VMAXCPSZrmk: [ 0.00 0.00 ]
+Key: VMAXCPSZrmkz: [ 0.00 0.00 ]
+Key: VMAXCPSZrr: [ 0.00 0.00 ]
+Key: VMAXCPSZrrk: [ 0.00 0.00 ]
+Key: VMAXCPSZrrkz: [ 0.00 0.00 ]
+Key: VMAXCPSrm: [ 0.00 0.00 ]
+Key: VMAXCPSrr: [ 0.00 0.00 ]
+Key: VMAXCSDZrm: [ 0.00 0.00 ]
+Key: VMAXCSDZrr: [ 0.00 0.00 ]
+Key: VMAXCSDrm: [ 0.00 0.00 ]
+Key: VMAXCSDrr: [ 0.00 0.00 ]
+Key: VMAXCSHZrm: [ 0.00 0.00 ]
+Key: VMAXCSHZrr: [ 0.00 0.00 ]
+Key: VMAXCSSZrm: [ 0.00 0.00 ]
+Key: VMAXCSSZrr: [ 0.00 0.00 ]
+Key: VMAXCSSrm: [ 0.00 0.00 ]
+Key: VMAXCSSrr: [ 0.00 0.00 ]
+Key: VMAXPDYrm: [ 0.00 0.00 ]
+Key: VMAXPDYrr: [ 0.00 0.00 ]
+Key: VMAXPDZ: [ 0.00 0.00 ]
+Key: VMAXPDZrm: [ 0.00 0.00 ]
+Key: VMAXPDZrmb: [ 0.00 0.00 ]
+Key: VMAXPDZrmbk: [ 0.00 0.00 ]
+Key: VMAXPDZrmbkz: [ 0.00 0.00 ]
+Key: VMAXPDZrmk: [ 0.00 0.00 ]
+Key: VMAXPDZrmkz: [ 0.00 0.00 ]
+Key: VMAXPDZrr: [ 0.00 0.00 ]
+Key: VMAXPDZrrb: [ 0.00 0.00 ]
+Key: VMAXPDZrrbk: [ 0.00 0.00 ]
+Key: VMAXPDZrrbkz: [ 0.00 0.00 ]
+Key: VMAXPDZrrk: [ 0.00 0.00 ]
+Key: VMAXPDZrrkz: [ 0.00 0.00 ]
+Key: VMAXPDrm: [ 0.00 0.00 ]
+Key: VMAXPDrr: [ 0.00 0.00 ]
+Key: VMAXPHZ: [ 0.00 0.00 ]
+Key: VMAXPHZrm: [ 0.00 0.00 ]
+Key: VMAXPHZrmb: [ 0.00 0.00 ]
+Key: VMAXPHZrmbk: [ 0.00 0.00 ]
+Key: VMAXPHZrmbkz: [ 0.00 0.00 ]
+Key: VMAXPHZrmk: [ 0.00 0.00 ]
+Key: VMAXPHZrmkz: [ 0.00 0.00 ]
+Key: VMAXPHZrr: [ 0.00 0.00 ]
+Key: VMAXPHZrrb: [ 0.00 0.00 ]
+Key: VMAXPHZrrbk: [ 0.00 0.00 ]
+Key: VMAXPHZrrbkz: [ 0.00 0.00 ]
+Key: VMAXPHZrrk: [ 0.00 0.00 ]
+Key: VMAXPHZrrkz: [ 0.00 0.00 ]
+Key: VMAXPSYrm: [ 0.00 0.00 ]
+Key: VMAXPSYrr: [ 0.00 0.00 ]
+Key: VMAXPSZ: [ 0.00 0.00 ]
+Key: VMAXPSZrm: [ 0.00 0.00 ]
+Key: VMAXPSZrmb: [ 0.00 0.00 ]
+Key: VMAXPSZrmbk: [ 0.00 0.00 ]
+Key: VMAXPSZrmbkz: [ 0.00 0.00 ]
+Key: VMAXPSZrmk: [ 0.00 0.00 ]
+Key: VMAXPSZrmkz: [ 0.00 0.00 ]
+Key: VMAXPSZrr: [ 0.00 0.00 ]
+Key: VMAXPSZrrb: [ 0.00 0.00 ]
+Key: VMAXPSZrrbk: [ 0.00 0.00 ]
+Key: VMAXPSZrrbkz: [ 0.00 0.00 ]
+Key: VMAXPSZrrk: [ 0.00 0.00 ]
+Key: VMAXPSZrrkz: [ 0.00 0.00 ]
+Key: VMAXPSrm: [ 0.00 0.00 ]
+Key: VMAXPSrr: [ 0.00 0.00 ]
+Key: VMAXSDZrm: [ 0.00 0.00 ]
+Key: VMAXSDZrm_Int: [ 0.00 0.00 ]
+Key: VMAXSDZrmk_Int: [ 0.00 0.00 ]
+Key: VMAXSDZrmkz_Int: [ 0.00 0.00 ]
+Key: VMAXSDZrr: [ 0.00 0.00 ]
+Key: VMAXSDZrr_Int: [ 0.00 0.00 ]
+Key: VMAXSDZrrb_Int: [ 0.00 0.00 ]
+Key: VMAXSDZrrbk_Int: [ 0.00 0.00 ]
+Key: VMAXSDZrrbkz_Int: [ 0.00 0.00 ]
+Key: VMAXSDZrrk_Int: [ 0.00 0.00 ]
+Key: VMAXSDZrrkz_Int: [ 0.00 0.00 ]
+Key: VMAXSDrm: [ 0.00 0.00 ]
+Key: VMAXSDrm_Int: [ 0.00 0.00 ]
+Key: VMAXSDrr: [ 0.00 0.00 ]
+Key: VMAXSDrr_Int: [ 0.00 0.00 ]
+Key: VMAXSHZrm: [ 0.00 0.00 ]
+Key: VMAXSHZrm_Int: [ 0.00 0.00 ]
+Key: VMAXSHZrmk_Int: [ 0.00 0.00 ]
+Key: VMAXSHZrmkz_Int: [ 0.00 0.00 ]
+Key: VMAXSHZrr: [ 0.00 0.00 ]
+Key: VMAXSHZrr_Int: [ 0.00 0.00 ]
+Key: VMAXSHZrrb_Int: [ 0.00 0.00 ]
+Key: VMAXSHZrrbk_Int: [ 0.00 0.00 ]
+Key: VMAXSHZrrbkz_Int: [ 0.00 0.00 ]
+Key: VMAXSHZrrk_Int: [ 0.00 0.00 ]
+Key: VMAXSHZrrkz_Int: [ 0.00 0.00 ]
+Key: VMAXSSZrm: [ 0.00 0.00 ]
+Key: VMAXSSZrm_Int: [ 0.00 0.00 ]
+Key: VMAXSSZrmk_Int: [ 0.00 0.00 ]
+Key: VMAXSSZrmkz_Int: [ 0.00 0.00 ]
+Key: VMAXSSZrr: [ 0.00 0.00 ]
+Key: VMAXSSZrr_Int: [ 0.00 0.00 ]
+Key: VMAXSSZrrb_Int: [ 0.00 0.00 ]
+Key: VMAXSSZrrbk_Int: [ 0.00 0.00 ]
+Key: VMAXSSZrrbkz_Int: [ 0.00 0.00 ]
+Key: VMAXSSZrrk_Int: [ 0.00 0.00 ]
+Key: VMAXSSZrrkz_Int: [ 0.00 0.00 ]
+Key: VMAXSSrm: [ 0.00 0.00 ]
+Key: VMAXSSrm_Int: [ 0.00 0.00 ]
+Key: VMAXSSrr: [ 0.00 0.00 ]
+Key: VMAXSSrr_Int: [ 0.00 0.00 ]
+Key: VMCALL: [ 0.00 0.00 ]
+Key: VMCLEARm: [ 0.00 0.00 ]
+Key: VMFUNC: [ 0.00 0.00 ]
+Key: VMINBF: [ 0.00 0.00 ]
+Key: VMINCPDYrm: [ 0.00 0.00 ]
+Key: VMINCPDYrr: [ 0.00 0.00 ]
+Key: VMINCPDZ: [ 0.00 0.00 ]
+Key: VMINCPDZrm: [ 0.00 0.00 ]
+Key: VMINCPDZrmb: [ 0.00 0.00 ]
+Key: VMINCPDZrmbk: [ 0.00 0.00 ]
+Key: VMINCPDZrmbkz: [ 0.00 0.00 ]
+Key: VMINCPDZrmk: [ 0.00 0.00 ]
+Key: VMINCPDZrmkz: [ 0.00 0.00 ]
+Key: VMINCPDZrr: [ 0.00 0.00 ]
+Key: VMINCPDZrrk: [ 0.00 0.00 ]
+Key: VMINCPDZrrkz: [ 0.00 0.00 ]
+Key: VMINCPDrm: [ 0.00 0.00 ]
+Key: VMINCPDrr: [ 0.00 0.00 ]
+Key: VMINCPHZ: [ 0.00 0.00 ]
+Key: VMINCPHZrm: [ 0.00 0.00 ]
+Key: VMINCPHZrmb: [ 0.00 0.00 ]
+Key: VMINCPHZrmbk: [ 0.00 0.00 ]
+Key: VMINCPHZrmbkz: [ 0.00 0.00 ]
+Key: VMINCPHZrmk: [ 0.00 0.00 ]
+Key: VMINCPHZrmkz: [ 0.00 0.00 ]
+Key: VMINCPHZrr: [ 0.00 0.00 ]
+Key: VMINCPHZrrk: [ 0.00 0.00 ]
+Key: VMINCPHZrrkz: [ 0.00 0.00 ]
+Key: VMINCPSYrm: [ 0.00 0.00 ]
+Key: VMINCPSYrr: [ 0.00 0.00 ]
+Key: VMINCPSZ: [ 0.00 0.00 ]
+Key: VMINCPSZrm: [ 0.00 0.00 ]
+Key: VMINCPSZrmb: [ 0.00 0.00 ]
+Key: VMINCPSZrmbk: [ 0.00 0.00 ]
+Key: VMINCPSZrmbkz: [ 0.00 0.00 ]
+Key: VMINCPSZrmk: [ 0.00 0.00 ]
+Key: VMINCPSZrmkz: [ 0.00 0.00 ]
+Key: VMINCPSZrr: [ 0.00 0.00 ]
+Key: VMINCPSZrrk: [ 0.00 0.00 ]
+Key: VMINCPSZrrkz: [ 0.00 0.00 ]
+Key: VMINCPSrm: [ 0.00 0.00 ]
+Key: VMINCPSrr: [ 0.00 0.00 ]
+Key: VMINCSDZrm: [ 0.00 0.00 ]
+Key: VMINCSDZrr: [ 0.00 0.00 ]
+Key: VMINCSDrm: [ 0.00 0.00 ]
+Key: VMINCSDrr: [ 0.00 0.00 ]
+Key: VMINCSHZrm: [ 0.00 0.00 ]
+Key: VMINCSHZrr: [ 0.00 0.00 ]
+Key: VMINCSSZrm: [ 0.00 0.00 ]
+Key: VMINCSSZrr: [ 0.00 0.00 ]
+Key: VMINCSSrm: [ 0.00 0.00 ]
+Key: VMINCSSrr: [ 0.00 0.00 ]
+Key: VMINMAXBF: [ 0.00 0.00 ]
+Key: VMINMAXPDZ: [ 0.00 0.00 ]
+Key: VMINMAXPDZrmbi: [ 0.00 0.00 ]
+Key: VMINMAXPDZrmbik: [ 0.00 0.00 ]
+Key: VMINMAXPDZrmbikz: [ 0.00 0.00 ]
+Key: VMINMAXPDZrmi: [ 0.00 0.00 ]
+Key: VMINMAXPDZrmik: [ 0.00 0.00 ]
+Key: VMINMAXPDZrmikz: [ 0.00 0.00 ]
+Key: VMINMAXPDZrri: [ 0.00 0.00 ]
+Key: VMINMAXPDZrrib: [ 0.00 0.00 ]
+Key: VMINMAXPDZrribk: [ 0.00 0.00 ]
+Key: VMINMAXPDZrribkz: [ 0.00 0.00 ]
+Key: VMINMAXPDZrrik: [ 0.00 0.00 ]
+Key: VMINMAXPDZrrikz: [ 0.00 0.00 ]
+Key: VMINMAXPHZ: [ 0.00 0.00 ]
+Key: VMINMAXPHZrmbi: [ 0.00 0.00 ]
+Key: VMINMAXPHZrmbik: [ 0.00 0.00 ]
+Key: VMINMAXPHZrmbikz: [ 0.00 0.00 ]
+Key: VMINMAXPHZrmi: [ 0.00 0.00 ]
+Key: VMINMAXPHZrmik: [ 0.00 0.00 ]
+Key: VMINMAXPHZrmikz: [ 0.00 0.00 ]
+Key: VMINMAXPHZrri: [ 0.00 0.00 ]
+Key: VMINMAXPHZrrib: [ 0.00 0.00 ]
+Key: VMINMAXPHZrribk: [ 0.00 0.00 ]
+Key: VMINMAXPHZrribkz: [ 0.00 0.00 ]
+Key: VMINMAXPHZrrik: [ 0.00 0.00 ]
+Key: VMINMAXPHZrrikz: [ 0.00 0.00 ]
+Key: VMINMAXPSZ: [ 0.00 0.00 ]
+Key: VMINMAXPSZrmbi: [ 0.00 0.00 ]
+Key: VMINMAXPSZrmbik: [ 0.00 0.00 ]
+Key: VMINMAXPSZrmbikz: [ 0.00 0.00 ]
+Key: VMINMAXPSZrmi: [ 0.00 0.00 ]
+Key: VMINMAXPSZrmik: [ 0.00 0.00 ]
+Key: VMINMAXPSZrmikz: [ 0.00 0.00 ]
+Key: VMINMAXPSZrri: [ 0.00 0.00 ]
+Key: VMINMAXPSZrrib: [ 0.00 0.00 ]
+Key: VMINMAXPSZrribk: [ 0.00 0.00 ]
+Key: VMINMAXPSZrribkz: [ 0.00 0.00 ]
+Key: VMINMAXPSZrrik: [ 0.00 0.00 ]
+Key: VMINMAXPSZrrikz: [ 0.00 0.00 ]
+Key: VMINMAXSDrmi: [ 0.00 0.00 ]
+Key: VMINMAXSDrmi_Int: [ 0.00 0.00 ]
+Key: VMINMAXSDrmik_Int: [ 0.00 0.00 ]
+Key: VMINMAXSDrmikz_Int: [ 0.00 0.00 ]
+Key: VMINMAXSDrri: [ 0.00 0.00 ]
+Key: VMINMAXSDrri_Int: [ 0.00 0.00 ]
+Key: VMINMAXSDrrib_Int: [ 0.00 0.00 ]
+Key: VMINMAXSDrribk_Int: [ 0.00 0.00 ]
+Key: VMINMAXSDrribkz_Int: [ 0.00 0.00 ]
+Key: VMINMAXSDrrik_Int: [ 0.00 0.00 ]
+Key: VMINMAXSDrrikz_Int: [ 0.00 0.00 ]
+Key: VMINMAXSHrmi: [ 0.00 0.00 ]
+Key: VMINMAXSHrmi_Int: [ 0.00 0.00 ]
+Key: VMINMAXSHrmik_Int: [ 0.00 0.00 ]
+Key: VMINMAXSHrmikz_Int: [ 0.00 0.00 ]
+Key: VMINMAXSHrri: [ 0.00 0.00 ]
+Key: VMINMAXSHrri_Int: [ 0.00 0.00 ]
+Key: VMINMAXSHrrib_Int: [ 0.00 0.00 ]
+Key: VMINMAXSHrribk_Int: [ 0.00 0.00 ]
+Key: VMINMAXSHrribkz_Int: [ 0.00 0.00 ]
+Key: VMINMAXSHrrik_Int: [ 0.00 0.00 ]
+Key: VMINMAXSHrrikz_Int: [ 0.00 0.00 ]
+Key: VMINMAXSSrmi: [ 0.00 0.00 ]
+Key: VMINMAXSSrmi_Int: [ 0.00 0.00 ]
+Key: VMINMAXSSrmik_Int: [ 0.00 0.00 ]
+Key: VMINMAXSSrmikz_Int: [ 0.00 0.00 ]
+Key: VMINMAXSSrri: [ 0.00 0.00 ]
+Key: VMINMAXSSrri_Int: [ 0.00 0.00 ]
+Key: VMINMAXSSrrib_Int: [ 0.00 0.00 ]
+Key: VMINMAXSSrribk_Int: [ 0.00 0.00 ]
+Key: VMINMAXSSrribkz_Int: [ 0.00 0.00 ]
+Key: VMINMAXSSrrik_Int: [ 0.00 0.00 ]
+Key: VMINMAXSSrrikz_Int: [ 0.00 0.00 ]
+Key: VMINPDYrm: [ 0.00 0.00 ]
+Key: VMINPDYrr: [ 0.00 0.00 ]
+Key: VMINPDZ: [ 0.00 0.00 ]
+Key: VMINPDZrm: [ 0.00 0.00 ]
+Key: VMINPDZrmb: [ 0.00 0.00 ]
+Key: VMINPDZrmbk: [ 0.00 0.00 ]
+Key: VMINPDZrmbkz: [ 0.00 0.00 ]
+Key: VMINPDZrmk: [ 0.00 0.00 ]
+Key: VMINPDZrmkz: [ 0.00 0.00 ]
+Key: VMINPDZrr: [ 0.00 0.00 ]
+Key: VMINPDZrrb: [ 0.00 0.00 ]
+Key: VMINPDZrrbk: [ 0.00 0.00 ]
+Key: VMINPDZrrbkz: [ 0.00 0.00 ]
+Key: VMINPDZrrk: [ 0.00 0.00 ]
+Key: VMINPDZrrkz: [ 0.00 0.00 ]
+Key: VMINPDrm: [ 0.00 0.00 ]
+Key: VMINPDrr: [ 0.00 0.00 ]
+Key: VMINPHZ: [ 0.00 0.00 ]
+Key: VMINPHZrm: [ 0.00 0.00 ]
+Key: VMINPHZrmb: [ 0.00 0.00 ]
+Key: VMINPHZrmbk: [ 0.00 0.00 ]
+Key: VMINPHZrmbkz: [ 0.00 0.00 ]
+Key: VMINPHZrmk: [ 0.00 0.00 ]
+Key: VMINPHZrmkz: [ 0.00 0.00 ]
+Key: VMINPHZrr: [ 0.00 0.00 ]
+Key: VMINPHZrrb: [ 0.00 0.00 ]
+Key: VMINPHZrrbk: [ 0.00 0.00 ]
+Key: VMINPHZrrbkz: [ 0.00 0.00 ]
+Key: VMINPHZrrk: [ 0.00 0.00 ]
+Key: VMINPHZrrkz: [ 0.00 0.00 ]
+Key: VMINPSYrm: [ 0.00 0.00 ]
+Key: VMINPSYrr: [ 0.00 0.00 ]
+Key: VMINPSZ: [ 0.00 0.00 ]
+Key: VMINPSZrm: [ 0.00 0.00 ]
+Key: VMINPSZrmb: [ 0.00 0.00 ]
+Key: VMINPSZrmbk: [ 0.00 0.00 ]
+Key: VMINPSZrmbkz: [ 0.00 0.00 ]
+Key: VMINPSZrmk: [ 0.00 0.00 ]
+Key: VMINPSZrmkz: [ 0.00 0.00 ]
+Key: VMINPSZrr: [ 0.00 0.00 ]
+Key: VMINPSZrrb: [ 0.00 0.00 ]
+Key: VMINPSZrrbk: [ 0.00 0.00 ]
+Key: VMINPSZrrbkz: [ 0.00 0.00 ]
+Key: VMINPSZrrk: [ 0.00 0.00 ]
+Key: VMINPSZrrkz: [ 0.00 0.00 ]
+Key: VMINPSrm: [ 0.00 0.00 ]
+Key: VMINPSrr: [ 0.00 0.00 ]
+Key: VMINSDZrm: [ 0.00 0.00 ]
+Key: VMINSDZrm_Int: [ 0.00 0.00 ]
+Key: VMINSDZrmk_Int: [ 0.00 0.00 ]
+Key: VMINSDZrmkz_Int: [ 0.00 0.00 ]
+Key: VMINSDZrr: [ 0.00 0.00 ]
+Key: VMINSDZrr_Int: [ 0.00 0.00 ]
+Key: VMINSDZrrb_Int: [ 0.00 0.00 ]
+Key: VMINSDZrrbk_Int: [ 0.00 0.00 ]
+Key: VMINSDZrrbkz_Int: [ 0.00 0.00 ]
+Key: VMINSDZrrk_Int: [ 0.00 0.00 ]
+Key: VMINSDZrrkz_Int: [ 0.00 0.00 ]
+Key: VMINSDrm: [ 0.00 0.00 ]
+Key: VMINSDrm_Int: [ 0.00 0.00 ]
+Key: VMINSDrr: [ 0.00 0.00 ]
+Key: VMINSDrr_Int: [ 0.00 0.00 ]
+Key: VMINSHZrm: [ 0.00 0.00 ]
+Key: VMINSHZrm_Int: [ 0.00 0.00 ]
+Key: VMINSHZrmk_Int: [ 0.00 0.00 ]
+Key: VMINSHZrmkz_Int: [ 0.00 0.00 ]
+Key: VMINSHZrr: [ 0.00 0.00 ]
+Key: VMINSHZrr_Int: [ 0.00 0.00 ]
+Key: VMINSHZrrb_Int: [ 0.00 0.00 ]
+Key: VMINSHZrrbk_Int: [ 0.00 0.00 ]
+Key: VMINSHZrrbkz_Int: [ 0.00 0.00 ]
+Key: VMINSHZrrk_Int: [ 0.00 0.00 ]
+Key: VMINSHZrrkz_Int: [ 0.00 0.00 ]
+Key: VMINSSZrm: [ 0.00 0.00 ]
+Key: VMINSSZrm_Int: [ 0.00 0.00 ]
+Key: VMINSSZrmk_Int: [ 0.00 0.00 ]
+Key: VMINSSZrmkz_Int: [ 0.00 0.00 ]
+Key: VMINSSZrr: [ 0.00 0.00 ]
+Key: VMINSSZrr_Int: [ 0.00 0.00 ]
+Key: VMINSSZrrb_Int: [ 0.00 0.00 ]
+Key: VMINSSZrrbk_Int: [ 0.00 0.00 ]
+Key: VMINSSZrrbkz_Int: [ 0.00 0.00 ]
+Key: VMINSSZrrk_Int: [ 0.00 0.00 ]
+Key: VMINSSZrrkz_Int: [ 0.00 0.00 ]
+Key: VMINSSrm: [ 0.00 0.00 ]
+Key: VMINSSrm_Int: [ 0.00 0.00 ]
+Key: VMINSSrr: [ 0.00 0.00 ]
+Key: VMINSSrr_Int: [ 0.00 0.00 ]
+Key: VMLAUNCH: [ 0.00 0.00 ]
+Key: VMLOAD: [ 0.00 0.00 ]
+Key: VMMCALL: [ 0.00 0.00 ]
+Key: VMOV: [ 0.00 0.00 ]
+Key: VMOVAPDYmr: [ 0.00 0.00 ]
+Key: VMOVAPDYrm: [ 0.00 0.00 ]
+Key: VMOVAPDYrr: [ 0.00 0.00 ]
+Key: VMOVAPDYrr_REV: [ 0.00 0.00 ]
+Key: VMOVAPDZ: [ 0.00 0.00 ]
+Key: VMOVAPDZmr: [ 0.00 0.00 ]
+Key: VMOVAPDZmrk: [ 0.00 0.00 ]
+Key: VMOVAPDZrm: [ 0.00 0.00 ]
+Key: VMOVAPDZrmk: [ 0.00 0.00 ]
+Key: VMOVAPDZrmkz: [ 0.00 0.00 ]
+Key: VMOVAPDZrr: [ 0.00 0.00 ]
+Key: VMOVAPDZrr_REV: [ 0.00 0.00 ]
+Key: VMOVAPDZrrk: [ 0.00 0.00 ]
+Key: VMOVAPDZrrk_REV: [ 0.00 0.00 ]
+Key: VMOVAPDZrrkz: [ 0.00 0.00 ]
+Key: VMOVAPDZrrkz_REV: [ 0.00 0.00 ]
+Key: VMOVAPDmr: [ 0.00 0.00 ]
+Key: VMOVAPDrm: [ 0.00 0.00 ]
+Key: VMOVAPDrr: [ 0.00 0.00 ]
+Key: VMOVAPDrr_REV: [ 0.00 0.00 ]
+Key: VMOVAPSYmr: [ 0.00 0.00 ]
+Key: VMOVAPSYrm: [ 0.00 0.00 ]
+Key: VMOVAPSYrr: [ 0.00 0.00 ]
+Key: VMOVAPSYrr_REV: [ 0.00 0.00 ]
+Key: VMOVAPSZ: [ 0.00 0.00 ]
+Key: VMOVAPSZmr: [ 0.00 0.00 ]
+Key: VMOVAPSZmrk: [ 0.00 0.00 ]
+Key: VMOVAPSZrm: [ 0.00 0.00 ]
+Key: VMOVAPSZrmk: [ 0.00 0.00 ]
+Key: VMOVAPSZrmkz: [ 0.00 0.00 ]
+Key: VMOVAPSZrr: [ 0.00 0.00 ]
+Key: VMOVAPSZrr_REV: [ 0.00 0.00 ]
+Key: VMOVAPSZrrk: [ 0.00 0.00 ]
+Key: VMOVAPSZrrk_REV: [ 0.00 0.00 ]
+Key: VMOVAPSZrrkz: [ 0.00 0.00 ]
+Key: VMOVAPSZrrkz_REV: [ 0.00 0.00 ]
+Key: VMOVAPSmr: [ 0.00 0.00 ]
+Key: VMOVAPSrm: [ 0.00 0.00 ]
+Key: VMOVAPSrr: [ 0.00 0.00 ]
+Key: VMOVAPSrr_REV: [ 0.00 0.00 ]
+Key: VMOVDDUPYrm: [ 0.00 0.00 ]
+Key: VMOVDDUPYrr: [ 0.00 0.00 ]
+Key: VMOVDDUPZ: [ 0.00 0.00 ]
+Key: VMOVDDUPZrm: [ 0.00 0.00 ]
+Key: VMOVDDUPZrmk: [ 0.00 0.00 ]
+Key: VMOVDDUPZrmkz: [ 0.00 0.00 ]
+Key: VMOVDDUPZrr: [ 0.00 0.00 ]
+Key: VMOVDDUPZrrk: [ 0.00 0.00 ]
+Key: VMOVDDUPZrrkz: [ 0.00 0.00 ]
+Key: VMOVDDUPrm: [ 0.00 0.00 ]
+Key: VMOVDDUPrr: [ 0.00 0.00 ]
+Key: VMOVDI: [ 0.00 0.00 ]
+Key: VMOVDQA: [ 0.00 0.00 ]
+Key: VMOVDQAYmr: [ 0.00 0.00 ]
+Key: VMOVDQAYrm: [ 0.00 0.00 ]
+Key: VMOVDQAYrr: [ 0.00 0.00 ]
+Key: VMOVDQAYrr_REV: [ 0.00 0.00 ]
+Key: VMOVDQAmr: [ 0.00 0.00 ]
+Key: VMOVDQArm: [ 0.00 0.00 ]
+Key: VMOVDQArr: [ 0.00 0.00 ]
+Key: VMOVDQArr_REV: [ 0.00 0.00 ]
+Key: VMOVDQU: [ 0.00 0.00 ]
+Key: VMOVDQUYmr: [ 0.00 0.00 ]
+Key: VMOVDQUYrm: [ 0.00 0.00 ]
+Key: VMOVDQUYrr: [ 0.00 0.00 ]
+Key: VMOVDQUYrr_REV: [ 0.00 0.00 ]
+Key: VMOVDQUmr: [ 0.00 0.00 ]
+Key: VMOVDQUrm: [ 0.00 0.00 ]
+Key: VMOVDQUrr: [ 0.00 0.00 ]
+Key: VMOVDQUrr_REV: [ 0.00 0.00 ]
+Key: VMOVHLPSZrr: [ 0.00 0.00 ]
+Key: VMOVHLPSrr: [ 0.00 0.00 ]
+Key: VMOVHPDZ: [ 0.00 0.00 ]
+Key: VMOVHPDmr: [ 0.00 0.00 ]
+Key: VMOVHPDrm: [ 0.00 0.00 ]
+Key: VMOVHPSZ: [ 0.00 0.00 ]
+Key: VMOVHPSmr: [ 0.00 0.00 ]
+Key: VMOVHPSrm: [ 0.00 0.00 ]
+Key: VMOVLHPSZrr: [ 0.00 0.00 ]
+Key: VMOVLHPSrr: [ 0.00 0.00 ]
+Key: VMOVLPDZ: [ 0.00 0.00 ]
+Key: VMOVLPDmr: [ 0.00 0.00 ]
+Key: VMOVLPDrm: [ 0.00 0.00 ]
+Key: VMOVLPSZ: [ 0.00 0.00 ]
+Key: VMOVLPSmr: [ 0.00 0.00 ]
+Key: VMOVLPSrm: [ 0.00 0.00 ]
+Key: VMOVMSKPDYrr: [ 0.00 0.00 ]
+Key: VMOVMSKPDrr: [ 0.00 0.00 ]
+Key: VMOVMSKPSYrr: [ 0.00 0.00 ]
+Key: VMOVMSKPSrr: [ 0.00 0.00 ]
+Key: VMOVNTDQAYrm: [ 0.00 0.00 ]
+Key: VMOVNTDQAZ: [ 0.00 0.00 ]
+Key: VMOVNTDQAZrm: [ 0.00 0.00 ]
+Key: VMOVNTDQArm: [ 0.00 0.00 ]
+Key: VMOVNTDQYmr: [ 0.00 0.00 ]
+Key: VMOVNTDQZ: [ 0.00 0.00 ]
+Key: VMOVNTDQZmr: [ 0.00 0.00 ]
+Key: VMOVNTDQmr: [ 0.00 0.00 ]
+Key: VMOVNTPDYmr: [ 0.00 0.00 ]
+Key: VMOVNTPDZ: [ 0.00 0.00 ]
+Key: VMOVNTPDZmr: [ 0.00 0.00 ]
+Key: VMOVNTPDmr: [ 0.00 0.00 ]
+Key: VMOVNTPSYmr: [ 0.00 0.00 ]
+Key: VMOVNTPSZ: [ 0.00 0.00 ]
+Key: VMOVNTPSZmr: [ 0.00 0.00 ]
+Key: VMOVNTPSmr: [ 0.00 0.00 ]
+Key: VMOVPDI: [ 0.00 0.00 ]
+Key: VMOVPQI: [ 0.00 0.00 ]
+Key: VMOVPQIto: [ 0.00 0.00 ]
+Key: VMOVQI: [ 0.00 0.00 ]
+Key: VMOVRSBZ: [ 0.00 0.00 ]
+Key: VMOVRSBZm: [ 0.00 0.00 ]
+Key: VMOVRSBZmk: [ 0.00 0.00 ]
+Key: VMOVRSBZmkz: [ 0.00 0.00 ]
+Key: VMOVRSDZ: [ 0.00 0.00 ]
+Key: VMOVRSDZm: [ 0.00 0.00 ]
+Key: VMOVRSDZmk: [ 0.00 0.00 ]
+Key: VMOVRSDZmkz: [ 0.00 0.00 ]
+Key: VMOVRSQZ: [ 0.00 0.00 ]
+Key: VMOVRSQZm: [ 0.00 0.00 ]
+Key: VMOVRSQZmk: [ 0.00 0.00 ]
+Key: VMOVRSQZmkz: [ 0.00 0.00 ]
+Key: VMOVRSWZ: [ 0.00 0.00 ]
+Key: VMOVRSWZm: [ 0.00 0.00 ]
+Key: VMOVRSWZmk: [ 0.00 0.00 ]
+Key: VMOVRSWZmkz: [ 0.00 0.00 ]
+Key: VMOVSDZmr: [ 0.00 0.00 ]
+Key: VMOVSDZmrk: [ 0.00 0.00 ]
+Key: VMOVSDZrm: [ 0.00 0.00 ]
+Key: VMOVSDZrm_alt: [ 0.00 0.00 ]
+Key: VMOVSDZrmk: [ 0.00 0.00 ]
+Key: VMOVSDZrmkz: [ 0.00 0.00 ]
+Key: VMOVSDZrr: [ 0.00 0.00 ]
+Key: VMOVSDZrr_REV: [ 0.00 0.00 ]
+Key: VMOVSDZrrk: [ 0.00 0.00 ]
+Key: VMOVSDZrrk_REV: [ 0.00 0.00 ]
+Key: VMOVSDZrrkz: [ 0.00 0.00 ]
+Key: VMOVSDZrrkz_REV: [ 0.00 0.00 ]
+Key: VMOVSDmr: [ 0.00 0.00 ]
+Key: VMOVSDrm: [ 0.00 0.00 ]
+Key: VMOVSDrm_alt: [ 0.00 0.00 ]
+Key: VMOVSDrr: [ 0.00 0.00 ]
+Key: VMOVSDrr_REV: [ 0.00 0.00 ]
+Key: VMOVSDto: [ 0.00 0.00 ]
+Key: VMOVSH: [ 0.00 0.00 ]
+Key: VMOVSHDUPYrm: [ 0.00 0.00 ]
+Key: VMOVSHDUPYrr: [ 0.00 0.00 ]
+Key: VMOVSHDUPZ: [ 0.00 0.00 ]
+Key: VMOVSHDUPZrm: [ 0.00 0.00 ]
+Key: VMOVSHDUPZrmk: [ 0.00 0.00 ]
+Key: VMOVSHDUPZrmkz: [ 0.00 0.00 ]
+Key: VMOVSHDUPZrr: [ 0.00 0.00 ]
+Key: VMOVSHDUPZrrk: [ 0.00 0.00 ]
+Key: VMOVSHDUPZrrkz: [ 0.00 0.00 ]
+Key: VMOVSHDUPrm: [ 0.00 0.00 ]
+Key: VMOVSHDUPrr: [ 0.00 0.00 ]
+Key: VMOVSHZmr: [ 0.00 0.00 ]
+Key: VMOVSHZmrk: [ 0.00 0.00 ]
+Key: VMOVSHZrm: [ 0.00 0.00 ]
+Key: VMOVSHZrm_alt: [ 0.00 0.00 ]
+Key: VMOVSHZrmk: [ 0.00 0.00 ]
+Key: VMOVSHZrmkz: [ 0.00 0.00 ]
+Key: VMOVSHZrr: [ 0.00 0.00 ]
+Key: VMOVSHZrr_REV: [ 0.00 0.00 ]
+Key: VMOVSHZrrk: [ 0.00 0.00 ]
+Key: VMOVSHZrrk_REV: [ 0.00 0.00 ]
+Key: VMOVSHZrrkz: [ 0.00 0.00 ]
+Key: VMOVSHZrrkz_REV: [ 0.00 0.00 ]
+Key: VMOVSHtoW: [ 0.00 0.00 ]
+Key: VMOVSLDUPYrm: [ 0.00 0.00 ]
+Key: VMOVSLDUPYrr: [ 0.00 0.00 ]
+Key: VMOVSLDUPZ: [ 0.00 0.00 ]
+Key: VMOVSLDUPZrm: [ 0.00 0.00 ]
+Key: VMOVSLDUPZrmk: [ 0.00 0.00 ]
+Key: VMOVSLDUPZrmkz: [ 0.00 0.00 ]
+Key: VMOVSLDUPZrr: [ 0.00 0.00 ]
+Key: VMOVSLDUPZrrk: [ 0.00 0.00 ]
+Key: VMOVSLDUPZrrkz: [ 0.00 0.00 ]
+Key: VMOVSLDUPrm: [ 0.00 0.00 ]
+Key: VMOVSLDUPrr: [ 0.00 0.00 ]
+Key: VMOVSS: [ 0.00 0.00 ]
+Key: VMOVSSZmr: [ 0.00 0.00 ]
+Key: VMOVSSZmrk: [ 0.00 0.00 ]
+Key: VMOVSSZrm: [ 0.00 0.00 ]
+Key: VMOVSSZrm_alt: [ 0.00 0.00 ]
+Key: VMOVSSZrmk: [ 0.00 0.00 ]
+Key: VMOVSSZrmkz: [ 0.00 0.00 ]
+Key: VMOVSSZrr: [ 0.00 0.00 ]
+Key: VMOVSSZrr_REV: [ 0.00 0.00 ]
+Key: VMOVSSZrrk: [ 0.00 0.00 ]
+Key: VMOVSSZrrk_REV: [ 0.00 0.00 ]
+Key: VMOVSSZrrkz: [ 0.00 0.00 ]
+Key: VMOVSSZrrkz_REV: [ 0.00 0.00 ]
+Key: VMOVSSmr: [ 0.00 0.00 ]
+Key: VMOVSSrm: [ 0.00 0.00 ]
+Key: VMOVSSrm_alt: [ 0.00 0.00 ]
+Key: VMOVSSrr: [ 0.00 0.00 ]
+Key: VMOVSSrr_REV: [ 0.00 0.00 ]
+Key: VMOVUPDYmr: [ 0.00 0.00 ]
+Key: VMOVUPDYrm: [ 0.00 0.00 ]
+Key: VMOVUPDYrr: [ 0.00 0.00 ]
+Key: VMOVUPDYrr_REV: [ 0.00 0.00 ]
+Key: VMOVUPDZ: [ 0.00 0.00 ]
+Key: VMOVUPDZmr: [ 0.00 0.00 ]
+Key: VMOVUPDZmrk: [ 0.00 0.00 ]
+Key: VMOVUPDZrm: [ 0.00 0.00 ]
+Key: VMOVUPDZrmk: [ 0.00 0.00 ]
+Key: VMOVUPDZrmkz: [ 0.00 0.00 ]
+Key: VMOVUPDZrr: [ 0.00 0.00 ]
+Key: VMOVUPDZrr_REV: [ 0.00 0.00 ]
+Key: VMOVUPDZrrk: [ 0.00 0.00 ]
+Key: VMOVUPDZrrk_REV: [ 0.00 0.00 ]
+Key: VMOVUPDZrrkz: [ 0.00 0.00 ]
+Key: VMOVUPDZrrkz_REV: [ 0.00 0.00 ]
+Key: VMOVUPDmr: [ 0.00 0.00 ]
+Key: VMOVUPDrm: [ 0.00 0.00 ]
+Key: VMOVUPDrr: [ 0.00 0.00 ]
+Key: VMOVUPDrr_REV: [ 0.00 0.00 ]
+Key: VMOVUPSYmr: [ 0.00 0.00 ]
+Key: VMOVUPSYrm: [ 0.00 0.00 ]
+Key: VMOVUPSYrr: [ 0.00 0.00 ]
+Key: VMOVUPSYrr_REV: [ 0.00 0.00 ]
+Key: VMOVUPSZ: [ 0.00 0.00 ]
+Key: VMOVUPSZmr: [ 0.00 0.00 ]
+Key: VMOVUPSZmrk: [ 0.00 0.00 ]
+Key: VMOVUPSZrm: [ 0.00 0.00 ]
+Key: VMOVUPSZrmk: [ 0.00 0.00 ]
+Key: VMOVUPSZrmkz: [ 0.00 0.00 ]
+Key: VMOVUPSZrr: [ 0.00 0.00 ]
+Key: VMOVUPSZrr_REV: [ 0.00 0.00 ]
+Key: VMOVUPSZrrk: [ 0.00 0.00 ]
+Key: VMOVUPSZrrk_REV: [ 0.00 0.00 ]
+Key: VMOVUPSZrrkz: [ 0.00 0.00 ]
+Key: VMOVUPSZrrkz_REV: [ 0.00 0.00 ]
+Key: VMOVUPSmr: [ 0.00 0.00 ]
+Key: VMOVUPSrm: [ 0.00 0.00 ]
+Key: VMOVUPSrr: [ 0.00 0.00 ]
+Key: VMOVUPSrr_REV: [ 0.00 0.00 ]
+Key: VMOVW: [ 0.00 0.00 ]
+Key: VMOVWmr: [ 0.00 0.00 ]
+Key: VMOVWrm: [ 0.00 0.00 ]
+Key: VMOVZPDILo: [ 0.00 0.00 ]
+Key: VMOVZPQILo: [ 0.00 0.00 ]
+Key: VMOVZPWILo: [ 0.00 0.00 ]
+Key: VMPSADBWYrmi: [ 0.00 0.00 ]
+Key: VMPSADBWYrri: [ 0.00 0.00 ]
+Key: VMPSADBWZ: [ 0.00 0.00 ]
+Key: VMPSADBWZrmi: [ 0.00 0.00 ]
+Key: VMPSADBWZrmik: [ 0.00 0.00 ]
+Key: VMPSADBWZrmikz: [ 0.00 0.00 ]
+Key: VMPSADBWZrri: [ 0.00 0.00 ]
+Key: VMPSADBWZrrik: [ 0.00 0.00 ]
+Key: VMPSADBWZrrikz: [ 0.00 0.00 ]
+Key: VMPSADBWrmi: [ 0.00 0.00 ]
+Key: VMPSADBWrri: [ 0.00 0.00 ]
+Key: VMPTRLDm: [ 0.00 0.00 ]
+Key: VMPTRSTm: [ 0.00 0.00 ]
+Key: VMREAD: [ 0.00 0.00 ]
+Key: VMRESUME: [ 0.00 0.00 ]
+Key: VMRUN: [ 0.00 0.00 ]
+Key: VMSAVE: [ 0.00 0.00 ]
+Key: VMULBF: [ 0.00 0.00 ]
+Key: VMULPDYrm: [ 0.00 0.00 ]
+Key: VMULPDYrr: [ 0.00 0.00 ]
+Key: VMULPDZ: [ 0.00 0.00 ]
+Key: VMULPDZrm: [ 0.00 0.00 ]
+Key: VMULPDZrmb: [ 0.00 0.00 ]
+Key: VMULPDZrmbk: [ 0.00 0.00 ]
+Key: VMULPDZrmbkz: [ 0.00 0.00 ]
+Key: VMULPDZrmk: [ 0.00 0.00 ]
+Key: VMULPDZrmkz: [ 0.00 0.00 ]
+Key: VMULPDZrr: [ 0.00 0.00 ]
+Key: VMULPDZrrb: [ 0.00 0.00 ]
+Key: VMULPDZrrbk: [ 0.00 0.00 ]
+Key: VMULPDZrrbkz: [ 0.00 0.00 ]
+Key: VMULPDZrrk: [ 0.00 0.00 ]
+Key: VMULPDZrrkz: [ 0.00 0.00 ]
+Key: VMULPDrm: [ 0.00 0.00 ]
+Key: VMULPDrr: [ 0.00 0.00 ]
+Key: VMULPHZ: [ 0.00 0.00 ]
+Key: VMULPHZrm: [ 0.00 0.00 ]
+Key: VMULPHZrmb: [ 0.00 0.00 ]
+Key: VMULPHZrmbk: [ 0.00 0.00 ]
+Key: VMULPHZrmbkz: [ 0.00 0.00 ]
+Key: VMULPHZrmk: [ 0.00 0.00 ]
+Key: VMULPHZrmkz: [ 0.00 0.00 ]
+Key: VMULPHZrr: [ 0.00 0.00 ]
+Key: VMULPHZrrb: [ 0.00 0.00 ]
+Key: VMULPHZrrbk: [ 0.00 0.00 ]
+Key: VMULPHZrrbkz: [ 0.00 0.00 ]
+Key: VMULPHZrrk: [ 0.00 0.00 ]
+Key: VMULPHZrrkz: [ 0.00 0.00 ]
+Key: VMULPSYrm: [ 0.00 0.00 ]
+Key: VMULPSYrr: [ 0.00 0.00 ]
+Key: VMULPSZ: [ 0.00 0.00 ]
+Key: VMULPSZrm: [ 0.00 0.00 ]
+Key: VMULPSZrmb: [ 0.00 0.00 ]
+Key: VMULPSZrmbk: [ 0.00 0.00 ]
+Key: VMULPSZrmbkz: [ 0.00 0.00 ]
+Key: VMULPSZrmk: [ 0.00 0.00 ]
+Key: VMULPSZrmkz: [ 0.00 0.00 ]
+Key: VMULPSZrr: [ 0.00 0.00 ]
+Key: VMULPSZrrb: [ 0.00 0.00 ]
+Key: VMULPSZrrbk: [ 0.00 0.00 ]
+Key: VMULPSZrrbkz: [ 0.00 0.00 ]
+Key: VMULPSZrrk: [ 0.00 0.00 ]
+Key: VMULPSZrrkz: [ 0.00 0.00 ]
+Key: VMULPSrm: [ 0.00 0.00 ]
+Key: VMULPSrr: [ 0.00 0.00 ]
+Key: VMULSDZrm: [ 0.00 0.00 ]
+Key: VMULSDZrm_Int: [ 0.00 0.00 ]
+Key: VMULSDZrmk_Int: [ 0.00 0.00 ]
+Key: VMULSDZrmkz_Int: [ 0.00 0.00 ]
+Key: VMULSDZrr: [ 0.00 0.00 ]
+Key: VMULSDZrr_Int: [ 0.00 0.00 ]
+Key: VMULSDZrrb_Int: [ 0.00 0.00 ]
+Key: VMULSDZrrbk_Int: [ 0.00 0.00 ]
+Key: VMULSDZrrbkz_Int: [ 0.00 0.00 ]
+Key: VMULSDZrrk_Int: [ 0.00 0.00 ]
+Key: VMULSDZrrkz_Int: [ 0.00 0.00 ]
+Key: VMULSDrm: [ 0.00 0.00 ]
+Key: VMULSDrm_Int: [ 0.00 0.00 ]
+Key: VMULSDrr: [ 0.00 0.00 ]
+Key: VMULSDrr_Int: [ 0.00 0.00 ]
+Key: VMULSHZrm: [ 0.00 0.00 ]
+Key: VMULSHZrm_Int: [ 0.00 0.00 ]
+Key: VMULSHZrmk_Int: [ 0.00 0.00 ]
+Key: VMULSHZrmkz_Int: [ 0.00 0.00 ]
+Key: VMULSHZrr: [ 0.00 0.00 ]
+Key: VMULSHZrr_Int: [ 0.00 0.00 ]
+Key: VMULSHZrrb_Int: [ 0.00 0.00 ]
+Key: VMULSHZrrbk_Int: [ 0.00 0.00 ]
+Key: VMULSHZrrbkz_Int: [ 0.00 0.00 ]
+Key: VMULSHZrrk_Int: [ 0.00 0.00 ]
+Key: VMULSHZrrkz_Int: [ 0.00 0.00 ]
+Key: VMULSSZrm: [ 0.00 0.00 ]
+Key: VMULSSZrm_Int: [ 0.00 0.00 ]
+Key: VMULSSZrmk_Int: [ 0.00 0.00 ]
+Key: VMULSSZrmkz_Int: [ 0.00 0.00 ]
+Key: VMULSSZrr: [ 0.00 0.00 ]
+Key: VMULSSZrr_Int: [ 0.00 0.00 ]
+Key: VMULSSZrrb_Int: [ 0.00 0.00 ]
+Key: VMULSSZrrbk_Int: [ 0.00 0.00 ]
+Key: VMULSSZrrbkz_Int: [ 0.00 0.00 ]
+Key: VMULSSZrrk_Int: [ 0.00 0.00 ]
+Key: VMULSSZrrkz_Int: [ 0.00 0.00 ]
+Key: VMULSSrm: [ 0.00 0.00 ]
+Key: VMULSSrm_Int: [ 0.00 0.00 ]
+Key: VMULSSrr: [ 0.00 0.00 ]
+Key: VMULSSrr_Int: [ 0.00 0.00 ]
+Key: VMWRITE: [ 0.00 0.00 ]
+Key: VMXOFF: [ 0.00 0.00 ]
+Key: VMXON: [ 0.00 0.00 ]
+Key: VORPDYrm: [ 0.00 0.00 ]
+Key: VORPDYrr: [ 0.00 0.00 ]
+Key: VORPDZ: [ 0.00 0.00 ]
+Key: VORPDZrm: [ 0.00 0.00 ]
+Key: VORPDZrmb: [ 0.00 0.00 ]
+Key: VORPDZrmbk: [ 0.00 0.00 ]
+Key: VORPDZrmbkz: [ 0.00 0.00 ]
+Key: VORPDZrmk: [ 0.00 0.00 ]
+Key: VORPDZrmkz: [ 0.00 0.00 ]
+Key: VORPDZrr: [ 0.00 0.00 ]
+Key: VORPDZrrk: [ 0.00 0.00 ]
+Key: VORPDZrrkz: [ 0.00 0.00 ]
+Key: VORPDrm: [ 0.00 0.00 ]
+Key: VORPDrr: [ 0.00 0.00 ]
+Key: VORPSYrm: [ 0.00 0.00 ]
+Key: VORPSYrr: [ 0.00 0.00 ]
+Key: VORPSZ: [ 0.00 0.00 ]
+Key: VORPSZrm: [ 0.00 0.00 ]
+Key: VORPSZrmb: [ 0.00 0.00 ]
+Key: VORPSZrmbk: [ 0.00 0.00 ]
+Key: VORPSZrmbkz: [ 0.00 0.00 ]
+Key: VORPSZrmk: [ 0.00 0.00 ]
+Key: VORPSZrmkz: [ 0.00 0.00 ]
+Key: VORPSZrr: [ 0.00 0.00 ]
+Key: VORPSZrrk: [ 0.00 0.00 ]
+Key: VORPSZrrkz: [ 0.00 0.00 ]
+Key: VORPSrm: [ 0.00 0.00 ]
+Key: VORPSrr: [ 0.00 0.00 ]
+Key: VP: [ 0.00 0.00 ]
+Key: VPABSBYrm: [ 0.00 0.00 ]
+Key: VPABSBYrr: [ 0.00 0.00 ]
+Key: VPABSBZ: [ 0.00 0.00 ]
+Key: VPABSBZrm: [ 0.00 0.00 ]
+Key: VPABSBZrmk: [ 0.00 0.00 ]
+Key: VPABSBZrmkz: [ 0.00 0.00 ]
+Key: VPABSBZrr: [ 0.00 0.00 ]
+Key: VPABSBZrrk: [ 0.00 0.00 ]
+Key: VPABSBZrrkz: [ 0.00 0.00 ]
+Key: VPABSBrm: [ 0.00 0.00 ]
+Key: VPABSBrr: [ 0.00 0.00 ]
+Key: VPABSDYrm: [ 0.00 0.00 ]
+Key: VPABSDYrr: [ 0.00 0.00 ]
+Key: VPABSDZ: [ 0.00 0.00 ]
+Key: VPABSDZrm: [ 0.00 0.00 ]
+Key: VPABSDZrmb: [ 0.00 0.00 ]
+Key: VPABSDZrmbk: [ 0.00 0.00 ]
+Key: VPABSDZrmbkz: [ 0.00 0.00 ]
+Key: VPABSDZrmk: [ 0.00 0.00 ]
+Key: VPABSDZrmkz: [ 0.00 0.00 ]
+Key: VPABSDZrr: [ 0.00 0.00 ]
+Key: VPABSDZrrk: [ 0.00 0.00 ]
+Key: VPABSDZrrkz: [ 0.00 0.00 ]
+Key: VPABSDrm: [ 0.00 0.00 ]
+Key: VPABSDrr: [ 0.00 0.00 ]
+Key: VPABSQZ: [ 0.00 0.00 ]
+Key: VPABSQZrm: [ 0.00 0.00 ]
+Key: VPABSQZrmb: [ 0.00 0.00 ]
+Key: VPABSQZrmbk: [ 0.00 0.00 ]
+Key: VPABSQZrmbkz: [ 0.00 0.00 ]
+Key: VPABSQZrmk: [ 0.00 0.00 ]
+Key: VPABSQZrmkz: [ 0.00 0.00 ]
+Key: VPABSQZrr: [ 0.00 0.00 ]
+Key: VPABSQZrrk: [ 0.00 0.00 ]
+Key: VPABSQZrrkz: [ 0.00 0.00 ]
+Key: VPABSWYrm: [ 0.00 0.00 ]
+Key: VPABSWYrr: [ 0.00 0.00 ]
+Key: VPABSWZ: [ 0.00 0.00 ]
+Key: VPABSWZrm: [ 0.00 0.00 ]
+Key: VPABSWZrmk: [ 0.00 0.00 ]
+Key: VPABSWZrmkz: [ 0.00 0.00 ]
+Key: VPABSWZrr: [ 0.00 0.00 ]
+Key: VPABSWZrrk: [ 0.00 0.00 ]
+Key: VPABSWZrrkz: [ 0.00 0.00 ]
+Key: VPABSWrm: [ 0.00 0.00 ]
+Key: VPABSWrr: [ 0.00 0.00 ]
+Key: VPACKSSDWYrm: [ 0.00 0.00 ]
+Key: VPACKSSDWYrr: [ 0.00 0.00 ]
+Key: VPACKSSDWZ: [ 0.00 0.00 ]
+Key: VPACKSSDWZrm: [ 0.00 0.00 ]
+Key: VPACKSSDWZrmb: [ 0.00 0.00 ]
+Key: VPACKSSDWZrmbk: [ 0.00 0.00 ]
+Key: VPACKSSDWZrmbkz: [ 0.00 0.00 ]
+Key: VPACKSSDWZrmk: [ 0.00 0.00 ]
+Key: VPACKSSDWZrmkz: [ 0.00 0.00 ]
+Key: VPACKSSDWZrr: [ 0.00 0.00 ]
+Key: VPACKSSDWZrrk: [ 0.00 0.00 ]
+Key: VPACKSSDWZrrkz: [ 0.00 0.00 ]
+Key: VPACKSSDWrm: [ 0.00 0.00 ]
+Key: VPACKSSDWrr: [ 0.00 0.00 ]
+Key: VPACKSSWBYrm: [ 0.00 0.00 ]
+Key: VPACKSSWBYrr: [ 0.00 0.00 ]
+Key: VPACKSSWBZ: [ 0.00 0.00 ]
+Key: VPACKSSWBZrm: [ 0.00 0.00 ]
+Key: VPACKSSWBZrmk: [ 0.00 0.00 ]
+Key: VPACKSSWBZrmkz: [ 0.00 0.00 ]
+Key: VPACKSSWBZrr: [ 0.00 0.00 ]
+Key: VPACKSSWBZrrk: [ 0.00 0.00 ]
+Key: VPACKSSWBZrrkz: [ 0.00 0.00 ]
+Key: VPACKSSWBrm: [ 0.00 0.00 ]
+Key: VPACKSSWBrr: [ 0.00 0.00 ]
+Key: VPACKUSDWYrm: [ 0.00 0.00 ]
+Key: VPACKUSDWYrr: [ 0.00 0.00 ]
+Key: VPACKUSDWZ: [ 0.00 0.00 ]
+Key: VPACKUSDWZrm: [ 0.00 0.00 ]
+Key: VPACKUSDWZrmb: [ 0.00 0.00 ]
+Key: VPACKUSDWZrmbk: [ 0.00 0.00 ]
+Key: VPACKUSDWZrmbkz: [ 0.00 0.00 ]
+Key: VPACKUSDWZrmk: [ 0.00 0.00 ]
+Key: VPACKUSDWZrmkz: [ 0.00 0.00 ]
+Key: VPACKUSDWZrr: [ 0.00 0.00 ]
+Key: VPACKUSDWZrrk: [ 0.00 0.00 ]
+Key: VPACKUSDWZrrkz: [ 0.00 0.00 ]
+Key: VPACKUSDWrm: [ 0.00 0.00 ]
+Key: VPACKUSDWrr: [ 0.00 0.00 ]
+Key: VPACKUSWBYrm: [ 0.00 0.00 ]
+Key: VPACKUSWBYrr: [ 0.00 0.00 ]
+Key: VPACKUSWBZ: [ 0.00 0.00 ]
+Key: VPACKUSWBZrm: [ 0.00 0.00 ]
+Key: VPACKUSWBZrmk: [ 0.00 0.00 ]
+Key: VPACKUSWBZrmkz: [ 0.00 0.00 ]
+Key: VPACKUSWBZrr: [ 0.00 0.00 ]
+Key: VPACKUSWBZrrk: [ 0.00 0.00 ]
+Key: VPACKUSWBZrrkz: [ 0.00 0.00 ]
+Key: VPACKUSWBrm: [ 0.00 0.00 ]
+Key: VPACKUSWBrr: [ 0.00 0.00 ]
+Key: VPADDBYrm: [ 0.00 0.00 ]
+Key: VPADDBYrr: [ 0.00 0.00 ]
+Key: VPADDBZ: [ 0.00 0.00 ]
+Key: VPADDBZrm: [ 0.00 0.00 ]
+Key: VPADDBZrmk: [ 0.00 0.00 ]
+Key: VPADDBZrmkz: [ 0.00 0.00 ]
+Key: VPADDBZrr: [ 0.00 0.00 ]
+Key: VPADDBZrrk: [ 0.00 0.00 ]
+Key: VPADDBZrrkz: [ 0.00 0.00 ]
+Key: VPADDBrm: [ 0.00 0.00 ]
+Key: VPADDBrr: [ 0.00 0.00 ]
+Key: VPADDDYrm: [ 0.00 0.00 ]
+Key: VPADDDYrr: [ 0.00 0.00 ]
+Key: VPADDDZ: [ 0.00 0.00 ]
+Key: VPADDDZrm: [ 0.00 0.00 ]
+Key: VPADDDZrmb: [ 0.00 0.00 ]
+Key: VPADDDZrmbk: [ 0.00 0.00 ]
+Key: VPADDDZrmbkz: [ 0.00 0.00 ]
+Key: VPADDDZrmk: [ 0.00 0.00 ]
+Key: VPADDDZrmkz: [ 0.00 0.00 ]
+Key: VPADDDZrr: [ 0.00 0.00 ]
+Key: VPADDDZrrk: [ 0.00 0.00 ]
+Key: VPADDDZrrkz: [ 0.00 0.00 ]
+Key: VPADDDrm: [ 0.00 0.00 ]
+Key: VPADDDrr: [ 0.00 0.00 ]
+Key: VPADDQYrm: [ 0.00 0.00 ]
+Key: VPADDQYrr: [ 0.00 0.00 ]
+Key: VPADDQZ: [ 0.00 0.00 ]
+Key: VPADDQZrm: [ 0.00 0.00 ]
+Key: VPADDQZrmb: [ 0.00 0.00 ]
+Key: VPADDQZrmbk: [ 0.00 0.00 ]
+Key: VPADDQZrmbkz: [ 0.00 0.00 ]
+Key: VPADDQZrmk: [ 0.00 0.00 ]
+Key: VPADDQZrmkz: [ 0.00 0.00 ]
+Key: VPADDQZrr: [ 0.00 0.00 ]
+Key: VPADDQZrrk: [ 0.00 0.00 ]
+Key: VPADDQZrrkz: [ 0.00 0.00 ]
+Key: VPADDQrm: [ 0.00 0.00 ]
+Key: VPADDQrr: [ 0.00 0.00 ]
+Key: VPADDSBYrm: [ 0.00 0.00 ]
+Key: VPADDSBYrr: [ 0.00 0.00 ]
+Key: VPADDSBZ: [ 0.00 0.00 ]
+Key: VPADDSBZrm: [ 0.00 0.00 ]
+Key: VPADDSBZrmk: [ 0.00 0.00 ]
+Key: VPADDSBZrmkz: [ 0.00 0.00 ]
+Key: VPADDSBZrr: [ 0.00 0.00 ]
+Key: VPADDSBZrrk: [ 0.00 0.00 ]
+Key: VPADDSBZrrkz: [ 0.00 0.00 ]
+Key: VPADDSBrm: [ 0.00 0.00 ]
+Key: VPADDSBrr: [ 0.00 0.00 ]
+Key: VPADDSWYrm: [ 0.00 0.00 ]
+Key: VPADDSWYrr: [ 0.00 0.00 ]
+Key: VPADDSWZ: [ 0.00 0.00 ]
+Key: VPADDSWZrm: [ 0.00 0.00 ]
+Key: VPADDSWZrmk: [ 0.00 0.00 ]
+Key: VPADDSWZrmkz: [ 0.00 0.00 ]
+Key: VPADDSWZrr: [ 0.00 0.00 ]
+Key: VPADDSWZrrk: [ 0.00 0.00 ]
+Key: VPADDSWZrrkz: [ 0.00 0.00 ]
+Key: VPADDSWrm: [ 0.00 0.00 ]
+Key: VPADDSWrr: [ 0.00 0.00 ]
+Key: VPADDUSBYrm: [ 0.00 0.00 ]
+Key: VPADDUSBYrr: [ 0.00 0.00 ]
+Key: VPADDUSBZ: [ 0.00 0.00 ]
+Key: VPADDUSBZrm: [ 0.00 0.00 ]
+Key: VPADDUSBZrmk: [ 0.00 0.00 ]
+Key: VPADDUSBZrmkz: [ 0.00 0.00 ]
+Key: VPADDUSBZrr: [ 0.00 0.00 ]
+Key: VPADDUSBZrrk: [ 0.00 0.00 ]
+Key: VPADDUSBZrrkz: [ 0.00 0.00 ]
+Key: VPADDUSBrm: [ 0.00 0.00 ]
+Key: VPADDUSBrr: [ 0.00 0.00 ]
+Key: VPADDUSWYrm: [ 0.00 0.00 ]
+Key: VPADDUSWYrr: [ 0.00 0.00 ]
+Key: VPADDUSWZ: [ 0.00 0.00 ]
+Key: VPADDUSWZrm: [ 0.00 0.00 ]
+Key: VPADDUSWZrmk: [ 0.00 0.00 ]
+Key: VPADDUSWZrmkz: [ 0.00 0.00 ]
+Key: VPADDUSWZrr: [ 0.00 0.00 ]
+Key: VPADDUSWZrrk: [ 0.00 0.00 ]
+Key: VPADDUSWZrrkz: [ 0.00 0.00 ]
+Key: VPADDUSWrm: [ 0.00 0.00 ]
+Key: VPADDUSWrr: [ 0.00 0.00 ]
+Key: VPADDWYrm: [ 0.00 0.00 ]
+Key: VPADDWYrr: [ 0.00 0.00 ]
+Key: VPADDWZ: [ 0.00 0.00 ]
+Key: VPADDWZrm: [ 0.00 0.00 ]
+Key: VPADDWZrmk: [ 0.00 0.00 ]
+Key: VPADDWZrmkz: [ 0.00 0.00 ]
+Key: VPADDWZrr: [ 0.00 0.00 ]
+Key: VPADDWZrrk: [ 0.00 0.00 ]
+Key: VPADDWZrrkz: [ 0.00 0.00 ]
+Key: VPADDWrm: [ 0.00 0.00 ]
+Key: VPADDWrr: [ 0.00 0.00 ]
+Key: VPALIGNRYrmi: [ 0.00 0.00 ]
+Key: VPALIGNRYrri: [ 0.00 0.00 ]
+Key: VPALIGNRZ: [ 0.00 0.00 ]
+Key: VPALIGNRZrmi: [ 0.00 0.00 ]
+Key: VPALIGNRZrmik: [ 0.00 0.00 ]
+Key: VPALIGNRZrmikz: [ 0.00 0.00 ]
+Key: VPALIGNRZrri: [ 0.00 0.00 ]
+Key: VPALIGNRZrrik: [ 0.00 0.00 ]
+Key: VPALIGNRZrrikz: [ 0.00 0.00 ]
+Key: VPALIGNRrmi: [ 0.00 0.00 ]
+Key: VPALIGNRrri: [ 0.00 0.00 ]
+Key: VPANDDZ: [ 0.00 0.00 ]
+Key: VPANDDZrm: [ 0.00 0.00 ]
+Key: VPANDDZrmb: [ 0.00 0.00 ]
+Key: VPANDDZrmbk: [ 0.00 0.00 ]
+Key: VPANDDZrmbkz: [ 0.00 0.00 ]
+Key: VPANDDZrmk: [ 0.00 0.00 ]
+Key: VPANDDZrmkz: [ 0.00 0.00 ]
+Key: VPANDDZrr: [ 0.00 0.00 ]
+Key: VPANDDZrrk: [ 0.00 0.00 ]
+Key: VPANDDZrrkz: [ 0.00 0.00 ]
+Key: VPANDNDZ: [ 0.00 0.00 ]
+Key: VPANDNDZrm: [ 0.00 0.00 ]
+Key: VPANDNDZrmb: [ 0.00 0.00 ]
+Key: VPANDNDZrmbk: [ 0.00 0.00 ]
+Key: VPANDNDZrmbkz: [ 0.00 0.00 ]
+Key: VPANDNDZrmk: [ 0.00 0.00 ]
+Key: VPANDNDZrmkz: [ 0.00 0.00 ]
+Key: VPANDNDZrr: [ 0.00 0.00 ]
+Key: VPANDNDZrrk: [ 0.00 0.00 ]
+Key: VPANDNDZrrkz: [ 0.00 0.00 ]
+Key: VPANDNQZ: [ 0.00 0.00 ]
+Key: VPANDNQZrm: [ 0.00 0.00 ]
+Key: VPANDNQZrmb: [ 0.00 0.00 ]
+Key: VPANDNQZrmbk: [ 0.00 0.00 ]
+Key: VPANDNQZrmbkz: [ 0.00 0.00 ]
+Key: VPANDNQZrmk: [ 0.00 0.00 ]
+Key: VPANDNQZrmkz: [ 0.00 0.00 ]
+Key: VPANDNQZrr: [ 0.00 0.00 ]
+Key: VPANDNQZrrk: [ 0.00 0.00 ]
+Key: VPANDNQZrrkz: [ 0.00 0.00 ]
+Key: VPANDNYrm: [ 0.00 0.00 ]
+Key: VPANDNYrr: [ 0.00 0.00 ]
+Key: VPANDNrm: [ 0.00 0.00 ]
+Key: VPANDNrr: [ 0.00 0.00 ]
+Key: VPANDQZ: [ 0.00 0.00 ]
+Key: VPANDQZrm: [ 0.00 0.00 ]
+Key: VPANDQZrmb: [ 0.00 0.00 ]
+Key: VPANDQZrmbk: [ 0.00 0.00 ]
+Key: VPANDQZrmbkz: [ 0.00 0.00 ]
+Key: VPANDQZrmk: [ 0.00 0.00 ]
+Key: VPANDQZrmkz: [ 0.00 0.00 ]
+Key: VPANDQZrr: [ 0.00 0.00 ]
+Key: VPANDQZrrk: [ 0.00 0.00 ]
+Key: VPANDQZrrkz: [ 0.00 0.00 ]
+Key: VPANDYrm: [ 0.00 0.00 ]
+Key: VPANDYrr: [ 0.00 0.00 ]
+Key: VPANDrm: [ 0.00 0.00 ]
+Key: VPANDrr: [ 0.00 0.00 ]
+Key: VPAVGBYrm: [ 0.00 0.00 ]
+Key: VPAVGBYrr: [ 0.00 0.00 ]
+Key: VPAVGBZ: [ 0.00 0.00 ]
+Key: VPAVGBZrm: [ 0.00 0.00 ]
+Key: VPAVGBZrmk: [ 0.00 0.00 ]
+Key: VPAVGBZrmkz: [ 0.00 0.00 ]
+Key: VPAVGBZrr: [ 0.00 0.00 ]
+Key: VPAVGBZrrk: [ 0.00 0.00 ]
+Key: VPAVGBZrrkz: [ 0.00 0.00 ]
+Key: VPAVGBrm: [ 0.00 0.00 ]
+Key: VPAVGBrr: [ 0.00 0.00 ]
+Key: VPAVGWYrm: [ 0.00 0.00 ]
+Key: VPAVGWYrr: [ 0.00 0.00 ]
+Key: VPAVGWZ: [ 0.00 0.00 ]
+Key: VPAVGWZrm: [ 0.00 0.00 ]
+Key: VPAVGWZrmk: [ 0.00 0.00 ]
+Key: VPAVGWZrmkz: [ 0.00 0.00 ]
+Key: VPAVGWZrr: [ 0.00 0.00 ]
+Key: VPAVGWZrrk: [ 0.00 0.00 ]
+Key: VPAVGWZrrkz: [ 0.00 0.00 ]
+Key: VPAVGWrm: [ 0.00 0.00 ]
+Key: VPAVGWrr: [ 0.00 0.00 ]
+Key: VPBLENDDYrmi: [ 0.00 0.00 ]
+Key: VPBLENDDYrri: [ 0.00 0.00 ]
+Key: VPBLENDDrmi: [ 0.00 0.00 ]
+Key: VPBLENDDrri: [ 0.00 0.00 ]
+Key: VPBLENDMBZ: [ 0.00 0.00 ]
+Key: VPBLENDMBZrm: [ 0.00 0.00 ]
+Key: VPBLENDMBZrmk: [ 0.00 0.00 ]
+Key: VPBLENDMBZrmkz: [ 0.00 0.00 ]
+Key: VPBLENDMBZrr: [ 0.00 0.00 ]
+Key: VPBLENDMBZrrk: [ 0.00 0.00 ]
+Key: VPBLENDMBZrrkz: [ 0.00 0.00 ]
+Key: VPBLENDMDZ: [ 0.00 0.00 ]
+Key: VPBLENDMDZrm: [ 0.00 0.00 ]
+Key: VPBLENDMDZrmb: [ 0.00 0.00 ]
+Key: VPBLENDMDZrmbk: [ 0.00 0.00 ]
+Key: VPBLENDMDZrmbkz: [ 0.00 0.00 ]
+Key: VPBLENDMDZrmk: [ 0.00 0.00 ]
+Key: VPBLENDMDZrmkz: [ 0.00 0.00 ]
+Key: VPBLENDMDZrr: [ 0.00 0.00 ]
+Key: VPBLENDMDZrrk: [ 0.00 0.00 ]
+Key: VPBLENDMDZrrkz: [ 0.00 0.00 ]
+Key: VPBLENDMQZ: [ 0.00 0.00 ]
+Key: VPBLENDMQZrm: [ 0.00 0.00 ]
+Key: VPBLENDMQZrmb: [ 0.00 0.00 ]
+Key: VPBLENDMQZrmbk: [ 0.00 0.00 ]
+Key: VPBLENDMQZrmbkz: [ 0.00 0.00 ]
+Key: VPBLENDMQZrmk: [ 0.00 0.00 ]
+Key: VPBLENDMQZrmkz: [ 0.00 0.00 ]
+Key: VPBLENDMQZrr: [ 0.00 0.00 ]
+Key: VPBLENDMQZrrk: [ 0.00 0.00 ]
+Key: VPBLENDMQZrrkz: [ 0.00 0.00 ]
+Key: VPBLENDMWZ: [ 0.00 0.00 ]
+Key: VPBLENDMWZrm: [ 0.00 0.00 ]
+Key: VPBLENDMWZrmk: [ 0.00 0.00 ]
+Key: VPBLENDMWZrmkz: [ 0.00 0.00 ]
+Key: VPBLENDMWZrr: [ 0.00 0.00 ]
+Key: VPBLENDMWZrrk: [ 0.00 0.00 ]
+Key: VPBLENDMWZrrkz: [ 0.00 0.00 ]
+Key: VPBLENDVBYrmr: [ 0.00 0.00 ]
+Key: VPBLENDVBYrrr: [ 0.00 0.00 ]
+Key: VPBLENDVBrmr: [ 0.00 0.00 ]
+Key: VPBLENDVBrrr: [ 0.00 0.00 ]
+Key: VPBLENDWYrmi: [ 0.00 0.00 ]
+Key: VPBLENDWYrri: [ 0.00 0.00 ]
+Key: VPBLENDWrmi: [ 0.00 0.00 ]
+Key: VPBLENDWrri: [ 0.00 0.00 ]
+Key: VPBROADCASTBYrm: [ 0.00 0.00 ]
+Key: VPBROADCASTBYrr: [ 0.00 0.00 ]
+Key: VPBROADCASTBZ: [ 0.00 0.00 ]
+Key: VPBROADCASTBZrm: [ 0.00 0.00 ]
+Key: VPBROADCASTBZrmk: [ 0.00 0.00 ]
+Key: VPBROADCASTBZrmkz: [ 0.00 0.00 ]
+Key: VPBROADCASTBZrr: [ 0.00 0.00 ]
+Key: VPBROADCASTBZrrk: [ 0.00 0.00 ]
+Key: VPBROADCASTBZrrkz: [ 0.00 0.00 ]
+Key: VPBROADCASTBrZ: [ 0.00 0.00 ]
+Key: VPBROADCASTBrZrr: [ 0.00 0.00 ]
+Key: VPBROADCASTBrZrrk: [ 0.00 0.00 ]
+Key: VPBROADCASTBrZrrkz: [ 0.00 0.00 ]
+Key: VPBROADCASTBrm: [ 0.00 0.00 ]
+Key: VPBROADCASTBrr: [ 0.00 0.00 ]
+Key: VPBROADCASTDYrm: [ 0.00 0.00 ]
+Key: VPBROADCASTDYrr: [ 0.00 0.00 ]
+Key: VPBROADCASTDZ: [ 0.00 0.00 ]
+Key: VPBROADCASTDZrm: [ 0.00 0.00 ]
+Key: VPBROADCASTDZrmk: [ 0.00 0.00 ]
+Key: VPBROADCASTDZrmkz: [ 0.00 0.00 ]
+Key: VPBROADCASTDZrr: [ 0.00 0.00 ]
+Key: VPBROADCASTDZrrk: [ 0.00 0.00 ]
+Key: VPBROADCASTDZrrkz: [ 0.00 0.00 ]
+Key: VPBROADCASTDrZ: [ 0.00 0.00 ]
+Key: VPBROADCASTDrZrr: [ 0.00 0.00 ]
+Key: VPBROADCASTDrZrrk: [ 0.00 0.00 ]
+Key: VPBROADCASTDrZrrkz: [ 0.00 0.00 ]
+Key: VPBROADCASTDrm: [ 0.00 0.00 ]
+Key: VPBROADCASTDrr: [ 0.00 0.00 ]
+Key: VPBROADCASTMB: [ 0.00 0.00 ]
+Key: VPBROADCASTMW: [ 0.00 0.00 ]
+Key: VPBROADCASTQYrm: [ 0.00 0.00 ]
+Key: VPBROADCASTQYrr: [ 0.00 0.00 ]
+Key: VPBROADCASTQZ: [ 0.00 0.00 ]
+Key: VPBROADCASTQZrm: [ 0.00 0.00 ]
+Key: VPBROADCASTQZrmk: [ 0.00 0.00 ]
+Key: VPBROADCASTQZrmkz: [ 0.00 0.00 ]
+Key: VPBROADCASTQZrr: [ 0.00 0.00 ]
+Key: VPBROADCASTQZrrk: [ 0.00 0.00 ]
+Key: VPBROADCASTQZrrkz: [ 0.00 0.00 ]
+Key: VPBROADCASTQrZ: [ 0.00 0.00 ]
+Key: VPBROADCASTQrZrr: [ 0.00 0.00 ]
+Key: VPBROADCASTQrZrrk: [ 0.00 0.00 ]
+Key: VPBROADCASTQrZrrkz: [ 0.00 0.00 ]
+Key: VPBROADCASTQrm: [ 0.00 0.00 ]
+Key: VPBROADCASTQrr: [ 0.00 0.00 ]
+Key: VPBROADCASTWYrm: [ 0.00 0.00 ]
+Key: VPBROADCASTWYrr: [ 0.00 0.00 ]
+Key: VPBROADCASTWZ: [ 0.00 0.00 ]
+Key: VPBROADCASTWZrm: [ 0.00 0.00 ]
+Key: VPBROADCASTWZrmk: [ 0.00 0.00 ]
+Key: VPBROADCASTWZrmkz: [ 0.00 0.00 ]
+Key: VPBROADCASTWZrr: [ 0.00 0.00 ]
+Key: VPBROADCASTWZrrk: [ 0.00 0.00 ]
+Key: VPBROADCASTWZrrkz: [ 0.00 0.00 ]
+Key: VPBROADCASTWrZ: [ 0.00 0.00 ]
+Key: VPBROADCASTWrZrr: [ 0.00 0.00 ]
+Key: VPBROADCASTWrZrrk: [ 0.00 0.00 ]
+Key: VPBROADCASTWrZrrkz: [ 0.00 0.00 ]
+Key: VPBROADCASTWrm: [ 0.00 0.00 ]
+Key: VPBROADCASTWrr: [ 0.00 0.00 ]
+Key: VPCLMULQDQYrmi: [ 0.00 0.00 ]
+Key: VPCLMULQDQYrri: [ 0.00 0.00 ]
+Key: VPCLMULQDQZ: [ 0.00 0.00 ]
+Key: VPCLMULQDQZrmi: [ 0.00 0.00 ]
+Key: VPCLMULQDQZrri: [ 0.00 0.00 ]
+Key: VPCLMULQDQrmi: [ 0.00 0.00 ]
+Key: VPCLMULQDQrri: [ 0.00 0.00 ]
+Key: VPCMOVYrmr: [ 0.00 0.00 ]
+Key: VPCMOVYrrm: [ 0.00 0.00 ]
+Key: VPCMOVYrrr: [ 0.00 0.00 ]
+Key: VPCMOVYrrr_REV: [ 0.00 0.00 ]
+Key: VPCMOVrmr: [ 0.00 0.00 ]
+Key: VPCMOVrrm: [ 0.00 0.00 ]
+Key: VPCMOVrrr: [ 0.00 0.00 ]
+Key: VPCMOVrrr_REV: [ 0.00 0.00 ]
+Key: VPCMPBZ: [ 0.00 0.00 ]
+Key: VPCMPBZrmi: [ 0.00 0.00 ]
+Key: VPCMPBZrmik: [ 0.00 0.00 ]
+Key: VPCMPBZrri: [ 0.00 0.00 ]
+Key: VPCMPBZrrik: [ 0.00 0.00 ]
+Key: VPCMPDZ: [ 0.00 0.00 ]
+Key: VPCMPDZrmbi: [ 0.00 0.00 ]
+Key: VPCMPDZrmbik: [ 0.00 0.00 ]
+Key: VPCMPDZrmi: [ 0.00 0.00 ]
+Key: VPCMPDZrmik: [ 0.00 0.00 ]
+Key: VPCMPDZrri: [ 0.00 0.00 ]
+Key: VPCMPDZrrik: [ 0.00 0.00 ]
+Key: VPCMPEQBYrm: [ 0.00 0.00 ]
+Key: VPCMPEQBYrr: [ 0.00 0.00 ]
+Key: VPCMPEQBZ: [ 0.00 0.00 ]
+Key: VPCMPEQBZrm: [ 0.00 0.00 ]
+Key: VPCMPEQBZrmk: [ 0.00 0.00 ]
+Key: VPCMPEQBZrr: [ 0.00 0.00 ]
+Key: VPCMPEQBZrrk: [ 0.00 0.00 ]
+Key: VPCMPEQBrm: [ 0.00 0.00 ]
+Key: VPCMPEQBrr: [ 0.00 0.00 ]
+Key: VPCMPEQDYrm: [ 0.00 0.00 ]
+Key: VPCMPEQDYrr: [ 0.00 0.00 ]
+Key: VPCMPEQDZ: [ 0.00 0.00 ]
+Key: VPCMPEQDZrm: [ 0.00 0.00 ]
+Key: VPCMPEQDZrmb: [ 0.00 0.00 ]
+Key: VPCMPEQDZrmbk: [ 0.00 0.00 ]
+Key: VPCMPEQDZrmk: [ 0.00 0.00 ]
+Key: VPCMPEQDZrr: [ 0.00 0.00 ]
+Key: VPCMPEQDZrrk: [ 0.00 0.00 ]
+Key: VPCMPEQDrm: [ 0.00 0.00 ]
+Key: VPCMPEQDrr: [ 0.00 0.00 ]
+Key: VPCMPEQQYrm: [ 0.00 0.00 ]
+Key: VPCMPEQQYrr: [ 0.00 0.00 ]
+Key: VPCMPEQQZ: [ 0.00 0.00 ]
+Key: VPCMPEQQZrm: [ 0.00 0.00 ]
+Key: VPCMPEQQZrmb: [ 0.00 0.00 ]
+Key: VPCMPEQQZrmbk: [ 0.00 0.00 ]
+Key: VPCMPEQQZrmk: [ 0.00 0.00 ]
+Key: VPCMPEQQZrr: [ 0.00 0.00 ]
+Key: VPCMPEQQZrrk: [ 0.00 0.00 ]
+Key: VPCMPEQQrm: [ 0.00 0.00 ]
+Key: VPCMPEQQrr: [ 0.00 0.00 ]
+Key: VPCMPEQWYrm: [ 0.00 0.00 ]
+Key: VPCMPEQWYrr: [ 0.00 0.00 ]
+Key: VPCMPEQWZ: [ 0.00 0.00 ]
+Key: VPCMPEQWZrm: [ 0.00 0.00 ]
+Key: VPCMPEQWZrmk: [ 0.00 0.00 ]
+Key: VPCMPEQWZrr: [ 0.00 0.00 ]
+Key: VPCMPEQWZrrk: [ 0.00 0.00 ]
+Key: VPCMPEQWrm: [ 0.00 0.00 ]
+Key: VPCMPEQWrr: [ 0.00 0.00 ]
+Key: VPCMPESTRIrmi: [ 0.00 0.00 ]
+Key: VPCMPESTRIrri: [ 0.00 0.00 ]
+Key: VPCMPESTRMrmi: [ 0.00 0.00 ]
+Key: VPCMPESTRMrri: [ 0.00 0.00 ]
+Key: VPCMPGTBYrm: [ 0.00 0.00 ]
+Key: VPCMPGTBYrr: [ 0.00 0.00 ]
+Key: VPCMPGTBZ: [ 0.00 0.00 ]
+Key: VPCMPGTBZrm: [ 0.00 0.00 ]
+Key: VPCMPGTBZrmk: [ 0.00 0.00 ]
+Key: VPCMPGTBZrr: [ 0.00 0.00 ]
+Key: VPCMPGTBZrrk: [ 0.00 0.00 ]
+Key: VPCMPGTBrm: [ 0.00 0.00 ]
+Key: VPCMPGTBrr: [ 0.00 0.00 ]
+Key: VPCMPGTDYrm: [ 0.00 0.00 ]
+Key: VPCMPGTDYrr: [ 0.00 0.00 ]
+Key: VPCMPGTDZ: [ 0.00 0.00 ]
+Key: VPCMPGTDZrm: [ 0.00 0.00 ]
+Key: VPCMPGTDZrmb: [ 0.00 0.00 ]
+Key: VPCMPGTDZrmbk: [ 0.00 0.00 ]
+Key: VPCMPGTDZrmk: [ 0.00 0.00 ]
+Key: VPCMPGTDZrr: [ 0.00 0.00 ]
+Key: VPCMPGTDZrrk: [ 0.00 0.00 ]
+Key: VPCMPGTDrm: [ 0.00 0.00 ]
+Key: VPCMPGTDrr: [ 0.00 0.00 ]
+Key: VPCMPGTQYrm: [ 0.00 0.00 ]
+Key: VPCMPGTQYrr: [ 0.00 0.00 ]
+Key: VPCMPGTQZ: [ 0.00 0.00 ]
+Key: VPCMPGTQZrm: [ 0.00 0.00 ]
+Key: VPCMPGTQZrmb: [ 0.00 0.00 ]
+Key: VPCMPGTQZrmbk: [ 0.00 0.00 ]
+Key: VPCMPGTQZrmk: [ 0.00 0.00 ]
+Key: VPCMPGTQZrr: [ 0.00 0.00 ]
+Key: VPCMPGTQZrrk: [ 0.00 0.00 ]
+Key: VPCMPGTQrm: [ 0.00 0.00 ]
+Key: VPCMPGTQrr: [ 0.00 0.00 ]
+Key: VPCMPGTWYrm: [ 0.00 0.00 ]
+Key: VPCMPGTWYrr: [ 0.00 0.00 ]
+Key: VPCMPGTWZ: [ 0.00 0.00 ]
+Key: VPCMPGTWZrm: [ 0.00 0.00 ]
+Key: VPCMPGTWZrmk: [ 0.00 0.00 ]
+Key: VPCMPGTWZrr: [ 0.00 0.00 ]
+Key: VPCMPGTWZrrk: [ 0.00 0.00 ]
+Key: VPCMPGTWrm: [ 0.00 0.00 ]
+Key: VPCMPGTWrr: [ 0.00 0.00 ]
+Key: VPCMPISTRIrmi: [ 0.00 0.00 ]
+Key: VPCMPISTRIrri: [ 0.00 0.00 ]
+Key: VPCMPISTRMrmi: [ 0.00 0.00 ]
+Key: VPCMPISTRMrri: [ 0.00 0.00 ]
+Key: VPCMPQZ: [ 0.00 0.00 ]
+Key: VPCMPQZrmbi: [ 0.00 0.00 ]
+Key: VPCMPQZrmbik: [ 0.00 0.00 ]
+Key: VPCMPQZrmi: [ 0.00 0.00 ]
+Key: VPCMPQZrmik: [ 0.00 0.00 ]
+Key: VPCMPQZrri: [ 0.00 0.00 ]
+Key: VPCMPQZrrik: [ 0.00 0.00 ]
+Key: VPCMPUBZ: [ 0.00 0.00 ]
+Key: VPCMPUBZrmi: [ 0.00 0.00 ]
+Key: VPCMPUBZrmik: [ 0.00 0.00 ]
+Key: VPCMPUBZrri: [ 0.00 0.00 ]
+Key: VPCMPUBZrrik: [ 0.00 0.00 ]
+Key: VPCMPUDZ: [ 0.00 0.00 ]
+Key: VPCMPUDZrmbi: [ 0.00 0.00 ]
+Key: VPCMPUDZrmbik: [ 0.00 0.00 ]
+Key: VPCMPUDZrmi: [ 0.00 0.00 ]
+Key: VPCMPUDZrmik: [ 0.00 0.00 ]
+Key: VPCMPUDZrri: [ 0.00 0.00 ]
+Key: VPCMPUDZrrik: [ 0.00 0.00 ]
+Key: VPCMPUQZ: [ 0.00 0.00 ]
+Key: VPCMPUQZrmbi: [ 0.00 0.00 ]
+Key: VPCMPUQZrmbik: [ 0.00 0.00 ]
+Key: VPCMPUQZrmi: [ 0.00 0.00 ]
+Key: VPCMPUQZrmik: [ 0.00 0.00 ]
+Key: VPCMPUQZrri: [ 0.00 0.00 ]
+Key: VPCMPUQZrrik: [ 0.00 0.00 ]
+Key: VPCMPUWZ: [ 0.00 0.00 ]
+Key: VPCMPUWZrmi: [ 0.00 0.00 ]
+Key: VPCMPUWZrmik: [ 0.00 0.00 ]
+Key: VPCMPUWZrri: [ 0.00 0.00 ]
+Key: VPCMPUWZrrik: [ 0.00 0.00 ]
+Key: VPCMPWZ: [ 0.00 0.00 ]
+Key: VPCMPWZrmi: [ 0.00 0.00 ]
+Key: VPCMPWZrmik: [ 0.00 0.00 ]
+Key: VPCMPWZrri: [ 0.00 0.00 ]
+Key: VPCMPWZrrik: [ 0.00 0.00 ]
+Key: VPCOMBmi: [ 0.00 0.00 ]
+Key: VPCOMBri: [ 0.00 0.00 ]
+Key: VPCOMDmi: [ 0.00 0.00 ]
+Key: VPCOMDri: [ 0.00 0.00 ]
+Key: VPCOMPRESSBZ: [ 0.00 0.00 ]
+Key: VPCOMPRESSBZmr: [ 0.00 0.00 ]
+Key: VPCOMPRESSBZmrk: [ 0.00 0.00 ]
+Key: VPCOMPRESSBZrr: [ 0.00 0.00 ]
+Key: VPCOMPRESSBZrrk: [ 0.00 0.00 ]
+Key: VPCOMPRESSBZrrkz: [ 0.00 0.00 ]
+Key: VPCOMPRESSDZ: [ 0.00 0.00 ]
+Key: VPCOMPRESSDZmr: [ 0.00 0.00 ]
+Key: VPCOMPRESSDZmrk: [ 0.00 0.00 ]
+Key: VPCOMPRESSDZrr: [ 0.00 0.00 ]
+Key: VPCOMPRESSDZrrk: [ 0.00 0.00 ]
+Key: VPCOMPRESSDZrrkz: [ 0.00 0.00 ]
+Key: VPCOMPRESSQZ: [ 0.00 0.00 ]
+Key: VPCOMPRESSQZmr: [ 0.00 0.00 ]
+Key: VPCOMPRESSQZmrk: [ 0.00 0.00 ]
+Key: VPCOMPRESSQZrr: [ 0.00 0.00 ]
+Key: VPCOMPRESSQZrrk: [ 0.00 0.00 ]
+Key: VPCOMPRESSQZrrkz: [ 0.00 0.00 ]
+Key: VPCOMPRESSWZ: [ 0.00 0.00 ]
+Key: VPCOMPRESSWZmr: [ 0.00 0.00 ]
+Key: VPCOMPRESSWZmrk: [ 0.00 0.00 ]
+Key: VPCOMPRESSWZrr: [ 0.00 0.00 ]
+Key: VPCOMPRESSWZrrk: [ 0.00 0.00 ]
+Key: VPCOMPRESSWZrrkz: [ 0.00 0.00 ]
+Key: VPCOMQmi: [ 0.00 0.00 ]
+Key: VPCOMQri: [ 0.00 0.00 ]
+Key: VPCOMUBmi: [ 0.00 0.00 ]
+Key: VPCOMUBri: [ 0.00 0.00 ]
+Key: VPCOMUDmi: [ 0.00 0.00 ]
+Key: VPCOMUDri: [ 0.00 0.00 ]
+Key: VPCOMUQmi: [ 0.00 0.00 ]
+Key: VPCOMUQri: [ 0.00 0.00 ]
+Key: VPCOMUWmi: [ 0.00 0.00 ]
+Key: VPCOMUWri: [ 0.00 0.00 ]
+Key: VPCOMWmi: [ 0.00 0.00 ]
+Key: VPCOMWri: [ 0.00 0.00 ]
+Key: VPCONFLICTDZ: [ 0.00 0.00 ]
+Key: VPCONFLICTDZrm: [ 0.00 0.00 ]
+Key: VPCONFLICTDZrmb: [ 0.00 0.00 ]
+Key: VPCONFLICTDZrmbk: [ 0.00 0.00 ]
+Key: VPCONFLICTDZrmbkz: [ 0.00 0.00 ]
+Key: VPCONFLICTDZrmk: [ 0.00 0.00 ]
+Key: VPCONFLICTDZrmkz: [ 0.00 0.00 ]
+Key: VPCONFLICTDZrr: [ 0.00 0.00 ]
+Key: VPCONFLICTDZrrk: [ 0.00 0.00 ]
+Key: VPCONFLICTDZrrkz: [ 0.00 0.00 ]
+Key: VPCONFLICTQZ: [ 0.00 0.00 ]
+Key: VPCONFLICTQZrm: [ 0.00 0.00 ]
+Key: VPCONFLICTQZrmb: [ 0.00 0.00 ]
+Key: VPCONFLICTQZrmbk: [ 0.00 0.00 ]
+Key: VPCONFLICTQZrmbkz: [ 0.00 0.00 ]
+Key: VPCONFLICTQZrmk: [ 0.00 0.00 ]
+Key: VPCONFLICTQZrmkz: [ 0.00 0.00 ]
+Key: VPCONFLICTQZrr: [ 0.00 0.00 ]
+Key: VPCONFLICTQZrrk: [ 0.00 0.00 ]
+Key: VPCONFLICTQZrrkz: [ 0.00 0.00 ]
+Key: VPDPBSSDSYrm: [ 0.00 0.00 ]
+Key: VPDPBSSDSYrr: [ 0.00 0.00 ]
+Key: VPDPBSSDSZ: [ 0.00 0.00 ]
+Key: VPDPBSSDSZrm: [ 0.00 0.00 ]
+Key: VPDPBSSDSZrmb: [ 0.00 0.00 ]
+Key: VPDPBSSDSZrmbk: [ 0.00 0.00 ]
+Key: VPDPBSSDSZrmbkz: [ 0.00 0.00 ]
+Key: VPDPBSSDSZrmk: [ 0.00 0.00 ]
+Key: VPDPBSSDSZrmkz: [ 0.00 0.00 ]
+Key: VPDPBSSDSZrr: [ 0.00 0.00 ]
+Key: VPDPBSSDSZrrk: [ 0.00 0.00 ]
+Key: VPDPBSSDSZrrkz: [ 0.00 0.00 ]
+Key: VPDPBSSDSrm: [ 0.00 0.00 ]
+Key: VPDPBSSDSrr: [ 0.00 0.00 ]
+Key: VPDPBSSDYrm: [ 0.00 0.00 ]
+Key: VPDPBSSDYrr: [ 0.00 0.00 ]
+Key: VPDPBSSDZ: [ 0.00 0.00 ]
+Key: VPDPBSSDZrm: [ 0.00 0.00 ]
+Key: VPDPBSSDZrmb: [ 0.00 0.00 ]
+Key: VPDPBSSDZrmbk: [ 0.00 0.00 ]
+Key: VPDPBSSDZrmbkz: [ 0.00 0.00 ]
+Key: VPDPBSSDZrmk: [ 0.00 0.00 ]
+Key: VPDPBSSDZrmkz: [ 0.00 0.00 ]
+Key: VPDPBSSDZrr: [ 0.00 0.00 ]
+Key: VPDPBSSDZrrk: [ 0.00 0.00 ]
+Key: VPDPBSSDZrrkz: [ 0.00 0.00 ]
+Key: VPDPBSSDrm: [ 0.00 0.00 ]
+Key: VPDPBSSDrr: [ 0.00 0.00 ]
+Key: VPDPBSUDSYrm: [ 0.00 0.00 ]
+Key: VPDPBSUDSYrr: [ 0.00 0.00 ]
+Key: VPDPBSUDSZ: [ 0.00 0.00 ]
+Key: VPDPBSUDSZrm: [ 0.00 0.00 ]
+Key: VPDPBSUDSZrmb: [ 0.00 0.00 ]
+Key: VPDPBSUDSZrmbk: [ 0.00 0.00 ]
+Key: VPDPBSUDSZrmbkz: [ 0.00 0.00 ]
+Key: VPDPBSUDSZrmk: [ 0.00 0.00 ]
+Key: VPDPBSUDSZrmkz: [ 0.00 0.00 ]
+Key: VPDPBSUDSZrr: [ 0.00 0.00 ]
+Key: VPDPBSUDSZrrk: [ 0.00 0.00 ]
+Key: VPDPBSUDSZrrkz: [ 0.00 0.00 ]
+Key: VPDPBSUDSrm: [ 0.00 0.00 ]
+Key: VPDPBSUDSrr: [ 0.00 0.00 ]
+Key: VPDPBSUDYrm: [ 0.00 0.00 ]
+Key: VPDPBSUDYrr: [ 0.00 0.00 ]
+Key: VPDPBSUDZ: [ 0.00 0.00 ]
+Key: VPDPBSUDZrm: [ 0.00 0.00 ]
+Key: VPDPBSUDZrmb: [ 0.00 0.00 ]
+Key: VPDPBSUDZrmbk: [ 0.00 0.00 ]
+Key: VPDPBSUDZrmbkz: [ 0.00 0.00 ]
+Key: VPDPBSUDZrmk: [ 0.00 0.00 ]
+Key: VPDPBSUDZrmkz: [ 0.00 0.00 ]
+Key: VPDPBSUDZrr: [ 0.00 0.00 ]
+Key: VPDPBSUDZrrk: [ 0.00 0.00 ]
+Key: VPDPBSUDZrrkz: [ 0.00 0.00 ]
+Key: VPDPBSUDrm: [ 0.00 0.00 ]
+Key: VPDPBSUDrr: [ 0.00 0.00 ]
+Key: VPDPBUSDSYrm: [ 0.00 0.00 ]
+Key: VPDPBUSDSYrr: [ 0.00 0.00 ]
+Key: VPDPBUSDSZ: [ 0.00 0.00 ]
+Key: VPDPBUSDSZrm: [ 0.00 0.00 ]
+Key: VPDPBUSDSZrmb: [ 0.00 0.00 ]
+Key: VPDPBUSDSZrmbk: [ 0.00 0.00 ]
+Key: VPDPBUSDSZrmbkz: [ 0.00 0.00 ]
+Key: VPDPBUSDSZrmk: [ 0.00 0.00 ]
+Key: VPDPBUSDSZrmkz: [ 0.00 0.00 ]
+Key: VPDPBUSDSZrr: [ 0.00 0.00 ]
+Key: VPDPBUSDSZrrk: [ 0.00 0.00 ]
+Key: VPDPBUSDSZrrkz: [ 0.00 0.00 ]
+Key: VPDPBUSDSrm: [ 0.00 0.00 ]
+Key: VPDPBUSDSrr: [ 0.00 0.00 ]
+Key: VPDPBUSDYrm: [ 0.00 0.00 ]
+Key: VPDPBUSDYrr: [ 0.00 0.00 ]
+Key: VPDPBUSDZ: [ 0.00 0.00 ]
+Key: VPDPBUSDZrm: [ 0.00 0.00 ]
+Key: VPDPBUSDZrmb: [ 0.00 0.00 ]
+Key: VPDPBUSDZrmbk: [ 0.00 0.00 ]
+Key: VPDPBUSDZrmbkz: [ 0.00 0.00 ]
+Key: VPDPBUSDZrmk: [ 0.00 0.00 ]
+Key: VPDPBUSDZrmkz: [ 0.00 0.00 ]
+Key: VPDPBUSDZrr: [ 0.00 0.00 ]
+Key: VPDPBUSDZrrk: [ 0.00 0.00 ]
+Key: VPDPBUSDZrrkz: [ 0.00 0.00 ]
+Key: VPDPBUSDrm: [ 0.00 0.00 ]
+Key: VPDPBUSDrr: [ 0.00 0.00 ]
+Key: VPDPBUUDSYrm: [ 0.00 0.00 ]
+Key: VPDPBUUDSYrr: [ 0.00 0.00 ]
+Key: VPDPBUUDSZ: [ 0.00 0.00 ]
+Key: VPDPBUUDSZrm: [ 0.00 0.00 ]
+Key: VPDPBUUDSZrmb: [ 0.00 0.00 ]
+Key: VPDPBUUDSZrmbk: [ 0.00 0.00 ]
+Key: VPDPBUUDSZrmbkz: [ 0.00 0.00 ]
+Key: VPDPBUUDSZrmk: [ 0.00 0.00 ]
+Key: VPDPBUUDSZrmkz: [ 0.00 0.00 ]
+Key: VPDPBUUDSZrr: [ 0.00 0.00 ]
+Key: VPDPBUUDSZrrk: [ 0.00 0.00 ]
+Key: VPDPBUUDSZrrkz: [ 0.00 0.00 ]
+Key: VPDPBUUDSrm: [ 0.00 0.00 ]
+Key: VPDPBUUDSrr: [ 0.00 0.00 ]
+Key: VPDPBUUDYrm: [ 0.00 0.00 ]
+Key: VPDPBUUDYrr: [ 0.00 0.00 ]
+Key: VPDPBUUDZ: [ 0.00 0.00 ]
+Key: VPDPBUUDZrm: [ 0.00 0.00 ]
+Key: VPDPBUUDZrmb: [ 0.00 0.00 ]
+Key: VPDPBUUDZrmbk: [ 0.00 0.00 ]
+Key: VPDPBUUDZrmbkz: [ 0.00 0.00 ]
+Key: VPDPBUUDZrmk: [ 0.00 0.00 ]
+Key: VPDPBUUDZrmkz: [ 0.00 0.00 ]
+Key: VPDPBUUDZrr: [ 0.00 0.00 ]
+Key: VPDPBUUDZrrk: [ 0.00 0.00 ]
+Key: VPDPBUUDZrrkz: [ 0.00 0.00 ]
+Key: VPDPBUUDrm: [ 0.00 0.00 ]
+Key: VPDPBUUDrr: [ 0.00 0.00 ]
+Key: VPDPWSSDSYrm: [ 0.00 0.00 ]
+Key: VPDPWSSDSYrr: [ 0.00 0.00 ]
+Key: VPDPWSSDSZ: [ 0.00 0.00 ]
+Key: VPDPWSSDSZrm: [ 0.00 0.00 ]
+Key: VPDPWSSDSZrmb: [ 0.00 0.00 ]
+Key: VPDPWSSDSZrmbk: [ 0.00 0.00 ]
+Key: VPDPWSSDSZrmbkz: [ 0.00 0.00 ]
+Key: VPDPWSSDSZrmk: [ 0.00 0.00 ]
+Key: VPDPWSSDSZrmkz: [ 0.00 0.00 ]
+Key: VPDPWSSDSZrr: [ 0.00 0.00 ]
+Key: VPDPWSSDSZrrk: [ 0.00 0.00 ]
+Key: VPDPWSSDSZrrkz: [ 0.00 0.00 ]
+Key: VPDPWSSDSrm: [ 0.00 0.00 ]
+Key: VPDPWSSDSrr: [ 0.00 0.00 ]
+Key: VPDPWSSDYrm: [ 0.00 0.00 ]
+Key: VPDPWSSDYrr: [ 0.00 0.00 ]
+Key: VPDPWSSDZ: [ 0.00 0.00 ]
+Key: VPDPWSSDZrm: [ 0.00 0.00 ]
+Key: VPDPWSSDZrmb: [ 0.00 0.00 ]
+Key: VPDPWSSDZrmbk: [ 0.00 0.00 ]
+Key: VPDPWSSDZrmbkz: [ 0.00 0.00 ]
+Key: VPDPWSSDZrmk: [ 0.00 0.00 ]
+Key: VPDPWSSDZrmkz: [ 0.00 0.00 ]
+Key: VPDPWSSDZrr: [ 0.00 0.00 ]
+Key: VPDPWSSDZrrk: [ 0.00 0.00 ]
+Key: VPDPWSSDZrrkz: [ 0.00 0.00 ]
+Key: VPDPWSSDrm: [ 0.00 0.00 ]
+Key: VPDPWSSDrr: [ 0.00 0.00 ]
+Key: VPDPWSUDSYrm: [ 0.00 0.00 ]
+Key: VPDPWSUDSYrr: [ 0.00 0.00 ]
+Key: VPDPWSUDSZ: [ 0.00 0.00 ]
+Key: VPDPWSUDSZrm: [ 0.00 0.00 ]
+Key: VPDPWSUDSZrmb: [ 0.00 0.00 ]
+Key: VPDPWSUDSZrmbk: [ 0.00 0.00 ]
+Key: VPDPWSUDSZrmbkz: [ 0.00 0.00 ]
+Key: VPDPWSUDSZrmk: [ 0.00 0.00 ]
+Key: VPDPWSUDSZrmkz: [ 0.00 0.00 ]
+Key: VPDPWSUDSZrr: [ 0.00 0.00 ]
+Key: VPDPWSUDSZrrk: [ 0.00 0.00 ]
+Key: VPDPWSUDSZrrkz: [ 0.00 0.00 ]
+Key: VPDPWSUDSrm: [ 0.00 0.00 ]
+Key: VPDPWSUDSrr: [ 0.00 0.00 ]
+Key: VPDPWSUDYrm: [ 0.00 0.00 ]
+Key: VPDPWSUDYrr: [ 0.00 0.00 ]
+Key: VPDPWSUDZ: [ 0.00 0.00 ]
+Key: VPDPWSUDZrm: [ 0.00 0.00 ]
+Key: VPDPWSUDZrmb: [ 0.00 0.00 ]
+Key: VPDPWSUDZrmbk: [ 0.00 0.00 ]
+Key: VPDPWSUDZrmbkz: [ 0.00 0.00 ]
+Key: VPDPWSUDZrmk: [ 0.00 0.00 ]
+Key: VPDPWSUDZrmkz: [ 0.00 0.00 ]
+Key: VPDPWSUDZrr: [ 0.00 0.00 ]
+Key: VPDPWSUDZrrk: [ 0.00 0.00 ]
+Key: VPDPWSUDZrrkz: [ 0.00 0.00 ]
+Key: VPDPWSUDrm: [ 0.00 0.00 ]
+Key: VPDPWSUDrr: [ 0.00 0.00 ]
+Key: VPDPWUSDSYrm: [ 0.00 0.00 ]
+Key: VPDPWUSDSYrr: [ 0.00 0.00 ]
+Key: VPDPWUSDSZ: [ 0.00 0.00 ]
+Key: VPDPWUSDSZrm: [ 0.00 0.00 ]
+Key: VPDPWUSDSZrmb: [ 0.00 0.00 ]
+Key: VPDPWUSDSZrmbk: [ 0.00 0.00 ]
+Key: VPDPWUSDSZrmbkz: [ 0.00 0.00 ]
+Key: VPDPWUSDSZrmk: [ 0.00 0.00 ]
+Key: VPDPWUSDSZrmkz: [ 0.00 0.00 ]
+Key: VPDPWUSDSZrr: [ 0.00 0.00 ]
+Key: VPDPWUSDSZrrk: [ 0.00 0.00 ]
+Key: VPDPWUSDSZrrkz: [ 0.00 0.00 ]
+Key: VPDPWUSDSrm: [ 0.00 0.00 ]
+Key: VPDPWUSDSrr: [ 0.00 0.00 ]
+Key: VPDPWUSDYrm: [ 0.00 0.00 ]
+Key: VPDPWUSDYrr: [ 0.00 0.00 ]
+Key: VPDPWUSDZ: [ 0.00 0.00 ]
+Key: VPDPWUSDZrm: [ 0.00 0.00 ]
+Key: VPDPWUSDZrmb: [ 0.00 0.00 ]
+Key: VPDPWUSDZrmbk: [ 0.00 0.00 ]
+Key: VPDPWUSDZrmbkz: [ 0.00 0.00 ]
+Key: VPDPWUSDZrmk: [ 0.00 0.00 ]
+Key: VPDPWUSDZrmkz: [ 0.00 0.00 ]
+Key: VPDPWUSDZrr: [ 0.00 0.00 ]
+Key: VPDPWUSDZrrk: [ 0.00 0.00 ]
+Key: VPDPWUSDZrrkz: [ 0.00 0.00 ]
+Key: VPDPWUSDrm: [ 0.00 0.00 ]
+Key: VPDPWUSDrr: [ 0.00 0.00 ]
+Key: VPDPWUUDSYrm: [ 0.00 0.00 ]
+Key: VPDPWUUDSYrr: [ 0.00 0.00 ]
+Key: VPDPWUUDSZ: [ 0.00 0.00 ]
+Key: VPDPWUUDSZrm: [ 0.00 0.00 ]
+Key: VPDPWUUDSZrmb: [ 0.00 0.00 ]
+Key: VPDPWUUDSZrmbk: [ 0.00 0.00 ]
+Key: VPDPWUUDSZrmbkz: [ 0.00 0.00 ]
+Key: VPDPWUUDSZrmk: [ 0.00 0.00 ]
+Key: VPDPWUUDSZrmkz: [ 0.00 0.00 ]
+Key: VPDPWUUDSZrr: [ 0.00 0.00 ]
+Key: VPDPWUUDSZrrk: [ 0.00 0.00 ]
+Key: VPDPWUUDSZrrkz: [ 0.00 0.00 ]
+Key: VPDPWUUDSrm: [ 0.00 0.00 ]
+Key: VPDPWUUDSrr: [ 0.00 0.00 ]
+Key: VPDPWUUDYrm: [ 0.00 0.00 ]
+Key: VPDPWUUDYrr: [ 0.00 0.00 ]
+Key: VPDPWUUDZ: [ 0.00 0.00 ]
+Key: VPDPWUUDZrm: [ 0.00 0.00 ]
+Key: VPDPWUUDZrmb: [ 0.00 0.00 ]
+Key: VPDPWUUDZrmbk: [ 0.00 0.00 ]
+Key: VPDPWUUDZrmbkz: [ 0.00 0.00 ]
+Key: VPDPWUUDZrmk: [ 0.00 0.00 ]
+Key: VPDPWUUDZrmkz: [ 0.00 0.00 ]
+Key: VPDPWUUDZrr: [ 0.00 0.00 ]
+Key: VPDPWUUDZrrk: [ 0.00 0.00 ]
+Key: VPDPWUUDZrrkz: [ 0.00 0.00 ]
+Key: VPDPWUUDrm: [ 0.00 0.00 ]
+Key: VPDPWUUDrr: [ 0.00 0.00 ]
+Key: VPERM: [ 0.00 0.00 ]
+Key: VPERMBZ: [ 0.00 0.00 ]
+Key: VPERMBZrm: [ 0.00 0.00 ]
+Key: VPERMBZrmk: [ 0.00 0.00 ]
+Key: VPERMBZrmkz: [ 0.00 0.00 ]
+Key: VPERMBZrr: [ 0.00 0.00 ]
+Key: VPERMBZrrk: [ 0.00 0.00 ]
+Key: VPERMBZrrkz: [ 0.00 0.00 ]
+Key: VPERMDYrm: [ 0.00 0.00 ]
+Key: VPERMDYrr: [ 0.00 0.00 ]
+Key: VPERMDZ: [ 0.00 0.00 ]
+Key: VPERMDZrm: [ 0.00 0.00 ]
+Key: VPERMDZrmb: [ 0.00 0.00 ]
+Key: VPERMDZrmbk: [ 0.00 0.00 ]
+Key: VPERMDZrmbkz: [ 0.00 0.00 ]
+Key: VPERMDZrmk: [ 0.00 0.00 ]
+Key: VPERMDZrmkz: [ 0.00 0.00 ]
+Key: VPERMDZrr: [ 0.00 0.00 ]
+Key: VPERMDZrrk: [ 0.00 0.00 ]
+Key: VPERMDZrrkz: [ 0.00 0.00 ]
+Key: VPERMI: [ 0.00 0.00 ]
+Key: VPERMIL: [ 0.00 0.00 ]
+Key: VPERMILPDYmi: [ 0.00 0.00 ]
+Key: VPERMILPDYri: [ 0.00 0.00 ]
+Key: VPERMILPDYrm: [ 0.00 0.00 ]
+Key: VPERMILPDYrr: [ 0.00 0.00 ]
+Key: VPERMILPDZ: [ 0.00 0.00 ]
+Key: VPERMILPDZmbi: [ 0.00 0.00 ]
+Key: VPERMILPDZmbik: [ 0.00 0.00 ]
+Key: VPERMILPDZmbikz: [ 0.00 0.00 ]
+Key: VPERMILPDZmi: [ 0.00 0.00 ]
+Key: VPERMILPDZmik: [ 0.00 0.00 ]
+Key: VPERMILPDZmikz: [ 0.00 0.00 ]
+Key: VPERMILPDZri: [ 0.00 0.00 ]
+Key: VPERMILPDZrik: [ 0.00 0.00 ]
+Key: VPERMILPDZrikz: [ 0.00 0.00 ]
+Key: VPERMILPDZrm: [ 0.00 0.00 ]
+Key: VPERMILPDZrmb: [ 0.00 0.00 ]
+Key: VPERMILPDZrmbk: [ 0.00 0.00 ]
+Key: VPERMILPDZrmbkz: [ 0.00 0.00 ]
+Key: VPERMILPDZrmk: [ 0.00 0.00 ]
+Key: VPERMILPDZrmkz: [ 0.00 0.00 ]
+Key: VPERMILPDZrr: [ 0.00 0.00 ]
+Key: VPERMILPDZrrk: [ 0.00 0.00 ]
+Key: VPERMILPDZrrkz: [ 0.00 0.00 ]
+Key: VPERMILPDmi: [ 0.00 0.00 ]
+Key: VPERMILPDri: [ 0.00 0.00 ]
+Key: VPERMILPDrm: [ 0.00 0.00 ]
+Key: VPERMILPDrr: [ 0.00 0.00 ]
+Key: VPERMILPSYmi: [ 0.00 0.00 ]
+Key: VPERMILPSYri: [ 0.00 0.00 ]
+Key: VPERMILPSYrm: [ 0.00 0.00 ]
+Key: VPERMILPSYrr: [ 0.00 0.00 ]
+Key: VPERMILPSZ: [ 0.00 0.00 ]
+Key: VPERMILPSZmbi: [ 0.00 0.00 ]
+Key: VPERMILPSZmbik: [ 0.00 0.00 ]
+Key: VPERMILPSZmbikz: [ 0.00 0.00 ]
+Key: VPERMILPSZmi: [ 0.00 0.00 ]
+Key: VPERMILPSZmik: [ 0.00 0.00 ]
+Key: VPERMILPSZmikz: [ 0.00 0.00 ]
+Key: VPERMILPSZri: [ 0.00 0.00 ]
+Key: VPERMILPSZrik: [ 0.00 0.00 ]
+Key: VPERMILPSZrikz: [ 0.00 0.00 ]
+Key: VPERMILPSZrm: [ 0.00 0.00 ]
+Key: VPERMILPSZrmb: [ 0.00 0.00 ]
+Key: VPERMILPSZrmbk: [ 0.00 0.00 ]
+Key: VPERMILPSZrmbkz: [ 0.00 0.00 ]
+Key: VPERMILPSZrmk: [ 0.00 0.00 ]
+Key: VPERMILPSZrmkz: [ 0.00 0.00 ]
+Key: VPERMILPSZrr: [ 0.00 0.00 ]
+Key: VPERMILPSZrrk: [ 0.00 0.00 ]
+Key: VPERMILPSZrrkz: [ 0.00 0.00 ]
+Key: VPERMILPSmi: [ 0.00 0.00 ]
+Key: VPERMILPSri: [ 0.00 0.00 ]
+Key: VPERMILPSrm: [ 0.00 0.00 ]
+Key: VPERMILPSrr: [ 0.00 0.00 ]
+Key: VPERMPDYmi: [ 0.00 0.00 ]
+Key: VPERMPDYri: [ 0.00 0.00 ]
+Key: VPERMPDZ: [ 0.00 0.00 ]
+Key: VPERMPDZmbi: [ 0.00 0.00 ]
+Key: VPERMPDZmbik: [ 0.00 0.00 ]
+Key: VPERMPDZmbikz: [ 0.00 0.00 ]
+Key: VPERMPDZmi: [ 0.00 0.00 ]
+Key: VPERMPDZmik: [ 0.00 0.00 ]
+Key: VPERMPDZmikz: [ 0.00 0.00 ]
+Key: VPERMPDZri: [ 0.00 0.00 ]
+Key: VPERMPDZrik: [ 0.00 0.00 ]
+Key: VPERMPDZrikz: [ 0.00 0.00 ]
+Key: VPERMPDZrm: [ 0.00 0.00 ]
+Key: VPERMPDZrmb: [ 0.00 0.00 ]
+Key: VPERMPDZrmbk: [ 0.00 0.00 ]
+Key: VPERMPDZrmbkz: [ 0.00 0.00 ]
+Key: VPERMPDZrmk: [ 0.00 0.00 ]
+Key: VPERMPDZrmkz: [ 0.00 0.00 ]
+Key: VPERMPDZrr: [ 0.00 0.00 ]
+Key: VPERMPDZrrk: [ 0.00 0.00 ]
+Key: VPERMPDZrrkz: [ 0.00 0.00 ]
+Key: VPERMPSYrm: [ 0.00 0.00 ]
+Key: VPERMPSYrr: [ 0.00 0.00 ]
+Key: VPERMPSZ: [ 0.00 0.00 ]
+Key: VPERMPSZrm: [ 0.00 0.00 ]
+Key: VPERMPSZrmb: [ 0.00 0.00 ]
+Key: VPERMPSZrmbk: [ 0.00 0.00 ]
+Key: VPERMPSZrmbkz: [ 0.00 0.00 ]
+Key: VPERMPSZrmk: [ 0.00 0.00 ]
+Key: VPERMPSZrmkz: [ 0.00 0.00 ]
+Key: VPERMPSZrr: [ 0.00 0.00 ]
+Key: VPERMPSZrrk: [ 0.00 0.00 ]
+Key: VPERMPSZrrkz: [ 0.00 0.00 ]
+Key: VPERMQYmi: [ 0.00 0.00 ]
+Key: VPERMQYri: [ 0.00 0.00 ]
+Key: VPERMQZ: [ 0.00 0.00 ]
+Key: VPERMQZmbi: [ 0.00 0.00 ]
+Key: VPERMQZmbik: [ 0.00 0.00 ]
+Key: VPERMQZmbikz: [ 0.00 0.00 ]
+Key: VPERMQZmi: [ 0.00 0.00 ]
+Key: VPERMQZmik: [ 0.00 0.00 ]
+Key: VPERMQZmikz: [ 0.00 0.00 ]
+Key: VPERMQZri: [ 0.00 0.00 ]
+Key: VPERMQZrik: [ 0.00 0.00 ]
+Key: VPERMQZrikz: [ 0.00 0.00 ]
+Key: VPERMQZrm: [ 0.00 0.00 ]
+Key: VPERMQZrmb: [ 0.00 0.00 ]
+Key: VPERMQZrmbk: [ 0.00 0.00 ]
+Key: VPERMQZrmbkz: [ 0.00 0.00 ]
+Key: VPERMQZrmk: [ 0.00 0.00 ]
+Key: VPERMQZrmkz: [ 0.00 0.00 ]
+Key: VPERMQZrr: [ 0.00 0.00 ]
+Key: VPERMQZrrk: [ 0.00 0.00 ]
+Key: VPERMQZrrkz: [ 0.00 0.00 ]
+Key: VPERMT: [ 0.00 0.00 ]
+Key: VPERMWZ: [ 0.00 0.00 ]
+Key: VPERMWZrm: [ 0.00 0.00 ]
+Key: VPERMWZrmk: [ 0.00 0.00 ]
+Key: VPERMWZrmkz: [ 0.00 0.00 ]
+Key: VPERMWZrr: [ 0.00 0.00 ]
+Key: VPERMWZrrk: [ 0.00 0.00 ]
+Key: VPERMWZrrkz: [ 0.00 0.00 ]
+Key: VPEXPANDBZ: [ 0.00 0.00 ]
+Key: VPEXPANDBZrm: [ 0.00 0.00 ]
+Key: VPEXPANDBZrmk: [ 0.00 0.00 ]
+Key: VPEXPANDBZrmkz: [ 0.00 0.00 ]
+Key: VPEXPANDBZrr: [ 0.00 0.00 ]
+Key: VPEXPANDBZrrk: [ 0.00 0.00 ]
+Key: VPEXPANDBZrrkz: [ 0.00 0.00 ]
+Key: VPEXPANDDZ: [ 0.00 0.00 ]
+Key: VPEXPANDDZrm: [ 0.00 0.00 ]
+Key: VPEXPANDDZrmk: [ 0.00 0.00 ]
+Key: VPEXPANDDZrmkz: [ 0.00 0.00 ]
+Key: VPEXPANDDZrr: [ 0.00 0.00 ]
+Key: VPEXPANDDZrrk: [ 0.00 0.00 ]
+Key: VPEXPANDDZrrkz: [ 0.00 0.00 ]
+Key: VPEXPANDQZ: [ 0.00 0.00 ]
+Key: VPEXPANDQZrm: [ 0.00 0.00 ]
+Key: VPEXPANDQZrmk: [ 0.00 0.00 ]
+Key: VPEXPANDQZrmkz: [ 0.00 0.00 ]
+Key: VPEXPANDQZrr: [ 0.00 0.00 ]
+Key: VPEXPANDQZrrk: [ 0.00 0.00 ]
+Key: VPEXPANDQZrrkz: [ 0.00 0.00 ]
+Key: VPEXPANDWZ: [ 0.00 0.00 ]
+Key: VPEXPANDWZrm: [ 0.00 0.00 ]
+Key: VPEXPANDWZrmk: [ 0.00 0.00 ]
+Key: VPEXPANDWZrmkz: [ 0.00 0.00 ]
+Key: VPEXPANDWZrr: [ 0.00 0.00 ]
+Key: VPEXPANDWZrrk: [ 0.00 0.00 ]
+Key: VPEXPANDWZrrkz: [ 0.00 0.00 ]
+Key: VPEXTRBZmri: [ 0.00 0.00 ]
+Key: VPEXTRBZrri: [ 0.00 0.00 ]
+Key: VPEXTRBmri: [ 0.00 0.00 ]
+Key: VPEXTRBrri: [ 0.00 0.00 ]
+Key: VPEXTRDZmri: [ 0.00 0.00 ]
+Key: VPEXTRDZrri: [ 0.00 0.00 ]
+Key: VPEXTRDmri: [ 0.00 0.00 ]
+Key: VPEXTRDrri: [ 0.00 0.00 ]
+Key: VPEXTRQZmri: [ 0.00 0.00 ]
+Key: VPEXTRQZrri: [ 0.00 0.00 ]
+Key: VPEXTRQmri: [ 0.00 0.00 ]
+Key: VPEXTRQrri: [ 0.00 0.00 ]
+Key: VPEXTRWZmri: [ 0.00 0.00 ]
+Key: VPEXTRWZrri: [ 0.00 0.00 ]
+Key: VPEXTRWZrri_REV: [ 0.00 0.00 ]
+Key: VPEXTRWmri: [ 0.00 0.00 ]
+Key: VPEXTRWrri: [ 0.00 0.00 ]
+Key: VPEXTRWrri_REV: [ 0.00 0.00 ]
+Key: VPGATHERDDYrm: [ 0.00 0.00 ]
+Key: VPGATHERDDZ: [ 0.00 0.00 ]
+Key: VPGATHERDDZrm: [ 0.00 0.00 ]
+Key: VPGATHERDDrm: [ 0.00 0.00 ]
+Key: VPGATHERDQYrm: [ 0.00 0.00 ]
+Key: VPGATHERDQZ: [ 0.00 0.00 ]
+Key: VPGATHERDQZrm: [ 0.00 0.00 ]
+Key: VPGATHERDQrm: [ 0.00 0.00 ]
+Key: VPGATHERQDYrm: [ 0.00 0.00 ]
+Key: VPGATHERQDZ: [ 0.00 0.00 ]
+Key: VPGATHERQDZrm: [ 0.00 0.00 ]
+Key: VPGATHERQDrm: [ 0.00 0.00 ]
+Key: VPGATHERQQYrm: [ 0.00 0.00 ]
+Key: VPGATHERQQZ: [ 0.00 0.00 ]
+Key: VPGATHERQQZrm: [ 0.00 0.00 ]
+Key: VPGATHERQQrm: [ 0.00 0.00 ]
+Key: VPHADDBDrm: [ 0.00 0.00 ]
+Key: VPHADDBDrr: [ 0.00 0.00 ]
+Key: VPHADDBQrm: [ 0.00 0.00 ]
+Key: VPHADDBQrr: [ 0.00 0.00 ]
+Key: VPHADDBWrm: [ 0.00 0.00 ]
+Key: VPHADDBWrr: [ 0.00 0.00 ]
+Key: VPHADDDQrm: [ 0.00 0.00 ]
+Key: VPHADDDQrr: [ 0.00 0.00 ]
+Key: VPHADDDYrm: [ 0.00 0.00 ]
+Key: VPHADDDYrr: [ 0.00 0.00 ]
+Key: VPHADDDrm: [ 0.00 0.00 ]
+Key: VPHADDDrr: [ 0.00 0.00 ]
+Key: VPHADDSWYrm: [ 0.00 0.00 ]
+Key: VPHADDSWYrr: [ 0.00 0.00 ]
+Key: VPHADDSWrm: [ 0.00 0.00 ]
+Key: VPHADDSWrr: [ 0.00 0.00 ]
+Key: VPHADDUBDrm: [ 0.00 0.00 ]
+Key: VPHADDUBDrr: [ 0.00 0.00 ]
+Key: VPHADDUBQrm: [ 0.00 0.00 ]
+Key: VPHADDUBQrr: [ 0.00 0.00 ]
+Key: VPHADDUBWrm: [ 0.00 0.00 ]
+Key: VPHADDUBWrr: [ 0.00 0.00 ]
+Key: VPHADDUDQrm: [ 0.00 0.00 ]
+Key: VPHADDUDQrr: [ 0.00 0.00 ]
+Key: VPHADDUWDrm: [ 0.00 0.00 ]
+Key: VPHADDUWDrr: [ 0.00 0.00 ]
+Key: VPHADDUWQrm: [ 0.00 0.00 ]
+Key: VPHADDUWQrr: [ 0.00 0.00 ]
+Key: VPHADDWDrm: [ 0.00 0.00 ]
+Key: VPHADDWDrr: [ 0.00 0.00 ]
+Key: VPHADDWQrm: [ 0.00 0.00 ]
+Key: VPHADDWQrr: [ 0.00 0.00 ]
+Key: VPHADDWYrm: [ 0.00 0.00 ]
+Key: VPHADDWYrr: [ 0.00 0.00 ]
+Key: VPHADDWrm: [ 0.00 0.00 ]
+Key: VPHADDWrr: [ 0.00 0.00 ]
+Key: VPHMINPOSUWrm: [ 0.00 0.00 ]
+Key: VPHMINPOSUWrr: [ 0.00 0.00 ]
+Key: VPHSUBBWrm: [ 0.00 0.00 ]
+Key: VPHSUBBWrr: [ 0.00 0.00 ]
+Key: VPHSUBDQrm: [ 0.00 0.00 ]
+Key: VPHSUBDQrr: [ 0.00 0.00 ]
+Key: VPHSUBDYrm: [ 0.00 0.00 ]
+Key: VPHSUBDYrr: [ 0.00 0.00 ]
+Key: VPHSUBDrm: [ 0.00 0.00 ]
+Key: VPHSUBDrr: [ 0.00 0.00 ]
+Key: VPHSUBSWYrm: [ 0.00 0.00 ]
+Key: VPHSUBSWYrr: [ 0.00 0.00 ]
+Key: VPHSUBSWrm: [ 0.00 0.00 ]
+Key: VPHSUBSWrr: [ 0.00 0.00 ]
+Key: VPHSUBWDrm: [ 0.00 0.00 ]
+Key: VPHSUBWDrr: [ 0.00 0.00 ]
+Key: VPHSUBWYrm: [ 0.00 0.00 ]
+Key: VPHSUBWYrr: [ 0.00 0.00 ]
+Key: VPHSUBWrm: [ 0.00 0.00 ]
+Key: VPHSUBWrr: [ 0.00 0.00 ]
+Key: VPINSRBZrmi: [ 0.00 0.00 ]
+Key: VPINSRBZrri: [ 0.00 0.00 ]
+Key: VPINSRBrmi: [ 0.00 0.00 ]
+Key: VPINSRBrri: [ 0.00 0.00 ]
+Key: VPINSRDZrmi: [ 0.00 0.00 ]
+Key: VPINSRDZrri: [ 0.00 0.00 ]
+Key: VPINSRDrmi: [ 0.00 0.00 ]
+Key: VPINSRDrri: [ 0.00 0.00 ]
+Key: VPINSRQZrmi: [ 0.00 0.00 ]
+Key: VPINSRQZrri: [ 0.00 0.00 ]
+Key: VPINSRQrmi: [ 0.00 0.00 ]
+Key: VPINSRQrri: [ 0.00 0.00 ]
+Key: VPINSRWZrmi: [ 0.00 0.00 ]
+Key: VPINSRWZrri: [ 0.00 0.00 ]
+Key: VPINSRWrmi: [ 0.00 0.00 ]
+Key: VPINSRWrri: [ 0.00 0.00 ]
+Key: VPLZCNTDZ: [ 0.00 0.00 ]
+Key: VPLZCNTDZrm: [ 0.00 0.00 ]
+Key: VPLZCNTDZrmb: [ 0.00 0.00 ]
+Key: VPLZCNTDZrmbk: [ 0.00 0.00 ]
+Key: VPLZCNTDZrmbkz: [ 0.00 0.00 ]
+Key: VPLZCNTDZrmk: [ 0.00 0.00 ]
+Key: VPLZCNTDZrmkz: [ 0.00 0.00 ]
+Key: VPLZCNTDZrr: [ 0.00 0.00 ]
+Key: VPLZCNTDZrrk: [ 0.00 0.00 ]
+Key: VPLZCNTDZrrkz: [ 0.00 0.00 ]
+Key: VPLZCNTQZ: [ 0.00 0.00 ]
+Key: VPLZCNTQZrm: [ 0.00 0.00 ]
+Key: VPLZCNTQZrmb: [ 0.00 0.00 ]
+Key: VPLZCNTQZrmbk: [ 0.00 0.00 ]
+Key: VPLZCNTQZrmbkz: [ 0.00 0.00 ]
+Key: VPLZCNTQZrmk: [ 0.00 0.00 ]
+Key: VPLZCNTQZrmkz: [ 0.00 0.00 ]
+Key: VPLZCNTQZrr: [ 0.00 0.00 ]
+Key: VPLZCNTQZrrk: [ 0.00 0.00 ]
+Key: VPLZCNTQZrrkz: [ 0.00 0.00 ]
+Key: VPMACSDDrm: [ 0.00 0.00 ]
+Key: VPMACSDDrr: [ 0.00 0.00 ]
+Key: VPMACSDQHrm: [ 0.00 0.00 ]
+Key: VPMACSDQHrr: [ 0.00 0.00 ]
+Key: VPMACSDQLrm: [ 0.00 0.00 ]
+Key: VPMACSDQLrr: [ 0.00 0.00 ]
+Key: VPMACSSDDrm: [ 0.00 0.00 ]
+Key: VPMACSSDDrr: [ 0.00 0.00 ]
+Key: VPMACSSDQHrm: [ 0.00 0.00 ]
+Key: VPMACSSDQHrr: [ 0.00 0.00 ]
+Key: VPMACSSDQLrm: [ 0.00 0.00 ]
+Key: VPMACSSDQLrr: [ 0.00 0.00 ]
+Key: VPMACSSWDrm: [ 0.00 0.00 ]
+Key: VPMACSSWDrr: [ 0.00 0.00 ]
+Key: VPMACSSWWrm: [ 0.00 0.00 ]
+Key: VPMACSSWWrr: [ 0.00 0.00 ]
+Key: VPMACSWDrm: [ 0.00 0.00 ]
+Key: VPMACSWDrr: [ 0.00 0.00 ]
+Key: VPMACSWWrm: [ 0.00 0.00 ]
+Key: VPMACSWWrr: [ 0.00 0.00 ]
+Key: VPMADCSSWDrm: [ 0.00 0.00 ]
+Key: VPMADCSSWDrr: [ 0.00 0.00 ]
+Key: VPMADCSWDrm: [ 0.00 0.00 ]
+Key: VPMADCSWDrr: [ 0.00 0.00 ]
+Key: VPMADD: [ 0.00 0.00 ]
+Key: VPMADDUBSWYrm: [ 0.00 0.00 ]
+Key: VPMADDUBSWYrr: [ 0.00 0.00 ]
+Key: VPMADDUBSWZ: [ 0.00 0.00 ]
+Key: VPMADDUBSWZrm: [ 0.00 0.00 ]
+Key: VPMADDUBSWZrmk: [ 0.00 0.00 ]
+Key: VPMADDUBSWZrmkz: [ 0.00 0.00 ]
+Key: VPMADDUBSWZrr: [ 0.00 0.00 ]
+Key: VPMADDUBSWZrrk: [ 0.00 0.00 ]
+Key: VPMADDUBSWZrrkz: [ 0.00 0.00 ]
+Key: VPMADDUBSWrm: [ 0.00 0.00 ]
+Key: VPMADDUBSWrr: [ 0.00 0.00 ]
+Key: VPMADDWDYrm: [ 0.00 0.00 ]
+Key: VPMADDWDYrr: [ 0.00 0.00 ]
+Key: VPMADDWDZ: [ 0.00 0.00 ]
+Key: VPMADDWDZrm: [ 0.00 0.00 ]
+Key: VPMADDWDZrmk: [ 0.00 0.00 ]
+Key: VPMADDWDZrmkz: [ 0.00 0.00 ]
+Key: VPMADDWDZrr: [ 0.00 0.00 ]
+Key: VPMADDWDZrrk: [ 0.00 0.00 ]
+Key: VPMADDWDZrrkz: [ 0.00 0.00 ]
+Key: VPMADDWDrm: [ 0.00 0.00 ]
+Key: VPMADDWDrr: [ 0.00 0.00 ]
+Key: VPMASKMOVDYmr: [ 0.00 0.00 ]
+Key: VPMASKMOVDYrm: [ 0.00 0.00 ]
+Key: VPMASKMOVDmr: [ 0.00 0.00 ]
+Key: VPMASKMOVDrm: [ 0.00 0.00 ]
+Key: VPMASKMOVQYmr: [ 0.00 0.00 ]
+Key: VPMASKMOVQYrm: [ 0.00 0.00 ]
+Key: VPMASKMOVQmr: [ 0.00 0.00 ]
+Key: VPMASKMOVQrm: [ 0.00 0.00 ]
+Key: VPMAXSBYrm: [ 0.00 0.00 ]
+Key: VPMAXSBYrr: [ 0.00 0.00 ]
+Key: VPMAXSBZ: [ 0.00 0.00 ]
+Key: VPMAXSBZrm: [ 0.00 0.00 ]
+Key: VPMAXSBZrmk: [ 0.00 0.00 ]
+Key: VPMAXSBZrmkz: [ 0.00 0.00 ]
+Key: VPMAXSBZrr: [ 0.00 0.00 ]
+Key: VPMAXSBZrrk: [ 0.00 0.00 ]
+Key: VPMAXSBZrrkz: [ 0.00 0.00 ]
+Key: VPMAXSBrm: [ 0.00 0.00 ]
+Key: VPMAXSBrr: [ 0.00 0.00 ]
+Key: VPMAXSDYrm: [ 0.00 0.00 ]
+Key: VPMAXSDYrr: [ 0.00 0.00 ]
+Key: VPMAXSDZ: [ 0.00 0.00 ]
+Key: VPMAXSDZrm: [ 0.00 0.00 ]
+Key: VPMAXSDZrmb: [ 0.00 0.00 ]
+Key: VPMAXSDZrmbk: [ 0.00 0.00 ]
+Key: VPMAXSDZrmbkz: [ 0.00 0.00 ]
+Key: VPMAXSDZrmk: [ 0.00 0.00 ]
+Key: VPMAXSDZrmkz: [ 0.00 0.00 ]
+Key: VPMAXSDZrr: [ 0.00 0.00 ]
+Key: VPMAXSDZrrk: [ 0.00 0.00 ]
+Key: VPMAXSDZrrkz: [ 0.00 0.00 ]
+Key: VPMAXSDrm: [ 0.00 0.00 ]
+Key: VPMAXSDrr: [ 0.00 0.00 ]
+Key: VPMAXSQZ: [ 0.00 0.00 ]
+Key: VPMAXSQZrm: [ 0.00 0.00 ]
+Key: VPMAXSQZrmb: [ 0.00 0.00 ]
+Key: VPMAXSQZrmbk: [ 0.00 0.00 ]
+Key: VPMAXSQZrmbkz: [ 0.00 0.00 ]
+Key: VPMAXSQZrmk: [ 0.00 0.00 ]
+Key: VPMAXSQZrmkz: [ 0.00 0.00 ]
+Key: VPMAXSQZrr: [ 0.00 0.00 ]
+Key: VPMAXSQZrrk: [ 0.00 0.00 ]
+Key: VPMAXSQZrrkz: [ 0.00 0.00 ]
+Key: VPMAXSWYrm: [ 0.00 0.00 ]
+Key: VPMAXSWYrr: [ 0.00 0.00 ]
+Key: VPMAXSWZ: [ 0.00 0.00 ]
+Key: VPMAXSWZrm: [ 0.00 0.00 ]
+Key: VPMAXSWZrmk: [ 0.00 0.00 ]
+Key: VPMAXSWZrmkz: [ 0.00 0.00 ]
+Key: VPMAXSWZrr: [ 0.00 0.00 ]
+Key: VPMAXSWZrrk: [ 0.00 0.00 ]
+Key: VPMAXSWZrrkz: [ 0.00 0.00 ]
+Key: VPMAXSWrm: [ 0.00 0.00 ]
+Key: VPMAXSWrr: [ 0.00 0.00 ]
+Key: VPMAXUBYrm: [ 0.00 0.00 ]
+Key: VPMAXUBYrr: [ 0.00 0.00 ]
+Key: VPMAXUBZ: [ 0.00 0.00 ]
+Key: VPMAXUBZrm: [ 0.00 0.00 ]
+Key: VPMAXUBZrmk: [ 0.00 0.00 ]
+Key: VPMAXUBZrmkz: [ 0.00 0.00 ]
+Key: VPMAXUBZrr: [ 0.00 0.00 ]
+Key: VPMAXUBZrrk: [ 0.00 0.00 ]
+Key: VPMAXUBZrrkz: [ 0.00 0.00 ]
+Key: VPMAXUBrm: [ 0.00 0.00 ]
+Key: VPMAXUBrr: [ 0.00 0.00 ]
+Key: VPMAXUDYrm: [ 0.00 0.00 ]
+Key: VPMAXUDYrr: [ 0.00 0.00 ]
+Key: VPMAXUDZ: [ 0.00 0.00 ]
+Key: VPMAXUDZrm: [ 0.00 0.00 ]
+Key: VPMAXUDZrmb: [ 0.00 0.00 ]
+Key: VPMAXUDZrmbk: [ 0.00 0.00 ]
+Key: VPMAXUDZrmbkz: [ 0.00 0.00 ]
+Key: VPMAXUDZrmk: [ 0.00 0.00 ]
+Key: VPMAXUDZrmkz: [ 0.00 0.00 ]
+Key: VPMAXUDZrr: [ 0.00 0.00 ]
+Key: VPMAXUDZrrk: [ 0.00 0.00 ]
+Key: VPMAXUDZrrkz: [ 0.00 0.00 ]
+Key: VPMAXUDrm: [ 0.00 0.00 ]
+Key: VPMAXUDrr: [ 0.00 0.00 ]
+Key: VPMAXUQZ: [ 0.00 0.00 ]
+Key: VPMAXUQZrm: [ 0.00 0.00 ]
+Key: VPMAXUQZrmb: [ 0.00 0.00 ]
+Key: VPMAXUQZrmbk: [ 0.00 0.00 ]
+Key: VPMAXUQZrmbkz: [ 0.00 0.00 ]
+Key: VPMAXUQZrmk: [ 0.00 0.00 ]
+Key: VPMAXUQZrmkz: [ 0.00 0.00 ]
+Key: VPMAXUQZrr: [ 0.00 0.00 ]
+Key: VPMAXUQZrrk: [ 0.00 0.00 ]
+Key: VPMAXUQZrrkz: [ 0.00 0.00 ]
+Key: VPMAXUWYrm: [ 0.00 0.00 ]
+Key: VPMAXUWYrr: [ 0.00 0.00 ]
+Key: VPMAXUWZ: [ 0.00 0.00 ]
+Key: VPMAXUWZrm: [ 0.00 0.00 ]
+Key: VPMAXUWZrmk: [ 0.00 0.00 ]
+Key: VPMAXUWZrmkz: [ 0.00 0.00 ]
+Key: VPMAXUWZrr: [ 0.00 0.00 ]
+Key: VPMAXUWZrrk: [ 0.00 0.00 ]
+Key: VPMAXUWZrrkz: [ 0.00 0.00 ]
+Key: VPMAXUWrm: [ 0.00 0.00 ]
+Key: VPMAXUWrr: [ 0.00 0.00 ]
+Key: VPMINSBYrm: [ 0.00 0.00 ]
+Key: VPMINSBYrr: [ 0.00 0.00 ]
+Key: VPMINSBZ: [ 0.00 0.00 ]
+Key: VPMINSBZrm: [ 0.00 0.00 ]
+Key: VPMINSBZrmk: [ 0.00 0.00 ]
+Key: VPMINSBZrmkz: [ 0.00 0.00 ]
+Key: VPMINSBZrr: [ 0.00 0.00 ]
+Key: VPMINSBZrrk: [ 0.00 0.00 ]
+Key: VPMINSBZrrkz: [ 0.00 0.00 ]
+Key: VPMINSBrm: [ 0.00 0.00 ]
+Key: VPMINSBrr: [ 0.00 0.00 ]
+Key: VPMINSDYrm: [ 0.00 0.00 ]
+Key: VPMINSDYrr: [ 0.00 0.00 ]
+Key: VPMINSDZ: [ 0.00 0.00 ]
+Key: VPMINSDZrm: [ 0.00 0.00 ]
+Key: VPMINSDZrmb: [ 0.00 0.00 ]
+Key: VPMINSDZrmbk: [ 0.00 0.00 ]
+Key: VPMINSDZrmbkz: [ 0.00 0.00 ]
+Key: VPMINSDZrmk: [ 0.00 0.00 ]
+Key: VPMINSDZrmkz: [ 0.00 0.00 ]
+Key: VPMINSDZrr: [ 0.00 0.00 ]
+Key: VPMINSDZrrk: [ 0.00 0.00 ]
+Key: VPMINSDZrrkz: [ 0.00 0.00 ]
+Key: VPMINSDrm: [ 0.00 0.00 ]
+Key: VPMINSDrr: [ 0.00 0.00 ]
+Key: VPMINSQZ: [ 0.00 0.00 ]
+Key: VPMINSQZrm: [ 0.00 0.00 ]
+Key: VPMINSQZrmb: [ 0.00 0.00 ]
+Key: VPMINSQZrmbk: [ 0.00 0.00 ]
+Key: VPMINSQZrmbkz: [ 0.00 0.00 ]
+Key: VPMINSQZrmk: [ 0.00 0.00 ]
+Key: VPMINSQZrmkz: [ 0.00 0.00 ]
+Key: VPMINSQZrr: [ 0.00 0.00 ]
+Key: VPMINSQZrrk: [ 0.00 0.00 ]
+Key: VPMINSQZrrkz: [ 0.00 0.00 ]
+Key: VPMINSWYrm: [ 0.00 0.00 ]
+Key: VPMINSWYrr: [ 0.00 0.00 ]
+Key: VPMINSWZ: [ 0.00 0.00 ]
+Key: VPMINSWZrm: [ 0.00 0.00 ]
+Key: VPMINSWZrmk: [ 0.00 0.00 ]
+Key: VPMINSWZrmkz: [ 0.00 0.00 ]
+Key: VPMINSWZrr: [ 0.00 0.00 ]
+Key: VPMINSWZrrk: [ 0.00 0.00 ]
+Key: VPMINSWZrrkz: [ 0.00 0.00 ]
+Key: VPMINSWrm: [ 0.00 0.00 ]
+Key: VPMINSWrr: [ 0.00 0.00 ]
+Key: VPMINUBYrm: [ 0.00 0.00 ]
+Key: VPMINUBYrr: [ 0.00 0.00 ]
+Key: VPMINUBZ: [ 0.00 0.00 ]
+Key: VPMINUBZrm: [ 0.00 0.00 ]
+Key: VPMINUBZrmk: [ 0.00 0.00 ]
+Key: VPMINUBZrmkz: [ 0.00 0.00 ]
+Key: VPMINUBZrr: [ 0.00 0.00 ]
+Key: VPMINUBZrrk: [ 0.00 0.00 ]
+Key: VPMINUBZrrkz: [ 0.00 0.00 ]
+Key: VPMINUBrm: [ 0.00 0.00 ]
+Key: VPMINUBrr: [ 0.00 0.00 ]
+Key: VPMINUDYrm: [ 0.00 0.00 ]
+Key: VPMINUDYrr: [ 0.00 0.00 ]
+Key: VPMINUDZ: [ 0.00 0.00 ]
+Key: VPMINUDZrm: [ 0.00 0.00 ]
+Key: VPMINUDZrmb: [ 0.00 0.00 ]
+Key: VPMINUDZrmbk: [ 0.00 0.00 ]
+Key: VPMINUDZrmbkz: [ 0.00 0.00 ]
+Key: VPMINUDZrmk: [ 0.00 0.00 ]
+Key: VPMINUDZrmkz: [ 0.00 0.00 ]
+Key: VPMINUDZrr: [ 0.00 0.00 ]
+Key: VPMINUDZrrk: [ 0.00 0.00 ]
+Key: VPMINUDZrrkz: [ 0.00 0.00 ]
+Key: VPMINUDrm: [ 0.00 0.00 ]
+Key: VPMINUDrr: [ 0.00 0.00 ]
+Key: VPMINUQZ: [ 0.00 0.00 ]
+Key: VPMINUQZrm: [ 0.00 0.00 ]
+Key: VPMINUQZrmb: [ 0.00 0.00 ]
+Key: VPMINUQZrmbk: [ 0.00 0.00 ]
+Key: VPMINUQZrmbkz: [ 0.00 0.00 ]
+Key: VPMINUQZrmk: [ 0.00 0.00 ]
+Key: VPMINUQZrmkz: [ 0.00 0.00 ]
+Key: VPMINUQZrr: [ 0.00 0.00 ]
+Key: VPMINUQZrrk: [ 0.00 0.00 ]
+Key: VPMINUQZrrkz: [ 0.00 0.00 ]
+Key: VPMINUWYrm: [ 0.00 0.00 ]
+Key: VPMINUWYrr: [ 0.00 0.00 ]
+Key: VPMINUWZ: [ 0.00 0.00 ]
+Key: VPMINUWZrm: [ 0.00 0.00 ]
+Key: VPMINUWZrmk: [ 0.00 0.00 ]
+Key: VPMINUWZrmkz: [ 0.00 0.00 ]
+Key: VPMINUWZrr: [ 0.00 0.00 ]
+Key: VPMINUWZrrk: [ 0.00 0.00 ]
+Key: VPMINUWZrrkz: [ 0.00 0.00 ]
+Key: VPMINUWrm: [ 0.00 0.00 ]
+Key: VPMINUWrr: [ 0.00 0.00 ]
+Key: VPMOVB: [ 0.00 0.00 ]
+Key: VPMOVD: [ 0.00 0.00 ]
+Key: VPMOVDBZ: [ 0.00 0.00 ]
+Key: VPMOVDBZmr: [ 0.00 0.00 ]
+Key: VPMOVDBZmrk: [ 0.00 0.00 ]
+Key: VPMOVDBZrr: [ 0.00 0.00 ]
+Key: VPMOVDBZrrk: [ 0.00 0.00 ]
+Key: VPMOVDBZrrkz: [ 0.00 0.00 ]
+Key: VPMOVDWZ: [ 0.00 0.00 ]
+Key: VPMOVDWZmr: [ 0.00 0.00 ]
+Key: VPMOVDWZmrk: [ 0.00 0.00 ]
+Key: VPMOVDWZrr: [ 0.00 0.00 ]
+Key: VPMOVDWZrrk: [ 0.00 0.00 ]
+Key: VPMOVDWZrrkz: [ 0.00 0.00 ]
+Key: VPMOVM: [ 0.00 0.00 ]
+Key: VPMOVMSKBYrr: [ 0.00 0.00 ]
+Key: VPMOVMSKBrr: [ 0.00 0.00 ]
+Key: VPMOVQ: [ 0.00 0.00 ]
+Key: VPMOVQBZ: [ 0.00 0.00 ]
+Key: VPMOVQBZmr: [ 0.00 0.00 ]
+Key: VPMOVQBZmrk: [ 0.00 0.00 ]
+Key: VPMOVQBZrr: [ 0.00 0.00 ]
+Key: VPMOVQBZrrk: [ 0.00 0.00 ]
+Key: VPMOVQBZrrkz: [ 0.00 0.00 ]
+Key: VPMOVQDZ: [ 0.00 0.00 ]
+Key: VPMOVQDZmr: [ 0.00 0.00 ]
+Key: VPMOVQDZmrk: [ 0.00 0.00 ]
+Key: VPMOVQDZrr: [ 0.00 0.00 ]
+Key: VPMOVQDZrrk: [ 0.00 0.00 ]
+Key: VPMOVQDZrrkz: [ 0.00 0.00 ]
+Key: VPMOVQWZ: [ 0.00 0.00 ]
+Key: VPMOVQWZmr: [ 0.00 0.00 ]
+Key: VPMOVQWZmrk: [ 0.00 0.00 ]
+Key: VPMOVQWZrr: [ 0.00 0.00 ]
+Key: VPMOVQWZrrk: [ 0.00 0.00 ]
+Key: VPMOVQWZrrkz: [ 0.00 0.00 ]
+Key: VPMOVSDBZ: [ 0.00 0.00 ]
+Key: VPMOVSDBZmr: [ 0.00 0.00 ]
+Key: VPMOVSDBZmrk: [ 0.00 0.00 ]
+Key: VPMOVSDBZrr: [ 0.00 0.00 ]
+Key: VPMOVSDBZrrk: [ 0.00 0.00 ]
+Key: VPMOVSDBZrrkz: [ 0.00 0.00 ]
+Key: VPMOVSDWZ: [ 0.00 0.00 ]
+Key: VPMOVSDWZmr: [ 0.00 0.00 ]
+Key: VPMOVSDWZmrk: [ 0.00 0.00 ]
+Key: VPMOVSDWZrr: [ 0.00 0.00 ]
+Key: VPMOVSDWZrrk: [ 0.00 0.00 ]
+Key: VPMOVSDWZrrkz: [ 0.00 0.00 ]
+Key: VPMOVSQBZ: [ 0.00 0.00 ]
+Key: VPMOVSQBZmr: [ 0.00 0.00 ]
+Key: VPMOVSQBZmrk: [ 0.00 0.00 ]
+Key: VPMOVSQBZrr: [ 0.00 0.00 ]
+Key: VPMOVSQBZrrk: [ 0.00 0.00 ]
+Key: VPMOVSQBZrrkz: [ 0.00 0.00 ]
+Key: VPMOVSQDZ: [ 0.00 0.00 ]
+Key: VPMOVSQDZmr: [ 0.00 0.00 ]
+Key: VPMOVSQDZmrk: [ 0.00 0.00 ]
+Key: VPMOVSQDZrr: [ 0.00 0.00 ]
+Key: VPMOVSQDZrrk: [ 0.00 0.00 ]
+Key: VPMOVSQDZrrkz: [ 0.00 0.00 ]
+Key: VPMOVSQWZ: [ 0.00 0.00 ]
+Key: VPMOVSQWZmr: [ 0.00 0.00 ]
+Key: VPMOVSQWZmrk: [ 0.00 0.00 ]
+Key: VPMOVSQWZrr: [ 0.00 0.00 ]
+Key: VPMOVSQWZrrk: [ 0.00 0.00 ]
+Key: VPMOVSQWZrrkz: [ 0.00 0.00 ]
+Key: VPMOVSWBZ: [ 0.00 0.00 ]
+Key: VPMOVSWBZmr: [ 0.00 0.00 ]
+Key: VPMOVSWBZmrk: [ 0.00 0.00 ]
+Key: VPMOVSWBZrr: [ 0.00 0.00 ]
+Key: VPMOVSWBZrrk: [ 0.00 0.00 ]
+Key: VPMOVSWBZrrkz: [ 0.00 0.00 ]
+Key: VPMOVSXBDYrm: [ 0.00 0.00 ]
+Key: VPMOVSXBDYrr: [ 0.00 0.00 ]
+Key: VPMOVSXBDZ: [ 0.00 0.00 ]
+Key: VPMOVSXBDZrm: [ 0.00 0.00 ]
+Key: VPMOVSXBDZrmk: [ 0.00 0.00 ]
+Key: VPMOVSXBDZrmkz: [ 0.00 0.00 ]
+Key: VPMOVSXBDZrr: [ 0.00 0.00 ]
+Key: VPMOVSXBDZrrk: [ 0.00 0.00 ]
+Key: VPMOVSXBDZrrkz: [ 0.00 0.00 ]
+Key: VPMOVSXBDrm: [ 0.00 0.00 ]
+Key: VPMOVSXBDrr: [ 0.00 0.00 ]
+Key: VPMOVSXBQYrm: [ 0.00 0.00 ]
+Key: VPMOVSXBQYrr: [ 0.00 0.00 ]
+Key: VPMOVSXBQZ: [ 0.00 0.00 ]
+Key: VPMOVSXBQZrm: [ 0.00 0.00 ]
+Key: VPMOVSXBQZrmk: [ 0.00 0.00 ]
+Key: VPMOVSXBQZrmkz: [ 0.00 0.00 ]
+Key: VPMOVSXBQZrr: [ 0.00 0.00 ]
+Key: VPMOVSXBQZrrk: [ 0.00 0.00 ]
+Key: VPMOVSXBQZrrkz: [ 0.00 0.00 ]
+Key: VPMOVSXBQrm: [ 0.00 0.00 ]
+Key: VPMOVSXBQrr: [ 0.00 0.00 ]
+Key: VPMOVSXBWYrm: [ 0.00 0.00 ]
+Key: VPMOVSXBWYrr: [ 0.00 0.00 ]
+Key: VPMOVSXBWZ: [ 0.00 0.00 ]
+Key: VPMOVSXBWZrm: [ 0.00 0.00 ]
+Key: VPMOVSXBWZrmk: [ 0.00 0.00 ]
+Key: VPMOVSXBWZrmkz: [ 0.00 0.00 ]
+Key: VPMOVSXBWZrr: [ 0.00 0.00 ]
+Key: VPMOVSXBWZrrk: [ 0.00 0.00 ]
+Key: VPMOVSXBWZrrkz: [ 0.00 0.00 ]
+Key: VPMOVSXBWrm: [ 0.00 0.00 ]
+Key: VPMOVSXBWrr: [ 0.00 0.00 ]
+Key: VPMOVSXDQYrm: [ 0.00 0.00 ]
+Key: VPMOVSXDQYrr: [ 0.00 0.00 ]
+Key: VPMOVSXDQZ: [ 0.00 0.00 ]
+Key: VPMOVSXDQZrm: [ 0.00 0.00 ]
+Key: VPMOVSXDQZrmk: [ 0.00 0.00 ]
+Key: VPMOVSXDQZrmkz: [ 0.00 0.00 ]
+Key: VPMOVSXDQZrr: [ 0.00 0.00 ]
+Key: VPMOVSXDQZrrk: [ 0.00 0.00 ]
+Key: VPMOVSXDQZrrkz: [ 0.00 0.00 ]
+Key: VPMOVSXDQrm: [ 0.00 0.00 ]
+Key: VPMOVSXDQrr: [ 0.00 0.00 ]
+Key: VPMOVSXWDYrm: [ 0.00 0.00 ]
+Key: VPMOVSXWDYrr: [ 0.00 0.00 ]
+Key: VPMOVSXWDZ: [ 0.00 0.00 ]
+Key: VPMOVSXWDZrm: [ 0.00 0.00 ]
+Key: VPMOVSXWDZrmk: [ 0.00 0.00 ]
+Key: VPMOVSXWDZrmkz: [ 0.00 0.00 ]
+Key: VPMOVSXWDZrr: [ 0.00 0.00 ]
+Key: VPMOVSXWDZrrk: [ 0.00 0.00 ]
+Key: VPMOVSXWDZrrkz: [ 0.00 0.00 ]
+Key: VPMOVSXWDrm: [ 0.00 0.00 ]
+Key: VPMOVSXWDrr: [ 0.00 0.00 ]
+Key: VPMOVSXWQYrm: [ 0.00 0.00 ]
+Key: VPMOVSXWQYrr: [ 0.00 0.00 ]
+Key: VPMOVSXWQZ: [ 0.00 0.00 ]
+Key: VPMOVSXWQZrm: [ 0.00 0.00 ]
+Key: VPMOVSXWQZrmk: [ 0.00 0.00 ]
+Key: VPMOVSXWQZrmkz: [ 0.00 0.00 ]
+Key: VPMOVSXWQZrr: [ 0.00 0.00 ]
+Key: VPMOVSXWQZrrk: [ 0.00 0.00 ]
+Key: VPMOVSXWQZrrkz: [ 0.00 0.00 ]
+Key: VPMOVSXWQrm: [ 0.00 0.00 ]
+Key: VPMOVSXWQrr: [ 0.00 0.00 ]
+Key: VPMOVUSDBZ: [ 0.00 0.00 ]
+Key: VPMOVUSDBZmr: [ 0.00 0.00 ]
+Key: VPMOVUSDBZmrk: [ 0.00 0.00 ]
+Key: VPMOVUSDBZrr: [ 0.00 0.00 ]
+Key: VPMOVUSDBZrrk: [ 0.00 0.00 ]
+Key: VPMOVUSDBZrrkz: [ 0.00 0.00 ]
+Key: VPMOVUSDWZ: [ 0.00 0.00 ]
+Key: VPMOVUSDWZmr: [ 0.00 0.00 ]
+Key: VPMOVUSDWZmrk: [ 0.00 0.00 ]
+Key: VPMOVUSDWZrr: [ 0.00 0.00 ]
+Key: VPMOVUSDWZrrk: [ 0.00 0.00 ]
+Key: VPMOVUSDWZrrkz: [ 0.00 0.00 ]
+Key: VPMOVUSQBZ: [ 0.00 0.00 ]
+Key: VPMOVUSQBZmr: [ 0.00 0.00 ]
+Key: VPMOVUSQBZmrk: [ 0.00 0.00 ]
+Key: VPMOVUSQBZrr: [ 0.00 0.00 ]
+Key: VPMOVUSQBZrrk: [ 0.00 0.00 ]
+Key: VPMOVUSQBZrrkz: [ 0.00 0.00 ]
+Key: VPMOVUSQDZ: [ 0.00 0.00 ]
+Key: VPMOVUSQDZmr: [ 0.00 0.00 ]
+Key: VPMOVUSQDZmrk: [ 0.00 0.00 ]
+Key: VPMOVUSQDZrr: [ 0.00 0.00 ]
+Key: VPMOVUSQDZrrk: [ 0.00 0.00 ]
+Key: VPMOVUSQDZrrkz: [ 0.00 0.00 ]
+Key: VPMOVUSQWZ: [ 0.00 0.00 ]
+Key: VPMOVUSQWZmr: [ 0.00 0.00 ]
+Key: VPMOVUSQWZmrk: [ 0.00 0.00 ]
+Key: VPMOVUSQWZrr: [ 0.00 0.00 ]
+Key: VPMOVUSQWZrrk: [ 0.00 0.00 ]
+Key: VPMOVUSQWZrrkz: [ 0.00 0.00 ]
+Key: VPMOVUSWBZ: [ 0.00 0.00 ]
+Key: VPMOVUSWBZmr: [ 0.00 0.00 ]
+Key: VPMOVUSWBZmrk: [ 0.00 0.00 ]
+Key: VPMOVUSWBZrr: [ 0.00 0.00 ]
+Key: VPMOVUSWBZrrk: [ 0.00 0.00 ]
+Key: VPMOVUSWBZrrkz: [ 0.00 0.00 ]
+Key: VPMOVW: [ 0.00 0.00 ]
+Key: VPMOVWBZ: [ 0.00 0.00 ]
+Key: VPMOVWBZmr: [ 0.00 0.00 ]
+Key: VPMOVWBZmrk: [ 0.00 0.00 ]
+Key: VPMOVWBZrr: [ 0.00 0.00 ]
+Key: VPMOVWBZrrk: [ 0.00 0.00 ]
+Key: VPMOVWBZrrkz: [ 0.00 0.00 ]
+Key: VPMOVZXBDYrm: [ 0.00 0.00 ]
+Key: VPMOVZXBDYrr: [ 0.00 0.00 ]
+Key: VPMOVZXBDZ: [ 0.00 0.00 ]
+Key: VPMOVZXBDZrm: [ 0.00 0.00 ]
+Key: VPMOVZXBDZrmk: [ 0.00 0.00 ]
+Key: VPMOVZXBDZrmkz: [ 0.00 0.00 ]
+Key: VPMOVZXBDZrr: [ 0.00 0.00 ]
+Key: VPMOVZXBDZrrk: [ 0.00 0.00 ]
+Key: VPMOVZXBDZrrkz: [ 0.00 0.00 ]
+Key: VPMOVZXBDrm: [ 0.00 0.00 ]
+Key: VPMOVZXBDrr: [ 0.00 0.00 ]
+Key: VPMOVZXBQYrm: [ 0.00 0.00 ]
+Key: VPMOVZXBQYrr: [ 0.00 0.00 ]
+Key: VPMOVZXBQZ: [ 0.00 0.00 ]
+Key: VPMOVZXBQZrm: [ 0.00 0.00 ]
+Key: VPMOVZXBQZrmk: [ 0.00 0.00 ]
+Key: VPMOVZXBQZrmkz: [ 0.00 0.00 ]
+Key: VPMOVZXBQZrr: [ 0.00 0.00 ]
+Key: VPMOVZXBQZrrk: [ 0.00 0.00 ]
+Key: VPMOVZXBQZrrkz: [ 0.00 0.00 ]
+Key: VPMOVZXBQrm: [ 0.00 0.00 ]
+Key: VPMOVZXBQrr: [ 0.00 0.00 ]
+Key: VPMOVZXBWYrm: [ 0.00 0.00 ]
+Key: VPMOVZXBWYrr: [ 0.00 0.00 ]
+Key: VPMOVZXBWZ: [ 0.00 0.00 ]
+Key: VPMOVZXBWZrm: [ 0.00 0.00 ]
+Key: VPMOVZXBWZrmk: [ 0.00 0.00 ]
+Key: VPMOVZXBWZrmkz: [ 0.00 0.00 ]
+Key: VPMOVZXBWZrr: [ 0.00 0.00 ]
+Key: VPMOVZXBWZrrk: [ 0.00 0.00 ]
+Key: VPMOVZXBWZrrkz: [ 0.00 0.00 ]
+Key: VPMOVZXBWrm: [ 0.00 0.00 ]
+Key: VPMOVZXBWrr: [ 0.00 0.00 ]
+Key: VPMOVZXDQYrm: [ 0.00 0.00 ]
+Key: VPMOVZXDQYrr: [ 0.00 0.00 ]
+Key: VPMOVZXDQZ: [ 0.00 0.00 ]
+Key: VPMOVZXDQZrm: [ 0.00 0.00 ]
+Key: VPMOVZXDQZrmk: [ 0.00 0.00 ]
+Key: VPMOVZXDQZrmkz: [ 0.00 0.00 ]
+Key: VPMOVZXDQZrr: [ 0.00 0.00 ]
+Key: VPMOVZXDQZrrk: [ 0.00 0.00 ]
+Key: VPMOVZXDQZrrkz: [ 0.00 0.00 ]
+Key: VPMOVZXDQrm: [ 0.00 0.00 ]
+Key: VPMOVZXDQrr: [ 0.00 0.00 ]
+Key: VPMOVZXWDYrm: [ 0.00 0.00 ]
+Key: VPMOVZXWDYrr: [ 0.00 0.00 ]
+Key: VPMOVZXWDZ: [ 0.00 0.00 ]
+Key: VPMOVZXWDZrm: [ 0.00 0.00 ]
+Key: VPMOVZXWDZrmk: [ 0.00 0.00 ]
+Key: VPMOVZXWDZrmkz: [ 0.00 0.00 ]
+Key: VPMOVZXWDZrr: [ 0.00 0.00 ]
+Key: VPMOVZXWDZrrk: [ 0.00 0.00 ]
+Key: VPMOVZXWDZrrkz: [ 0.00 0.00 ]
+Key: VPMOVZXWDrm: [ 0.00 0.00 ]
+Key: VPMOVZXWDrr: [ 0.00 0.00 ]
+Key: VPMOVZXWQYrm: [ 0.00 0.00 ]
+Key: VPMOVZXWQYrr: [ 0.00 0.00 ]
+Key: VPMOVZXWQZ: [ 0.00 0.00 ]
+Key: VPMOVZXWQZrm: [ 0.00 0.00 ]
+Key: VPMOVZXWQZrmk: [ 0.00 0.00 ]
+Key: VPMOVZXWQZrmkz: [ 0.00 0.00 ]
+Key: VPMOVZXWQZrr: [ 0.00 0.00 ]
+Key: VPMOVZXWQZrrk: [ 0.00 0.00 ]
+Key: VPMOVZXWQZrrkz: [ 0.00 0.00 ]
+Key: VPMOVZXWQrm: [ 0.00 0.00 ]
+Key: VPMOVZXWQrr: [ 0.00 0.00 ]
+Key: VPMULDQYrm: [ 0.00 0.00 ]
+Key: VPMULDQYrr: [ 0.00 0.00 ]
+Key: VPMULDQZ: [ 0.00 0.00 ]
+Key: VPMULDQZrm: [ 0.00 0.00 ]
+Key: VPMULDQZrmb: [ 0.00 0.00 ]
+Key: VPMULDQZrmbk: [ 0.00 0.00 ]
+Key: VPMULDQZrmbkz: [ 0.00 0.00 ]
+Key: VPMULDQZrmk: [ 0.00 0.00 ]
+Key: VPMULDQZrmkz: [ 0.00 0.00 ]
+Key: VPMULDQZrr: [ 0.00 0.00 ]
+Key: VPMULDQZrrk: [ 0.00 0.00 ]
+Key: VPMULDQZrrkz: [ 0.00 0.00 ]
+Key: VPMULDQrm: [ 0.00 0.00 ]
+Key: VPMULDQrr: [ 0.00 0.00 ]
+Key: VPMULHRSWYrm: [ 0.00 0.00 ]
+Key: VPMULHRSWYrr: [ 0.00 0.00 ]
+Key: VPMULHRSWZ: [ 0.00 0.00 ]
+Key: VPMULHRSWZrm: [ 0.00 0.00 ]
+Key: VPMULHRSWZrmk: [ 0.00 0.00 ]
+Key: VPMULHRSWZrmkz: [ 0.00 0.00 ]
+Key: VPMULHRSWZrr: [ 0.00 0.00 ]
+Key: VPMULHRSWZrrk: [ 0.00 0.00 ]
+Key: VPMULHRSWZrrkz: [ 0.00 0.00 ]
+Key: VPMULHRSWrm: [ 0.00 0.00 ]
+Key: VPMULHRSWrr: [ 0.00 0.00 ]
+Key: VPMULHUWYrm: [ 0.00 0.00 ]
+Key: VPMULHUWYrr: [ 0.00 0.00 ]
+Key: VPMULHUWZ: [ 0.00 0.00 ]
+Key: VPMULHUWZrm: [ 0.00 0.00 ]
+Key: VPMULHUWZrmk: [ 0.00 0.00 ]
+Key: VPMULHUWZrmkz: [ 0.00 0.00 ]
+Key: VPMULHUWZrr: [ 0.00 0.00 ]
+Key: VPMULHUWZrrk: [ 0.00 0.00 ]
+Key: VPMULHUWZrrkz: [ 0.00 0.00 ]
+Key: VPMULHUWrm: [ 0.00 0.00 ]
+Key: VPMULHUWrr: [ 0.00 0.00 ]
+Key: VPMULHWYrm: [ 0.00 0.00 ]
+Key: VPMULHWYrr: [ 0.00 0.00 ]
+Key: VPMULHWZ: [ 0.00 0.00 ]
+Key: VPMULHWZrm: [ 0.00 0.00 ]
+Key: VPMULHWZrmk: [ 0.00 0.00 ]
+Key: VPMULHWZrmkz: [ 0.00 0.00 ]
+Key: VPMULHWZrr: [ 0.00 0.00 ]
+Key: VPMULHWZrrk: [ 0.00 0.00 ]
+Key: VPMULHWZrrkz: [ 0.00 0.00 ]
+Key: VPMULHWrm: [ 0.00 0.00 ]
+Key: VPMULHWrr: [ 0.00 0.00 ]
+Key: VPMULLDYrm: [ 0.00 0.00 ]
+Key: VPMULLDYrr: [ 0.00 0.00 ]
+Key: VPMULLDZ: [ 0.00 0.00 ]
+Key: VPMULLDZrm: [ 0.00 0.00 ]
+Key: VPMULLDZrmb: [ 0.00 0.00 ]
+Key: VPMULLDZrmbk: [ 0.00 0.00 ]
+Key: VPMULLDZrmbkz: [ 0.00 0.00 ]
+Key: VPMULLDZrmk: [ 0.00 0.00 ]
+Key: VPMULLDZrmkz: [ 0.00 0.00 ]
+Key: VPMULLDZrr: [ 0.00 0.00 ]
+Key: VPMULLDZrrk: [ 0.00 0.00 ]
+Key: VPMULLDZrrkz: [ 0.00 0.00 ]
+Key: VPMULLDrm: [ 0.00 0.00 ]
+Key: VPMULLDrr: [ 0.00 0.00 ]
+Key: VPMULLQZ: [ 0.00 0.00 ]
+Key: VPMULLQZrm: [ 0.00 0.00 ]
+Key: VPMULLQZrmb: [ 0.00 0.00 ]
+Key: VPMULLQZrmbk: [ 0.00 0.00 ]
+Key: VPMULLQZrmbkz: [ 0.00 0.00 ]
+Key: VPMULLQZrmk: [ 0.00 0.00 ]
+Key: VPMULLQZrmkz: [ 0.00 0.00 ]
+Key: VPMULLQZrr: [ 0.00 0.00 ]
+Key: VPMULLQZrrk: [ 0.00 0.00 ]
+Key: VPMULLQZrrkz: [ 0.00 0.00 ]
+Key: VPMULLWYrm: [ 0.00 0.00 ]
+Key: VPMULLWYrr: [ 0.00 0.00 ]
+Key: VPMULLWZ: [ 0.00 0.00 ]
+Key: VPMULLWZrm: [ 0.00 0.00 ]
+Key: VPMULLWZrmk: [ 0.00 0.00 ]
+Key: VPMULLWZrmkz: [ 0.00 0.00 ]
+Key: VPMULLWZrr: [ 0.00 0.00 ]
+Key: VPMULLWZrrk: [ 0.00 0.00 ]
+Key: VPMULLWZrrkz: [ 0.00 0.00 ]
+Key: VPMULLWrm: [ 0.00 0.00 ]
+Key: VPMULLWrr: [ 0.00 0.00 ]
+Key: VPMULTISHIFTQBZ: [ 0.00 0.00 ]
+Key: VPMULTISHIFTQBZrm: [ 0.00 0.00 ]
+Key: VPMULTISHIFTQBZrmb: [ 0.00 0.00 ]
+Key: VPMULTISHIFTQBZrmbk: [ 0.00 0.00 ]
+Key: VPMULTISHIFTQBZrmbkz: [ 0.00 0.00 ]
+Key: VPMULTISHIFTQBZrmk: [ 0.00 0.00 ]
+Key: VPMULTISHIFTQBZrmkz: [ 0.00 0.00 ]
+Key: VPMULTISHIFTQBZrr: [ 0.00 0.00 ]
+Key: VPMULTISHIFTQBZrrk: [ 0.00 0.00 ]
+Key: VPMULTISHIFTQBZrrkz: [ 0.00 0.00 ]
+Key: VPMULUDQYrm: [ 0.00 0.00 ]
+Key: VPMULUDQYrr: [ 0.00 0.00 ]
+Key: VPMULUDQZ: [ 0.00 0.00 ]
+Key: VPMULUDQZrm: [ 0.00 0.00 ]
+Key: VPMULUDQZrmb: [ 0.00 0.00 ]
+Key: VPMULUDQZrmbk: [ 0.00 0.00 ]
+Key: VPMULUDQZrmbkz: [ 0.00 0.00 ]
+Key: VPMULUDQZrmk: [ 0.00 0.00 ]
+Key: VPMULUDQZrmkz: [ 0.00 0.00 ]
+Key: VPMULUDQZrr: [ 0.00 0.00 ]
+Key: VPMULUDQZrrk: [ 0.00 0.00 ]
+Key: VPMULUDQZrrkz: [ 0.00 0.00 ]
+Key: VPMULUDQrm: [ 0.00 0.00 ]
+Key: VPMULUDQrr: [ 0.00 0.00 ]
+Key: VPOPCNTBZ: [ 0.00 0.00 ]
+Key: VPOPCNTBZrm: [ 0.00 0.00 ]
+Key: VPOPCNTBZrmk: [ 0.00 0.00 ]
+Key: VPOPCNTBZrmkz: [ 0.00 0.00 ]
+Key: VPOPCNTBZrr: [ 0.00 0.00 ]
+Key: VPOPCNTBZrrk: [ 0.00 0.00 ]
+Key: VPOPCNTBZrrkz: [ 0.00 0.00 ]
+Key: VPOPCNTDZ: [ 0.00 0.00 ]
+Key: VPOPCNTDZrm: [ 0.00 0.00 ]
+Key: VPOPCNTDZrmb: [ 0.00 0.00 ]
+Key: VPOPCNTDZrmbk: [ 0.00 0.00 ]
+Key: VPOPCNTDZrmbkz: [ 0.00 0.00 ]
+Key: VPOPCNTDZrmk: [ 0.00 0.00 ]
+Key: VPOPCNTDZrmkz: [ 0.00 0.00 ]
+Key: VPOPCNTDZrr: [ 0.00 0.00 ]
+Key: VPOPCNTDZrrk: [ 0.00 0.00 ]
+Key: VPOPCNTDZrrkz: [ 0.00 0.00 ]
+Key: VPOPCNTQZ: [ 0.00 0.00 ]
+Key: VPOPCNTQZrm: [ 0.00 0.00 ]
+Key: VPOPCNTQZrmb: [ 0.00 0.00 ]
+Key: VPOPCNTQZrmbk: [ 0.00 0.00 ]
+Key: VPOPCNTQZrmbkz: [ 0.00 0.00 ]
+Key: VPOPCNTQZrmk: [ 0.00 0.00 ]
+Key: VPOPCNTQZrmkz: [ 0.00 0.00 ]
+Key: VPOPCNTQZrr: [ 0.00 0.00 ]
+Key: VPOPCNTQZrrk: [ 0.00 0.00 ]
+Key: VPOPCNTQZrrkz: [ 0.00 0.00 ]
+Key: VPOPCNTWZ: [ 0.00 0.00 ]
+Key: VPOPCNTWZrm: [ 0.00 0.00 ]
+Key: VPOPCNTWZrmk: [ 0.00 0.00 ]
+Key: VPOPCNTWZrmkz: [ 0.00 0.00 ]
+Key: VPOPCNTWZrr: [ 0.00 0.00 ]
+Key: VPOPCNTWZrrk: [ 0.00 0.00 ]
+Key: VPOPCNTWZrrkz: [ 0.00 0.00 ]
+Key: VPORDZ: [ 0.00 0.00 ]
+Key: VPORDZrm: [ 0.00 0.00 ]
+Key: VPORDZrmb: [ 0.00 0.00 ]
+Key: VPORDZrmbk: [ 0.00 0.00 ]
+Key: VPORDZrmbkz: [ 0.00 0.00 ]
+Key: VPORDZrmk: [ 0.00 0.00 ]
+Key: VPORDZrmkz: [ 0.00 0.00 ]
+Key: VPORDZrr: [ 0.00 0.00 ]
+Key: VPORDZrrk: [ 0.00 0.00 ]
+Key: VPORDZrrkz: [ 0.00 0.00 ]
+Key: VPORQZ: [ 0.00 0.00 ]
+Key: VPORQZrm: [ 0.00 0.00 ]
+Key: VPORQZrmb: [ 0.00 0.00 ]
+Key: VPORQZrmbk: [ 0.00 0.00 ]
+Key: VPORQZrmbkz: [ 0.00 0.00 ]
+Key: VPORQZrmk: [ 0.00 0.00 ]
+Key: VPORQZrmkz: [ 0.00 0.00 ]
+Key: VPORQZrr: [ 0.00 0.00 ]
+Key: VPORQZrrk: [ 0.00 0.00 ]
+Key: VPORQZrrkz: [ 0.00 0.00 ]
+Key: VPORYrm: [ 0.00 0.00 ]
+Key: VPORYrr: [ 0.00 0.00 ]
+Key: VPORrm: [ 0.00 0.00 ]
+Key: VPORrr: [ 0.00 0.00 ]
+Key: VPPERMrmr: [ 0.00 0.00 ]
+Key: VPPERMrrm: [ 0.00 0.00 ]
+Key: VPPERMrrr: [ 0.00 0.00 ]
+Key: VPPERMrrr_REV: [ 0.00 0.00 ]
+Key: VPROLDZ: [ 0.00 0.00 ]
+Key: VPROLDZmbi: [ 0.00 0.00 ]
+Key: VPROLDZmbik: [ 0.00 0.00 ]
+Key: VPROLDZmbikz: [ 0.00 0.00 ]
+Key: VPROLDZmi: [ 0.00 0.00 ]
+Key: VPROLDZmik: [ 0.00 0.00 ]
+Key: VPROLDZmikz: [ 0.00 0.00 ]
+Key: VPROLDZri: [ 0.00 0.00 ]
+Key: VPROLDZrik: [ 0.00 0.00 ]
+Key: VPROLDZrikz: [ 0.00 0.00 ]
+Key: VPROLQZ: [ 0.00 0.00 ]
+Key: VPROLQZmbi: [ 0.00 0.00 ]
+Key: VPROLQZmbik: [ 0.00 0.00 ]
+Key: VPROLQZmbikz: [ 0.00 0.00 ]
+Key: VPROLQZmi: [ 0.00 0.00 ]
+Key: VPROLQZmik: [ 0.00 0.00 ]
+Key: VPROLQZmikz: [ 0.00 0.00 ]
+Key: VPROLQZri: [ 0.00 0.00 ]
+Key: VPROLQZrik: [ 0.00 0.00 ]
+Key: VPROLQZrikz: [ 0.00 0.00 ]
+Key: VPROLVDZ: [ 0.00 0.00 ]
+Key: VPROLVDZrm: [ 0.00 0.00 ]
+Key: VPROLVDZrmb: [ 0.00 0.00 ]
+Key: VPROLVDZrmbk: [ 0.00 0.00 ]
+Key: VPROLVDZrmbkz: [ 0.00 0.00 ]
+Key: VPROLVDZrmk: [ 0.00 0.00 ]
+Key: VPROLVDZrmkz: [ 0.00 0.00 ]
+Key: VPROLVDZrr: [ 0.00 0.00 ]
+Key: VPROLVDZrrk: [ 0.00 0.00 ]
+Key: VPROLVDZrrkz: [ 0.00 0.00 ]
+Key: VPROLVQZ: [ 0.00 0.00 ]
+Key: VPROLVQZrm: [ 0.00 0.00 ]
+Key: VPROLVQZrmb: [ 0.00 0.00 ]
+Key: VPROLVQZrmbk: [ 0.00 0.00 ]
+Key: VPROLVQZrmbkz: [ 0.00 0.00 ]
+Key: VPROLVQZrmk: [ 0.00 0.00 ]
+Key: VPROLVQZrmkz: [ 0.00 0.00 ]
+Key: VPROLVQZrr: [ 0.00 0.00 ]
+Key: VPROLVQZrrk: [ 0.00 0.00 ]
+Key: VPROLVQZrrkz: [ 0.00 0.00 ]
+Key: VPRORDZ: [ 0.00 0.00 ]
+Key: VPRORDZmbi: [ 0.00 0.00 ]
+Key: VPRORDZmbik: [ 0.00 0.00 ]
+Key: VPRORDZmbikz: [ 0.00 0.00 ]
+Key: VPRORDZmi: [ 0.00 0.00 ]
+Key: VPRORDZmik: [ 0.00 0.00 ]
+Key: VPRORDZmikz: [ 0.00 0.00 ]
+Key: VPRORDZri: [ 0.00 0.00 ]
+Key: VPRORDZrik: [ 0.00 0.00 ]
+Key: VPRORDZrikz: [ 0.00 0.00 ]
+Key: VPRORQZ: [ 0.00 0.00 ]
+Key: VPRORQZmbi: [ 0.00 0.00 ]
+Key: VPRORQZmbik: [ 0.00 0.00 ]
+Key: VPRORQZmbikz: [ 0.00 0.00 ]
+Key: VPRORQZmi: [ 0.00 0.00 ]
+Key: VPRORQZmik: [ 0.00 0.00 ]
+Key: VPRORQZmikz: [ 0.00 0.00 ]
+Key: VPRORQZri: [ 0.00 0.00 ]
+Key: VPRORQZrik: [ 0.00 0.00 ]
+Key: VPRORQZrikz: [ 0.00 0.00 ]
+Key: VPRORVDZ: [ 0.00 0.00 ]
+Key: VPRORVDZrm: [ 0.00 0.00 ]
+Key: VPRORVDZrmb: [ 0.00 0.00 ]
+Key: VPRORVDZrmbk: [ 0.00 0.00 ]
+Key: VPRORVDZrmbkz: [ 0.00 0.00 ]
+Key: VPRORVDZrmk: [ 0.00 0.00 ]
+Key: VPRORVDZrmkz: [ 0.00 0.00 ]
+Key: VPRORVDZrr: [ 0.00 0.00 ]
+Key: VPRORVDZrrk: [ 0.00 0.00 ]
+Key: VPRORVDZrrkz: [ 0.00 0.00 ]
+Key: VPRORVQZ: [ 0.00 0.00 ]
+Key: VPRORVQZrm: [ 0.00 0.00 ]
+Key: VPRORVQZrmb: [ 0.00 0.00 ]
+Key: VPRORVQZrmbk: [ 0.00 0.00 ]
+Key: VPRORVQZrmbkz: [ 0.00 0.00 ]
+Key: VPRORVQZrmk: [ 0.00 0.00 ]
+Key: VPRORVQZrmkz: [ 0.00 0.00 ]
+Key: VPRORVQZrr: [ 0.00 0.00 ]
+Key: VPRORVQZrrk: [ 0.00 0.00 ]
+Key: VPRORVQZrrkz: [ 0.00 0.00 ]
+Key: VPROTBmi: [ 0.00 0.00 ]
+Key: VPROTBmr: [ 0.00 0.00 ]
+Key: VPROTBri: [ 0.00 0.00 ]
+Key: VPROTBrm: [ 0.00 0.00 ]
+Key: VPROTBrr: [ 0.00 0.00 ]
+Key: VPROTBrr_REV: [ 0.00 0.00 ]
+Key: VPROTDmi: [ 0.00 0.00 ]
+Key: VPROTDmr: [ 0.00 0.00 ]
+Key: VPROTDri: [ 0.00 0.00 ]
+Key: VPROTDrm: [ 0.00 0.00 ]
+Key: VPROTDrr: [ 0.00 0.00 ]
+Key: VPROTDrr_REV: [ 0.00 0.00 ]
+Key: VPROTQmi: [ 0.00 0.00 ]
+Key: VPROTQmr: [ 0.00 0.00 ]
+Key: VPROTQri: [ 0.00 0.00 ]
+Key: VPROTQrm: [ 0.00 0.00 ]
+Key: VPROTQrr: [ 0.00 0.00 ]
+Key: VPROTQrr_REV: [ 0.00 0.00 ]
+Key: VPROTWmi: [ 0.00 0.00 ]
+Key: VPROTWmr: [ 0.00 0.00 ]
+Key: VPROTWri: [ 0.00 0.00 ]
+Key: VPROTWrm: [ 0.00 0.00 ]
+Key: VPROTWrr: [ 0.00 0.00 ]
+Key: VPROTWrr_REV: [ 0.00 0.00 ]
+Key: VPSADBWYrm: [ 0.00 0.00 ]
+Key: VPSADBWYrr: [ 0.00 0.00 ]
+Key: VPSADBWZ: [ 0.00 0.00 ]
+Key: VPSADBWZrm: [ 0.00 0.00 ]
+Key: VPSADBWZrr: [ 0.00 0.00 ]
+Key: VPSADBWrm: [ 0.00 0.00 ]
+Key: VPSADBWrr: [ 0.00 0.00 ]
+Key: VPSCATTERDDZ: [ 0.00 0.00 ]
+Key: VPSCATTERDDZmr: [ 0.00 0.00 ]
+Key: VPSCATTERDQZ: [ 0.00 0.00 ]
+Key: VPSCATTERDQZmr: [ 0.00 0.00 ]
+Key: VPSCATTERQDZ: [ 0.00 0.00 ]
+Key: VPSCATTERQDZmr: [ 0.00 0.00 ]
+Key: VPSCATTERQQZ: [ 0.00 0.00 ]
+Key: VPSCATTERQQZmr: [ 0.00 0.00 ]
+Key: VPSHABmr: [ 0.00 0.00 ]
+Key: VPSHABrm: [ 0.00 0.00 ]
+Key: VPSHABrr: [ 0.00 0.00 ]
+Key: VPSHABrr_REV: [ 0.00 0.00 ]
+Key: VPSHADmr: [ 0.00 0.00 ]
+Key: VPSHADrm: [ 0.00 0.00 ]
+Key: VPSHADrr: [ 0.00 0.00 ]
+Key: VPSHADrr_REV: [ 0.00 0.00 ]
+Key: VPSHAQmr: [ 0.00 0.00 ]
+Key: VPSHAQrm: [ 0.00 0.00 ]
+Key: VPSHAQrr: [ 0.00 0.00 ]
+Key: VPSHAQrr_REV: [ 0.00 0.00 ]
+Key: VPSHAWmr: [ 0.00 0.00 ]
+Key: VPSHAWrm: [ 0.00 0.00 ]
+Key: VPSHAWrr: [ 0.00 0.00 ]
+Key: VPSHAWrr_REV: [ 0.00 0.00 ]
+Key: VPSHLBmr: [ 0.00 0.00 ]
+Key: VPSHLBrm: [ 0.00 0.00 ]
+Key: VPSHLBrr: [ 0.00 0.00 ]
+Key: VPSHLBrr_REV: [ 0.00 0.00 ]
+Key: VPSHLDDZ: [ 0.00 0.00 ]
+Key: VPSHLDDZrmbi: [ 0.00 0.00 ]
+Key: VPSHLDDZrmbik: [ 0.00 0.00 ]
+Key: VPSHLDDZrmbikz: [ 0.00 0.00 ]
+Key: VPSHLDDZrmi: [ 0.00 0.00 ]
+Key: VPSHLDDZrmik: [ 0.00 0.00 ]
+Key: VPSHLDDZrmikz: [ 0.00 0.00 ]
+Key: VPSHLDDZrri: [ 0.00 0.00 ]
+Key: VPSHLDDZrrik: [ 0.00 0.00 ]
+Key: VPSHLDDZrrikz: [ 0.00 0.00 ]
+Key: VPSHLDQZ: [ 0.00 0.00 ]
+Key: VPSHLDQZrmbi: [ 0.00 0.00 ]
+Key: VPSHLDQZrmbik: [ 0.00 0.00 ]
+Key: VPSHLDQZrmbikz: [ 0.00 0.00 ]
+Key: VPSHLDQZrmi: [ 0.00 0.00 ]
+Key: VPSHLDQZrmik: [ 0.00 0.00 ]
+Key: VPSHLDQZrmikz: [ 0.00 0.00 ]
+Key: VPSHLDQZrri: [ 0.00 0.00 ]
+Key: VPSHLDQZrrik: [ 0.00 0.00 ]
+Key: VPSHLDQZrrikz: [ 0.00 0.00 ]
+Key: VPSHLDVDZ: [ 0.00 0.00 ]
+Key: VPSHLDVDZm: [ 0.00 0.00 ]
+Key: VPSHLDVDZmb: [ 0.00 0.00 ]
+Key: VPSHLDVDZmbk: [ 0.00 0.00 ]
+Key: VPSHLDVDZmbkz: [ 0.00 0.00 ]
+Key: VPSHLDVDZmk: [ 0.00 0.00 ]
+Key: VPSHLDVDZmkz: [ 0.00 0.00 ]
+Key: VPSHLDVDZr: [ 0.00 0.00 ]
+Key: VPSHLDVDZrk: [ 0.00 0.00 ]
+Key: VPSHLDVDZrkz: [ 0.00 0.00 ]
+Key: VPSHLDVQZ: [ 0.00 0.00 ]
+Key: VPSHLDVQZm: [ 0.00 0.00 ]
+Key: VPSHLDVQZmb: [ 0.00 0.00 ]
+Key: VPSHLDVQZmbk: [ 0.00 0.00 ]
+Key: VPSHLDVQZmbkz: [ 0.00 0.00 ]
+Key: VPSHLDVQZmk: [ 0.00 0.00 ]
+Key: VPSHLDVQZmkz: [ 0.00 0.00 ]
+Key: VPSHLDVQZr: [ 0.00 0.00 ]
+Key: VPSHLDVQZrk: [ 0.00 0.00 ]
+Key: VPSHLDVQZrkz: [ 0.00 0.00 ]
+Key: VPSHLDVWZ: [ 0.00 0.00 ]
+Key: VPSHLDVWZm: [ 0.00 0.00 ]
+Key: VPSHLDVWZmk: [ 0.00 0.00 ]
+Key: VPSHLDVWZmkz: [ 0.00 0.00 ]
+Key: VPSHLDVWZr: [ 0.00 0.00 ]
+Key: VPSHLDVWZrk: [ 0.00 0.00 ]
+Key: VPSHLDVWZrkz: [ 0.00 0.00 ]
+Key: VPSHLDWZ: [ 0.00 0.00 ]
+Key: VPSHLDWZrmi: [ 0.00 0.00 ]
+Key: VPSHLDWZrmik: [ 0.00 0.00 ]
+Key: VPSHLDWZrmikz: [ 0.00 0.00 ]
+Key: VPSHLDWZrri: [ 0.00 0.00 ]
+Key: VPSHLDWZrrik: [ 0.00 0.00 ]
+Key: VPSHLDWZrrikz: [ 0.00 0.00 ]
+Key: VPSHLDmr: [ 0.00 0.00 ]
+Key: VPSHLDrm: [ 0.00 0.00 ]
+Key: VPSHLDrr: [ 0.00 0.00 ]
+Key: VPSHLDrr_REV: [ 0.00 0.00 ]
+Key: VPSHLQmr: [ 0.00 0.00 ]
+Key: VPSHLQrm: [ 0.00 0.00 ]
+Key: VPSHLQrr: [ 0.00 0.00 ]
+Key: VPSHLQrr_REV: [ 0.00 0.00 ]
+Key: VPSHLWmr: [ 0.00 0.00 ]
+Key: VPSHLWrm: [ 0.00 0.00 ]
+Key: VPSHLWrr: [ 0.00 0.00 ]
+Key: VPSHLWrr_REV: [ 0.00 0.00 ]
+Key: VPSHRDDZ: [ 0.00 0.00 ]
+Key: VPSHRDDZrmbi: [ 0.00 0.00 ]
+Key: VPSHRDDZrmbik: [ 0.00 0.00 ]
+Key: VPSHRDDZrmbikz: [ 0.00 0.00 ]
+Key: VPSHRDDZrmi: [ 0.00 0.00 ]
+Key: VPSHRDDZrmik: [ 0.00 0.00 ]
+Key: VPSHRDDZrmikz: [ 0.00 0.00 ]
+Key: VPSHRDDZrri: [ 0.00 0.00 ]
+Key: VPSHRDDZrrik: [ 0.00 0.00 ]
+Key: VPSHRDDZrrikz: [ 0.00 0.00 ]
+Key: VPSHRDQZ: [ 0.00 0.00 ]
+Key: VPSHRDQZrmbi: [ 0.00 0.00 ]
+Key: VPSHRDQZrmbik: [ 0.00 0.00 ]
+Key: VPSHRDQZrmbikz: [ 0.00 0.00 ]
+Key: VPSHRDQZrmi: [ 0.00 0.00 ]
+Key: VPSHRDQZrmik: [ 0.00 0.00 ]
+Key: VPSHRDQZrmikz: [ 0.00 0.00 ]
+Key: VPSHRDQZrri: [ 0.00 0.00 ]
+Key: VPSHRDQZrrik: [ 0.00 0.00 ]
+Key: VPSHRDQZrrikz: [ 0.00 0.00 ]
+Key: VPSHRDVDZ: [ 0.00 0.00 ]
+Key: VPSHRDVDZm: [ 0.00 0.00 ]
+Key: VPSHRDVDZmb: [ 0.00 0.00 ]
+Key: VPSHRDVDZmbk: [ 0.00 0.00 ]
+Key: VPSHRDVDZmbkz: [ 0.00 0.00 ]
+Key: VPSHRDVDZmk: [ 0.00 0.00 ]
+Key: VPSHRDVDZmkz: [ 0.00 0.00 ]
+Key: VPSHRDVDZr: [ 0.00 0.00 ]
+Key: VPSHRDVDZrk: [ 0.00 0.00 ]
+Key: VPSHRDVDZrkz: [ 0.00 0.00 ]
+Key: VPSHRDVQZ: [ 0.00 0.00 ]
+Key: VPSHRDVQZm: [ 0.00 0.00 ]
+Key: VPSHRDVQZmb: [ 0.00 0.00 ]
+Key: VPSHRDVQZmbk: [ 0.00 0.00 ]
+Key: VPSHRDVQZmbkz: [ 0.00 0.00 ]
+Key: VPSHRDVQZmk: [ 0.00 0.00 ]
+Key: VPSHRDVQZmkz: [ 0.00 0.00 ]
+Key: VPSHRDVQZr: [ 0.00 0.00 ]
+Key: VPSHRDVQZrk: [ 0.00 0.00 ]
+Key: VPSHRDVQZrkz: [ 0.00 0.00 ]
+Key: VPSHRDVWZ: [ 0.00 0.00 ]
+Key: VPSHRDVWZm: [ 0.00 0.00 ]
+Key: VPSHRDVWZmk: [ 0.00 0.00 ]
+Key: VPSHRDVWZmkz: [ 0.00 0.00 ]
+Key: VPSHRDVWZr: [ 0.00 0.00 ]
+Key: VPSHRDVWZrk: [ 0.00 0.00 ]
+Key: VPSHRDVWZrkz: [ 0.00 0.00 ]
+Key: VPSHRDWZ: [ 0.00 0.00 ]
+Key: VPSHRDWZrmi: [ 0.00 0.00 ]
+Key: VPSHRDWZrmik: [ 0.00 0.00 ]
+Key: VPSHRDWZrmikz: [ 0.00 0.00 ]
+Key: VPSHRDWZrri: [ 0.00 0.00 ]
+Key: VPSHRDWZrrik: [ 0.00 0.00 ]
+Key: VPSHRDWZrrikz: [ 0.00 0.00 ]
+Key: VPSHUFBITQMBZ: [ 0.00 0.00 ]
+Key: VPSHUFBITQMBZrm: [ 0.00 0.00 ]
+Key: VPSHUFBITQMBZrmk: [ 0.00 0.00 ]
+Key: VPSHUFBITQMBZrr: [ 0.00 0.00 ]
+Key: VPSHUFBITQMBZrrk: [ 0.00 0.00 ]
+Key: VPSHUFBYrm: [ 0.00 0.00 ]
+Key: VPSHUFBYrr: [ 0.00 0.00 ]
+Key: VPSHUFBZ: [ 0.00 0.00 ]
+Key: VPSHUFBZrm: [ 0.00 0.00 ]
+Key: VPSHUFBZrmk: [ 0.00 0.00 ]
+Key: VPSHUFBZrmkz: [ 0.00 0.00 ]
+Key: VPSHUFBZrr: [ 0.00 0.00 ]
+Key: VPSHUFBZrrk: [ 0.00 0.00 ]
+Key: VPSHUFBZrrkz: [ 0.00 0.00 ]
+Key: VPSHUFBrm: [ 0.00 0.00 ]
+Key: VPSHUFBrr: [ 0.00 0.00 ]
+Key: VPSHUFDYmi: [ 0.00 0.00 ]
+Key: VPSHUFDYri: [ 0.00 0.00 ]
+Key: VPSHUFDZ: [ 0.00 0.00 ]
+Key: VPSHUFDZmbi: [ 0.00 0.00 ]
+Key: VPSHUFDZmbik: [ 0.00 0.00 ]
+Key: VPSHUFDZmbikz: [ 0.00 0.00 ]
+Key: VPSHUFDZmi: [ 0.00 0.00 ]
+Key: VPSHUFDZmik: [ 0.00 0.00 ]
+Key: VPSHUFDZmikz: [ 0.00 0.00 ]
+Key: VPSHUFDZri: [ 0.00 0.00 ]
+Key: VPSHUFDZrik: [ 0.00 0.00 ]
+Key: VPSHUFDZrikz: [ 0.00 0.00 ]
+Key: VPSHUFDmi: [ 0.00 0.00 ]
+Key: VPSHUFDri: [ 0.00 0.00 ]
+Key: VPSHUFHWYmi: [ 0.00 0.00 ]
+Key: VPSHUFHWYri: [ 0.00 0.00 ]
+Key: VPSHUFHWZ: [ 0.00 0.00 ]
+Key: VPSHUFHWZmi: [ 0.00 0.00 ]
+Key: VPSHUFHWZmik: [ 0.00 0.00 ]
+Key: VPSHUFHWZmikz: [ 0.00 0.00 ]
+Key: VPSHUFHWZri: [ 0.00 0.00 ]
+Key: VPSHUFHWZrik: [ 0.00 0.00 ]
+Key: VPSHUFHWZrikz: [ 0.00 0.00 ]
+Key: VPSHUFHWmi: [ 0.00 0.00 ]
+Key: VPSHUFHWri: [ 0.00 0.00 ]
+Key: VPSHUFLWYmi: [ 0.00 0.00 ]
+Key: VPSHUFLWYri: [ 0.00 0.00 ]
+Key: VPSHUFLWZ: [ 0.00 0.00 ]
+Key: VPSHUFLWZmi: [ 0.00 0.00 ]
+Key: VPSHUFLWZmik: [ 0.00 0.00 ]
+Key: VPSHUFLWZmikz: [ 0.00 0.00 ]
+Key: VPSHUFLWZri: [ 0.00 0.00 ]
+Key: VPSHUFLWZrik: [ 0.00 0.00 ]
+Key: VPSHUFLWZrikz: [ 0.00 0.00 ]
+Key: VPSHUFLWmi: [ 0.00 0.00 ]
+Key: VPSHUFLWri: [ 0.00 0.00 ]
+Key: VPSIGNBYrm: [ 0.00 0.00 ]
+Key: VPSIGNBYrr: [ 0.00 0.00 ]
+Key: VPSIGNBrm: [ 0.00 0.00 ]
+Key: VPSIGNBrr: [ 0.00 0.00 ]
+Key: VPSIGNDYrm: [ 0.00 0.00 ]
+Key: VPSIGNDYrr: [ 0.00 0.00 ]
+Key: VPSIGNDrm: [ 0.00 0.00 ]
+Key: VPSIGNDrr: [ 0.00 0.00 ]
+Key: VPSIGNWYrm: [ 0.00 0.00 ]
+Key: VPSIGNWYrr: [ 0.00 0.00 ]
+Key: VPSIGNWrm: [ 0.00 0.00 ]
+Key: VPSIGNWrr: [ 0.00 0.00 ]
+Key: VPSLLDQYri: [ 0.00 0.00 ]
+Key: VPSLLDQZ: [ 0.00 0.00 ]
+Key: VPSLLDQZmi: [ 0.00 0.00 ]
+Key: VPSLLDQZri: [ 0.00 0.00 ]
+Key: VPSLLDQri: [ 0.00 0.00 ]
+Key: VPSLLDYri: [ 0.00 0.00 ]
+Key: VPSLLDYrm: [ 0.00 0.00 ]
+Key: VPSLLDYrr: [ 0.00 0.00 ]
+Key: VPSLLDZ: [ 0.00 0.00 ]
+Key: VPSLLDZmbi: [ 0.00 0.00 ]
+Key: VPSLLDZmbik: [ 0.00 0.00 ]
+Key: VPSLLDZmbikz: [ 0.00 0.00 ]
+Key: VPSLLDZmi: [ 0.00 0.00 ]
+Key: VPSLLDZmik: [ 0.00 0.00 ]
+Key: VPSLLDZmikz: [ 0.00 0.00 ]
+Key: VPSLLDZri: [ 0.00 0.00 ]
+Key: VPSLLDZrik: [ 0.00 0.00 ]
+Key: VPSLLDZrikz: [ 0.00 0.00 ]
+Key: VPSLLDZrm: [ 0.00 0.00 ]
+Key: VPSLLDZrmk: [ 0.00 0.00 ]
+Key: VPSLLDZrmkz: [ 0.00 0.00 ]
+Key: VPSLLDZrr: [ 0.00 0.00 ]
+Key: VPSLLDZrrk: [ 0.00 0.00 ]
+Key: VPSLLDZrrkz: [ 0.00 0.00 ]
+Key: VPSLLDri: [ 0.00 0.00 ]
+Key: VPSLLDrm: [ 0.00 0.00 ]
+Key: VPSLLDrr: [ 0.00 0.00 ]
+Key: VPSLLQYri: [ 0.00 0.00 ]
+Key: VPSLLQYrm: [ 0.00 0.00 ]
+Key: VPSLLQYrr: [ 0.00 0.00 ]
+Key: VPSLLQZ: [ 0.00 0.00 ]
+Key: VPSLLQZmbi: [ 0.00 0.00 ]
+Key: VPSLLQZmbik: [ 0.00 0.00 ]
+Key: VPSLLQZmbikz: [ 0.00 0.00 ]
+Key: VPSLLQZmi: [ 0.00 0.00 ]
+Key: VPSLLQZmik: [ 0.00 0.00 ]
+Key: VPSLLQZmikz: [ 0.00 0.00 ]
+Key: VPSLLQZri: [ 0.00 0.00 ]
+Key: VPSLLQZrik: [ 0.00 0.00 ]
+Key: VPSLLQZrikz: [ 0.00 0.00 ]
+Key: VPSLLQZrm: [ 0.00 0.00 ]
+Key: VPSLLQZrmk: [ 0.00 0.00 ]
+Key: VPSLLQZrmkz: [ 0.00 0.00 ]
+Key: VPSLLQZrr: [ 0.00 0.00 ]
+Key: VPSLLQZrrk: [ 0.00 0.00 ]
+Key: VPSLLQZrrkz: [ 0.00 0.00 ]
+Key: VPSLLQri: [ 0.00 0.00 ]
+Key: VPSLLQrm: [ 0.00 0.00 ]
+Key: VPSLLQrr: [ 0.00 0.00 ]
+Key: VPSLLVDYrm: [ 0.00 0.00 ]
+Key: VPSLLVDYrr: [ 0.00 0.00 ]
+Key: VPSLLVDZ: [ 0.00 0.00 ]
+Key: VPSLLVDZrm: [ 0.00 0.00 ]
+Key: VPSLLVDZrmb: [ 0.00 0.00 ]
+Key: VPSLLVDZrmbk: [ 0.00 0.00 ]
+Key: VPSLLVDZrmbkz: [ 0.00 0.00 ]
+Key: VPSLLVDZrmk: [ 0.00 0.00 ]
+Key: VPSLLVDZrmkz: [ 0.00 0.00 ]
+Key: VPSLLVDZrr: [ 0.00 0.00 ]
+Key: VPSLLVDZrrk: [ 0.00 0.00 ]
+Key: VPSLLVDZrrkz: [ 0.00 0.00 ]
+Key: VPSLLVDrm: [ 0.00 0.00 ]
+Key: VPSLLVDrr: [ 0.00 0.00 ]
+Key: VPSLLVQYrm: [ 0.00 0.00 ]
+Key: VPSLLVQYrr: [ 0.00 0.00 ]
+Key: VPSLLVQZ: [ 0.00 0.00 ]
+Key: VPSLLVQZrm: [ 0.00 0.00 ]
+Key: VPSLLVQZrmb: [ 0.00 0.00 ]
+Key: VPSLLVQZrmbk: [ 0.00 0.00 ]
+Key: VPSLLVQZrmbkz: [ 0.00 0.00 ]
+Key: VPSLLVQZrmk: [ 0.00 0.00 ]
+Key: VPSLLVQZrmkz: [ 0.00 0.00 ]
+Key: VPSLLVQZrr: [ 0.00 0.00 ]
+Key: VPSLLVQZrrk: [ 0.00 0.00 ]
+Key: VPSLLVQZrrkz: [ 0.00 0.00 ]
+Key: VPSLLVQrm: [ 0.00 0.00 ]
+Key: VPSLLVQrr: [ 0.00 0.00 ]
+Key: VPSLLVWZ: [ 0.00 0.00 ]
+Key: VPSLLVWZrm: [ 0.00 0.00 ]
+Key: VPSLLVWZrmk: [ 0.00 0.00 ]
+Key: VPSLLVWZrmkz: [ 0.00 0.00 ]
+Key: VPSLLVWZrr: [ 0.00 0.00 ]
+Key: VPSLLVWZrrk: [ 0.00 0.00 ]
+Key: VPSLLVWZrrkz: [ 0.00 0.00 ]
+Key: VPSLLWYri: [ 0.00 0.00 ]
+Key: VPSLLWYrm: [ 0.00 0.00 ]
+Key: VPSLLWYrr: [ 0.00 0.00 ]
+Key: VPSLLWZ: [ 0.00 0.00 ]
+Key: VPSLLWZmi: [ 0.00 0.00 ]
+Key: VPSLLWZmik: [ 0.00 0.00 ]
+Key: VPSLLWZmikz: [ 0.00 0.00 ]
+Key: VPSLLWZri: [ 0.00 0.00 ]
+Key: VPSLLWZrik: [ 0.00 0.00 ]
+Key: VPSLLWZrikz: [ 0.00 0.00 ]
+Key: VPSLLWZrm: [ 0.00 0.00 ]
+Key: VPSLLWZrmk: [ 0.00 0.00 ]
+Key: VPSLLWZrmkz: [ 0.00 0.00 ]
+Key: VPSLLWZrr: [ 0.00 0.00 ]
+Key: VPSLLWZrrk: [ 0.00 0.00 ]
+Key: VPSLLWZrrkz: [ 0.00 0.00 ]
+Key: VPSLLWri: [ 0.00 0.00 ]
+Key: VPSLLWrm: [ 0.00 0.00 ]
+Key: VPSLLWrr: [ 0.00 0.00 ]
+Key: VPSRADYri: [ 0.00 0.00 ]
+Key: VPSRADYrm: [ 0.00 0.00 ]
+Key: VPSRADYrr: [ 0.00 0.00 ]
+Key: VPSRADZ: [ 0.00 0.00 ]
+Key: VPSRADZmbi: [ 0.00 0.00 ]
+Key: VPSRADZmbik: [ 0.00 0.00 ]
+Key: VPSRADZmbikz: [ 0.00 0.00 ]
+Key: VPSRADZmi: [ 0.00 0.00 ]
+Key: VPSRADZmik: [ 0.00 0.00 ]
+Key: VPSRADZmikz: [ 0.00 0.00 ]
+Key: VPSRADZri: [ 0.00 0.00 ]
+Key: VPSRADZrik: [ 0.00 0.00 ]
+Key: VPSRADZrikz: [ 0.00 0.00 ]
+Key: VPSRADZrm: [ 0.00 0.00 ]
+Key: VPSRADZrmk: [ 0.00 0.00 ]
+Key: VPSRADZrmkz: [ 0.00 0.00 ]
+Key: VPSRADZrr: [ 0.00 0.00 ]
+Key: VPSRADZrrk: [ 0.00 0.00 ]
+Key: VPSRADZrrkz: [ 0.00 0.00 ]
+Key: VPSRADri: [ 0.00 0.00 ]
+Key: VPSRADrm: [ 0.00 0.00 ]
+Key: VPSRADrr: [ 0.00 0.00 ]
+Key: VPSRAQZ: [ 0.00 0.00 ]
+Key: VPSRAQZmbi: [ 0.00 0.00 ]
+Key: VPSRAQZmbik: [ 0.00 0.00 ]
+Key: VPSRAQZmbikz: [ 0.00 0.00 ]
+Key: VPSRAQZmi: [ 0.00 0.00 ]
+Key: VPSRAQZmik: [ 0.00 0.00 ]
+Key: VPSRAQZmikz: [ 0.00 0.00 ]
+Key: VPSRAQZri: [ 0.00 0.00 ]
+Key: VPSRAQZrik: [ 0.00 0.00 ]
+Key: VPSRAQZrikz: [ 0.00 0.00 ]
+Key: VPSRAQZrm: [ 0.00 0.00 ]
+Key: VPSRAQZrmk: [ 0.00 0.00 ]
+Key: VPSRAQZrmkz: [ 0.00 0.00 ]
+Key: VPSRAQZrr: [ 0.00 0.00 ]
+Key: VPSRAQZrrk: [ 0.00 0.00 ]
+Key: VPSRAQZrrkz: [ 0.00 0.00 ]
+Key: VPSRAVDYrm: [ 0.00 0.00 ]
+Key: VPSRAVDYrr: [ 0.00 0.00 ]
+Key: VPSRAVDZ: [ 0.00 0.00 ]
+Key: VPSRAVDZrm: [ 0.00 0.00 ]
+Key: VPSRAVDZrmb: [ 0.00 0.00 ]
+Key: VPSRAVDZrmbk: [ 0.00 0.00 ]
+Key: VPSRAVDZrmbkz: [ 0.00 0.00 ]
+Key: VPSRAVDZrmk: [ 0.00 0.00 ]
+Key: VPSRAVDZrmkz: [ 0.00 0.00 ]
+Key: VPSRAVDZrr: [ 0.00 0.00 ]
+Key: VPSRAVDZrrk: [ 0.00 0.00 ]
+Key: VPSRAVDZrrkz: [ 0.00 0.00 ]
+Key: VPSRAVDrm: [ 0.00 0.00 ]
+Key: VPSRAVDrr: [ 0.00 0.00 ]
+Key: VPSRAVQZ: [ 0.00 0.00 ]
+Key: VPSRAVQZrm: [ 0.00 0.00 ]
+Key: VPSRAVQZrmb: [ 0.00 0.00 ]
+Key: VPSRAVQZrmbk: [ 0.00 0.00 ]
+Key: VPSRAVQZrmbkz: [ 0.00 0.00 ]
+Key: VPSRAVQZrmk: [ 0.00 0.00 ]
+Key: VPSRAVQZrmkz: [ 0.00 0.00 ]
+Key: VPSRAVQZrr: [ 0.00 0.00 ]
+Key: VPSRAVQZrrk: [ 0.00 0.00 ]
+Key: VPSRAVQZrrkz: [ 0.00 0.00 ]
+Key: VPSRAVWZ: [ 0.00 0.00 ]
+Key: VPSRAVWZrm: [ 0.00 0.00 ]
+Key: VPSRAVWZrmk: [ 0.00 0.00 ]
+Key: VPSRAVWZrmkz: [ 0.00 0.00 ]
+Key: VPSRAVWZrr: [ 0.00 0.00 ]
+Key: VPSRAVWZrrk: [ 0.00 0.00 ]
+Key: VPSRAVWZrrkz: [ 0.00 0.00 ]
+Key: VPSRAWYri: [ 0.00 0.00 ]
+Key: VPSRAWYrm: [ 0.00 0.00 ]
+Key: VPSRAWYrr: [ 0.00 0.00 ]
+Key: VPSRAWZ: [ 0.00 0.00 ]
+Key: VPSRAWZmi: [ 0.00 0.00 ]
+Key: VPSRAWZmik: [ 0.00 0.00 ]
+Key: VPSRAWZmikz: [ 0.00 0.00 ]
+Key: VPSRAWZri: [ 0.00 0.00 ]
+Key: VPSRAWZrik: [ 0.00 0.00 ]
+Key: VPSRAWZrikz: [ 0.00 0.00 ]
+Key: VPSRAWZrm: [ 0.00 0.00 ]
+Key: VPSRAWZrmk: [ 0.00 0.00 ]
+Key: VPSRAWZrmkz: [ 0.00 0.00 ]
+Key: VPSRAWZrr: [ 0.00 0.00 ]
+Key: VPSRAWZrrk: [ 0.00 0.00 ]
+Key: VPSRAWZrrkz: [ 0.00 0.00 ]
+Key: VPSRAWri: [ 0.00 0.00 ]
+Key: VPSRAWrm: [ 0.00 0.00 ]
+Key: VPSRAWrr: [ 0.00 0.00 ]
+Key: VPSRLDQYri: [ 0.00 0.00 ]
+Key: VPSRLDQZ: [ 0.00 0.00 ]
+Key: VPSRLDQZmi: [ 0.00 0.00 ]
+Key: VPSRLDQZri: [ 0.00 0.00 ]
+Key: VPSRLDQri: [ 0.00 0.00 ]
+Key: VPSRLDYri: [ 0.00 0.00 ]
+Key: VPSRLDYrm: [ 0.00 0.00 ]
+Key: VPSRLDYrr: [ 0.00 0.00 ]
+Key: VPSRLDZ: [ 0.00 0.00 ]
+Key: VPSRLDZmbi: [ 0.00 0.00 ]
+Key: VPSRLDZmbik: [ 0.00 0.00 ]
+Key: VPSRLDZmbikz: [ 0.00 0.00 ]
+Key: VPSRLDZmi: [ 0.00 0.00 ]
+Key: VPSRLDZmik: [ 0.00 0.00 ]
+Key: VPSRLDZmikz: [ 0.00 0.00 ]
+Key: VPSRLDZri: [ 0.00 0.00 ]
+Key: VPSRLDZrik: [ 0.00 0.00 ]
+Key: VPSRLDZrikz: [ 0.00 0.00 ]
+Key: VPSRLDZrm: [ 0.00 0.00 ]
+Key: VPSRLDZrmk: [ 0.00 0.00 ]
+Key: VPSRLDZrmkz: [ 0.00 0.00 ]
+Key: VPSRLDZrr: [ 0.00 0.00 ]
+Key: VPSRLDZrrk: [ 0.00 0.00 ]
+Key: VPSRLDZrrkz: [ 0.00 0.00 ]
+Key: VPSRLDri: [ 0.00 0.00 ]
+Key: VPSRLDrm: [ 0.00 0.00 ]
+Key: VPSRLDrr: [ 0.00 0.00 ]
+Key: VPSRLQYri: [ 0.00 0.00 ]
+Key: VPSRLQYrm: [ 0.00 0.00 ]
+Key: VPSRLQYrr: [ 0.00 0.00 ]
+Key: VPSRLQZ: [ 0.00 0.00 ]
+Key: VPSRLQZmbi: [ 0.00 0.00 ]
+Key: VPSRLQZmbik: [ 0.00 0.00 ]
+Key: VPSRLQZmbikz: [ 0.00 0.00 ]
+Key: VPSRLQZmi: [ 0.00 0.00 ]
+Key: VPSRLQZmik: [ 0.00 0.00 ]
+Key: VPSRLQZmikz: [ 0.00 0.00 ]
+Key: VPSRLQZri: [ 0.00 0.00 ]
+Key: VPSRLQZrik: [ 0.00 0.00 ]
+Key: VPSRLQZrikz: [ 0.00 0.00 ]
+Key: VPSRLQZrm: [ 0.00 0.00 ]
+Key: VPSRLQZrmk: [ 0.00 0.00 ]
+Key: VPSRLQZrmkz: [ 0.00 0.00 ]
+Key: VPSRLQZrr: [ 0.00 0.00 ]
+Key: VPSRLQZrrk: [ 0.00 0.00 ]
+Key: VPSRLQZrrkz: [ 0.00 0.00 ]
+Key: VPSRLQri: [ 0.00 0.00 ]
+Key: VPSRLQrm: [ 0.00 0.00 ]
+Key: VPSRLQrr: [ 0.00 0.00 ]
+Key: VPSRLVDYrm: [ 0.00 0.00 ]
+Key: VPSRLVDYrr: [ 0.00 0.00 ]
+Key: VPSRLVDZ: [ 0.00 0.00 ]
+Key: VPSRLVDZrm: [ 0.00 0.00 ]
+Key: VPSRLVDZrmb: [ 0.00 0.00 ]
+Key: VPSRLVDZrmbk: [ 0.00 0.00 ]
+Key: VPSRLVDZrmbkz: [ 0.00 0.00 ]
+Key: VPSRLVDZrmk: [ 0.00 0.00 ]
+Key: VPSRLVDZrmkz: [ 0.00 0.00 ]
+Key: VPSRLVDZrr: [ 0.00 0.00 ]
+Key: VPSRLVDZrrk: [ 0.00 0.00 ]
+Key: VPSRLVDZrrkz: [ 0.00 0.00 ]
+Key: VPSRLVDrm: [ 0.00 0.00 ]
+Key: VPSRLVDrr: [ 0.00 0.00 ]
+Key: VPSRLVQYrm: [ 0.00 0.00 ]
+Key: VPSRLVQYrr: [ 0.00 0.00 ]
+Key: VPSRLVQZ: [ 0.00 0.00 ]
+Key: VPSRLVQZrm: [ 0.00 0.00 ]
+Key: VPSRLVQZrmb: [ 0.00 0.00 ]
+Key: VPSRLVQZrmbk: [ 0.00 0.00 ]
+Key: VPSRLVQZrmbkz: [ 0.00 0.00 ]
+Key: VPSRLVQZrmk: [ 0.00 0.00 ]
+Key: VPSRLVQZrmkz: [ 0.00 0.00 ]
+Key: VPSRLVQZrr: [ 0.00 0.00 ]
+Key: VPSRLVQZrrk: [ 0.00 0.00 ]
+Key: VPSRLVQZrrkz: [ 0.00 0.00 ]
+Key: VPSRLVQrm: [ 0.00 0.00 ]
+Key: VPSRLVQrr: [ 0.00 0.00 ]
+Key: VPSRLVWZ: [ 0.00 0.00 ]
+Key: VPSRLVWZrm: [ 0.00 0.00 ]
+Key: VPSRLVWZrmk: [ 0.00 0.00 ]
+Key: VPSRLVWZrmkz: [ 0.00 0.00 ]
+Key: VPSRLVWZrr: [ 0.00 0.00 ]
+Key: VPSRLVWZrrk: [ 0.00 0.00 ]
+Key: VPSRLVWZrrkz: [ 0.00 0.00 ]
+Key: VPSRLWYri: [ 0.00 0.00 ]
+Key: VPSRLWYrm: [ 0.00 0.00 ]
+Key: VPSRLWYrr: [ 0.00 0.00 ]
+Key: VPSRLWZ: [ 0.00 0.00 ]
+Key: VPSRLWZmi: [ 0.00 0.00 ]
+Key: VPSRLWZmik: [ 0.00 0.00 ]
+Key: VPSRLWZmikz: [ 0.00 0.00 ]
+Key: VPSRLWZri: [ 0.00 0.00 ]
+Key: VPSRLWZrik: [ 0.00 0.00 ]
+Key: VPSRLWZrikz: [ 0.00 0.00 ]
+Key: VPSRLWZrm: [ 0.00 0.00 ]
+Key: VPSRLWZrmk: [ 0.00 0.00 ]
+Key: VPSRLWZrmkz: [ 0.00 0.00 ]
+Key: VPSRLWZrr: [ 0.00 0.00 ]
+Key: VPSRLWZrrk: [ 0.00 0.00 ]
+Key: VPSRLWZrrkz: [ 0.00 0.00 ]
+Key: VPSRLWri: [ 0.00 0.00 ]
+Key: VPSRLWrm: [ 0.00 0.00 ]
+Key: VPSRLWrr: [ 0.00 0.00 ]
+Key: VPSUBBYrm: [ 0.00 0.00 ]
+Key: VPSUBBYrr: [ 0.00 0.00 ]
+Key: VPSUBBZ: [ 0.00 0.00 ]
+Key: VPSUBBZrm: [ 0.00 0.00 ]
+Key: VPSUBBZrmk: [ 0.00 0.00 ]
+Key: VPSUBBZrmkz: [ 0.00 0.00 ]
+Key: VPSUBBZrr: [ 0.00 0.00 ]
+Key: VPSUBBZrrk: [ 0.00 0.00 ]
+Key: VPSUBBZrrkz: [ 0.00 0.00 ]
+Key: VPSUBBrm: [ 0.00 0.00 ]
+Key: VPSUBBrr: [ 0.00 0.00 ]
+Key: VPSUBDYrm: [ 0.00 0.00 ]
+Key: VPSUBDYrr: [ 0.00 0.00 ]
+Key: VPSUBDZ: [ 0.00 0.00 ]
+Key: VPSUBDZrm: [ 0.00 0.00 ]
+Key: VPSUBDZrmb: [ 0.00 0.00 ]
+Key: VPSUBDZrmbk: [ 0.00 0.00 ]
+Key: VPSUBDZrmbkz: [ 0.00 0.00 ]
+Key: VPSUBDZrmk: [ 0.00 0.00 ]
+Key: VPSUBDZrmkz: [ 0.00 0.00 ]
+Key: VPSUBDZrr: [ 0.00 0.00 ]
+Key: VPSUBDZrrk: [ 0.00 0.00 ]
+Key: VPSUBDZrrkz: [ 0.00 0.00 ]
+Key: VPSUBDrm: [ 0.00 0.00 ]
+Key: VPSUBDrr: [ 0.00 0.00 ]
+Key: VPSUBQYrm: [ 0.00 0.00 ]
+Key: VPSUBQYrr: [ 0.00 0.00 ]
+Key: VPSUBQZ: [ 0.00 0.00 ]
+Key: VPSUBQZrm: [ 0.00 0.00 ]
+Key: VPSUBQZrmb: [ 0.00 0.00 ]
+Key: VPSUBQZrmbk: [ 0.00 0.00 ]
+Key: VPSUBQZrmbkz: [ 0.00 0.00 ]
+Key: VPSUBQZrmk: [ 0.00 0.00 ]
+Key: VPSUBQZrmkz: [ 0.00 0.00 ]
+Key: VPSUBQZrr: [ 0.00 0.00 ]
+Key: VPSUBQZrrk: [ 0.00 0.00 ]
+Key: VPSUBQZrrkz: [ 0.00 0.00 ]
+Key: VPSUBQrm: [ 0.00 0.00 ]
+Key: VPSUBQrr: [ 0.00 0.00 ]
+Key: VPSUBSBYrm: [ 0.00 0.00 ]
+Key: VPSUBSBYrr: [ 0.00 0.00 ]
+Key: VPSUBSBZ: [ 0.00 0.00 ]
+Key: VPSUBSBZrm: [ 0.00 0.00 ]
+Key: VPSUBSBZrmk: [ 0.00 0.00 ]
+Key: VPSUBSBZrmkz: [ 0.00 0.00 ]
+Key: VPSUBSBZrr: [ 0.00 0.00 ]
+Key: VPSUBSBZrrk: [ 0.00 0.00 ]
+Key: VPSUBSBZrrkz: [ 0.00 0.00 ]
+Key: VPSUBSBrm: [ 0.00 0.00 ]
+Key: VPSUBSBrr: [ 0.00 0.00 ]
+Key: VPSUBSWYrm: [ 0.00 0.00 ]
+Key: VPSUBSWYrr: [ 0.00 0.00 ]
+Key: VPSUBSWZ: [ 0.00 0.00 ]
+Key: VPSUBSWZrm: [ 0.00 0.00 ]
+Key: VPSUBSWZrmk: [ 0.00 0.00 ]
+Key: VPSUBSWZrmkz: [ 0.00 0.00 ]
+Key: VPSUBSWZrr: [ 0.00 0.00 ]
+Key: VPSUBSWZrrk: [ 0.00 0.00 ]
+Key: VPSUBSWZrrkz: [ 0.00 0.00 ]
+Key: VPSUBSWrm: [ 0.00 0.00 ]
+Key: VPSUBSWrr: [ 0.00 0.00 ]
+Key: VPSUBUSBYrm: [ 0.00 0.00 ]
+Key: VPSUBUSBYrr: [ 0.00 0.00 ]
+Key: VPSUBUSBZ: [ 0.00 0.00 ]
+Key: VPSUBUSBZrm: [ 0.00 0.00 ]
+Key: VPSUBUSBZrmk: [ 0.00 0.00 ]
+Key: VPSUBUSBZrmkz: [ 0.00 0.00 ]
+Key: VPSUBUSBZrr: [ 0.00 0.00 ]
+Key: VPSUBUSBZrrk: [ 0.00 0.00 ]
+Key: VPSUBUSBZrrkz: [ 0.00 0.00 ]
+Key: VPSUBUSBrm: [ 0.00 0.00 ]
+Key: VPSUBUSBrr: [ 0.00 0.00 ]
+Key: VPSUBUSWYrm: [ 0.00 0.00 ]
+Key: VPSUBUSWYrr: [ 0.00 0.00 ]
+Key: VPSUBUSWZ: [ 0.00 0.00 ]
+Key: VPSUBUSWZrm: [ 0.00 0.00 ]
+Key: VPSUBUSWZrmk: [ 0.00 0.00 ]
+Key: VPSUBUSWZrmkz: [ 0.00 0.00 ]
+Key: VPSUBUSWZrr: [ 0.00 0.00 ]
+Key: VPSUBUSWZrrk: [ 0.00 0.00 ]
+Key: VPSUBUSWZrrkz: [ 0.00 0.00 ]
+Key: VPSUBUSWrm: [ 0.00 0.00 ]
+Key: VPSUBUSWrr: [ 0.00 0.00 ]
+Key: VPSUBWYrm: [ 0.00 0.00 ]
+Key: VPSUBWYrr: [ 0.00 0.00 ]
+Key: VPSUBWZ: [ 0.00 0.00 ]
+Key: VPSUBWZrm: [ 0.00 0.00 ]
+Key: VPSUBWZrmk: [ 0.00 0.00 ]
+Key: VPSUBWZrmkz: [ 0.00 0.00 ]
+Key: VPSUBWZrr: [ 0.00 0.00 ]
+Key: VPSUBWZrrk: [ 0.00 0.00 ]
+Key: VPSUBWZrrkz: [ 0.00 0.00 ]
+Key: VPSUBWrm: [ 0.00 0.00 ]
+Key: VPSUBWrr: [ 0.00 0.00 ]
+Key: VPTERNLOGDZ: [ 0.00 0.00 ]
+Key: VPTERNLOGDZrmbi: [ 0.00 0.00 ]
+Key: VPTERNLOGDZrmbik: [ 0.00 0.00 ]
+Key: VPTERNLOGDZrmbikz: [ 0.00 0.00 ]
+Key: VPTERNLOGDZrmi: [ 0.00 0.00 ]
+Key: VPTERNLOGDZrmik: [ 0.00 0.00 ]
+Key: VPTERNLOGDZrmikz: [ 0.00 0.00 ]
+Key: VPTERNLOGDZrri: [ 0.00 0.00 ]
+Key: VPTERNLOGDZrrik: [ 0.00 0.00 ]
+Key: VPTERNLOGDZrrikz: [ 0.00 0.00 ]
+Key: VPTERNLOGQZ: [ 0.00 0.00 ]
+Key: VPTERNLOGQZrmbi: [ 0.00 0.00 ]
+Key: VPTERNLOGQZrmbik: [ 0.00 0.00 ]
+Key: VPTERNLOGQZrmbikz: [ 0.00 0.00 ]
+Key: VPTERNLOGQZrmi: [ 0.00 0.00 ]
+Key: VPTERNLOGQZrmik: [ 0.00 0.00 ]
+Key: VPTERNLOGQZrmikz: [ 0.00 0.00 ]
+Key: VPTERNLOGQZrri: [ 0.00 0.00 ]
+Key: VPTERNLOGQZrrik: [ 0.00 0.00 ]
+Key: VPTERNLOGQZrrikz: [ 0.00 0.00 ]
+Key: VPTESTMBZ: [ 0.00 0.00 ]
+Key: VPTESTMBZrm: [ 0.00 0.00 ]
+Key: VPTESTMBZrmk: [ 0.00 0.00 ]
+Key: VPTESTMBZrr: [ 0.00 0.00 ]
+Key: VPTESTMBZrrk: [ 0.00 0.00 ]
+Key: VPTESTMDZ: [ 0.00 0.00 ]
+Key: VPTESTMDZrm: [ 0.00 0.00 ]
+Key: VPTESTMDZrmb: [ 0.00 0.00 ]
+Key: VPTESTMDZrmbk: [ 0.00 0.00 ]
+Key: VPTESTMDZrmk: [ 0.00 0.00 ]
+Key: VPTESTMDZrr: [ 0.00 0.00 ]
+Key: VPTESTMDZrrk: [ 0.00 0.00 ]
+Key: VPTESTMQZ: [ 0.00 0.00 ]
+Key: VPTESTMQZrm: [ 0.00 0.00 ]
+Key: VPTESTMQZrmb: [ 0.00 0.00 ]
+Key: VPTESTMQZrmbk: [ 0.00 0.00 ]
+Key: VPTESTMQZrmk: [ 0.00 0.00 ]
+Key: VPTESTMQZrr: [ 0.00 0.00 ]
+Key: VPTESTMQZrrk: [ 0.00 0.00 ]
+Key: VPTESTMWZ: [ 0.00 0.00 ]
+Key: VPTESTMWZrm: [ 0.00 0.00 ]
+Key: VPTESTMWZrmk: [ 0.00 0.00 ]
+Key: VPTESTMWZrr: [ 0.00 0.00 ]
+Key: VPTESTMWZrrk: [ 0.00 0.00 ]
+Key: VPTESTNMBZ: [ 0.00 0.00 ]
+Key: VPTESTNMBZrm: [ 0.00 0.00 ]
+Key: VPTESTNMBZrmk: [ 0.00 0.00 ]
+Key: VPTESTNMBZrr: [ 0.00 0.00 ]
+Key: VPTESTNMBZrrk: [ 0.00 0.00 ]
+Key: VPTESTNMDZ: [ 0.00 0.00 ]
+Key: VPTESTNMDZrm: [ 0.00 0.00 ]
+Key: VPTESTNMDZrmb: [ 0.00 0.00 ]
+Key: VPTESTNMDZrmbk: [ 0.00 0.00 ]
+Key: VPTESTNMDZrmk: [ 0.00 0.00 ]
+Key: VPTESTNMDZrr: [ 0.00 0.00 ]
+Key: VPTESTNMDZrrk: [ 0.00 0.00 ]
+Key: VPTESTNMQZ: [ 0.00 0.00 ]
+Key: VPTESTNMQZrm: [ 0.00 0.00 ]
+Key: VPTESTNMQZrmb: [ 0.00 0.00 ]
+Key: VPTESTNMQZrmbk: [ 0.00 0.00 ]
+Key: VPTESTNMQZrmk: [ 0.00 0.00 ]
+Key: VPTESTNMQZrr: [ 0.00 0.00 ]
+Key: VPTESTNMQZrrk: [ 0.00 0.00 ]
+Key: VPTESTNMWZ: [ 0.00 0.00 ]
+Key: VPTESTNMWZrm: [ 0.00 0.00 ]
+Key: VPTESTNMWZrmk: [ 0.00 0.00 ]
+Key: VPTESTNMWZrr: [ 0.00 0.00 ]
+Key: VPTESTNMWZrrk: [ 0.00 0.00 ]
+Key: VPTESTYrm: [ 0.00 0.00 ]
+Key: VPTESTYrr: [ 0.00 0.00 ]
+Key: VPTESTrm: [ 0.00 0.00 ]
+Key: VPTESTrr: [ 0.00 0.00 ]
+Key: VPUNPCKHBWYrm: [ 0.00 0.00 ]
+Key: VPUNPCKHBWYrr: [ 0.00 0.00 ]
+Key: VPUNPCKHBWZ: [ 0.00 0.00 ]
+Key: VPUNPCKHBWZrm: [ 0.00 0.00 ]
+Key: VPUNPCKHBWZrmk: [ 0.00 0.00 ]
+Key: VPUNPCKHBWZrmkz: [ 0.00 0.00 ]
+Key: VPUNPCKHBWZrr: [ 0.00 0.00 ]
+Key: VPUNPCKHBWZrrk: [ 0.00 0.00 ]
+Key: VPUNPCKHBWZrrkz: [ 0.00 0.00 ]
+Key: VPUNPCKHBWrm: [ 0.00 0.00 ]
+Key: VPUNPCKHBWrr: [ 0.00 0.00 ]
+Key: VPUNPCKHDQYrm: [ 0.00 0.00 ]
+Key: VPUNPCKHDQYrr: [ 0.00 0.00 ]
+Key: VPUNPCKHDQZ: [ 0.00 0.00 ]
+Key: VPUNPCKHDQZrm: [ 0.00 0.00 ]
+Key: VPUNPCKHDQZrmb: [ 0.00 0.00 ]
+Key: VPUNPCKHDQZrmbk: [ 0.00 0.00 ]
+Key: VPUNPCKHDQZrmbkz: [ 0.00 0.00 ]
+Key: VPUNPCKHDQZrmk: [ 0.00 0.00 ]
+Key: VPUNPCKHDQZrmkz: [ 0.00 0.00 ]
+Key: VPUNPCKHDQZrr: [ 0.00 0.00 ]
+Key: VPUNPCKHDQZrrk: [ 0.00 0.00 ]
+Key: VPUNPCKHDQZrrkz: [ 0.00 0.00 ]
+Key: VPUNPCKHDQrm: [ 0.00 0.00 ]
+Key: VPUNPCKHDQrr: [ 0.00 0.00 ]
+Key: VPUNPCKHQDQYrm: [ 0.00 0.00 ]
+Key: VPUNPCKHQDQYrr: [ 0.00 0.00 ]
+Key: VPUNPCKHQDQZ: [ 0.00 0.00 ]
+Key: VPUNPCKHQDQZrm: [ 0.00 0.00 ]
+Key: VPUNPCKHQDQZrmb: [ 0.00 0.00 ]
+Key: VPUNPCKHQDQZrmbk: [ 0.00 0.00 ]
+Key: VPUNPCKHQDQZrmbkz: [ 0.00 0.00 ]
+Key: VPUNPCKHQDQZrmk: [ 0.00 0.00 ]
+Key: VPUNPCKHQDQZrmkz: [ 0.00 0.00 ]
+Key: VPUNPCKHQDQZrr: [ 0.00 0.00 ]
+Key: VPUNPCKHQDQZrrk: [ 0.00 0.00 ]
+Key: VPUNPCKHQDQZrrkz: [ 0.00 0.00 ]
+Key: VPUNPCKHQDQrm: [ 0.00 0.00 ]
+Key: VPUNPCKHQDQrr: [ 0.00 0.00 ]
+Key: VPUNPCKHWDYrm: [ 0.00 0.00 ]
+Key: VPUNPCKHWDYrr: [ 0.00 0.00 ]
+Key: VPUNPCKHWDZ: [ 0.00 0.00 ]
+Key: VPUNPCKHWDZrm: [ 0.00 0.00 ]
+Key: VPUNPCKHWDZrmk: [ 0.00 0.00 ]
+Key: VPUNPCKHWDZrmkz: [ 0.00 0.00 ]
+Key: VPUNPCKHWDZrr: [ 0.00 0.00 ]
+Key: VPUNPCKHWDZrrk: [ 0.00 0.00 ]
+Key: VPUNPCKHWDZrrkz: [ 0.00 0.00 ]
+Key: VPUNPCKHWDrm: [ 0.00 0.00 ]
+Key: VPUNPCKHWDrr: [ 0.00 0.00 ]
+Key: VPUNPCKLBWYrm: [ 0.00 0.00 ]
+Key: VPUNPCKLBWYrr: [ 0.00 0.00 ]
+Key: VPUNPCKLBWZ: [ 0.00 0.00 ]
+Key: VPUNPCKLBWZrm: [ 0.00 0.00 ]
+Key: VPUNPCKLBWZrmk: [ 0.00 0.00 ]
+Key: VPUNPCKLBWZrmkz: [ 0.00 0.00 ]
+Key: VPUNPCKLBWZrr: [ 0.00 0.00 ]
+Key: VPUNPCKLBWZrrk: [ 0.00 0.00 ]
+Key: VPUNPCKLBWZrrkz: [ 0.00 0.00 ]
+Key: VPUNPCKLBWrm: [ 0.00 0.00 ]
+Key: VPUNPCKLBWrr: [ 0.00 0.00 ]
+Key: VPUNPCKLDQYrm: [ 0.00 0.00 ]
+Key: VPUNPCKLDQYrr: [ 0.00 0.00 ]
+Key: VPUNPCKLDQZ: [ 0.00 0.00 ]
+Key: VPUNPCKLDQZrm: [ 0.00 0.00 ]
+Key: VPUNPCKLDQZrmb: [ 0.00 0.00 ]
+Key: VPUNPCKLDQZrmbk: [ 0.00 0.00 ]
+Key: VPUNPCKLDQZrmbkz: [ 0.00 0.00 ]
+Key: VPUNPCKLDQZrmk: [ 0.00 0.00 ]
+Key: VPUNPCKLDQZrmkz: [ 0.00 0.00 ]
+Key: VPUNPCKLDQZrr: [ 0.00 0.00 ]
+Key: VPUNPCKLDQZrrk: [ 0.00 0.00 ]
+Key: VPUNPCKLDQZrrkz: [ 0.00 0.00 ]
+Key: VPUNPCKLDQrm: [ 0.00 0.00 ]
+Key: VPUNPCKLDQrr: [ 0.00 0.00 ]
+Key: VPUNPCKLQDQYrm: [ 0.00 0.00 ]
+Key: VPUNPCKLQDQYrr: [ 0.00 0.00 ]
+Key: VPUNPCKLQDQZ: [ 0.00 0.00 ]
+Key: VPUNPCKLQDQZrm: [ 0.00 0.00 ]
+Key: VPUNPCKLQDQZrmb: [ 0.00 0.00 ]
+Key: VPUNPCKLQDQZrmbk: [ 0.00 0.00 ]
+Key: VPUNPCKLQDQZrmbkz: [ 0.00 0.00 ]
+Key: VPUNPCKLQDQZrmk: [ 0.00 0.00 ]
+Key: VPUNPCKLQDQZrmkz: [ 0.00 0.00 ]
+Key: VPUNPCKLQDQZrr: [ 0.00 0.00 ]
+Key: VPUNPCKLQDQZrrk: [ 0.00 0.00 ]
+Key: VPUNPCKLQDQZrrkz: [ 0.00 0.00 ]
+Key: VPUNPCKLQDQrm: [ 0.00 0.00 ]
+Key: VPUNPCKLQDQrr: [ 0.00 0.00 ]
+Key: VPUNPCKLWDYrm: [ 0.00 0.00 ]
+Key: VPUNPCKLWDYrr: [ 0.00 0.00 ]
+Key: VPUNPCKLWDZ: [ 0.00 0.00 ]
+Key: VPUNPCKLWDZrm: [ 0.00 0.00 ]
+Key: VPUNPCKLWDZrmk: [ 0.00 0.00 ]
+Key: VPUNPCKLWDZrmkz: [ 0.00 0.00 ]
+Key: VPUNPCKLWDZrr: [ 0.00 0.00 ]
+Key: VPUNPCKLWDZrrk: [ 0.00 0.00 ]
+Key: VPUNPCKLWDZrrkz: [ 0.00 0.00 ]
+Key: VPUNPCKLWDrm: [ 0.00 0.00 ]
+Key: VPUNPCKLWDrr: [ 0.00 0.00 ]
+Key: VPXORDZ: [ 0.00 0.00 ]
+Key: VPXORDZrm: [ 0.00 0.00 ]
+Key: VPXORDZrmb: [ 0.00 0.00 ]
+Key: VPXORDZrmbk: [ 0.00 0.00 ]
+Key: VPXORDZrmbkz: [ 0.00 0.00 ]
+Key: VPXORDZrmk: [ 0.00 0.00 ]
+Key: VPXORDZrmkz: [ 0.00 0.00 ]
+Key: VPXORDZrr: [ 0.00 0.00 ]
+Key: VPXORDZrrk: [ 0.00 0.00 ]
+Key: VPXORDZrrkz: [ 0.00 0.00 ]
+Key: VPXORQZ: [ 0.00 0.00 ]
+Key: VPXORQZrm: [ 0.00 0.00 ]
+Key: VPXORQZrmb: [ 0.00 0.00 ]
+Key: VPXORQZrmbk: [ 0.00 0.00 ]
+Key: VPXORQZrmbkz: [ 0.00 0.00 ]
+Key: VPXORQZrmk: [ 0.00 0.00 ]
+Key: VPXORQZrmkz: [ 0.00 0.00 ]
+Key: VPXORQZrr: [ 0.00 0.00 ]
+Key: VPXORQZrrk: [ 0.00 0.00 ]
+Key: VPXORQZrrkz: [ 0.00 0.00 ]
+Key: VPXORYrm: [ 0.00 0.00 ]
+Key: VPXORYrr: [ 0.00 0.00 ]
+Key: VPXORrm: [ 0.00 0.00 ]
+Key: VPXORrr: [ 0.00 0.00 ]
+Key: VRANGEPDZ: [ 0.00 0.00 ]
+Key: VRANGEPDZrmbi: [ 0.00 0.00 ]
+Key: VRANGEPDZrmbik: [ 0.00 0.00 ]
+Key: VRANGEPDZrmbikz: [ 0.00 0.00 ]
+Key: VRANGEPDZrmi: [ 0.00 0.00 ]
+Key: VRANGEPDZrmik: [ 0.00 0.00 ]
+Key: VRANGEPDZrmikz: [ 0.00 0.00 ]
+Key: VRANGEPDZrri: [ 0.00 0.00 ]
+Key: VRANGEPDZrrib: [ 0.00 0.00 ]
+Key: VRANGEPDZrribk: [ 0.00 0.00 ]
+Key: VRANGEPDZrribkz: [ 0.00 0.00 ]
+Key: VRANGEPDZrrik: [ 0.00 0.00 ]
+Key: VRANGEPDZrrikz: [ 0.00 0.00 ]
+Key: VRANGEPSZ: [ 0.00 0.00 ]
+Key: VRANGEPSZrmbi: [ 0.00 0.00 ]
+Key: VRANGEPSZrmbik: [ 0.00 0.00 ]
+Key: VRANGEPSZrmbikz: [ 0.00 0.00 ]
+Key: VRANGEPSZrmi: [ 0.00 0.00 ]
+Key: VRANGEPSZrmik: [ 0.00 0.00 ]
+Key: VRANGEPSZrmikz: [ 0.00 0.00 ]
+Key: VRANGEPSZrri: [ 0.00 0.00 ]
+Key: VRANGEPSZrrib: [ 0.00 0.00 ]
+Key: VRANGEPSZrribk: [ 0.00 0.00 ]
+Key: VRANGEPSZrribkz: [ 0.00 0.00 ]
+Key: VRANGEPSZrrik: [ 0.00 0.00 ]
+Key: VRANGEPSZrrikz: [ 0.00 0.00 ]
+Key: VRANGESDZrmi: [ 0.00 0.00 ]
+Key: VRANGESDZrmik: [ 0.00 0.00 ]
+Key: VRANGESDZrmikz: [ 0.00 0.00 ]
+Key: VRANGESDZrri: [ 0.00 0.00 ]
+Key: VRANGESDZrrib: [ 0.00 0.00 ]
+Key: VRANGESDZrribk: [ 0.00 0.00 ]
+Key: VRANGESDZrribkz: [ 0.00 0.00 ]
+Key: VRANGESDZrrik: [ 0.00 0.00 ]
+Key: VRANGESDZrrikz: [ 0.00 0.00 ]
+Key: VRANGESSZrmi: [ 0.00 0.00 ]
+Key: VRANGESSZrmik: [ 0.00 0.00 ]
+Key: VRANGESSZrmikz: [ 0.00 0.00 ]
+Key: VRANGESSZrri: [ 0.00 0.00 ]
+Key: VRANGESSZrrib: [ 0.00 0.00 ]
+Key: VRANGESSZrribk: [ 0.00 0.00 ]
+Key: VRANGESSZrribkz: [ 0.00 0.00 ]
+Key: VRANGESSZrrik: [ 0.00 0.00 ]
+Key: VRANGESSZrrikz: [ 0.00 0.00 ]
+Key: VRCP: [ 0.00 0.00 ]
+Key: VRCPBF: [ 0.00 0.00 ]
+Key: VRCPPHZ: [ 0.00 0.00 ]
+Key: VRCPPHZm: [ 0.00 0.00 ]
+Key: VRCPPHZmb: [ 0.00 0.00 ]
+Key: VRCPPHZmbk: [ 0.00 0.00 ]
+Key: VRCPPHZmbkz: [ 0.00 0.00 ]
+Key: VRCPPHZmk: [ 0.00 0.00 ]
+Key: VRCPPHZmkz: [ 0.00 0.00 ]
+Key: VRCPPHZr: [ 0.00 0.00 ]
+Key: VRCPPHZrk: [ 0.00 0.00 ]
+Key: VRCPPHZrkz: [ 0.00 0.00 ]
+Key: VRCPPSYm: [ 0.00 0.00 ]
+Key: VRCPPSYr: [ 0.00 0.00 ]
+Key: VRCPPSm: [ 0.00 0.00 ]
+Key: VRCPPSr: [ 0.00 0.00 ]
+Key: VRCPSHZrm: [ 0.00 0.00 ]
+Key: VRCPSHZrmk: [ 0.00 0.00 ]
+Key: VRCPSHZrmkz: [ 0.00 0.00 ]
+Key: VRCPSHZrr: [ 0.00 0.00 ]
+Key: VRCPSHZrrk: [ 0.00 0.00 ]
+Key: VRCPSHZrrkz: [ 0.00 0.00 ]
+Key: VRCPSSm: [ 0.00 0.00 ]
+Key: VRCPSSm_Int: [ 0.00 0.00 ]
+Key: VRCPSSr: [ 0.00 0.00 ]
+Key: VRCPSSr_Int: [ 0.00 0.00 ]
+Key: VREDUCEBF: [ 0.00 0.00 ]
+Key: VREDUCEPDZ: [ 0.00 0.00 ]
+Key: VREDUCEPDZrmbi: [ 0.00 0.00 ]
+Key: VREDUCEPDZrmbik: [ 0.00 0.00 ]
+Key: VREDUCEPDZrmbikz: [ 0.00 0.00 ]
+Key: VREDUCEPDZrmi: [ 0.00 0.00 ]
+Key: VREDUCEPDZrmik: [ 0.00 0.00 ]
+Key: VREDUCEPDZrmikz: [ 0.00 0.00 ]
+Key: VREDUCEPDZrri: [ 0.00 0.00 ]
+Key: VREDUCEPDZrrib: [ 0.00 0.00 ]
+Key: VREDUCEPDZrribk: [ 0.00 0.00 ]
+Key: VREDUCEPDZrribkz: [ 0.00 0.00 ]
+Key: VREDUCEPDZrrik: [ 0.00 0.00 ]
+Key: VREDUCEPDZrrikz: [ 0.00 0.00 ]
+Key: VREDUCEPHZ: [ 0.00 0.00 ]
+Key: VREDUCEPHZrmbi: [ 0.00 0.00 ]
+Key: VREDUCEPHZrmbik: [ 0.00 0.00 ]
+Key: VREDUCEPHZrmbikz: [ 0.00 0.00 ]
+Key: VREDUCEPHZrmi: [ 0.00 0.00 ]
+Key: VREDUCEPHZrmik: [ 0.00 0.00 ]
+Key: VREDUCEPHZrmikz: [ 0.00 0.00 ]
+Key: VREDUCEPHZrri: [ 0.00 0.00 ]
+Key: VREDUCEPHZrrib: [ 0.00 0.00 ]
+Key: VREDUCEPHZrribk: [ 0.00 0.00 ]
+Key: VREDUCEPHZrribkz: [ 0.00 0.00 ]
+Key: VREDUCEPHZrrik: [ 0.00 0.00 ]
+Key: VREDUCEPHZrrikz: [ 0.00 0.00 ]
+Key: VREDUCEPSZ: [ 0.00 0.00 ]
+Key: VREDUCEPSZrmbi: [ 0.00 0.00 ]
+Key: VREDUCEPSZrmbik: [ 0.00 0.00 ]
+Key: VREDUCEPSZrmbikz: [ 0.00 0.00 ]
+Key: VREDUCEPSZrmi: [ 0.00 0.00 ]
+Key: VREDUCEPSZrmik: [ 0.00 0.00 ]
+Key: VREDUCEPSZrmikz: [ 0.00 0.00 ]
+Key: VREDUCEPSZrri: [ 0.00 0.00 ]
+Key: VREDUCEPSZrrib: [ 0.00 0.00 ]
+Key: VREDUCEPSZrribk: [ 0.00 0.00 ]
+Key: VREDUCEPSZrribkz: [ 0.00 0.00 ]
+Key: VREDUCEPSZrrik: [ 0.00 0.00 ]
+Key: VREDUCEPSZrrikz: [ 0.00 0.00 ]
+Key: VREDUCESDZrmi: [ 0.00 0.00 ]
+Key: VREDUCESDZrmik: [ 0.00 0.00 ]
+Key: VREDUCESDZrmikz: [ 0.00 0.00 ]
+Key: VREDUCESDZrri: [ 0.00 0.00 ]
+Key: VREDUCESDZrrib: [ 0.00 0.00 ]
+Key: VREDUCESDZrribk: [ 0.00 0.00 ]
+Key: VREDUCESDZrribkz: [ 0.00 0.00 ]
+Key: VREDUCESDZrrik: [ 0.00 0.00 ]
+Key: VREDUCESDZrrikz: [ 0.00 0.00 ]
+Key: VREDUCESHZrmi: [ 0.00 0.00 ]
+Key: VREDUCESHZrmik: [ 0.00 0.00 ]
+Key: VREDUCESHZrmikz: [ 0.00 0.00 ]
+Key: VREDUCESHZrri: [ 0.00 0.00 ]
+Key: VREDUCESHZrrib: [ 0.00 0.00 ]
+Key: VREDUCESHZrribk: [ 0.00 0.00 ]
+Key: VREDUCESHZrribkz: [ 0.00 0.00 ]
+Key: VREDUCESHZrrik: [ 0.00 0.00 ]
+Key: VREDUCESHZrrikz: [ 0.00 0.00 ]
+Key: VREDUCESSZrmi: [ 0.00 0.00 ]
+Key: VREDUCESSZrmik: [ 0.00 0.00 ]
+Key: VREDUCESSZrmikz: [ 0.00 0.00 ]
+Key: VREDUCESSZrri: [ 0.00 0.00 ]
+Key: VREDUCESSZrrib: [ 0.00 0.00 ]
+Key: VREDUCESSZrribk: [ 0.00 0.00 ]
+Key: VREDUCESSZrribkz: [ 0.00 0.00 ]
+Key: VREDUCESSZrrik: [ 0.00 0.00 ]
+Key: VREDUCESSZrrikz: [ 0.00 0.00 ]
+Key: VRNDSCALEBF: [ 0.00 0.00 ]
+Key: VRNDSCALEPDZ: [ 0.00 0.00 ]
+Key: VRNDSCALEPDZrmbi: [ 0.00 0.00 ]
+Key: VRNDSCALEPDZrmbik: [ 0.00 0.00 ]
+Key: VRNDSCALEPDZrmbikz: [ 0.00 0.00 ]
+Key: VRNDSCALEPDZrmi: [ 0.00 0.00 ]
+Key: VRNDSCALEPDZrmik: [ 0.00 0.00 ]
+Key: VRNDSCALEPDZrmikz: [ 0.00 0.00 ]
+Key: VRNDSCALEPDZrri: [ 0.00 0.00 ]
+Key: VRNDSCALEPDZrrib: [ 0.00 0.00 ]
+Key: VRNDSCALEPDZrribk: [ 0.00 0.00 ]
+Key: VRNDSCALEPDZrribkz: [ 0.00 0.00 ]
+Key: VRNDSCALEPDZrrik: [ 0.00 0.00 ]
+Key: VRNDSCALEPDZrrikz: [ 0.00 0.00 ]
+Key: VRNDSCALEPHZ: [ 0.00 0.00 ]
+Key: VRNDSCALEPHZrmbi: [ 0.00 0.00 ]
+Key: VRNDSCALEPHZrmbik: [ 0.00 0.00 ]
+Key: VRNDSCALEPHZrmbikz: [ 0.00 0.00 ]
+Key: VRNDSCALEPHZrmi: [ 0.00 0.00 ]
+Key: VRNDSCALEPHZrmik: [ 0.00 0.00 ]
+Key: VRNDSCALEPHZrmikz: [ 0.00 0.00 ]
+Key: VRNDSCALEPHZrri: [ 0.00 0.00 ]
+Key: VRNDSCALEPHZrrib: [ 0.00 0.00 ]
+Key: VRNDSCALEPHZrribk: [ 0.00 0.00 ]
+Key: VRNDSCALEPHZrribkz: [ 0.00 0.00 ]
+Key: VRNDSCALEPHZrrik: [ 0.00 0.00 ]
+Key: VRNDSCALEPHZrrikz: [ 0.00 0.00 ]
+Key: VRNDSCALEPSZ: [ 0.00 0.00 ]
+Key: VRNDSCALEPSZrmbi: [ 0.00 0.00 ]
+Key: VRNDSCALEPSZrmbik: [ 0.00 0.00 ]
+Key: VRNDSCALEPSZrmbikz: [ 0.00 0.00 ]
+Key: VRNDSCALEPSZrmi: [ 0.00 0.00 ]
+Key: VRNDSCALEPSZrmik: [ 0.00 0.00 ]
+Key: VRNDSCALEPSZrmikz: [ 0.00 0.00 ]
+Key: VRNDSCALEPSZrri: [ 0.00 0.00 ]
+Key: VRNDSCALEPSZrrib: [ 0.00 0.00 ]
+Key: VRNDSCALEPSZrribk: [ 0.00 0.00 ]
+Key: VRNDSCALEPSZrribkz: [ 0.00 0.00 ]
+Key: VRNDSCALEPSZrrik: [ 0.00 0.00 ]
+Key: VRNDSCALEPSZrrikz: [ 0.00 0.00 ]
+Key: VRNDSCALESDZrmi: [ 0.00 0.00 ]
+Key: VRNDSCALESDZrmi_Int: [ 0.00 0.00 ]
+Key: VRNDSCALESDZrmik_Int: [ 0.00 0.00 ]
+Key: VRNDSCALESDZrmikz_Int: [ 0.00 0.00 ]
+Key: VRNDSCALESDZrri: [ 0.00 0.00 ]
+Key: VRNDSCALESDZrri_Int: [ 0.00 0.00 ]
+Key: VRNDSCALESDZrrib_Int: [ 0.00 0.00 ]
+Key: VRNDSCALESDZrribk_Int: [ 0.00 0.00 ]
+Key: VRNDSCALESDZrribkz_Int: [ 0.00 0.00 ]
+Key: VRNDSCALESDZrrik_Int: [ 0.00 0.00 ]
+Key: VRNDSCALESDZrrikz_Int: [ 0.00 0.00 ]
+Key: VRNDSCALESHZrmi: [ 0.00 0.00 ]
+Key: VRNDSCALESHZrmi_Int: [ 0.00 0.00 ]
+Key: VRNDSCALESHZrmik_Int: [ 0.00 0.00 ]
+Key: VRNDSCALESHZrmikz_Int: [ 0.00 0.00 ]
+Key: VRNDSCALESHZrri: [ 0.00 0.00 ]
+Key: VRNDSCALESHZrri_Int: [ 0.00 0.00 ]
+Key: VRNDSCALESHZrrib_Int: [ 0.00 0.00 ]
+Key: VRNDSCALESHZrribk_Int: [ 0.00 0.00 ]
+Key: VRNDSCALESHZrribkz_Int: [ 0.00 0.00 ]
+Key: VRNDSCALESHZrrik_Int: [ 0.00 0.00 ]
+Key: VRNDSCALESHZrrikz_Int: [ 0.00 0.00 ]
+Key: VRNDSCALESSZrmi: [ 0.00 0.00 ]
+Key: VRNDSCALESSZrmi_Int: [ 0.00 0.00 ]
+Key: VRNDSCALESSZrmik_Int: [ 0.00 0.00 ]
+Key: VRNDSCALESSZrmikz_Int: [ 0.00 0.00 ]
+Key: VRNDSCALESSZrri: [ 0.00 0.00 ]
+Key: VRNDSCALESSZrri_Int: [ 0.00 0.00 ]
+Key: VRNDSCALESSZrrib_Int: [ 0.00 0.00 ]
+Key: VRNDSCALESSZrribk_Int: [ 0.00 0.00 ]
+Key: VRNDSCALESSZrribkz_Int: [ 0.00 0.00 ]
+Key: VRNDSCALESSZrrik_Int: [ 0.00 0.00 ]
+Key: VRNDSCALESSZrrikz_Int: [ 0.00 0.00 ]
+Key: VROUNDPDYmi: [ 0.00 0.00 ]
+Key: VROUNDPDYri: [ 0.00 0.00 ]
+Key: VROUNDPDmi: [ 0.00 0.00 ]
+Key: VROUNDPDri: [ 0.00 0.00 ]
+Key: VROUNDPSYmi: [ 0.00 0.00 ]
+Key: VROUNDPSYri: [ 0.00 0.00 ]
+Key: VROUNDPSmi: [ 0.00 0.00 ]
+Key: VROUNDPSri: [ 0.00 0.00 ]
+Key: VROUNDSDmi: [ 0.00 0.00 ]
+Key: VROUNDSDmi_Int: [ 0.00 0.00 ]
+Key: VROUNDSDri: [ 0.00 0.00 ]
+Key: VROUNDSDri_Int: [ 0.00 0.00 ]
+Key: VROUNDSSmi: [ 0.00 0.00 ]
+Key: VROUNDSSmi_Int: [ 0.00 0.00 ]
+Key: VROUNDSSri: [ 0.00 0.00 ]
+Key: VROUNDSSri_Int: [ 0.00 0.00 ]
+Key: VRSQRT: [ 0.00 0.00 ]
+Key: VRSQRTBF: [ 0.00 0.00 ]
+Key: VRSQRTPHZ: [ 0.00 0.00 ]
+Key: VRSQRTPHZm: [ 0.00 0.00 ]
+Key: VRSQRTPHZmb: [ 0.00 0.00 ]
+Key: VRSQRTPHZmbk: [ 0.00 0.00 ]
+Key: VRSQRTPHZmbkz: [ 0.00 0.00 ]
+Key: VRSQRTPHZmk: [ 0.00 0.00 ]
+Key: VRSQRTPHZmkz: [ 0.00 0.00 ]
+Key: VRSQRTPHZr: [ 0.00 0.00 ]
+Key: VRSQRTPHZrk: [ 0.00 0.00 ]
+Key: VRSQRTPHZrkz: [ 0.00 0.00 ]
+Key: VRSQRTPSYm: [ 0.00 0.00 ]
+Key: VRSQRTPSYr: [ 0.00 0.00 ]
+Key: VRSQRTPSm: [ 0.00 0.00 ]
+Key: VRSQRTPSr: [ 0.00 0.00 ]
+Key: VRSQRTSHZrm: [ 0.00 0.00 ]
+Key: VRSQRTSHZrmk: [ 0.00 0.00 ]
+Key: VRSQRTSHZrmkz: [ 0.00 0.00 ]
+Key: VRSQRTSHZrr: [ 0.00 0.00 ]
+Key: VRSQRTSHZrrk: [ 0.00 0.00 ]
+Key: VRSQRTSHZrrkz: [ 0.00 0.00 ]
+Key: VRSQRTSSm: [ 0.00 0.00 ]
+Key: VRSQRTSSm_Int: [ 0.00 0.00 ]
+Key: VRSQRTSSr: [ 0.00 0.00 ]
+Key: VRSQRTSSr_Int: [ 0.00 0.00 ]
+Key: VSCALEFBF: [ 0.00 0.00 ]
+Key: VSCALEFPDZ: [ 0.00 0.00 ]
+Key: VSCALEFPDZrm: [ 0.00 0.00 ]
+Key: VSCALEFPDZrmb: [ 0.00 0.00 ]
+Key: VSCALEFPDZrmbk: [ 0.00 0.00 ]
+Key: VSCALEFPDZrmbkz: [ 0.00 0.00 ]
+Key: VSCALEFPDZrmk: [ 0.00 0.00 ]
+Key: VSCALEFPDZrmkz: [ 0.00 0.00 ]
+Key: VSCALEFPDZrr: [ 0.00 0.00 ]
+Key: VSCALEFPDZrrb: [ 0.00 0.00 ]
+Key: VSCALEFPDZrrbk: [ 0.00 0.00 ]
+Key: VSCALEFPDZrrbkz: [ 0.00 0.00 ]
+Key: VSCALEFPDZrrk: [ 0.00 0.00 ]
+Key: VSCALEFPDZrrkz: [ 0.00 0.00 ]
+Key: VSCALEFPHZ: [ 0.00 0.00 ]
+Key: VSCALEFPHZrm: [ 0.00 0.00 ]
+Key: VSCALEFPHZrmb: [ 0.00 0.00 ]
+Key: VSCALEFPHZrmbk: [ 0.00 0.00 ]
+Key: VSCALEFPHZrmbkz: [ 0.00 0.00 ]
+Key: VSCALEFPHZrmk: [ 0.00 0.00 ]
+Key: VSCALEFPHZrmkz: [ 0.00 0.00 ]
+Key: VSCALEFPHZrr: [ 0.00 0.00 ]
+Key: VSCALEFPHZrrb: [ 0.00 0.00 ]
+Key: VSCALEFPHZrrbk: [ 0.00 0.00 ]
+Key: VSCALEFPHZrrbkz: [ 0.00 0.00 ]
+Key: VSCALEFPHZrrk: [ 0.00 0.00 ]
+Key: VSCALEFPHZrrkz: [ 0.00 0.00 ]
+Key: VSCALEFPSZ: [ 0.00 0.00 ]
+Key: VSCALEFPSZrm: [ 0.00 0.00 ]
+Key: VSCALEFPSZrmb: [ 0.00 0.00 ]
+Key: VSCALEFPSZrmbk: [ 0.00 0.00 ]
+Key: VSCALEFPSZrmbkz: [ 0.00 0.00 ]
+Key: VSCALEFPSZrmk: [ 0.00 0.00 ]
+Key: VSCALEFPSZrmkz: [ 0.00 0.00 ]
+Key: VSCALEFPSZrr: [ 0.00 0.00 ]
+Key: VSCALEFPSZrrb: [ 0.00 0.00 ]
+Key: VSCALEFPSZrrbk: [ 0.00 0.00 ]
+Key: VSCALEFPSZrrbkz: [ 0.00 0.00 ]
+Key: VSCALEFPSZrrk: [ 0.00 0.00 ]
+Key: VSCALEFPSZrrkz: [ 0.00 0.00 ]
+Key: VSCALEFSDZrm: [ 0.00 0.00 ]
+Key: VSCALEFSDZrmk: [ 0.00 0.00 ]
+Key: VSCALEFSDZrmkz: [ 0.00 0.00 ]
+Key: VSCALEFSDZrr: [ 0.00 0.00 ]
+Key: VSCALEFSDZrrb_Int: [ 0.00 0.00 ]
+Key: VSCALEFSDZrrbk_Int: [ 0.00 0.00 ]
+Key: VSCALEFSDZrrbkz_Int: [ 0.00 0.00 ]
+Key: VSCALEFSDZrrk: [ 0.00 0.00 ]
+Key: VSCALEFSDZrrkz: [ 0.00 0.00 ]
+Key: VSCALEFSHZrm: [ 0.00 0.00 ]
+Key: VSCALEFSHZrmk: [ 0.00 0.00 ]
+Key: VSCALEFSHZrmkz: [ 0.00 0.00 ]
+Key: VSCALEFSHZrr: [ 0.00 0.00 ]
+Key: VSCALEFSHZrrb_Int: [ 0.00 0.00 ]
+Key: VSCALEFSHZrrbk_Int: [ 0.00 0.00 ]
+Key: VSCALEFSHZrrbkz_Int: [ 0.00 0.00 ]
+Key: VSCALEFSHZrrk: [ 0.00 0.00 ]
+Key: VSCALEFSHZrrkz: [ 0.00 0.00 ]
+Key: VSCALEFSSZrm: [ 0.00 0.00 ]
+Key: VSCALEFSSZrmk: [ 0.00 0.00 ]
+Key: VSCALEFSSZrmkz: [ 0.00 0.00 ]
+Key: VSCALEFSSZrr: [ 0.00 0.00 ]
+Key: VSCALEFSSZrrb_Int: [ 0.00 0.00 ]
+Key: VSCALEFSSZrrbk_Int: [ 0.00 0.00 ]
+Key: VSCALEFSSZrrbkz_Int: [ 0.00 0.00 ]
+Key: VSCALEFSSZrrk: [ 0.00 0.00 ]
+Key: VSCALEFSSZrrkz: [ 0.00 0.00 ]
+Key: VSCATTERDPDZ: [ 0.00 0.00 ]
+Key: VSCATTERDPDZmr: [ 0.00 0.00 ]
+Key: VSCATTERDPSZ: [ 0.00 0.00 ]
+Key: VSCATTERDPSZmr: [ 0.00 0.00 ]
+Key: VSCATTERPF: [ 0.00 0.00 ]
+Key: VSCATTERQPDZ: [ 0.00 0.00 ]
+Key: VSCATTERQPDZmr: [ 0.00 0.00 ]
+Key: VSCATTERQPSZ: [ 0.00 0.00 ]
+Key: VSCATTERQPSZmr: [ 0.00 0.00 ]
+Key: VSHA: [ 0.00 0.00 ]
+Key: VSHUFF: [ 0.00 0.00 ]
+Key: VSHUFI: [ 0.00 0.00 ]
+Key: VSHUFPDYrmi: [ 0.00 0.00 ]
+Key: VSHUFPDYrri: [ 0.00 0.00 ]
+Key: VSHUFPDZ: [ 0.00 0.00 ]
+Key: VSHUFPDZrmbi: [ 0.00 0.00 ]
+Key: VSHUFPDZrmbik: [ 0.00 0.00 ]
+Key: VSHUFPDZrmbikz: [ 0.00 0.00 ]
+Key: VSHUFPDZrmi: [ 0.00 0.00 ]
+Key: VSHUFPDZrmik: [ 0.00 0.00 ]
+Key: VSHUFPDZrmikz: [ 0.00 0.00 ]
+Key: VSHUFPDZrri: [ 0.00 0.00 ]
+Key: VSHUFPDZrrik: [ 0.00 0.00 ]
+Key: VSHUFPDZrrikz: [ 0.00 0.00 ]
+Key: VSHUFPDrmi: [ 0.00 0.00 ]
+Key: VSHUFPDrri: [ 0.00 0.00 ]
+Key: VSHUFPSYrmi: [ 0.00 0.00 ]
+Key: VSHUFPSYrri: [ 0.00 0.00 ]
+Key: VSHUFPSZ: [ 0.00 0.00 ]
+Key: VSHUFPSZrmbi: [ 0.00 0.00 ]
+Key: VSHUFPSZrmbik: [ 0.00 0.00 ]
+Key: VSHUFPSZrmbikz: [ 0.00 0.00 ]
+Key: VSHUFPSZrmi: [ 0.00 0.00 ]
+Key: VSHUFPSZrmik: [ 0.00 0.00 ]
+Key: VSHUFPSZrmikz: [ 0.00 0.00 ]
+Key: VSHUFPSZrri: [ 0.00 0.00 ]
+Key: VSHUFPSZrrik: [ 0.00 0.00 ]
+Key: VSHUFPSZrrikz: [ 0.00 0.00 ]
+Key: VSHUFPSrmi: [ 0.00 0.00 ]
+Key: VSHUFPSrri: [ 0.00 0.00 ]
+Key: VSM: [ 0.00 0.00 ]
+Key: VSQRTBF: [ 0.00 0.00 ]
+Key: VSQRTPDYm: [ 0.00 0.00 ]
+Key: VSQRTPDYr: [ 0.00 0.00 ]
+Key: VSQRTPDZ: [ 0.00 0.00 ]
+Key: VSQRTPDZm: [ 0.00 0.00 ]
+Key: VSQRTPDZmb: [ 0.00 0.00 ]
+Key: VSQRTPDZmbk: [ 0.00 0.00 ]
+Key: VSQRTPDZmbkz: [ 0.00 0.00 ]
+Key: VSQRTPDZmk: [ 0.00 0.00 ]
+Key: VSQRTPDZmkz: [ 0.00 0.00 ]
+Key: VSQRTPDZr: [ 0.00 0.00 ]
+Key: VSQRTPDZrb: [ 0.00 0.00 ]
+Key: VSQRTPDZrbk: [ 0.00 0.00 ]
+Key: VSQRTPDZrbkz: [ 0.00 0.00 ]
+Key: VSQRTPDZrk: [ 0.00 0.00 ]
+Key: VSQRTPDZrkz: [ 0.00 0.00 ]
+Key: VSQRTPDm: [ 0.00 0.00 ]
+Key: VSQRTPDr: [ 0.00 0.00 ]
+Key: VSQRTPHZ: [ 0.00 0.00 ]
+Key: VSQRTPHZm: [ 0.00 0.00 ]
+Key: VSQRTPHZmb: [ 0.00 0.00 ]
+Key: VSQRTPHZmbk: [ 0.00 0.00 ]
+Key: VSQRTPHZmbkz: [ 0.00 0.00 ]
+Key: VSQRTPHZmk: [ 0.00 0.00 ]
+Key: VSQRTPHZmkz: [ 0.00 0.00 ]
+Key: VSQRTPHZr: [ 0.00 0.00 ]
+Key: VSQRTPHZrb: [ 0.00 0.00 ]
+Key: VSQRTPHZrbk: [ 0.00 0.00 ]
+Key: VSQRTPHZrbkz: [ 0.00 0.00 ]
+Key: VSQRTPHZrk: [ 0.00 0.00 ]
+Key: VSQRTPHZrkz: [ 0.00 0.00 ]
+Key: VSQRTPSYm: [ 0.00 0.00 ]
+Key: VSQRTPSYr: [ 0.00 0.00 ]
+Key: VSQRTPSZ: [ 0.00 0.00 ]
+Key: VSQRTPSZm: [ 0.00 0.00 ]
+Key: VSQRTPSZmb: [ 0.00 0.00 ]
+Key: VSQRTPSZmbk: [ 0.00 0.00 ]
+Key: VSQRTPSZmbkz: [ 0.00 0.00 ]
+Key: VSQRTPSZmk: [ 0.00 0.00 ]
+Key: VSQRTPSZmkz: [ 0.00 0.00 ]
+Key: VSQRTPSZr: [ 0.00 0.00 ]
+Key: VSQRTPSZrb: [ 0.00 0.00 ]
+Key: VSQRTPSZrbk: [ 0.00 0.00 ]
+Key: VSQRTPSZrbkz: [ 0.00 0.00 ]
+Key: VSQRTPSZrk: [ 0.00 0.00 ]
+Key: VSQRTPSZrkz: [ 0.00 0.00 ]
+Key: VSQRTPSm: [ 0.00 0.00 ]
+Key: VSQRTPSr: [ 0.00 0.00 ]
+Key: VSQRTSDZm: [ 0.00 0.00 ]
+Key: VSQRTSDZm_Int: [ 0.00 0.00 ]
+Key: VSQRTSDZmk_Int: [ 0.00 0.00 ]
+Key: VSQRTSDZmkz_Int: [ 0.00 0.00 ]
+Key: VSQRTSDZr: [ 0.00 0.00 ]
+Key: VSQRTSDZr_Int: [ 0.00 0.00 ]
+Key: VSQRTSDZrb_Int: [ 0.00 0.00 ]
+Key: VSQRTSDZrbk_Int: [ 0.00 0.00 ]
+Key: VSQRTSDZrbkz_Int: [ 0.00 0.00 ]
+Key: VSQRTSDZrk_Int: [ 0.00 0.00 ]
+Key: VSQRTSDZrkz_Int: [ 0.00 0.00 ]
+Key: VSQRTSDm: [ 0.00 0.00 ]
+Key: VSQRTSDm_Int: [ 0.00 0.00 ]
+Key: VSQRTSDr: [ 0.00 0.00 ]
+Key: VSQRTSDr_Int: [ 0.00 0.00 ]
+Key: VSQRTSHZm: [ 0.00 0.00 ]
+Key: VSQRTSHZm_Int: [ 0.00 0.00 ]
+Key: VSQRTSHZmk_Int: [ 0.00 0.00 ]
+Key: VSQRTSHZmkz_Int: [ 0.00 0.00 ]
+Key: VSQRTSHZr: [ 0.00 0.00 ]
+Key: VSQRTSHZr_Int: [ 0.00 0.00 ]
+Key: VSQRTSHZrb_Int: [ 0.00 0.00 ]
+Key: VSQRTSHZrbk_Int: [ 0.00 0.00 ]
+Key: VSQRTSHZrbkz_Int: [ 0.00 0.00 ]
+Key: VSQRTSHZrk_Int: [ 0.00 0.00 ]
+Key: VSQRTSHZrkz_Int: [ 0.00 0.00 ]
+Key: VSQRTSSZm: [ 0.00 0.00 ]
+Key: VSQRTSSZm_Int: [ 0.00 0.00 ]
+Key: VSQRTSSZmk_Int: [ 0.00 0.00 ]
+Key: VSQRTSSZmkz_Int: [ 0.00 0.00 ]
+Key: VSQRTSSZr: [ 0.00 0.00 ]
+Key: VSQRTSSZr_Int: [ 0.00 0.00 ]
+Key: VSQRTSSZrb_Int: [ 0.00 0.00 ]
+Key: VSQRTSSZrbk_Int: [ 0.00 0.00 ]
+Key: VSQRTSSZrbkz_Int: [ 0.00 0.00 ]
+Key: VSQRTSSZrk_Int: [ 0.00 0.00 ]
+Key: VSQRTSSZrkz_Int: [ 0.00 0.00 ]
+Key: VSQRTSSm: [ 0.00 0.00 ]
+Key: VSQRTSSm_Int: [ 0.00 0.00 ]
+Key: VSQRTSSr: [ 0.00 0.00 ]
+Key: VSQRTSSr_Int: [ 0.00 0.00 ]
+Key: VSTMXCSR: [ 0.00 0.00 ]
+Key: VSUBBF: [ 0.00 0.00 ]
+Key: VSUBPDYrm: [ 0.00 0.00 ]
+Key: VSUBPDYrr: [ 0.00 0.00 ]
+Key: VSUBPDZ: [ 0.00 0.00 ]
+Key: VSUBPDZrm: [ 0.00 0.00 ]
+Key: VSUBPDZrmb: [ 0.00 0.00 ]
+Key: VSUBPDZrmbk: [ 0.00 0.00 ]
+Key: VSUBPDZrmbkz: [ 0.00 0.00 ]
+Key: VSUBPDZrmk: [ 0.00 0.00 ]
+Key: VSUBPDZrmkz: [ 0.00 0.00 ]
+Key: VSUBPDZrr: [ 0.00 0.00 ]
+Key: VSUBPDZrrb: [ 0.00 0.00 ]
+Key: VSUBPDZrrbk: [ 0.00 0.00 ]
+Key: VSUBPDZrrbkz: [ 0.00 0.00 ]
+Key: VSUBPDZrrk: [ 0.00 0.00 ]
+Key: VSUBPDZrrkz: [ 0.00 0.00 ]
+Key: VSUBPDrm: [ 0.00 0.00 ]
+Key: VSUBPDrr: [ 0.00 0.00 ]
+Key: VSUBPHZ: [ 0.00 0.00 ]
+Key: VSUBPHZrm: [ 0.00 0.00 ]
+Key: VSUBPHZrmb: [ 0.00 0.00 ]
+Key: VSUBPHZrmbk: [ 0.00 0.00 ]
+Key: VSUBPHZrmbkz: [ 0.00 0.00 ]
+Key: VSUBPHZrmk: [ 0.00 0.00 ]
+Key: VSUBPHZrmkz: [ 0.00 0.00 ]
+Key: VSUBPHZrr: [ 0.00 0.00 ]
+Key: VSUBPHZrrb: [ 0.00 0.00 ]
+Key: VSUBPHZrrbk: [ 0.00 0.00 ]
+Key: VSUBPHZrrbkz: [ 0.00 0.00 ]
+Key: VSUBPHZrrk: [ 0.00 0.00 ]
+Key: VSUBPHZrrkz: [ 0.00 0.00 ]
+Key: VSUBPSYrm: [ 0.00 0.00 ]
+Key: VSUBPSYrr: [ 0.00 0.00 ]
+Key: VSUBPSZ: [ 0.00 0.00 ]
+Key: VSUBPSZrm: [ 0.00 0.00 ]
+Key: VSUBPSZrmb: [ 0.00 0.00 ]
+Key: VSUBPSZrmbk: [ 0.00 0.00 ]
+Key: VSUBPSZrmbkz: [ 0.00 0.00 ]
+Key: VSUBPSZrmk: [ 0.00 0.00 ]
+Key: VSUBPSZrmkz: [ 0.00 0.00 ]
+Key: VSUBPSZrr: [ 0.00 0.00 ]
+Key: VSUBPSZrrb: [ 0.00 0.00 ]
+Key: VSUBPSZrrbk: [ 0.00 0.00 ]
+Key: VSUBPSZrrbkz: [ 0.00 0.00 ]
+Key: VSUBPSZrrk: [ 0.00 0.00 ]
+Key: VSUBPSZrrkz: [ 0.00 0.00 ]
+Key: VSUBPSrm: [ 0.00 0.00 ]
+Key: VSUBPSrr: [ 0.00 0.00 ]
+Key: VSUBSDZrm: [ 0.00 0.00 ]
+Key: VSUBSDZrm_Int: [ 0.00 0.00 ]
+Key: VSUBSDZrmk_Int: [ 0.00 0.00 ]
+Key: VSUBSDZrmkz_Int: [ 0.00 0.00 ]
+Key: VSUBSDZrr: [ 0.00 0.00 ]
+Key: VSUBSDZrr_Int: [ 0.00 0.00 ]
+Key: VSUBSDZrrb_Int: [ 0.00 0.00 ]
+Key: VSUBSDZrrbk_Int: [ 0.00 0.00 ]
+Key: VSUBSDZrrbkz_Int: [ 0.00 0.00 ]
+Key: VSUBSDZrrk_Int: [ 0.00 0.00 ]
+Key: VSUBSDZrrkz_Int: [ 0.00 0.00 ]
+Key: VSUBSDrm: [ 0.00 0.00 ]
+Key: VSUBSDrm_Int: [ 0.00 0.00 ]
+Key: VSUBSDrr: [ 0.00 0.00 ]
+Key: VSUBSDrr_Int: [ 0.00 0.00 ]
+Key: VSUBSHZrm: [ 0.00 0.00 ]
+Key: VSUBSHZrm_Int: [ 0.00 0.00 ]
+Key: VSUBSHZrmk_Int: [ 0.00 0.00 ]
+Key: VSUBSHZrmkz_Int: [ 0.00 0.00 ]
+Key: VSUBSHZrr: [ 0.00 0.00 ]
+Key: VSUBSHZrr_Int: [ 0.00 0.00 ]
+Key: VSUBSHZrrb_Int: [ 0.00 0.00 ]
+Key: VSUBSHZrrbk_Int: [ 0.00 0.00 ]
+Key: VSUBSHZrrbkz_Int: [ 0.00 0.00 ]
+Key: VSUBSHZrrk_Int: [ 0.00 0.00 ]
+Key: VSUBSHZrrkz_Int: [ 0.00 0.00 ]
+Key: VSUBSSZrm: [ 0.00 0.00 ]
+Key: VSUBSSZrm_Int: [ 0.00 0.00 ]
+Key: VSUBSSZrmk_Int: [ 0.00 0.00 ]
+Key: VSUBSSZrmkz_Int: [ 0.00 0.00 ]
+Key: VSUBSSZrr: [ 0.00 0.00 ]
+Key: VSUBSSZrr_Int: [ 0.00 0.00 ]
+Key: VSUBSSZrrb_Int: [ 0.00 0.00 ]
+Key: VSUBSSZrrbk_Int: [ 0.00 0.00 ]
+Key: VSUBSSZrrbkz_Int: [ 0.00 0.00 ]
+Key: VSUBSSZrrk_Int: [ 0.00 0.00 ]
+Key: VSUBSSZrrkz_Int: [ 0.00 0.00 ]
+Key: VSUBSSrm: [ 0.00 0.00 ]
+Key: VSUBSSrm_Int: [ 0.00 0.00 ]
+Key: VSUBSSrr: [ 0.00 0.00 ]
+Key: VSUBSSrr_Int: [ 0.00 0.00 ]
+Key: VTESTPDYrm: [ 0.00 0.00 ]
+Key: VTESTPDYrr: [ 0.00 0.00 ]
+Key: VTESTPDrm: [ 0.00 0.00 ]
+Key: VTESTPDrr: [ 0.00 0.00 ]
+Key: VTESTPSYrm: [ 0.00 0.00 ]
+Key: VTESTPSYrr: [ 0.00 0.00 ]
+Key: VTESTPSrm: [ 0.00 0.00 ]
+Key: VTESTPSrr: [ 0.00 0.00 ]
+Key: VUCOMISDZrm: [ 0.00 0.00 ]
+Key: VUCOMISDZrm_Int: [ 0.00 0.00 ]
+Key: VUCOMISDZrr: [ 0.00 0.00 ]
+Key: VUCOMISDZrr_Int: [ 0.00 0.00 ]
+Key: VUCOMISDZrrb: [ 0.00 0.00 ]
+Key: VUCOMISDrm: [ 0.00 0.00 ]
+Key: VUCOMISDrm_Int: [ 0.00 0.00 ]
+Key: VUCOMISDrr: [ 0.00 0.00 ]
+Key: VUCOMISDrr_Int: [ 0.00 0.00 ]
+Key: VUCOMISHZrm: [ 0.00 0.00 ]
+Key: VUCOMISHZrm_Int: [ 0.00 0.00 ]
+Key: VUCOMISHZrr: [ 0.00 0.00 ]
+Key: VUCOMISHZrr_Int: [ 0.00 0.00 ]
+Key: VUCOMISHZrrb: [ 0.00 0.00 ]
+Key: VUCOMISSZrm: [ 0.00 0.00 ]
+Key: VUCOMISSZrm_Int: [ 0.00 0.00 ]
+Key: VUCOMISSZrr: [ 0.00 0.00 ]
+Key: VUCOMISSZrr_Int: [ 0.00 0.00 ]
+Key: VUCOMISSZrrb: [ 0.00 0.00 ]
+Key: VUCOMISSrm: [ 0.00 0.00 ]
+Key: VUCOMISSrm_Int: [ 0.00 0.00 ]
+Key: VUCOMISSrr: [ 0.00 0.00 ]
+Key: VUCOMISSrr_Int: [ 0.00 0.00 ]
+Key: VUCOMXSDZrm: [ 0.00 0.00 ]
+Key: VUCOMXSDZrm_Int: [ 0.00 0.00 ]
+Key: VUCOMXSDZrr: [ 0.00 0.00 ]
+Key: VUCOMXSDZrr_Int: [ 0.00 0.00 ]
+Key: VUCOMXSDZrrb_Int: [ 0.00 0.00 ]
+Key: VUCOMXSHZrm: [ 0.00 0.00 ]
+Key: VUCOMXSHZrm_Int: [ 0.00 0.00 ]
+Key: VUCOMXSHZrr: [ 0.00 0.00 ]
+Key: VUCOMXSHZrr_Int: [ 0.00 0.00 ]
+Key: VUCOMXSHZrrb_Int: [ 0.00 0.00 ]
+Key: VUCOMXSSZrm: [ 0.00 0.00 ]
+Key: VUCOMXSSZrm_Int: [ 0.00 0.00 ]
+Key: VUCOMXSSZrr: [ 0.00 0.00 ]
+Key: VUCOMXSSZrr_Int: [ 0.00 0.00 ]
+Key: VUCOMXSSZrrb_Int: [ 0.00 0.00 ]
+Key: VUNPCKHPDYrm: [ 0.00 0.00 ]
+Key: VUNPCKHPDYrr: [ 0.00 0.00 ]
+Key: VUNPCKHPDZ: [ 0.00 0.00 ]
+Key: VUNPCKHPDZrm: [ 0.00 0.00 ]
+Key: VUNPCKHPDZrmb: [ 0.00 0.00 ]
+Key: VUNPCKHPDZrmbk: [ 0.00 0.00 ]
+Key: VUNPCKHPDZrmbkz: [ 0.00 0.00 ]
+Key: VUNPCKHPDZrmk: [ 0.00 0.00 ]
+Key: VUNPCKHPDZrmkz: [ 0.00 0.00 ]
+Key: VUNPCKHPDZrr: [ 0.00 0.00 ]
+Key: VUNPCKHPDZrrk: [ 0.00 0.00 ]
+Key: VUNPCKHPDZrrkz: [ 0.00 0.00 ]
+Key: VUNPCKHPDrm: [ 0.00 0.00 ]
+Key: VUNPCKHPDrr: [ 0.00 0.00 ]
+Key: VUNPCKHPSYrm: [ 0.00 0.00 ]
+Key: VUNPCKHPSYrr: [ 0.00 0.00 ]
+Key: VUNPCKHPSZ: [ 0.00 0.00 ]
+Key: VUNPCKHPSZrm: [ 0.00 0.00 ]
+Key: VUNPCKHPSZrmb: [ 0.00 0.00 ]
+Key: VUNPCKHPSZrmbk: [ 0.00 0.00 ]
+Key: VUNPCKHPSZrmbkz: [ 0.00 0.00 ]
+Key: VUNPCKHPSZrmk: [ 0.00 0.00 ]
+Key: VUNPCKHPSZrmkz: [ 0.00 0.00 ]
+Key: VUNPCKHPSZrr: [ 0.00 0.00 ]
+Key: VUNPCKHPSZrrk: [ 0.00 0.00 ]
+Key: VUNPCKHPSZrrkz: [ 0.00 0.00 ]
+Key: VUNPCKHPSrm: [ 0.00 0.00 ]
+Key: VUNPCKHPSrr: [ 0.00 0.00 ]
+Key: VUNPCKLPDYrm: [ 0.00 0.00 ]
+Key: VUNPCKLPDYrr: [ 0.00 0.00 ]
+Key: VUNPCKLPDZ: [ 0.00 0.00 ]
+Key: VUNPCKLPDZrm: [ 0.00 0.00 ]
+Key: VUNPCKLPDZrmb: [ 0.00 0.00 ]
+Key: VUNPCKLPDZrmbk: [ 0.00 0.00 ]
+Key: VUNPCKLPDZrmbkz: [ 0.00 0.00 ]
+Key: VUNPCKLPDZrmk: [ 0.00 0.00 ]
+Key: VUNPCKLPDZrmkz: [ 0.00 0.00 ]
+Key: VUNPCKLPDZrr: [ 0.00 0.00 ]
+Key: VUNPCKLPDZrrk: [ 0.00 0.00 ]
+Key: VUNPCKLPDZrrkz: [ 0.00 0.00 ]
+Key: VUNPCKLPDrm: [ 0.00 0.00 ]
+Key: VUNPCKLPDrr: [ 0.00 0.00 ]
+Key: VUNPCKLPSYrm: [ 0.00 0.00 ]
+Key: VUNPCKLPSYrr: [ 0.00 0.00 ]
+Key: VUNPCKLPSZ: [ 0.00 0.00 ]
+Key: VUNPCKLPSZrm: [ 0.00 0.00 ]
+Key: VUNPCKLPSZrmb: [ 0.00 0.00 ]
+Key: VUNPCKLPSZrmbk: [ 0.00 0.00 ]
+Key: VUNPCKLPSZrmbkz: [ 0.00 0.00 ]
+Key: VUNPCKLPSZrmk: [ 0.00 0.00 ]
+Key: VUNPCKLPSZrmkz: [ 0.00 0.00 ]
+Key: VUNPCKLPSZrr: [ 0.00 0.00 ]
+Key: VUNPCKLPSZrrk: [ 0.00 0.00 ]
+Key: VUNPCKLPSZrrkz: [ 0.00 0.00 ]
+Key: VUNPCKLPSrm: [ 0.00 0.00 ]
+Key: VUNPCKLPSrr: [ 0.00 0.00 ]
+Key: VXORPDYrm: [ 0.00 0.00 ]
+Key: VXORPDYrr: [ 0.00 0.00 ]
+Key: VXORPDZ: [ 0.00 0.00 ]
+Key: VXORPDZrm: [ 0.00 0.00 ]
+Key: VXORPDZrmb: [ 0.00 0.00 ]
+Key: VXORPDZrmbk: [ 0.00 0.00 ]
+Key: VXORPDZrmbkz: [ 0.00 0.00 ]
+Key: VXORPDZrmk: [ 0.00 0.00 ]
+Key: VXORPDZrmkz: [ 0.00 0.00 ]
+Key: VXORPDZrr: [ 0.00 0.00 ]
+Key: VXORPDZrrk: [ 0.00 0.00 ]
+Key: VXORPDZrrkz: [ 0.00 0.00 ]
+Key: VXORPDrm: [ 0.00 0.00 ]
+Key: VXORPDrr: [ 0.00 0.00 ]
+Key: VXORPSYrm: [ 0.00 0.00 ]
+Key: VXORPSYrr: [ 0.00 0.00 ]
+Key: VXORPSZ: [ 0.00 0.00 ]
+Key: VXORPSZrm: [ 0.00 0.00 ]
+Key: VXORPSZrmb: [ 0.00 0.00 ]
+Key: VXORPSZrmbk: [ 0.00 0.00 ]
+Key: VXORPSZrmbkz: [ 0.00 0.00 ]
+Key: VXORPSZrmk: [ 0.00 0.00 ]
+Key: VXORPSZrmkz: [ 0.00 0.00 ]
+Key: VXORPSZrr: [ 0.00 0.00 ]
+Key: VXORPSZrrk: [ 0.00 0.00 ]
+Key: VXORPSZrrkz: [ 0.00 0.00 ]
+Key: VXORPSrm: [ 0.00 0.00 ]
+Key: VXORPSrr: [ 0.00 0.00 ]
+Key: VZEROALL: [ 0.00 0.00 ]
+Key: VZEROUPPER: [ 0.00 0.00 ]
+Key: V_SET: [ 0.00 0.00 ]
+Key: V_SETALLONES: [ 0.00 0.00 ]
+Key: WAIT: [ 0.00 0.00 ]
+Key: WBINVD: [ 0.00 0.00 ]
+Key: WBNOINVD: [ 0.00 0.00 ]
+Key: WRFLAGS: [ 0.00 0.00 ]
+Key: WRFSBASE: [ 0.00 0.00 ]
+Key: WRGSBASE: [ 0.00 0.00 ]
+Key: WRMSR: [ 0.00 0.00 ]
+Key: WRMSRLIST: [ 0.00 0.00 ]
+Key: WRMSRNS: [ 0.00 0.00 ]
+Key: WRMSRNSir: [ 0.00 0.00 ]
+Key: WRMSRNSir_EVEX: [ 0.00 0.00 ]
+Key: WRPKRUr: [ 0.00 0.00 ]
+Key: WRSSD: [ 0.00 0.00 ]
+Key: WRSSD_EVEX: [ 0.00 0.00 ]
+Key: WRSSQ: [ 0.00 0.00 ]
+Key: WRSSQ_EVEX: [ 0.00 0.00 ]
+Key: WRUSSD: [ 0.00 0.00 ]
+Key: WRUSSD_EVEX: [ 0.00 0.00 ]
+Key: WRUSSQ: [ 0.00 0.00 ]
+Key: WRUSSQ_EVEX: [ 0.00 0.00 ]
+Key: XABORT: [ 0.00 0.00 ]
+Key: XABORT_DEF: [ 0.00 0.00 ]
+Key: XACQUIRE_PREFIX: [ 0.00 0.00 ]
+Key: XADD: [ 0.00 0.00 ]
+Key: XAM_F: [ 0.00 0.00 ]
+Key: XAM_Fp: [ 0.00 0.00 ]
+Key: XBEGIN: [ 0.00 0.00 ]
+Key: XCHG: [ 0.00 0.00 ]
+Key: XCH_F: [ 0.00 0.00 ]
+Key: XCRYPTCBC: [ 0.00 0.00 ]
+Key: XCRYPTCFB: [ 0.00 0.00 ]
+Key: XCRYPTCTR: [ 0.00 0.00 ]
+Key: XCRYPTECB: [ 0.00 0.00 ]
+Key: XCRYPTOFB: [ 0.00 0.00 ]
+Key: XEND: [ 0.00 0.00 ]
+Key: XGETBV: [ 0.00 0.00 ]
+Key: XLAT: [ 0.00 0.00 ]
+Key: XOR: [ 0.00 0.00 ]
+Key: XORPDrm: [ 0.00 0.00 ]
+Key: XORPDrr: [ 0.00 0.00 ]
+Key: XORPSrm: [ 0.00 0.00 ]
+Key: XORPSrr: [ 0.00 0.00 ]
+Key: XRELEASE_PREFIX: [ 0.00 0.00 ]
+Key: XRESLDTRK: [ 0.00 0.00 ]
+Key: XRSTOR: [ 0.00 0.00 ]
+Key: XRSTORS: [ 0.00 0.00 ]
+Key: XSAVE: [ 0.00 0.00 ]
+Key: XSAVEC: [ 0.00 0.00 ]
+Key: XSAVEOPT: [ 0.00 0.00 ]
+Key: XSAVES: [ 0.00 0.00 ]
+Key: XSETBV: [ 0.00 0.00 ]
+Key: XSHA: [ 0.00 0.00 ]
+Key: XSTORE: [ 0.00 0.00 ]
+Key: XSUSLDTRK: [ 0.00 0.00 ]
+Key: XTEST: [ 0.00 0.00 ]
diff --git a/llvm/test/CodeGen/MIR2Vec/Inputs/reference_x86_vocab_wo=0.5_print.txt b/llvm/test/CodeGen/MIR2Vec/Inputs/reference_x86_vocab_wo=0.5_print.txt
new file mode 100644
index 0000000..4409e6d
--- /dev/null
+++ b/llvm/test/CodeGen/MIR2Vec/Inputs/reference_x86_vocab_wo=0.5_print.txt
@@ -0,0 +1,6882 @@
+Key: AAA: [ 0.00 0.00 ]
+Key: AAD: [ 0.00 0.00 ]
+Key: AADD: [ 0.00 0.00 ]
+Key: AAM: [ 0.00 0.00 ]
+Key: AAND: [ 0.00 0.00 ]
+Key: AAS: [ 0.00 0.00 ]
+Key: ABS_F: [ 0.00 0.00 ]
+Key: ABS_Fp: [ 0.50 1.00 ]
+Key: ADC: [ 1.50 2.00 ]
+Key: ADCX: [ 0.00 0.00 ]
+Key: ADD: [ 2.50 3.00 ]
+Key: ADDPDrm: [ 3.50 4.00 ]
+Key: ADDPDrr: [ 4.50 5.00 ]
+Key: ADDPSrm: [ 0.00 0.00 ]
+Key: ADDPSrr: [ 5.50 6.00 ]
+Key: ADDR: [ 0.00 0.00 ]
+Key: ADDSDrm: [ 6.50 7.00 ]
+Key: ADDSDrm_Int: [ 0.00 0.00 ]
+Key: ADDSDrr: [ 0.00 0.00 ]
+Key: ADDSDrr_Int: [ 0.00 0.00 ]
+Key: ADDSSrm: [ 0.00 0.00 ]
+Key: ADDSSrm_Int: [ 0.00 0.00 ]
+Key: ADDSSrr: [ 0.00 0.00 ]
+Key: ADDSSrr_Int: [ 0.00 0.00 ]
+Key: ADDSUBPDrm: [ 0.00 0.00 ]
+Key: ADDSUBPDrr: [ 0.00 0.00 ]
+Key: ADDSUBPSrm: [ 0.00 0.00 ]
+Key: ADDSUBPSrr: [ 0.00 0.00 ]
+Key: ADD_F: [ 0.00 0.00 ]
+Key: ADD_FI: [ 0.00 0.00 ]
+Key: ADD_FPrST: [ 0.00 0.00 ]
+Key: ADD_FST: [ 0.00 0.00 ]
+Key: ADD_Fp: [ 0.00 0.00 ]
+Key: ADD_FpI: [ 0.00 0.00 ]
+Key: ADD_FrST: [ 0.00 0.00 ]
+Key: ADJCALLSTACKDOWN: [ 0.00 0.00 ]
+Key: ADJCALLSTACKUP: [ 0.00 0.00 ]
+Key: ADOX: [ 0.00 0.00 ]
+Key: AESDEC: [ 0.00 0.00 ]
+Key: AESDECLASTrm: [ 0.00 0.00 ]
+Key: AESDECLASTrr: [ 0.00 0.00 ]
+Key: AESDECWIDE: [ 0.00 0.00 ]
+Key: AESDECrm: [ 0.00 0.00 ]
+Key: AESDECrr: [ 0.00 0.00 ]
+Key: AESENC: [ 0.00 0.00 ]
+Key: AESENCLASTrm: [ 0.00 0.00 ]
+Key: AESENCLASTrr: [ 0.00 0.00 ]
+Key: AESENCWIDE: [ 0.00 0.00 ]
+Key: AESENCrm: [ 0.00 0.00 ]
+Key: AESENCrr: [ 0.00 0.00 ]
+Key: AESIMCrm: [ 0.00 0.00 ]
+Key: AESIMCrr: [ 0.00 0.00 ]
+Key: AESKEYGENASSISTrmi: [ 0.00 0.00 ]
+Key: AESKEYGENASSISTrri: [ 0.00 0.00 ]
+Key: AND: [ 0.00 0.00 ]
+Key: ANDN: [ 0.00 0.00 ]
+Key: ANDNPDrm: [ 0.00 0.00 ]
+Key: ANDNPDrr: [ 0.00 0.00 ]
+Key: ANDNPSrm: [ 0.00 0.00 ]
+Key: ANDNPSrr: [ 0.00 0.00 ]
+Key: ANDPDrm: [ 0.00 0.00 ]
+Key: ANDPDrr: [ 0.00 0.00 ]
+Key: ANDPSrm: [ 0.00 0.00 ]
+Key: ANDPSrr: [ 0.00 0.00 ]
+Key: ANNOTATION_LABEL: [ 0.00 0.00 ]
+Key: AOR: [ 0.00 0.00 ]
+Key: ARITH_FENCE: [ 0.00 0.00 ]
+Key: ARPL: [ 0.00 0.00 ]
+Key: ASAN_CHECK_MEMACCESS: [ 0.00 0.00 ]
+Key: AVX: [ 0.00 0.00 ]
+Key: AVX_SET: [ 0.00 0.00 ]
+Key: AXOR: [ 0.00 0.00 ]
+Key: BEXTR: [ 0.00 0.00 ]
+Key: BEXTRI: [ 0.00 0.00 ]
+Key: BLCFILL: [ 0.00 0.00 ]
+Key: BLCI: [ 0.00 0.00 ]
+Key: BLCIC: [ 0.00 0.00 ]
+Key: BLCMSK: [ 0.00 0.00 ]
+Key: BLCS: [ 0.00 0.00 ]
+Key: BLENDPDrmi: [ 0.00 0.00 ]
+Key: BLENDPDrri: [ 0.00 0.00 ]
+Key: BLENDPSrmi: [ 0.00 0.00 ]
+Key: BLENDPSrri: [ 0.00 0.00 ]
+Key: BLENDVPDrm: [ 0.00 0.00 ]
+Key: BLENDVPDrr: [ 0.00 0.00 ]
+Key: BLENDVPSrm: [ 0.00 0.00 ]
+Key: BLENDVPSrr: [ 0.00 0.00 ]
+Key: BLSFILL: [ 0.00 0.00 ]
+Key: BLSI: [ 0.00 0.00 ]
+Key: BLSIC: [ 0.00 0.00 ]
+Key: BLSMSK: [ 0.00 0.00 ]
+Key: BLSR: [ 0.00 0.00 ]
+Key: BOUNDS: [ 0.00 0.00 ]
+Key: BSF: [ 0.00 0.00 ]
+Key: BSR: [ 0.00 0.00 ]
+Key: BSWAP: [ 0.00 0.00 ]
+Key: BT: [ 0.00 0.00 ]
+Key: BTC: [ 0.00 0.00 ]
+Key: BTR: [ 0.00 0.00 ]
+Key: BTS: [ 0.00 0.00 ]
+Key: BUNDLE: [ 0.00 0.00 ]
+Key: BZHI: [ 0.00 0.00 ]
+Key: CALL: [ 0.00 0.00 ]
+Key: CALLpcrel: [ 0.00 0.00 ]
+Key: CATCHRET: [ 0.00 0.00 ]
+Key: CBW: [ 0.00 0.00 ]
+Key: CCMP: [ 0.00 0.00 ]
+Key: CDQ: [ 0.00 0.00 ]
+Key: CDQE: [ 0.00 0.00 ]
+Key: CFCMOV: [ 0.00 0.00 ]
+Key: CFI_INSTRUCTION: [ 0.00 0.00 ]
+Key: CHS_F: [ 0.00 0.00 ]
+Key: CHS_Fp: [ 0.00 0.00 ]
+Key: CLAC: [ 0.00 0.00 ]
+Key: CLC: [ 0.00 0.00 ]
+Key: CLD: [ 0.00 0.00 ]
+Key: CLDEMOTE: [ 0.00 0.00 ]
+Key: CLEANUPRET: [ 0.00 0.00 ]
+Key: CLFLUSH: [ 0.00 0.00 ]
+Key: CLFLUSHOPT: [ 0.00 0.00 ]
+Key: CLGI: [ 0.00 0.00 ]
+Key: CLI: [ 0.00 0.00 ]
+Key: CLRSSBSY: [ 0.00 0.00 ]
+Key: CLTS: [ 0.00 0.00 ]
+Key: CLUI: [ 0.00 0.00 ]
+Key: CLWB: [ 0.00 0.00 ]
+Key: CLZERO: [ 0.00 0.00 ]
+Key: CMC: [ 0.00 0.00 ]
+Key: CMOV: [ 0.00 0.00 ]
+Key: CMOVBE_F: [ 0.00 0.00 ]
+Key: CMOVBE_Fp: [ 0.00 0.00 ]
+Key: CMOVB_F: [ 0.00 0.00 ]
+Key: CMOVB_Fp: [ 0.00 0.00 ]
+Key: CMOVE_F: [ 0.00 0.00 ]
+Key: CMOVE_Fp: [ 0.00 0.00 ]
+Key: CMOVNBE_F: [ 0.00 0.00 ]
+Key: CMOVNBE_Fp: [ 0.00 0.00 ]
+Key: CMOVNB_F: [ 0.00 0.00 ]
+Key: CMOVNB_Fp: [ 0.00 0.00 ]
+Key: CMOVNE_F: [ 0.00 0.00 ]
+Key: CMOVNE_Fp: [ 0.00 0.00 ]
+Key: CMOVNP_F: [ 0.00 0.00 ]
+Key: CMOVNP_Fp: [ 0.00 0.00 ]
+Key: CMOVP_F: [ 0.00 0.00 ]
+Key: CMOVP_Fp: [ 0.00 0.00 ]
+Key: CMOV_FR: [ 0.00 0.00 ]
+Key: CMOV_GR: [ 0.00 0.00 ]
+Key: CMOV_RFP: [ 0.00 0.00 ]
+Key: CMOV_VK: [ 0.00 0.00 ]
+Key: CMOV_VR: [ 0.00 0.00 ]
+Key: CMP: [ 0.00 0.00 ]
+Key: CMPCCXADDmr: [ 0.00 0.00 ]
+Key: CMPPDrmi: [ 0.00 0.00 ]
+Key: CMPPDrri: [ 0.00 0.00 ]
+Key: CMPPSrmi: [ 0.00 0.00 ]
+Key: CMPPSrri: [ 0.00 0.00 ]
+Key: CMPSB: [ 0.00 0.00 ]
+Key: CMPSDrmi: [ 0.00 0.00 ]
+Key: CMPSDrmi_Int: [ 0.00 0.00 ]
+Key: CMPSDrri: [ 0.00 0.00 ]
+Key: CMPSDrri_Int: [ 0.00 0.00 ]
+Key: CMPSL: [ 0.00 0.00 ]
+Key: CMPSQ: [ 0.00 0.00 ]
+Key: CMPSSrmi: [ 0.00 0.00 ]
+Key: CMPSSrmi_Int: [ 0.00 0.00 ]
+Key: CMPSSrri: [ 0.00 0.00 ]
+Key: CMPSSrri_Int: [ 0.00 0.00 ]
+Key: CMPSW: [ 0.00 0.00 ]
+Key: CMPXCHG: [ 0.00 0.00 ]
+Key: COMISDrm: [ 0.00 0.00 ]
+Key: COMISDrm_Int: [ 0.00 0.00 ]
+Key: COMISDrr: [ 0.00 0.00 ]
+Key: COMISDrr_Int: [ 0.00 0.00 ]
+Key: COMISSrm: [ 0.00 0.00 ]
+Key: COMISSrm_Int: [ 0.00 0.00 ]
+Key: COMISSrr: [ 0.00 0.00 ]
+Key: COMISSrr_Int: [ 0.00 0.00 ]
+Key: COMP_FST: [ 0.00 0.00 ]
+Key: COM_FIPr: [ 0.00 0.00 ]
+Key: COM_FIr: [ 0.00 0.00 ]
+Key: COM_FST: [ 0.00 0.00 ]
+Key: COM_FpIr: [ 0.00 0.00 ]
+Key: COM_Fpr: [ 0.00 0.00 ]
+Key: CONVERGENCECTRL_ANCHOR: [ 0.00 0.00 ]
+Key: CONVERGENCECTRL_ENTRY: [ 0.00 0.00 ]
+Key: CONVERGENCECTRL_GLUE: [ 0.00 0.00 ]
+Key: CONVERGENCECTRL_LOOP: [ 0.00 0.00 ]
+Key: COPY: [ 0.00 0.00 ]
+Key: COPY_TO_REGCLASS: [ 0.00 0.00 ]
+Key: CPUID: [ 0.00 0.00 ]
+Key: CQO: [ 0.00 0.00 ]
+Key: CRC: [ 0.00 0.00 ]
+Key: CS_PREFIX: [ 0.00 0.00 ]
+Key: CTEST: [ 0.00 0.00 ]
+Key: CVTDQ: [ 0.00 0.00 ]
+Key: CVTPD: [ 0.00 0.00 ]
+Key: CVTPS: [ 0.00 0.00 ]
+Key: CVTSD: [ 0.00 0.00 ]
+Key: CVTSI: [ 0.00 0.00 ]
+Key: CVTSS: [ 0.00 0.00 ]
+Key: CVTTPD: [ 0.00 0.00 ]
+Key: CVTTPS: [ 0.00 0.00 ]
+Key: CVTTSD: [ 0.00 0.00 ]
+Key: CVTTSS: [ 0.00 0.00 ]
+Key: CWD: [ 0.00 0.00 ]
+Key: CWDE: [ 0.00 0.00 ]
+Key: DAA: [ 0.00 0.00 ]
+Key: DAS: [ 0.00 0.00 ]
+Key: DATA: [ 0.00 0.00 ]
+Key: DBG_INSTR_REF: [ 0.00 0.00 ]
+Key: DBG_LABEL: [ 0.00 0.00 ]
+Key: DBG_PHI: [ 0.00 0.00 ]
+Key: DBG_VALUE: [ 0.00 0.00 ]
+Key: DBG_VALUE_LIST: [ 0.00 0.00 ]
+Key: DEC: [ 0.00 0.00 ]
+Key: DIV: [ 0.00 0.00 ]
+Key: DIVPDrm: [ 0.00 0.00 ]
+Key: DIVPDrr: [ 0.00 0.00 ]
+Key: DIVPSrm: [ 0.00 0.00 ]
+Key: DIVPSrr: [ 0.00 0.00 ]
+Key: DIVR_F: [ 0.00 0.00 ]
+Key: DIVR_FI: [ 0.00 0.00 ]
+Key: DIVR_FPrST: [ 0.00 0.00 ]
+Key: DIVR_FST: [ 0.00 0.00 ]
+Key: DIVR_Fp: [ 0.00 0.00 ]
+Key: DIVR_FpI: [ 0.00 0.00 ]
+Key: DIVR_FrST: [ 0.00 0.00 ]
+Key: DIVSDrm: [ 0.00 0.00 ]
+Key: DIVSDrm_Int: [ 0.00 0.00 ]
+Key: DIVSDrr: [ 0.00 0.00 ]
+Key: DIVSDrr_Int: [ 0.00 0.00 ]
+Key: DIVSSrm: [ 0.00 0.00 ]
+Key: DIVSSrm_Int: [ 0.00 0.00 ]
+Key: DIVSSrr: [ 0.00 0.00 ]
+Key: DIVSSrr_Int: [ 0.00 0.00 ]
+Key: DIV_F: [ 0.00 0.00 ]
+Key: DIV_FI: [ 0.00 0.00 ]
+Key: DIV_FPrST: [ 0.00 0.00 ]
+Key: DIV_FST: [ 0.00 0.00 ]
+Key: DIV_Fp: [ 0.00 0.00 ]
+Key: DIV_FpI: [ 0.00 0.00 ]
+Key: DIV_FrST: [ 0.00 0.00 ]
+Key: DPPDrmi: [ 0.00 0.00 ]
+Key: DPPDrri: [ 0.00 0.00 ]
+Key: DPPSrmi: [ 0.00 0.00 ]
+Key: DPPSrri: [ 0.00 0.00 ]
+Key: DS_PREFIX: [ 0.00 0.00 ]
+Key: DYN_ALLOCA: [ 0.00 0.00 ]
+Key: EH_LABEL: [ 0.00 0.00 ]
+Key: EH_RETURN: [ 0.00 0.00 ]
+Key: EH_SjLj_LongJmp: [ 0.00 0.00 ]
+Key: EH_SjLj_SetJmp: [ 0.00 0.00 ]
+Key: EH_SjLj_Setup: [ 0.00 0.00 ]
+Key: ENCLS: [ 0.00 0.00 ]
+Key: ENCLU: [ 0.00 0.00 ]
+Key: ENCLV: [ 0.00 0.00 ]
+Key: ENCODEKEY: [ 0.00 0.00 ]
+Key: ENDBR: [ 0.00 0.00 ]
+Key: ENQCMD: [ 0.00 0.00 ]
+Key: ENQCMDS: [ 0.00 0.00 ]
+Key: ENTER: [ 0.00 0.00 ]
+Key: ERETS: [ 0.00 0.00 ]
+Key: ERETU: [ 0.00 0.00 ]
+Key: ES_PREFIX: [ 0.00 0.00 ]
+Key: EXTRACTPSmri: [ 0.00 0.00 ]
+Key: EXTRACTPSrri: [ 0.00 0.00 ]
+Key: EXTRACT_SUBREG: [ 0.00 0.00 ]
+Key: EXTRQ: [ 0.00 0.00 ]
+Key: EXTRQI: [ 0.00 0.00 ]
+Key: F: [ 0.00 0.00 ]
+Key: FAKE_USE: [ 0.00 0.00 ]
+Key: FARCALL: [ 0.00 0.00 ]
+Key: FARJMP: [ 0.00 0.00 ]
+Key: FAULTING_OP: [ 0.00 0.00 ]
+Key: FBLDm: [ 0.00 0.00 ]
+Key: FBSTPm: [ 0.00 0.00 ]
+Key: FCOM: [ 0.00 0.00 ]
+Key: FCOMP: [ 0.00 0.00 ]
+Key: FCOMPP: [ 0.00 0.00 ]
+Key: FCOS: [ 0.00 0.00 ]
+Key: FDECSTP: [ 0.00 0.00 ]
+Key: FEMMS: [ 0.00 0.00 ]
+Key: FENTRY_CALL: [ 0.00 0.00 ]
+Key: FFREE: [ 0.00 0.00 ]
+Key: FFREEP: [ 0.00 0.00 ]
+Key: FICOM: [ 0.00 0.00 ]
+Key: FICOMP: [ 0.00 0.00 ]
+Key: FINCSTP: [ 0.00 0.00 ]
+Key: FLDCW: [ 0.00 0.00 ]
+Key: FLDENVm: [ 0.00 0.00 ]
+Key: FLDL: [ 0.00 0.00 ]
+Key: FLDLG: [ 0.00 0.00 ]
+Key: FLDLN: [ 0.00 0.00 ]
+Key: FLDPI: [ 0.00 0.00 ]
+Key: FNCLEX: [ 0.00 0.00 ]
+Key: FNINIT: [ 0.00 0.00 ]
+Key: FNOP: [ 0.00 0.00 ]
+Key: FNSTCW: [ 0.00 0.00 ]
+Key: FNSTSW: [ 0.00 0.00 ]
+Key: FNSTSWm: [ 0.00 0.00 ]
+Key: FP: [ 0.00 0.00 ]
+Key: FPATAN: [ 0.00 0.00 ]
+Key: FPREM: [ 0.00 0.00 ]
+Key: FPTAN: [ 0.00 0.00 ]
+Key: FRNDINT: [ 0.00 0.00 ]
+Key: FRSTORm: [ 0.00 0.00 ]
+Key: FSAVEm: [ 0.00 0.00 ]
+Key: FSCALE: [ 0.00 0.00 ]
+Key: FSIN: [ 0.00 0.00 ]
+Key: FSINCOS: [ 0.00 0.00 ]
+Key: FSTENVm: [ 0.00 0.00 ]
+Key: FS_PREFIX: [ 0.00 0.00 ]
+Key: FXRSTOR: [ 0.00 0.00 ]
+Key: FXSAVE: [ 0.00 0.00 ]
+Key: FXTRACT: [ 0.00 0.00 ]
+Key: FYL: [ 0.00 0.00 ]
+Key: FsFLD: [ 0.00 0.00 ]
+Key: GC_LABEL: [ 0.00 0.00 ]
+Key: GETSEC: [ 0.00 0.00 ]
+Key: GF: [ 0.00 0.00 ]
+Key: GS_PREFIX: [ 0.00 0.00 ]
+Key: G_ABDS: [ 0.00 0.00 ]
+Key: G_ABDU: [ 0.00 0.00 ]
+Key: G_ABS: [ 0.00 0.00 ]
+Key: G_ADD: [ 0.00 0.00 ]
+Key: G_ADDRSPACE_CAST: [ 0.00 0.00 ]
+Key: G_AND: [ 0.00 0.00 ]
+Key: G_ANYEXT: [ 0.00 0.00 ]
+Key: G_ASHR: [ 0.00 0.00 ]
+Key: G_ASSERT_ALIGN: [ 0.00 0.00 ]
+Key: G_ASSERT_SEXT: [ 0.00 0.00 ]
+Key: G_ASSERT_ZEXT: [ 0.00 0.00 ]
+Key: G_ATOMICRMW_ADD: [ 0.00 0.00 ]
+Key: G_ATOMICRMW_AND: [ 0.00 0.00 ]
+Key: G_ATOMICRMW_FADD: [ 0.00 0.00 ]
+Key: G_ATOMICRMW_FMAX: [ 0.00 0.00 ]
+Key: G_ATOMICRMW_FMAXIMUM: [ 0.00 0.00 ]
+Key: G_ATOMICRMW_FMIN: [ 0.00 0.00 ]
+Key: G_ATOMICRMW_FMINIMUM: [ 0.00 0.00 ]
+Key: G_ATOMICRMW_FSUB: [ 0.00 0.00 ]
+Key: G_ATOMICRMW_MAX: [ 0.00 0.00 ]
+Key: G_ATOMICRMW_MIN: [ 0.00 0.00 ]
+Key: G_ATOMICRMW_NAND: [ 0.00 0.00 ]
+Key: G_ATOMICRMW_OR: [ 0.00 0.00 ]
+Key: G_ATOMICRMW_SUB: [ 0.00 0.00 ]
+Key: G_ATOMICRMW_UDEC_WRAP: [ 0.00 0.00 ]
+Key: G_ATOMICRMW_UINC_WRAP: [ 0.00 0.00 ]
+Key: G_ATOMICRMW_UMAX: [ 0.00 0.00 ]
+Key: G_ATOMICRMW_UMIN: [ 0.00 0.00 ]
+Key: G_ATOMICRMW_USUB_COND: [ 0.00 0.00 ]
+Key: G_ATOMICRMW_USUB_SAT: [ 0.00 0.00 ]
+Key: G_ATOMICRMW_XCHG: [ 0.00 0.00 ]
+Key: G_ATOMICRMW_XOR: [ 0.00 0.00 ]
+Key: G_ATOMIC_CMPXCHG: [ 0.00 0.00 ]
+Key: G_ATOMIC_CMPXCHG_WITH_SUCCESS: [ 0.00 0.00 ]
+Key: G_BITCAST: [ 0.00 0.00 ]
+Key: G_BITREVERSE: [ 0.00 0.00 ]
+Key: G_BLOCK_ADDR: [ 0.00 0.00 ]
+Key: G_BR: [ 0.00 0.00 ]
+Key: G_BRCOND: [ 0.00 0.00 ]
+Key: G_BRINDIRECT: [ 0.00 0.00 ]
+Key: G_BRJT: [ 0.00 0.00 ]
+Key: G_BSWAP: [ 0.00 0.00 ]
+Key: G_BUILD_VECTOR: [ 0.00 0.00 ]
+Key: G_BUILD_VECTOR_TRUNC: [ 0.00 0.00 ]
+Key: G_BZERO: [ 0.00 0.00 ]
+Key: G_CONCAT_VECTORS: [ 0.00 0.00 ]
+Key: G_CONSTANT: [ 0.00 0.00 ]
+Key: G_CONSTANT_FOLD_BARRIER: [ 0.00 0.00 ]
+Key: G_CONSTANT_POOL: [ 0.00 0.00 ]
+Key: G_CTLZ: [ 0.00 0.00 ]
+Key: G_CTLZ_ZERO_UNDEF: [ 0.00 0.00 ]
+Key: G_CTPOP: [ 0.00 0.00 ]
+Key: G_CTTZ: [ 0.00 0.00 ]
+Key: G_CTTZ_ZERO_UNDEF: [ 0.00 0.00 ]
+Key: G_DEBUGTRAP: [ 0.00 0.00 ]
+Key: G_DYN_STACKALLOC: [ 0.00 0.00 ]
+Key: G_EXTRACT: [ 0.00 0.00 ]
+Key: G_EXTRACT_SUBVECTOR: [ 0.00 0.00 ]
+Key: G_EXTRACT_VECTOR_ELT: [ 0.00 0.00 ]
+Key: G_FABS: [ 0.00 0.00 ]
+Key: G_FACOS: [ 0.00 0.00 ]
+Key: G_FADD: [ 0.00 0.00 ]
+Key: G_FASIN: [ 0.00 0.00 ]
+Key: G_FATAN: [ 0.00 0.00 ]
+Key: G_FCANONICALIZE: [ 0.00 0.00 ]
+Key: G_FCEIL: [ 0.00 0.00 ]
+Key: G_FCMP: [ 0.00 0.00 ]
+Key: G_FCONSTANT: [ 0.00 0.00 ]
+Key: G_FCOPYSIGN: [ 0.00 0.00 ]
+Key: G_FCOS: [ 0.00 0.00 ]
+Key: G_FCOSH: [ 0.00 0.00 ]
+Key: G_FDIV: [ 0.00 0.00 ]
+Key: G_FENCE: [ 0.00 0.00 ]
+Key: G_FEXP: [ 0.00 0.00 ]
+Key: G_FFLOOR: [ 0.00 0.00 ]
+Key: G_FFREXP: [ 0.00 0.00 ]
+Key: G_FILD: [ 0.00 0.00 ]
+Key: G_FIST: [ 0.00 0.00 ]
+Key: G_FLDCW: [ 0.00 0.00 ]
+Key: G_FLDEXP: [ 0.00 0.00 ]
+Key: G_FLOG: [ 0.00 0.00 ]
+Key: G_FMA: [ 0.00 0.00 ]
+Key: G_FMAD: [ 0.00 0.00 ]
+Key: G_FMAXIMUM: [ 0.00 0.00 ]
+Key: G_FMAXIMUMNUM: [ 0.00 0.00 ]
+Key: G_FMAXNUM: [ 0.00 0.00 ]
+Key: G_FMAXNUM_IEEE: [ 0.00 0.00 ]
+Key: G_FMINIMUM: [ 0.00 0.00 ]
+Key: G_FMINIMUMNUM: [ 0.00 0.00 ]
+Key: G_FMINNUM: [ 0.00 0.00 ]
+Key: G_FMINNUM_IEEE: [ 0.00 0.00 ]
+Key: G_FMODF: [ 0.00 0.00 ]
+Key: G_FMUL: [ 0.00 0.00 ]
+Key: G_FNEARBYINT: [ 0.00 0.00 ]
+Key: G_FNEG: [ 0.00 0.00 ]
+Key: G_FNSTCW: [ 0.00 0.00 ]
+Key: G_FPEXT: [ 0.00 0.00 ]
+Key: G_FPOW: [ 0.00 0.00 ]
+Key: G_FPOWI: [ 0.00 0.00 ]
+Key: G_FPTOSI: [ 0.00 0.00 ]
+Key: G_FPTOSI_SAT: [ 0.00 0.00 ]
+Key: G_FPTOUI: [ 0.00 0.00 ]
+Key: G_FPTOUI_SAT: [ 0.00 0.00 ]
+Key: G_FPTRUNC: [ 0.00 0.00 ]
+Key: G_FRAME_INDEX: [ 0.00 0.00 ]
+Key: G_FREEZE: [ 0.00 0.00 ]
+Key: G_FREM: [ 0.00 0.00 ]
+Key: G_FRINT: [ 0.00 0.00 ]
+Key: G_FSHL: [ 0.00 0.00 ]
+Key: G_FSHR: [ 0.00 0.00 ]
+Key: G_FSIN: [ 0.00 0.00 ]
+Key: G_FSINCOS: [ 0.00 0.00 ]
+Key: G_FSINH: [ 0.00 0.00 ]
+Key: G_FSQRT: [ 0.00 0.00 ]
+Key: G_FSUB: [ 0.00 0.00 ]
+Key: G_FTAN: [ 0.00 0.00 ]
+Key: G_FTANH: [ 0.00 0.00 ]
+Key: G_GET_FPENV: [ 0.00 0.00 ]
+Key: G_GET_FPMODE: [ 0.00 0.00 ]
+Key: G_GET_ROUNDING: [ 0.00 0.00 ]
+Key: G_GLOBAL_VALUE: [ 0.00 0.00 ]
+Key: G_ICMP: [ 0.00 0.00 ]
+Key: G_IMPLICIT_DEF: [ 0.00 0.00 ]
+Key: G_INDEXED_LOAD: [ 0.00 0.00 ]
+Key: G_INDEXED_SEXTLOAD: [ 0.00 0.00 ]
+Key: G_INDEXED_STORE: [ 0.00 0.00 ]
+Key: G_INDEXED_ZEXTLOAD: [ 0.00 0.00 ]
+Key: G_INSERT: [ 0.00 0.00 ]
+Key: G_INSERT_SUBVECTOR: [ 0.00 0.00 ]
+Key: G_INSERT_VECTOR_ELT: [ 0.00 0.00 ]
+Key: G_INTRINSIC: [ 0.00 0.00 ]
+Key: G_INTRINSIC_CONVERGENT: [ 0.00 0.00 ]
+Key: G_INTRINSIC_CONVERGENT_W_SIDE_EFFECTS: [ 0.00 0.00 ]
+Key: G_INTRINSIC_FPTRUNC_ROUND: [ 0.00 0.00 ]
+Key: G_INTRINSIC_LLRINT: [ 0.00 0.00 ]
+Key: G_INTRINSIC_LRINT: [ 0.00 0.00 ]
+Key: G_INTRINSIC_ROUND: [ 0.00 0.00 ]
+Key: G_INTRINSIC_ROUNDEVEN: [ 0.00 0.00 ]
+Key: G_INTRINSIC_TRUNC: [ 0.00 0.00 ]
+Key: G_INTRINSIC_W_SIDE_EFFECTS: [ 0.00 0.00 ]
+Key: G_INTTOPTR: [ 0.00 0.00 ]
+Key: G_INVOKE_REGION_START: [ 0.00 0.00 ]
+Key: G_IS_FPCLASS: [ 0.00 0.00 ]
+Key: G_JUMP_TABLE: [ 0.00 0.00 ]
+Key: G_LLROUND: [ 0.00 0.00 ]
+Key: G_LOAD: [ 0.00 0.00 ]
+Key: G_LROUND: [ 0.00 0.00 ]
+Key: G_LSHR: [ 0.00 0.00 ]
+Key: G_MEMCPY: [ 0.00 0.00 ]
+Key: G_MEMCPY_INLINE: [ 0.00 0.00 ]
+Key: G_MEMMOVE: [ 0.00 0.00 ]
+Key: G_MEMSET: [ 0.00 0.00 ]
+Key: G_MERGE_VALUES: [ 0.00 0.00 ]
+Key: G_MUL: [ 0.00 0.00 ]
+Key: G_OR: [ 0.00 0.00 ]
+Key: G_PHI: [ 0.00 0.00 ]
+Key: G_PREFETCH: [ 0.00 0.00 ]
+Key: G_PTRAUTH_GLOBAL_VALUE: [ 0.00 0.00 ]
+Key: G_PTRMASK: [ 0.00 0.00 ]
+Key: G_PTRTOINT: [ 0.00 0.00 ]
+Key: G_PTR_ADD: [ 0.00 0.00 ]
+Key: G_READCYCLECOUNTER: [ 0.00 0.00 ]
+Key: G_READSTEADYCOUNTER: [ 0.00 0.00 ]
+Key: G_READ_REGISTER: [ 0.00 0.00 ]
+Key: G_RESET_FPENV: [ 0.00 0.00 ]
+Key: G_RESET_FPMODE: [ 0.00 0.00 ]
+Key: G_ROTL: [ 0.00 0.00 ]
+Key: G_ROTR: [ 0.00 0.00 ]
+Key: G_SADDE: [ 0.00 0.00 ]
+Key: G_SADDO: [ 0.00 0.00 ]
+Key: G_SADDSAT: [ 0.00 0.00 ]
+Key: G_SBFX: [ 0.00 0.00 ]
+Key: G_SCMP: [ 0.00 0.00 ]
+Key: G_SDIV: [ 0.00 0.00 ]
+Key: G_SDIVFIX: [ 0.00 0.00 ]
+Key: G_SDIVFIXSAT: [ 0.00 0.00 ]
+Key: G_SDIVREM: [ 0.00 0.00 ]
+Key: G_SELECT: [ 0.00 0.00 ]
+Key: G_SET_FPENV: [ 0.00 0.00 ]
+Key: G_SET_FPMODE: [ 0.00 0.00 ]
+Key: G_SET_ROUNDING: [ 0.00 0.00 ]
+Key: G_SEXT: [ 0.00 0.00 ]
+Key: G_SEXTLOAD: [ 0.00 0.00 ]
+Key: G_SEXT_INREG: [ 0.00 0.00 ]
+Key: G_SHL: [ 0.00 0.00 ]
+Key: G_SHUFFLE_VECTOR: [ 0.00 0.00 ]
+Key: G_SITOFP: [ 0.00 0.00 ]
+Key: G_SMAX: [ 0.00 0.00 ]
+Key: G_SMIN: [ 0.00 0.00 ]
+Key: G_SMULFIX: [ 0.00 0.00 ]
+Key: G_SMULFIXSAT: [ 0.00 0.00 ]
+Key: G_SMULH: [ 0.00 0.00 ]
+Key: G_SMULO: [ 0.00 0.00 ]
+Key: G_SPLAT_VECTOR: [ 0.00 0.00 ]
+Key: G_SREM: [ 0.00 0.00 ]
+Key: G_SSHLSAT: [ 0.00 0.00 ]
+Key: G_SSUBE: [ 0.00 0.00 ]
+Key: G_SSUBO: [ 0.00 0.00 ]
+Key: G_SSUBSAT: [ 0.00 0.00 ]
+Key: G_STACKRESTORE: [ 0.00 0.00 ]
+Key: G_STACKSAVE: [ 0.00 0.00 ]
+Key: G_STEP_VECTOR: [ 0.00 0.00 ]
+Key: G_STORE: [ 0.00 0.00 ]
+Key: G_STRICT_FADD: [ 0.00 0.00 ]
+Key: G_STRICT_FDIV: [ 0.00 0.00 ]
+Key: G_STRICT_FLDEXP: [ 0.00 0.00 ]
+Key: G_STRICT_FMA: [ 0.00 0.00 ]
+Key: G_STRICT_FMUL: [ 0.00 0.00 ]
+Key: G_STRICT_FREM: [ 0.00 0.00 ]
+Key: G_STRICT_FSQRT: [ 0.00 0.00 ]
+Key: G_STRICT_FSUB: [ 0.00 0.00 ]
+Key: G_SUB: [ 0.00 0.00 ]
+Key: G_TRAP: [ 0.00 0.00 ]
+Key: G_TRUNC: [ 0.00 0.00 ]
+Key: G_TRUNC_SSAT_S: [ 0.00 0.00 ]
+Key: G_TRUNC_SSAT_U: [ 0.00 0.00 ]
+Key: G_TRUNC_USAT_U: [ 0.00 0.00 ]
+Key: G_UADDE: [ 0.00 0.00 ]
+Key: G_UADDO: [ 0.00 0.00 ]
+Key: G_UADDSAT: [ 0.00 0.00 ]
+Key: G_UBFX: [ 0.00 0.00 ]
+Key: G_UBSANTRAP: [ 0.00 0.00 ]
+Key: G_UCMP: [ 0.00 0.00 ]
+Key: G_UDIV: [ 0.00 0.00 ]
+Key: G_UDIVFIX: [ 0.00 0.00 ]
+Key: G_UDIVFIXSAT: [ 0.00 0.00 ]
+Key: G_UDIVREM: [ 0.00 0.00 ]
+Key: G_UITOFP: [ 0.00 0.00 ]
+Key: G_UMAX: [ 0.00 0.00 ]
+Key: G_UMIN: [ 0.00 0.00 ]
+Key: G_UMULFIX: [ 0.00 0.00 ]
+Key: G_UMULFIXSAT: [ 0.00 0.00 ]
+Key: G_UMULH: [ 0.00 0.00 ]
+Key: G_UMULO: [ 0.00 0.00 ]
+Key: G_UNMERGE_VALUES: [ 0.00 0.00 ]
+Key: G_UREM: [ 0.00 0.00 ]
+Key: G_USHLSAT: [ 0.00 0.00 ]
+Key: G_USUBE: [ 0.00 0.00 ]
+Key: G_USUBO: [ 0.00 0.00 ]
+Key: G_USUBSAT: [ 0.00 0.00 ]
+Key: G_VAARG: [ 0.00 0.00 ]
+Key: G_VASTART: [ 0.00 0.00 ]
+Key: G_VECREDUCE_ADD: [ 0.00 0.00 ]
+Key: G_VECREDUCE_AND: [ 0.00 0.00 ]
+Key: G_VECREDUCE_FADD: [ 0.00 0.00 ]
+Key: G_VECREDUCE_FMAX: [ 0.00 0.00 ]
+Key: G_VECREDUCE_FMAXIMUM: [ 0.00 0.00 ]
+Key: G_VECREDUCE_FMIN: [ 0.00 0.00 ]
+Key: G_VECREDUCE_FMINIMUM: [ 0.00 0.00 ]
+Key: G_VECREDUCE_FMUL: [ 0.00 0.00 ]
+Key: G_VECREDUCE_MUL: [ 0.00 0.00 ]
+Key: G_VECREDUCE_OR: [ 0.00 0.00 ]
+Key: G_VECREDUCE_SEQ_FADD: [ 0.00 0.00 ]
+Key: G_VECREDUCE_SEQ_FMUL: [ 0.00 0.00 ]
+Key: G_VECREDUCE_SMAX: [ 0.00 0.00 ]
+Key: G_VECREDUCE_SMIN: [ 0.00 0.00 ]
+Key: G_VECREDUCE_UMAX: [ 0.00 0.00 ]
+Key: G_VECREDUCE_UMIN: [ 0.00 0.00 ]
+Key: G_VECREDUCE_XOR: [ 0.00 0.00 ]
+Key: G_VECTOR_COMPRESS: [ 0.00 0.00 ]
+Key: G_VSCALE: [ 0.00 0.00 ]
+Key: G_WRITE_REGISTER: [ 0.00 0.00 ]
+Key: G_XOR: [ 0.00 0.00 ]
+Key: G_ZEXT: [ 0.00 0.00 ]
+Key: G_ZEXTLOAD: [ 0.00 0.00 ]
+Key: HADDPDrm: [ 0.00 0.00 ]
+Key: HADDPDrr: [ 0.00 0.00 ]
+Key: HADDPSrm: [ 0.00 0.00 ]
+Key: HADDPSrr: [ 0.00 0.00 ]
+Key: HLT: [ 0.00 0.00 ]
+Key: HRESET: [ 0.00 0.00 ]
+Key: HSUBPDrm: [ 0.00 0.00 ]
+Key: HSUBPDrr: [ 0.00 0.00 ]
+Key: HSUBPSrm: [ 0.00 0.00 ]
+Key: HSUBPSrr: [ 0.00 0.00 ]
+Key: ICALL_BRANCH_FUNNEL: [ 0.00 0.00 ]
+Key: IDIV: [ 0.00 0.00 ]
+Key: ILD_F: [ 0.00 0.00 ]
+Key: ILD_Fp: [ 0.00 0.00 ]
+Key: IMPLICIT_DEF: [ 0.00 0.00 ]
+Key: IMUL: [ 0.00 0.00 ]
+Key: IMULZU: [ 0.00 0.00 ]
+Key: IN: [ 0.00 0.00 ]
+Key: INC: [ 0.00 0.00 ]
+Key: INCSSPD: [ 0.00 0.00 ]
+Key: INCSSPQ: [ 0.00 0.00 ]
+Key: INDIRECT_THUNK_CALL: [ 0.00 0.00 ]
+Key: INDIRECT_THUNK_TCRETURN: [ 0.00 0.00 ]
+Key: INIT_UNDEF: [ 0.00 0.00 ]
+Key: INLINEASM: [ 0.00 0.00 ]
+Key: INLINEASM_BR: [ 0.00 0.00 ]
+Key: INSB: [ 0.00 0.00 ]
+Key: INSERTPSrmi: [ 0.00 0.00 ]
+Key: INSERTPSrri: [ 0.00 0.00 ]
+Key: INSERTQ: [ 0.00 0.00 ]
+Key: INSERTQI: [ 0.00 0.00 ]
+Key: INSERT_SUBREG: [ 0.00 0.00 ]
+Key: INSL: [ 0.00 0.00 ]
+Key: INSW: [ 0.00 0.00 ]
+Key: INT: [ 0.00 0.00 ]
+Key: INTO: [ 0.00 0.00 ]
+Key: INVD: [ 0.00 0.00 ]
+Key: INVEPT: [ 0.00 0.00 ]
+Key: INVLPG: [ 0.00 0.00 ]
+Key: INVLPGA: [ 0.00 0.00 ]
+Key: INVLPGB: [ 0.00 0.00 ]
+Key: INVPCID: [ 0.00 0.00 ]
+Key: INVVPID: [ 0.00 0.00 ]
+Key: IRET: [ 0.00 0.00 ]
+Key: ISTT_FP: [ 0.00 0.00 ]
+Key: ISTT_Fp: [ 0.00 0.00 ]
+Key: IST_F: [ 0.00 0.00 ]
+Key: IST_FP: [ 0.00 0.00 ]
+Key: IST_Fp: [ 0.00 0.00 ]
+Key: Int_eh_sjlj_setup_dispatch: [ 0.00 0.00 ]
+Key: JCC: [ 0.00 0.00 ]
+Key: JCXZ: [ 0.00 0.00 ]
+Key: JECXZ: [ 0.00 0.00 ]
+Key: JMP: [ 0.00 0.00 ]
+Key: JMPABS: [ 0.00 0.00 ]
+Key: JRCXZ: [ 0.00 0.00 ]
+Key: JUMP_TABLE_DEBUG_INFO: [ 0.00 0.00 ]
+Key: KADDBkk: [ 0.00 0.00 ]
+Key: KADDDkk: [ 0.00 0.00 ]
+Key: KADDQkk: [ 0.00 0.00 ]
+Key: KADDWkk: [ 0.00 0.00 ]
+Key: KANDBkk: [ 0.00 0.00 ]
+Key: KANDDkk: [ 0.00 0.00 ]
+Key: KANDNBkk: [ 0.00 0.00 ]
+Key: KANDNDkk: [ 0.00 0.00 ]
+Key: KANDNQkk: [ 0.00 0.00 ]
+Key: KANDNWkk: [ 0.00 0.00 ]
+Key: KANDQkk: [ 0.00 0.00 ]
+Key: KANDWkk: [ 0.00 0.00 ]
+Key: KCFI_CHECK: [ 0.00 0.00 ]
+Key: KILL: [ 0.00 0.00 ]
+Key: KMOVBkk: [ 0.00 0.00 ]
+Key: KMOVBkk_EVEX: [ 0.00 0.00 ]
+Key: KMOVBkm: [ 0.00 0.00 ]
+Key: KMOVBkm_EVEX: [ 0.00 0.00 ]
+Key: KMOVBkr: [ 0.00 0.00 ]
+Key: KMOVBkr_EVEX: [ 0.00 0.00 ]
+Key: KMOVBmk: [ 0.00 0.00 ]
+Key: KMOVBmk_EVEX: [ 0.00 0.00 ]
+Key: KMOVBrk: [ 0.00 0.00 ]
+Key: KMOVBrk_EVEX: [ 0.00 0.00 ]
+Key: KMOVDkk: [ 0.00 0.00 ]
+Key: KMOVDkk_EVEX: [ 0.00 0.00 ]
+Key: KMOVDkm: [ 0.00 0.00 ]
+Key: KMOVDkm_EVEX: [ 0.00 0.00 ]
+Key: KMOVDkr: [ 0.00 0.00 ]
+Key: KMOVDkr_EVEX: [ 0.00 0.00 ]
+Key: KMOVDmk: [ 0.00 0.00 ]
+Key: KMOVDmk_EVEX: [ 0.00 0.00 ]
+Key: KMOVDrk: [ 0.00 0.00 ]
+Key: KMOVDrk_EVEX: [ 0.00 0.00 ]
+Key: KMOVQkk: [ 0.00 0.00 ]
+Key: KMOVQkk_EVEX: [ 0.00 0.00 ]
+Key: KMOVQkm: [ 0.00 0.00 ]
+Key: KMOVQkm_EVEX: [ 0.00 0.00 ]
+Key: KMOVQkr: [ 0.00 0.00 ]
+Key: KMOVQkr_EVEX: [ 0.00 0.00 ]
+Key: KMOVQmk: [ 0.00 0.00 ]
+Key: KMOVQmk_EVEX: [ 0.00 0.00 ]
+Key: KMOVQrk: [ 0.00 0.00 ]
+Key: KMOVQrk_EVEX: [ 0.00 0.00 ]
+Key: KMOVWkk: [ 0.00 0.00 ]
+Key: KMOVWkk_EVEX: [ 0.00 0.00 ]
+Key: KMOVWkm: [ 0.00 0.00 ]
+Key: KMOVWkm_EVEX: [ 0.00 0.00 ]
+Key: KMOVWkr: [ 0.00 0.00 ]
+Key: KMOVWkr_EVEX: [ 0.00 0.00 ]
+Key: KMOVWmk: [ 0.00 0.00 ]
+Key: KMOVWmk_EVEX: [ 0.00 0.00 ]
+Key: KMOVWrk: [ 0.00 0.00 ]
+Key: KMOVWrk_EVEX: [ 0.00 0.00 ]
+Key: KNOTBkk: [ 0.00 0.00 ]
+Key: KNOTDkk: [ 0.00 0.00 ]
+Key: KNOTQkk: [ 0.00 0.00 ]
+Key: KNOTWkk: [ 0.00 0.00 ]
+Key: KORBkk: [ 0.00 0.00 ]
+Key: KORDkk: [ 0.00 0.00 ]
+Key: KORQkk: [ 0.00 0.00 ]
+Key: KORTESTBkk: [ 0.00 0.00 ]
+Key: KORTESTDkk: [ 0.00 0.00 ]
+Key: KORTESTQkk: [ 0.00 0.00 ]
+Key: KORTESTWkk: [ 0.00 0.00 ]
+Key: KORWkk: [ 0.00 0.00 ]
+Key: KSET: [ 0.00 0.00 ]
+Key: KSHIFTLBki: [ 0.00 0.00 ]
+Key: KSHIFTLDki: [ 0.00 0.00 ]
+Key: KSHIFTLQki: [ 0.00 0.00 ]
+Key: KSHIFTLWki: [ 0.00 0.00 ]
+Key: KSHIFTRBki: [ 0.00 0.00 ]
+Key: KSHIFTRDki: [ 0.00 0.00 ]
+Key: KSHIFTRQki: [ 0.00 0.00 ]
+Key: KSHIFTRWki: [ 0.00 0.00 ]
+Key: KTESTBkk: [ 0.00 0.00 ]
+Key: KTESTDkk: [ 0.00 0.00 ]
+Key: KTESTQkk: [ 0.00 0.00 ]
+Key: KTESTWkk: [ 0.00 0.00 ]
+Key: KUNPCKBWkk: [ 0.00 0.00 ]
+Key: KUNPCKDQkk: [ 0.00 0.00 ]
+Key: KUNPCKWDkk: [ 0.00 0.00 ]
+Key: KXNORBkk: [ 0.00 0.00 ]
+Key: KXNORDkk: [ 0.00 0.00 ]
+Key: KXNORQkk: [ 0.00 0.00 ]
+Key: KXNORWkk: [ 0.00 0.00 ]
+Key: KXORBkk: [ 0.00 0.00 ]
+Key: KXORDkk: [ 0.00 0.00 ]
+Key: KXORQkk: [ 0.00 0.00 ]
+Key: KXORWkk: [ 0.00 0.00 ]
+Key: LAHF: [ 0.00 0.00 ]
+Key: LAR: [ 0.00 0.00 ]
+Key: LCMPXCHG: [ 0.00 0.00 ]
+Key: LDDQUrm: [ 0.00 0.00 ]
+Key: LDMXCSR: [ 0.00 0.00 ]
+Key: LDS: [ 0.00 0.00 ]
+Key: LDTILECFG: [ 0.00 0.00 ]
+Key: LDTILECFG_EVEX: [ 0.00 0.00 ]
+Key: LD_F: [ 0.00 0.00 ]
+Key: LD_Fp: [ 0.00 0.00 ]
+Key: LD_Frr: [ 0.00 0.00 ]
+Key: LEA: [ 0.00 0.00 ]
+Key: LEAVE: [ 0.00 0.00 ]
+Key: LES: [ 0.00 0.00 ]
+Key: LFENCE: [ 0.00 0.00 ]
+Key: LFS: [ 0.00 0.00 ]
+Key: LGDT: [ 0.00 0.00 ]
+Key: LGS: [ 0.00 0.00 ]
+Key: LIDT: [ 0.00 0.00 ]
+Key: LIFETIME_END: [ 0.00 0.00 ]
+Key: LIFETIME_START: [ 0.00 0.00 ]
+Key: LKGS: [ 0.00 0.00 ]
+Key: LLDT: [ 0.00 0.00 ]
+Key: LLWPCB: [ 0.00 0.00 ]
+Key: LMSW: [ 0.00 0.00 ]
+Key: LOADIWKEY: [ 0.00 0.00 ]
+Key: LOAD_STACK_GUARD: [ 0.00 0.00 ]
+Key: LOCAL_ESCAPE: [ 0.00 0.00 ]
+Key: LOCK_ADD: [ 0.00 0.00 ]
+Key: LOCK_AND: [ 0.00 0.00 ]
+Key: LOCK_BTC: [ 0.00 0.00 ]
+Key: LOCK_BTC_RM: [ 0.00 0.00 ]
+Key: LOCK_BTR: [ 0.00 0.00 ]
+Key: LOCK_BTR_RM: [ 0.00 0.00 ]
+Key: LOCK_BTS: [ 0.00 0.00 ]
+Key: LOCK_BTS_RM: [ 0.00 0.00 ]
+Key: LOCK_DEC: [ 0.00 0.00 ]
+Key: LOCK_INC: [ 0.00 0.00 ]
+Key: LOCK_OR: [ 0.00 0.00 ]
+Key: LOCK_PREFIX: [ 0.00 0.00 ]
+Key: LOCK_SUB: [ 0.00 0.00 ]
+Key: LOCK_XOR: [ 0.00 0.00 ]
+Key: LODSB: [ 0.00 0.00 ]
+Key: LODSL: [ 0.00 0.00 ]
+Key: LODSQ: [ 0.00 0.00 ]
+Key: LODSW: [ 0.00 0.00 ]
+Key: LOOP: [ 0.00 0.00 ]
+Key: LOOPE: [ 0.00 0.00 ]
+Key: LOOPNE: [ 0.00 0.00 ]
+Key: LRET: [ 0.00 0.00 ]
+Key: LRETI: [ 0.00 0.00 ]
+Key: LSL: [ 0.00 0.00 ]
+Key: LSS: [ 0.00 0.00 ]
+Key: LTRm: [ 0.00 0.00 ]
+Key: LTRr: [ 0.00 0.00 ]
+Key: LWPINS: [ 0.00 0.00 ]
+Key: LWPVAL: [ 0.00 0.00 ]
+Key: LXADD: [ 0.00 0.00 ]
+Key: LZCNT: [ 0.00 0.00 ]
+Key: MASKMOVDQU: [ 0.00 0.00 ]
+Key: MASKPAIR: [ 0.00 0.00 ]
+Key: MAXCPDrm: [ 0.00 0.00 ]
+Key: MAXCPDrr: [ 0.00 0.00 ]
+Key: MAXCPSrm: [ 0.00 0.00 ]
+Key: MAXCPSrr: [ 0.00 0.00 ]
+Key: MAXCSDrm: [ 0.00 0.00 ]
+Key: MAXCSDrr: [ 0.00 0.00 ]
+Key: MAXCSSrm: [ 0.00 0.00 ]
+Key: MAXCSSrr: [ 0.00 0.00 ]
+Key: MAXPDrm: [ 0.00 0.00 ]
+Key: MAXPDrr: [ 0.00 0.00 ]
+Key: MAXPSrm: [ 0.00 0.00 ]
+Key: MAXPSrr: [ 0.00 0.00 ]
+Key: MAXSDrm: [ 0.00 0.00 ]
+Key: MAXSDrm_Int: [ 0.00 0.00 ]
+Key: MAXSDrr: [ 0.00 0.00 ]
+Key: MAXSDrr_Int: [ 0.00 0.00 ]
+Key: MAXSSrm: [ 0.00 0.00 ]
+Key: MAXSSrm_Int: [ 0.00 0.00 ]
+Key: MAXSSrr: [ 0.00 0.00 ]
+Key: MAXSSrr_Int: [ 0.00 0.00 ]
+Key: MEMBARRIER: [ 0.00 0.00 ]
+Key: MFENCE: [ 0.00 0.00 ]
+Key: MINCPDrm: [ 0.00 0.00 ]
+Key: MINCPDrr: [ 0.00 0.00 ]
+Key: MINCPSrm: [ 0.00 0.00 ]
+Key: MINCPSrr: [ 0.00 0.00 ]
+Key: MINCSDrm: [ 0.00 0.00 ]
+Key: MINCSDrr: [ 0.00 0.00 ]
+Key: MINCSSrm: [ 0.00 0.00 ]
+Key: MINCSSrr: [ 0.00 0.00 ]
+Key: MINPDrm: [ 0.00 0.00 ]
+Key: MINPDrr: [ 0.00 0.00 ]
+Key: MINPSrm: [ 0.00 0.00 ]
+Key: MINPSrr: [ 0.00 0.00 ]
+Key: MINSDrm: [ 0.00 0.00 ]
+Key: MINSDrm_Int: [ 0.00 0.00 ]
+Key: MINSDrr: [ 0.00 0.00 ]
+Key: MINSDrr_Int: [ 0.00 0.00 ]
+Key: MINSSrm: [ 0.00 0.00 ]
+Key: MINSSrm_Int: [ 0.00 0.00 ]
+Key: MINSSrr: [ 0.00 0.00 ]
+Key: MINSSrr_Int: [ 0.00 0.00 ]
+Key: MMX_CVTPD: [ 0.00 0.00 ]
+Key: MMX_CVTPI: [ 0.00 0.00 ]
+Key: MMX_CVTPS: [ 0.00 0.00 ]
+Key: MMX_CVTTPD: [ 0.00 0.00 ]
+Key: MMX_CVTTPS: [ 0.00 0.00 ]
+Key: MMX_EMMS: [ 0.00 0.00 ]
+Key: MMX_MASKMOVQ: [ 0.00 0.00 ]
+Key: MMX_MOVD: [ 0.00 0.00 ]
+Key: MMX_MOVDQ: [ 0.00 0.00 ]
+Key: MMX_MOVFR: [ 0.00 0.00 ]
+Key: MMX_MOVNTQmr: [ 0.00 0.00 ]
+Key: MMX_MOVQ: [ 0.00 0.00 ]
+Key: MMX_PABSBrm: [ 0.00 0.00 ]
+Key: MMX_PABSBrr: [ 0.00 0.00 ]
+Key: MMX_PABSDrm: [ 0.00 0.00 ]
+Key: MMX_PABSDrr: [ 0.00 0.00 ]
+Key: MMX_PABSWrm: [ 0.00 0.00 ]
+Key: MMX_PABSWrr: [ 0.00 0.00 ]
+Key: MMX_PACKSSDWrm: [ 0.00 0.00 ]
+Key: MMX_PACKSSDWrr: [ 0.00 0.00 ]
+Key: MMX_PACKSSWBrm: [ 0.00 0.00 ]
+Key: MMX_PACKSSWBrr: [ 0.00 0.00 ]
+Key: MMX_PACKUSWBrm: [ 0.00 0.00 ]
+Key: MMX_PACKUSWBrr: [ 0.00 0.00 ]
+Key: MMX_PADDBrm: [ 0.00 0.00 ]
+Key: MMX_PADDBrr: [ 0.00 0.00 ]
+Key: MMX_PADDDrm: [ 0.00 0.00 ]
+Key: MMX_PADDDrr: [ 0.00 0.00 ]
+Key: MMX_PADDQrm: [ 0.00 0.00 ]
+Key: MMX_PADDQrr: [ 0.00 0.00 ]
+Key: MMX_PADDSBrm: [ 0.00 0.00 ]
+Key: MMX_PADDSBrr: [ 0.00 0.00 ]
+Key: MMX_PADDSWrm: [ 0.00 0.00 ]
+Key: MMX_PADDSWrr: [ 0.00 0.00 ]
+Key: MMX_PADDUSBrm: [ 0.00 0.00 ]
+Key: MMX_PADDUSBrr: [ 0.00 0.00 ]
+Key: MMX_PADDUSWrm: [ 0.00 0.00 ]
+Key: MMX_PADDUSWrr: [ 0.00 0.00 ]
+Key: MMX_PADDWrm: [ 0.00 0.00 ]
+Key: MMX_PADDWrr: [ 0.00 0.00 ]
+Key: MMX_PALIGNRrmi: [ 0.00 0.00 ]
+Key: MMX_PALIGNRrri: [ 0.00 0.00 ]
+Key: MMX_PANDNrm: [ 0.00 0.00 ]
+Key: MMX_PANDNrr: [ 0.00 0.00 ]
+Key: MMX_PANDrm: [ 0.00 0.00 ]
+Key: MMX_PANDrr: [ 0.00 0.00 ]
+Key: MMX_PAVGBrm: [ 0.00 0.00 ]
+Key: MMX_PAVGBrr: [ 0.00 0.00 ]
+Key: MMX_PAVGWrm: [ 0.00 0.00 ]
+Key: MMX_PAVGWrr: [ 0.00 0.00 ]
+Key: MMX_PCMPEQBrm: [ 0.00 0.00 ]
+Key: MMX_PCMPEQBrr: [ 0.00 0.00 ]
+Key: MMX_PCMPEQDrm: [ 0.00 0.00 ]
+Key: MMX_PCMPEQDrr: [ 0.00 0.00 ]
+Key: MMX_PCMPEQWrm: [ 0.00 0.00 ]
+Key: MMX_PCMPEQWrr: [ 0.00 0.00 ]
+Key: MMX_PCMPGTBrm: [ 0.00 0.00 ]
+Key: MMX_PCMPGTBrr: [ 0.00 0.00 ]
+Key: MMX_PCMPGTDrm: [ 0.00 0.00 ]
+Key: MMX_PCMPGTDrr: [ 0.00 0.00 ]
+Key: MMX_PCMPGTWrm: [ 0.00 0.00 ]
+Key: MMX_PCMPGTWrr: [ 0.00 0.00 ]
+Key: MMX_PEXTRWrri: [ 0.00 0.00 ]
+Key: MMX_PHADDDrm: [ 0.00 0.00 ]
+Key: MMX_PHADDDrr: [ 0.00 0.00 ]
+Key: MMX_PHADDSWrm: [ 0.00 0.00 ]
+Key: MMX_PHADDSWrr: [ 0.00 0.00 ]
+Key: MMX_PHADDWrm: [ 0.00 0.00 ]
+Key: MMX_PHADDWrr: [ 0.00 0.00 ]
+Key: MMX_PHSUBDrm: [ 0.00 0.00 ]
+Key: MMX_PHSUBDrr: [ 0.00 0.00 ]
+Key: MMX_PHSUBSWrm: [ 0.00 0.00 ]
+Key: MMX_PHSUBSWrr: [ 0.00 0.00 ]
+Key: MMX_PHSUBWrm: [ 0.00 0.00 ]
+Key: MMX_PHSUBWrr: [ 0.00 0.00 ]
+Key: MMX_PINSRWrmi: [ 0.00 0.00 ]
+Key: MMX_PINSRWrri: [ 0.00 0.00 ]
+Key: MMX_PMADDUBSWrm: [ 0.00 0.00 ]
+Key: MMX_PMADDUBSWrr: [ 0.00 0.00 ]
+Key: MMX_PMADDWDrm: [ 0.00 0.00 ]
+Key: MMX_PMADDWDrr: [ 0.00 0.00 ]
+Key: MMX_PMAXSWrm: [ 0.00 0.00 ]
+Key: MMX_PMAXSWrr: [ 0.00 0.00 ]
+Key: MMX_PMAXUBrm: [ 0.00 0.00 ]
+Key: MMX_PMAXUBrr: [ 0.00 0.00 ]
+Key: MMX_PMINSWrm: [ 0.00 0.00 ]
+Key: MMX_PMINSWrr: [ 0.00 0.00 ]
+Key: MMX_PMINUBrm: [ 0.00 0.00 ]
+Key: MMX_PMINUBrr: [ 0.00 0.00 ]
+Key: MMX_PMOVMSKBrr: [ 0.00 0.00 ]
+Key: MMX_PMULHRSWrm: [ 0.00 0.00 ]
+Key: MMX_PMULHRSWrr: [ 0.00 0.00 ]
+Key: MMX_PMULHUWrm: [ 0.00 0.00 ]
+Key: MMX_PMULHUWrr: [ 0.00 0.00 ]
+Key: MMX_PMULHWrm: [ 0.00 0.00 ]
+Key: MMX_PMULHWrr: [ 0.00 0.00 ]
+Key: MMX_PMULLWrm: [ 0.00 0.00 ]
+Key: MMX_PMULLWrr: [ 0.00 0.00 ]
+Key: MMX_PMULUDQrm: [ 0.00 0.00 ]
+Key: MMX_PMULUDQrr: [ 0.00 0.00 ]
+Key: MMX_PORrm: [ 0.00 0.00 ]
+Key: MMX_PORrr: [ 0.00 0.00 ]
+Key: MMX_PSADBWrm: [ 0.00 0.00 ]
+Key: MMX_PSADBWrr: [ 0.00 0.00 ]
+Key: MMX_PSHUFBrm: [ 0.00 0.00 ]
+Key: MMX_PSHUFBrr: [ 0.00 0.00 ]
+Key: MMX_PSHUFWmi: [ 0.00 0.00 ]
+Key: MMX_PSHUFWri: [ 0.00 0.00 ]
+Key: MMX_PSIGNBrm: [ 0.00 0.00 ]
+Key: MMX_PSIGNBrr: [ 0.00 0.00 ]
+Key: MMX_PSIGNDrm: [ 0.00 0.00 ]
+Key: MMX_PSIGNDrr: [ 0.00 0.00 ]
+Key: MMX_PSIGNWrm: [ 0.00 0.00 ]
+Key: MMX_PSIGNWrr: [ 0.00 0.00 ]
+Key: MMX_PSLLDri: [ 0.00 0.00 ]
+Key: MMX_PSLLDrm: [ 0.00 0.00 ]
+Key: MMX_PSLLDrr: [ 0.00 0.00 ]
+Key: MMX_PSLLQri: [ 0.00 0.00 ]
+Key: MMX_PSLLQrm: [ 0.00 0.00 ]
+Key: MMX_PSLLQrr: [ 0.00 0.00 ]
+Key: MMX_PSLLWri: [ 0.00 0.00 ]
+Key: MMX_PSLLWrm: [ 0.00 0.00 ]
+Key: MMX_PSLLWrr: [ 0.00 0.00 ]
+Key: MMX_PSRADri: [ 0.00 0.00 ]
+Key: MMX_PSRADrm: [ 0.00 0.00 ]
+Key: MMX_PSRADrr: [ 0.00 0.00 ]
+Key: MMX_PSRAWri: [ 0.00 0.00 ]
+Key: MMX_PSRAWrm: [ 0.00 0.00 ]
+Key: MMX_PSRAWrr: [ 0.00 0.00 ]
+Key: MMX_PSRLDri: [ 0.00 0.00 ]
+Key: MMX_PSRLDrm: [ 0.00 0.00 ]
+Key: MMX_PSRLDrr: [ 0.00 0.00 ]
+Key: MMX_PSRLQri: [ 0.00 0.00 ]
+Key: MMX_PSRLQrm: [ 0.00 0.00 ]
+Key: MMX_PSRLQrr: [ 0.00 0.00 ]
+Key: MMX_PSRLWri: [ 0.00 0.00 ]
+Key: MMX_PSRLWrm: [ 0.00 0.00 ]
+Key: MMX_PSRLWrr: [ 0.00 0.00 ]
+Key: MMX_PSUBBrm: [ 0.00 0.00 ]
+Key: MMX_PSUBBrr: [ 0.00 0.00 ]
+Key: MMX_PSUBDrm: [ 0.00 0.00 ]
+Key: MMX_PSUBDrr: [ 0.00 0.00 ]
+Key: MMX_PSUBQrm: [ 0.00 0.00 ]
+Key: MMX_PSUBQrr: [ 0.00 0.00 ]
+Key: MMX_PSUBSBrm: [ 0.00 0.00 ]
+Key: MMX_PSUBSBrr: [ 0.00 0.00 ]
+Key: MMX_PSUBSWrm: [ 0.00 0.00 ]
+Key: MMX_PSUBSWrr: [ 0.00 0.00 ]
+Key: MMX_PSUBUSBrm: [ 0.00 0.00 ]
+Key: MMX_PSUBUSBrr: [ 0.00 0.00 ]
+Key: MMX_PSUBUSWrm: [ 0.00 0.00 ]
+Key: MMX_PSUBUSWrr: [ 0.00 0.00 ]
+Key: MMX_PSUBWrm: [ 0.00 0.00 ]
+Key: MMX_PSUBWrr: [ 0.00 0.00 ]
+Key: MMX_PUNPCKHBWrm: [ 0.00 0.00 ]
+Key: MMX_PUNPCKHBWrr: [ 0.00 0.00 ]
+Key: MMX_PUNPCKHDQrm: [ 0.00 0.00 ]
+Key: MMX_PUNPCKHDQrr: [ 0.00 0.00 ]
+Key: MMX_PUNPCKHWDrm: [ 0.00 0.00 ]
+Key: MMX_PUNPCKHWDrr: [ 0.00 0.00 ]
+Key: MMX_PUNPCKLBWrm: [ 0.00 0.00 ]
+Key: MMX_PUNPCKLBWrr: [ 0.00 0.00 ]
+Key: MMX_PUNPCKLDQrm: [ 0.00 0.00 ]
+Key: MMX_PUNPCKLDQrr: [ 0.00 0.00 ]
+Key: MMX_PUNPCKLWDrm: [ 0.00 0.00 ]
+Key: MMX_PUNPCKLWDrr: [ 0.00 0.00 ]
+Key: MMX_PXORrm: [ 0.00 0.00 ]
+Key: MMX_PXORrr: [ 0.00 0.00 ]
+Key: MMX_SET: [ 0.00 0.00 ]
+Key: MONITOR: [ 0.00 0.00 ]
+Key: MONITORX: [ 0.00 0.00 ]
+Key: MONTMUL: [ 0.00 0.00 ]
+Key: MORESTACK_RET: [ 0.00 0.00 ]
+Key: MORESTACK_RET_RESTORE_R: [ 0.00 0.00 ]
+Key: MOV: [ 0.00 0.00 ]
+Key: MOVAPDmr: [ 0.00 0.00 ]
+Key: MOVAPDrm: [ 0.00 0.00 ]
+Key: MOVAPDrr: [ 0.00 0.00 ]
+Key: MOVAPDrr_REV: [ 0.00 0.00 ]
+Key: MOVAPSmr: [ 0.00 0.00 ]
+Key: MOVAPSrm: [ 0.00 0.00 ]
+Key: MOVAPSrr: [ 0.00 0.00 ]
+Key: MOVAPSrr_REV: [ 0.00 0.00 ]
+Key: MOVBE: [ 0.00 0.00 ]
+Key: MOVDDUPrm: [ 0.00 0.00 ]
+Key: MOVDDUPrr: [ 0.00 0.00 ]
+Key: MOVDI: [ 0.00 0.00 ]
+Key: MOVDIR: [ 0.00 0.00 ]
+Key: MOVDIRI: [ 0.00 0.00 ]
+Key: MOVDQAmr: [ 0.00 0.00 ]
+Key: MOVDQArm: [ 0.00 0.00 ]
+Key: MOVDQArr: [ 0.00 0.00 ]
+Key: MOVDQArr_REV: [ 0.00 0.00 ]
+Key: MOVDQUmr: [ 0.00 0.00 ]
+Key: MOVDQUrm: [ 0.00 0.00 ]
+Key: MOVDQUrr: [ 0.00 0.00 ]
+Key: MOVDQUrr_REV: [ 0.00 0.00 ]
+Key: MOVHLPSrr: [ 0.00 0.00 ]
+Key: MOVHPDmr: [ 0.00 0.00 ]
+Key: MOVHPDrm: [ 0.00 0.00 ]
+Key: MOVHPSmr: [ 0.00 0.00 ]
+Key: MOVHPSrm: [ 0.00 0.00 ]
+Key: MOVLHPSrr: [ 0.00 0.00 ]
+Key: MOVLPDmr: [ 0.00 0.00 ]
+Key: MOVLPDrm: [ 0.00 0.00 ]
+Key: MOVLPSmr: [ 0.00 0.00 ]
+Key: MOVLPSrm: [ 0.00 0.00 ]
+Key: MOVMSKPDrr: [ 0.00 0.00 ]
+Key: MOVMSKPSrr: [ 0.00 0.00 ]
+Key: MOVNTDQArm: [ 0.00 0.00 ]
+Key: MOVNTDQmr: [ 0.00 0.00 ]
+Key: MOVNTI: [ 0.00 0.00 ]
+Key: MOVNTImr: [ 0.00 0.00 ]
+Key: MOVNTPDmr: [ 0.00 0.00 ]
+Key: MOVNTPSmr: [ 0.00 0.00 ]
+Key: MOVNTSD: [ 0.00 0.00 ]
+Key: MOVNTSS: [ 0.00 0.00 ]
+Key: MOVPC: [ 0.00 0.00 ]
+Key: MOVPDI: [ 0.00 0.00 ]
+Key: MOVPQI: [ 0.00 0.00 ]
+Key: MOVPQIto: [ 0.00 0.00 ]
+Key: MOVQI: [ 0.00 0.00 ]
+Key: MOVRS: [ 0.00 0.00 ]
+Key: MOVSB: [ 0.00 0.00 ]
+Key: MOVSDmr: [ 0.00 0.00 ]
+Key: MOVSDrm: [ 0.00 0.00 ]
+Key: MOVSDrm_alt: [ 0.00 0.00 ]
+Key: MOVSDrr: [ 0.00 0.00 ]
+Key: MOVSDrr_REV: [ 0.00 0.00 ]
+Key: MOVSDto: [ 0.00 0.00 ]
+Key: MOVSHDUPrm: [ 0.00 0.00 ]
+Key: MOVSHDUPrr: [ 0.00 0.00 ]
+Key: MOVSHPmr: [ 0.00 0.00 ]
+Key: MOVSHPrm: [ 0.00 0.00 ]
+Key: MOVSL: [ 0.00 0.00 ]
+Key: MOVSLDUPrm: [ 0.00 0.00 ]
+Key: MOVSLDUPrr: [ 0.00 0.00 ]
+Key: MOVSQ: [ 0.00 0.00 ]
+Key: MOVSS: [ 0.00 0.00 ]
+Key: MOVSSmr: [ 0.00 0.00 ]
+Key: MOVSSrm: [ 0.00 0.00 ]
+Key: MOVSSrm_alt: [ 0.00 0.00 ]
+Key: MOVSSrr: [ 0.00 0.00 ]
+Key: MOVSSrr_REV: [ 0.00 0.00 ]
+Key: MOVSW: [ 0.00 0.00 ]
+Key: MOVSX: [ 0.00 0.00 ]
+Key: MOVUPDmr: [ 0.00 0.00 ]
+Key: MOVUPDrm: [ 0.00 0.00 ]
+Key: MOVUPDrr: [ 0.00 0.00 ]
+Key: MOVUPDrr_REV: [ 0.00 0.00 ]
+Key: MOVUPSmr: [ 0.00 0.00 ]
+Key: MOVUPSrm: [ 0.00 0.00 ]
+Key: MOVUPSrr: [ 0.00 0.00 ]
+Key: MOVUPSrr_REV: [ 0.00 0.00 ]
+Key: MOVZPQILo: [ 0.00 0.00 ]
+Key: MOVZX: [ 0.00 0.00 ]
+Key: MPSADBWrmi: [ 0.00 0.00 ]
+Key: MPSADBWrri: [ 0.00 0.00 ]
+Key: MUL: [ 0.00 0.00 ]
+Key: MULPDrm: [ 0.00 0.00 ]
+Key: MULPDrr: [ 0.00 0.00 ]
+Key: MULPSrm: [ 0.00 0.00 ]
+Key: MULPSrr: [ 0.00 0.00 ]
+Key: MULSDrm: [ 0.00 0.00 ]
+Key: MULSDrm_Int: [ 0.00 0.00 ]
+Key: MULSDrr: [ 0.00 0.00 ]
+Key: MULSDrr_Int: [ 0.00 0.00 ]
+Key: MULSSrm: [ 0.00 0.00 ]
+Key: MULSSrm_Int: [ 0.00 0.00 ]
+Key: MULSSrr: [ 0.00 0.00 ]
+Key: MULSSrr_Int: [ 0.00 0.00 ]
+Key: MULX: [ 0.00 0.00 ]
+Key: MUL_F: [ 0.00 0.00 ]
+Key: MUL_FI: [ 0.00 0.00 ]
+Key: MUL_FPrST: [ 0.00 0.00 ]
+Key: MUL_FST: [ 0.00 0.00 ]
+Key: MUL_Fp: [ 0.00 0.00 ]
+Key: MUL_FpI: [ 0.00 0.00 ]
+Key: MUL_FrST: [ 0.00 0.00 ]
+Key: MWAITX: [ 0.00 0.00 ]
+Key: MWAITX_SAVE_RBX: [ 0.00 0.00 ]
+Key: MWAITXrrr: [ 0.00 0.00 ]
+Key: MWAITrr: [ 0.00 0.00 ]
+Key: NEG: [ 0.00 0.00 ]
+Key: NOOP: [ 0.00 0.00 ]
+Key: NOOPL: [ 0.00 0.00 ]
+Key: NOOPLr: [ 0.00 0.00 ]
+Key: NOOPQ: [ 0.00 0.00 ]
+Key: NOOPQr: [ 0.00 0.00 ]
+Key: NOOPW: [ 0.00 0.00 ]
+Key: NOOPWr: [ 0.00 0.00 ]
+Key: NOT: [ 0.00 0.00 ]
+Key: OR: [ 0.00 0.00 ]
+Key: ORPDrm: [ 0.00 0.00 ]
+Key: ORPDrr: [ 0.00 0.00 ]
+Key: ORPSrm: [ 0.00 0.00 ]
+Key: ORPSrr: [ 0.00 0.00 ]
+Key: OUT: [ 0.00 0.00 ]
+Key: OUTSB: [ 0.00 0.00 ]
+Key: OUTSL: [ 0.00 0.00 ]
+Key: OUTSW: [ 0.00 0.00 ]
+Key: PABSBrm: [ 0.00 0.00 ]
+Key: PABSBrr: [ 0.00 0.00 ]
+Key: PABSDrm: [ 0.00 0.00 ]
+Key: PABSDrr: [ 0.00 0.00 ]
+Key: PABSWrm: [ 0.00 0.00 ]
+Key: PABSWrr: [ 0.00 0.00 ]
+Key: PACKSSDWrm: [ 0.00 0.00 ]
+Key: PACKSSDWrr: [ 0.00 0.00 ]
+Key: PACKSSWBrm: [ 0.00 0.00 ]
+Key: PACKSSWBrr: [ 0.00 0.00 ]
+Key: PACKUSDWrm: [ 0.00 0.00 ]
+Key: PACKUSDWrr: [ 0.00 0.00 ]
+Key: PACKUSWBrm: [ 0.00 0.00 ]
+Key: PACKUSWBrr: [ 0.00 0.00 ]
+Key: PADDBrm: [ 0.00 0.00 ]
+Key: PADDBrr: [ 0.00 0.00 ]
+Key: PADDDrm: [ 0.00 0.00 ]
+Key: PADDDrr: [ 0.00 0.00 ]
+Key: PADDQrm: [ 0.00 0.00 ]
+Key: PADDQrr: [ 0.00 0.00 ]
+Key: PADDSBrm: [ 0.00 0.00 ]
+Key: PADDSBrr: [ 0.00 0.00 ]
+Key: PADDSWrm: [ 0.00 0.00 ]
+Key: PADDSWrr: [ 0.00 0.00 ]
+Key: PADDUSBrm: [ 0.00 0.00 ]
+Key: PADDUSBrr: [ 0.00 0.00 ]
+Key: PADDUSWrm: [ 0.00 0.00 ]
+Key: PADDUSWrr: [ 0.00 0.00 ]
+Key: PADDWrm: [ 0.00 0.00 ]
+Key: PADDWrr: [ 0.00 0.00 ]
+Key: PALIGNRrmi: [ 0.00 0.00 ]
+Key: PALIGNRrri: [ 0.00 0.00 ]
+Key: PANDNrm: [ 0.00 0.00 ]
+Key: PANDNrr: [ 0.00 0.00 ]
+Key: PANDrm: [ 0.00 0.00 ]
+Key: PANDrr: [ 0.00 0.00 ]
+Key: PATCHABLE_EVENT_CALL: [ 0.00 0.00 ]
+Key: PATCHABLE_FUNCTION_ENTER: [ 0.00 0.00 ]
+Key: PATCHABLE_FUNCTION_EXIT: [ 0.00 0.00 ]
+Key: PATCHABLE_OP: [ 0.00 0.00 ]
+Key: PATCHABLE_RET: [ 0.00 0.00 ]
+Key: PATCHABLE_TAIL_CALL: [ 0.00 0.00 ]
+Key: PATCHABLE_TYPED_EVENT_CALL: [ 0.00 0.00 ]
+Key: PATCHPOINT: [ 0.00 0.00 ]
+Key: PAUSE: [ 0.00 0.00 ]
+Key: PAVGBrm: [ 0.00 0.00 ]
+Key: PAVGBrr: [ 0.00 0.00 ]
+Key: PAVGUSBrm: [ 0.00 0.00 ]
+Key: PAVGUSBrr: [ 0.00 0.00 ]
+Key: PAVGWrm: [ 0.00 0.00 ]
+Key: PAVGWrr: [ 0.00 0.00 ]
+Key: PBLENDVBrm: [ 0.00 0.00 ]
+Key: PBLENDVBrr: [ 0.00 0.00 ]
+Key: PBLENDWrmi: [ 0.00 0.00 ]
+Key: PBLENDWrri: [ 0.00 0.00 ]
+Key: PBNDKB: [ 0.00 0.00 ]
+Key: PCLMULQDQrmi: [ 0.00 0.00 ]
+Key: PCLMULQDQrri: [ 0.00 0.00 ]
+Key: PCMPEQBrm: [ 0.00 0.00 ]
+Key: PCMPEQBrr: [ 0.00 0.00 ]
+Key: PCMPEQDrm: [ 0.00 0.00 ]
+Key: PCMPEQDrr: [ 0.00 0.00 ]
+Key: PCMPEQQrm: [ 0.00 0.00 ]
+Key: PCMPEQQrr: [ 0.00 0.00 ]
+Key: PCMPEQWrm: [ 0.00 0.00 ]
+Key: PCMPEQWrr: [ 0.00 0.00 ]
+Key: PCMPESTRIrmi: [ 0.00 0.00 ]
+Key: PCMPESTRIrri: [ 0.00 0.00 ]
+Key: PCMPESTRMrmi: [ 0.00 0.00 ]
+Key: PCMPESTRMrri: [ 0.00 0.00 ]
+Key: PCMPGTBrm: [ 0.00 0.00 ]
+Key: PCMPGTBrr: [ 0.00 0.00 ]
+Key: PCMPGTDrm: [ 0.00 0.00 ]
+Key: PCMPGTDrr: [ 0.00 0.00 ]
+Key: PCMPGTQrm: [ 0.00 0.00 ]
+Key: PCMPGTQrr: [ 0.00 0.00 ]
+Key: PCMPGTWrm: [ 0.00 0.00 ]
+Key: PCMPGTWrr: [ 0.00 0.00 ]
+Key: PCMPISTRIrmi: [ 0.00 0.00 ]
+Key: PCMPISTRIrri: [ 0.00 0.00 ]
+Key: PCMPISTRMrmi: [ 0.00 0.00 ]
+Key: PCMPISTRMrri: [ 0.00 0.00 ]
+Key: PCONFIG: [ 0.00 0.00 ]
+Key: PDEP: [ 0.00 0.00 ]
+Key: PEXT: [ 0.00 0.00 ]
+Key: PEXTRBmri: [ 0.00 0.00 ]
+Key: PEXTRBrri: [ 0.00 0.00 ]
+Key: PEXTRDmri: [ 0.00 0.00 ]
+Key: PEXTRDrri: [ 0.00 0.00 ]
+Key: PEXTRQmri: [ 0.00 0.00 ]
+Key: PEXTRQrri: [ 0.00 0.00 ]
+Key: PEXTRWmri: [ 0.00 0.00 ]
+Key: PEXTRWrri: [ 0.00 0.00 ]
+Key: PEXTRWrri_REV: [ 0.00 0.00 ]
+Key: PF: [ 0.00 0.00 ]
+Key: PFACCrm: [ 0.00 0.00 ]
+Key: PFACCrr: [ 0.00 0.00 ]
+Key: PFADDrm: [ 0.00 0.00 ]
+Key: PFADDrr: [ 0.00 0.00 ]
+Key: PFCMPEQrm: [ 0.00 0.00 ]
+Key: PFCMPEQrr: [ 0.00 0.00 ]
+Key: PFCMPGErm: [ 0.00 0.00 ]
+Key: PFCMPGErr: [ 0.00 0.00 ]
+Key: PFCMPGTrm: [ 0.00 0.00 ]
+Key: PFCMPGTrr: [ 0.00 0.00 ]
+Key: PFMAXrm: [ 0.00 0.00 ]
+Key: PFMAXrr: [ 0.00 0.00 ]
+Key: PFMINrm: [ 0.00 0.00 ]
+Key: PFMINrr: [ 0.00 0.00 ]
+Key: PFMULrm: [ 0.00 0.00 ]
+Key: PFMULrr: [ 0.00 0.00 ]
+Key: PFNACCrm: [ 0.00 0.00 ]
+Key: PFNACCrr: [ 0.00 0.00 ]
+Key: PFPNACCrm: [ 0.00 0.00 ]
+Key: PFPNACCrr: [ 0.00 0.00 ]
+Key: PFRCPIT: [ 0.00 0.00 ]
+Key: PFRCPrm: [ 0.00 0.00 ]
+Key: PFRCPrr: [ 0.00 0.00 ]
+Key: PFRSQIT: [ 0.00 0.00 ]
+Key: PFRSQRTrm: [ 0.00 0.00 ]
+Key: PFRSQRTrr: [ 0.00 0.00 ]
+Key: PFSUBRrm: [ 0.00 0.00 ]
+Key: PFSUBRrr: [ 0.00 0.00 ]
+Key: PFSUBrm: [ 0.00 0.00 ]
+Key: PFSUBrr: [ 0.00 0.00 ]
+Key: PHADDDrm: [ 0.00 0.00 ]
+Key: PHADDDrr: [ 0.00 0.00 ]
+Key: PHADDSWrm: [ 0.00 0.00 ]
+Key: PHADDSWrr: [ 0.00 0.00 ]
+Key: PHADDWrm: [ 0.00 0.00 ]
+Key: PHADDWrr: [ 0.00 0.00 ]
+Key: PHI: [ 0.00 0.00 ]
+Key: PHMINPOSUWrm: [ 0.00 0.00 ]
+Key: PHMINPOSUWrr: [ 0.00 0.00 ]
+Key: PHSUBDrm: [ 0.00 0.00 ]
+Key: PHSUBDrr: [ 0.00 0.00 ]
+Key: PHSUBSWrm: [ 0.00 0.00 ]
+Key: PHSUBSWrr: [ 0.00 0.00 ]
+Key: PHSUBWrm: [ 0.00 0.00 ]
+Key: PHSUBWrr: [ 0.00 0.00 ]
+Key: PI: [ 0.00 0.00 ]
+Key: PINSRBrmi: [ 0.00 0.00 ]
+Key: PINSRBrri: [ 0.00 0.00 ]
+Key: PINSRDrmi: [ 0.00 0.00 ]
+Key: PINSRDrri: [ 0.00 0.00 ]
+Key: PINSRQrmi: [ 0.00 0.00 ]
+Key: PINSRQrri: [ 0.00 0.00 ]
+Key: PINSRWrmi: [ 0.00 0.00 ]
+Key: PINSRWrri: [ 0.00 0.00 ]
+Key: PLDTILECFGV: [ 0.00 0.00 ]
+Key: PLEA: [ 0.00 0.00 ]
+Key: PMADDUBSWrm: [ 0.00 0.00 ]
+Key: PMADDUBSWrr: [ 0.00 0.00 ]
+Key: PMADDWDrm: [ 0.00 0.00 ]
+Key: PMADDWDrr: [ 0.00 0.00 ]
+Key: PMAXSBrm: [ 0.00 0.00 ]
+Key: PMAXSBrr: [ 0.00 0.00 ]
+Key: PMAXSDrm: [ 0.00 0.00 ]
+Key: PMAXSDrr: [ 0.00 0.00 ]
+Key: PMAXSWrm: [ 0.00 0.00 ]
+Key: PMAXSWrr: [ 0.00 0.00 ]
+Key: PMAXUBrm: [ 0.00 0.00 ]
+Key: PMAXUBrr: [ 0.00 0.00 ]
+Key: PMAXUDrm: [ 0.00 0.00 ]
+Key: PMAXUDrr: [ 0.00 0.00 ]
+Key: PMAXUWrm: [ 0.00 0.00 ]
+Key: PMAXUWrr: [ 0.00 0.00 ]
+Key: PMINSBrm: [ 0.00 0.00 ]
+Key: PMINSBrr: [ 0.00 0.00 ]
+Key: PMINSDrm: [ 0.00 0.00 ]
+Key: PMINSDrr: [ 0.00 0.00 ]
+Key: PMINSWrm: [ 0.00 0.00 ]
+Key: PMINSWrr: [ 0.00 0.00 ]
+Key: PMINUBrm: [ 0.00 0.00 ]
+Key: PMINUBrr: [ 0.00 0.00 ]
+Key: PMINUDrm: [ 0.00 0.00 ]
+Key: PMINUDrr: [ 0.00 0.00 ]
+Key: PMINUWrm: [ 0.00 0.00 ]
+Key: PMINUWrr: [ 0.00 0.00 ]
+Key: PMOVMSKBrr: [ 0.00 0.00 ]
+Key: PMOVSXBDrm: [ 0.00 0.00 ]
+Key: PMOVSXBDrr: [ 0.00 0.00 ]
+Key: PMOVSXBQrm: [ 0.00 0.00 ]
+Key: PMOVSXBQrr: [ 0.00 0.00 ]
+Key: PMOVSXBWrm: [ 0.00 0.00 ]
+Key: PMOVSXBWrr: [ 0.00 0.00 ]
+Key: PMOVSXDQrm: [ 0.00 0.00 ]
+Key: PMOVSXDQrr: [ 0.00 0.00 ]
+Key: PMOVSXWDrm: [ 0.00 0.00 ]
+Key: PMOVSXWDrr: [ 0.00 0.00 ]
+Key: PMOVSXWQrm: [ 0.00 0.00 ]
+Key: PMOVSXWQrr: [ 0.00 0.00 ]
+Key: PMOVZXBDrm: [ 0.00 0.00 ]
+Key: PMOVZXBDrr: [ 0.00 0.00 ]
+Key: PMOVZXBQrm: [ 0.00 0.00 ]
+Key: PMOVZXBQrr: [ 0.00 0.00 ]
+Key: PMOVZXBWrm: [ 0.00 0.00 ]
+Key: PMOVZXBWrr: [ 0.00 0.00 ]
+Key: PMOVZXDQrm: [ 0.00 0.00 ]
+Key: PMOVZXDQrr: [ 0.00 0.00 ]
+Key: PMOVZXWDrm: [ 0.00 0.00 ]
+Key: PMOVZXWDrr: [ 0.00 0.00 ]
+Key: PMOVZXWQrm: [ 0.00 0.00 ]
+Key: PMOVZXWQrr: [ 0.00 0.00 ]
+Key: PMULDQrm: [ 0.00 0.00 ]
+Key: PMULDQrr: [ 0.00 0.00 ]
+Key: PMULHRSWrm: [ 0.00 0.00 ]
+Key: PMULHRSWrr: [ 0.00 0.00 ]
+Key: PMULHRWrm: [ 0.00 0.00 ]
+Key: PMULHRWrr: [ 0.00 0.00 ]
+Key: PMULHUWrm: [ 0.00 0.00 ]
+Key: PMULHUWrr: [ 0.00 0.00 ]
+Key: PMULHWrm: [ 0.00 0.00 ]
+Key: PMULHWrr: [ 0.00 0.00 ]
+Key: PMULLDrm: [ 0.00 0.00 ]
+Key: PMULLDrr: [ 0.00 0.00 ]
+Key: PMULLWrm: [ 0.00 0.00 ]
+Key: PMULLWrr: [ 0.00 0.00 ]
+Key: PMULUDQrm: [ 0.00 0.00 ]
+Key: PMULUDQrr: [ 0.00 0.00 ]
+Key: POP: [ 0.00 0.00 ]
+Key: POPA: [ 0.00 0.00 ]
+Key: POPCNT: [ 0.00 0.00 ]
+Key: POPDS: [ 0.00 0.00 ]
+Key: POPES: [ 0.00 0.00 ]
+Key: POPF: [ 0.00 0.00 ]
+Key: POPFS: [ 0.00 0.00 ]
+Key: POPGS: [ 0.00 0.00 ]
+Key: POPP: [ 0.00 0.00 ]
+Key: POPSS: [ 0.00 0.00 ]
+Key: PORrm: [ 0.00 0.00 ]
+Key: PORrr: [ 0.00 0.00 ]
+Key: PREALLOCATED_ARG: [ 0.00 0.00 ]
+Key: PREALLOCATED_SETUP: [ 0.00 0.00 ]
+Key: PREFETCH: [ 0.00 0.00 ]
+Key: PREFETCHIT: [ 0.00 0.00 ]
+Key: PREFETCHNTA: [ 0.00 0.00 ]
+Key: PREFETCHRST: [ 0.00 0.00 ]
+Key: PREFETCHT: [ 0.00 0.00 ]
+Key: PREFETCHW: [ 0.00 0.00 ]
+Key: PREFETCHWT: [ 0.00 0.00 ]
+Key: PROBED_ALLOCA: [ 0.00 0.00 ]
+Key: PSADBWrm: [ 0.00 0.00 ]
+Key: PSADBWrr: [ 0.00 0.00 ]
+Key: PSEUDO_PROBE: [ 0.00 0.00 ]
+Key: PSHUFBrm: [ 0.00 0.00 ]
+Key: PSHUFBrr: [ 0.00 0.00 ]
+Key: PSHUFDmi: [ 0.00 0.00 ]
+Key: PSHUFDri: [ 0.00 0.00 ]
+Key: PSHUFHWmi: [ 0.00 0.00 ]
+Key: PSHUFHWri: [ 0.00 0.00 ]
+Key: PSHUFLWmi: [ 0.00 0.00 ]
+Key: PSHUFLWri: [ 0.00 0.00 ]
+Key: PSIGNBrm: [ 0.00 0.00 ]
+Key: PSIGNBrr: [ 0.00 0.00 ]
+Key: PSIGNDrm: [ 0.00 0.00 ]
+Key: PSIGNDrr: [ 0.00 0.00 ]
+Key: PSIGNWrm: [ 0.00 0.00 ]
+Key: PSIGNWrr: [ 0.00 0.00 ]
+Key: PSLLDQri: [ 0.00 0.00 ]
+Key: PSLLDri: [ 0.00 0.00 ]
+Key: PSLLDrm: [ 0.00 0.00 ]
+Key: PSLLDrr: [ 0.00 0.00 ]
+Key: PSLLQri: [ 0.00 0.00 ]
+Key: PSLLQrm: [ 0.00 0.00 ]
+Key: PSLLQrr: [ 0.00 0.00 ]
+Key: PSLLWri: [ 0.00 0.00 ]
+Key: PSLLWrm: [ 0.00 0.00 ]
+Key: PSLLWrr: [ 0.00 0.00 ]
+Key: PSMASH: [ 0.00 0.00 ]
+Key: PSRADri: [ 0.00 0.00 ]
+Key: PSRADrm: [ 0.00 0.00 ]
+Key: PSRADrr: [ 0.00 0.00 ]
+Key: PSRAWri: [ 0.00 0.00 ]
+Key: PSRAWrm: [ 0.00 0.00 ]
+Key: PSRAWrr: [ 0.00 0.00 ]
+Key: PSRLDQri: [ 0.00 0.00 ]
+Key: PSRLDri: [ 0.00 0.00 ]
+Key: PSRLDrm: [ 0.00 0.00 ]
+Key: PSRLDrr: [ 0.00 0.00 ]
+Key: PSRLQri: [ 0.00 0.00 ]
+Key: PSRLQrm: [ 0.00 0.00 ]
+Key: PSRLQrr: [ 0.00 0.00 ]
+Key: PSRLWri: [ 0.00 0.00 ]
+Key: PSRLWrm: [ 0.00 0.00 ]
+Key: PSRLWrr: [ 0.00 0.00 ]
+Key: PSUBBrm: [ 0.00 0.00 ]
+Key: PSUBBrr: [ 0.00 0.00 ]
+Key: PSUBDrm: [ 0.00 0.00 ]
+Key: PSUBDrr: [ 0.00 0.00 ]
+Key: PSUBQrm: [ 0.00 0.00 ]
+Key: PSUBQrr: [ 0.00 0.00 ]
+Key: PSUBSBrm: [ 0.00 0.00 ]
+Key: PSUBSBrr: [ 0.00 0.00 ]
+Key: PSUBSWrm: [ 0.00 0.00 ]
+Key: PSUBSWrr: [ 0.00 0.00 ]
+Key: PSUBUSBrm: [ 0.00 0.00 ]
+Key: PSUBUSBrr: [ 0.00 0.00 ]
+Key: PSUBUSWrm: [ 0.00 0.00 ]
+Key: PSUBUSWrr: [ 0.00 0.00 ]
+Key: PSUBWrm: [ 0.00 0.00 ]
+Key: PSUBWrr: [ 0.00 0.00 ]
+Key: PSWAPDrm: [ 0.00 0.00 ]
+Key: PSWAPDrr: [ 0.00 0.00 ]
+Key: PT: [ 0.00 0.00 ]
+Key: PTCMMIMFP: [ 0.00 0.00 ]
+Key: PTCMMRLFP: [ 0.00 0.00 ]
+Key: PTCONJTCMMIMFP: [ 0.00 0.00 ]
+Key: PTCONJTFP: [ 0.00 0.00 ]
+Key: PTCVTROWD: [ 0.00 0.00 ]
+Key: PTCVTROWPS: [ 0.00 0.00 ]
+Key: PTDPBF: [ 0.00 0.00 ]
+Key: PTDPBHF: [ 0.00 0.00 ]
+Key: PTDPBSSD: [ 0.00 0.00 ]
+Key: PTDPBSSDV: [ 0.00 0.00 ]
+Key: PTDPBSUD: [ 0.00 0.00 ]
+Key: PTDPBSUDV: [ 0.00 0.00 ]
+Key: PTDPBUSD: [ 0.00 0.00 ]
+Key: PTDPBUSDV: [ 0.00 0.00 ]
+Key: PTDPBUUD: [ 0.00 0.00 ]
+Key: PTDPBUUDV: [ 0.00 0.00 ]
+Key: PTDPFP: [ 0.00 0.00 ]
+Key: PTDPHBF: [ 0.00 0.00 ]
+Key: PTDPHF: [ 0.00 0.00 ]
+Key: PTESTrm: [ 0.00 0.00 ]
+Key: PTESTrr: [ 0.00 0.00 ]
+Key: PTILELOADD: [ 0.00 0.00 ]
+Key: PTILELOADDRS: [ 0.00 0.00 ]
+Key: PTILELOADDRST: [ 0.00 0.00 ]
+Key: PTILELOADDRSV: [ 0.00 0.00 ]
+Key: PTILELOADDT: [ 0.00 0.00 ]
+Key: PTILELOADDV: [ 0.00 0.00 ]
+Key: PTILEMOVROWrre: [ 0.00 0.00 ]
+Key: PTILEMOVROWrreV: [ 0.00 0.00 ]
+Key: PTILEMOVROWrri: [ 0.00 0.00 ]
+Key: PTILEMOVROWrriV: [ 0.00 0.00 ]
+Key: PTILEPAIRLOAD: [ 0.00 0.00 ]
+Key: PTILEPAIRSTORE: [ 0.00 0.00 ]
+Key: PTILESTORED: [ 0.00 0.00 ]
+Key: PTILESTOREDV: [ 0.00 0.00 ]
+Key: PTILEZERO: [ 0.00 0.00 ]
+Key: PTILEZEROV: [ 0.00 0.00 ]
+Key: PTMMULTF: [ 0.00 0.00 ]
+Key: PTTCMMIMFP: [ 0.00 0.00 ]
+Key: PTTCMMRLFP: [ 0.00 0.00 ]
+Key: PTTDPBF: [ 0.00 0.00 ]
+Key: PTTDPFP: [ 0.00 0.00 ]
+Key: PTTMMULTF: [ 0.00 0.00 ]
+Key: PTTRANSPOSED: [ 0.00 0.00 ]
+Key: PTTRANSPOSEDV: [ 0.00 0.00 ]
+Key: PTWRITE: [ 0.00 0.00 ]
+Key: PTWRITEm: [ 0.00 0.00 ]
+Key: PTWRITEr: [ 0.00 0.00 ]
+Key: PUNPCKHBWrm: [ 0.00 0.00 ]
+Key: PUNPCKHBWrr: [ 0.00 0.00 ]
+Key: PUNPCKHDQrm: [ 0.00 0.00 ]
+Key: PUNPCKHDQrr: [ 0.00 0.00 ]
+Key: PUNPCKHQDQrm: [ 0.00 0.00 ]
+Key: PUNPCKHQDQrr: [ 0.00 0.00 ]
+Key: PUNPCKHWDrm: [ 0.00 0.00 ]
+Key: PUNPCKHWDrr: [ 0.00 0.00 ]
+Key: PUNPCKLBWrm: [ 0.00 0.00 ]
+Key: PUNPCKLBWrr: [ 0.00 0.00 ]
+Key: PUNPCKLDQrm: [ 0.00 0.00 ]
+Key: PUNPCKLDQrr: [ 0.00 0.00 ]
+Key: PUNPCKLQDQrm: [ 0.00 0.00 ]
+Key: PUNPCKLQDQrr: [ 0.00 0.00 ]
+Key: PUNPCKLWDrm: [ 0.00 0.00 ]
+Key: PUNPCKLWDrr: [ 0.00 0.00 ]
+Key: PUSH: [ 0.00 0.00 ]
+Key: PUSHA: [ 0.00 0.00 ]
+Key: PUSHCS: [ 0.00 0.00 ]
+Key: PUSHDS: [ 0.00 0.00 ]
+Key: PUSHES: [ 0.00 0.00 ]
+Key: PUSHF: [ 0.00 0.00 ]
+Key: PUSHFS: [ 0.00 0.00 ]
+Key: PUSHGS: [ 0.00 0.00 ]
+Key: PUSHP: [ 0.00 0.00 ]
+Key: PUSHSS: [ 0.00 0.00 ]
+Key: PVALIDATE: [ 0.00 0.00 ]
+Key: PXORrm: [ 0.00 0.00 ]
+Key: PXORrr: [ 0.00 0.00 ]
+Key: RCL: [ 0.00 0.00 ]
+Key: RCPPSm: [ 0.00 0.00 ]
+Key: RCPPSr: [ 0.00 0.00 ]
+Key: RCPSSm: [ 0.00 0.00 ]
+Key: RCPSSm_Int: [ 0.00 0.00 ]
+Key: RCPSSr: [ 0.00 0.00 ]
+Key: RCPSSr_Int: [ 0.00 0.00 ]
+Key: RCR: [ 0.00 0.00 ]
+Key: RDFLAGS: [ 0.00 0.00 ]
+Key: RDFSBASE: [ 0.00 0.00 ]
+Key: RDGSBASE: [ 0.00 0.00 ]
+Key: RDMSR: [ 0.00 0.00 ]
+Key: RDMSRLIST: [ 0.00 0.00 ]
+Key: RDMSRri: [ 0.00 0.00 ]
+Key: RDMSRri_EVEX: [ 0.00 0.00 ]
+Key: RDPID: [ 0.00 0.00 ]
+Key: RDPKRUr: [ 0.00 0.00 ]
+Key: RDPMC: [ 0.00 0.00 ]
+Key: RDPRU: [ 0.00 0.00 ]
+Key: RDRAND: [ 0.00 0.00 ]
+Key: RDSEED: [ 0.00 0.00 ]
+Key: RDSSPD: [ 0.00 0.00 ]
+Key: RDSSPQ: [ 0.00 0.00 ]
+Key: RDTSC: [ 0.00 0.00 ]
+Key: RDTSCP: [ 0.00 0.00 ]
+Key: REG_SEQUENCE: [ 0.00 0.00 ]
+Key: REPNE_PREFIX: [ 0.00 0.00 ]
+Key: REP_MOVSB: [ 0.00 0.00 ]
+Key: REP_MOVSD: [ 0.00 0.00 ]
+Key: REP_MOVSQ: [ 0.00 0.00 ]
+Key: REP_MOVSW: [ 0.00 0.00 ]
+Key: REP_PREFIX: [ 0.00 0.00 ]
+Key: REP_STOSB: [ 0.00 0.00 ]
+Key: REP_STOSD: [ 0.00 0.00 ]
+Key: REP_STOSQ: [ 0.00 0.00 ]
+Key: REP_STOSW: [ 0.00 0.00 ]
+Key: RET: [ 0.00 0.00 ]
+Key: RETI: [ 0.00 0.00 ]
+Key: REX: [ 0.00 0.00 ]
+Key: RMPADJUST: [ 0.00 0.00 ]
+Key: RMPQUERY: [ 0.00 0.00 ]
+Key: RMPUPDATE: [ 0.00 0.00 ]
+Key: ROL: [ 0.00 0.00 ]
+Key: ROR: [ 0.00 0.00 ]
+Key: RORX: [ 0.00 0.00 ]
+Key: ROUNDPDmi: [ 0.00 0.00 ]
+Key: ROUNDPDri: [ 0.00 0.00 ]
+Key: ROUNDPSmi: [ 0.00 0.00 ]
+Key: ROUNDPSri: [ 0.00 0.00 ]
+Key: ROUNDSDmi: [ 0.00 0.00 ]
+Key: ROUNDSDmi_Int: [ 0.00 0.00 ]
+Key: ROUNDSDri: [ 0.00 0.00 ]
+Key: ROUNDSDri_Int: [ 0.00 0.00 ]
+Key: ROUNDSSmi: [ 0.00 0.00 ]
+Key: ROUNDSSmi_Int: [ 0.00 0.00 ]
+Key: ROUNDSSri: [ 0.00 0.00 ]
+Key: ROUNDSSri_Int: [ 0.00 0.00 ]
+Key: RSM: [ 0.00 0.00 ]
+Key: RSQRTPSm: [ 0.00 0.00 ]
+Key: RSQRTPSr: [ 0.00 0.00 ]
+Key: RSQRTSSm: [ 0.00 0.00 ]
+Key: RSQRTSSm_Int: [ 0.00 0.00 ]
+Key: RSQRTSSr: [ 0.00 0.00 ]
+Key: RSQRTSSr_Int: [ 0.00 0.00 ]
+Key: RSTORSSP: [ 0.00 0.00 ]
+Key: SAHF: [ 0.00 0.00 ]
+Key: SALC: [ 0.00 0.00 ]
+Key: SAR: [ 0.00 0.00 ]
+Key: SARX: [ 0.00 0.00 ]
+Key: SAVEPREVSSP: [ 0.00 0.00 ]
+Key: SBB: [ 0.00 0.00 ]
+Key: SCASB: [ 0.00 0.00 ]
+Key: SCASL: [ 0.00 0.00 ]
+Key: SCASQ: [ 0.00 0.00 ]
+Key: SCASW: [ 0.00 0.00 ]
+Key: SEAMCALL: [ 0.00 0.00 ]
+Key: SEAMOPS: [ 0.00 0.00 ]
+Key: SEAMRET: [ 0.00 0.00 ]
+Key: SEG_ALLOCA: [ 0.00 0.00 ]
+Key: SEH_BeginEpilogue: [ 0.00 0.00 ]
+Key: SEH_EndEpilogue: [ 0.00 0.00 ]
+Key: SEH_EndPrologue: [ 0.00 0.00 ]
+Key: SEH_PushFrame: [ 0.00 0.00 ]
+Key: SEH_PushReg: [ 0.00 0.00 ]
+Key: SEH_SaveReg: [ 0.00 0.00 ]
+Key: SEH_SaveXMM: [ 0.00 0.00 ]
+Key: SEH_SetFrame: [ 0.00 0.00 ]
+Key: SEH_StackAlign: [ 0.00 0.00 ]
+Key: SEH_StackAlloc: [ 0.00 0.00 ]
+Key: SEH_UnwindV: [ 0.00 0.00 ]
+Key: SEH_UnwindVersion: [ 0.00 0.00 ]
+Key: SENDUIPI: [ 0.00 0.00 ]
+Key: SERIALIZE: [ 0.00 0.00 ]
+Key: SETB_C: [ 0.00 0.00 ]
+Key: SETCCm: [ 0.00 0.00 ]
+Key: SETCCm_EVEX: [ 0.00 0.00 ]
+Key: SETCCr: [ 0.00 0.00 ]
+Key: SETCCr_EVEX: [ 0.00 0.00 ]
+Key: SETSSBSY: [ 0.00 0.00 ]
+Key: SETZUCCm: [ 0.00 0.00 ]
+Key: SETZUCCr: [ 0.00 0.00 ]
+Key: SFENCE: [ 0.00 0.00 ]
+Key: SGDT: [ 0.00 0.00 ]
+Key: SHA: [ 0.00 0.00 ]
+Key: SHL: [ 0.00 0.00 ]
+Key: SHLD: [ 0.00 0.00 ]
+Key: SHLDROT: [ 0.00 0.00 ]
+Key: SHLX: [ 0.00 0.00 ]
+Key: SHR: [ 0.00 0.00 ]
+Key: SHRD: [ 0.00 0.00 ]
+Key: SHRDROT: [ 0.00 0.00 ]
+Key: SHRX: [ 0.00 0.00 ]
+Key: SHUFPDrmi: [ 0.00 0.00 ]
+Key: SHUFPDrri: [ 0.00 0.00 ]
+Key: SHUFPSrmi: [ 0.00 0.00 ]
+Key: SHUFPSrri: [ 0.00 0.00 ]
+Key: SIDT: [ 0.00 0.00 ]
+Key: SKINIT: [ 0.00 0.00 ]
+Key: SLDT: [ 0.00 0.00 ]
+Key: SLWPCB: [ 0.00 0.00 ]
+Key: SMSW: [ 0.00 0.00 ]
+Key: SQRTPDm: [ 0.00 0.00 ]
+Key: SQRTPDr: [ 0.00 0.00 ]
+Key: SQRTPSm: [ 0.00 0.00 ]
+Key: SQRTPSr: [ 0.00 0.00 ]
+Key: SQRTSDm: [ 0.00 0.00 ]
+Key: SQRTSDm_Int: [ 0.00 0.00 ]
+Key: SQRTSDr: [ 0.00 0.00 ]
+Key: SQRTSDr_Int: [ 0.00 0.00 ]
+Key: SQRTSSm: [ 0.00 0.00 ]
+Key: SQRTSSm_Int: [ 0.00 0.00 ]
+Key: SQRTSSr: [ 0.00 0.00 ]
+Key: SQRTSSr_Int: [ 0.00 0.00 ]
+Key: SQRT_F: [ 0.00 0.00 ]
+Key: SQRT_Fp: [ 0.00 0.00 ]
+Key: SS_PREFIX: [ 0.00 0.00 ]
+Key: STAC: [ 0.00 0.00 ]
+Key: STACKALLOC_W_PROBING: [ 0.00 0.00 ]
+Key: STACKMAP: [ 0.00 0.00 ]
+Key: STATEPOINT: [ 0.00 0.00 ]
+Key: STC: [ 0.00 0.00 ]
+Key: STD: [ 0.00 0.00 ]
+Key: STGI: [ 0.00 0.00 ]
+Key: STI: [ 0.00 0.00 ]
+Key: STMXCSR: [ 0.00 0.00 ]
+Key: STOSB: [ 0.00 0.00 ]
+Key: STOSL: [ 0.00 0.00 ]
+Key: STOSQ: [ 0.00 0.00 ]
+Key: STOSW: [ 0.00 0.00 ]
+Key: STR: [ 0.00 0.00 ]
+Key: STRm: [ 0.00 0.00 ]
+Key: STTILECFG: [ 0.00 0.00 ]
+Key: STTILECFG_EVEX: [ 0.00 0.00 ]
+Key: STUI: [ 0.00 0.00 ]
+Key: ST_F: [ 0.00 0.00 ]
+Key: ST_FP: [ 0.00 0.00 ]
+Key: ST_FPrr: [ 0.00 0.00 ]
+Key: ST_Fp: [ 0.00 0.00 ]
+Key: ST_FpP: [ 0.00 0.00 ]
+Key: ST_Frr: [ 0.00 0.00 ]
+Key: SUB: [ 0.00 0.00 ]
+Key: SUBPDrm: [ 0.00 0.00 ]
+Key: SUBPDrr: [ 0.00 0.00 ]
+Key: SUBPSrm: [ 0.00 0.00 ]
+Key: SUBPSrr: [ 0.00 0.00 ]
+Key: SUBREG_TO_REG: [ 0.00 0.00 ]
+Key: SUBR_F: [ 0.00 0.00 ]
+Key: SUBR_FI: [ 0.00 0.00 ]
+Key: SUBR_FPrST: [ 0.00 0.00 ]
+Key: SUBR_FST: [ 0.00 0.00 ]
+Key: SUBR_Fp: [ 0.00 0.00 ]
+Key: SUBR_FpI: [ 0.00 0.00 ]
+Key: SUBR_FrST: [ 0.00 0.00 ]
+Key: SUBSDrm: [ 0.00 0.00 ]
+Key: SUBSDrm_Int: [ 0.00 0.00 ]
+Key: SUBSDrr: [ 0.00 0.00 ]
+Key: SUBSDrr_Int: [ 0.00 0.00 ]
+Key: SUBSSrm: [ 0.00 0.00 ]
+Key: SUBSSrm_Int: [ 0.00 0.00 ]
+Key: SUBSSrr: [ 0.00 0.00 ]
+Key: SUBSSrr_Int: [ 0.00 0.00 ]
+Key: SUB_F: [ 0.00 0.00 ]
+Key: SUB_FI: [ 0.00 0.00 ]
+Key: SUB_FPrST: [ 0.00 0.00 ]
+Key: SUB_FST: [ 0.00 0.00 ]
+Key: SUB_Fp: [ 0.00 0.00 ]
+Key: SUB_FpI: [ 0.00 0.00 ]
+Key: SUB_FrST: [ 0.00 0.00 ]
+Key: SWAPGS: [ 0.00 0.00 ]
+Key: SYSCALL: [ 0.00 0.00 ]
+Key: SYSENTER: [ 0.00 0.00 ]
+Key: SYSEXIT: [ 0.00 0.00 ]
+Key: SYSRET: [ 0.00 0.00 ]
+Key: T: [ 0.00 0.00 ]
+Key: TAILJMPd: [ 0.00 0.00 ]
+Key: TAILJMPd_CC: [ 0.00 0.00 ]
+Key: TAILJMPm: [ 0.00 0.00 ]
+Key: TAILJMPr: [ 0.00 0.00 ]
+Key: TCMMIMFP: [ 0.00 0.00 ]
+Key: TCMMRLFP: [ 0.00 0.00 ]
+Key: TCONJTCMMIMFP: [ 0.00 0.00 ]
+Key: TCONJTFP: [ 0.00 0.00 ]
+Key: TCRETURN_HIPE: [ 0.00 0.00 ]
+Key: TCRETURN_WIN: [ 0.00 0.00 ]
+Key: TCRETURN_WINmi: [ 0.00 0.00 ]
+Key: TCRETURNdi: [ 0.00 0.00 ]
+Key: TCRETURNdicc: [ 0.00 0.00 ]
+Key: TCRETURNmi: [ 0.00 0.00 ]
+Key: TCRETURNri: [ 0.00 0.00 ]
+Key: TCVTROWD: [ 0.00 0.00 ]
+Key: TCVTROWPS: [ 0.00 0.00 ]
+Key: TDCALL: [ 0.00 0.00 ]
+Key: TDPBF: [ 0.00 0.00 ]
+Key: TDPBHF: [ 0.00 0.00 ]
+Key: TDPBSSD: [ 0.00 0.00 ]
+Key: TDPBSUD: [ 0.00 0.00 ]
+Key: TDPBUSD: [ 0.00 0.00 ]
+Key: TDPBUUD: [ 0.00 0.00 ]
+Key: TDPFP: [ 0.00 0.00 ]
+Key: TDPHBF: [ 0.00 0.00 ]
+Key: TDPHF: [ 0.00 0.00 ]
+Key: TEST: [ 0.00 0.00 ]
+Key: TESTUI: [ 0.00 0.00 ]
+Key: TILELOADD: [ 0.00 0.00 ]
+Key: TILELOADDRS: [ 0.00 0.00 ]
+Key: TILELOADDRST: [ 0.00 0.00 ]
+Key: TILELOADDRS_EVEX: [ 0.00 0.00 ]
+Key: TILELOADDT: [ 0.00 0.00 ]
+Key: TILELOADD_EVEX: [ 0.00 0.00 ]
+Key: TILEMOVROWrre: [ 0.00 0.00 ]
+Key: TILEMOVROWrri: [ 0.00 0.00 ]
+Key: TILERELEASE: [ 0.00 0.00 ]
+Key: TILESTORED: [ 0.00 0.00 ]
+Key: TILESTORED_EVEX: [ 0.00 0.00 ]
+Key: TILEZERO: [ 0.00 0.00 ]
+Key: TLBSYNC: [ 0.00 0.00 ]
+Key: TLSCall: [ 0.00 0.00 ]
+Key: TLS_addr: [ 0.00 0.00 ]
+Key: TLS_addrX: [ 0.00 0.00 ]
+Key: TLS_base_addr: [ 0.00 0.00 ]
+Key: TLS_base_addrX: [ 0.00 0.00 ]
+Key: TLS_desc: [ 0.00 0.00 ]
+Key: TMMULTF: [ 0.00 0.00 ]
+Key: TPAUSE: [ 0.00 0.00 ]
+Key: TRAP: [ 0.00 0.00 ]
+Key: TST_F: [ 0.00 0.00 ]
+Key: TST_Fp: [ 0.00 0.00 ]
+Key: TTCMMIMFP: [ 0.00 0.00 ]
+Key: TTCMMRLFP: [ 0.00 0.00 ]
+Key: TTDPBF: [ 0.00 0.00 ]
+Key: TTDPFP: [ 0.00 0.00 ]
+Key: TTMMULTF: [ 0.00 0.00 ]
+Key: TTRANSPOSED: [ 0.00 0.00 ]
+Key: TZCNT: [ 0.00 0.00 ]
+Key: TZMSK: [ 0.00 0.00 ]
+Key: UBSAN_UD: [ 0.00 0.00 ]
+Key: UCOMISDrm: [ 0.00 0.00 ]
+Key: UCOMISDrm_Int: [ 0.00 0.00 ]
+Key: UCOMISDrr: [ 0.00 0.00 ]
+Key: UCOMISDrr_Int: [ 0.00 0.00 ]
+Key: UCOMISSrm: [ 0.00 0.00 ]
+Key: UCOMISSrm_Int: [ 0.00 0.00 ]
+Key: UCOMISSrr: [ 0.00 0.00 ]
+Key: UCOMISSrr_Int: [ 0.00 0.00 ]
+Key: UCOM_FIPr: [ 0.00 0.00 ]
+Key: UCOM_FIr: [ 0.00 0.00 ]
+Key: UCOM_FPPr: [ 0.00 0.00 ]
+Key: UCOM_FPr: [ 0.00 0.00 ]
+Key: UCOM_FpIr: [ 0.00 0.00 ]
+Key: UCOM_Fpr: [ 0.00 0.00 ]
+Key: UCOM_Fr: [ 0.00 0.00 ]
+Key: UD: [ 0.00 0.00 ]
+Key: UIRET: [ 0.00 0.00 ]
+Key: UMONITOR: [ 0.00 0.00 ]
+Key: UMWAIT: [ 0.00 0.00 ]
+Key: UNPCKHPDrm: [ 0.00 0.00 ]
+Key: UNPCKHPDrr: [ 0.00 0.00 ]
+Key: UNPCKHPSrm: [ 0.00 0.00 ]
+Key: UNPCKHPSrr: [ 0.00 0.00 ]
+Key: UNPCKLPDrm: [ 0.00 0.00 ]
+Key: UNPCKLPDrr: [ 0.00 0.00 ]
+Key: UNPCKLPSrm: [ 0.00 0.00 ]
+Key: UNPCKLPSrr: [ 0.00 0.00 ]
+Key: URDMSRri: [ 0.00 0.00 ]
+Key: URDMSRri_EVEX: [ 0.00 0.00 ]
+Key: URDMSRrr: [ 0.00 0.00 ]
+Key: URDMSRrr_EVEX: [ 0.00 0.00 ]
+Key: UWRMSRir: [ 0.00 0.00 ]
+Key: UWRMSRir_EVEX: [ 0.00 0.00 ]
+Key: UWRMSRrr: [ 0.00 0.00 ]
+Key: UWRMSRrr_EVEX: [ 0.00 0.00 ]
+Key: V: [ 0.00 0.00 ]
+Key: VAARG: [ 0.00 0.00 ]
+Key: VAARG_X: [ 0.00 0.00 ]
+Key: VADDBF: [ 0.00 0.00 ]
+Key: VADDPDYrm: [ 0.00 0.00 ]
+Key: VADDPDYrr: [ 0.00 0.00 ]
+Key: VADDPDZ: [ 0.00 0.00 ]
+Key: VADDPDZrm: [ 0.00 0.00 ]
+Key: VADDPDZrmb: [ 0.00 0.00 ]
+Key: VADDPDZrmbk: [ 0.00 0.00 ]
+Key: VADDPDZrmbkz: [ 0.00 0.00 ]
+Key: VADDPDZrmk: [ 0.00 0.00 ]
+Key: VADDPDZrmkz: [ 0.00 0.00 ]
+Key: VADDPDZrr: [ 0.00 0.00 ]
+Key: VADDPDZrrb: [ 0.00 0.00 ]
+Key: VADDPDZrrbk: [ 0.00 0.00 ]
+Key: VADDPDZrrbkz: [ 0.00 0.00 ]
+Key: VADDPDZrrk: [ 0.00 0.00 ]
+Key: VADDPDZrrkz: [ 0.00 0.00 ]
+Key: VADDPDrm: [ 0.00 0.00 ]
+Key: VADDPDrr: [ 0.00 0.00 ]
+Key: VADDPHZ: [ 0.00 0.00 ]
+Key: VADDPHZrm: [ 0.00 0.00 ]
+Key: VADDPHZrmb: [ 0.00 0.00 ]
+Key: VADDPHZrmbk: [ 0.00 0.00 ]
+Key: VADDPHZrmbkz: [ 0.00 0.00 ]
+Key: VADDPHZrmk: [ 0.00 0.00 ]
+Key: VADDPHZrmkz: [ 0.00 0.00 ]
+Key: VADDPHZrr: [ 0.00 0.00 ]
+Key: VADDPHZrrb: [ 0.00 0.00 ]
+Key: VADDPHZrrbk: [ 0.00 0.00 ]
+Key: VADDPHZrrbkz: [ 0.00 0.00 ]
+Key: VADDPHZrrk: [ 0.00 0.00 ]
+Key: VADDPHZrrkz: [ 0.00 0.00 ]
+Key: VADDPSYrm: [ 0.00 0.00 ]
+Key: VADDPSYrr: [ 0.00 0.00 ]
+Key: VADDPSZ: [ 0.00 0.00 ]
+Key: VADDPSZrm: [ 0.00 0.00 ]
+Key: VADDPSZrmb: [ 0.00 0.00 ]
+Key: VADDPSZrmbk: [ 0.00 0.00 ]
+Key: VADDPSZrmbkz: [ 0.00 0.00 ]
+Key: VADDPSZrmk: [ 0.00 0.00 ]
+Key: VADDPSZrmkz: [ 0.00 0.00 ]
+Key: VADDPSZrr: [ 0.00 0.00 ]
+Key: VADDPSZrrb: [ 0.00 0.00 ]
+Key: VADDPSZrrbk: [ 0.00 0.00 ]
+Key: VADDPSZrrbkz: [ 0.00 0.00 ]
+Key: VADDPSZrrk: [ 0.00 0.00 ]
+Key: VADDPSZrrkz: [ 0.00 0.00 ]
+Key: VADDPSrm: [ 0.00 0.00 ]
+Key: VADDPSrr: [ 0.00 0.00 ]
+Key: VADDSDZrm: [ 0.00 0.00 ]
+Key: VADDSDZrm_Int: [ 0.00 0.00 ]
+Key: VADDSDZrmk_Int: [ 0.00 0.00 ]
+Key: VADDSDZrmkz_Int: [ 0.00 0.00 ]
+Key: VADDSDZrr: [ 0.00 0.00 ]
+Key: VADDSDZrr_Int: [ 0.00 0.00 ]
+Key: VADDSDZrrb_Int: [ 0.00 0.00 ]
+Key: VADDSDZrrbk_Int: [ 0.00 0.00 ]
+Key: VADDSDZrrbkz_Int: [ 0.00 0.00 ]
+Key: VADDSDZrrk_Int: [ 0.00 0.00 ]
+Key: VADDSDZrrkz_Int: [ 0.00 0.00 ]
+Key: VADDSDrm: [ 0.00 0.00 ]
+Key: VADDSDrm_Int: [ 0.00 0.00 ]
+Key: VADDSDrr: [ 0.00 0.00 ]
+Key: VADDSDrr_Int: [ 0.00 0.00 ]
+Key: VADDSHZrm: [ 0.00 0.00 ]
+Key: VADDSHZrm_Int: [ 0.00 0.00 ]
+Key: VADDSHZrmk_Int: [ 0.00 0.00 ]
+Key: VADDSHZrmkz_Int: [ 0.00 0.00 ]
+Key: VADDSHZrr: [ 0.00 0.00 ]
+Key: VADDSHZrr_Int: [ 0.00 0.00 ]
+Key: VADDSHZrrb_Int: [ 0.00 0.00 ]
+Key: VADDSHZrrbk_Int: [ 0.00 0.00 ]
+Key: VADDSHZrrbkz_Int: [ 0.00 0.00 ]
+Key: VADDSHZrrk_Int: [ 0.00 0.00 ]
+Key: VADDSHZrrkz_Int: [ 0.00 0.00 ]
+Key: VADDSSZrm: [ 0.00 0.00 ]
+Key: VADDSSZrm_Int: [ 0.00 0.00 ]
+Key: VADDSSZrmk_Int: [ 0.00 0.00 ]
+Key: VADDSSZrmkz_Int: [ 0.00 0.00 ]
+Key: VADDSSZrr: [ 0.00 0.00 ]
+Key: VADDSSZrr_Int: [ 0.00 0.00 ]
+Key: VADDSSZrrb_Int: [ 0.00 0.00 ]
+Key: VADDSSZrrbk_Int: [ 0.00 0.00 ]
+Key: VADDSSZrrbkz_Int: [ 0.00 0.00 ]
+Key: VADDSSZrrk_Int: [ 0.00 0.00 ]
+Key: VADDSSZrrkz_Int: [ 0.00 0.00 ]
+Key: VADDSSrm: [ 0.00 0.00 ]
+Key: VADDSSrm_Int: [ 0.00 0.00 ]
+Key: VADDSSrr: [ 0.00 0.00 ]
+Key: VADDSSrr_Int: [ 0.00 0.00 ]
+Key: VADDSUBPDYrm: [ 0.00 0.00 ]
+Key: VADDSUBPDYrr: [ 0.00 0.00 ]
+Key: VADDSUBPDrm: [ 0.00 0.00 ]
+Key: VADDSUBPDrr: [ 0.00 0.00 ]
+Key: VADDSUBPSYrm: [ 0.00 0.00 ]
+Key: VADDSUBPSYrr: [ 0.00 0.00 ]
+Key: VADDSUBPSrm: [ 0.00 0.00 ]
+Key: VADDSUBPSrr: [ 0.00 0.00 ]
+Key: VAESDECLASTYrm: [ 0.00 0.00 ]
+Key: VAESDECLASTYrr: [ 0.00 0.00 ]
+Key: VAESDECLASTZ: [ 0.00 0.00 ]
+Key: VAESDECLASTZrm: [ 0.00 0.00 ]
+Key: VAESDECLASTZrr: [ 0.00 0.00 ]
+Key: VAESDECLASTrm: [ 0.00 0.00 ]
+Key: VAESDECLASTrr: [ 0.00 0.00 ]
+Key: VAESDECYrm: [ 0.00 0.00 ]
+Key: VAESDECYrr: [ 0.00 0.00 ]
+Key: VAESDECZ: [ 0.00 0.00 ]
+Key: VAESDECZrm: [ 0.00 0.00 ]
+Key: VAESDECZrr: [ 0.00 0.00 ]
+Key: VAESDECrm: [ 0.00 0.00 ]
+Key: VAESDECrr: [ 0.00 0.00 ]
+Key: VAESENCLASTYrm: [ 0.00 0.00 ]
+Key: VAESENCLASTYrr: [ 0.00 0.00 ]
+Key: VAESENCLASTZ: [ 0.00 0.00 ]
+Key: VAESENCLASTZrm: [ 0.00 0.00 ]
+Key: VAESENCLASTZrr: [ 0.00 0.00 ]
+Key: VAESENCLASTrm: [ 0.00 0.00 ]
+Key: VAESENCLASTrr: [ 0.00 0.00 ]
+Key: VAESENCYrm: [ 0.00 0.00 ]
+Key: VAESENCYrr: [ 0.00 0.00 ]
+Key: VAESENCZ: [ 0.00 0.00 ]
+Key: VAESENCZrm: [ 0.00 0.00 ]
+Key: VAESENCZrr: [ 0.00 0.00 ]
+Key: VAESENCrm: [ 0.00 0.00 ]
+Key: VAESENCrr: [ 0.00 0.00 ]
+Key: VAESIMCrm: [ 0.00 0.00 ]
+Key: VAESIMCrr: [ 0.00 0.00 ]
+Key: VAESKEYGENASSISTrmi: [ 0.00 0.00 ]
+Key: VAESKEYGENASSISTrri: [ 0.00 0.00 ]
+Key: VALIGNDZ: [ 0.00 0.00 ]
+Key: VALIGNDZrmbi: [ 0.00 0.00 ]
+Key: VALIGNDZrmbik: [ 0.00 0.00 ]
+Key: VALIGNDZrmbikz: [ 0.00 0.00 ]
+Key: VALIGNDZrmi: [ 0.00 0.00 ]
+Key: VALIGNDZrmik: [ 0.00 0.00 ]
+Key: VALIGNDZrmikz: [ 0.00 0.00 ]
+Key: VALIGNDZrri: [ 0.00 0.00 ]
+Key: VALIGNDZrrik: [ 0.00 0.00 ]
+Key: VALIGNDZrrikz: [ 0.00 0.00 ]
+Key: VALIGNQZ: [ 0.00 0.00 ]
+Key: VALIGNQZrmbi: [ 0.00 0.00 ]
+Key: VALIGNQZrmbik: [ 0.00 0.00 ]
+Key: VALIGNQZrmbikz: [ 0.00 0.00 ]
+Key: VALIGNQZrmi: [ 0.00 0.00 ]
+Key: VALIGNQZrmik: [ 0.00 0.00 ]
+Key: VALIGNQZrmikz: [ 0.00 0.00 ]
+Key: VALIGNQZrri: [ 0.00 0.00 ]
+Key: VALIGNQZrrik: [ 0.00 0.00 ]
+Key: VALIGNQZrrikz: [ 0.00 0.00 ]
+Key: VANDNPDYrm: [ 0.00 0.00 ]
+Key: VANDNPDYrr: [ 0.00 0.00 ]
+Key: VANDNPDZ: [ 0.00 0.00 ]
+Key: VANDNPDZrm: [ 0.00 0.00 ]
+Key: VANDNPDZrmb: [ 0.00 0.00 ]
+Key: VANDNPDZrmbk: [ 0.00 0.00 ]
+Key: VANDNPDZrmbkz: [ 0.00 0.00 ]
+Key: VANDNPDZrmk: [ 0.00 0.00 ]
+Key: VANDNPDZrmkz: [ 0.00 0.00 ]
+Key: VANDNPDZrr: [ 0.00 0.00 ]
+Key: VANDNPDZrrk: [ 0.00 0.00 ]
+Key: VANDNPDZrrkz: [ 0.00 0.00 ]
+Key: VANDNPDrm: [ 0.00 0.00 ]
+Key: VANDNPDrr: [ 0.00 0.00 ]
+Key: VANDNPSYrm: [ 0.00 0.00 ]
+Key: VANDNPSYrr: [ 0.00 0.00 ]
+Key: VANDNPSZ: [ 0.00 0.00 ]
+Key: VANDNPSZrm: [ 0.00 0.00 ]
+Key: VANDNPSZrmb: [ 0.00 0.00 ]
+Key: VANDNPSZrmbk: [ 0.00 0.00 ]
+Key: VANDNPSZrmbkz: [ 0.00 0.00 ]
+Key: VANDNPSZrmk: [ 0.00 0.00 ]
+Key: VANDNPSZrmkz: [ 0.00 0.00 ]
+Key: VANDNPSZrr: [ 0.00 0.00 ]
+Key: VANDNPSZrrk: [ 0.00 0.00 ]
+Key: VANDNPSZrrkz: [ 0.00 0.00 ]
+Key: VANDNPSrm: [ 0.00 0.00 ]
+Key: VANDNPSrr: [ 0.00 0.00 ]
+Key: VANDPDYrm: [ 0.00 0.00 ]
+Key: VANDPDYrr: [ 0.00 0.00 ]
+Key: VANDPDZ: [ 0.00 0.00 ]
+Key: VANDPDZrm: [ 0.00 0.00 ]
+Key: VANDPDZrmb: [ 0.00 0.00 ]
+Key: VANDPDZrmbk: [ 0.00 0.00 ]
+Key: VANDPDZrmbkz: [ 0.00 0.00 ]
+Key: VANDPDZrmk: [ 0.00 0.00 ]
+Key: VANDPDZrmkz: [ 0.00 0.00 ]
+Key: VANDPDZrr: [ 0.00 0.00 ]
+Key: VANDPDZrrk: [ 0.00 0.00 ]
+Key: VANDPDZrrkz: [ 0.00 0.00 ]
+Key: VANDPDrm: [ 0.00 0.00 ]
+Key: VANDPDrr: [ 0.00 0.00 ]
+Key: VANDPSYrm: [ 0.00 0.00 ]
+Key: VANDPSYrr: [ 0.00 0.00 ]
+Key: VANDPSZ: [ 0.00 0.00 ]
+Key: VANDPSZrm: [ 0.00 0.00 ]
+Key: VANDPSZrmb: [ 0.00 0.00 ]
+Key: VANDPSZrmbk: [ 0.00 0.00 ]
+Key: VANDPSZrmbkz: [ 0.00 0.00 ]
+Key: VANDPSZrmk: [ 0.00 0.00 ]
+Key: VANDPSZrmkz: [ 0.00 0.00 ]
+Key: VANDPSZrr: [ 0.00 0.00 ]
+Key: VANDPSZrrk: [ 0.00 0.00 ]
+Key: VANDPSZrrkz: [ 0.00 0.00 ]
+Key: VANDPSrm: [ 0.00 0.00 ]
+Key: VANDPSrr: [ 0.00 0.00 ]
+Key: VASTART_SAVE_XMM_REGS: [ 0.00 0.00 ]
+Key: VBCSTNEBF: [ 0.00 0.00 ]
+Key: VBCSTNESH: [ 0.00 0.00 ]
+Key: VBLENDMPDZ: [ 0.00 0.00 ]
+Key: VBLENDMPDZrm: [ 0.00 0.00 ]
+Key: VBLENDMPDZrmb: [ 0.00 0.00 ]
+Key: VBLENDMPDZrmbk: [ 0.00 0.00 ]
+Key: VBLENDMPDZrmbkz: [ 0.00 0.00 ]
+Key: VBLENDMPDZrmk: [ 0.00 0.00 ]
+Key: VBLENDMPDZrmkz: [ 0.00 0.00 ]
+Key: VBLENDMPDZrr: [ 0.00 0.00 ]
+Key: VBLENDMPDZrrk: [ 0.00 0.00 ]
+Key: VBLENDMPDZrrkz: [ 0.00 0.00 ]
+Key: VBLENDMPSZ: [ 0.00 0.00 ]
+Key: VBLENDMPSZrm: [ 0.00 0.00 ]
+Key: VBLENDMPSZrmb: [ 0.00 0.00 ]
+Key: VBLENDMPSZrmbk: [ 0.00 0.00 ]
+Key: VBLENDMPSZrmbkz: [ 0.00 0.00 ]
+Key: VBLENDMPSZrmk: [ 0.00 0.00 ]
+Key: VBLENDMPSZrmkz: [ 0.00 0.00 ]
+Key: VBLENDMPSZrr: [ 0.00 0.00 ]
+Key: VBLENDMPSZrrk: [ 0.00 0.00 ]
+Key: VBLENDMPSZrrkz: [ 0.00 0.00 ]
+Key: VBLENDPDYrmi: [ 0.00 0.00 ]
+Key: VBLENDPDYrri: [ 0.00 0.00 ]
+Key: VBLENDPDrmi: [ 0.00 0.00 ]
+Key: VBLENDPDrri: [ 0.00 0.00 ]
+Key: VBLENDPSYrmi: [ 0.00 0.00 ]
+Key: VBLENDPSYrri: [ 0.00 0.00 ]
+Key: VBLENDPSrmi: [ 0.00 0.00 ]
+Key: VBLENDPSrri: [ 0.00 0.00 ]
+Key: VBLENDVPDYrmr: [ 0.00 0.00 ]
+Key: VBLENDVPDYrrr: [ 0.00 0.00 ]
+Key: VBLENDVPDrmr: [ 0.00 0.00 ]
+Key: VBLENDVPDrrr: [ 0.00 0.00 ]
+Key: VBLENDVPSYrmr: [ 0.00 0.00 ]
+Key: VBLENDVPSYrrr: [ 0.00 0.00 ]
+Key: VBLENDVPSrmr: [ 0.00 0.00 ]
+Key: VBLENDVPSrrr: [ 0.00 0.00 ]
+Key: VBROADCASTF: [ 0.00 0.00 ]
+Key: VBROADCASTI: [ 0.00 0.00 ]
+Key: VBROADCASTSDYrm: [ 0.00 0.00 ]
+Key: VBROADCASTSDYrr: [ 0.00 0.00 ]
+Key: VBROADCASTSDZ: [ 0.00 0.00 ]
+Key: VBROADCASTSDZrm: [ 0.00 0.00 ]
+Key: VBROADCASTSDZrmk: [ 0.00 0.00 ]
+Key: VBROADCASTSDZrmkz: [ 0.00 0.00 ]
+Key: VBROADCASTSDZrr: [ 0.00 0.00 ]
+Key: VBROADCASTSDZrrk: [ 0.00 0.00 ]
+Key: VBROADCASTSDZrrkz: [ 0.00 0.00 ]
+Key: VBROADCASTSSYrm: [ 0.00 0.00 ]
+Key: VBROADCASTSSYrr: [ 0.00 0.00 ]
+Key: VBROADCASTSSZ: [ 0.00 0.00 ]
+Key: VBROADCASTSSZrm: [ 0.00 0.00 ]
+Key: VBROADCASTSSZrmk: [ 0.00 0.00 ]
+Key: VBROADCASTSSZrmkz: [ 0.00 0.00 ]
+Key: VBROADCASTSSZrr: [ 0.00 0.00 ]
+Key: VBROADCASTSSZrrk: [ 0.00 0.00 ]
+Key: VBROADCASTSSZrrkz: [ 0.00 0.00 ]
+Key: VBROADCASTSSrm: [ 0.00 0.00 ]
+Key: VBROADCASTSSrr: [ 0.00 0.00 ]
+Key: VCMPBF: [ 0.00 0.00 ]
+Key: VCMPPDYrmi: [ 0.00 0.00 ]
+Key: VCMPPDYrri: [ 0.00 0.00 ]
+Key: VCMPPDZ: [ 0.00 0.00 ]
+Key: VCMPPDZrmbi: [ 0.00 0.00 ]
+Key: VCMPPDZrmbik: [ 0.00 0.00 ]
+Key: VCMPPDZrmi: [ 0.00 0.00 ]
+Key: VCMPPDZrmik: [ 0.00 0.00 ]
+Key: VCMPPDZrri: [ 0.00 0.00 ]
+Key: VCMPPDZrrib: [ 0.00 0.00 ]
+Key: VCMPPDZrribk: [ 0.00 0.00 ]
+Key: VCMPPDZrrik: [ 0.00 0.00 ]
+Key: VCMPPDrmi: [ 0.00 0.00 ]
+Key: VCMPPDrri: [ 0.00 0.00 ]
+Key: VCMPPHZ: [ 0.00 0.00 ]
+Key: VCMPPHZrmbi: [ 0.00 0.00 ]
+Key: VCMPPHZrmbik: [ 0.00 0.00 ]
+Key: VCMPPHZrmi: [ 0.00 0.00 ]
+Key: VCMPPHZrmik: [ 0.00 0.00 ]
+Key: VCMPPHZrri: [ 0.00 0.00 ]
+Key: VCMPPHZrrib: [ 0.00 0.00 ]
+Key: VCMPPHZrribk: [ 0.00 0.00 ]
+Key: VCMPPHZrrik: [ 0.00 0.00 ]
+Key: VCMPPSYrmi: [ 0.00 0.00 ]
+Key: VCMPPSYrri: [ 0.00 0.00 ]
+Key: VCMPPSZ: [ 0.00 0.00 ]
+Key: VCMPPSZrmbi: [ 0.00 0.00 ]
+Key: VCMPPSZrmbik: [ 0.00 0.00 ]
+Key: VCMPPSZrmi: [ 0.00 0.00 ]
+Key: VCMPPSZrmik: [ 0.00 0.00 ]
+Key: VCMPPSZrri: [ 0.00 0.00 ]
+Key: VCMPPSZrrib: [ 0.00 0.00 ]
+Key: VCMPPSZrribk: [ 0.00 0.00 ]
+Key: VCMPPSZrrik: [ 0.00 0.00 ]
+Key: VCMPPSrmi: [ 0.00 0.00 ]
+Key: VCMPPSrri: [ 0.00 0.00 ]
+Key: VCMPSDZrmi: [ 0.00 0.00 ]
+Key: VCMPSDZrmi_Int: [ 0.00 0.00 ]
+Key: VCMPSDZrmik_Int: [ 0.00 0.00 ]
+Key: VCMPSDZrri: [ 0.00 0.00 ]
+Key: VCMPSDZrri_Int: [ 0.00 0.00 ]
+Key: VCMPSDZrrib_Int: [ 0.00 0.00 ]
+Key: VCMPSDZrribk_Int: [ 0.00 0.00 ]
+Key: VCMPSDZrrik_Int: [ 0.00 0.00 ]
+Key: VCMPSDrmi: [ 0.00 0.00 ]
+Key: VCMPSDrmi_Int: [ 0.00 0.00 ]
+Key: VCMPSDrri: [ 0.00 0.00 ]
+Key: VCMPSDrri_Int: [ 0.00 0.00 ]
+Key: VCMPSHZrmi: [ 0.00 0.00 ]
+Key: VCMPSHZrmi_Int: [ 0.00 0.00 ]
+Key: VCMPSHZrmik_Int: [ 0.00 0.00 ]
+Key: VCMPSHZrri: [ 0.00 0.00 ]
+Key: VCMPSHZrri_Int: [ 0.00 0.00 ]
+Key: VCMPSHZrrib_Int: [ 0.00 0.00 ]
+Key: VCMPSHZrribk_Int: [ 0.00 0.00 ]
+Key: VCMPSHZrrik_Int: [ 0.00 0.00 ]
+Key: VCMPSSZrmi: [ 0.00 0.00 ]
+Key: VCMPSSZrmi_Int: [ 0.00 0.00 ]
+Key: VCMPSSZrmik_Int: [ 0.00 0.00 ]
+Key: VCMPSSZrri: [ 0.00 0.00 ]
+Key: VCMPSSZrri_Int: [ 0.00 0.00 ]
+Key: VCMPSSZrrib_Int: [ 0.00 0.00 ]
+Key: VCMPSSZrribk_Int: [ 0.00 0.00 ]
+Key: VCMPSSZrrik_Int: [ 0.00 0.00 ]
+Key: VCMPSSrmi: [ 0.00 0.00 ]
+Key: VCMPSSrmi_Int: [ 0.00 0.00 ]
+Key: VCMPSSrri: [ 0.00 0.00 ]
+Key: VCMPSSrri_Int: [ 0.00 0.00 ]
+Key: VCOMISBF: [ 0.00 0.00 ]
+Key: VCOMISDZrm: [ 0.00 0.00 ]
+Key: VCOMISDZrm_Int: [ 0.00 0.00 ]
+Key: VCOMISDZrr: [ 0.00 0.00 ]
+Key: VCOMISDZrr_Int: [ 0.00 0.00 ]
+Key: VCOMISDZrrb: [ 0.00 0.00 ]
+Key: VCOMISDrm: [ 0.00 0.00 ]
+Key: VCOMISDrm_Int: [ 0.00 0.00 ]
+Key: VCOMISDrr: [ 0.00 0.00 ]
+Key: VCOMISDrr_Int: [ 0.00 0.00 ]
+Key: VCOMISHZrm: [ 0.00 0.00 ]
+Key: VCOMISHZrm_Int: [ 0.00 0.00 ]
+Key: VCOMISHZrr: [ 0.00 0.00 ]
+Key: VCOMISHZrr_Int: [ 0.00 0.00 ]
+Key: VCOMISHZrrb: [ 0.00 0.00 ]
+Key: VCOMISSZrm: [ 0.00 0.00 ]
+Key: VCOMISSZrm_Int: [ 0.00 0.00 ]
+Key: VCOMISSZrr: [ 0.00 0.00 ]
+Key: VCOMISSZrr_Int: [ 0.00 0.00 ]
+Key: VCOMISSZrrb: [ 0.00 0.00 ]
+Key: VCOMISSrm: [ 0.00 0.00 ]
+Key: VCOMISSrm_Int: [ 0.00 0.00 ]
+Key: VCOMISSrr: [ 0.00 0.00 ]
+Key: VCOMISSrr_Int: [ 0.00 0.00 ]
+Key: VCOMPRESSPDZ: [ 0.00 0.00 ]
+Key: VCOMPRESSPDZmr: [ 0.00 0.00 ]
+Key: VCOMPRESSPDZmrk: [ 0.00 0.00 ]
+Key: VCOMPRESSPDZrr: [ 0.00 0.00 ]
+Key: VCOMPRESSPDZrrk: [ 0.00 0.00 ]
+Key: VCOMPRESSPDZrrkz: [ 0.00 0.00 ]
+Key: VCOMPRESSPSZ: [ 0.00 0.00 ]
+Key: VCOMPRESSPSZmr: [ 0.00 0.00 ]
+Key: VCOMPRESSPSZmrk: [ 0.00 0.00 ]
+Key: VCOMPRESSPSZrr: [ 0.00 0.00 ]
+Key: VCOMPRESSPSZrrk: [ 0.00 0.00 ]
+Key: VCOMPRESSPSZrrkz: [ 0.00 0.00 ]
+Key: VCOMXSDZrm_Int: [ 0.00 0.00 ]
+Key: VCOMXSDZrr_Int: [ 0.00 0.00 ]
+Key: VCOMXSDZrrb_Int: [ 0.00 0.00 ]
+Key: VCOMXSHZrm_Int: [ 0.00 0.00 ]
+Key: VCOMXSHZrr_Int: [ 0.00 0.00 ]
+Key: VCOMXSHZrrb_Int: [ 0.00 0.00 ]
+Key: VCOMXSSZrm_Int: [ 0.00 0.00 ]
+Key: VCOMXSSZrr_Int: [ 0.00 0.00 ]
+Key: VCOMXSSZrrb_Int: [ 0.00 0.00 ]
+Key: VCVT: [ 0.00 0.00 ]
+Key: VCVTBF: [ 0.00 0.00 ]
+Key: VCVTBIASPH: [ 0.00 0.00 ]
+Key: VCVTDQ: [ 0.00 0.00 ]
+Key: VCVTHF: [ 0.00 0.00 ]
+Key: VCVTNE: [ 0.00 0.00 ]
+Key: VCVTNEEBF: [ 0.00 0.00 ]
+Key: VCVTNEEPH: [ 0.00 0.00 ]
+Key: VCVTNEOBF: [ 0.00 0.00 ]
+Key: VCVTNEOPH: [ 0.00 0.00 ]
+Key: VCVTNEPS: [ 0.00 0.00 ]
+Key: VCVTPD: [ 0.00 0.00 ]
+Key: VCVTPH: [ 0.00 0.00 ]
+Key: VCVTPS: [ 0.00 0.00 ]
+Key: VCVTQQ: [ 0.00 0.00 ]
+Key: VCVTSD: [ 0.00 0.00 ]
+Key: VCVTSH: [ 0.00 0.00 ]
+Key: VCVTSI: [ 0.00 0.00 ]
+Key: VCVTSS: [ 0.00 0.00 ]
+Key: VCVTTBF: [ 0.00 0.00 ]
+Key: VCVTTPD: [ 0.00 0.00 ]
+Key: VCVTTPH: [ 0.00 0.00 ]
+Key: VCVTTPS: [ 0.00 0.00 ]
+Key: VCVTTSD: [ 0.00 0.00 ]
+Key: VCVTTSH: [ 0.00 0.00 ]
+Key: VCVTTSS: [ 0.00 0.00 ]
+Key: VCVTUDQ: [ 0.00 0.00 ]
+Key: VCVTUQQ: [ 0.00 0.00 ]
+Key: VCVTUSI: [ 0.00 0.00 ]
+Key: VCVTUW: [ 0.00 0.00 ]
+Key: VCVTW: [ 0.00 0.00 ]
+Key: VDBPSADBWZ: [ 0.00 0.00 ]
+Key: VDBPSADBWZrmi: [ 0.00 0.00 ]
+Key: VDBPSADBWZrmik: [ 0.00 0.00 ]
+Key: VDBPSADBWZrmikz: [ 0.00 0.00 ]
+Key: VDBPSADBWZrri: [ 0.00 0.00 ]
+Key: VDBPSADBWZrrik: [ 0.00 0.00 ]
+Key: VDBPSADBWZrrikz: [ 0.00 0.00 ]
+Key: VDIVBF: [ 0.00 0.00 ]
+Key: VDIVPDYrm: [ 0.00 0.00 ]
+Key: VDIVPDYrr: [ 0.00 0.00 ]
+Key: VDIVPDZ: [ 0.00 0.00 ]
+Key: VDIVPDZrm: [ 0.00 0.00 ]
+Key: VDIVPDZrmb: [ 0.00 0.00 ]
+Key: VDIVPDZrmbk: [ 0.00 0.00 ]
+Key: VDIVPDZrmbkz: [ 0.00 0.00 ]
+Key: VDIVPDZrmk: [ 0.00 0.00 ]
+Key: VDIVPDZrmkz: [ 0.00 0.00 ]
+Key: VDIVPDZrr: [ 0.00 0.00 ]
+Key: VDIVPDZrrb: [ 0.00 0.00 ]
+Key: VDIVPDZrrbk: [ 0.00 0.00 ]
+Key: VDIVPDZrrbkz: [ 0.00 0.00 ]
+Key: VDIVPDZrrk: [ 0.00 0.00 ]
+Key: VDIVPDZrrkz: [ 0.00 0.00 ]
+Key: VDIVPDrm: [ 0.00 0.00 ]
+Key: VDIVPDrr: [ 0.00 0.00 ]
+Key: VDIVPHZ: [ 0.00 0.00 ]
+Key: VDIVPHZrm: [ 0.00 0.00 ]
+Key: VDIVPHZrmb: [ 0.00 0.00 ]
+Key: VDIVPHZrmbk: [ 0.00 0.00 ]
+Key: VDIVPHZrmbkz: [ 0.00 0.00 ]
+Key: VDIVPHZrmk: [ 0.00 0.00 ]
+Key: VDIVPHZrmkz: [ 0.00 0.00 ]
+Key: VDIVPHZrr: [ 0.00 0.00 ]
+Key: VDIVPHZrrb: [ 0.00 0.00 ]
+Key: VDIVPHZrrbk: [ 0.00 0.00 ]
+Key: VDIVPHZrrbkz: [ 0.00 0.00 ]
+Key: VDIVPHZrrk: [ 0.00 0.00 ]
+Key: VDIVPHZrrkz: [ 0.00 0.00 ]
+Key: VDIVPSYrm: [ 0.00 0.00 ]
+Key: VDIVPSYrr: [ 0.00 0.00 ]
+Key: VDIVPSZ: [ 0.00 0.00 ]
+Key: VDIVPSZrm: [ 0.00 0.00 ]
+Key: VDIVPSZrmb: [ 0.00 0.00 ]
+Key: VDIVPSZrmbk: [ 0.00 0.00 ]
+Key: VDIVPSZrmbkz: [ 0.00 0.00 ]
+Key: VDIVPSZrmk: [ 0.00 0.00 ]
+Key: VDIVPSZrmkz: [ 0.00 0.00 ]
+Key: VDIVPSZrr: [ 0.00 0.00 ]
+Key: VDIVPSZrrb: [ 0.00 0.00 ]
+Key: VDIVPSZrrbk: [ 0.00 0.00 ]
+Key: VDIVPSZrrbkz: [ 0.00 0.00 ]
+Key: VDIVPSZrrk: [ 0.00 0.00 ]
+Key: VDIVPSZrrkz: [ 0.00 0.00 ]
+Key: VDIVPSrm: [ 0.00 0.00 ]
+Key: VDIVPSrr: [ 0.00 0.00 ]
+Key: VDIVSDZrm: [ 0.00 0.00 ]
+Key: VDIVSDZrm_Int: [ 0.00 0.00 ]
+Key: VDIVSDZrmk_Int: [ 0.00 0.00 ]
+Key: VDIVSDZrmkz_Int: [ 0.00 0.00 ]
+Key: VDIVSDZrr: [ 0.00 0.00 ]
+Key: VDIVSDZrr_Int: [ 0.00 0.00 ]
+Key: VDIVSDZrrb_Int: [ 0.00 0.00 ]
+Key: VDIVSDZrrbk_Int: [ 0.00 0.00 ]
+Key: VDIVSDZrrbkz_Int: [ 0.00 0.00 ]
+Key: VDIVSDZrrk_Int: [ 0.00 0.00 ]
+Key: VDIVSDZrrkz_Int: [ 0.00 0.00 ]
+Key: VDIVSDrm: [ 0.00 0.00 ]
+Key: VDIVSDrm_Int: [ 0.00 0.00 ]
+Key: VDIVSDrr: [ 0.00 0.00 ]
+Key: VDIVSDrr_Int: [ 0.00 0.00 ]
+Key: VDIVSHZrm: [ 0.00 0.00 ]
+Key: VDIVSHZrm_Int: [ 0.00 0.00 ]
+Key: VDIVSHZrmk_Int: [ 0.00 0.00 ]
+Key: VDIVSHZrmkz_Int: [ 0.00 0.00 ]
+Key: VDIVSHZrr: [ 0.00 0.00 ]
+Key: VDIVSHZrr_Int: [ 0.00 0.00 ]
+Key: VDIVSHZrrb_Int: [ 0.00 0.00 ]
+Key: VDIVSHZrrbk_Int: [ 0.00 0.00 ]
+Key: VDIVSHZrrbkz_Int: [ 0.00 0.00 ]
+Key: VDIVSHZrrk_Int: [ 0.00 0.00 ]
+Key: VDIVSHZrrkz_Int: [ 0.00 0.00 ]
+Key: VDIVSSZrm: [ 0.00 0.00 ]
+Key: VDIVSSZrm_Int: [ 0.00 0.00 ]
+Key: VDIVSSZrmk_Int: [ 0.00 0.00 ]
+Key: VDIVSSZrmkz_Int: [ 0.00 0.00 ]
+Key: VDIVSSZrr: [ 0.00 0.00 ]
+Key: VDIVSSZrr_Int: [ 0.00 0.00 ]
+Key: VDIVSSZrrb_Int: [ 0.00 0.00 ]
+Key: VDIVSSZrrbk_Int: [ 0.00 0.00 ]
+Key: VDIVSSZrrbkz_Int: [ 0.00 0.00 ]
+Key: VDIVSSZrrk_Int: [ 0.00 0.00 ]
+Key: VDIVSSZrrkz_Int: [ 0.00 0.00 ]
+Key: VDIVSSrm: [ 0.00 0.00 ]
+Key: VDIVSSrm_Int: [ 0.00 0.00 ]
+Key: VDIVSSrr: [ 0.00 0.00 ]
+Key: VDIVSSrr_Int: [ 0.00 0.00 ]
+Key: VDPBF: [ 0.00 0.00 ]
+Key: VDPPDrmi: [ 0.00 0.00 ]
+Key: VDPPDrri: [ 0.00 0.00 ]
+Key: VDPPHPSZ: [ 0.00 0.00 ]
+Key: VDPPHPSZm: [ 0.00 0.00 ]
+Key: VDPPHPSZmb: [ 0.00 0.00 ]
+Key: VDPPHPSZmbk: [ 0.00 0.00 ]
+Key: VDPPHPSZmbkz: [ 0.00 0.00 ]
+Key: VDPPHPSZmk: [ 0.00 0.00 ]
+Key: VDPPHPSZmkz: [ 0.00 0.00 ]
+Key: VDPPHPSZr: [ 0.00 0.00 ]
+Key: VDPPHPSZrk: [ 0.00 0.00 ]
+Key: VDPPHPSZrkz: [ 0.00 0.00 ]
+Key: VDPPSYrmi: [ 0.00 0.00 ]
+Key: VDPPSYrri: [ 0.00 0.00 ]
+Key: VDPPSrmi: [ 0.00 0.00 ]
+Key: VDPPSrri: [ 0.00 0.00 ]
+Key: VERRm: [ 0.00 0.00 ]
+Key: VERRr: [ 0.00 0.00 ]
+Key: VERWm: [ 0.00 0.00 ]
+Key: VERWr: [ 0.00 0.00 ]
+Key: VEXP: [ 0.00 0.00 ]
+Key: VEXPANDPDZ: [ 0.00 0.00 ]
+Key: VEXPANDPDZrm: [ 0.00 0.00 ]
+Key: VEXPANDPDZrmk: [ 0.00 0.00 ]
+Key: VEXPANDPDZrmkz: [ 0.00 0.00 ]
+Key: VEXPANDPDZrr: [ 0.00 0.00 ]
+Key: VEXPANDPDZrrk: [ 0.00 0.00 ]
+Key: VEXPANDPDZrrkz: [ 0.00 0.00 ]
+Key: VEXPANDPSZ: [ 0.00 0.00 ]
+Key: VEXPANDPSZrm: [ 0.00 0.00 ]
+Key: VEXPANDPSZrmk: [ 0.00 0.00 ]
+Key: VEXPANDPSZrmkz: [ 0.00 0.00 ]
+Key: VEXPANDPSZrr: [ 0.00 0.00 ]
+Key: VEXPANDPSZrrk: [ 0.00 0.00 ]
+Key: VEXPANDPSZrrkz: [ 0.00 0.00 ]
+Key: VEXTRACTF: [ 0.00 0.00 ]
+Key: VEXTRACTI: [ 0.00 0.00 ]
+Key: VEXTRACTPSZmri: [ 0.00 0.00 ]
+Key: VEXTRACTPSZrri: [ 0.00 0.00 ]
+Key: VEXTRACTPSmri: [ 0.00 0.00 ]
+Key: VEXTRACTPSrri: [ 0.00 0.00 ]
+Key: VFCMADDCPHZ: [ 0.00 0.00 ]
+Key: VFCMADDCPHZm: [ 0.00 0.00 ]
+Key: VFCMADDCPHZmb: [ 0.00 0.00 ]
+Key: VFCMADDCPHZmbk: [ 0.00 0.00 ]
+Key: VFCMADDCPHZmbkz: [ 0.00 0.00 ]
+Key: VFCMADDCPHZmk: [ 0.00 0.00 ]
+Key: VFCMADDCPHZmkz: [ 0.00 0.00 ]
+Key: VFCMADDCPHZr: [ 0.00 0.00 ]
+Key: VFCMADDCPHZrb: [ 0.00 0.00 ]
+Key: VFCMADDCPHZrbk: [ 0.00 0.00 ]
+Key: VFCMADDCPHZrbkz: [ 0.00 0.00 ]
+Key: VFCMADDCPHZrk: [ 0.00 0.00 ]
+Key: VFCMADDCPHZrkz: [ 0.00 0.00 ]
+Key: VFCMADDCSHZm: [ 0.00 0.00 ]
+Key: VFCMADDCSHZmk: [ 0.00 0.00 ]
+Key: VFCMADDCSHZmkz: [ 0.00 0.00 ]
+Key: VFCMADDCSHZr: [ 0.00 0.00 ]
+Key: VFCMADDCSHZrb: [ 0.00 0.00 ]
+Key: VFCMADDCSHZrbk: [ 0.00 0.00 ]
+Key: VFCMADDCSHZrbkz: [ 0.00 0.00 ]
+Key: VFCMADDCSHZrk: [ 0.00 0.00 ]
+Key: VFCMADDCSHZrkz: [ 0.00 0.00 ]
+Key: VFCMULCPHZ: [ 0.00 0.00 ]
+Key: VFCMULCPHZrm: [ 0.00 0.00 ]
+Key: VFCMULCPHZrmb: [ 0.00 0.00 ]
+Key: VFCMULCPHZrmbk: [ 0.00 0.00 ]
+Key: VFCMULCPHZrmbkz: [ 0.00 0.00 ]
+Key: VFCMULCPHZrmk: [ 0.00 0.00 ]
+Key: VFCMULCPHZrmkz: [ 0.00 0.00 ]
+Key: VFCMULCPHZrr: [ 0.00 0.00 ]
+Key: VFCMULCPHZrrb: [ 0.00 0.00 ]
+Key: VFCMULCPHZrrbk: [ 0.00 0.00 ]
+Key: VFCMULCPHZrrbkz: [ 0.00 0.00 ]
+Key: VFCMULCPHZrrk: [ 0.00 0.00 ]
+Key: VFCMULCPHZrrkz: [ 0.00 0.00 ]
+Key: VFCMULCSHZrm: [ 0.00 0.00 ]
+Key: VFCMULCSHZrmk: [ 0.00 0.00 ]
+Key: VFCMULCSHZrmkz: [ 0.00 0.00 ]
+Key: VFCMULCSHZrr: [ 0.00 0.00 ]
+Key: VFCMULCSHZrrb: [ 0.00 0.00 ]
+Key: VFCMULCSHZrrbk: [ 0.00 0.00 ]
+Key: VFCMULCSHZrrbkz: [ 0.00 0.00 ]
+Key: VFCMULCSHZrrk: [ 0.00 0.00 ]
+Key: VFCMULCSHZrrkz: [ 0.00 0.00 ]
+Key: VFIXUPIMMPDZ: [ 0.00 0.00 ]
+Key: VFIXUPIMMPDZrmbi: [ 0.00 0.00 ]
+Key: VFIXUPIMMPDZrmbik: [ 0.00 0.00 ]
+Key: VFIXUPIMMPDZrmbikz: [ 0.00 0.00 ]
+Key: VFIXUPIMMPDZrmi: [ 0.00 0.00 ]
+Key: VFIXUPIMMPDZrmik: [ 0.00 0.00 ]
+Key: VFIXUPIMMPDZrmikz: [ 0.00 0.00 ]
+Key: VFIXUPIMMPDZrri: [ 0.00 0.00 ]
+Key: VFIXUPIMMPDZrrib: [ 0.00 0.00 ]
+Key: VFIXUPIMMPDZrribk: [ 0.00 0.00 ]
+Key: VFIXUPIMMPDZrribkz: [ 0.00 0.00 ]
+Key: VFIXUPIMMPDZrrik: [ 0.00 0.00 ]
+Key: VFIXUPIMMPDZrrikz: [ 0.00 0.00 ]
+Key: VFIXUPIMMPSZ: [ 0.00 0.00 ]
+Key: VFIXUPIMMPSZrmbi: [ 0.00 0.00 ]
+Key: VFIXUPIMMPSZrmbik: [ 0.00 0.00 ]
+Key: VFIXUPIMMPSZrmbikz: [ 0.00 0.00 ]
+Key: VFIXUPIMMPSZrmi: [ 0.00 0.00 ]
+Key: VFIXUPIMMPSZrmik: [ 0.00 0.00 ]
+Key: VFIXUPIMMPSZrmikz: [ 0.00 0.00 ]
+Key: VFIXUPIMMPSZrri: [ 0.00 0.00 ]
+Key: VFIXUPIMMPSZrrib: [ 0.00 0.00 ]
+Key: VFIXUPIMMPSZrribk: [ 0.00 0.00 ]
+Key: VFIXUPIMMPSZrribkz: [ 0.00 0.00 ]
+Key: VFIXUPIMMPSZrrik: [ 0.00 0.00 ]
+Key: VFIXUPIMMPSZrrikz: [ 0.00 0.00 ]
+Key: VFIXUPIMMSDZrmi: [ 0.00 0.00 ]
+Key: VFIXUPIMMSDZrmik: [ 0.00 0.00 ]
+Key: VFIXUPIMMSDZrmikz: [ 0.00 0.00 ]
+Key: VFIXUPIMMSDZrri: [ 0.00 0.00 ]
+Key: VFIXUPIMMSDZrrib: [ 0.00 0.00 ]
+Key: VFIXUPIMMSDZrribk: [ 0.00 0.00 ]
+Key: VFIXUPIMMSDZrribkz: [ 0.00 0.00 ]
+Key: VFIXUPIMMSDZrrik: [ 0.00 0.00 ]
+Key: VFIXUPIMMSDZrrikz: [ 0.00 0.00 ]
+Key: VFIXUPIMMSSZrmi: [ 0.00 0.00 ]
+Key: VFIXUPIMMSSZrmik: [ 0.00 0.00 ]
+Key: VFIXUPIMMSSZrmikz: [ 0.00 0.00 ]
+Key: VFIXUPIMMSSZrri: [ 0.00 0.00 ]
+Key: VFIXUPIMMSSZrrib: [ 0.00 0.00 ]
+Key: VFIXUPIMMSSZrribk: [ 0.00 0.00 ]
+Key: VFIXUPIMMSSZrribkz: [ 0.00 0.00 ]
+Key: VFIXUPIMMSSZrrik: [ 0.00 0.00 ]
+Key: VFIXUPIMMSSZrrikz: [ 0.00 0.00 ]
+Key: VFMADD: [ 0.00 0.00 ]
+Key: VFMADDCPHZ: [ 0.00 0.00 ]
+Key: VFMADDCPHZm: [ 0.00 0.00 ]
+Key: VFMADDCPHZmb: [ 0.00 0.00 ]
+Key: VFMADDCPHZmbk: [ 0.00 0.00 ]
+Key: VFMADDCPHZmbkz: [ 0.00 0.00 ]
+Key: VFMADDCPHZmk: [ 0.00 0.00 ]
+Key: VFMADDCPHZmkz: [ 0.00 0.00 ]
+Key: VFMADDCPHZr: [ 0.00 0.00 ]
+Key: VFMADDCPHZrb: [ 0.00 0.00 ]
+Key: VFMADDCPHZrbk: [ 0.00 0.00 ]
+Key: VFMADDCPHZrbkz: [ 0.00 0.00 ]
+Key: VFMADDCPHZrk: [ 0.00 0.00 ]
+Key: VFMADDCPHZrkz: [ 0.00 0.00 ]
+Key: VFMADDCSHZm: [ 0.00 0.00 ]
+Key: VFMADDCSHZmk: [ 0.00 0.00 ]
+Key: VFMADDCSHZmkz: [ 0.00 0.00 ]
+Key: VFMADDCSHZr: [ 0.00 0.00 ]
+Key: VFMADDCSHZrb: [ 0.00 0.00 ]
+Key: VFMADDCSHZrbk: [ 0.00 0.00 ]
+Key: VFMADDCSHZrbkz: [ 0.00 0.00 ]
+Key: VFMADDCSHZrk: [ 0.00 0.00 ]
+Key: VFMADDCSHZrkz: [ 0.00 0.00 ]
+Key: VFMADDPD: [ 0.00 0.00 ]
+Key: VFMADDPS: [ 0.00 0.00 ]
+Key: VFMADDSD: [ 0.00 0.00 ]
+Key: VFMADDSS: [ 0.00 0.00 ]
+Key: VFMADDSUB: [ 0.00 0.00 ]
+Key: VFMADDSUBPD: [ 0.00 0.00 ]
+Key: VFMADDSUBPS: [ 0.00 0.00 ]
+Key: VFMSUB: [ 0.00 0.00 ]
+Key: VFMSUBADD: [ 0.00 0.00 ]
+Key: VFMSUBADDPD: [ 0.00 0.00 ]
+Key: VFMSUBADDPS: [ 0.00 0.00 ]
+Key: VFMSUBPD: [ 0.00 0.00 ]
+Key: VFMSUBPS: [ 0.00 0.00 ]
+Key: VFMSUBSD: [ 0.00 0.00 ]
+Key: VFMSUBSS: [ 0.00 0.00 ]
+Key: VFMULCPHZ: [ 0.00 0.00 ]
+Key: VFMULCPHZrm: [ 0.00 0.00 ]
+Key: VFMULCPHZrmb: [ 0.00 0.00 ]
+Key: VFMULCPHZrmbk: [ 0.00 0.00 ]
+Key: VFMULCPHZrmbkz: [ 0.00 0.00 ]
+Key: VFMULCPHZrmk: [ 0.00 0.00 ]
+Key: VFMULCPHZrmkz: [ 0.00 0.00 ]
+Key: VFMULCPHZrr: [ 0.00 0.00 ]
+Key: VFMULCPHZrrb: [ 0.00 0.00 ]
+Key: VFMULCPHZrrbk: [ 0.00 0.00 ]
+Key: VFMULCPHZrrbkz: [ 0.00 0.00 ]
+Key: VFMULCPHZrrk: [ 0.00 0.00 ]
+Key: VFMULCPHZrrkz: [ 0.00 0.00 ]
+Key: VFMULCSHZrm: [ 0.00 0.00 ]
+Key: VFMULCSHZrmk: [ 0.00 0.00 ]
+Key: VFMULCSHZrmkz: [ 0.00 0.00 ]
+Key: VFMULCSHZrr: [ 0.00 0.00 ]
+Key: VFMULCSHZrrb: [ 0.00 0.00 ]
+Key: VFMULCSHZrrbk: [ 0.00 0.00 ]
+Key: VFMULCSHZrrbkz: [ 0.00 0.00 ]
+Key: VFMULCSHZrrk: [ 0.00 0.00 ]
+Key: VFMULCSHZrrkz: [ 0.00 0.00 ]
+Key: VFNMADD: [ 0.00 0.00 ]
+Key: VFNMADDPD: [ 0.00 0.00 ]
+Key: VFNMADDPS: [ 0.00 0.00 ]
+Key: VFNMADDSD: [ 0.00 0.00 ]
+Key: VFNMADDSS: [ 0.00 0.00 ]
+Key: VFNMSUB: [ 0.00 0.00 ]
+Key: VFNMSUBPD: [ 0.00 0.00 ]
+Key: VFNMSUBPS: [ 0.00 0.00 ]
+Key: VFNMSUBSD: [ 0.00 0.00 ]
+Key: VFNMSUBSS: [ 0.00 0.00 ]
+Key: VFPCLASSBF: [ 0.00 0.00 ]
+Key: VFPCLASSPDZ: [ 0.00 0.00 ]
+Key: VFPCLASSPDZmbi: [ 0.00 0.00 ]
+Key: VFPCLASSPDZmbik: [ 0.00 0.00 ]
+Key: VFPCLASSPDZmi: [ 0.00 0.00 ]
+Key: VFPCLASSPDZmik: [ 0.00 0.00 ]
+Key: VFPCLASSPDZri: [ 0.00 0.00 ]
+Key: VFPCLASSPDZrik: [ 0.00 0.00 ]
+Key: VFPCLASSPHZ: [ 0.00 0.00 ]
+Key: VFPCLASSPHZmbi: [ 0.00 0.00 ]
+Key: VFPCLASSPHZmbik: [ 0.00 0.00 ]
+Key: VFPCLASSPHZmi: [ 0.00 0.00 ]
+Key: VFPCLASSPHZmik: [ 0.00 0.00 ]
+Key: VFPCLASSPHZri: [ 0.00 0.00 ]
+Key: VFPCLASSPHZrik: [ 0.00 0.00 ]
+Key: VFPCLASSPSZ: [ 0.00 0.00 ]
+Key: VFPCLASSPSZmbi: [ 0.00 0.00 ]
+Key: VFPCLASSPSZmbik: [ 0.00 0.00 ]
+Key: VFPCLASSPSZmi: [ 0.00 0.00 ]
+Key: VFPCLASSPSZmik: [ 0.00 0.00 ]
+Key: VFPCLASSPSZri: [ 0.00 0.00 ]
+Key: VFPCLASSPSZrik: [ 0.00 0.00 ]
+Key: VFPCLASSSDZmi: [ 0.00 0.00 ]
+Key: VFPCLASSSDZmik: [ 0.00 0.00 ]
+Key: VFPCLASSSDZri: [ 0.00 0.00 ]
+Key: VFPCLASSSDZrik: [ 0.00 0.00 ]
+Key: VFPCLASSSHZmi: [ 0.00 0.00 ]
+Key: VFPCLASSSHZmik: [ 0.00 0.00 ]
+Key: VFPCLASSSHZri: [ 0.00 0.00 ]
+Key: VFPCLASSSHZrik: [ 0.00 0.00 ]
+Key: VFPCLASSSSZmi: [ 0.00 0.00 ]
+Key: VFPCLASSSSZmik: [ 0.00 0.00 ]
+Key: VFPCLASSSSZri: [ 0.00 0.00 ]
+Key: VFPCLASSSSZrik: [ 0.00 0.00 ]
+Key: VFRCZPDYrm: [ 0.00 0.00 ]
+Key: VFRCZPDYrr: [ 0.00 0.00 ]
+Key: VFRCZPDrm: [ 0.00 0.00 ]
+Key: VFRCZPDrr: [ 0.00 0.00 ]
+Key: VFRCZPSYrm: [ 0.00 0.00 ]
+Key: VFRCZPSYrr: [ 0.00 0.00 ]
+Key: VFRCZPSrm: [ 0.00 0.00 ]
+Key: VFRCZPSrr: [ 0.00 0.00 ]
+Key: VFRCZSDrm: [ 0.00 0.00 ]
+Key: VFRCZSDrr: [ 0.00 0.00 ]
+Key: VFRCZSSrm: [ 0.00 0.00 ]
+Key: VFRCZSSrr: [ 0.00 0.00 ]
+Key: VGATHERDPDYrm: [ 0.00 0.00 ]
+Key: VGATHERDPDZ: [ 0.00 0.00 ]
+Key: VGATHERDPDZrm: [ 0.00 0.00 ]
+Key: VGATHERDPDrm: [ 0.00 0.00 ]
+Key: VGATHERDPSYrm: [ 0.00 0.00 ]
+Key: VGATHERDPSZ: [ 0.00 0.00 ]
+Key: VGATHERDPSZrm: [ 0.00 0.00 ]
+Key: VGATHERDPSrm: [ 0.00 0.00 ]
+Key: VGATHERPF: [ 0.00 0.00 ]
+Key: VGATHERQPDYrm: [ 0.00 0.00 ]
+Key: VGATHERQPDZ: [ 0.00 0.00 ]
+Key: VGATHERQPDZrm: [ 0.00 0.00 ]
+Key: VGATHERQPDrm: [ 0.00 0.00 ]
+Key: VGATHERQPSYrm: [ 0.00 0.00 ]
+Key: VGATHERQPSZ: [ 0.00 0.00 ]
+Key: VGATHERQPSZrm: [ 0.00 0.00 ]
+Key: VGATHERQPSrm: [ 0.00 0.00 ]
+Key: VGETEXPBF: [ 0.00 0.00 ]
+Key: VGETEXPPDZ: [ 0.00 0.00 ]
+Key: VGETEXPPDZm: [ 0.00 0.00 ]
+Key: VGETEXPPDZmb: [ 0.00 0.00 ]
+Key: VGETEXPPDZmbk: [ 0.00 0.00 ]
+Key: VGETEXPPDZmbkz: [ 0.00 0.00 ]
+Key: VGETEXPPDZmk: [ 0.00 0.00 ]
+Key: VGETEXPPDZmkz: [ 0.00 0.00 ]
+Key: VGETEXPPDZr: [ 0.00 0.00 ]
+Key: VGETEXPPDZrb: [ 0.00 0.00 ]
+Key: VGETEXPPDZrbk: [ 0.00 0.00 ]
+Key: VGETEXPPDZrbkz: [ 0.00 0.00 ]
+Key: VGETEXPPDZrk: [ 0.00 0.00 ]
+Key: VGETEXPPDZrkz: [ 0.00 0.00 ]
+Key: VGETEXPPHZ: [ 0.00 0.00 ]
+Key: VGETEXPPHZm: [ 0.00 0.00 ]
+Key: VGETEXPPHZmb: [ 0.00 0.00 ]
+Key: VGETEXPPHZmbk: [ 0.00 0.00 ]
+Key: VGETEXPPHZmbkz: [ 0.00 0.00 ]
+Key: VGETEXPPHZmk: [ 0.00 0.00 ]
+Key: VGETEXPPHZmkz: [ 0.00 0.00 ]
+Key: VGETEXPPHZr: [ 0.00 0.00 ]
+Key: VGETEXPPHZrb: [ 0.00 0.00 ]
+Key: VGETEXPPHZrbk: [ 0.00 0.00 ]
+Key: VGETEXPPHZrbkz: [ 0.00 0.00 ]
+Key: VGETEXPPHZrk: [ 0.00 0.00 ]
+Key: VGETEXPPHZrkz: [ 0.00 0.00 ]
+Key: VGETEXPPSZ: [ 0.00 0.00 ]
+Key: VGETEXPPSZm: [ 0.00 0.00 ]
+Key: VGETEXPPSZmb: [ 0.00 0.00 ]
+Key: VGETEXPPSZmbk: [ 0.00 0.00 ]
+Key: VGETEXPPSZmbkz: [ 0.00 0.00 ]
+Key: VGETEXPPSZmk: [ 0.00 0.00 ]
+Key: VGETEXPPSZmkz: [ 0.00 0.00 ]
+Key: VGETEXPPSZr: [ 0.00 0.00 ]
+Key: VGETEXPPSZrb: [ 0.00 0.00 ]
+Key: VGETEXPPSZrbk: [ 0.00 0.00 ]
+Key: VGETEXPPSZrbkz: [ 0.00 0.00 ]
+Key: VGETEXPPSZrk: [ 0.00 0.00 ]
+Key: VGETEXPPSZrkz: [ 0.00 0.00 ]
+Key: VGETEXPSDZm: [ 0.00 0.00 ]
+Key: VGETEXPSDZmk: [ 0.00 0.00 ]
+Key: VGETEXPSDZmkz: [ 0.00 0.00 ]
+Key: VGETEXPSDZr: [ 0.00 0.00 ]
+Key: VGETEXPSDZrb: [ 0.00 0.00 ]
+Key: VGETEXPSDZrbk: [ 0.00 0.00 ]
+Key: VGETEXPSDZrbkz: [ 0.00 0.00 ]
+Key: VGETEXPSDZrk: [ 0.00 0.00 ]
+Key: VGETEXPSDZrkz: [ 0.00 0.00 ]
+Key: VGETEXPSHZm: [ 0.00 0.00 ]
+Key: VGETEXPSHZmk: [ 0.00 0.00 ]
+Key: VGETEXPSHZmkz: [ 0.00 0.00 ]
+Key: VGETEXPSHZr: [ 0.00 0.00 ]
+Key: VGETEXPSHZrb: [ 0.00 0.00 ]
+Key: VGETEXPSHZrbk: [ 0.00 0.00 ]
+Key: VGETEXPSHZrbkz: [ 0.00 0.00 ]
+Key: VGETEXPSHZrk: [ 0.00 0.00 ]
+Key: VGETEXPSHZrkz: [ 0.00 0.00 ]
+Key: VGETEXPSSZm: [ 0.00 0.00 ]
+Key: VGETEXPSSZmk: [ 0.00 0.00 ]
+Key: VGETEXPSSZmkz: [ 0.00 0.00 ]
+Key: VGETEXPSSZr: [ 0.00 0.00 ]
+Key: VGETEXPSSZrb: [ 0.00 0.00 ]
+Key: VGETEXPSSZrbk: [ 0.00 0.00 ]
+Key: VGETEXPSSZrbkz: [ 0.00 0.00 ]
+Key: VGETEXPSSZrk: [ 0.00 0.00 ]
+Key: VGETEXPSSZrkz: [ 0.00 0.00 ]
+Key: VGETMANTBF: [ 0.00 0.00 ]
+Key: VGETMANTPDZ: [ 0.00 0.00 ]
+Key: VGETMANTPDZrmbi: [ 0.00 0.00 ]
+Key: VGETMANTPDZrmbik: [ 0.00 0.00 ]
+Key: VGETMANTPDZrmbikz: [ 0.00 0.00 ]
+Key: VGETMANTPDZrmi: [ 0.00 0.00 ]
+Key: VGETMANTPDZrmik: [ 0.00 0.00 ]
+Key: VGETMANTPDZrmikz: [ 0.00 0.00 ]
+Key: VGETMANTPDZrri: [ 0.00 0.00 ]
+Key: VGETMANTPDZrrib: [ 0.00 0.00 ]
+Key: VGETMANTPDZrribk: [ 0.00 0.00 ]
+Key: VGETMANTPDZrribkz: [ 0.00 0.00 ]
+Key: VGETMANTPDZrrik: [ 0.00 0.00 ]
+Key: VGETMANTPDZrrikz: [ 0.00 0.00 ]
+Key: VGETMANTPHZ: [ 0.00 0.00 ]
+Key: VGETMANTPHZrmbi: [ 0.00 0.00 ]
+Key: VGETMANTPHZrmbik: [ 0.00 0.00 ]
+Key: VGETMANTPHZrmbikz: [ 0.00 0.00 ]
+Key: VGETMANTPHZrmi: [ 0.00 0.00 ]
+Key: VGETMANTPHZrmik: [ 0.00 0.00 ]
+Key: VGETMANTPHZrmikz: [ 0.00 0.00 ]
+Key: VGETMANTPHZrri: [ 0.00 0.00 ]
+Key: VGETMANTPHZrrib: [ 0.00 0.00 ]
+Key: VGETMANTPHZrribk: [ 0.00 0.00 ]
+Key: VGETMANTPHZrribkz: [ 0.00 0.00 ]
+Key: VGETMANTPHZrrik: [ 0.00 0.00 ]
+Key: VGETMANTPHZrrikz: [ 0.00 0.00 ]
+Key: VGETMANTPSZ: [ 0.00 0.00 ]
+Key: VGETMANTPSZrmbi: [ 0.00 0.00 ]
+Key: VGETMANTPSZrmbik: [ 0.00 0.00 ]
+Key: VGETMANTPSZrmbikz: [ 0.00 0.00 ]
+Key: VGETMANTPSZrmi: [ 0.00 0.00 ]
+Key: VGETMANTPSZrmik: [ 0.00 0.00 ]
+Key: VGETMANTPSZrmikz: [ 0.00 0.00 ]
+Key: VGETMANTPSZrri: [ 0.00 0.00 ]
+Key: VGETMANTPSZrrib: [ 0.00 0.00 ]
+Key: VGETMANTPSZrribk: [ 0.00 0.00 ]
+Key: VGETMANTPSZrribkz: [ 0.00 0.00 ]
+Key: VGETMANTPSZrrik: [ 0.00 0.00 ]
+Key: VGETMANTPSZrrikz: [ 0.00 0.00 ]
+Key: VGETMANTSDZrmi: [ 0.00 0.00 ]
+Key: VGETMANTSDZrmik: [ 0.00 0.00 ]
+Key: VGETMANTSDZrmikz: [ 0.00 0.00 ]
+Key: VGETMANTSDZrri: [ 0.00 0.00 ]
+Key: VGETMANTSDZrrib: [ 0.00 0.00 ]
+Key: VGETMANTSDZrribk: [ 0.00 0.00 ]
+Key: VGETMANTSDZrribkz: [ 0.00 0.00 ]
+Key: VGETMANTSDZrrik: [ 0.00 0.00 ]
+Key: VGETMANTSDZrrikz: [ 0.00 0.00 ]
+Key: VGETMANTSHZrmi: [ 0.00 0.00 ]
+Key: VGETMANTSHZrmik: [ 0.00 0.00 ]
+Key: VGETMANTSHZrmikz: [ 0.00 0.00 ]
+Key: VGETMANTSHZrri: [ 0.00 0.00 ]
+Key: VGETMANTSHZrrib: [ 0.00 0.00 ]
+Key: VGETMANTSHZrribk: [ 0.00 0.00 ]
+Key: VGETMANTSHZrribkz: [ 0.00 0.00 ]
+Key: VGETMANTSHZrrik: [ 0.00 0.00 ]
+Key: VGETMANTSHZrrikz: [ 0.00 0.00 ]
+Key: VGETMANTSSZrmi: [ 0.00 0.00 ]
+Key: VGETMANTSSZrmik: [ 0.00 0.00 ]
+Key: VGETMANTSSZrmikz: [ 0.00 0.00 ]
+Key: VGETMANTSSZrri: [ 0.00 0.00 ]
+Key: VGETMANTSSZrrib: [ 0.00 0.00 ]
+Key: VGETMANTSSZrribk: [ 0.00 0.00 ]
+Key: VGETMANTSSZrribkz: [ 0.00 0.00 ]
+Key: VGETMANTSSZrrik: [ 0.00 0.00 ]
+Key: VGETMANTSSZrrikz: [ 0.00 0.00 ]
+Key: VGF: [ 0.00 0.00 ]
+Key: VHADDPDYrm: [ 0.00 0.00 ]
+Key: VHADDPDYrr: [ 0.00 0.00 ]
+Key: VHADDPDrm: [ 0.00 0.00 ]
+Key: VHADDPDrr: [ 0.00 0.00 ]
+Key: VHADDPSYrm: [ 0.00 0.00 ]
+Key: VHADDPSYrr: [ 0.00 0.00 ]
+Key: VHADDPSrm: [ 0.00 0.00 ]
+Key: VHADDPSrr: [ 0.00 0.00 ]
+Key: VHSUBPDYrm: [ 0.00 0.00 ]
+Key: VHSUBPDYrr: [ 0.00 0.00 ]
+Key: VHSUBPDrm: [ 0.00 0.00 ]
+Key: VHSUBPDrr: [ 0.00 0.00 ]
+Key: VHSUBPSYrm: [ 0.00 0.00 ]
+Key: VHSUBPSYrr: [ 0.00 0.00 ]
+Key: VHSUBPSrm: [ 0.00 0.00 ]
+Key: VHSUBPSrr: [ 0.00 0.00 ]
+Key: VINSERTF: [ 0.00 0.00 ]
+Key: VINSERTI: [ 0.00 0.00 ]
+Key: VINSERTPSZrmi: [ 0.00 0.00 ]
+Key: VINSERTPSZrri: [ 0.00 0.00 ]
+Key: VINSERTPSrmi: [ 0.00 0.00 ]
+Key: VINSERTPSrri: [ 0.00 0.00 ]
+Key: VLDDQUYrm: [ 0.00 0.00 ]
+Key: VLDDQUrm: [ 0.00 0.00 ]
+Key: VLDMXCSR: [ 0.00 0.00 ]
+Key: VMASKMOVDQU: [ 0.00 0.00 ]
+Key: VMASKMOVPDYmr: [ 0.00 0.00 ]
+Key: VMASKMOVPDYrm: [ 0.00 0.00 ]
+Key: VMASKMOVPDmr: [ 0.00 0.00 ]
+Key: VMASKMOVPDrm: [ 0.00 0.00 ]
+Key: VMASKMOVPSYmr: [ 0.00 0.00 ]
+Key: VMASKMOVPSYrm: [ 0.00 0.00 ]
+Key: VMASKMOVPSmr: [ 0.00 0.00 ]
+Key: VMASKMOVPSrm: [ 0.00 0.00 ]
+Key: VMAXBF: [ 0.00 0.00 ]
+Key: VMAXCPDYrm: [ 0.00 0.00 ]
+Key: VMAXCPDYrr: [ 0.00 0.00 ]
+Key: VMAXCPDZ: [ 0.00 0.00 ]
+Key: VMAXCPDZrm: [ 0.00 0.00 ]
+Key: VMAXCPDZrmb: [ 0.00 0.00 ]
+Key: VMAXCPDZrmbk: [ 0.00 0.00 ]
+Key: VMAXCPDZrmbkz: [ 0.00 0.00 ]
+Key: VMAXCPDZrmk: [ 0.00 0.00 ]
+Key: VMAXCPDZrmkz: [ 0.00 0.00 ]
+Key: VMAXCPDZrr: [ 0.00 0.00 ]
+Key: VMAXCPDZrrk: [ 0.00 0.00 ]
+Key: VMAXCPDZrrkz: [ 0.00 0.00 ]
+Key: VMAXCPDrm: [ 0.00 0.00 ]
+Key: VMAXCPDrr: [ 0.00 0.00 ]
+Key: VMAXCPHZ: [ 0.00 0.00 ]
+Key: VMAXCPHZrm: [ 0.00 0.00 ]
+Key: VMAXCPHZrmb: [ 0.00 0.00 ]
+Key: VMAXCPHZrmbk: [ 0.00 0.00 ]
+Key: VMAXCPHZrmbkz: [ 0.00 0.00 ]
+Key: VMAXCPHZrmk: [ 0.00 0.00 ]
+Key: VMAXCPHZrmkz: [ 0.00 0.00 ]
+Key: VMAXCPHZrr: [ 0.00 0.00 ]
+Key: VMAXCPHZrrk: [ 0.00 0.00 ]
+Key: VMAXCPHZrrkz: [ 0.00 0.00 ]
+Key: VMAXCPSYrm: [ 0.00 0.00 ]
+Key: VMAXCPSYrr: [ 0.00 0.00 ]
+Key: VMAXCPSZ: [ 0.00 0.00 ]
+Key: VMAXCPSZrm: [ 0.00 0.00 ]
+Key: VMAXCPSZrmb: [ 0.00 0.00 ]
+Key: VMAXCPSZrmbk: [ 0.00 0.00 ]
+Key: VMAXCPSZrmbkz: [ 0.00 0.00 ]
+Key: VMAXCPSZrmk: [ 0.00 0.00 ]
+Key: VMAXCPSZrmkz: [ 0.00 0.00 ]
+Key: VMAXCPSZrr: [ 0.00 0.00 ]
+Key: VMAXCPSZrrk: [ 0.00 0.00 ]
+Key: VMAXCPSZrrkz: [ 0.00 0.00 ]
+Key: VMAXCPSrm: [ 0.00 0.00 ]
+Key: VMAXCPSrr: [ 0.00 0.00 ]
+Key: VMAXCSDZrm: [ 0.00 0.00 ]
+Key: VMAXCSDZrr: [ 0.00 0.00 ]
+Key: VMAXCSDrm: [ 0.00 0.00 ]
+Key: VMAXCSDrr: [ 0.00 0.00 ]
+Key: VMAXCSHZrm: [ 0.00 0.00 ]
+Key: VMAXCSHZrr: [ 0.00 0.00 ]
+Key: VMAXCSSZrm: [ 0.00 0.00 ]
+Key: VMAXCSSZrr: [ 0.00 0.00 ]
+Key: VMAXCSSrm: [ 0.00 0.00 ]
+Key: VMAXCSSrr: [ 0.00 0.00 ]
+Key: VMAXPDYrm: [ 0.00 0.00 ]
+Key: VMAXPDYrr: [ 0.00 0.00 ]
+Key: VMAXPDZ: [ 0.00 0.00 ]
+Key: VMAXPDZrm: [ 0.00 0.00 ]
+Key: VMAXPDZrmb: [ 0.00 0.00 ]
+Key: VMAXPDZrmbk: [ 0.00 0.00 ]
+Key: VMAXPDZrmbkz: [ 0.00 0.00 ]
+Key: VMAXPDZrmk: [ 0.00 0.00 ]
+Key: VMAXPDZrmkz: [ 0.00 0.00 ]
+Key: VMAXPDZrr: [ 0.00 0.00 ]
+Key: VMAXPDZrrb: [ 0.00 0.00 ]
+Key: VMAXPDZrrbk: [ 0.00 0.00 ]
+Key: VMAXPDZrrbkz: [ 0.00 0.00 ]
+Key: VMAXPDZrrk: [ 0.00 0.00 ]
+Key: VMAXPDZrrkz: [ 0.00 0.00 ]
+Key: VMAXPDrm: [ 0.00 0.00 ]
+Key: VMAXPDrr: [ 0.00 0.00 ]
+Key: VMAXPHZ: [ 0.00 0.00 ]
+Key: VMAXPHZrm: [ 0.00 0.00 ]
+Key: VMAXPHZrmb: [ 0.00 0.00 ]
+Key: VMAXPHZrmbk: [ 0.00 0.00 ]
+Key: VMAXPHZrmbkz: [ 0.00 0.00 ]
+Key: VMAXPHZrmk: [ 0.00 0.00 ]
+Key: VMAXPHZrmkz: [ 0.00 0.00 ]
+Key: VMAXPHZrr: [ 0.00 0.00 ]
+Key: VMAXPHZrrb: [ 0.00 0.00 ]
+Key: VMAXPHZrrbk: [ 0.00 0.00 ]
+Key: VMAXPHZrrbkz: [ 0.00 0.00 ]
+Key: VMAXPHZrrk: [ 0.00 0.00 ]
+Key: VMAXPHZrrkz: [ 0.00 0.00 ]
+Key: VMAXPSYrm: [ 0.00 0.00 ]
+Key: VMAXPSYrr: [ 0.00 0.00 ]
+Key: VMAXPSZ: [ 0.00 0.00 ]
+Key: VMAXPSZrm: [ 0.00 0.00 ]
+Key: VMAXPSZrmb: [ 0.00 0.00 ]
+Key: VMAXPSZrmbk: [ 0.00 0.00 ]
+Key: VMAXPSZrmbkz: [ 0.00 0.00 ]
+Key: VMAXPSZrmk: [ 0.00 0.00 ]
+Key: VMAXPSZrmkz: [ 0.00 0.00 ]
+Key: VMAXPSZrr: [ 0.00 0.00 ]
+Key: VMAXPSZrrb: [ 0.00 0.00 ]
+Key: VMAXPSZrrbk: [ 0.00 0.00 ]
+Key: VMAXPSZrrbkz: [ 0.00 0.00 ]
+Key: VMAXPSZrrk: [ 0.00 0.00 ]
+Key: VMAXPSZrrkz: [ 0.00 0.00 ]
+Key: VMAXPSrm: [ 0.00 0.00 ]
+Key: VMAXPSrr: [ 0.00 0.00 ]
+Key: VMAXSDZrm: [ 0.00 0.00 ]
+Key: VMAXSDZrm_Int: [ 0.00 0.00 ]
+Key: VMAXSDZrmk_Int: [ 0.00 0.00 ]
+Key: VMAXSDZrmkz_Int: [ 0.00 0.00 ]
+Key: VMAXSDZrr: [ 0.00 0.00 ]
+Key: VMAXSDZrr_Int: [ 0.00 0.00 ]
+Key: VMAXSDZrrb_Int: [ 0.00 0.00 ]
+Key: VMAXSDZrrbk_Int: [ 0.00 0.00 ]
+Key: VMAXSDZrrbkz_Int: [ 0.00 0.00 ]
+Key: VMAXSDZrrk_Int: [ 0.00 0.00 ]
+Key: VMAXSDZrrkz_Int: [ 0.00 0.00 ]
+Key: VMAXSDrm: [ 0.00 0.00 ]
+Key: VMAXSDrm_Int: [ 0.00 0.00 ]
+Key: VMAXSDrr: [ 0.00 0.00 ]
+Key: VMAXSDrr_Int: [ 0.00 0.00 ]
+Key: VMAXSHZrm: [ 0.00 0.00 ]
+Key: VMAXSHZrm_Int: [ 0.00 0.00 ]
+Key: VMAXSHZrmk_Int: [ 0.00 0.00 ]
+Key: VMAXSHZrmkz_Int: [ 0.00 0.00 ]
+Key: VMAXSHZrr: [ 0.00 0.00 ]
+Key: VMAXSHZrr_Int: [ 0.00 0.00 ]
+Key: VMAXSHZrrb_Int: [ 0.00 0.00 ]
+Key: VMAXSHZrrbk_Int: [ 0.00 0.00 ]
+Key: VMAXSHZrrbkz_Int: [ 0.00 0.00 ]
+Key: VMAXSHZrrk_Int: [ 0.00 0.00 ]
+Key: VMAXSHZrrkz_Int: [ 0.00 0.00 ]
+Key: VMAXSSZrm: [ 0.00 0.00 ]
+Key: VMAXSSZrm_Int: [ 0.00 0.00 ]
+Key: VMAXSSZrmk_Int: [ 0.00 0.00 ]
+Key: VMAXSSZrmkz_Int: [ 0.00 0.00 ]
+Key: VMAXSSZrr: [ 0.00 0.00 ]
+Key: VMAXSSZrr_Int: [ 0.00 0.00 ]
+Key: VMAXSSZrrb_Int: [ 0.00 0.00 ]
+Key: VMAXSSZrrbk_Int: [ 0.00 0.00 ]
+Key: VMAXSSZrrbkz_Int: [ 0.00 0.00 ]
+Key: VMAXSSZrrk_Int: [ 0.00 0.00 ]
+Key: VMAXSSZrrkz_Int: [ 0.00 0.00 ]
+Key: VMAXSSrm: [ 0.00 0.00 ]
+Key: VMAXSSrm_Int: [ 0.00 0.00 ]
+Key: VMAXSSrr: [ 0.00 0.00 ]
+Key: VMAXSSrr_Int: [ 0.00 0.00 ]
+Key: VMCALL: [ 0.00 0.00 ]
+Key: VMCLEARm: [ 0.00 0.00 ]
+Key: VMFUNC: [ 0.00 0.00 ]
+Key: VMINBF: [ 0.00 0.00 ]
+Key: VMINCPDYrm: [ 0.00 0.00 ]
+Key: VMINCPDYrr: [ 0.00 0.00 ]
+Key: VMINCPDZ: [ 0.00 0.00 ]
+Key: VMINCPDZrm: [ 0.00 0.00 ]
+Key: VMINCPDZrmb: [ 0.00 0.00 ]
+Key: VMINCPDZrmbk: [ 0.00 0.00 ]
+Key: VMINCPDZrmbkz: [ 0.00 0.00 ]
+Key: VMINCPDZrmk: [ 0.00 0.00 ]
+Key: VMINCPDZrmkz: [ 0.00 0.00 ]
+Key: VMINCPDZrr: [ 0.00 0.00 ]
+Key: VMINCPDZrrk: [ 0.00 0.00 ]
+Key: VMINCPDZrrkz: [ 0.00 0.00 ]
+Key: VMINCPDrm: [ 0.00 0.00 ]
+Key: VMINCPDrr: [ 0.00 0.00 ]
+Key: VMINCPHZ: [ 0.00 0.00 ]
+Key: VMINCPHZrm: [ 0.00 0.00 ]
+Key: VMINCPHZrmb: [ 0.00 0.00 ]
+Key: VMINCPHZrmbk: [ 0.00 0.00 ]
+Key: VMINCPHZrmbkz: [ 0.00 0.00 ]
+Key: VMINCPHZrmk: [ 0.00 0.00 ]
+Key: VMINCPHZrmkz: [ 0.00 0.00 ]
+Key: VMINCPHZrr: [ 0.00 0.00 ]
+Key: VMINCPHZrrk: [ 0.00 0.00 ]
+Key: VMINCPHZrrkz: [ 0.00 0.00 ]
+Key: VMINCPSYrm: [ 0.00 0.00 ]
+Key: VMINCPSYrr: [ 0.00 0.00 ]
+Key: VMINCPSZ: [ 0.00 0.00 ]
+Key: VMINCPSZrm: [ 0.00 0.00 ]
+Key: VMINCPSZrmb: [ 0.00 0.00 ]
+Key: VMINCPSZrmbk: [ 0.00 0.00 ]
+Key: VMINCPSZrmbkz: [ 0.00 0.00 ]
+Key: VMINCPSZrmk: [ 0.00 0.00 ]
+Key: VMINCPSZrmkz: [ 0.00 0.00 ]
+Key: VMINCPSZrr: [ 0.00 0.00 ]
+Key: VMINCPSZrrk: [ 0.00 0.00 ]
+Key: VMINCPSZrrkz: [ 0.00 0.00 ]
+Key: VMINCPSrm: [ 0.00 0.00 ]
+Key: VMINCPSrr: [ 0.00 0.00 ]
+Key: VMINCSDZrm: [ 0.00 0.00 ]
+Key: VMINCSDZrr: [ 0.00 0.00 ]
+Key: VMINCSDrm: [ 0.00 0.00 ]
+Key: VMINCSDrr: [ 0.00 0.00 ]
+Key: VMINCSHZrm: [ 0.00 0.00 ]
+Key: VMINCSHZrr: [ 0.00 0.00 ]
+Key: VMINCSSZrm: [ 0.00 0.00 ]
+Key: VMINCSSZrr: [ 0.00 0.00 ]
+Key: VMINCSSrm: [ 0.00 0.00 ]
+Key: VMINCSSrr: [ 0.00 0.00 ]
+Key: VMINMAXBF: [ 0.00 0.00 ]
+Key: VMINMAXPDZ: [ 0.00 0.00 ]
+Key: VMINMAXPDZrmbi: [ 0.00 0.00 ]
+Key: VMINMAXPDZrmbik: [ 0.00 0.00 ]
+Key: VMINMAXPDZrmbikz: [ 0.00 0.00 ]
+Key: VMINMAXPDZrmi: [ 0.00 0.00 ]
+Key: VMINMAXPDZrmik: [ 0.00 0.00 ]
+Key: VMINMAXPDZrmikz: [ 0.00 0.00 ]
+Key: VMINMAXPDZrri: [ 0.00 0.00 ]
+Key: VMINMAXPDZrrib: [ 0.00 0.00 ]
+Key: VMINMAXPDZrribk: [ 0.00 0.00 ]
+Key: VMINMAXPDZrribkz: [ 0.00 0.00 ]
+Key: VMINMAXPDZrrik: [ 0.00 0.00 ]
+Key: VMINMAXPDZrrikz: [ 0.00 0.00 ]
+Key: VMINMAXPHZ: [ 0.00 0.00 ]
+Key: VMINMAXPHZrmbi: [ 0.00 0.00 ]
+Key: VMINMAXPHZrmbik: [ 0.00 0.00 ]
+Key: VMINMAXPHZrmbikz: [ 0.00 0.00 ]
+Key: VMINMAXPHZrmi: [ 0.00 0.00 ]
+Key: VMINMAXPHZrmik: [ 0.00 0.00 ]
+Key: VMINMAXPHZrmikz: [ 0.00 0.00 ]
+Key: VMINMAXPHZrri: [ 0.00 0.00 ]
+Key: VMINMAXPHZrrib: [ 0.00 0.00 ]
+Key: VMINMAXPHZrribk: [ 0.00 0.00 ]
+Key: VMINMAXPHZrribkz: [ 0.00 0.00 ]
+Key: VMINMAXPHZrrik: [ 0.00 0.00 ]
+Key: VMINMAXPHZrrikz: [ 0.00 0.00 ]
+Key: VMINMAXPSZ: [ 0.00 0.00 ]
+Key: VMINMAXPSZrmbi: [ 0.00 0.00 ]
+Key: VMINMAXPSZrmbik: [ 0.00 0.00 ]
+Key: VMINMAXPSZrmbikz: [ 0.00 0.00 ]
+Key: VMINMAXPSZrmi: [ 0.00 0.00 ]
+Key: VMINMAXPSZrmik: [ 0.00 0.00 ]
+Key: VMINMAXPSZrmikz: [ 0.00 0.00 ]
+Key: VMINMAXPSZrri: [ 0.00 0.00 ]
+Key: VMINMAXPSZrrib: [ 0.00 0.00 ]
+Key: VMINMAXPSZrribk: [ 0.00 0.00 ]
+Key: VMINMAXPSZrribkz: [ 0.00 0.00 ]
+Key: VMINMAXPSZrrik: [ 0.00 0.00 ]
+Key: VMINMAXPSZrrikz: [ 0.00 0.00 ]
+Key: VMINMAXSDrmi: [ 0.00 0.00 ]
+Key: VMINMAXSDrmi_Int: [ 0.00 0.00 ]
+Key: VMINMAXSDrmik_Int: [ 0.00 0.00 ]
+Key: VMINMAXSDrmikz_Int: [ 0.00 0.00 ]
+Key: VMINMAXSDrri: [ 0.00 0.00 ]
+Key: VMINMAXSDrri_Int: [ 0.00 0.00 ]
+Key: VMINMAXSDrrib_Int: [ 0.00 0.00 ]
+Key: VMINMAXSDrribk_Int: [ 0.00 0.00 ]
+Key: VMINMAXSDrribkz_Int: [ 0.00 0.00 ]
+Key: VMINMAXSDrrik_Int: [ 0.00 0.00 ]
+Key: VMINMAXSDrrikz_Int: [ 0.00 0.00 ]
+Key: VMINMAXSHrmi: [ 0.00 0.00 ]
+Key: VMINMAXSHrmi_Int: [ 0.00 0.00 ]
+Key: VMINMAXSHrmik_Int: [ 0.00 0.00 ]
+Key: VMINMAXSHrmikz_Int: [ 0.00 0.00 ]
+Key: VMINMAXSHrri: [ 0.00 0.00 ]
+Key: VMINMAXSHrri_Int: [ 0.00 0.00 ]
+Key: VMINMAXSHrrib_Int: [ 0.00 0.00 ]
+Key: VMINMAXSHrribk_Int: [ 0.00 0.00 ]
+Key: VMINMAXSHrribkz_Int: [ 0.00 0.00 ]
+Key: VMINMAXSHrrik_Int: [ 0.00 0.00 ]
+Key: VMINMAXSHrrikz_Int: [ 0.00 0.00 ]
+Key: VMINMAXSSrmi: [ 0.00 0.00 ]
+Key: VMINMAXSSrmi_Int: [ 0.00 0.00 ]
+Key: VMINMAXSSrmik_Int: [ 0.00 0.00 ]
+Key: VMINMAXSSrmikz_Int: [ 0.00 0.00 ]
+Key: VMINMAXSSrri: [ 0.00 0.00 ]
+Key: VMINMAXSSrri_Int: [ 0.00 0.00 ]
+Key: VMINMAXSSrrib_Int: [ 0.00 0.00 ]
+Key: VMINMAXSSrribk_Int: [ 0.00 0.00 ]
+Key: VMINMAXSSrribkz_Int: [ 0.00 0.00 ]
+Key: VMINMAXSSrrik_Int: [ 0.00 0.00 ]
+Key: VMINMAXSSrrikz_Int: [ 0.00 0.00 ]
+Key: VMINPDYrm: [ 0.00 0.00 ]
+Key: VMINPDYrr: [ 0.00 0.00 ]
+Key: VMINPDZ: [ 0.00 0.00 ]
+Key: VMINPDZrm: [ 0.00 0.00 ]
+Key: VMINPDZrmb: [ 0.00 0.00 ]
+Key: VMINPDZrmbk: [ 0.00 0.00 ]
+Key: VMINPDZrmbkz: [ 0.00 0.00 ]
+Key: VMINPDZrmk: [ 0.00 0.00 ]
+Key: VMINPDZrmkz: [ 0.00 0.00 ]
+Key: VMINPDZrr: [ 0.00 0.00 ]
+Key: VMINPDZrrb: [ 0.00 0.00 ]
+Key: VMINPDZrrbk: [ 0.00 0.00 ]
+Key: VMINPDZrrbkz: [ 0.00 0.00 ]
+Key: VMINPDZrrk: [ 0.00 0.00 ]
+Key: VMINPDZrrkz: [ 0.00 0.00 ]
+Key: VMINPDrm: [ 0.00 0.00 ]
+Key: VMINPDrr: [ 0.00 0.00 ]
+Key: VMINPHZ: [ 0.00 0.00 ]
+Key: VMINPHZrm: [ 0.00 0.00 ]
+Key: VMINPHZrmb: [ 0.00 0.00 ]
+Key: VMINPHZrmbk: [ 0.00 0.00 ]
+Key: VMINPHZrmbkz: [ 0.00 0.00 ]
+Key: VMINPHZrmk: [ 0.00 0.00 ]
+Key: VMINPHZrmkz: [ 0.00 0.00 ]
+Key: VMINPHZrr: [ 0.00 0.00 ]
+Key: VMINPHZrrb: [ 0.00 0.00 ]
+Key: VMINPHZrrbk: [ 0.00 0.00 ]
+Key: VMINPHZrrbkz: [ 0.00 0.00 ]
+Key: VMINPHZrrk: [ 0.00 0.00 ]
+Key: VMINPHZrrkz: [ 0.00 0.00 ]
+Key: VMINPSYrm: [ 0.00 0.00 ]
+Key: VMINPSYrr: [ 0.00 0.00 ]
+Key: VMINPSZ: [ 0.00 0.00 ]
+Key: VMINPSZrm: [ 0.00 0.00 ]
+Key: VMINPSZrmb: [ 0.00 0.00 ]
+Key: VMINPSZrmbk: [ 0.00 0.00 ]
+Key: VMINPSZrmbkz: [ 0.00 0.00 ]
+Key: VMINPSZrmk: [ 0.00 0.00 ]
+Key: VMINPSZrmkz: [ 0.00 0.00 ]
+Key: VMINPSZrr: [ 0.00 0.00 ]
+Key: VMINPSZrrb: [ 0.00 0.00 ]
+Key: VMINPSZrrbk: [ 0.00 0.00 ]
+Key: VMINPSZrrbkz: [ 0.00 0.00 ]
+Key: VMINPSZrrk: [ 0.00 0.00 ]
+Key: VMINPSZrrkz: [ 0.00 0.00 ]
+Key: VMINPSrm: [ 0.00 0.00 ]
+Key: VMINPSrr: [ 0.00 0.00 ]
+Key: VMINSDZrm: [ 0.00 0.00 ]
+Key: VMINSDZrm_Int: [ 0.00 0.00 ]
+Key: VMINSDZrmk_Int: [ 0.00 0.00 ]
+Key: VMINSDZrmkz_Int: [ 0.00 0.00 ]
+Key: VMINSDZrr: [ 0.00 0.00 ]
+Key: VMINSDZrr_Int: [ 0.00 0.00 ]
+Key: VMINSDZrrb_Int: [ 0.00 0.00 ]
+Key: VMINSDZrrbk_Int: [ 0.00 0.00 ]
+Key: VMINSDZrrbkz_Int: [ 0.00 0.00 ]
+Key: VMINSDZrrk_Int: [ 0.00 0.00 ]
+Key: VMINSDZrrkz_Int: [ 0.00 0.00 ]
+Key: VMINSDrm: [ 0.00 0.00 ]
+Key: VMINSDrm_Int: [ 0.00 0.00 ]
+Key: VMINSDrr: [ 0.00 0.00 ]
+Key: VMINSDrr_Int: [ 0.00 0.00 ]
+Key: VMINSHZrm: [ 0.00 0.00 ]
+Key: VMINSHZrm_Int: [ 0.00 0.00 ]
+Key: VMINSHZrmk_Int: [ 0.00 0.00 ]
+Key: VMINSHZrmkz_Int: [ 0.00 0.00 ]
+Key: VMINSHZrr: [ 0.00 0.00 ]
+Key: VMINSHZrr_Int: [ 0.00 0.00 ]
+Key: VMINSHZrrb_Int: [ 0.00 0.00 ]
+Key: VMINSHZrrbk_Int: [ 0.00 0.00 ]
+Key: VMINSHZrrbkz_Int: [ 0.00 0.00 ]
+Key: VMINSHZrrk_Int: [ 0.00 0.00 ]
+Key: VMINSHZrrkz_Int: [ 0.00 0.00 ]
+Key: VMINSSZrm: [ 0.00 0.00 ]
+Key: VMINSSZrm_Int: [ 0.00 0.00 ]
+Key: VMINSSZrmk_Int: [ 0.00 0.00 ]
+Key: VMINSSZrmkz_Int: [ 0.00 0.00 ]
+Key: VMINSSZrr: [ 0.00 0.00 ]
+Key: VMINSSZrr_Int: [ 0.00 0.00 ]
+Key: VMINSSZrrb_Int: [ 0.00 0.00 ]
+Key: VMINSSZrrbk_Int: [ 0.00 0.00 ]
+Key: VMINSSZrrbkz_Int: [ 0.00 0.00 ]
+Key: VMINSSZrrk_Int: [ 0.00 0.00 ]
+Key: VMINSSZrrkz_Int: [ 0.00 0.00 ]
+Key: VMINSSrm: [ 0.00 0.00 ]
+Key: VMINSSrm_Int: [ 0.00 0.00 ]
+Key: VMINSSrr: [ 0.00 0.00 ]
+Key: VMINSSrr_Int: [ 0.00 0.00 ]
+Key: VMLAUNCH: [ 0.00 0.00 ]
+Key: VMLOAD: [ 0.00 0.00 ]
+Key: VMMCALL: [ 0.00 0.00 ]
+Key: VMOV: [ 0.00 0.00 ]
+Key: VMOVAPDYmr: [ 0.00 0.00 ]
+Key: VMOVAPDYrm: [ 0.00 0.00 ]
+Key: VMOVAPDYrr: [ 0.00 0.00 ]
+Key: VMOVAPDYrr_REV: [ 0.00 0.00 ]
+Key: VMOVAPDZ: [ 0.00 0.00 ]
+Key: VMOVAPDZmr: [ 0.00 0.00 ]
+Key: VMOVAPDZmrk: [ 0.00 0.00 ]
+Key: VMOVAPDZrm: [ 0.00 0.00 ]
+Key: VMOVAPDZrmk: [ 0.00 0.00 ]
+Key: VMOVAPDZrmkz: [ 0.00 0.00 ]
+Key: VMOVAPDZrr: [ 0.00 0.00 ]
+Key: VMOVAPDZrr_REV: [ 0.00 0.00 ]
+Key: VMOVAPDZrrk: [ 0.00 0.00 ]
+Key: VMOVAPDZrrk_REV: [ 0.00 0.00 ]
+Key: VMOVAPDZrrkz: [ 0.00 0.00 ]
+Key: VMOVAPDZrrkz_REV: [ 0.00 0.00 ]
+Key: VMOVAPDmr: [ 0.00 0.00 ]
+Key: VMOVAPDrm: [ 0.00 0.00 ]
+Key: VMOVAPDrr: [ 0.00 0.00 ]
+Key: VMOVAPDrr_REV: [ 0.00 0.00 ]
+Key: VMOVAPSYmr: [ 0.00 0.00 ]
+Key: VMOVAPSYrm: [ 0.00 0.00 ]
+Key: VMOVAPSYrr: [ 0.00 0.00 ]
+Key: VMOVAPSYrr_REV: [ 0.00 0.00 ]
+Key: VMOVAPSZ: [ 0.00 0.00 ]
+Key: VMOVAPSZmr: [ 0.00 0.00 ]
+Key: VMOVAPSZmrk: [ 0.00 0.00 ]
+Key: VMOVAPSZrm: [ 0.00 0.00 ]
+Key: VMOVAPSZrmk: [ 0.00 0.00 ]
+Key: VMOVAPSZrmkz: [ 0.00 0.00 ]
+Key: VMOVAPSZrr: [ 0.00 0.00 ]
+Key: VMOVAPSZrr_REV: [ 0.00 0.00 ]
+Key: VMOVAPSZrrk: [ 0.00 0.00 ]
+Key: VMOVAPSZrrk_REV: [ 0.00 0.00 ]
+Key: VMOVAPSZrrkz: [ 0.00 0.00 ]
+Key: VMOVAPSZrrkz_REV: [ 0.00 0.00 ]
+Key: VMOVAPSmr: [ 0.00 0.00 ]
+Key: VMOVAPSrm: [ 0.00 0.00 ]
+Key: VMOVAPSrr: [ 0.00 0.00 ]
+Key: VMOVAPSrr_REV: [ 0.00 0.00 ]
+Key: VMOVDDUPYrm: [ 0.00 0.00 ]
+Key: VMOVDDUPYrr: [ 0.00 0.00 ]
+Key: VMOVDDUPZ: [ 0.00 0.00 ]
+Key: VMOVDDUPZrm: [ 0.00 0.00 ]
+Key: VMOVDDUPZrmk: [ 0.00 0.00 ]
+Key: VMOVDDUPZrmkz: [ 0.00 0.00 ]
+Key: VMOVDDUPZrr: [ 0.00 0.00 ]
+Key: VMOVDDUPZrrk: [ 0.00 0.00 ]
+Key: VMOVDDUPZrrkz: [ 0.00 0.00 ]
+Key: VMOVDDUPrm: [ 0.00 0.00 ]
+Key: VMOVDDUPrr: [ 0.00 0.00 ]
+Key: VMOVDI: [ 0.00 0.00 ]
+Key: VMOVDQA: [ 0.00 0.00 ]
+Key: VMOVDQAYmr: [ 0.00 0.00 ]
+Key: VMOVDQAYrm: [ 0.00 0.00 ]
+Key: VMOVDQAYrr: [ 0.00 0.00 ]
+Key: VMOVDQAYrr_REV: [ 0.00 0.00 ]
+Key: VMOVDQAmr: [ 0.00 0.00 ]
+Key: VMOVDQArm: [ 0.00 0.00 ]
+Key: VMOVDQArr: [ 0.00 0.00 ]
+Key: VMOVDQArr_REV: [ 0.00 0.00 ]
+Key: VMOVDQU: [ 0.00 0.00 ]
+Key: VMOVDQUYmr: [ 0.00 0.00 ]
+Key: VMOVDQUYrm: [ 0.00 0.00 ]
+Key: VMOVDQUYrr: [ 0.00 0.00 ]
+Key: VMOVDQUYrr_REV: [ 0.00 0.00 ]
+Key: VMOVDQUmr: [ 0.00 0.00 ]
+Key: VMOVDQUrm: [ 0.00 0.00 ]
+Key: VMOVDQUrr: [ 0.00 0.00 ]
+Key: VMOVDQUrr_REV: [ 0.00 0.00 ]
+Key: VMOVHLPSZrr: [ 0.00 0.00 ]
+Key: VMOVHLPSrr: [ 0.00 0.00 ]
+Key: VMOVHPDZ: [ 0.00 0.00 ]
+Key: VMOVHPDmr: [ 0.00 0.00 ]
+Key: VMOVHPDrm: [ 0.00 0.00 ]
+Key: VMOVHPSZ: [ 0.00 0.00 ]
+Key: VMOVHPSmr: [ 0.00 0.00 ]
+Key: VMOVHPSrm: [ 0.00 0.00 ]
+Key: VMOVLHPSZrr: [ 0.00 0.00 ]
+Key: VMOVLHPSrr: [ 0.00 0.00 ]
+Key: VMOVLPDZ: [ 0.00 0.00 ]
+Key: VMOVLPDmr: [ 0.00 0.00 ]
+Key: VMOVLPDrm: [ 0.00 0.00 ]
+Key: VMOVLPSZ: [ 0.00 0.00 ]
+Key: VMOVLPSmr: [ 0.00 0.00 ]
+Key: VMOVLPSrm: [ 0.00 0.00 ]
+Key: VMOVMSKPDYrr: [ 0.00 0.00 ]
+Key: VMOVMSKPDrr: [ 0.00 0.00 ]
+Key: VMOVMSKPSYrr: [ 0.00 0.00 ]
+Key: VMOVMSKPSrr: [ 0.00 0.00 ]
+Key: VMOVNTDQAYrm: [ 0.00 0.00 ]
+Key: VMOVNTDQAZ: [ 0.00 0.00 ]
+Key: VMOVNTDQAZrm: [ 0.00 0.00 ]
+Key: VMOVNTDQArm: [ 0.00 0.00 ]
+Key: VMOVNTDQYmr: [ 0.00 0.00 ]
+Key: VMOVNTDQZ: [ 0.00 0.00 ]
+Key: VMOVNTDQZmr: [ 0.00 0.00 ]
+Key: VMOVNTDQmr: [ 0.00 0.00 ]
+Key: VMOVNTPDYmr: [ 0.00 0.00 ]
+Key: VMOVNTPDZ: [ 0.00 0.00 ]
+Key: VMOVNTPDZmr: [ 0.00 0.00 ]
+Key: VMOVNTPDmr: [ 0.00 0.00 ]
+Key: VMOVNTPSYmr: [ 0.00 0.00 ]
+Key: VMOVNTPSZ: [ 0.00 0.00 ]
+Key: VMOVNTPSZmr: [ 0.00 0.00 ]
+Key: VMOVNTPSmr: [ 0.00 0.00 ]
+Key: VMOVPDI: [ 0.00 0.00 ]
+Key: VMOVPQI: [ 0.00 0.00 ]
+Key: VMOVPQIto: [ 0.00 0.00 ]
+Key: VMOVQI: [ 0.00 0.00 ]
+Key: VMOVRSBZ: [ 0.00 0.00 ]
+Key: VMOVRSBZm: [ 0.00 0.00 ]
+Key: VMOVRSBZmk: [ 0.00 0.00 ]
+Key: VMOVRSBZmkz: [ 0.00 0.00 ]
+Key: VMOVRSDZ: [ 0.00 0.00 ]
+Key: VMOVRSDZm: [ 0.00 0.00 ]
+Key: VMOVRSDZmk: [ 0.00 0.00 ]
+Key: VMOVRSDZmkz: [ 0.00 0.00 ]
+Key: VMOVRSQZ: [ 0.00 0.00 ]
+Key: VMOVRSQZm: [ 0.00 0.00 ]
+Key: VMOVRSQZmk: [ 0.00 0.00 ]
+Key: VMOVRSQZmkz: [ 0.00 0.00 ]
+Key: VMOVRSWZ: [ 0.00 0.00 ]
+Key: VMOVRSWZm: [ 0.00 0.00 ]
+Key: VMOVRSWZmk: [ 0.00 0.00 ]
+Key: VMOVRSWZmkz: [ 0.00 0.00 ]
+Key: VMOVSDZmr: [ 0.00 0.00 ]
+Key: VMOVSDZmrk: [ 0.00 0.00 ]
+Key: VMOVSDZrm: [ 0.00 0.00 ]
+Key: VMOVSDZrm_alt: [ 0.00 0.00 ]
+Key: VMOVSDZrmk: [ 0.00 0.00 ]
+Key: VMOVSDZrmkz: [ 0.00 0.00 ]
+Key: VMOVSDZrr: [ 0.00 0.00 ]
+Key: VMOVSDZrr_REV: [ 0.00 0.00 ]
+Key: VMOVSDZrrk: [ 0.00 0.00 ]
+Key: VMOVSDZrrk_REV: [ 0.00 0.00 ]
+Key: VMOVSDZrrkz: [ 0.00 0.00 ]
+Key: VMOVSDZrrkz_REV: [ 0.00 0.00 ]
+Key: VMOVSDmr: [ 0.00 0.00 ]
+Key: VMOVSDrm: [ 0.00 0.00 ]
+Key: VMOVSDrm_alt: [ 0.00 0.00 ]
+Key: VMOVSDrr: [ 0.00 0.00 ]
+Key: VMOVSDrr_REV: [ 0.00 0.00 ]
+Key: VMOVSDto: [ 0.00 0.00 ]
+Key: VMOVSH: [ 0.00 0.00 ]
+Key: VMOVSHDUPYrm: [ 0.00 0.00 ]
+Key: VMOVSHDUPYrr: [ 0.00 0.00 ]
+Key: VMOVSHDUPZ: [ 0.00 0.00 ]
+Key: VMOVSHDUPZrm: [ 0.00 0.00 ]
+Key: VMOVSHDUPZrmk: [ 0.00 0.00 ]
+Key: VMOVSHDUPZrmkz: [ 0.00 0.00 ]
+Key: VMOVSHDUPZrr: [ 0.00 0.00 ]
+Key: VMOVSHDUPZrrk: [ 0.00 0.00 ]
+Key: VMOVSHDUPZrrkz: [ 0.00 0.00 ]
+Key: VMOVSHDUPrm: [ 0.00 0.00 ]
+Key: VMOVSHDUPrr: [ 0.00 0.00 ]
+Key: VMOVSHZmr: [ 0.00 0.00 ]
+Key: VMOVSHZmrk: [ 0.00 0.00 ]
+Key: VMOVSHZrm: [ 0.00 0.00 ]
+Key: VMOVSHZrm_alt: [ 0.00 0.00 ]
+Key: VMOVSHZrmk: [ 0.00 0.00 ]
+Key: VMOVSHZrmkz: [ 0.00 0.00 ]
+Key: VMOVSHZrr: [ 0.00 0.00 ]
+Key: VMOVSHZrr_REV: [ 0.00 0.00 ]
+Key: VMOVSHZrrk: [ 0.00 0.00 ]
+Key: VMOVSHZrrk_REV: [ 0.00 0.00 ]
+Key: VMOVSHZrrkz: [ 0.00 0.00 ]
+Key: VMOVSHZrrkz_REV: [ 0.00 0.00 ]
+Key: VMOVSHtoW: [ 0.00 0.00 ]
+Key: VMOVSLDUPYrm: [ 0.00 0.00 ]
+Key: VMOVSLDUPYrr: [ 0.00 0.00 ]
+Key: VMOVSLDUPZ: [ 0.00 0.00 ]
+Key: VMOVSLDUPZrm: [ 0.00 0.00 ]
+Key: VMOVSLDUPZrmk: [ 0.00 0.00 ]
+Key: VMOVSLDUPZrmkz: [ 0.00 0.00 ]
+Key: VMOVSLDUPZrr: [ 0.00 0.00 ]
+Key: VMOVSLDUPZrrk: [ 0.00 0.00 ]
+Key: VMOVSLDUPZrrkz: [ 0.00 0.00 ]
+Key: VMOVSLDUPrm: [ 0.00 0.00 ]
+Key: VMOVSLDUPrr: [ 0.00 0.00 ]
+Key: VMOVSS: [ 0.00 0.00 ]
+Key: VMOVSSZmr: [ 0.00 0.00 ]
+Key: VMOVSSZmrk: [ 0.00 0.00 ]
+Key: VMOVSSZrm: [ 0.00 0.00 ]
+Key: VMOVSSZrm_alt: [ 0.00 0.00 ]
+Key: VMOVSSZrmk: [ 0.00 0.00 ]
+Key: VMOVSSZrmkz: [ 0.00 0.00 ]
+Key: VMOVSSZrr: [ 0.00 0.00 ]
+Key: VMOVSSZrr_REV: [ 0.00 0.00 ]
+Key: VMOVSSZrrk: [ 0.00 0.00 ]
+Key: VMOVSSZrrk_REV: [ 0.00 0.00 ]
+Key: VMOVSSZrrkz: [ 0.00 0.00 ]
+Key: VMOVSSZrrkz_REV: [ 0.00 0.00 ]
+Key: VMOVSSmr: [ 0.00 0.00 ]
+Key: VMOVSSrm: [ 0.00 0.00 ]
+Key: VMOVSSrm_alt: [ 0.00 0.00 ]
+Key: VMOVSSrr: [ 0.00 0.00 ]
+Key: VMOVSSrr_REV: [ 0.00 0.00 ]
+Key: VMOVUPDYmr: [ 0.00 0.00 ]
+Key: VMOVUPDYrm: [ 0.00 0.00 ]
+Key: VMOVUPDYrr: [ 0.00 0.00 ]
+Key: VMOVUPDYrr_REV: [ 0.00 0.00 ]
+Key: VMOVUPDZ: [ 0.00 0.00 ]
+Key: VMOVUPDZmr: [ 0.00 0.00 ]
+Key: VMOVUPDZmrk: [ 0.00 0.00 ]
+Key: VMOVUPDZrm: [ 0.00 0.00 ]
+Key: VMOVUPDZrmk: [ 0.00 0.00 ]
+Key: VMOVUPDZrmkz: [ 0.00 0.00 ]
+Key: VMOVUPDZrr: [ 0.00 0.00 ]
+Key: VMOVUPDZrr_REV: [ 0.00 0.00 ]
+Key: VMOVUPDZrrk: [ 0.00 0.00 ]
+Key: VMOVUPDZrrk_REV: [ 0.00 0.00 ]
+Key: VMOVUPDZrrkz: [ 0.00 0.00 ]
+Key: VMOVUPDZrrkz_REV: [ 0.00 0.00 ]
+Key: VMOVUPDmr: [ 0.00 0.00 ]
+Key: VMOVUPDrm: [ 0.00 0.00 ]
+Key: VMOVUPDrr: [ 0.00 0.00 ]
+Key: VMOVUPDrr_REV: [ 0.00 0.00 ]
+Key: VMOVUPSYmr: [ 0.00 0.00 ]
+Key: VMOVUPSYrm: [ 0.00 0.00 ]
+Key: VMOVUPSYrr: [ 0.00 0.00 ]
+Key: VMOVUPSYrr_REV: [ 0.00 0.00 ]
+Key: VMOVUPSZ: [ 0.00 0.00 ]
+Key: VMOVUPSZmr: [ 0.00 0.00 ]
+Key: VMOVUPSZmrk: [ 0.00 0.00 ]
+Key: VMOVUPSZrm: [ 0.00 0.00 ]
+Key: VMOVUPSZrmk: [ 0.00 0.00 ]
+Key: VMOVUPSZrmkz: [ 0.00 0.00 ]
+Key: VMOVUPSZrr: [ 0.00 0.00 ]
+Key: VMOVUPSZrr_REV: [ 0.00 0.00 ]
+Key: VMOVUPSZrrk: [ 0.00 0.00 ]
+Key: VMOVUPSZrrk_REV: [ 0.00 0.00 ]
+Key: VMOVUPSZrrkz: [ 0.00 0.00 ]
+Key: VMOVUPSZrrkz_REV: [ 0.00 0.00 ]
+Key: VMOVUPSmr: [ 0.00 0.00 ]
+Key: VMOVUPSrm: [ 0.00 0.00 ]
+Key: VMOVUPSrr: [ 0.00 0.00 ]
+Key: VMOVUPSrr_REV: [ 0.00 0.00 ]
+Key: VMOVW: [ 0.00 0.00 ]
+Key: VMOVWmr: [ 0.00 0.00 ]
+Key: VMOVWrm: [ 0.00 0.00 ]
+Key: VMOVZPDILo: [ 0.00 0.00 ]
+Key: VMOVZPQILo: [ 0.00 0.00 ]
+Key: VMOVZPWILo: [ 0.00 0.00 ]
+Key: VMPSADBWYrmi: [ 0.00 0.00 ]
+Key: VMPSADBWYrri: [ 0.00 0.00 ]
+Key: VMPSADBWZ: [ 0.00 0.00 ]
+Key: VMPSADBWZrmi: [ 0.00 0.00 ]
+Key: VMPSADBWZrmik: [ 0.00 0.00 ]
+Key: VMPSADBWZrmikz: [ 0.00 0.00 ]
+Key: VMPSADBWZrri: [ 0.00 0.00 ]
+Key: VMPSADBWZrrik: [ 0.00 0.00 ]
+Key: VMPSADBWZrrikz: [ 0.00 0.00 ]
+Key: VMPSADBWrmi: [ 0.00 0.00 ]
+Key: VMPSADBWrri: [ 0.00 0.00 ]
+Key: VMPTRLDm: [ 0.00 0.00 ]
+Key: VMPTRSTm: [ 0.00 0.00 ]
+Key: VMREAD: [ 0.00 0.00 ]
+Key: VMRESUME: [ 0.00 0.00 ]
+Key: VMRUN: [ 0.00 0.00 ]
+Key: VMSAVE: [ 0.00 0.00 ]
+Key: VMULBF: [ 0.00 0.00 ]
+Key: VMULPDYrm: [ 0.00 0.00 ]
+Key: VMULPDYrr: [ 0.00 0.00 ]
+Key: VMULPDZ: [ 0.00 0.00 ]
+Key: VMULPDZrm: [ 0.00 0.00 ]
+Key: VMULPDZrmb: [ 0.00 0.00 ]
+Key: VMULPDZrmbk: [ 0.00 0.00 ]
+Key: VMULPDZrmbkz: [ 0.00 0.00 ]
+Key: VMULPDZrmk: [ 0.00 0.00 ]
+Key: VMULPDZrmkz: [ 0.00 0.00 ]
+Key: VMULPDZrr: [ 0.00 0.00 ]
+Key: VMULPDZrrb: [ 0.00 0.00 ]
+Key: VMULPDZrrbk: [ 0.00 0.00 ]
+Key: VMULPDZrrbkz: [ 0.00 0.00 ]
+Key: VMULPDZrrk: [ 0.00 0.00 ]
+Key: VMULPDZrrkz: [ 0.00 0.00 ]
+Key: VMULPDrm: [ 0.00 0.00 ]
+Key: VMULPDrr: [ 0.00 0.00 ]
+Key: VMULPHZ: [ 0.00 0.00 ]
+Key: VMULPHZrm: [ 0.00 0.00 ]
+Key: VMULPHZrmb: [ 0.00 0.00 ]
+Key: VMULPHZrmbk: [ 0.00 0.00 ]
+Key: VMULPHZrmbkz: [ 0.00 0.00 ]
+Key: VMULPHZrmk: [ 0.00 0.00 ]
+Key: VMULPHZrmkz: [ 0.00 0.00 ]
+Key: VMULPHZrr: [ 0.00 0.00 ]
+Key: VMULPHZrrb: [ 0.00 0.00 ]
+Key: VMULPHZrrbk: [ 0.00 0.00 ]
+Key: VMULPHZrrbkz: [ 0.00 0.00 ]
+Key: VMULPHZrrk: [ 0.00 0.00 ]
+Key: VMULPHZrrkz: [ 0.00 0.00 ]
+Key: VMULPSYrm: [ 0.00 0.00 ]
+Key: VMULPSYrr: [ 0.00 0.00 ]
+Key: VMULPSZ: [ 0.00 0.00 ]
+Key: VMULPSZrm: [ 0.00 0.00 ]
+Key: VMULPSZrmb: [ 0.00 0.00 ]
+Key: VMULPSZrmbk: [ 0.00 0.00 ]
+Key: VMULPSZrmbkz: [ 0.00 0.00 ]
+Key: VMULPSZrmk: [ 0.00 0.00 ]
+Key: VMULPSZrmkz: [ 0.00 0.00 ]
+Key: VMULPSZrr: [ 0.00 0.00 ]
+Key: VMULPSZrrb: [ 0.00 0.00 ]
+Key: VMULPSZrrbk: [ 0.00 0.00 ]
+Key: VMULPSZrrbkz: [ 0.00 0.00 ]
+Key: VMULPSZrrk: [ 0.00 0.00 ]
+Key: VMULPSZrrkz: [ 0.00 0.00 ]
+Key: VMULPSrm: [ 0.00 0.00 ]
+Key: VMULPSrr: [ 0.00 0.00 ]
+Key: VMULSDZrm: [ 0.00 0.00 ]
+Key: VMULSDZrm_Int: [ 0.00 0.00 ]
+Key: VMULSDZrmk_Int: [ 0.00 0.00 ]
+Key: VMULSDZrmkz_Int: [ 0.00 0.00 ]
+Key: VMULSDZrr: [ 0.00 0.00 ]
+Key: VMULSDZrr_Int: [ 0.00 0.00 ]
+Key: VMULSDZrrb_Int: [ 0.00 0.00 ]
+Key: VMULSDZrrbk_Int: [ 0.00 0.00 ]
+Key: VMULSDZrrbkz_Int: [ 0.00 0.00 ]
+Key: VMULSDZrrk_Int: [ 0.00 0.00 ]
+Key: VMULSDZrrkz_Int: [ 0.00 0.00 ]
+Key: VMULSDrm: [ 0.00 0.00 ]
+Key: VMULSDrm_Int: [ 0.00 0.00 ]
+Key: VMULSDrr: [ 0.00 0.00 ]
+Key: VMULSDrr_Int: [ 0.00 0.00 ]
+Key: VMULSHZrm: [ 0.00 0.00 ]
+Key: VMULSHZrm_Int: [ 0.00 0.00 ]
+Key: VMULSHZrmk_Int: [ 0.00 0.00 ]
+Key: VMULSHZrmkz_Int: [ 0.00 0.00 ]
+Key: VMULSHZrr: [ 0.00 0.00 ]
+Key: VMULSHZrr_Int: [ 0.00 0.00 ]
+Key: VMULSHZrrb_Int: [ 0.00 0.00 ]
+Key: VMULSHZrrbk_Int: [ 0.00 0.00 ]
+Key: VMULSHZrrbkz_Int: [ 0.00 0.00 ]
+Key: VMULSHZrrk_Int: [ 0.00 0.00 ]
+Key: VMULSHZrrkz_Int: [ 0.00 0.00 ]
+Key: VMULSSZrm: [ 0.00 0.00 ]
+Key: VMULSSZrm_Int: [ 0.00 0.00 ]
+Key: VMULSSZrmk_Int: [ 0.00 0.00 ]
+Key: VMULSSZrmkz_Int: [ 0.00 0.00 ]
+Key: VMULSSZrr: [ 0.00 0.00 ]
+Key: VMULSSZrr_Int: [ 0.00 0.00 ]
+Key: VMULSSZrrb_Int: [ 0.00 0.00 ]
+Key: VMULSSZrrbk_Int: [ 0.00 0.00 ]
+Key: VMULSSZrrbkz_Int: [ 0.00 0.00 ]
+Key: VMULSSZrrk_Int: [ 0.00 0.00 ]
+Key: VMULSSZrrkz_Int: [ 0.00 0.00 ]
+Key: VMULSSrm: [ 0.00 0.00 ]
+Key: VMULSSrm_Int: [ 0.00 0.00 ]
+Key: VMULSSrr: [ 0.00 0.00 ]
+Key: VMULSSrr_Int: [ 0.00 0.00 ]
+Key: VMWRITE: [ 0.00 0.00 ]
+Key: VMXOFF: [ 0.00 0.00 ]
+Key: VMXON: [ 0.00 0.00 ]
+Key: VORPDYrm: [ 0.00 0.00 ]
+Key: VORPDYrr: [ 0.00 0.00 ]
+Key: VORPDZ: [ 0.00 0.00 ]
+Key: VORPDZrm: [ 0.00 0.00 ]
+Key: VORPDZrmb: [ 0.00 0.00 ]
+Key: VORPDZrmbk: [ 0.00 0.00 ]
+Key: VORPDZrmbkz: [ 0.00 0.00 ]
+Key: VORPDZrmk: [ 0.00 0.00 ]
+Key: VORPDZrmkz: [ 0.00 0.00 ]
+Key: VORPDZrr: [ 0.00 0.00 ]
+Key: VORPDZrrk: [ 0.00 0.00 ]
+Key: VORPDZrrkz: [ 0.00 0.00 ]
+Key: VORPDrm: [ 0.00 0.00 ]
+Key: VORPDrr: [ 0.00 0.00 ]
+Key: VORPSYrm: [ 0.00 0.00 ]
+Key: VORPSYrr: [ 0.00 0.00 ]
+Key: VORPSZ: [ 0.00 0.00 ]
+Key: VORPSZrm: [ 0.00 0.00 ]
+Key: VORPSZrmb: [ 0.00 0.00 ]
+Key: VORPSZrmbk: [ 0.00 0.00 ]
+Key: VORPSZrmbkz: [ 0.00 0.00 ]
+Key: VORPSZrmk: [ 0.00 0.00 ]
+Key: VORPSZrmkz: [ 0.00 0.00 ]
+Key: VORPSZrr: [ 0.00 0.00 ]
+Key: VORPSZrrk: [ 0.00 0.00 ]
+Key: VORPSZrrkz: [ 0.00 0.00 ]
+Key: VORPSrm: [ 0.00 0.00 ]
+Key: VORPSrr: [ 0.00 0.00 ]
+Key: VP: [ 0.00 0.00 ]
+Key: VPABSBYrm: [ 0.00 0.00 ]
+Key: VPABSBYrr: [ 0.00 0.00 ]
+Key: VPABSBZ: [ 0.00 0.00 ]
+Key: VPABSBZrm: [ 0.00 0.00 ]
+Key: VPABSBZrmk: [ 0.00 0.00 ]
+Key: VPABSBZrmkz: [ 0.00 0.00 ]
+Key: VPABSBZrr: [ 0.00 0.00 ]
+Key: VPABSBZrrk: [ 0.00 0.00 ]
+Key: VPABSBZrrkz: [ 0.00 0.00 ]
+Key: VPABSBrm: [ 0.00 0.00 ]
+Key: VPABSBrr: [ 0.00 0.00 ]
+Key: VPABSDYrm: [ 0.00 0.00 ]
+Key: VPABSDYrr: [ 0.00 0.00 ]
+Key: VPABSDZ: [ 0.00 0.00 ]
+Key: VPABSDZrm: [ 0.00 0.00 ]
+Key: VPABSDZrmb: [ 0.00 0.00 ]
+Key: VPABSDZrmbk: [ 0.00 0.00 ]
+Key: VPABSDZrmbkz: [ 0.00 0.00 ]
+Key: VPABSDZrmk: [ 0.00 0.00 ]
+Key: VPABSDZrmkz: [ 0.00 0.00 ]
+Key: VPABSDZrr: [ 0.00 0.00 ]
+Key: VPABSDZrrk: [ 0.00 0.00 ]
+Key: VPABSDZrrkz: [ 0.00 0.00 ]
+Key: VPABSDrm: [ 0.00 0.00 ]
+Key: VPABSDrr: [ 0.00 0.00 ]
+Key: VPABSQZ: [ 0.00 0.00 ]
+Key: VPABSQZrm: [ 0.00 0.00 ]
+Key: VPABSQZrmb: [ 0.00 0.00 ]
+Key: VPABSQZrmbk: [ 0.00 0.00 ]
+Key: VPABSQZrmbkz: [ 0.00 0.00 ]
+Key: VPABSQZrmk: [ 0.00 0.00 ]
+Key: VPABSQZrmkz: [ 0.00 0.00 ]
+Key: VPABSQZrr: [ 0.00 0.00 ]
+Key: VPABSQZrrk: [ 0.00 0.00 ]
+Key: VPABSQZrrkz: [ 0.00 0.00 ]
+Key: VPABSWYrm: [ 0.00 0.00 ]
+Key: VPABSWYrr: [ 0.00 0.00 ]
+Key: VPABSWZ: [ 0.00 0.00 ]
+Key: VPABSWZrm: [ 0.00 0.00 ]
+Key: VPABSWZrmk: [ 0.00 0.00 ]
+Key: VPABSWZrmkz: [ 0.00 0.00 ]
+Key: VPABSWZrr: [ 0.00 0.00 ]
+Key: VPABSWZrrk: [ 0.00 0.00 ]
+Key: VPABSWZrrkz: [ 0.00 0.00 ]
+Key: VPABSWrm: [ 0.00 0.00 ]
+Key: VPABSWrr: [ 0.00 0.00 ]
+Key: VPACKSSDWYrm: [ 0.00 0.00 ]
+Key: VPACKSSDWYrr: [ 0.00 0.00 ]
+Key: VPACKSSDWZ: [ 0.00 0.00 ]
+Key: VPACKSSDWZrm: [ 0.00 0.00 ]
+Key: VPACKSSDWZrmb: [ 0.00 0.00 ]
+Key: VPACKSSDWZrmbk: [ 0.00 0.00 ]
+Key: VPACKSSDWZrmbkz: [ 0.00 0.00 ]
+Key: VPACKSSDWZrmk: [ 0.00 0.00 ]
+Key: VPACKSSDWZrmkz: [ 0.00 0.00 ]
+Key: VPACKSSDWZrr: [ 0.00 0.00 ]
+Key: VPACKSSDWZrrk: [ 0.00 0.00 ]
+Key: VPACKSSDWZrrkz: [ 0.00 0.00 ]
+Key: VPACKSSDWrm: [ 0.00 0.00 ]
+Key: VPACKSSDWrr: [ 0.00 0.00 ]
+Key: VPACKSSWBYrm: [ 0.00 0.00 ]
+Key: VPACKSSWBYrr: [ 0.00 0.00 ]
+Key: VPACKSSWBZ: [ 0.00 0.00 ]
+Key: VPACKSSWBZrm: [ 0.00 0.00 ]
+Key: VPACKSSWBZrmk: [ 0.00 0.00 ]
+Key: VPACKSSWBZrmkz: [ 0.00 0.00 ]
+Key: VPACKSSWBZrr: [ 0.00 0.00 ]
+Key: VPACKSSWBZrrk: [ 0.00 0.00 ]
+Key: VPACKSSWBZrrkz: [ 0.00 0.00 ]
+Key: VPACKSSWBrm: [ 0.00 0.00 ]
+Key: VPACKSSWBrr: [ 0.00 0.00 ]
+Key: VPACKUSDWYrm: [ 0.00 0.00 ]
+Key: VPACKUSDWYrr: [ 0.00 0.00 ]
+Key: VPACKUSDWZ: [ 0.00 0.00 ]
+Key: VPACKUSDWZrm: [ 0.00 0.00 ]
+Key: VPACKUSDWZrmb: [ 0.00 0.00 ]
+Key: VPACKUSDWZrmbk: [ 0.00 0.00 ]
+Key: VPACKUSDWZrmbkz: [ 0.00 0.00 ]
+Key: VPACKUSDWZrmk: [ 0.00 0.00 ]
+Key: VPACKUSDWZrmkz: [ 0.00 0.00 ]
+Key: VPACKUSDWZrr: [ 0.00 0.00 ]
+Key: VPACKUSDWZrrk: [ 0.00 0.00 ]
+Key: VPACKUSDWZrrkz: [ 0.00 0.00 ]
+Key: VPACKUSDWrm: [ 0.00 0.00 ]
+Key: VPACKUSDWrr: [ 0.00 0.00 ]
+Key: VPACKUSWBYrm: [ 0.00 0.00 ]
+Key: VPACKUSWBYrr: [ 0.00 0.00 ]
+Key: VPACKUSWBZ: [ 0.00 0.00 ]
+Key: VPACKUSWBZrm: [ 0.00 0.00 ]
+Key: VPACKUSWBZrmk: [ 0.00 0.00 ]
+Key: VPACKUSWBZrmkz: [ 0.00 0.00 ]
+Key: VPACKUSWBZrr: [ 0.00 0.00 ]
+Key: VPACKUSWBZrrk: [ 0.00 0.00 ]
+Key: VPACKUSWBZrrkz: [ 0.00 0.00 ]
+Key: VPACKUSWBrm: [ 0.00 0.00 ]
+Key: VPACKUSWBrr: [ 0.00 0.00 ]
+Key: VPADDBYrm: [ 0.00 0.00 ]
+Key: VPADDBYrr: [ 0.00 0.00 ]
+Key: VPADDBZ: [ 0.00 0.00 ]
+Key: VPADDBZrm: [ 0.00 0.00 ]
+Key: VPADDBZrmk: [ 0.00 0.00 ]
+Key: VPADDBZrmkz: [ 0.00 0.00 ]
+Key: VPADDBZrr: [ 0.00 0.00 ]
+Key: VPADDBZrrk: [ 0.00 0.00 ]
+Key: VPADDBZrrkz: [ 0.00 0.00 ]
+Key: VPADDBrm: [ 0.00 0.00 ]
+Key: VPADDBrr: [ 0.00 0.00 ]
+Key: VPADDDYrm: [ 0.00 0.00 ]
+Key: VPADDDYrr: [ 0.00 0.00 ]
+Key: VPADDDZ: [ 0.00 0.00 ]
+Key: VPADDDZrm: [ 0.00 0.00 ]
+Key: VPADDDZrmb: [ 0.00 0.00 ]
+Key: VPADDDZrmbk: [ 0.00 0.00 ]
+Key: VPADDDZrmbkz: [ 0.00 0.00 ]
+Key: VPADDDZrmk: [ 0.00 0.00 ]
+Key: VPADDDZrmkz: [ 0.00 0.00 ]
+Key: VPADDDZrr: [ 0.00 0.00 ]
+Key: VPADDDZrrk: [ 0.00 0.00 ]
+Key: VPADDDZrrkz: [ 0.00 0.00 ]
+Key: VPADDDrm: [ 0.00 0.00 ]
+Key: VPADDDrr: [ 0.00 0.00 ]
+Key: VPADDQYrm: [ 0.00 0.00 ]
+Key: VPADDQYrr: [ 0.00 0.00 ]
+Key: VPADDQZ: [ 0.00 0.00 ]
+Key: VPADDQZrm: [ 0.00 0.00 ]
+Key: VPADDQZrmb: [ 0.00 0.00 ]
+Key: VPADDQZrmbk: [ 0.00 0.00 ]
+Key: VPADDQZrmbkz: [ 0.00 0.00 ]
+Key: VPADDQZrmk: [ 0.00 0.00 ]
+Key: VPADDQZrmkz: [ 0.00 0.00 ]
+Key: VPADDQZrr: [ 0.00 0.00 ]
+Key: VPADDQZrrk: [ 0.00 0.00 ]
+Key: VPADDQZrrkz: [ 0.00 0.00 ]
+Key: VPADDQrm: [ 0.00 0.00 ]
+Key: VPADDQrr: [ 0.00 0.00 ]
+Key: VPADDSBYrm: [ 0.00 0.00 ]
+Key: VPADDSBYrr: [ 0.00 0.00 ]
+Key: VPADDSBZ: [ 0.00 0.00 ]
+Key: VPADDSBZrm: [ 0.00 0.00 ]
+Key: VPADDSBZrmk: [ 0.00 0.00 ]
+Key: VPADDSBZrmkz: [ 0.00 0.00 ]
+Key: VPADDSBZrr: [ 0.00 0.00 ]
+Key: VPADDSBZrrk: [ 0.00 0.00 ]
+Key: VPADDSBZrrkz: [ 0.00 0.00 ]
+Key: VPADDSBrm: [ 0.00 0.00 ]
+Key: VPADDSBrr: [ 0.00 0.00 ]
+Key: VPADDSWYrm: [ 0.00 0.00 ]
+Key: VPADDSWYrr: [ 0.00 0.00 ]
+Key: VPADDSWZ: [ 0.00 0.00 ]
+Key: VPADDSWZrm: [ 0.00 0.00 ]
+Key: VPADDSWZrmk: [ 0.00 0.00 ]
+Key: VPADDSWZrmkz: [ 0.00 0.00 ]
+Key: VPADDSWZrr: [ 0.00 0.00 ]
+Key: VPADDSWZrrk: [ 0.00 0.00 ]
+Key: VPADDSWZrrkz: [ 0.00 0.00 ]
+Key: VPADDSWrm: [ 0.00 0.00 ]
+Key: VPADDSWrr: [ 0.00 0.00 ]
+Key: VPADDUSBYrm: [ 0.00 0.00 ]
+Key: VPADDUSBYrr: [ 0.00 0.00 ]
+Key: VPADDUSBZ: [ 0.00 0.00 ]
+Key: VPADDUSBZrm: [ 0.00 0.00 ]
+Key: VPADDUSBZrmk: [ 0.00 0.00 ]
+Key: VPADDUSBZrmkz: [ 0.00 0.00 ]
+Key: VPADDUSBZrr: [ 0.00 0.00 ]
+Key: VPADDUSBZrrk: [ 0.00 0.00 ]
+Key: VPADDUSBZrrkz: [ 0.00 0.00 ]
+Key: VPADDUSBrm: [ 0.00 0.00 ]
+Key: VPADDUSBrr: [ 0.00 0.00 ]
+Key: VPADDUSWYrm: [ 0.00 0.00 ]
+Key: VPADDUSWYrr: [ 0.00 0.00 ]
+Key: VPADDUSWZ: [ 0.00 0.00 ]
+Key: VPADDUSWZrm: [ 0.00 0.00 ]
+Key: VPADDUSWZrmk: [ 0.00 0.00 ]
+Key: VPADDUSWZrmkz: [ 0.00 0.00 ]
+Key: VPADDUSWZrr: [ 0.00 0.00 ]
+Key: VPADDUSWZrrk: [ 0.00 0.00 ]
+Key: VPADDUSWZrrkz: [ 0.00 0.00 ]
+Key: VPADDUSWrm: [ 0.00 0.00 ]
+Key: VPADDUSWrr: [ 0.00 0.00 ]
+Key: VPADDWYrm: [ 0.00 0.00 ]
+Key: VPADDWYrr: [ 0.00 0.00 ]
+Key: VPADDWZ: [ 0.00 0.00 ]
+Key: VPADDWZrm: [ 0.00 0.00 ]
+Key: VPADDWZrmk: [ 0.00 0.00 ]
+Key: VPADDWZrmkz: [ 0.00 0.00 ]
+Key: VPADDWZrr: [ 0.00 0.00 ]
+Key: VPADDWZrrk: [ 0.00 0.00 ]
+Key: VPADDWZrrkz: [ 0.00 0.00 ]
+Key: VPADDWrm: [ 0.00 0.00 ]
+Key: VPADDWrr: [ 0.00 0.00 ]
+Key: VPALIGNRYrmi: [ 0.00 0.00 ]
+Key: VPALIGNRYrri: [ 0.00 0.00 ]
+Key: VPALIGNRZ: [ 0.00 0.00 ]
+Key: VPALIGNRZrmi: [ 0.00 0.00 ]
+Key: VPALIGNRZrmik: [ 0.00 0.00 ]
+Key: VPALIGNRZrmikz: [ 0.00 0.00 ]
+Key: VPALIGNRZrri: [ 0.00 0.00 ]
+Key: VPALIGNRZrrik: [ 0.00 0.00 ]
+Key: VPALIGNRZrrikz: [ 0.00 0.00 ]
+Key: VPALIGNRrmi: [ 0.00 0.00 ]
+Key: VPALIGNRrri: [ 0.00 0.00 ]
+Key: VPANDDZ: [ 0.00 0.00 ]
+Key: VPANDDZrm: [ 0.00 0.00 ]
+Key: VPANDDZrmb: [ 0.00 0.00 ]
+Key: VPANDDZrmbk: [ 0.00 0.00 ]
+Key: VPANDDZrmbkz: [ 0.00 0.00 ]
+Key: VPANDDZrmk: [ 0.00 0.00 ]
+Key: VPANDDZrmkz: [ 0.00 0.00 ]
+Key: VPANDDZrr: [ 0.00 0.00 ]
+Key: VPANDDZrrk: [ 0.00 0.00 ]
+Key: VPANDDZrrkz: [ 0.00 0.00 ]
+Key: VPANDNDZ: [ 0.00 0.00 ]
+Key: VPANDNDZrm: [ 0.00 0.00 ]
+Key: VPANDNDZrmb: [ 0.00 0.00 ]
+Key: VPANDNDZrmbk: [ 0.00 0.00 ]
+Key: VPANDNDZrmbkz: [ 0.00 0.00 ]
+Key: VPANDNDZrmk: [ 0.00 0.00 ]
+Key: VPANDNDZrmkz: [ 0.00 0.00 ]
+Key: VPANDNDZrr: [ 0.00 0.00 ]
+Key: VPANDNDZrrk: [ 0.00 0.00 ]
+Key: VPANDNDZrrkz: [ 0.00 0.00 ]
+Key: VPANDNQZ: [ 0.00 0.00 ]
+Key: VPANDNQZrm: [ 0.00 0.00 ]
+Key: VPANDNQZrmb: [ 0.00 0.00 ]
+Key: VPANDNQZrmbk: [ 0.00 0.00 ]
+Key: VPANDNQZrmbkz: [ 0.00 0.00 ]
+Key: VPANDNQZrmk: [ 0.00 0.00 ]
+Key: VPANDNQZrmkz: [ 0.00 0.00 ]
+Key: VPANDNQZrr: [ 0.00 0.00 ]
+Key: VPANDNQZrrk: [ 0.00 0.00 ]
+Key: VPANDNQZrrkz: [ 0.00 0.00 ]
+Key: VPANDNYrm: [ 0.00 0.00 ]
+Key: VPANDNYrr: [ 0.00 0.00 ]
+Key: VPANDNrm: [ 0.00 0.00 ]
+Key: VPANDNrr: [ 0.00 0.00 ]
+Key: VPANDQZ: [ 0.00 0.00 ]
+Key: VPANDQZrm: [ 0.00 0.00 ]
+Key: VPANDQZrmb: [ 0.00 0.00 ]
+Key: VPANDQZrmbk: [ 0.00 0.00 ]
+Key: VPANDQZrmbkz: [ 0.00 0.00 ]
+Key: VPANDQZrmk: [ 0.00 0.00 ]
+Key: VPANDQZrmkz: [ 0.00 0.00 ]
+Key: VPANDQZrr: [ 0.00 0.00 ]
+Key: VPANDQZrrk: [ 0.00 0.00 ]
+Key: VPANDQZrrkz: [ 0.00 0.00 ]
+Key: VPANDYrm: [ 0.00 0.00 ]
+Key: VPANDYrr: [ 0.00 0.00 ]
+Key: VPANDrm: [ 0.00 0.00 ]
+Key: VPANDrr: [ 0.00 0.00 ]
+Key: VPAVGBYrm: [ 0.00 0.00 ]
+Key: VPAVGBYrr: [ 0.00 0.00 ]
+Key: VPAVGBZ: [ 0.00 0.00 ]
+Key: VPAVGBZrm: [ 0.00 0.00 ]
+Key: VPAVGBZrmk: [ 0.00 0.00 ]
+Key: VPAVGBZrmkz: [ 0.00 0.00 ]
+Key: VPAVGBZrr: [ 0.00 0.00 ]
+Key: VPAVGBZrrk: [ 0.00 0.00 ]
+Key: VPAVGBZrrkz: [ 0.00 0.00 ]
+Key: VPAVGBrm: [ 0.00 0.00 ]
+Key: VPAVGBrr: [ 0.00 0.00 ]
+Key: VPAVGWYrm: [ 0.00 0.00 ]
+Key: VPAVGWYrr: [ 0.00 0.00 ]
+Key: VPAVGWZ: [ 0.00 0.00 ]
+Key: VPAVGWZrm: [ 0.00 0.00 ]
+Key: VPAVGWZrmk: [ 0.00 0.00 ]
+Key: VPAVGWZrmkz: [ 0.00 0.00 ]
+Key: VPAVGWZrr: [ 0.00 0.00 ]
+Key: VPAVGWZrrk: [ 0.00 0.00 ]
+Key: VPAVGWZrrkz: [ 0.00 0.00 ]
+Key: VPAVGWrm: [ 0.00 0.00 ]
+Key: VPAVGWrr: [ 0.00 0.00 ]
+Key: VPBLENDDYrmi: [ 0.00 0.00 ]
+Key: VPBLENDDYrri: [ 0.00 0.00 ]
+Key: VPBLENDDrmi: [ 0.00 0.00 ]
+Key: VPBLENDDrri: [ 0.00 0.00 ]
+Key: VPBLENDMBZ: [ 0.00 0.00 ]
+Key: VPBLENDMBZrm: [ 0.00 0.00 ]
+Key: VPBLENDMBZrmk: [ 0.00 0.00 ]
+Key: VPBLENDMBZrmkz: [ 0.00 0.00 ]
+Key: VPBLENDMBZrr: [ 0.00 0.00 ]
+Key: VPBLENDMBZrrk: [ 0.00 0.00 ]
+Key: VPBLENDMBZrrkz: [ 0.00 0.00 ]
+Key: VPBLENDMDZ: [ 0.00 0.00 ]
+Key: VPBLENDMDZrm: [ 0.00 0.00 ]
+Key: VPBLENDMDZrmb: [ 0.00 0.00 ]
+Key: VPBLENDMDZrmbk: [ 0.00 0.00 ]
+Key: VPBLENDMDZrmbkz: [ 0.00 0.00 ]
+Key: VPBLENDMDZrmk: [ 0.00 0.00 ]
+Key: VPBLENDMDZrmkz: [ 0.00 0.00 ]
+Key: VPBLENDMDZrr: [ 0.00 0.00 ]
+Key: VPBLENDMDZrrk: [ 0.00 0.00 ]
+Key: VPBLENDMDZrrkz: [ 0.00 0.00 ]
+Key: VPBLENDMQZ: [ 0.00 0.00 ]
+Key: VPBLENDMQZrm: [ 0.00 0.00 ]
+Key: VPBLENDMQZrmb: [ 0.00 0.00 ]
+Key: VPBLENDMQZrmbk: [ 0.00 0.00 ]
+Key: VPBLENDMQZrmbkz: [ 0.00 0.00 ]
+Key: VPBLENDMQZrmk: [ 0.00 0.00 ]
+Key: VPBLENDMQZrmkz: [ 0.00 0.00 ]
+Key: VPBLENDMQZrr: [ 0.00 0.00 ]
+Key: VPBLENDMQZrrk: [ 0.00 0.00 ]
+Key: VPBLENDMQZrrkz: [ 0.00 0.00 ]
+Key: VPBLENDMWZ: [ 0.00 0.00 ]
+Key: VPBLENDMWZrm: [ 0.00 0.00 ]
+Key: VPBLENDMWZrmk: [ 0.00 0.00 ]
+Key: VPBLENDMWZrmkz: [ 0.00 0.00 ]
+Key: VPBLENDMWZrr: [ 0.00 0.00 ]
+Key: VPBLENDMWZrrk: [ 0.00 0.00 ]
+Key: VPBLENDMWZrrkz: [ 0.00 0.00 ]
+Key: VPBLENDVBYrmr: [ 0.00 0.00 ]
+Key: VPBLENDVBYrrr: [ 0.00 0.00 ]
+Key: VPBLENDVBrmr: [ 0.00 0.00 ]
+Key: VPBLENDVBrrr: [ 0.00 0.00 ]
+Key: VPBLENDWYrmi: [ 0.00 0.00 ]
+Key: VPBLENDWYrri: [ 0.00 0.00 ]
+Key: VPBLENDWrmi: [ 0.00 0.00 ]
+Key: VPBLENDWrri: [ 0.00 0.00 ]
+Key: VPBROADCASTBYrm: [ 0.00 0.00 ]
+Key: VPBROADCASTBYrr: [ 0.00 0.00 ]
+Key: VPBROADCASTBZ: [ 0.00 0.00 ]
+Key: VPBROADCASTBZrm: [ 0.00 0.00 ]
+Key: VPBROADCASTBZrmk: [ 0.00 0.00 ]
+Key: VPBROADCASTBZrmkz: [ 0.00 0.00 ]
+Key: VPBROADCASTBZrr: [ 0.00 0.00 ]
+Key: VPBROADCASTBZrrk: [ 0.00 0.00 ]
+Key: VPBROADCASTBZrrkz: [ 0.00 0.00 ]
+Key: VPBROADCASTBrZ: [ 0.00 0.00 ]
+Key: VPBROADCASTBrZrr: [ 0.00 0.00 ]
+Key: VPBROADCASTBrZrrk: [ 0.00 0.00 ]
+Key: VPBROADCASTBrZrrkz: [ 0.00 0.00 ]
+Key: VPBROADCASTBrm: [ 0.00 0.00 ]
+Key: VPBROADCASTBrr: [ 0.00 0.00 ]
+Key: VPBROADCASTDYrm: [ 0.00 0.00 ]
+Key: VPBROADCASTDYrr: [ 0.00 0.00 ]
+Key: VPBROADCASTDZ: [ 0.00 0.00 ]
+Key: VPBROADCASTDZrm: [ 0.00 0.00 ]
+Key: VPBROADCASTDZrmk: [ 0.00 0.00 ]
+Key: VPBROADCASTDZrmkz: [ 0.00 0.00 ]
+Key: VPBROADCASTDZrr: [ 0.00 0.00 ]
+Key: VPBROADCASTDZrrk: [ 0.00 0.00 ]
+Key: VPBROADCASTDZrrkz: [ 0.00 0.00 ]
+Key: VPBROADCASTDrZ: [ 0.00 0.00 ]
+Key: VPBROADCASTDrZrr: [ 0.00 0.00 ]
+Key: VPBROADCASTDrZrrk: [ 0.00 0.00 ]
+Key: VPBROADCASTDrZrrkz: [ 0.00 0.00 ]
+Key: VPBROADCASTDrm: [ 0.00 0.00 ]
+Key: VPBROADCASTDrr: [ 0.00 0.00 ]
+Key: VPBROADCASTMB: [ 0.00 0.00 ]
+Key: VPBROADCASTMW: [ 0.00 0.00 ]
+Key: VPBROADCASTQYrm: [ 0.00 0.00 ]
+Key: VPBROADCASTQYrr: [ 0.00 0.00 ]
+Key: VPBROADCASTQZ: [ 0.00 0.00 ]
+Key: VPBROADCASTQZrm: [ 0.00 0.00 ]
+Key: VPBROADCASTQZrmk: [ 0.00 0.00 ]
+Key: VPBROADCASTQZrmkz: [ 0.00 0.00 ]
+Key: VPBROADCASTQZrr: [ 0.00 0.00 ]
+Key: VPBROADCASTQZrrk: [ 0.00 0.00 ]
+Key: VPBROADCASTQZrrkz: [ 0.00 0.00 ]
+Key: VPBROADCASTQrZ: [ 0.00 0.00 ]
+Key: VPBROADCASTQrZrr: [ 0.00 0.00 ]
+Key: VPBROADCASTQrZrrk: [ 0.00 0.00 ]
+Key: VPBROADCASTQrZrrkz: [ 0.00 0.00 ]
+Key: VPBROADCASTQrm: [ 0.00 0.00 ]
+Key: VPBROADCASTQrr: [ 0.00 0.00 ]
+Key: VPBROADCASTWYrm: [ 0.00 0.00 ]
+Key: VPBROADCASTWYrr: [ 0.00 0.00 ]
+Key: VPBROADCASTWZ: [ 0.00 0.00 ]
+Key: VPBROADCASTWZrm: [ 0.00 0.00 ]
+Key: VPBROADCASTWZrmk: [ 0.00 0.00 ]
+Key: VPBROADCASTWZrmkz: [ 0.00 0.00 ]
+Key: VPBROADCASTWZrr: [ 0.00 0.00 ]
+Key: VPBROADCASTWZrrk: [ 0.00 0.00 ]
+Key: VPBROADCASTWZrrkz: [ 0.00 0.00 ]
+Key: VPBROADCASTWrZ: [ 0.00 0.00 ]
+Key: VPBROADCASTWrZrr: [ 0.00 0.00 ]
+Key: VPBROADCASTWrZrrk: [ 0.00 0.00 ]
+Key: VPBROADCASTWrZrrkz: [ 0.00 0.00 ]
+Key: VPBROADCASTWrm: [ 0.00 0.00 ]
+Key: VPBROADCASTWrr: [ 0.00 0.00 ]
+Key: VPCLMULQDQYrmi: [ 0.00 0.00 ]
+Key: VPCLMULQDQYrri: [ 0.00 0.00 ]
+Key: VPCLMULQDQZ: [ 0.00 0.00 ]
+Key: VPCLMULQDQZrmi: [ 0.00 0.00 ]
+Key: VPCLMULQDQZrri: [ 0.00 0.00 ]
+Key: VPCLMULQDQrmi: [ 0.00 0.00 ]
+Key: VPCLMULQDQrri: [ 0.00 0.00 ]
+Key: VPCMOVYrmr: [ 0.00 0.00 ]
+Key: VPCMOVYrrm: [ 0.00 0.00 ]
+Key: VPCMOVYrrr: [ 0.00 0.00 ]
+Key: VPCMOVYrrr_REV: [ 0.00 0.00 ]
+Key: VPCMOVrmr: [ 0.00 0.00 ]
+Key: VPCMOVrrm: [ 0.00 0.00 ]
+Key: VPCMOVrrr: [ 0.00 0.00 ]
+Key: VPCMOVrrr_REV: [ 0.00 0.00 ]
+Key: VPCMPBZ: [ 0.00 0.00 ]
+Key: VPCMPBZrmi: [ 0.00 0.00 ]
+Key: VPCMPBZrmik: [ 0.00 0.00 ]
+Key: VPCMPBZrri: [ 0.00 0.00 ]
+Key: VPCMPBZrrik: [ 0.00 0.00 ]
+Key: VPCMPDZ: [ 0.00 0.00 ]
+Key: VPCMPDZrmbi: [ 0.00 0.00 ]
+Key: VPCMPDZrmbik: [ 0.00 0.00 ]
+Key: VPCMPDZrmi: [ 0.00 0.00 ]
+Key: VPCMPDZrmik: [ 0.00 0.00 ]
+Key: VPCMPDZrri: [ 0.00 0.00 ]
+Key: VPCMPDZrrik: [ 0.00 0.00 ]
+Key: VPCMPEQBYrm: [ 0.00 0.00 ]
+Key: VPCMPEQBYrr: [ 0.00 0.00 ]
+Key: VPCMPEQBZ: [ 0.00 0.00 ]
+Key: VPCMPEQBZrm: [ 0.00 0.00 ]
+Key: VPCMPEQBZrmk: [ 0.00 0.00 ]
+Key: VPCMPEQBZrr: [ 0.00 0.00 ]
+Key: VPCMPEQBZrrk: [ 0.00 0.00 ]
+Key: VPCMPEQBrm: [ 0.00 0.00 ]
+Key: VPCMPEQBrr: [ 0.00 0.00 ]
+Key: VPCMPEQDYrm: [ 0.00 0.00 ]
+Key: VPCMPEQDYrr: [ 0.00 0.00 ]
+Key: VPCMPEQDZ: [ 0.00 0.00 ]
+Key: VPCMPEQDZrm: [ 0.00 0.00 ]
+Key: VPCMPEQDZrmb: [ 0.00 0.00 ]
+Key: VPCMPEQDZrmbk: [ 0.00 0.00 ]
+Key: VPCMPEQDZrmk: [ 0.00 0.00 ]
+Key: VPCMPEQDZrr: [ 0.00 0.00 ]
+Key: VPCMPEQDZrrk: [ 0.00 0.00 ]
+Key: VPCMPEQDrm: [ 0.00 0.00 ]
+Key: VPCMPEQDrr: [ 0.00 0.00 ]
+Key: VPCMPEQQYrm: [ 0.00 0.00 ]
+Key: VPCMPEQQYrr: [ 0.00 0.00 ]
+Key: VPCMPEQQZ: [ 0.00 0.00 ]
+Key: VPCMPEQQZrm: [ 0.00 0.00 ]
+Key: VPCMPEQQZrmb: [ 0.00 0.00 ]
+Key: VPCMPEQQZrmbk: [ 0.00 0.00 ]
+Key: VPCMPEQQZrmk: [ 0.00 0.00 ]
+Key: VPCMPEQQZrr: [ 0.00 0.00 ]
+Key: VPCMPEQQZrrk: [ 0.00 0.00 ]
+Key: VPCMPEQQrm: [ 0.00 0.00 ]
+Key: VPCMPEQQrr: [ 0.00 0.00 ]
+Key: VPCMPEQWYrm: [ 0.00 0.00 ]
+Key: VPCMPEQWYrr: [ 0.00 0.00 ]
+Key: VPCMPEQWZ: [ 0.00 0.00 ]
+Key: VPCMPEQWZrm: [ 0.00 0.00 ]
+Key: VPCMPEQWZrmk: [ 0.00 0.00 ]
+Key: VPCMPEQWZrr: [ 0.00 0.00 ]
+Key: VPCMPEQWZrrk: [ 0.00 0.00 ]
+Key: VPCMPEQWrm: [ 0.00 0.00 ]
+Key: VPCMPEQWrr: [ 0.00 0.00 ]
+Key: VPCMPESTRIrmi: [ 0.00 0.00 ]
+Key: VPCMPESTRIrri: [ 0.00 0.00 ]
+Key: VPCMPESTRMrmi: [ 0.00 0.00 ]
+Key: VPCMPESTRMrri: [ 0.00 0.00 ]
+Key: VPCMPGTBYrm: [ 0.00 0.00 ]
+Key: VPCMPGTBYrr: [ 0.00 0.00 ]
+Key: VPCMPGTBZ: [ 0.00 0.00 ]
+Key: VPCMPGTBZrm: [ 0.00 0.00 ]
+Key: VPCMPGTBZrmk: [ 0.00 0.00 ]
+Key: VPCMPGTBZrr: [ 0.00 0.00 ]
+Key: VPCMPGTBZrrk: [ 0.00 0.00 ]
+Key: VPCMPGTBrm: [ 0.00 0.00 ]
+Key: VPCMPGTBrr: [ 0.00 0.00 ]
+Key: VPCMPGTDYrm: [ 0.00 0.00 ]
+Key: VPCMPGTDYrr: [ 0.00 0.00 ]
+Key: VPCMPGTDZ: [ 0.00 0.00 ]
+Key: VPCMPGTDZrm: [ 0.00 0.00 ]
+Key: VPCMPGTDZrmb: [ 0.00 0.00 ]
+Key: VPCMPGTDZrmbk: [ 0.00 0.00 ]
+Key: VPCMPGTDZrmk: [ 0.00 0.00 ]
+Key: VPCMPGTDZrr: [ 0.00 0.00 ]
+Key: VPCMPGTDZrrk: [ 0.00 0.00 ]
+Key: VPCMPGTDrm: [ 0.00 0.00 ]
+Key: VPCMPGTDrr: [ 0.00 0.00 ]
+Key: VPCMPGTQYrm: [ 0.00 0.00 ]
+Key: VPCMPGTQYrr: [ 0.00 0.00 ]
+Key: VPCMPGTQZ: [ 0.00 0.00 ]
+Key: VPCMPGTQZrm: [ 0.00 0.00 ]
+Key: VPCMPGTQZrmb: [ 0.00 0.00 ]
+Key: VPCMPGTQZrmbk: [ 0.00 0.00 ]
+Key: VPCMPGTQZrmk: [ 0.00 0.00 ]
+Key: VPCMPGTQZrr: [ 0.00 0.00 ]
+Key: VPCMPGTQZrrk: [ 0.00 0.00 ]
+Key: VPCMPGTQrm: [ 0.00 0.00 ]
+Key: VPCMPGTQrr: [ 0.00 0.00 ]
+Key: VPCMPGTWYrm: [ 0.00 0.00 ]
+Key: VPCMPGTWYrr: [ 0.00 0.00 ]
+Key: VPCMPGTWZ: [ 0.00 0.00 ]
+Key: VPCMPGTWZrm: [ 0.00 0.00 ]
+Key: VPCMPGTWZrmk: [ 0.00 0.00 ]
+Key: VPCMPGTWZrr: [ 0.00 0.00 ]
+Key: VPCMPGTWZrrk: [ 0.00 0.00 ]
+Key: VPCMPGTWrm: [ 0.00 0.00 ]
+Key: VPCMPGTWrr: [ 0.00 0.00 ]
+Key: VPCMPISTRIrmi: [ 0.00 0.00 ]
+Key: VPCMPISTRIrri: [ 0.00 0.00 ]
+Key: VPCMPISTRMrmi: [ 0.00 0.00 ]
+Key: VPCMPISTRMrri: [ 0.00 0.00 ]
+Key: VPCMPQZ: [ 0.00 0.00 ]
+Key: VPCMPQZrmbi: [ 0.00 0.00 ]
+Key: VPCMPQZrmbik: [ 0.00 0.00 ]
+Key: VPCMPQZrmi: [ 0.00 0.00 ]
+Key: VPCMPQZrmik: [ 0.00 0.00 ]
+Key: VPCMPQZrri: [ 0.00 0.00 ]
+Key: VPCMPQZrrik: [ 0.00 0.00 ]
+Key: VPCMPUBZ: [ 0.00 0.00 ]
+Key: VPCMPUBZrmi: [ 0.00 0.00 ]
+Key: VPCMPUBZrmik: [ 0.00 0.00 ]
+Key: VPCMPUBZrri: [ 0.00 0.00 ]
+Key: VPCMPUBZrrik: [ 0.00 0.00 ]
+Key: VPCMPUDZ: [ 0.00 0.00 ]
+Key: VPCMPUDZrmbi: [ 0.00 0.00 ]
+Key: VPCMPUDZrmbik: [ 0.00 0.00 ]
+Key: VPCMPUDZrmi: [ 0.00 0.00 ]
+Key: VPCMPUDZrmik: [ 0.00 0.00 ]
+Key: VPCMPUDZrri: [ 0.00 0.00 ]
+Key: VPCMPUDZrrik: [ 0.00 0.00 ]
+Key: VPCMPUQZ: [ 0.00 0.00 ]
+Key: VPCMPUQZrmbi: [ 0.00 0.00 ]
+Key: VPCMPUQZrmbik: [ 0.00 0.00 ]
+Key: VPCMPUQZrmi: [ 0.00 0.00 ]
+Key: VPCMPUQZrmik: [ 0.00 0.00 ]
+Key: VPCMPUQZrri: [ 0.00 0.00 ]
+Key: VPCMPUQZrrik: [ 0.00 0.00 ]
+Key: VPCMPUWZ: [ 0.00 0.00 ]
+Key: VPCMPUWZrmi: [ 0.00 0.00 ]
+Key: VPCMPUWZrmik: [ 0.00 0.00 ]
+Key: VPCMPUWZrri: [ 0.00 0.00 ]
+Key: VPCMPUWZrrik: [ 0.00 0.00 ]
+Key: VPCMPWZ: [ 0.00 0.00 ]
+Key: VPCMPWZrmi: [ 0.00 0.00 ]
+Key: VPCMPWZrmik: [ 0.00 0.00 ]
+Key: VPCMPWZrri: [ 0.00 0.00 ]
+Key: VPCMPWZrrik: [ 0.00 0.00 ]
+Key: VPCOMBmi: [ 0.00 0.00 ]
+Key: VPCOMBri: [ 0.00 0.00 ]
+Key: VPCOMDmi: [ 0.00 0.00 ]
+Key: VPCOMDri: [ 0.00 0.00 ]
+Key: VPCOMPRESSBZ: [ 0.00 0.00 ]
+Key: VPCOMPRESSBZmr: [ 0.00 0.00 ]
+Key: VPCOMPRESSBZmrk: [ 0.00 0.00 ]
+Key: VPCOMPRESSBZrr: [ 0.00 0.00 ]
+Key: VPCOMPRESSBZrrk: [ 0.00 0.00 ]
+Key: VPCOMPRESSBZrrkz: [ 0.00 0.00 ]
+Key: VPCOMPRESSDZ: [ 0.00 0.00 ]
+Key: VPCOMPRESSDZmr: [ 0.00 0.00 ]
+Key: VPCOMPRESSDZmrk: [ 0.00 0.00 ]
+Key: VPCOMPRESSDZrr: [ 0.00 0.00 ]
+Key: VPCOMPRESSDZrrk: [ 0.00 0.00 ]
+Key: VPCOMPRESSDZrrkz: [ 0.00 0.00 ]
+Key: VPCOMPRESSQZ: [ 0.00 0.00 ]
+Key: VPCOMPRESSQZmr: [ 0.00 0.00 ]
+Key: VPCOMPRESSQZmrk: [ 0.00 0.00 ]
+Key: VPCOMPRESSQZrr: [ 0.00 0.00 ]
+Key: VPCOMPRESSQZrrk: [ 0.00 0.00 ]
+Key: VPCOMPRESSQZrrkz: [ 0.00 0.00 ]
+Key: VPCOMPRESSWZ: [ 0.00 0.00 ]
+Key: VPCOMPRESSWZmr: [ 0.00 0.00 ]
+Key: VPCOMPRESSWZmrk: [ 0.00 0.00 ]
+Key: VPCOMPRESSWZrr: [ 0.00 0.00 ]
+Key: VPCOMPRESSWZrrk: [ 0.00 0.00 ]
+Key: VPCOMPRESSWZrrkz: [ 0.00 0.00 ]
+Key: VPCOMQmi: [ 0.00 0.00 ]
+Key: VPCOMQri: [ 0.00 0.00 ]
+Key: VPCOMUBmi: [ 0.00 0.00 ]
+Key: VPCOMUBri: [ 0.00 0.00 ]
+Key: VPCOMUDmi: [ 0.00 0.00 ]
+Key: VPCOMUDri: [ 0.00 0.00 ]
+Key: VPCOMUQmi: [ 0.00 0.00 ]
+Key: VPCOMUQri: [ 0.00 0.00 ]
+Key: VPCOMUWmi: [ 0.00 0.00 ]
+Key: VPCOMUWri: [ 0.00 0.00 ]
+Key: VPCOMWmi: [ 0.00 0.00 ]
+Key: VPCOMWri: [ 0.00 0.00 ]
+Key: VPCONFLICTDZ: [ 0.00 0.00 ]
+Key: VPCONFLICTDZrm: [ 0.00 0.00 ]
+Key: VPCONFLICTDZrmb: [ 0.00 0.00 ]
+Key: VPCONFLICTDZrmbk: [ 0.00 0.00 ]
+Key: VPCONFLICTDZrmbkz: [ 0.00 0.00 ]
+Key: VPCONFLICTDZrmk: [ 0.00 0.00 ]
+Key: VPCONFLICTDZrmkz: [ 0.00 0.00 ]
+Key: VPCONFLICTDZrr: [ 0.00 0.00 ]
+Key: VPCONFLICTDZrrk: [ 0.00 0.00 ]
+Key: VPCONFLICTDZrrkz: [ 0.00 0.00 ]
+Key: VPCONFLICTQZ: [ 0.00 0.00 ]
+Key: VPCONFLICTQZrm: [ 0.00 0.00 ]
+Key: VPCONFLICTQZrmb: [ 0.00 0.00 ]
+Key: VPCONFLICTQZrmbk: [ 0.00 0.00 ]
+Key: VPCONFLICTQZrmbkz: [ 0.00 0.00 ]
+Key: VPCONFLICTQZrmk: [ 0.00 0.00 ]
+Key: VPCONFLICTQZrmkz: [ 0.00 0.00 ]
+Key: VPCONFLICTQZrr: [ 0.00 0.00 ]
+Key: VPCONFLICTQZrrk: [ 0.00 0.00 ]
+Key: VPCONFLICTQZrrkz: [ 0.00 0.00 ]
+Key: VPDPBSSDSYrm: [ 0.00 0.00 ]
+Key: VPDPBSSDSYrr: [ 0.00 0.00 ]
+Key: VPDPBSSDSZ: [ 0.00 0.00 ]
+Key: VPDPBSSDSZrm: [ 0.00 0.00 ]
+Key: VPDPBSSDSZrmb: [ 0.00 0.00 ]
+Key: VPDPBSSDSZrmbk: [ 0.00 0.00 ]
+Key: VPDPBSSDSZrmbkz: [ 0.00 0.00 ]
+Key: VPDPBSSDSZrmk: [ 0.00 0.00 ]
+Key: VPDPBSSDSZrmkz: [ 0.00 0.00 ]
+Key: VPDPBSSDSZrr: [ 0.00 0.00 ]
+Key: VPDPBSSDSZrrk: [ 0.00 0.00 ]
+Key: VPDPBSSDSZrrkz: [ 0.00 0.00 ]
+Key: VPDPBSSDSrm: [ 0.00 0.00 ]
+Key: VPDPBSSDSrr: [ 0.00 0.00 ]
+Key: VPDPBSSDYrm: [ 0.00 0.00 ]
+Key: VPDPBSSDYrr: [ 0.00 0.00 ]
+Key: VPDPBSSDZ: [ 0.00 0.00 ]
+Key: VPDPBSSDZrm: [ 0.00 0.00 ]
+Key: VPDPBSSDZrmb: [ 0.00 0.00 ]
+Key: VPDPBSSDZrmbk: [ 0.00 0.00 ]
+Key: VPDPBSSDZrmbkz: [ 0.00 0.00 ]
+Key: VPDPBSSDZrmk: [ 0.00 0.00 ]
+Key: VPDPBSSDZrmkz: [ 0.00 0.00 ]
+Key: VPDPBSSDZrr: [ 0.00 0.00 ]
+Key: VPDPBSSDZrrk: [ 0.00 0.00 ]
+Key: VPDPBSSDZrrkz: [ 0.00 0.00 ]
+Key: VPDPBSSDrm: [ 0.00 0.00 ]
+Key: VPDPBSSDrr: [ 0.00 0.00 ]
+Key: VPDPBSUDSYrm: [ 0.00 0.00 ]
+Key: VPDPBSUDSYrr: [ 0.00 0.00 ]
+Key: VPDPBSUDSZ: [ 0.00 0.00 ]
+Key: VPDPBSUDSZrm: [ 0.00 0.00 ]
+Key: VPDPBSUDSZrmb: [ 0.00 0.00 ]
+Key: VPDPBSUDSZrmbk: [ 0.00 0.00 ]
+Key: VPDPBSUDSZrmbkz: [ 0.00 0.00 ]
+Key: VPDPBSUDSZrmk: [ 0.00 0.00 ]
+Key: VPDPBSUDSZrmkz: [ 0.00 0.00 ]
+Key: VPDPBSUDSZrr: [ 0.00 0.00 ]
+Key: VPDPBSUDSZrrk: [ 0.00 0.00 ]
+Key: VPDPBSUDSZrrkz: [ 0.00 0.00 ]
+Key: VPDPBSUDSrm: [ 0.00 0.00 ]
+Key: VPDPBSUDSrr: [ 0.00 0.00 ]
+Key: VPDPBSUDYrm: [ 0.00 0.00 ]
+Key: VPDPBSUDYrr: [ 0.00 0.00 ]
+Key: VPDPBSUDZ: [ 0.00 0.00 ]
+Key: VPDPBSUDZrm: [ 0.00 0.00 ]
+Key: VPDPBSUDZrmb: [ 0.00 0.00 ]
+Key: VPDPBSUDZrmbk: [ 0.00 0.00 ]
+Key: VPDPBSUDZrmbkz: [ 0.00 0.00 ]
+Key: VPDPBSUDZrmk: [ 0.00 0.00 ]
+Key: VPDPBSUDZrmkz: [ 0.00 0.00 ]
+Key: VPDPBSUDZrr: [ 0.00 0.00 ]
+Key: VPDPBSUDZrrk: [ 0.00 0.00 ]
+Key: VPDPBSUDZrrkz: [ 0.00 0.00 ]
+Key: VPDPBSUDrm: [ 0.00 0.00 ]
+Key: VPDPBSUDrr: [ 0.00 0.00 ]
+Key: VPDPBUSDSYrm: [ 0.00 0.00 ]
+Key: VPDPBUSDSYrr: [ 0.00 0.00 ]
+Key: VPDPBUSDSZ: [ 0.00 0.00 ]
+Key: VPDPBUSDSZrm: [ 0.00 0.00 ]
+Key: VPDPBUSDSZrmb: [ 0.00 0.00 ]
+Key: VPDPBUSDSZrmbk: [ 0.00 0.00 ]
+Key: VPDPBUSDSZrmbkz: [ 0.00 0.00 ]
+Key: VPDPBUSDSZrmk: [ 0.00 0.00 ]
+Key: VPDPBUSDSZrmkz: [ 0.00 0.00 ]
+Key: VPDPBUSDSZrr: [ 0.00 0.00 ]
+Key: VPDPBUSDSZrrk: [ 0.00 0.00 ]
+Key: VPDPBUSDSZrrkz: [ 0.00 0.00 ]
+Key: VPDPBUSDSrm: [ 0.00 0.00 ]
+Key: VPDPBUSDSrr: [ 0.00 0.00 ]
+Key: VPDPBUSDYrm: [ 0.00 0.00 ]
+Key: VPDPBUSDYrr: [ 0.00 0.00 ]
+Key: VPDPBUSDZ: [ 0.00 0.00 ]
+Key: VPDPBUSDZrm: [ 0.00 0.00 ]
+Key: VPDPBUSDZrmb: [ 0.00 0.00 ]
+Key: VPDPBUSDZrmbk: [ 0.00 0.00 ]
+Key: VPDPBUSDZrmbkz: [ 0.00 0.00 ]
+Key: VPDPBUSDZrmk: [ 0.00 0.00 ]
+Key: VPDPBUSDZrmkz: [ 0.00 0.00 ]
+Key: VPDPBUSDZrr: [ 0.00 0.00 ]
+Key: VPDPBUSDZrrk: [ 0.00 0.00 ]
+Key: VPDPBUSDZrrkz: [ 0.00 0.00 ]
+Key: VPDPBUSDrm: [ 0.00 0.00 ]
+Key: VPDPBUSDrr: [ 0.00 0.00 ]
+Key: VPDPBUUDSYrm: [ 0.00 0.00 ]
+Key: VPDPBUUDSYrr: [ 0.00 0.00 ]
+Key: VPDPBUUDSZ: [ 0.00 0.00 ]
+Key: VPDPBUUDSZrm: [ 0.00 0.00 ]
+Key: VPDPBUUDSZrmb: [ 0.00 0.00 ]
+Key: VPDPBUUDSZrmbk: [ 0.00 0.00 ]
+Key: VPDPBUUDSZrmbkz: [ 0.00 0.00 ]
+Key: VPDPBUUDSZrmk: [ 0.00 0.00 ]
+Key: VPDPBUUDSZrmkz: [ 0.00 0.00 ]
+Key: VPDPBUUDSZrr: [ 0.00 0.00 ]
+Key: VPDPBUUDSZrrk: [ 0.00 0.00 ]
+Key: VPDPBUUDSZrrkz: [ 0.00 0.00 ]
+Key: VPDPBUUDSrm: [ 0.00 0.00 ]
+Key: VPDPBUUDSrr: [ 0.00 0.00 ]
+Key: VPDPBUUDYrm: [ 0.00 0.00 ]
+Key: VPDPBUUDYrr: [ 0.00 0.00 ]
+Key: VPDPBUUDZ: [ 0.00 0.00 ]
+Key: VPDPBUUDZrm: [ 0.00 0.00 ]
+Key: VPDPBUUDZrmb: [ 0.00 0.00 ]
+Key: VPDPBUUDZrmbk: [ 0.00 0.00 ]
+Key: VPDPBUUDZrmbkz: [ 0.00 0.00 ]
+Key: VPDPBUUDZrmk: [ 0.00 0.00 ]
+Key: VPDPBUUDZrmkz: [ 0.00 0.00 ]
+Key: VPDPBUUDZrr: [ 0.00 0.00 ]
+Key: VPDPBUUDZrrk: [ 0.00 0.00 ]
+Key: VPDPBUUDZrrkz: [ 0.00 0.00 ]
+Key: VPDPBUUDrm: [ 0.00 0.00 ]
+Key: VPDPBUUDrr: [ 0.00 0.00 ]
+Key: VPDPWSSDSYrm: [ 0.00 0.00 ]
+Key: VPDPWSSDSYrr: [ 0.00 0.00 ]
+Key: VPDPWSSDSZ: [ 0.00 0.00 ]
+Key: VPDPWSSDSZrm: [ 0.00 0.00 ]
+Key: VPDPWSSDSZrmb: [ 0.00 0.00 ]
+Key: VPDPWSSDSZrmbk: [ 0.00 0.00 ]
+Key: VPDPWSSDSZrmbkz: [ 0.00 0.00 ]
+Key: VPDPWSSDSZrmk: [ 0.00 0.00 ]
+Key: VPDPWSSDSZrmkz: [ 0.00 0.00 ]
+Key: VPDPWSSDSZrr: [ 0.00 0.00 ]
+Key: VPDPWSSDSZrrk: [ 0.00 0.00 ]
+Key: VPDPWSSDSZrrkz: [ 0.00 0.00 ]
+Key: VPDPWSSDSrm: [ 0.00 0.00 ]
+Key: VPDPWSSDSrr: [ 0.00 0.00 ]
+Key: VPDPWSSDYrm: [ 0.00 0.00 ]
+Key: VPDPWSSDYrr: [ 0.00 0.00 ]
+Key: VPDPWSSDZ: [ 0.00 0.00 ]
+Key: VPDPWSSDZrm: [ 0.00 0.00 ]
+Key: VPDPWSSDZrmb: [ 0.00 0.00 ]
+Key: VPDPWSSDZrmbk: [ 0.00 0.00 ]
+Key: VPDPWSSDZrmbkz: [ 0.00 0.00 ]
+Key: VPDPWSSDZrmk: [ 0.00 0.00 ]
+Key: VPDPWSSDZrmkz: [ 0.00 0.00 ]
+Key: VPDPWSSDZrr: [ 0.00 0.00 ]
+Key: VPDPWSSDZrrk: [ 0.00 0.00 ]
+Key: VPDPWSSDZrrkz: [ 0.00 0.00 ]
+Key: VPDPWSSDrm: [ 0.00 0.00 ]
+Key: VPDPWSSDrr: [ 0.00 0.00 ]
+Key: VPDPWSUDSYrm: [ 0.00 0.00 ]
+Key: VPDPWSUDSYrr: [ 0.00 0.00 ]
+Key: VPDPWSUDSZ: [ 0.00 0.00 ]
+Key: VPDPWSUDSZrm: [ 0.00 0.00 ]
+Key: VPDPWSUDSZrmb: [ 0.00 0.00 ]
+Key: VPDPWSUDSZrmbk: [ 0.00 0.00 ]
+Key: VPDPWSUDSZrmbkz: [ 0.00 0.00 ]
+Key: VPDPWSUDSZrmk: [ 0.00 0.00 ]
+Key: VPDPWSUDSZrmkz: [ 0.00 0.00 ]
+Key: VPDPWSUDSZrr: [ 0.00 0.00 ]
+Key: VPDPWSUDSZrrk: [ 0.00 0.00 ]
+Key: VPDPWSUDSZrrkz: [ 0.00 0.00 ]
+Key: VPDPWSUDSrm: [ 0.00 0.00 ]
+Key: VPDPWSUDSrr: [ 0.00 0.00 ]
+Key: VPDPWSUDYrm: [ 0.00 0.00 ]
+Key: VPDPWSUDYrr: [ 0.00 0.00 ]
+Key: VPDPWSUDZ: [ 0.00 0.00 ]
+Key: VPDPWSUDZrm: [ 0.00 0.00 ]
+Key: VPDPWSUDZrmb: [ 0.00 0.00 ]
+Key: VPDPWSUDZrmbk: [ 0.00 0.00 ]
+Key: VPDPWSUDZrmbkz: [ 0.00 0.00 ]
+Key: VPDPWSUDZrmk: [ 0.00 0.00 ]
+Key: VPDPWSUDZrmkz: [ 0.00 0.00 ]
+Key: VPDPWSUDZrr: [ 0.00 0.00 ]
+Key: VPDPWSUDZrrk: [ 0.00 0.00 ]
+Key: VPDPWSUDZrrkz: [ 0.00 0.00 ]
+Key: VPDPWSUDrm: [ 0.00 0.00 ]
+Key: VPDPWSUDrr: [ 0.00 0.00 ]
+Key: VPDPWUSDSYrm: [ 0.00 0.00 ]
+Key: VPDPWUSDSYrr: [ 0.00 0.00 ]
+Key: VPDPWUSDSZ: [ 0.00 0.00 ]
+Key: VPDPWUSDSZrm: [ 0.00 0.00 ]
+Key: VPDPWUSDSZrmb: [ 0.00 0.00 ]
+Key: VPDPWUSDSZrmbk: [ 0.00 0.00 ]
+Key: VPDPWUSDSZrmbkz: [ 0.00 0.00 ]
+Key: VPDPWUSDSZrmk: [ 0.00 0.00 ]
+Key: VPDPWUSDSZrmkz: [ 0.00 0.00 ]
+Key: VPDPWUSDSZrr: [ 0.00 0.00 ]
+Key: VPDPWUSDSZrrk: [ 0.00 0.00 ]
+Key: VPDPWUSDSZrrkz: [ 0.00 0.00 ]
+Key: VPDPWUSDSrm: [ 0.00 0.00 ]
+Key: VPDPWUSDSrr: [ 0.00 0.00 ]
+Key: VPDPWUSDYrm: [ 0.00 0.00 ]
+Key: VPDPWUSDYrr: [ 0.00 0.00 ]
+Key: VPDPWUSDZ: [ 0.00 0.00 ]
+Key: VPDPWUSDZrm: [ 0.00 0.00 ]
+Key: VPDPWUSDZrmb: [ 0.00 0.00 ]
+Key: VPDPWUSDZrmbk: [ 0.00 0.00 ]
+Key: VPDPWUSDZrmbkz: [ 0.00 0.00 ]
+Key: VPDPWUSDZrmk: [ 0.00 0.00 ]
+Key: VPDPWUSDZrmkz: [ 0.00 0.00 ]
+Key: VPDPWUSDZrr: [ 0.00 0.00 ]
+Key: VPDPWUSDZrrk: [ 0.00 0.00 ]
+Key: VPDPWUSDZrrkz: [ 0.00 0.00 ]
+Key: VPDPWUSDrm: [ 0.00 0.00 ]
+Key: VPDPWUSDrr: [ 0.00 0.00 ]
+Key: VPDPWUUDSYrm: [ 0.00 0.00 ]
+Key: VPDPWUUDSYrr: [ 0.00 0.00 ]
+Key: VPDPWUUDSZ: [ 0.00 0.00 ]
+Key: VPDPWUUDSZrm: [ 0.00 0.00 ]
+Key: VPDPWUUDSZrmb: [ 0.00 0.00 ]
+Key: VPDPWUUDSZrmbk: [ 0.00 0.00 ]
+Key: VPDPWUUDSZrmbkz: [ 0.00 0.00 ]
+Key: VPDPWUUDSZrmk: [ 0.00 0.00 ]
+Key: VPDPWUUDSZrmkz: [ 0.00 0.00 ]
+Key: VPDPWUUDSZrr: [ 0.00 0.00 ]
+Key: VPDPWUUDSZrrk: [ 0.00 0.00 ]
+Key: VPDPWUUDSZrrkz: [ 0.00 0.00 ]
+Key: VPDPWUUDSrm: [ 0.00 0.00 ]
+Key: VPDPWUUDSrr: [ 0.00 0.00 ]
+Key: VPDPWUUDYrm: [ 0.00 0.00 ]
+Key: VPDPWUUDYrr: [ 0.00 0.00 ]
+Key: VPDPWUUDZ: [ 0.00 0.00 ]
+Key: VPDPWUUDZrm: [ 0.00 0.00 ]
+Key: VPDPWUUDZrmb: [ 0.00 0.00 ]
+Key: VPDPWUUDZrmbk: [ 0.00 0.00 ]
+Key: VPDPWUUDZrmbkz: [ 0.00 0.00 ]
+Key: VPDPWUUDZrmk: [ 0.00 0.00 ]
+Key: VPDPWUUDZrmkz: [ 0.00 0.00 ]
+Key: VPDPWUUDZrr: [ 0.00 0.00 ]
+Key: VPDPWUUDZrrk: [ 0.00 0.00 ]
+Key: VPDPWUUDZrrkz: [ 0.00 0.00 ]
+Key: VPDPWUUDrm: [ 0.00 0.00 ]
+Key: VPDPWUUDrr: [ 0.00 0.00 ]
+Key: VPERM: [ 0.00 0.00 ]
+Key: VPERMBZ: [ 0.00 0.00 ]
+Key: VPERMBZrm: [ 0.00 0.00 ]
+Key: VPERMBZrmk: [ 0.00 0.00 ]
+Key: VPERMBZrmkz: [ 0.00 0.00 ]
+Key: VPERMBZrr: [ 0.00 0.00 ]
+Key: VPERMBZrrk: [ 0.00 0.00 ]
+Key: VPERMBZrrkz: [ 0.00 0.00 ]
+Key: VPERMDYrm: [ 0.00 0.00 ]
+Key: VPERMDYrr: [ 0.00 0.00 ]
+Key: VPERMDZ: [ 0.00 0.00 ]
+Key: VPERMDZrm: [ 0.00 0.00 ]
+Key: VPERMDZrmb: [ 0.00 0.00 ]
+Key: VPERMDZrmbk: [ 0.00 0.00 ]
+Key: VPERMDZrmbkz: [ 0.00 0.00 ]
+Key: VPERMDZrmk: [ 0.00 0.00 ]
+Key: VPERMDZrmkz: [ 0.00 0.00 ]
+Key: VPERMDZrr: [ 0.00 0.00 ]
+Key: VPERMDZrrk: [ 0.00 0.00 ]
+Key: VPERMDZrrkz: [ 0.00 0.00 ]
+Key: VPERMI: [ 0.00 0.00 ]
+Key: VPERMIL: [ 0.00 0.00 ]
+Key: VPERMILPDYmi: [ 0.00 0.00 ]
+Key: VPERMILPDYri: [ 0.00 0.00 ]
+Key: VPERMILPDYrm: [ 0.00 0.00 ]
+Key: VPERMILPDYrr: [ 0.00 0.00 ]
+Key: VPERMILPDZ: [ 0.00 0.00 ]
+Key: VPERMILPDZmbi: [ 0.00 0.00 ]
+Key: VPERMILPDZmbik: [ 0.00 0.00 ]
+Key: VPERMILPDZmbikz: [ 0.00 0.00 ]
+Key: VPERMILPDZmi: [ 0.00 0.00 ]
+Key: VPERMILPDZmik: [ 0.00 0.00 ]
+Key: VPERMILPDZmikz: [ 0.00 0.00 ]
+Key: VPERMILPDZri: [ 0.00 0.00 ]
+Key: VPERMILPDZrik: [ 0.00 0.00 ]
+Key: VPERMILPDZrikz: [ 0.00 0.00 ]
+Key: VPERMILPDZrm: [ 0.00 0.00 ]
+Key: VPERMILPDZrmb: [ 0.00 0.00 ]
+Key: VPERMILPDZrmbk: [ 0.00 0.00 ]
+Key: VPERMILPDZrmbkz: [ 0.00 0.00 ]
+Key: VPERMILPDZrmk: [ 0.00 0.00 ]
+Key: VPERMILPDZrmkz: [ 0.00 0.00 ]
+Key: VPERMILPDZrr: [ 0.00 0.00 ]
+Key: VPERMILPDZrrk: [ 0.00 0.00 ]
+Key: VPERMILPDZrrkz: [ 0.00 0.00 ]
+Key: VPERMILPDmi: [ 0.00 0.00 ]
+Key: VPERMILPDri: [ 0.00 0.00 ]
+Key: VPERMILPDrm: [ 0.00 0.00 ]
+Key: VPERMILPDrr: [ 0.00 0.00 ]
+Key: VPERMILPSYmi: [ 0.00 0.00 ]
+Key: VPERMILPSYri: [ 0.00 0.00 ]
+Key: VPERMILPSYrm: [ 0.00 0.00 ]
+Key: VPERMILPSYrr: [ 0.00 0.00 ]
+Key: VPERMILPSZ: [ 0.00 0.00 ]
+Key: VPERMILPSZmbi: [ 0.00 0.00 ]
+Key: VPERMILPSZmbik: [ 0.00 0.00 ]
+Key: VPERMILPSZmbikz: [ 0.00 0.00 ]
+Key: VPERMILPSZmi: [ 0.00 0.00 ]
+Key: VPERMILPSZmik: [ 0.00 0.00 ]
+Key: VPERMILPSZmikz: [ 0.00 0.00 ]
+Key: VPERMILPSZri: [ 0.00 0.00 ]
+Key: VPERMILPSZrik: [ 0.00 0.00 ]
+Key: VPERMILPSZrikz: [ 0.00 0.00 ]
+Key: VPERMILPSZrm: [ 0.00 0.00 ]
+Key: VPERMILPSZrmb: [ 0.00 0.00 ]
+Key: VPERMILPSZrmbk: [ 0.00 0.00 ]
+Key: VPERMILPSZrmbkz: [ 0.00 0.00 ]
+Key: VPERMILPSZrmk: [ 0.00 0.00 ]
+Key: VPERMILPSZrmkz: [ 0.00 0.00 ]
+Key: VPERMILPSZrr: [ 0.00 0.00 ]
+Key: VPERMILPSZrrk: [ 0.00 0.00 ]
+Key: VPERMILPSZrrkz: [ 0.00 0.00 ]
+Key: VPERMILPSmi: [ 0.00 0.00 ]
+Key: VPERMILPSri: [ 0.00 0.00 ]
+Key: VPERMILPSrm: [ 0.00 0.00 ]
+Key: VPERMILPSrr: [ 0.00 0.00 ]
+Key: VPERMPDYmi: [ 0.00 0.00 ]
+Key: VPERMPDYri: [ 0.00 0.00 ]
+Key: VPERMPDZ: [ 0.00 0.00 ]
+Key: VPERMPDZmbi: [ 0.00 0.00 ]
+Key: VPERMPDZmbik: [ 0.00 0.00 ]
+Key: VPERMPDZmbikz: [ 0.00 0.00 ]
+Key: VPERMPDZmi: [ 0.00 0.00 ]
+Key: VPERMPDZmik: [ 0.00 0.00 ]
+Key: VPERMPDZmikz: [ 0.00 0.00 ]
+Key: VPERMPDZri: [ 0.00 0.00 ]
+Key: VPERMPDZrik: [ 0.00 0.00 ]
+Key: VPERMPDZrikz: [ 0.00 0.00 ]
+Key: VPERMPDZrm: [ 0.00 0.00 ]
+Key: VPERMPDZrmb: [ 0.00 0.00 ]
+Key: VPERMPDZrmbk: [ 0.00 0.00 ]
+Key: VPERMPDZrmbkz: [ 0.00 0.00 ]
+Key: VPERMPDZrmk: [ 0.00 0.00 ]
+Key: VPERMPDZrmkz: [ 0.00 0.00 ]
+Key: VPERMPDZrr: [ 0.00 0.00 ]
+Key: VPERMPDZrrk: [ 0.00 0.00 ]
+Key: VPERMPDZrrkz: [ 0.00 0.00 ]
+Key: VPERMPSYrm: [ 0.00 0.00 ]
+Key: VPERMPSYrr: [ 0.00 0.00 ]
+Key: VPERMPSZ: [ 0.00 0.00 ]
+Key: VPERMPSZrm: [ 0.00 0.00 ]
+Key: VPERMPSZrmb: [ 0.00 0.00 ]
+Key: VPERMPSZrmbk: [ 0.00 0.00 ]
+Key: VPERMPSZrmbkz: [ 0.00 0.00 ]
+Key: VPERMPSZrmk: [ 0.00 0.00 ]
+Key: VPERMPSZrmkz: [ 0.00 0.00 ]
+Key: VPERMPSZrr: [ 0.00 0.00 ]
+Key: VPERMPSZrrk: [ 0.00 0.00 ]
+Key: VPERMPSZrrkz: [ 0.00 0.00 ]
+Key: VPERMQYmi: [ 0.00 0.00 ]
+Key: VPERMQYri: [ 0.00 0.00 ]
+Key: VPERMQZ: [ 0.00 0.00 ]
+Key: VPERMQZmbi: [ 0.00 0.00 ]
+Key: VPERMQZmbik: [ 0.00 0.00 ]
+Key: VPERMQZmbikz: [ 0.00 0.00 ]
+Key: VPERMQZmi: [ 0.00 0.00 ]
+Key: VPERMQZmik: [ 0.00 0.00 ]
+Key: VPERMQZmikz: [ 0.00 0.00 ]
+Key: VPERMQZri: [ 0.00 0.00 ]
+Key: VPERMQZrik: [ 0.00 0.00 ]
+Key: VPERMQZrikz: [ 0.00 0.00 ]
+Key: VPERMQZrm: [ 0.00 0.00 ]
+Key: VPERMQZrmb: [ 0.00 0.00 ]
+Key: VPERMQZrmbk: [ 0.00 0.00 ]
+Key: VPERMQZrmbkz: [ 0.00 0.00 ]
+Key: VPERMQZrmk: [ 0.00 0.00 ]
+Key: VPERMQZrmkz: [ 0.00 0.00 ]
+Key: VPERMQZrr: [ 0.00 0.00 ]
+Key: VPERMQZrrk: [ 0.00 0.00 ]
+Key: VPERMQZrrkz: [ 0.00 0.00 ]
+Key: VPERMT: [ 0.00 0.00 ]
+Key: VPERMWZ: [ 0.00 0.00 ]
+Key: VPERMWZrm: [ 0.00 0.00 ]
+Key: VPERMWZrmk: [ 0.00 0.00 ]
+Key: VPERMWZrmkz: [ 0.00 0.00 ]
+Key: VPERMWZrr: [ 0.00 0.00 ]
+Key: VPERMWZrrk: [ 0.00 0.00 ]
+Key: VPERMWZrrkz: [ 0.00 0.00 ]
+Key: VPEXPANDBZ: [ 0.00 0.00 ]
+Key: VPEXPANDBZrm: [ 0.00 0.00 ]
+Key: VPEXPANDBZrmk: [ 0.00 0.00 ]
+Key: VPEXPANDBZrmkz: [ 0.00 0.00 ]
+Key: VPEXPANDBZrr: [ 0.00 0.00 ]
+Key: VPEXPANDBZrrk: [ 0.00 0.00 ]
+Key: VPEXPANDBZrrkz: [ 0.00 0.00 ]
+Key: VPEXPANDDZ: [ 0.00 0.00 ]
+Key: VPEXPANDDZrm: [ 0.00 0.00 ]
+Key: VPEXPANDDZrmk: [ 0.00 0.00 ]
+Key: VPEXPANDDZrmkz: [ 0.00 0.00 ]
+Key: VPEXPANDDZrr: [ 0.00 0.00 ]
+Key: VPEXPANDDZrrk: [ 0.00 0.00 ]
+Key: VPEXPANDDZrrkz: [ 0.00 0.00 ]
+Key: VPEXPANDQZ: [ 0.00 0.00 ]
+Key: VPEXPANDQZrm: [ 0.00 0.00 ]
+Key: VPEXPANDQZrmk: [ 0.00 0.00 ]
+Key: VPEXPANDQZrmkz: [ 0.00 0.00 ]
+Key: VPEXPANDQZrr: [ 0.00 0.00 ]
+Key: VPEXPANDQZrrk: [ 0.00 0.00 ]
+Key: VPEXPANDQZrrkz: [ 0.00 0.00 ]
+Key: VPEXPANDWZ: [ 0.00 0.00 ]
+Key: VPEXPANDWZrm: [ 0.00 0.00 ]
+Key: VPEXPANDWZrmk: [ 0.00 0.00 ]
+Key: VPEXPANDWZrmkz: [ 0.00 0.00 ]
+Key: VPEXPANDWZrr: [ 0.00 0.00 ]
+Key: VPEXPANDWZrrk: [ 0.00 0.00 ]
+Key: VPEXPANDWZrrkz: [ 0.00 0.00 ]
+Key: VPEXTRBZmri: [ 0.00 0.00 ]
+Key: VPEXTRBZrri: [ 0.00 0.00 ]
+Key: VPEXTRBmri: [ 0.00 0.00 ]
+Key: VPEXTRBrri: [ 0.00 0.00 ]
+Key: VPEXTRDZmri: [ 0.00 0.00 ]
+Key: VPEXTRDZrri: [ 0.00 0.00 ]
+Key: VPEXTRDmri: [ 0.00 0.00 ]
+Key: VPEXTRDrri: [ 0.00 0.00 ]
+Key: VPEXTRQZmri: [ 0.00 0.00 ]
+Key: VPEXTRQZrri: [ 0.00 0.00 ]
+Key: VPEXTRQmri: [ 0.00 0.00 ]
+Key: VPEXTRQrri: [ 0.00 0.00 ]
+Key: VPEXTRWZmri: [ 0.00 0.00 ]
+Key: VPEXTRWZrri: [ 0.00 0.00 ]
+Key: VPEXTRWZrri_REV: [ 0.00 0.00 ]
+Key: VPEXTRWmri: [ 0.00 0.00 ]
+Key: VPEXTRWrri: [ 0.00 0.00 ]
+Key: VPEXTRWrri_REV: [ 0.00 0.00 ]
+Key: VPGATHERDDYrm: [ 0.00 0.00 ]
+Key: VPGATHERDDZ: [ 0.00 0.00 ]
+Key: VPGATHERDDZrm: [ 0.00 0.00 ]
+Key: VPGATHERDDrm: [ 0.00 0.00 ]
+Key: VPGATHERDQYrm: [ 0.00 0.00 ]
+Key: VPGATHERDQZ: [ 0.00 0.00 ]
+Key: VPGATHERDQZrm: [ 0.00 0.00 ]
+Key: VPGATHERDQrm: [ 0.00 0.00 ]
+Key: VPGATHERQDYrm: [ 0.00 0.00 ]
+Key: VPGATHERQDZ: [ 0.00 0.00 ]
+Key: VPGATHERQDZrm: [ 0.00 0.00 ]
+Key: VPGATHERQDrm: [ 0.00 0.00 ]
+Key: VPGATHERQQYrm: [ 0.00 0.00 ]
+Key: VPGATHERQQZ: [ 0.00 0.00 ]
+Key: VPGATHERQQZrm: [ 0.00 0.00 ]
+Key: VPGATHERQQrm: [ 0.00 0.00 ]
+Key: VPHADDBDrm: [ 0.00 0.00 ]
+Key: VPHADDBDrr: [ 0.00 0.00 ]
+Key: VPHADDBQrm: [ 0.00 0.00 ]
+Key: VPHADDBQrr: [ 0.00 0.00 ]
+Key: VPHADDBWrm: [ 0.00 0.00 ]
+Key: VPHADDBWrr: [ 0.00 0.00 ]
+Key: VPHADDDQrm: [ 0.00 0.00 ]
+Key: VPHADDDQrr: [ 0.00 0.00 ]
+Key: VPHADDDYrm: [ 0.00 0.00 ]
+Key: VPHADDDYrr: [ 0.00 0.00 ]
+Key: VPHADDDrm: [ 0.00 0.00 ]
+Key: VPHADDDrr: [ 0.00 0.00 ]
+Key: VPHADDSWYrm: [ 0.00 0.00 ]
+Key: VPHADDSWYrr: [ 0.00 0.00 ]
+Key: VPHADDSWrm: [ 0.00 0.00 ]
+Key: VPHADDSWrr: [ 0.00 0.00 ]
+Key: VPHADDUBDrm: [ 0.00 0.00 ]
+Key: VPHADDUBDrr: [ 0.00 0.00 ]
+Key: VPHADDUBQrm: [ 0.00 0.00 ]
+Key: VPHADDUBQrr: [ 0.00 0.00 ]
+Key: VPHADDUBWrm: [ 0.00 0.00 ]
+Key: VPHADDUBWrr: [ 0.00 0.00 ]
+Key: VPHADDUDQrm: [ 0.00 0.00 ]
+Key: VPHADDUDQrr: [ 0.00 0.00 ]
+Key: VPHADDUWDrm: [ 0.00 0.00 ]
+Key: VPHADDUWDrr: [ 0.00 0.00 ]
+Key: VPHADDUWQrm: [ 0.00 0.00 ]
+Key: VPHADDUWQrr: [ 0.00 0.00 ]
+Key: VPHADDWDrm: [ 0.00 0.00 ]
+Key: VPHADDWDrr: [ 0.00 0.00 ]
+Key: VPHADDWQrm: [ 0.00 0.00 ]
+Key: VPHADDWQrr: [ 0.00 0.00 ]
+Key: VPHADDWYrm: [ 0.00 0.00 ]
+Key: VPHADDWYrr: [ 0.00 0.00 ]
+Key: VPHADDWrm: [ 0.00 0.00 ]
+Key: VPHADDWrr: [ 0.00 0.00 ]
+Key: VPHMINPOSUWrm: [ 0.00 0.00 ]
+Key: VPHMINPOSUWrr: [ 0.00 0.00 ]
+Key: VPHSUBBWrm: [ 0.00 0.00 ]
+Key: VPHSUBBWrr: [ 0.00 0.00 ]
+Key: VPHSUBDQrm: [ 0.00 0.00 ]
+Key: VPHSUBDQrr: [ 0.00 0.00 ]
+Key: VPHSUBDYrm: [ 0.00 0.00 ]
+Key: VPHSUBDYrr: [ 0.00 0.00 ]
+Key: VPHSUBDrm: [ 0.00 0.00 ]
+Key: VPHSUBDrr: [ 0.00 0.00 ]
+Key: VPHSUBSWYrm: [ 0.00 0.00 ]
+Key: VPHSUBSWYrr: [ 0.00 0.00 ]
+Key: VPHSUBSWrm: [ 0.00 0.00 ]
+Key: VPHSUBSWrr: [ 0.00 0.00 ]
+Key: VPHSUBWDrm: [ 0.00 0.00 ]
+Key: VPHSUBWDrr: [ 0.00 0.00 ]
+Key: VPHSUBWYrm: [ 0.00 0.00 ]
+Key: VPHSUBWYrr: [ 0.00 0.00 ]
+Key: VPHSUBWrm: [ 0.00 0.00 ]
+Key: VPHSUBWrr: [ 0.00 0.00 ]
+Key: VPINSRBZrmi: [ 0.00 0.00 ]
+Key: VPINSRBZrri: [ 0.00 0.00 ]
+Key: VPINSRBrmi: [ 0.00 0.00 ]
+Key: VPINSRBrri: [ 0.00 0.00 ]
+Key: VPINSRDZrmi: [ 0.00 0.00 ]
+Key: VPINSRDZrri: [ 0.00 0.00 ]
+Key: VPINSRDrmi: [ 0.00 0.00 ]
+Key: VPINSRDrri: [ 0.00 0.00 ]
+Key: VPINSRQZrmi: [ 0.00 0.00 ]
+Key: VPINSRQZrri: [ 0.00 0.00 ]
+Key: VPINSRQrmi: [ 0.00 0.00 ]
+Key: VPINSRQrri: [ 0.00 0.00 ]
+Key: VPINSRWZrmi: [ 0.00 0.00 ]
+Key: VPINSRWZrri: [ 0.00 0.00 ]
+Key: VPINSRWrmi: [ 0.00 0.00 ]
+Key: VPINSRWrri: [ 0.00 0.00 ]
+Key: VPLZCNTDZ: [ 0.00 0.00 ]
+Key: VPLZCNTDZrm: [ 0.00 0.00 ]
+Key: VPLZCNTDZrmb: [ 0.00 0.00 ]
+Key: VPLZCNTDZrmbk: [ 0.00 0.00 ]
+Key: VPLZCNTDZrmbkz: [ 0.00 0.00 ]
+Key: VPLZCNTDZrmk: [ 0.00 0.00 ]
+Key: VPLZCNTDZrmkz: [ 0.00 0.00 ]
+Key: VPLZCNTDZrr: [ 0.00 0.00 ]
+Key: VPLZCNTDZrrk: [ 0.00 0.00 ]
+Key: VPLZCNTDZrrkz: [ 0.00 0.00 ]
+Key: VPLZCNTQZ: [ 0.00 0.00 ]
+Key: VPLZCNTQZrm: [ 0.00 0.00 ]
+Key: VPLZCNTQZrmb: [ 0.00 0.00 ]
+Key: VPLZCNTQZrmbk: [ 0.00 0.00 ]
+Key: VPLZCNTQZrmbkz: [ 0.00 0.00 ]
+Key: VPLZCNTQZrmk: [ 0.00 0.00 ]
+Key: VPLZCNTQZrmkz: [ 0.00 0.00 ]
+Key: VPLZCNTQZrr: [ 0.00 0.00 ]
+Key: VPLZCNTQZrrk: [ 0.00 0.00 ]
+Key: VPLZCNTQZrrkz: [ 0.00 0.00 ]
+Key: VPMACSDDrm: [ 0.00 0.00 ]
+Key: VPMACSDDrr: [ 0.00 0.00 ]
+Key: VPMACSDQHrm: [ 0.00 0.00 ]
+Key: VPMACSDQHrr: [ 0.00 0.00 ]
+Key: VPMACSDQLrm: [ 0.00 0.00 ]
+Key: VPMACSDQLrr: [ 0.00 0.00 ]
+Key: VPMACSSDDrm: [ 0.00 0.00 ]
+Key: VPMACSSDDrr: [ 0.00 0.00 ]
+Key: VPMACSSDQHrm: [ 0.00 0.00 ]
+Key: VPMACSSDQHrr: [ 0.00 0.00 ]
+Key: VPMACSSDQLrm: [ 0.00 0.00 ]
+Key: VPMACSSDQLrr: [ 0.00 0.00 ]
+Key: VPMACSSWDrm: [ 0.00 0.00 ]
+Key: VPMACSSWDrr: [ 0.00 0.00 ]
+Key: VPMACSSWWrm: [ 0.00 0.00 ]
+Key: VPMACSSWWrr: [ 0.00 0.00 ]
+Key: VPMACSWDrm: [ 0.00 0.00 ]
+Key: VPMACSWDrr: [ 0.00 0.00 ]
+Key: VPMACSWWrm: [ 0.00 0.00 ]
+Key: VPMACSWWrr: [ 0.00 0.00 ]
+Key: VPMADCSSWDrm: [ 0.00 0.00 ]
+Key: VPMADCSSWDrr: [ 0.00 0.00 ]
+Key: VPMADCSWDrm: [ 0.00 0.00 ]
+Key: VPMADCSWDrr: [ 0.00 0.00 ]
+Key: VPMADD: [ 0.00 0.00 ]
+Key: VPMADDUBSWYrm: [ 0.00 0.00 ]
+Key: VPMADDUBSWYrr: [ 0.00 0.00 ]
+Key: VPMADDUBSWZ: [ 0.00 0.00 ]
+Key: VPMADDUBSWZrm: [ 0.00 0.00 ]
+Key: VPMADDUBSWZrmk: [ 0.00 0.00 ]
+Key: VPMADDUBSWZrmkz: [ 0.00 0.00 ]
+Key: VPMADDUBSWZrr: [ 0.00 0.00 ]
+Key: VPMADDUBSWZrrk: [ 0.00 0.00 ]
+Key: VPMADDUBSWZrrkz: [ 0.00 0.00 ]
+Key: VPMADDUBSWrm: [ 0.00 0.00 ]
+Key: VPMADDUBSWrr: [ 0.00 0.00 ]
+Key: VPMADDWDYrm: [ 0.00 0.00 ]
+Key: VPMADDWDYrr: [ 0.00 0.00 ]
+Key: VPMADDWDZ: [ 0.00 0.00 ]
+Key: VPMADDWDZrm: [ 0.00 0.00 ]
+Key: VPMADDWDZrmk: [ 0.00 0.00 ]
+Key: VPMADDWDZrmkz: [ 0.00 0.00 ]
+Key: VPMADDWDZrr: [ 0.00 0.00 ]
+Key: VPMADDWDZrrk: [ 0.00 0.00 ]
+Key: VPMADDWDZrrkz: [ 0.00 0.00 ]
+Key: VPMADDWDrm: [ 0.00 0.00 ]
+Key: VPMADDWDrr: [ 0.00 0.00 ]
+Key: VPMASKMOVDYmr: [ 0.00 0.00 ]
+Key: VPMASKMOVDYrm: [ 0.00 0.00 ]
+Key: VPMASKMOVDmr: [ 0.00 0.00 ]
+Key: VPMASKMOVDrm: [ 0.00 0.00 ]
+Key: VPMASKMOVQYmr: [ 0.00 0.00 ]
+Key: VPMASKMOVQYrm: [ 0.00 0.00 ]
+Key: VPMASKMOVQmr: [ 0.00 0.00 ]
+Key: VPMASKMOVQrm: [ 0.00 0.00 ]
+Key: VPMAXSBYrm: [ 0.00 0.00 ]
+Key: VPMAXSBYrr: [ 0.00 0.00 ]
+Key: VPMAXSBZ: [ 0.00 0.00 ]
+Key: VPMAXSBZrm: [ 0.00 0.00 ]
+Key: VPMAXSBZrmk: [ 0.00 0.00 ]
+Key: VPMAXSBZrmkz: [ 0.00 0.00 ]
+Key: VPMAXSBZrr: [ 0.00 0.00 ]
+Key: VPMAXSBZrrk: [ 0.00 0.00 ]
+Key: VPMAXSBZrrkz: [ 0.00 0.00 ]
+Key: VPMAXSBrm: [ 0.00 0.00 ]
+Key: VPMAXSBrr: [ 0.00 0.00 ]
+Key: VPMAXSDYrm: [ 0.00 0.00 ]
+Key: VPMAXSDYrr: [ 0.00 0.00 ]
+Key: VPMAXSDZ: [ 0.00 0.00 ]
+Key: VPMAXSDZrm: [ 0.00 0.00 ]
+Key: VPMAXSDZrmb: [ 0.00 0.00 ]
+Key: VPMAXSDZrmbk: [ 0.00 0.00 ]
+Key: VPMAXSDZrmbkz: [ 0.00 0.00 ]
+Key: VPMAXSDZrmk: [ 0.00 0.00 ]
+Key: VPMAXSDZrmkz: [ 0.00 0.00 ]
+Key: VPMAXSDZrr: [ 0.00 0.00 ]
+Key: VPMAXSDZrrk: [ 0.00 0.00 ]
+Key: VPMAXSDZrrkz: [ 0.00 0.00 ]
+Key: VPMAXSDrm: [ 0.00 0.00 ]
+Key: VPMAXSDrr: [ 0.00 0.00 ]
+Key: VPMAXSQZ: [ 0.00 0.00 ]
+Key: VPMAXSQZrm: [ 0.00 0.00 ]
+Key: VPMAXSQZrmb: [ 0.00 0.00 ]
+Key: VPMAXSQZrmbk: [ 0.00 0.00 ]
+Key: VPMAXSQZrmbkz: [ 0.00 0.00 ]
+Key: VPMAXSQZrmk: [ 0.00 0.00 ]
+Key: VPMAXSQZrmkz: [ 0.00 0.00 ]
+Key: VPMAXSQZrr: [ 0.00 0.00 ]
+Key: VPMAXSQZrrk: [ 0.00 0.00 ]
+Key: VPMAXSQZrrkz: [ 0.00 0.00 ]
+Key: VPMAXSWYrm: [ 0.00 0.00 ]
+Key: VPMAXSWYrr: [ 0.00 0.00 ]
+Key: VPMAXSWZ: [ 0.00 0.00 ]
+Key: VPMAXSWZrm: [ 0.00 0.00 ]
+Key: VPMAXSWZrmk: [ 0.00 0.00 ]
+Key: VPMAXSWZrmkz: [ 0.00 0.00 ]
+Key: VPMAXSWZrr: [ 0.00 0.00 ]
+Key: VPMAXSWZrrk: [ 0.00 0.00 ]
+Key: VPMAXSWZrrkz: [ 0.00 0.00 ]
+Key: VPMAXSWrm: [ 0.00 0.00 ]
+Key: VPMAXSWrr: [ 0.00 0.00 ]
+Key: VPMAXUBYrm: [ 0.00 0.00 ]
+Key: VPMAXUBYrr: [ 0.00 0.00 ]
+Key: VPMAXUBZ: [ 0.00 0.00 ]
+Key: VPMAXUBZrm: [ 0.00 0.00 ]
+Key: VPMAXUBZrmk: [ 0.00 0.00 ]
+Key: VPMAXUBZrmkz: [ 0.00 0.00 ]
+Key: VPMAXUBZrr: [ 0.00 0.00 ]
+Key: VPMAXUBZrrk: [ 0.00 0.00 ]
+Key: VPMAXUBZrrkz: [ 0.00 0.00 ]
+Key: VPMAXUBrm: [ 0.00 0.00 ]
+Key: VPMAXUBrr: [ 0.00 0.00 ]
+Key: VPMAXUDYrm: [ 0.00 0.00 ]
+Key: VPMAXUDYrr: [ 0.00 0.00 ]
+Key: VPMAXUDZ: [ 0.00 0.00 ]
+Key: VPMAXUDZrm: [ 0.00 0.00 ]
+Key: VPMAXUDZrmb: [ 0.00 0.00 ]
+Key: VPMAXUDZrmbk: [ 0.00 0.00 ]
+Key: VPMAXUDZrmbkz: [ 0.00 0.00 ]
+Key: VPMAXUDZrmk: [ 0.00 0.00 ]
+Key: VPMAXUDZrmkz: [ 0.00 0.00 ]
+Key: VPMAXUDZrr: [ 0.00 0.00 ]
+Key: VPMAXUDZrrk: [ 0.00 0.00 ]
+Key: VPMAXUDZrrkz: [ 0.00 0.00 ]
+Key: VPMAXUDrm: [ 0.00 0.00 ]
+Key: VPMAXUDrr: [ 0.00 0.00 ]
+Key: VPMAXUQZ: [ 0.00 0.00 ]
+Key: VPMAXUQZrm: [ 0.00 0.00 ]
+Key: VPMAXUQZrmb: [ 0.00 0.00 ]
+Key: VPMAXUQZrmbk: [ 0.00 0.00 ]
+Key: VPMAXUQZrmbkz: [ 0.00 0.00 ]
+Key: VPMAXUQZrmk: [ 0.00 0.00 ]
+Key: VPMAXUQZrmkz: [ 0.00 0.00 ]
+Key: VPMAXUQZrr: [ 0.00 0.00 ]
+Key: VPMAXUQZrrk: [ 0.00 0.00 ]
+Key: VPMAXUQZrrkz: [ 0.00 0.00 ]
+Key: VPMAXUWYrm: [ 0.00 0.00 ]
+Key: VPMAXUWYrr: [ 0.00 0.00 ]
+Key: VPMAXUWZ: [ 0.00 0.00 ]
+Key: VPMAXUWZrm: [ 0.00 0.00 ]
+Key: VPMAXUWZrmk: [ 0.00 0.00 ]
+Key: VPMAXUWZrmkz: [ 0.00 0.00 ]
+Key: VPMAXUWZrr: [ 0.00 0.00 ]
+Key: VPMAXUWZrrk: [ 0.00 0.00 ]
+Key: VPMAXUWZrrkz: [ 0.00 0.00 ]
+Key: VPMAXUWrm: [ 0.00 0.00 ]
+Key: VPMAXUWrr: [ 0.00 0.00 ]
+Key: VPMINSBYrm: [ 0.00 0.00 ]
+Key: VPMINSBYrr: [ 0.00 0.00 ]
+Key: VPMINSBZ: [ 0.00 0.00 ]
+Key: VPMINSBZrm: [ 0.00 0.00 ]
+Key: VPMINSBZrmk: [ 0.00 0.00 ]
+Key: VPMINSBZrmkz: [ 0.00 0.00 ]
+Key: VPMINSBZrr: [ 0.00 0.00 ]
+Key: VPMINSBZrrk: [ 0.00 0.00 ]
+Key: VPMINSBZrrkz: [ 0.00 0.00 ]
+Key: VPMINSBrm: [ 0.00 0.00 ]
+Key: VPMINSBrr: [ 0.00 0.00 ]
+Key: VPMINSDYrm: [ 0.00 0.00 ]
+Key: VPMINSDYrr: [ 0.00 0.00 ]
+Key: VPMINSDZ: [ 0.00 0.00 ]
+Key: VPMINSDZrm: [ 0.00 0.00 ]
+Key: VPMINSDZrmb: [ 0.00 0.00 ]
+Key: VPMINSDZrmbk: [ 0.00 0.00 ]
+Key: VPMINSDZrmbkz: [ 0.00 0.00 ]
+Key: VPMINSDZrmk: [ 0.00 0.00 ]
+Key: VPMINSDZrmkz: [ 0.00 0.00 ]
+Key: VPMINSDZrr: [ 0.00 0.00 ]
+Key: VPMINSDZrrk: [ 0.00 0.00 ]
+Key: VPMINSDZrrkz: [ 0.00 0.00 ]
+Key: VPMINSDrm: [ 0.00 0.00 ]
+Key: VPMINSDrr: [ 0.00 0.00 ]
+Key: VPMINSQZ: [ 0.00 0.00 ]
+Key: VPMINSQZrm: [ 0.00 0.00 ]
+Key: VPMINSQZrmb: [ 0.00 0.00 ]
+Key: VPMINSQZrmbk: [ 0.00 0.00 ]
+Key: VPMINSQZrmbkz: [ 0.00 0.00 ]
+Key: VPMINSQZrmk: [ 0.00 0.00 ]
+Key: VPMINSQZrmkz: [ 0.00 0.00 ]
+Key: VPMINSQZrr: [ 0.00 0.00 ]
+Key: VPMINSQZrrk: [ 0.00 0.00 ]
+Key: VPMINSQZrrkz: [ 0.00 0.00 ]
+Key: VPMINSWYrm: [ 0.00 0.00 ]
+Key: VPMINSWYrr: [ 0.00 0.00 ]
+Key: VPMINSWZ: [ 0.00 0.00 ]
+Key: VPMINSWZrm: [ 0.00 0.00 ]
+Key: VPMINSWZrmk: [ 0.00 0.00 ]
+Key: VPMINSWZrmkz: [ 0.00 0.00 ]
+Key: VPMINSWZrr: [ 0.00 0.00 ]
+Key: VPMINSWZrrk: [ 0.00 0.00 ]
+Key: VPMINSWZrrkz: [ 0.00 0.00 ]
+Key: VPMINSWrm: [ 0.00 0.00 ]
+Key: VPMINSWrr: [ 0.00 0.00 ]
+Key: VPMINUBYrm: [ 0.00 0.00 ]
+Key: VPMINUBYrr: [ 0.00 0.00 ]
+Key: VPMINUBZ: [ 0.00 0.00 ]
+Key: VPMINUBZrm: [ 0.00 0.00 ]
+Key: VPMINUBZrmk: [ 0.00 0.00 ]
+Key: VPMINUBZrmkz: [ 0.00 0.00 ]
+Key: VPMINUBZrr: [ 0.00 0.00 ]
+Key: VPMINUBZrrk: [ 0.00 0.00 ]
+Key: VPMINUBZrrkz: [ 0.00 0.00 ]
+Key: VPMINUBrm: [ 0.00 0.00 ]
+Key: VPMINUBrr: [ 0.00 0.00 ]
+Key: VPMINUDYrm: [ 0.00 0.00 ]
+Key: VPMINUDYrr: [ 0.00 0.00 ]
+Key: VPMINUDZ: [ 0.00 0.00 ]
+Key: VPMINUDZrm: [ 0.00 0.00 ]
+Key: VPMINUDZrmb: [ 0.00 0.00 ]
+Key: VPMINUDZrmbk: [ 0.00 0.00 ]
+Key: VPMINUDZrmbkz: [ 0.00 0.00 ]
+Key: VPMINUDZrmk: [ 0.00 0.00 ]
+Key: VPMINUDZrmkz: [ 0.00 0.00 ]
+Key: VPMINUDZrr: [ 0.00 0.00 ]
+Key: VPMINUDZrrk: [ 0.00 0.00 ]
+Key: VPMINUDZrrkz: [ 0.00 0.00 ]
+Key: VPMINUDrm: [ 0.00 0.00 ]
+Key: VPMINUDrr: [ 0.00 0.00 ]
+Key: VPMINUQZ: [ 0.00 0.00 ]
+Key: VPMINUQZrm: [ 0.00 0.00 ]
+Key: VPMINUQZrmb: [ 0.00 0.00 ]
+Key: VPMINUQZrmbk: [ 0.00 0.00 ]
+Key: VPMINUQZrmbkz: [ 0.00 0.00 ]
+Key: VPMINUQZrmk: [ 0.00 0.00 ]
+Key: VPMINUQZrmkz: [ 0.00 0.00 ]
+Key: VPMINUQZrr: [ 0.00 0.00 ]
+Key: VPMINUQZrrk: [ 0.00 0.00 ]
+Key: VPMINUQZrrkz: [ 0.00 0.00 ]
+Key: VPMINUWYrm: [ 0.00 0.00 ]
+Key: VPMINUWYrr: [ 0.00 0.00 ]
+Key: VPMINUWZ: [ 0.00 0.00 ]
+Key: VPMINUWZrm: [ 0.00 0.00 ]
+Key: VPMINUWZrmk: [ 0.00 0.00 ]
+Key: VPMINUWZrmkz: [ 0.00 0.00 ]
+Key: VPMINUWZrr: [ 0.00 0.00 ]
+Key: VPMINUWZrrk: [ 0.00 0.00 ]
+Key: VPMINUWZrrkz: [ 0.00 0.00 ]
+Key: VPMINUWrm: [ 0.00 0.00 ]
+Key: VPMINUWrr: [ 0.00 0.00 ]
+Key: VPMOVB: [ 0.00 0.00 ]
+Key: VPMOVD: [ 0.00 0.00 ]
+Key: VPMOVDBZ: [ 0.00 0.00 ]
+Key: VPMOVDBZmr: [ 0.00 0.00 ]
+Key: VPMOVDBZmrk: [ 0.00 0.00 ]
+Key: VPMOVDBZrr: [ 0.00 0.00 ]
+Key: VPMOVDBZrrk: [ 0.00 0.00 ]
+Key: VPMOVDBZrrkz: [ 0.00 0.00 ]
+Key: VPMOVDWZ: [ 0.00 0.00 ]
+Key: VPMOVDWZmr: [ 0.00 0.00 ]
+Key: VPMOVDWZmrk: [ 0.00 0.00 ]
+Key: VPMOVDWZrr: [ 0.00 0.00 ]
+Key: VPMOVDWZrrk: [ 0.00 0.00 ]
+Key: VPMOVDWZrrkz: [ 0.00 0.00 ]
+Key: VPMOVM: [ 0.00 0.00 ]
+Key: VPMOVMSKBYrr: [ 0.00 0.00 ]
+Key: VPMOVMSKBrr: [ 0.00 0.00 ]
+Key: VPMOVQ: [ 0.00 0.00 ]
+Key: VPMOVQBZ: [ 0.00 0.00 ]
+Key: VPMOVQBZmr: [ 0.00 0.00 ]
+Key: VPMOVQBZmrk: [ 0.00 0.00 ]
+Key: VPMOVQBZrr: [ 0.00 0.00 ]
+Key: VPMOVQBZrrk: [ 0.00 0.00 ]
+Key: VPMOVQBZrrkz: [ 0.00 0.00 ]
+Key: VPMOVQDZ: [ 0.00 0.00 ]
+Key: VPMOVQDZmr: [ 0.00 0.00 ]
+Key: VPMOVQDZmrk: [ 0.00 0.00 ]
+Key: VPMOVQDZrr: [ 0.00 0.00 ]
+Key: VPMOVQDZrrk: [ 0.00 0.00 ]
+Key: VPMOVQDZrrkz: [ 0.00 0.00 ]
+Key: VPMOVQWZ: [ 0.00 0.00 ]
+Key: VPMOVQWZmr: [ 0.00 0.00 ]
+Key: VPMOVQWZmrk: [ 0.00 0.00 ]
+Key: VPMOVQWZrr: [ 0.00 0.00 ]
+Key: VPMOVQWZrrk: [ 0.00 0.00 ]
+Key: VPMOVQWZrrkz: [ 0.00 0.00 ]
+Key: VPMOVSDBZ: [ 0.00 0.00 ]
+Key: VPMOVSDBZmr: [ 0.00 0.00 ]
+Key: VPMOVSDBZmrk: [ 0.00 0.00 ]
+Key: VPMOVSDBZrr: [ 0.00 0.00 ]
+Key: VPMOVSDBZrrk: [ 0.00 0.00 ]
+Key: VPMOVSDBZrrkz: [ 0.00 0.00 ]
+Key: VPMOVSDWZ: [ 0.00 0.00 ]
+Key: VPMOVSDWZmr: [ 0.00 0.00 ]
+Key: VPMOVSDWZmrk: [ 0.00 0.00 ]
+Key: VPMOVSDWZrr: [ 0.00 0.00 ]
+Key: VPMOVSDWZrrk: [ 0.00 0.00 ]
+Key: VPMOVSDWZrrkz: [ 0.00 0.00 ]
+Key: VPMOVSQBZ: [ 0.00 0.00 ]
+Key: VPMOVSQBZmr: [ 0.00 0.00 ]
+Key: VPMOVSQBZmrk: [ 0.00 0.00 ]
+Key: VPMOVSQBZrr: [ 0.00 0.00 ]
+Key: VPMOVSQBZrrk: [ 0.00 0.00 ]
+Key: VPMOVSQBZrrkz: [ 0.00 0.00 ]
+Key: VPMOVSQDZ: [ 0.00 0.00 ]
+Key: VPMOVSQDZmr: [ 0.00 0.00 ]
+Key: VPMOVSQDZmrk: [ 0.00 0.00 ]
+Key: VPMOVSQDZrr: [ 0.00 0.00 ]
+Key: VPMOVSQDZrrk: [ 0.00 0.00 ]
+Key: VPMOVSQDZrrkz: [ 0.00 0.00 ]
+Key: VPMOVSQWZ: [ 0.00 0.00 ]
+Key: VPMOVSQWZmr: [ 0.00 0.00 ]
+Key: VPMOVSQWZmrk: [ 0.00 0.00 ]
+Key: VPMOVSQWZrr: [ 0.00 0.00 ]
+Key: VPMOVSQWZrrk: [ 0.00 0.00 ]
+Key: VPMOVSQWZrrkz: [ 0.00 0.00 ]
+Key: VPMOVSWBZ: [ 0.00 0.00 ]
+Key: VPMOVSWBZmr: [ 0.00 0.00 ]
+Key: VPMOVSWBZmrk: [ 0.00 0.00 ]
+Key: VPMOVSWBZrr: [ 0.00 0.00 ]
+Key: VPMOVSWBZrrk: [ 0.00 0.00 ]
+Key: VPMOVSWBZrrkz: [ 0.00 0.00 ]
+Key: VPMOVSXBDYrm: [ 0.00 0.00 ]
+Key: VPMOVSXBDYrr: [ 0.00 0.00 ]
+Key: VPMOVSXBDZ: [ 0.00 0.00 ]
+Key: VPMOVSXBDZrm: [ 0.00 0.00 ]
+Key: VPMOVSXBDZrmk: [ 0.00 0.00 ]
+Key: VPMOVSXBDZrmkz: [ 0.00 0.00 ]
+Key: VPMOVSXBDZrr: [ 0.00 0.00 ]
+Key: VPMOVSXBDZrrk: [ 0.00 0.00 ]
+Key: VPMOVSXBDZrrkz: [ 0.00 0.00 ]
+Key: VPMOVSXBDrm: [ 0.00 0.00 ]
+Key: VPMOVSXBDrr: [ 0.00 0.00 ]
+Key: VPMOVSXBQYrm: [ 0.00 0.00 ]
+Key: VPMOVSXBQYrr: [ 0.00 0.00 ]
+Key: VPMOVSXBQZ: [ 0.00 0.00 ]
+Key: VPMOVSXBQZrm: [ 0.00 0.00 ]
+Key: VPMOVSXBQZrmk: [ 0.00 0.00 ]
+Key: VPMOVSXBQZrmkz: [ 0.00 0.00 ]
+Key: VPMOVSXBQZrr: [ 0.00 0.00 ]
+Key: VPMOVSXBQZrrk: [ 0.00 0.00 ]
+Key: VPMOVSXBQZrrkz: [ 0.00 0.00 ]
+Key: VPMOVSXBQrm: [ 0.00 0.00 ]
+Key: VPMOVSXBQrr: [ 0.00 0.00 ]
+Key: VPMOVSXBWYrm: [ 0.00 0.00 ]
+Key: VPMOVSXBWYrr: [ 0.00 0.00 ]
+Key: VPMOVSXBWZ: [ 0.00 0.00 ]
+Key: VPMOVSXBWZrm: [ 0.00 0.00 ]
+Key: VPMOVSXBWZrmk: [ 0.00 0.00 ]
+Key: VPMOVSXBWZrmkz: [ 0.00 0.00 ]
+Key: VPMOVSXBWZrr: [ 0.00 0.00 ]
+Key: VPMOVSXBWZrrk: [ 0.00 0.00 ]
+Key: VPMOVSXBWZrrkz: [ 0.00 0.00 ]
+Key: VPMOVSXBWrm: [ 0.00 0.00 ]
+Key: VPMOVSXBWrr: [ 0.00 0.00 ]
+Key: VPMOVSXDQYrm: [ 0.00 0.00 ]
+Key: VPMOVSXDQYrr: [ 0.00 0.00 ]
+Key: VPMOVSXDQZ: [ 0.00 0.00 ]
+Key: VPMOVSXDQZrm: [ 0.00 0.00 ]
+Key: VPMOVSXDQZrmk: [ 0.00 0.00 ]
+Key: VPMOVSXDQZrmkz: [ 0.00 0.00 ]
+Key: VPMOVSXDQZrr: [ 0.00 0.00 ]
+Key: VPMOVSXDQZrrk: [ 0.00 0.00 ]
+Key: VPMOVSXDQZrrkz: [ 0.00 0.00 ]
+Key: VPMOVSXDQrm: [ 0.00 0.00 ]
+Key: VPMOVSXDQrr: [ 0.00 0.00 ]
+Key: VPMOVSXWDYrm: [ 0.00 0.00 ]
+Key: VPMOVSXWDYrr: [ 0.00 0.00 ]
+Key: VPMOVSXWDZ: [ 0.00 0.00 ]
+Key: VPMOVSXWDZrm: [ 0.00 0.00 ]
+Key: VPMOVSXWDZrmk: [ 0.00 0.00 ]
+Key: VPMOVSXWDZrmkz: [ 0.00 0.00 ]
+Key: VPMOVSXWDZrr: [ 0.00 0.00 ]
+Key: VPMOVSXWDZrrk: [ 0.00 0.00 ]
+Key: VPMOVSXWDZrrkz: [ 0.00 0.00 ]
+Key: VPMOVSXWDrm: [ 0.00 0.00 ]
+Key: VPMOVSXWDrr: [ 0.00 0.00 ]
+Key: VPMOVSXWQYrm: [ 0.00 0.00 ]
+Key: VPMOVSXWQYrr: [ 0.00 0.00 ]
+Key: VPMOVSXWQZ: [ 0.00 0.00 ]
+Key: VPMOVSXWQZrm: [ 0.00 0.00 ]
+Key: VPMOVSXWQZrmk: [ 0.00 0.00 ]
+Key: VPMOVSXWQZrmkz: [ 0.00 0.00 ]
+Key: VPMOVSXWQZrr: [ 0.00 0.00 ]
+Key: VPMOVSXWQZrrk: [ 0.00 0.00 ]
+Key: VPMOVSXWQZrrkz: [ 0.00 0.00 ]
+Key: VPMOVSXWQrm: [ 0.00 0.00 ]
+Key: VPMOVSXWQrr: [ 0.00 0.00 ]
+Key: VPMOVUSDBZ: [ 0.00 0.00 ]
+Key: VPMOVUSDBZmr: [ 0.00 0.00 ]
+Key: VPMOVUSDBZmrk: [ 0.00 0.00 ]
+Key: VPMOVUSDBZrr: [ 0.00 0.00 ]
+Key: VPMOVUSDBZrrk: [ 0.00 0.00 ]
+Key: VPMOVUSDBZrrkz: [ 0.00 0.00 ]
+Key: VPMOVUSDWZ: [ 0.00 0.00 ]
+Key: VPMOVUSDWZmr: [ 0.00 0.00 ]
+Key: VPMOVUSDWZmrk: [ 0.00 0.00 ]
+Key: VPMOVUSDWZrr: [ 0.00 0.00 ]
+Key: VPMOVUSDWZrrk: [ 0.00 0.00 ]
+Key: VPMOVUSDWZrrkz: [ 0.00 0.00 ]
+Key: VPMOVUSQBZ: [ 0.00 0.00 ]
+Key: VPMOVUSQBZmr: [ 0.00 0.00 ]
+Key: VPMOVUSQBZmrk: [ 0.00 0.00 ]
+Key: VPMOVUSQBZrr: [ 0.00 0.00 ]
+Key: VPMOVUSQBZrrk: [ 0.00 0.00 ]
+Key: VPMOVUSQBZrrkz: [ 0.00 0.00 ]
+Key: VPMOVUSQDZ: [ 0.00 0.00 ]
+Key: VPMOVUSQDZmr: [ 0.00 0.00 ]
+Key: VPMOVUSQDZmrk: [ 0.00 0.00 ]
+Key: VPMOVUSQDZrr: [ 0.00 0.00 ]
+Key: VPMOVUSQDZrrk: [ 0.00 0.00 ]
+Key: VPMOVUSQDZrrkz: [ 0.00 0.00 ]
+Key: VPMOVUSQWZ: [ 0.00 0.00 ]
+Key: VPMOVUSQWZmr: [ 0.00 0.00 ]
+Key: VPMOVUSQWZmrk: [ 0.00 0.00 ]
+Key: VPMOVUSQWZrr: [ 0.00 0.00 ]
+Key: VPMOVUSQWZrrk: [ 0.00 0.00 ]
+Key: VPMOVUSQWZrrkz: [ 0.00 0.00 ]
+Key: VPMOVUSWBZ: [ 0.00 0.00 ]
+Key: VPMOVUSWBZmr: [ 0.00 0.00 ]
+Key: VPMOVUSWBZmrk: [ 0.00 0.00 ]
+Key: VPMOVUSWBZrr: [ 0.00 0.00 ]
+Key: VPMOVUSWBZrrk: [ 0.00 0.00 ]
+Key: VPMOVUSWBZrrkz: [ 0.00 0.00 ]
+Key: VPMOVW: [ 0.00 0.00 ]
+Key: VPMOVWBZ: [ 0.00 0.00 ]
+Key: VPMOVWBZmr: [ 0.00 0.00 ]
+Key: VPMOVWBZmrk: [ 0.00 0.00 ]
+Key: VPMOVWBZrr: [ 0.00 0.00 ]
+Key: VPMOVWBZrrk: [ 0.00 0.00 ]
+Key: VPMOVWBZrrkz: [ 0.00 0.00 ]
+Key: VPMOVZXBDYrm: [ 0.00 0.00 ]
+Key: VPMOVZXBDYrr: [ 0.00 0.00 ]
+Key: VPMOVZXBDZ: [ 0.00 0.00 ]
+Key: VPMOVZXBDZrm: [ 0.00 0.00 ]
+Key: VPMOVZXBDZrmk: [ 0.00 0.00 ]
+Key: VPMOVZXBDZrmkz: [ 0.00 0.00 ]
+Key: VPMOVZXBDZrr: [ 0.00 0.00 ]
+Key: VPMOVZXBDZrrk: [ 0.00 0.00 ]
+Key: VPMOVZXBDZrrkz: [ 0.00 0.00 ]
+Key: VPMOVZXBDrm: [ 0.00 0.00 ]
+Key: VPMOVZXBDrr: [ 0.00 0.00 ]
+Key: VPMOVZXBQYrm: [ 0.00 0.00 ]
+Key: VPMOVZXBQYrr: [ 0.00 0.00 ]
+Key: VPMOVZXBQZ: [ 0.00 0.00 ]
+Key: VPMOVZXBQZrm: [ 0.00 0.00 ]
+Key: VPMOVZXBQZrmk: [ 0.00 0.00 ]
+Key: VPMOVZXBQZrmkz: [ 0.00 0.00 ]
+Key: VPMOVZXBQZrr: [ 0.00 0.00 ]
+Key: VPMOVZXBQZrrk: [ 0.00 0.00 ]
+Key: VPMOVZXBQZrrkz: [ 0.00 0.00 ]
+Key: VPMOVZXBQrm: [ 0.00 0.00 ]
+Key: VPMOVZXBQrr: [ 0.00 0.00 ]
+Key: VPMOVZXBWYrm: [ 0.00 0.00 ]
+Key: VPMOVZXBWYrr: [ 0.00 0.00 ]
+Key: VPMOVZXBWZ: [ 0.00 0.00 ]
+Key: VPMOVZXBWZrm: [ 0.00 0.00 ]
+Key: VPMOVZXBWZrmk: [ 0.00 0.00 ]
+Key: VPMOVZXBWZrmkz: [ 0.00 0.00 ]
+Key: VPMOVZXBWZrr: [ 0.00 0.00 ]
+Key: VPMOVZXBWZrrk: [ 0.00 0.00 ]
+Key: VPMOVZXBWZrrkz: [ 0.00 0.00 ]
+Key: VPMOVZXBWrm: [ 0.00 0.00 ]
+Key: VPMOVZXBWrr: [ 0.00 0.00 ]
+Key: VPMOVZXDQYrm: [ 0.00 0.00 ]
+Key: VPMOVZXDQYrr: [ 0.00 0.00 ]
+Key: VPMOVZXDQZ: [ 0.00 0.00 ]
+Key: VPMOVZXDQZrm: [ 0.00 0.00 ]
+Key: VPMOVZXDQZrmk: [ 0.00 0.00 ]
+Key: VPMOVZXDQZrmkz: [ 0.00 0.00 ]
+Key: VPMOVZXDQZrr: [ 0.00 0.00 ]
+Key: VPMOVZXDQZrrk: [ 0.00 0.00 ]
+Key: VPMOVZXDQZrrkz: [ 0.00 0.00 ]
+Key: VPMOVZXDQrm: [ 0.00 0.00 ]
+Key: VPMOVZXDQrr: [ 0.00 0.00 ]
+Key: VPMOVZXWDYrm: [ 0.00 0.00 ]
+Key: VPMOVZXWDYrr: [ 0.00 0.00 ]
+Key: VPMOVZXWDZ: [ 0.00 0.00 ]
+Key: VPMOVZXWDZrm: [ 0.00 0.00 ]
+Key: VPMOVZXWDZrmk: [ 0.00 0.00 ]
+Key: VPMOVZXWDZrmkz: [ 0.00 0.00 ]
+Key: VPMOVZXWDZrr: [ 0.00 0.00 ]
+Key: VPMOVZXWDZrrk: [ 0.00 0.00 ]
+Key: VPMOVZXWDZrrkz: [ 0.00 0.00 ]
+Key: VPMOVZXWDrm: [ 0.00 0.00 ]
+Key: VPMOVZXWDrr: [ 0.00 0.00 ]
+Key: VPMOVZXWQYrm: [ 0.00 0.00 ]
+Key: VPMOVZXWQYrr: [ 0.00 0.00 ]
+Key: VPMOVZXWQZ: [ 0.00 0.00 ]
+Key: VPMOVZXWQZrm: [ 0.00 0.00 ]
+Key: VPMOVZXWQZrmk: [ 0.00 0.00 ]
+Key: VPMOVZXWQZrmkz: [ 0.00 0.00 ]
+Key: VPMOVZXWQZrr: [ 0.00 0.00 ]
+Key: VPMOVZXWQZrrk: [ 0.00 0.00 ]
+Key: VPMOVZXWQZrrkz: [ 0.00 0.00 ]
+Key: VPMOVZXWQrm: [ 0.00 0.00 ]
+Key: VPMOVZXWQrr: [ 0.00 0.00 ]
+Key: VPMULDQYrm: [ 0.00 0.00 ]
+Key: VPMULDQYrr: [ 0.00 0.00 ]
+Key: VPMULDQZ: [ 0.00 0.00 ]
+Key: VPMULDQZrm: [ 0.00 0.00 ]
+Key: VPMULDQZrmb: [ 0.00 0.00 ]
+Key: VPMULDQZrmbk: [ 0.00 0.00 ]
+Key: VPMULDQZrmbkz: [ 0.00 0.00 ]
+Key: VPMULDQZrmk: [ 0.00 0.00 ]
+Key: VPMULDQZrmkz: [ 0.00 0.00 ]
+Key: VPMULDQZrr: [ 0.00 0.00 ]
+Key: VPMULDQZrrk: [ 0.00 0.00 ]
+Key: VPMULDQZrrkz: [ 0.00 0.00 ]
+Key: VPMULDQrm: [ 0.00 0.00 ]
+Key: VPMULDQrr: [ 0.00 0.00 ]
+Key: VPMULHRSWYrm: [ 0.00 0.00 ]
+Key: VPMULHRSWYrr: [ 0.00 0.00 ]
+Key: VPMULHRSWZ: [ 0.00 0.00 ]
+Key: VPMULHRSWZrm: [ 0.00 0.00 ]
+Key: VPMULHRSWZrmk: [ 0.00 0.00 ]
+Key: VPMULHRSWZrmkz: [ 0.00 0.00 ]
+Key: VPMULHRSWZrr: [ 0.00 0.00 ]
+Key: VPMULHRSWZrrk: [ 0.00 0.00 ]
+Key: VPMULHRSWZrrkz: [ 0.00 0.00 ]
+Key: VPMULHRSWrm: [ 0.00 0.00 ]
+Key: VPMULHRSWrr: [ 0.00 0.00 ]
+Key: VPMULHUWYrm: [ 0.00 0.00 ]
+Key: VPMULHUWYrr: [ 0.00 0.00 ]
+Key: VPMULHUWZ: [ 0.00 0.00 ]
+Key: VPMULHUWZrm: [ 0.00 0.00 ]
+Key: VPMULHUWZrmk: [ 0.00 0.00 ]
+Key: VPMULHUWZrmkz: [ 0.00 0.00 ]
+Key: VPMULHUWZrr: [ 0.00 0.00 ]
+Key: VPMULHUWZrrk: [ 0.00 0.00 ]
+Key: VPMULHUWZrrkz: [ 0.00 0.00 ]
+Key: VPMULHUWrm: [ 0.00 0.00 ]
+Key: VPMULHUWrr: [ 0.00 0.00 ]
+Key: VPMULHWYrm: [ 0.00 0.00 ]
+Key: VPMULHWYrr: [ 0.00 0.00 ]
+Key: VPMULHWZ: [ 0.00 0.00 ]
+Key: VPMULHWZrm: [ 0.00 0.00 ]
+Key: VPMULHWZrmk: [ 0.00 0.00 ]
+Key: VPMULHWZrmkz: [ 0.00 0.00 ]
+Key: VPMULHWZrr: [ 0.00 0.00 ]
+Key: VPMULHWZrrk: [ 0.00 0.00 ]
+Key: VPMULHWZrrkz: [ 0.00 0.00 ]
+Key: VPMULHWrm: [ 0.00 0.00 ]
+Key: VPMULHWrr: [ 0.00 0.00 ]
+Key: VPMULLDYrm: [ 0.00 0.00 ]
+Key: VPMULLDYrr: [ 0.00 0.00 ]
+Key: VPMULLDZ: [ 0.00 0.00 ]
+Key: VPMULLDZrm: [ 0.00 0.00 ]
+Key: VPMULLDZrmb: [ 0.00 0.00 ]
+Key: VPMULLDZrmbk: [ 0.00 0.00 ]
+Key: VPMULLDZrmbkz: [ 0.00 0.00 ]
+Key: VPMULLDZrmk: [ 0.00 0.00 ]
+Key: VPMULLDZrmkz: [ 0.00 0.00 ]
+Key: VPMULLDZrr: [ 0.00 0.00 ]
+Key: VPMULLDZrrk: [ 0.00 0.00 ]
+Key: VPMULLDZrrkz: [ 0.00 0.00 ]
+Key: VPMULLDrm: [ 0.00 0.00 ]
+Key: VPMULLDrr: [ 0.00 0.00 ]
+Key: VPMULLQZ: [ 0.00 0.00 ]
+Key: VPMULLQZrm: [ 0.00 0.00 ]
+Key: VPMULLQZrmb: [ 0.00 0.00 ]
+Key: VPMULLQZrmbk: [ 0.00 0.00 ]
+Key: VPMULLQZrmbkz: [ 0.00 0.00 ]
+Key: VPMULLQZrmk: [ 0.00 0.00 ]
+Key: VPMULLQZrmkz: [ 0.00 0.00 ]
+Key: VPMULLQZrr: [ 0.00 0.00 ]
+Key: VPMULLQZrrk: [ 0.00 0.00 ]
+Key: VPMULLQZrrkz: [ 0.00 0.00 ]
+Key: VPMULLWYrm: [ 0.00 0.00 ]
+Key: VPMULLWYrr: [ 0.00 0.00 ]
+Key: VPMULLWZ: [ 0.00 0.00 ]
+Key: VPMULLWZrm: [ 0.00 0.00 ]
+Key: VPMULLWZrmk: [ 0.00 0.00 ]
+Key: VPMULLWZrmkz: [ 0.00 0.00 ]
+Key: VPMULLWZrr: [ 0.00 0.00 ]
+Key: VPMULLWZrrk: [ 0.00 0.00 ]
+Key: VPMULLWZrrkz: [ 0.00 0.00 ]
+Key: VPMULLWrm: [ 0.00 0.00 ]
+Key: VPMULLWrr: [ 0.00 0.00 ]
+Key: VPMULTISHIFTQBZ: [ 0.00 0.00 ]
+Key: VPMULTISHIFTQBZrm: [ 0.00 0.00 ]
+Key: VPMULTISHIFTQBZrmb: [ 0.00 0.00 ]
+Key: VPMULTISHIFTQBZrmbk: [ 0.00 0.00 ]
+Key: VPMULTISHIFTQBZrmbkz: [ 0.00 0.00 ]
+Key: VPMULTISHIFTQBZrmk: [ 0.00 0.00 ]
+Key: VPMULTISHIFTQBZrmkz: [ 0.00 0.00 ]
+Key: VPMULTISHIFTQBZrr: [ 0.00 0.00 ]
+Key: VPMULTISHIFTQBZrrk: [ 0.00 0.00 ]
+Key: VPMULTISHIFTQBZrrkz: [ 0.00 0.00 ]
+Key: VPMULUDQYrm: [ 0.00 0.00 ]
+Key: VPMULUDQYrr: [ 0.00 0.00 ]
+Key: VPMULUDQZ: [ 0.00 0.00 ]
+Key: VPMULUDQZrm: [ 0.00 0.00 ]
+Key: VPMULUDQZrmb: [ 0.00 0.00 ]
+Key: VPMULUDQZrmbk: [ 0.00 0.00 ]
+Key: VPMULUDQZrmbkz: [ 0.00 0.00 ]
+Key: VPMULUDQZrmk: [ 0.00 0.00 ]
+Key: VPMULUDQZrmkz: [ 0.00 0.00 ]
+Key: VPMULUDQZrr: [ 0.00 0.00 ]
+Key: VPMULUDQZrrk: [ 0.00 0.00 ]
+Key: VPMULUDQZrrkz: [ 0.00 0.00 ]
+Key: VPMULUDQrm: [ 0.00 0.00 ]
+Key: VPMULUDQrr: [ 0.00 0.00 ]
+Key: VPOPCNTBZ: [ 0.00 0.00 ]
+Key: VPOPCNTBZrm: [ 0.00 0.00 ]
+Key: VPOPCNTBZrmk: [ 0.00 0.00 ]
+Key: VPOPCNTBZrmkz: [ 0.00 0.00 ]
+Key: VPOPCNTBZrr: [ 0.00 0.00 ]
+Key: VPOPCNTBZrrk: [ 0.00 0.00 ]
+Key: VPOPCNTBZrrkz: [ 0.00 0.00 ]
+Key: VPOPCNTDZ: [ 0.00 0.00 ]
+Key: VPOPCNTDZrm: [ 0.00 0.00 ]
+Key: VPOPCNTDZrmb: [ 0.00 0.00 ]
+Key: VPOPCNTDZrmbk: [ 0.00 0.00 ]
+Key: VPOPCNTDZrmbkz: [ 0.00 0.00 ]
+Key: VPOPCNTDZrmk: [ 0.00 0.00 ]
+Key: VPOPCNTDZrmkz: [ 0.00 0.00 ]
+Key: VPOPCNTDZrr: [ 0.00 0.00 ]
+Key: VPOPCNTDZrrk: [ 0.00 0.00 ]
+Key: VPOPCNTDZrrkz: [ 0.00 0.00 ]
+Key: VPOPCNTQZ: [ 0.00 0.00 ]
+Key: VPOPCNTQZrm: [ 0.00 0.00 ]
+Key: VPOPCNTQZrmb: [ 0.00 0.00 ]
+Key: VPOPCNTQZrmbk: [ 0.00 0.00 ]
+Key: VPOPCNTQZrmbkz: [ 0.00 0.00 ]
+Key: VPOPCNTQZrmk: [ 0.00 0.00 ]
+Key: VPOPCNTQZrmkz: [ 0.00 0.00 ]
+Key: VPOPCNTQZrr: [ 0.00 0.00 ]
+Key: VPOPCNTQZrrk: [ 0.00 0.00 ]
+Key: VPOPCNTQZrrkz: [ 0.00 0.00 ]
+Key: VPOPCNTWZ: [ 0.00 0.00 ]
+Key: VPOPCNTWZrm: [ 0.00 0.00 ]
+Key: VPOPCNTWZrmk: [ 0.00 0.00 ]
+Key: VPOPCNTWZrmkz: [ 0.00 0.00 ]
+Key: VPOPCNTWZrr: [ 0.00 0.00 ]
+Key: VPOPCNTWZrrk: [ 0.00 0.00 ]
+Key: VPOPCNTWZrrkz: [ 0.00 0.00 ]
+Key: VPORDZ: [ 0.00 0.00 ]
+Key: VPORDZrm: [ 0.00 0.00 ]
+Key: VPORDZrmb: [ 0.00 0.00 ]
+Key: VPORDZrmbk: [ 0.00 0.00 ]
+Key: VPORDZrmbkz: [ 0.00 0.00 ]
+Key: VPORDZrmk: [ 0.00 0.00 ]
+Key: VPORDZrmkz: [ 0.00 0.00 ]
+Key: VPORDZrr: [ 0.00 0.00 ]
+Key: VPORDZrrk: [ 0.00 0.00 ]
+Key: VPORDZrrkz: [ 0.00 0.00 ]
+Key: VPORQZ: [ 0.00 0.00 ]
+Key: VPORQZrm: [ 0.00 0.00 ]
+Key: VPORQZrmb: [ 0.00 0.00 ]
+Key: VPORQZrmbk: [ 0.00 0.00 ]
+Key: VPORQZrmbkz: [ 0.00 0.00 ]
+Key: VPORQZrmk: [ 0.00 0.00 ]
+Key: VPORQZrmkz: [ 0.00 0.00 ]
+Key: VPORQZrr: [ 0.00 0.00 ]
+Key: VPORQZrrk: [ 0.00 0.00 ]
+Key: VPORQZrrkz: [ 0.00 0.00 ]
+Key: VPORYrm: [ 0.00 0.00 ]
+Key: VPORYrr: [ 0.00 0.00 ]
+Key: VPORrm: [ 0.00 0.00 ]
+Key: VPORrr: [ 0.00 0.00 ]
+Key: VPPERMrmr: [ 0.00 0.00 ]
+Key: VPPERMrrm: [ 0.00 0.00 ]
+Key: VPPERMrrr: [ 0.00 0.00 ]
+Key: VPPERMrrr_REV: [ 0.00 0.00 ]
+Key: VPROLDZ: [ 0.00 0.00 ]
+Key: VPROLDZmbi: [ 0.00 0.00 ]
+Key: VPROLDZmbik: [ 0.00 0.00 ]
+Key: VPROLDZmbikz: [ 0.00 0.00 ]
+Key: VPROLDZmi: [ 0.00 0.00 ]
+Key: VPROLDZmik: [ 0.00 0.00 ]
+Key: VPROLDZmikz: [ 0.00 0.00 ]
+Key: VPROLDZri: [ 0.00 0.00 ]
+Key: VPROLDZrik: [ 0.00 0.00 ]
+Key: VPROLDZrikz: [ 0.00 0.00 ]
+Key: VPROLQZ: [ 0.00 0.00 ]
+Key: VPROLQZmbi: [ 0.00 0.00 ]
+Key: VPROLQZmbik: [ 0.00 0.00 ]
+Key: VPROLQZmbikz: [ 0.00 0.00 ]
+Key: VPROLQZmi: [ 0.00 0.00 ]
+Key: VPROLQZmik: [ 0.00 0.00 ]
+Key: VPROLQZmikz: [ 0.00 0.00 ]
+Key: VPROLQZri: [ 0.00 0.00 ]
+Key: VPROLQZrik: [ 0.00 0.00 ]
+Key: VPROLQZrikz: [ 0.00 0.00 ]
+Key: VPROLVDZ: [ 0.00 0.00 ]
+Key: VPROLVDZrm: [ 0.00 0.00 ]
+Key: VPROLVDZrmb: [ 0.00 0.00 ]
+Key: VPROLVDZrmbk: [ 0.00 0.00 ]
+Key: VPROLVDZrmbkz: [ 0.00 0.00 ]
+Key: VPROLVDZrmk: [ 0.00 0.00 ]
+Key: VPROLVDZrmkz: [ 0.00 0.00 ]
+Key: VPROLVDZrr: [ 0.00 0.00 ]
+Key: VPROLVDZrrk: [ 0.00 0.00 ]
+Key: VPROLVDZrrkz: [ 0.00 0.00 ]
+Key: VPROLVQZ: [ 0.00 0.00 ]
+Key: VPROLVQZrm: [ 0.00 0.00 ]
+Key: VPROLVQZrmb: [ 0.00 0.00 ]
+Key: VPROLVQZrmbk: [ 0.00 0.00 ]
+Key: VPROLVQZrmbkz: [ 0.00 0.00 ]
+Key: VPROLVQZrmk: [ 0.00 0.00 ]
+Key: VPROLVQZrmkz: [ 0.00 0.00 ]
+Key: VPROLVQZrr: [ 0.00 0.00 ]
+Key: VPROLVQZrrk: [ 0.00 0.00 ]
+Key: VPROLVQZrrkz: [ 0.00 0.00 ]
+Key: VPRORDZ: [ 0.00 0.00 ]
+Key: VPRORDZmbi: [ 0.00 0.00 ]
+Key: VPRORDZmbik: [ 0.00 0.00 ]
+Key: VPRORDZmbikz: [ 0.00 0.00 ]
+Key: VPRORDZmi: [ 0.00 0.00 ]
+Key: VPRORDZmik: [ 0.00 0.00 ]
+Key: VPRORDZmikz: [ 0.00 0.00 ]
+Key: VPRORDZri: [ 0.00 0.00 ]
+Key: VPRORDZrik: [ 0.00 0.00 ]
+Key: VPRORDZrikz: [ 0.00 0.00 ]
+Key: VPRORQZ: [ 0.00 0.00 ]
+Key: VPRORQZmbi: [ 0.00 0.00 ]
+Key: VPRORQZmbik: [ 0.00 0.00 ]
+Key: VPRORQZmbikz: [ 0.00 0.00 ]
+Key: VPRORQZmi: [ 0.00 0.00 ]
+Key: VPRORQZmik: [ 0.00 0.00 ]
+Key: VPRORQZmikz: [ 0.00 0.00 ]
+Key: VPRORQZri: [ 0.00 0.00 ]
+Key: VPRORQZrik: [ 0.00 0.00 ]
+Key: VPRORQZrikz: [ 0.00 0.00 ]
+Key: VPRORVDZ: [ 0.00 0.00 ]
+Key: VPRORVDZrm: [ 0.00 0.00 ]
+Key: VPRORVDZrmb: [ 0.00 0.00 ]
+Key: VPRORVDZrmbk: [ 0.00 0.00 ]
+Key: VPRORVDZrmbkz: [ 0.00 0.00 ]
+Key: VPRORVDZrmk: [ 0.00 0.00 ]
+Key: VPRORVDZrmkz: [ 0.00 0.00 ]
+Key: VPRORVDZrr: [ 0.00 0.00 ]
+Key: VPRORVDZrrk: [ 0.00 0.00 ]
+Key: VPRORVDZrrkz: [ 0.00 0.00 ]
+Key: VPRORVQZ: [ 0.00 0.00 ]
+Key: VPRORVQZrm: [ 0.00 0.00 ]
+Key: VPRORVQZrmb: [ 0.00 0.00 ]
+Key: VPRORVQZrmbk: [ 0.00 0.00 ]
+Key: VPRORVQZrmbkz: [ 0.00 0.00 ]
+Key: VPRORVQZrmk: [ 0.00 0.00 ]
+Key: VPRORVQZrmkz: [ 0.00 0.00 ]
+Key: VPRORVQZrr: [ 0.00 0.00 ]
+Key: VPRORVQZrrk: [ 0.00 0.00 ]
+Key: VPRORVQZrrkz: [ 0.00 0.00 ]
+Key: VPROTBmi: [ 0.00 0.00 ]
+Key: VPROTBmr: [ 0.00 0.00 ]
+Key: VPROTBri: [ 0.00 0.00 ]
+Key: VPROTBrm: [ 0.00 0.00 ]
+Key: VPROTBrr: [ 0.00 0.00 ]
+Key: VPROTBrr_REV: [ 0.00 0.00 ]
+Key: VPROTDmi: [ 0.00 0.00 ]
+Key: VPROTDmr: [ 0.00 0.00 ]
+Key: VPROTDri: [ 0.00 0.00 ]
+Key: VPROTDrm: [ 0.00 0.00 ]
+Key: VPROTDrr: [ 0.00 0.00 ]
+Key: VPROTDrr_REV: [ 0.00 0.00 ]
+Key: VPROTQmi: [ 0.00 0.00 ]
+Key: VPROTQmr: [ 0.00 0.00 ]
+Key: VPROTQri: [ 0.00 0.00 ]
+Key: VPROTQrm: [ 0.00 0.00 ]
+Key: VPROTQrr: [ 0.00 0.00 ]
+Key: VPROTQrr_REV: [ 0.00 0.00 ]
+Key: VPROTWmi: [ 0.00 0.00 ]
+Key: VPROTWmr: [ 0.00 0.00 ]
+Key: VPROTWri: [ 0.00 0.00 ]
+Key: VPROTWrm: [ 0.00 0.00 ]
+Key: VPROTWrr: [ 0.00 0.00 ]
+Key: VPROTWrr_REV: [ 0.00 0.00 ]
+Key: VPSADBWYrm: [ 0.00 0.00 ]
+Key: VPSADBWYrr: [ 0.00 0.00 ]
+Key: VPSADBWZ: [ 0.00 0.00 ]
+Key: VPSADBWZrm: [ 0.00 0.00 ]
+Key: VPSADBWZrr: [ 0.00 0.00 ]
+Key: VPSADBWrm: [ 0.00 0.00 ]
+Key: VPSADBWrr: [ 0.00 0.00 ]
+Key: VPSCATTERDDZ: [ 0.00 0.00 ]
+Key: VPSCATTERDDZmr: [ 0.00 0.00 ]
+Key: VPSCATTERDQZ: [ 0.00 0.00 ]
+Key: VPSCATTERDQZmr: [ 0.00 0.00 ]
+Key: VPSCATTERQDZ: [ 0.00 0.00 ]
+Key: VPSCATTERQDZmr: [ 0.00 0.00 ]
+Key: VPSCATTERQQZ: [ 0.00 0.00 ]
+Key: VPSCATTERQQZmr: [ 0.00 0.00 ]
+Key: VPSHABmr: [ 0.00 0.00 ]
+Key: VPSHABrm: [ 0.00 0.00 ]
+Key: VPSHABrr: [ 0.00 0.00 ]
+Key: VPSHABrr_REV: [ 0.00 0.00 ]
+Key: VPSHADmr: [ 0.00 0.00 ]
+Key: VPSHADrm: [ 0.00 0.00 ]
+Key: VPSHADrr: [ 0.00 0.00 ]
+Key: VPSHADrr_REV: [ 0.00 0.00 ]
+Key: VPSHAQmr: [ 0.00 0.00 ]
+Key: VPSHAQrm: [ 0.00 0.00 ]
+Key: VPSHAQrr: [ 0.00 0.00 ]
+Key: VPSHAQrr_REV: [ 0.00 0.00 ]
+Key: VPSHAWmr: [ 0.00 0.00 ]
+Key: VPSHAWrm: [ 0.00 0.00 ]
+Key: VPSHAWrr: [ 0.00 0.00 ]
+Key: VPSHAWrr_REV: [ 0.00 0.00 ]
+Key: VPSHLBmr: [ 0.00 0.00 ]
+Key: VPSHLBrm: [ 0.00 0.00 ]
+Key: VPSHLBrr: [ 0.00 0.00 ]
+Key: VPSHLBrr_REV: [ 0.00 0.00 ]
+Key: VPSHLDDZ: [ 0.00 0.00 ]
+Key: VPSHLDDZrmbi: [ 0.00 0.00 ]
+Key: VPSHLDDZrmbik: [ 0.00 0.00 ]
+Key: VPSHLDDZrmbikz: [ 0.00 0.00 ]
+Key: VPSHLDDZrmi: [ 0.00 0.00 ]
+Key: VPSHLDDZrmik: [ 0.00 0.00 ]
+Key: VPSHLDDZrmikz: [ 0.00 0.00 ]
+Key: VPSHLDDZrri: [ 0.00 0.00 ]
+Key: VPSHLDDZrrik: [ 0.00 0.00 ]
+Key: VPSHLDDZrrikz: [ 0.00 0.00 ]
+Key: VPSHLDQZ: [ 0.00 0.00 ]
+Key: VPSHLDQZrmbi: [ 0.00 0.00 ]
+Key: VPSHLDQZrmbik: [ 0.00 0.00 ]
+Key: VPSHLDQZrmbikz: [ 0.00 0.00 ]
+Key: VPSHLDQZrmi: [ 0.00 0.00 ]
+Key: VPSHLDQZrmik: [ 0.00 0.00 ]
+Key: VPSHLDQZrmikz: [ 0.00 0.00 ]
+Key: VPSHLDQZrri: [ 0.00 0.00 ]
+Key: VPSHLDQZrrik: [ 0.00 0.00 ]
+Key: VPSHLDQZrrikz: [ 0.00 0.00 ]
+Key: VPSHLDVDZ: [ 0.00 0.00 ]
+Key: VPSHLDVDZm: [ 0.00 0.00 ]
+Key: VPSHLDVDZmb: [ 0.00 0.00 ]
+Key: VPSHLDVDZmbk: [ 0.00 0.00 ]
+Key: VPSHLDVDZmbkz: [ 0.00 0.00 ]
+Key: VPSHLDVDZmk: [ 0.00 0.00 ]
+Key: VPSHLDVDZmkz: [ 0.00 0.00 ]
+Key: VPSHLDVDZr: [ 0.00 0.00 ]
+Key: VPSHLDVDZrk: [ 0.00 0.00 ]
+Key: VPSHLDVDZrkz: [ 0.00 0.00 ]
+Key: VPSHLDVQZ: [ 0.00 0.00 ]
+Key: VPSHLDVQZm: [ 0.00 0.00 ]
+Key: VPSHLDVQZmb: [ 0.00 0.00 ]
+Key: VPSHLDVQZmbk: [ 0.00 0.00 ]
+Key: VPSHLDVQZmbkz: [ 0.00 0.00 ]
+Key: VPSHLDVQZmk: [ 0.00 0.00 ]
+Key: VPSHLDVQZmkz: [ 0.00 0.00 ]
+Key: VPSHLDVQZr: [ 0.00 0.00 ]
+Key: VPSHLDVQZrk: [ 0.00 0.00 ]
+Key: VPSHLDVQZrkz: [ 0.00 0.00 ]
+Key: VPSHLDVWZ: [ 0.00 0.00 ]
+Key: VPSHLDVWZm: [ 0.00 0.00 ]
+Key: VPSHLDVWZmk: [ 0.00 0.00 ]
+Key: VPSHLDVWZmkz: [ 0.00 0.00 ]
+Key: VPSHLDVWZr: [ 0.00 0.00 ]
+Key: VPSHLDVWZrk: [ 0.00 0.00 ]
+Key: VPSHLDVWZrkz: [ 0.00 0.00 ]
+Key: VPSHLDWZ: [ 0.00 0.00 ]
+Key: VPSHLDWZrmi: [ 0.00 0.00 ]
+Key: VPSHLDWZrmik: [ 0.00 0.00 ]
+Key: VPSHLDWZrmikz: [ 0.00 0.00 ]
+Key: VPSHLDWZrri: [ 0.00 0.00 ]
+Key: VPSHLDWZrrik: [ 0.00 0.00 ]
+Key: VPSHLDWZrrikz: [ 0.00 0.00 ]
+Key: VPSHLDmr: [ 0.00 0.00 ]
+Key: VPSHLDrm: [ 0.00 0.00 ]
+Key: VPSHLDrr: [ 0.00 0.00 ]
+Key: VPSHLDrr_REV: [ 0.00 0.00 ]
+Key: VPSHLQmr: [ 0.00 0.00 ]
+Key: VPSHLQrm: [ 0.00 0.00 ]
+Key: VPSHLQrr: [ 0.00 0.00 ]
+Key: VPSHLQrr_REV: [ 0.00 0.00 ]
+Key: VPSHLWmr: [ 0.00 0.00 ]
+Key: VPSHLWrm: [ 0.00 0.00 ]
+Key: VPSHLWrr: [ 0.00 0.00 ]
+Key: VPSHLWrr_REV: [ 0.00 0.00 ]
+Key: VPSHRDDZ: [ 0.00 0.00 ]
+Key: VPSHRDDZrmbi: [ 0.00 0.00 ]
+Key: VPSHRDDZrmbik: [ 0.00 0.00 ]
+Key: VPSHRDDZrmbikz: [ 0.00 0.00 ]
+Key: VPSHRDDZrmi: [ 0.00 0.00 ]
+Key: VPSHRDDZrmik: [ 0.00 0.00 ]
+Key: VPSHRDDZrmikz: [ 0.00 0.00 ]
+Key: VPSHRDDZrri: [ 0.00 0.00 ]
+Key: VPSHRDDZrrik: [ 0.00 0.00 ]
+Key: VPSHRDDZrrikz: [ 0.00 0.00 ]
+Key: VPSHRDQZ: [ 0.00 0.00 ]
+Key: VPSHRDQZrmbi: [ 0.00 0.00 ]
+Key: VPSHRDQZrmbik: [ 0.00 0.00 ]
+Key: VPSHRDQZrmbikz: [ 0.00 0.00 ]
+Key: VPSHRDQZrmi: [ 0.00 0.00 ]
+Key: VPSHRDQZrmik: [ 0.00 0.00 ]
+Key: VPSHRDQZrmikz: [ 0.00 0.00 ]
+Key: VPSHRDQZrri: [ 0.00 0.00 ]
+Key: VPSHRDQZrrik: [ 0.00 0.00 ]
+Key: VPSHRDQZrrikz: [ 0.00 0.00 ]
+Key: VPSHRDVDZ: [ 0.00 0.00 ]
+Key: VPSHRDVDZm: [ 0.00 0.00 ]
+Key: VPSHRDVDZmb: [ 0.00 0.00 ]
+Key: VPSHRDVDZmbk: [ 0.00 0.00 ]
+Key: VPSHRDVDZmbkz: [ 0.00 0.00 ]
+Key: VPSHRDVDZmk: [ 0.00 0.00 ]
+Key: VPSHRDVDZmkz: [ 0.00 0.00 ]
+Key: VPSHRDVDZr: [ 0.00 0.00 ]
+Key: VPSHRDVDZrk: [ 0.00 0.00 ]
+Key: VPSHRDVDZrkz: [ 0.00 0.00 ]
+Key: VPSHRDVQZ: [ 0.00 0.00 ]
+Key: VPSHRDVQZm: [ 0.00 0.00 ]
+Key: VPSHRDVQZmb: [ 0.00 0.00 ]
+Key: VPSHRDVQZmbk: [ 0.00 0.00 ]
+Key: VPSHRDVQZmbkz: [ 0.00 0.00 ]
+Key: VPSHRDVQZmk: [ 0.00 0.00 ]
+Key: VPSHRDVQZmkz: [ 0.00 0.00 ]
+Key: VPSHRDVQZr: [ 0.00 0.00 ]
+Key: VPSHRDVQZrk: [ 0.00 0.00 ]
+Key: VPSHRDVQZrkz: [ 0.00 0.00 ]
+Key: VPSHRDVWZ: [ 0.00 0.00 ]
+Key: VPSHRDVWZm: [ 0.00 0.00 ]
+Key: VPSHRDVWZmk: [ 0.00 0.00 ]
+Key: VPSHRDVWZmkz: [ 0.00 0.00 ]
+Key: VPSHRDVWZr: [ 0.00 0.00 ]
+Key: VPSHRDVWZrk: [ 0.00 0.00 ]
+Key: VPSHRDVWZrkz: [ 0.00 0.00 ]
+Key: VPSHRDWZ: [ 0.00 0.00 ]
+Key: VPSHRDWZrmi: [ 0.00 0.00 ]
+Key: VPSHRDWZrmik: [ 0.00 0.00 ]
+Key: VPSHRDWZrmikz: [ 0.00 0.00 ]
+Key: VPSHRDWZrri: [ 0.00 0.00 ]
+Key: VPSHRDWZrrik: [ 0.00 0.00 ]
+Key: VPSHRDWZrrikz: [ 0.00 0.00 ]
+Key: VPSHUFBITQMBZ: [ 0.00 0.00 ]
+Key: VPSHUFBITQMBZrm: [ 0.00 0.00 ]
+Key: VPSHUFBITQMBZrmk: [ 0.00 0.00 ]
+Key: VPSHUFBITQMBZrr: [ 0.00 0.00 ]
+Key: VPSHUFBITQMBZrrk: [ 0.00 0.00 ]
+Key: VPSHUFBYrm: [ 0.00 0.00 ]
+Key: VPSHUFBYrr: [ 0.00 0.00 ]
+Key: VPSHUFBZ: [ 0.00 0.00 ]
+Key: VPSHUFBZrm: [ 0.00 0.00 ]
+Key: VPSHUFBZrmk: [ 0.00 0.00 ]
+Key: VPSHUFBZrmkz: [ 0.00 0.00 ]
+Key: VPSHUFBZrr: [ 0.00 0.00 ]
+Key: VPSHUFBZrrk: [ 0.00 0.00 ]
+Key: VPSHUFBZrrkz: [ 0.00 0.00 ]
+Key: VPSHUFBrm: [ 0.00 0.00 ]
+Key: VPSHUFBrr: [ 0.00 0.00 ]
+Key: VPSHUFDYmi: [ 0.00 0.00 ]
+Key: VPSHUFDYri: [ 0.00 0.00 ]
+Key: VPSHUFDZ: [ 0.00 0.00 ]
+Key: VPSHUFDZmbi: [ 0.00 0.00 ]
+Key: VPSHUFDZmbik: [ 0.00 0.00 ]
+Key: VPSHUFDZmbikz: [ 0.00 0.00 ]
+Key: VPSHUFDZmi: [ 0.00 0.00 ]
+Key: VPSHUFDZmik: [ 0.00 0.00 ]
+Key: VPSHUFDZmikz: [ 0.00 0.00 ]
+Key: VPSHUFDZri: [ 0.00 0.00 ]
+Key: VPSHUFDZrik: [ 0.00 0.00 ]
+Key: VPSHUFDZrikz: [ 0.00 0.00 ]
+Key: VPSHUFDmi: [ 0.00 0.00 ]
+Key: VPSHUFDri: [ 0.00 0.00 ]
+Key: VPSHUFHWYmi: [ 0.00 0.00 ]
+Key: VPSHUFHWYri: [ 0.00 0.00 ]
+Key: VPSHUFHWZ: [ 0.00 0.00 ]
+Key: VPSHUFHWZmi: [ 0.00 0.00 ]
+Key: VPSHUFHWZmik: [ 0.00 0.00 ]
+Key: VPSHUFHWZmikz: [ 0.00 0.00 ]
+Key: VPSHUFHWZri: [ 0.00 0.00 ]
+Key: VPSHUFHWZrik: [ 0.00 0.00 ]
+Key: VPSHUFHWZrikz: [ 0.00 0.00 ]
+Key: VPSHUFHWmi: [ 0.00 0.00 ]
+Key: VPSHUFHWri: [ 0.00 0.00 ]
+Key: VPSHUFLWYmi: [ 0.00 0.00 ]
+Key: VPSHUFLWYri: [ 0.00 0.00 ]
+Key: VPSHUFLWZ: [ 0.00 0.00 ]
+Key: VPSHUFLWZmi: [ 0.00 0.00 ]
+Key: VPSHUFLWZmik: [ 0.00 0.00 ]
+Key: VPSHUFLWZmikz: [ 0.00 0.00 ]
+Key: VPSHUFLWZri: [ 0.00 0.00 ]
+Key: VPSHUFLWZrik: [ 0.00 0.00 ]
+Key: VPSHUFLWZrikz: [ 0.00 0.00 ]
+Key: VPSHUFLWmi: [ 0.00 0.00 ]
+Key: VPSHUFLWri: [ 0.00 0.00 ]
+Key: VPSIGNBYrm: [ 0.00 0.00 ]
+Key: VPSIGNBYrr: [ 0.00 0.00 ]
+Key: VPSIGNBrm: [ 0.00 0.00 ]
+Key: VPSIGNBrr: [ 0.00 0.00 ]
+Key: VPSIGNDYrm: [ 0.00 0.00 ]
+Key: VPSIGNDYrr: [ 0.00 0.00 ]
+Key: VPSIGNDrm: [ 0.00 0.00 ]
+Key: VPSIGNDrr: [ 0.00 0.00 ]
+Key: VPSIGNWYrm: [ 0.00 0.00 ]
+Key: VPSIGNWYrr: [ 0.00 0.00 ]
+Key: VPSIGNWrm: [ 0.00 0.00 ]
+Key: VPSIGNWrr: [ 0.00 0.00 ]
+Key: VPSLLDQYri: [ 0.00 0.00 ]
+Key: VPSLLDQZ: [ 0.00 0.00 ]
+Key: VPSLLDQZmi: [ 0.00 0.00 ]
+Key: VPSLLDQZri: [ 0.00 0.00 ]
+Key: VPSLLDQri: [ 0.00 0.00 ]
+Key: VPSLLDYri: [ 0.00 0.00 ]
+Key: VPSLLDYrm: [ 0.00 0.00 ]
+Key: VPSLLDYrr: [ 0.00 0.00 ]
+Key: VPSLLDZ: [ 0.00 0.00 ]
+Key: VPSLLDZmbi: [ 0.00 0.00 ]
+Key: VPSLLDZmbik: [ 0.00 0.00 ]
+Key: VPSLLDZmbikz: [ 0.00 0.00 ]
+Key: VPSLLDZmi: [ 0.00 0.00 ]
+Key: VPSLLDZmik: [ 0.00 0.00 ]
+Key: VPSLLDZmikz: [ 0.00 0.00 ]
+Key: VPSLLDZri: [ 0.00 0.00 ]
+Key: VPSLLDZrik: [ 0.00 0.00 ]
+Key: VPSLLDZrikz: [ 0.00 0.00 ]
+Key: VPSLLDZrm: [ 0.00 0.00 ]
+Key: VPSLLDZrmk: [ 0.00 0.00 ]
+Key: VPSLLDZrmkz: [ 0.00 0.00 ]
+Key: VPSLLDZrr: [ 0.00 0.00 ]
+Key: VPSLLDZrrk: [ 0.00 0.00 ]
+Key: VPSLLDZrrkz: [ 0.00 0.00 ]
+Key: VPSLLDri: [ 0.00 0.00 ]
+Key: VPSLLDrm: [ 0.00 0.00 ]
+Key: VPSLLDrr: [ 0.00 0.00 ]
+Key: VPSLLQYri: [ 0.00 0.00 ]
+Key: VPSLLQYrm: [ 0.00 0.00 ]
+Key: VPSLLQYrr: [ 0.00 0.00 ]
+Key: VPSLLQZ: [ 0.00 0.00 ]
+Key: VPSLLQZmbi: [ 0.00 0.00 ]
+Key: VPSLLQZmbik: [ 0.00 0.00 ]
+Key: VPSLLQZmbikz: [ 0.00 0.00 ]
+Key: VPSLLQZmi: [ 0.00 0.00 ]
+Key: VPSLLQZmik: [ 0.00 0.00 ]
+Key: VPSLLQZmikz: [ 0.00 0.00 ]
+Key: VPSLLQZri: [ 0.00 0.00 ]
+Key: VPSLLQZrik: [ 0.00 0.00 ]
+Key: VPSLLQZrikz: [ 0.00 0.00 ]
+Key: VPSLLQZrm: [ 0.00 0.00 ]
+Key: VPSLLQZrmk: [ 0.00 0.00 ]
+Key: VPSLLQZrmkz: [ 0.00 0.00 ]
+Key: VPSLLQZrr: [ 0.00 0.00 ]
+Key: VPSLLQZrrk: [ 0.00 0.00 ]
+Key: VPSLLQZrrkz: [ 0.00 0.00 ]
+Key: VPSLLQri: [ 0.00 0.00 ]
+Key: VPSLLQrm: [ 0.00 0.00 ]
+Key: VPSLLQrr: [ 0.00 0.00 ]
+Key: VPSLLVDYrm: [ 0.00 0.00 ]
+Key: VPSLLVDYrr: [ 0.00 0.00 ]
+Key: VPSLLVDZ: [ 0.00 0.00 ]
+Key: VPSLLVDZrm: [ 0.00 0.00 ]
+Key: VPSLLVDZrmb: [ 0.00 0.00 ]
+Key: VPSLLVDZrmbk: [ 0.00 0.00 ]
+Key: VPSLLVDZrmbkz: [ 0.00 0.00 ]
+Key: VPSLLVDZrmk: [ 0.00 0.00 ]
+Key: VPSLLVDZrmkz: [ 0.00 0.00 ]
+Key: VPSLLVDZrr: [ 0.00 0.00 ]
+Key: VPSLLVDZrrk: [ 0.00 0.00 ]
+Key: VPSLLVDZrrkz: [ 0.00 0.00 ]
+Key: VPSLLVDrm: [ 0.00 0.00 ]
+Key: VPSLLVDrr: [ 0.00 0.00 ]
+Key: VPSLLVQYrm: [ 0.00 0.00 ]
+Key: VPSLLVQYrr: [ 0.00 0.00 ]
+Key: VPSLLVQZ: [ 0.00 0.00 ]
+Key: VPSLLVQZrm: [ 0.00 0.00 ]
+Key: VPSLLVQZrmb: [ 0.00 0.00 ]
+Key: VPSLLVQZrmbk: [ 0.00 0.00 ]
+Key: VPSLLVQZrmbkz: [ 0.00 0.00 ]
+Key: VPSLLVQZrmk: [ 0.00 0.00 ]
+Key: VPSLLVQZrmkz: [ 0.00 0.00 ]
+Key: VPSLLVQZrr: [ 0.00 0.00 ]
+Key: VPSLLVQZrrk: [ 0.00 0.00 ]
+Key: VPSLLVQZrrkz: [ 0.00 0.00 ]
+Key: VPSLLVQrm: [ 0.00 0.00 ]
+Key: VPSLLVQrr: [ 0.00 0.00 ]
+Key: VPSLLVWZ: [ 0.00 0.00 ]
+Key: VPSLLVWZrm: [ 0.00 0.00 ]
+Key: VPSLLVWZrmk: [ 0.00 0.00 ]
+Key: VPSLLVWZrmkz: [ 0.00 0.00 ]
+Key: VPSLLVWZrr: [ 0.00 0.00 ]
+Key: VPSLLVWZrrk: [ 0.00 0.00 ]
+Key: VPSLLVWZrrkz: [ 0.00 0.00 ]
+Key: VPSLLWYri: [ 0.00 0.00 ]
+Key: VPSLLWYrm: [ 0.00 0.00 ]
+Key: VPSLLWYrr: [ 0.00 0.00 ]
+Key: VPSLLWZ: [ 0.00 0.00 ]
+Key: VPSLLWZmi: [ 0.00 0.00 ]
+Key: VPSLLWZmik: [ 0.00 0.00 ]
+Key: VPSLLWZmikz: [ 0.00 0.00 ]
+Key: VPSLLWZri: [ 0.00 0.00 ]
+Key: VPSLLWZrik: [ 0.00 0.00 ]
+Key: VPSLLWZrikz: [ 0.00 0.00 ]
+Key: VPSLLWZrm: [ 0.00 0.00 ]
+Key: VPSLLWZrmk: [ 0.00 0.00 ]
+Key: VPSLLWZrmkz: [ 0.00 0.00 ]
+Key: VPSLLWZrr: [ 0.00 0.00 ]
+Key: VPSLLWZrrk: [ 0.00 0.00 ]
+Key: VPSLLWZrrkz: [ 0.00 0.00 ]
+Key: VPSLLWri: [ 0.00 0.00 ]
+Key: VPSLLWrm: [ 0.00 0.00 ]
+Key: VPSLLWrr: [ 0.00 0.00 ]
+Key: VPSRADYri: [ 0.00 0.00 ]
+Key: VPSRADYrm: [ 0.00 0.00 ]
+Key: VPSRADYrr: [ 0.00 0.00 ]
+Key: VPSRADZ: [ 0.00 0.00 ]
+Key: VPSRADZmbi: [ 0.00 0.00 ]
+Key: VPSRADZmbik: [ 0.00 0.00 ]
+Key: VPSRADZmbikz: [ 0.00 0.00 ]
+Key: VPSRADZmi: [ 0.00 0.00 ]
+Key: VPSRADZmik: [ 0.00 0.00 ]
+Key: VPSRADZmikz: [ 0.00 0.00 ]
+Key: VPSRADZri: [ 0.00 0.00 ]
+Key: VPSRADZrik: [ 0.00 0.00 ]
+Key: VPSRADZrikz: [ 0.00 0.00 ]
+Key: VPSRADZrm: [ 0.00 0.00 ]
+Key: VPSRADZrmk: [ 0.00 0.00 ]
+Key: VPSRADZrmkz: [ 0.00 0.00 ]
+Key: VPSRADZrr: [ 0.00 0.00 ]
+Key: VPSRADZrrk: [ 0.00 0.00 ]
+Key: VPSRADZrrkz: [ 0.00 0.00 ]
+Key: VPSRADri: [ 0.00 0.00 ]
+Key: VPSRADrm: [ 0.00 0.00 ]
+Key: VPSRADrr: [ 0.00 0.00 ]
+Key: VPSRAQZ: [ 0.00 0.00 ]
+Key: VPSRAQZmbi: [ 0.00 0.00 ]
+Key: VPSRAQZmbik: [ 0.00 0.00 ]
+Key: VPSRAQZmbikz: [ 0.00 0.00 ]
+Key: VPSRAQZmi: [ 0.00 0.00 ]
+Key: VPSRAQZmik: [ 0.00 0.00 ]
+Key: VPSRAQZmikz: [ 0.00 0.00 ]
+Key: VPSRAQZri: [ 0.00 0.00 ]
+Key: VPSRAQZrik: [ 0.00 0.00 ]
+Key: VPSRAQZrikz: [ 0.00 0.00 ]
+Key: VPSRAQZrm: [ 0.00 0.00 ]
+Key: VPSRAQZrmk: [ 0.00 0.00 ]
+Key: VPSRAQZrmkz: [ 0.00 0.00 ]
+Key: VPSRAQZrr: [ 0.00 0.00 ]
+Key: VPSRAQZrrk: [ 0.00 0.00 ]
+Key: VPSRAQZrrkz: [ 0.00 0.00 ]
+Key: VPSRAVDYrm: [ 0.00 0.00 ]
+Key: VPSRAVDYrr: [ 0.00 0.00 ]
+Key: VPSRAVDZ: [ 0.00 0.00 ]
+Key: VPSRAVDZrm: [ 0.00 0.00 ]
+Key: VPSRAVDZrmb: [ 0.00 0.00 ]
+Key: VPSRAVDZrmbk: [ 0.00 0.00 ]
+Key: VPSRAVDZrmbkz: [ 0.00 0.00 ]
+Key: VPSRAVDZrmk: [ 0.00 0.00 ]
+Key: VPSRAVDZrmkz: [ 0.00 0.00 ]
+Key: VPSRAVDZrr: [ 0.00 0.00 ]
+Key: VPSRAVDZrrk: [ 0.00 0.00 ]
+Key: VPSRAVDZrrkz: [ 0.00 0.00 ]
+Key: VPSRAVDrm: [ 0.00 0.00 ]
+Key: VPSRAVDrr: [ 0.00 0.00 ]
+Key: VPSRAVQZ: [ 0.00 0.00 ]
+Key: VPSRAVQZrm: [ 0.00 0.00 ]
+Key: VPSRAVQZrmb: [ 0.00 0.00 ]
+Key: VPSRAVQZrmbk: [ 0.00 0.00 ]
+Key: VPSRAVQZrmbkz: [ 0.00 0.00 ]
+Key: VPSRAVQZrmk: [ 0.00 0.00 ]
+Key: VPSRAVQZrmkz: [ 0.00 0.00 ]
+Key: VPSRAVQZrr: [ 0.00 0.00 ]
+Key: VPSRAVQZrrk: [ 0.00 0.00 ]
+Key: VPSRAVQZrrkz: [ 0.00 0.00 ]
+Key: VPSRAVWZ: [ 0.00 0.00 ]
+Key: VPSRAVWZrm: [ 0.00 0.00 ]
+Key: VPSRAVWZrmk: [ 0.00 0.00 ]
+Key: VPSRAVWZrmkz: [ 0.00 0.00 ]
+Key: VPSRAVWZrr: [ 0.00 0.00 ]
+Key: VPSRAVWZrrk: [ 0.00 0.00 ]
+Key: VPSRAVWZrrkz: [ 0.00 0.00 ]
+Key: VPSRAWYri: [ 0.00 0.00 ]
+Key: VPSRAWYrm: [ 0.00 0.00 ]
+Key: VPSRAWYrr: [ 0.00 0.00 ]
+Key: VPSRAWZ: [ 0.00 0.00 ]
+Key: VPSRAWZmi: [ 0.00 0.00 ]
+Key: VPSRAWZmik: [ 0.00 0.00 ]
+Key: VPSRAWZmikz: [ 0.00 0.00 ]
+Key: VPSRAWZri: [ 0.00 0.00 ]
+Key: VPSRAWZrik: [ 0.00 0.00 ]
+Key: VPSRAWZrikz: [ 0.00 0.00 ]
+Key: VPSRAWZrm: [ 0.00 0.00 ]
+Key: VPSRAWZrmk: [ 0.00 0.00 ]
+Key: VPSRAWZrmkz: [ 0.00 0.00 ]
+Key: VPSRAWZrr: [ 0.00 0.00 ]
+Key: VPSRAWZrrk: [ 0.00 0.00 ]
+Key: VPSRAWZrrkz: [ 0.00 0.00 ]
+Key: VPSRAWri: [ 0.00 0.00 ]
+Key: VPSRAWrm: [ 0.00 0.00 ]
+Key: VPSRAWrr: [ 0.00 0.00 ]
+Key: VPSRLDQYri: [ 0.00 0.00 ]
+Key: VPSRLDQZ: [ 0.00 0.00 ]
+Key: VPSRLDQZmi: [ 0.00 0.00 ]
+Key: VPSRLDQZri: [ 0.00 0.00 ]
+Key: VPSRLDQri: [ 0.00 0.00 ]
+Key: VPSRLDYri: [ 0.00 0.00 ]
+Key: VPSRLDYrm: [ 0.00 0.00 ]
+Key: VPSRLDYrr: [ 0.00 0.00 ]
+Key: VPSRLDZ: [ 0.00 0.00 ]
+Key: VPSRLDZmbi: [ 0.00 0.00 ]
+Key: VPSRLDZmbik: [ 0.00 0.00 ]
+Key: VPSRLDZmbikz: [ 0.00 0.00 ]
+Key: VPSRLDZmi: [ 0.00 0.00 ]
+Key: VPSRLDZmik: [ 0.00 0.00 ]
+Key: VPSRLDZmikz: [ 0.00 0.00 ]
+Key: VPSRLDZri: [ 0.00 0.00 ]
+Key: VPSRLDZrik: [ 0.00 0.00 ]
+Key: VPSRLDZrikz: [ 0.00 0.00 ]
+Key: VPSRLDZrm: [ 0.00 0.00 ]
+Key: VPSRLDZrmk: [ 0.00 0.00 ]
+Key: VPSRLDZrmkz: [ 0.00 0.00 ]
+Key: VPSRLDZrr: [ 0.00 0.00 ]
+Key: VPSRLDZrrk: [ 0.00 0.00 ]
+Key: VPSRLDZrrkz: [ 0.00 0.00 ]
+Key: VPSRLDri: [ 0.00 0.00 ]
+Key: VPSRLDrm: [ 0.00 0.00 ]
+Key: VPSRLDrr: [ 0.00 0.00 ]
+Key: VPSRLQYri: [ 0.00 0.00 ]
+Key: VPSRLQYrm: [ 0.00 0.00 ]
+Key: VPSRLQYrr: [ 0.00 0.00 ]
+Key: VPSRLQZ: [ 0.00 0.00 ]
+Key: VPSRLQZmbi: [ 0.00 0.00 ]
+Key: VPSRLQZmbik: [ 0.00 0.00 ]
+Key: VPSRLQZmbikz: [ 0.00 0.00 ]
+Key: VPSRLQZmi: [ 0.00 0.00 ]
+Key: VPSRLQZmik: [ 0.00 0.00 ]
+Key: VPSRLQZmikz: [ 0.00 0.00 ]
+Key: VPSRLQZri: [ 0.00 0.00 ]
+Key: VPSRLQZrik: [ 0.00 0.00 ]
+Key: VPSRLQZrikz: [ 0.00 0.00 ]
+Key: VPSRLQZrm: [ 0.00 0.00 ]
+Key: VPSRLQZrmk: [ 0.00 0.00 ]
+Key: VPSRLQZrmkz: [ 0.00 0.00 ]
+Key: VPSRLQZrr: [ 0.00 0.00 ]
+Key: VPSRLQZrrk: [ 0.00 0.00 ]
+Key: VPSRLQZrrkz: [ 0.00 0.00 ]
+Key: VPSRLQri: [ 0.00 0.00 ]
+Key: VPSRLQrm: [ 0.00 0.00 ]
+Key: VPSRLQrr: [ 0.00 0.00 ]
+Key: VPSRLVDYrm: [ 0.00 0.00 ]
+Key: VPSRLVDYrr: [ 0.00 0.00 ]
+Key: VPSRLVDZ: [ 0.00 0.00 ]
+Key: VPSRLVDZrm: [ 0.00 0.00 ]
+Key: VPSRLVDZrmb: [ 0.00 0.00 ]
+Key: VPSRLVDZrmbk: [ 0.00 0.00 ]
+Key: VPSRLVDZrmbkz: [ 0.00 0.00 ]
+Key: VPSRLVDZrmk: [ 0.00 0.00 ]
+Key: VPSRLVDZrmkz: [ 0.00 0.00 ]
+Key: VPSRLVDZrr: [ 0.00 0.00 ]
+Key: VPSRLVDZrrk: [ 0.00 0.00 ]
+Key: VPSRLVDZrrkz: [ 0.00 0.00 ]
+Key: VPSRLVDrm: [ 0.00 0.00 ]
+Key: VPSRLVDrr: [ 0.00 0.00 ]
+Key: VPSRLVQYrm: [ 0.00 0.00 ]
+Key: VPSRLVQYrr: [ 0.00 0.00 ]
+Key: VPSRLVQZ: [ 0.00 0.00 ]
+Key: VPSRLVQZrm: [ 0.00 0.00 ]
+Key: VPSRLVQZrmb: [ 0.00 0.00 ]
+Key: VPSRLVQZrmbk: [ 0.00 0.00 ]
+Key: VPSRLVQZrmbkz: [ 0.00 0.00 ]
+Key: VPSRLVQZrmk: [ 0.00 0.00 ]
+Key: VPSRLVQZrmkz: [ 0.00 0.00 ]
+Key: VPSRLVQZrr: [ 0.00 0.00 ]
+Key: VPSRLVQZrrk: [ 0.00 0.00 ]
+Key: VPSRLVQZrrkz: [ 0.00 0.00 ]
+Key: VPSRLVQrm: [ 0.00 0.00 ]
+Key: VPSRLVQrr: [ 0.00 0.00 ]
+Key: VPSRLVWZ: [ 0.00 0.00 ]
+Key: VPSRLVWZrm: [ 0.00 0.00 ]
+Key: VPSRLVWZrmk: [ 0.00 0.00 ]
+Key: VPSRLVWZrmkz: [ 0.00 0.00 ]
+Key: VPSRLVWZrr: [ 0.00 0.00 ]
+Key: VPSRLVWZrrk: [ 0.00 0.00 ]
+Key: VPSRLVWZrrkz: [ 0.00 0.00 ]
+Key: VPSRLWYri: [ 0.00 0.00 ]
+Key: VPSRLWYrm: [ 0.00 0.00 ]
+Key: VPSRLWYrr: [ 0.00 0.00 ]
+Key: VPSRLWZ: [ 0.00 0.00 ]
+Key: VPSRLWZmi: [ 0.00 0.00 ]
+Key: VPSRLWZmik: [ 0.00 0.00 ]
+Key: VPSRLWZmikz: [ 0.00 0.00 ]
+Key: VPSRLWZri: [ 0.00 0.00 ]
+Key: VPSRLWZrik: [ 0.00 0.00 ]
+Key: VPSRLWZrikz: [ 0.00 0.00 ]
+Key: VPSRLWZrm: [ 0.00 0.00 ]
+Key: VPSRLWZrmk: [ 0.00 0.00 ]
+Key: VPSRLWZrmkz: [ 0.00 0.00 ]
+Key: VPSRLWZrr: [ 0.00 0.00 ]
+Key: VPSRLWZrrk: [ 0.00 0.00 ]
+Key: VPSRLWZrrkz: [ 0.00 0.00 ]
+Key: VPSRLWri: [ 0.00 0.00 ]
+Key: VPSRLWrm: [ 0.00 0.00 ]
+Key: VPSRLWrr: [ 0.00 0.00 ]
+Key: VPSUBBYrm: [ 0.00 0.00 ]
+Key: VPSUBBYrr: [ 0.00 0.00 ]
+Key: VPSUBBZ: [ 0.00 0.00 ]
+Key: VPSUBBZrm: [ 0.00 0.00 ]
+Key: VPSUBBZrmk: [ 0.00 0.00 ]
+Key: VPSUBBZrmkz: [ 0.00 0.00 ]
+Key: VPSUBBZrr: [ 0.00 0.00 ]
+Key: VPSUBBZrrk: [ 0.00 0.00 ]
+Key: VPSUBBZrrkz: [ 0.00 0.00 ]
+Key: VPSUBBrm: [ 0.00 0.00 ]
+Key: VPSUBBrr: [ 0.00 0.00 ]
+Key: VPSUBDYrm: [ 0.00 0.00 ]
+Key: VPSUBDYrr: [ 0.00 0.00 ]
+Key: VPSUBDZ: [ 0.00 0.00 ]
+Key: VPSUBDZrm: [ 0.00 0.00 ]
+Key: VPSUBDZrmb: [ 0.00 0.00 ]
+Key: VPSUBDZrmbk: [ 0.00 0.00 ]
+Key: VPSUBDZrmbkz: [ 0.00 0.00 ]
+Key: VPSUBDZrmk: [ 0.00 0.00 ]
+Key: VPSUBDZrmkz: [ 0.00 0.00 ]
+Key: VPSUBDZrr: [ 0.00 0.00 ]
+Key: VPSUBDZrrk: [ 0.00 0.00 ]
+Key: VPSUBDZrrkz: [ 0.00 0.00 ]
+Key: VPSUBDrm: [ 0.00 0.00 ]
+Key: VPSUBDrr: [ 0.00 0.00 ]
+Key: VPSUBQYrm: [ 0.00 0.00 ]
+Key: VPSUBQYrr: [ 0.00 0.00 ]
+Key: VPSUBQZ: [ 0.00 0.00 ]
+Key: VPSUBQZrm: [ 0.00 0.00 ]
+Key: VPSUBQZrmb: [ 0.00 0.00 ]
+Key: VPSUBQZrmbk: [ 0.00 0.00 ]
+Key: VPSUBQZrmbkz: [ 0.00 0.00 ]
+Key: VPSUBQZrmk: [ 0.00 0.00 ]
+Key: VPSUBQZrmkz: [ 0.00 0.00 ]
+Key: VPSUBQZrr: [ 0.00 0.00 ]
+Key: VPSUBQZrrk: [ 0.00 0.00 ]
+Key: VPSUBQZrrkz: [ 0.00 0.00 ]
+Key: VPSUBQrm: [ 0.00 0.00 ]
+Key: VPSUBQrr: [ 0.00 0.00 ]
+Key: VPSUBSBYrm: [ 0.00 0.00 ]
+Key: VPSUBSBYrr: [ 0.00 0.00 ]
+Key: VPSUBSBZ: [ 0.00 0.00 ]
+Key: VPSUBSBZrm: [ 0.00 0.00 ]
+Key: VPSUBSBZrmk: [ 0.00 0.00 ]
+Key: VPSUBSBZrmkz: [ 0.00 0.00 ]
+Key: VPSUBSBZrr: [ 0.00 0.00 ]
+Key: VPSUBSBZrrk: [ 0.00 0.00 ]
+Key: VPSUBSBZrrkz: [ 0.00 0.00 ]
+Key: VPSUBSBrm: [ 0.00 0.00 ]
+Key: VPSUBSBrr: [ 0.00 0.00 ]
+Key: VPSUBSWYrm: [ 0.00 0.00 ]
+Key: VPSUBSWYrr: [ 0.00 0.00 ]
+Key: VPSUBSWZ: [ 0.00 0.00 ]
+Key: VPSUBSWZrm: [ 0.00 0.00 ]
+Key: VPSUBSWZrmk: [ 0.00 0.00 ]
+Key: VPSUBSWZrmkz: [ 0.00 0.00 ]
+Key: VPSUBSWZrr: [ 0.00 0.00 ]
+Key: VPSUBSWZrrk: [ 0.00 0.00 ]
+Key: VPSUBSWZrrkz: [ 0.00 0.00 ]
+Key: VPSUBSWrm: [ 0.00 0.00 ]
+Key: VPSUBSWrr: [ 0.00 0.00 ]
+Key: VPSUBUSBYrm: [ 0.00 0.00 ]
+Key: VPSUBUSBYrr: [ 0.00 0.00 ]
+Key: VPSUBUSBZ: [ 0.00 0.00 ]
+Key: VPSUBUSBZrm: [ 0.00 0.00 ]
+Key: VPSUBUSBZrmk: [ 0.00 0.00 ]
+Key: VPSUBUSBZrmkz: [ 0.00 0.00 ]
+Key: VPSUBUSBZrr: [ 0.00 0.00 ]
+Key: VPSUBUSBZrrk: [ 0.00 0.00 ]
+Key: VPSUBUSBZrrkz: [ 0.00 0.00 ]
+Key: VPSUBUSBrm: [ 0.00 0.00 ]
+Key: VPSUBUSBrr: [ 0.00 0.00 ]
+Key: VPSUBUSWYrm: [ 0.00 0.00 ]
+Key: VPSUBUSWYrr: [ 0.00 0.00 ]
+Key: VPSUBUSWZ: [ 0.00 0.00 ]
+Key: VPSUBUSWZrm: [ 0.00 0.00 ]
+Key: VPSUBUSWZrmk: [ 0.00 0.00 ]
+Key: VPSUBUSWZrmkz: [ 0.00 0.00 ]
+Key: VPSUBUSWZrr: [ 0.00 0.00 ]
+Key: VPSUBUSWZrrk: [ 0.00 0.00 ]
+Key: VPSUBUSWZrrkz: [ 0.00 0.00 ]
+Key: VPSUBUSWrm: [ 0.00 0.00 ]
+Key: VPSUBUSWrr: [ 0.00 0.00 ]
+Key: VPSUBWYrm: [ 0.00 0.00 ]
+Key: VPSUBWYrr: [ 0.00 0.00 ]
+Key: VPSUBWZ: [ 0.00 0.00 ]
+Key: VPSUBWZrm: [ 0.00 0.00 ]
+Key: VPSUBWZrmk: [ 0.00 0.00 ]
+Key: VPSUBWZrmkz: [ 0.00 0.00 ]
+Key: VPSUBWZrr: [ 0.00 0.00 ]
+Key: VPSUBWZrrk: [ 0.00 0.00 ]
+Key: VPSUBWZrrkz: [ 0.00 0.00 ]
+Key: VPSUBWrm: [ 0.00 0.00 ]
+Key: VPSUBWrr: [ 0.00 0.00 ]
+Key: VPTERNLOGDZ: [ 0.00 0.00 ]
+Key: VPTERNLOGDZrmbi: [ 0.00 0.00 ]
+Key: VPTERNLOGDZrmbik: [ 0.00 0.00 ]
+Key: VPTERNLOGDZrmbikz: [ 0.00 0.00 ]
+Key: VPTERNLOGDZrmi: [ 0.00 0.00 ]
+Key: VPTERNLOGDZrmik: [ 0.00 0.00 ]
+Key: VPTERNLOGDZrmikz: [ 0.00 0.00 ]
+Key: VPTERNLOGDZrri: [ 0.00 0.00 ]
+Key: VPTERNLOGDZrrik: [ 0.00 0.00 ]
+Key: VPTERNLOGDZrrikz: [ 0.00 0.00 ]
+Key: VPTERNLOGQZ: [ 0.00 0.00 ]
+Key: VPTERNLOGQZrmbi: [ 0.00 0.00 ]
+Key: VPTERNLOGQZrmbik: [ 0.00 0.00 ]
+Key: VPTERNLOGQZrmbikz: [ 0.00 0.00 ]
+Key: VPTERNLOGQZrmi: [ 0.00 0.00 ]
+Key: VPTERNLOGQZrmik: [ 0.00 0.00 ]
+Key: VPTERNLOGQZrmikz: [ 0.00 0.00 ]
+Key: VPTERNLOGQZrri: [ 0.00 0.00 ]
+Key: VPTERNLOGQZrrik: [ 0.00 0.00 ]
+Key: VPTERNLOGQZrrikz: [ 0.00 0.00 ]
+Key: VPTESTMBZ: [ 0.00 0.00 ]
+Key: VPTESTMBZrm: [ 0.00 0.00 ]
+Key: VPTESTMBZrmk: [ 0.00 0.00 ]
+Key: VPTESTMBZrr: [ 0.00 0.00 ]
+Key: VPTESTMBZrrk: [ 0.00 0.00 ]
+Key: VPTESTMDZ: [ 0.00 0.00 ]
+Key: VPTESTMDZrm: [ 0.00 0.00 ]
+Key: VPTESTMDZrmb: [ 0.00 0.00 ]
+Key: VPTESTMDZrmbk: [ 0.00 0.00 ]
+Key: VPTESTMDZrmk: [ 0.00 0.00 ]
+Key: VPTESTMDZrr: [ 0.00 0.00 ]
+Key: VPTESTMDZrrk: [ 0.00 0.00 ]
+Key: VPTESTMQZ: [ 0.00 0.00 ]
+Key: VPTESTMQZrm: [ 0.00 0.00 ]
+Key: VPTESTMQZrmb: [ 0.00 0.00 ]
+Key: VPTESTMQZrmbk: [ 0.00 0.00 ]
+Key: VPTESTMQZrmk: [ 0.00 0.00 ]
+Key: VPTESTMQZrr: [ 0.00 0.00 ]
+Key: VPTESTMQZrrk: [ 0.00 0.00 ]
+Key: VPTESTMWZ: [ 0.00 0.00 ]
+Key: VPTESTMWZrm: [ 0.00 0.00 ]
+Key: VPTESTMWZrmk: [ 0.00 0.00 ]
+Key: VPTESTMWZrr: [ 0.00 0.00 ]
+Key: VPTESTMWZrrk: [ 0.00 0.00 ]
+Key: VPTESTNMBZ: [ 0.00 0.00 ]
+Key: VPTESTNMBZrm: [ 0.00 0.00 ]
+Key: VPTESTNMBZrmk: [ 0.00 0.00 ]
+Key: VPTESTNMBZrr: [ 0.00 0.00 ]
+Key: VPTESTNMBZrrk: [ 0.00 0.00 ]
+Key: VPTESTNMDZ: [ 0.00 0.00 ]
+Key: VPTESTNMDZrm: [ 0.00 0.00 ]
+Key: VPTESTNMDZrmb: [ 0.00 0.00 ]
+Key: VPTESTNMDZrmbk: [ 0.00 0.00 ]
+Key: VPTESTNMDZrmk: [ 0.00 0.00 ]
+Key: VPTESTNMDZrr: [ 0.00 0.00 ]
+Key: VPTESTNMDZrrk: [ 0.00 0.00 ]
+Key: VPTESTNMQZ: [ 0.00 0.00 ]
+Key: VPTESTNMQZrm: [ 0.00 0.00 ]
+Key: VPTESTNMQZrmb: [ 0.00 0.00 ]
+Key: VPTESTNMQZrmbk: [ 0.00 0.00 ]
+Key: VPTESTNMQZrmk: [ 0.00 0.00 ]
+Key: VPTESTNMQZrr: [ 0.00 0.00 ]
+Key: VPTESTNMQZrrk: [ 0.00 0.00 ]
+Key: VPTESTNMWZ: [ 0.00 0.00 ]
+Key: VPTESTNMWZrm: [ 0.00 0.00 ]
+Key: VPTESTNMWZrmk: [ 0.00 0.00 ]
+Key: VPTESTNMWZrr: [ 0.00 0.00 ]
+Key: VPTESTNMWZrrk: [ 0.00 0.00 ]
+Key: VPTESTYrm: [ 0.00 0.00 ]
+Key: VPTESTYrr: [ 0.00 0.00 ]
+Key: VPTESTrm: [ 0.00 0.00 ]
+Key: VPTESTrr: [ 0.00 0.00 ]
+Key: VPUNPCKHBWYrm: [ 0.00 0.00 ]
+Key: VPUNPCKHBWYrr: [ 0.00 0.00 ]
+Key: VPUNPCKHBWZ: [ 0.00 0.00 ]
+Key: VPUNPCKHBWZrm: [ 0.00 0.00 ]
+Key: VPUNPCKHBWZrmk: [ 0.00 0.00 ]
+Key: VPUNPCKHBWZrmkz: [ 0.00 0.00 ]
+Key: VPUNPCKHBWZrr: [ 0.00 0.00 ]
+Key: VPUNPCKHBWZrrk: [ 0.00 0.00 ]
+Key: VPUNPCKHBWZrrkz: [ 0.00 0.00 ]
+Key: VPUNPCKHBWrm: [ 0.00 0.00 ]
+Key: VPUNPCKHBWrr: [ 0.00 0.00 ]
+Key: VPUNPCKHDQYrm: [ 0.00 0.00 ]
+Key: VPUNPCKHDQYrr: [ 0.00 0.00 ]
+Key: VPUNPCKHDQZ: [ 0.00 0.00 ]
+Key: VPUNPCKHDQZrm: [ 0.00 0.00 ]
+Key: VPUNPCKHDQZrmb: [ 0.00 0.00 ]
+Key: VPUNPCKHDQZrmbk: [ 0.00 0.00 ]
+Key: VPUNPCKHDQZrmbkz: [ 0.00 0.00 ]
+Key: VPUNPCKHDQZrmk: [ 0.00 0.00 ]
+Key: VPUNPCKHDQZrmkz: [ 0.00 0.00 ]
+Key: VPUNPCKHDQZrr: [ 0.00 0.00 ]
+Key: VPUNPCKHDQZrrk: [ 0.00 0.00 ]
+Key: VPUNPCKHDQZrrkz: [ 0.00 0.00 ]
+Key: VPUNPCKHDQrm: [ 0.00 0.00 ]
+Key: VPUNPCKHDQrr: [ 0.00 0.00 ]
+Key: VPUNPCKHQDQYrm: [ 0.00 0.00 ]
+Key: VPUNPCKHQDQYrr: [ 0.00 0.00 ]
+Key: VPUNPCKHQDQZ: [ 0.00 0.00 ]
+Key: VPUNPCKHQDQZrm: [ 0.00 0.00 ]
+Key: VPUNPCKHQDQZrmb: [ 0.00 0.00 ]
+Key: VPUNPCKHQDQZrmbk: [ 0.00 0.00 ]
+Key: VPUNPCKHQDQZrmbkz: [ 0.00 0.00 ]
+Key: VPUNPCKHQDQZrmk: [ 0.00 0.00 ]
+Key: VPUNPCKHQDQZrmkz: [ 0.00 0.00 ]
+Key: VPUNPCKHQDQZrr: [ 0.00 0.00 ]
+Key: VPUNPCKHQDQZrrk: [ 0.00 0.00 ]
+Key: VPUNPCKHQDQZrrkz: [ 0.00 0.00 ]
+Key: VPUNPCKHQDQrm: [ 0.00 0.00 ]
+Key: VPUNPCKHQDQrr: [ 0.00 0.00 ]
+Key: VPUNPCKHWDYrm: [ 0.00 0.00 ]
+Key: VPUNPCKHWDYrr: [ 0.00 0.00 ]
+Key: VPUNPCKHWDZ: [ 0.00 0.00 ]
+Key: VPUNPCKHWDZrm: [ 0.00 0.00 ]
+Key: VPUNPCKHWDZrmk: [ 0.00 0.00 ]
+Key: VPUNPCKHWDZrmkz: [ 0.00 0.00 ]
+Key: VPUNPCKHWDZrr: [ 0.00 0.00 ]
+Key: VPUNPCKHWDZrrk: [ 0.00 0.00 ]
+Key: VPUNPCKHWDZrrkz: [ 0.00 0.00 ]
+Key: VPUNPCKHWDrm: [ 0.00 0.00 ]
+Key: VPUNPCKHWDrr: [ 0.00 0.00 ]
+Key: VPUNPCKLBWYrm: [ 0.00 0.00 ]
+Key: VPUNPCKLBWYrr: [ 0.00 0.00 ]
+Key: VPUNPCKLBWZ: [ 0.00 0.00 ]
+Key: VPUNPCKLBWZrm: [ 0.00 0.00 ]
+Key: VPUNPCKLBWZrmk: [ 0.00 0.00 ]
+Key: VPUNPCKLBWZrmkz: [ 0.00 0.00 ]
+Key: VPUNPCKLBWZrr: [ 0.00 0.00 ]
+Key: VPUNPCKLBWZrrk: [ 0.00 0.00 ]
+Key: VPUNPCKLBWZrrkz: [ 0.00 0.00 ]
+Key: VPUNPCKLBWrm: [ 0.00 0.00 ]
+Key: VPUNPCKLBWrr: [ 0.00 0.00 ]
+Key: VPUNPCKLDQYrm: [ 0.00 0.00 ]
+Key: VPUNPCKLDQYrr: [ 0.00 0.00 ]
+Key: VPUNPCKLDQZ: [ 0.00 0.00 ]
+Key: VPUNPCKLDQZrm: [ 0.00 0.00 ]
+Key: VPUNPCKLDQZrmb: [ 0.00 0.00 ]
+Key: VPUNPCKLDQZrmbk: [ 0.00 0.00 ]
+Key: VPUNPCKLDQZrmbkz: [ 0.00 0.00 ]
+Key: VPUNPCKLDQZrmk: [ 0.00 0.00 ]
+Key: VPUNPCKLDQZrmkz: [ 0.00 0.00 ]
+Key: VPUNPCKLDQZrr: [ 0.00 0.00 ]
+Key: VPUNPCKLDQZrrk: [ 0.00 0.00 ]
+Key: VPUNPCKLDQZrrkz: [ 0.00 0.00 ]
+Key: VPUNPCKLDQrm: [ 0.00 0.00 ]
+Key: VPUNPCKLDQrr: [ 0.00 0.00 ]
+Key: VPUNPCKLQDQYrm: [ 0.00 0.00 ]
+Key: VPUNPCKLQDQYrr: [ 0.00 0.00 ]
+Key: VPUNPCKLQDQZ: [ 0.00 0.00 ]
+Key: VPUNPCKLQDQZrm: [ 0.00 0.00 ]
+Key: VPUNPCKLQDQZrmb: [ 0.00 0.00 ]
+Key: VPUNPCKLQDQZrmbk: [ 0.00 0.00 ]
+Key: VPUNPCKLQDQZrmbkz: [ 0.00 0.00 ]
+Key: VPUNPCKLQDQZrmk: [ 0.00 0.00 ]
+Key: VPUNPCKLQDQZrmkz: [ 0.00 0.00 ]
+Key: VPUNPCKLQDQZrr: [ 0.00 0.00 ]
+Key: VPUNPCKLQDQZrrk: [ 0.00 0.00 ]
+Key: VPUNPCKLQDQZrrkz: [ 0.00 0.00 ]
+Key: VPUNPCKLQDQrm: [ 0.00 0.00 ]
+Key: VPUNPCKLQDQrr: [ 0.00 0.00 ]
+Key: VPUNPCKLWDYrm: [ 0.00 0.00 ]
+Key: VPUNPCKLWDYrr: [ 0.00 0.00 ]
+Key: VPUNPCKLWDZ: [ 0.00 0.00 ]
+Key: VPUNPCKLWDZrm: [ 0.00 0.00 ]
+Key: VPUNPCKLWDZrmk: [ 0.00 0.00 ]
+Key: VPUNPCKLWDZrmkz: [ 0.00 0.00 ]
+Key: VPUNPCKLWDZrr: [ 0.00 0.00 ]
+Key: VPUNPCKLWDZrrk: [ 0.00 0.00 ]
+Key: VPUNPCKLWDZrrkz: [ 0.00 0.00 ]
+Key: VPUNPCKLWDrm: [ 0.00 0.00 ]
+Key: VPUNPCKLWDrr: [ 0.00 0.00 ]
+Key: VPXORDZ: [ 0.00 0.00 ]
+Key: VPXORDZrm: [ 0.00 0.00 ]
+Key: VPXORDZrmb: [ 0.00 0.00 ]
+Key: VPXORDZrmbk: [ 0.00 0.00 ]
+Key: VPXORDZrmbkz: [ 0.00 0.00 ]
+Key: VPXORDZrmk: [ 0.00 0.00 ]
+Key: VPXORDZrmkz: [ 0.00 0.00 ]
+Key: VPXORDZrr: [ 0.00 0.00 ]
+Key: VPXORDZrrk: [ 0.00 0.00 ]
+Key: VPXORDZrrkz: [ 0.00 0.00 ]
+Key: VPXORQZ: [ 0.00 0.00 ]
+Key: VPXORQZrm: [ 0.00 0.00 ]
+Key: VPXORQZrmb: [ 0.00 0.00 ]
+Key: VPXORQZrmbk: [ 0.00 0.00 ]
+Key: VPXORQZrmbkz: [ 0.00 0.00 ]
+Key: VPXORQZrmk: [ 0.00 0.00 ]
+Key: VPXORQZrmkz: [ 0.00 0.00 ]
+Key: VPXORQZrr: [ 0.00 0.00 ]
+Key: VPXORQZrrk: [ 0.00 0.00 ]
+Key: VPXORQZrrkz: [ 0.00 0.00 ]
+Key: VPXORYrm: [ 0.00 0.00 ]
+Key: VPXORYrr: [ 0.00 0.00 ]
+Key: VPXORrm: [ 0.00 0.00 ]
+Key: VPXORrr: [ 0.00 0.00 ]
+Key: VRANGEPDZ: [ 0.00 0.00 ]
+Key: VRANGEPDZrmbi: [ 0.00 0.00 ]
+Key: VRANGEPDZrmbik: [ 0.00 0.00 ]
+Key: VRANGEPDZrmbikz: [ 0.00 0.00 ]
+Key: VRANGEPDZrmi: [ 0.00 0.00 ]
+Key: VRANGEPDZrmik: [ 0.00 0.00 ]
+Key: VRANGEPDZrmikz: [ 0.00 0.00 ]
+Key: VRANGEPDZrri: [ 0.00 0.00 ]
+Key: VRANGEPDZrrib: [ 0.00 0.00 ]
+Key: VRANGEPDZrribk: [ 0.00 0.00 ]
+Key: VRANGEPDZrribkz: [ 0.00 0.00 ]
+Key: VRANGEPDZrrik: [ 0.00 0.00 ]
+Key: VRANGEPDZrrikz: [ 0.00 0.00 ]
+Key: VRANGEPSZ: [ 0.00 0.00 ]
+Key: VRANGEPSZrmbi: [ 0.00 0.00 ]
+Key: VRANGEPSZrmbik: [ 0.00 0.00 ]
+Key: VRANGEPSZrmbikz: [ 0.00 0.00 ]
+Key: VRANGEPSZrmi: [ 0.00 0.00 ]
+Key: VRANGEPSZrmik: [ 0.00 0.00 ]
+Key: VRANGEPSZrmikz: [ 0.00 0.00 ]
+Key: VRANGEPSZrri: [ 0.00 0.00 ]
+Key: VRANGEPSZrrib: [ 0.00 0.00 ]
+Key: VRANGEPSZrribk: [ 0.00 0.00 ]
+Key: VRANGEPSZrribkz: [ 0.00 0.00 ]
+Key: VRANGEPSZrrik: [ 0.00 0.00 ]
+Key: VRANGEPSZrrikz: [ 0.00 0.00 ]
+Key: VRANGESDZrmi: [ 0.00 0.00 ]
+Key: VRANGESDZrmik: [ 0.00 0.00 ]
+Key: VRANGESDZrmikz: [ 0.00 0.00 ]
+Key: VRANGESDZrri: [ 0.00 0.00 ]
+Key: VRANGESDZrrib: [ 0.00 0.00 ]
+Key: VRANGESDZrribk: [ 0.00 0.00 ]
+Key: VRANGESDZrribkz: [ 0.00 0.00 ]
+Key: VRANGESDZrrik: [ 0.00 0.00 ]
+Key: VRANGESDZrrikz: [ 0.00 0.00 ]
+Key: VRANGESSZrmi: [ 0.00 0.00 ]
+Key: VRANGESSZrmik: [ 0.00 0.00 ]
+Key: VRANGESSZrmikz: [ 0.00 0.00 ]
+Key: VRANGESSZrri: [ 0.00 0.00 ]
+Key: VRANGESSZrrib: [ 0.00 0.00 ]
+Key: VRANGESSZrribk: [ 0.00 0.00 ]
+Key: VRANGESSZrribkz: [ 0.00 0.00 ]
+Key: VRANGESSZrrik: [ 0.00 0.00 ]
+Key: VRANGESSZrrikz: [ 0.00 0.00 ]
+Key: VRCP: [ 0.00 0.00 ]
+Key: VRCPBF: [ 0.00 0.00 ]
+Key: VRCPPHZ: [ 0.00 0.00 ]
+Key: VRCPPHZm: [ 0.00 0.00 ]
+Key: VRCPPHZmb: [ 0.00 0.00 ]
+Key: VRCPPHZmbk: [ 0.00 0.00 ]
+Key: VRCPPHZmbkz: [ 0.00 0.00 ]
+Key: VRCPPHZmk: [ 0.00 0.00 ]
+Key: VRCPPHZmkz: [ 0.00 0.00 ]
+Key: VRCPPHZr: [ 0.00 0.00 ]
+Key: VRCPPHZrk: [ 0.00 0.00 ]
+Key: VRCPPHZrkz: [ 0.00 0.00 ]
+Key: VRCPPSYm: [ 0.00 0.00 ]
+Key: VRCPPSYr: [ 0.00 0.00 ]
+Key: VRCPPSm: [ 0.00 0.00 ]
+Key: VRCPPSr: [ 0.00 0.00 ]
+Key: VRCPSHZrm: [ 0.00 0.00 ]
+Key: VRCPSHZrmk: [ 0.00 0.00 ]
+Key: VRCPSHZrmkz: [ 0.00 0.00 ]
+Key: VRCPSHZrr: [ 0.00 0.00 ]
+Key: VRCPSHZrrk: [ 0.00 0.00 ]
+Key: VRCPSHZrrkz: [ 0.00 0.00 ]
+Key: VRCPSSm: [ 0.00 0.00 ]
+Key: VRCPSSm_Int: [ 0.00 0.00 ]
+Key: VRCPSSr: [ 0.00 0.00 ]
+Key: VRCPSSr_Int: [ 0.00 0.00 ]
+Key: VREDUCEBF: [ 0.00 0.00 ]
+Key: VREDUCEPDZ: [ 0.00 0.00 ]
+Key: VREDUCEPDZrmbi: [ 0.00 0.00 ]
+Key: VREDUCEPDZrmbik: [ 0.00 0.00 ]
+Key: VREDUCEPDZrmbikz: [ 0.00 0.00 ]
+Key: VREDUCEPDZrmi: [ 0.00 0.00 ]
+Key: VREDUCEPDZrmik: [ 0.00 0.00 ]
+Key: VREDUCEPDZrmikz: [ 0.00 0.00 ]
+Key: VREDUCEPDZrri: [ 0.00 0.00 ]
+Key: VREDUCEPDZrrib: [ 0.00 0.00 ]
+Key: VREDUCEPDZrribk: [ 0.00 0.00 ]
+Key: VREDUCEPDZrribkz: [ 0.00 0.00 ]
+Key: VREDUCEPDZrrik: [ 0.00 0.00 ]
+Key: VREDUCEPDZrrikz: [ 0.00 0.00 ]
+Key: VREDUCEPHZ: [ 0.00 0.00 ]
+Key: VREDUCEPHZrmbi: [ 0.00 0.00 ]
+Key: VREDUCEPHZrmbik: [ 0.00 0.00 ]
+Key: VREDUCEPHZrmbikz: [ 0.00 0.00 ]
+Key: VREDUCEPHZrmi: [ 0.00 0.00 ]
+Key: VREDUCEPHZrmik: [ 0.00 0.00 ]
+Key: VREDUCEPHZrmikz: [ 0.00 0.00 ]
+Key: VREDUCEPHZrri: [ 0.00 0.00 ]
+Key: VREDUCEPHZrrib: [ 0.00 0.00 ]
+Key: VREDUCEPHZrribk: [ 0.00 0.00 ]
+Key: VREDUCEPHZrribkz: [ 0.00 0.00 ]
+Key: VREDUCEPHZrrik: [ 0.00 0.00 ]
+Key: VREDUCEPHZrrikz: [ 0.00 0.00 ]
+Key: VREDUCEPSZ: [ 0.00 0.00 ]
+Key: VREDUCEPSZrmbi: [ 0.00 0.00 ]
+Key: VREDUCEPSZrmbik: [ 0.00 0.00 ]
+Key: VREDUCEPSZrmbikz: [ 0.00 0.00 ]
+Key: VREDUCEPSZrmi: [ 0.00 0.00 ]
+Key: VREDUCEPSZrmik: [ 0.00 0.00 ]
+Key: VREDUCEPSZrmikz: [ 0.00 0.00 ]
+Key: VREDUCEPSZrri: [ 0.00 0.00 ]
+Key: VREDUCEPSZrrib: [ 0.00 0.00 ]
+Key: VREDUCEPSZrribk: [ 0.00 0.00 ]
+Key: VREDUCEPSZrribkz: [ 0.00 0.00 ]
+Key: VREDUCEPSZrrik: [ 0.00 0.00 ]
+Key: VREDUCEPSZrrikz: [ 0.00 0.00 ]
+Key: VREDUCESDZrmi: [ 0.00 0.00 ]
+Key: VREDUCESDZrmik: [ 0.00 0.00 ]
+Key: VREDUCESDZrmikz: [ 0.00 0.00 ]
+Key: VREDUCESDZrri: [ 0.00 0.00 ]
+Key: VREDUCESDZrrib: [ 0.00 0.00 ]
+Key: VREDUCESDZrribk: [ 0.00 0.00 ]
+Key: VREDUCESDZrribkz: [ 0.00 0.00 ]
+Key: VREDUCESDZrrik: [ 0.00 0.00 ]
+Key: VREDUCESDZrrikz: [ 0.00 0.00 ]
+Key: VREDUCESHZrmi: [ 0.00 0.00 ]
+Key: VREDUCESHZrmik: [ 0.00 0.00 ]
+Key: VREDUCESHZrmikz: [ 0.00 0.00 ]
+Key: VREDUCESHZrri: [ 0.00 0.00 ]
+Key: VREDUCESHZrrib: [ 0.00 0.00 ]
+Key: VREDUCESHZrribk: [ 0.00 0.00 ]
+Key: VREDUCESHZrribkz: [ 0.00 0.00 ]
+Key: VREDUCESHZrrik: [ 0.00 0.00 ]
+Key: VREDUCESHZrrikz: [ 0.00 0.00 ]
+Key: VREDUCESSZrmi: [ 0.00 0.00 ]
+Key: VREDUCESSZrmik: [ 0.00 0.00 ]
+Key: VREDUCESSZrmikz: [ 0.00 0.00 ]
+Key: VREDUCESSZrri: [ 0.00 0.00 ]
+Key: VREDUCESSZrrib: [ 0.00 0.00 ]
+Key: VREDUCESSZrribk: [ 0.00 0.00 ]
+Key: VREDUCESSZrribkz: [ 0.00 0.00 ]
+Key: VREDUCESSZrrik: [ 0.00 0.00 ]
+Key: VREDUCESSZrrikz: [ 0.00 0.00 ]
+Key: VRNDSCALEBF: [ 0.00 0.00 ]
+Key: VRNDSCALEPDZ: [ 0.00 0.00 ]
+Key: VRNDSCALEPDZrmbi: [ 0.00 0.00 ]
+Key: VRNDSCALEPDZrmbik: [ 0.00 0.00 ]
+Key: VRNDSCALEPDZrmbikz: [ 0.00 0.00 ]
+Key: VRNDSCALEPDZrmi: [ 0.00 0.00 ]
+Key: VRNDSCALEPDZrmik: [ 0.00 0.00 ]
+Key: VRNDSCALEPDZrmikz: [ 0.00 0.00 ]
+Key: VRNDSCALEPDZrri: [ 0.00 0.00 ]
+Key: VRNDSCALEPDZrrib: [ 0.00 0.00 ]
+Key: VRNDSCALEPDZrribk: [ 0.00 0.00 ]
+Key: VRNDSCALEPDZrribkz: [ 0.00 0.00 ]
+Key: VRNDSCALEPDZrrik: [ 0.00 0.00 ]
+Key: VRNDSCALEPDZrrikz: [ 0.00 0.00 ]
+Key: VRNDSCALEPHZ: [ 0.00 0.00 ]
+Key: VRNDSCALEPHZrmbi: [ 0.00 0.00 ]
+Key: VRNDSCALEPHZrmbik: [ 0.00 0.00 ]
+Key: VRNDSCALEPHZrmbikz: [ 0.00 0.00 ]
+Key: VRNDSCALEPHZrmi: [ 0.00 0.00 ]
+Key: VRNDSCALEPHZrmik: [ 0.00 0.00 ]
+Key: VRNDSCALEPHZrmikz: [ 0.00 0.00 ]
+Key: VRNDSCALEPHZrri: [ 0.00 0.00 ]
+Key: VRNDSCALEPHZrrib: [ 0.00 0.00 ]
+Key: VRNDSCALEPHZrribk: [ 0.00 0.00 ]
+Key: VRNDSCALEPHZrribkz: [ 0.00 0.00 ]
+Key: VRNDSCALEPHZrrik: [ 0.00 0.00 ]
+Key: VRNDSCALEPHZrrikz: [ 0.00 0.00 ]
+Key: VRNDSCALEPSZ: [ 0.00 0.00 ]
+Key: VRNDSCALEPSZrmbi: [ 0.00 0.00 ]
+Key: VRNDSCALEPSZrmbik: [ 0.00 0.00 ]
+Key: VRNDSCALEPSZrmbikz: [ 0.00 0.00 ]
+Key: VRNDSCALEPSZrmi: [ 0.00 0.00 ]
+Key: VRNDSCALEPSZrmik: [ 0.00 0.00 ]
+Key: VRNDSCALEPSZrmikz: [ 0.00 0.00 ]
+Key: VRNDSCALEPSZrri: [ 0.00 0.00 ]
+Key: VRNDSCALEPSZrrib: [ 0.00 0.00 ]
+Key: VRNDSCALEPSZrribk: [ 0.00 0.00 ]
+Key: VRNDSCALEPSZrribkz: [ 0.00 0.00 ]
+Key: VRNDSCALEPSZrrik: [ 0.00 0.00 ]
+Key: VRNDSCALEPSZrrikz: [ 0.00 0.00 ]
+Key: VRNDSCALESDZrmi: [ 0.00 0.00 ]
+Key: VRNDSCALESDZrmi_Int: [ 0.00 0.00 ]
+Key: VRNDSCALESDZrmik_Int: [ 0.00 0.00 ]
+Key: VRNDSCALESDZrmikz_Int: [ 0.00 0.00 ]
+Key: VRNDSCALESDZrri: [ 0.00 0.00 ]
+Key: VRNDSCALESDZrri_Int: [ 0.00 0.00 ]
+Key: VRNDSCALESDZrrib_Int: [ 0.00 0.00 ]
+Key: VRNDSCALESDZrribk_Int: [ 0.00 0.00 ]
+Key: VRNDSCALESDZrribkz_Int: [ 0.00 0.00 ]
+Key: VRNDSCALESDZrrik_Int: [ 0.00 0.00 ]
+Key: VRNDSCALESDZrrikz_Int: [ 0.00 0.00 ]
+Key: VRNDSCALESHZrmi: [ 0.00 0.00 ]
+Key: VRNDSCALESHZrmi_Int: [ 0.00 0.00 ]
+Key: VRNDSCALESHZrmik_Int: [ 0.00 0.00 ]
+Key: VRNDSCALESHZrmikz_Int: [ 0.00 0.00 ]
+Key: VRNDSCALESHZrri: [ 0.00 0.00 ]
+Key: VRNDSCALESHZrri_Int: [ 0.00 0.00 ]
+Key: VRNDSCALESHZrrib_Int: [ 0.00 0.00 ]
+Key: VRNDSCALESHZrribk_Int: [ 0.00 0.00 ]
+Key: VRNDSCALESHZrribkz_Int: [ 0.00 0.00 ]
+Key: VRNDSCALESHZrrik_Int: [ 0.00 0.00 ]
+Key: VRNDSCALESHZrrikz_Int: [ 0.00 0.00 ]
+Key: VRNDSCALESSZrmi: [ 0.00 0.00 ]
+Key: VRNDSCALESSZrmi_Int: [ 0.00 0.00 ]
+Key: VRNDSCALESSZrmik_Int: [ 0.00 0.00 ]
+Key: VRNDSCALESSZrmikz_Int: [ 0.00 0.00 ]
+Key: VRNDSCALESSZrri: [ 0.00 0.00 ]
+Key: VRNDSCALESSZrri_Int: [ 0.00 0.00 ]
+Key: VRNDSCALESSZrrib_Int: [ 0.00 0.00 ]
+Key: VRNDSCALESSZrribk_Int: [ 0.00 0.00 ]
+Key: VRNDSCALESSZrribkz_Int: [ 0.00 0.00 ]
+Key: VRNDSCALESSZrrik_Int: [ 0.00 0.00 ]
+Key: VRNDSCALESSZrrikz_Int: [ 0.00 0.00 ]
+Key: VROUNDPDYmi: [ 0.00 0.00 ]
+Key: VROUNDPDYri: [ 0.00 0.00 ]
+Key: VROUNDPDmi: [ 0.00 0.00 ]
+Key: VROUNDPDri: [ 0.00 0.00 ]
+Key: VROUNDPSYmi: [ 0.00 0.00 ]
+Key: VROUNDPSYri: [ 0.00 0.00 ]
+Key: VROUNDPSmi: [ 0.00 0.00 ]
+Key: VROUNDPSri: [ 0.00 0.00 ]
+Key: VROUNDSDmi: [ 0.00 0.00 ]
+Key: VROUNDSDmi_Int: [ 0.00 0.00 ]
+Key: VROUNDSDri: [ 0.00 0.00 ]
+Key: VROUNDSDri_Int: [ 0.00 0.00 ]
+Key: VROUNDSSmi: [ 0.00 0.00 ]
+Key: VROUNDSSmi_Int: [ 0.00 0.00 ]
+Key: VROUNDSSri: [ 0.00 0.00 ]
+Key: VROUNDSSri_Int: [ 0.00 0.00 ]
+Key: VRSQRT: [ 0.00 0.00 ]
+Key: VRSQRTBF: [ 0.00 0.00 ]
+Key: VRSQRTPHZ: [ 0.00 0.00 ]
+Key: VRSQRTPHZm: [ 0.00 0.00 ]
+Key: VRSQRTPHZmb: [ 0.00 0.00 ]
+Key: VRSQRTPHZmbk: [ 0.00 0.00 ]
+Key: VRSQRTPHZmbkz: [ 0.00 0.00 ]
+Key: VRSQRTPHZmk: [ 0.00 0.00 ]
+Key: VRSQRTPHZmkz: [ 0.00 0.00 ]
+Key: VRSQRTPHZr: [ 0.00 0.00 ]
+Key: VRSQRTPHZrk: [ 0.00 0.00 ]
+Key: VRSQRTPHZrkz: [ 0.00 0.00 ]
+Key: VRSQRTPSYm: [ 0.00 0.00 ]
+Key: VRSQRTPSYr: [ 0.00 0.00 ]
+Key: VRSQRTPSm: [ 0.00 0.00 ]
+Key: VRSQRTPSr: [ 0.00 0.00 ]
+Key: VRSQRTSHZrm: [ 0.00 0.00 ]
+Key: VRSQRTSHZrmk: [ 0.00 0.00 ]
+Key: VRSQRTSHZrmkz: [ 0.00 0.00 ]
+Key: VRSQRTSHZrr: [ 0.00 0.00 ]
+Key: VRSQRTSHZrrk: [ 0.00 0.00 ]
+Key: VRSQRTSHZrrkz: [ 0.00 0.00 ]
+Key: VRSQRTSSm: [ 0.00 0.00 ]
+Key: VRSQRTSSm_Int: [ 0.00 0.00 ]
+Key: VRSQRTSSr: [ 0.00 0.00 ]
+Key: VRSQRTSSr_Int: [ 0.00 0.00 ]
+Key: VSCALEFBF: [ 0.00 0.00 ]
+Key: VSCALEFPDZ: [ 0.00 0.00 ]
+Key: VSCALEFPDZrm: [ 0.00 0.00 ]
+Key: VSCALEFPDZrmb: [ 0.00 0.00 ]
+Key: VSCALEFPDZrmbk: [ 0.00 0.00 ]
+Key: VSCALEFPDZrmbkz: [ 0.00 0.00 ]
+Key: VSCALEFPDZrmk: [ 0.00 0.00 ]
+Key: VSCALEFPDZrmkz: [ 0.00 0.00 ]
+Key: VSCALEFPDZrr: [ 0.00 0.00 ]
+Key: VSCALEFPDZrrb: [ 0.00 0.00 ]
+Key: VSCALEFPDZrrbk: [ 0.00 0.00 ]
+Key: VSCALEFPDZrrbkz: [ 0.00 0.00 ]
+Key: VSCALEFPDZrrk: [ 0.00 0.00 ]
+Key: VSCALEFPDZrrkz: [ 0.00 0.00 ]
+Key: VSCALEFPHZ: [ 0.00 0.00 ]
+Key: VSCALEFPHZrm: [ 0.00 0.00 ]
+Key: VSCALEFPHZrmb: [ 0.00 0.00 ]
+Key: VSCALEFPHZrmbk: [ 0.00 0.00 ]
+Key: VSCALEFPHZrmbkz: [ 0.00 0.00 ]
+Key: VSCALEFPHZrmk: [ 0.00 0.00 ]
+Key: VSCALEFPHZrmkz: [ 0.00 0.00 ]
+Key: VSCALEFPHZrr: [ 0.00 0.00 ]
+Key: VSCALEFPHZrrb: [ 0.00 0.00 ]
+Key: VSCALEFPHZrrbk: [ 0.00 0.00 ]
+Key: VSCALEFPHZrrbkz: [ 0.00 0.00 ]
+Key: VSCALEFPHZrrk: [ 0.00 0.00 ]
+Key: VSCALEFPHZrrkz: [ 0.00 0.00 ]
+Key: VSCALEFPSZ: [ 0.00 0.00 ]
+Key: VSCALEFPSZrm: [ 0.00 0.00 ]
+Key: VSCALEFPSZrmb: [ 0.00 0.00 ]
+Key: VSCALEFPSZrmbk: [ 0.00 0.00 ]
+Key: VSCALEFPSZrmbkz: [ 0.00 0.00 ]
+Key: VSCALEFPSZrmk: [ 0.00 0.00 ]
+Key: VSCALEFPSZrmkz: [ 0.00 0.00 ]
+Key: VSCALEFPSZrr: [ 0.00 0.00 ]
+Key: VSCALEFPSZrrb: [ 0.00 0.00 ]
+Key: VSCALEFPSZrrbk: [ 0.00 0.00 ]
+Key: VSCALEFPSZrrbkz: [ 0.00 0.00 ]
+Key: VSCALEFPSZrrk: [ 0.00 0.00 ]
+Key: VSCALEFPSZrrkz: [ 0.00 0.00 ]
+Key: VSCALEFSDZrm: [ 0.00 0.00 ]
+Key: VSCALEFSDZrmk: [ 0.00 0.00 ]
+Key: VSCALEFSDZrmkz: [ 0.00 0.00 ]
+Key: VSCALEFSDZrr: [ 0.00 0.00 ]
+Key: VSCALEFSDZrrb_Int: [ 0.00 0.00 ]
+Key: VSCALEFSDZrrbk_Int: [ 0.00 0.00 ]
+Key: VSCALEFSDZrrbkz_Int: [ 0.00 0.00 ]
+Key: VSCALEFSDZrrk: [ 0.00 0.00 ]
+Key: VSCALEFSDZrrkz: [ 0.00 0.00 ]
+Key: VSCALEFSHZrm: [ 0.00 0.00 ]
+Key: VSCALEFSHZrmk: [ 0.00 0.00 ]
+Key: VSCALEFSHZrmkz: [ 0.00 0.00 ]
+Key: VSCALEFSHZrr: [ 0.00 0.00 ]
+Key: VSCALEFSHZrrb_Int: [ 0.00 0.00 ]
+Key: VSCALEFSHZrrbk_Int: [ 0.00 0.00 ]
+Key: VSCALEFSHZrrbkz_Int: [ 0.00 0.00 ]
+Key: VSCALEFSHZrrk: [ 0.00 0.00 ]
+Key: VSCALEFSHZrrkz: [ 0.00 0.00 ]
+Key: VSCALEFSSZrm: [ 0.00 0.00 ]
+Key: VSCALEFSSZrmk: [ 0.00 0.00 ]
+Key: VSCALEFSSZrmkz: [ 0.00 0.00 ]
+Key: VSCALEFSSZrr: [ 0.00 0.00 ]
+Key: VSCALEFSSZrrb_Int: [ 0.00 0.00 ]
+Key: VSCALEFSSZrrbk_Int: [ 0.00 0.00 ]
+Key: VSCALEFSSZrrbkz_Int: [ 0.00 0.00 ]
+Key: VSCALEFSSZrrk: [ 0.00 0.00 ]
+Key: VSCALEFSSZrrkz: [ 0.00 0.00 ]
+Key: VSCATTERDPDZ: [ 0.00 0.00 ]
+Key: VSCATTERDPDZmr: [ 0.00 0.00 ]
+Key: VSCATTERDPSZ: [ 0.00 0.00 ]
+Key: VSCATTERDPSZmr: [ 0.00 0.00 ]
+Key: VSCATTERPF: [ 0.00 0.00 ]
+Key: VSCATTERQPDZ: [ 0.00 0.00 ]
+Key: VSCATTERQPDZmr: [ 0.00 0.00 ]
+Key: VSCATTERQPSZ: [ 0.00 0.00 ]
+Key: VSCATTERQPSZmr: [ 0.00 0.00 ]
+Key: VSHA: [ 0.00 0.00 ]
+Key: VSHUFF: [ 0.00 0.00 ]
+Key: VSHUFI: [ 0.00 0.00 ]
+Key: VSHUFPDYrmi: [ 0.00 0.00 ]
+Key: VSHUFPDYrri: [ 0.00 0.00 ]
+Key: VSHUFPDZ: [ 0.00 0.00 ]
+Key: VSHUFPDZrmbi: [ 0.00 0.00 ]
+Key: VSHUFPDZrmbik: [ 0.00 0.00 ]
+Key: VSHUFPDZrmbikz: [ 0.00 0.00 ]
+Key: VSHUFPDZrmi: [ 0.00 0.00 ]
+Key: VSHUFPDZrmik: [ 0.00 0.00 ]
+Key: VSHUFPDZrmikz: [ 0.00 0.00 ]
+Key: VSHUFPDZrri: [ 0.00 0.00 ]
+Key: VSHUFPDZrrik: [ 0.00 0.00 ]
+Key: VSHUFPDZrrikz: [ 0.00 0.00 ]
+Key: VSHUFPDrmi: [ 0.00 0.00 ]
+Key: VSHUFPDrri: [ 0.00 0.00 ]
+Key: VSHUFPSYrmi: [ 0.00 0.00 ]
+Key: VSHUFPSYrri: [ 0.00 0.00 ]
+Key: VSHUFPSZ: [ 0.00 0.00 ]
+Key: VSHUFPSZrmbi: [ 0.00 0.00 ]
+Key: VSHUFPSZrmbik: [ 0.00 0.00 ]
+Key: VSHUFPSZrmbikz: [ 0.00 0.00 ]
+Key: VSHUFPSZrmi: [ 0.00 0.00 ]
+Key: VSHUFPSZrmik: [ 0.00 0.00 ]
+Key: VSHUFPSZrmikz: [ 0.00 0.00 ]
+Key: VSHUFPSZrri: [ 0.00 0.00 ]
+Key: VSHUFPSZrrik: [ 0.00 0.00 ]
+Key: VSHUFPSZrrikz: [ 0.00 0.00 ]
+Key: VSHUFPSrmi: [ 0.00 0.00 ]
+Key: VSHUFPSrri: [ 0.00 0.00 ]
+Key: VSM: [ 0.00 0.00 ]
+Key: VSQRTBF: [ 0.00 0.00 ]
+Key: VSQRTPDYm: [ 0.00 0.00 ]
+Key: VSQRTPDYr: [ 0.00 0.00 ]
+Key: VSQRTPDZ: [ 0.00 0.00 ]
+Key: VSQRTPDZm: [ 0.00 0.00 ]
+Key: VSQRTPDZmb: [ 0.00 0.00 ]
+Key: VSQRTPDZmbk: [ 0.00 0.00 ]
+Key: VSQRTPDZmbkz: [ 0.00 0.00 ]
+Key: VSQRTPDZmk: [ 0.00 0.00 ]
+Key: VSQRTPDZmkz: [ 0.00 0.00 ]
+Key: VSQRTPDZr: [ 0.00 0.00 ]
+Key: VSQRTPDZrb: [ 0.00 0.00 ]
+Key: VSQRTPDZrbk: [ 0.00 0.00 ]
+Key: VSQRTPDZrbkz: [ 0.00 0.00 ]
+Key: VSQRTPDZrk: [ 0.00 0.00 ]
+Key: VSQRTPDZrkz: [ 0.00 0.00 ]
+Key: VSQRTPDm: [ 0.00 0.00 ]
+Key: VSQRTPDr: [ 0.00 0.00 ]
+Key: VSQRTPHZ: [ 0.00 0.00 ]
+Key: VSQRTPHZm: [ 0.00 0.00 ]
+Key: VSQRTPHZmb: [ 0.00 0.00 ]
+Key: VSQRTPHZmbk: [ 0.00 0.00 ]
+Key: VSQRTPHZmbkz: [ 0.00 0.00 ]
+Key: VSQRTPHZmk: [ 0.00 0.00 ]
+Key: VSQRTPHZmkz: [ 0.00 0.00 ]
+Key: VSQRTPHZr: [ 0.00 0.00 ]
+Key: VSQRTPHZrb: [ 0.00 0.00 ]
+Key: VSQRTPHZrbk: [ 0.00 0.00 ]
+Key: VSQRTPHZrbkz: [ 0.00 0.00 ]
+Key: VSQRTPHZrk: [ 0.00 0.00 ]
+Key: VSQRTPHZrkz: [ 0.00 0.00 ]
+Key: VSQRTPSYm: [ 0.00 0.00 ]
+Key: VSQRTPSYr: [ 0.00 0.00 ]
+Key: VSQRTPSZ: [ 0.00 0.00 ]
+Key: VSQRTPSZm: [ 0.00 0.00 ]
+Key: VSQRTPSZmb: [ 0.00 0.00 ]
+Key: VSQRTPSZmbk: [ 0.00 0.00 ]
+Key: VSQRTPSZmbkz: [ 0.00 0.00 ]
+Key: VSQRTPSZmk: [ 0.00 0.00 ]
+Key: VSQRTPSZmkz: [ 0.00 0.00 ]
+Key: VSQRTPSZr: [ 0.00 0.00 ]
+Key: VSQRTPSZrb: [ 0.00 0.00 ]
+Key: VSQRTPSZrbk: [ 0.00 0.00 ]
+Key: VSQRTPSZrbkz: [ 0.00 0.00 ]
+Key: VSQRTPSZrk: [ 0.00 0.00 ]
+Key: VSQRTPSZrkz: [ 0.00 0.00 ]
+Key: VSQRTPSm: [ 0.00 0.00 ]
+Key: VSQRTPSr: [ 0.00 0.00 ]
+Key: VSQRTSDZm: [ 0.00 0.00 ]
+Key: VSQRTSDZm_Int: [ 0.00 0.00 ]
+Key: VSQRTSDZmk_Int: [ 0.00 0.00 ]
+Key: VSQRTSDZmkz_Int: [ 0.00 0.00 ]
+Key: VSQRTSDZr: [ 0.00 0.00 ]
+Key: VSQRTSDZr_Int: [ 0.00 0.00 ]
+Key: VSQRTSDZrb_Int: [ 0.00 0.00 ]
+Key: VSQRTSDZrbk_Int: [ 0.00 0.00 ]
+Key: VSQRTSDZrbkz_Int: [ 0.00 0.00 ]
+Key: VSQRTSDZrk_Int: [ 0.00 0.00 ]
+Key: VSQRTSDZrkz_Int: [ 0.00 0.00 ]
+Key: VSQRTSDm: [ 0.00 0.00 ]
+Key: VSQRTSDm_Int: [ 0.00 0.00 ]
+Key: VSQRTSDr: [ 0.00 0.00 ]
+Key: VSQRTSDr_Int: [ 0.00 0.00 ]
+Key: VSQRTSHZm: [ 0.00 0.00 ]
+Key: VSQRTSHZm_Int: [ 0.00 0.00 ]
+Key: VSQRTSHZmk_Int: [ 0.00 0.00 ]
+Key: VSQRTSHZmkz_Int: [ 0.00 0.00 ]
+Key: VSQRTSHZr: [ 0.00 0.00 ]
+Key: VSQRTSHZr_Int: [ 0.00 0.00 ]
+Key: VSQRTSHZrb_Int: [ 0.00 0.00 ]
+Key: VSQRTSHZrbk_Int: [ 0.00 0.00 ]
+Key: VSQRTSHZrbkz_Int: [ 0.00 0.00 ]
+Key: VSQRTSHZrk_Int: [ 0.00 0.00 ]
+Key: VSQRTSHZrkz_Int: [ 0.00 0.00 ]
+Key: VSQRTSSZm: [ 0.00 0.00 ]
+Key: VSQRTSSZm_Int: [ 0.00 0.00 ]
+Key: VSQRTSSZmk_Int: [ 0.00 0.00 ]
+Key: VSQRTSSZmkz_Int: [ 0.00 0.00 ]
+Key: VSQRTSSZr: [ 0.00 0.00 ]
+Key: VSQRTSSZr_Int: [ 0.00 0.00 ]
+Key: VSQRTSSZrb_Int: [ 0.00 0.00 ]
+Key: VSQRTSSZrbk_Int: [ 0.00 0.00 ]
+Key: VSQRTSSZrbkz_Int: [ 0.00 0.00 ]
+Key: VSQRTSSZrk_Int: [ 0.00 0.00 ]
+Key: VSQRTSSZrkz_Int: [ 0.00 0.00 ]
+Key: VSQRTSSm: [ 0.00 0.00 ]
+Key: VSQRTSSm_Int: [ 0.00 0.00 ]
+Key: VSQRTSSr: [ 0.00 0.00 ]
+Key: VSQRTSSr_Int: [ 0.00 0.00 ]
+Key: VSTMXCSR: [ 0.00 0.00 ]
+Key: VSUBBF: [ 0.00 0.00 ]
+Key: VSUBPDYrm: [ 0.00 0.00 ]
+Key: VSUBPDYrr: [ 0.00 0.00 ]
+Key: VSUBPDZ: [ 0.00 0.00 ]
+Key: VSUBPDZrm: [ 0.00 0.00 ]
+Key: VSUBPDZrmb: [ 0.00 0.00 ]
+Key: VSUBPDZrmbk: [ 0.00 0.00 ]
+Key: VSUBPDZrmbkz: [ 0.00 0.00 ]
+Key: VSUBPDZrmk: [ 0.00 0.00 ]
+Key: VSUBPDZrmkz: [ 0.00 0.00 ]
+Key: VSUBPDZrr: [ 0.00 0.00 ]
+Key: VSUBPDZrrb: [ 0.00 0.00 ]
+Key: VSUBPDZrrbk: [ 0.00 0.00 ]
+Key: VSUBPDZrrbkz: [ 0.00 0.00 ]
+Key: VSUBPDZrrk: [ 0.00 0.00 ]
+Key: VSUBPDZrrkz: [ 0.00 0.00 ]
+Key: VSUBPDrm: [ 0.00 0.00 ]
+Key: VSUBPDrr: [ 0.00 0.00 ]
+Key: VSUBPHZ: [ 0.00 0.00 ]
+Key: VSUBPHZrm: [ 0.00 0.00 ]
+Key: VSUBPHZrmb: [ 0.00 0.00 ]
+Key: VSUBPHZrmbk: [ 0.00 0.00 ]
+Key: VSUBPHZrmbkz: [ 0.00 0.00 ]
+Key: VSUBPHZrmk: [ 0.00 0.00 ]
+Key: VSUBPHZrmkz: [ 0.00 0.00 ]
+Key: VSUBPHZrr: [ 0.00 0.00 ]
+Key: VSUBPHZrrb: [ 0.00 0.00 ]
+Key: VSUBPHZrrbk: [ 0.00 0.00 ]
+Key: VSUBPHZrrbkz: [ 0.00 0.00 ]
+Key: VSUBPHZrrk: [ 0.00 0.00 ]
+Key: VSUBPHZrrkz: [ 0.00 0.00 ]
+Key: VSUBPSYrm: [ 0.00 0.00 ]
+Key: VSUBPSYrr: [ 0.00 0.00 ]
+Key: VSUBPSZ: [ 0.00 0.00 ]
+Key: VSUBPSZrm: [ 0.00 0.00 ]
+Key: VSUBPSZrmb: [ 0.00 0.00 ]
+Key: VSUBPSZrmbk: [ 0.00 0.00 ]
+Key: VSUBPSZrmbkz: [ 0.00 0.00 ]
+Key: VSUBPSZrmk: [ 0.00 0.00 ]
+Key: VSUBPSZrmkz: [ 0.00 0.00 ]
+Key: VSUBPSZrr: [ 0.00 0.00 ]
+Key: VSUBPSZrrb: [ 0.00 0.00 ]
+Key: VSUBPSZrrbk: [ 0.00 0.00 ]
+Key: VSUBPSZrrbkz: [ 0.00 0.00 ]
+Key: VSUBPSZrrk: [ 0.00 0.00 ]
+Key: VSUBPSZrrkz: [ 0.00 0.00 ]
+Key: VSUBPSrm: [ 0.00 0.00 ]
+Key: VSUBPSrr: [ 0.00 0.00 ]
+Key: VSUBSDZrm: [ 0.00 0.00 ]
+Key: VSUBSDZrm_Int: [ 0.00 0.00 ]
+Key: VSUBSDZrmk_Int: [ 0.00 0.00 ]
+Key: VSUBSDZrmkz_Int: [ 0.00 0.00 ]
+Key: VSUBSDZrr: [ 0.00 0.00 ]
+Key: VSUBSDZrr_Int: [ 0.00 0.00 ]
+Key: VSUBSDZrrb_Int: [ 0.00 0.00 ]
+Key: VSUBSDZrrbk_Int: [ 0.00 0.00 ]
+Key: VSUBSDZrrbkz_Int: [ 0.00 0.00 ]
+Key: VSUBSDZrrk_Int: [ 0.00 0.00 ]
+Key: VSUBSDZrrkz_Int: [ 0.00 0.00 ]
+Key: VSUBSDrm: [ 0.00 0.00 ]
+Key: VSUBSDrm_Int: [ 0.00 0.00 ]
+Key: VSUBSDrr: [ 0.00 0.00 ]
+Key: VSUBSDrr_Int: [ 0.00 0.00 ]
+Key: VSUBSHZrm: [ 0.00 0.00 ]
+Key: VSUBSHZrm_Int: [ 0.00 0.00 ]
+Key: VSUBSHZrmk_Int: [ 0.00 0.00 ]
+Key: VSUBSHZrmkz_Int: [ 0.00 0.00 ]
+Key: VSUBSHZrr: [ 0.00 0.00 ]
+Key: VSUBSHZrr_Int: [ 0.00 0.00 ]
+Key: VSUBSHZrrb_Int: [ 0.00 0.00 ]
+Key: VSUBSHZrrbk_Int: [ 0.00 0.00 ]
+Key: VSUBSHZrrbkz_Int: [ 0.00 0.00 ]
+Key: VSUBSHZrrk_Int: [ 0.00 0.00 ]
+Key: VSUBSHZrrkz_Int: [ 0.00 0.00 ]
+Key: VSUBSSZrm: [ 0.00 0.00 ]
+Key: VSUBSSZrm_Int: [ 0.00 0.00 ]
+Key: VSUBSSZrmk_Int: [ 0.00 0.00 ]
+Key: VSUBSSZrmkz_Int: [ 0.00 0.00 ]
+Key: VSUBSSZrr: [ 0.00 0.00 ]
+Key: VSUBSSZrr_Int: [ 0.00 0.00 ]
+Key: VSUBSSZrrb_Int: [ 0.00 0.00 ]
+Key: VSUBSSZrrbk_Int: [ 0.00 0.00 ]
+Key: VSUBSSZrrbkz_Int: [ 0.00 0.00 ]
+Key: VSUBSSZrrk_Int: [ 0.00 0.00 ]
+Key: VSUBSSZrrkz_Int: [ 0.00 0.00 ]
+Key: VSUBSSrm: [ 0.00 0.00 ]
+Key: VSUBSSrm_Int: [ 0.00 0.00 ]
+Key: VSUBSSrr: [ 0.00 0.00 ]
+Key: VSUBSSrr_Int: [ 0.00 0.00 ]
+Key: VTESTPDYrm: [ 0.00 0.00 ]
+Key: VTESTPDYrr: [ 0.00 0.00 ]
+Key: VTESTPDrm: [ 0.00 0.00 ]
+Key: VTESTPDrr: [ 0.00 0.00 ]
+Key: VTESTPSYrm: [ 0.00 0.00 ]
+Key: VTESTPSYrr: [ 0.00 0.00 ]
+Key: VTESTPSrm: [ 0.00 0.00 ]
+Key: VTESTPSrr: [ 0.00 0.00 ]
+Key: VUCOMISDZrm: [ 0.00 0.00 ]
+Key: VUCOMISDZrm_Int: [ 0.00 0.00 ]
+Key: VUCOMISDZrr: [ 0.00 0.00 ]
+Key: VUCOMISDZrr_Int: [ 0.00 0.00 ]
+Key: VUCOMISDZrrb: [ 0.00 0.00 ]
+Key: VUCOMISDrm: [ 0.00 0.00 ]
+Key: VUCOMISDrm_Int: [ 0.00 0.00 ]
+Key: VUCOMISDrr: [ 0.00 0.00 ]
+Key: VUCOMISDrr_Int: [ 0.00 0.00 ]
+Key: VUCOMISHZrm: [ 0.00 0.00 ]
+Key: VUCOMISHZrm_Int: [ 0.00 0.00 ]
+Key: VUCOMISHZrr: [ 0.00 0.00 ]
+Key: VUCOMISHZrr_Int: [ 0.00 0.00 ]
+Key: VUCOMISHZrrb: [ 0.00 0.00 ]
+Key: VUCOMISSZrm: [ 0.00 0.00 ]
+Key: VUCOMISSZrm_Int: [ 0.00 0.00 ]
+Key: VUCOMISSZrr: [ 0.00 0.00 ]
+Key: VUCOMISSZrr_Int: [ 0.00 0.00 ]
+Key: VUCOMISSZrrb: [ 0.00 0.00 ]
+Key: VUCOMISSrm: [ 0.00 0.00 ]
+Key: VUCOMISSrm_Int: [ 0.00 0.00 ]
+Key: VUCOMISSrr: [ 0.00 0.00 ]
+Key: VUCOMISSrr_Int: [ 0.00 0.00 ]
+Key: VUCOMXSDZrm: [ 0.00 0.00 ]
+Key: VUCOMXSDZrm_Int: [ 0.00 0.00 ]
+Key: VUCOMXSDZrr: [ 0.00 0.00 ]
+Key: VUCOMXSDZrr_Int: [ 0.00 0.00 ]
+Key: VUCOMXSDZrrb_Int: [ 0.00 0.00 ]
+Key: VUCOMXSHZrm: [ 0.00 0.00 ]
+Key: VUCOMXSHZrm_Int: [ 0.00 0.00 ]
+Key: VUCOMXSHZrr: [ 0.00 0.00 ]
+Key: VUCOMXSHZrr_Int: [ 0.00 0.00 ]
+Key: VUCOMXSHZrrb_Int: [ 0.00 0.00 ]
+Key: VUCOMXSSZrm: [ 0.00 0.00 ]
+Key: VUCOMXSSZrm_Int: [ 0.00 0.00 ]
+Key: VUCOMXSSZrr: [ 0.00 0.00 ]
+Key: VUCOMXSSZrr_Int: [ 0.00 0.00 ]
+Key: VUCOMXSSZrrb_Int: [ 0.00 0.00 ]
+Key: VUNPCKHPDYrm: [ 0.00 0.00 ]
+Key: VUNPCKHPDYrr: [ 0.00 0.00 ]
+Key: VUNPCKHPDZ: [ 0.00 0.00 ]
+Key: VUNPCKHPDZrm: [ 0.00 0.00 ]
+Key: VUNPCKHPDZrmb: [ 0.00 0.00 ]
+Key: VUNPCKHPDZrmbk: [ 0.00 0.00 ]
+Key: VUNPCKHPDZrmbkz: [ 0.00 0.00 ]
+Key: VUNPCKHPDZrmk: [ 0.00 0.00 ]
+Key: VUNPCKHPDZrmkz: [ 0.00 0.00 ]
+Key: VUNPCKHPDZrr: [ 0.00 0.00 ]
+Key: VUNPCKHPDZrrk: [ 0.00 0.00 ]
+Key: VUNPCKHPDZrrkz: [ 0.00 0.00 ]
+Key: VUNPCKHPDrm: [ 0.00 0.00 ]
+Key: VUNPCKHPDrr: [ 0.00 0.00 ]
+Key: VUNPCKHPSYrm: [ 0.00 0.00 ]
+Key: VUNPCKHPSYrr: [ 0.00 0.00 ]
+Key: VUNPCKHPSZ: [ 0.00 0.00 ]
+Key: VUNPCKHPSZrm: [ 0.00 0.00 ]
+Key: VUNPCKHPSZrmb: [ 0.00 0.00 ]
+Key: VUNPCKHPSZrmbk: [ 0.00 0.00 ]
+Key: VUNPCKHPSZrmbkz: [ 0.00 0.00 ]
+Key: VUNPCKHPSZrmk: [ 0.00 0.00 ]
+Key: VUNPCKHPSZrmkz: [ 0.00 0.00 ]
+Key: VUNPCKHPSZrr: [ 0.00 0.00 ]
+Key: VUNPCKHPSZrrk: [ 0.00 0.00 ]
+Key: VUNPCKHPSZrrkz: [ 0.00 0.00 ]
+Key: VUNPCKHPSrm: [ 0.00 0.00 ]
+Key: VUNPCKHPSrr: [ 0.00 0.00 ]
+Key: VUNPCKLPDYrm: [ 0.00 0.00 ]
+Key: VUNPCKLPDYrr: [ 0.00 0.00 ]
+Key: VUNPCKLPDZ: [ 0.00 0.00 ]
+Key: VUNPCKLPDZrm: [ 0.00 0.00 ]
+Key: VUNPCKLPDZrmb: [ 0.00 0.00 ]
+Key: VUNPCKLPDZrmbk: [ 0.00 0.00 ]
+Key: VUNPCKLPDZrmbkz: [ 0.00 0.00 ]
+Key: VUNPCKLPDZrmk: [ 0.00 0.00 ]
+Key: VUNPCKLPDZrmkz: [ 0.00 0.00 ]
+Key: VUNPCKLPDZrr: [ 0.00 0.00 ]
+Key: VUNPCKLPDZrrk: [ 0.00 0.00 ]
+Key: VUNPCKLPDZrrkz: [ 0.00 0.00 ]
+Key: VUNPCKLPDrm: [ 0.00 0.00 ]
+Key: VUNPCKLPDrr: [ 0.00 0.00 ]
+Key: VUNPCKLPSYrm: [ 0.00 0.00 ]
+Key: VUNPCKLPSYrr: [ 0.00 0.00 ]
+Key: VUNPCKLPSZ: [ 0.00 0.00 ]
+Key: VUNPCKLPSZrm: [ 0.00 0.00 ]
+Key: VUNPCKLPSZrmb: [ 0.00 0.00 ]
+Key: VUNPCKLPSZrmbk: [ 0.00 0.00 ]
+Key: VUNPCKLPSZrmbkz: [ 0.00 0.00 ]
+Key: VUNPCKLPSZrmk: [ 0.00 0.00 ]
+Key: VUNPCKLPSZrmkz: [ 0.00 0.00 ]
+Key: VUNPCKLPSZrr: [ 0.00 0.00 ]
+Key: VUNPCKLPSZrrk: [ 0.00 0.00 ]
+Key: VUNPCKLPSZrrkz: [ 0.00 0.00 ]
+Key: VUNPCKLPSrm: [ 0.00 0.00 ]
+Key: VUNPCKLPSrr: [ 0.00 0.00 ]
+Key: VXORPDYrm: [ 0.00 0.00 ]
+Key: VXORPDYrr: [ 0.00 0.00 ]
+Key: VXORPDZ: [ 0.00 0.00 ]
+Key: VXORPDZrm: [ 0.00 0.00 ]
+Key: VXORPDZrmb: [ 0.00 0.00 ]
+Key: VXORPDZrmbk: [ 0.00 0.00 ]
+Key: VXORPDZrmbkz: [ 0.00 0.00 ]
+Key: VXORPDZrmk: [ 0.00 0.00 ]
+Key: VXORPDZrmkz: [ 0.00 0.00 ]
+Key: VXORPDZrr: [ 0.00 0.00 ]
+Key: VXORPDZrrk: [ 0.00 0.00 ]
+Key: VXORPDZrrkz: [ 0.00 0.00 ]
+Key: VXORPDrm: [ 0.00 0.00 ]
+Key: VXORPDrr: [ 0.00 0.00 ]
+Key: VXORPSYrm: [ 0.00 0.00 ]
+Key: VXORPSYrr: [ 0.00 0.00 ]
+Key: VXORPSZ: [ 0.00 0.00 ]
+Key: VXORPSZrm: [ 0.00 0.00 ]
+Key: VXORPSZrmb: [ 0.00 0.00 ]
+Key: VXORPSZrmbk: [ 0.00 0.00 ]
+Key: VXORPSZrmbkz: [ 0.00 0.00 ]
+Key: VXORPSZrmk: [ 0.00 0.00 ]
+Key: VXORPSZrmkz: [ 0.00 0.00 ]
+Key: VXORPSZrr: [ 0.00 0.00 ]
+Key: VXORPSZrrk: [ 0.00 0.00 ]
+Key: VXORPSZrrkz: [ 0.00 0.00 ]
+Key: VXORPSrm: [ 0.00 0.00 ]
+Key: VXORPSrr: [ 0.00 0.00 ]
+Key: VZEROALL: [ 0.00 0.00 ]
+Key: VZEROUPPER: [ 0.00 0.00 ]
+Key: V_SET: [ 0.00 0.00 ]
+Key: V_SETALLONES: [ 0.00 0.00 ]
+Key: WAIT: [ 0.00 0.00 ]
+Key: WBINVD: [ 0.00 0.00 ]
+Key: WBNOINVD: [ 0.00 0.00 ]
+Key: WRFLAGS: [ 0.00 0.00 ]
+Key: WRFSBASE: [ 0.00 0.00 ]
+Key: WRGSBASE: [ 0.00 0.00 ]
+Key: WRMSR: [ 0.00 0.00 ]
+Key: WRMSRLIST: [ 0.00 0.00 ]
+Key: WRMSRNS: [ 0.00 0.00 ]
+Key: WRMSRNSir: [ 0.00 0.00 ]
+Key: WRMSRNSir_EVEX: [ 0.00 0.00 ]
+Key: WRPKRUr: [ 0.00 0.00 ]
+Key: WRSSD: [ 0.00 0.00 ]
+Key: WRSSD_EVEX: [ 0.00 0.00 ]
+Key: WRSSQ: [ 0.00 0.00 ]
+Key: WRSSQ_EVEX: [ 0.00 0.00 ]
+Key: WRUSSD: [ 0.00 0.00 ]
+Key: WRUSSD_EVEX: [ 0.00 0.00 ]
+Key: WRUSSQ: [ 0.00 0.00 ]
+Key: WRUSSQ_EVEX: [ 0.00 0.00 ]
+Key: XABORT: [ 0.00 0.00 ]
+Key: XABORT_DEF: [ 0.00 0.00 ]
+Key: XACQUIRE_PREFIX: [ 0.00 0.00 ]
+Key: XADD: [ 0.00 0.00 ]
+Key: XAM_F: [ 0.00 0.00 ]
+Key: XAM_Fp: [ 0.00 0.00 ]
+Key: XBEGIN: [ 0.00 0.00 ]
+Key: XCHG: [ 0.00 0.00 ]
+Key: XCH_F: [ 0.00 0.00 ]
+Key: XCRYPTCBC: [ 0.00 0.00 ]
+Key: XCRYPTCFB: [ 0.00 0.00 ]
+Key: XCRYPTCTR: [ 0.00 0.00 ]
+Key: XCRYPTECB: [ 0.00 0.00 ]
+Key: XCRYPTOFB: [ 0.00 0.00 ]
+Key: XEND: [ 0.00 0.00 ]
+Key: XGETBV: [ 0.00 0.00 ]
+Key: XLAT: [ 0.00 0.00 ]
+Key: XOR: [ 0.00 0.00 ]
+Key: XORPDrm: [ 0.00 0.00 ]
+Key: XORPDrr: [ 0.00 0.00 ]
+Key: XORPSrm: [ 0.00 0.00 ]
+Key: XORPSrr: [ 0.00 0.00 ]
+Key: XRELEASE_PREFIX: [ 0.00 0.00 ]
+Key: XRESLDTRK: [ 0.00 0.00 ]
+Key: XRSTOR: [ 0.00 0.00 ]
+Key: XRSTORS: [ 0.00 0.00 ]
+Key: XSAVE: [ 0.00 0.00 ]
+Key: XSAVEC: [ 0.00 0.00 ]
+Key: XSAVEOPT: [ 0.00 0.00 ]
+Key: XSAVES: [ 0.00 0.00 ]
+Key: XSETBV: [ 0.00 0.00 ]
+Key: XSHA: [ 0.00 0.00 ]
+Key: XSTORE: [ 0.00 0.00 ]
+Key: XSUSLDTRK: [ 0.00 0.00 ]
+Key: XTEST: [ 0.00 0.00 ]
diff --git a/llvm/test/CodeGen/MIR2Vec/vocab-basic.ll b/llvm/test/CodeGen/MIR2Vec/vocab-basic.ll
new file mode 100644
index 0000000..a57dd0b
--- /dev/null
+++ b/llvm/test/CodeGen/MIR2Vec/vocab-basic.ll
@@ -0,0 +1,14 @@
+; REQUIRES: x86_64-linux
+; RUN: llc -o /dev/null -print-mir2vec-vocab -mir2vec-vocab-path=%S/Inputs/mir2vec_dummy_2D_vocab.json %s 2> %t1.log
+; RUN: diff %S/Inputs/reference_x86_vocab_print.txt %t1.log
+
+; RUN: llc -o /dev/null -print-mir2vec-vocab -mir2vec-opc-weight=1 -mir2vec-vocab-path=%S/Inputs/mir2vec_dummy_2D_vocab.json %s 2> %t1.log
+; RUN: diff %S/Inputs/reference_x86_vocab_print.txt %t1.log
+
+; RUN: llc -o /dev/null -print-mir2vec-vocab -mir2vec-opc-weight=0.5 -mir2vec-vocab-path=%S/Inputs/mir2vec_dummy_2D_vocab.json %s 2> %t1.log
+; RUN: diff %S/Inputs/reference_x86_vocab_wo=0.5_print.txt %t1.log
+
+define dso_local void @test() {
+ entry:
+ ret void
+}
diff --git a/llvm/test/CodeGen/MIR2Vec/vocab-error-handling.ll b/llvm/test/CodeGen/MIR2Vec/vocab-error-handling.ll
new file mode 100644
index 0000000..1da516a
--- /dev/null
+++ b/llvm/test/CodeGen/MIR2Vec/vocab-error-handling.ll
@@ -0,0 +1,15 @@
+; REQUIRES: x86_64-linux
+; RUN: not llc -o /dev/null -print-mir2vec-vocab %s 2>&1 | FileCheck %s --check-prefix=CHECK-INVALID
+; RUN: not llc -o /dev/null -print-mir2vec-vocab -mir2vec-vocab-path=%S/Inputs/mir2vec_zero_vocab.json %s 2>&1 | FileCheck %s --check-prefix=CHECK-ZERO-DIM
+; RUN: not llc -o /dev/null -print-mir2vec-vocab -mir2vec-vocab-path=%S/Inputs/mir2vec_invalid_vocab.json %s 2>&1 | FileCheck %s --check-prefix=CHECK-NO-ENTITIES
+; RUN: not llc -o /dev/null -print-mir2vec-vocab -mir2vec-vocab-path=%S/Inputs/mir2vec_inconsistent_dims.json %s 2>&1 | FileCheck %s --check-prefix=CHECK-INCONSISTENT-DIMS
+
+define dso_local void @test() {
+ entry:
+ ret void
+}
+
+; CHECK-INVALID: error: MIR2Vec vocabulary file path not specified; set it using --mir2vec-vocab-path
+; CHECK-ZERO-DIM: error: Dimension of 'entities' section of the vocabulary is zero
+; CHECK-NO-ENTITIES: error: Missing 'entities' section in vocabulary file
+; CHECK-INCONSISTENT-DIMS: error: All vectors in the 'entities' section of the vocabulary are not of the same dimension
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/atomic-load-store-fp.ll b/llvm/test/CodeGen/RISCV/GlobalISel/atomic-load-store-fp.ll
index 4ad2d2c..4914357 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/atomic-load-store-fp.ll
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/atomic-load-store-fp.ll
@@ -23,6 +23,16 @@
; RUN: llc -mtriple=riscv64 -global-isel -mattr=+d,+a,+ztso -verify-machineinstrs < %s \
; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-TSO-TRAILING-FENCE %s
+; RUN: llc -mtriple=riscv32 -global-isel -mattr=+d,+a,+experimental-zalasr -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-ZALASR,RV32IA-ZALASR-WMO %s
+; RUN: llc -mtriple=riscv32 -global-isel -mattr=+d,+a,+experimental-zalasr,+ztso -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-ZALASR,RV32IA-ZALASR-TSO %s
+
+; RUN: llc -mtriple=riscv64 -global-isel -mattr=+d,+a,+experimental-zalasr -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-ZALASR,RV64IA-ZALASR-WMO %s
+; RUN: llc -mtriple=riscv64 -global-isel -mattr=+d,+a,+experimental-zalasr,+ztso -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-ZALASR,RV64IA-ZALASR-TSO %s
+
define float @atomic_load_f32_unordered(ptr %a) nounwind {
; RV32I-LABEL: atomic_load_f32_unordered:
@@ -171,6 +181,30 @@ define float @atomic_load_f32_acquire(ptr %a) nounwind {
; RV64IA-TSO-TRAILING-FENCE-NEXT: lw a0, 0(a0)
; RV64IA-TSO-TRAILING-FENCE-NEXT: fmv.w.x fa0, a0
; RV64IA-TSO-TRAILING-FENCE-NEXT: ret
+;
+; RV32IA-ZALASR-WMO-LABEL: atomic_load_f32_acquire:
+; RV32IA-ZALASR-WMO: # %bb.0:
+; RV32IA-ZALASR-WMO-NEXT: lw.aq a0, (a0)
+; RV32IA-ZALASR-WMO-NEXT: fmv.w.x fa0, a0
+; RV32IA-ZALASR-WMO-NEXT: ret
+;
+; RV32IA-ZALASR-TSO-LABEL: atomic_load_f32_acquire:
+; RV32IA-ZALASR-TSO: # %bb.0:
+; RV32IA-ZALASR-TSO-NEXT: lw a0, 0(a0)
+; RV32IA-ZALASR-TSO-NEXT: fmv.w.x fa0, a0
+; RV32IA-ZALASR-TSO-NEXT: ret
+;
+; RV64IA-ZALASR-WMO-LABEL: atomic_load_f32_acquire:
+; RV64IA-ZALASR-WMO: # %bb.0:
+; RV64IA-ZALASR-WMO-NEXT: lw.aq a0, (a0)
+; RV64IA-ZALASR-WMO-NEXT: fmv.w.x fa0, a0
+; RV64IA-ZALASR-WMO-NEXT: ret
+;
+; RV64IA-ZALASR-TSO-LABEL: atomic_load_f32_acquire:
+; RV64IA-ZALASR-TSO: # %bb.0:
+; RV64IA-ZALASR-TSO-NEXT: lw a0, 0(a0)
+; RV64IA-ZALASR-TSO-NEXT: fmv.w.x fa0, a0
+; RV64IA-ZALASR-TSO-NEXT: ret
%1 = load atomic float, ptr %a acquire, align 4
ret float %1
}
@@ -256,6 +290,18 @@ define float @atomic_load_f32_seq_cst(ptr %a) nounwind {
; RV64IA-TSO-TRAILING-FENCE-NEXT: lw a0, 0(a0)
; RV64IA-TSO-TRAILING-FENCE-NEXT: fmv.w.x fa0, a0
; RV64IA-TSO-TRAILING-FENCE-NEXT: ret
+;
+; RV32IA-ZALASR-LABEL: atomic_load_f32_seq_cst:
+; RV32IA-ZALASR: # %bb.0:
+; RV32IA-ZALASR-NEXT: lw.aq a0, (a0)
+; RV32IA-ZALASR-NEXT: fmv.w.x fa0, a0
+; RV32IA-ZALASR-NEXT: ret
+;
+; RV64IA-ZALASR-LABEL: atomic_load_f32_seq_cst:
+; RV64IA-ZALASR: # %bb.0:
+; RV64IA-ZALASR-NEXT: lw.aq a0, (a0)
+; RV64IA-ZALASR-NEXT: fmv.w.x fa0, a0
+; RV64IA-ZALASR-NEXT: ret
%1 = load atomic float, ptr %a seq_cst, align 4
ret float %1
}
@@ -414,6 +460,18 @@ define double @atomic_load_f64_acquire(ptr %a) nounwind {
; RV64IA-TSO-TRAILING-FENCE-NEXT: ld a0, 0(a0)
; RV64IA-TSO-TRAILING-FENCE-NEXT: fmv.d.x fa0, a0
; RV64IA-TSO-TRAILING-FENCE-NEXT: ret
+;
+; RV64IA-ZALASR-WMO-LABEL: atomic_load_f64_acquire:
+; RV64IA-ZALASR-WMO: # %bb.0:
+; RV64IA-ZALASR-WMO-NEXT: ld.aq a0, (a0)
+; RV64IA-ZALASR-WMO-NEXT: fmv.d.x fa0, a0
+; RV64IA-ZALASR-WMO-NEXT: ret
+;
+; RV64IA-ZALASR-TSO-LABEL: atomic_load_f64_acquire:
+; RV64IA-ZALASR-TSO: # %bb.0:
+; RV64IA-ZALASR-TSO-NEXT: ld a0, 0(a0)
+; RV64IA-ZALASR-TSO-NEXT: fmv.d.x fa0, a0
+; RV64IA-ZALASR-TSO-NEXT: ret
%1 = load atomic double, ptr %a acquire, align 8
ret double %1
}
@@ -484,6 +542,12 @@ define double @atomic_load_f64_seq_cst(ptr %a) nounwind {
; RV64IA-TSO-TRAILING-FENCE-NEXT: ld a0, 0(a0)
; RV64IA-TSO-TRAILING-FENCE-NEXT: fmv.d.x fa0, a0
; RV64IA-TSO-TRAILING-FENCE-NEXT: ret
+;
+; RV64IA-ZALASR-LABEL: atomic_load_f64_seq_cst:
+; RV64IA-ZALASR: # %bb.0:
+; RV64IA-ZALASR-NEXT: ld.aq a0, (a0)
+; RV64IA-ZALASR-NEXT: fmv.d.x fa0, a0
+; RV64IA-ZALASR-NEXT: ret
%1 = load atomic double, ptr %a seq_cst, align 8
ret double %1
}
@@ -635,6 +699,30 @@ define void @atomic_store_f32_release(ptr %a, float %b) nounwind {
; RV64IA-TSO-TRAILING-FENCE-NEXT: fmv.x.w a1, fa0
; RV64IA-TSO-TRAILING-FENCE-NEXT: sw a1, 0(a0)
; RV64IA-TSO-TRAILING-FENCE-NEXT: ret
+;
+; RV32IA-ZALASR-WMO-LABEL: atomic_store_f32_release:
+; RV32IA-ZALASR-WMO: # %bb.0:
+; RV32IA-ZALASR-WMO-NEXT: fmv.x.w a1, fa0
+; RV32IA-ZALASR-WMO-NEXT: sw.rl a1, (a0)
+; RV32IA-ZALASR-WMO-NEXT: ret
+;
+; RV32IA-ZALASR-TSO-LABEL: atomic_store_f32_release:
+; RV32IA-ZALASR-TSO: # %bb.0:
+; RV32IA-ZALASR-TSO-NEXT: fmv.x.w a1, fa0
+; RV32IA-ZALASR-TSO-NEXT: sw a1, 0(a0)
+; RV32IA-ZALASR-TSO-NEXT: ret
+;
+; RV64IA-ZALASR-WMO-LABEL: atomic_store_f32_release:
+; RV64IA-ZALASR-WMO: # %bb.0:
+; RV64IA-ZALASR-WMO-NEXT: fmv.x.w a1, fa0
+; RV64IA-ZALASR-WMO-NEXT: sw.rl a1, (a0)
+; RV64IA-ZALASR-WMO-NEXT: ret
+;
+; RV64IA-ZALASR-TSO-LABEL: atomic_store_f32_release:
+; RV64IA-ZALASR-TSO: # %bb.0:
+; RV64IA-ZALASR-TSO-NEXT: fmv.x.w a1, fa0
+; RV64IA-ZALASR-TSO-NEXT: sw a1, 0(a0)
+; RV64IA-ZALASR-TSO-NEXT: ret
store atomic float %b, ptr %a release, align 4
ret void
}
@@ -718,6 +806,18 @@ define void @atomic_store_f32_seq_cst(ptr %a, float %b) nounwind {
; RV64IA-TSO-TRAILING-FENCE-NEXT: sw a1, 0(a0)
; RV64IA-TSO-TRAILING-FENCE-NEXT: fence rw, rw
; RV64IA-TSO-TRAILING-FENCE-NEXT: ret
+;
+; RV32IA-ZALASR-LABEL: atomic_store_f32_seq_cst:
+; RV32IA-ZALASR: # %bb.0:
+; RV32IA-ZALASR-NEXT: fmv.x.w a1, fa0
+; RV32IA-ZALASR-NEXT: sw.rl a1, (a0)
+; RV32IA-ZALASR-NEXT: ret
+;
+; RV64IA-ZALASR-LABEL: atomic_store_f32_seq_cst:
+; RV64IA-ZALASR: # %bb.0:
+; RV64IA-ZALASR-NEXT: fmv.x.w a1, fa0
+; RV64IA-ZALASR-NEXT: sw.rl a1, (a0)
+; RV64IA-ZALASR-NEXT: ret
store atomic float %b, ptr %a seq_cst, align 4
ret void
}
@@ -876,6 +976,18 @@ define void @atomic_store_f64_release(ptr %a, double %b) nounwind {
; RV64IA-TSO-TRAILING-FENCE-NEXT: fmv.x.d a1, fa0
; RV64IA-TSO-TRAILING-FENCE-NEXT: sd a1, 0(a0)
; RV64IA-TSO-TRAILING-FENCE-NEXT: ret
+;
+; RV64IA-ZALASR-WMO-LABEL: atomic_store_f64_release:
+; RV64IA-ZALASR-WMO: # %bb.0:
+; RV64IA-ZALASR-WMO-NEXT: fmv.x.d a1, fa0
+; RV64IA-ZALASR-WMO-NEXT: sd.rl a1, (a0)
+; RV64IA-ZALASR-WMO-NEXT: ret
+;
+; RV64IA-ZALASR-TSO-LABEL: atomic_store_f64_release:
+; RV64IA-ZALASR-TSO: # %bb.0:
+; RV64IA-ZALASR-TSO-NEXT: fmv.x.d a1, fa0
+; RV64IA-ZALASR-TSO-NEXT: sd a1, 0(a0)
+; RV64IA-ZALASR-TSO-NEXT: ret
store atomic double %b, ptr %a release, align 8
ret void
}
@@ -945,6 +1057,12 @@ define void @atomic_store_f64_seq_cst(ptr %a, double %b) nounwind {
; RV64IA-TSO-TRAILING-FENCE-NEXT: sd a1, 0(a0)
; RV64IA-TSO-TRAILING-FENCE-NEXT: fence rw, rw
; RV64IA-TSO-TRAILING-FENCE-NEXT: ret
+;
+; RV64IA-ZALASR-LABEL: atomic_store_f64_seq_cst:
+; RV64IA-ZALASR: # %bb.0:
+; RV64IA-ZALASR-NEXT: fmv.x.d a1, fa0
+; RV64IA-ZALASR-NEXT: sd.rl a1, (a0)
+; RV64IA-ZALASR-NEXT: ret
store atomic double %b, ptr %a seq_cst, align 8
ret void
}
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomic-cmpxchg-rv32.mir b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomic-cmpxchg-rv32.mir
index 74249c1..e2d3bff 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomic-cmpxchg-rv32.mir
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomic-cmpxchg-rv32.mir
@@ -17,7 +17,7 @@ body: |
; RV32IA-ZABHA-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
; RV32IA-ZABHA-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x0
; RV32IA-ZABHA-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 1
- ; RV32IA-ZABHA-NEXT: [[AMOCAS_B:%[0-9]+]]:gpr = AMOCAS_B [[COPY1]], [[COPY]], [[ADDI]] :: (load store monotonic (s8))
+ ; RV32IA-ZABHA-NEXT: [[AMOCAS_B:%[0-9]+]]:gpr = AMOCAS_B [[COPY1]], [[ADDI]], [[COPY]] :: (load store monotonic (s8))
; RV32IA-ZABHA-NEXT: $x10 = COPY [[AMOCAS_B]]
; RV32IA-ZABHA-NEXT: PseudoRET implicit $x10
%0:gpr(p0) = COPY $x10
@@ -42,7 +42,7 @@ body: |
; RV32IA-ZABHA-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
; RV32IA-ZABHA-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x0
; RV32IA-ZABHA-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 1
- ; RV32IA-ZABHA-NEXT: [[AMOCAS_H:%[0-9]+]]:gpr = AMOCAS_H [[COPY1]], [[COPY]], [[ADDI]] :: (load store monotonic (s16))
+ ; RV32IA-ZABHA-NEXT: [[AMOCAS_H:%[0-9]+]]:gpr = AMOCAS_H [[COPY1]], [[ADDI]], [[COPY]] :: (load store monotonic (s16))
; RV32IA-ZABHA-NEXT: $x10 = COPY [[AMOCAS_H]]
; RV32IA-ZABHA-NEXT: PseudoRET implicit $x10
%0:gpr(p0) = COPY $x10
@@ -67,7 +67,7 @@ body: |
; RV32IA-ZABHA-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
; RV32IA-ZABHA-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x0
; RV32IA-ZABHA-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 1
- ; RV32IA-ZABHA-NEXT: [[AMOCAS_W:%[0-9]+]]:gpr = AMOCAS_W [[COPY1]], [[COPY]], [[ADDI]] :: (load store monotonic (s32))
+ ; RV32IA-ZABHA-NEXT: [[AMOCAS_W:%[0-9]+]]:gpr = AMOCAS_W [[COPY1]], [[ADDI]], [[COPY]] :: (load store monotonic (s32))
; RV32IA-ZABHA-NEXT: $x10 = COPY [[AMOCAS_W]]
; RV32IA-ZABHA-NEXT: PseudoRET implicit $x10
%0:gpr(p0) = COPY $x10
@@ -92,7 +92,7 @@ body: |
; RV32IA-ZABHA-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
; RV32IA-ZABHA-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x0
; RV32IA-ZABHA-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 1
- ; RV32IA-ZABHA-NEXT: [[AMOCAS_W:%[0-9]+]]:gpr = AMOCAS_W [[COPY1]], [[COPY]], [[ADDI]] :: (load store monotonic (s32))
+ ; RV32IA-ZABHA-NEXT: [[AMOCAS_W:%[0-9]+]]:gpr = AMOCAS_W [[COPY1]], [[ADDI]], [[COPY]] :: (load store monotonic (s32))
; RV32IA-ZABHA-NEXT: [[SLTIU:%[0-9]+]]:gpr = SLTIU [[AMOCAS_W]], 1
; RV32IA-ZABHA-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
; RV32IA-ZABHA-NEXT: $x10 = COPY [[AMOCAS_W]]
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomic-cmpxchg-rv64.mir b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomic-cmpxchg-rv64.mir
index a2f7e30..ab537ea 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomic-cmpxchg-rv64.mir
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomic-cmpxchg-rv64.mir
@@ -17,7 +17,7 @@ body: |
; RV64IA-ZABHA-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
; RV64IA-ZABHA-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x0
; RV64IA-ZABHA-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 1
- ; RV64IA-ZABHA-NEXT: [[AMOCAS_B:%[0-9]+]]:gpr = AMOCAS_B [[COPY1]], [[COPY]], [[ADDI]] :: (load store monotonic (s8))
+ ; RV64IA-ZABHA-NEXT: [[AMOCAS_B:%[0-9]+]]:gpr = AMOCAS_B [[COPY1]], [[ADDI]], [[COPY]] :: (load store monotonic (s8))
; RV64IA-ZABHA-NEXT: $x10 = COPY [[AMOCAS_B]]
; RV64IA-ZABHA-NEXT: PseudoRET implicit $x10
%0:gpr(p0) = COPY $x10
@@ -42,7 +42,7 @@ body: |
; RV64IA-ZABHA-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
; RV64IA-ZABHA-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x0
; RV64IA-ZABHA-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 1
- ; RV64IA-ZABHA-NEXT: [[AMOCAS_H:%[0-9]+]]:gpr = AMOCAS_H [[COPY1]], [[COPY]], [[ADDI]] :: (load store monotonic (s16))
+ ; RV64IA-ZABHA-NEXT: [[AMOCAS_H:%[0-9]+]]:gpr = AMOCAS_H [[COPY1]], [[ADDI]], [[COPY]] :: (load store monotonic (s16))
; RV64IA-ZABHA-NEXT: $x10 = COPY [[AMOCAS_H]]
; RV64IA-ZABHA-NEXT: PseudoRET implicit $x10
%0:gpr(p0) = COPY $x10
@@ -67,7 +67,7 @@ body: |
; RV64IA-ZABHA-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
; RV64IA-ZABHA-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x0
; RV64IA-ZABHA-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 1
- ; RV64IA-ZABHA-NEXT: [[AMOCAS_W:%[0-9]+]]:gpr = AMOCAS_W [[COPY1]], [[COPY]], [[ADDI]] :: (load store monotonic (s32))
+ ; RV64IA-ZABHA-NEXT: [[AMOCAS_W:%[0-9]+]]:gpr = AMOCAS_W [[COPY1]], [[ADDI]], [[COPY]] :: (load store monotonic (s32))
; RV64IA-ZABHA-NEXT: $x10 = COPY [[AMOCAS_W]]
; RV64IA-ZABHA-NEXT: PseudoRET implicit $x10
%0:gpr(p0) = COPY $x10
@@ -92,7 +92,7 @@ body: |
; RV64IA-ZABHA-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
; RV64IA-ZABHA-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x0
; RV64IA-ZABHA-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 1
- ; RV64IA-ZABHA-NEXT: [[AMOCAS_D_RV64_:%[0-9]+]]:gpr = AMOCAS_D_RV64 [[COPY1]], [[COPY]], [[ADDI]] :: (load store monotonic (s64))
+ ; RV64IA-ZABHA-NEXT: [[AMOCAS_D_RV64_:%[0-9]+]]:gpr = AMOCAS_D_RV64 [[COPY1]], [[ADDI]], [[COPY]] :: (load store monotonic (s64))
; RV64IA-ZABHA-NEXT: $x10 = COPY [[AMOCAS_D_RV64_]]
; RV64IA-ZABHA-NEXT: PseudoRET implicit $x10
%0:gpr(p0) = COPY $x10
@@ -116,7 +116,7 @@ body: |
; RV64IA-ZABHA-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
; RV64IA-ZABHA-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x0
; RV64IA-ZABHA-NEXT: [[ADDI:%[0-9]+]]:gpr = ADDI $x0, 1
- ; RV64IA-ZABHA-NEXT: [[AMOCAS_D_RV64_:%[0-9]+]]:gpr = AMOCAS_D_RV64 [[COPY1]], [[COPY]], [[ADDI]] :: (load store monotonic (s64))
+ ; RV64IA-ZABHA-NEXT: [[AMOCAS_D_RV64_:%[0-9]+]]:gpr = AMOCAS_D_RV64 [[COPY1]], [[ADDI]], [[COPY]] :: (load store monotonic (s64))
; RV64IA-ZABHA-NEXT: [[SLTIU:%[0-9]+]]:gpr = SLTIU [[AMOCAS_D_RV64_]], 1
; RV64IA-ZABHA-NEXT: ADJCALLSTACKDOWN 0, 0, implicit-def $x2, implicit $x2
; RV64IA-ZABHA-NEXT: $x10 = COPY [[AMOCAS_D_RV64_]]
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomicrmw-add-sub-rv32.mir b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomicrmw-add-sub-rv32.mir
index f7fdc33..e547972 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomicrmw-add-sub-rv32.mir
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomicrmw-add-sub-rv32.mir
@@ -15,7 +15,7 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11
- ; CHECK-NEXT: [[AMOADD_B:%[0-9]+]]:gpr = AMOADD_B [[COPY]], [[COPY1]] :: (load store monotonic (s8))
+ ; CHECK-NEXT: [[AMOADD_B:%[0-9]+]]:gpr = AMOADD_B [[COPY1]], [[COPY]] :: (load store monotonic (s8))
; CHECK-NEXT: $x10 = COPY [[AMOADD_B]]
; CHECK-NEXT: PseudoRET implicit $x10
%0:gprb(p0) = COPY $x10
@@ -38,7 +38,7 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11
- ; CHECK-NEXT: [[AMOADD_H:%[0-9]+]]:gpr = AMOADD_H [[COPY]], [[COPY1]] :: (load store monotonic (s16))
+ ; CHECK-NEXT: [[AMOADD_H:%[0-9]+]]:gpr = AMOADD_H [[COPY1]], [[COPY]] :: (load store monotonic (s16))
; CHECK-NEXT: $x10 = COPY [[AMOADD_H]]
; CHECK-NEXT: PseudoRET implicit $x10
%0:gprb(p0) = COPY $x10
@@ -61,7 +61,7 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11
- ; CHECK-NEXT: [[AMOADD_W:%[0-9]+]]:gpr = AMOADD_W [[COPY]], [[COPY1]] :: (load store monotonic (s32))
+ ; CHECK-NEXT: [[AMOADD_W:%[0-9]+]]:gpr = AMOADD_W [[COPY1]], [[COPY]] :: (load store monotonic (s32))
; CHECK-NEXT: $x10 = COPY [[AMOADD_W]]
; CHECK-NEXT: PseudoRET implicit $x10
%0:gprb(p0) = COPY $x10
@@ -86,7 +86,7 @@ body: |
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11
; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr = COPY $x0
; CHECK-NEXT: [[SUB:%[0-9]+]]:gpr = SUB [[COPY2]], [[COPY1]]
- ; CHECK-NEXT: [[AMOADD_B:%[0-9]+]]:gpr = AMOADD_B [[COPY]], [[SUB]] :: (load store monotonic (s8))
+ ; CHECK-NEXT: [[AMOADD_B:%[0-9]+]]:gpr = AMOADD_B [[SUB]], [[COPY]] :: (load store monotonic (s8))
; CHECK-NEXT: $x10 = COPY [[AMOADD_B]]
; CHECK-NEXT: PseudoRET implicit $x10
%0:gprb(p0) = COPY $x10
@@ -113,7 +113,7 @@ body: |
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11
; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr = COPY $x0
; CHECK-NEXT: [[SUB:%[0-9]+]]:gpr = SUB [[COPY2]], [[COPY1]]
- ; CHECK-NEXT: [[AMOADD_H:%[0-9]+]]:gpr = AMOADD_H [[COPY]], [[SUB]] :: (load store monotonic (s16))
+ ; CHECK-NEXT: [[AMOADD_H:%[0-9]+]]:gpr = AMOADD_H [[SUB]], [[COPY]] :: (load store monotonic (s16))
; CHECK-NEXT: $x10 = COPY [[AMOADD_H]]
; CHECK-NEXT: PseudoRET implicit $x10
%0:gprb(p0) = COPY $x10
@@ -140,7 +140,7 @@ body: |
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11
; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr = COPY $x0
; CHECK-NEXT: [[SUB:%[0-9]+]]:gpr = SUB [[COPY2]], [[COPY1]]
- ; CHECK-NEXT: [[AMOADD_B:%[0-9]+]]:gpr = AMOADD_B [[COPY]], [[SUB]] :: (load store monotonic (s8))
+ ; CHECK-NEXT: [[AMOADD_B:%[0-9]+]]:gpr = AMOADD_B [[SUB]], [[COPY]] :: (load store monotonic (s8))
; CHECK-NEXT: $x10 = COPY [[AMOADD_B]]
; CHECK-NEXT: PseudoRET implicit $x10
%0:gprb(p0) = COPY $x10
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomicrmw-add-sub-rv64.mir b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomicrmw-add-sub-rv64.mir
index 178586c..f34826c 100644
--- a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomicrmw-add-sub-rv64.mir
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/atomicrmw-add-sub-rv64.mir
@@ -15,7 +15,7 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11
- ; CHECK-NEXT: [[AMOADD_B:%[0-9]+]]:gpr = AMOADD_B [[COPY]], [[COPY1]] :: (load store monotonic (s8))
+ ; CHECK-NEXT: [[AMOADD_B:%[0-9]+]]:gpr = AMOADD_B [[COPY1]], [[COPY]] :: (load store monotonic (s8))
; CHECK-NEXT: $x10 = COPY [[AMOADD_B]]
; CHECK-NEXT: PseudoRET implicit $x10
%0:gprb(p0) = COPY $x10
@@ -38,7 +38,7 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11
- ; CHECK-NEXT: [[AMOADD_H:%[0-9]+]]:gpr = AMOADD_H [[COPY]], [[COPY1]] :: (load store monotonic (s16))
+ ; CHECK-NEXT: [[AMOADD_H:%[0-9]+]]:gpr = AMOADD_H [[COPY1]], [[COPY]] :: (load store monotonic (s16))
; CHECK-NEXT: $x10 = COPY [[AMOADD_H]]
; CHECK-NEXT: PseudoRET implicit $x10
%0:gprb(p0) = COPY $x10
@@ -61,7 +61,7 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11
- ; CHECK-NEXT: [[AMOADD_W:%[0-9]+]]:gpr = AMOADD_W [[COPY]], [[COPY1]] :: (load store monotonic (s32))
+ ; CHECK-NEXT: [[AMOADD_W:%[0-9]+]]:gpr = AMOADD_W [[COPY1]], [[COPY]] :: (load store monotonic (s32))
; CHECK-NEXT: $x10 = COPY [[AMOADD_W]]
; CHECK-NEXT: PseudoRET implicit $x10
%0:gprb(p0) = COPY $x10
@@ -84,7 +84,7 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr = COPY $x10
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11
- ; CHECK-NEXT: [[AMOADD_D:%[0-9]+]]:gpr = AMOADD_D [[COPY]], [[COPY1]] :: (load store monotonic (s64))
+ ; CHECK-NEXT: [[AMOADD_D:%[0-9]+]]:gpr = AMOADD_D [[COPY1]], [[COPY]] :: (load store monotonic (s64))
; CHECK-NEXT: $x10 = COPY [[AMOADD_D]]
; CHECK-NEXT: PseudoRET implicit $x10
%0:gprb(p0) = COPY $x10
@@ -109,7 +109,7 @@ body: |
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11
; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr = COPY $x0
; CHECK-NEXT: [[SUB:%[0-9]+]]:gpr = SUB [[COPY2]], [[COPY1]]
- ; CHECK-NEXT: [[AMOADD_B:%[0-9]+]]:gpr = AMOADD_B [[COPY]], [[SUB]] :: (load store monotonic (s8))
+ ; CHECK-NEXT: [[AMOADD_B:%[0-9]+]]:gpr = AMOADD_B [[SUB]], [[COPY]] :: (load store monotonic (s8))
; CHECK-NEXT: $x10 = COPY [[AMOADD_B]]
; CHECK-NEXT: PseudoRET implicit $x10
%0:gprb(p0) = COPY $x10
@@ -136,7 +136,7 @@ body: |
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11
; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr = COPY $x0
; CHECK-NEXT: [[SUB:%[0-9]+]]:gpr = SUB [[COPY2]], [[COPY1]]
- ; CHECK-NEXT: [[AMOADD_H:%[0-9]+]]:gpr = AMOADD_H [[COPY]], [[SUB]] :: (load store monotonic (s16))
+ ; CHECK-NEXT: [[AMOADD_H:%[0-9]+]]:gpr = AMOADD_H [[SUB]], [[COPY]] :: (load store monotonic (s16))
; CHECK-NEXT: $x10 = COPY [[AMOADD_H]]
; CHECK-NEXT: PseudoRET implicit $x10
%0:gprb(p0) = COPY $x10
@@ -163,7 +163,7 @@ body: |
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11
; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr = COPY $x0
; CHECK-NEXT: [[SUB:%[0-9]+]]:gpr = SUB [[COPY2]], [[COPY1]]
- ; CHECK-NEXT: [[AMOADD_W:%[0-9]+]]:gpr = AMOADD_W [[COPY]], [[SUB]] :: (load store monotonic (s32))
+ ; CHECK-NEXT: [[AMOADD_W:%[0-9]+]]:gpr = AMOADD_W [[SUB]], [[COPY]] :: (load store monotonic (s32))
; CHECK-NEXT: $x10 = COPY [[AMOADD_W]]
; CHECK-NEXT: PseudoRET implicit $x10
%0:gprb(p0) = COPY $x10
@@ -190,7 +190,7 @@ body: |
; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr = COPY $x11
; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr = COPY $x0
; CHECK-NEXT: [[SUB:%[0-9]+]]:gpr = SUB [[COPY2]], [[COPY1]]
- ; CHECK-NEXT: [[AMOADD_B:%[0-9]+]]:gpr = AMOADD_B [[COPY]], [[SUB]] :: (load store monotonic (s8))
+ ; CHECK-NEXT: [[AMOADD_B:%[0-9]+]]:gpr = AMOADD_B [[SUB]], [[COPY]] :: (load store monotonic (s8))
; CHECK-NEXT: $x10 = COPY [[AMOADD_B]]
; CHECK-NEXT: PseudoRET implicit $x10
%0:gprb(p0) = COPY $x10
diff --git a/llvm/test/CodeGen/RISCV/atomic-rmw.ll b/llvm/test/CodeGen/RISCV/atomic-rmw.ll
index b0510f8..1213256 100644
--- a/llvm/test/CodeGen/RISCV/atomic-rmw.ll
+++ b/llvm/test/CodeGen/RISCV/atomic-rmw.ll
@@ -21,10 +21,19 @@
; RUN: llc -mtriple=riscv64 -mattr=+a,+ztso,+zacas -verify-machineinstrs < %s \
; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-ZACAS,RV64IA-TSO,RV64IA-TSO-ZACAS %s
+; RUN: llc -mtriple=riscv32 -mattr=+a,+zabha -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-WMO,RV32IA-WMO-ZABHA,RV32IA-WMO-ZABHA-NOZACAS %s
+; RUN: llc -mtriple=riscv32 -mattr=+a,+ztso,+zabha -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-TSO,RV32IA-TSO-ZABHA,RV32IA-TSO-ZABHA-NOZACAS %s
; RUN: llc -mtriple=riscv64 -mattr=+a,+zabha -verify-machineinstrs < %s \
; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-WMO,RV64IA-WMO-ZABHA,RV64IA-WMO-ZABHA-NOZACAS %s
; RUN: llc -mtriple=riscv64 -mattr=+a,+ztso,+zabha -verify-machineinstrs < %s \
; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-TSO,RV64IA-TSO-ZABHA,RV64IA-TSO-ZABHA-NOZACAS %s
+
+; RUN: llc -mtriple=riscv32 -mattr=+a,+zabha,+zacas -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-WMO,RV32IA-WMO-ZABHA,RV32IA-WMO-ZABHA-ZACAS %s
+; RUN: llc -mtriple=riscv32 -mattr=+a,+ztso,+zabha,+zacas -verify-machineinstrs < %s \
+; RUN: | FileCheck -check-prefixes=RV32IA,RV32IA-TSO,RV32IA-TSO-ZABHA,RV32IA-TSO-ZABHA-ZACAS %s
; RUN: llc -mtriple=riscv64 -mattr=+a,+zabha,+zacas -verify-machineinstrs < %s \
; RUN: | FileCheck -check-prefixes=RV64IA,RV64IA-WMO,RV64IA-WMO-ZABHA,RV64IA-WMO-ZABHA-ZACAS %s
; RUN: llc -mtriple=riscv64 -mattr=+a,+ztso,+zabha,+zacas -verify-machineinstrs < %s \
@@ -41,25 +50,25 @@ define i8 @atomicrmw_xchg_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-LABEL: atomicrmw_xchg_i8_monotonic:
-; RV32IA: # %bb.0:
-; RV32IA-NEXT: andi a2, a0, -4
-; RV32IA-NEXT: slli a0, a0, 3
-; RV32IA-NEXT: li a3, 255
-; RV32IA-NEXT: zext.b a1, a1
-; RV32IA-NEXT: sll a3, a3, a0
-; RV32IA-NEXT: sll a1, a1, a0
-; RV32IA-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: lr.w a4, (a2)
-; RV32IA-NEXT: mv a5, a1
-; RV32IA-NEXT: xor a5, a4, a5
-; RV32IA-NEXT: and a5, a5, a3
-; RV32IA-NEXT: xor a5, a4, a5
-; RV32IA-NEXT: sc.w a5, a5, (a2)
-; RV32IA-NEXT: bnez a5, .LBB0_1
-; RV32IA-NEXT: # %bb.2:
-; RV32IA-NEXT: srl a0, a4, a0
-; RV32IA-NEXT: ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_xchg_i8_monotonic:
+; RV32IA-NOZACAS: # %bb.0:
+; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT: li a3, 255
+; RV32IA-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-NOZACAS-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NOZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-NOZACAS-NEXT: mv a5, a1
+; RV32IA-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-NOZACAS-NEXT: and a5, a5, a3
+; RV32IA-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-NOZACAS-NEXT: bnez a5, .LBB0_1
+; RV32IA-NOZACAS-NEXT: # %bb.2:
+; RV32IA-NOZACAS-NEXT: srl a0, a4, a0
+; RV32IA-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_xchg_i8_monotonic:
; RV64I: # %bb.0:
@@ -91,6 +100,26 @@ define i8 @atomicrmw_xchg_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV64IA-NOZACAS-NEXT: srlw a0, a4, a0
; RV64IA-NOZACAS-NEXT: ret
;
+; RV32IA-ZACAS-LABEL: atomicrmw_xchg_i8_monotonic:
+; RV32IA-ZACAS: # %bb.0:
+; RV32IA-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-ZACAS-NEXT: li a3, 255
+; RV32IA-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-ZACAS-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-ZACAS-NEXT: mv a5, a1
+; RV32IA-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-ZACAS-NEXT: and a5, a5, a3
+; RV32IA-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-ZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-ZACAS-NEXT: bnez a5, .LBB0_1
+; RV32IA-ZACAS-NEXT: # %bb.2:
+; RV32IA-ZACAS-NEXT: srl a0, a4, a0
+; RV32IA-ZACAS-NEXT: ret
+;
; RV64IA-ZACAS-LABEL: atomicrmw_xchg_i8_monotonic:
; RV64IA-ZACAS: # %bb.0:
; RV64IA-ZACAS-NEXT: andi a2, a0, -4
@@ -111,6 +140,16 @@ define i8 @atomicrmw_xchg_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV64IA-ZACAS-NEXT: srlw a0, a4, a0
; RV64IA-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xchg_i8_monotonic:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amoswap.b a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_xchg_i8_monotonic:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amoswap.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_xchg_i8_monotonic:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amoswap.b a0, a1, (a0)
@@ -135,45 +174,45 @@ define i8 @atomicrmw_xchg_i8_acquire(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_xchg_i8_acquire:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: li a3, 255
-; RV32IA-WMO-NEXT: zext.b a1, a1
-; RV32IA-WMO-NEXT: sll a3, a3, a0
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT: lr.w.aq a4, (a2)
-; RV32IA-WMO-NEXT: mv a5, a1
-; RV32IA-WMO-NEXT: xor a5, a4, a5
-; RV32IA-WMO-NEXT: and a5, a5, a3
-; RV32IA-WMO-NEXT: xor a5, a4, a5
-; RV32IA-WMO-NEXT: sc.w a5, a5, (a2)
-; RV32IA-WMO-NEXT: bnez a5, .LBB1_1
-; RV32IA-WMO-NEXT: # %bb.2:
-; RV32IA-WMO-NEXT: srl a0, a4, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_i8_acquire:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: li a3, 255
+; RV32IA-WMO-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: lr.w.aq a4, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: mv a5, a1
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-NOZACAS-NEXT: and a5, a5, a3
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: bnez a5, .LBB1_1
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a4, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_xchg_i8_acquire:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: li a3, 255
-; RV32IA-TSO-NEXT: zext.b a1, a1
-; RV32IA-TSO-NEXT: sll a3, a3, a0
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT: lr.w a4, (a2)
-; RV32IA-TSO-NEXT: mv a5, a1
-; RV32IA-TSO-NEXT: xor a5, a4, a5
-; RV32IA-TSO-NEXT: and a5, a5, a3
-; RV32IA-TSO-NEXT: xor a5, a4, a5
-; RV32IA-TSO-NEXT: sc.w a5, a5, (a2)
-; RV32IA-TSO-NEXT: bnez a5, .LBB1_1
-; RV32IA-TSO-NEXT: # %bb.2:
-; RV32IA-TSO-NEXT: srl a0, a4, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_xchg_i8_acquire:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: li a3, 255
+; RV32IA-TSO-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: mv a5, a1
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-NOZACAS-NEXT: and a5, a5, a3
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: bnez a5, .LBB1_1
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a4, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_xchg_i8_acquire:
; RV64I: # %bb.0:
@@ -225,6 +264,46 @@ define i8 @atomicrmw_xchg_i8_acquire(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a4, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_xchg_i8_acquire:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: li a3, 255
+; RV32IA-WMO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a4, (a2)
+; RV32IA-WMO-ZACAS-NEXT: mv a5, a1
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a3
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-ZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB1_1
+; RV32IA-WMO-ZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a4, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_xchg_i8_acquire:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: li a3, 255
+; RV32IA-TSO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: .LBB1_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-TSO-ZACAS-NEXT: mv a5, a1
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a3
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT: bnez a5, .LBB1_1
+; RV32IA-TSO-ZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a4, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_xchg_i8_acquire:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -265,6 +344,16 @@ define i8 @atomicrmw_xchg_i8_acquire(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a4, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xchg_i8_acquire:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amoswap.b.aq a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_xchg_i8_acquire:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amoswap.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_xchg_i8_acquire:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amoswap.b.aq a0, a1, (a0)
@@ -289,45 +378,45 @@ define i8 @atomicrmw_xchg_i8_release(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_xchg_i8_release:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: li a3, 255
-; RV32IA-WMO-NEXT: zext.b a1, a1
-; RV32IA-WMO-NEXT: sll a3, a3, a0
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT: lr.w a4, (a2)
-; RV32IA-WMO-NEXT: mv a5, a1
-; RV32IA-WMO-NEXT: xor a5, a4, a5
-; RV32IA-WMO-NEXT: and a5, a5, a3
-; RV32IA-WMO-NEXT: xor a5, a4, a5
-; RV32IA-WMO-NEXT: sc.w.rl a5, a5, (a2)
-; RV32IA-WMO-NEXT: bnez a5, .LBB2_1
-; RV32IA-WMO-NEXT: # %bb.2:
-; RV32IA-WMO-NEXT: srl a0, a4, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_i8_release:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: li a3, 255
+; RV32IA-WMO-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: mv a5, a1
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-NOZACAS-NEXT: and a5, a5, a3
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-NOZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: bnez a5, .LBB2_1
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a4, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_xchg_i8_release:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: li a3, 255
-; RV32IA-TSO-NEXT: zext.b a1, a1
-; RV32IA-TSO-NEXT: sll a3, a3, a0
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT: lr.w a4, (a2)
-; RV32IA-TSO-NEXT: mv a5, a1
-; RV32IA-TSO-NEXT: xor a5, a4, a5
-; RV32IA-TSO-NEXT: and a5, a5, a3
-; RV32IA-TSO-NEXT: xor a5, a4, a5
-; RV32IA-TSO-NEXT: sc.w a5, a5, (a2)
-; RV32IA-TSO-NEXT: bnez a5, .LBB2_1
-; RV32IA-TSO-NEXT: # %bb.2:
-; RV32IA-TSO-NEXT: srl a0, a4, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_xchg_i8_release:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: li a3, 255
+; RV32IA-TSO-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: mv a5, a1
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-NOZACAS-NEXT: and a5, a5, a3
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: bnez a5, .LBB2_1
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a4, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_xchg_i8_release:
; RV64I: # %bb.0:
@@ -379,6 +468,46 @@ define i8 @atomicrmw_xchg_i8_release(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a4, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_xchg_i8_release:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: li a3, 255
+; RV32IA-WMO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-WMO-ZACAS-NEXT: mv a5, a1
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a3
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB2_1
+; RV32IA-WMO-ZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a4, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_xchg_i8_release:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: li a3, 255
+; RV32IA-TSO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: .LBB2_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-TSO-ZACAS-NEXT: mv a5, a1
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a3
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT: bnez a5, .LBB2_1
+; RV32IA-TSO-ZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a4, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_xchg_i8_release:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -419,6 +548,16 @@ define i8 @atomicrmw_xchg_i8_release(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a4, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xchg_i8_release:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amoswap.b.rl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_xchg_i8_release:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amoswap.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_xchg_i8_release:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amoswap.b.rl a0, a1, (a0)
@@ -443,45 +582,45 @@ define i8 @atomicrmw_xchg_i8_acq_rel(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_xchg_i8_acq_rel:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: li a3, 255
-; RV32IA-WMO-NEXT: zext.b a1, a1
-; RV32IA-WMO-NEXT: sll a3, a3, a0
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT: lr.w.aq a4, (a2)
-; RV32IA-WMO-NEXT: mv a5, a1
-; RV32IA-WMO-NEXT: xor a5, a4, a5
-; RV32IA-WMO-NEXT: and a5, a5, a3
-; RV32IA-WMO-NEXT: xor a5, a4, a5
-; RV32IA-WMO-NEXT: sc.w.rl a5, a5, (a2)
-; RV32IA-WMO-NEXT: bnez a5, .LBB3_1
-; RV32IA-WMO-NEXT: # %bb.2:
-; RV32IA-WMO-NEXT: srl a0, a4, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_i8_acq_rel:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: li a3, 255
+; RV32IA-WMO-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: lr.w.aq a4, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: mv a5, a1
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-NOZACAS-NEXT: and a5, a5, a3
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-NOZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: bnez a5, .LBB3_1
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a4, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_xchg_i8_acq_rel:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: li a3, 255
-; RV32IA-TSO-NEXT: zext.b a1, a1
-; RV32IA-TSO-NEXT: sll a3, a3, a0
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT: lr.w a4, (a2)
-; RV32IA-TSO-NEXT: mv a5, a1
-; RV32IA-TSO-NEXT: xor a5, a4, a5
-; RV32IA-TSO-NEXT: and a5, a5, a3
-; RV32IA-TSO-NEXT: xor a5, a4, a5
-; RV32IA-TSO-NEXT: sc.w a5, a5, (a2)
-; RV32IA-TSO-NEXT: bnez a5, .LBB3_1
-; RV32IA-TSO-NEXT: # %bb.2:
-; RV32IA-TSO-NEXT: srl a0, a4, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_xchg_i8_acq_rel:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: li a3, 255
+; RV32IA-TSO-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: mv a5, a1
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-NOZACAS-NEXT: and a5, a5, a3
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: bnez a5, .LBB3_1
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a4, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_xchg_i8_acq_rel:
; RV64I: # %bb.0:
@@ -533,6 +672,46 @@ define i8 @atomicrmw_xchg_i8_acq_rel(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a4, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_xchg_i8_acq_rel:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: li a3, 255
+; RV32IA-WMO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a4, (a2)
+; RV32IA-WMO-ZACAS-NEXT: mv a5, a1
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a3
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB3_1
+; RV32IA-WMO-ZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a4, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_xchg_i8_acq_rel:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: li a3, 255
+; RV32IA-TSO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: .LBB3_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-TSO-ZACAS-NEXT: mv a5, a1
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a3
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT: bnez a5, .LBB3_1
+; RV32IA-TSO-ZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a4, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_xchg_i8_acq_rel:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -573,6 +752,16 @@ define i8 @atomicrmw_xchg_i8_acq_rel(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a4, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xchg_i8_acq_rel:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amoswap.b.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_xchg_i8_acq_rel:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amoswap.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_xchg_i8_acq_rel:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amoswap.b.aqrl a0, a1, (a0)
@@ -597,25 +786,25 @@ define i8 @atomicrmw_xchg_i8_seq_cst(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-LABEL: atomicrmw_xchg_i8_seq_cst:
-; RV32IA: # %bb.0:
-; RV32IA-NEXT: andi a2, a0, -4
-; RV32IA-NEXT: slli a0, a0, 3
-; RV32IA-NEXT: li a3, 255
-; RV32IA-NEXT: zext.b a1, a1
-; RV32IA-NEXT: sll a3, a3, a0
-; RV32IA-NEXT: sll a1, a1, a0
-; RV32IA-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: lr.w.aqrl a4, (a2)
-; RV32IA-NEXT: mv a5, a1
-; RV32IA-NEXT: xor a5, a4, a5
-; RV32IA-NEXT: and a5, a5, a3
-; RV32IA-NEXT: xor a5, a4, a5
-; RV32IA-NEXT: sc.w.rl a5, a5, (a2)
-; RV32IA-NEXT: bnez a5, .LBB4_1
-; RV32IA-NEXT: # %bb.2:
-; RV32IA-NEXT: srl a0, a4, a0
-; RV32IA-NEXT: ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_xchg_i8_seq_cst:
+; RV32IA-NOZACAS: # %bb.0:
+; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT: li a3, 255
+; RV32IA-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-NOZACAS-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NOZACAS-NEXT: lr.w.aqrl a4, (a2)
+; RV32IA-NOZACAS-NEXT: mv a5, a1
+; RV32IA-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-NOZACAS-NEXT: and a5, a5, a3
+; RV32IA-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-NOZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-NOZACAS-NEXT: bnez a5, .LBB4_1
+; RV32IA-NOZACAS-NEXT: # %bb.2:
+; RV32IA-NOZACAS-NEXT: srl a0, a4, a0
+; RV32IA-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_xchg_i8_seq_cst:
; RV64I: # %bb.0:
@@ -647,6 +836,26 @@ define i8 @atomicrmw_xchg_i8_seq_cst(ptr %a, i8 %b) nounwind {
; RV64IA-NOZACAS-NEXT: srlw a0, a4, a0
; RV64IA-NOZACAS-NEXT: ret
;
+; RV32IA-ZACAS-LABEL: atomicrmw_xchg_i8_seq_cst:
+; RV32IA-ZACAS: # %bb.0:
+; RV32IA-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-ZACAS-NEXT: li a3, 255
+; RV32IA-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-ZACAS-NEXT: .LBB4_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT: lr.w.aqrl a4, (a2)
+; RV32IA-ZACAS-NEXT: mv a5, a1
+; RV32IA-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-ZACAS-NEXT: and a5, a5, a3
+; RV32IA-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-ZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-ZACAS-NEXT: bnez a5, .LBB4_1
+; RV32IA-ZACAS-NEXT: # %bb.2:
+; RV32IA-ZACAS-NEXT: srl a0, a4, a0
+; RV32IA-ZACAS-NEXT: ret
+;
; RV64IA-ZACAS-LABEL: atomicrmw_xchg_i8_seq_cst:
; RV64IA-ZACAS: # %bb.0:
; RV64IA-ZACAS-NEXT: andi a2, a0, -4
@@ -667,6 +876,16 @@ define i8 @atomicrmw_xchg_i8_seq_cst(ptr %a, i8 %b) nounwind {
; RV64IA-ZACAS-NEXT: srlw a0, a4, a0
; RV64IA-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xchg_i8_seq_cst:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amoswap.b.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_xchg_i8_seq_cst:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amoswap.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_xchg_i8_seq_cst:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amoswap.b.aqrl a0, a1, (a0)
@@ -695,16 +914,16 @@ define i8 @atomicrmw_xchg_0_i8_monotonic(ptr %a) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-LABEL: atomicrmw_xchg_0_i8_monotonic:
-; RV32IA: # %bb.0:
-; RV32IA-NEXT: andi a1, a0, -4
-; RV32IA-NEXT: slli a0, a0, 3
-; RV32IA-NEXT: li a2, 255
-; RV32IA-NEXT: sll a2, a2, a0
-; RV32IA-NEXT: not a2, a2
-; RV32IA-NEXT: amoand.w a1, a2, (a1)
-; RV32IA-NEXT: srl a0, a1, a0
-; RV32IA-NEXT: ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_xchg_0_i8_monotonic:
+; RV32IA-NOZACAS: # %bb.0:
+; RV32IA-NOZACAS-NEXT: andi a1, a0, -4
+; RV32IA-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT: li a2, 255
+; RV32IA-NOZACAS-NEXT: sll a2, a2, a0
+; RV32IA-NOZACAS-NEXT: not a2, a2
+; RV32IA-NOZACAS-NEXT: amoand.w a1, a2, (a1)
+; RV32IA-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_xchg_0_i8_monotonic:
; RV64I: # %bb.0:
@@ -728,6 +947,17 @@ define i8 @atomicrmw_xchg_0_i8_monotonic(ptr %a) nounwind {
; RV64IA-NOZACAS-NEXT: srlw a0, a1, a0
; RV64IA-NOZACAS-NEXT: ret
;
+; RV32IA-ZACAS-LABEL: atomicrmw_xchg_0_i8_monotonic:
+; RV32IA-ZACAS: # %bb.0:
+; RV32IA-ZACAS-NEXT: andi a1, a0, -4
+; RV32IA-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-ZACAS-NEXT: li a2, 255
+; RV32IA-ZACAS-NEXT: sll a2, a2, a0
+; RV32IA-ZACAS-NEXT: not a2, a2
+; RV32IA-ZACAS-NEXT: amoand.w a1, a2, (a1)
+; RV32IA-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-ZACAS-NEXT: ret
+;
; RV64IA-ZACAS-LABEL: atomicrmw_xchg_0_i8_monotonic:
; RV64IA-ZACAS: # %bb.0:
; RV64IA-ZACAS-NEXT: andi a1, a0, -4
@@ -739,6 +969,16 @@ define i8 @atomicrmw_xchg_0_i8_monotonic(ptr %a) nounwind {
; RV64IA-ZACAS-NEXT: srlw a0, a1, a0
; RV64IA-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xchg_0_i8_monotonic:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amoswap.b a0, zero, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_xchg_0_i8_monotonic:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amoswap.b a0, zero, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_xchg_0_i8_monotonic:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amoswap.b a0, zero, (a0)
@@ -764,27 +1004,27 @@ define i8 @atomicrmw_xchg_0_i8_acquire(ptr %a) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_xchg_0_i8_acquire:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a1, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: li a2, 255
-; RV32IA-WMO-NEXT: sll a2, a2, a0
-; RV32IA-WMO-NEXT: not a2, a2
-; RV32IA-WMO-NEXT: amoand.w.aq a1, a2, (a1)
-; RV32IA-WMO-NEXT: srl a0, a1, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_0_i8_acquire:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a1, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: li a2, 255
+; RV32IA-WMO-NOZACAS-NEXT: sll a2, a2, a0
+; RV32IA-WMO-NOZACAS-NEXT: not a2, a2
+; RV32IA-WMO-NOZACAS-NEXT: amoand.w.aq a1, a2, (a1)
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_xchg_0_i8_acquire:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a1, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: li a2, 255
-; RV32IA-TSO-NEXT: sll a2, a2, a0
-; RV32IA-TSO-NEXT: not a2, a2
-; RV32IA-TSO-NEXT: amoand.w a1, a2, (a1)
-; RV32IA-TSO-NEXT: srl a0, a1, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_xchg_0_i8_acquire:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a1, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: li a2, 255
+; RV32IA-TSO-NOZACAS-NEXT: sll a2, a2, a0
+; RV32IA-TSO-NOZACAS-NEXT: not a2, a2
+; RV32IA-TSO-NOZACAS-NEXT: amoand.w a1, a2, (a1)
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_xchg_0_i8_acquire:
; RV64I: # %bb.0:
@@ -819,6 +1059,28 @@ define i8 @atomicrmw_xchg_0_i8_acquire(ptr %a) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_xchg_0_i8_acquire:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a1, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: li a2, 255
+; RV32IA-WMO-ZACAS-NEXT: sll a2, a2, a0
+; RV32IA-WMO-ZACAS-NEXT: not a2, a2
+; RV32IA-WMO-ZACAS-NEXT: amoand.w.aq a1, a2, (a1)
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_xchg_0_i8_acquire:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a1, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: li a2, 255
+; RV32IA-TSO-ZACAS-NEXT: sll a2, a2, a0
+; RV32IA-TSO-ZACAS-NEXT: not a2, a2
+; RV32IA-TSO-ZACAS-NEXT: amoand.w a1, a2, (a1)
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_xchg_0_i8_acquire:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a1, a0, -4
@@ -841,6 +1103,16 @@ define i8 @atomicrmw_xchg_0_i8_acquire(ptr %a) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xchg_0_i8_acquire:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amoswap.b.aq a0, zero, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_xchg_0_i8_acquire:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amoswap.b a0, zero, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_xchg_0_i8_acquire:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amoswap.b.aq a0, zero, (a0)
@@ -866,27 +1138,27 @@ define i8 @atomicrmw_xchg_0_i8_release(ptr %a) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_xchg_0_i8_release:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a1, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: li a2, 255
-; RV32IA-WMO-NEXT: sll a2, a2, a0
-; RV32IA-WMO-NEXT: not a2, a2
-; RV32IA-WMO-NEXT: amoand.w.rl a1, a2, (a1)
-; RV32IA-WMO-NEXT: srl a0, a1, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_0_i8_release:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a1, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: li a2, 255
+; RV32IA-WMO-NOZACAS-NEXT: sll a2, a2, a0
+; RV32IA-WMO-NOZACAS-NEXT: not a2, a2
+; RV32IA-WMO-NOZACAS-NEXT: amoand.w.rl a1, a2, (a1)
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_xchg_0_i8_release:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a1, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: li a2, 255
-; RV32IA-TSO-NEXT: sll a2, a2, a0
-; RV32IA-TSO-NEXT: not a2, a2
-; RV32IA-TSO-NEXT: amoand.w a1, a2, (a1)
-; RV32IA-TSO-NEXT: srl a0, a1, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_xchg_0_i8_release:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a1, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: li a2, 255
+; RV32IA-TSO-NOZACAS-NEXT: sll a2, a2, a0
+; RV32IA-TSO-NOZACAS-NEXT: not a2, a2
+; RV32IA-TSO-NOZACAS-NEXT: amoand.w a1, a2, (a1)
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_xchg_0_i8_release:
; RV64I: # %bb.0:
@@ -921,6 +1193,28 @@ define i8 @atomicrmw_xchg_0_i8_release(ptr %a) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_xchg_0_i8_release:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a1, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: li a2, 255
+; RV32IA-WMO-ZACAS-NEXT: sll a2, a2, a0
+; RV32IA-WMO-ZACAS-NEXT: not a2, a2
+; RV32IA-WMO-ZACAS-NEXT: amoand.w.rl a1, a2, (a1)
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_xchg_0_i8_release:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a1, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: li a2, 255
+; RV32IA-TSO-ZACAS-NEXT: sll a2, a2, a0
+; RV32IA-TSO-ZACAS-NEXT: not a2, a2
+; RV32IA-TSO-ZACAS-NEXT: amoand.w a1, a2, (a1)
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_xchg_0_i8_release:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a1, a0, -4
@@ -943,6 +1237,16 @@ define i8 @atomicrmw_xchg_0_i8_release(ptr %a) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xchg_0_i8_release:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amoswap.b.rl a0, zero, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_xchg_0_i8_release:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amoswap.b a0, zero, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_xchg_0_i8_release:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amoswap.b.rl a0, zero, (a0)
@@ -968,27 +1272,27 @@ define i8 @atomicrmw_xchg_0_i8_acq_rel(ptr %a) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_xchg_0_i8_acq_rel:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a1, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: li a2, 255
-; RV32IA-WMO-NEXT: sll a2, a2, a0
-; RV32IA-WMO-NEXT: not a2, a2
-; RV32IA-WMO-NEXT: amoand.w.aqrl a1, a2, (a1)
-; RV32IA-WMO-NEXT: srl a0, a1, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_0_i8_acq_rel:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a1, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: li a2, 255
+; RV32IA-WMO-NOZACAS-NEXT: sll a2, a2, a0
+; RV32IA-WMO-NOZACAS-NEXT: not a2, a2
+; RV32IA-WMO-NOZACAS-NEXT: amoand.w.aqrl a1, a2, (a1)
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_xchg_0_i8_acq_rel:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a1, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: li a2, 255
-; RV32IA-TSO-NEXT: sll a2, a2, a0
-; RV32IA-TSO-NEXT: not a2, a2
-; RV32IA-TSO-NEXT: amoand.w a1, a2, (a1)
-; RV32IA-TSO-NEXT: srl a0, a1, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_xchg_0_i8_acq_rel:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a1, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: li a2, 255
+; RV32IA-TSO-NOZACAS-NEXT: sll a2, a2, a0
+; RV32IA-TSO-NOZACAS-NEXT: not a2, a2
+; RV32IA-TSO-NOZACAS-NEXT: amoand.w a1, a2, (a1)
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_xchg_0_i8_acq_rel:
; RV64I: # %bb.0:
@@ -1023,6 +1327,28 @@ define i8 @atomicrmw_xchg_0_i8_acq_rel(ptr %a) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_xchg_0_i8_acq_rel:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a1, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: li a2, 255
+; RV32IA-WMO-ZACAS-NEXT: sll a2, a2, a0
+; RV32IA-WMO-ZACAS-NEXT: not a2, a2
+; RV32IA-WMO-ZACAS-NEXT: amoand.w.aqrl a1, a2, (a1)
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_xchg_0_i8_acq_rel:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a1, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: li a2, 255
+; RV32IA-TSO-ZACAS-NEXT: sll a2, a2, a0
+; RV32IA-TSO-ZACAS-NEXT: not a2, a2
+; RV32IA-TSO-ZACAS-NEXT: amoand.w a1, a2, (a1)
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_xchg_0_i8_acq_rel:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a1, a0, -4
@@ -1045,6 +1371,16 @@ define i8 @atomicrmw_xchg_0_i8_acq_rel(ptr %a) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xchg_0_i8_acq_rel:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amoswap.b.aqrl a0, zero, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_xchg_0_i8_acq_rel:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amoswap.b a0, zero, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_xchg_0_i8_acq_rel:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amoswap.b.aqrl a0, zero, (a0)
@@ -1070,27 +1406,27 @@ define i8 @atomicrmw_xchg_0_i8_seq_cst(ptr %a) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_xchg_0_i8_seq_cst:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a1, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: li a2, 255
-; RV32IA-WMO-NEXT: sll a2, a2, a0
-; RV32IA-WMO-NEXT: not a2, a2
-; RV32IA-WMO-NEXT: amoand.w.aqrl a1, a2, (a1)
-; RV32IA-WMO-NEXT: srl a0, a1, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_0_i8_seq_cst:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a1, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: li a2, 255
+; RV32IA-WMO-NOZACAS-NEXT: sll a2, a2, a0
+; RV32IA-WMO-NOZACAS-NEXT: not a2, a2
+; RV32IA-WMO-NOZACAS-NEXT: amoand.w.aqrl a1, a2, (a1)
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_xchg_0_i8_seq_cst:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a1, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: li a2, 255
-; RV32IA-TSO-NEXT: sll a2, a2, a0
-; RV32IA-TSO-NEXT: not a2, a2
-; RV32IA-TSO-NEXT: amoand.w a1, a2, (a1)
-; RV32IA-TSO-NEXT: srl a0, a1, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_xchg_0_i8_seq_cst:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a1, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: li a2, 255
+; RV32IA-TSO-NOZACAS-NEXT: sll a2, a2, a0
+; RV32IA-TSO-NOZACAS-NEXT: not a2, a2
+; RV32IA-TSO-NOZACAS-NEXT: amoand.w a1, a2, (a1)
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_xchg_0_i8_seq_cst:
; RV64I: # %bb.0:
@@ -1125,6 +1461,28 @@ define i8 @atomicrmw_xchg_0_i8_seq_cst(ptr %a) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_xchg_0_i8_seq_cst:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a1, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: li a2, 255
+; RV32IA-WMO-ZACAS-NEXT: sll a2, a2, a0
+; RV32IA-WMO-ZACAS-NEXT: not a2, a2
+; RV32IA-WMO-ZACAS-NEXT: amoand.w.aqrl a1, a2, (a1)
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_xchg_0_i8_seq_cst:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a1, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: li a2, 255
+; RV32IA-TSO-ZACAS-NEXT: sll a2, a2, a0
+; RV32IA-TSO-ZACAS-NEXT: not a2, a2
+; RV32IA-TSO-ZACAS-NEXT: amoand.w a1, a2, (a1)
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_xchg_0_i8_seq_cst:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a1, a0, -4
@@ -1147,6 +1505,16 @@ define i8 @atomicrmw_xchg_0_i8_seq_cst(ptr %a) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xchg_0_i8_seq_cst:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amoswap.b.aqrl a0, zero, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_xchg_0_i8_seq_cst:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amoswap.b a0, zero, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_xchg_0_i8_seq_cst:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amoswap.b.aqrl a0, zero, (a0)
@@ -1172,15 +1540,15 @@ define i8 @atomicrmw_xchg_minus_1_i8_monotonic(ptr %a) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-LABEL: atomicrmw_xchg_minus_1_i8_monotonic:
-; RV32IA: # %bb.0:
-; RV32IA-NEXT: andi a1, a0, -4
-; RV32IA-NEXT: slli a0, a0, 3
-; RV32IA-NEXT: li a2, 255
-; RV32IA-NEXT: sll a2, a2, a0
-; RV32IA-NEXT: amoor.w a1, a2, (a1)
-; RV32IA-NEXT: srl a0, a1, a0
-; RV32IA-NEXT: ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i8_monotonic:
+; RV32IA-NOZACAS: # %bb.0:
+; RV32IA-NOZACAS-NEXT: andi a1, a0, -4
+; RV32IA-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT: li a2, 255
+; RV32IA-NOZACAS-NEXT: sll a2, a2, a0
+; RV32IA-NOZACAS-NEXT: amoor.w a1, a2, (a1)
+; RV32IA-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_xchg_minus_1_i8_monotonic:
; RV64I: # %bb.0:
@@ -1203,6 +1571,16 @@ define i8 @atomicrmw_xchg_minus_1_i8_monotonic(ptr %a) nounwind {
; RV64IA-NOZACAS-NEXT: srlw a0, a1, a0
; RV64IA-NOZACAS-NEXT: ret
;
+; RV32IA-ZACAS-LABEL: atomicrmw_xchg_minus_1_i8_monotonic:
+; RV32IA-ZACAS: # %bb.0:
+; RV32IA-ZACAS-NEXT: andi a1, a0, -4
+; RV32IA-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-ZACAS-NEXT: li a2, 255
+; RV32IA-ZACAS-NEXT: sll a2, a2, a0
+; RV32IA-ZACAS-NEXT: amoor.w a1, a2, (a1)
+; RV32IA-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-ZACAS-NEXT: ret
+;
; RV64IA-ZACAS-LABEL: atomicrmw_xchg_minus_1_i8_monotonic:
; RV64IA-ZACAS: # %bb.0:
; RV64IA-ZACAS-NEXT: andi a1, a0, -4
@@ -1213,6 +1591,18 @@ define i8 @atomicrmw_xchg_minus_1_i8_monotonic(ptr %a) nounwind {
; RV64IA-ZACAS-NEXT: srlw a0, a1, a0
; RV64IA-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xchg_minus_1_i8_monotonic:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: li a1, -1
+; RV32IA-WMO-ZABHA-NEXT: amoswap.b a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_xchg_minus_1_i8_monotonic:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: li a1, -1
+; RV32IA-TSO-ZABHA-NEXT: amoswap.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_xchg_minus_1_i8_monotonic:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: li a1, -1
@@ -1240,25 +1630,25 @@ define i8 @atomicrmw_xchg_minus_1_i8_acquire(ptr %a) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_xchg_minus_1_i8_acquire:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a1, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: li a2, 255
-; RV32IA-WMO-NEXT: sll a2, a2, a0
-; RV32IA-WMO-NEXT: amoor.w.aq a1, a2, (a1)
-; RV32IA-WMO-NEXT: srl a0, a1, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i8_acquire:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a1, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: li a2, 255
+; RV32IA-WMO-NOZACAS-NEXT: sll a2, a2, a0
+; RV32IA-WMO-NOZACAS-NEXT: amoor.w.aq a1, a2, (a1)
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_xchg_minus_1_i8_acquire:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a1, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: li a2, 255
-; RV32IA-TSO-NEXT: sll a2, a2, a0
-; RV32IA-TSO-NEXT: amoor.w a1, a2, (a1)
-; RV32IA-TSO-NEXT: srl a0, a1, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i8_acquire:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a1, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: li a2, 255
+; RV32IA-TSO-NOZACAS-NEXT: sll a2, a2, a0
+; RV32IA-TSO-NOZACAS-NEXT: amoor.w a1, a2, (a1)
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_xchg_minus_1_i8_acquire:
; RV64I: # %bb.0:
@@ -1291,6 +1681,26 @@ define i8 @atomicrmw_xchg_minus_1_i8_acquire(ptr %a) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_xchg_minus_1_i8_acquire:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a1, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: li a2, 255
+; RV32IA-WMO-ZACAS-NEXT: sll a2, a2, a0
+; RV32IA-WMO-ZACAS-NEXT: amoor.w.aq a1, a2, (a1)
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_xchg_minus_1_i8_acquire:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a1, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: li a2, 255
+; RV32IA-TSO-ZACAS-NEXT: sll a2, a2, a0
+; RV32IA-TSO-ZACAS-NEXT: amoor.w a1, a2, (a1)
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_xchg_minus_1_i8_acquire:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a1, a0, -4
@@ -1311,6 +1721,18 @@ define i8 @atomicrmw_xchg_minus_1_i8_acquire(ptr %a) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xchg_minus_1_i8_acquire:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: li a1, -1
+; RV32IA-WMO-ZABHA-NEXT: amoswap.b.aq a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_xchg_minus_1_i8_acquire:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: li a1, -1
+; RV32IA-TSO-ZABHA-NEXT: amoswap.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_xchg_minus_1_i8_acquire:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: li a1, -1
@@ -1338,25 +1760,25 @@ define i8 @atomicrmw_xchg_minus_1_i8_release(ptr %a) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_xchg_minus_1_i8_release:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a1, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: li a2, 255
-; RV32IA-WMO-NEXT: sll a2, a2, a0
-; RV32IA-WMO-NEXT: amoor.w.rl a1, a2, (a1)
-; RV32IA-WMO-NEXT: srl a0, a1, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i8_release:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a1, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: li a2, 255
+; RV32IA-WMO-NOZACAS-NEXT: sll a2, a2, a0
+; RV32IA-WMO-NOZACAS-NEXT: amoor.w.rl a1, a2, (a1)
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_xchg_minus_1_i8_release:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a1, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: li a2, 255
-; RV32IA-TSO-NEXT: sll a2, a2, a0
-; RV32IA-TSO-NEXT: amoor.w a1, a2, (a1)
-; RV32IA-TSO-NEXT: srl a0, a1, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i8_release:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a1, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: li a2, 255
+; RV32IA-TSO-NOZACAS-NEXT: sll a2, a2, a0
+; RV32IA-TSO-NOZACAS-NEXT: amoor.w a1, a2, (a1)
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_xchg_minus_1_i8_release:
; RV64I: # %bb.0:
@@ -1389,6 +1811,26 @@ define i8 @atomicrmw_xchg_minus_1_i8_release(ptr %a) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_xchg_minus_1_i8_release:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a1, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: li a2, 255
+; RV32IA-WMO-ZACAS-NEXT: sll a2, a2, a0
+; RV32IA-WMO-ZACAS-NEXT: amoor.w.rl a1, a2, (a1)
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_xchg_minus_1_i8_release:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a1, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: li a2, 255
+; RV32IA-TSO-ZACAS-NEXT: sll a2, a2, a0
+; RV32IA-TSO-ZACAS-NEXT: amoor.w a1, a2, (a1)
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_xchg_minus_1_i8_release:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a1, a0, -4
@@ -1409,6 +1851,18 @@ define i8 @atomicrmw_xchg_minus_1_i8_release(ptr %a) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xchg_minus_1_i8_release:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: li a1, -1
+; RV32IA-WMO-ZABHA-NEXT: amoswap.b.rl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_xchg_minus_1_i8_release:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: li a1, -1
+; RV32IA-TSO-ZABHA-NEXT: amoswap.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_xchg_minus_1_i8_release:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: li a1, -1
@@ -1436,25 +1890,25 @@ define i8 @atomicrmw_xchg_minus_1_i8_acq_rel(ptr %a) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_xchg_minus_1_i8_acq_rel:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a1, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: li a2, 255
-; RV32IA-WMO-NEXT: sll a2, a2, a0
-; RV32IA-WMO-NEXT: amoor.w.aqrl a1, a2, (a1)
-; RV32IA-WMO-NEXT: srl a0, a1, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i8_acq_rel:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a1, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: li a2, 255
+; RV32IA-WMO-NOZACAS-NEXT: sll a2, a2, a0
+; RV32IA-WMO-NOZACAS-NEXT: amoor.w.aqrl a1, a2, (a1)
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_xchg_minus_1_i8_acq_rel:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a1, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: li a2, 255
-; RV32IA-TSO-NEXT: sll a2, a2, a0
-; RV32IA-TSO-NEXT: amoor.w a1, a2, (a1)
-; RV32IA-TSO-NEXT: srl a0, a1, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i8_acq_rel:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a1, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: li a2, 255
+; RV32IA-TSO-NOZACAS-NEXT: sll a2, a2, a0
+; RV32IA-TSO-NOZACAS-NEXT: amoor.w a1, a2, (a1)
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_xchg_minus_1_i8_acq_rel:
; RV64I: # %bb.0:
@@ -1487,6 +1941,26 @@ define i8 @atomicrmw_xchg_minus_1_i8_acq_rel(ptr %a) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_xchg_minus_1_i8_acq_rel:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a1, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: li a2, 255
+; RV32IA-WMO-ZACAS-NEXT: sll a2, a2, a0
+; RV32IA-WMO-ZACAS-NEXT: amoor.w.aqrl a1, a2, (a1)
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_xchg_minus_1_i8_acq_rel:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a1, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: li a2, 255
+; RV32IA-TSO-ZACAS-NEXT: sll a2, a2, a0
+; RV32IA-TSO-ZACAS-NEXT: amoor.w a1, a2, (a1)
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_xchg_minus_1_i8_acq_rel:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a1, a0, -4
@@ -1507,6 +1981,18 @@ define i8 @atomicrmw_xchg_minus_1_i8_acq_rel(ptr %a) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xchg_minus_1_i8_acq_rel:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: li a1, -1
+; RV32IA-WMO-ZABHA-NEXT: amoswap.b.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_xchg_minus_1_i8_acq_rel:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: li a1, -1
+; RV32IA-TSO-ZABHA-NEXT: amoswap.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_xchg_minus_1_i8_acq_rel:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: li a1, -1
@@ -1534,25 +2020,25 @@ define i8 @atomicrmw_xchg_minus_1_i8_seq_cst(ptr %a) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_xchg_minus_1_i8_seq_cst:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a1, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: li a2, 255
-; RV32IA-WMO-NEXT: sll a2, a2, a0
-; RV32IA-WMO-NEXT: amoor.w.aqrl a1, a2, (a1)
-; RV32IA-WMO-NEXT: srl a0, a1, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i8_seq_cst:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a1, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: li a2, 255
+; RV32IA-WMO-NOZACAS-NEXT: sll a2, a2, a0
+; RV32IA-WMO-NOZACAS-NEXT: amoor.w.aqrl a1, a2, (a1)
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_xchg_minus_1_i8_seq_cst:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a1, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: li a2, 255
-; RV32IA-TSO-NEXT: sll a2, a2, a0
-; RV32IA-TSO-NEXT: amoor.w a1, a2, (a1)
-; RV32IA-TSO-NEXT: srl a0, a1, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i8_seq_cst:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a1, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: li a2, 255
+; RV32IA-TSO-NOZACAS-NEXT: sll a2, a2, a0
+; RV32IA-TSO-NOZACAS-NEXT: amoor.w a1, a2, (a1)
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_xchg_minus_1_i8_seq_cst:
; RV64I: # %bb.0:
@@ -1585,6 +2071,26 @@ define i8 @atomicrmw_xchg_minus_1_i8_seq_cst(ptr %a) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_xchg_minus_1_i8_seq_cst:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a1, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: li a2, 255
+; RV32IA-WMO-ZACAS-NEXT: sll a2, a2, a0
+; RV32IA-WMO-ZACAS-NEXT: amoor.w.aqrl a1, a2, (a1)
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_xchg_minus_1_i8_seq_cst:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a1, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: li a2, 255
+; RV32IA-TSO-ZACAS-NEXT: sll a2, a2, a0
+; RV32IA-TSO-ZACAS-NEXT: amoor.w a1, a2, (a1)
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_xchg_minus_1_i8_seq_cst:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a1, a0, -4
@@ -1605,6 +2111,18 @@ define i8 @atomicrmw_xchg_minus_1_i8_seq_cst(ptr %a) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xchg_minus_1_i8_seq_cst:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: li a1, -1
+; RV32IA-WMO-ZABHA-NEXT: amoswap.b.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_xchg_minus_1_i8_seq_cst:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: li a1, -1
+; RV32IA-TSO-ZABHA-NEXT: amoswap.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_xchg_minus_1_i8_seq_cst:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: li a1, -1
@@ -1631,25 +2149,25 @@ define i8 @atomicrmw_add_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-LABEL: atomicrmw_add_i8_monotonic:
-; RV32IA: # %bb.0:
-; RV32IA-NEXT: andi a2, a0, -4
-; RV32IA-NEXT: slli a0, a0, 3
-; RV32IA-NEXT: li a3, 255
-; RV32IA-NEXT: zext.b a1, a1
-; RV32IA-NEXT: sll a3, a3, a0
-; RV32IA-NEXT: sll a1, a1, a0
-; RV32IA-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: lr.w a4, (a2)
-; RV32IA-NEXT: add a5, a4, a1
-; RV32IA-NEXT: xor a5, a4, a5
-; RV32IA-NEXT: and a5, a5, a3
-; RV32IA-NEXT: xor a5, a4, a5
-; RV32IA-NEXT: sc.w a5, a5, (a2)
-; RV32IA-NEXT: bnez a5, .LBB15_1
-; RV32IA-NEXT: # %bb.2:
-; RV32IA-NEXT: srl a0, a4, a0
-; RV32IA-NEXT: ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_add_i8_monotonic:
+; RV32IA-NOZACAS: # %bb.0:
+; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT: li a3, 255
+; RV32IA-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-NOZACAS-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NOZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-NOZACAS-NEXT: add a5, a4, a1
+; RV32IA-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-NOZACAS-NEXT: and a5, a5, a3
+; RV32IA-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-NOZACAS-NEXT: bnez a5, .LBB15_1
+; RV32IA-NOZACAS-NEXT: # %bb.2:
+; RV32IA-NOZACAS-NEXT: srl a0, a4, a0
+; RV32IA-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_add_i8_monotonic:
; RV64I: # %bb.0:
@@ -1681,6 +2199,26 @@ define i8 @atomicrmw_add_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV64IA-NOZACAS-NEXT: srlw a0, a4, a0
; RV64IA-NOZACAS-NEXT: ret
;
+; RV32IA-ZACAS-LABEL: atomicrmw_add_i8_monotonic:
+; RV32IA-ZACAS: # %bb.0:
+; RV32IA-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-ZACAS-NEXT: li a3, 255
+; RV32IA-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-ZACAS-NEXT: .LBB15_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-ZACAS-NEXT: add a5, a4, a1
+; RV32IA-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-ZACAS-NEXT: and a5, a5, a3
+; RV32IA-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-ZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-ZACAS-NEXT: bnez a5, .LBB15_1
+; RV32IA-ZACAS-NEXT: # %bb.2:
+; RV32IA-ZACAS-NEXT: srl a0, a4, a0
+; RV32IA-ZACAS-NEXT: ret
+;
; RV64IA-ZACAS-LABEL: atomicrmw_add_i8_monotonic:
; RV64IA-ZACAS: # %bb.0:
; RV64IA-ZACAS-NEXT: andi a2, a0, -4
@@ -1701,6 +2239,16 @@ define i8 @atomicrmw_add_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV64IA-ZACAS-NEXT: srlw a0, a4, a0
; RV64IA-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_add_i8_monotonic:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amoadd.b a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_add_i8_monotonic:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amoadd.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_add_i8_monotonic:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amoadd.b a0, a1, (a0)
@@ -1725,45 +2273,45 @@ define i8 @atomicrmw_add_i8_acquire(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_add_i8_acquire:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: li a3, 255
-; RV32IA-WMO-NEXT: zext.b a1, a1
-; RV32IA-WMO-NEXT: sll a3, a3, a0
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT: lr.w.aq a4, (a2)
-; RV32IA-WMO-NEXT: add a5, a4, a1
-; RV32IA-WMO-NEXT: xor a5, a4, a5
-; RV32IA-WMO-NEXT: and a5, a5, a3
-; RV32IA-WMO-NEXT: xor a5, a4, a5
-; RV32IA-WMO-NEXT: sc.w a5, a5, (a2)
-; RV32IA-WMO-NEXT: bnez a5, .LBB16_1
-; RV32IA-WMO-NEXT: # %bb.2:
-; RV32IA-WMO-NEXT: srl a0, a4, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_add_i8_acquire:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: li a3, 255
+; RV32IA-WMO-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: lr.w.aq a4, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: add a5, a4, a1
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-NOZACAS-NEXT: and a5, a5, a3
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: bnez a5, .LBB16_1
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a4, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_add_i8_acquire:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: li a3, 255
-; RV32IA-TSO-NEXT: zext.b a1, a1
-; RV32IA-TSO-NEXT: sll a3, a3, a0
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT: lr.w a4, (a2)
-; RV32IA-TSO-NEXT: add a5, a4, a1
-; RV32IA-TSO-NEXT: xor a5, a4, a5
-; RV32IA-TSO-NEXT: and a5, a5, a3
-; RV32IA-TSO-NEXT: xor a5, a4, a5
-; RV32IA-TSO-NEXT: sc.w a5, a5, (a2)
-; RV32IA-TSO-NEXT: bnez a5, .LBB16_1
-; RV32IA-TSO-NEXT: # %bb.2:
-; RV32IA-TSO-NEXT: srl a0, a4, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_add_i8_acquire:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: li a3, 255
+; RV32IA-TSO-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: add a5, a4, a1
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-NOZACAS-NEXT: and a5, a5, a3
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: bnez a5, .LBB16_1
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a4, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_add_i8_acquire:
; RV64I: # %bb.0:
@@ -1815,6 +2363,46 @@ define i8 @atomicrmw_add_i8_acquire(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a4, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_add_i8_acquire:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: li a3, 255
+; RV32IA-WMO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a4, (a2)
+; RV32IA-WMO-ZACAS-NEXT: add a5, a4, a1
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a3
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-ZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB16_1
+; RV32IA-WMO-ZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a4, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_add_i8_acquire:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: li a3, 255
+; RV32IA-TSO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: .LBB16_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-TSO-ZACAS-NEXT: add a5, a4, a1
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a3
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT: bnez a5, .LBB16_1
+; RV32IA-TSO-ZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a4, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_add_i8_acquire:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -1855,6 +2443,16 @@ define i8 @atomicrmw_add_i8_acquire(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a4, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_add_i8_acquire:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amoadd.b.aq a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_add_i8_acquire:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amoadd.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_add_i8_acquire:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amoadd.b.aq a0, a1, (a0)
@@ -1879,45 +2477,45 @@ define i8 @atomicrmw_add_i8_release(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_add_i8_release:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: li a3, 255
-; RV32IA-WMO-NEXT: zext.b a1, a1
-; RV32IA-WMO-NEXT: sll a3, a3, a0
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: .LBB17_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT: lr.w a4, (a2)
-; RV32IA-WMO-NEXT: add a5, a4, a1
-; RV32IA-WMO-NEXT: xor a5, a4, a5
-; RV32IA-WMO-NEXT: and a5, a5, a3
-; RV32IA-WMO-NEXT: xor a5, a4, a5
-; RV32IA-WMO-NEXT: sc.w.rl a5, a5, (a2)
-; RV32IA-WMO-NEXT: bnez a5, .LBB17_1
-; RV32IA-WMO-NEXT: # %bb.2:
-; RV32IA-WMO-NEXT: srl a0, a4, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_add_i8_release:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: li a3, 255
+; RV32IA-WMO-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: .LBB17_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: add a5, a4, a1
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-NOZACAS-NEXT: and a5, a5, a3
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-NOZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: bnez a5, .LBB17_1
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a4, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_add_i8_release:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: li a3, 255
-; RV32IA-TSO-NEXT: zext.b a1, a1
-; RV32IA-TSO-NEXT: sll a3, a3, a0
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: .LBB17_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT: lr.w a4, (a2)
-; RV32IA-TSO-NEXT: add a5, a4, a1
-; RV32IA-TSO-NEXT: xor a5, a4, a5
-; RV32IA-TSO-NEXT: and a5, a5, a3
-; RV32IA-TSO-NEXT: xor a5, a4, a5
-; RV32IA-TSO-NEXT: sc.w a5, a5, (a2)
-; RV32IA-TSO-NEXT: bnez a5, .LBB17_1
-; RV32IA-TSO-NEXT: # %bb.2:
-; RV32IA-TSO-NEXT: srl a0, a4, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_add_i8_release:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: li a3, 255
+; RV32IA-TSO-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: .LBB17_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: add a5, a4, a1
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-NOZACAS-NEXT: and a5, a5, a3
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: bnez a5, .LBB17_1
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a4, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_add_i8_release:
; RV64I: # %bb.0:
@@ -1969,6 +2567,46 @@ define i8 @atomicrmw_add_i8_release(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a4, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_add_i8_release:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: li a3, 255
+; RV32IA-WMO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: .LBB17_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-WMO-ZACAS-NEXT: add a5, a4, a1
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a3
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB17_1
+; RV32IA-WMO-ZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a4, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_add_i8_release:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: li a3, 255
+; RV32IA-TSO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: .LBB17_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-TSO-ZACAS-NEXT: add a5, a4, a1
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a3
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT: bnez a5, .LBB17_1
+; RV32IA-TSO-ZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a4, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_add_i8_release:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -2009,6 +2647,16 @@ define i8 @atomicrmw_add_i8_release(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a4, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_add_i8_release:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amoadd.b.rl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_add_i8_release:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amoadd.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_add_i8_release:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amoadd.b.rl a0, a1, (a0)
@@ -2033,45 +2681,45 @@ define i8 @atomicrmw_add_i8_acq_rel(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_add_i8_acq_rel:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: li a3, 255
-; RV32IA-WMO-NEXT: zext.b a1, a1
-; RV32IA-WMO-NEXT: sll a3, a3, a0
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: .LBB18_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT: lr.w.aq a4, (a2)
-; RV32IA-WMO-NEXT: add a5, a4, a1
-; RV32IA-WMO-NEXT: xor a5, a4, a5
-; RV32IA-WMO-NEXT: and a5, a5, a3
-; RV32IA-WMO-NEXT: xor a5, a4, a5
-; RV32IA-WMO-NEXT: sc.w.rl a5, a5, (a2)
-; RV32IA-WMO-NEXT: bnez a5, .LBB18_1
-; RV32IA-WMO-NEXT: # %bb.2:
-; RV32IA-WMO-NEXT: srl a0, a4, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_add_i8_acq_rel:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: li a3, 255
+; RV32IA-WMO-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: .LBB18_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: lr.w.aq a4, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: add a5, a4, a1
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-NOZACAS-NEXT: and a5, a5, a3
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-NOZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: bnez a5, .LBB18_1
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a4, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_add_i8_acq_rel:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: li a3, 255
-; RV32IA-TSO-NEXT: zext.b a1, a1
-; RV32IA-TSO-NEXT: sll a3, a3, a0
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: .LBB18_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT: lr.w a4, (a2)
-; RV32IA-TSO-NEXT: add a5, a4, a1
-; RV32IA-TSO-NEXT: xor a5, a4, a5
-; RV32IA-TSO-NEXT: and a5, a5, a3
-; RV32IA-TSO-NEXT: xor a5, a4, a5
-; RV32IA-TSO-NEXT: sc.w a5, a5, (a2)
-; RV32IA-TSO-NEXT: bnez a5, .LBB18_1
-; RV32IA-TSO-NEXT: # %bb.2:
-; RV32IA-TSO-NEXT: srl a0, a4, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_add_i8_acq_rel:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: li a3, 255
+; RV32IA-TSO-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: .LBB18_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: add a5, a4, a1
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-NOZACAS-NEXT: and a5, a5, a3
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: bnez a5, .LBB18_1
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a4, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_add_i8_acq_rel:
; RV64I: # %bb.0:
@@ -2123,6 +2771,46 @@ define i8 @atomicrmw_add_i8_acq_rel(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a4, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_add_i8_acq_rel:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: li a3, 255
+; RV32IA-WMO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: .LBB18_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a4, (a2)
+; RV32IA-WMO-ZACAS-NEXT: add a5, a4, a1
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a3
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB18_1
+; RV32IA-WMO-ZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a4, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_add_i8_acq_rel:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: li a3, 255
+; RV32IA-TSO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: .LBB18_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-TSO-ZACAS-NEXT: add a5, a4, a1
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a3
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT: bnez a5, .LBB18_1
+; RV32IA-TSO-ZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a4, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_add_i8_acq_rel:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -2163,6 +2851,16 @@ define i8 @atomicrmw_add_i8_acq_rel(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a4, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_add_i8_acq_rel:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amoadd.b.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_add_i8_acq_rel:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amoadd.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_add_i8_acq_rel:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amoadd.b.aqrl a0, a1, (a0)
@@ -2187,25 +2885,25 @@ define i8 @atomicrmw_add_i8_seq_cst(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-LABEL: atomicrmw_add_i8_seq_cst:
-; RV32IA: # %bb.0:
-; RV32IA-NEXT: andi a2, a0, -4
-; RV32IA-NEXT: slli a0, a0, 3
-; RV32IA-NEXT: li a3, 255
-; RV32IA-NEXT: zext.b a1, a1
-; RV32IA-NEXT: sll a3, a3, a0
-; RV32IA-NEXT: sll a1, a1, a0
-; RV32IA-NEXT: .LBB19_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: lr.w.aqrl a4, (a2)
-; RV32IA-NEXT: add a5, a4, a1
-; RV32IA-NEXT: xor a5, a4, a5
-; RV32IA-NEXT: and a5, a5, a3
-; RV32IA-NEXT: xor a5, a4, a5
-; RV32IA-NEXT: sc.w.rl a5, a5, (a2)
-; RV32IA-NEXT: bnez a5, .LBB19_1
-; RV32IA-NEXT: # %bb.2:
-; RV32IA-NEXT: srl a0, a4, a0
-; RV32IA-NEXT: ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_add_i8_seq_cst:
+; RV32IA-NOZACAS: # %bb.0:
+; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT: li a3, 255
+; RV32IA-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-NOZACAS-NEXT: .LBB19_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NOZACAS-NEXT: lr.w.aqrl a4, (a2)
+; RV32IA-NOZACAS-NEXT: add a5, a4, a1
+; RV32IA-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-NOZACAS-NEXT: and a5, a5, a3
+; RV32IA-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-NOZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-NOZACAS-NEXT: bnez a5, .LBB19_1
+; RV32IA-NOZACAS-NEXT: # %bb.2:
+; RV32IA-NOZACAS-NEXT: srl a0, a4, a0
+; RV32IA-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_add_i8_seq_cst:
; RV64I: # %bb.0:
@@ -2237,6 +2935,26 @@ define i8 @atomicrmw_add_i8_seq_cst(ptr %a, i8 %b) nounwind {
; RV64IA-NOZACAS-NEXT: srlw a0, a4, a0
; RV64IA-NOZACAS-NEXT: ret
;
+; RV32IA-ZACAS-LABEL: atomicrmw_add_i8_seq_cst:
+; RV32IA-ZACAS: # %bb.0:
+; RV32IA-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-ZACAS-NEXT: li a3, 255
+; RV32IA-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-ZACAS-NEXT: .LBB19_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT: lr.w.aqrl a4, (a2)
+; RV32IA-ZACAS-NEXT: add a5, a4, a1
+; RV32IA-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-ZACAS-NEXT: and a5, a5, a3
+; RV32IA-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-ZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-ZACAS-NEXT: bnez a5, .LBB19_1
+; RV32IA-ZACAS-NEXT: # %bb.2:
+; RV32IA-ZACAS-NEXT: srl a0, a4, a0
+; RV32IA-ZACAS-NEXT: ret
+;
; RV64IA-ZACAS-LABEL: atomicrmw_add_i8_seq_cst:
; RV64IA-ZACAS: # %bb.0:
; RV64IA-ZACAS-NEXT: andi a2, a0, -4
@@ -2257,6 +2975,16 @@ define i8 @atomicrmw_add_i8_seq_cst(ptr %a, i8 %b) nounwind {
; RV64IA-ZACAS-NEXT: srlw a0, a4, a0
; RV64IA-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_add_i8_seq_cst:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amoadd.b.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_add_i8_seq_cst:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amoadd.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_add_i8_seq_cst:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amoadd.b.aqrl a0, a1, (a0)
@@ -2281,25 +3009,25 @@ define i8 @atomicrmw_sub_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-LABEL: atomicrmw_sub_i8_monotonic:
-; RV32IA: # %bb.0:
-; RV32IA-NEXT: andi a2, a0, -4
-; RV32IA-NEXT: slli a0, a0, 3
-; RV32IA-NEXT: li a3, 255
-; RV32IA-NEXT: zext.b a1, a1
-; RV32IA-NEXT: sll a3, a3, a0
-; RV32IA-NEXT: sll a1, a1, a0
-; RV32IA-NEXT: .LBB20_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: lr.w a4, (a2)
-; RV32IA-NEXT: sub a5, a4, a1
-; RV32IA-NEXT: xor a5, a4, a5
-; RV32IA-NEXT: and a5, a5, a3
-; RV32IA-NEXT: xor a5, a4, a5
-; RV32IA-NEXT: sc.w a5, a5, (a2)
-; RV32IA-NEXT: bnez a5, .LBB20_1
-; RV32IA-NEXT: # %bb.2:
-; RV32IA-NEXT: srl a0, a4, a0
-; RV32IA-NEXT: ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_sub_i8_monotonic:
+; RV32IA-NOZACAS: # %bb.0:
+; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT: li a3, 255
+; RV32IA-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-NOZACAS-NEXT: .LBB20_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NOZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-NOZACAS-NEXT: sub a5, a4, a1
+; RV32IA-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-NOZACAS-NEXT: and a5, a5, a3
+; RV32IA-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-NOZACAS-NEXT: bnez a5, .LBB20_1
+; RV32IA-NOZACAS-NEXT: # %bb.2:
+; RV32IA-NOZACAS-NEXT: srl a0, a4, a0
+; RV32IA-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_sub_i8_monotonic:
; RV64I: # %bb.0:
@@ -2331,6 +3059,26 @@ define i8 @atomicrmw_sub_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV64IA-NOZACAS-NEXT: srlw a0, a4, a0
; RV64IA-NOZACAS-NEXT: ret
;
+; RV32IA-ZACAS-LABEL: atomicrmw_sub_i8_monotonic:
+; RV32IA-ZACAS: # %bb.0:
+; RV32IA-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-ZACAS-NEXT: li a3, 255
+; RV32IA-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-ZACAS-NEXT: .LBB20_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-ZACAS-NEXT: sub a5, a4, a1
+; RV32IA-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-ZACAS-NEXT: and a5, a5, a3
+; RV32IA-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-ZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-ZACAS-NEXT: bnez a5, .LBB20_1
+; RV32IA-ZACAS-NEXT: # %bb.2:
+; RV32IA-ZACAS-NEXT: srl a0, a4, a0
+; RV32IA-ZACAS-NEXT: ret
+;
; RV64IA-ZACAS-LABEL: atomicrmw_sub_i8_monotonic:
; RV64IA-ZACAS: # %bb.0:
; RV64IA-ZACAS-NEXT: andi a2, a0, -4
@@ -2351,6 +3099,18 @@ define i8 @atomicrmw_sub_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV64IA-ZACAS-NEXT: srlw a0, a4, a0
; RV64IA-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_sub_i8_monotonic:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: neg a1, a1
+; RV32IA-WMO-ZABHA-NEXT: amoadd.b a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_sub_i8_monotonic:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: neg a1, a1
+; RV32IA-TSO-ZABHA-NEXT: amoadd.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_sub_i8_monotonic:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: neg a1, a1
@@ -2377,45 +3137,45 @@ define i8 @atomicrmw_sub_i8_acquire(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_sub_i8_acquire:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: li a3, 255
-; RV32IA-WMO-NEXT: zext.b a1, a1
-; RV32IA-WMO-NEXT: sll a3, a3, a0
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: .LBB21_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT: lr.w.aq a4, (a2)
-; RV32IA-WMO-NEXT: sub a5, a4, a1
-; RV32IA-WMO-NEXT: xor a5, a4, a5
-; RV32IA-WMO-NEXT: and a5, a5, a3
-; RV32IA-WMO-NEXT: xor a5, a4, a5
-; RV32IA-WMO-NEXT: sc.w a5, a5, (a2)
-; RV32IA-WMO-NEXT: bnez a5, .LBB21_1
-; RV32IA-WMO-NEXT: # %bb.2:
-; RV32IA-WMO-NEXT: srl a0, a4, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_sub_i8_acquire:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: li a3, 255
+; RV32IA-WMO-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: .LBB21_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: lr.w.aq a4, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: sub a5, a4, a1
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-NOZACAS-NEXT: and a5, a5, a3
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: bnez a5, .LBB21_1
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a4, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_sub_i8_acquire:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: li a3, 255
-; RV32IA-TSO-NEXT: zext.b a1, a1
-; RV32IA-TSO-NEXT: sll a3, a3, a0
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: .LBB21_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT: lr.w a4, (a2)
-; RV32IA-TSO-NEXT: sub a5, a4, a1
-; RV32IA-TSO-NEXT: xor a5, a4, a5
-; RV32IA-TSO-NEXT: and a5, a5, a3
-; RV32IA-TSO-NEXT: xor a5, a4, a5
-; RV32IA-TSO-NEXT: sc.w a5, a5, (a2)
-; RV32IA-TSO-NEXT: bnez a5, .LBB21_1
-; RV32IA-TSO-NEXT: # %bb.2:
-; RV32IA-TSO-NEXT: srl a0, a4, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_sub_i8_acquire:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: li a3, 255
+; RV32IA-TSO-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: .LBB21_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: sub a5, a4, a1
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-NOZACAS-NEXT: and a5, a5, a3
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: bnez a5, .LBB21_1
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a4, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_sub_i8_acquire:
; RV64I: # %bb.0:
@@ -2467,6 +3227,46 @@ define i8 @atomicrmw_sub_i8_acquire(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a4, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_sub_i8_acquire:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: li a3, 255
+; RV32IA-WMO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: .LBB21_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a4, (a2)
+; RV32IA-WMO-ZACAS-NEXT: sub a5, a4, a1
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a3
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-ZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB21_1
+; RV32IA-WMO-ZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a4, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_sub_i8_acquire:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: li a3, 255
+; RV32IA-TSO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: .LBB21_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-TSO-ZACAS-NEXT: sub a5, a4, a1
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a3
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT: bnez a5, .LBB21_1
+; RV32IA-TSO-ZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a4, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_sub_i8_acquire:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -2507,6 +3307,18 @@ define i8 @atomicrmw_sub_i8_acquire(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a4, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_sub_i8_acquire:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: neg a1, a1
+; RV32IA-WMO-ZABHA-NEXT: amoadd.b.aq a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_sub_i8_acquire:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: neg a1, a1
+; RV32IA-TSO-ZABHA-NEXT: amoadd.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_sub_i8_acquire:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: neg a1, a1
@@ -2533,45 +3345,45 @@ define i8 @atomicrmw_sub_i8_release(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_sub_i8_release:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: li a3, 255
-; RV32IA-WMO-NEXT: zext.b a1, a1
-; RV32IA-WMO-NEXT: sll a3, a3, a0
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: .LBB22_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT: lr.w a4, (a2)
-; RV32IA-WMO-NEXT: sub a5, a4, a1
-; RV32IA-WMO-NEXT: xor a5, a4, a5
-; RV32IA-WMO-NEXT: and a5, a5, a3
-; RV32IA-WMO-NEXT: xor a5, a4, a5
-; RV32IA-WMO-NEXT: sc.w.rl a5, a5, (a2)
-; RV32IA-WMO-NEXT: bnez a5, .LBB22_1
-; RV32IA-WMO-NEXT: # %bb.2:
-; RV32IA-WMO-NEXT: srl a0, a4, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_sub_i8_release:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: li a3, 255
+; RV32IA-WMO-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: .LBB22_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: sub a5, a4, a1
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-NOZACAS-NEXT: and a5, a5, a3
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-NOZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: bnez a5, .LBB22_1
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a4, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_sub_i8_release:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: li a3, 255
-; RV32IA-TSO-NEXT: zext.b a1, a1
-; RV32IA-TSO-NEXT: sll a3, a3, a0
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: .LBB22_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT: lr.w a4, (a2)
-; RV32IA-TSO-NEXT: sub a5, a4, a1
-; RV32IA-TSO-NEXT: xor a5, a4, a5
-; RV32IA-TSO-NEXT: and a5, a5, a3
-; RV32IA-TSO-NEXT: xor a5, a4, a5
-; RV32IA-TSO-NEXT: sc.w a5, a5, (a2)
-; RV32IA-TSO-NEXT: bnez a5, .LBB22_1
-; RV32IA-TSO-NEXT: # %bb.2:
-; RV32IA-TSO-NEXT: srl a0, a4, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_sub_i8_release:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: li a3, 255
+; RV32IA-TSO-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: .LBB22_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: sub a5, a4, a1
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-NOZACAS-NEXT: and a5, a5, a3
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: bnez a5, .LBB22_1
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a4, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_sub_i8_release:
; RV64I: # %bb.0:
@@ -2623,6 +3435,46 @@ define i8 @atomicrmw_sub_i8_release(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a4, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_sub_i8_release:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: li a3, 255
+; RV32IA-WMO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: .LBB22_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-WMO-ZACAS-NEXT: sub a5, a4, a1
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a3
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB22_1
+; RV32IA-WMO-ZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a4, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_sub_i8_release:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: li a3, 255
+; RV32IA-TSO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: .LBB22_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-TSO-ZACAS-NEXT: sub a5, a4, a1
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a3
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT: bnez a5, .LBB22_1
+; RV32IA-TSO-ZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a4, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_sub_i8_release:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -2663,6 +3515,18 @@ define i8 @atomicrmw_sub_i8_release(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a4, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_sub_i8_release:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: neg a1, a1
+; RV32IA-WMO-ZABHA-NEXT: amoadd.b.rl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_sub_i8_release:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: neg a1, a1
+; RV32IA-TSO-ZABHA-NEXT: amoadd.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_sub_i8_release:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: neg a1, a1
@@ -2689,45 +3553,45 @@ define i8 @atomicrmw_sub_i8_acq_rel(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_sub_i8_acq_rel:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: li a3, 255
-; RV32IA-WMO-NEXT: zext.b a1, a1
-; RV32IA-WMO-NEXT: sll a3, a3, a0
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: .LBB23_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT: lr.w.aq a4, (a2)
-; RV32IA-WMO-NEXT: sub a5, a4, a1
-; RV32IA-WMO-NEXT: xor a5, a4, a5
-; RV32IA-WMO-NEXT: and a5, a5, a3
-; RV32IA-WMO-NEXT: xor a5, a4, a5
-; RV32IA-WMO-NEXT: sc.w.rl a5, a5, (a2)
-; RV32IA-WMO-NEXT: bnez a5, .LBB23_1
-; RV32IA-WMO-NEXT: # %bb.2:
-; RV32IA-WMO-NEXT: srl a0, a4, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_sub_i8_acq_rel:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: li a3, 255
+; RV32IA-WMO-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: .LBB23_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: lr.w.aq a4, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: sub a5, a4, a1
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-NOZACAS-NEXT: and a5, a5, a3
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-NOZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: bnez a5, .LBB23_1
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a4, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_sub_i8_acq_rel:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: li a3, 255
-; RV32IA-TSO-NEXT: zext.b a1, a1
-; RV32IA-TSO-NEXT: sll a3, a3, a0
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: .LBB23_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT: lr.w a4, (a2)
-; RV32IA-TSO-NEXT: sub a5, a4, a1
-; RV32IA-TSO-NEXT: xor a5, a4, a5
-; RV32IA-TSO-NEXT: and a5, a5, a3
-; RV32IA-TSO-NEXT: xor a5, a4, a5
-; RV32IA-TSO-NEXT: sc.w a5, a5, (a2)
-; RV32IA-TSO-NEXT: bnez a5, .LBB23_1
-; RV32IA-TSO-NEXT: # %bb.2:
-; RV32IA-TSO-NEXT: srl a0, a4, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_sub_i8_acq_rel:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: li a3, 255
+; RV32IA-TSO-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: .LBB23_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: sub a5, a4, a1
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-NOZACAS-NEXT: and a5, a5, a3
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: bnez a5, .LBB23_1
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a4, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_sub_i8_acq_rel:
; RV64I: # %bb.0:
@@ -2779,6 +3643,46 @@ define i8 @atomicrmw_sub_i8_acq_rel(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a4, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_sub_i8_acq_rel:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: li a3, 255
+; RV32IA-WMO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: .LBB23_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a4, (a2)
+; RV32IA-WMO-ZACAS-NEXT: sub a5, a4, a1
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a3
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB23_1
+; RV32IA-WMO-ZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a4, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_sub_i8_acq_rel:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: li a3, 255
+; RV32IA-TSO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: .LBB23_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-TSO-ZACAS-NEXT: sub a5, a4, a1
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a3
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT: bnez a5, .LBB23_1
+; RV32IA-TSO-ZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a4, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_sub_i8_acq_rel:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -2819,6 +3723,18 @@ define i8 @atomicrmw_sub_i8_acq_rel(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a4, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_sub_i8_acq_rel:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: neg a1, a1
+; RV32IA-WMO-ZABHA-NEXT: amoadd.b.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_sub_i8_acq_rel:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: neg a1, a1
+; RV32IA-TSO-ZABHA-NEXT: amoadd.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_sub_i8_acq_rel:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: neg a1, a1
@@ -2845,25 +3761,25 @@ define i8 @atomicrmw_sub_i8_seq_cst(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-LABEL: atomicrmw_sub_i8_seq_cst:
-; RV32IA: # %bb.0:
-; RV32IA-NEXT: andi a2, a0, -4
-; RV32IA-NEXT: slli a0, a0, 3
-; RV32IA-NEXT: li a3, 255
-; RV32IA-NEXT: zext.b a1, a1
-; RV32IA-NEXT: sll a3, a3, a0
-; RV32IA-NEXT: sll a1, a1, a0
-; RV32IA-NEXT: .LBB24_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: lr.w.aqrl a4, (a2)
-; RV32IA-NEXT: sub a5, a4, a1
-; RV32IA-NEXT: xor a5, a4, a5
-; RV32IA-NEXT: and a5, a5, a3
-; RV32IA-NEXT: xor a5, a4, a5
-; RV32IA-NEXT: sc.w.rl a5, a5, (a2)
-; RV32IA-NEXT: bnez a5, .LBB24_1
-; RV32IA-NEXT: # %bb.2:
-; RV32IA-NEXT: srl a0, a4, a0
-; RV32IA-NEXT: ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_sub_i8_seq_cst:
+; RV32IA-NOZACAS: # %bb.0:
+; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT: li a3, 255
+; RV32IA-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-NOZACAS-NEXT: .LBB24_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NOZACAS-NEXT: lr.w.aqrl a4, (a2)
+; RV32IA-NOZACAS-NEXT: sub a5, a4, a1
+; RV32IA-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-NOZACAS-NEXT: and a5, a5, a3
+; RV32IA-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-NOZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-NOZACAS-NEXT: bnez a5, .LBB24_1
+; RV32IA-NOZACAS-NEXT: # %bb.2:
+; RV32IA-NOZACAS-NEXT: srl a0, a4, a0
+; RV32IA-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_sub_i8_seq_cst:
; RV64I: # %bb.0:
@@ -2895,6 +3811,26 @@ define i8 @atomicrmw_sub_i8_seq_cst(ptr %a, i8 %b) nounwind {
; RV64IA-NOZACAS-NEXT: srlw a0, a4, a0
; RV64IA-NOZACAS-NEXT: ret
;
+; RV32IA-ZACAS-LABEL: atomicrmw_sub_i8_seq_cst:
+; RV32IA-ZACAS: # %bb.0:
+; RV32IA-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-ZACAS-NEXT: li a3, 255
+; RV32IA-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-ZACAS-NEXT: .LBB24_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT: lr.w.aqrl a4, (a2)
+; RV32IA-ZACAS-NEXT: sub a5, a4, a1
+; RV32IA-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-ZACAS-NEXT: and a5, a5, a3
+; RV32IA-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-ZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-ZACAS-NEXT: bnez a5, .LBB24_1
+; RV32IA-ZACAS-NEXT: # %bb.2:
+; RV32IA-ZACAS-NEXT: srl a0, a4, a0
+; RV32IA-ZACAS-NEXT: ret
+;
; RV64IA-ZACAS-LABEL: atomicrmw_sub_i8_seq_cst:
; RV64IA-ZACAS: # %bb.0:
; RV64IA-ZACAS-NEXT: andi a2, a0, -4
@@ -2915,6 +3851,18 @@ define i8 @atomicrmw_sub_i8_seq_cst(ptr %a, i8 %b) nounwind {
; RV64IA-ZACAS-NEXT: srlw a0, a4, a0
; RV64IA-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_sub_i8_seq_cst:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: neg a1, a1
+; RV32IA-WMO-ZABHA-NEXT: amoadd.b.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_sub_i8_seq_cst:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: neg a1, a1
+; RV32IA-TSO-ZABHA-NEXT: amoadd.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_sub_i8_seq_cst:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: neg a1, a1
@@ -2941,19 +3889,19 @@ define i8 @atomicrmw_and_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-LABEL: atomicrmw_and_i8_monotonic:
-; RV32IA: # %bb.0:
-; RV32IA-NEXT: andi a2, a0, -4
-; RV32IA-NEXT: slli a0, a0, 3
-; RV32IA-NEXT: li a3, 255
-; RV32IA-NEXT: zext.b a1, a1
-; RV32IA-NEXT: sll a3, a3, a0
-; RV32IA-NEXT: not a3, a3
-; RV32IA-NEXT: sll a1, a1, a0
-; RV32IA-NEXT: or a1, a1, a3
-; RV32IA-NEXT: amoand.w a1, a1, (a2)
-; RV32IA-NEXT: srl a0, a1, a0
-; RV32IA-NEXT: ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_and_i8_monotonic:
+; RV32IA-NOZACAS: # %bb.0:
+; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT: li a3, 255
+; RV32IA-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-NOZACAS-NEXT: not a3, a3
+; RV32IA-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-NOZACAS-NEXT: or a1, a1, a3
+; RV32IA-NOZACAS-NEXT: amoand.w a1, a1, (a2)
+; RV32IA-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_and_i8_monotonic:
; RV64I: # %bb.0:
@@ -2979,6 +3927,20 @@ define i8 @atomicrmw_and_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV64IA-NOZACAS-NEXT: srlw a0, a1, a0
; RV64IA-NOZACAS-NEXT: ret
;
+; RV32IA-ZACAS-LABEL: atomicrmw_and_i8_monotonic:
+; RV32IA-ZACAS: # %bb.0:
+; RV32IA-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-ZACAS-NEXT: li a3, 255
+; RV32IA-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-ZACAS-NEXT: not a3, a3
+; RV32IA-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-ZACAS-NEXT: or a1, a1, a3
+; RV32IA-ZACAS-NEXT: amoand.w a1, a1, (a2)
+; RV32IA-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-ZACAS-NEXT: ret
+;
; RV64IA-ZACAS-LABEL: atomicrmw_and_i8_monotonic:
; RV64IA-ZACAS: # %bb.0:
; RV64IA-ZACAS-NEXT: andi a2, a0, -4
@@ -2993,6 +3955,16 @@ define i8 @atomicrmw_and_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV64IA-ZACAS-NEXT: srlw a0, a1, a0
; RV64IA-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_and_i8_monotonic:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amoand.b a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_and_i8_monotonic:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amoand.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_and_i8_monotonic:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amoand.b a0, a1, (a0)
@@ -3017,33 +3989,33 @@ define i8 @atomicrmw_and_i8_acquire(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_and_i8_acquire:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: li a3, 255
-; RV32IA-WMO-NEXT: zext.b a1, a1
-; RV32IA-WMO-NEXT: sll a3, a3, a0
-; RV32IA-WMO-NEXT: not a3, a3
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: or a1, a1, a3
-; RV32IA-WMO-NEXT: amoand.w.aq a1, a1, (a2)
-; RV32IA-WMO-NEXT: srl a0, a1, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_and_i8_acquire:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: li a3, 255
+; RV32IA-WMO-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: not a3, a3
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: or a1, a1, a3
+; RV32IA-WMO-NOZACAS-NEXT: amoand.w.aq a1, a1, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_and_i8_acquire:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: li a3, 255
-; RV32IA-TSO-NEXT: zext.b a1, a1
-; RV32IA-TSO-NEXT: sll a3, a3, a0
-; RV32IA-TSO-NEXT: not a3, a3
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: or a1, a1, a3
-; RV32IA-TSO-NEXT: amoand.w a1, a1, (a2)
-; RV32IA-TSO-NEXT: srl a0, a1, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_and_i8_acquire:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: li a3, 255
+; RV32IA-TSO-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: not a3, a3
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: or a1, a1, a3
+; RV32IA-TSO-NOZACAS-NEXT: amoand.w a1, a1, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_and_i8_acquire:
; RV64I: # %bb.0:
@@ -3083,6 +4055,34 @@ define i8 @atomicrmw_and_i8_acquire(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_and_i8_acquire:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: li a3, 255
+; RV32IA-WMO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: not a3, a3
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: or a1, a1, a3
+; RV32IA-WMO-ZACAS-NEXT: amoand.w.aq a1, a1, (a2)
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_and_i8_acquire:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: li a3, 255
+; RV32IA-TSO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: not a3, a3
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: or a1, a1, a3
+; RV32IA-TSO-ZACAS-NEXT: amoand.w a1, a1, (a2)
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_and_i8_acquire:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -3111,6 +4111,16 @@ define i8 @atomicrmw_and_i8_acquire(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_and_i8_acquire:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amoand.b.aq a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_and_i8_acquire:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amoand.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_and_i8_acquire:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amoand.b.aq a0, a1, (a0)
@@ -3135,33 +4145,33 @@ define i8 @atomicrmw_and_i8_release(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_and_i8_release:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: li a3, 255
-; RV32IA-WMO-NEXT: zext.b a1, a1
-; RV32IA-WMO-NEXT: sll a3, a3, a0
-; RV32IA-WMO-NEXT: not a3, a3
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: or a1, a1, a3
-; RV32IA-WMO-NEXT: amoand.w.rl a1, a1, (a2)
-; RV32IA-WMO-NEXT: srl a0, a1, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_and_i8_release:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: li a3, 255
+; RV32IA-WMO-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: not a3, a3
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: or a1, a1, a3
+; RV32IA-WMO-NOZACAS-NEXT: amoand.w.rl a1, a1, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_and_i8_release:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: li a3, 255
-; RV32IA-TSO-NEXT: zext.b a1, a1
-; RV32IA-TSO-NEXT: sll a3, a3, a0
-; RV32IA-TSO-NEXT: not a3, a3
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: or a1, a1, a3
-; RV32IA-TSO-NEXT: amoand.w a1, a1, (a2)
-; RV32IA-TSO-NEXT: srl a0, a1, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_and_i8_release:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: li a3, 255
+; RV32IA-TSO-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: not a3, a3
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: or a1, a1, a3
+; RV32IA-TSO-NOZACAS-NEXT: amoand.w a1, a1, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_and_i8_release:
; RV64I: # %bb.0:
@@ -3201,6 +4211,34 @@ define i8 @atomicrmw_and_i8_release(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_and_i8_release:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: li a3, 255
+; RV32IA-WMO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: not a3, a3
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: or a1, a1, a3
+; RV32IA-WMO-ZACAS-NEXT: amoand.w.rl a1, a1, (a2)
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_and_i8_release:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: li a3, 255
+; RV32IA-TSO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: not a3, a3
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: or a1, a1, a3
+; RV32IA-TSO-ZACAS-NEXT: amoand.w a1, a1, (a2)
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_and_i8_release:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -3229,6 +4267,16 @@ define i8 @atomicrmw_and_i8_release(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_and_i8_release:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amoand.b.rl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_and_i8_release:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amoand.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_and_i8_release:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amoand.b.rl a0, a1, (a0)
@@ -3253,33 +4301,33 @@ define i8 @atomicrmw_and_i8_acq_rel(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_and_i8_acq_rel:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: li a3, 255
-; RV32IA-WMO-NEXT: zext.b a1, a1
-; RV32IA-WMO-NEXT: sll a3, a3, a0
-; RV32IA-WMO-NEXT: not a3, a3
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: or a1, a1, a3
-; RV32IA-WMO-NEXT: amoand.w.aqrl a1, a1, (a2)
-; RV32IA-WMO-NEXT: srl a0, a1, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_and_i8_acq_rel:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: li a3, 255
+; RV32IA-WMO-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: not a3, a3
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: or a1, a1, a3
+; RV32IA-WMO-NOZACAS-NEXT: amoand.w.aqrl a1, a1, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_and_i8_acq_rel:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: li a3, 255
-; RV32IA-TSO-NEXT: zext.b a1, a1
-; RV32IA-TSO-NEXT: sll a3, a3, a0
-; RV32IA-TSO-NEXT: not a3, a3
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: or a1, a1, a3
-; RV32IA-TSO-NEXT: amoand.w a1, a1, (a2)
-; RV32IA-TSO-NEXT: srl a0, a1, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_and_i8_acq_rel:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: li a3, 255
+; RV32IA-TSO-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: not a3, a3
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: or a1, a1, a3
+; RV32IA-TSO-NOZACAS-NEXT: amoand.w a1, a1, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_and_i8_acq_rel:
; RV64I: # %bb.0:
@@ -3319,6 +4367,34 @@ define i8 @atomicrmw_and_i8_acq_rel(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_and_i8_acq_rel:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: li a3, 255
+; RV32IA-WMO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: not a3, a3
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: or a1, a1, a3
+; RV32IA-WMO-ZACAS-NEXT: amoand.w.aqrl a1, a1, (a2)
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_and_i8_acq_rel:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: li a3, 255
+; RV32IA-TSO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: not a3, a3
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: or a1, a1, a3
+; RV32IA-TSO-ZACAS-NEXT: amoand.w a1, a1, (a2)
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_and_i8_acq_rel:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -3347,6 +4423,16 @@ define i8 @atomicrmw_and_i8_acq_rel(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_and_i8_acq_rel:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amoand.b.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_and_i8_acq_rel:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amoand.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_and_i8_acq_rel:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amoand.b.aqrl a0, a1, (a0)
@@ -3371,33 +4457,33 @@ define i8 @atomicrmw_and_i8_seq_cst(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_and_i8_seq_cst:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: li a3, 255
-; RV32IA-WMO-NEXT: zext.b a1, a1
-; RV32IA-WMO-NEXT: sll a3, a3, a0
-; RV32IA-WMO-NEXT: not a3, a3
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: or a1, a1, a3
-; RV32IA-WMO-NEXT: amoand.w.aqrl a1, a1, (a2)
-; RV32IA-WMO-NEXT: srl a0, a1, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_and_i8_seq_cst:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: li a3, 255
+; RV32IA-WMO-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: not a3, a3
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: or a1, a1, a3
+; RV32IA-WMO-NOZACAS-NEXT: amoand.w.aqrl a1, a1, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_and_i8_seq_cst:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: li a3, 255
-; RV32IA-TSO-NEXT: zext.b a1, a1
-; RV32IA-TSO-NEXT: sll a3, a3, a0
-; RV32IA-TSO-NEXT: not a3, a3
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: or a1, a1, a3
-; RV32IA-TSO-NEXT: amoand.w a1, a1, (a2)
-; RV32IA-TSO-NEXT: srl a0, a1, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_and_i8_seq_cst:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: li a3, 255
+; RV32IA-TSO-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: not a3, a3
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: or a1, a1, a3
+; RV32IA-TSO-NOZACAS-NEXT: amoand.w a1, a1, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_and_i8_seq_cst:
; RV64I: # %bb.0:
@@ -3437,6 +4523,34 @@ define i8 @atomicrmw_and_i8_seq_cst(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_and_i8_seq_cst:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: li a3, 255
+; RV32IA-WMO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: not a3, a3
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: or a1, a1, a3
+; RV32IA-WMO-ZACAS-NEXT: amoand.w.aqrl a1, a1, (a2)
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_and_i8_seq_cst:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: li a3, 255
+; RV32IA-TSO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: not a3, a3
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: or a1, a1, a3
+; RV32IA-TSO-ZACAS-NEXT: amoand.w a1, a1, (a2)
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_and_i8_seq_cst:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -3465,6 +4579,16 @@ define i8 @atomicrmw_and_i8_seq_cst(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_and_i8_seq_cst:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amoand.b.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_and_i8_seq_cst:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amoand.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_and_i8_seq_cst:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amoand.b.aqrl a0, a1, (a0)
@@ -3489,26 +4613,26 @@ define i8 @atomicrmw_nand_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-LABEL: atomicrmw_nand_i8_monotonic:
-; RV32IA: # %bb.0:
-; RV32IA-NEXT: andi a2, a0, -4
-; RV32IA-NEXT: slli a0, a0, 3
-; RV32IA-NEXT: li a3, 255
-; RV32IA-NEXT: zext.b a1, a1
-; RV32IA-NEXT: sll a3, a3, a0
-; RV32IA-NEXT: sll a1, a1, a0
-; RV32IA-NEXT: .LBB30_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: lr.w a4, (a2)
-; RV32IA-NEXT: and a5, a4, a1
-; RV32IA-NEXT: not a5, a5
-; RV32IA-NEXT: xor a5, a4, a5
-; RV32IA-NEXT: and a5, a5, a3
-; RV32IA-NEXT: xor a5, a4, a5
-; RV32IA-NEXT: sc.w a5, a5, (a2)
-; RV32IA-NEXT: bnez a5, .LBB30_1
-; RV32IA-NEXT: # %bb.2:
-; RV32IA-NEXT: srl a0, a4, a0
-; RV32IA-NEXT: ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_nand_i8_monotonic:
+; RV32IA-NOZACAS: # %bb.0:
+; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT: li a3, 255
+; RV32IA-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-NOZACAS-NEXT: .LBB30_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NOZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-NOZACAS-NEXT: and a5, a4, a1
+; RV32IA-NOZACAS-NEXT: not a5, a5
+; RV32IA-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-NOZACAS-NEXT: and a5, a5, a3
+; RV32IA-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-NOZACAS-NEXT: bnez a5, .LBB30_1
+; RV32IA-NOZACAS-NEXT: # %bb.2:
+; RV32IA-NOZACAS-NEXT: srl a0, a4, a0
+; RV32IA-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_nand_i8_monotonic:
; RV64I: # %bb.0:
@@ -3541,6 +4665,27 @@ define i8 @atomicrmw_nand_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV64IA-NOZACAS-NEXT: srlw a0, a4, a0
; RV64IA-NOZACAS-NEXT: ret
;
+; RV32IA-ZACAS-LABEL: atomicrmw_nand_i8_monotonic:
+; RV32IA-ZACAS: # %bb.0:
+; RV32IA-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-ZACAS-NEXT: li a3, 255
+; RV32IA-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-ZACAS-NEXT: .LBB30_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-ZACAS-NEXT: and a5, a4, a1
+; RV32IA-ZACAS-NEXT: not a5, a5
+; RV32IA-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-ZACAS-NEXT: and a5, a5, a3
+; RV32IA-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-ZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-ZACAS-NEXT: bnez a5, .LBB30_1
+; RV32IA-ZACAS-NEXT: # %bb.2:
+; RV32IA-ZACAS-NEXT: srl a0, a4, a0
+; RV32IA-ZACAS-NEXT: ret
+;
; RV64IA-ZACAS-LABEL: atomicrmw_nand_i8_monotonic:
; RV64IA-ZACAS: # %bb.0:
; RV64IA-ZACAS-NEXT: andi a2, a0, -4
@@ -3562,6 +4707,48 @@ define i8 @atomicrmw_nand_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV64IA-ZACAS-NEXT: srlw a0, a4, a0
; RV64IA-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i8_monotonic:
+; RV32IA-WMO-ZABHA-NOZACAS: # %bb.0:
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: li a3, 255
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: .LBB30_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: and a5, a4, a1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: not a5, a5
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: and a5, a5, a3
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: bnez a5, .LBB30_1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: srl a0, a4, a0
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i8_monotonic:
+; RV32IA-TSO-ZABHA-NOZACAS: # %bb.0:
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: li a3, 255
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: .LBB30_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: and a5, a4, a1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: not a5, a5
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: and a5, a5, a3
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: bnez a5, .LBB30_1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: srl a0, a4, a0
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: ret
+;
; RV64IA-WMO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i8_monotonic:
; RV64IA-WMO-ZABHA-NOZACAS: # %bb.0:
; RV64IA-WMO-ZABHA-NOZACAS-NEXT: andi a2, a0, -4
@@ -3604,6 +4791,36 @@ define i8 @atomicrmw_nand_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-ZABHA-NOZACAS-NEXT: srlw a0, a4, a0
; RV64IA-TSO-ZABHA-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i8_monotonic:
+; RV32IA-WMO-ZABHA-ZACAS: # %bb.0:
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: mv a2, a0
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: lbu a0, 0(a0)
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: .LBB30_1: # %atomicrmw.start
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: and a3, a0, a1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: not a3, a3
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: slli a4, a0, 24
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: amocas.b a0, a3, (a2)
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: srai a4, a4, 24
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: bne a0, a4, .LBB30_1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: # %bb.2: # %atomicrmw.end
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i8_monotonic:
+; RV32IA-TSO-ZABHA-ZACAS: # %bb.0:
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: mv a2, a0
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: lbu a0, 0(a0)
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: .LBB30_1: # %atomicrmw.start
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: and a3, a0, a1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: not a3, a3
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: slli a4, a0, 24
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: amocas.b a0, a3, (a2)
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: srai a4, a4, 24
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: bne a0, a4, .LBB30_1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: # %bb.2: # %atomicrmw.end
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i8_monotonic:
; RV64IA-WMO-ZABHA-ZACAS: # %bb.0:
; RV64IA-WMO-ZABHA-ZACAS-NEXT: mv a2, a0
@@ -3648,47 +4865,47 @@ define i8 @atomicrmw_nand_i8_acquire(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_nand_i8_acquire:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: li a3, 255
-; RV32IA-WMO-NEXT: zext.b a1, a1
-; RV32IA-WMO-NEXT: sll a3, a3, a0
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: .LBB31_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT: lr.w.aq a4, (a2)
-; RV32IA-WMO-NEXT: and a5, a4, a1
-; RV32IA-WMO-NEXT: not a5, a5
-; RV32IA-WMO-NEXT: xor a5, a4, a5
-; RV32IA-WMO-NEXT: and a5, a5, a3
-; RV32IA-WMO-NEXT: xor a5, a4, a5
-; RV32IA-WMO-NEXT: sc.w a5, a5, (a2)
-; RV32IA-WMO-NEXT: bnez a5, .LBB31_1
-; RV32IA-WMO-NEXT: # %bb.2:
-; RV32IA-WMO-NEXT: srl a0, a4, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_nand_i8_acquire:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: li a3, 255
+; RV32IA-WMO-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: .LBB31_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: lr.w.aq a4, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: and a5, a4, a1
+; RV32IA-WMO-NOZACAS-NEXT: not a5, a5
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-NOZACAS-NEXT: and a5, a5, a3
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: bnez a5, .LBB31_1
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a4, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_nand_i8_acquire:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: li a3, 255
-; RV32IA-TSO-NEXT: zext.b a1, a1
-; RV32IA-TSO-NEXT: sll a3, a3, a0
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: .LBB31_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT: lr.w a4, (a2)
-; RV32IA-TSO-NEXT: and a5, a4, a1
-; RV32IA-TSO-NEXT: not a5, a5
-; RV32IA-TSO-NEXT: xor a5, a4, a5
-; RV32IA-TSO-NEXT: and a5, a5, a3
-; RV32IA-TSO-NEXT: xor a5, a4, a5
-; RV32IA-TSO-NEXT: sc.w a5, a5, (a2)
-; RV32IA-TSO-NEXT: bnez a5, .LBB31_1
-; RV32IA-TSO-NEXT: # %bb.2:
-; RV32IA-TSO-NEXT: srl a0, a4, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_nand_i8_acquire:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: li a3, 255
+; RV32IA-TSO-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: .LBB31_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: and a5, a4, a1
+; RV32IA-TSO-NOZACAS-NEXT: not a5, a5
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-NOZACAS-NEXT: and a5, a5, a3
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: bnez a5, .LBB31_1
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a4, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_nand_i8_acquire:
; RV64I: # %bb.0:
@@ -3742,6 +4959,48 @@ define i8 @atomicrmw_nand_i8_acquire(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a4, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_nand_i8_acquire:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: li a3, 255
+; RV32IA-WMO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: .LBB31_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a4, (a2)
+; RV32IA-WMO-ZACAS-NEXT: and a5, a4, a1
+; RV32IA-WMO-ZACAS-NEXT: not a5, a5
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a3
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-ZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB31_1
+; RV32IA-WMO-ZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a4, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_nand_i8_acquire:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: li a3, 255
+; RV32IA-TSO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: .LBB31_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-TSO-ZACAS-NEXT: and a5, a4, a1
+; RV32IA-TSO-ZACAS-NEXT: not a5, a5
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a3
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT: bnez a5, .LBB31_1
+; RV32IA-TSO-ZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a4, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_nand_i8_acquire:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -3784,6 +5043,48 @@ define i8 @atomicrmw_nand_i8_acquire(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a4, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i8_acquire:
+; RV32IA-WMO-ZABHA-NOZACAS: # %bb.0:
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: li a3, 255
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: .LBB31_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: lr.w.aq a4, (a2)
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: and a5, a4, a1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: not a5, a5
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: and a5, a5, a3
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: bnez a5, .LBB31_1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: srl a0, a4, a0
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i8_acquire:
+; RV32IA-TSO-ZABHA-NOZACAS: # %bb.0:
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: li a3, 255
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: .LBB31_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: and a5, a4, a1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: not a5, a5
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: and a5, a5, a3
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: bnez a5, .LBB31_1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: srl a0, a4, a0
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: ret
+;
; RV64IA-WMO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i8_acquire:
; RV64IA-WMO-ZABHA-NOZACAS: # %bb.0:
; RV64IA-WMO-ZABHA-NOZACAS-NEXT: andi a2, a0, -4
@@ -3826,6 +5127,36 @@ define i8 @atomicrmw_nand_i8_acquire(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-ZABHA-NOZACAS-NEXT: srlw a0, a4, a0
; RV64IA-TSO-ZABHA-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i8_acquire:
+; RV32IA-WMO-ZABHA-ZACAS: # %bb.0:
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: mv a2, a0
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: lbu a0, 0(a0)
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: .LBB31_1: # %atomicrmw.start
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: and a3, a0, a1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: not a3, a3
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: slli a4, a0, 24
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: amocas.b.aq a0, a3, (a2)
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: srai a4, a4, 24
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: bne a0, a4, .LBB31_1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: # %bb.2: # %atomicrmw.end
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i8_acquire:
+; RV32IA-TSO-ZABHA-ZACAS: # %bb.0:
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: mv a2, a0
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: lbu a0, 0(a0)
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: .LBB31_1: # %atomicrmw.start
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: and a3, a0, a1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: not a3, a3
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: slli a4, a0, 24
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: amocas.b a0, a3, (a2)
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: srai a4, a4, 24
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: bne a0, a4, .LBB31_1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: # %bb.2: # %atomicrmw.end
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i8_acquire:
; RV64IA-WMO-ZABHA-ZACAS: # %bb.0:
; RV64IA-WMO-ZABHA-ZACAS-NEXT: mv a2, a0
@@ -3870,47 +5201,47 @@ define i8 @atomicrmw_nand_i8_release(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_nand_i8_release:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: li a3, 255
-; RV32IA-WMO-NEXT: zext.b a1, a1
-; RV32IA-WMO-NEXT: sll a3, a3, a0
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: .LBB32_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT: lr.w a4, (a2)
-; RV32IA-WMO-NEXT: and a5, a4, a1
-; RV32IA-WMO-NEXT: not a5, a5
-; RV32IA-WMO-NEXT: xor a5, a4, a5
-; RV32IA-WMO-NEXT: and a5, a5, a3
-; RV32IA-WMO-NEXT: xor a5, a4, a5
-; RV32IA-WMO-NEXT: sc.w.rl a5, a5, (a2)
-; RV32IA-WMO-NEXT: bnez a5, .LBB32_1
-; RV32IA-WMO-NEXT: # %bb.2:
-; RV32IA-WMO-NEXT: srl a0, a4, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_nand_i8_release:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: li a3, 255
+; RV32IA-WMO-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: .LBB32_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: and a5, a4, a1
+; RV32IA-WMO-NOZACAS-NEXT: not a5, a5
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-NOZACAS-NEXT: and a5, a5, a3
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-NOZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: bnez a5, .LBB32_1
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a4, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_nand_i8_release:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: li a3, 255
-; RV32IA-TSO-NEXT: zext.b a1, a1
-; RV32IA-TSO-NEXT: sll a3, a3, a0
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: .LBB32_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT: lr.w a4, (a2)
-; RV32IA-TSO-NEXT: and a5, a4, a1
-; RV32IA-TSO-NEXT: not a5, a5
-; RV32IA-TSO-NEXT: xor a5, a4, a5
-; RV32IA-TSO-NEXT: and a5, a5, a3
-; RV32IA-TSO-NEXT: xor a5, a4, a5
-; RV32IA-TSO-NEXT: sc.w a5, a5, (a2)
-; RV32IA-TSO-NEXT: bnez a5, .LBB32_1
-; RV32IA-TSO-NEXT: # %bb.2:
-; RV32IA-TSO-NEXT: srl a0, a4, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_nand_i8_release:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: li a3, 255
+; RV32IA-TSO-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: .LBB32_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: and a5, a4, a1
+; RV32IA-TSO-NOZACAS-NEXT: not a5, a5
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-NOZACAS-NEXT: and a5, a5, a3
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: bnez a5, .LBB32_1
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a4, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_nand_i8_release:
; RV64I: # %bb.0:
@@ -3964,6 +5295,48 @@ define i8 @atomicrmw_nand_i8_release(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a4, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_nand_i8_release:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: li a3, 255
+; RV32IA-WMO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: .LBB32_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-WMO-ZACAS-NEXT: and a5, a4, a1
+; RV32IA-WMO-ZACAS-NEXT: not a5, a5
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a3
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB32_1
+; RV32IA-WMO-ZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a4, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_nand_i8_release:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: li a3, 255
+; RV32IA-TSO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: .LBB32_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-TSO-ZACAS-NEXT: and a5, a4, a1
+; RV32IA-TSO-ZACAS-NEXT: not a5, a5
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a3
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT: bnez a5, .LBB32_1
+; RV32IA-TSO-ZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a4, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_nand_i8_release:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -4006,6 +5379,48 @@ define i8 @atomicrmw_nand_i8_release(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a4, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i8_release:
+; RV32IA-WMO-ZABHA-NOZACAS: # %bb.0:
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: li a3, 255
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: .LBB32_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: and a5, a4, a1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: not a5, a5
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: and a5, a5, a3
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: bnez a5, .LBB32_1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: srl a0, a4, a0
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i8_release:
+; RV32IA-TSO-ZABHA-NOZACAS: # %bb.0:
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: li a3, 255
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: .LBB32_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: and a5, a4, a1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: not a5, a5
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: and a5, a5, a3
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: bnez a5, .LBB32_1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: srl a0, a4, a0
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: ret
+;
; RV64IA-WMO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i8_release:
; RV64IA-WMO-ZABHA-NOZACAS: # %bb.0:
; RV64IA-WMO-ZABHA-NOZACAS-NEXT: andi a2, a0, -4
@@ -4048,6 +5463,36 @@ define i8 @atomicrmw_nand_i8_release(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-ZABHA-NOZACAS-NEXT: srlw a0, a4, a0
; RV64IA-TSO-ZABHA-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i8_release:
+; RV32IA-WMO-ZABHA-ZACAS: # %bb.0:
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: mv a2, a0
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: lbu a0, 0(a0)
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: .LBB32_1: # %atomicrmw.start
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: and a3, a0, a1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: not a3, a3
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: slli a4, a0, 24
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: amocas.b.rl a0, a3, (a2)
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: srai a4, a4, 24
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: bne a0, a4, .LBB32_1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: # %bb.2: # %atomicrmw.end
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i8_release:
+; RV32IA-TSO-ZABHA-ZACAS: # %bb.0:
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: mv a2, a0
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: lbu a0, 0(a0)
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: .LBB32_1: # %atomicrmw.start
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: and a3, a0, a1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: not a3, a3
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: slli a4, a0, 24
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: amocas.b a0, a3, (a2)
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: srai a4, a4, 24
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: bne a0, a4, .LBB32_1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: # %bb.2: # %atomicrmw.end
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i8_release:
; RV64IA-WMO-ZABHA-ZACAS: # %bb.0:
; RV64IA-WMO-ZABHA-ZACAS-NEXT: mv a2, a0
@@ -4092,47 +5537,47 @@ define i8 @atomicrmw_nand_i8_acq_rel(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_nand_i8_acq_rel:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: li a3, 255
-; RV32IA-WMO-NEXT: zext.b a1, a1
-; RV32IA-WMO-NEXT: sll a3, a3, a0
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: .LBB33_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT: lr.w.aq a4, (a2)
-; RV32IA-WMO-NEXT: and a5, a4, a1
-; RV32IA-WMO-NEXT: not a5, a5
-; RV32IA-WMO-NEXT: xor a5, a4, a5
-; RV32IA-WMO-NEXT: and a5, a5, a3
-; RV32IA-WMO-NEXT: xor a5, a4, a5
-; RV32IA-WMO-NEXT: sc.w.rl a5, a5, (a2)
-; RV32IA-WMO-NEXT: bnez a5, .LBB33_1
-; RV32IA-WMO-NEXT: # %bb.2:
-; RV32IA-WMO-NEXT: srl a0, a4, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_nand_i8_acq_rel:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: li a3, 255
+; RV32IA-WMO-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: .LBB33_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: lr.w.aq a4, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: and a5, a4, a1
+; RV32IA-WMO-NOZACAS-NEXT: not a5, a5
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-NOZACAS-NEXT: and a5, a5, a3
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-NOZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: bnez a5, .LBB33_1
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a4, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_nand_i8_acq_rel:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: li a3, 255
-; RV32IA-TSO-NEXT: zext.b a1, a1
-; RV32IA-TSO-NEXT: sll a3, a3, a0
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: .LBB33_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT: lr.w a4, (a2)
-; RV32IA-TSO-NEXT: and a5, a4, a1
-; RV32IA-TSO-NEXT: not a5, a5
-; RV32IA-TSO-NEXT: xor a5, a4, a5
-; RV32IA-TSO-NEXT: and a5, a5, a3
-; RV32IA-TSO-NEXT: xor a5, a4, a5
-; RV32IA-TSO-NEXT: sc.w a5, a5, (a2)
-; RV32IA-TSO-NEXT: bnez a5, .LBB33_1
-; RV32IA-TSO-NEXT: # %bb.2:
-; RV32IA-TSO-NEXT: srl a0, a4, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_nand_i8_acq_rel:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: li a3, 255
+; RV32IA-TSO-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: .LBB33_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: and a5, a4, a1
+; RV32IA-TSO-NOZACAS-NEXT: not a5, a5
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-NOZACAS-NEXT: and a5, a5, a3
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: bnez a5, .LBB33_1
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a4, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_nand_i8_acq_rel:
; RV64I: # %bb.0:
@@ -4186,6 +5631,48 @@ define i8 @atomicrmw_nand_i8_acq_rel(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a4, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_nand_i8_acq_rel:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: li a3, 255
+; RV32IA-WMO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: .LBB33_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a4, (a2)
+; RV32IA-WMO-ZACAS-NEXT: and a5, a4, a1
+; RV32IA-WMO-ZACAS-NEXT: not a5, a5
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a3
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB33_1
+; RV32IA-WMO-ZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a4, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_nand_i8_acq_rel:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: li a3, 255
+; RV32IA-TSO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: .LBB33_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-TSO-ZACAS-NEXT: and a5, a4, a1
+; RV32IA-TSO-ZACAS-NEXT: not a5, a5
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a3
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT: bnez a5, .LBB33_1
+; RV32IA-TSO-ZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a4, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_nand_i8_acq_rel:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -4228,6 +5715,48 @@ define i8 @atomicrmw_nand_i8_acq_rel(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a4, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i8_acq_rel:
+; RV32IA-WMO-ZABHA-NOZACAS: # %bb.0:
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: li a3, 255
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: .LBB33_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: lr.w.aq a4, (a2)
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: and a5, a4, a1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: not a5, a5
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: and a5, a5, a3
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: bnez a5, .LBB33_1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: srl a0, a4, a0
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i8_acq_rel:
+; RV32IA-TSO-ZABHA-NOZACAS: # %bb.0:
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: li a3, 255
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: .LBB33_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: and a5, a4, a1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: not a5, a5
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: and a5, a5, a3
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: bnez a5, .LBB33_1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: srl a0, a4, a0
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: ret
+;
; RV64IA-WMO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i8_acq_rel:
; RV64IA-WMO-ZABHA-NOZACAS: # %bb.0:
; RV64IA-WMO-ZABHA-NOZACAS-NEXT: andi a2, a0, -4
@@ -4270,6 +5799,36 @@ define i8 @atomicrmw_nand_i8_acq_rel(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-ZABHA-NOZACAS-NEXT: srlw a0, a4, a0
; RV64IA-TSO-ZABHA-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i8_acq_rel:
+; RV32IA-WMO-ZABHA-ZACAS: # %bb.0:
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: mv a2, a0
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: lbu a0, 0(a0)
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: .LBB33_1: # %atomicrmw.start
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: and a3, a0, a1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: not a3, a3
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: slli a4, a0, 24
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: amocas.b.aqrl a0, a3, (a2)
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: srai a4, a4, 24
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: bne a0, a4, .LBB33_1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: # %bb.2: # %atomicrmw.end
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i8_acq_rel:
+; RV32IA-TSO-ZABHA-ZACAS: # %bb.0:
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: mv a2, a0
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: lbu a0, 0(a0)
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: .LBB33_1: # %atomicrmw.start
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: and a3, a0, a1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: not a3, a3
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: slli a4, a0, 24
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: amocas.b a0, a3, (a2)
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: srai a4, a4, 24
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: bne a0, a4, .LBB33_1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: # %bb.2: # %atomicrmw.end
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i8_acq_rel:
; RV64IA-WMO-ZABHA-ZACAS: # %bb.0:
; RV64IA-WMO-ZABHA-ZACAS-NEXT: mv a2, a0
@@ -4314,26 +5873,26 @@ define i8 @atomicrmw_nand_i8_seq_cst(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-LABEL: atomicrmw_nand_i8_seq_cst:
-; RV32IA: # %bb.0:
-; RV32IA-NEXT: andi a2, a0, -4
-; RV32IA-NEXT: slli a0, a0, 3
-; RV32IA-NEXT: li a3, 255
-; RV32IA-NEXT: zext.b a1, a1
-; RV32IA-NEXT: sll a3, a3, a0
-; RV32IA-NEXT: sll a1, a1, a0
-; RV32IA-NEXT: .LBB34_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: lr.w.aqrl a4, (a2)
-; RV32IA-NEXT: and a5, a4, a1
-; RV32IA-NEXT: not a5, a5
-; RV32IA-NEXT: xor a5, a4, a5
-; RV32IA-NEXT: and a5, a5, a3
-; RV32IA-NEXT: xor a5, a4, a5
-; RV32IA-NEXT: sc.w.rl a5, a5, (a2)
-; RV32IA-NEXT: bnez a5, .LBB34_1
-; RV32IA-NEXT: # %bb.2:
-; RV32IA-NEXT: srl a0, a4, a0
-; RV32IA-NEXT: ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_nand_i8_seq_cst:
+; RV32IA-NOZACAS: # %bb.0:
+; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT: li a3, 255
+; RV32IA-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-NOZACAS-NEXT: .LBB34_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NOZACAS-NEXT: lr.w.aqrl a4, (a2)
+; RV32IA-NOZACAS-NEXT: and a5, a4, a1
+; RV32IA-NOZACAS-NEXT: not a5, a5
+; RV32IA-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-NOZACAS-NEXT: and a5, a5, a3
+; RV32IA-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-NOZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-NOZACAS-NEXT: bnez a5, .LBB34_1
+; RV32IA-NOZACAS-NEXT: # %bb.2:
+; RV32IA-NOZACAS-NEXT: srl a0, a4, a0
+; RV32IA-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_nand_i8_seq_cst:
; RV64I: # %bb.0:
@@ -4366,6 +5925,27 @@ define i8 @atomicrmw_nand_i8_seq_cst(ptr %a, i8 %b) nounwind {
; RV64IA-NOZACAS-NEXT: srlw a0, a4, a0
; RV64IA-NOZACAS-NEXT: ret
;
+; RV32IA-ZACAS-LABEL: atomicrmw_nand_i8_seq_cst:
+; RV32IA-ZACAS: # %bb.0:
+; RV32IA-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-ZACAS-NEXT: li a3, 255
+; RV32IA-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-ZACAS-NEXT: .LBB34_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT: lr.w.aqrl a4, (a2)
+; RV32IA-ZACAS-NEXT: and a5, a4, a1
+; RV32IA-ZACAS-NEXT: not a5, a5
+; RV32IA-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-ZACAS-NEXT: and a5, a5, a3
+; RV32IA-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-ZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-ZACAS-NEXT: bnez a5, .LBB34_1
+; RV32IA-ZACAS-NEXT: # %bb.2:
+; RV32IA-ZACAS-NEXT: srl a0, a4, a0
+; RV32IA-ZACAS-NEXT: ret
+;
; RV64IA-ZACAS-LABEL: atomicrmw_nand_i8_seq_cst:
; RV64IA-ZACAS: # %bb.0:
; RV64IA-ZACAS-NEXT: andi a2, a0, -4
@@ -4387,6 +5967,48 @@ define i8 @atomicrmw_nand_i8_seq_cst(ptr %a, i8 %b) nounwind {
; RV64IA-ZACAS-NEXT: srlw a0, a4, a0
; RV64IA-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i8_seq_cst:
+; RV32IA-WMO-ZABHA-NOZACAS: # %bb.0:
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: li a3, 255
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: .LBB34_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: lr.w.aqrl a4, (a2)
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: and a5, a4, a1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: not a5, a5
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: and a5, a5, a3
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: bnez a5, .LBB34_1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: srl a0, a4, a0
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i8_seq_cst:
+; RV32IA-TSO-ZABHA-NOZACAS: # %bb.0:
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: li a3, 255
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: .LBB34_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: lr.w.aqrl a4, (a2)
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: and a5, a4, a1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: not a5, a5
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: and a5, a5, a3
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: bnez a5, .LBB34_1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: srl a0, a4, a0
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: ret
+;
; RV64IA-WMO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i8_seq_cst:
; RV64IA-WMO-ZABHA-NOZACAS: # %bb.0:
; RV64IA-WMO-ZABHA-NOZACAS-NEXT: andi a2, a0, -4
@@ -4429,6 +6051,38 @@ define i8 @atomicrmw_nand_i8_seq_cst(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-ZABHA-NOZACAS-NEXT: srlw a0, a4, a0
; RV64IA-TSO-ZABHA-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i8_seq_cst:
+; RV32IA-WMO-ZABHA-ZACAS: # %bb.0:
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: mv a2, a0
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: lbu a0, 0(a0)
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: .LBB34_1: # %atomicrmw.start
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: and a3, a0, a1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: fence rw, rw
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: not a3, a3
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: slli a4, a0, 24
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: amocas.b.aqrl a0, a3, (a2)
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: srai a4, a4, 24
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: bne a0, a4, .LBB34_1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: # %bb.2: # %atomicrmw.end
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i8_seq_cst:
+; RV32IA-TSO-ZABHA-ZACAS: # %bb.0:
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: mv a2, a0
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: lbu a0, 0(a0)
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: .LBB34_1: # %atomicrmw.start
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: and a3, a0, a1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: fence rw, rw
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: not a3, a3
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: slli a4, a0, 24
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: amocas.b a0, a3, (a2)
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: srai a4, a4, 24
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: bne a0, a4, .LBB34_1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: # %bb.2: # %atomicrmw.end
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i8_seq_cst:
; RV64IA-WMO-ZABHA-ZACAS: # %bb.0:
; RV64IA-WMO-ZABHA-ZACAS-NEXT: mv a2, a0
@@ -4475,15 +6129,15 @@ define i8 @atomicrmw_or_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-LABEL: atomicrmw_or_i8_monotonic:
-; RV32IA: # %bb.0:
-; RV32IA-NEXT: andi a2, a0, -4
-; RV32IA-NEXT: slli a0, a0, 3
-; RV32IA-NEXT: zext.b a1, a1
-; RV32IA-NEXT: sll a1, a1, a0
-; RV32IA-NEXT: amoor.w a1, a1, (a2)
-; RV32IA-NEXT: srl a0, a1, a0
-; RV32IA-NEXT: ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_or_i8_monotonic:
+; RV32IA-NOZACAS: # %bb.0:
+; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-NOZACAS-NEXT: amoor.w a1, a1, (a2)
+; RV32IA-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_or_i8_monotonic:
; RV64I: # %bb.0:
@@ -4505,6 +6159,16 @@ define i8 @atomicrmw_or_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV64IA-NOZACAS-NEXT: srlw a0, a1, a0
; RV64IA-NOZACAS-NEXT: ret
;
+; RV32IA-ZACAS-LABEL: atomicrmw_or_i8_monotonic:
+; RV32IA-ZACAS: # %bb.0:
+; RV32IA-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-ZACAS-NEXT: amoor.w a1, a1, (a2)
+; RV32IA-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-ZACAS-NEXT: ret
+;
; RV64IA-ZACAS-LABEL: atomicrmw_or_i8_monotonic:
; RV64IA-ZACAS: # %bb.0:
; RV64IA-ZACAS-NEXT: andi a2, a0, -4
@@ -4515,6 +6179,16 @@ define i8 @atomicrmw_or_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV64IA-ZACAS-NEXT: srlw a0, a1, a0
; RV64IA-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_or_i8_monotonic:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amoor.b a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_or_i8_monotonic:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amoor.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_or_i8_monotonic:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amoor.b a0, a1, (a0)
@@ -4539,25 +6213,25 @@ define i8 @atomicrmw_or_i8_acquire(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_or_i8_acquire:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: zext.b a1, a1
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: amoor.w.aq a1, a1, (a2)
-; RV32IA-WMO-NEXT: srl a0, a1, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_or_i8_acquire:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: amoor.w.aq a1, a1, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_or_i8_acquire:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: zext.b a1, a1
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: amoor.w a1, a1, (a2)
-; RV32IA-TSO-NEXT: srl a0, a1, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_or_i8_acquire:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: amoor.w a1, a1, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_or_i8_acquire:
; RV64I: # %bb.0:
@@ -4589,6 +6263,26 @@ define i8 @atomicrmw_or_i8_acquire(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_or_i8_acquire:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: amoor.w.aq a1, a1, (a2)
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_or_i8_acquire:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: amoor.w a1, a1, (a2)
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_or_i8_acquire:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -4609,6 +6303,16 @@ define i8 @atomicrmw_or_i8_acquire(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_or_i8_acquire:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amoor.b.aq a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_or_i8_acquire:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amoor.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_or_i8_acquire:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amoor.b.aq a0, a1, (a0)
@@ -4633,25 +6337,25 @@ define i8 @atomicrmw_or_i8_release(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_or_i8_release:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: zext.b a1, a1
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: amoor.w.rl a1, a1, (a2)
-; RV32IA-WMO-NEXT: srl a0, a1, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_or_i8_release:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: amoor.w.rl a1, a1, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_or_i8_release:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: zext.b a1, a1
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: amoor.w a1, a1, (a2)
-; RV32IA-TSO-NEXT: srl a0, a1, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_or_i8_release:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: amoor.w a1, a1, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_or_i8_release:
; RV64I: # %bb.0:
@@ -4683,6 +6387,26 @@ define i8 @atomicrmw_or_i8_release(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_or_i8_release:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: amoor.w.rl a1, a1, (a2)
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_or_i8_release:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: amoor.w a1, a1, (a2)
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_or_i8_release:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -4703,6 +6427,16 @@ define i8 @atomicrmw_or_i8_release(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_or_i8_release:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amoor.b.rl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_or_i8_release:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amoor.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_or_i8_release:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amoor.b.rl a0, a1, (a0)
@@ -4727,25 +6461,25 @@ define i8 @atomicrmw_or_i8_acq_rel(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_or_i8_acq_rel:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: zext.b a1, a1
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: amoor.w.aqrl a1, a1, (a2)
-; RV32IA-WMO-NEXT: srl a0, a1, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_or_i8_acq_rel:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: amoor.w.aqrl a1, a1, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_or_i8_acq_rel:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: zext.b a1, a1
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: amoor.w a1, a1, (a2)
-; RV32IA-TSO-NEXT: srl a0, a1, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_or_i8_acq_rel:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: amoor.w a1, a1, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_or_i8_acq_rel:
; RV64I: # %bb.0:
@@ -4777,6 +6511,26 @@ define i8 @atomicrmw_or_i8_acq_rel(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_or_i8_acq_rel:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: amoor.w.aqrl a1, a1, (a2)
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_or_i8_acq_rel:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: amoor.w a1, a1, (a2)
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_or_i8_acq_rel:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -4797,6 +6551,16 @@ define i8 @atomicrmw_or_i8_acq_rel(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_or_i8_acq_rel:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amoor.b.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_or_i8_acq_rel:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amoor.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_or_i8_acq_rel:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amoor.b.aqrl a0, a1, (a0)
@@ -4821,25 +6585,25 @@ define i8 @atomicrmw_or_i8_seq_cst(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_or_i8_seq_cst:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: zext.b a1, a1
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: amoor.w.aqrl a1, a1, (a2)
-; RV32IA-WMO-NEXT: srl a0, a1, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_or_i8_seq_cst:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: amoor.w.aqrl a1, a1, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_or_i8_seq_cst:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: zext.b a1, a1
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: amoor.w a1, a1, (a2)
-; RV32IA-TSO-NEXT: srl a0, a1, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_or_i8_seq_cst:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: amoor.w a1, a1, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_or_i8_seq_cst:
; RV64I: # %bb.0:
@@ -4871,6 +6635,26 @@ define i8 @atomicrmw_or_i8_seq_cst(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_or_i8_seq_cst:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: amoor.w.aqrl a1, a1, (a2)
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_or_i8_seq_cst:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: amoor.w a1, a1, (a2)
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_or_i8_seq_cst:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -4891,6 +6675,16 @@ define i8 @atomicrmw_or_i8_seq_cst(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_or_i8_seq_cst:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amoor.b.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_or_i8_seq_cst:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amoor.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_or_i8_seq_cst:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amoor.b.aqrl a0, a1, (a0)
@@ -4915,15 +6709,15 @@ define i8 @atomicrmw_xor_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-LABEL: atomicrmw_xor_i8_monotonic:
-; RV32IA: # %bb.0:
-; RV32IA-NEXT: andi a2, a0, -4
-; RV32IA-NEXT: slli a0, a0, 3
-; RV32IA-NEXT: zext.b a1, a1
-; RV32IA-NEXT: sll a1, a1, a0
-; RV32IA-NEXT: amoxor.w a1, a1, (a2)
-; RV32IA-NEXT: srl a0, a1, a0
-; RV32IA-NEXT: ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_xor_i8_monotonic:
+; RV32IA-NOZACAS: # %bb.0:
+; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-NOZACAS-NEXT: amoxor.w a1, a1, (a2)
+; RV32IA-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_xor_i8_monotonic:
; RV64I: # %bb.0:
@@ -4945,6 +6739,16 @@ define i8 @atomicrmw_xor_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV64IA-NOZACAS-NEXT: srlw a0, a1, a0
; RV64IA-NOZACAS-NEXT: ret
;
+; RV32IA-ZACAS-LABEL: atomicrmw_xor_i8_monotonic:
+; RV32IA-ZACAS: # %bb.0:
+; RV32IA-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-ZACAS-NEXT: amoxor.w a1, a1, (a2)
+; RV32IA-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-ZACAS-NEXT: ret
+;
; RV64IA-ZACAS-LABEL: atomicrmw_xor_i8_monotonic:
; RV64IA-ZACAS: # %bb.0:
; RV64IA-ZACAS-NEXT: andi a2, a0, -4
@@ -4955,6 +6759,16 @@ define i8 @atomicrmw_xor_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV64IA-ZACAS-NEXT: srlw a0, a1, a0
; RV64IA-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xor_i8_monotonic:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amoxor.b a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_xor_i8_monotonic:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amoxor.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_xor_i8_monotonic:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amoxor.b a0, a1, (a0)
@@ -4979,25 +6793,25 @@ define i8 @atomicrmw_xor_i8_acquire(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_xor_i8_acquire:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: zext.b a1, a1
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: amoxor.w.aq a1, a1, (a2)
-; RV32IA-WMO-NEXT: srl a0, a1, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xor_i8_acquire:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: amoxor.w.aq a1, a1, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_xor_i8_acquire:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: zext.b a1, a1
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: amoxor.w a1, a1, (a2)
-; RV32IA-TSO-NEXT: srl a0, a1, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_xor_i8_acquire:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: amoxor.w a1, a1, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_xor_i8_acquire:
; RV64I: # %bb.0:
@@ -5029,6 +6843,26 @@ define i8 @atomicrmw_xor_i8_acquire(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_xor_i8_acquire:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: amoxor.w.aq a1, a1, (a2)
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_xor_i8_acquire:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: amoxor.w a1, a1, (a2)
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_xor_i8_acquire:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -5049,6 +6883,16 @@ define i8 @atomicrmw_xor_i8_acquire(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xor_i8_acquire:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amoxor.b.aq a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_xor_i8_acquire:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amoxor.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_xor_i8_acquire:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amoxor.b.aq a0, a1, (a0)
@@ -5073,25 +6917,25 @@ define i8 @atomicrmw_xor_i8_release(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_xor_i8_release:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: zext.b a1, a1
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: amoxor.w.rl a1, a1, (a2)
-; RV32IA-WMO-NEXT: srl a0, a1, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xor_i8_release:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: amoxor.w.rl a1, a1, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_xor_i8_release:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: zext.b a1, a1
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: amoxor.w a1, a1, (a2)
-; RV32IA-TSO-NEXT: srl a0, a1, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_xor_i8_release:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: amoxor.w a1, a1, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_xor_i8_release:
; RV64I: # %bb.0:
@@ -5123,6 +6967,26 @@ define i8 @atomicrmw_xor_i8_release(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_xor_i8_release:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: amoxor.w.rl a1, a1, (a2)
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_xor_i8_release:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: amoxor.w a1, a1, (a2)
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_xor_i8_release:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -5143,6 +7007,16 @@ define i8 @atomicrmw_xor_i8_release(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xor_i8_release:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amoxor.b.rl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_xor_i8_release:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amoxor.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_xor_i8_release:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amoxor.b.rl a0, a1, (a0)
@@ -5167,25 +7041,25 @@ define i8 @atomicrmw_xor_i8_acq_rel(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_xor_i8_acq_rel:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: zext.b a1, a1
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: amoxor.w.aqrl a1, a1, (a2)
-; RV32IA-WMO-NEXT: srl a0, a1, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xor_i8_acq_rel:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: amoxor.w.aqrl a1, a1, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_xor_i8_acq_rel:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: zext.b a1, a1
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: amoxor.w a1, a1, (a2)
-; RV32IA-TSO-NEXT: srl a0, a1, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_xor_i8_acq_rel:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: amoxor.w a1, a1, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_xor_i8_acq_rel:
; RV64I: # %bb.0:
@@ -5217,6 +7091,26 @@ define i8 @atomicrmw_xor_i8_acq_rel(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_xor_i8_acq_rel:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: amoxor.w.aqrl a1, a1, (a2)
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_xor_i8_acq_rel:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: amoxor.w a1, a1, (a2)
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_xor_i8_acq_rel:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -5237,6 +7131,16 @@ define i8 @atomicrmw_xor_i8_acq_rel(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xor_i8_acq_rel:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amoxor.b.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_xor_i8_acq_rel:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amoxor.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_xor_i8_acq_rel:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amoxor.b.aqrl a0, a1, (a0)
@@ -5261,25 +7165,25 @@ define i8 @atomicrmw_xor_i8_seq_cst(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_xor_i8_seq_cst:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: zext.b a1, a1
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: amoxor.w.aqrl a1, a1, (a2)
-; RV32IA-WMO-NEXT: srl a0, a1, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xor_i8_seq_cst:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: amoxor.w.aqrl a1, a1, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_xor_i8_seq_cst:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: zext.b a1, a1
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: amoxor.w a1, a1, (a2)
-; RV32IA-TSO-NEXT: srl a0, a1, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_xor_i8_seq_cst:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: amoxor.w a1, a1, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_xor_i8_seq_cst:
; RV64I: # %bb.0:
@@ -5311,6 +7215,26 @@ define i8 @atomicrmw_xor_i8_seq_cst(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_xor_i8_seq_cst:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: amoxor.w.aqrl a1, a1, (a2)
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_xor_i8_seq_cst:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: amoxor.w a1, a1, (a2)
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_xor_i8_seq_cst:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -5331,6 +7255,16 @@ define i8 @atomicrmw_xor_i8_seq_cst(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xor_i8_seq_cst:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amoxor.b.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_xor_i8_seq_cst:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amoxor.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_xor_i8_seq_cst:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amoxor.b.aqrl a0, a1, (a0)
@@ -5387,34 +7321,34 @@ define i8 @atomicrmw_max_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
-; RV32IA-LABEL: atomicrmw_max_i8_monotonic:
-; RV32IA: # %bb.0:
-; RV32IA-NEXT: andi a2, a0, -4
-; RV32IA-NEXT: slli a0, a0, 3
-; RV32IA-NEXT: li a3, 255
-; RV32IA-NEXT: slli a1, a1, 24
-; RV32IA-NEXT: andi a4, a0, 24
-; RV32IA-NEXT: sll a3, a3, a0
-; RV32IA-NEXT: srai a1, a1, 24
-; RV32IA-NEXT: sll a1, a1, a0
-; RV32IA-NEXT: xori a4, a4, 24
-; RV32IA-NEXT: .LBB45_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: lr.w a5, (a2)
-; RV32IA-NEXT: and a7, a5, a3
-; RV32IA-NEXT: mv a6, a5
-; RV32IA-NEXT: sll a7, a7, a4
-; RV32IA-NEXT: sra a7, a7, a4
-; RV32IA-NEXT: bge a7, a1, .LBB45_3
-; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB45_1 Depth=1
-; RV32IA-NEXT: xor a6, a5, a1
-; RV32IA-NEXT: and a6, a6, a3
-; RV32IA-NEXT: xor a6, a5, a6
-; RV32IA-NEXT: .LBB45_3: # in Loop: Header=BB45_1 Depth=1
-; RV32IA-NEXT: sc.w a6, a6, (a2)
-; RV32IA-NEXT: bnez a6, .LBB45_1
-; RV32IA-NEXT: # %bb.4:
-; RV32IA-NEXT: srl a0, a5, a0
-; RV32IA-NEXT: ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_max_i8_monotonic:
+; RV32IA-NOZACAS: # %bb.0:
+; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT: li a3, 255
+; RV32IA-NOZACAS-NEXT: slli a1, a1, 24
+; RV32IA-NOZACAS-NEXT: andi a4, a0, 24
+; RV32IA-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-NOZACAS-NEXT: srai a1, a1, 24
+; RV32IA-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-NOZACAS-NEXT: xori a4, a4, 24
+; RV32IA-NOZACAS-NEXT: .LBB45_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NOZACAS-NEXT: lr.w a5, (a2)
+; RV32IA-NOZACAS-NEXT: and a7, a5, a3
+; RV32IA-NOZACAS-NEXT: mv a6, a5
+; RV32IA-NOZACAS-NEXT: sll a7, a7, a4
+; RV32IA-NOZACAS-NEXT: sra a7, a7, a4
+; RV32IA-NOZACAS-NEXT: bge a7, a1, .LBB45_3
+; RV32IA-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB45_1 Depth=1
+; RV32IA-NOZACAS-NEXT: xor a6, a5, a1
+; RV32IA-NOZACAS-NEXT: and a6, a6, a3
+; RV32IA-NOZACAS-NEXT: xor a6, a5, a6
+; RV32IA-NOZACAS-NEXT: .LBB45_3: # in Loop: Header=BB45_1 Depth=1
+; RV32IA-NOZACAS-NEXT: sc.w a6, a6, (a2)
+; RV32IA-NOZACAS-NEXT: bnez a6, .LBB45_1
+; RV32IA-NOZACAS-NEXT: # %bb.4:
+; RV32IA-NOZACAS-NEXT: srl a0, a5, a0
+; RV32IA-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_max_i8_monotonic:
; RV64I: # %bb.0:
@@ -5487,6 +7421,35 @@ define i8 @atomicrmw_max_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV64IA-NOZACAS-NEXT: srlw a0, a5, a0
; RV64IA-NOZACAS-NEXT: ret
;
+; RV32IA-ZACAS-LABEL: atomicrmw_max_i8_monotonic:
+; RV32IA-ZACAS: # %bb.0:
+; RV32IA-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-ZACAS-NEXT: li a3, 255
+; RV32IA-ZACAS-NEXT: slli a1, a1, 24
+; RV32IA-ZACAS-NEXT: andi a4, a0, 24
+; RV32IA-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-ZACAS-NEXT: srai a1, a1, 24
+; RV32IA-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-ZACAS-NEXT: xori a4, a4, 24
+; RV32IA-ZACAS-NEXT: .LBB45_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT: lr.w a5, (a2)
+; RV32IA-ZACAS-NEXT: and a7, a5, a3
+; RV32IA-ZACAS-NEXT: mv a6, a5
+; RV32IA-ZACAS-NEXT: sll a7, a7, a4
+; RV32IA-ZACAS-NEXT: sra a7, a7, a4
+; RV32IA-ZACAS-NEXT: bge a7, a1, .LBB45_3
+; RV32IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB45_1 Depth=1
+; RV32IA-ZACAS-NEXT: xor a6, a5, a1
+; RV32IA-ZACAS-NEXT: and a6, a6, a3
+; RV32IA-ZACAS-NEXT: xor a6, a5, a6
+; RV32IA-ZACAS-NEXT: .LBB45_3: # in Loop: Header=BB45_1 Depth=1
+; RV32IA-ZACAS-NEXT: sc.w a6, a6, (a2)
+; RV32IA-ZACAS-NEXT: bnez a6, .LBB45_1
+; RV32IA-ZACAS-NEXT: # %bb.4:
+; RV32IA-ZACAS-NEXT: srl a0, a5, a0
+; RV32IA-ZACAS-NEXT: ret
+;
; RV64IA-ZACAS-LABEL: atomicrmw_max_i8_monotonic:
; RV64IA-ZACAS: # %bb.0:
; RV64IA-ZACAS-NEXT: andi a2, a0, -4
@@ -5516,6 +7479,16 @@ define i8 @atomicrmw_max_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV64IA-ZACAS-NEXT: srlw a0, a5, a0
; RV64IA-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_max_i8_monotonic:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amomax.b a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_max_i8_monotonic:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amomax.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_max_i8_monotonic:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amomax.b a0, a1, (a0)
@@ -5572,63 +7545,63 @@ define i8 @atomicrmw_max_i8_acquire(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_max_i8_acquire:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: li a3, 255
-; RV32IA-WMO-NEXT: slli a1, a1, 24
-; RV32IA-WMO-NEXT: andi a4, a0, 24
-; RV32IA-WMO-NEXT: sll a3, a3, a0
-; RV32IA-WMO-NEXT: srai a1, a1, 24
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: xori a4, a4, 24
-; RV32IA-WMO-NEXT: .LBB46_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT: lr.w.aq a5, (a2)
-; RV32IA-WMO-NEXT: and a7, a5, a3
-; RV32IA-WMO-NEXT: mv a6, a5
-; RV32IA-WMO-NEXT: sll a7, a7, a4
-; RV32IA-WMO-NEXT: sra a7, a7, a4
-; RV32IA-WMO-NEXT: bge a7, a1, .LBB46_3
-; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB46_1 Depth=1
-; RV32IA-WMO-NEXT: xor a6, a5, a1
-; RV32IA-WMO-NEXT: and a6, a6, a3
-; RV32IA-WMO-NEXT: xor a6, a5, a6
-; RV32IA-WMO-NEXT: .LBB46_3: # in Loop: Header=BB46_1 Depth=1
-; RV32IA-WMO-NEXT: sc.w a6, a6, (a2)
-; RV32IA-WMO-NEXT: bnez a6, .LBB46_1
-; RV32IA-WMO-NEXT: # %bb.4:
-; RV32IA-WMO-NEXT: srl a0, a5, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_max_i8_acquire:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: li a3, 255
+; RV32IA-WMO-NOZACAS-NEXT: slli a1, a1, 24
+; RV32IA-WMO-NOZACAS-NEXT: andi a4, a0, 24
+; RV32IA-WMO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: srai a1, a1, 24
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: xori a4, a4, 24
+; RV32IA-WMO-NOZACAS-NEXT: .LBB46_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: lr.w.aq a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: and a7, a5, a3
+; RV32IA-WMO-NOZACAS-NEXT: mv a6, a5
+; RV32IA-WMO-NOZACAS-NEXT: sll a7, a7, a4
+; RV32IA-WMO-NOZACAS-NEXT: sra a7, a7, a4
+; RV32IA-WMO-NOZACAS-NEXT: bge a7, a1, .LBB46_3
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB46_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: xor a6, a5, a1
+; RV32IA-WMO-NOZACAS-NEXT: and a6, a6, a3
+; RV32IA-WMO-NOZACAS-NEXT: xor a6, a5, a6
+; RV32IA-WMO-NOZACAS-NEXT: .LBB46_3: # in Loop: Header=BB46_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: sc.w a6, a6, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: bnez a6, .LBB46_1
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.4:
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a5, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_max_i8_acquire:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: li a3, 255
-; RV32IA-TSO-NEXT: slli a1, a1, 24
-; RV32IA-TSO-NEXT: andi a4, a0, 24
-; RV32IA-TSO-NEXT: sll a3, a3, a0
-; RV32IA-TSO-NEXT: srai a1, a1, 24
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: xori a4, a4, 24
-; RV32IA-TSO-NEXT: .LBB46_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT: lr.w a5, (a2)
-; RV32IA-TSO-NEXT: and a7, a5, a3
-; RV32IA-TSO-NEXT: mv a6, a5
-; RV32IA-TSO-NEXT: sll a7, a7, a4
-; RV32IA-TSO-NEXT: sra a7, a7, a4
-; RV32IA-TSO-NEXT: bge a7, a1, .LBB46_3
-; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB46_1 Depth=1
-; RV32IA-TSO-NEXT: xor a6, a5, a1
-; RV32IA-TSO-NEXT: and a6, a6, a3
-; RV32IA-TSO-NEXT: xor a6, a5, a6
-; RV32IA-TSO-NEXT: .LBB46_3: # in Loop: Header=BB46_1 Depth=1
-; RV32IA-TSO-NEXT: sc.w a6, a6, (a2)
-; RV32IA-TSO-NEXT: bnez a6, .LBB46_1
-; RV32IA-TSO-NEXT: # %bb.4:
-; RV32IA-TSO-NEXT: srl a0, a5, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_max_i8_acquire:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: li a3, 255
+; RV32IA-TSO-NOZACAS-NEXT: slli a1, a1, 24
+; RV32IA-TSO-NOZACAS-NEXT: andi a4, a0, 24
+; RV32IA-TSO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: srai a1, a1, 24
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: xori a4, a4, 24
+; RV32IA-TSO-NOZACAS-NEXT: .LBB46_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: lr.w a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: and a7, a5, a3
+; RV32IA-TSO-NOZACAS-NEXT: mv a6, a5
+; RV32IA-TSO-NOZACAS-NEXT: sll a7, a7, a4
+; RV32IA-TSO-NOZACAS-NEXT: sra a7, a7, a4
+; RV32IA-TSO-NOZACAS-NEXT: bge a7, a1, .LBB46_3
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB46_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: xor a6, a5, a1
+; RV32IA-TSO-NOZACAS-NEXT: and a6, a6, a3
+; RV32IA-TSO-NOZACAS-NEXT: xor a6, a5, a6
+; RV32IA-TSO-NOZACAS-NEXT: .LBB46_3: # in Loop: Header=BB46_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: sc.w a6, a6, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: bnez a6, .LBB46_1
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.4:
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a5, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_max_i8_acquire:
; RV64I: # %bb.0:
@@ -5730,6 +7703,64 @@ define i8 @atomicrmw_max_i8_acquire(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a5, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_max_i8_acquire:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: li a3, 255
+; RV32IA-WMO-ZACAS-NEXT: slli a1, a1, 24
+; RV32IA-WMO-ZACAS-NEXT: andi a4, a0, 24
+; RV32IA-WMO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: srai a1, a1, 24
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: xori a4, a4, 24
+; RV32IA-WMO-ZACAS-NEXT: .LBB46_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT: and a7, a5, a3
+; RV32IA-WMO-ZACAS-NEXT: mv a6, a5
+; RV32IA-WMO-ZACAS-NEXT: sll a7, a7, a4
+; RV32IA-WMO-ZACAS-NEXT: sra a7, a7, a4
+; RV32IA-WMO-ZACAS-NEXT: bge a7, a1, .LBB46_3
+; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB46_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT: xor a6, a5, a1
+; RV32IA-WMO-ZACAS-NEXT: and a6, a6, a3
+; RV32IA-WMO-ZACAS-NEXT: xor a6, a5, a6
+; RV32IA-WMO-ZACAS-NEXT: .LBB46_3: # in Loop: Header=BB46_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT: sc.w a6, a6, (a2)
+; RV32IA-WMO-ZACAS-NEXT: bnez a6, .LBB46_1
+; RV32IA-WMO-ZACAS-NEXT: # %bb.4:
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a5, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_max_i8_acquire:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: li a3, 255
+; RV32IA-TSO-ZACAS-NEXT: slli a1, a1, 24
+; RV32IA-TSO-ZACAS-NEXT: andi a4, a0, 24
+; RV32IA-TSO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: srai a1, a1, 24
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: xori a4, a4, 24
+; RV32IA-TSO-ZACAS-NEXT: .LBB46_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT: lr.w a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT: and a7, a5, a3
+; RV32IA-TSO-ZACAS-NEXT: mv a6, a5
+; RV32IA-TSO-ZACAS-NEXT: sll a7, a7, a4
+; RV32IA-TSO-ZACAS-NEXT: sra a7, a7, a4
+; RV32IA-TSO-ZACAS-NEXT: bge a7, a1, .LBB46_3
+; RV32IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB46_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT: xor a6, a5, a1
+; RV32IA-TSO-ZACAS-NEXT: and a6, a6, a3
+; RV32IA-TSO-ZACAS-NEXT: xor a6, a5, a6
+; RV32IA-TSO-ZACAS-NEXT: .LBB46_3: # in Loop: Header=BB46_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT: sc.w a6, a6, (a2)
+; RV32IA-TSO-ZACAS-NEXT: bnez a6, .LBB46_1
+; RV32IA-TSO-ZACAS-NEXT: # %bb.4:
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a5, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_max_i8_acquire:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -5788,6 +7819,16 @@ define i8 @atomicrmw_max_i8_acquire(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a5, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_max_i8_acquire:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amomax.b.aq a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_max_i8_acquire:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amomax.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_max_i8_acquire:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amomax.b.aq a0, a1, (a0)
@@ -5844,63 +7885,63 @@ define i8 @atomicrmw_max_i8_release(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_max_i8_release:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: li a3, 255
-; RV32IA-WMO-NEXT: slli a1, a1, 24
-; RV32IA-WMO-NEXT: andi a4, a0, 24
-; RV32IA-WMO-NEXT: sll a3, a3, a0
-; RV32IA-WMO-NEXT: srai a1, a1, 24
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: xori a4, a4, 24
-; RV32IA-WMO-NEXT: .LBB47_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT: lr.w a5, (a2)
-; RV32IA-WMO-NEXT: and a7, a5, a3
-; RV32IA-WMO-NEXT: mv a6, a5
-; RV32IA-WMO-NEXT: sll a7, a7, a4
-; RV32IA-WMO-NEXT: sra a7, a7, a4
-; RV32IA-WMO-NEXT: bge a7, a1, .LBB47_3
-; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB47_1 Depth=1
-; RV32IA-WMO-NEXT: xor a6, a5, a1
-; RV32IA-WMO-NEXT: and a6, a6, a3
-; RV32IA-WMO-NEXT: xor a6, a5, a6
-; RV32IA-WMO-NEXT: .LBB47_3: # in Loop: Header=BB47_1 Depth=1
-; RV32IA-WMO-NEXT: sc.w.rl a6, a6, (a2)
-; RV32IA-WMO-NEXT: bnez a6, .LBB47_1
-; RV32IA-WMO-NEXT: # %bb.4:
-; RV32IA-WMO-NEXT: srl a0, a5, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_max_i8_release:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: li a3, 255
+; RV32IA-WMO-NOZACAS-NEXT: slli a1, a1, 24
+; RV32IA-WMO-NOZACAS-NEXT: andi a4, a0, 24
+; RV32IA-WMO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: srai a1, a1, 24
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: xori a4, a4, 24
+; RV32IA-WMO-NOZACAS-NEXT: .LBB47_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: lr.w a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: and a7, a5, a3
+; RV32IA-WMO-NOZACAS-NEXT: mv a6, a5
+; RV32IA-WMO-NOZACAS-NEXT: sll a7, a7, a4
+; RV32IA-WMO-NOZACAS-NEXT: sra a7, a7, a4
+; RV32IA-WMO-NOZACAS-NEXT: bge a7, a1, .LBB47_3
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB47_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: xor a6, a5, a1
+; RV32IA-WMO-NOZACAS-NEXT: and a6, a6, a3
+; RV32IA-WMO-NOZACAS-NEXT: xor a6, a5, a6
+; RV32IA-WMO-NOZACAS-NEXT: .LBB47_3: # in Loop: Header=BB47_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: sc.w.rl a6, a6, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: bnez a6, .LBB47_1
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.4:
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a5, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_max_i8_release:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: li a3, 255
-; RV32IA-TSO-NEXT: slli a1, a1, 24
-; RV32IA-TSO-NEXT: andi a4, a0, 24
-; RV32IA-TSO-NEXT: sll a3, a3, a0
-; RV32IA-TSO-NEXT: srai a1, a1, 24
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: xori a4, a4, 24
-; RV32IA-TSO-NEXT: .LBB47_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT: lr.w a5, (a2)
-; RV32IA-TSO-NEXT: and a7, a5, a3
-; RV32IA-TSO-NEXT: mv a6, a5
-; RV32IA-TSO-NEXT: sll a7, a7, a4
-; RV32IA-TSO-NEXT: sra a7, a7, a4
-; RV32IA-TSO-NEXT: bge a7, a1, .LBB47_3
-; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB47_1 Depth=1
-; RV32IA-TSO-NEXT: xor a6, a5, a1
-; RV32IA-TSO-NEXT: and a6, a6, a3
-; RV32IA-TSO-NEXT: xor a6, a5, a6
-; RV32IA-TSO-NEXT: .LBB47_3: # in Loop: Header=BB47_1 Depth=1
-; RV32IA-TSO-NEXT: sc.w a6, a6, (a2)
-; RV32IA-TSO-NEXT: bnez a6, .LBB47_1
-; RV32IA-TSO-NEXT: # %bb.4:
-; RV32IA-TSO-NEXT: srl a0, a5, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_max_i8_release:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: li a3, 255
+; RV32IA-TSO-NOZACAS-NEXT: slli a1, a1, 24
+; RV32IA-TSO-NOZACAS-NEXT: andi a4, a0, 24
+; RV32IA-TSO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: srai a1, a1, 24
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: xori a4, a4, 24
+; RV32IA-TSO-NOZACAS-NEXT: .LBB47_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: lr.w a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: and a7, a5, a3
+; RV32IA-TSO-NOZACAS-NEXT: mv a6, a5
+; RV32IA-TSO-NOZACAS-NEXT: sll a7, a7, a4
+; RV32IA-TSO-NOZACAS-NEXT: sra a7, a7, a4
+; RV32IA-TSO-NOZACAS-NEXT: bge a7, a1, .LBB47_3
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB47_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: xor a6, a5, a1
+; RV32IA-TSO-NOZACAS-NEXT: and a6, a6, a3
+; RV32IA-TSO-NOZACAS-NEXT: xor a6, a5, a6
+; RV32IA-TSO-NOZACAS-NEXT: .LBB47_3: # in Loop: Header=BB47_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: sc.w a6, a6, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: bnez a6, .LBB47_1
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.4:
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a5, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_max_i8_release:
; RV64I: # %bb.0:
@@ -6002,6 +8043,64 @@ define i8 @atomicrmw_max_i8_release(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a5, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_max_i8_release:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: li a3, 255
+; RV32IA-WMO-ZACAS-NEXT: slli a1, a1, 24
+; RV32IA-WMO-ZACAS-NEXT: andi a4, a0, 24
+; RV32IA-WMO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: srai a1, a1, 24
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: xori a4, a4, 24
+; RV32IA-WMO-ZACAS-NEXT: .LBB47_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT: lr.w a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT: and a7, a5, a3
+; RV32IA-WMO-ZACAS-NEXT: mv a6, a5
+; RV32IA-WMO-ZACAS-NEXT: sll a7, a7, a4
+; RV32IA-WMO-ZACAS-NEXT: sra a7, a7, a4
+; RV32IA-WMO-ZACAS-NEXT: bge a7, a1, .LBB47_3
+; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB47_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT: xor a6, a5, a1
+; RV32IA-WMO-ZACAS-NEXT: and a6, a6, a3
+; RV32IA-WMO-ZACAS-NEXT: xor a6, a5, a6
+; RV32IA-WMO-ZACAS-NEXT: .LBB47_3: # in Loop: Header=BB47_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a6, a6, (a2)
+; RV32IA-WMO-ZACAS-NEXT: bnez a6, .LBB47_1
+; RV32IA-WMO-ZACAS-NEXT: # %bb.4:
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a5, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_max_i8_release:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: li a3, 255
+; RV32IA-TSO-ZACAS-NEXT: slli a1, a1, 24
+; RV32IA-TSO-ZACAS-NEXT: andi a4, a0, 24
+; RV32IA-TSO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: srai a1, a1, 24
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: xori a4, a4, 24
+; RV32IA-TSO-ZACAS-NEXT: .LBB47_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT: lr.w a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT: and a7, a5, a3
+; RV32IA-TSO-ZACAS-NEXT: mv a6, a5
+; RV32IA-TSO-ZACAS-NEXT: sll a7, a7, a4
+; RV32IA-TSO-ZACAS-NEXT: sra a7, a7, a4
+; RV32IA-TSO-ZACAS-NEXT: bge a7, a1, .LBB47_3
+; RV32IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB47_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT: xor a6, a5, a1
+; RV32IA-TSO-ZACAS-NEXT: and a6, a6, a3
+; RV32IA-TSO-ZACAS-NEXT: xor a6, a5, a6
+; RV32IA-TSO-ZACAS-NEXT: .LBB47_3: # in Loop: Header=BB47_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT: sc.w a6, a6, (a2)
+; RV32IA-TSO-ZACAS-NEXT: bnez a6, .LBB47_1
+; RV32IA-TSO-ZACAS-NEXT: # %bb.4:
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a5, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_max_i8_release:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -6060,6 +8159,16 @@ define i8 @atomicrmw_max_i8_release(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a5, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_max_i8_release:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amomax.b.rl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_max_i8_release:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amomax.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_max_i8_release:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amomax.b.rl a0, a1, (a0)
@@ -6116,63 +8225,63 @@ define i8 @atomicrmw_max_i8_acq_rel(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_max_i8_acq_rel:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: li a3, 255
-; RV32IA-WMO-NEXT: slli a1, a1, 24
-; RV32IA-WMO-NEXT: andi a4, a0, 24
-; RV32IA-WMO-NEXT: sll a3, a3, a0
-; RV32IA-WMO-NEXT: srai a1, a1, 24
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: xori a4, a4, 24
-; RV32IA-WMO-NEXT: .LBB48_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT: lr.w.aq a5, (a2)
-; RV32IA-WMO-NEXT: and a7, a5, a3
-; RV32IA-WMO-NEXT: mv a6, a5
-; RV32IA-WMO-NEXT: sll a7, a7, a4
-; RV32IA-WMO-NEXT: sra a7, a7, a4
-; RV32IA-WMO-NEXT: bge a7, a1, .LBB48_3
-; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB48_1 Depth=1
-; RV32IA-WMO-NEXT: xor a6, a5, a1
-; RV32IA-WMO-NEXT: and a6, a6, a3
-; RV32IA-WMO-NEXT: xor a6, a5, a6
-; RV32IA-WMO-NEXT: .LBB48_3: # in Loop: Header=BB48_1 Depth=1
-; RV32IA-WMO-NEXT: sc.w.rl a6, a6, (a2)
-; RV32IA-WMO-NEXT: bnez a6, .LBB48_1
-; RV32IA-WMO-NEXT: # %bb.4:
-; RV32IA-WMO-NEXT: srl a0, a5, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_max_i8_acq_rel:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: li a3, 255
+; RV32IA-WMO-NOZACAS-NEXT: slli a1, a1, 24
+; RV32IA-WMO-NOZACAS-NEXT: andi a4, a0, 24
+; RV32IA-WMO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: srai a1, a1, 24
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: xori a4, a4, 24
+; RV32IA-WMO-NOZACAS-NEXT: .LBB48_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: lr.w.aq a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: and a7, a5, a3
+; RV32IA-WMO-NOZACAS-NEXT: mv a6, a5
+; RV32IA-WMO-NOZACAS-NEXT: sll a7, a7, a4
+; RV32IA-WMO-NOZACAS-NEXT: sra a7, a7, a4
+; RV32IA-WMO-NOZACAS-NEXT: bge a7, a1, .LBB48_3
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB48_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: xor a6, a5, a1
+; RV32IA-WMO-NOZACAS-NEXT: and a6, a6, a3
+; RV32IA-WMO-NOZACAS-NEXT: xor a6, a5, a6
+; RV32IA-WMO-NOZACAS-NEXT: .LBB48_3: # in Loop: Header=BB48_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: sc.w.rl a6, a6, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: bnez a6, .LBB48_1
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.4:
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a5, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_max_i8_acq_rel:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: li a3, 255
-; RV32IA-TSO-NEXT: slli a1, a1, 24
-; RV32IA-TSO-NEXT: andi a4, a0, 24
-; RV32IA-TSO-NEXT: sll a3, a3, a0
-; RV32IA-TSO-NEXT: srai a1, a1, 24
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: xori a4, a4, 24
-; RV32IA-TSO-NEXT: .LBB48_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT: lr.w a5, (a2)
-; RV32IA-TSO-NEXT: and a7, a5, a3
-; RV32IA-TSO-NEXT: mv a6, a5
-; RV32IA-TSO-NEXT: sll a7, a7, a4
-; RV32IA-TSO-NEXT: sra a7, a7, a4
-; RV32IA-TSO-NEXT: bge a7, a1, .LBB48_3
-; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB48_1 Depth=1
-; RV32IA-TSO-NEXT: xor a6, a5, a1
-; RV32IA-TSO-NEXT: and a6, a6, a3
-; RV32IA-TSO-NEXT: xor a6, a5, a6
-; RV32IA-TSO-NEXT: .LBB48_3: # in Loop: Header=BB48_1 Depth=1
-; RV32IA-TSO-NEXT: sc.w a6, a6, (a2)
-; RV32IA-TSO-NEXT: bnez a6, .LBB48_1
-; RV32IA-TSO-NEXT: # %bb.4:
-; RV32IA-TSO-NEXT: srl a0, a5, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_max_i8_acq_rel:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: li a3, 255
+; RV32IA-TSO-NOZACAS-NEXT: slli a1, a1, 24
+; RV32IA-TSO-NOZACAS-NEXT: andi a4, a0, 24
+; RV32IA-TSO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: srai a1, a1, 24
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: xori a4, a4, 24
+; RV32IA-TSO-NOZACAS-NEXT: .LBB48_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: lr.w a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: and a7, a5, a3
+; RV32IA-TSO-NOZACAS-NEXT: mv a6, a5
+; RV32IA-TSO-NOZACAS-NEXT: sll a7, a7, a4
+; RV32IA-TSO-NOZACAS-NEXT: sra a7, a7, a4
+; RV32IA-TSO-NOZACAS-NEXT: bge a7, a1, .LBB48_3
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB48_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: xor a6, a5, a1
+; RV32IA-TSO-NOZACAS-NEXT: and a6, a6, a3
+; RV32IA-TSO-NOZACAS-NEXT: xor a6, a5, a6
+; RV32IA-TSO-NOZACAS-NEXT: .LBB48_3: # in Loop: Header=BB48_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: sc.w a6, a6, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: bnez a6, .LBB48_1
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.4:
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a5, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_max_i8_acq_rel:
; RV64I: # %bb.0:
@@ -6274,6 +8383,64 @@ define i8 @atomicrmw_max_i8_acq_rel(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a5, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_max_i8_acq_rel:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: li a3, 255
+; RV32IA-WMO-ZACAS-NEXT: slli a1, a1, 24
+; RV32IA-WMO-ZACAS-NEXT: andi a4, a0, 24
+; RV32IA-WMO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: srai a1, a1, 24
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: xori a4, a4, 24
+; RV32IA-WMO-ZACAS-NEXT: .LBB48_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT: and a7, a5, a3
+; RV32IA-WMO-ZACAS-NEXT: mv a6, a5
+; RV32IA-WMO-ZACAS-NEXT: sll a7, a7, a4
+; RV32IA-WMO-ZACAS-NEXT: sra a7, a7, a4
+; RV32IA-WMO-ZACAS-NEXT: bge a7, a1, .LBB48_3
+; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB48_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT: xor a6, a5, a1
+; RV32IA-WMO-ZACAS-NEXT: and a6, a6, a3
+; RV32IA-WMO-ZACAS-NEXT: xor a6, a5, a6
+; RV32IA-WMO-ZACAS-NEXT: .LBB48_3: # in Loop: Header=BB48_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a6, a6, (a2)
+; RV32IA-WMO-ZACAS-NEXT: bnez a6, .LBB48_1
+; RV32IA-WMO-ZACAS-NEXT: # %bb.4:
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a5, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_max_i8_acq_rel:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: li a3, 255
+; RV32IA-TSO-ZACAS-NEXT: slli a1, a1, 24
+; RV32IA-TSO-ZACAS-NEXT: andi a4, a0, 24
+; RV32IA-TSO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: srai a1, a1, 24
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: xori a4, a4, 24
+; RV32IA-TSO-ZACAS-NEXT: .LBB48_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT: lr.w a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT: and a7, a5, a3
+; RV32IA-TSO-ZACAS-NEXT: mv a6, a5
+; RV32IA-TSO-ZACAS-NEXT: sll a7, a7, a4
+; RV32IA-TSO-ZACAS-NEXT: sra a7, a7, a4
+; RV32IA-TSO-ZACAS-NEXT: bge a7, a1, .LBB48_3
+; RV32IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB48_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT: xor a6, a5, a1
+; RV32IA-TSO-ZACAS-NEXT: and a6, a6, a3
+; RV32IA-TSO-ZACAS-NEXT: xor a6, a5, a6
+; RV32IA-TSO-ZACAS-NEXT: .LBB48_3: # in Loop: Header=BB48_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT: sc.w a6, a6, (a2)
+; RV32IA-TSO-ZACAS-NEXT: bnez a6, .LBB48_1
+; RV32IA-TSO-ZACAS-NEXT: # %bb.4:
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a5, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_max_i8_acq_rel:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -6332,6 +8499,16 @@ define i8 @atomicrmw_max_i8_acq_rel(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a5, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_max_i8_acq_rel:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amomax.b.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_max_i8_acq_rel:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amomax.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_max_i8_acq_rel:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amomax.b.aqrl a0, a1, (a0)
@@ -6388,34 +8565,34 @@ define i8 @atomicrmw_max_i8_seq_cst(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
-; RV32IA-LABEL: atomicrmw_max_i8_seq_cst:
-; RV32IA: # %bb.0:
-; RV32IA-NEXT: andi a2, a0, -4
-; RV32IA-NEXT: slli a0, a0, 3
-; RV32IA-NEXT: li a3, 255
-; RV32IA-NEXT: slli a1, a1, 24
-; RV32IA-NEXT: andi a4, a0, 24
-; RV32IA-NEXT: sll a3, a3, a0
-; RV32IA-NEXT: srai a1, a1, 24
-; RV32IA-NEXT: sll a1, a1, a0
-; RV32IA-NEXT: xori a4, a4, 24
-; RV32IA-NEXT: .LBB49_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: lr.w.aqrl a5, (a2)
-; RV32IA-NEXT: and a7, a5, a3
-; RV32IA-NEXT: mv a6, a5
-; RV32IA-NEXT: sll a7, a7, a4
-; RV32IA-NEXT: sra a7, a7, a4
-; RV32IA-NEXT: bge a7, a1, .LBB49_3
-; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB49_1 Depth=1
-; RV32IA-NEXT: xor a6, a5, a1
-; RV32IA-NEXT: and a6, a6, a3
-; RV32IA-NEXT: xor a6, a5, a6
-; RV32IA-NEXT: .LBB49_3: # in Loop: Header=BB49_1 Depth=1
-; RV32IA-NEXT: sc.w.rl a6, a6, (a2)
-; RV32IA-NEXT: bnez a6, .LBB49_1
-; RV32IA-NEXT: # %bb.4:
-; RV32IA-NEXT: srl a0, a5, a0
-; RV32IA-NEXT: ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_max_i8_seq_cst:
+; RV32IA-NOZACAS: # %bb.0:
+; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT: li a3, 255
+; RV32IA-NOZACAS-NEXT: slli a1, a1, 24
+; RV32IA-NOZACAS-NEXT: andi a4, a0, 24
+; RV32IA-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-NOZACAS-NEXT: srai a1, a1, 24
+; RV32IA-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-NOZACAS-NEXT: xori a4, a4, 24
+; RV32IA-NOZACAS-NEXT: .LBB49_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NOZACAS-NEXT: lr.w.aqrl a5, (a2)
+; RV32IA-NOZACAS-NEXT: and a7, a5, a3
+; RV32IA-NOZACAS-NEXT: mv a6, a5
+; RV32IA-NOZACAS-NEXT: sll a7, a7, a4
+; RV32IA-NOZACAS-NEXT: sra a7, a7, a4
+; RV32IA-NOZACAS-NEXT: bge a7, a1, .LBB49_3
+; RV32IA-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB49_1 Depth=1
+; RV32IA-NOZACAS-NEXT: xor a6, a5, a1
+; RV32IA-NOZACAS-NEXT: and a6, a6, a3
+; RV32IA-NOZACAS-NEXT: xor a6, a5, a6
+; RV32IA-NOZACAS-NEXT: .LBB49_3: # in Loop: Header=BB49_1 Depth=1
+; RV32IA-NOZACAS-NEXT: sc.w.rl a6, a6, (a2)
+; RV32IA-NOZACAS-NEXT: bnez a6, .LBB49_1
+; RV32IA-NOZACAS-NEXT: # %bb.4:
+; RV32IA-NOZACAS-NEXT: srl a0, a5, a0
+; RV32IA-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_max_i8_seq_cst:
; RV64I: # %bb.0:
@@ -6488,6 +8665,35 @@ define i8 @atomicrmw_max_i8_seq_cst(ptr %a, i8 %b) nounwind {
; RV64IA-NOZACAS-NEXT: srlw a0, a5, a0
; RV64IA-NOZACAS-NEXT: ret
;
+; RV32IA-ZACAS-LABEL: atomicrmw_max_i8_seq_cst:
+; RV32IA-ZACAS: # %bb.0:
+; RV32IA-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-ZACAS-NEXT: li a3, 255
+; RV32IA-ZACAS-NEXT: slli a1, a1, 24
+; RV32IA-ZACAS-NEXT: andi a4, a0, 24
+; RV32IA-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-ZACAS-NEXT: srai a1, a1, 24
+; RV32IA-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-ZACAS-NEXT: xori a4, a4, 24
+; RV32IA-ZACAS-NEXT: .LBB49_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT: lr.w.aqrl a5, (a2)
+; RV32IA-ZACAS-NEXT: and a7, a5, a3
+; RV32IA-ZACAS-NEXT: mv a6, a5
+; RV32IA-ZACAS-NEXT: sll a7, a7, a4
+; RV32IA-ZACAS-NEXT: sra a7, a7, a4
+; RV32IA-ZACAS-NEXT: bge a7, a1, .LBB49_3
+; RV32IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB49_1 Depth=1
+; RV32IA-ZACAS-NEXT: xor a6, a5, a1
+; RV32IA-ZACAS-NEXT: and a6, a6, a3
+; RV32IA-ZACAS-NEXT: xor a6, a5, a6
+; RV32IA-ZACAS-NEXT: .LBB49_3: # in Loop: Header=BB49_1 Depth=1
+; RV32IA-ZACAS-NEXT: sc.w.rl a6, a6, (a2)
+; RV32IA-ZACAS-NEXT: bnez a6, .LBB49_1
+; RV32IA-ZACAS-NEXT: # %bb.4:
+; RV32IA-ZACAS-NEXT: srl a0, a5, a0
+; RV32IA-ZACAS-NEXT: ret
+;
; RV64IA-ZACAS-LABEL: atomicrmw_max_i8_seq_cst:
; RV64IA-ZACAS: # %bb.0:
; RV64IA-ZACAS-NEXT: andi a2, a0, -4
@@ -6517,6 +8723,16 @@ define i8 @atomicrmw_max_i8_seq_cst(ptr %a, i8 %b) nounwind {
; RV64IA-ZACAS-NEXT: srlw a0, a5, a0
; RV64IA-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_max_i8_seq_cst:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amomax.b.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_max_i8_seq_cst:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amomax.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_max_i8_seq_cst:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amomax.b.aqrl a0, a1, (a0)
@@ -6573,34 +8789,34 @@ define i8 @atomicrmw_min_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
-; RV32IA-LABEL: atomicrmw_min_i8_monotonic:
-; RV32IA: # %bb.0:
-; RV32IA-NEXT: andi a2, a0, -4
-; RV32IA-NEXT: slli a0, a0, 3
-; RV32IA-NEXT: li a3, 255
-; RV32IA-NEXT: slli a1, a1, 24
-; RV32IA-NEXT: andi a4, a0, 24
-; RV32IA-NEXT: sll a3, a3, a0
-; RV32IA-NEXT: srai a1, a1, 24
-; RV32IA-NEXT: sll a1, a1, a0
-; RV32IA-NEXT: xori a4, a4, 24
-; RV32IA-NEXT: .LBB50_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: lr.w a5, (a2)
-; RV32IA-NEXT: and a7, a5, a3
-; RV32IA-NEXT: mv a6, a5
-; RV32IA-NEXT: sll a7, a7, a4
-; RV32IA-NEXT: sra a7, a7, a4
-; RV32IA-NEXT: bge a1, a7, .LBB50_3
-; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB50_1 Depth=1
-; RV32IA-NEXT: xor a6, a5, a1
-; RV32IA-NEXT: and a6, a6, a3
-; RV32IA-NEXT: xor a6, a5, a6
-; RV32IA-NEXT: .LBB50_3: # in Loop: Header=BB50_1 Depth=1
-; RV32IA-NEXT: sc.w a6, a6, (a2)
-; RV32IA-NEXT: bnez a6, .LBB50_1
-; RV32IA-NEXT: # %bb.4:
-; RV32IA-NEXT: srl a0, a5, a0
-; RV32IA-NEXT: ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_min_i8_monotonic:
+; RV32IA-NOZACAS: # %bb.0:
+; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT: li a3, 255
+; RV32IA-NOZACAS-NEXT: slli a1, a1, 24
+; RV32IA-NOZACAS-NEXT: andi a4, a0, 24
+; RV32IA-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-NOZACAS-NEXT: srai a1, a1, 24
+; RV32IA-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-NOZACAS-NEXT: xori a4, a4, 24
+; RV32IA-NOZACAS-NEXT: .LBB50_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NOZACAS-NEXT: lr.w a5, (a2)
+; RV32IA-NOZACAS-NEXT: and a7, a5, a3
+; RV32IA-NOZACAS-NEXT: mv a6, a5
+; RV32IA-NOZACAS-NEXT: sll a7, a7, a4
+; RV32IA-NOZACAS-NEXT: sra a7, a7, a4
+; RV32IA-NOZACAS-NEXT: bge a1, a7, .LBB50_3
+; RV32IA-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB50_1 Depth=1
+; RV32IA-NOZACAS-NEXT: xor a6, a5, a1
+; RV32IA-NOZACAS-NEXT: and a6, a6, a3
+; RV32IA-NOZACAS-NEXT: xor a6, a5, a6
+; RV32IA-NOZACAS-NEXT: .LBB50_3: # in Loop: Header=BB50_1 Depth=1
+; RV32IA-NOZACAS-NEXT: sc.w a6, a6, (a2)
+; RV32IA-NOZACAS-NEXT: bnez a6, .LBB50_1
+; RV32IA-NOZACAS-NEXT: # %bb.4:
+; RV32IA-NOZACAS-NEXT: srl a0, a5, a0
+; RV32IA-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_min_i8_monotonic:
; RV64I: # %bb.0:
@@ -6673,6 +8889,35 @@ define i8 @atomicrmw_min_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV64IA-NOZACAS-NEXT: srlw a0, a5, a0
; RV64IA-NOZACAS-NEXT: ret
;
+; RV32IA-ZACAS-LABEL: atomicrmw_min_i8_monotonic:
+; RV32IA-ZACAS: # %bb.0:
+; RV32IA-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-ZACAS-NEXT: li a3, 255
+; RV32IA-ZACAS-NEXT: slli a1, a1, 24
+; RV32IA-ZACAS-NEXT: andi a4, a0, 24
+; RV32IA-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-ZACAS-NEXT: srai a1, a1, 24
+; RV32IA-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-ZACAS-NEXT: xori a4, a4, 24
+; RV32IA-ZACAS-NEXT: .LBB50_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT: lr.w a5, (a2)
+; RV32IA-ZACAS-NEXT: and a7, a5, a3
+; RV32IA-ZACAS-NEXT: mv a6, a5
+; RV32IA-ZACAS-NEXT: sll a7, a7, a4
+; RV32IA-ZACAS-NEXT: sra a7, a7, a4
+; RV32IA-ZACAS-NEXT: bge a1, a7, .LBB50_3
+; RV32IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB50_1 Depth=1
+; RV32IA-ZACAS-NEXT: xor a6, a5, a1
+; RV32IA-ZACAS-NEXT: and a6, a6, a3
+; RV32IA-ZACAS-NEXT: xor a6, a5, a6
+; RV32IA-ZACAS-NEXT: .LBB50_3: # in Loop: Header=BB50_1 Depth=1
+; RV32IA-ZACAS-NEXT: sc.w a6, a6, (a2)
+; RV32IA-ZACAS-NEXT: bnez a6, .LBB50_1
+; RV32IA-ZACAS-NEXT: # %bb.4:
+; RV32IA-ZACAS-NEXT: srl a0, a5, a0
+; RV32IA-ZACAS-NEXT: ret
+;
; RV64IA-ZACAS-LABEL: atomicrmw_min_i8_monotonic:
; RV64IA-ZACAS: # %bb.0:
; RV64IA-ZACAS-NEXT: andi a2, a0, -4
@@ -6702,6 +8947,16 @@ define i8 @atomicrmw_min_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV64IA-ZACAS-NEXT: srlw a0, a5, a0
; RV64IA-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_min_i8_monotonic:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amomin.b a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_min_i8_monotonic:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amomin.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_min_i8_monotonic:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amomin.b a0, a1, (a0)
@@ -6758,63 +9013,63 @@ define i8 @atomicrmw_min_i8_acquire(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_min_i8_acquire:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: li a3, 255
-; RV32IA-WMO-NEXT: slli a1, a1, 24
-; RV32IA-WMO-NEXT: andi a4, a0, 24
-; RV32IA-WMO-NEXT: sll a3, a3, a0
-; RV32IA-WMO-NEXT: srai a1, a1, 24
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: xori a4, a4, 24
-; RV32IA-WMO-NEXT: .LBB51_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT: lr.w.aq a5, (a2)
-; RV32IA-WMO-NEXT: and a7, a5, a3
-; RV32IA-WMO-NEXT: mv a6, a5
-; RV32IA-WMO-NEXT: sll a7, a7, a4
-; RV32IA-WMO-NEXT: sra a7, a7, a4
-; RV32IA-WMO-NEXT: bge a1, a7, .LBB51_3
-; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB51_1 Depth=1
-; RV32IA-WMO-NEXT: xor a6, a5, a1
-; RV32IA-WMO-NEXT: and a6, a6, a3
-; RV32IA-WMO-NEXT: xor a6, a5, a6
-; RV32IA-WMO-NEXT: .LBB51_3: # in Loop: Header=BB51_1 Depth=1
-; RV32IA-WMO-NEXT: sc.w a6, a6, (a2)
-; RV32IA-WMO-NEXT: bnez a6, .LBB51_1
-; RV32IA-WMO-NEXT: # %bb.4:
-; RV32IA-WMO-NEXT: srl a0, a5, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_min_i8_acquire:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: li a3, 255
+; RV32IA-WMO-NOZACAS-NEXT: slli a1, a1, 24
+; RV32IA-WMO-NOZACAS-NEXT: andi a4, a0, 24
+; RV32IA-WMO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: srai a1, a1, 24
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: xori a4, a4, 24
+; RV32IA-WMO-NOZACAS-NEXT: .LBB51_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: lr.w.aq a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: and a7, a5, a3
+; RV32IA-WMO-NOZACAS-NEXT: mv a6, a5
+; RV32IA-WMO-NOZACAS-NEXT: sll a7, a7, a4
+; RV32IA-WMO-NOZACAS-NEXT: sra a7, a7, a4
+; RV32IA-WMO-NOZACAS-NEXT: bge a1, a7, .LBB51_3
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB51_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: xor a6, a5, a1
+; RV32IA-WMO-NOZACAS-NEXT: and a6, a6, a3
+; RV32IA-WMO-NOZACAS-NEXT: xor a6, a5, a6
+; RV32IA-WMO-NOZACAS-NEXT: .LBB51_3: # in Loop: Header=BB51_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: sc.w a6, a6, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: bnez a6, .LBB51_1
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.4:
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a5, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_min_i8_acquire:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: li a3, 255
-; RV32IA-TSO-NEXT: slli a1, a1, 24
-; RV32IA-TSO-NEXT: andi a4, a0, 24
-; RV32IA-TSO-NEXT: sll a3, a3, a0
-; RV32IA-TSO-NEXT: srai a1, a1, 24
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: xori a4, a4, 24
-; RV32IA-TSO-NEXT: .LBB51_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT: lr.w a5, (a2)
-; RV32IA-TSO-NEXT: and a7, a5, a3
-; RV32IA-TSO-NEXT: mv a6, a5
-; RV32IA-TSO-NEXT: sll a7, a7, a4
-; RV32IA-TSO-NEXT: sra a7, a7, a4
-; RV32IA-TSO-NEXT: bge a1, a7, .LBB51_3
-; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB51_1 Depth=1
-; RV32IA-TSO-NEXT: xor a6, a5, a1
-; RV32IA-TSO-NEXT: and a6, a6, a3
-; RV32IA-TSO-NEXT: xor a6, a5, a6
-; RV32IA-TSO-NEXT: .LBB51_3: # in Loop: Header=BB51_1 Depth=1
-; RV32IA-TSO-NEXT: sc.w a6, a6, (a2)
-; RV32IA-TSO-NEXT: bnez a6, .LBB51_1
-; RV32IA-TSO-NEXT: # %bb.4:
-; RV32IA-TSO-NEXT: srl a0, a5, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_min_i8_acquire:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: li a3, 255
+; RV32IA-TSO-NOZACAS-NEXT: slli a1, a1, 24
+; RV32IA-TSO-NOZACAS-NEXT: andi a4, a0, 24
+; RV32IA-TSO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: srai a1, a1, 24
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: xori a4, a4, 24
+; RV32IA-TSO-NOZACAS-NEXT: .LBB51_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: lr.w a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: and a7, a5, a3
+; RV32IA-TSO-NOZACAS-NEXT: mv a6, a5
+; RV32IA-TSO-NOZACAS-NEXT: sll a7, a7, a4
+; RV32IA-TSO-NOZACAS-NEXT: sra a7, a7, a4
+; RV32IA-TSO-NOZACAS-NEXT: bge a1, a7, .LBB51_3
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB51_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: xor a6, a5, a1
+; RV32IA-TSO-NOZACAS-NEXT: and a6, a6, a3
+; RV32IA-TSO-NOZACAS-NEXT: xor a6, a5, a6
+; RV32IA-TSO-NOZACAS-NEXT: .LBB51_3: # in Loop: Header=BB51_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: sc.w a6, a6, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: bnez a6, .LBB51_1
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.4:
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a5, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_min_i8_acquire:
; RV64I: # %bb.0:
@@ -6916,6 +9171,64 @@ define i8 @atomicrmw_min_i8_acquire(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a5, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_min_i8_acquire:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: li a3, 255
+; RV32IA-WMO-ZACAS-NEXT: slli a1, a1, 24
+; RV32IA-WMO-ZACAS-NEXT: andi a4, a0, 24
+; RV32IA-WMO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: srai a1, a1, 24
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: xori a4, a4, 24
+; RV32IA-WMO-ZACAS-NEXT: .LBB51_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT: and a7, a5, a3
+; RV32IA-WMO-ZACAS-NEXT: mv a6, a5
+; RV32IA-WMO-ZACAS-NEXT: sll a7, a7, a4
+; RV32IA-WMO-ZACAS-NEXT: sra a7, a7, a4
+; RV32IA-WMO-ZACAS-NEXT: bge a1, a7, .LBB51_3
+; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB51_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT: xor a6, a5, a1
+; RV32IA-WMO-ZACAS-NEXT: and a6, a6, a3
+; RV32IA-WMO-ZACAS-NEXT: xor a6, a5, a6
+; RV32IA-WMO-ZACAS-NEXT: .LBB51_3: # in Loop: Header=BB51_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT: sc.w a6, a6, (a2)
+; RV32IA-WMO-ZACAS-NEXT: bnez a6, .LBB51_1
+; RV32IA-WMO-ZACAS-NEXT: # %bb.4:
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a5, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_min_i8_acquire:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: li a3, 255
+; RV32IA-TSO-ZACAS-NEXT: slli a1, a1, 24
+; RV32IA-TSO-ZACAS-NEXT: andi a4, a0, 24
+; RV32IA-TSO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: srai a1, a1, 24
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: xori a4, a4, 24
+; RV32IA-TSO-ZACAS-NEXT: .LBB51_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT: lr.w a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT: and a7, a5, a3
+; RV32IA-TSO-ZACAS-NEXT: mv a6, a5
+; RV32IA-TSO-ZACAS-NEXT: sll a7, a7, a4
+; RV32IA-TSO-ZACAS-NEXT: sra a7, a7, a4
+; RV32IA-TSO-ZACAS-NEXT: bge a1, a7, .LBB51_3
+; RV32IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB51_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT: xor a6, a5, a1
+; RV32IA-TSO-ZACAS-NEXT: and a6, a6, a3
+; RV32IA-TSO-ZACAS-NEXT: xor a6, a5, a6
+; RV32IA-TSO-ZACAS-NEXT: .LBB51_3: # in Loop: Header=BB51_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT: sc.w a6, a6, (a2)
+; RV32IA-TSO-ZACAS-NEXT: bnez a6, .LBB51_1
+; RV32IA-TSO-ZACAS-NEXT: # %bb.4:
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a5, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_min_i8_acquire:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -6974,6 +9287,16 @@ define i8 @atomicrmw_min_i8_acquire(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a5, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_min_i8_acquire:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amomin.b.aq a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_min_i8_acquire:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amomin.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_min_i8_acquire:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amomin.b.aq a0, a1, (a0)
@@ -7030,63 +9353,63 @@ define i8 @atomicrmw_min_i8_release(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_min_i8_release:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: li a3, 255
-; RV32IA-WMO-NEXT: slli a1, a1, 24
-; RV32IA-WMO-NEXT: andi a4, a0, 24
-; RV32IA-WMO-NEXT: sll a3, a3, a0
-; RV32IA-WMO-NEXT: srai a1, a1, 24
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: xori a4, a4, 24
-; RV32IA-WMO-NEXT: .LBB52_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT: lr.w a5, (a2)
-; RV32IA-WMO-NEXT: and a7, a5, a3
-; RV32IA-WMO-NEXT: mv a6, a5
-; RV32IA-WMO-NEXT: sll a7, a7, a4
-; RV32IA-WMO-NEXT: sra a7, a7, a4
-; RV32IA-WMO-NEXT: bge a1, a7, .LBB52_3
-; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB52_1 Depth=1
-; RV32IA-WMO-NEXT: xor a6, a5, a1
-; RV32IA-WMO-NEXT: and a6, a6, a3
-; RV32IA-WMO-NEXT: xor a6, a5, a6
-; RV32IA-WMO-NEXT: .LBB52_3: # in Loop: Header=BB52_1 Depth=1
-; RV32IA-WMO-NEXT: sc.w.rl a6, a6, (a2)
-; RV32IA-WMO-NEXT: bnez a6, .LBB52_1
-; RV32IA-WMO-NEXT: # %bb.4:
-; RV32IA-WMO-NEXT: srl a0, a5, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_min_i8_release:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: li a3, 255
+; RV32IA-WMO-NOZACAS-NEXT: slli a1, a1, 24
+; RV32IA-WMO-NOZACAS-NEXT: andi a4, a0, 24
+; RV32IA-WMO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: srai a1, a1, 24
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: xori a4, a4, 24
+; RV32IA-WMO-NOZACAS-NEXT: .LBB52_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: lr.w a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: and a7, a5, a3
+; RV32IA-WMO-NOZACAS-NEXT: mv a6, a5
+; RV32IA-WMO-NOZACAS-NEXT: sll a7, a7, a4
+; RV32IA-WMO-NOZACAS-NEXT: sra a7, a7, a4
+; RV32IA-WMO-NOZACAS-NEXT: bge a1, a7, .LBB52_3
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB52_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: xor a6, a5, a1
+; RV32IA-WMO-NOZACAS-NEXT: and a6, a6, a3
+; RV32IA-WMO-NOZACAS-NEXT: xor a6, a5, a6
+; RV32IA-WMO-NOZACAS-NEXT: .LBB52_3: # in Loop: Header=BB52_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: sc.w.rl a6, a6, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: bnez a6, .LBB52_1
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.4:
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a5, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_min_i8_release:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: li a3, 255
-; RV32IA-TSO-NEXT: slli a1, a1, 24
-; RV32IA-TSO-NEXT: andi a4, a0, 24
-; RV32IA-TSO-NEXT: sll a3, a3, a0
-; RV32IA-TSO-NEXT: srai a1, a1, 24
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: xori a4, a4, 24
-; RV32IA-TSO-NEXT: .LBB52_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT: lr.w a5, (a2)
-; RV32IA-TSO-NEXT: and a7, a5, a3
-; RV32IA-TSO-NEXT: mv a6, a5
-; RV32IA-TSO-NEXT: sll a7, a7, a4
-; RV32IA-TSO-NEXT: sra a7, a7, a4
-; RV32IA-TSO-NEXT: bge a1, a7, .LBB52_3
-; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB52_1 Depth=1
-; RV32IA-TSO-NEXT: xor a6, a5, a1
-; RV32IA-TSO-NEXT: and a6, a6, a3
-; RV32IA-TSO-NEXT: xor a6, a5, a6
-; RV32IA-TSO-NEXT: .LBB52_3: # in Loop: Header=BB52_1 Depth=1
-; RV32IA-TSO-NEXT: sc.w a6, a6, (a2)
-; RV32IA-TSO-NEXT: bnez a6, .LBB52_1
-; RV32IA-TSO-NEXT: # %bb.4:
-; RV32IA-TSO-NEXT: srl a0, a5, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_min_i8_release:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: li a3, 255
+; RV32IA-TSO-NOZACAS-NEXT: slli a1, a1, 24
+; RV32IA-TSO-NOZACAS-NEXT: andi a4, a0, 24
+; RV32IA-TSO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: srai a1, a1, 24
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: xori a4, a4, 24
+; RV32IA-TSO-NOZACAS-NEXT: .LBB52_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: lr.w a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: and a7, a5, a3
+; RV32IA-TSO-NOZACAS-NEXT: mv a6, a5
+; RV32IA-TSO-NOZACAS-NEXT: sll a7, a7, a4
+; RV32IA-TSO-NOZACAS-NEXT: sra a7, a7, a4
+; RV32IA-TSO-NOZACAS-NEXT: bge a1, a7, .LBB52_3
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB52_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: xor a6, a5, a1
+; RV32IA-TSO-NOZACAS-NEXT: and a6, a6, a3
+; RV32IA-TSO-NOZACAS-NEXT: xor a6, a5, a6
+; RV32IA-TSO-NOZACAS-NEXT: .LBB52_3: # in Loop: Header=BB52_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: sc.w a6, a6, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: bnez a6, .LBB52_1
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.4:
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a5, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_min_i8_release:
; RV64I: # %bb.0:
@@ -7188,6 +9511,64 @@ define i8 @atomicrmw_min_i8_release(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a5, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_min_i8_release:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: li a3, 255
+; RV32IA-WMO-ZACAS-NEXT: slli a1, a1, 24
+; RV32IA-WMO-ZACAS-NEXT: andi a4, a0, 24
+; RV32IA-WMO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: srai a1, a1, 24
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: xori a4, a4, 24
+; RV32IA-WMO-ZACAS-NEXT: .LBB52_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT: lr.w a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT: and a7, a5, a3
+; RV32IA-WMO-ZACAS-NEXT: mv a6, a5
+; RV32IA-WMO-ZACAS-NEXT: sll a7, a7, a4
+; RV32IA-WMO-ZACAS-NEXT: sra a7, a7, a4
+; RV32IA-WMO-ZACAS-NEXT: bge a1, a7, .LBB52_3
+; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB52_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT: xor a6, a5, a1
+; RV32IA-WMO-ZACAS-NEXT: and a6, a6, a3
+; RV32IA-WMO-ZACAS-NEXT: xor a6, a5, a6
+; RV32IA-WMO-ZACAS-NEXT: .LBB52_3: # in Loop: Header=BB52_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a6, a6, (a2)
+; RV32IA-WMO-ZACAS-NEXT: bnez a6, .LBB52_1
+; RV32IA-WMO-ZACAS-NEXT: # %bb.4:
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a5, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_min_i8_release:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: li a3, 255
+; RV32IA-TSO-ZACAS-NEXT: slli a1, a1, 24
+; RV32IA-TSO-ZACAS-NEXT: andi a4, a0, 24
+; RV32IA-TSO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: srai a1, a1, 24
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: xori a4, a4, 24
+; RV32IA-TSO-ZACAS-NEXT: .LBB52_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT: lr.w a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT: and a7, a5, a3
+; RV32IA-TSO-ZACAS-NEXT: mv a6, a5
+; RV32IA-TSO-ZACAS-NEXT: sll a7, a7, a4
+; RV32IA-TSO-ZACAS-NEXT: sra a7, a7, a4
+; RV32IA-TSO-ZACAS-NEXT: bge a1, a7, .LBB52_3
+; RV32IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB52_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT: xor a6, a5, a1
+; RV32IA-TSO-ZACAS-NEXT: and a6, a6, a3
+; RV32IA-TSO-ZACAS-NEXT: xor a6, a5, a6
+; RV32IA-TSO-ZACAS-NEXT: .LBB52_3: # in Loop: Header=BB52_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT: sc.w a6, a6, (a2)
+; RV32IA-TSO-ZACAS-NEXT: bnez a6, .LBB52_1
+; RV32IA-TSO-ZACAS-NEXT: # %bb.4:
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a5, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_min_i8_release:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -7246,6 +9627,16 @@ define i8 @atomicrmw_min_i8_release(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a5, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_min_i8_release:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amomin.b.rl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_min_i8_release:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amomin.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_min_i8_release:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amomin.b.rl a0, a1, (a0)
@@ -7302,63 +9693,63 @@ define i8 @atomicrmw_min_i8_acq_rel(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_min_i8_acq_rel:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: li a3, 255
-; RV32IA-WMO-NEXT: slli a1, a1, 24
-; RV32IA-WMO-NEXT: andi a4, a0, 24
-; RV32IA-WMO-NEXT: sll a3, a3, a0
-; RV32IA-WMO-NEXT: srai a1, a1, 24
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: xori a4, a4, 24
-; RV32IA-WMO-NEXT: .LBB53_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT: lr.w.aq a5, (a2)
-; RV32IA-WMO-NEXT: and a7, a5, a3
-; RV32IA-WMO-NEXT: mv a6, a5
-; RV32IA-WMO-NEXT: sll a7, a7, a4
-; RV32IA-WMO-NEXT: sra a7, a7, a4
-; RV32IA-WMO-NEXT: bge a1, a7, .LBB53_3
-; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB53_1 Depth=1
-; RV32IA-WMO-NEXT: xor a6, a5, a1
-; RV32IA-WMO-NEXT: and a6, a6, a3
-; RV32IA-WMO-NEXT: xor a6, a5, a6
-; RV32IA-WMO-NEXT: .LBB53_3: # in Loop: Header=BB53_1 Depth=1
-; RV32IA-WMO-NEXT: sc.w.rl a6, a6, (a2)
-; RV32IA-WMO-NEXT: bnez a6, .LBB53_1
-; RV32IA-WMO-NEXT: # %bb.4:
-; RV32IA-WMO-NEXT: srl a0, a5, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_min_i8_acq_rel:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: li a3, 255
+; RV32IA-WMO-NOZACAS-NEXT: slli a1, a1, 24
+; RV32IA-WMO-NOZACAS-NEXT: andi a4, a0, 24
+; RV32IA-WMO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: srai a1, a1, 24
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: xori a4, a4, 24
+; RV32IA-WMO-NOZACAS-NEXT: .LBB53_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: lr.w.aq a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: and a7, a5, a3
+; RV32IA-WMO-NOZACAS-NEXT: mv a6, a5
+; RV32IA-WMO-NOZACAS-NEXT: sll a7, a7, a4
+; RV32IA-WMO-NOZACAS-NEXT: sra a7, a7, a4
+; RV32IA-WMO-NOZACAS-NEXT: bge a1, a7, .LBB53_3
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB53_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: xor a6, a5, a1
+; RV32IA-WMO-NOZACAS-NEXT: and a6, a6, a3
+; RV32IA-WMO-NOZACAS-NEXT: xor a6, a5, a6
+; RV32IA-WMO-NOZACAS-NEXT: .LBB53_3: # in Loop: Header=BB53_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: sc.w.rl a6, a6, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: bnez a6, .LBB53_1
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.4:
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a5, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_min_i8_acq_rel:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: li a3, 255
-; RV32IA-TSO-NEXT: slli a1, a1, 24
-; RV32IA-TSO-NEXT: andi a4, a0, 24
-; RV32IA-TSO-NEXT: sll a3, a3, a0
-; RV32IA-TSO-NEXT: srai a1, a1, 24
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: xori a4, a4, 24
-; RV32IA-TSO-NEXT: .LBB53_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT: lr.w a5, (a2)
-; RV32IA-TSO-NEXT: and a7, a5, a3
-; RV32IA-TSO-NEXT: mv a6, a5
-; RV32IA-TSO-NEXT: sll a7, a7, a4
-; RV32IA-TSO-NEXT: sra a7, a7, a4
-; RV32IA-TSO-NEXT: bge a1, a7, .LBB53_3
-; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB53_1 Depth=1
-; RV32IA-TSO-NEXT: xor a6, a5, a1
-; RV32IA-TSO-NEXT: and a6, a6, a3
-; RV32IA-TSO-NEXT: xor a6, a5, a6
-; RV32IA-TSO-NEXT: .LBB53_3: # in Loop: Header=BB53_1 Depth=1
-; RV32IA-TSO-NEXT: sc.w a6, a6, (a2)
-; RV32IA-TSO-NEXT: bnez a6, .LBB53_1
-; RV32IA-TSO-NEXT: # %bb.4:
-; RV32IA-TSO-NEXT: srl a0, a5, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_min_i8_acq_rel:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: li a3, 255
+; RV32IA-TSO-NOZACAS-NEXT: slli a1, a1, 24
+; RV32IA-TSO-NOZACAS-NEXT: andi a4, a0, 24
+; RV32IA-TSO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: srai a1, a1, 24
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: xori a4, a4, 24
+; RV32IA-TSO-NOZACAS-NEXT: .LBB53_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: lr.w a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: and a7, a5, a3
+; RV32IA-TSO-NOZACAS-NEXT: mv a6, a5
+; RV32IA-TSO-NOZACAS-NEXT: sll a7, a7, a4
+; RV32IA-TSO-NOZACAS-NEXT: sra a7, a7, a4
+; RV32IA-TSO-NOZACAS-NEXT: bge a1, a7, .LBB53_3
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB53_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: xor a6, a5, a1
+; RV32IA-TSO-NOZACAS-NEXT: and a6, a6, a3
+; RV32IA-TSO-NOZACAS-NEXT: xor a6, a5, a6
+; RV32IA-TSO-NOZACAS-NEXT: .LBB53_3: # in Loop: Header=BB53_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: sc.w a6, a6, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: bnez a6, .LBB53_1
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.4:
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a5, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_min_i8_acq_rel:
; RV64I: # %bb.0:
@@ -7460,6 +9851,64 @@ define i8 @atomicrmw_min_i8_acq_rel(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a5, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_min_i8_acq_rel:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: li a3, 255
+; RV32IA-WMO-ZACAS-NEXT: slli a1, a1, 24
+; RV32IA-WMO-ZACAS-NEXT: andi a4, a0, 24
+; RV32IA-WMO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: srai a1, a1, 24
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: xori a4, a4, 24
+; RV32IA-WMO-ZACAS-NEXT: .LBB53_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT: and a7, a5, a3
+; RV32IA-WMO-ZACAS-NEXT: mv a6, a5
+; RV32IA-WMO-ZACAS-NEXT: sll a7, a7, a4
+; RV32IA-WMO-ZACAS-NEXT: sra a7, a7, a4
+; RV32IA-WMO-ZACAS-NEXT: bge a1, a7, .LBB53_3
+; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB53_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT: xor a6, a5, a1
+; RV32IA-WMO-ZACAS-NEXT: and a6, a6, a3
+; RV32IA-WMO-ZACAS-NEXT: xor a6, a5, a6
+; RV32IA-WMO-ZACAS-NEXT: .LBB53_3: # in Loop: Header=BB53_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a6, a6, (a2)
+; RV32IA-WMO-ZACAS-NEXT: bnez a6, .LBB53_1
+; RV32IA-WMO-ZACAS-NEXT: # %bb.4:
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a5, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_min_i8_acq_rel:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: li a3, 255
+; RV32IA-TSO-ZACAS-NEXT: slli a1, a1, 24
+; RV32IA-TSO-ZACAS-NEXT: andi a4, a0, 24
+; RV32IA-TSO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: srai a1, a1, 24
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: xori a4, a4, 24
+; RV32IA-TSO-ZACAS-NEXT: .LBB53_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT: lr.w a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT: and a7, a5, a3
+; RV32IA-TSO-ZACAS-NEXT: mv a6, a5
+; RV32IA-TSO-ZACAS-NEXT: sll a7, a7, a4
+; RV32IA-TSO-ZACAS-NEXT: sra a7, a7, a4
+; RV32IA-TSO-ZACAS-NEXT: bge a1, a7, .LBB53_3
+; RV32IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB53_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT: xor a6, a5, a1
+; RV32IA-TSO-ZACAS-NEXT: and a6, a6, a3
+; RV32IA-TSO-ZACAS-NEXT: xor a6, a5, a6
+; RV32IA-TSO-ZACAS-NEXT: .LBB53_3: # in Loop: Header=BB53_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT: sc.w a6, a6, (a2)
+; RV32IA-TSO-ZACAS-NEXT: bnez a6, .LBB53_1
+; RV32IA-TSO-ZACAS-NEXT: # %bb.4:
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a5, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_min_i8_acq_rel:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -7518,6 +9967,16 @@ define i8 @atomicrmw_min_i8_acq_rel(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a5, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_min_i8_acq_rel:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amomin.b.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_min_i8_acq_rel:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amomin.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_min_i8_acq_rel:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amomin.b.aqrl a0, a1, (a0)
@@ -7574,34 +10033,34 @@ define i8 @atomicrmw_min_i8_seq_cst(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
-; RV32IA-LABEL: atomicrmw_min_i8_seq_cst:
-; RV32IA: # %bb.0:
-; RV32IA-NEXT: andi a2, a0, -4
-; RV32IA-NEXT: slli a0, a0, 3
-; RV32IA-NEXT: li a3, 255
-; RV32IA-NEXT: slli a1, a1, 24
-; RV32IA-NEXT: andi a4, a0, 24
-; RV32IA-NEXT: sll a3, a3, a0
-; RV32IA-NEXT: srai a1, a1, 24
-; RV32IA-NEXT: sll a1, a1, a0
-; RV32IA-NEXT: xori a4, a4, 24
-; RV32IA-NEXT: .LBB54_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: lr.w.aqrl a5, (a2)
-; RV32IA-NEXT: and a7, a5, a3
-; RV32IA-NEXT: mv a6, a5
-; RV32IA-NEXT: sll a7, a7, a4
-; RV32IA-NEXT: sra a7, a7, a4
-; RV32IA-NEXT: bge a1, a7, .LBB54_3
-; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB54_1 Depth=1
-; RV32IA-NEXT: xor a6, a5, a1
-; RV32IA-NEXT: and a6, a6, a3
-; RV32IA-NEXT: xor a6, a5, a6
-; RV32IA-NEXT: .LBB54_3: # in Loop: Header=BB54_1 Depth=1
-; RV32IA-NEXT: sc.w.rl a6, a6, (a2)
-; RV32IA-NEXT: bnez a6, .LBB54_1
-; RV32IA-NEXT: # %bb.4:
-; RV32IA-NEXT: srl a0, a5, a0
-; RV32IA-NEXT: ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_min_i8_seq_cst:
+; RV32IA-NOZACAS: # %bb.0:
+; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT: li a3, 255
+; RV32IA-NOZACAS-NEXT: slli a1, a1, 24
+; RV32IA-NOZACAS-NEXT: andi a4, a0, 24
+; RV32IA-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-NOZACAS-NEXT: srai a1, a1, 24
+; RV32IA-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-NOZACAS-NEXT: xori a4, a4, 24
+; RV32IA-NOZACAS-NEXT: .LBB54_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NOZACAS-NEXT: lr.w.aqrl a5, (a2)
+; RV32IA-NOZACAS-NEXT: and a7, a5, a3
+; RV32IA-NOZACAS-NEXT: mv a6, a5
+; RV32IA-NOZACAS-NEXT: sll a7, a7, a4
+; RV32IA-NOZACAS-NEXT: sra a7, a7, a4
+; RV32IA-NOZACAS-NEXT: bge a1, a7, .LBB54_3
+; RV32IA-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB54_1 Depth=1
+; RV32IA-NOZACAS-NEXT: xor a6, a5, a1
+; RV32IA-NOZACAS-NEXT: and a6, a6, a3
+; RV32IA-NOZACAS-NEXT: xor a6, a5, a6
+; RV32IA-NOZACAS-NEXT: .LBB54_3: # in Loop: Header=BB54_1 Depth=1
+; RV32IA-NOZACAS-NEXT: sc.w.rl a6, a6, (a2)
+; RV32IA-NOZACAS-NEXT: bnez a6, .LBB54_1
+; RV32IA-NOZACAS-NEXT: # %bb.4:
+; RV32IA-NOZACAS-NEXT: srl a0, a5, a0
+; RV32IA-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_min_i8_seq_cst:
; RV64I: # %bb.0:
@@ -7674,6 +10133,35 @@ define i8 @atomicrmw_min_i8_seq_cst(ptr %a, i8 %b) nounwind {
; RV64IA-NOZACAS-NEXT: srlw a0, a5, a0
; RV64IA-NOZACAS-NEXT: ret
;
+; RV32IA-ZACAS-LABEL: atomicrmw_min_i8_seq_cst:
+; RV32IA-ZACAS: # %bb.0:
+; RV32IA-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-ZACAS-NEXT: li a3, 255
+; RV32IA-ZACAS-NEXT: slli a1, a1, 24
+; RV32IA-ZACAS-NEXT: andi a4, a0, 24
+; RV32IA-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-ZACAS-NEXT: srai a1, a1, 24
+; RV32IA-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-ZACAS-NEXT: xori a4, a4, 24
+; RV32IA-ZACAS-NEXT: .LBB54_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT: lr.w.aqrl a5, (a2)
+; RV32IA-ZACAS-NEXT: and a7, a5, a3
+; RV32IA-ZACAS-NEXT: mv a6, a5
+; RV32IA-ZACAS-NEXT: sll a7, a7, a4
+; RV32IA-ZACAS-NEXT: sra a7, a7, a4
+; RV32IA-ZACAS-NEXT: bge a1, a7, .LBB54_3
+; RV32IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB54_1 Depth=1
+; RV32IA-ZACAS-NEXT: xor a6, a5, a1
+; RV32IA-ZACAS-NEXT: and a6, a6, a3
+; RV32IA-ZACAS-NEXT: xor a6, a5, a6
+; RV32IA-ZACAS-NEXT: .LBB54_3: # in Loop: Header=BB54_1 Depth=1
+; RV32IA-ZACAS-NEXT: sc.w.rl a6, a6, (a2)
+; RV32IA-ZACAS-NEXT: bnez a6, .LBB54_1
+; RV32IA-ZACAS-NEXT: # %bb.4:
+; RV32IA-ZACAS-NEXT: srl a0, a5, a0
+; RV32IA-ZACAS-NEXT: ret
+;
; RV64IA-ZACAS-LABEL: atomicrmw_min_i8_seq_cst:
; RV64IA-ZACAS: # %bb.0:
; RV64IA-ZACAS-NEXT: andi a2, a0, -4
@@ -7703,6 +10191,16 @@ define i8 @atomicrmw_min_i8_seq_cst(ptr %a, i8 %b) nounwind {
; RV64IA-ZACAS-NEXT: srlw a0, a5, a0
; RV64IA-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_min_i8_seq_cst:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amomin.b.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_min_i8_seq_cst:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amomin.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_min_i8_seq_cst:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amomin.b.aqrl a0, a1, (a0)
@@ -7757,29 +10255,29 @@ define i8 @atomicrmw_umax_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
-; RV32IA-LABEL: atomicrmw_umax_i8_monotonic:
-; RV32IA: # %bb.0:
-; RV32IA-NEXT: andi a2, a0, -4
-; RV32IA-NEXT: slli a0, a0, 3
-; RV32IA-NEXT: li a3, 255
-; RV32IA-NEXT: zext.b a1, a1
-; RV32IA-NEXT: sll a3, a3, a0
-; RV32IA-NEXT: sll a1, a1, a0
-; RV32IA-NEXT: .LBB55_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: lr.w a4, (a2)
-; RV32IA-NEXT: and a6, a4, a3
-; RV32IA-NEXT: mv a5, a4
-; RV32IA-NEXT: bgeu a6, a1, .LBB55_3
-; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB55_1 Depth=1
-; RV32IA-NEXT: xor a5, a4, a1
-; RV32IA-NEXT: and a5, a5, a3
-; RV32IA-NEXT: xor a5, a4, a5
-; RV32IA-NEXT: .LBB55_3: # in Loop: Header=BB55_1 Depth=1
-; RV32IA-NEXT: sc.w a5, a5, (a2)
-; RV32IA-NEXT: bnez a5, .LBB55_1
-; RV32IA-NEXT: # %bb.4:
-; RV32IA-NEXT: srl a0, a4, a0
-; RV32IA-NEXT: ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_umax_i8_monotonic:
+; RV32IA-NOZACAS: # %bb.0:
+; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT: li a3, 255
+; RV32IA-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-NOZACAS-NEXT: .LBB55_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NOZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-NOZACAS-NEXT: and a6, a4, a3
+; RV32IA-NOZACAS-NEXT: mv a5, a4
+; RV32IA-NOZACAS-NEXT: bgeu a6, a1, .LBB55_3
+; RV32IA-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB55_1 Depth=1
+; RV32IA-NOZACAS-NEXT: xor a5, a4, a1
+; RV32IA-NOZACAS-NEXT: and a5, a5, a3
+; RV32IA-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-NOZACAS-NEXT: .LBB55_3: # in Loop: Header=BB55_1 Depth=1
+; RV32IA-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-NOZACAS-NEXT: bnez a5, .LBB55_1
+; RV32IA-NOZACAS-NEXT: # %bb.4:
+; RV32IA-NOZACAS-NEXT: srl a0, a4, a0
+; RV32IA-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_umax_i8_monotonic:
; RV64I: # %bb.0:
@@ -7845,6 +10343,30 @@ define i8 @atomicrmw_umax_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV64IA-NOZACAS-NEXT: srlw a0, a4, a0
; RV64IA-NOZACAS-NEXT: ret
;
+; RV32IA-ZACAS-LABEL: atomicrmw_umax_i8_monotonic:
+; RV32IA-ZACAS: # %bb.0:
+; RV32IA-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-ZACAS-NEXT: li a3, 255
+; RV32IA-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-ZACAS-NEXT: .LBB55_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-ZACAS-NEXT: and a6, a4, a3
+; RV32IA-ZACAS-NEXT: mv a5, a4
+; RV32IA-ZACAS-NEXT: bgeu a6, a1, .LBB55_3
+; RV32IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB55_1 Depth=1
+; RV32IA-ZACAS-NEXT: xor a5, a4, a1
+; RV32IA-ZACAS-NEXT: and a5, a5, a3
+; RV32IA-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-ZACAS-NEXT: .LBB55_3: # in Loop: Header=BB55_1 Depth=1
+; RV32IA-ZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-ZACAS-NEXT: bnez a5, .LBB55_1
+; RV32IA-ZACAS-NEXT: # %bb.4:
+; RV32IA-ZACAS-NEXT: srl a0, a4, a0
+; RV32IA-ZACAS-NEXT: ret
+;
; RV64IA-ZACAS-LABEL: atomicrmw_umax_i8_monotonic:
; RV64IA-ZACAS: # %bb.0:
; RV64IA-ZACAS-NEXT: andi a2, a0, -4
@@ -7869,6 +10391,16 @@ define i8 @atomicrmw_umax_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV64IA-ZACAS-NEXT: srlw a0, a4, a0
; RV64IA-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_umax_i8_monotonic:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amomaxu.b a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_umax_i8_monotonic:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amomaxu.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_umax_i8_monotonic:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amomaxu.b a0, a1, (a0)
@@ -7923,53 +10455,53 @@ define i8 @atomicrmw_umax_i8_acquire(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_umax_i8_acquire:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: li a3, 255
-; RV32IA-WMO-NEXT: zext.b a1, a1
-; RV32IA-WMO-NEXT: sll a3, a3, a0
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: .LBB56_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT: lr.w.aq a4, (a2)
-; RV32IA-WMO-NEXT: and a6, a4, a3
-; RV32IA-WMO-NEXT: mv a5, a4
-; RV32IA-WMO-NEXT: bgeu a6, a1, .LBB56_3
-; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB56_1 Depth=1
-; RV32IA-WMO-NEXT: xor a5, a4, a1
-; RV32IA-WMO-NEXT: and a5, a5, a3
-; RV32IA-WMO-NEXT: xor a5, a4, a5
-; RV32IA-WMO-NEXT: .LBB56_3: # in Loop: Header=BB56_1 Depth=1
-; RV32IA-WMO-NEXT: sc.w a5, a5, (a2)
-; RV32IA-WMO-NEXT: bnez a5, .LBB56_1
-; RV32IA-WMO-NEXT: # %bb.4:
-; RV32IA-WMO-NEXT: srl a0, a4, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_umax_i8_acquire:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: li a3, 255
+; RV32IA-WMO-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: .LBB56_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: lr.w.aq a4, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: and a6, a4, a3
+; RV32IA-WMO-NOZACAS-NEXT: mv a5, a4
+; RV32IA-WMO-NOZACAS-NEXT: bgeu a6, a1, .LBB56_3
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB56_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a4, a1
+; RV32IA-WMO-NOZACAS-NEXT: and a5, a5, a3
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-NOZACAS-NEXT: .LBB56_3: # in Loop: Header=BB56_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: bnez a5, .LBB56_1
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.4:
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a4, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_umax_i8_acquire:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: li a3, 255
-; RV32IA-TSO-NEXT: zext.b a1, a1
-; RV32IA-TSO-NEXT: sll a3, a3, a0
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: .LBB56_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT: lr.w a4, (a2)
-; RV32IA-TSO-NEXT: and a6, a4, a3
-; RV32IA-TSO-NEXT: mv a5, a4
-; RV32IA-TSO-NEXT: bgeu a6, a1, .LBB56_3
-; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB56_1 Depth=1
-; RV32IA-TSO-NEXT: xor a5, a4, a1
-; RV32IA-TSO-NEXT: and a5, a5, a3
-; RV32IA-TSO-NEXT: xor a5, a4, a5
-; RV32IA-TSO-NEXT: .LBB56_3: # in Loop: Header=BB56_1 Depth=1
-; RV32IA-TSO-NEXT: sc.w a5, a5, (a2)
-; RV32IA-TSO-NEXT: bnez a5, .LBB56_1
-; RV32IA-TSO-NEXT: # %bb.4:
-; RV32IA-TSO-NEXT: srl a0, a4, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_umax_i8_acquire:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: li a3, 255
+; RV32IA-TSO-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: .LBB56_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: and a6, a4, a3
+; RV32IA-TSO-NOZACAS-NEXT: mv a5, a4
+; RV32IA-TSO-NOZACAS-NEXT: bgeu a6, a1, .LBB56_3
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB56_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a4, a1
+; RV32IA-TSO-NOZACAS-NEXT: and a5, a5, a3
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-NOZACAS-NEXT: .LBB56_3: # in Loop: Header=BB56_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: bnez a5, .LBB56_1
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.4:
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a4, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_umax_i8_acquire:
; RV64I: # %bb.0:
@@ -8059,6 +10591,54 @@ define i8 @atomicrmw_umax_i8_acquire(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a4, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_umax_i8_acquire:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: li a3, 255
+; RV32IA-WMO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: .LBB56_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a4, (a2)
+; RV32IA-WMO-ZACAS-NEXT: and a6, a4, a3
+; RV32IA-WMO-ZACAS-NEXT: mv a5, a4
+; RV32IA-WMO-ZACAS-NEXT: bgeu a6, a1, .LBB56_3
+; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB56_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a4, a1
+; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a3
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-ZACAS-NEXT: .LBB56_3: # in Loop: Header=BB56_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB56_1
+; RV32IA-WMO-ZACAS-NEXT: # %bb.4:
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a4, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_umax_i8_acquire:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: li a3, 255
+; RV32IA-TSO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: .LBB56_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-TSO-ZACAS-NEXT: and a6, a4, a3
+; RV32IA-TSO-ZACAS-NEXT: mv a5, a4
+; RV32IA-TSO-ZACAS-NEXT: bgeu a6, a1, .LBB56_3
+; RV32IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB56_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a4, a1
+; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a3
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-ZACAS-NEXT: .LBB56_3: # in Loop: Header=BB56_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT: bnez a5, .LBB56_1
+; RV32IA-TSO-ZACAS-NEXT: # %bb.4:
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a4, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_umax_i8_acquire:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -8107,6 +10687,16 @@ define i8 @atomicrmw_umax_i8_acquire(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a4, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_umax_i8_acquire:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amomaxu.b.aq a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_umax_i8_acquire:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amomaxu.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_umax_i8_acquire:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amomaxu.b.aq a0, a1, (a0)
@@ -8161,53 +10751,53 @@ define i8 @atomicrmw_umax_i8_release(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_umax_i8_release:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: li a3, 255
-; RV32IA-WMO-NEXT: zext.b a1, a1
-; RV32IA-WMO-NEXT: sll a3, a3, a0
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: .LBB57_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT: lr.w a4, (a2)
-; RV32IA-WMO-NEXT: and a6, a4, a3
-; RV32IA-WMO-NEXT: mv a5, a4
-; RV32IA-WMO-NEXT: bgeu a6, a1, .LBB57_3
-; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB57_1 Depth=1
-; RV32IA-WMO-NEXT: xor a5, a4, a1
-; RV32IA-WMO-NEXT: and a5, a5, a3
-; RV32IA-WMO-NEXT: xor a5, a4, a5
-; RV32IA-WMO-NEXT: .LBB57_3: # in Loop: Header=BB57_1 Depth=1
-; RV32IA-WMO-NEXT: sc.w.rl a5, a5, (a2)
-; RV32IA-WMO-NEXT: bnez a5, .LBB57_1
-; RV32IA-WMO-NEXT: # %bb.4:
-; RV32IA-WMO-NEXT: srl a0, a4, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_umax_i8_release:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: li a3, 255
+; RV32IA-WMO-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: .LBB57_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: and a6, a4, a3
+; RV32IA-WMO-NOZACAS-NEXT: mv a5, a4
+; RV32IA-WMO-NOZACAS-NEXT: bgeu a6, a1, .LBB57_3
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB57_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a4, a1
+; RV32IA-WMO-NOZACAS-NEXT: and a5, a5, a3
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-NOZACAS-NEXT: .LBB57_3: # in Loop: Header=BB57_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: bnez a5, .LBB57_1
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.4:
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a4, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_umax_i8_release:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: li a3, 255
-; RV32IA-TSO-NEXT: zext.b a1, a1
-; RV32IA-TSO-NEXT: sll a3, a3, a0
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: .LBB57_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT: lr.w a4, (a2)
-; RV32IA-TSO-NEXT: and a6, a4, a3
-; RV32IA-TSO-NEXT: mv a5, a4
-; RV32IA-TSO-NEXT: bgeu a6, a1, .LBB57_3
-; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB57_1 Depth=1
-; RV32IA-TSO-NEXT: xor a5, a4, a1
-; RV32IA-TSO-NEXT: and a5, a5, a3
-; RV32IA-TSO-NEXT: xor a5, a4, a5
-; RV32IA-TSO-NEXT: .LBB57_3: # in Loop: Header=BB57_1 Depth=1
-; RV32IA-TSO-NEXT: sc.w a5, a5, (a2)
-; RV32IA-TSO-NEXT: bnez a5, .LBB57_1
-; RV32IA-TSO-NEXT: # %bb.4:
-; RV32IA-TSO-NEXT: srl a0, a4, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_umax_i8_release:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: li a3, 255
+; RV32IA-TSO-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: .LBB57_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: and a6, a4, a3
+; RV32IA-TSO-NOZACAS-NEXT: mv a5, a4
+; RV32IA-TSO-NOZACAS-NEXT: bgeu a6, a1, .LBB57_3
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB57_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a4, a1
+; RV32IA-TSO-NOZACAS-NEXT: and a5, a5, a3
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-NOZACAS-NEXT: .LBB57_3: # in Loop: Header=BB57_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: bnez a5, .LBB57_1
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.4:
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a4, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_umax_i8_release:
; RV64I: # %bb.0:
@@ -8297,6 +10887,54 @@ define i8 @atomicrmw_umax_i8_release(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a4, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_umax_i8_release:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: li a3, 255
+; RV32IA-WMO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: .LBB57_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-WMO-ZACAS-NEXT: and a6, a4, a3
+; RV32IA-WMO-ZACAS-NEXT: mv a5, a4
+; RV32IA-WMO-ZACAS-NEXT: bgeu a6, a1, .LBB57_3
+; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB57_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a4, a1
+; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a3
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-ZACAS-NEXT: .LBB57_3: # in Loop: Header=BB57_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB57_1
+; RV32IA-WMO-ZACAS-NEXT: # %bb.4:
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a4, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_umax_i8_release:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: li a3, 255
+; RV32IA-TSO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: .LBB57_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-TSO-ZACAS-NEXT: and a6, a4, a3
+; RV32IA-TSO-ZACAS-NEXT: mv a5, a4
+; RV32IA-TSO-ZACAS-NEXT: bgeu a6, a1, .LBB57_3
+; RV32IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB57_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a4, a1
+; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a3
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-ZACAS-NEXT: .LBB57_3: # in Loop: Header=BB57_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT: bnez a5, .LBB57_1
+; RV32IA-TSO-ZACAS-NEXT: # %bb.4:
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a4, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_umax_i8_release:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -8345,6 +10983,16 @@ define i8 @atomicrmw_umax_i8_release(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a4, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_umax_i8_release:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amomaxu.b.rl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_umax_i8_release:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amomaxu.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_umax_i8_release:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amomaxu.b.rl a0, a1, (a0)
@@ -8399,53 +11047,53 @@ define i8 @atomicrmw_umax_i8_acq_rel(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_umax_i8_acq_rel:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: li a3, 255
-; RV32IA-WMO-NEXT: zext.b a1, a1
-; RV32IA-WMO-NEXT: sll a3, a3, a0
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: .LBB58_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT: lr.w.aq a4, (a2)
-; RV32IA-WMO-NEXT: and a6, a4, a3
-; RV32IA-WMO-NEXT: mv a5, a4
-; RV32IA-WMO-NEXT: bgeu a6, a1, .LBB58_3
-; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB58_1 Depth=1
-; RV32IA-WMO-NEXT: xor a5, a4, a1
-; RV32IA-WMO-NEXT: and a5, a5, a3
-; RV32IA-WMO-NEXT: xor a5, a4, a5
-; RV32IA-WMO-NEXT: .LBB58_3: # in Loop: Header=BB58_1 Depth=1
-; RV32IA-WMO-NEXT: sc.w.rl a5, a5, (a2)
-; RV32IA-WMO-NEXT: bnez a5, .LBB58_1
-; RV32IA-WMO-NEXT: # %bb.4:
-; RV32IA-WMO-NEXT: srl a0, a4, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_umax_i8_acq_rel:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: li a3, 255
+; RV32IA-WMO-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: .LBB58_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: lr.w.aq a4, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: and a6, a4, a3
+; RV32IA-WMO-NOZACAS-NEXT: mv a5, a4
+; RV32IA-WMO-NOZACAS-NEXT: bgeu a6, a1, .LBB58_3
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB58_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a4, a1
+; RV32IA-WMO-NOZACAS-NEXT: and a5, a5, a3
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-NOZACAS-NEXT: .LBB58_3: # in Loop: Header=BB58_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: bnez a5, .LBB58_1
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.4:
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a4, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_umax_i8_acq_rel:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: li a3, 255
-; RV32IA-TSO-NEXT: zext.b a1, a1
-; RV32IA-TSO-NEXT: sll a3, a3, a0
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: .LBB58_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT: lr.w a4, (a2)
-; RV32IA-TSO-NEXT: and a6, a4, a3
-; RV32IA-TSO-NEXT: mv a5, a4
-; RV32IA-TSO-NEXT: bgeu a6, a1, .LBB58_3
-; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB58_1 Depth=1
-; RV32IA-TSO-NEXT: xor a5, a4, a1
-; RV32IA-TSO-NEXT: and a5, a5, a3
-; RV32IA-TSO-NEXT: xor a5, a4, a5
-; RV32IA-TSO-NEXT: .LBB58_3: # in Loop: Header=BB58_1 Depth=1
-; RV32IA-TSO-NEXT: sc.w a5, a5, (a2)
-; RV32IA-TSO-NEXT: bnez a5, .LBB58_1
-; RV32IA-TSO-NEXT: # %bb.4:
-; RV32IA-TSO-NEXT: srl a0, a4, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_umax_i8_acq_rel:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: li a3, 255
+; RV32IA-TSO-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: .LBB58_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: and a6, a4, a3
+; RV32IA-TSO-NOZACAS-NEXT: mv a5, a4
+; RV32IA-TSO-NOZACAS-NEXT: bgeu a6, a1, .LBB58_3
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB58_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a4, a1
+; RV32IA-TSO-NOZACAS-NEXT: and a5, a5, a3
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-NOZACAS-NEXT: .LBB58_3: # in Loop: Header=BB58_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: bnez a5, .LBB58_1
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.4:
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a4, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_umax_i8_acq_rel:
; RV64I: # %bb.0:
@@ -8535,6 +11183,54 @@ define i8 @atomicrmw_umax_i8_acq_rel(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a4, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_umax_i8_acq_rel:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: li a3, 255
+; RV32IA-WMO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: .LBB58_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a4, (a2)
+; RV32IA-WMO-ZACAS-NEXT: and a6, a4, a3
+; RV32IA-WMO-ZACAS-NEXT: mv a5, a4
+; RV32IA-WMO-ZACAS-NEXT: bgeu a6, a1, .LBB58_3
+; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB58_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a4, a1
+; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a3
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-ZACAS-NEXT: .LBB58_3: # in Loop: Header=BB58_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB58_1
+; RV32IA-WMO-ZACAS-NEXT: # %bb.4:
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a4, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_umax_i8_acq_rel:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: li a3, 255
+; RV32IA-TSO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: .LBB58_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-TSO-ZACAS-NEXT: and a6, a4, a3
+; RV32IA-TSO-ZACAS-NEXT: mv a5, a4
+; RV32IA-TSO-ZACAS-NEXT: bgeu a6, a1, .LBB58_3
+; RV32IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB58_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a4, a1
+; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a3
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-ZACAS-NEXT: .LBB58_3: # in Loop: Header=BB58_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT: bnez a5, .LBB58_1
+; RV32IA-TSO-ZACAS-NEXT: # %bb.4:
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a4, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_umax_i8_acq_rel:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -8583,6 +11279,16 @@ define i8 @atomicrmw_umax_i8_acq_rel(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a4, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_umax_i8_acq_rel:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amomaxu.b.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_umax_i8_acq_rel:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amomaxu.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_umax_i8_acq_rel:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amomaxu.b.aqrl a0, a1, (a0)
@@ -8637,29 +11343,29 @@ define i8 @atomicrmw_umax_i8_seq_cst(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
-; RV32IA-LABEL: atomicrmw_umax_i8_seq_cst:
-; RV32IA: # %bb.0:
-; RV32IA-NEXT: andi a2, a0, -4
-; RV32IA-NEXT: slli a0, a0, 3
-; RV32IA-NEXT: li a3, 255
-; RV32IA-NEXT: zext.b a1, a1
-; RV32IA-NEXT: sll a3, a3, a0
-; RV32IA-NEXT: sll a1, a1, a0
-; RV32IA-NEXT: .LBB59_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: lr.w.aqrl a4, (a2)
-; RV32IA-NEXT: and a6, a4, a3
-; RV32IA-NEXT: mv a5, a4
-; RV32IA-NEXT: bgeu a6, a1, .LBB59_3
-; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB59_1 Depth=1
-; RV32IA-NEXT: xor a5, a4, a1
-; RV32IA-NEXT: and a5, a5, a3
-; RV32IA-NEXT: xor a5, a4, a5
-; RV32IA-NEXT: .LBB59_3: # in Loop: Header=BB59_1 Depth=1
-; RV32IA-NEXT: sc.w.rl a5, a5, (a2)
-; RV32IA-NEXT: bnez a5, .LBB59_1
-; RV32IA-NEXT: # %bb.4:
-; RV32IA-NEXT: srl a0, a4, a0
-; RV32IA-NEXT: ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_umax_i8_seq_cst:
+; RV32IA-NOZACAS: # %bb.0:
+; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT: li a3, 255
+; RV32IA-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-NOZACAS-NEXT: .LBB59_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NOZACAS-NEXT: lr.w.aqrl a4, (a2)
+; RV32IA-NOZACAS-NEXT: and a6, a4, a3
+; RV32IA-NOZACAS-NEXT: mv a5, a4
+; RV32IA-NOZACAS-NEXT: bgeu a6, a1, .LBB59_3
+; RV32IA-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB59_1 Depth=1
+; RV32IA-NOZACAS-NEXT: xor a5, a4, a1
+; RV32IA-NOZACAS-NEXT: and a5, a5, a3
+; RV32IA-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-NOZACAS-NEXT: .LBB59_3: # in Loop: Header=BB59_1 Depth=1
+; RV32IA-NOZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-NOZACAS-NEXT: bnez a5, .LBB59_1
+; RV32IA-NOZACAS-NEXT: # %bb.4:
+; RV32IA-NOZACAS-NEXT: srl a0, a4, a0
+; RV32IA-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_umax_i8_seq_cst:
; RV64I: # %bb.0:
@@ -8725,6 +11431,30 @@ define i8 @atomicrmw_umax_i8_seq_cst(ptr %a, i8 %b) nounwind {
; RV64IA-NOZACAS-NEXT: srlw a0, a4, a0
; RV64IA-NOZACAS-NEXT: ret
;
+; RV32IA-ZACAS-LABEL: atomicrmw_umax_i8_seq_cst:
+; RV32IA-ZACAS: # %bb.0:
+; RV32IA-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-ZACAS-NEXT: li a3, 255
+; RV32IA-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-ZACAS-NEXT: .LBB59_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT: lr.w.aqrl a4, (a2)
+; RV32IA-ZACAS-NEXT: and a6, a4, a3
+; RV32IA-ZACAS-NEXT: mv a5, a4
+; RV32IA-ZACAS-NEXT: bgeu a6, a1, .LBB59_3
+; RV32IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB59_1 Depth=1
+; RV32IA-ZACAS-NEXT: xor a5, a4, a1
+; RV32IA-ZACAS-NEXT: and a5, a5, a3
+; RV32IA-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-ZACAS-NEXT: .LBB59_3: # in Loop: Header=BB59_1 Depth=1
+; RV32IA-ZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-ZACAS-NEXT: bnez a5, .LBB59_1
+; RV32IA-ZACAS-NEXT: # %bb.4:
+; RV32IA-ZACAS-NEXT: srl a0, a4, a0
+; RV32IA-ZACAS-NEXT: ret
+;
; RV64IA-ZACAS-LABEL: atomicrmw_umax_i8_seq_cst:
; RV64IA-ZACAS: # %bb.0:
; RV64IA-ZACAS-NEXT: andi a2, a0, -4
@@ -8749,6 +11479,16 @@ define i8 @atomicrmw_umax_i8_seq_cst(ptr %a, i8 %b) nounwind {
; RV64IA-ZACAS-NEXT: srlw a0, a4, a0
; RV64IA-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_umax_i8_seq_cst:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amomaxu.b.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_umax_i8_seq_cst:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amomaxu.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_umax_i8_seq_cst:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amomaxu.b.aqrl a0, a1, (a0)
@@ -8803,29 +11543,29 @@ define i8 @atomicrmw_umin_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
-; RV32IA-LABEL: atomicrmw_umin_i8_monotonic:
-; RV32IA: # %bb.0:
-; RV32IA-NEXT: andi a2, a0, -4
-; RV32IA-NEXT: slli a0, a0, 3
-; RV32IA-NEXT: li a3, 255
-; RV32IA-NEXT: zext.b a1, a1
-; RV32IA-NEXT: sll a3, a3, a0
-; RV32IA-NEXT: sll a1, a1, a0
-; RV32IA-NEXT: .LBB60_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: lr.w a4, (a2)
-; RV32IA-NEXT: and a6, a4, a3
-; RV32IA-NEXT: mv a5, a4
-; RV32IA-NEXT: bgeu a1, a6, .LBB60_3
-; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB60_1 Depth=1
-; RV32IA-NEXT: xor a5, a4, a1
-; RV32IA-NEXT: and a5, a5, a3
-; RV32IA-NEXT: xor a5, a4, a5
-; RV32IA-NEXT: .LBB60_3: # in Loop: Header=BB60_1 Depth=1
-; RV32IA-NEXT: sc.w a5, a5, (a2)
-; RV32IA-NEXT: bnez a5, .LBB60_1
-; RV32IA-NEXT: # %bb.4:
-; RV32IA-NEXT: srl a0, a4, a0
-; RV32IA-NEXT: ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_umin_i8_monotonic:
+; RV32IA-NOZACAS: # %bb.0:
+; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT: li a3, 255
+; RV32IA-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-NOZACAS-NEXT: .LBB60_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NOZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-NOZACAS-NEXT: and a6, a4, a3
+; RV32IA-NOZACAS-NEXT: mv a5, a4
+; RV32IA-NOZACAS-NEXT: bgeu a1, a6, .LBB60_3
+; RV32IA-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB60_1 Depth=1
+; RV32IA-NOZACAS-NEXT: xor a5, a4, a1
+; RV32IA-NOZACAS-NEXT: and a5, a5, a3
+; RV32IA-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-NOZACAS-NEXT: .LBB60_3: # in Loop: Header=BB60_1 Depth=1
+; RV32IA-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-NOZACAS-NEXT: bnez a5, .LBB60_1
+; RV32IA-NOZACAS-NEXT: # %bb.4:
+; RV32IA-NOZACAS-NEXT: srl a0, a4, a0
+; RV32IA-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_umin_i8_monotonic:
; RV64I: # %bb.0:
@@ -8891,6 +11631,30 @@ define i8 @atomicrmw_umin_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV64IA-NOZACAS-NEXT: srlw a0, a4, a0
; RV64IA-NOZACAS-NEXT: ret
;
+; RV32IA-ZACAS-LABEL: atomicrmw_umin_i8_monotonic:
+; RV32IA-ZACAS: # %bb.0:
+; RV32IA-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-ZACAS-NEXT: li a3, 255
+; RV32IA-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-ZACAS-NEXT: .LBB60_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-ZACAS-NEXT: and a6, a4, a3
+; RV32IA-ZACAS-NEXT: mv a5, a4
+; RV32IA-ZACAS-NEXT: bgeu a1, a6, .LBB60_3
+; RV32IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB60_1 Depth=1
+; RV32IA-ZACAS-NEXT: xor a5, a4, a1
+; RV32IA-ZACAS-NEXT: and a5, a5, a3
+; RV32IA-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-ZACAS-NEXT: .LBB60_3: # in Loop: Header=BB60_1 Depth=1
+; RV32IA-ZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-ZACAS-NEXT: bnez a5, .LBB60_1
+; RV32IA-ZACAS-NEXT: # %bb.4:
+; RV32IA-ZACAS-NEXT: srl a0, a4, a0
+; RV32IA-ZACAS-NEXT: ret
+;
; RV64IA-ZACAS-LABEL: atomicrmw_umin_i8_monotonic:
; RV64IA-ZACAS: # %bb.0:
; RV64IA-ZACAS-NEXT: andi a2, a0, -4
@@ -8915,6 +11679,16 @@ define i8 @atomicrmw_umin_i8_monotonic(ptr %a, i8 %b) nounwind {
; RV64IA-ZACAS-NEXT: srlw a0, a4, a0
; RV64IA-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_umin_i8_monotonic:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amominu.b a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_umin_i8_monotonic:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amominu.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_umin_i8_monotonic:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amominu.b a0, a1, (a0)
@@ -8969,53 +11743,53 @@ define i8 @atomicrmw_umin_i8_acquire(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_umin_i8_acquire:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: li a3, 255
-; RV32IA-WMO-NEXT: zext.b a1, a1
-; RV32IA-WMO-NEXT: sll a3, a3, a0
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: .LBB61_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT: lr.w.aq a4, (a2)
-; RV32IA-WMO-NEXT: and a6, a4, a3
-; RV32IA-WMO-NEXT: mv a5, a4
-; RV32IA-WMO-NEXT: bgeu a1, a6, .LBB61_3
-; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB61_1 Depth=1
-; RV32IA-WMO-NEXT: xor a5, a4, a1
-; RV32IA-WMO-NEXT: and a5, a5, a3
-; RV32IA-WMO-NEXT: xor a5, a4, a5
-; RV32IA-WMO-NEXT: .LBB61_3: # in Loop: Header=BB61_1 Depth=1
-; RV32IA-WMO-NEXT: sc.w a5, a5, (a2)
-; RV32IA-WMO-NEXT: bnez a5, .LBB61_1
-; RV32IA-WMO-NEXT: # %bb.4:
-; RV32IA-WMO-NEXT: srl a0, a4, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_umin_i8_acquire:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: li a3, 255
+; RV32IA-WMO-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: .LBB61_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: lr.w.aq a4, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: and a6, a4, a3
+; RV32IA-WMO-NOZACAS-NEXT: mv a5, a4
+; RV32IA-WMO-NOZACAS-NEXT: bgeu a1, a6, .LBB61_3
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB61_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a4, a1
+; RV32IA-WMO-NOZACAS-NEXT: and a5, a5, a3
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-NOZACAS-NEXT: .LBB61_3: # in Loop: Header=BB61_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: bnez a5, .LBB61_1
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.4:
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a4, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_umin_i8_acquire:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: li a3, 255
-; RV32IA-TSO-NEXT: zext.b a1, a1
-; RV32IA-TSO-NEXT: sll a3, a3, a0
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: .LBB61_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT: lr.w a4, (a2)
-; RV32IA-TSO-NEXT: and a6, a4, a3
-; RV32IA-TSO-NEXT: mv a5, a4
-; RV32IA-TSO-NEXT: bgeu a1, a6, .LBB61_3
-; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB61_1 Depth=1
-; RV32IA-TSO-NEXT: xor a5, a4, a1
-; RV32IA-TSO-NEXT: and a5, a5, a3
-; RV32IA-TSO-NEXT: xor a5, a4, a5
-; RV32IA-TSO-NEXT: .LBB61_3: # in Loop: Header=BB61_1 Depth=1
-; RV32IA-TSO-NEXT: sc.w a5, a5, (a2)
-; RV32IA-TSO-NEXT: bnez a5, .LBB61_1
-; RV32IA-TSO-NEXT: # %bb.4:
-; RV32IA-TSO-NEXT: srl a0, a4, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_umin_i8_acquire:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: li a3, 255
+; RV32IA-TSO-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: .LBB61_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: and a6, a4, a3
+; RV32IA-TSO-NOZACAS-NEXT: mv a5, a4
+; RV32IA-TSO-NOZACAS-NEXT: bgeu a1, a6, .LBB61_3
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB61_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a4, a1
+; RV32IA-TSO-NOZACAS-NEXT: and a5, a5, a3
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-NOZACAS-NEXT: .LBB61_3: # in Loop: Header=BB61_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: bnez a5, .LBB61_1
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.4:
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a4, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_umin_i8_acquire:
; RV64I: # %bb.0:
@@ -9105,6 +11879,54 @@ define i8 @atomicrmw_umin_i8_acquire(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a4, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_umin_i8_acquire:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: li a3, 255
+; RV32IA-WMO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: .LBB61_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a4, (a2)
+; RV32IA-WMO-ZACAS-NEXT: and a6, a4, a3
+; RV32IA-WMO-ZACAS-NEXT: mv a5, a4
+; RV32IA-WMO-ZACAS-NEXT: bgeu a1, a6, .LBB61_3
+; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB61_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a4, a1
+; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a3
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-ZACAS-NEXT: .LBB61_3: # in Loop: Header=BB61_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB61_1
+; RV32IA-WMO-ZACAS-NEXT: # %bb.4:
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a4, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_umin_i8_acquire:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: li a3, 255
+; RV32IA-TSO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: .LBB61_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-TSO-ZACAS-NEXT: and a6, a4, a3
+; RV32IA-TSO-ZACAS-NEXT: mv a5, a4
+; RV32IA-TSO-ZACAS-NEXT: bgeu a1, a6, .LBB61_3
+; RV32IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB61_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a4, a1
+; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a3
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-ZACAS-NEXT: .LBB61_3: # in Loop: Header=BB61_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT: bnez a5, .LBB61_1
+; RV32IA-TSO-ZACAS-NEXT: # %bb.4:
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a4, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_umin_i8_acquire:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -9153,6 +11975,16 @@ define i8 @atomicrmw_umin_i8_acquire(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a4, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_umin_i8_acquire:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amominu.b.aq a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_umin_i8_acquire:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amominu.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_umin_i8_acquire:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amominu.b.aq a0, a1, (a0)
@@ -9207,53 +12039,53 @@ define i8 @atomicrmw_umin_i8_release(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_umin_i8_release:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: li a3, 255
-; RV32IA-WMO-NEXT: zext.b a1, a1
-; RV32IA-WMO-NEXT: sll a3, a3, a0
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: .LBB62_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT: lr.w a4, (a2)
-; RV32IA-WMO-NEXT: and a6, a4, a3
-; RV32IA-WMO-NEXT: mv a5, a4
-; RV32IA-WMO-NEXT: bgeu a1, a6, .LBB62_3
-; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB62_1 Depth=1
-; RV32IA-WMO-NEXT: xor a5, a4, a1
-; RV32IA-WMO-NEXT: and a5, a5, a3
-; RV32IA-WMO-NEXT: xor a5, a4, a5
-; RV32IA-WMO-NEXT: .LBB62_3: # in Loop: Header=BB62_1 Depth=1
-; RV32IA-WMO-NEXT: sc.w.rl a5, a5, (a2)
-; RV32IA-WMO-NEXT: bnez a5, .LBB62_1
-; RV32IA-WMO-NEXT: # %bb.4:
-; RV32IA-WMO-NEXT: srl a0, a4, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_umin_i8_release:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: li a3, 255
+; RV32IA-WMO-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: .LBB62_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: and a6, a4, a3
+; RV32IA-WMO-NOZACAS-NEXT: mv a5, a4
+; RV32IA-WMO-NOZACAS-NEXT: bgeu a1, a6, .LBB62_3
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB62_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a4, a1
+; RV32IA-WMO-NOZACAS-NEXT: and a5, a5, a3
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-NOZACAS-NEXT: .LBB62_3: # in Loop: Header=BB62_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: bnez a5, .LBB62_1
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.4:
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a4, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_umin_i8_release:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: li a3, 255
-; RV32IA-TSO-NEXT: zext.b a1, a1
-; RV32IA-TSO-NEXT: sll a3, a3, a0
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: .LBB62_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT: lr.w a4, (a2)
-; RV32IA-TSO-NEXT: and a6, a4, a3
-; RV32IA-TSO-NEXT: mv a5, a4
-; RV32IA-TSO-NEXT: bgeu a1, a6, .LBB62_3
-; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB62_1 Depth=1
-; RV32IA-TSO-NEXT: xor a5, a4, a1
-; RV32IA-TSO-NEXT: and a5, a5, a3
-; RV32IA-TSO-NEXT: xor a5, a4, a5
-; RV32IA-TSO-NEXT: .LBB62_3: # in Loop: Header=BB62_1 Depth=1
-; RV32IA-TSO-NEXT: sc.w a5, a5, (a2)
-; RV32IA-TSO-NEXT: bnez a5, .LBB62_1
-; RV32IA-TSO-NEXT: # %bb.4:
-; RV32IA-TSO-NEXT: srl a0, a4, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_umin_i8_release:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: li a3, 255
+; RV32IA-TSO-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: .LBB62_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: and a6, a4, a3
+; RV32IA-TSO-NOZACAS-NEXT: mv a5, a4
+; RV32IA-TSO-NOZACAS-NEXT: bgeu a1, a6, .LBB62_3
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB62_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a4, a1
+; RV32IA-TSO-NOZACAS-NEXT: and a5, a5, a3
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-NOZACAS-NEXT: .LBB62_3: # in Loop: Header=BB62_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: bnez a5, .LBB62_1
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.4:
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a4, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_umin_i8_release:
; RV64I: # %bb.0:
@@ -9343,6 +12175,54 @@ define i8 @atomicrmw_umin_i8_release(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a4, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_umin_i8_release:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: li a3, 255
+; RV32IA-WMO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: .LBB62_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-WMO-ZACAS-NEXT: and a6, a4, a3
+; RV32IA-WMO-ZACAS-NEXT: mv a5, a4
+; RV32IA-WMO-ZACAS-NEXT: bgeu a1, a6, .LBB62_3
+; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB62_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a4, a1
+; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a3
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-ZACAS-NEXT: .LBB62_3: # in Loop: Header=BB62_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB62_1
+; RV32IA-WMO-ZACAS-NEXT: # %bb.4:
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a4, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_umin_i8_release:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: li a3, 255
+; RV32IA-TSO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: .LBB62_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-TSO-ZACAS-NEXT: and a6, a4, a3
+; RV32IA-TSO-ZACAS-NEXT: mv a5, a4
+; RV32IA-TSO-ZACAS-NEXT: bgeu a1, a6, .LBB62_3
+; RV32IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB62_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a4, a1
+; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a3
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-ZACAS-NEXT: .LBB62_3: # in Loop: Header=BB62_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT: bnez a5, .LBB62_1
+; RV32IA-TSO-ZACAS-NEXT: # %bb.4:
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a4, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_umin_i8_release:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -9391,6 +12271,16 @@ define i8 @atomicrmw_umin_i8_release(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a4, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_umin_i8_release:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amominu.b.rl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_umin_i8_release:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amominu.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_umin_i8_release:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amominu.b.rl a0, a1, (a0)
@@ -9445,53 +12335,53 @@ define i8 @atomicrmw_umin_i8_acq_rel(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_umin_i8_acq_rel:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: li a3, 255
-; RV32IA-WMO-NEXT: zext.b a1, a1
-; RV32IA-WMO-NEXT: sll a3, a3, a0
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: .LBB63_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT: lr.w.aq a4, (a2)
-; RV32IA-WMO-NEXT: and a6, a4, a3
-; RV32IA-WMO-NEXT: mv a5, a4
-; RV32IA-WMO-NEXT: bgeu a1, a6, .LBB63_3
-; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB63_1 Depth=1
-; RV32IA-WMO-NEXT: xor a5, a4, a1
-; RV32IA-WMO-NEXT: and a5, a5, a3
-; RV32IA-WMO-NEXT: xor a5, a4, a5
-; RV32IA-WMO-NEXT: .LBB63_3: # in Loop: Header=BB63_1 Depth=1
-; RV32IA-WMO-NEXT: sc.w.rl a5, a5, (a2)
-; RV32IA-WMO-NEXT: bnez a5, .LBB63_1
-; RV32IA-WMO-NEXT: # %bb.4:
-; RV32IA-WMO-NEXT: srl a0, a4, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_umin_i8_acq_rel:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: li a3, 255
+; RV32IA-WMO-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: .LBB63_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: lr.w.aq a4, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: and a6, a4, a3
+; RV32IA-WMO-NOZACAS-NEXT: mv a5, a4
+; RV32IA-WMO-NOZACAS-NEXT: bgeu a1, a6, .LBB63_3
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB63_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a4, a1
+; RV32IA-WMO-NOZACAS-NEXT: and a5, a5, a3
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-NOZACAS-NEXT: .LBB63_3: # in Loop: Header=BB63_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: bnez a5, .LBB63_1
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.4:
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a4, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_umin_i8_acq_rel:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: li a3, 255
-; RV32IA-TSO-NEXT: zext.b a1, a1
-; RV32IA-TSO-NEXT: sll a3, a3, a0
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: .LBB63_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT: lr.w a4, (a2)
-; RV32IA-TSO-NEXT: and a6, a4, a3
-; RV32IA-TSO-NEXT: mv a5, a4
-; RV32IA-TSO-NEXT: bgeu a1, a6, .LBB63_3
-; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB63_1 Depth=1
-; RV32IA-TSO-NEXT: xor a5, a4, a1
-; RV32IA-TSO-NEXT: and a5, a5, a3
-; RV32IA-TSO-NEXT: xor a5, a4, a5
-; RV32IA-TSO-NEXT: .LBB63_3: # in Loop: Header=BB63_1 Depth=1
-; RV32IA-TSO-NEXT: sc.w a5, a5, (a2)
-; RV32IA-TSO-NEXT: bnez a5, .LBB63_1
-; RV32IA-TSO-NEXT: # %bb.4:
-; RV32IA-TSO-NEXT: srl a0, a4, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_umin_i8_acq_rel:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: li a3, 255
+; RV32IA-TSO-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: .LBB63_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: and a6, a4, a3
+; RV32IA-TSO-NOZACAS-NEXT: mv a5, a4
+; RV32IA-TSO-NOZACAS-NEXT: bgeu a1, a6, .LBB63_3
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB63_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a4, a1
+; RV32IA-TSO-NOZACAS-NEXT: and a5, a5, a3
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-NOZACAS-NEXT: .LBB63_3: # in Loop: Header=BB63_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: bnez a5, .LBB63_1
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.4:
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a4, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_umin_i8_acq_rel:
; RV64I: # %bb.0:
@@ -9581,6 +12471,54 @@ define i8 @atomicrmw_umin_i8_acq_rel(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a4, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_umin_i8_acq_rel:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: li a3, 255
+; RV32IA-WMO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-WMO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: .LBB63_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a4, (a2)
+; RV32IA-WMO-ZACAS-NEXT: and a6, a4, a3
+; RV32IA-WMO-ZACAS-NEXT: mv a5, a4
+; RV32IA-WMO-ZACAS-NEXT: bgeu a1, a6, .LBB63_3
+; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB63_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a4, a1
+; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a3
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-WMO-ZACAS-NEXT: .LBB63_3: # in Loop: Header=BB63_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB63_1
+; RV32IA-WMO-ZACAS-NEXT: # %bb.4:
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a4, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_umin_i8_acq_rel:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: li a3, 255
+; RV32IA-TSO-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-TSO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: .LBB63_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT: lr.w a4, (a2)
+; RV32IA-TSO-ZACAS-NEXT: and a6, a4, a3
+; RV32IA-TSO-ZACAS-NEXT: mv a5, a4
+; RV32IA-TSO-ZACAS-NEXT: bgeu a1, a6, .LBB63_3
+; RV32IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB63_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a4, a1
+; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a3
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-TSO-ZACAS-NEXT: .LBB63_3: # in Loop: Header=BB63_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT: bnez a5, .LBB63_1
+; RV32IA-TSO-ZACAS-NEXT: # %bb.4:
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a4, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_umin_i8_acq_rel:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -9629,6 +12567,16 @@ define i8 @atomicrmw_umin_i8_acq_rel(ptr %a, i8 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a4, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_umin_i8_acq_rel:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amominu.b.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_umin_i8_acq_rel:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amominu.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_umin_i8_acq_rel:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amominu.b.aqrl a0, a1, (a0)
@@ -9683,29 +12631,29 @@ define i8 @atomicrmw_umin_i8_seq_cst(ptr %a, i8 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
-; RV32IA-LABEL: atomicrmw_umin_i8_seq_cst:
-; RV32IA: # %bb.0:
-; RV32IA-NEXT: andi a2, a0, -4
-; RV32IA-NEXT: slli a0, a0, 3
-; RV32IA-NEXT: li a3, 255
-; RV32IA-NEXT: zext.b a1, a1
-; RV32IA-NEXT: sll a3, a3, a0
-; RV32IA-NEXT: sll a1, a1, a0
-; RV32IA-NEXT: .LBB64_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: lr.w.aqrl a4, (a2)
-; RV32IA-NEXT: and a6, a4, a3
-; RV32IA-NEXT: mv a5, a4
-; RV32IA-NEXT: bgeu a1, a6, .LBB64_3
-; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB64_1 Depth=1
-; RV32IA-NEXT: xor a5, a4, a1
-; RV32IA-NEXT: and a5, a5, a3
-; RV32IA-NEXT: xor a5, a4, a5
-; RV32IA-NEXT: .LBB64_3: # in Loop: Header=BB64_1 Depth=1
-; RV32IA-NEXT: sc.w.rl a5, a5, (a2)
-; RV32IA-NEXT: bnez a5, .LBB64_1
-; RV32IA-NEXT: # %bb.4:
-; RV32IA-NEXT: srl a0, a4, a0
-; RV32IA-NEXT: ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_umin_i8_seq_cst:
+; RV32IA-NOZACAS: # %bb.0:
+; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT: li a3, 255
+; RV32IA-NOZACAS-NEXT: zext.b a1, a1
+; RV32IA-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-NOZACAS-NEXT: .LBB64_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NOZACAS-NEXT: lr.w.aqrl a4, (a2)
+; RV32IA-NOZACAS-NEXT: and a6, a4, a3
+; RV32IA-NOZACAS-NEXT: mv a5, a4
+; RV32IA-NOZACAS-NEXT: bgeu a1, a6, .LBB64_3
+; RV32IA-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB64_1 Depth=1
+; RV32IA-NOZACAS-NEXT: xor a5, a4, a1
+; RV32IA-NOZACAS-NEXT: and a5, a5, a3
+; RV32IA-NOZACAS-NEXT: xor a5, a4, a5
+; RV32IA-NOZACAS-NEXT: .LBB64_3: # in Loop: Header=BB64_1 Depth=1
+; RV32IA-NOZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-NOZACAS-NEXT: bnez a5, .LBB64_1
+; RV32IA-NOZACAS-NEXT: # %bb.4:
+; RV32IA-NOZACAS-NEXT: srl a0, a4, a0
+; RV32IA-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_umin_i8_seq_cst:
; RV64I: # %bb.0:
@@ -9771,6 +12719,30 @@ define i8 @atomicrmw_umin_i8_seq_cst(ptr %a, i8 %b) nounwind {
; RV64IA-NOZACAS-NEXT: srlw a0, a4, a0
; RV64IA-NOZACAS-NEXT: ret
;
+; RV32IA-ZACAS-LABEL: atomicrmw_umin_i8_seq_cst:
+; RV32IA-ZACAS: # %bb.0:
+; RV32IA-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-ZACAS-NEXT: li a3, 255
+; RV32IA-ZACAS-NEXT: zext.b a1, a1
+; RV32IA-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-ZACAS-NEXT: .LBB64_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT: lr.w.aqrl a4, (a2)
+; RV32IA-ZACAS-NEXT: and a6, a4, a3
+; RV32IA-ZACAS-NEXT: mv a5, a4
+; RV32IA-ZACAS-NEXT: bgeu a1, a6, .LBB64_3
+; RV32IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB64_1 Depth=1
+; RV32IA-ZACAS-NEXT: xor a5, a4, a1
+; RV32IA-ZACAS-NEXT: and a5, a5, a3
+; RV32IA-ZACAS-NEXT: xor a5, a4, a5
+; RV32IA-ZACAS-NEXT: .LBB64_3: # in Loop: Header=BB64_1 Depth=1
+; RV32IA-ZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-ZACAS-NEXT: bnez a5, .LBB64_1
+; RV32IA-ZACAS-NEXT: # %bb.4:
+; RV32IA-ZACAS-NEXT: srl a0, a4, a0
+; RV32IA-ZACAS-NEXT: ret
+;
; RV64IA-ZACAS-LABEL: atomicrmw_umin_i8_seq_cst:
; RV64IA-ZACAS: # %bb.0:
; RV64IA-ZACAS-NEXT: andi a2, a0, -4
@@ -9795,6 +12767,16 @@ define i8 @atomicrmw_umin_i8_seq_cst(ptr %a, i8 %b) nounwind {
; RV64IA-ZACAS-NEXT: srlw a0, a4, a0
; RV64IA-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_umin_i8_seq_cst:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amominu.b.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_umin_i8_seq_cst:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amominu.b a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_umin_i8_seq_cst:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amominu.b.aqrl a0, a1, (a0)
@@ -9819,26 +12801,26 @@ define i16 @atomicrmw_xchg_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-LABEL: atomicrmw_xchg_i16_monotonic:
-; RV32IA: # %bb.0:
-; RV32IA-NEXT: andi a2, a0, -4
-; RV32IA-NEXT: slli a0, a0, 3
-; RV32IA-NEXT: lui a3, 16
-; RV32IA-NEXT: addi a3, a3, -1
-; RV32IA-NEXT: sll a4, a3, a0
-; RV32IA-NEXT: and a1, a1, a3
-; RV32IA-NEXT: sll a1, a1, a0
-; RV32IA-NEXT: .LBB65_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: lr.w a3, (a2)
-; RV32IA-NEXT: mv a5, a1
-; RV32IA-NEXT: xor a5, a3, a5
-; RV32IA-NEXT: and a5, a5, a4
-; RV32IA-NEXT: xor a5, a3, a5
-; RV32IA-NEXT: sc.w a5, a5, (a2)
-; RV32IA-NEXT: bnez a5, .LBB65_1
-; RV32IA-NEXT: # %bb.2:
-; RV32IA-NEXT: srl a0, a3, a0
-; RV32IA-NEXT: ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_xchg_i16_monotonic:
+; RV32IA-NOZACAS: # %bb.0:
+; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT: lui a3, 16
+; RV32IA-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-NOZACAS-NEXT: .LBB65_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NOZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-NOZACAS-NEXT: mv a5, a1
+; RV32IA-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-NOZACAS-NEXT: and a5, a5, a4
+; RV32IA-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-NOZACAS-NEXT: bnez a5, .LBB65_1
+; RV32IA-NOZACAS-NEXT: # %bb.2:
+; RV32IA-NOZACAS-NEXT: srl a0, a3, a0
+; RV32IA-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_xchg_i16_monotonic:
; RV64I: # %bb.0:
@@ -9871,6 +12853,27 @@ define i16 @atomicrmw_xchg_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV64IA-NOZACAS-NEXT: srlw a0, a3, a0
; RV64IA-NOZACAS-NEXT: ret
;
+; RV32IA-ZACAS-LABEL: atomicrmw_xchg_i16_monotonic:
+; RV32IA-ZACAS: # %bb.0:
+; RV32IA-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-ZACAS-NEXT: lui a3, 16
+; RV32IA-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-ZACAS-NEXT: sll a4, a3, a0
+; RV32IA-ZACAS-NEXT: and a1, a1, a3
+; RV32IA-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-ZACAS-NEXT: .LBB65_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-ZACAS-NEXT: mv a5, a1
+; RV32IA-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-ZACAS-NEXT: and a5, a5, a4
+; RV32IA-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-ZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-ZACAS-NEXT: bnez a5, .LBB65_1
+; RV32IA-ZACAS-NEXT: # %bb.2:
+; RV32IA-ZACAS-NEXT: srl a0, a3, a0
+; RV32IA-ZACAS-NEXT: ret
+;
; RV64IA-ZACAS-LABEL: atomicrmw_xchg_i16_monotonic:
; RV64IA-ZACAS: # %bb.0:
; RV64IA-ZACAS-NEXT: andi a2, a0, -4
@@ -9892,6 +12895,16 @@ define i16 @atomicrmw_xchg_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV64IA-ZACAS-NEXT: srlw a0, a3, a0
; RV64IA-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xchg_i16_monotonic:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amoswap.h a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_xchg_i16_monotonic:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amoswap.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_xchg_i16_monotonic:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amoswap.h a0, a1, (a0)
@@ -9916,47 +12929,47 @@ define i16 @atomicrmw_xchg_i16_acquire(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_xchg_i16_acquire:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: lui a3, 16
-; RV32IA-WMO-NEXT: addi a3, a3, -1
-; RV32IA-WMO-NEXT: sll a4, a3, a0
-; RV32IA-WMO-NEXT: and a1, a1, a3
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: .LBB66_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT: lr.w.aq a3, (a2)
-; RV32IA-WMO-NEXT: mv a5, a1
-; RV32IA-WMO-NEXT: xor a5, a3, a5
-; RV32IA-WMO-NEXT: and a5, a5, a4
-; RV32IA-WMO-NEXT: xor a5, a3, a5
-; RV32IA-WMO-NEXT: sc.w a5, a5, (a2)
-; RV32IA-WMO-NEXT: bnez a5, .LBB66_1
-; RV32IA-WMO-NEXT: # %bb.2:
-; RV32IA-WMO-NEXT: srl a0, a3, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_i16_acquire:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: .LBB66_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: lr.w.aq a3, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: mv a5, a1
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-NOZACAS-NEXT: and a5, a5, a4
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: bnez a5, .LBB66_1
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_xchg_i16_acquire:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: lui a3, 16
-; RV32IA-TSO-NEXT: addi a3, a3, -1
-; RV32IA-TSO-NEXT: sll a4, a3, a0
-; RV32IA-TSO-NEXT: and a1, a1, a3
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: .LBB66_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT: lr.w a3, (a2)
-; RV32IA-TSO-NEXT: mv a5, a1
-; RV32IA-TSO-NEXT: xor a5, a3, a5
-; RV32IA-TSO-NEXT: and a5, a5, a4
-; RV32IA-TSO-NEXT: xor a5, a3, a5
-; RV32IA-TSO-NEXT: sc.w a5, a5, (a2)
-; RV32IA-TSO-NEXT: bnez a5, .LBB66_1
-; RV32IA-TSO-NEXT: # %bb.2:
-; RV32IA-TSO-NEXT: srl a0, a3, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_xchg_i16_acquire:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: .LBB66_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: mv a5, a1
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-NOZACAS-NEXT: and a5, a5, a4
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: bnez a5, .LBB66_1
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_xchg_i16_acquire:
; RV64I: # %bb.0:
@@ -10010,6 +13023,48 @@ define i16 @atomicrmw_xchg_i16_acquire(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a3, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_xchg_i16_acquire:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-ZACAS-NEXT: sll a4, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a3
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: .LBB66_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a3, (a2)
+; RV32IA-WMO-ZACAS-NEXT: mv a5, a1
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a4
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-ZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB66_1
+; RV32IA-WMO-ZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_xchg_i16_acquire:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-ZACAS-NEXT: sll a4, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: and a1, a1, a3
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: .LBB66_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-TSO-ZACAS-NEXT: mv a5, a1
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a4
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT: bnez a5, .LBB66_1
+; RV32IA-TSO-ZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_xchg_i16_acquire:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -10052,6 +13107,16 @@ define i16 @atomicrmw_xchg_i16_acquire(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a3, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xchg_i16_acquire:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amoswap.h.aq a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_xchg_i16_acquire:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amoswap.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_xchg_i16_acquire:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amoswap.h.aq a0, a1, (a0)
@@ -10076,47 +13141,47 @@ define i16 @atomicrmw_xchg_i16_release(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_xchg_i16_release:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: lui a3, 16
-; RV32IA-WMO-NEXT: addi a3, a3, -1
-; RV32IA-WMO-NEXT: sll a4, a3, a0
-; RV32IA-WMO-NEXT: and a1, a1, a3
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: .LBB67_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT: lr.w a3, (a2)
-; RV32IA-WMO-NEXT: mv a5, a1
-; RV32IA-WMO-NEXT: xor a5, a3, a5
-; RV32IA-WMO-NEXT: and a5, a5, a4
-; RV32IA-WMO-NEXT: xor a5, a3, a5
-; RV32IA-WMO-NEXT: sc.w.rl a5, a5, (a2)
-; RV32IA-WMO-NEXT: bnez a5, .LBB67_1
-; RV32IA-WMO-NEXT: # %bb.2:
-; RV32IA-WMO-NEXT: srl a0, a3, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_i16_release:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: .LBB67_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: mv a5, a1
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-NOZACAS-NEXT: and a5, a5, a4
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-NOZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: bnez a5, .LBB67_1
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_xchg_i16_release:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: lui a3, 16
-; RV32IA-TSO-NEXT: addi a3, a3, -1
-; RV32IA-TSO-NEXT: sll a4, a3, a0
-; RV32IA-TSO-NEXT: and a1, a1, a3
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: .LBB67_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT: lr.w a3, (a2)
-; RV32IA-TSO-NEXT: mv a5, a1
-; RV32IA-TSO-NEXT: xor a5, a3, a5
-; RV32IA-TSO-NEXT: and a5, a5, a4
-; RV32IA-TSO-NEXT: xor a5, a3, a5
-; RV32IA-TSO-NEXT: sc.w a5, a5, (a2)
-; RV32IA-TSO-NEXT: bnez a5, .LBB67_1
-; RV32IA-TSO-NEXT: # %bb.2:
-; RV32IA-TSO-NEXT: srl a0, a3, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_xchg_i16_release:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: .LBB67_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: mv a5, a1
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-NOZACAS-NEXT: and a5, a5, a4
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: bnez a5, .LBB67_1
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_xchg_i16_release:
; RV64I: # %bb.0:
@@ -10170,6 +13235,48 @@ define i16 @atomicrmw_xchg_i16_release(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a3, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_xchg_i16_release:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-ZACAS-NEXT: sll a4, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a3
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: .LBB67_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-WMO-ZACAS-NEXT: mv a5, a1
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a4
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB67_1
+; RV32IA-WMO-ZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_xchg_i16_release:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-ZACAS-NEXT: sll a4, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: and a1, a1, a3
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: .LBB67_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-TSO-ZACAS-NEXT: mv a5, a1
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a4
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT: bnez a5, .LBB67_1
+; RV32IA-TSO-ZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_xchg_i16_release:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -10212,6 +13319,16 @@ define i16 @atomicrmw_xchg_i16_release(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a3, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xchg_i16_release:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amoswap.h.rl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_xchg_i16_release:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amoswap.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_xchg_i16_release:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amoswap.h.rl a0, a1, (a0)
@@ -10236,47 +13353,47 @@ define i16 @atomicrmw_xchg_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_xchg_i16_acq_rel:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: lui a3, 16
-; RV32IA-WMO-NEXT: addi a3, a3, -1
-; RV32IA-WMO-NEXT: sll a4, a3, a0
-; RV32IA-WMO-NEXT: and a1, a1, a3
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: .LBB68_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT: lr.w.aq a3, (a2)
-; RV32IA-WMO-NEXT: mv a5, a1
-; RV32IA-WMO-NEXT: xor a5, a3, a5
-; RV32IA-WMO-NEXT: and a5, a5, a4
-; RV32IA-WMO-NEXT: xor a5, a3, a5
-; RV32IA-WMO-NEXT: sc.w.rl a5, a5, (a2)
-; RV32IA-WMO-NEXT: bnez a5, .LBB68_1
-; RV32IA-WMO-NEXT: # %bb.2:
-; RV32IA-WMO-NEXT: srl a0, a3, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_i16_acq_rel:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: .LBB68_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: lr.w.aq a3, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: mv a5, a1
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-NOZACAS-NEXT: and a5, a5, a4
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-NOZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: bnez a5, .LBB68_1
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_xchg_i16_acq_rel:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: lui a3, 16
-; RV32IA-TSO-NEXT: addi a3, a3, -1
-; RV32IA-TSO-NEXT: sll a4, a3, a0
-; RV32IA-TSO-NEXT: and a1, a1, a3
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: .LBB68_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT: lr.w a3, (a2)
-; RV32IA-TSO-NEXT: mv a5, a1
-; RV32IA-TSO-NEXT: xor a5, a3, a5
-; RV32IA-TSO-NEXT: and a5, a5, a4
-; RV32IA-TSO-NEXT: xor a5, a3, a5
-; RV32IA-TSO-NEXT: sc.w a5, a5, (a2)
-; RV32IA-TSO-NEXT: bnez a5, .LBB68_1
-; RV32IA-TSO-NEXT: # %bb.2:
-; RV32IA-TSO-NEXT: srl a0, a3, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_xchg_i16_acq_rel:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: .LBB68_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: mv a5, a1
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-NOZACAS-NEXT: and a5, a5, a4
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: bnez a5, .LBB68_1
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_xchg_i16_acq_rel:
; RV64I: # %bb.0:
@@ -10330,6 +13447,48 @@ define i16 @atomicrmw_xchg_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a3, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_xchg_i16_acq_rel:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-ZACAS-NEXT: sll a4, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a3
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: .LBB68_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a3, (a2)
+; RV32IA-WMO-ZACAS-NEXT: mv a5, a1
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a4
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB68_1
+; RV32IA-WMO-ZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_xchg_i16_acq_rel:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-ZACAS-NEXT: sll a4, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: and a1, a1, a3
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: .LBB68_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-TSO-ZACAS-NEXT: mv a5, a1
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a4
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT: bnez a5, .LBB68_1
+; RV32IA-TSO-ZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_xchg_i16_acq_rel:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -10372,6 +13531,16 @@ define i16 @atomicrmw_xchg_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a3, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xchg_i16_acq_rel:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amoswap.h.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_xchg_i16_acq_rel:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amoswap.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_xchg_i16_acq_rel:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amoswap.h.aqrl a0, a1, (a0)
@@ -10396,26 +13565,26 @@ define i16 @atomicrmw_xchg_i16_seq_cst(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-LABEL: atomicrmw_xchg_i16_seq_cst:
-; RV32IA: # %bb.0:
-; RV32IA-NEXT: andi a2, a0, -4
-; RV32IA-NEXT: slli a0, a0, 3
-; RV32IA-NEXT: lui a3, 16
-; RV32IA-NEXT: addi a3, a3, -1
-; RV32IA-NEXT: sll a4, a3, a0
-; RV32IA-NEXT: and a1, a1, a3
-; RV32IA-NEXT: sll a1, a1, a0
-; RV32IA-NEXT: .LBB69_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: lr.w.aqrl a3, (a2)
-; RV32IA-NEXT: mv a5, a1
-; RV32IA-NEXT: xor a5, a3, a5
-; RV32IA-NEXT: and a5, a5, a4
-; RV32IA-NEXT: xor a5, a3, a5
-; RV32IA-NEXT: sc.w.rl a5, a5, (a2)
-; RV32IA-NEXT: bnez a5, .LBB69_1
-; RV32IA-NEXT: # %bb.2:
-; RV32IA-NEXT: srl a0, a3, a0
-; RV32IA-NEXT: ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_xchg_i16_seq_cst:
+; RV32IA-NOZACAS: # %bb.0:
+; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT: lui a3, 16
+; RV32IA-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-NOZACAS-NEXT: .LBB69_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NOZACAS-NEXT: lr.w.aqrl a3, (a2)
+; RV32IA-NOZACAS-NEXT: mv a5, a1
+; RV32IA-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-NOZACAS-NEXT: and a5, a5, a4
+; RV32IA-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-NOZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-NOZACAS-NEXT: bnez a5, .LBB69_1
+; RV32IA-NOZACAS-NEXT: # %bb.2:
+; RV32IA-NOZACAS-NEXT: srl a0, a3, a0
+; RV32IA-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_xchg_i16_seq_cst:
; RV64I: # %bb.0:
@@ -10448,6 +13617,27 @@ define i16 @atomicrmw_xchg_i16_seq_cst(ptr %a, i16 %b) nounwind {
; RV64IA-NOZACAS-NEXT: srlw a0, a3, a0
; RV64IA-NOZACAS-NEXT: ret
;
+; RV32IA-ZACAS-LABEL: atomicrmw_xchg_i16_seq_cst:
+; RV32IA-ZACAS: # %bb.0:
+; RV32IA-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-ZACAS-NEXT: lui a3, 16
+; RV32IA-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-ZACAS-NEXT: sll a4, a3, a0
+; RV32IA-ZACAS-NEXT: and a1, a1, a3
+; RV32IA-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-ZACAS-NEXT: .LBB69_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT: lr.w.aqrl a3, (a2)
+; RV32IA-ZACAS-NEXT: mv a5, a1
+; RV32IA-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-ZACAS-NEXT: and a5, a5, a4
+; RV32IA-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-ZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-ZACAS-NEXT: bnez a5, .LBB69_1
+; RV32IA-ZACAS-NEXT: # %bb.2:
+; RV32IA-ZACAS-NEXT: srl a0, a3, a0
+; RV32IA-ZACAS-NEXT: ret
+;
; RV64IA-ZACAS-LABEL: atomicrmw_xchg_i16_seq_cst:
; RV64IA-ZACAS: # %bb.0:
; RV64IA-ZACAS-NEXT: andi a2, a0, -4
@@ -10469,6 +13659,16 @@ define i16 @atomicrmw_xchg_i16_seq_cst(ptr %a, i16 %b) nounwind {
; RV64IA-ZACAS-NEXT: srlw a0, a3, a0
; RV64IA-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xchg_i16_seq_cst:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amoswap.h.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_xchg_i16_seq_cst:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amoswap.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_xchg_i16_seq_cst:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amoswap.h.aqrl a0, a1, (a0)
@@ -10497,17 +13697,17 @@ define i16 @atomicrmw_xchg_0_i16_monotonic(ptr %a) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-LABEL: atomicrmw_xchg_0_i16_monotonic:
-; RV32IA: # %bb.0:
-; RV32IA-NEXT: andi a1, a0, -4
-; RV32IA-NEXT: slli a0, a0, 3
-; RV32IA-NEXT: lui a2, 16
-; RV32IA-NEXT: addi a2, a2, -1
-; RV32IA-NEXT: sll a2, a2, a0
-; RV32IA-NEXT: not a2, a2
-; RV32IA-NEXT: amoand.w a1, a2, (a1)
-; RV32IA-NEXT: srl a0, a1, a0
-; RV32IA-NEXT: ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_xchg_0_i16_monotonic:
+; RV32IA-NOZACAS: # %bb.0:
+; RV32IA-NOZACAS-NEXT: andi a1, a0, -4
+; RV32IA-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT: lui a2, 16
+; RV32IA-NOZACAS-NEXT: addi a2, a2, -1
+; RV32IA-NOZACAS-NEXT: sll a2, a2, a0
+; RV32IA-NOZACAS-NEXT: not a2, a2
+; RV32IA-NOZACAS-NEXT: amoand.w a1, a2, (a1)
+; RV32IA-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_xchg_0_i16_monotonic:
; RV64I: # %bb.0:
@@ -10532,6 +13732,18 @@ define i16 @atomicrmw_xchg_0_i16_monotonic(ptr %a) nounwind {
; RV64IA-NOZACAS-NEXT: srlw a0, a1, a0
; RV64IA-NOZACAS-NEXT: ret
;
+; RV32IA-ZACAS-LABEL: atomicrmw_xchg_0_i16_monotonic:
+; RV32IA-ZACAS: # %bb.0:
+; RV32IA-ZACAS-NEXT: andi a1, a0, -4
+; RV32IA-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-ZACAS-NEXT: lui a2, 16
+; RV32IA-ZACAS-NEXT: addi a2, a2, -1
+; RV32IA-ZACAS-NEXT: sll a2, a2, a0
+; RV32IA-ZACAS-NEXT: not a2, a2
+; RV32IA-ZACAS-NEXT: amoand.w a1, a2, (a1)
+; RV32IA-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-ZACAS-NEXT: ret
+;
; RV64IA-ZACAS-LABEL: atomicrmw_xchg_0_i16_monotonic:
; RV64IA-ZACAS: # %bb.0:
; RV64IA-ZACAS-NEXT: andi a1, a0, -4
@@ -10544,6 +13756,16 @@ define i16 @atomicrmw_xchg_0_i16_monotonic(ptr %a) nounwind {
; RV64IA-ZACAS-NEXT: srlw a0, a1, a0
; RV64IA-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xchg_0_i16_monotonic:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amoswap.h a0, zero, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_xchg_0_i16_monotonic:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amoswap.h a0, zero, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_xchg_0_i16_monotonic:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amoswap.h a0, zero, (a0)
@@ -10569,29 +13791,29 @@ define i16 @atomicrmw_xchg_0_i16_acquire(ptr %a) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_xchg_0_i16_acquire:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a1, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: lui a2, 16
-; RV32IA-WMO-NEXT: addi a2, a2, -1
-; RV32IA-WMO-NEXT: sll a2, a2, a0
-; RV32IA-WMO-NEXT: not a2, a2
-; RV32IA-WMO-NEXT: amoand.w.aq a1, a2, (a1)
-; RV32IA-WMO-NEXT: srl a0, a1, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_0_i16_acquire:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a1, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: lui a2, 16
+; RV32IA-WMO-NOZACAS-NEXT: addi a2, a2, -1
+; RV32IA-WMO-NOZACAS-NEXT: sll a2, a2, a0
+; RV32IA-WMO-NOZACAS-NEXT: not a2, a2
+; RV32IA-WMO-NOZACAS-NEXT: amoand.w.aq a1, a2, (a1)
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_xchg_0_i16_acquire:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a1, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: lui a2, 16
-; RV32IA-TSO-NEXT: addi a2, a2, -1
-; RV32IA-TSO-NEXT: sll a2, a2, a0
-; RV32IA-TSO-NEXT: not a2, a2
-; RV32IA-TSO-NEXT: amoand.w a1, a2, (a1)
-; RV32IA-TSO-NEXT: srl a0, a1, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_xchg_0_i16_acquire:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a1, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: lui a2, 16
+; RV32IA-TSO-NOZACAS-NEXT: addi a2, a2, -1
+; RV32IA-TSO-NOZACAS-NEXT: sll a2, a2, a0
+; RV32IA-TSO-NOZACAS-NEXT: not a2, a2
+; RV32IA-TSO-NOZACAS-NEXT: amoand.w a1, a2, (a1)
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_xchg_0_i16_acquire:
; RV64I: # %bb.0:
@@ -10628,6 +13850,30 @@ define i16 @atomicrmw_xchg_0_i16_acquire(ptr %a) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_xchg_0_i16_acquire:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a1, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: lui a2, 16
+; RV32IA-WMO-ZACAS-NEXT: addi a2, a2, -1
+; RV32IA-WMO-ZACAS-NEXT: sll a2, a2, a0
+; RV32IA-WMO-ZACAS-NEXT: not a2, a2
+; RV32IA-WMO-ZACAS-NEXT: amoand.w.aq a1, a2, (a1)
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_xchg_0_i16_acquire:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a1, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: lui a2, 16
+; RV32IA-TSO-ZACAS-NEXT: addi a2, a2, -1
+; RV32IA-TSO-ZACAS-NEXT: sll a2, a2, a0
+; RV32IA-TSO-ZACAS-NEXT: not a2, a2
+; RV32IA-TSO-ZACAS-NEXT: amoand.w a1, a2, (a1)
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_xchg_0_i16_acquire:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a1, a0, -4
@@ -10652,6 +13898,16 @@ define i16 @atomicrmw_xchg_0_i16_acquire(ptr %a) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xchg_0_i16_acquire:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amoswap.h.aq a0, zero, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_xchg_0_i16_acquire:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amoswap.h a0, zero, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_xchg_0_i16_acquire:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amoswap.h.aq a0, zero, (a0)
@@ -10677,29 +13933,29 @@ define i16 @atomicrmw_xchg_0_i16_release(ptr %a) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_xchg_0_i16_release:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a1, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: lui a2, 16
-; RV32IA-WMO-NEXT: addi a2, a2, -1
-; RV32IA-WMO-NEXT: sll a2, a2, a0
-; RV32IA-WMO-NEXT: not a2, a2
-; RV32IA-WMO-NEXT: amoand.w.rl a1, a2, (a1)
-; RV32IA-WMO-NEXT: srl a0, a1, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_0_i16_release:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a1, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: lui a2, 16
+; RV32IA-WMO-NOZACAS-NEXT: addi a2, a2, -1
+; RV32IA-WMO-NOZACAS-NEXT: sll a2, a2, a0
+; RV32IA-WMO-NOZACAS-NEXT: not a2, a2
+; RV32IA-WMO-NOZACAS-NEXT: amoand.w.rl a1, a2, (a1)
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_xchg_0_i16_release:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a1, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: lui a2, 16
-; RV32IA-TSO-NEXT: addi a2, a2, -1
-; RV32IA-TSO-NEXT: sll a2, a2, a0
-; RV32IA-TSO-NEXT: not a2, a2
-; RV32IA-TSO-NEXT: amoand.w a1, a2, (a1)
-; RV32IA-TSO-NEXT: srl a0, a1, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_xchg_0_i16_release:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a1, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: lui a2, 16
+; RV32IA-TSO-NOZACAS-NEXT: addi a2, a2, -1
+; RV32IA-TSO-NOZACAS-NEXT: sll a2, a2, a0
+; RV32IA-TSO-NOZACAS-NEXT: not a2, a2
+; RV32IA-TSO-NOZACAS-NEXT: amoand.w a1, a2, (a1)
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_xchg_0_i16_release:
; RV64I: # %bb.0:
@@ -10736,6 +13992,30 @@ define i16 @atomicrmw_xchg_0_i16_release(ptr %a) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_xchg_0_i16_release:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a1, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: lui a2, 16
+; RV32IA-WMO-ZACAS-NEXT: addi a2, a2, -1
+; RV32IA-WMO-ZACAS-NEXT: sll a2, a2, a0
+; RV32IA-WMO-ZACAS-NEXT: not a2, a2
+; RV32IA-WMO-ZACAS-NEXT: amoand.w.rl a1, a2, (a1)
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_xchg_0_i16_release:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a1, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: lui a2, 16
+; RV32IA-TSO-ZACAS-NEXT: addi a2, a2, -1
+; RV32IA-TSO-ZACAS-NEXT: sll a2, a2, a0
+; RV32IA-TSO-ZACAS-NEXT: not a2, a2
+; RV32IA-TSO-ZACAS-NEXT: amoand.w a1, a2, (a1)
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_xchg_0_i16_release:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a1, a0, -4
@@ -10760,6 +14040,16 @@ define i16 @atomicrmw_xchg_0_i16_release(ptr %a) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xchg_0_i16_release:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amoswap.h.rl a0, zero, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_xchg_0_i16_release:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amoswap.h a0, zero, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_xchg_0_i16_release:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amoswap.h.rl a0, zero, (a0)
@@ -10785,29 +14075,29 @@ define i16 @atomicrmw_xchg_0_i16_acq_rel(ptr %a) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_xchg_0_i16_acq_rel:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a1, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: lui a2, 16
-; RV32IA-WMO-NEXT: addi a2, a2, -1
-; RV32IA-WMO-NEXT: sll a2, a2, a0
-; RV32IA-WMO-NEXT: not a2, a2
-; RV32IA-WMO-NEXT: amoand.w.aqrl a1, a2, (a1)
-; RV32IA-WMO-NEXT: srl a0, a1, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_0_i16_acq_rel:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a1, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: lui a2, 16
+; RV32IA-WMO-NOZACAS-NEXT: addi a2, a2, -1
+; RV32IA-WMO-NOZACAS-NEXT: sll a2, a2, a0
+; RV32IA-WMO-NOZACAS-NEXT: not a2, a2
+; RV32IA-WMO-NOZACAS-NEXT: amoand.w.aqrl a1, a2, (a1)
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_xchg_0_i16_acq_rel:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a1, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: lui a2, 16
-; RV32IA-TSO-NEXT: addi a2, a2, -1
-; RV32IA-TSO-NEXT: sll a2, a2, a0
-; RV32IA-TSO-NEXT: not a2, a2
-; RV32IA-TSO-NEXT: amoand.w a1, a2, (a1)
-; RV32IA-TSO-NEXT: srl a0, a1, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_xchg_0_i16_acq_rel:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a1, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: lui a2, 16
+; RV32IA-TSO-NOZACAS-NEXT: addi a2, a2, -1
+; RV32IA-TSO-NOZACAS-NEXT: sll a2, a2, a0
+; RV32IA-TSO-NOZACAS-NEXT: not a2, a2
+; RV32IA-TSO-NOZACAS-NEXT: amoand.w a1, a2, (a1)
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_xchg_0_i16_acq_rel:
; RV64I: # %bb.0:
@@ -10844,6 +14134,30 @@ define i16 @atomicrmw_xchg_0_i16_acq_rel(ptr %a) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_xchg_0_i16_acq_rel:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a1, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: lui a2, 16
+; RV32IA-WMO-ZACAS-NEXT: addi a2, a2, -1
+; RV32IA-WMO-ZACAS-NEXT: sll a2, a2, a0
+; RV32IA-WMO-ZACAS-NEXT: not a2, a2
+; RV32IA-WMO-ZACAS-NEXT: amoand.w.aqrl a1, a2, (a1)
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_xchg_0_i16_acq_rel:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a1, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: lui a2, 16
+; RV32IA-TSO-ZACAS-NEXT: addi a2, a2, -1
+; RV32IA-TSO-ZACAS-NEXT: sll a2, a2, a0
+; RV32IA-TSO-ZACAS-NEXT: not a2, a2
+; RV32IA-TSO-ZACAS-NEXT: amoand.w a1, a2, (a1)
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_xchg_0_i16_acq_rel:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a1, a0, -4
@@ -10868,6 +14182,16 @@ define i16 @atomicrmw_xchg_0_i16_acq_rel(ptr %a) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xchg_0_i16_acq_rel:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amoswap.h.aqrl a0, zero, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_xchg_0_i16_acq_rel:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amoswap.h a0, zero, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_xchg_0_i16_acq_rel:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amoswap.h.aqrl a0, zero, (a0)
@@ -10893,29 +14217,29 @@ define i16 @atomicrmw_xchg_0_i16_seq_cst(ptr %a) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_xchg_0_i16_seq_cst:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a1, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: lui a2, 16
-; RV32IA-WMO-NEXT: addi a2, a2, -1
-; RV32IA-WMO-NEXT: sll a2, a2, a0
-; RV32IA-WMO-NEXT: not a2, a2
-; RV32IA-WMO-NEXT: amoand.w.aqrl a1, a2, (a1)
-; RV32IA-WMO-NEXT: srl a0, a1, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_0_i16_seq_cst:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a1, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: lui a2, 16
+; RV32IA-WMO-NOZACAS-NEXT: addi a2, a2, -1
+; RV32IA-WMO-NOZACAS-NEXT: sll a2, a2, a0
+; RV32IA-WMO-NOZACAS-NEXT: not a2, a2
+; RV32IA-WMO-NOZACAS-NEXT: amoand.w.aqrl a1, a2, (a1)
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_xchg_0_i16_seq_cst:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a1, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: lui a2, 16
-; RV32IA-TSO-NEXT: addi a2, a2, -1
-; RV32IA-TSO-NEXT: sll a2, a2, a0
-; RV32IA-TSO-NEXT: not a2, a2
-; RV32IA-TSO-NEXT: amoand.w a1, a2, (a1)
-; RV32IA-TSO-NEXT: srl a0, a1, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_xchg_0_i16_seq_cst:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a1, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: lui a2, 16
+; RV32IA-TSO-NOZACAS-NEXT: addi a2, a2, -1
+; RV32IA-TSO-NOZACAS-NEXT: sll a2, a2, a0
+; RV32IA-TSO-NOZACAS-NEXT: not a2, a2
+; RV32IA-TSO-NOZACAS-NEXT: amoand.w a1, a2, (a1)
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_xchg_0_i16_seq_cst:
; RV64I: # %bb.0:
@@ -10952,6 +14276,30 @@ define i16 @atomicrmw_xchg_0_i16_seq_cst(ptr %a) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_xchg_0_i16_seq_cst:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a1, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: lui a2, 16
+; RV32IA-WMO-ZACAS-NEXT: addi a2, a2, -1
+; RV32IA-WMO-ZACAS-NEXT: sll a2, a2, a0
+; RV32IA-WMO-ZACAS-NEXT: not a2, a2
+; RV32IA-WMO-ZACAS-NEXT: amoand.w.aqrl a1, a2, (a1)
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_xchg_0_i16_seq_cst:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a1, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: lui a2, 16
+; RV32IA-TSO-ZACAS-NEXT: addi a2, a2, -1
+; RV32IA-TSO-ZACAS-NEXT: sll a2, a2, a0
+; RV32IA-TSO-ZACAS-NEXT: not a2, a2
+; RV32IA-TSO-ZACAS-NEXT: amoand.w a1, a2, (a1)
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_xchg_0_i16_seq_cst:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a1, a0, -4
@@ -10976,6 +14324,16 @@ define i16 @atomicrmw_xchg_0_i16_seq_cst(ptr %a) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xchg_0_i16_seq_cst:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amoswap.h.aqrl a0, zero, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_xchg_0_i16_seq_cst:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amoswap.h a0, zero, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_xchg_0_i16_seq_cst:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amoswap.h.aqrl a0, zero, (a0)
@@ -11002,16 +14360,16 @@ define i16 @atomicrmw_xchg_minus_1_i16_monotonic(ptr %a) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-LABEL: atomicrmw_xchg_minus_1_i16_monotonic:
-; RV32IA: # %bb.0:
-; RV32IA-NEXT: andi a1, a0, -4
-; RV32IA-NEXT: slli a0, a0, 3
-; RV32IA-NEXT: lui a2, 16
-; RV32IA-NEXT: addi a2, a2, -1
-; RV32IA-NEXT: sll a2, a2, a0
-; RV32IA-NEXT: amoor.w a1, a2, (a1)
-; RV32IA-NEXT: srl a0, a1, a0
-; RV32IA-NEXT: ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i16_monotonic:
+; RV32IA-NOZACAS: # %bb.0:
+; RV32IA-NOZACAS-NEXT: andi a1, a0, -4
+; RV32IA-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT: lui a2, 16
+; RV32IA-NOZACAS-NEXT: addi a2, a2, -1
+; RV32IA-NOZACAS-NEXT: sll a2, a2, a0
+; RV32IA-NOZACAS-NEXT: amoor.w a1, a2, (a1)
+; RV32IA-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_xchg_minus_1_i16_monotonic:
; RV64I: # %bb.0:
@@ -11036,6 +14394,17 @@ define i16 @atomicrmw_xchg_minus_1_i16_monotonic(ptr %a) nounwind {
; RV64IA-NOZACAS-NEXT: srlw a0, a1, a0
; RV64IA-NOZACAS-NEXT: ret
;
+; RV32IA-ZACAS-LABEL: atomicrmw_xchg_minus_1_i16_monotonic:
+; RV32IA-ZACAS: # %bb.0:
+; RV32IA-ZACAS-NEXT: andi a1, a0, -4
+; RV32IA-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-ZACAS-NEXT: lui a2, 16
+; RV32IA-ZACAS-NEXT: addi a2, a2, -1
+; RV32IA-ZACAS-NEXT: sll a2, a2, a0
+; RV32IA-ZACAS-NEXT: amoor.w a1, a2, (a1)
+; RV32IA-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-ZACAS-NEXT: ret
+;
; RV64IA-ZACAS-LABEL: atomicrmw_xchg_minus_1_i16_monotonic:
; RV64IA-ZACAS: # %bb.0:
; RV64IA-ZACAS-NEXT: andi a1, a0, -4
@@ -11047,6 +14416,18 @@ define i16 @atomicrmw_xchg_minus_1_i16_monotonic(ptr %a) nounwind {
; RV64IA-ZACAS-NEXT: srlw a0, a1, a0
; RV64IA-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xchg_minus_1_i16_monotonic:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: li a1, -1
+; RV32IA-WMO-ZABHA-NEXT: amoswap.h a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_xchg_minus_1_i16_monotonic:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: li a1, -1
+; RV32IA-TSO-ZABHA-NEXT: amoswap.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_xchg_minus_1_i16_monotonic:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: li a1, -1
@@ -11075,27 +14456,27 @@ define i16 @atomicrmw_xchg_minus_1_i16_acquire(ptr %a) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_xchg_minus_1_i16_acquire:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a1, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: lui a2, 16
-; RV32IA-WMO-NEXT: addi a2, a2, -1
-; RV32IA-WMO-NEXT: sll a2, a2, a0
-; RV32IA-WMO-NEXT: amoor.w.aq a1, a2, (a1)
-; RV32IA-WMO-NEXT: srl a0, a1, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i16_acquire:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a1, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: lui a2, 16
+; RV32IA-WMO-NOZACAS-NEXT: addi a2, a2, -1
+; RV32IA-WMO-NOZACAS-NEXT: sll a2, a2, a0
+; RV32IA-WMO-NOZACAS-NEXT: amoor.w.aq a1, a2, (a1)
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_xchg_minus_1_i16_acquire:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a1, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: lui a2, 16
-; RV32IA-TSO-NEXT: addi a2, a2, -1
-; RV32IA-TSO-NEXT: sll a2, a2, a0
-; RV32IA-TSO-NEXT: amoor.w a1, a2, (a1)
-; RV32IA-TSO-NEXT: srl a0, a1, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i16_acquire:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a1, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: lui a2, 16
+; RV32IA-TSO-NOZACAS-NEXT: addi a2, a2, -1
+; RV32IA-TSO-NOZACAS-NEXT: sll a2, a2, a0
+; RV32IA-TSO-NOZACAS-NEXT: amoor.w a1, a2, (a1)
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_xchg_minus_1_i16_acquire:
; RV64I: # %bb.0:
@@ -11131,6 +14512,28 @@ define i16 @atomicrmw_xchg_minus_1_i16_acquire(ptr %a) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_xchg_minus_1_i16_acquire:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a1, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: lui a2, 16
+; RV32IA-WMO-ZACAS-NEXT: addi a2, a2, -1
+; RV32IA-WMO-ZACAS-NEXT: sll a2, a2, a0
+; RV32IA-WMO-ZACAS-NEXT: amoor.w.aq a1, a2, (a1)
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_xchg_minus_1_i16_acquire:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a1, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: lui a2, 16
+; RV32IA-TSO-ZACAS-NEXT: addi a2, a2, -1
+; RV32IA-TSO-ZACAS-NEXT: sll a2, a2, a0
+; RV32IA-TSO-ZACAS-NEXT: amoor.w a1, a2, (a1)
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_xchg_minus_1_i16_acquire:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a1, a0, -4
@@ -11153,6 +14556,18 @@ define i16 @atomicrmw_xchg_minus_1_i16_acquire(ptr %a) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xchg_minus_1_i16_acquire:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: li a1, -1
+; RV32IA-WMO-ZABHA-NEXT: amoswap.h.aq a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_xchg_minus_1_i16_acquire:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: li a1, -1
+; RV32IA-TSO-ZABHA-NEXT: amoswap.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_xchg_minus_1_i16_acquire:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: li a1, -1
@@ -11181,27 +14596,27 @@ define i16 @atomicrmw_xchg_minus_1_i16_release(ptr %a) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_xchg_minus_1_i16_release:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a1, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: lui a2, 16
-; RV32IA-WMO-NEXT: addi a2, a2, -1
-; RV32IA-WMO-NEXT: sll a2, a2, a0
-; RV32IA-WMO-NEXT: amoor.w.rl a1, a2, (a1)
-; RV32IA-WMO-NEXT: srl a0, a1, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i16_release:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a1, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: lui a2, 16
+; RV32IA-WMO-NOZACAS-NEXT: addi a2, a2, -1
+; RV32IA-WMO-NOZACAS-NEXT: sll a2, a2, a0
+; RV32IA-WMO-NOZACAS-NEXT: amoor.w.rl a1, a2, (a1)
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_xchg_minus_1_i16_release:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a1, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: lui a2, 16
-; RV32IA-TSO-NEXT: addi a2, a2, -1
-; RV32IA-TSO-NEXT: sll a2, a2, a0
-; RV32IA-TSO-NEXT: amoor.w a1, a2, (a1)
-; RV32IA-TSO-NEXT: srl a0, a1, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i16_release:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a1, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: lui a2, 16
+; RV32IA-TSO-NOZACAS-NEXT: addi a2, a2, -1
+; RV32IA-TSO-NOZACAS-NEXT: sll a2, a2, a0
+; RV32IA-TSO-NOZACAS-NEXT: amoor.w a1, a2, (a1)
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_xchg_minus_1_i16_release:
; RV64I: # %bb.0:
@@ -11237,6 +14652,28 @@ define i16 @atomicrmw_xchg_minus_1_i16_release(ptr %a) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_xchg_minus_1_i16_release:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a1, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: lui a2, 16
+; RV32IA-WMO-ZACAS-NEXT: addi a2, a2, -1
+; RV32IA-WMO-ZACAS-NEXT: sll a2, a2, a0
+; RV32IA-WMO-ZACAS-NEXT: amoor.w.rl a1, a2, (a1)
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_xchg_minus_1_i16_release:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a1, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: lui a2, 16
+; RV32IA-TSO-ZACAS-NEXT: addi a2, a2, -1
+; RV32IA-TSO-ZACAS-NEXT: sll a2, a2, a0
+; RV32IA-TSO-ZACAS-NEXT: amoor.w a1, a2, (a1)
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_xchg_minus_1_i16_release:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a1, a0, -4
@@ -11259,6 +14696,18 @@ define i16 @atomicrmw_xchg_minus_1_i16_release(ptr %a) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xchg_minus_1_i16_release:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: li a1, -1
+; RV32IA-WMO-ZABHA-NEXT: amoswap.h.rl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_xchg_minus_1_i16_release:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: li a1, -1
+; RV32IA-TSO-ZABHA-NEXT: amoswap.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_xchg_minus_1_i16_release:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: li a1, -1
@@ -11287,27 +14736,27 @@ define i16 @atomicrmw_xchg_minus_1_i16_acq_rel(ptr %a) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_xchg_minus_1_i16_acq_rel:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a1, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: lui a2, 16
-; RV32IA-WMO-NEXT: addi a2, a2, -1
-; RV32IA-WMO-NEXT: sll a2, a2, a0
-; RV32IA-WMO-NEXT: amoor.w.aqrl a1, a2, (a1)
-; RV32IA-WMO-NEXT: srl a0, a1, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i16_acq_rel:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a1, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: lui a2, 16
+; RV32IA-WMO-NOZACAS-NEXT: addi a2, a2, -1
+; RV32IA-WMO-NOZACAS-NEXT: sll a2, a2, a0
+; RV32IA-WMO-NOZACAS-NEXT: amoor.w.aqrl a1, a2, (a1)
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_xchg_minus_1_i16_acq_rel:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a1, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: lui a2, 16
-; RV32IA-TSO-NEXT: addi a2, a2, -1
-; RV32IA-TSO-NEXT: sll a2, a2, a0
-; RV32IA-TSO-NEXT: amoor.w a1, a2, (a1)
-; RV32IA-TSO-NEXT: srl a0, a1, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i16_acq_rel:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a1, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: lui a2, 16
+; RV32IA-TSO-NOZACAS-NEXT: addi a2, a2, -1
+; RV32IA-TSO-NOZACAS-NEXT: sll a2, a2, a0
+; RV32IA-TSO-NOZACAS-NEXT: amoor.w a1, a2, (a1)
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_xchg_minus_1_i16_acq_rel:
; RV64I: # %bb.0:
@@ -11343,6 +14792,28 @@ define i16 @atomicrmw_xchg_minus_1_i16_acq_rel(ptr %a) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_xchg_minus_1_i16_acq_rel:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a1, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: lui a2, 16
+; RV32IA-WMO-ZACAS-NEXT: addi a2, a2, -1
+; RV32IA-WMO-ZACAS-NEXT: sll a2, a2, a0
+; RV32IA-WMO-ZACAS-NEXT: amoor.w.aqrl a1, a2, (a1)
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_xchg_minus_1_i16_acq_rel:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a1, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: lui a2, 16
+; RV32IA-TSO-ZACAS-NEXT: addi a2, a2, -1
+; RV32IA-TSO-ZACAS-NEXT: sll a2, a2, a0
+; RV32IA-TSO-ZACAS-NEXT: amoor.w a1, a2, (a1)
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_xchg_minus_1_i16_acq_rel:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a1, a0, -4
@@ -11365,6 +14836,18 @@ define i16 @atomicrmw_xchg_minus_1_i16_acq_rel(ptr %a) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xchg_minus_1_i16_acq_rel:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: li a1, -1
+; RV32IA-WMO-ZABHA-NEXT: amoswap.h.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_xchg_minus_1_i16_acq_rel:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: li a1, -1
+; RV32IA-TSO-ZABHA-NEXT: amoswap.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_xchg_minus_1_i16_acq_rel:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: li a1, -1
@@ -11393,27 +14876,27 @@ define i16 @atomicrmw_xchg_minus_1_i16_seq_cst(ptr %a) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_xchg_minus_1_i16_seq_cst:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a1, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: lui a2, 16
-; RV32IA-WMO-NEXT: addi a2, a2, -1
-; RV32IA-WMO-NEXT: sll a2, a2, a0
-; RV32IA-WMO-NEXT: amoor.w.aqrl a1, a2, (a1)
-; RV32IA-WMO-NEXT: srl a0, a1, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i16_seq_cst:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a1, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: lui a2, 16
+; RV32IA-WMO-NOZACAS-NEXT: addi a2, a2, -1
+; RV32IA-WMO-NOZACAS-NEXT: sll a2, a2, a0
+; RV32IA-WMO-NOZACAS-NEXT: amoor.w.aqrl a1, a2, (a1)
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_xchg_minus_1_i16_seq_cst:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a1, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: lui a2, 16
-; RV32IA-TSO-NEXT: addi a2, a2, -1
-; RV32IA-TSO-NEXT: sll a2, a2, a0
-; RV32IA-TSO-NEXT: amoor.w a1, a2, (a1)
-; RV32IA-TSO-NEXT: srl a0, a1, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_xchg_minus_1_i16_seq_cst:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a1, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: lui a2, 16
+; RV32IA-TSO-NOZACAS-NEXT: addi a2, a2, -1
+; RV32IA-TSO-NOZACAS-NEXT: sll a2, a2, a0
+; RV32IA-TSO-NOZACAS-NEXT: amoor.w a1, a2, (a1)
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_xchg_minus_1_i16_seq_cst:
; RV64I: # %bb.0:
@@ -11449,6 +14932,28 @@ define i16 @atomicrmw_xchg_minus_1_i16_seq_cst(ptr %a) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_xchg_minus_1_i16_seq_cst:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a1, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: lui a2, 16
+; RV32IA-WMO-ZACAS-NEXT: addi a2, a2, -1
+; RV32IA-WMO-ZACAS-NEXT: sll a2, a2, a0
+; RV32IA-WMO-ZACAS-NEXT: amoor.w.aqrl a1, a2, (a1)
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_xchg_minus_1_i16_seq_cst:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a1, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: lui a2, 16
+; RV32IA-TSO-ZACAS-NEXT: addi a2, a2, -1
+; RV32IA-TSO-ZACAS-NEXT: sll a2, a2, a0
+; RV32IA-TSO-ZACAS-NEXT: amoor.w a1, a2, (a1)
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_xchg_minus_1_i16_seq_cst:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a1, a0, -4
@@ -11471,6 +14976,18 @@ define i16 @atomicrmw_xchg_minus_1_i16_seq_cst(ptr %a) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xchg_minus_1_i16_seq_cst:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: li a1, -1
+; RV32IA-WMO-ZABHA-NEXT: amoswap.h.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_xchg_minus_1_i16_seq_cst:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: li a1, -1
+; RV32IA-TSO-ZABHA-NEXT: amoswap.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_xchg_minus_1_i16_seq_cst:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: li a1, -1
@@ -11497,26 +15014,26 @@ define i16 @atomicrmw_add_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-LABEL: atomicrmw_add_i16_monotonic:
-; RV32IA: # %bb.0:
-; RV32IA-NEXT: andi a2, a0, -4
-; RV32IA-NEXT: slli a0, a0, 3
-; RV32IA-NEXT: lui a3, 16
-; RV32IA-NEXT: addi a3, a3, -1
-; RV32IA-NEXT: sll a4, a3, a0
-; RV32IA-NEXT: and a1, a1, a3
-; RV32IA-NEXT: sll a1, a1, a0
-; RV32IA-NEXT: .LBB80_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: lr.w a3, (a2)
-; RV32IA-NEXT: add a5, a3, a1
-; RV32IA-NEXT: xor a5, a3, a5
-; RV32IA-NEXT: and a5, a5, a4
-; RV32IA-NEXT: xor a5, a3, a5
-; RV32IA-NEXT: sc.w a5, a5, (a2)
-; RV32IA-NEXT: bnez a5, .LBB80_1
-; RV32IA-NEXT: # %bb.2:
-; RV32IA-NEXT: srl a0, a3, a0
-; RV32IA-NEXT: ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_add_i16_monotonic:
+; RV32IA-NOZACAS: # %bb.0:
+; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT: lui a3, 16
+; RV32IA-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-NOZACAS-NEXT: .LBB80_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NOZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-NOZACAS-NEXT: add a5, a3, a1
+; RV32IA-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-NOZACAS-NEXT: and a5, a5, a4
+; RV32IA-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-NOZACAS-NEXT: bnez a5, .LBB80_1
+; RV32IA-NOZACAS-NEXT: # %bb.2:
+; RV32IA-NOZACAS-NEXT: srl a0, a3, a0
+; RV32IA-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_add_i16_monotonic:
; RV64I: # %bb.0:
@@ -11549,6 +15066,27 @@ define i16 @atomicrmw_add_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV64IA-NOZACAS-NEXT: srlw a0, a3, a0
; RV64IA-NOZACAS-NEXT: ret
;
+; RV32IA-ZACAS-LABEL: atomicrmw_add_i16_monotonic:
+; RV32IA-ZACAS: # %bb.0:
+; RV32IA-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-ZACAS-NEXT: lui a3, 16
+; RV32IA-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-ZACAS-NEXT: sll a4, a3, a0
+; RV32IA-ZACAS-NEXT: and a1, a1, a3
+; RV32IA-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-ZACAS-NEXT: .LBB80_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-ZACAS-NEXT: add a5, a3, a1
+; RV32IA-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-ZACAS-NEXT: and a5, a5, a4
+; RV32IA-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-ZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-ZACAS-NEXT: bnez a5, .LBB80_1
+; RV32IA-ZACAS-NEXT: # %bb.2:
+; RV32IA-ZACAS-NEXT: srl a0, a3, a0
+; RV32IA-ZACAS-NEXT: ret
+;
; RV64IA-ZACAS-LABEL: atomicrmw_add_i16_monotonic:
; RV64IA-ZACAS: # %bb.0:
; RV64IA-ZACAS-NEXT: andi a2, a0, -4
@@ -11570,6 +15108,16 @@ define i16 @atomicrmw_add_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV64IA-ZACAS-NEXT: srlw a0, a3, a0
; RV64IA-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_add_i16_monotonic:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amoadd.h a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_add_i16_monotonic:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amoadd.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_add_i16_monotonic:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amoadd.h a0, a1, (a0)
@@ -11594,47 +15142,47 @@ define i16 @atomicrmw_add_i16_acquire(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_add_i16_acquire:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: lui a3, 16
-; RV32IA-WMO-NEXT: addi a3, a3, -1
-; RV32IA-WMO-NEXT: sll a4, a3, a0
-; RV32IA-WMO-NEXT: and a1, a1, a3
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: .LBB81_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT: lr.w.aq a3, (a2)
-; RV32IA-WMO-NEXT: add a5, a3, a1
-; RV32IA-WMO-NEXT: xor a5, a3, a5
-; RV32IA-WMO-NEXT: and a5, a5, a4
-; RV32IA-WMO-NEXT: xor a5, a3, a5
-; RV32IA-WMO-NEXT: sc.w a5, a5, (a2)
-; RV32IA-WMO-NEXT: bnez a5, .LBB81_1
-; RV32IA-WMO-NEXT: # %bb.2:
-; RV32IA-WMO-NEXT: srl a0, a3, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_add_i16_acquire:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: .LBB81_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: lr.w.aq a3, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: add a5, a3, a1
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-NOZACAS-NEXT: and a5, a5, a4
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: bnez a5, .LBB81_1
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_add_i16_acquire:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: lui a3, 16
-; RV32IA-TSO-NEXT: addi a3, a3, -1
-; RV32IA-TSO-NEXT: sll a4, a3, a0
-; RV32IA-TSO-NEXT: and a1, a1, a3
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: .LBB81_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT: lr.w a3, (a2)
-; RV32IA-TSO-NEXT: add a5, a3, a1
-; RV32IA-TSO-NEXT: xor a5, a3, a5
-; RV32IA-TSO-NEXT: and a5, a5, a4
-; RV32IA-TSO-NEXT: xor a5, a3, a5
-; RV32IA-TSO-NEXT: sc.w a5, a5, (a2)
-; RV32IA-TSO-NEXT: bnez a5, .LBB81_1
-; RV32IA-TSO-NEXT: # %bb.2:
-; RV32IA-TSO-NEXT: srl a0, a3, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_add_i16_acquire:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: .LBB81_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: add a5, a3, a1
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-NOZACAS-NEXT: and a5, a5, a4
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: bnez a5, .LBB81_1
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_add_i16_acquire:
; RV64I: # %bb.0:
@@ -11688,6 +15236,48 @@ define i16 @atomicrmw_add_i16_acquire(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a3, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_add_i16_acquire:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-ZACAS-NEXT: sll a4, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a3
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: .LBB81_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a3, (a2)
+; RV32IA-WMO-ZACAS-NEXT: add a5, a3, a1
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a4
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-ZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB81_1
+; RV32IA-WMO-ZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_add_i16_acquire:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-ZACAS-NEXT: sll a4, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: and a1, a1, a3
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: .LBB81_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-TSO-ZACAS-NEXT: add a5, a3, a1
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a4
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT: bnez a5, .LBB81_1
+; RV32IA-TSO-ZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_add_i16_acquire:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -11730,6 +15320,16 @@ define i16 @atomicrmw_add_i16_acquire(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a3, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_add_i16_acquire:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amoadd.h.aq a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_add_i16_acquire:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amoadd.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_add_i16_acquire:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amoadd.h.aq a0, a1, (a0)
@@ -11754,47 +15354,47 @@ define i16 @atomicrmw_add_i16_release(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_add_i16_release:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: lui a3, 16
-; RV32IA-WMO-NEXT: addi a3, a3, -1
-; RV32IA-WMO-NEXT: sll a4, a3, a0
-; RV32IA-WMO-NEXT: and a1, a1, a3
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: .LBB82_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT: lr.w a3, (a2)
-; RV32IA-WMO-NEXT: add a5, a3, a1
-; RV32IA-WMO-NEXT: xor a5, a3, a5
-; RV32IA-WMO-NEXT: and a5, a5, a4
-; RV32IA-WMO-NEXT: xor a5, a3, a5
-; RV32IA-WMO-NEXT: sc.w.rl a5, a5, (a2)
-; RV32IA-WMO-NEXT: bnez a5, .LBB82_1
-; RV32IA-WMO-NEXT: # %bb.2:
-; RV32IA-WMO-NEXT: srl a0, a3, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_add_i16_release:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: .LBB82_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: add a5, a3, a1
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-NOZACAS-NEXT: and a5, a5, a4
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-NOZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: bnez a5, .LBB82_1
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_add_i16_release:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: lui a3, 16
-; RV32IA-TSO-NEXT: addi a3, a3, -1
-; RV32IA-TSO-NEXT: sll a4, a3, a0
-; RV32IA-TSO-NEXT: and a1, a1, a3
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: .LBB82_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT: lr.w a3, (a2)
-; RV32IA-TSO-NEXT: add a5, a3, a1
-; RV32IA-TSO-NEXT: xor a5, a3, a5
-; RV32IA-TSO-NEXT: and a5, a5, a4
-; RV32IA-TSO-NEXT: xor a5, a3, a5
-; RV32IA-TSO-NEXT: sc.w a5, a5, (a2)
-; RV32IA-TSO-NEXT: bnez a5, .LBB82_1
-; RV32IA-TSO-NEXT: # %bb.2:
-; RV32IA-TSO-NEXT: srl a0, a3, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_add_i16_release:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: .LBB82_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: add a5, a3, a1
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-NOZACAS-NEXT: and a5, a5, a4
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: bnez a5, .LBB82_1
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_add_i16_release:
; RV64I: # %bb.0:
@@ -11848,6 +15448,48 @@ define i16 @atomicrmw_add_i16_release(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a3, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_add_i16_release:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-ZACAS-NEXT: sll a4, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a3
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: .LBB82_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-WMO-ZACAS-NEXT: add a5, a3, a1
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a4
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB82_1
+; RV32IA-WMO-ZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_add_i16_release:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-ZACAS-NEXT: sll a4, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: and a1, a1, a3
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: .LBB82_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-TSO-ZACAS-NEXT: add a5, a3, a1
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a4
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT: bnez a5, .LBB82_1
+; RV32IA-TSO-ZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_add_i16_release:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -11890,6 +15532,16 @@ define i16 @atomicrmw_add_i16_release(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a3, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_add_i16_release:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amoadd.h.rl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_add_i16_release:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amoadd.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_add_i16_release:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amoadd.h.rl a0, a1, (a0)
@@ -11914,47 +15566,47 @@ define i16 @atomicrmw_add_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_add_i16_acq_rel:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: lui a3, 16
-; RV32IA-WMO-NEXT: addi a3, a3, -1
-; RV32IA-WMO-NEXT: sll a4, a3, a0
-; RV32IA-WMO-NEXT: and a1, a1, a3
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: .LBB83_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT: lr.w.aq a3, (a2)
-; RV32IA-WMO-NEXT: add a5, a3, a1
-; RV32IA-WMO-NEXT: xor a5, a3, a5
-; RV32IA-WMO-NEXT: and a5, a5, a4
-; RV32IA-WMO-NEXT: xor a5, a3, a5
-; RV32IA-WMO-NEXT: sc.w.rl a5, a5, (a2)
-; RV32IA-WMO-NEXT: bnez a5, .LBB83_1
-; RV32IA-WMO-NEXT: # %bb.2:
-; RV32IA-WMO-NEXT: srl a0, a3, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_add_i16_acq_rel:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: .LBB83_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: lr.w.aq a3, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: add a5, a3, a1
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-NOZACAS-NEXT: and a5, a5, a4
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-NOZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: bnez a5, .LBB83_1
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_add_i16_acq_rel:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: lui a3, 16
-; RV32IA-TSO-NEXT: addi a3, a3, -1
-; RV32IA-TSO-NEXT: sll a4, a3, a0
-; RV32IA-TSO-NEXT: and a1, a1, a3
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: .LBB83_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT: lr.w a3, (a2)
-; RV32IA-TSO-NEXT: add a5, a3, a1
-; RV32IA-TSO-NEXT: xor a5, a3, a5
-; RV32IA-TSO-NEXT: and a5, a5, a4
-; RV32IA-TSO-NEXT: xor a5, a3, a5
-; RV32IA-TSO-NEXT: sc.w a5, a5, (a2)
-; RV32IA-TSO-NEXT: bnez a5, .LBB83_1
-; RV32IA-TSO-NEXT: # %bb.2:
-; RV32IA-TSO-NEXT: srl a0, a3, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_add_i16_acq_rel:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: .LBB83_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: add a5, a3, a1
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-NOZACAS-NEXT: and a5, a5, a4
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: bnez a5, .LBB83_1
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_add_i16_acq_rel:
; RV64I: # %bb.0:
@@ -12008,6 +15660,48 @@ define i16 @atomicrmw_add_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a3, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_add_i16_acq_rel:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-ZACAS-NEXT: sll a4, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a3
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: .LBB83_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a3, (a2)
+; RV32IA-WMO-ZACAS-NEXT: add a5, a3, a1
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a4
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB83_1
+; RV32IA-WMO-ZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_add_i16_acq_rel:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-ZACAS-NEXT: sll a4, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: and a1, a1, a3
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: .LBB83_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-TSO-ZACAS-NEXT: add a5, a3, a1
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a4
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT: bnez a5, .LBB83_1
+; RV32IA-TSO-ZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_add_i16_acq_rel:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -12050,6 +15744,16 @@ define i16 @atomicrmw_add_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a3, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_add_i16_acq_rel:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amoadd.h.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_add_i16_acq_rel:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amoadd.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_add_i16_acq_rel:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amoadd.h.aqrl a0, a1, (a0)
@@ -12074,26 +15778,26 @@ define i16 @atomicrmw_add_i16_seq_cst(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-LABEL: atomicrmw_add_i16_seq_cst:
-; RV32IA: # %bb.0:
-; RV32IA-NEXT: andi a2, a0, -4
-; RV32IA-NEXT: slli a0, a0, 3
-; RV32IA-NEXT: lui a3, 16
-; RV32IA-NEXT: addi a3, a3, -1
-; RV32IA-NEXT: sll a4, a3, a0
-; RV32IA-NEXT: and a1, a1, a3
-; RV32IA-NEXT: sll a1, a1, a0
-; RV32IA-NEXT: .LBB84_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: lr.w.aqrl a3, (a2)
-; RV32IA-NEXT: add a5, a3, a1
-; RV32IA-NEXT: xor a5, a3, a5
-; RV32IA-NEXT: and a5, a5, a4
-; RV32IA-NEXT: xor a5, a3, a5
-; RV32IA-NEXT: sc.w.rl a5, a5, (a2)
-; RV32IA-NEXT: bnez a5, .LBB84_1
-; RV32IA-NEXT: # %bb.2:
-; RV32IA-NEXT: srl a0, a3, a0
-; RV32IA-NEXT: ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_add_i16_seq_cst:
+; RV32IA-NOZACAS: # %bb.0:
+; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT: lui a3, 16
+; RV32IA-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-NOZACAS-NEXT: .LBB84_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NOZACAS-NEXT: lr.w.aqrl a3, (a2)
+; RV32IA-NOZACAS-NEXT: add a5, a3, a1
+; RV32IA-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-NOZACAS-NEXT: and a5, a5, a4
+; RV32IA-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-NOZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-NOZACAS-NEXT: bnez a5, .LBB84_1
+; RV32IA-NOZACAS-NEXT: # %bb.2:
+; RV32IA-NOZACAS-NEXT: srl a0, a3, a0
+; RV32IA-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_add_i16_seq_cst:
; RV64I: # %bb.0:
@@ -12126,6 +15830,27 @@ define i16 @atomicrmw_add_i16_seq_cst(ptr %a, i16 %b) nounwind {
; RV64IA-NOZACAS-NEXT: srlw a0, a3, a0
; RV64IA-NOZACAS-NEXT: ret
;
+; RV32IA-ZACAS-LABEL: atomicrmw_add_i16_seq_cst:
+; RV32IA-ZACAS: # %bb.0:
+; RV32IA-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-ZACAS-NEXT: lui a3, 16
+; RV32IA-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-ZACAS-NEXT: sll a4, a3, a0
+; RV32IA-ZACAS-NEXT: and a1, a1, a3
+; RV32IA-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-ZACAS-NEXT: .LBB84_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT: lr.w.aqrl a3, (a2)
+; RV32IA-ZACAS-NEXT: add a5, a3, a1
+; RV32IA-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-ZACAS-NEXT: and a5, a5, a4
+; RV32IA-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-ZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-ZACAS-NEXT: bnez a5, .LBB84_1
+; RV32IA-ZACAS-NEXT: # %bb.2:
+; RV32IA-ZACAS-NEXT: srl a0, a3, a0
+; RV32IA-ZACAS-NEXT: ret
+;
; RV64IA-ZACAS-LABEL: atomicrmw_add_i16_seq_cst:
; RV64IA-ZACAS: # %bb.0:
; RV64IA-ZACAS-NEXT: andi a2, a0, -4
@@ -12147,6 +15872,16 @@ define i16 @atomicrmw_add_i16_seq_cst(ptr %a, i16 %b) nounwind {
; RV64IA-ZACAS-NEXT: srlw a0, a3, a0
; RV64IA-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_add_i16_seq_cst:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amoadd.h.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_add_i16_seq_cst:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amoadd.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_add_i16_seq_cst:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amoadd.h.aqrl a0, a1, (a0)
@@ -12171,26 +15906,26 @@ define i16 @atomicrmw_sub_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-LABEL: atomicrmw_sub_i16_monotonic:
-; RV32IA: # %bb.0:
-; RV32IA-NEXT: andi a2, a0, -4
-; RV32IA-NEXT: slli a0, a0, 3
-; RV32IA-NEXT: lui a3, 16
-; RV32IA-NEXT: addi a3, a3, -1
-; RV32IA-NEXT: sll a4, a3, a0
-; RV32IA-NEXT: and a1, a1, a3
-; RV32IA-NEXT: sll a1, a1, a0
-; RV32IA-NEXT: .LBB85_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: lr.w a3, (a2)
-; RV32IA-NEXT: sub a5, a3, a1
-; RV32IA-NEXT: xor a5, a3, a5
-; RV32IA-NEXT: and a5, a5, a4
-; RV32IA-NEXT: xor a5, a3, a5
-; RV32IA-NEXT: sc.w a5, a5, (a2)
-; RV32IA-NEXT: bnez a5, .LBB85_1
-; RV32IA-NEXT: # %bb.2:
-; RV32IA-NEXT: srl a0, a3, a0
-; RV32IA-NEXT: ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_sub_i16_monotonic:
+; RV32IA-NOZACAS: # %bb.0:
+; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT: lui a3, 16
+; RV32IA-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-NOZACAS-NEXT: .LBB85_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NOZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-NOZACAS-NEXT: sub a5, a3, a1
+; RV32IA-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-NOZACAS-NEXT: and a5, a5, a4
+; RV32IA-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-NOZACAS-NEXT: bnez a5, .LBB85_1
+; RV32IA-NOZACAS-NEXT: # %bb.2:
+; RV32IA-NOZACAS-NEXT: srl a0, a3, a0
+; RV32IA-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_sub_i16_monotonic:
; RV64I: # %bb.0:
@@ -12223,6 +15958,27 @@ define i16 @atomicrmw_sub_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV64IA-NOZACAS-NEXT: srlw a0, a3, a0
; RV64IA-NOZACAS-NEXT: ret
;
+; RV32IA-ZACAS-LABEL: atomicrmw_sub_i16_monotonic:
+; RV32IA-ZACAS: # %bb.0:
+; RV32IA-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-ZACAS-NEXT: lui a3, 16
+; RV32IA-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-ZACAS-NEXT: sll a4, a3, a0
+; RV32IA-ZACAS-NEXT: and a1, a1, a3
+; RV32IA-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-ZACAS-NEXT: .LBB85_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-ZACAS-NEXT: sub a5, a3, a1
+; RV32IA-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-ZACAS-NEXT: and a5, a5, a4
+; RV32IA-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-ZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-ZACAS-NEXT: bnez a5, .LBB85_1
+; RV32IA-ZACAS-NEXT: # %bb.2:
+; RV32IA-ZACAS-NEXT: srl a0, a3, a0
+; RV32IA-ZACAS-NEXT: ret
+;
; RV64IA-ZACAS-LABEL: atomicrmw_sub_i16_monotonic:
; RV64IA-ZACAS: # %bb.0:
; RV64IA-ZACAS-NEXT: andi a2, a0, -4
@@ -12244,6 +16000,18 @@ define i16 @atomicrmw_sub_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV64IA-ZACAS-NEXT: srlw a0, a3, a0
; RV64IA-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_sub_i16_monotonic:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: neg a1, a1
+; RV32IA-WMO-ZABHA-NEXT: amoadd.h a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_sub_i16_monotonic:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: neg a1, a1
+; RV32IA-TSO-ZABHA-NEXT: amoadd.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_sub_i16_monotonic:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: neg a1, a1
@@ -12270,47 +16038,47 @@ define i16 @atomicrmw_sub_i16_acquire(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_sub_i16_acquire:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: lui a3, 16
-; RV32IA-WMO-NEXT: addi a3, a3, -1
-; RV32IA-WMO-NEXT: sll a4, a3, a0
-; RV32IA-WMO-NEXT: and a1, a1, a3
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: .LBB86_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT: lr.w.aq a3, (a2)
-; RV32IA-WMO-NEXT: sub a5, a3, a1
-; RV32IA-WMO-NEXT: xor a5, a3, a5
-; RV32IA-WMO-NEXT: and a5, a5, a4
-; RV32IA-WMO-NEXT: xor a5, a3, a5
-; RV32IA-WMO-NEXT: sc.w a5, a5, (a2)
-; RV32IA-WMO-NEXT: bnez a5, .LBB86_1
-; RV32IA-WMO-NEXT: # %bb.2:
-; RV32IA-WMO-NEXT: srl a0, a3, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_sub_i16_acquire:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: .LBB86_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: lr.w.aq a3, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: sub a5, a3, a1
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-NOZACAS-NEXT: and a5, a5, a4
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: bnez a5, .LBB86_1
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_sub_i16_acquire:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: lui a3, 16
-; RV32IA-TSO-NEXT: addi a3, a3, -1
-; RV32IA-TSO-NEXT: sll a4, a3, a0
-; RV32IA-TSO-NEXT: and a1, a1, a3
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: .LBB86_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT: lr.w a3, (a2)
-; RV32IA-TSO-NEXT: sub a5, a3, a1
-; RV32IA-TSO-NEXT: xor a5, a3, a5
-; RV32IA-TSO-NEXT: and a5, a5, a4
-; RV32IA-TSO-NEXT: xor a5, a3, a5
-; RV32IA-TSO-NEXT: sc.w a5, a5, (a2)
-; RV32IA-TSO-NEXT: bnez a5, .LBB86_1
-; RV32IA-TSO-NEXT: # %bb.2:
-; RV32IA-TSO-NEXT: srl a0, a3, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_sub_i16_acquire:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: .LBB86_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: sub a5, a3, a1
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-NOZACAS-NEXT: and a5, a5, a4
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: bnez a5, .LBB86_1
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_sub_i16_acquire:
; RV64I: # %bb.0:
@@ -12364,6 +16132,48 @@ define i16 @atomicrmw_sub_i16_acquire(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a3, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_sub_i16_acquire:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-ZACAS-NEXT: sll a4, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a3
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: .LBB86_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a3, (a2)
+; RV32IA-WMO-ZACAS-NEXT: sub a5, a3, a1
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a4
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-ZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB86_1
+; RV32IA-WMO-ZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_sub_i16_acquire:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-ZACAS-NEXT: sll a4, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: and a1, a1, a3
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: .LBB86_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-TSO-ZACAS-NEXT: sub a5, a3, a1
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a4
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT: bnez a5, .LBB86_1
+; RV32IA-TSO-ZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_sub_i16_acquire:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -12406,6 +16216,18 @@ define i16 @atomicrmw_sub_i16_acquire(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a3, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_sub_i16_acquire:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: neg a1, a1
+; RV32IA-WMO-ZABHA-NEXT: amoadd.h.aq a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_sub_i16_acquire:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: neg a1, a1
+; RV32IA-TSO-ZABHA-NEXT: amoadd.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_sub_i16_acquire:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: neg a1, a1
@@ -12432,47 +16254,47 @@ define i16 @atomicrmw_sub_i16_release(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_sub_i16_release:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: lui a3, 16
-; RV32IA-WMO-NEXT: addi a3, a3, -1
-; RV32IA-WMO-NEXT: sll a4, a3, a0
-; RV32IA-WMO-NEXT: and a1, a1, a3
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: .LBB87_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT: lr.w a3, (a2)
-; RV32IA-WMO-NEXT: sub a5, a3, a1
-; RV32IA-WMO-NEXT: xor a5, a3, a5
-; RV32IA-WMO-NEXT: and a5, a5, a4
-; RV32IA-WMO-NEXT: xor a5, a3, a5
-; RV32IA-WMO-NEXT: sc.w.rl a5, a5, (a2)
-; RV32IA-WMO-NEXT: bnez a5, .LBB87_1
-; RV32IA-WMO-NEXT: # %bb.2:
-; RV32IA-WMO-NEXT: srl a0, a3, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_sub_i16_release:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: .LBB87_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: sub a5, a3, a1
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-NOZACAS-NEXT: and a5, a5, a4
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-NOZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: bnez a5, .LBB87_1
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_sub_i16_release:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: lui a3, 16
-; RV32IA-TSO-NEXT: addi a3, a3, -1
-; RV32IA-TSO-NEXT: sll a4, a3, a0
-; RV32IA-TSO-NEXT: and a1, a1, a3
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: .LBB87_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT: lr.w a3, (a2)
-; RV32IA-TSO-NEXT: sub a5, a3, a1
-; RV32IA-TSO-NEXT: xor a5, a3, a5
-; RV32IA-TSO-NEXT: and a5, a5, a4
-; RV32IA-TSO-NEXT: xor a5, a3, a5
-; RV32IA-TSO-NEXT: sc.w a5, a5, (a2)
-; RV32IA-TSO-NEXT: bnez a5, .LBB87_1
-; RV32IA-TSO-NEXT: # %bb.2:
-; RV32IA-TSO-NEXT: srl a0, a3, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_sub_i16_release:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: .LBB87_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: sub a5, a3, a1
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-NOZACAS-NEXT: and a5, a5, a4
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: bnez a5, .LBB87_1
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_sub_i16_release:
; RV64I: # %bb.0:
@@ -12526,6 +16348,48 @@ define i16 @atomicrmw_sub_i16_release(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a3, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_sub_i16_release:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-ZACAS-NEXT: sll a4, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a3
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: .LBB87_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-WMO-ZACAS-NEXT: sub a5, a3, a1
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a4
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB87_1
+; RV32IA-WMO-ZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_sub_i16_release:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-ZACAS-NEXT: sll a4, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: and a1, a1, a3
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: .LBB87_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-TSO-ZACAS-NEXT: sub a5, a3, a1
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a4
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT: bnez a5, .LBB87_1
+; RV32IA-TSO-ZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_sub_i16_release:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -12568,6 +16432,18 @@ define i16 @atomicrmw_sub_i16_release(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a3, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_sub_i16_release:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: neg a1, a1
+; RV32IA-WMO-ZABHA-NEXT: amoadd.h.rl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_sub_i16_release:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: neg a1, a1
+; RV32IA-TSO-ZABHA-NEXT: amoadd.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_sub_i16_release:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: neg a1, a1
@@ -12594,47 +16470,47 @@ define i16 @atomicrmw_sub_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_sub_i16_acq_rel:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: lui a3, 16
-; RV32IA-WMO-NEXT: addi a3, a3, -1
-; RV32IA-WMO-NEXT: sll a4, a3, a0
-; RV32IA-WMO-NEXT: and a1, a1, a3
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: .LBB88_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT: lr.w.aq a3, (a2)
-; RV32IA-WMO-NEXT: sub a5, a3, a1
-; RV32IA-WMO-NEXT: xor a5, a3, a5
-; RV32IA-WMO-NEXT: and a5, a5, a4
-; RV32IA-WMO-NEXT: xor a5, a3, a5
-; RV32IA-WMO-NEXT: sc.w.rl a5, a5, (a2)
-; RV32IA-WMO-NEXT: bnez a5, .LBB88_1
-; RV32IA-WMO-NEXT: # %bb.2:
-; RV32IA-WMO-NEXT: srl a0, a3, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_sub_i16_acq_rel:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: .LBB88_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: lr.w.aq a3, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: sub a5, a3, a1
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-NOZACAS-NEXT: and a5, a5, a4
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-NOZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: bnez a5, .LBB88_1
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_sub_i16_acq_rel:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: lui a3, 16
-; RV32IA-TSO-NEXT: addi a3, a3, -1
-; RV32IA-TSO-NEXT: sll a4, a3, a0
-; RV32IA-TSO-NEXT: and a1, a1, a3
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: .LBB88_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT: lr.w a3, (a2)
-; RV32IA-TSO-NEXT: sub a5, a3, a1
-; RV32IA-TSO-NEXT: xor a5, a3, a5
-; RV32IA-TSO-NEXT: and a5, a5, a4
-; RV32IA-TSO-NEXT: xor a5, a3, a5
-; RV32IA-TSO-NEXT: sc.w a5, a5, (a2)
-; RV32IA-TSO-NEXT: bnez a5, .LBB88_1
-; RV32IA-TSO-NEXT: # %bb.2:
-; RV32IA-TSO-NEXT: srl a0, a3, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_sub_i16_acq_rel:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: .LBB88_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: sub a5, a3, a1
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-NOZACAS-NEXT: and a5, a5, a4
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: bnez a5, .LBB88_1
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_sub_i16_acq_rel:
; RV64I: # %bb.0:
@@ -12688,6 +16564,48 @@ define i16 @atomicrmw_sub_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a3, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_sub_i16_acq_rel:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-ZACAS-NEXT: sll a4, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a3
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: .LBB88_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a3, (a2)
+; RV32IA-WMO-ZACAS-NEXT: sub a5, a3, a1
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a4
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB88_1
+; RV32IA-WMO-ZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_sub_i16_acq_rel:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-ZACAS-NEXT: sll a4, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: and a1, a1, a3
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: .LBB88_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-TSO-ZACAS-NEXT: sub a5, a3, a1
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a4
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT: bnez a5, .LBB88_1
+; RV32IA-TSO-ZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_sub_i16_acq_rel:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -12730,6 +16648,18 @@ define i16 @atomicrmw_sub_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a3, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_sub_i16_acq_rel:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: neg a1, a1
+; RV32IA-WMO-ZABHA-NEXT: amoadd.h.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_sub_i16_acq_rel:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: neg a1, a1
+; RV32IA-TSO-ZABHA-NEXT: amoadd.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_sub_i16_acq_rel:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: neg a1, a1
@@ -12756,26 +16686,26 @@ define i16 @atomicrmw_sub_i16_seq_cst(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-LABEL: atomicrmw_sub_i16_seq_cst:
-; RV32IA: # %bb.0:
-; RV32IA-NEXT: andi a2, a0, -4
-; RV32IA-NEXT: slli a0, a0, 3
-; RV32IA-NEXT: lui a3, 16
-; RV32IA-NEXT: addi a3, a3, -1
-; RV32IA-NEXT: sll a4, a3, a0
-; RV32IA-NEXT: and a1, a1, a3
-; RV32IA-NEXT: sll a1, a1, a0
-; RV32IA-NEXT: .LBB89_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: lr.w.aqrl a3, (a2)
-; RV32IA-NEXT: sub a5, a3, a1
-; RV32IA-NEXT: xor a5, a3, a5
-; RV32IA-NEXT: and a5, a5, a4
-; RV32IA-NEXT: xor a5, a3, a5
-; RV32IA-NEXT: sc.w.rl a5, a5, (a2)
-; RV32IA-NEXT: bnez a5, .LBB89_1
-; RV32IA-NEXT: # %bb.2:
-; RV32IA-NEXT: srl a0, a3, a0
-; RV32IA-NEXT: ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_sub_i16_seq_cst:
+; RV32IA-NOZACAS: # %bb.0:
+; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT: lui a3, 16
+; RV32IA-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-NOZACAS-NEXT: .LBB89_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NOZACAS-NEXT: lr.w.aqrl a3, (a2)
+; RV32IA-NOZACAS-NEXT: sub a5, a3, a1
+; RV32IA-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-NOZACAS-NEXT: and a5, a5, a4
+; RV32IA-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-NOZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-NOZACAS-NEXT: bnez a5, .LBB89_1
+; RV32IA-NOZACAS-NEXT: # %bb.2:
+; RV32IA-NOZACAS-NEXT: srl a0, a3, a0
+; RV32IA-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_sub_i16_seq_cst:
; RV64I: # %bb.0:
@@ -12808,6 +16738,27 @@ define i16 @atomicrmw_sub_i16_seq_cst(ptr %a, i16 %b) nounwind {
; RV64IA-NOZACAS-NEXT: srlw a0, a3, a0
; RV64IA-NOZACAS-NEXT: ret
;
+; RV32IA-ZACAS-LABEL: atomicrmw_sub_i16_seq_cst:
+; RV32IA-ZACAS: # %bb.0:
+; RV32IA-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-ZACAS-NEXT: lui a3, 16
+; RV32IA-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-ZACAS-NEXT: sll a4, a3, a0
+; RV32IA-ZACAS-NEXT: and a1, a1, a3
+; RV32IA-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-ZACAS-NEXT: .LBB89_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT: lr.w.aqrl a3, (a2)
+; RV32IA-ZACAS-NEXT: sub a5, a3, a1
+; RV32IA-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-ZACAS-NEXT: and a5, a5, a4
+; RV32IA-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-ZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-ZACAS-NEXT: bnez a5, .LBB89_1
+; RV32IA-ZACAS-NEXT: # %bb.2:
+; RV32IA-ZACAS-NEXT: srl a0, a3, a0
+; RV32IA-ZACAS-NEXT: ret
+;
; RV64IA-ZACAS-LABEL: atomicrmw_sub_i16_seq_cst:
; RV64IA-ZACAS: # %bb.0:
; RV64IA-ZACAS-NEXT: andi a2, a0, -4
@@ -12829,6 +16780,18 @@ define i16 @atomicrmw_sub_i16_seq_cst(ptr %a, i16 %b) nounwind {
; RV64IA-ZACAS-NEXT: srlw a0, a3, a0
; RV64IA-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_sub_i16_seq_cst:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: neg a1, a1
+; RV32IA-WMO-ZABHA-NEXT: amoadd.h.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_sub_i16_seq_cst:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: neg a1, a1
+; RV32IA-TSO-ZABHA-NEXT: amoadd.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_sub_i16_seq_cst:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: neg a1, a1
@@ -12855,20 +16818,20 @@ define i16 @atomicrmw_and_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-LABEL: atomicrmw_and_i16_monotonic:
-; RV32IA: # %bb.0:
-; RV32IA-NEXT: andi a2, a0, -4
-; RV32IA-NEXT: slli a0, a0, 3
-; RV32IA-NEXT: lui a3, 16
-; RV32IA-NEXT: addi a3, a3, -1
-; RV32IA-NEXT: sll a4, a3, a0
-; RV32IA-NEXT: and a1, a1, a3
-; RV32IA-NEXT: not a3, a4
-; RV32IA-NEXT: sll a1, a1, a0
-; RV32IA-NEXT: or a1, a1, a3
-; RV32IA-NEXT: amoand.w a1, a1, (a2)
-; RV32IA-NEXT: srl a0, a1, a0
-; RV32IA-NEXT: ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_and_i16_monotonic:
+; RV32IA-NOZACAS: # %bb.0:
+; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT: lui a3, 16
+; RV32IA-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-NOZACAS-NEXT: not a3, a4
+; RV32IA-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-NOZACAS-NEXT: or a1, a1, a3
+; RV32IA-NOZACAS-NEXT: amoand.w a1, a1, (a2)
+; RV32IA-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_and_i16_monotonic:
; RV64I: # %bb.0:
@@ -12895,6 +16858,21 @@ define i16 @atomicrmw_and_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV64IA-NOZACAS-NEXT: srlw a0, a1, a0
; RV64IA-NOZACAS-NEXT: ret
;
+; RV32IA-ZACAS-LABEL: atomicrmw_and_i16_monotonic:
+; RV32IA-ZACAS: # %bb.0:
+; RV32IA-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-ZACAS-NEXT: lui a3, 16
+; RV32IA-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-ZACAS-NEXT: sll a4, a3, a0
+; RV32IA-ZACAS-NEXT: and a1, a1, a3
+; RV32IA-ZACAS-NEXT: not a3, a4
+; RV32IA-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-ZACAS-NEXT: or a1, a1, a3
+; RV32IA-ZACAS-NEXT: amoand.w a1, a1, (a2)
+; RV32IA-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-ZACAS-NEXT: ret
+;
; RV64IA-ZACAS-LABEL: atomicrmw_and_i16_monotonic:
; RV64IA-ZACAS: # %bb.0:
; RV64IA-ZACAS-NEXT: andi a2, a0, -4
@@ -12910,6 +16888,16 @@ define i16 @atomicrmw_and_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV64IA-ZACAS-NEXT: srlw a0, a1, a0
; RV64IA-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_and_i16_monotonic:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amoand.h a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_and_i16_monotonic:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amoand.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_and_i16_monotonic:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amoand.h a0, a1, (a0)
@@ -12934,35 +16922,35 @@ define i16 @atomicrmw_and_i16_acquire(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_and_i16_acquire:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: lui a3, 16
-; RV32IA-WMO-NEXT: addi a3, a3, -1
-; RV32IA-WMO-NEXT: sll a4, a3, a0
-; RV32IA-WMO-NEXT: and a1, a1, a3
-; RV32IA-WMO-NEXT: not a3, a4
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: or a1, a1, a3
-; RV32IA-WMO-NEXT: amoand.w.aq a1, a1, (a2)
-; RV32IA-WMO-NEXT: srl a0, a1, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_and_i16_acquire:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-WMO-NOZACAS-NEXT: not a3, a4
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: or a1, a1, a3
+; RV32IA-WMO-NOZACAS-NEXT: amoand.w.aq a1, a1, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_and_i16_acquire:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: lui a3, 16
-; RV32IA-TSO-NEXT: addi a3, a3, -1
-; RV32IA-TSO-NEXT: sll a4, a3, a0
-; RV32IA-TSO-NEXT: and a1, a1, a3
-; RV32IA-TSO-NEXT: not a3, a4
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: or a1, a1, a3
-; RV32IA-TSO-NEXT: amoand.w a1, a1, (a2)
-; RV32IA-TSO-NEXT: srl a0, a1, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_and_i16_acquire:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-TSO-NOZACAS-NEXT: not a3, a4
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: or a1, a1, a3
+; RV32IA-TSO-NOZACAS-NEXT: amoand.w a1, a1, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_and_i16_acquire:
; RV64I: # %bb.0:
@@ -13004,6 +16992,36 @@ define i16 @atomicrmw_and_i16_acquire(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_and_i16_acquire:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-ZACAS-NEXT: sll a4, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a3
+; RV32IA-WMO-ZACAS-NEXT: not a3, a4
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: or a1, a1, a3
+; RV32IA-WMO-ZACAS-NEXT: amoand.w.aq a1, a1, (a2)
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_and_i16_acquire:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-ZACAS-NEXT: sll a4, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: and a1, a1, a3
+; RV32IA-TSO-ZACAS-NEXT: not a3, a4
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: or a1, a1, a3
+; RV32IA-TSO-ZACAS-NEXT: amoand.w a1, a1, (a2)
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_and_i16_acquire:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -13034,6 +17052,16 @@ define i16 @atomicrmw_and_i16_acquire(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_and_i16_acquire:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amoand.h.aq a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_and_i16_acquire:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amoand.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_and_i16_acquire:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amoand.h.aq a0, a1, (a0)
@@ -13058,35 +17086,35 @@ define i16 @atomicrmw_and_i16_release(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_and_i16_release:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: lui a3, 16
-; RV32IA-WMO-NEXT: addi a3, a3, -1
-; RV32IA-WMO-NEXT: sll a4, a3, a0
-; RV32IA-WMO-NEXT: and a1, a1, a3
-; RV32IA-WMO-NEXT: not a3, a4
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: or a1, a1, a3
-; RV32IA-WMO-NEXT: amoand.w.rl a1, a1, (a2)
-; RV32IA-WMO-NEXT: srl a0, a1, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_and_i16_release:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-WMO-NOZACAS-NEXT: not a3, a4
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: or a1, a1, a3
+; RV32IA-WMO-NOZACAS-NEXT: amoand.w.rl a1, a1, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_and_i16_release:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: lui a3, 16
-; RV32IA-TSO-NEXT: addi a3, a3, -1
-; RV32IA-TSO-NEXT: sll a4, a3, a0
-; RV32IA-TSO-NEXT: and a1, a1, a3
-; RV32IA-TSO-NEXT: not a3, a4
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: or a1, a1, a3
-; RV32IA-TSO-NEXT: amoand.w a1, a1, (a2)
-; RV32IA-TSO-NEXT: srl a0, a1, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_and_i16_release:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-TSO-NOZACAS-NEXT: not a3, a4
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: or a1, a1, a3
+; RV32IA-TSO-NOZACAS-NEXT: amoand.w a1, a1, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_and_i16_release:
; RV64I: # %bb.0:
@@ -13128,6 +17156,36 @@ define i16 @atomicrmw_and_i16_release(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_and_i16_release:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-ZACAS-NEXT: sll a4, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a3
+; RV32IA-WMO-ZACAS-NEXT: not a3, a4
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: or a1, a1, a3
+; RV32IA-WMO-ZACAS-NEXT: amoand.w.rl a1, a1, (a2)
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_and_i16_release:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-ZACAS-NEXT: sll a4, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: and a1, a1, a3
+; RV32IA-TSO-ZACAS-NEXT: not a3, a4
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: or a1, a1, a3
+; RV32IA-TSO-ZACAS-NEXT: amoand.w a1, a1, (a2)
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_and_i16_release:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -13158,6 +17216,16 @@ define i16 @atomicrmw_and_i16_release(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_and_i16_release:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amoand.h.rl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_and_i16_release:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amoand.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_and_i16_release:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amoand.h.rl a0, a1, (a0)
@@ -13182,35 +17250,35 @@ define i16 @atomicrmw_and_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_and_i16_acq_rel:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: lui a3, 16
-; RV32IA-WMO-NEXT: addi a3, a3, -1
-; RV32IA-WMO-NEXT: sll a4, a3, a0
-; RV32IA-WMO-NEXT: and a1, a1, a3
-; RV32IA-WMO-NEXT: not a3, a4
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: or a1, a1, a3
-; RV32IA-WMO-NEXT: amoand.w.aqrl a1, a1, (a2)
-; RV32IA-WMO-NEXT: srl a0, a1, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_and_i16_acq_rel:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-WMO-NOZACAS-NEXT: not a3, a4
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: or a1, a1, a3
+; RV32IA-WMO-NOZACAS-NEXT: amoand.w.aqrl a1, a1, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_and_i16_acq_rel:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: lui a3, 16
-; RV32IA-TSO-NEXT: addi a3, a3, -1
-; RV32IA-TSO-NEXT: sll a4, a3, a0
-; RV32IA-TSO-NEXT: and a1, a1, a3
-; RV32IA-TSO-NEXT: not a3, a4
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: or a1, a1, a3
-; RV32IA-TSO-NEXT: amoand.w a1, a1, (a2)
-; RV32IA-TSO-NEXT: srl a0, a1, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_and_i16_acq_rel:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-TSO-NOZACAS-NEXT: not a3, a4
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: or a1, a1, a3
+; RV32IA-TSO-NOZACAS-NEXT: amoand.w a1, a1, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_and_i16_acq_rel:
; RV64I: # %bb.0:
@@ -13252,6 +17320,36 @@ define i16 @atomicrmw_and_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_and_i16_acq_rel:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-ZACAS-NEXT: sll a4, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a3
+; RV32IA-WMO-ZACAS-NEXT: not a3, a4
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: or a1, a1, a3
+; RV32IA-WMO-ZACAS-NEXT: amoand.w.aqrl a1, a1, (a2)
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_and_i16_acq_rel:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-ZACAS-NEXT: sll a4, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: and a1, a1, a3
+; RV32IA-TSO-ZACAS-NEXT: not a3, a4
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: or a1, a1, a3
+; RV32IA-TSO-ZACAS-NEXT: amoand.w a1, a1, (a2)
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_and_i16_acq_rel:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -13282,6 +17380,16 @@ define i16 @atomicrmw_and_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_and_i16_acq_rel:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amoand.h.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_and_i16_acq_rel:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amoand.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_and_i16_acq_rel:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amoand.h.aqrl a0, a1, (a0)
@@ -13306,35 +17414,35 @@ define i16 @atomicrmw_and_i16_seq_cst(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_and_i16_seq_cst:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: lui a3, 16
-; RV32IA-WMO-NEXT: addi a3, a3, -1
-; RV32IA-WMO-NEXT: sll a4, a3, a0
-; RV32IA-WMO-NEXT: and a1, a1, a3
-; RV32IA-WMO-NEXT: not a3, a4
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: or a1, a1, a3
-; RV32IA-WMO-NEXT: amoand.w.aqrl a1, a1, (a2)
-; RV32IA-WMO-NEXT: srl a0, a1, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_and_i16_seq_cst:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-WMO-NOZACAS-NEXT: not a3, a4
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: or a1, a1, a3
+; RV32IA-WMO-NOZACAS-NEXT: amoand.w.aqrl a1, a1, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_and_i16_seq_cst:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: lui a3, 16
-; RV32IA-TSO-NEXT: addi a3, a3, -1
-; RV32IA-TSO-NEXT: sll a4, a3, a0
-; RV32IA-TSO-NEXT: and a1, a1, a3
-; RV32IA-TSO-NEXT: not a3, a4
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: or a1, a1, a3
-; RV32IA-TSO-NEXT: amoand.w a1, a1, (a2)
-; RV32IA-TSO-NEXT: srl a0, a1, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_and_i16_seq_cst:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-TSO-NOZACAS-NEXT: not a3, a4
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: or a1, a1, a3
+; RV32IA-TSO-NOZACAS-NEXT: amoand.w a1, a1, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_and_i16_seq_cst:
; RV64I: # %bb.0:
@@ -13376,6 +17484,36 @@ define i16 @atomicrmw_and_i16_seq_cst(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_and_i16_seq_cst:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-ZACAS-NEXT: sll a4, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a3
+; RV32IA-WMO-ZACAS-NEXT: not a3, a4
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: or a1, a1, a3
+; RV32IA-WMO-ZACAS-NEXT: amoand.w.aqrl a1, a1, (a2)
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_and_i16_seq_cst:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-ZACAS-NEXT: sll a4, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: and a1, a1, a3
+; RV32IA-TSO-ZACAS-NEXT: not a3, a4
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: or a1, a1, a3
+; RV32IA-TSO-ZACAS-NEXT: amoand.w a1, a1, (a2)
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_and_i16_seq_cst:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -13406,6 +17544,16 @@ define i16 @atomicrmw_and_i16_seq_cst(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_and_i16_seq_cst:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amoand.h.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_and_i16_seq_cst:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amoand.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_and_i16_seq_cst:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amoand.h.aqrl a0, a1, (a0)
@@ -13430,27 +17578,27 @@ define i16 @atomicrmw_nand_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-LABEL: atomicrmw_nand_i16_monotonic:
-; RV32IA: # %bb.0:
-; RV32IA-NEXT: andi a2, a0, -4
-; RV32IA-NEXT: slli a0, a0, 3
-; RV32IA-NEXT: lui a3, 16
-; RV32IA-NEXT: addi a3, a3, -1
-; RV32IA-NEXT: sll a4, a3, a0
-; RV32IA-NEXT: and a1, a1, a3
-; RV32IA-NEXT: sll a1, a1, a0
-; RV32IA-NEXT: .LBB95_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: lr.w a3, (a2)
-; RV32IA-NEXT: and a5, a3, a1
-; RV32IA-NEXT: not a5, a5
-; RV32IA-NEXT: xor a5, a3, a5
-; RV32IA-NEXT: and a5, a5, a4
-; RV32IA-NEXT: xor a5, a3, a5
-; RV32IA-NEXT: sc.w a5, a5, (a2)
-; RV32IA-NEXT: bnez a5, .LBB95_1
-; RV32IA-NEXT: # %bb.2:
-; RV32IA-NEXT: srl a0, a3, a0
-; RV32IA-NEXT: ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_nand_i16_monotonic:
+; RV32IA-NOZACAS: # %bb.0:
+; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT: lui a3, 16
+; RV32IA-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-NOZACAS-NEXT: .LBB95_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NOZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-NOZACAS-NEXT: and a5, a3, a1
+; RV32IA-NOZACAS-NEXT: not a5, a5
+; RV32IA-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-NOZACAS-NEXT: and a5, a5, a4
+; RV32IA-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-NOZACAS-NEXT: bnez a5, .LBB95_1
+; RV32IA-NOZACAS-NEXT: # %bb.2:
+; RV32IA-NOZACAS-NEXT: srl a0, a3, a0
+; RV32IA-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_nand_i16_monotonic:
; RV64I: # %bb.0:
@@ -13484,6 +17632,28 @@ define i16 @atomicrmw_nand_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV64IA-NOZACAS-NEXT: srlw a0, a3, a0
; RV64IA-NOZACAS-NEXT: ret
;
+; RV32IA-ZACAS-LABEL: atomicrmw_nand_i16_monotonic:
+; RV32IA-ZACAS: # %bb.0:
+; RV32IA-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-ZACAS-NEXT: lui a3, 16
+; RV32IA-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-ZACAS-NEXT: sll a4, a3, a0
+; RV32IA-ZACAS-NEXT: and a1, a1, a3
+; RV32IA-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-ZACAS-NEXT: .LBB95_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-ZACAS-NEXT: and a5, a3, a1
+; RV32IA-ZACAS-NEXT: not a5, a5
+; RV32IA-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-ZACAS-NEXT: and a5, a5, a4
+; RV32IA-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-ZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-ZACAS-NEXT: bnez a5, .LBB95_1
+; RV32IA-ZACAS-NEXT: # %bb.2:
+; RV32IA-ZACAS-NEXT: srl a0, a3, a0
+; RV32IA-ZACAS-NEXT: ret
+;
; RV64IA-ZACAS-LABEL: atomicrmw_nand_i16_monotonic:
; RV64IA-ZACAS: # %bb.0:
; RV64IA-ZACAS-NEXT: andi a2, a0, -4
@@ -13506,6 +17676,50 @@ define i16 @atomicrmw_nand_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV64IA-ZACAS-NEXT: srlw a0, a3, a0
; RV64IA-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i16_monotonic:
+; RV32IA-WMO-ZABHA-NOZACAS: # %bb.0:
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: .LBB95_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: and a5, a3, a1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: not a5, a5
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: and a5, a5, a4
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: bnez a5, .LBB95_1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: srl a0, a3, a0
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i16_monotonic:
+; RV32IA-TSO-ZABHA-NOZACAS: # %bb.0:
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: .LBB95_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: and a5, a3, a1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: not a5, a5
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: and a5, a5, a4
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: bnez a5, .LBB95_1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: srl a0, a3, a0
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: ret
+;
; RV64IA-WMO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i16_monotonic:
; RV64IA-WMO-ZABHA-NOZACAS: # %bb.0:
; RV64IA-WMO-ZABHA-NOZACAS-NEXT: andi a2, a0, -4
@@ -13550,6 +17764,36 @@ define i16 @atomicrmw_nand_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-ZABHA-NOZACAS-NEXT: srlw a0, a3, a0
; RV64IA-TSO-ZABHA-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i16_monotonic:
+; RV32IA-WMO-ZABHA-ZACAS: # %bb.0:
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: mv a2, a0
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: lhu a0, 0(a0)
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: .LBB95_1: # %atomicrmw.start
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: and a3, a0, a1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: not a3, a3
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: slli a4, a0, 16
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: amocas.h a0, a3, (a2)
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: srai a4, a4, 16
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: bne a0, a4, .LBB95_1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: # %bb.2: # %atomicrmw.end
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i16_monotonic:
+; RV32IA-TSO-ZABHA-ZACAS: # %bb.0:
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: mv a2, a0
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: lhu a0, 0(a0)
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: .LBB95_1: # %atomicrmw.start
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: and a3, a0, a1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: not a3, a3
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: slli a4, a0, 16
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: amocas.h a0, a3, (a2)
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: srai a4, a4, 16
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: bne a0, a4, .LBB95_1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: # %bb.2: # %atomicrmw.end
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i16_monotonic:
; RV64IA-WMO-ZABHA-ZACAS: # %bb.0:
; RV64IA-WMO-ZABHA-ZACAS-NEXT: mv a2, a0
@@ -13594,49 +17838,49 @@ define i16 @atomicrmw_nand_i16_acquire(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_nand_i16_acquire:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: lui a3, 16
-; RV32IA-WMO-NEXT: addi a3, a3, -1
-; RV32IA-WMO-NEXT: sll a4, a3, a0
-; RV32IA-WMO-NEXT: and a1, a1, a3
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: .LBB96_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT: lr.w.aq a3, (a2)
-; RV32IA-WMO-NEXT: and a5, a3, a1
-; RV32IA-WMO-NEXT: not a5, a5
-; RV32IA-WMO-NEXT: xor a5, a3, a5
-; RV32IA-WMO-NEXT: and a5, a5, a4
-; RV32IA-WMO-NEXT: xor a5, a3, a5
-; RV32IA-WMO-NEXT: sc.w a5, a5, (a2)
-; RV32IA-WMO-NEXT: bnez a5, .LBB96_1
-; RV32IA-WMO-NEXT: # %bb.2:
-; RV32IA-WMO-NEXT: srl a0, a3, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_nand_i16_acquire:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: .LBB96_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: lr.w.aq a3, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: and a5, a3, a1
+; RV32IA-WMO-NOZACAS-NEXT: not a5, a5
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-NOZACAS-NEXT: and a5, a5, a4
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: bnez a5, .LBB96_1
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_nand_i16_acquire:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: lui a3, 16
-; RV32IA-TSO-NEXT: addi a3, a3, -1
-; RV32IA-TSO-NEXT: sll a4, a3, a0
-; RV32IA-TSO-NEXT: and a1, a1, a3
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: .LBB96_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT: lr.w a3, (a2)
-; RV32IA-TSO-NEXT: and a5, a3, a1
-; RV32IA-TSO-NEXT: not a5, a5
-; RV32IA-TSO-NEXT: xor a5, a3, a5
-; RV32IA-TSO-NEXT: and a5, a5, a4
-; RV32IA-TSO-NEXT: xor a5, a3, a5
-; RV32IA-TSO-NEXT: sc.w a5, a5, (a2)
-; RV32IA-TSO-NEXT: bnez a5, .LBB96_1
-; RV32IA-TSO-NEXT: # %bb.2:
-; RV32IA-TSO-NEXT: srl a0, a3, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_nand_i16_acquire:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: .LBB96_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: and a5, a3, a1
+; RV32IA-TSO-NOZACAS-NEXT: not a5, a5
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-NOZACAS-NEXT: and a5, a5, a4
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: bnez a5, .LBB96_1
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_nand_i16_acquire:
; RV64I: # %bb.0:
@@ -13692,6 +17936,50 @@ define i16 @atomicrmw_nand_i16_acquire(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a3, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_nand_i16_acquire:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-ZACAS-NEXT: sll a4, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a3
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: .LBB96_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a3, (a2)
+; RV32IA-WMO-ZACAS-NEXT: and a5, a3, a1
+; RV32IA-WMO-ZACAS-NEXT: not a5, a5
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a4
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-ZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB96_1
+; RV32IA-WMO-ZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_nand_i16_acquire:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-ZACAS-NEXT: sll a4, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: and a1, a1, a3
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: .LBB96_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-TSO-ZACAS-NEXT: and a5, a3, a1
+; RV32IA-TSO-ZACAS-NEXT: not a5, a5
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a4
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT: bnez a5, .LBB96_1
+; RV32IA-TSO-ZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_nand_i16_acquire:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -13736,6 +18024,50 @@ define i16 @atomicrmw_nand_i16_acquire(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a3, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i16_acquire:
+; RV32IA-WMO-ZABHA-NOZACAS: # %bb.0:
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: .LBB96_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: lr.w.aq a3, (a2)
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: and a5, a3, a1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: not a5, a5
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: and a5, a5, a4
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: bnez a5, .LBB96_1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: srl a0, a3, a0
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i16_acquire:
+; RV32IA-TSO-ZABHA-NOZACAS: # %bb.0:
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: .LBB96_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: and a5, a3, a1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: not a5, a5
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: and a5, a5, a4
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: bnez a5, .LBB96_1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: srl a0, a3, a0
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: ret
+;
; RV64IA-WMO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i16_acquire:
; RV64IA-WMO-ZABHA-NOZACAS: # %bb.0:
; RV64IA-WMO-ZABHA-NOZACAS-NEXT: andi a2, a0, -4
@@ -13780,6 +18112,36 @@ define i16 @atomicrmw_nand_i16_acquire(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-ZABHA-NOZACAS-NEXT: srlw a0, a3, a0
; RV64IA-TSO-ZABHA-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i16_acquire:
+; RV32IA-WMO-ZABHA-ZACAS: # %bb.0:
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: mv a2, a0
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: lhu a0, 0(a0)
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: .LBB96_1: # %atomicrmw.start
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: and a3, a0, a1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: not a3, a3
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: slli a4, a0, 16
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: amocas.h.aq a0, a3, (a2)
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: srai a4, a4, 16
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: bne a0, a4, .LBB96_1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: # %bb.2: # %atomicrmw.end
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i16_acquire:
+; RV32IA-TSO-ZABHA-ZACAS: # %bb.0:
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: mv a2, a0
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: lhu a0, 0(a0)
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: .LBB96_1: # %atomicrmw.start
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: and a3, a0, a1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: not a3, a3
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: slli a4, a0, 16
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: amocas.h a0, a3, (a2)
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: srai a4, a4, 16
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: bne a0, a4, .LBB96_1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: # %bb.2: # %atomicrmw.end
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i16_acquire:
; RV64IA-WMO-ZABHA-ZACAS: # %bb.0:
; RV64IA-WMO-ZABHA-ZACAS-NEXT: mv a2, a0
@@ -13824,49 +18186,49 @@ define i16 @atomicrmw_nand_i16_release(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_nand_i16_release:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: lui a3, 16
-; RV32IA-WMO-NEXT: addi a3, a3, -1
-; RV32IA-WMO-NEXT: sll a4, a3, a0
-; RV32IA-WMO-NEXT: and a1, a1, a3
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: .LBB97_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT: lr.w a3, (a2)
-; RV32IA-WMO-NEXT: and a5, a3, a1
-; RV32IA-WMO-NEXT: not a5, a5
-; RV32IA-WMO-NEXT: xor a5, a3, a5
-; RV32IA-WMO-NEXT: and a5, a5, a4
-; RV32IA-WMO-NEXT: xor a5, a3, a5
-; RV32IA-WMO-NEXT: sc.w.rl a5, a5, (a2)
-; RV32IA-WMO-NEXT: bnez a5, .LBB97_1
-; RV32IA-WMO-NEXT: # %bb.2:
-; RV32IA-WMO-NEXT: srl a0, a3, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_nand_i16_release:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: .LBB97_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: and a5, a3, a1
+; RV32IA-WMO-NOZACAS-NEXT: not a5, a5
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-NOZACAS-NEXT: and a5, a5, a4
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-NOZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: bnez a5, .LBB97_1
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_nand_i16_release:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: lui a3, 16
-; RV32IA-TSO-NEXT: addi a3, a3, -1
-; RV32IA-TSO-NEXT: sll a4, a3, a0
-; RV32IA-TSO-NEXT: and a1, a1, a3
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: .LBB97_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT: lr.w a3, (a2)
-; RV32IA-TSO-NEXT: and a5, a3, a1
-; RV32IA-TSO-NEXT: not a5, a5
-; RV32IA-TSO-NEXT: xor a5, a3, a5
-; RV32IA-TSO-NEXT: and a5, a5, a4
-; RV32IA-TSO-NEXT: xor a5, a3, a5
-; RV32IA-TSO-NEXT: sc.w a5, a5, (a2)
-; RV32IA-TSO-NEXT: bnez a5, .LBB97_1
-; RV32IA-TSO-NEXT: # %bb.2:
-; RV32IA-TSO-NEXT: srl a0, a3, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_nand_i16_release:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: .LBB97_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: and a5, a3, a1
+; RV32IA-TSO-NOZACAS-NEXT: not a5, a5
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-NOZACAS-NEXT: and a5, a5, a4
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: bnez a5, .LBB97_1
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_nand_i16_release:
; RV64I: # %bb.0:
@@ -13922,6 +18284,50 @@ define i16 @atomicrmw_nand_i16_release(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a3, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_nand_i16_release:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-ZACAS-NEXT: sll a4, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a3
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: .LBB97_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-WMO-ZACAS-NEXT: and a5, a3, a1
+; RV32IA-WMO-ZACAS-NEXT: not a5, a5
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a4
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB97_1
+; RV32IA-WMO-ZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_nand_i16_release:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-ZACAS-NEXT: sll a4, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: and a1, a1, a3
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: .LBB97_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-TSO-ZACAS-NEXT: and a5, a3, a1
+; RV32IA-TSO-ZACAS-NEXT: not a5, a5
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a4
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT: bnez a5, .LBB97_1
+; RV32IA-TSO-ZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_nand_i16_release:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -13966,6 +18372,50 @@ define i16 @atomicrmw_nand_i16_release(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a3, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i16_release:
+; RV32IA-WMO-ZABHA-NOZACAS: # %bb.0:
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: .LBB97_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: and a5, a3, a1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: not a5, a5
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: and a5, a5, a4
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: bnez a5, .LBB97_1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: srl a0, a3, a0
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i16_release:
+; RV32IA-TSO-ZABHA-NOZACAS: # %bb.0:
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: .LBB97_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: and a5, a3, a1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: not a5, a5
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: and a5, a5, a4
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: bnez a5, .LBB97_1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: srl a0, a3, a0
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: ret
+;
; RV64IA-WMO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i16_release:
; RV64IA-WMO-ZABHA-NOZACAS: # %bb.0:
; RV64IA-WMO-ZABHA-NOZACAS-NEXT: andi a2, a0, -4
@@ -14010,6 +18460,36 @@ define i16 @atomicrmw_nand_i16_release(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-ZABHA-NOZACAS-NEXT: srlw a0, a3, a0
; RV64IA-TSO-ZABHA-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i16_release:
+; RV32IA-WMO-ZABHA-ZACAS: # %bb.0:
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: mv a2, a0
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: lhu a0, 0(a0)
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: .LBB97_1: # %atomicrmw.start
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: and a3, a0, a1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: not a3, a3
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: slli a4, a0, 16
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: amocas.h.rl a0, a3, (a2)
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: srai a4, a4, 16
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: bne a0, a4, .LBB97_1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: # %bb.2: # %atomicrmw.end
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i16_release:
+; RV32IA-TSO-ZABHA-ZACAS: # %bb.0:
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: mv a2, a0
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: lhu a0, 0(a0)
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: .LBB97_1: # %atomicrmw.start
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: and a3, a0, a1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: not a3, a3
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: slli a4, a0, 16
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: amocas.h a0, a3, (a2)
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: srai a4, a4, 16
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: bne a0, a4, .LBB97_1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: # %bb.2: # %atomicrmw.end
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i16_release:
; RV64IA-WMO-ZABHA-ZACAS: # %bb.0:
; RV64IA-WMO-ZABHA-ZACAS-NEXT: mv a2, a0
@@ -14054,49 +18534,49 @@ define i16 @atomicrmw_nand_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_nand_i16_acq_rel:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: lui a3, 16
-; RV32IA-WMO-NEXT: addi a3, a3, -1
-; RV32IA-WMO-NEXT: sll a4, a3, a0
-; RV32IA-WMO-NEXT: and a1, a1, a3
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: .LBB98_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT: lr.w.aq a3, (a2)
-; RV32IA-WMO-NEXT: and a5, a3, a1
-; RV32IA-WMO-NEXT: not a5, a5
-; RV32IA-WMO-NEXT: xor a5, a3, a5
-; RV32IA-WMO-NEXT: and a5, a5, a4
-; RV32IA-WMO-NEXT: xor a5, a3, a5
-; RV32IA-WMO-NEXT: sc.w.rl a5, a5, (a2)
-; RV32IA-WMO-NEXT: bnez a5, .LBB98_1
-; RV32IA-WMO-NEXT: # %bb.2:
-; RV32IA-WMO-NEXT: srl a0, a3, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_nand_i16_acq_rel:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: .LBB98_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: lr.w.aq a3, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: and a5, a3, a1
+; RV32IA-WMO-NOZACAS-NEXT: not a5, a5
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-NOZACAS-NEXT: and a5, a5, a4
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-NOZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: bnez a5, .LBB98_1
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_nand_i16_acq_rel:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: lui a3, 16
-; RV32IA-TSO-NEXT: addi a3, a3, -1
-; RV32IA-TSO-NEXT: sll a4, a3, a0
-; RV32IA-TSO-NEXT: and a1, a1, a3
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: .LBB98_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT: lr.w a3, (a2)
-; RV32IA-TSO-NEXT: and a5, a3, a1
-; RV32IA-TSO-NEXT: not a5, a5
-; RV32IA-TSO-NEXT: xor a5, a3, a5
-; RV32IA-TSO-NEXT: and a5, a5, a4
-; RV32IA-TSO-NEXT: xor a5, a3, a5
-; RV32IA-TSO-NEXT: sc.w a5, a5, (a2)
-; RV32IA-TSO-NEXT: bnez a5, .LBB98_1
-; RV32IA-TSO-NEXT: # %bb.2:
-; RV32IA-TSO-NEXT: srl a0, a3, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_nand_i16_acq_rel:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: .LBB98_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: and a5, a3, a1
+; RV32IA-TSO-NOZACAS-NEXT: not a5, a5
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-NOZACAS-NEXT: and a5, a5, a4
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: bnez a5, .LBB98_1
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_nand_i16_acq_rel:
; RV64I: # %bb.0:
@@ -14152,6 +18632,50 @@ define i16 @atomicrmw_nand_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a3, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_nand_i16_acq_rel:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-ZACAS-NEXT: sll a4, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a3
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: .LBB98_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a3, (a2)
+; RV32IA-WMO-ZACAS-NEXT: and a5, a3, a1
+; RV32IA-WMO-ZACAS-NEXT: not a5, a5
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a4
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB98_1
+; RV32IA-WMO-ZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_nand_i16_acq_rel:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-ZACAS-NEXT: sll a4, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: and a1, a1, a3
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: .LBB98_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-TSO-ZACAS-NEXT: and a5, a3, a1
+; RV32IA-TSO-ZACAS-NEXT: not a5, a5
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a4
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT: bnez a5, .LBB98_1
+; RV32IA-TSO-ZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_nand_i16_acq_rel:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -14196,6 +18720,50 @@ define i16 @atomicrmw_nand_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a3, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i16_acq_rel:
+; RV32IA-WMO-ZABHA-NOZACAS: # %bb.0:
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: .LBB98_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: lr.w.aq a3, (a2)
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: and a5, a3, a1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: not a5, a5
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: and a5, a5, a4
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: bnez a5, .LBB98_1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: srl a0, a3, a0
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i16_acq_rel:
+; RV32IA-TSO-ZABHA-NOZACAS: # %bb.0:
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: .LBB98_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: and a5, a3, a1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: not a5, a5
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: and a5, a5, a4
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: bnez a5, .LBB98_1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: srl a0, a3, a0
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: ret
+;
; RV64IA-WMO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i16_acq_rel:
; RV64IA-WMO-ZABHA-NOZACAS: # %bb.0:
; RV64IA-WMO-ZABHA-NOZACAS-NEXT: andi a2, a0, -4
@@ -14240,6 +18808,36 @@ define i16 @atomicrmw_nand_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-ZABHA-NOZACAS-NEXT: srlw a0, a3, a0
; RV64IA-TSO-ZABHA-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i16_acq_rel:
+; RV32IA-WMO-ZABHA-ZACAS: # %bb.0:
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: mv a2, a0
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: lhu a0, 0(a0)
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: .LBB98_1: # %atomicrmw.start
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: and a3, a0, a1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: not a3, a3
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: slli a4, a0, 16
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: amocas.h.aqrl a0, a3, (a2)
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: srai a4, a4, 16
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: bne a0, a4, .LBB98_1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: # %bb.2: # %atomicrmw.end
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i16_acq_rel:
+; RV32IA-TSO-ZABHA-ZACAS: # %bb.0:
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: mv a2, a0
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: lhu a0, 0(a0)
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: .LBB98_1: # %atomicrmw.start
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: and a3, a0, a1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: not a3, a3
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: slli a4, a0, 16
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: amocas.h a0, a3, (a2)
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: srai a4, a4, 16
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: bne a0, a4, .LBB98_1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: # %bb.2: # %atomicrmw.end
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i16_acq_rel:
; RV64IA-WMO-ZABHA-ZACAS: # %bb.0:
; RV64IA-WMO-ZABHA-ZACAS-NEXT: mv a2, a0
@@ -14284,27 +18882,27 @@ define i16 @atomicrmw_nand_i16_seq_cst(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-LABEL: atomicrmw_nand_i16_seq_cst:
-; RV32IA: # %bb.0:
-; RV32IA-NEXT: andi a2, a0, -4
-; RV32IA-NEXT: slli a0, a0, 3
-; RV32IA-NEXT: lui a3, 16
-; RV32IA-NEXT: addi a3, a3, -1
-; RV32IA-NEXT: sll a4, a3, a0
-; RV32IA-NEXT: and a1, a1, a3
-; RV32IA-NEXT: sll a1, a1, a0
-; RV32IA-NEXT: .LBB99_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: lr.w.aqrl a3, (a2)
-; RV32IA-NEXT: and a5, a3, a1
-; RV32IA-NEXT: not a5, a5
-; RV32IA-NEXT: xor a5, a3, a5
-; RV32IA-NEXT: and a5, a5, a4
-; RV32IA-NEXT: xor a5, a3, a5
-; RV32IA-NEXT: sc.w.rl a5, a5, (a2)
-; RV32IA-NEXT: bnez a5, .LBB99_1
-; RV32IA-NEXT: # %bb.2:
-; RV32IA-NEXT: srl a0, a3, a0
-; RV32IA-NEXT: ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_nand_i16_seq_cst:
+; RV32IA-NOZACAS: # %bb.0:
+; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT: lui a3, 16
+; RV32IA-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-NOZACAS-NEXT: .LBB99_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NOZACAS-NEXT: lr.w.aqrl a3, (a2)
+; RV32IA-NOZACAS-NEXT: and a5, a3, a1
+; RV32IA-NOZACAS-NEXT: not a5, a5
+; RV32IA-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-NOZACAS-NEXT: and a5, a5, a4
+; RV32IA-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-NOZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-NOZACAS-NEXT: bnez a5, .LBB99_1
+; RV32IA-NOZACAS-NEXT: # %bb.2:
+; RV32IA-NOZACAS-NEXT: srl a0, a3, a0
+; RV32IA-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_nand_i16_seq_cst:
; RV64I: # %bb.0:
@@ -14338,6 +18936,28 @@ define i16 @atomicrmw_nand_i16_seq_cst(ptr %a, i16 %b) nounwind {
; RV64IA-NOZACAS-NEXT: srlw a0, a3, a0
; RV64IA-NOZACAS-NEXT: ret
;
+; RV32IA-ZACAS-LABEL: atomicrmw_nand_i16_seq_cst:
+; RV32IA-ZACAS: # %bb.0:
+; RV32IA-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-ZACAS-NEXT: lui a3, 16
+; RV32IA-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-ZACAS-NEXT: sll a4, a3, a0
+; RV32IA-ZACAS-NEXT: and a1, a1, a3
+; RV32IA-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-ZACAS-NEXT: .LBB99_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT: lr.w.aqrl a3, (a2)
+; RV32IA-ZACAS-NEXT: and a5, a3, a1
+; RV32IA-ZACAS-NEXT: not a5, a5
+; RV32IA-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-ZACAS-NEXT: and a5, a5, a4
+; RV32IA-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-ZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-ZACAS-NEXT: bnez a5, .LBB99_1
+; RV32IA-ZACAS-NEXT: # %bb.2:
+; RV32IA-ZACAS-NEXT: srl a0, a3, a0
+; RV32IA-ZACAS-NEXT: ret
+;
; RV64IA-ZACAS-LABEL: atomicrmw_nand_i16_seq_cst:
; RV64IA-ZACAS: # %bb.0:
; RV64IA-ZACAS-NEXT: andi a2, a0, -4
@@ -14360,6 +18980,50 @@ define i16 @atomicrmw_nand_i16_seq_cst(ptr %a, i16 %b) nounwind {
; RV64IA-ZACAS-NEXT: srlw a0, a3, a0
; RV64IA-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i16_seq_cst:
+; RV32IA-WMO-ZABHA-NOZACAS: # %bb.0:
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: .LBB99_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: lr.w.aqrl a3, (a2)
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: and a5, a3, a1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: not a5, a5
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: and a5, a5, a4
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: bnez a5, .LBB99_1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: srl a0, a3, a0
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i16_seq_cst:
+; RV32IA-TSO-ZABHA-NOZACAS: # %bb.0:
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: .LBB99_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: lr.w.aqrl a3, (a2)
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: and a5, a3, a1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: not a5, a5
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: and a5, a5, a4
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: bnez a5, .LBB99_1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: srl a0, a3, a0
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: ret
+;
; RV64IA-WMO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i16_seq_cst:
; RV64IA-WMO-ZABHA-NOZACAS: # %bb.0:
; RV64IA-WMO-ZABHA-NOZACAS-NEXT: andi a2, a0, -4
@@ -14404,6 +19068,38 @@ define i16 @atomicrmw_nand_i16_seq_cst(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-ZABHA-NOZACAS-NEXT: srlw a0, a3, a0
; RV64IA-TSO-ZABHA-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i16_seq_cst:
+; RV32IA-WMO-ZABHA-ZACAS: # %bb.0:
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: mv a2, a0
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: lhu a0, 0(a0)
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: .LBB99_1: # %atomicrmw.start
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: and a3, a0, a1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: fence rw, rw
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: not a3, a3
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: slli a4, a0, 16
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: amocas.h.aqrl a0, a3, (a2)
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: srai a4, a4, 16
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: bne a0, a4, .LBB99_1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: # %bb.2: # %atomicrmw.end
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i16_seq_cst:
+; RV32IA-TSO-ZABHA-ZACAS: # %bb.0:
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: mv a2, a0
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: lhu a0, 0(a0)
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: .LBB99_1: # %atomicrmw.start
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: and a3, a0, a1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: fence rw, rw
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: not a3, a3
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: slli a4, a0, 16
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: amocas.h a0, a3, (a2)
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: srai a4, a4, 16
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: bne a0, a4, .LBB99_1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: # %bb.2: # %atomicrmw.end
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i16_seq_cst:
; RV64IA-WMO-ZABHA-ZACAS: # %bb.0:
; RV64IA-WMO-ZABHA-ZACAS-NEXT: mv a2, a0
@@ -14450,16 +19146,16 @@ define i16 @atomicrmw_or_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-LABEL: atomicrmw_or_i16_monotonic:
-; RV32IA: # %bb.0:
-; RV32IA-NEXT: andi a2, a0, -4
-; RV32IA-NEXT: slli a0, a0, 3
-; RV32IA-NEXT: slli a1, a1, 16
-; RV32IA-NEXT: srli a1, a1, 16
-; RV32IA-NEXT: sll a1, a1, a0
-; RV32IA-NEXT: amoor.w a1, a1, (a2)
-; RV32IA-NEXT: srl a0, a1, a0
-; RV32IA-NEXT: ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_or_i16_monotonic:
+; RV32IA-NOZACAS: # %bb.0:
+; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT: slli a1, a1, 16
+; RV32IA-NOZACAS-NEXT: srli a1, a1, 16
+; RV32IA-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-NOZACAS-NEXT: amoor.w a1, a1, (a2)
+; RV32IA-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_or_i16_monotonic:
; RV64I: # %bb.0:
@@ -14482,6 +19178,17 @@ define i16 @atomicrmw_or_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV64IA-NOZACAS-NEXT: srlw a0, a1, a0
; RV64IA-NOZACAS-NEXT: ret
;
+; RV32IA-ZACAS-LABEL: atomicrmw_or_i16_monotonic:
+; RV32IA-ZACAS: # %bb.0:
+; RV32IA-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-ZACAS-NEXT: slli a1, a1, 16
+; RV32IA-ZACAS-NEXT: srli a1, a1, 16
+; RV32IA-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-ZACAS-NEXT: amoor.w a1, a1, (a2)
+; RV32IA-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-ZACAS-NEXT: ret
+;
; RV64IA-ZACAS-LABEL: atomicrmw_or_i16_monotonic:
; RV64IA-ZACAS: # %bb.0:
; RV64IA-ZACAS-NEXT: andi a2, a0, -4
@@ -14493,6 +19200,16 @@ define i16 @atomicrmw_or_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV64IA-ZACAS-NEXT: srlw a0, a1, a0
; RV64IA-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_or_i16_monotonic:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amoor.h a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_or_i16_monotonic:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amoor.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_or_i16_monotonic:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amoor.h a0, a1, (a0)
@@ -14517,27 +19234,27 @@ define i16 @atomicrmw_or_i16_acquire(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_or_i16_acquire:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: slli a1, a1, 16
-; RV32IA-WMO-NEXT: srli a1, a1, 16
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: amoor.w.aq a1, a1, (a2)
-; RV32IA-WMO-NEXT: srl a0, a1, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_or_i16_acquire:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: slli a1, a1, 16
+; RV32IA-WMO-NOZACAS-NEXT: srli a1, a1, 16
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: amoor.w.aq a1, a1, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_or_i16_acquire:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: slli a1, a1, 16
-; RV32IA-TSO-NEXT: srli a1, a1, 16
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: amoor.w a1, a1, (a2)
-; RV32IA-TSO-NEXT: srl a0, a1, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_or_i16_acquire:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: slli a1, a1, 16
+; RV32IA-TSO-NOZACAS-NEXT: srli a1, a1, 16
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: amoor.w a1, a1, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_or_i16_acquire:
; RV64I: # %bb.0:
@@ -14571,6 +19288,28 @@ define i16 @atomicrmw_or_i16_acquire(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_or_i16_acquire:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: slli a1, a1, 16
+; RV32IA-WMO-ZACAS-NEXT: srli a1, a1, 16
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: amoor.w.aq a1, a1, (a2)
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_or_i16_acquire:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: slli a1, a1, 16
+; RV32IA-TSO-ZACAS-NEXT: srli a1, a1, 16
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: amoor.w a1, a1, (a2)
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_or_i16_acquire:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -14593,6 +19332,16 @@ define i16 @atomicrmw_or_i16_acquire(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_or_i16_acquire:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amoor.h.aq a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_or_i16_acquire:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amoor.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_or_i16_acquire:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amoor.h.aq a0, a1, (a0)
@@ -14617,27 +19366,27 @@ define i16 @atomicrmw_or_i16_release(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_or_i16_release:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: slli a1, a1, 16
-; RV32IA-WMO-NEXT: srli a1, a1, 16
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: amoor.w.rl a1, a1, (a2)
-; RV32IA-WMO-NEXT: srl a0, a1, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_or_i16_release:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: slli a1, a1, 16
+; RV32IA-WMO-NOZACAS-NEXT: srli a1, a1, 16
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: amoor.w.rl a1, a1, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_or_i16_release:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: slli a1, a1, 16
-; RV32IA-TSO-NEXT: srli a1, a1, 16
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: amoor.w a1, a1, (a2)
-; RV32IA-TSO-NEXT: srl a0, a1, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_or_i16_release:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: slli a1, a1, 16
+; RV32IA-TSO-NOZACAS-NEXT: srli a1, a1, 16
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: amoor.w a1, a1, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_or_i16_release:
; RV64I: # %bb.0:
@@ -14671,6 +19420,28 @@ define i16 @atomicrmw_or_i16_release(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_or_i16_release:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: slli a1, a1, 16
+; RV32IA-WMO-ZACAS-NEXT: srli a1, a1, 16
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: amoor.w.rl a1, a1, (a2)
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_or_i16_release:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: slli a1, a1, 16
+; RV32IA-TSO-ZACAS-NEXT: srli a1, a1, 16
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: amoor.w a1, a1, (a2)
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_or_i16_release:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -14693,6 +19464,16 @@ define i16 @atomicrmw_or_i16_release(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_or_i16_release:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amoor.h.rl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_or_i16_release:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amoor.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_or_i16_release:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amoor.h.rl a0, a1, (a0)
@@ -14717,27 +19498,27 @@ define i16 @atomicrmw_or_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_or_i16_acq_rel:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: slli a1, a1, 16
-; RV32IA-WMO-NEXT: srli a1, a1, 16
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: amoor.w.aqrl a1, a1, (a2)
-; RV32IA-WMO-NEXT: srl a0, a1, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_or_i16_acq_rel:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: slli a1, a1, 16
+; RV32IA-WMO-NOZACAS-NEXT: srli a1, a1, 16
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: amoor.w.aqrl a1, a1, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_or_i16_acq_rel:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: slli a1, a1, 16
-; RV32IA-TSO-NEXT: srli a1, a1, 16
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: amoor.w a1, a1, (a2)
-; RV32IA-TSO-NEXT: srl a0, a1, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_or_i16_acq_rel:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: slli a1, a1, 16
+; RV32IA-TSO-NOZACAS-NEXT: srli a1, a1, 16
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: amoor.w a1, a1, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_or_i16_acq_rel:
; RV64I: # %bb.0:
@@ -14771,6 +19552,28 @@ define i16 @atomicrmw_or_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_or_i16_acq_rel:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: slli a1, a1, 16
+; RV32IA-WMO-ZACAS-NEXT: srli a1, a1, 16
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: amoor.w.aqrl a1, a1, (a2)
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_or_i16_acq_rel:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: slli a1, a1, 16
+; RV32IA-TSO-ZACAS-NEXT: srli a1, a1, 16
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: amoor.w a1, a1, (a2)
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_or_i16_acq_rel:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -14793,6 +19596,16 @@ define i16 @atomicrmw_or_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_or_i16_acq_rel:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amoor.h.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_or_i16_acq_rel:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amoor.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_or_i16_acq_rel:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amoor.h.aqrl a0, a1, (a0)
@@ -14817,27 +19630,27 @@ define i16 @atomicrmw_or_i16_seq_cst(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_or_i16_seq_cst:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: slli a1, a1, 16
-; RV32IA-WMO-NEXT: srli a1, a1, 16
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: amoor.w.aqrl a1, a1, (a2)
-; RV32IA-WMO-NEXT: srl a0, a1, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_or_i16_seq_cst:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: slli a1, a1, 16
+; RV32IA-WMO-NOZACAS-NEXT: srli a1, a1, 16
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: amoor.w.aqrl a1, a1, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_or_i16_seq_cst:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: slli a1, a1, 16
-; RV32IA-TSO-NEXT: srli a1, a1, 16
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: amoor.w a1, a1, (a2)
-; RV32IA-TSO-NEXT: srl a0, a1, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_or_i16_seq_cst:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: slli a1, a1, 16
+; RV32IA-TSO-NOZACAS-NEXT: srli a1, a1, 16
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: amoor.w a1, a1, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_or_i16_seq_cst:
; RV64I: # %bb.0:
@@ -14871,6 +19684,28 @@ define i16 @atomicrmw_or_i16_seq_cst(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_or_i16_seq_cst:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: slli a1, a1, 16
+; RV32IA-WMO-ZACAS-NEXT: srli a1, a1, 16
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: amoor.w.aqrl a1, a1, (a2)
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_or_i16_seq_cst:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: slli a1, a1, 16
+; RV32IA-TSO-ZACAS-NEXT: srli a1, a1, 16
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: amoor.w a1, a1, (a2)
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_or_i16_seq_cst:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -14893,6 +19728,16 @@ define i16 @atomicrmw_or_i16_seq_cst(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_or_i16_seq_cst:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amoor.h.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_or_i16_seq_cst:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amoor.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_or_i16_seq_cst:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amoor.h.aqrl a0, a1, (a0)
@@ -14917,16 +19762,16 @@ define i16 @atomicrmw_xor_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-LABEL: atomicrmw_xor_i16_monotonic:
-; RV32IA: # %bb.0:
-; RV32IA-NEXT: andi a2, a0, -4
-; RV32IA-NEXT: slli a0, a0, 3
-; RV32IA-NEXT: slli a1, a1, 16
-; RV32IA-NEXT: srli a1, a1, 16
-; RV32IA-NEXT: sll a1, a1, a0
-; RV32IA-NEXT: amoxor.w a1, a1, (a2)
-; RV32IA-NEXT: srl a0, a1, a0
-; RV32IA-NEXT: ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_xor_i16_monotonic:
+; RV32IA-NOZACAS: # %bb.0:
+; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT: slli a1, a1, 16
+; RV32IA-NOZACAS-NEXT: srli a1, a1, 16
+; RV32IA-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-NOZACAS-NEXT: amoxor.w a1, a1, (a2)
+; RV32IA-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_xor_i16_monotonic:
; RV64I: # %bb.0:
@@ -14949,6 +19794,17 @@ define i16 @atomicrmw_xor_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV64IA-NOZACAS-NEXT: srlw a0, a1, a0
; RV64IA-NOZACAS-NEXT: ret
;
+; RV32IA-ZACAS-LABEL: atomicrmw_xor_i16_monotonic:
+; RV32IA-ZACAS: # %bb.0:
+; RV32IA-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-ZACAS-NEXT: slli a1, a1, 16
+; RV32IA-ZACAS-NEXT: srli a1, a1, 16
+; RV32IA-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-ZACAS-NEXT: amoxor.w a1, a1, (a2)
+; RV32IA-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-ZACAS-NEXT: ret
+;
; RV64IA-ZACAS-LABEL: atomicrmw_xor_i16_monotonic:
; RV64IA-ZACAS: # %bb.0:
; RV64IA-ZACAS-NEXT: andi a2, a0, -4
@@ -14960,6 +19816,16 @@ define i16 @atomicrmw_xor_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV64IA-ZACAS-NEXT: srlw a0, a1, a0
; RV64IA-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xor_i16_monotonic:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amoxor.h a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_xor_i16_monotonic:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amoxor.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_xor_i16_monotonic:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amoxor.h a0, a1, (a0)
@@ -14984,27 +19850,27 @@ define i16 @atomicrmw_xor_i16_acquire(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_xor_i16_acquire:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: slli a1, a1, 16
-; RV32IA-WMO-NEXT: srli a1, a1, 16
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: amoxor.w.aq a1, a1, (a2)
-; RV32IA-WMO-NEXT: srl a0, a1, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xor_i16_acquire:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: slli a1, a1, 16
+; RV32IA-WMO-NOZACAS-NEXT: srli a1, a1, 16
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: amoxor.w.aq a1, a1, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_xor_i16_acquire:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: slli a1, a1, 16
-; RV32IA-TSO-NEXT: srli a1, a1, 16
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: amoxor.w a1, a1, (a2)
-; RV32IA-TSO-NEXT: srl a0, a1, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_xor_i16_acquire:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: slli a1, a1, 16
+; RV32IA-TSO-NOZACAS-NEXT: srli a1, a1, 16
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: amoxor.w a1, a1, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_xor_i16_acquire:
; RV64I: # %bb.0:
@@ -15038,6 +19904,28 @@ define i16 @atomicrmw_xor_i16_acquire(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_xor_i16_acquire:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: slli a1, a1, 16
+; RV32IA-WMO-ZACAS-NEXT: srli a1, a1, 16
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: amoxor.w.aq a1, a1, (a2)
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_xor_i16_acquire:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: slli a1, a1, 16
+; RV32IA-TSO-ZACAS-NEXT: srli a1, a1, 16
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: amoxor.w a1, a1, (a2)
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_xor_i16_acquire:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -15060,6 +19948,16 @@ define i16 @atomicrmw_xor_i16_acquire(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xor_i16_acquire:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amoxor.h.aq a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_xor_i16_acquire:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amoxor.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_xor_i16_acquire:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amoxor.h.aq a0, a1, (a0)
@@ -15084,27 +19982,27 @@ define i16 @atomicrmw_xor_i16_release(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_xor_i16_release:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: slli a1, a1, 16
-; RV32IA-WMO-NEXT: srli a1, a1, 16
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: amoxor.w.rl a1, a1, (a2)
-; RV32IA-WMO-NEXT: srl a0, a1, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xor_i16_release:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: slli a1, a1, 16
+; RV32IA-WMO-NOZACAS-NEXT: srli a1, a1, 16
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: amoxor.w.rl a1, a1, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_xor_i16_release:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: slli a1, a1, 16
-; RV32IA-TSO-NEXT: srli a1, a1, 16
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: amoxor.w a1, a1, (a2)
-; RV32IA-TSO-NEXT: srl a0, a1, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_xor_i16_release:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: slli a1, a1, 16
+; RV32IA-TSO-NOZACAS-NEXT: srli a1, a1, 16
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: amoxor.w a1, a1, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_xor_i16_release:
; RV64I: # %bb.0:
@@ -15138,6 +20036,28 @@ define i16 @atomicrmw_xor_i16_release(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_xor_i16_release:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: slli a1, a1, 16
+; RV32IA-WMO-ZACAS-NEXT: srli a1, a1, 16
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: amoxor.w.rl a1, a1, (a2)
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_xor_i16_release:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: slli a1, a1, 16
+; RV32IA-TSO-ZACAS-NEXT: srli a1, a1, 16
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: amoxor.w a1, a1, (a2)
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_xor_i16_release:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -15160,6 +20080,16 @@ define i16 @atomicrmw_xor_i16_release(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xor_i16_release:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amoxor.h.rl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_xor_i16_release:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amoxor.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_xor_i16_release:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amoxor.h.rl a0, a1, (a0)
@@ -15184,27 +20114,27 @@ define i16 @atomicrmw_xor_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_xor_i16_acq_rel:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: slli a1, a1, 16
-; RV32IA-WMO-NEXT: srli a1, a1, 16
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: amoxor.w.aqrl a1, a1, (a2)
-; RV32IA-WMO-NEXT: srl a0, a1, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xor_i16_acq_rel:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: slli a1, a1, 16
+; RV32IA-WMO-NOZACAS-NEXT: srli a1, a1, 16
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: amoxor.w.aqrl a1, a1, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_xor_i16_acq_rel:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: slli a1, a1, 16
-; RV32IA-TSO-NEXT: srli a1, a1, 16
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: amoxor.w a1, a1, (a2)
-; RV32IA-TSO-NEXT: srl a0, a1, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_xor_i16_acq_rel:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: slli a1, a1, 16
+; RV32IA-TSO-NOZACAS-NEXT: srli a1, a1, 16
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: amoxor.w a1, a1, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_xor_i16_acq_rel:
; RV64I: # %bb.0:
@@ -15238,6 +20168,28 @@ define i16 @atomicrmw_xor_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_xor_i16_acq_rel:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: slli a1, a1, 16
+; RV32IA-WMO-ZACAS-NEXT: srli a1, a1, 16
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: amoxor.w.aqrl a1, a1, (a2)
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_xor_i16_acq_rel:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: slli a1, a1, 16
+; RV32IA-TSO-ZACAS-NEXT: srli a1, a1, 16
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: amoxor.w a1, a1, (a2)
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_xor_i16_acq_rel:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -15260,6 +20212,16 @@ define i16 @atomicrmw_xor_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xor_i16_acq_rel:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amoxor.h.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_xor_i16_acq_rel:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amoxor.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_xor_i16_acq_rel:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amoxor.h.aqrl a0, a1, (a0)
@@ -15284,27 +20246,27 @@ define i16 @atomicrmw_xor_i16_seq_cst(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 16
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_xor_i16_seq_cst:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: slli a1, a1, 16
-; RV32IA-WMO-NEXT: srli a1, a1, 16
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: amoxor.w.aqrl a1, a1, (a2)
-; RV32IA-WMO-NEXT: srl a0, a1, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_xor_i16_seq_cst:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: slli a1, a1, 16
+; RV32IA-WMO-NOZACAS-NEXT: srli a1, a1, 16
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: amoxor.w.aqrl a1, a1, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_xor_i16_seq_cst:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: slli a1, a1, 16
-; RV32IA-TSO-NEXT: srli a1, a1, 16
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: amoxor.w a1, a1, (a2)
-; RV32IA-TSO-NEXT: srl a0, a1, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_xor_i16_seq_cst:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: slli a1, a1, 16
+; RV32IA-TSO-NOZACAS-NEXT: srli a1, a1, 16
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: amoxor.w a1, a1, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_xor_i16_seq_cst:
; RV64I: # %bb.0:
@@ -15338,6 +20300,28 @@ define i16 @atomicrmw_xor_i16_seq_cst(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_xor_i16_seq_cst:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: slli a1, a1, 16
+; RV32IA-WMO-ZACAS-NEXT: srli a1, a1, 16
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: amoxor.w.aqrl a1, a1, (a2)
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_xor_i16_seq_cst:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: slli a1, a1, 16
+; RV32IA-TSO-ZACAS-NEXT: srli a1, a1, 16
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: amoxor.w a1, a1, (a2)
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_xor_i16_seq_cst:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -15360,6 +20344,16 @@ define i16 @atomicrmw_xor_i16_seq_cst(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a1, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_xor_i16_seq_cst:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amoxor.h.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_xor_i16_seq_cst:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amoxor.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_xor_i16_seq_cst:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amoxor.h.aqrl a0, a1, (a0)
@@ -15416,36 +20410,36 @@ define i16 @atomicrmw_max_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
-; RV32IA-LABEL: atomicrmw_max_i16_monotonic:
-; RV32IA: # %bb.0:
-; RV32IA-NEXT: andi a2, a0, -4
-; RV32IA-NEXT: slli a0, a0, 3
-; RV32IA-NEXT: lui a3, 16
-; RV32IA-NEXT: slli a1, a1, 16
-; RV32IA-NEXT: li a4, 16
-; RV32IA-NEXT: andi a5, a0, 24
-; RV32IA-NEXT: addi a3, a3, -1
-; RV32IA-NEXT: srai a1, a1, 16
-; RV32IA-NEXT: sll a3, a3, a0
-; RV32IA-NEXT: sll a1, a1, a0
-; RV32IA-NEXT: sub a4, a4, a5
-; RV32IA-NEXT: .LBB110_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: lr.w a5, (a2)
-; RV32IA-NEXT: and a7, a5, a3
-; RV32IA-NEXT: mv a6, a5
-; RV32IA-NEXT: sll a7, a7, a4
-; RV32IA-NEXT: sra a7, a7, a4
-; RV32IA-NEXT: bge a7, a1, .LBB110_3
-; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB110_1 Depth=1
-; RV32IA-NEXT: xor a6, a5, a1
-; RV32IA-NEXT: and a6, a6, a3
-; RV32IA-NEXT: xor a6, a5, a6
-; RV32IA-NEXT: .LBB110_3: # in Loop: Header=BB110_1 Depth=1
-; RV32IA-NEXT: sc.w a6, a6, (a2)
-; RV32IA-NEXT: bnez a6, .LBB110_1
-; RV32IA-NEXT: # %bb.4:
-; RV32IA-NEXT: srl a0, a5, a0
-; RV32IA-NEXT: ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_max_i16_monotonic:
+; RV32IA-NOZACAS: # %bb.0:
+; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT: lui a3, 16
+; RV32IA-NOZACAS-NEXT: slli a1, a1, 16
+; RV32IA-NOZACAS-NEXT: li a4, 16
+; RV32IA-NOZACAS-NEXT: andi a5, a0, 24
+; RV32IA-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-NOZACAS-NEXT: srai a1, a1, 16
+; RV32IA-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-NOZACAS-NEXT: sub a4, a4, a5
+; RV32IA-NOZACAS-NEXT: .LBB110_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NOZACAS-NEXT: lr.w a5, (a2)
+; RV32IA-NOZACAS-NEXT: and a7, a5, a3
+; RV32IA-NOZACAS-NEXT: mv a6, a5
+; RV32IA-NOZACAS-NEXT: sll a7, a7, a4
+; RV32IA-NOZACAS-NEXT: sra a7, a7, a4
+; RV32IA-NOZACAS-NEXT: bge a7, a1, .LBB110_3
+; RV32IA-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB110_1 Depth=1
+; RV32IA-NOZACAS-NEXT: xor a6, a5, a1
+; RV32IA-NOZACAS-NEXT: and a6, a6, a3
+; RV32IA-NOZACAS-NEXT: xor a6, a5, a6
+; RV32IA-NOZACAS-NEXT: .LBB110_3: # in Loop: Header=BB110_1 Depth=1
+; RV32IA-NOZACAS-NEXT: sc.w a6, a6, (a2)
+; RV32IA-NOZACAS-NEXT: bnez a6, .LBB110_1
+; RV32IA-NOZACAS-NEXT: # %bb.4:
+; RV32IA-NOZACAS-NEXT: srl a0, a5, a0
+; RV32IA-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_max_i16_monotonic:
; RV64I: # %bb.0:
@@ -15520,6 +20514,37 @@ define i16 @atomicrmw_max_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV64IA-NOZACAS-NEXT: srlw a0, a5, a0
; RV64IA-NOZACAS-NEXT: ret
;
+; RV32IA-ZACAS-LABEL: atomicrmw_max_i16_monotonic:
+; RV32IA-ZACAS: # %bb.0:
+; RV32IA-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-ZACAS-NEXT: lui a3, 16
+; RV32IA-ZACAS-NEXT: slli a1, a1, 16
+; RV32IA-ZACAS-NEXT: li a4, 16
+; RV32IA-ZACAS-NEXT: andi a5, a0, 24
+; RV32IA-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-ZACAS-NEXT: srai a1, a1, 16
+; RV32IA-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-ZACAS-NEXT: sub a4, a4, a5
+; RV32IA-ZACAS-NEXT: .LBB110_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT: lr.w a5, (a2)
+; RV32IA-ZACAS-NEXT: and a7, a5, a3
+; RV32IA-ZACAS-NEXT: mv a6, a5
+; RV32IA-ZACAS-NEXT: sll a7, a7, a4
+; RV32IA-ZACAS-NEXT: sra a7, a7, a4
+; RV32IA-ZACAS-NEXT: bge a7, a1, .LBB110_3
+; RV32IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB110_1 Depth=1
+; RV32IA-ZACAS-NEXT: xor a6, a5, a1
+; RV32IA-ZACAS-NEXT: and a6, a6, a3
+; RV32IA-ZACAS-NEXT: xor a6, a5, a6
+; RV32IA-ZACAS-NEXT: .LBB110_3: # in Loop: Header=BB110_1 Depth=1
+; RV32IA-ZACAS-NEXT: sc.w a6, a6, (a2)
+; RV32IA-ZACAS-NEXT: bnez a6, .LBB110_1
+; RV32IA-ZACAS-NEXT: # %bb.4:
+; RV32IA-ZACAS-NEXT: srl a0, a5, a0
+; RV32IA-ZACAS-NEXT: ret
+;
; RV64IA-ZACAS-LABEL: atomicrmw_max_i16_monotonic:
; RV64IA-ZACAS: # %bb.0:
; RV64IA-ZACAS-NEXT: andi a2, a0, -4
@@ -15551,6 +20576,16 @@ define i16 @atomicrmw_max_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV64IA-ZACAS-NEXT: srlw a0, a5, a0
; RV64IA-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_max_i16_monotonic:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amomax.h a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_max_i16_monotonic:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amomax.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_max_i16_monotonic:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amomax.h a0, a1, (a0)
@@ -15607,67 +20642,67 @@ define i16 @atomicrmw_max_i16_acquire(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_max_i16_acquire:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: lui a3, 16
-; RV32IA-WMO-NEXT: slli a1, a1, 16
-; RV32IA-WMO-NEXT: li a4, 16
-; RV32IA-WMO-NEXT: andi a5, a0, 24
-; RV32IA-WMO-NEXT: addi a3, a3, -1
-; RV32IA-WMO-NEXT: srai a1, a1, 16
-; RV32IA-WMO-NEXT: sll a3, a3, a0
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: sub a4, a4, a5
-; RV32IA-WMO-NEXT: .LBB111_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT: lr.w.aq a5, (a2)
-; RV32IA-WMO-NEXT: and a7, a5, a3
-; RV32IA-WMO-NEXT: mv a6, a5
-; RV32IA-WMO-NEXT: sll a7, a7, a4
-; RV32IA-WMO-NEXT: sra a7, a7, a4
-; RV32IA-WMO-NEXT: bge a7, a1, .LBB111_3
-; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB111_1 Depth=1
-; RV32IA-WMO-NEXT: xor a6, a5, a1
-; RV32IA-WMO-NEXT: and a6, a6, a3
-; RV32IA-WMO-NEXT: xor a6, a5, a6
-; RV32IA-WMO-NEXT: .LBB111_3: # in Loop: Header=BB111_1 Depth=1
-; RV32IA-WMO-NEXT: sc.w a6, a6, (a2)
-; RV32IA-WMO-NEXT: bnez a6, .LBB111_1
-; RV32IA-WMO-NEXT: # %bb.4:
-; RV32IA-WMO-NEXT: srl a0, a5, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_max_i16_acquire:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-NOZACAS-NEXT: slli a1, a1, 16
+; RV32IA-WMO-NOZACAS-NEXT: li a4, 16
+; RV32IA-WMO-NOZACAS-NEXT: andi a5, a0, 24
+; RV32IA-WMO-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-NOZACAS-NEXT: srai a1, a1, 16
+; RV32IA-WMO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: sub a4, a4, a5
+; RV32IA-WMO-NOZACAS-NEXT: .LBB111_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: lr.w.aq a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: and a7, a5, a3
+; RV32IA-WMO-NOZACAS-NEXT: mv a6, a5
+; RV32IA-WMO-NOZACAS-NEXT: sll a7, a7, a4
+; RV32IA-WMO-NOZACAS-NEXT: sra a7, a7, a4
+; RV32IA-WMO-NOZACAS-NEXT: bge a7, a1, .LBB111_3
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB111_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: xor a6, a5, a1
+; RV32IA-WMO-NOZACAS-NEXT: and a6, a6, a3
+; RV32IA-WMO-NOZACAS-NEXT: xor a6, a5, a6
+; RV32IA-WMO-NOZACAS-NEXT: .LBB111_3: # in Loop: Header=BB111_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: sc.w a6, a6, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: bnez a6, .LBB111_1
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.4:
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a5, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_max_i16_acquire:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: lui a3, 16
-; RV32IA-TSO-NEXT: slli a1, a1, 16
-; RV32IA-TSO-NEXT: li a4, 16
-; RV32IA-TSO-NEXT: andi a5, a0, 24
-; RV32IA-TSO-NEXT: addi a3, a3, -1
-; RV32IA-TSO-NEXT: srai a1, a1, 16
-; RV32IA-TSO-NEXT: sll a3, a3, a0
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: sub a4, a4, a5
-; RV32IA-TSO-NEXT: .LBB111_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT: lr.w a5, (a2)
-; RV32IA-TSO-NEXT: and a7, a5, a3
-; RV32IA-TSO-NEXT: mv a6, a5
-; RV32IA-TSO-NEXT: sll a7, a7, a4
-; RV32IA-TSO-NEXT: sra a7, a7, a4
-; RV32IA-TSO-NEXT: bge a7, a1, .LBB111_3
-; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB111_1 Depth=1
-; RV32IA-TSO-NEXT: xor a6, a5, a1
-; RV32IA-TSO-NEXT: and a6, a6, a3
-; RV32IA-TSO-NEXT: xor a6, a5, a6
-; RV32IA-TSO-NEXT: .LBB111_3: # in Loop: Header=BB111_1 Depth=1
-; RV32IA-TSO-NEXT: sc.w a6, a6, (a2)
-; RV32IA-TSO-NEXT: bnez a6, .LBB111_1
-; RV32IA-TSO-NEXT: # %bb.4:
-; RV32IA-TSO-NEXT: srl a0, a5, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_max_i16_acquire:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-NOZACAS-NEXT: slli a1, a1, 16
+; RV32IA-TSO-NOZACAS-NEXT: li a4, 16
+; RV32IA-TSO-NOZACAS-NEXT: andi a5, a0, 24
+; RV32IA-TSO-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-NOZACAS-NEXT: srai a1, a1, 16
+; RV32IA-TSO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: sub a4, a4, a5
+; RV32IA-TSO-NOZACAS-NEXT: .LBB111_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: lr.w a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: and a7, a5, a3
+; RV32IA-TSO-NOZACAS-NEXT: mv a6, a5
+; RV32IA-TSO-NOZACAS-NEXT: sll a7, a7, a4
+; RV32IA-TSO-NOZACAS-NEXT: sra a7, a7, a4
+; RV32IA-TSO-NOZACAS-NEXT: bge a7, a1, .LBB111_3
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB111_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: xor a6, a5, a1
+; RV32IA-TSO-NOZACAS-NEXT: and a6, a6, a3
+; RV32IA-TSO-NOZACAS-NEXT: xor a6, a5, a6
+; RV32IA-TSO-NOZACAS-NEXT: .LBB111_3: # in Loop: Header=BB111_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: sc.w a6, a6, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: bnez a6, .LBB111_1
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.4:
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a5, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_max_i16_acquire:
; RV64I: # %bb.0:
@@ -15773,6 +20808,68 @@ define i16 @atomicrmw_max_i16_acquire(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a5, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_max_i16_acquire:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-ZACAS-NEXT: slli a1, a1, 16
+; RV32IA-WMO-ZACAS-NEXT: li a4, 16
+; RV32IA-WMO-ZACAS-NEXT: andi a5, a0, 24
+; RV32IA-WMO-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-ZACAS-NEXT: srai a1, a1, 16
+; RV32IA-WMO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: sub a4, a4, a5
+; RV32IA-WMO-ZACAS-NEXT: .LBB111_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT: and a7, a5, a3
+; RV32IA-WMO-ZACAS-NEXT: mv a6, a5
+; RV32IA-WMO-ZACAS-NEXT: sll a7, a7, a4
+; RV32IA-WMO-ZACAS-NEXT: sra a7, a7, a4
+; RV32IA-WMO-ZACAS-NEXT: bge a7, a1, .LBB111_3
+; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB111_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT: xor a6, a5, a1
+; RV32IA-WMO-ZACAS-NEXT: and a6, a6, a3
+; RV32IA-WMO-ZACAS-NEXT: xor a6, a5, a6
+; RV32IA-WMO-ZACAS-NEXT: .LBB111_3: # in Loop: Header=BB111_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT: sc.w a6, a6, (a2)
+; RV32IA-WMO-ZACAS-NEXT: bnez a6, .LBB111_1
+; RV32IA-WMO-ZACAS-NEXT: # %bb.4:
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a5, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_max_i16_acquire:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-ZACAS-NEXT: slli a1, a1, 16
+; RV32IA-TSO-ZACAS-NEXT: li a4, 16
+; RV32IA-TSO-ZACAS-NEXT: andi a5, a0, 24
+; RV32IA-TSO-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-ZACAS-NEXT: srai a1, a1, 16
+; RV32IA-TSO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: sub a4, a4, a5
+; RV32IA-TSO-ZACAS-NEXT: .LBB111_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT: lr.w a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT: and a7, a5, a3
+; RV32IA-TSO-ZACAS-NEXT: mv a6, a5
+; RV32IA-TSO-ZACAS-NEXT: sll a7, a7, a4
+; RV32IA-TSO-ZACAS-NEXT: sra a7, a7, a4
+; RV32IA-TSO-ZACAS-NEXT: bge a7, a1, .LBB111_3
+; RV32IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB111_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT: xor a6, a5, a1
+; RV32IA-TSO-ZACAS-NEXT: and a6, a6, a3
+; RV32IA-TSO-ZACAS-NEXT: xor a6, a5, a6
+; RV32IA-TSO-ZACAS-NEXT: .LBB111_3: # in Loop: Header=BB111_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT: sc.w a6, a6, (a2)
+; RV32IA-TSO-ZACAS-NEXT: bnez a6, .LBB111_1
+; RV32IA-TSO-ZACAS-NEXT: # %bb.4:
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a5, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_max_i16_acquire:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -15835,6 +20932,16 @@ define i16 @atomicrmw_max_i16_acquire(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a5, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_max_i16_acquire:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amomax.h.aq a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_max_i16_acquire:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amomax.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_max_i16_acquire:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amomax.h.aq a0, a1, (a0)
@@ -15891,67 +20998,67 @@ define i16 @atomicrmw_max_i16_release(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_max_i16_release:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: lui a3, 16
-; RV32IA-WMO-NEXT: slli a1, a1, 16
-; RV32IA-WMO-NEXT: li a4, 16
-; RV32IA-WMO-NEXT: andi a5, a0, 24
-; RV32IA-WMO-NEXT: addi a3, a3, -1
-; RV32IA-WMO-NEXT: srai a1, a1, 16
-; RV32IA-WMO-NEXT: sll a3, a3, a0
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: sub a4, a4, a5
-; RV32IA-WMO-NEXT: .LBB112_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT: lr.w a5, (a2)
-; RV32IA-WMO-NEXT: and a7, a5, a3
-; RV32IA-WMO-NEXT: mv a6, a5
-; RV32IA-WMO-NEXT: sll a7, a7, a4
-; RV32IA-WMO-NEXT: sra a7, a7, a4
-; RV32IA-WMO-NEXT: bge a7, a1, .LBB112_3
-; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB112_1 Depth=1
-; RV32IA-WMO-NEXT: xor a6, a5, a1
-; RV32IA-WMO-NEXT: and a6, a6, a3
-; RV32IA-WMO-NEXT: xor a6, a5, a6
-; RV32IA-WMO-NEXT: .LBB112_3: # in Loop: Header=BB112_1 Depth=1
-; RV32IA-WMO-NEXT: sc.w.rl a6, a6, (a2)
-; RV32IA-WMO-NEXT: bnez a6, .LBB112_1
-; RV32IA-WMO-NEXT: # %bb.4:
-; RV32IA-WMO-NEXT: srl a0, a5, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_max_i16_release:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-NOZACAS-NEXT: slli a1, a1, 16
+; RV32IA-WMO-NOZACAS-NEXT: li a4, 16
+; RV32IA-WMO-NOZACAS-NEXT: andi a5, a0, 24
+; RV32IA-WMO-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-NOZACAS-NEXT: srai a1, a1, 16
+; RV32IA-WMO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: sub a4, a4, a5
+; RV32IA-WMO-NOZACAS-NEXT: .LBB112_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: lr.w a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: and a7, a5, a3
+; RV32IA-WMO-NOZACAS-NEXT: mv a6, a5
+; RV32IA-WMO-NOZACAS-NEXT: sll a7, a7, a4
+; RV32IA-WMO-NOZACAS-NEXT: sra a7, a7, a4
+; RV32IA-WMO-NOZACAS-NEXT: bge a7, a1, .LBB112_3
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB112_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: xor a6, a5, a1
+; RV32IA-WMO-NOZACAS-NEXT: and a6, a6, a3
+; RV32IA-WMO-NOZACAS-NEXT: xor a6, a5, a6
+; RV32IA-WMO-NOZACAS-NEXT: .LBB112_3: # in Loop: Header=BB112_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: sc.w.rl a6, a6, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: bnez a6, .LBB112_1
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.4:
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a5, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_max_i16_release:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: lui a3, 16
-; RV32IA-TSO-NEXT: slli a1, a1, 16
-; RV32IA-TSO-NEXT: li a4, 16
-; RV32IA-TSO-NEXT: andi a5, a0, 24
-; RV32IA-TSO-NEXT: addi a3, a3, -1
-; RV32IA-TSO-NEXT: srai a1, a1, 16
-; RV32IA-TSO-NEXT: sll a3, a3, a0
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: sub a4, a4, a5
-; RV32IA-TSO-NEXT: .LBB112_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT: lr.w a5, (a2)
-; RV32IA-TSO-NEXT: and a7, a5, a3
-; RV32IA-TSO-NEXT: mv a6, a5
-; RV32IA-TSO-NEXT: sll a7, a7, a4
-; RV32IA-TSO-NEXT: sra a7, a7, a4
-; RV32IA-TSO-NEXT: bge a7, a1, .LBB112_3
-; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB112_1 Depth=1
-; RV32IA-TSO-NEXT: xor a6, a5, a1
-; RV32IA-TSO-NEXT: and a6, a6, a3
-; RV32IA-TSO-NEXT: xor a6, a5, a6
-; RV32IA-TSO-NEXT: .LBB112_3: # in Loop: Header=BB112_1 Depth=1
-; RV32IA-TSO-NEXT: sc.w a6, a6, (a2)
-; RV32IA-TSO-NEXT: bnez a6, .LBB112_1
-; RV32IA-TSO-NEXT: # %bb.4:
-; RV32IA-TSO-NEXT: srl a0, a5, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_max_i16_release:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-NOZACAS-NEXT: slli a1, a1, 16
+; RV32IA-TSO-NOZACAS-NEXT: li a4, 16
+; RV32IA-TSO-NOZACAS-NEXT: andi a5, a0, 24
+; RV32IA-TSO-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-NOZACAS-NEXT: srai a1, a1, 16
+; RV32IA-TSO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: sub a4, a4, a5
+; RV32IA-TSO-NOZACAS-NEXT: .LBB112_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: lr.w a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: and a7, a5, a3
+; RV32IA-TSO-NOZACAS-NEXT: mv a6, a5
+; RV32IA-TSO-NOZACAS-NEXT: sll a7, a7, a4
+; RV32IA-TSO-NOZACAS-NEXT: sra a7, a7, a4
+; RV32IA-TSO-NOZACAS-NEXT: bge a7, a1, .LBB112_3
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB112_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: xor a6, a5, a1
+; RV32IA-TSO-NOZACAS-NEXT: and a6, a6, a3
+; RV32IA-TSO-NOZACAS-NEXT: xor a6, a5, a6
+; RV32IA-TSO-NOZACAS-NEXT: .LBB112_3: # in Loop: Header=BB112_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: sc.w a6, a6, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: bnez a6, .LBB112_1
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.4:
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a5, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_max_i16_release:
; RV64I: # %bb.0:
@@ -16057,6 +21164,68 @@ define i16 @atomicrmw_max_i16_release(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a5, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_max_i16_release:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-ZACAS-NEXT: slli a1, a1, 16
+; RV32IA-WMO-ZACAS-NEXT: li a4, 16
+; RV32IA-WMO-ZACAS-NEXT: andi a5, a0, 24
+; RV32IA-WMO-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-ZACAS-NEXT: srai a1, a1, 16
+; RV32IA-WMO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: sub a4, a4, a5
+; RV32IA-WMO-ZACAS-NEXT: .LBB112_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT: lr.w a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT: and a7, a5, a3
+; RV32IA-WMO-ZACAS-NEXT: mv a6, a5
+; RV32IA-WMO-ZACAS-NEXT: sll a7, a7, a4
+; RV32IA-WMO-ZACAS-NEXT: sra a7, a7, a4
+; RV32IA-WMO-ZACAS-NEXT: bge a7, a1, .LBB112_3
+; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB112_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT: xor a6, a5, a1
+; RV32IA-WMO-ZACAS-NEXT: and a6, a6, a3
+; RV32IA-WMO-ZACAS-NEXT: xor a6, a5, a6
+; RV32IA-WMO-ZACAS-NEXT: .LBB112_3: # in Loop: Header=BB112_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a6, a6, (a2)
+; RV32IA-WMO-ZACAS-NEXT: bnez a6, .LBB112_1
+; RV32IA-WMO-ZACAS-NEXT: # %bb.4:
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a5, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_max_i16_release:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-ZACAS-NEXT: slli a1, a1, 16
+; RV32IA-TSO-ZACAS-NEXT: li a4, 16
+; RV32IA-TSO-ZACAS-NEXT: andi a5, a0, 24
+; RV32IA-TSO-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-ZACAS-NEXT: srai a1, a1, 16
+; RV32IA-TSO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: sub a4, a4, a5
+; RV32IA-TSO-ZACAS-NEXT: .LBB112_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT: lr.w a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT: and a7, a5, a3
+; RV32IA-TSO-ZACAS-NEXT: mv a6, a5
+; RV32IA-TSO-ZACAS-NEXT: sll a7, a7, a4
+; RV32IA-TSO-ZACAS-NEXT: sra a7, a7, a4
+; RV32IA-TSO-ZACAS-NEXT: bge a7, a1, .LBB112_3
+; RV32IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB112_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT: xor a6, a5, a1
+; RV32IA-TSO-ZACAS-NEXT: and a6, a6, a3
+; RV32IA-TSO-ZACAS-NEXT: xor a6, a5, a6
+; RV32IA-TSO-ZACAS-NEXT: .LBB112_3: # in Loop: Header=BB112_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT: sc.w a6, a6, (a2)
+; RV32IA-TSO-ZACAS-NEXT: bnez a6, .LBB112_1
+; RV32IA-TSO-ZACAS-NEXT: # %bb.4:
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a5, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_max_i16_release:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -16119,6 +21288,16 @@ define i16 @atomicrmw_max_i16_release(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a5, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_max_i16_release:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amomax.h.rl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_max_i16_release:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amomax.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_max_i16_release:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amomax.h.rl a0, a1, (a0)
@@ -16175,67 +21354,67 @@ define i16 @atomicrmw_max_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_max_i16_acq_rel:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: lui a3, 16
-; RV32IA-WMO-NEXT: slli a1, a1, 16
-; RV32IA-WMO-NEXT: li a4, 16
-; RV32IA-WMO-NEXT: andi a5, a0, 24
-; RV32IA-WMO-NEXT: addi a3, a3, -1
-; RV32IA-WMO-NEXT: srai a1, a1, 16
-; RV32IA-WMO-NEXT: sll a3, a3, a0
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: sub a4, a4, a5
-; RV32IA-WMO-NEXT: .LBB113_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT: lr.w.aq a5, (a2)
-; RV32IA-WMO-NEXT: and a7, a5, a3
-; RV32IA-WMO-NEXT: mv a6, a5
-; RV32IA-WMO-NEXT: sll a7, a7, a4
-; RV32IA-WMO-NEXT: sra a7, a7, a4
-; RV32IA-WMO-NEXT: bge a7, a1, .LBB113_3
-; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB113_1 Depth=1
-; RV32IA-WMO-NEXT: xor a6, a5, a1
-; RV32IA-WMO-NEXT: and a6, a6, a3
-; RV32IA-WMO-NEXT: xor a6, a5, a6
-; RV32IA-WMO-NEXT: .LBB113_3: # in Loop: Header=BB113_1 Depth=1
-; RV32IA-WMO-NEXT: sc.w.rl a6, a6, (a2)
-; RV32IA-WMO-NEXT: bnez a6, .LBB113_1
-; RV32IA-WMO-NEXT: # %bb.4:
-; RV32IA-WMO-NEXT: srl a0, a5, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_max_i16_acq_rel:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-NOZACAS-NEXT: slli a1, a1, 16
+; RV32IA-WMO-NOZACAS-NEXT: li a4, 16
+; RV32IA-WMO-NOZACAS-NEXT: andi a5, a0, 24
+; RV32IA-WMO-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-NOZACAS-NEXT: srai a1, a1, 16
+; RV32IA-WMO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: sub a4, a4, a5
+; RV32IA-WMO-NOZACAS-NEXT: .LBB113_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: lr.w.aq a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: and a7, a5, a3
+; RV32IA-WMO-NOZACAS-NEXT: mv a6, a5
+; RV32IA-WMO-NOZACAS-NEXT: sll a7, a7, a4
+; RV32IA-WMO-NOZACAS-NEXT: sra a7, a7, a4
+; RV32IA-WMO-NOZACAS-NEXT: bge a7, a1, .LBB113_3
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB113_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: xor a6, a5, a1
+; RV32IA-WMO-NOZACAS-NEXT: and a6, a6, a3
+; RV32IA-WMO-NOZACAS-NEXT: xor a6, a5, a6
+; RV32IA-WMO-NOZACAS-NEXT: .LBB113_3: # in Loop: Header=BB113_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: sc.w.rl a6, a6, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: bnez a6, .LBB113_1
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.4:
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a5, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_max_i16_acq_rel:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: lui a3, 16
-; RV32IA-TSO-NEXT: slli a1, a1, 16
-; RV32IA-TSO-NEXT: li a4, 16
-; RV32IA-TSO-NEXT: andi a5, a0, 24
-; RV32IA-TSO-NEXT: addi a3, a3, -1
-; RV32IA-TSO-NEXT: srai a1, a1, 16
-; RV32IA-TSO-NEXT: sll a3, a3, a0
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: sub a4, a4, a5
-; RV32IA-TSO-NEXT: .LBB113_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT: lr.w a5, (a2)
-; RV32IA-TSO-NEXT: and a7, a5, a3
-; RV32IA-TSO-NEXT: mv a6, a5
-; RV32IA-TSO-NEXT: sll a7, a7, a4
-; RV32IA-TSO-NEXT: sra a7, a7, a4
-; RV32IA-TSO-NEXT: bge a7, a1, .LBB113_3
-; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB113_1 Depth=1
-; RV32IA-TSO-NEXT: xor a6, a5, a1
-; RV32IA-TSO-NEXT: and a6, a6, a3
-; RV32IA-TSO-NEXT: xor a6, a5, a6
-; RV32IA-TSO-NEXT: .LBB113_3: # in Loop: Header=BB113_1 Depth=1
-; RV32IA-TSO-NEXT: sc.w a6, a6, (a2)
-; RV32IA-TSO-NEXT: bnez a6, .LBB113_1
-; RV32IA-TSO-NEXT: # %bb.4:
-; RV32IA-TSO-NEXT: srl a0, a5, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_max_i16_acq_rel:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-NOZACAS-NEXT: slli a1, a1, 16
+; RV32IA-TSO-NOZACAS-NEXT: li a4, 16
+; RV32IA-TSO-NOZACAS-NEXT: andi a5, a0, 24
+; RV32IA-TSO-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-NOZACAS-NEXT: srai a1, a1, 16
+; RV32IA-TSO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: sub a4, a4, a5
+; RV32IA-TSO-NOZACAS-NEXT: .LBB113_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: lr.w a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: and a7, a5, a3
+; RV32IA-TSO-NOZACAS-NEXT: mv a6, a5
+; RV32IA-TSO-NOZACAS-NEXT: sll a7, a7, a4
+; RV32IA-TSO-NOZACAS-NEXT: sra a7, a7, a4
+; RV32IA-TSO-NOZACAS-NEXT: bge a7, a1, .LBB113_3
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB113_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: xor a6, a5, a1
+; RV32IA-TSO-NOZACAS-NEXT: and a6, a6, a3
+; RV32IA-TSO-NOZACAS-NEXT: xor a6, a5, a6
+; RV32IA-TSO-NOZACAS-NEXT: .LBB113_3: # in Loop: Header=BB113_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: sc.w a6, a6, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: bnez a6, .LBB113_1
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.4:
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a5, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_max_i16_acq_rel:
; RV64I: # %bb.0:
@@ -16341,6 +21520,68 @@ define i16 @atomicrmw_max_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a5, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_max_i16_acq_rel:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-ZACAS-NEXT: slli a1, a1, 16
+; RV32IA-WMO-ZACAS-NEXT: li a4, 16
+; RV32IA-WMO-ZACAS-NEXT: andi a5, a0, 24
+; RV32IA-WMO-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-ZACAS-NEXT: srai a1, a1, 16
+; RV32IA-WMO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: sub a4, a4, a5
+; RV32IA-WMO-ZACAS-NEXT: .LBB113_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT: and a7, a5, a3
+; RV32IA-WMO-ZACAS-NEXT: mv a6, a5
+; RV32IA-WMO-ZACAS-NEXT: sll a7, a7, a4
+; RV32IA-WMO-ZACAS-NEXT: sra a7, a7, a4
+; RV32IA-WMO-ZACAS-NEXT: bge a7, a1, .LBB113_3
+; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB113_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT: xor a6, a5, a1
+; RV32IA-WMO-ZACAS-NEXT: and a6, a6, a3
+; RV32IA-WMO-ZACAS-NEXT: xor a6, a5, a6
+; RV32IA-WMO-ZACAS-NEXT: .LBB113_3: # in Loop: Header=BB113_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a6, a6, (a2)
+; RV32IA-WMO-ZACAS-NEXT: bnez a6, .LBB113_1
+; RV32IA-WMO-ZACAS-NEXT: # %bb.4:
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a5, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_max_i16_acq_rel:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-ZACAS-NEXT: slli a1, a1, 16
+; RV32IA-TSO-ZACAS-NEXT: li a4, 16
+; RV32IA-TSO-ZACAS-NEXT: andi a5, a0, 24
+; RV32IA-TSO-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-ZACAS-NEXT: srai a1, a1, 16
+; RV32IA-TSO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: sub a4, a4, a5
+; RV32IA-TSO-ZACAS-NEXT: .LBB113_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT: lr.w a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT: and a7, a5, a3
+; RV32IA-TSO-ZACAS-NEXT: mv a6, a5
+; RV32IA-TSO-ZACAS-NEXT: sll a7, a7, a4
+; RV32IA-TSO-ZACAS-NEXT: sra a7, a7, a4
+; RV32IA-TSO-ZACAS-NEXT: bge a7, a1, .LBB113_3
+; RV32IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB113_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT: xor a6, a5, a1
+; RV32IA-TSO-ZACAS-NEXT: and a6, a6, a3
+; RV32IA-TSO-ZACAS-NEXT: xor a6, a5, a6
+; RV32IA-TSO-ZACAS-NEXT: .LBB113_3: # in Loop: Header=BB113_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT: sc.w a6, a6, (a2)
+; RV32IA-TSO-ZACAS-NEXT: bnez a6, .LBB113_1
+; RV32IA-TSO-ZACAS-NEXT: # %bb.4:
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a5, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_max_i16_acq_rel:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -16403,6 +21644,16 @@ define i16 @atomicrmw_max_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a5, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_max_i16_acq_rel:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amomax.h.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_max_i16_acq_rel:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amomax.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_max_i16_acq_rel:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amomax.h.aqrl a0, a1, (a0)
@@ -16459,36 +21710,36 @@ define i16 @atomicrmw_max_i16_seq_cst(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
-; RV32IA-LABEL: atomicrmw_max_i16_seq_cst:
-; RV32IA: # %bb.0:
-; RV32IA-NEXT: andi a2, a0, -4
-; RV32IA-NEXT: slli a0, a0, 3
-; RV32IA-NEXT: lui a3, 16
-; RV32IA-NEXT: slli a1, a1, 16
-; RV32IA-NEXT: li a4, 16
-; RV32IA-NEXT: andi a5, a0, 24
-; RV32IA-NEXT: addi a3, a3, -1
-; RV32IA-NEXT: srai a1, a1, 16
-; RV32IA-NEXT: sll a3, a3, a0
-; RV32IA-NEXT: sll a1, a1, a0
-; RV32IA-NEXT: sub a4, a4, a5
-; RV32IA-NEXT: .LBB114_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: lr.w.aqrl a5, (a2)
-; RV32IA-NEXT: and a7, a5, a3
-; RV32IA-NEXT: mv a6, a5
-; RV32IA-NEXT: sll a7, a7, a4
-; RV32IA-NEXT: sra a7, a7, a4
-; RV32IA-NEXT: bge a7, a1, .LBB114_3
-; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB114_1 Depth=1
-; RV32IA-NEXT: xor a6, a5, a1
-; RV32IA-NEXT: and a6, a6, a3
-; RV32IA-NEXT: xor a6, a5, a6
-; RV32IA-NEXT: .LBB114_3: # in Loop: Header=BB114_1 Depth=1
-; RV32IA-NEXT: sc.w.rl a6, a6, (a2)
-; RV32IA-NEXT: bnez a6, .LBB114_1
-; RV32IA-NEXT: # %bb.4:
-; RV32IA-NEXT: srl a0, a5, a0
-; RV32IA-NEXT: ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_max_i16_seq_cst:
+; RV32IA-NOZACAS: # %bb.0:
+; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT: lui a3, 16
+; RV32IA-NOZACAS-NEXT: slli a1, a1, 16
+; RV32IA-NOZACAS-NEXT: li a4, 16
+; RV32IA-NOZACAS-NEXT: andi a5, a0, 24
+; RV32IA-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-NOZACAS-NEXT: srai a1, a1, 16
+; RV32IA-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-NOZACAS-NEXT: sub a4, a4, a5
+; RV32IA-NOZACAS-NEXT: .LBB114_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NOZACAS-NEXT: lr.w.aqrl a5, (a2)
+; RV32IA-NOZACAS-NEXT: and a7, a5, a3
+; RV32IA-NOZACAS-NEXT: mv a6, a5
+; RV32IA-NOZACAS-NEXT: sll a7, a7, a4
+; RV32IA-NOZACAS-NEXT: sra a7, a7, a4
+; RV32IA-NOZACAS-NEXT: bge a7, a1, .LBB114_3
+; RV32IA-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB114_1 Depth=1
+; RV32IA-NOZACAS-NEXT: xor a6, a5, a1
+; RV32IA-NOZACAS-NEXT: and a6, a6, a3
+; RV32IA-NOZACAS-NEXT: xor a6, a5, a6
+; RV32IA-NOZACAS-NEXT: .LBB114_3: # in Loop: Header=BB114_1 Depth=1
+; RV32IA-NOZACAS-NEXT: sc.w.rl a6, a6, (a2)
+; RV32IA-NOZACAS-NEXT: bnez a6, .LBB114_1
+; RV32IA-NOZACAS-NEXT: # %bb.4:
+; RV32IA-NOZACAS-NEXT: srl a0, a5, a0
+; RV32IA-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_max_i16_seq_cst:
; RV64I: # %bb.0:
@@ -16563,6 +21814,37 @@ define i16 @atomicrmw_max_i16_seq_cst(ptr %a, i16 %b) nounwind {
; RV64IA-NOZACAS-NEXT: srlw a0, a5, a0
; RV64IA-NOZACAS-NEXT: ret
;
+; RV32IA-ZACAS-LABEL: atomicrmw_max_i16_seq_cst:
+; RV32IA-ZACAS: # %bb.0:
+; RV32IA-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-ZACAS-NEXT: lui a3, 16
+; RV32IA-ZACAS-NEXT: slli a1, a1, 16
+; RV32IA-ZACAS-NEXT: li a4, 16
+; RV32IA-ZACAS-NEXT: andi a5, a0, 24
+; RV32IA-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-ZACAS-NEXT: srai a1, a1, 16
+; RV32IA-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-ZACAS-NEXT: sub a4, a4, a5
+; RV32IA-ZACAS-NEXT: .LBB114_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT: lr.w.aqrl a5, (a2)
+; RV32IA-ZACAS-NEXT: and a7, a5, a3
+; RV32IA-ZACAS-NEXT: mv a6, a5
+; RV32IA-ZACAS-NEXT: sll a7, a7, a4
+; RV32IA-ZACAS-NEXT: sra a7, a7, a4
+; RV32IA-ZACAS-NEXT: bge a7, a1, .LBB114_3
+; RV32IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB114_1 Depth=1
+; RV32IA-ZACAS-NEXT: xor a6, a5, a1
+; RV32IA-ZACAS-NEXT: and a6, a6, a3
+; RV32IA-ZACAS-NEXT: xor a6, a5, a6
+; RV32IA-ZACAS-NEXT: .LBB114_3: # in Loop: Header=BB114_1 Depth=1
+; RV32IA-ZACAS-NEXT: sc.w.rl a6, a6, (a2)
+; RV32IA-ZACAS-NEXT: bnez a6, .LBB114_1
+; RV32IA-ZACAS-NEXT: # %bb.4:
+; RV32IA-ZACAS-NEXT: srl a0, a5, a0
+; RV32IA-ZACAS-NEXT: ret
+;
; RV64IA-ZACAS-LABEL: atomicrmw_max_i16_seq_cst:
; RV64IA-ZACAS: # %bb.0:
; RV64IA-ZACAS-NEXT: andi a2, a0, -4
@@ -16594,6 +21876,16 @@ define i16 @atomicrmw_max_i16_seq_cst(ptr %a, i16 %b) nounwind {
; RV64IA-ZACAS-NEXT: srlw a0, a5, a0
; RV64IA-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_max_i16_seq_cst:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amomax.h.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_max_i16_seq_cst:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amomax.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_max_i16_seq_cst:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amomax.h.aqrl a0, a1, (a0)
@@ -16650,36 +21942,36 @@ define i16 @atomicrmw_min_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
-; RV32IA-LABEL: atomicrmw_min_i16_monotonic:
-; RV32IA: # %bb.0:
-; RV32IA-NEXT: andi a2, a0, -4
-; RV32IA-NEXT: slli a0, a0, 3
-; RV32IA-NEXT: lui a3, 16
-; RV32IA-NEXT: slli a1, a1, 16
-; RV32IA-NEXT: li a4, 16
-; RV32IA-NEXT: andi a5, a0, 24
-; RV32IA-NEXT: addi a3, a3, -1
-; RV32IA-NEXT: srai a1, a1, 16
-; RV32IA-NEXT: sll a3, a3, a0
-; RV32IA-NEXT: sll a1, a1, a0
-; RV32IA-NEXT: sub a4, a4, a5
-; RV32IA-NEXT: .LBB115_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: lr.w a5, (a2)
-; RV32IA-NEXT: and a7, a5, a3
-; RV32IA-NEXT: mv a6, a5
-; RV32IA-NEXT: sll a7, a7, a4
-; RV32IA-NEXT: sra a7, a7, a4
-; RV32IA-NEXT: bge a1, a7, .LBB115_3
-; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB115_1 Depth=1
-; RV32IA-NEXT: xor a6, a5, a1
-; RV32IA-NEXT: and a6, a6, a3
-; RV32IA-NEXT: xor a6, a5, a6
-; RV32IA-NEXT: .LBB115_3: # in Loop: Header=BB115_1 Depth=1
-; RV32IA-NEXT: sc.w a6, a6, (a2)
-; RV32IA-NEXT: bnez a6, .LBB115_1
-; RV32IA-NEXT: # %bb.4:
-; RV32IA-NEXT: srl a0, a5, a0
-; RV32IA-NEXT: ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_min_i16_monotonic:
+; RV32IA-NOZACAS: # %bb.0:
+; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT: lui a3, 16
+; RV32IA-NOZACAS-NEXT: slli a1, a1, 16
+; RV32IA-NOZACAS-NEXT: li a4, 16
+; RV32IA-NOZACAS-NEXT: andi a5, a0, 24
+; RV32IA-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-NOZACAS-NEXT: srai a1, a1, 16
+; RV32IA-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-NOZACAS-NEXT: sub a4, a4, a5
+; RV32IA-NOZACAS-NEXT: .LBB115_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NOZACAS-NEXT: lr.w a5, (a2)
+; RV32IA-NOZACAS-NEXT: and a7, a5, a3
+; RV32IA-NOZACAS-NEXT: mv a6, a5
+; RV32IA-NOZACAS-NEXT: sll a7, a7, a4
+; RV32IA-NOZACAS-NEXT: sra a7, a7, a4
+; RV32IA-NOZACAS-NEXT: bge a1, a7, .LBB115_3
+; RV32IA-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB115_1 Depth=1
+; RV32IA-NOZACAS-NEXT: xor a6, a5, a1
+; RV32IA-NOZACAS-NEXT: and a6, a6, a3
+; RV32IA-NOZACAS-NEXT: xor a6, a5, a6
+; RV32IA-NOZACAS-NEXT: .LBB115_3: # in Loop: Header=BB115_1 Depth=1
+; RV32IA-NOZACAS-NEXT: sc.w a6, a6, (a2)
+; RV32IA-NOZACAS-NEXT: bnez a6, .LBB115_1
+; RV32IA-NOZACAS-NEXT: # %bb.4:
+; RV32IA-NOZACAS-NEXT: srl a0, a5, a0
+; RV32IA-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_min_i16_monotonic:
; RV64I: # %bb.0:
@@ -16754,6 +22046,37 @@ define i16 @atomicrmw_min_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV64IA-NOZACAS-NEXT: srlw a0, a5, a0
; RV64IA-NOZACAS-NEXT: ret
;
+; RV32IA-ZACAS-LABEL: atomicrmw_min_i16_monotonic:
+; RV32IA-ZACAS: # %bb.0:
+; RV32IA-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-ZACAS-NEXT: lui a3, 16
+; RV32IA-ZACAS-NEXT: slli a1, a1, 16
+; RV32IA-ZACAS-NEXT: li a4, 16
+; RV32IA-ZACAS-NEXT: andi a5, a0, 24
+; RV32IA-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-ZACAS-NEXT: srai a1, a1, 16
+; RV32IA-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-ZACAS-NEXT: sub a4, a4, a5
+; RV32IA-ZACAS-NEXT: .LBB115_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT: lr.w a5, (a2)
+; RV32IA-ZACAS-NEXT: and a7, a5, a3
+; RV32IA-ZACAS-NEXT: mv a6, a5
+; RV32IA-ZACAS-NEXT: sll a7, a7, a4
+; RV32IA-ZACAS-NEXT: sra a7, a7, a4
+; RV32IA-ZACAS-NEXT: bge a1, a7, .LBB115_3
+; RV32IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB115_1 Depth=1
+; RV32IA-ZACAS-NEXT: xor a6, a5, a1
+; RV32IA-ZACAS-NEXT: and a6, a6, a3
+; RV32IA-ZACAS-NEXT: xor a6, a5, a6
+; RV32IA-ZACAS-NEXT: .LBB115_3: # in Loop: Header=BB115_1 Depth=1
+; RV32IA-ZACAS-NEXT: sc.w a6, a6, (a2)
+; RV32IA-ZACAS-NEXT: bnez a6, .LBB115_1
+; RV32IA-ZACAS-NEXT: # %bb.4:
+; RV32IA-ZACAS-NEXT: srl a0, a5, a0
+; RV32IA-ZACAS-NEXT: ret
+;
; RV64IA-ZACAS-LABEL: atomicrmw_min_i16_monotonic:
; RV64IA-ZACAS: # %bb.0:
; RV64IA-ZACAS-NEXT: andi a2, a0, -4
@@ -16785,6 +22108,16 @@ define i16 @atomicrmw_min_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV64IA-ZACAS-NEXT: srlw a0, a5, a0
; RV64IA-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_min_i16_monotonic:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amomin.h a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_min_i16_monotonic:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amomin.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_min_i16_monotonic:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amomin.h a0, a1, (a0)
@@ -16841,67 +22174,67 @@ define i16 @atomicrmw_min_i16_acquire(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_min_i16_acquire:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: lui a3, 16
-; RV32IA-WMO-NEXT: slli a1, a1, 16
-; RV32IA-WMO-NEXT: li a4, 16
-; RV32IA-WMO-NEXT: andi a5, a0, 24
-; RV32IA-WMO-NEXT: addi a3, a3, -1
-; RV32IA-WMO-NEXT: srai a1, a1, 16
-; RV32IA-WMO-NEXT: sll a3, a3, a0
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: sub a4, a4, a5
-; RV32IA-WMO-NEXT: .LBB116_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT: lr.w.aq a5, (a2)
-; RV32IA-WMO-NEXT: and a7, a5, a3
-; RV32IA-WMO-NEXT: mv a6, a5
-; RV32IA-WMO-NEXT: sll a7, a7, a4
-; RV32IA-WMO-NEXT: sra a7, a7, a4
-; RV32IA-WMO-NEXT: bge a1, a7, .LBB116_3
-; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB116_1 Depth=1
-; RV32IA-WMO-NEXT: xor a6, a5, a1
-; RV32IA-WMO-NEXT: and a6, a6, a3
-; RV32IA-WMO-NEXT: xor a6, a5, a6
-; RV32IA-WMO-NEXT: .LBB116_3: # in Loop: Header=BB116_1 Depth=1
-; RV32IA-WMO-NEXT: sc.w a6, a6, (a2)
-; RV32IA-WMO-NEXT: bnez a6, .LBB116_1
-; RV32IA-WMO-NEXT: # %bb.4:
-; RV32IA-WMO-NEXT: srl a0, a5, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_min_i16_acquire:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-NOZACAS-NEXT: slli a1, a1, 16
+; RV32IA-WMO-NOZACAS-NEXT: li a4, 16
+; RV32IA-WMO-NOZACAS-NEXT: andi a5, a0, 24
+; RV32IA-WMO-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-NOZACAS-NEXT: srai a1, a1, 16
+; RV32IA-WMO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: sub a4, a4, a5
+; RV32IA-WMO-NOZACAS-NEXT: .LBB116_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: lr.w.aq a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: and a7, a5, a3
+; RV32IA-WMO-NOZACAS-NEXT: mv a6, a5
+; RV32IA-WMO-NOZACAS-NEXT: sll a7, a7, a4
+; RV32IA-WMO-NOZACAS-NEXT: sra a7, a7, a4
+; RV32IA-WMO-NOZACAS-NEXT: bge a1, a7, .LBB116_3
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB116_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: xor a6, a5, a1
+; RV32IA-WMO-NOZACAS-NEXT: and a6, a6, a3
+; RV32IA-WMO-NOZACAS-NEXT: xor a6, a5, a6
+; RV32IA-WMO-NOZACAS-NEXT: .LBB116_3: # in Loop: Header=BB116_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: sc.w a6, a6, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: bnez a6, .LBB116_1
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.4:
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a5, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_min_i16_acquire:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: lui a3, 16
-; RV32IA-TSO-NEXT: slli a1, a1, 16
-; RV32IA-TSO-NEXT: li a4, 16
-; RV32IA-TSO-NEXT: andi a5, a0, 24
-; RV32IA-TSO-NEXT: addi a3, a3, -1
-; RV32IA-TSO-NEXT: srai a1, a1, 16
-; RV32IA-TSO-NEXT: sll a3, a3, a0
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: sub a4, a4, a5
-; RV32IA-TSO-NEXT: .LBB116_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT: lr.w a5, (a2)
-; RV32IA-TSO-NEXT: and a7, a5, a3
-; RV32IA-TSO-NEXT: mv a6, a5
-; RV32IA-TSO-NEXT: sll a7, a7, a4
-; RV32IA-TSO-NEXT: sra a7, a7, a4
-; RV32IA-TSO-NEXT: bge a1, a7, .LBB116_3
-; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB116_1 Depth=1
-; RV32IA-TSO-NEXT: xor a6, a5, a1
-; RV32IA-TSO-NEXT: and a6, a6, a3
-; RV32IA-TSO-NEXT: xor a6, a5, a6
-; RV32IA-TSO-NEXT: .LBB116_3: # in Loop: Header=BB116_1 Depth=1
-; RV32IA-TSO-NEXT: sc.w a6, a6, (a2)
-; RV32IA-TSO-NEXT: bnez a6, .LBB116_1
-; RV32IA-TSO-NEXT: # %bb.4:
-; RV32IA-TSO-NEXT: srl a0, a5, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_min_i16_acquire:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-NOZACAS-NEXT: slli a1, a1, 16
+; RV32IA-TSO-NOZACAS-NEXT: li a4, 16
+; RV32IA-TSO-NOZACAS-NEXT: andi a5, a0, 24
+; RV32IA-TSO-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-NOZACAS-NEXT: srai a1, a1, 16
+; RV32IA-TSO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: sub a4, a4, a5
+; RV32IA-TSO-NOZACAS-NEXT: .LBB116_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: lr.w a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: and a7, a5, a3
+; RV32IA-TSO-NOZACAS-NEXT: mv a6, a5
+; RV32IA-TSO-NOZACAS-NEXT: sll a7, a7, a4
+; RV32IA-TSO-NOZACAS-NEXT: sra a7, a7, a4
+; RV32IA-TSO-NOZACAS-NEXT: bge a1, a7, .LBB116_3
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB116_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: xor a6, a5, a1
+; RV32IA-TSO-NOZACAS-NEXT: and a6, a6, a3
+; RV32IA-TSO-NOZACAS-NEXT: xor a6, a5, a6
+; RV32IA-TSO-NOZACAS-NEXT: .LBB116_3: # in Loop: Header=BB116_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: sc.w a6, a6, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: bnez a6, .LBB116_1
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.4:
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a5, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_min_i16_acquire:
; RV64I: # %bb.0:
@@ -17007,6 +22340,68 @@ define i16 @atomicrmw_min_i16_acquire(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a5, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_min_i16_acquire:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-ZACAS-NEXT: slli a1, a1, 16
+; RV32IA-WMO-ZACAS-NEXT: li a4, 16
+; RV32IA-WMO-ZACAS-NEXT: andi a5, a0, 24
+; RV32IA-WMO-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-ZACAS-NEXT: srai a1, a1, 16
+; RV32IA-WMO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: sub a4, a4, a5
+; RV32IA-WMO-ZACAS-NEXT: .LBB116_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT: and a7, a5, a3
+; RV32IA-WMO-ZACAS-NEXT: mv a6, a5
+; RV32IA-WMO-ZACAS-NEXT: sll a7, a7, a4
+; RV32IA-WMO-ZACAS-NEXT: sra a7, a7, a4
+; RV32IA-WMO-ZACAS-NEXT: bge a1, a7, .LBB116_3
+; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB116_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT: xor a6, a5, a1
+; RV32IA-WMO-ZACAS-NEXT: and a6, a6, a3
+; RV32IA-WMO-ZACAS-NEXT: xor a6, a5, a6
+; RV32IA-WMO-ZACAS-NEXT: .LBB116_3: # in Loop: Header=BB116_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT: sc.w a6, a6, (a2)
+; RV32IA-WMO-ZACAS-NEXT: bnez a6, .LBB116_1
+; RV32IA-WMO-ZACAS-NEXT: # %bb.4:
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a5, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_min_i16_acquire:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-ZACAS-NEXT: slli a1, a1, 16
+; RV32IA-TSO-ZACAS-NEXT: li a4, 16
+; RV32IA-TSO-ZACAS-NEXT: andi a5, a0, 24
+; RV32IA-TSO-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-ZACAS-NEXT: srai a1, a1, 16
+; RV32IA-TSO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: sub a4, a4, a5
+; RV32IA-TSO-ZACAS-NEXT: .LBB116_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT: lr.w a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT: and a7, a5, a3
+; RV32IA-TSO-ZACAS-NEXT: mv a6, a5
+; RV32IA-TSO-ZACAS-NEXT: sll a7, a7, a4
+; RV32IA-TSO-ZACAS-NEXT: sra a7, a7, a4
+; RV32IA-TSO-ZACAS-NEXT: bge a1, a7, .LBB116_3
+; RV32IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB116_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT: xor a6, a5, a1
+; RV32IA-TSO-ZACAS-NEXT: and a6, a6, a3
+; RV32IA-TSO-ZACAS-NEXT: xor a6, a5, a6
+; RV32IA-TSO-ZACAS-NEXT: .LBB116_3: # in Loop: Header=BB116_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT: sc.w a6, a6, (a2)
+; RV32IA-TSO-ZACAS-NEXT: bnez a6, .LBB116_1
+; RV32IA-TSO-ZACAS-NEXT: # %bb.4:
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a5, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_min_i16_acquire:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -17069,6 +22464,16 @@ define i16 @atomicrmw_min_i16_acquire(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a5, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_min_i16_acquire:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amomin.h.aq a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_min_i16_acquire:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amomin.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_min_i16_acquire:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amomin.h.aq a0, a1, (a0)
@@ -17125,67 +22530,67 @@ define i16 @atomicrmw_min_i16_release(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_min_i16_release:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: lui a3, 16
-; RV32IA-WMO-NEXT: slli a1, a1, 16
-; RV32IA-WMO-NEXT: li a4, 16
-; RV32IA-WMO-NEXT: andi a5, a0, 24
-; RV32IA-WMO-NEXT: addi a3, a3, -1
-; RV32IA-WMO-NEXT: srai a1, a1, 16
-; RV32IA-WMO-NEXT: sll a3, a3, a0
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: sub a4, a4, a5
-; RV32IA-WMO-NEXT: .LBB117_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT: lr.w a5, (a2)
-; RV32IA-WMO-NEXT: and a7, a5, a3
-; RV32IA-WMO-NEXT: mv a6, a5
-; RV32IA-WMO-NEXT: sll a7, a7, a4
-; RV32IA-WMO-NEXT: sra a7, a7, a4
-; RV32IA-WMO-NEXT: bge a1, a7, .LBB117_3
-; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB117_1 Depth=1
-; RV32IA-WMO-NEXT: xor a6, a5, a1
-; RV32IA-WMO-NEXT: and a6, a6, a3
-; RV32IA-WMO-NEXT: xor a6, a5, a6
-; RV32IA-WMO-NEXT: .LBB117_3: # in Loop: Header=BB117_1 Depth=1
-; RV32IA-WMO-NEXT: sc.w.rl a6, a6, (a2)
-; RV32IA-WMO-NEXT: bnez a6, .LBB117_1
-; RV32IA-WMO-NEXT: # %bb.4:
-; RV32IA-WMO-NEXT: srl a0, a5, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_min_i16_release:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-NOZACAS-NEXT: slli a1, a1, 16
+; RV32IA-WMO-NOZACAS-NEXT: li a4, 16
+; RV32IA-WMO-NOZACAS-NEXT: andi a5, a0, 24
+; RV32IA-WMO-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-NOZACAS-NEXT: srai a1, a1, 16
+; RV32IA-WMO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: sub a4, a4, a5
+; RV32IA-WMO-NOZACAS-NEXT: .LBB117_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: lr.w a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: and a7, a5, a3
+; RV32IA-WMO-NOZACAS-NEXT: mv a6, a5
+; RV32IA-WMO-NOZACAS-NEXT: sll a7, a7, a4
+; RV32IA-WMO-NOZACAS-NEXT: sra a7, a7, a4
+; RV32IA-WMO-NOZACAS-NEXT: bge a1, a7, .LBB117_3
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB117_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: xor a6, a5, a1
+; RV32IA-WMO-NOZACAS-NEXT: and a6, a6, a3
+; RV32IA-WMO-NOZACAS-NEXT: xor a6, a5, a6
+; RV32IA-WMO-NOZACAS-NEXT: .LBB117_3: # in Loop: Header=BB117_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: sc.w.rl a6, a6, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: bnez a6, .LBB117_1
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.4:
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a5, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_min_i16_release:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: lui a3, 16
-; RV32IA-TSO-NEXT: slli a1, a1, 16
-; RV32IA-TSO-NEXT: li a4, 16
-; RV32IA-TSO-NEXT: andi a5, a0, 24
-; RV32IA-TSO-NEXT: addi a3, a3, -1
-; RV32IA-TSO-NEXT: srai a1, a1, 16
-; RV32IA-TSO-NEXT: sll a3, a3, a0
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: sub a4, a4, a5
-; RV32IA-TSO-NEXT: .LBB117_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT: lr.w a5, (a2)
-; RV32IA-TSO-NEXT: and a7, a5, a3
-; RV32IA-TSO-NEXT: mv a6, a5
-; RV32IA-TSO-NEXT: sll a7, a7, a4
-; RV32IA-TSO-NEXT: sra a7, a7, a4
-; RV32IA-TSO-NEXT: bge a1, a7, .LBB117_3
-; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB117_1 Depth=1
-; RV32IA-TSO-NEXT: xor a6, a5, a1
-; RV32IA-TSO-NEXT: and a6, a6, a3
-; RV32IA-TSO-NEXT: xor a6, a5, a6
-; RV32IA-TSO-NEXT: .LBB117_3: # in Loop: Header=BB117_1 Depth=1
-; RV32IA-TSO-NEXT: sc.w a6, a6, (a2)
-; RV32IA-TSO-NEXT: bnez a6, .LBB117_1
-; RV32IA-TSO-NEXT: # %bb.4:
-; RV32IA-TSO-NEXT: srl a0, a5, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_min_i16_release:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-NOZACAS-NEXT: slli a1, a1, 16
+; RV32IA-TSO-NOZACAS-NEXT: li a4, 16
+; RV32IA-TSO-NOZACAS-NEXT: andi a5, a0, 24
+; RV32IA-TSO-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-NOZACAS-NEXT: srai a1, a1, 16
+; RV32IA-TSO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: sub a4, a4, a5
+; RV32IA-TSO-NOZACAS-NEXT: .LBB117_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: lr.w a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: and a7, a5, a3
+; RV32IA-TSO-NOZACAS-NEXT: mv a6, a5
+; RV32IA-TSO-NOZACAS-NEXT: sll a7, a7, a4
+; RV32IA-TSO-NOZACAS-NEXT: sra a7, a7, a4
+; RV32IA-TSO-NOZACAS-NEXT: bge a1, a7, .LBB117_3
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB117_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: xor a6, a5, a1
+; RV32IA-TSO-NOZACAS-NEXT: and a6, a6, a3
+; RV32IA-TSO-NOZACAS-NEXT: xor a6, a5, a6
+; RV32IA-TSO-NOZACAS-NEXT: .LBB117_3: # in Loop: Header=BB117_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: sc.w a6, a6, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: bnez a6, .LBB117_1
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.4:
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a5, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_min_i16_release:
; RV64I: # %bb.0:
@@ -17291,6 +22696,68 @@ define i16 @atomicrmw_min_i16_release(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a5, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_min_i16_release:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-ZACAS-NEXT: slli a1, a1, 16
+; RV32IA-WMO-ZACAS-NEXT: li a4, 16
+; RV32IA-WMO-ZACAS-NEXT: andi a5, a0, 24
+; RV32IA-WMO-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-ZACAS-NEXT: srai a1, a1, 16
+; RV32IA-WMO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: sub a4, a4, a5
+; RV32IA-WMO-ZACAS-NEXT: .LBB117_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT: lr.w a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT: and a7, a5, a3
+; RV32IA-WMO-ZACAS-NEXT: mv a6, a5
+; RV32IA-WMO-ZACAS-NEXT: sll a7, a7, a4
+; RV32IA-WMO-ZACAS-NEXT: sra a7, a7, a4
+; RV32IA-WMO-ZACAS-NEXT: bge a1, a7, .LBB117_3
+; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB117_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT: xor a6, a5, a1
+; RV32IA-WMO-ZACAS-NEXT: and a6, a6, a3
+; RV32IA-WMO-ZACAS-NEXT: xor a6, a5, a6
+; RV32IA-WMO-ZACAS-NEXT: .LBB117_3: # in Loop: Header=BB117_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a6, a6, (a2)
+; RV32IA-WMO-ZACAS-NEXT: bnez a6, .LBB117_1
+; RV32IA-WMO-ZACAS-NEXT: # %bb.4:
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a5, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_min_i16_release:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-ZACAS-NEXT: slli a1, a1, 16
+; RV32IA-TSO-ZACAS-NEXT: li a4, 16
+; RV32IA-TSO-ZACAS-NEXT: andi a5, a0, 24
+; RV32IA-TSO-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-ZACAS-NEXT: srai a1, a1, 16
+; RV32IA-TSO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: sub a4, a4, a5
+; RV32IA-TSO-ZACAS-NEXT: .LBB117_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT: lr.w a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT: and a7, a5, a3
+; RV32IA-TSO-ZACAS-NEXT: mv a6, a5
+; RV32IA-TSO-ZACAS-NEXT: sll a7, a7, a4
+; RV32IA-TSO-ZACAS-NEXT: sra a7, a7, a4
+; RV32IA-TSO-ZACAS-NEXT: bge a1, a7, .LBB117_3
+; RV32IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB117_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT: xor a6, a5, a1
+; RV32IA-TSO-ZACAS-NEXT: and a6, a6, a3
+; RV32IA-TSO-ZACAS-NEXT: xor a6, a5, a6
+; RV32IA-TSO-ZACAS-NEXT: .LBB117_3: # in Loop: Header=BB117_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT: sc.w a6, a6, (a2)
+; RV32IA-TSO-ZACAS-NEXT: bnez a6, .LBB117_1
+; RV32IA-TSO-ZACAS-NEXT: # %bb.4:
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a5, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_min_i16_release:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -17353,6 +22820,16 @@ define i16 @atomicrmw_min_i16_release(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a5, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_min_i16_release:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amomin.h.rl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_min_i16_release:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amomin.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_min_i16_release:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amomin.h.rl a0, a1, (a0)
@@ -17409,67 +22886,67 @@ define i16 @atomicrmw_min_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_min_i16_acq_rel:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: lui a3, 16
-; RV32IA-WMO-NEXT: slli a1, a1, 16
-; RV32IA-WMO-NEXT: li a4, 16
-; RV32IA-WMO-NEXT: andi a5, a0, 24
-; RV32IA-WMO-NEXT: addi a3, a3, -1
-; RV32IA-WMO-NEXT: srai a1, a1, 16
-; RV32IA-WMO-NEXT: sll a3, a3, a0
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: sub a4, a4, a5
-; RV32IA-WMO-NEXT: .LBB118_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT: lr.w.aq a5, (a2)
-; RV32IA-WMO-NEXT: and a7, a5, a3
-; RV32IA-WMO-NEXT: mv a6, a5
-; RV32IA-WMO-NEXT: sll a7, a7, a4
-; RV32IA-WMO-NEXT: sra a7, a7, a4
-; RV32IA-WMO-NEXT: bge a1, a7, .LBB118_3
-; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB118_1 Depth=1
-; RV32IA-WMO-NEXT: xor a6, a5, a1
-; RV32IA-WMO-NEXT: and a6, a6, a3
-; RV32IA-WMO-NEXT: xor a6, a5, a6
-; RV32IA-WMO-NEXT: .LBB118_3: # in Loop: Header=BB118_1 Depth=1
-; RV32IA-WMO-NEXT: sc.w.rl a6, a6, (a2)
-; RV32IA-WMO-NEXT: bnez a6, .LBB118_1
-; RV32IA-WMO-NEXT: # %bb.4:
-; RV32IA-WMO-NEXT: srl a0, a5, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_min_i16_acq_rel:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-NOZACAS-NEXT: slli a1, a1, 16
+; RV32IA-WMO-NOZACAS-NEXT: li a4, 16
+; RV32IA-WMO-NOZACAS-NEXT: andi a5, a0, 24
+; RV32IA-WMO-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-NOZACAS-NEXT: srai a1, a1, 16
+; RV32IA-WMO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: sub a4, a4, a5
+; RV32IA-WMO-NOZACAS-NEXT: .LBB118_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: lr.w.aq a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: and a7, a5, a3
+; RV32IA-WMO-NOZACAS-NEXT: mv a6, a5
+; RV32IA-WMO-NOZACAS-NEXT: sll a7, a7, a4
+; RV32IA-WMO-NOZACAS-NEXT: sra a7, a7, a4
+; RV32IA-WMO-NOZACAS-NEXT: bge a1, a7, .LBB118_3
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB118_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: xor a6, a5, a1
+; RV32IA-WMO-NOZACAS-NEXT: and a6, a6, a3
+; RV32IA-WMO-NOZACAS-NEXT: xor a6, a5, a6
+; RV32IA-WMO-NOZACAS-NEXT: .LBB118_3: # in Loop: Header=BB118_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: sc.w.rl a6, a6, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: bnez a6, .LBB118_1
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.4:
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a5, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_min_i16_acq_rel:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: lui a3, 16
-; RV32IA-TSO-NEXT: slli a1, a1, 16
-; RV32IA-TSO-NEXT: li a4, 16
-; RV32IA-TSO-NEXT: andi a5, a0, 24
-; RV32IA-TSO-NEXT: addi a3, a3, -1
-; RV32IA-TSO-NEXT: srai a1, a1, 16
-; RV32IA-TSO-NEXT: sll a3, a3, a0
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: sub a4, a4, a5
-; RV32IA-TSO-NEXT: .LBB118_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT: lr.w a5, (a2)
-; RV32IA-TSO-NEXT: and a7, a5, a3
-; RV32IA-TSO-NEXT: mv a6, a5
-; RV32IA-TSO-NEXT: sll a7, a7, a4
-; RV32IA-TSO-NEXT: sra a7, a7, a4
-; RV32IA-TSO-NEXT: bge a1, a7, .LBB118_3
-; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB118_1 Depth=1
-; RV32IA-TSO-NEXT: xor a6, a5, a1
-; RV32IA-TSO-NEXT: and a6, a6, a3
-; RV32IA-TSO-NEXT: xor a6, a5, a6
-; RV32IA-TSO-NEXT: .LBB118_3: # in Loop: Header=BB118_1 Depth=1
-; RV32IA-TSO-NEXT: sc.w a6, a6, (a2)
-; RV32IA-TSO-NEXT: bnez a6, .LBB118_1
-; RV32IA-TSO-NEXT: # %bb.4:
-; RV32IA-TSO-NEXT: srl a0, a5, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_min_i16_acq_rel:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-NOZACAS-NEXT: slli a1, a1, 16
+; RV32IA-TSO-NOZACAS-NEXT: li a4, 16
+; RV32IA-TSO-NOZACAS-NEXT: andi a5, a0, 24
+; RV32IA-TSO-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-NOZACAS-NEXT: srai a1, a1, 16
+; RV32IA-TSO-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: sub a4, a4, a5
+; RV32IA-TSO-NOZACAS-NEXT: .LBB118_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: lr.w a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: and a7, a5, a3
+; RV32IA-TSO-NOZACAS-NEXT: mv a6, a5
+; RV32IA-TSO-NOZACAS-NEXT: sll a7, a7, a4
+; RV32IA-TSO-NOZACAS-NEXT: sra a7, a7, a4
+; RV32IA-TSO-NOZACAS-NEXT: bge a1, a7, .LBB118_3
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB118_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: xor a6, a5, a1
+; RV32IA-TSO-NOZACAS-NEXT: and a6, a6, a3
+; RV32IA-TSO-NOZACAS-NEXT: xor a6, a5, a6
+; RV32IA-TSO-NOZACAS-NEXT: .LBB118_3: # in Loop: Header=BB118_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: sc.w a6, a6, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: bnez a6, .LBB118_1
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.4:
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a5, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_min_i16_acq_rel:
; RV64I: # %bb.0:
@@ -17575,6 +23052,68 @@ define i16 @atomicrmw_min_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a5, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_min_i16_acq_rel:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-ZACAS-NEXT: slli a1, a1, 16
+; RV32IA-WMO-ZACAS-NEXT: li a4, 16
+; RV32IA-WMO-ZACAS-NEXT: andi a5, a0, 24
+; RV32IA-WMO-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-ZACAS-NEXT: srai a1, a1, 16
+; RV32IA-WMO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: sub a4, a4, a5
+; RV32IA-WMO-ZACAS-NEXT: .LBB118_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT: and a7, a5, a3
+; RV32IA-WMO-ZACAS-NEXT: mv a6, a5
+; RV32IA-WMO-ZACAS-NEXT: sll a7, a7, a4
+; RV32IA-WMO-ZACAS-NEXT: sra a7, a7, a4
+; RV32IA-WMO-ZACAS-NEXT: bge a1, a7, .LBB118_3
+; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB118_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT: xor a6, a5, a1
+; RV32IA-WMO-ZACAS-NEXT: and a6, a6, a3
+; RV32IA-WMO-ZACAS-NEXT: xor a6, a5, a6
+; RV32IA-WMO-ZACAS-NEXT: .LBB118_3: # in Loop: Header=BB118_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a6, a6, (a2)
+; RV32IA-WMO-ZACAS-NEXT: bnez a6, .LBB118_1
+; RV32IA-WMO-ZACAS-NEXT: # %bb.4:
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a5, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_min_i16_acq_rel:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-ZACAS-NEXT: slli a1, a1, 16
+; RV32IA-TSO-ZACAS-NEXT: li a4, 16
+; RV32IA-TSO-ZACAS-NEXT: andi a5, a0, 24
+; RV32IA-TSO-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-ZACAS-NEXT: srai a1, a1, 16
+; RV32IA-TSO-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: sub a4, a4, a5
+; RV32IA-TSO-ZACAS-NEXT: .LBB118_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT: lr.w a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT: and a7, a5, a3
+; RV32IA-TSO-ZACAS-NEXT: mv a6, a5
+; RV32IA-TSO-ZACAS-NEXT: sll a7, a7, a4
+; RV32IA-TSO-ZACAS-NEXT: sra a7, a7, a4
+; RV32IA-TSO-ZACAS-NEXT: bge a1, a7, .LBB118_3
+; RV32IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB118_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT: xor a6, a5, a1
+; RV32IA-TSO-ZACAS-NEXT: and a6, a6, a3
+; RV32IA-TSO-ZACAS-NEXT: xor a6, a5, a6
+; RV32IA-TSO-ZACAS-NEXT: .LBB118_3: # in Loop: Header=BB118_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT: sc.w a6, a6, (a2)
+; RV32IA-TSO-ZACAS-NEXT: bnez a6, .LBB118_1
+; RV32IA-TSO-ZACAS-NEXT: # %bb.4:
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a5, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_min_i16_acq_rel:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -17637,6 +23176,16 @@ define i16 @atomicrmw_min_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a5, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_min_i16_acq_rel:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amomin.h.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_min_i16_acq_rel:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amomin.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_min_i16_acq_rel:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amomin.h.aqrl a0, a1, (a0)
@@ -17693,36 +23242,36 @@ define i16 @atomicrmw_min_i16_seq_cst(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
-; RV32IA-LABEL: atomicrmw_min_i16_seq_cst:
-; RV32IA: # %bb.0:
-; RV32IA-NEXT: andi a2, a0, -4
-; RV32IA-NEXT: slli a0, a0, 3
-; RV32IA-NEXT: lui a3, 16
-; RV32IA-NEXT: slli a1, a1, 16
-; RV32IA-NEXT: li a4, 16
-; RV32IA-NEXT: andi a5, a0, 24
-; RV32IA-NEXT: addi a3, a3, -1
-; RV32IA-NEXT: srai a1, a1, 16
-; RV32IA-NEXT: sll a3, a3, a0
-; RV32IA-NEXT: sll a1, a1, a0
-; RV32IA-NEXT: sub a4, a4, a5
-; RV32IA-NEXT: .LBB119_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: lr.w.aqrl a5, (a2)
-; RV32IA-NEXT: and a7, a5, a3
-; RV32IA-NEXT: mv a6, a5
-; RV32IA-NEXT: sll a7, a7, a4
-; RV32IA-NEXT: sra a7, a7, a4
-; RV32IA-NEXT: bge a1, a7, .LBB119_3
-; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB119_1 Depth=1
-; RV32IA-NEXT: xor a6, a5, a1
-; RV32IA-NEXT: and a6, a6, a3
-; RV32IA-NEXT: xor a6, a5, a6
-; RV32IA-NEXT: .LBB119_3: # in Loop: Header=BB119_1 Depth=1
-; RV32IA-NEXT: sc.w.rl a6, a6, (a2)
-; RV32IA-NEXT: bnez a6, .LBB119_1
-; RV32IA-NEXT: # %bb.4:
-; RV32IA-NEXT: srl a0, a5, a0
-; RV32IA-NEXT: ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_min_i16_seq_cst:
+; RV32IA-NOZACAS: # %bb.0:
+; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT: lui a3, 16
+; RV32IA-NOZACAS-NEXT: slli a1, a1, 16
+; RV32IA-NOZACAS-NEXT: li a4, 16
+; RV32IA-NOZACAS-NEXT: andi a5, a0, 24
+; RV32IA-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-NOZACAS-NEXT: srai a1, a1, 16
+; RV32IA-NOZACAS-NEXT: sll a3, a3, a0
+; RV32IA-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-NOZACAS-NEXT: sub a4, a4, a5
+; RV32IA-NOZACAS-NEXT: .LBB119_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NOZACAS-NEXT: lr.w.aqrl a5, (a2)
+; RV32IA-NOZACAS-NEXT: and a7, a5, a3
+; RV32IA-NOZACAS-NEXT: mv a6, a5
+; RV32IA-NOZACAS-NEXT: sll a7, a7, a4
+; RV32IA-NOZACAS-NEXT: sra a7, a7, a4
+; RV32IA-NOZACAS-NEXT: bge a1, a7, .LBB119_3
+; RV32IA-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB119_1 Depth=1
+; RV32IA-NOZACAS-NEXT: xor a6, a5, a1
+; RV32IA-NOZACAS-NEXT: and a6, a6, a3
+; RV32IA-NOZACAS-NEXT: xor a6, a5, a6
+; RV32IA-NOZACAS-NEXT: .LBB119_3: # in Loop: Header=BB119_1 Depth=1
+; RV32IA-NOZACAS-NEXT: sc.w.rl a6, a6, (a2)
+; RV32IA-NOZACAS-NEXT: bnez a6, .LBB119_1
+; RV32IA-NOZACAS-NEXT: # %bb.4:
+; RV32IA-NOZACAS-NEXT: srl a0, a5, a0
+; RV32IA-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_min_i16_seq_cst:
; RV64I: # %bb.0:
@@ -17797,6 +23346,37 @@ define i16 @atomicrmw_min_i16_seq_cst(ptr %a, i16 %b) nounwind {
; RV64IA-NOZACAS-NEXT: srlw a0, a5, a0
; RV64IA-NOZACAS-NEXT: ret
;
+; RV32IA-ZACAS-LABEL: atomicrmw_min_i16_seq_cst:
+; RV32IA-ZACAS: # %bb.0:
+; RV32IA-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-ZACAS-NEXT: lui a3, 16
+; RV32IA-ZACAS-NEXT: slli a1, a1, 16
+; RV32IA-ZACAS-NEXT: li a4, 16
+; RV32IA-ZACAS-NEXT: andi a5, a0, 24
+; RV32IA-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-ZACAS-NEXT: srai a1, a1, 16
+; RV32IA-ZACAS-NEXT: sll a3, a3, a0
+; RV32IA-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-ZACAS-NEXT: sub a4, a4, a5
+; RV32IA-ZACAS-NEXT: .LBB119_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT: lr.w.aqrl a5, (a2)
+; RV32IA-ZACAS-NEXT: and a7, a5, a3
+; RV32IA-ZACAS-NEXT: mv a6, a5
+; RV32IA-ZACAS-NEXT: sll a7, a7, a4
+; RV32IA-ZACAS-NEXT: sra a7, a7, a4
+; RV32IA-ZACAS-NEXT: bge a1, a7, .LBB119_3
+; RV32IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB119_1 Depth=1
+; RV32IA-ZACAS-NEXT: xor a6, a5, a1
+; RV32IA-ZACAS-NEXT: and a6, a6, a3
+; RV32IA-ZACAS-NEXT: xor a6, a5, a6
+; RV32IA-ZACAS-NEXT: .LBB119_3: # in Loop: Header=BB119_1 Depth=1
+; RV32IA-ZACAS-NEXT: sc.w.rl a6, a6, (a2)
+; RV32IA-ZACAS-NEXT: bnez a6, .LBB119_1
+; RV32IA-ZACAS-NEXT: # %bb.4:
+; RV32IA-ZACAS-NEXT: srl a0, a5, a0
+; RV32IA-ZACAS-NEXT: ret
+;
; RV64IA-ZACAS-LABEL: atomicrmw_min_i16_seq_cst:
; RV64IA-ZACAS: # %bb.0:
; RV64IA-ZACAS-NEXT: andi a2, a0, -4
@@ -17828,6 +23408,16 @@ define i16 @atomicrmw_min_i16_seq_cst(ptr %a, i16 %b) nounwind {
; RV64IA-ZACAS-NEXT: srlw a0, a5, a0
; RV64IA-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_min_i16_seq_cst:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amomin.h.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_min_i16_seq_cst:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amomin.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_min_i16_seq_cst:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amomin.h.aqrl a0, a1, (a0)
@@ -17886,30 +23476,30 @@ define i16 @atomicrmw_umax_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
-; RV32IA-LABEL: atomicrmw_umax_i16_monotonic:
-; RV32IA: # %bb.0:
-; RV32IA-NEXT: andi a2, a0, -4
-; RV32IA-NEXT: slli a0, a0, 3
-; RV32IA-NEXT: lui a3, 16
-; RV32IA-NEXT: addi a3, a3, -1
-; RV32IA-NEXT: sll a4, a3, a0
-; RV32IA-NEXT: and a1, a1, a3
-; RV32IA-NEXT: sll a1, a1, a0
-; RV32IA-NEXT: .LBB120_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: lr.w a3, (a2)
-; RV32IA-NEXT: and a6, a3, a4
-; RV32IA-NEXT: mv a5, a3
-; RV32IA-NEXT: bgeu a6, a1, .LBB120_3
-; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB120_1 Depth=1
-; RV32IA-NEXT: xor a5, a3, a1
-; RV32IA-NEXT: and a5, a5, a4
-; RV32IA-NEXT: xor a5, a3, a5
-; RV32IA-NEXT: .LBB120_3: # in Loop: Header=BB120_1 Depth=1
-; RV32IA-NEXT: sc.w a5, a5, (a2)
-; RV32IA-NEXT: bnez a5, .LBB120_1
-; RV32IA-NEXT: # %bb.4:
-; RV32IA-NEXT: srl a0, a3, a0
-; RV32IA-NEXT: ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_umax_i16_monotonic:
+; RV32IA-NOZACAS: # %bb.0:
+; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT: lui a3, 16
+; RV32IA-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-NOZACAS-NEXT: .LBB120_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NOZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-NOZACAS-NEXT: and a6, a3, a4
+; RV32IA-NOZACAS-NEXT: mv a5, a3
+; RV32IA-NOZACAS-NEXT: bgeu a6, a1, .LBB120_3
+; RV32IA-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB120_1 Depth=1
+; RV32IA-NOZACAS-NEXT: xor a5, a3, a1
+; RV32IA-NOZACAS-NEXT: and a5, a5, a4
+; RV32IA-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-NOZACAS-NEXT: .LBB120_3: # in Loop: Header=BB120_1 Depth=1
+; RV32IA-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-NOZACAS-NEXT: bnez a5, .LBB120_1
+; RV32IA-NOZACAS-NEXT: # %bb.4:
+; RV32IA-NOZACAS-NEXT: srl a0, a3, a0
+; RV32IA-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_umax_i16_monotonic:
; RV64I: # %bb.0:
@@ -17980,6 +23570,31 @@ define i16 @atomicrmw_umax_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV64IA-NOZACAS-NEXT: srlw a0, a3, a0
; RV64IA-NOZACAS-NEXT: ret
;
+; RV32IA-ZACAS-LABEL: atomicrmw_umax_i16_monotonic:
+; RV32IA-ZACAS: # %bb.0:
+; RV32IA-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-ZACAS-NEXT: lui a3, 16
+; RV32IA-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-ZACAS-NEXT: sll a4, a3, a0
+; RV32IA-ZACAS-NEXT: and a1, a1, a3
+; RV32IA-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-ZACAS-NEXT: .LBB120_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-ZACAS-NEXT: and a6, a3, a4
+; RV32IA-ZACAS-NEXT: mv a5, a3
+; RV32IA-ZACAS-NEXT: bgeu a6, a1, .LBB120_3
+; RV32IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB120_1 Depth=1
+; RV32IA-ZACAS-NEXT: xor a5, a3, a1
+; RV32IA-ZACAS-NEXT: and a5, a5, a4
+; RV32IA-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-ZACAS-NEXT: .LBB120_3: # in Loop: Header=BB120_1 Depth=1
+; RV32IA-ZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-ZACAS-NEXT: bnez a5, .LBB120_1
+; RV32IA-ZACAS-NEXT: # %bb.4:
+; RV32IA-ZACAS-NEXT: srl a0, a3, a0
+; RV32IA-ZACAS-NEXT: ret
+;
; RV64IA-ZACAS-LABEL: atomicrmw_umax_i16_monotonic:
; RV64IA-ZACAS: # %bb.0:
; RV64IA-ZACAS-NEXT: andi a2, a0, -4
@@ -18005,6 +23620,16 @@ define i16 @atomicrmw_umax_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV64IA-ZACAS-NEXT: srlw a0, a3, a0
; RV64IA-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_umax_i16_monotonic:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amomaxu.h a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_umax_i16_monotonic:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amomaxu.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_umax_i16_monotonic:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amomaxu.h a0, a1, (a0)
@@ -18063,55 +23688,55 @@ define i16 @atomicrmw_umax_i16_acquire(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_umax_i16_acquire:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: lui a3, 16
-; RV32IA-WMO-NEXT: addi a3, a3, -1
-; RV32IA-WMO-NEXT: sll a4, a3, a0
-; RV32IA-WMO-NEXT: and a1, a1, a3
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: .LBB121_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT: lr.w.aq a3, (a2)
-; RV32IA-WMO-NEXT: and a6, a3, a4
-; RV32IA-WMO-NEXT: mv a5, a3
-; RV32IA-WMO-NEXT: bgeu a6, a1, .LBB121_3
-; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB121_1 Depth=1
-; RV32IA-WMO-NEXT: xor a5, a3, a1
-; RV32IA-WMO-NEXT: and a5, a5, a4
-; RV32IA-WMO-NEXT: xor a5, a3, a5
-; RV32IA-WMO-NEXT: .LBB121_3: # in Loop: Header=BB121_1 Depth=1
-; RV32IA-WMO-NEXT: sc.w a5, a5, (a2)
-; RV32IA-WMO-NEXT: bnez a5, .LBB121_1
-; RV32IA-WMO-NEXT: # %bb.4:
-; RV32IA-WMO-NEXT: srl a0, a3, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_umax_i16_acquire:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: .LBB121_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: lr.w.aq a3, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: and a6, a3, a4
+; RV32IA-WMO-NOZACAS-NEXT: mv a5, a3
+; RV32IA-WMO-NOZACAS-NEXT: bgeu a6, a1, .LBB121_3
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB121_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a3, a1
+; RV32IA-WMO-NOZACAS-NEXT: and a5, a5, a4
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-NOZACAS-NEXT: .LBB121_3: # in Loop: Header=BB121_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: bnez a5, .LBB121_1
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.4:
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_umax_i16_acquire:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: lui a3, 16
-; RV32IA-TSO-NEXT: addi a3, a3, -1
-; RV32IA-TSO-NEXT: sll a4, a3, a0
-; RV32IA-TSO-NEXT: and a1, a1, a3
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: .LBB121_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT: lr.w a3, (a2)
-; RV32IA-TSO-NEXT: and a6, a3, a4
-; RV32IA-TSO-NEXT: mv a5, a3
-; RV32IA-TSO-NEXT: bgeu a6, a1, .LBB121_3
-; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB121_1 Depth=1
-; RV32IA-TSO-NEXT: xor a5, a3, a1
-; RV32IA-TSO-NEXT: and a5, a5, a4
-; RV32IA-TSO-NEXT: xor a5, a3, a5
-; RV32IA-TSO-NEXT: .LBB121_3: # in Loop: Header=BB121_1 Depth=1
-; RV32IA-TSO-NEXT: sc.w a5, a5, (a2)
-; RV32IA-TSO-NEXT: bnez a5, .LBB121_1
-; RV32IA-TSO-NEXT: # %bb.4:
-; RV32IA-TSO-NEXT: srl a0, a3, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_umax_i16_acquire:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: .LBB121_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: and a6, a3, a4
+; RV32IA-TSO-NOZACAS-NEXT: mv a5, a3
+; RV32IA-TSO-NOZACAS-NEXT: bgeu a6, a1, .LBB121_3
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB121_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a3, a1
+; RV32IA-TSO-NOZACAS-NEXT: and a5, a5, a4
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-NOZACAS-NEXT: .LBB121_3: # in Loop: Header=BB121_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: bnez a5, .LBB121_1
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.4:
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_umax_i16_acquire:
; RV64I: # %bb.0:
@@ -18207,6 +23832,56 @@ define i16 @atomicrmw_umax_i16_acquire(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a3, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_umax_i16_acquire:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-ZACAS-NEXT: sll a4, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a3
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: .LBB121_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a3, (a2)
+; RV32IA-WMO-ZACAS-NEXT: and a6, a3, a4
+; RV32IA-WMO-ZACAS-NEXT: mv a5, a3
+; RV32IA-WMO-ZACAS-NEXT: bgeu a6, a1, .LBB121_3
+; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB121_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a3, a1
+; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a4
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-ZACAS-NEXT: .LBB121_3: # in Loop: Header=BB121_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB121_1
+; RV32IA-WMO-ZACAS-NEXT: # %bb.4:
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_umax_i16_acquire:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-ZACAS-NEXT: sll a4, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: and a1, a1, a3
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: .LBB121_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-TSO-ZACAS-NEXT: and a6, a3, a4
+; RV32IA-TSO-ZACAS-NEXT: mv a5, a3
+; RV32IA-TSO-ZACAS-NEXT: bgeu a6, a1, .LBB121_3
+; RV32IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB121_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a3, a1
+; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a4
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-ZACAS-NEXT: .LBB121_3: # in Loop: Header=BB121_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT: bnez a5, .LBB121_1
+; RV32IA-TSO-ZACAS-NEXT: # %bb.4:
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_umax_i16_acquire:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -18257,6 +23932,16 @@ define i16 @atomicrmw_umax_i16_acquire(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a3, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_umax_i16_acquire:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amomaxu.h.aq a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_umax_i16_acquire:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amomaxu.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_umax_i16_acquire:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amomaxu.h.aq a0, a1, (a0)
@@ -18315,55 +24000,55 @@ define i16 @atomicrmw_umax_i16_release(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_umax_i16_release:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: lui a3, 16
-; RV32IA-WMO-NEXT: addi a3, a3, -1
-; RV32IA-WMO-NEXT: sll a4, a3, a0
-; RV32IA-WMO-NEXT: and a1, a1, a3
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: .LBB122_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT: lr.w a3, (a2)
-; RV32IA-WMO-NEXT: and a6, a3, a4
-; RV32IA-WMO-NEXT: mv a5, a3
-; RV32IA-WMO-NEXT: bgeu a6, a1, .LBB122_3
-; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB122_1 Depth=1
-; RV32IA-WMO-NEXT: xor a5, a3, a1
-; RV32IA-WMO-NEXT: and a5, a5, a4
-; RV32IA-WMO-NEXT: xor a5, a3, a5
-; RV32IA-WMO-NEXT: .LBB122_3: # in Loop: Header=BB122_1 Depth=1
-; RV32IA-WMO-NEXT: sc.w.rl a5, a5, (a2)
-; RV32IA-WMO-NEXT: bnez a5, .LBB122_1
-; RV32IA-WMO-NEXT: # %bb.4:
-; RV32IA-WMO-NEXT: srl a0, a3, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_umax_i16_release:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: .LBB122_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: and a6, a3, a4
+; RV32IA-WMO-NOZACAS-NEXT: mv a5, a3
+; RV32IA-WMO-NOZACAS-NEXT: bgeu a6, a1, .LBB122_3
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB122_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a3, a1
+; RV32IA-WMO-NOZACAS-NEXT: and a5, a5, a4
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-NOZACAS-NEXT: .LBB122_3: # in Loop: Header=BB122_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: bnez a5, .LBB122_1
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.4:
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_umax_i16_release:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: lui a3, 16
-; RV32IA-TSO-NEXT: addi a3, a3, -1
-; RV32IA-TSO-NEXT: sll a4, a3, a0
-; RV32IA-TSO-NEXT: and a1, a1, a3
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: .LBB122_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT: lr.w a3, (a2)
-; RV32IA-TSO-NEXT: and a6, a3, a4
-; RV32IA-TSO-NEXT: mv a5, a3
-; RV32IA-TSO-NEXT: bgeu a6, a1, .LBB122_3
-; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB122_1 Depth=1
-; RV32IA-TSO-NEXT: xor a5, a3, a1
-; RV32IA-TSO-NEXT: and a5, a5, a4
-; RV32IA-TSO-NEXT: xor a5, a3, a5
-; RV32IA-TSO-NEXT: .LBB122_3: # in Loop: Header=BB122_1 Depth=1
-; RV32IA-TSO-NEXT: sc.w a5, a5, (a2)
-; RV32IA-TSO-NEXT: bnez a5, .LBB122_1
-; RV32IA-TSO-NEXT: # %bb.4:
-; RV32IA-TSO-NEXT: srl a0, a3, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_umax_i16_release:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: .LBB122_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: and a6, a3, a4
+; RV32IA-TSO-NOZACAS-NEXT: mv a5, a3
+; RV32IA-TSO-NOZACAS-NEXT: bgeu a6, a1, .LBB122_3
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB122_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a3, a1
+; RV32IA-TSO-NOZACAS-NEXT: and a5, a5, a4
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-NOZACAS-NEXT: .LBB122_3: # in Loop: Header=BB122_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: bnez a5, .LBB122_1
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.4:
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_umax_i16_release:
; RV64I: # %bb.0:
@@ -18459,6 +24144,56 @@ define i16 @atomicrmw_umax_i16_release(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a3, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_umax_i16_release:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-ZACAS-NEXT: sll a4, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a3
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: .LBB122_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-WMO-ZACAS-NEXT: and a6, a3, a4
+; RV32IA-WMO-ZACAS-NEXT: mv a5, a3
+; RV32IA-WMO-ZACAS-NEXT: bgeu a6, a1, .LBB122_3
+; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB122_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a3, a1
+; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a4
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-ZACAS-NEXT: .LBB122_3: # in Loop: Header=BB122_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB122_1
+; RV32IA-WMO-ZACAS-NEXT: # %bb.4:
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_umax_i16_release:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-ZACAS-NEXT: sll a4, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: and a1, a1, a3
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: .LBB122_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-TSO-ZACAS-NEXT: and a6, a3, a4
+; RV32IA-TSO-ZACAS-NEXT: mv a5, a3
+; RV32IA-TSO-ZACAS-NEXT: bgeu a6, a1, .LBB122_3
+; RV32IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB122_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a3, a1
+; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a4
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-ZACAS-NEXT: .LBB122_3: # in Loop: Header=BB122_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT: bnez a5, .LBB122_1
+; RV32IA-TSO-ZACAS-NEXT: # %bb.4:
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_umax_i16_release:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -18509,6 +24244,16 @@ define i16 @atomicrmw_umax_i16_release(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a3, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_umax_i16_release:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amomaxu.h.rl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_umax_i16_release:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amomaxu.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_umax_i16_release:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amomaxu.h.rl a0, a1, (a0)
@@ -18567,55 +24312,55 @@ define i16 @atomicrmw_umax_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_umax_i16_acq_rel:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: lui a3, 16
-; RV32IA-WMO-NEXT: addi a3, a3, -1
-; RV32IA-WMO-NEXT: sll a4, a3, a0
-; RV32IA-WMO-NEXT: and a1, a1, a3
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: .LBB123_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT: lr.w.aq a3, (a2)
-; RV32IA-WMO-NEXT: and a6, a3, a4
-; RV32IA-WMO-NEXT: mv a5, a3
-; RV32IA-WMO-NEXT: bgeu a6, a1, .LBB123_3
-; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB123_1 Depth=1
-; RV32IA-WMO-NEXT: xor a5, a3, a1
-; RV32IA-WMO-NEXT: and a5, a5, a4
-; RV32IA-WMO-NEXT: xor a5, a3, a5
-; RV32IA-WMO-NEXT: .LBB123_3: # in Loop: Header=BB123_1 Depth=1
-; RV32IA-WMO-NEXT: sc.w.rl a5, a5, (a2)
-; RV32IA-WMO-NEXT: bnez a5, .LBB123_1
-; RV32IA-WMO-NEXT: # %bb.4:
-; RV32IA-WMO-NEXT: srl a0, a3, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_umax_i16_acq_rel:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: .LBB123_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: lr.w.aq a3, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: and a6, a3, a4
+; RV32IA-WMO-NOZACAS-NEXT: mv a5, a3
+; RV32IA-WMO-NOZACAS-NEXT: bgeu a6, a1, .LBB123_3
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB123_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a3, a1
+; RV32IA-WMO-NOZACAS-NEXT: and a5, a5, a4
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-NOZACAS-NEXT: .LBB123_3: # in Loop: Header=BB123_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: bnez a5, .LBB123_1
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.4:
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_umax_i16_acq_rel:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: lui a3, 16
-; RV32IA-TSO-NEXT: addi a3, a3, -1
-; RV32IA-TSO-NEXT: sll a4, a3, a0
-; RV32IA-TSO-NEXT: and a1, a1, a3
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: .LBB123_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT: lr.w a3, (a2)
-; RV32IA-TSO-NEXT: and a6, a3, a4
-; RV32IA-TSO-NEXT: mv a5, a3
-; RV32IA-TSO-NEXT: bgeu a6, a1, .LBB123_3
-; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB123_1 Depth=1
-; RV32IA-TSO-NEXT: xor a5, a3, a1
-; RV32IA-TSO-NEXT: and a5, a5, a4
-; RV32IA-TSO-NEXT: xor a5, a3, a5
-; RV32IA-TSO-NEXT: .LBB123_3: # in Loop: Header=BB123_1 Depth=1
-; RV32IA-TSO-NEXT: sc.w a5, a5, (a2)
-; RV32IA-TSO-NEXT: bnez a5, .LBB123_1
-; RV32IA-TSO-NEXT: # %bb.4:
-; RV32IA-TSO-NEXT: srl a0, a3, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_umax_i16_acq_rel:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: .LBB123_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: and a6, a3, a4
+; RV32IA-TSO-NOZACAS-NEXT: mv a5, a3
+; RV32IA-TSO-NOZACAS-NEXT: bgeu a6, a1, .LBB123_3
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB123_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a3, a1
+; RV32IA-TSO-NOZACAS-NEXT: and a5, a5, a4
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-NOZACAS-NEXT: .LBB123_3: # in Loop: Header=BB123_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: bnez a5, .LBB123_1
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.4:
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_umax_i16_acq_rel:
; RV64I: # %bb.0:
@@ -18711,6 +24456,56 @@ define i16 @atomicrmw_umax_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a3, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_umax_i16_acq_rel:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-ZACAS-NEXT: sll a4, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a3
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: .LBB123_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a3, (a2)
+; RV32IA-WMO-ZACAS-NEXT: and a6, a3, a4
+; RV32IA-WMO-ZACAS-NEXT: mv a5, a3
+; RV32IA-WMO-ZACAS-NEXT: bgeu a6, a1, .LBB123_3
+; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB123_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a3, a1
+; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a4
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-ZACAS-NEXT: .LBB123_3: # in Loop: Header=BB123_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB123_1
+; RV32IA-WMO-ZACAS-NEXT: # %bb.4:
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_umax_i16_acq_rel:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-ZACAS-NEXT: sll a4, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: and a1, a1, a3
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: .LBB123_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-TSO-ZACAS-NEXT: and a6, a3, a4
+; RV32IA-TSO-ZACAS-NEXT: mv a5, a3
+; RV32IA-TSO-ZACAS-NEXT: bgeu a6, a1, .LBB123_3
+; RV32IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB123_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a3, a1
+; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a4
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-ZACAS-NEXT: .LBB123_3: # in Loop: Header=BB123_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT: bnez a5, .LBB123_1
+; RV32IA-TSO-ZACAS-NEXT: # %bb.4:
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_umax_i16_acq_rel:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -18761,6 +24556,16 @@ define i16 @atomicrmw_umax_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a3, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_umax_i16_acq_rel:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amomaxu.h.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_umax_i16_acq_rel:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amomaxu.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_umax_i16_acq_rel:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amomaxu.h.aqrl a0, a1, (a0)
@@ -18819,30 +24624,30 @@ define i16 @atomicrmw_umax_i16_seq_cst(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
-; RV32IA-LABEL: atomicrmw_umax_i16_seq_cst:
-; RV32IA: # %bb.0:
-; RV32IA-NEXT: andi a2, a0, -4
-; RV32IA-NEXT: slli a0, a0, 3
-; RV32IA-NEXT: lui a3, 16
-; RV32IA-NEXT: addi a3, a3, -1
-; RV32IA-NEXT: sll a4, a3, a0
-; RV32IA-NEXT: and a1, a1, a3
-; RV32IA-NEXT: sll a1, a1, a0
-; RV32IA-NEXT: .LBB124_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: lr.w.aqrl a3, (a2)
-; RV32IA-NEXT: and a6, a3, a4
-; RV32IA-NEXT: mv a5, a3
-; RV32IA-NEXT: bgeu a6, a1, .LBB124_3
-; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB124_1 Depth=1
-; RV32IA-NEXT: xor a5, a3, a1
-; RV32IA-NEXT: and a5, a5, a4
-; RV32IA-NEXT: xor a5, a3, a5
-; RV32IA-NEXT: .LBB124_3: # in Loop: Header=BB124_1 Depth=1
-; RV32IA-NEXT: sc.w.rl a5, a5, (a2)
-; RV32IA-NEXT: bnez a5, .LBB124_1
-; RV32IA-NEXT: # %bb.4:
-; RV32IA-NEXT: srl a0, a3, a0
-; RV32IA-NEXT: ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_umax_i16_seq_cst:
+; RV32IA-NOZACAS: # %bb.0:
+; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT: lui a3, 16
+; RV32IA-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-NOZACAS-NEXT: .LBB124_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NOZACAS-NEXT: lr.w.aqrl a3, (a2)
+; RV32IA-NOZACAS-NEXT: and a6, a3, a4
+; RV32IA-NOZACAS-NEXT: mv a5, a3
+; RV32IA-NOZACAS-NEXT: bgeu a6, a1, .LBB124_3
+; RV32IA-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB124_1 Depth=1
+; RV32IA-NOZACAS-NEXT: xor a5, a3, a1
+; RV32IA-NOZACAS-NEXT: and a5, a5, a4
+; RV32IA-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-NOZACAS-NEXT: .LBB124_3: # in Loop: Header=BB124_1 Depth=1
+; RV32IA-NOZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-NOZACAS-NEXT: bnez a5, .LBB124_1
+; RV32IA-NOZACAS-NEXT: # %bb.4:
+; RV32IA-NOZACAS-NEXT: srl a0, a3, a0
+; RV32IA-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_umax_i16_seq_cst:
; RV64I: # %bb.0:
@@ -18913,6 +24718,31 @@ define i16 @atomicrmw_umax_i16_seq_cst(ptr %a, i16 %b) nounwind {
; RV64IA-NOZACAS-NEXT: srlw a0, a3, a0
; RV64IA-NOZACAS-NEXT: ret
;
+; RV32IA-ZACAS-LABEL: atomicrmw_umax_i16_seq_cst:
+; RV32IA-ZACAS: # %bb.0:
+; RV32IA-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-ZACAS-NEXT: lui a3, 16
+; RV32IA-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-ZACAS-NEXT: sll a4, a3, a0
+; RV32IA-ZACAS-NEXT: and a1, a1, a3
+; RV32IA-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-ZACAS-NEXT: .LBB124_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT: lr.w.aqrl a3, (a2)
+; RV32IA-ZACAS-NEXT: and a6, a3, a4
+; RV32IA-ZACAS-NEXT: mv a5, a3
+; RV32IA-ZACAS-NEXT: bgeu a6, a1, .LBB124_3
+; RV32IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB124_1 Depth=1
+; RV32IA-ZACAS-NEXT: xor a5, a3, a1
+; RV32IA-ZACAS-NEXT: and a5, a5, a4
+; RV32IA-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-ZACAS-NEXT: .LBB124_3: # in Loop: Header=BB124_1 Depth=1
+; RV32IA-ZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-ZACAS-NEXT: bnez a5, .LBB124_1
+; RV32IA-ZACAS-NEXT: # %bb.4:
+; RV32IA-ZACAS-NEXT: srl a0, a3, a0
+; RV32IA-ZACAS-NEXT: ret
+;
; RV64IA-ZACAS-LABEL: atomicrmw_umax_i16_seq_cst:
; RV64IA-ZACAS: # %bb.0:
; RV64IA-ZACAS-NEXT: andi a2, a0, -4
@@ -18938,6 +24768,16 @@ define i16 @atomicrmw_umax_i16_seq_cst(ptr %a, i16 %b) nounwind {
; RV64IA-ZACAS-NEXT: srlw a0, a3, a0
; RV64IA-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_umax_i16_seq_cst:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amomaxu.h.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_umax_i16_seq_cst:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amomaxu.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_umax_i16_seq_cst:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amomaxu.h.aqrl a0, a1, (a0)
@@ -18996,30 +24836,30 @@ define i16 @atomicrmw_umin_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
-; RV32IA-LABEL: atomicrmw_umin_i16_monotonic:
-; RV32IA: # %bb.0:
-; RV32IA-NEXT: andi a2, a0, -4
-; RV32IA-NEXT: slli a0, a0, 3
-; RV32IA-NEXT: lui a3, 16
-; RV32IA-NEXT: addi a3, a3, -1
-; RV32IA-NEXT: sll a4, a3, a0
-; RV32IA-NEXT: and a1, a1, a3
-; RV32IA-NEXT: sll a1, a1, a0
-; RV32IA-NEXT: .LBB125_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: lr.w a3, (a2)
-; RV32IA-NEXT: and a6, a3, a4
-; RV32IA-NEXT: mv a5, a3
-; RV32IA-NEXT: bgeu a1, a6, .LBB125_3
-; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB125_1 Depth=1
-; RV32IA-NEXT: xor a5, a3, a1
-; RV32IA-NEXT: and a5, a5, a4
-; RV32IA-NEXT: xor a5, a3, a5
-; RV32IA-NEXT: .LBB125_3: # in Loop: Header=BB125_1 Depth=1
-; RV32IA-NEXT: sc.w a5, a5, (a2)
-; RV32IA-NEXT: bnez a5, .LBB125_1
-; RV32IA-NEXT: # %bb.4:
-; RV32IA-NEXT: srl a0, a3, a0
-; RV32IA-NEXT: ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_umin_i16_monotonic:
+; RV32IA-NOZACAS: # %bb.0:
+; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT: lui a3, 16
+; RV32IA-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-NOZACAS-NEXT: .LBB125_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NOZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-NOZACAS-NEXT: and a6, a3, a4
+; RV32IA-NOZACAS-NEXT: mv a5, a3
+; RV32IA-NOZACAS-NEXT: bgeu a1, a6, .LBB125_3
+; RV32IA-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB125_1 Depth=1
+; RV32IA-NOZACAS-NEXT: xor a5, a3, a1
+; RV32IA-NOZACAS-NEXT: and a5, a5, a4
+; RV32IA-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-NOZACAS-NEXT: .LBB125_3: # in Loop: Header=BB125_1 Depth=1
+; RV32IA-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-NOZACAS-NEXT: bnez a5, .LBB125_1
+; RV32IA-NOZACAS-NEXT: # %bb.4:
+; RV32IA-NOZACAS-NEXT: srl a0, a3, a0
+; RV32IA-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_umin_i16_monotonic:
; RV64I: # %bb.0:
@@ -19090,6 +24930,31 @@ define i16 @atomicrmw_umin_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV64IA-NOZACAS-NEXT: srlw a0, a3, a0
; RV64IA-NOZACAS-NEXT: ret
;
+; RV32IA-ZACAS-LABEL: atomicrmw_umin_i16_monotonic:
+; RV32IA-ZACAS: # %bb.0:
+; RV32IA-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-ZACAS-NEXT: lui a3, 16
+; RV32IA-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-ZACAS-NEXT: sll a4, a3, a0
+; RV32IA-ZACAS-NEXT: and a1, a1, a3
+; RV32IA-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-ZACAS-NEXT: .LBB125_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-ZACAS-NEXT: and a6, a3, a4
+; RV32IA-ZACAS-NEXT: mv a5, a3
+; RV32IA-ZACAS-NEXT: bgeu a1, a6, .LBB125_3
+; RV32IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB125_1 Depth=1
+; RV32IA-ZACAS-NEXT: xor a5, a3, a1
+; RV32IA-ZACAS-NEXT: and a5, a5, a4
+; RV32IA-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-ZACAS-NEXT: .LBB125_3: # in Loop: Header=BB125_1 Depth=1
+; RV32IA-ZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-ZACAS-NEXT: bnez a5, .LBB125_1
+; RV32IA-ZACAS-NEXT: # %bb.4:
+; RV32IA-ZACAS-NEXT: srl a0, a3, a0
+; RV32IA-ZACAS-NEXT: ret
+;
; RV64IA-ZACAS-LABEL: atomicrmw_umin_i16_monotonic:
; RV64IA-ZACAS: # %bb.0:
; RV64IA-ZACAS-NEXT: andi a2, a0, -4
@@ -19115,6 +24980,16 @@ define i16 @atomicrmw_umin_i16_monotonic(ptr %a, i16 %b) nounwind {
; RV64IA-ZACAS-NEXT: srlw a0, a3, a0
; RV64IA-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_umin_i16_monotonic:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amominu.h a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_umin_i16_monotonic:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amominu.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_umin_i16_monotonic:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amominu.h a0, a1, (a0)
@@ -19173,55 +25048,55 @@ define i16 @atomicrmw_umin_i16_acquire(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_umin_i16_acquire:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: lui a3, 16
-; RV32IA-WMO-NEXT: addi a3, a3, -1
-; RV32IA-WMO-NEXT: sll a4, a3, a0
-; RV32IA-WMO-NEXT: and a1, a1, a3
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: .LBB126_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT: lr.w.aq a3, (a2)
-; RV32IA-WMO-NEXT: and a6, a3, a4
-; RV32IA-WMO-NEXT: mv a5, a3
-; RV32IA-WMO-NEXT: bgeu a1, a6, .LBB126_3
-; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB126_1 Depth=1
-; RV32IA-WMO-NEXT: xor a5, a3, a1
-; RV32IA-WMO-NEXT: and a5, a5, a4
-; RV32IA-WMO-NEXT: xor a5, a3, a5
-; RV32IA-WMO-NEXT: .LBB126_3: # in Loop: Header=BB126_1 Depth=1
-; RV32IA-WMO-NEXT: sc.w a5, a5, (a2)
-; RV32IA-WMO-NEXT: bnez a5, .LBB126_1
-; RV32IA-WMO-NEXT: # %bb.4:
-; RV32IA-WMO-NEXT: srl a0, a3, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_umin_i16_acquire:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: .LBB126_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: lr.w.aq a3, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: and a6, a3, a4
+; RV32IA-WMO-NOZACAS-NEXT: mv a5, a3
+; RV32IA-WMO-NOZACAS-NEXT: bgeu a1, a6, .LBB126_3
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB126_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a3, a1
+; RV32IA-WMO-NOZACAS-NEXT: and a5, a5, a4
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-NOZACAS-NEXT: .LBB126_3: # in Loop: Header=BB126_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: bnez a5, .LBB126_1
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.4:
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_umin_i16_acquire:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: lui a3, 16
-; RV32IA-TSO-NEXT: addi a3, a3, -1
-; RV32IA-TSO-NEXT: sll a4, a3, a0
-; RV32IA-TSO-NEXT: and a1, a1, a3
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: .LBB126_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT: lr.w a3, (a2)
-; RV32IA-TSO-NEXT: and a6, a3, a4
-; RV32IA-TSO-NEXT: mv a5, a3
-; RV32IA-TSO-NEXT: bgeu a1, a6, .LBB126_3
-; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB126_1 Depth=1
-; RV32IA-TSO-NEXT: xor a5, a3, a1
-; RV32IA-TSO-NEXT: and a5, a5, a4
-; RV32IA-TSO-NEXT: xor a5, a3, a5
-; RV32IA-TSO-NEXT: .LBB126_3: # in Loop: Header=BB126_1 Depth=1
-; RV32IA-TSO-NEXT: sc.w a5, a5, (a2)
-; RV32IA-TSO-NEXT: bnez a5, .LBB126_1
-; RV32IA-TSO-NEXT: # %bb.4:
-; RV32IA-TSO-NEXT: srl a0, a3, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_umin_i16_acquire:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: .LBB126_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: and a6, a3, a4
+; RV32IA-TSO-NOZACAS-NEXT: mv a5, a3
+; RV32IA-TSO-NOZACAS-NEXT: bgeu a1, a6, .LBB126_3
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB126_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a3, a1
+; RV32IA-TSO-NOZACAS-NEXT: and a5, a5, a4
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-NOZACAS-NEXT: .LBB126_3: # in Loop: Header=BB126_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: bnez a5, .LBB126_1
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.4:
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_umin_i16_acquire:
; RV64I: # %bb.0:
@@ -19317,6 +25192,56 @@ define i16 @atomicrmw_umin_i16_acquire(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a3, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_umin_i16_acquire:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-ZACAS-NEXT: sll a4, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a3
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: .LBB126_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a3, (a2)
+; RV32IA-WMO-ZACAS-NEXT: and a6, a3, a4
+; RV32IA-WMO-ZACAS-NEXT: mv a5, a3
+; RV32IA-WMO-ZACAS-NEXT: bgeu a1, a6, .LBB126_3
+; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB126_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a3, a1
+; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a4
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-ZACAS-NEXT: .LBB126_3: # in Loop: Header=BB126_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB126_1
+; RV32IA-WMO-ZACAS-NEXT: # %bb.4:
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_umin_i16_acquire:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-ZACAS-NEXT: sll a4, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: and a1, a1, a3
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: .LBB126_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-TSO-ZACAS-NEXT: and a6, a3, a4
+; RV32IA-TSO-ZACAS-NEXT: mv a5, a3
+; RV32IA-TSO-ZACAS-NEXT: bgeu a1, a6, .LBB126_3
+; RV32IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB126_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a3, a1
+; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a4
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-ZACAS-NEXT: .LBB126_3: # in Loop: Header=BB126_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT: bnez a5, .LBB126_1
+; RV32IA-TSO-ZACAS-NEXT: # %bb.4:
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_umin_i16_acquire:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -19367,6 +25292,16 @@ define i16 @atomicrmw_umin_i16_acquire(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a3, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_umin_i16_acquire:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amominu.h.aq a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_umin_i16_acquire:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amominu.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_umin_i16_acquire:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amominu.h.aq a0, a1, (a0)
@@ -19425,55 +25360,55 @@ define i16 @atomicrmw_umin_i16_release(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_umin_i16_release:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: lui a3, 16
-; RV32IA-WMO-NEXT: addi a3, a3, -1
-; RV32IA-WMO-NEXT: sll a4, a3, a0
-; RV32IA-WMO-NEXT: and a1, a1, a3
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: .LBB127_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT: lr.w a3, (a2)
-; RV32IA-WMO-NEXT: and a6, a3, a4
-; RV32IA-WMO-NEXT: mv a5, a3
-; RV32IA-WMO-NEXT: bgeu a1, a6, .LBB127_3
-; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB127_1 Depth=1
-; RV32IA-WMO-NEXT: xor a5, a3, a1
-; RV32IA-WMO-NEXT: and a5, a5, a4
-; RV32IA-WMO-NEXT: xor a5, a3, a5
-; RV32IA-WMO-NEXT: .LBB127_3: # in Loop: Header=BB127_1 Depth=1
-; RV32IA-WMO-NEXT: sc.w.rl a5, a5, (a2)
-; RV32IA-WMO-NEXT: bnez a5, .LBB127_1
-; RV32IA-WMO-NEXT: # %bb.4:
-; RV32IA-WMO-NEXT: srl a0, a3, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_umin_i16_release:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: .LBB127_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: and a6, a3, a4
+; RV32IA-WMO-NOZACAS-NEXT: mv a5, a3
+; RV32IA-WMO-NOZACAS-NEXT: bgeu a1, a6, .LBB127_3
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB127_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a3, a1
+; RV32IA-WMO-NOZACAS-NEXT: and a5, a5, a4
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-NOZACAS-NEXT: .LBB127_3: # in Loop: Header=BB127_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: bnez a5, .LBB127_1
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.4:
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_umin_i16_release:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: lui a3, 16
-; RV32IA-TSO-NEXT: addi a3, a3, -1
-; RV32IA-TSO-NEXT: sll a4, a3, a0
-; RV32IA-TSO-NEXT: and a1, a1, a3
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: .LBB127_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT: lr.w a3, (a2)
-; RV32IA-TSO-NEXT: and a6, a3, a4
-; RV32IA-TSO-NEXT: mv a5, a3
-; RV32IA-TSO-NEXT: bgeu a1, a6, .LBB127_3
-; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB127_1 Depth=1
-; RV32IA-TSO-NEXT: xor a5, a3, a1
-; RV32IA-TSO-NEXT: and a5, a5, a4
-; RV32IA-TSO-NEXT: xor a5, a3, a5
-; RV32IA-TSO-NEXT: .LBB127_3: # in Loop: Header=BB127_1 Depth=1
-; RV32IA-TSO-NEXT: sc.w a5, a5, (a2)
-; RV32IA-TSO-NEXT: bnez a5, .LBB127_1
-; RV32IA-TSO-NEXT: # %bb.4:
-; RV32IA-TSO-NEXT: srl a0, a3, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_umin_i16_release:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: .LBB127_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: and a6, a3, a4
+; RV32IA-TSO-NOZACAS-NEXT: mv a5, a3
+; RV32IA-TSO-NOZACAS-NEXT: bgeu a1, a6, .LBB127_3
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB127_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a3, a1
+; RV32IA-TSO-NOZACAS-NEXT: and a5, a5, a4
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-NOZACAS-NEXT: .LBB127_3: # in Loop: Header=BB127_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: bnez a5, .LBB127_1
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.4:
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_umin_i16_release:
; RV64I: # %bb.0:
@@ -19569,6 +25504,56 @@ define i16 @atomicrmw_umin_i16_release(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a3, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_umin_i16_release:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-ZACAS-NEXT: sll a4, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a3
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: .LBB127_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-WMO-ZACAS-NEXT: and a6, a3, a4
+; RV32IA-WMO-ZACAS-NEXT: mv a5, a3
+; RV32IA-WMO-ZACAS-NEXT: bgeu a1, a6, .LBB127_3
+; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB127_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a3, a1
+; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a4
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-ZACAS-NEXT: .LBB127_3: # in Loop: Header=BB127_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB127_1
+; RV32IA-WMO-ZACAS-NEXT: # %bb.4:
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_umin_i16_release:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-ZACAS-NEXT: sll a4, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: and a1, a1, a3
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: .LBB127_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-TSO-ZACAS-NEXT: and a6, a3, a4
+; RV32IA-TSO-ZACAS-NEXT: mv a5, a3
+; RV32IA-TSO-ZACAS-NEXT: bgeu a1, a6, .LBB127_3
+; RV32IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB127_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a3, a1
+; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a4
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-ZACAS-NEXT: .LBB127_3: # in Loop: Header=BB127_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT: bnez a5, .LBB127_1
+; RV32IA-TSO-ZACAS-NEXT: # %bb.4:
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_umin_i16_release:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -19619,6 +25604,16 @@ define i16 @atomicrmw_umin_i16_release(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a3, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_umin_i16_release:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amominu.h.rl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_umin_i16_release:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amominu.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_umin_i16_release:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amominu.h.rl a0, a1, (a0)
@@ -19677,55 +25672,55 @@ define i16 @atomicrmw_umin_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
-; RV32IA-WMO-LABEL: atomicrmw_umin_i16_acq_rel:
-; RV32IA-WMO: # %bb.0:
-; RV32IA-WMO-NEXT: andi a2, a0, -4
-; RV32IA-WMO-NEXT: slli a0, a0, 3
-; RV32IA-WMO-NEXT: lui a3, 16
-; RV32IA-WMO-NEXT: addi a3, a3, -1
-; RV32IA-WMO-NEXT: sll a4, a3, a0
-; RV32IA-WMO-NEXT: and a1, a1, a3
-; RV32IA-WMO-NEXT: sll a1, a1, a0
-; RV32IA-WMO-NEXT: .LBB128_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-WMO-NEXT: lr.w.aq a3, (a2)
-; RV32IA-WMO-NEXT: and a6, a3, a4
-; RV32IA-WMO-NEXT: mv a5, a3
-; RV32IA-WMO-NEXT: bgeu a1, a6, .LBB128_3
-; RV32IA-WMO-NEXT: # %bb.2: # in Loop: Header=BB128_1 Depth=1
-; RV32IA-WMO-NEXT: xor a5, a3, a1
-; RV32IA-WMO-NEXT: and a5, a5, a4
-; RV32IA-WMO-NEXT: xor a5, a3, a5
-; RV32IA-WMO-NEXT: .LBB128_3: # in Loop: Header=BB128_1 Depth=1
-; RV32IA-WMO-NEXT: sc.w.rl a5, a5, (a2)
-; RV32IA-WMO-NEXT: bnez a5, .LBB128_1
-; RV32IA-WMO-NEXT: # %bb.4:
-; RV32IA-WMO-NEXT: srl a0, a3, a0
-; RV32IA-WMO-NEXT: ret
+; RV32IA-WMO-NOZACAS-LABEL: atomicrmw_umin_i16_acq_rel:
+; RV32IA-WMO-NOZACAS: # %bb.0:
+; RV32IA-WMO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-NOZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-WMO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-NOZACAS-NEXT: .LBB128_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: lr.w.aq a3, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: and a6, a3, a4
+; RV32IA-WMO-NOZACAS-NEXT: mv a5, a3
+; RV32IA-WMO-NOZACAS-NEXT: bgeu a1, a6, .LBB128_3
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB128_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a3, a1
+; RV32IA-WMO-NOZACAS-NEXT: and a5, a5, a4
+; RV32IA-WMO-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-NOZACAS-NEXT: .LBB128_3: # in Loop: Header=BB128_1 Depth=1
+; RV32IA-WMO-NOZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-NOZACAS-NEXT: bnez a5, .LBB128_1
+; RV32IA-WMO-NOZACAS-NEXT: # %bb.4:
+; RV32IA-WMO-NOZACAS-NEXT: srl a0, a3, a0
+; RV32IA-WMO-NOZACAS-NEXT: ret
;
-; RV32IA-TSO-LABEL: atomicrmw_umin_i16_acq_rel:
-; RV32IA-TSO: # %bb.0:
-; RV32IA-TSO-NEXT: andi a2, a0, -4
-; RV32IA-TSO-NEXT: slli a0, a0, 3
-; RV32IA-TSO-NEXT: lui a3, 16
-; RV32IA-TSO-NEXT: addi a3, a3, -1
-; RV32IA-TSO-NEXT: sll a4, a3, a0
-; RV32IA-TSO-NEXT: and a1, a1, a3
-; RV32IA-TSO-NEXT: sll a1, a1, a0
-; RV32IA-TSO-NEXT: .LBB128_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-TSO-NEXT: lr.w a3, (a2)
-; RV32IA-TSO-NEXT: and a6, a3, a4
-; RV32IA-TSO-NEXT: mv a5, a3
-; RV32IA-TSO-NEXT: bgeu a1, a6, .LBB128_3
-; RV32IA-TSO-NEXT: # %bb.2: # in Loop: Header=BB128_1 Depth=1
-; RV32IA-TSO-NEXT: xor a5, a3, a1
-; RV32IA-TSO-NEXT: and a5, a5, a4
-; RV32IA-TSO-NEXT: xor a5, a3, a5
-; RV32IA-TSO-NEXT: .LBB128_3: # in Loop: Header=BB128_1 Depth=1
-; RV32IA-TSO-NEXT: sc.w a5, a5, (a2)
-; RV32IA-TSO-NEXT: bnez a5, .LBB128_1
-; RV32IA-TSO-NEXT: # %bb.4:
-; RV32IA-TSO-NEXT: srl a0, a3, a0
-; RV32IA-TSO-NEXT: ret
+; RV32IA-TSO-NOZACAS-LABEL: atomicrmw_umin_i16_acq_rel:
+; RV32IA-TSO-NOZACAS: # %bb.0:
+; RV32IA-TSO-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-NOZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-TSO-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-NOZACAS-NEXT: .LBB128_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: and a6, a3, a4
+; RV32IA-TSO-NOZACAS-NEXT: mv a5, a3
+; RV32IA-TSO-NOZACAS-NEXT: bgeu a1, a6, .LBB128_3
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB128_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a3, a1
+; RV32IA-TSO-NOZACAS-NEXT: and a5, a5, a4
+; RV32IA-TSO-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-NOZACAS-NEXT: .LBB128_3: # in Loop: Header=BB128_1 Depth=1
+; RV32IA-TSO-NOZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-NOZACAS-NEXT: bnez a5, .LBB128_1
+; RV32IA-TSO-NOZACAS-NEXT: # %bb.4:
+; RV32IA-TSO-NOZACAS-NEXT: srl a0, a3, a0
+; RV32IA-TSO-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_umin_i16_acq_rel:
; RV64I: # %bb.0:
@@ -19821,6 +25816,56 @@ define i16 @atomicrmw_umin_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-NOZACAS-NEXT: srlw a0, a3, a0
; RV64IA-TSO-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZACAS-LABEL: atomicrmw_umin_i16_acq_rel:
+; RV32IA-WMO-ZACAS: # %bb.0:
+; RV32IA-WMO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-WMO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-WMO-ZACAS-NEXT: lui a3, 16
+; RV32IA-WMO-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-WMO-ZACAS-NEXT: sll a4, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: and a1, a1, a3
+; RV32IA-WMO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-WMO-ZACAS-NEXT: .LBB128_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZACAS-NEXT: lr.w.aq a3, (a2)
+; RV32IA-WMO-ZACAS-NEXT: and a6, a3, a4
+; RV32IA-WMO-ZACAS-NEXT: mv a5, a3
+; RV32IA-WMO-ZACAS-NEXT: bgeu a1, a6, .LBB128_3
+; RV32IA-WMO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB128_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a3, a1
+; RV32IA-WMO-ZACAS-NEXT: and a5, a5, a4
+; RV32IA-WMO-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-WMO-ZACAS-NEXT: .LBB128_3: # in Loop: Header=BB128_1 Depth=1
+; RV32IA-WMO-ZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-WMO-ZACAS-NEXT: bnez a5, .LBB128_1
+; RV32IA-WMO-ZACAS-NEXT: # %bb.4:
+; RV32IA-WMO-ZACAS-NEXT: srl a0, a3, a0
+; RV32IA-WMO-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZACAS-LABEL: atomicrmw_umin_i16_acq_rel:
+; RV32IA-TSO-ZACAS: # %bb.0:
+; RV32IA-TSO-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-TSO-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-TSO-ZACAS-NEXT: lui a3, 16
+; RV32IA-TSO-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-TSO-ZACAS-NEXT: sll a4, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: and a1, a1, a3
+; RV32IA-TSO-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-TSO-ZACAS-NEXT: .LBB128_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZACAS-NEXT: lr.w a3, (a2)
+; RV32IA-TSO-ZACAS-NEXT: and a6, a3, a4
+; RV32IA-TSO-ZACAS-NEXT: mv a5, a3
+; RV32IA-TSO-ZACAS-NEXT: bgeu a1, a6, .LBB128_3
+; RV32IA-TSO-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB128_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a3, a1
+; RV32IA-TSO-ZACAS-NEXT: and a5, a5, a4
+; RV32IA-TSO-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-TSO-ZACAS-NEXT: .LBB128_3: # in Loop: Header=BB128_1 Depth=1
+; RV32IA-TSO-ZACAS-NEXT: sc.w a5, a5, (a2)
+; RV32IA-TSO-ZACAS-NEXT: bnez a5, .LBB128_1
+; RV32IA-TSO-ZACAS-NEXT: # %bb.4:
+; RV32IA-TSO-ZACAS-NEXT: srl a0, a3, a0
+; RV32IA-TSO-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZACAS-LABEL: atomicrmw_umin_i16_acq_rel:
; RV64IA-WMO-ZACAS: # %bb.0:
; RV64IA-WMO-ZACAS-NEXT: andi a2, a0, -4
@@ -19871,6 +25916,16 @@ define i16 @atomicrmw_umin_i16_acq_rel(ptr %a, i16 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: srlw a0, a3, a0
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_umin_i16_acq_rel:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amominu.h.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_umin_i16_acq_rel:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amominu.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_umin_i16_acq_rel:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amominu.h.aqrl a0, a1, (a0)
@@ -19929,30 +25984,30 @@ define i16 @atomicrmw_umin_i16_seq_cst(ptr %a, i16 %b) nounwind {
; RV32I-NEXT: addi sp, sp, 32
; RV32I-NEXT: ret
;
-; RV32IA-LABEL: atomicrmw_umin_i16_seq_cst:
-; RV32IA: # %bb.0:
-; RV32IA-NEXT: andi a2, a0, -4
-; RV32IA-NEXT: slli a0, a0, 3
-; RV32IA-NEXT: lui a3, 16
-; RV32IA-NEXT: addi a3, a3, -1
-; RV32IA-NEXT: sll a4, a3, a0
-; RV32IA-NEXT: and a1, a1, a3
-; RV32IA-NEXT: sll a1, a1, a0
-; RV32IA-NEXT: .LBB129_1: # =>This Inner Loop Header: Depth=1
-; RV32IA-NEXT: lr.w.aqrl a3, (a2)
-; RV32IA-NEXT: and a6, a3, a4
-; RV32IA-NEXT: mv a5, a3
-; RV32IA-NEXT: bgeu a1, a6, .LBB129_3
-; RV32IA-NEXT: # %bb.2: # in Loop: Header=BB129_1 Depth=1
-; RV32IA-NEXT: xor a5, a3, a1
-; RV32IA-NEXT: and a5, a5, a4
-; RV32IA-NEXT: xor a5, a3, a5
-; RV32IA-NEXT: .LBB129_3: # in Loop: Header=BB129_1 Depth=1
-; RV32IA-NEXT: sc.w.rl a5, a5, (a2)
-; RV32IA-NEXT: bnez a5, .LBB129_1
-; RV32IA-NEXT: # %bb.4:
-; RV32IA-NEXT: srl a0, a3, a0
-; RV32IA-NEXT: ret
+; RV32IA-NOZACAS-LABEL: atomicrmw_umin_i16_seq_cst:
+; RV32IA-NOZACAS: # %bb.0:
+; RV32IA-NOZACAS-NEXT: andi a2, a0, -4
+; RV32IA-NOZACAS-NEXT: slli a0, a0, 3
+; RV32IA-NOZACAS-NEXT: lui a3, 16
+; RV32IA-NOZACAS-NEXT: addi a3, a3, -1
+; RV32IA-NOZACAS-NEXT: sll a4, a3, a0
+; RV32IA-NOZACAS-NEXT: and a1, a1, a3
+; RV32IA-NOZACAS-NEXT: sll a1, a1, a0
+; RV32IA-NOZACAS-NEXT: .LBB129_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-NOZACAS-NEXT: lr.w.aqrl a3, (a2)
+; RV32IA-NOZACAS-NEXT: and a6, a3, a4
+; RV32IA-NOZACAS-NEXT: mv a5, a3
+; RV32IA-NOZACAS-NEXT: bgeu a1, a6, .LBB129_3
+; RV32IA-NOZACAS-NEXT: # %bb.2: # in Loop: Header=BB129_1 Depth=1
+; RV32IA-NOZACAS-NEXT: xor a5, a3, a1
+; RV32IA-NOZACAS-NEXT: and a5, a5, a4
+; RV32IA-NOZACAS-NEXT: xor a5, a3, a5
+; RV32IA-NOZACAS-NEXT: .LBB129_3: # in Loop: Header=BB129_1 Depth=1
+; RV32IA-NOZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-NOZACAS-NEXT: bnez a5, .LBB129_1
+; RV32IA-NOZACAS-NEXT: # %bb.4:
+; RV32IA-NOZACAS-NEXT: srl a0, a3, a0
+; RV32IA-NOZACAS-NEXT: ret
;
; RV64I-LABEL: atomicrmw_umin_i16_seq_cst:
; RV64I: # %bb.0:
@@ -20023,6 +26078,31 @@ define i16 @atomicrmw_umin_i16_seq_cst(ptr %a, i16 %b) nounwind {
; RV64IA-NOZACAS-NEXT: srlw a0, a3, a0
; RV64IA-NOZACAS-NEXT: ret
;
+; RV32IA-ZACAS-LABEL: atomicrmw_umin_i16_seq_cst:
+; RV32IA-ZACAS: # %bb.0:
+; RV32IA-ZACAS-NEXT: andi a2, a0, -4
+; RV32IA-ZACAS-NEXT: slli a0, a0, 3
+; RV32IA-ZACAS-NEXT: lui a3, 16
+; RV32IA-ZACAS-NEXT: addi a3, a3, -1
+; RV32IA-ZACAS-NEXT: sll a4, a3, a0
+; RV32IA-ZACAS-NEXT: and a1, a1, a3
+; RV32IA-ZACAS-NEXT: sll a1, a1, a0
+; RV32IA-ZACAS-NEXT: .LBB129_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-ZACAS-NEXT: lr.w.aqrl a3, (a2)
+; RV32IA-ZACAS-NEXT: and a6, a3, a4
+; RV32IA-ZACAS-NEXT: mv a5, a3
+; RV32IA-ZACAS-NEXT: bgeu a1, a6, .LBB129_3
+; RV32IA-ZACAS-NEXT: # %bb.2: # in Loop: Header=BB129_1 Depth=1
+; RV32IA-ZACAS-NEXT: xor a5, a3, a1
+; RV32IA-ZACAS-NEXT: and a5, a5, a4
+; RV32IA-ZACAS-NEXT: xor a5, a3, a5
+; RV32IA-ZACAS-NEXT: .LBB129_3: # in Loop: Header=BB129_1 Depth=1
+; RV32IA-ZACAS-NEXT: sc.w.rl a5, a5, (a2)
+; RV32IA-ZACAS-NEXT: bnez a5, .LBB129_1
+; RV32IA-ZACAS-NEXT: # %bb.4:
+; RV32IA-ZACAS-NEXT: srl a0, a3, a0
+; RV32IA-ZACAS-NEXT: ret
+;
; RV64IA-ZACAS-LABEL: atomicrmw_umin_i16_seq_cst:
; RV64IA-ZACAS: # %bb.0:
; RV64IA-ZACAS-NEXT: andi a2, a0, -4
@@ -20048,6 +26128,16 @@ define i16 @atomicrmw_umin_i16_seq_cst(ptr %a, i16 %b) nounwind {
; RV64IA-ZACAS-NEXT: srlw a0, a3, a0
; RV64IA-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-LABEL: atomicrmw_umin_i16_seq_cst:
+; RV32IA-WMO-ZABHA: # %bb.0:
+; RV32IA-WMO-ZABHA-NEXT: amominu.h.aqrl a0, a1, (a0)
+; RV32IA-WMO-ZABHA-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-LABEL: atomicrmw_umin_i16_seq_cst:
+; RV32IA-TSO-ZABHA: # %bb.0:
+; RV32IA-TSO-ZABHA-NEXT: amominu.h a0, a1, (a0)
+; RV32IA-TSO-ZABHA-NEXT: ret
+;
; RV64IA-WMO-ZABHA-LABEL: atomicrmw_umin_i16_seq_cst:
; RV64IA-WMO-ZABHA: # %bb.0:
; RV64IA-WMO-ZABHA-NEXT: amominu.h.aqrl a0, a1, (a0)
@@ -20992,6 +27082,30 @@ define i32 @atomicrmw_nand_i32_monotonic(ptr %a, i32 %b) nounwind {
; RV64IA-ZACAS-NEXT: # %bb.2: # %atomicrmw.end
; RV64IA-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i32_monotonic:
+; RV32IA-WMO-ZABHA-NOZACAS: # %bb.0:
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: .LBB150_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: lr.w a2, (a0)
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: and a3, a2, a1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: not a3, a3
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: sc.w a3, a3, (a0)
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: bnez a3, .LBB150_1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: mv a0, a2
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i32_monotonic:
+; RV32IA-TSO-ZABHA-NOZACAS: # %bb.0:
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: .LBB150_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: lr.w a2, (a0)
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: and a3, a2, a1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: not a3, a3
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: sc.w a3, a3, (a0)
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: bnez a3, .LBB150_1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: mv a0, a2
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: ret
+;
; RV64IA-WMO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i32_monotonic:
; RV64IA-WMO-ZABHA-NOZACAS: # %bb.0:
; RV64IA-WMO-ZABHA-NOZACAS-NEXT: .LBB150_1: # =>This Inner Loop Header: Depth=1
@@ -21016,6 +27130,34 @@ define i32 @atomicrmw_nand_i32_monotonic(ptr %a, i32 %b) nounwind {
; RV64IA-TSO-ZABHA-NOZACAS-NEXT: mv a0, a2
; RV64IA-TSO-ZABHA-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i32_monotonic:
+; RV32IA-WMO-ZABHA-ZACAS: # %bb.0:
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: mv a2, a0
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: lw a0, 0(a0)
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: .LBB150_1: # %atomicrmw.start
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: mv a3, a0
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: and a4, a0, a1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: not a4, a4
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: amocas.w a0, a4, (a2)
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: bne a0, a3, .LBB150_1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: # %bb.2: # %atomicrmw.end
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i32_monotonic:
+; RV32IA-TSO-ZABHA-ZACAS: # %bb.0:
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: mv a2, a0
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: lw a0, 0(a0)
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: .LBB150_1: # %atomicrmw.start
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: mv a3, a0
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: and a4, a0, a1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: not a4, a4
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: amocas.w a0, a4, (a2)
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: bne a0, a3, .LBB150_1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: # %bb.2: # %atomicrmw.end
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i32_monotonic:
; RV64IA-WMO-ZABHA-ZACAS: # %bb.0:
; RV64IA-WMO-ZABHA-ZACAS-NEXT: mv a2, a0
@@ -21172,6 +27314,30 @@ define i32 @atomicrmw_nand_i32_acquire(ptr %a, i32 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: # %bb.2: # %atomicrmw.end
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i32_acquire:
+; RV32IA-WMO-ZABHA-NOZACAS: # %bb.0:
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: .LBB151_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: lr.w.aq a2, (a0)
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: and a3, a2, a1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: not a3, a3
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: sc.w a3, a3, (a0)
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: bnez a3, .LBB151_1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: mv a0, a2
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i32_acquire:
+; RV32IA-TSO-ZABHA-NOZACAS: # %bb.0:
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: .LBB151_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: lr.w a2, (a0)
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: and a3, a2, a1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: not a3, a3
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: sc.w a3, a3, (a0)
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: bnez a3, .LBB151_1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: mv a0, a2
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: ret
+;
; RV64IA-WMO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i32_acquire:
; RV64IA-WMO-ZABHA-NOZACAS: # %bb.0:
; RV64IA-WMO-ZABHA-NOZACAS-NEXT: .LBB151_1: # =>This Inner Loop Header: Depth=1
@@ -21196,6 +27362,34 @@ define i32 @atomicrmw_nand_i32_acquire(ptr %a, i32 %b) nounwind {
; RV64IA-TSO-ZABHA-NOZACAS-NEXT: mv a0, a2
; RV64IA-TSO-ZABHA-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i32_acquire:
+; RV32IA-WMO-ZABHA-ZACAS: # %bb.0:
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: mv a2, a0
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: lw a0, 0(a0)
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: .LBB151_1: # %atomicrmw.start
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: mv a3, a0
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: and a4, a0, a1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: not a4, a4
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: amocas.w.aq a0, a4, (a2)
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: bne a0, a3, .LBB151_1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: # %bb.2: # %atomicrmw.end
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i32_acquire:
+; RV32IA-TSO-ZABHA-ZACAS: # %bb.0:
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: mv a2, a0
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: lw a0, 0(a0)
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: .LBB151_1: # %atomicrmw.start
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: mv a3, a0
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: and a4, a0, a1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: not a4, a4
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: amocas.w a0, a4, (a2)
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: bne a0, a3, .LBB151_1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: # %bb.2: # %atomicrmw.end
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i32_acquire:
; RV64IA-WMO-ZABHA-ZACAS: # %bb.0:
; RV64IA-WMO-ZABHA-ZACAS-NEXT: mv a2, a0
@@ -21352,6 +27546,30 @@ define i32 @atomicrmw_nand_i32_release(ptr %a, i32 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: # %bb.2: # %atomicrmw.end
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i32_release:
+; RV32IA-WMO-ZABHA-NOZACAS: # %bb.0:
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: .LBB152_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: lr.w a2, (a0)
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: and a3, a2, a1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: not a3, a3
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: sc.w.rl a3, a3, (a0)
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: bnez a3, .LBB152_1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: mv a0, a2
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i32_release:
+; RV32IA-TSO-ZABHA-NOZACAS: # %bb.0:
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: .LBB152_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: lr.w a2, (a0)
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: and a3, a2, a1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: not a3, a3
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: sc.w a3, a3, (a0)
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: bnez a3, .LBB152_1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: mv a0, a2
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: ret
+;
; RV64IA-WMO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i32_release:
; RV64IA-WMO-ZABHA-NOZACAS: # %bb.0:
; RV64IA-WMO-ZABHA-NOZACAS-NEXT: .LBB152_1: # =>This Inner Loop Header: Depth=1
@@ -21376,6 +27594,34 @@ define i32 @atomicrmw_nand_i32_release(ptr %a, i32 %b) nounwind {
; RV64IA-TSO-ZABHA-NOZACAS-NEXT: mv a0, a2
; RV64IA-TSO-ZABHA-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i32_release:
+; RV32IA-WMO-ZABHA-ZACAS: # %bb.0:
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: mv a2, a0
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: lw a0, 0(a0)
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: .LBB152_1: # %atomicrmw.start
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: mv a3, a0
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: and a4, a0, a1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: not a4, a4
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: amocas.w.rl a0, a4, (a2)
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: bne a0, a3, .LBB152_1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: # %bb.2: # %atomicrmw.end
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i32_release:
+; RV32IA-TSO-ZABHA-ZACAS: # %bb.0:
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: mv a2, a0
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: lw a0, 0(a0)
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: .LBB152_1: # %atomicrmw.start
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: mv a3, a0
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: and a4, a0, a1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: not a4, a4
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: amocas.w a0, a4, (a2)
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: bne a0, a3, .LBB152_1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: # %bb.2: # %atomicrmw.end
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i32_release:
; RV64IA-WMO-ZABHA-ZACAS: # %bb.0:
; RV64IA-WMO-ZABHA-ZACAS-NEXT: mv a2, a0
@@ -21532,6 +27778,30 @@ define i32 @atomicrmw_nand_i32_acq_rel(ptr %a, i32 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: # %bb.2: # %atomicrmw.end
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i32_acq_rel:
+; RV32IA-WMO-ZABHA-NOZACAS: # %bb.0:
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: .LBB153_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: lr.w.aq a2, (a0)
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: and a3, a2, a1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: not a3, a3
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: sc.w.rl a3, a3, (a0)
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: bnez a3, .LBB153_1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: mv a0, a2
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i32_acq_rel:
+; RV32IA-TSO-ZABHA-NOZACAS: # %bb.0:
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: .LBB153_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: lr.w a2, (a0)
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: and a3, a2, a1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: not a3, a3
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: sc.w a3, a3, (a0)
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: bnez a3, .LBB153_1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: mv a0, a2
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: ret
+;
; RV64IA-WMO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i32_acq_rel:
; RV64IA-WMO-ZABHA-NOZACAS: # %bb.0:
; RV64IA-WMO-ZABHA-NOZACAS-NEXT: .LBB153_1: # =>This Inner Loop Header: Depth=1
@@ -21556,6 +27826,34 @@ define i32 @atomicrmw_nand_i32_acq_rel(ptr %a, i32 %b) nounwind {
; RV64IA-TSO-ZABHA-NOZACAS-NEXT: mv a0, a2
; RV64IA-TSO-ZABHA-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i32_acq_rel:
+; RV32IA-WMO-ZABHA-ZACAS: # %bb.0:
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: mv a2, a0
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: lw a0, 0(a0)
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: .LBB153_1: # %atomicrmw.start
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: mv a3, a0
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: and a4, a0, a1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: not a4, a4
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: amocas.w.aqrl a0, a4, (a2)
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: bne a0, a3, .LBB153_1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: # %bb.2: # %atomicrmw.end
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i32_acq_rel:
+; RV32IA-TSO-ZABHA-ZACAS: # %bb.0:
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: mv a2, a0
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: lw a0, 0(a0)
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: .LBB153_1: # %atomicrmw.start
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: mv a3, a0
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: and a4, a0, a1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: not a4, a4
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: amocas.w a0, a4, (a2)
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: bne a0, a3, .LBB153_1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: # %bb.2: # %atomicrmw.end
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i32_acq_rel:
; RV64IA-WMO-ZABHA-ZACAS: # %bb.0:
; RV64IA-WMO-ZABHA-ZACAS-NEXT: mv a2, a0
@@ -21692,6 +27990,30 @@ define i32 @atomicrmw_nand_i32_seq_cst(ptr %a, i32 %b) nounwind {
; RV64IA-TSO-ZACAS-NEXT: # %bb.2: # %atomicrmw.end
; RV64IA-TSO-ZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i32_seq_cst:
+; RV32IA-WMO-ZABHA-NOZACAS: # %bb.0:
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: .LBB154_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: lr.w.aqrl a2, (a0)
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: and a3, a2, a1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: not a3, a3
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: sc.w.rl a3, a3, (a0)
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: bnez a3, .LBB154_1
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: # %bb.2:
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: mv a0, a2
+; RV32IA-WMO-ZABHA-NOZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i32_seq_cst:
+; RV32IA-TSO-ZABHA-NOZACAS: # %bb.0:
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: .LBB154_1: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: lr.w.aqrl a2, (a0)
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: and a3, a2, a1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: not a3, a3
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: sc.w.rl a3, a3, (a0)
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: bnez a3, .LBB154_1
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: # %bb.2:
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: mv a0, a2
+; RV32IA-TSO-ZABHA-NOZACAS-NEXT: ret
+;
; RV64IA-WMO-ZABHA-NOZACAS-LABEL: atomicrmw_nand_i32_seq_cst:
; RV64IA-WMO-ZABHA-NOZACAS: # %bb.0:
; RV64IA-WMO-ZABHA-NOZACAS-NEXT: .LBB154_1: # =>This Inner Loop Header: Depth=1
@@ -21716,6 +28038,36 @@ define i32 @atomicrmw_nand_i32_seq_cst(ptr %a, i32 %b) nounwind {
; RV64IA-TSO-ZABHA-NOZACAS-NEXT: mv a0, a2
; RV64IA-TSO-ZABHA-NOZACAS-NEXT: ret
;
+; RV32IA-WMO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i32_seq_cst:
+; RV32IA-WMO-ZABHA-ZACAS: # %bb.0:
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: mv a2, a0
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: lw a0, 0(a0)
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: .LBB154_1: # %atomicrmw.start
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: mv a3, a0
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: and a4, a0, a1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: not a4, a4
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: fence rw, rw
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: amocas.w.aqrl a0, a4, (a2)
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: bne a0, a3, .LBB154_1
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: # %bb.2: # %atomicrmw.end
+; RV32IA-WMO-ZABHA-ZACAS-NEXT: ret
+;
+; RV32IA-TSO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i32_seq_cst:
+; RV32IA-TSO-ZABHA-ZACAS: # %bb.0:
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: mv a2, a0
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: lw a0, 0(a0)
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: .LBB154_1: # %atomicrmw.start
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: # =>This Inner Loop Header: Depth=1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: mv a3, a0
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: and a4, a0, a1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: not a4, a4
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: fence rw, rw
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: amocas.w a0, a4, (a2)
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: bne a0, a3, .LBB154_1
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: # %bb.2: # %atomicrmw.end
+; RV32IA-TSO-ZABHA-ZACAS-NEXT: ret
+;
; RV64IA-WMO-ZABHA-ZACAS-LABEL: atomicrmw_nand_i32_seq_cst:
; RV64IA-WMO-ZABHA-ZACAS: # %bb.0:
; RV64IA-WMO-ZABHA-ZACAS-NEXT: mv a2, a0
diff --git a/llvm/test/CodeGen/RISCV/attributes.ll b/llvm/test/CodeGen/RISCV/attributes.ll
index ead255b..f3529b1 100644
--- a/llvm/test/CodeGen/RISCV/attributes.ll
+++ b/llvm/test/CodeGen/RISCV/attributes.ll
@@ -443,7 +443,7 @@
; RV32ZVFBFWMA: .attribute 5, "rv32i2p1_f2p2_zicsr2p0_zfbfmin1p0_zve32f1p0_zve32x1p0_zvfbfmin1p0_zvfbfwma1p0_zvl32b1p0"
; RV32ZVFOFP8MIN: .attribute 5, "rv32i2p1_f2p2_zicsr2p0_zve32f1p0_zve32x1p0_zvfofp8min0p2_zvl32b1p0"
; RV32ZACAS: .attribute 5, "rv32i2p1_zaamo1p0_zacas1p0"
-; RV32ZALASR: .attribute 5, "rv32i2p1_zalasr0p1"
+; RV32ZALASR: .attribute 5, "rv32i2p1_zalasr0p9"
; RV32ZAMA16B: .attribute 5, "rv32i2p1_zama16b1p0"
; RV32ZICFILP: .attribute 5, "rv32i2p1_zicfilp1p0_zicsr2p0"
; RV32ZABHA: .attribute 5, "rv32i2p1_zaamo1p0_zabha1p0"
@@ -590,8 +590,8 @@
; RV64ZVFBFWMA: .attribute 5, "rv64i2p1_f2p2_zicsr2p0_zfbfmin1p0_zve32f1p0_zve32x1p0_zvfbfmin1p0_zvfbfwma1p0_zvl32b1p0"
; RV64ZVFOFP8MIN: .attribute 5, "rv64i2p1_f2p2_zicsr2p0_zve32f1p0_zve32x1p0_zvfofp8min0p2_zvl32b1p0"
; RV64ZACAS: .attribute 5, "rv64i2p1_zaamo1p0_zacas1p0"
-; RV64ZALASR: .attribute 5, "rv64i2p1_zalasr0p1"
-; RV64ZALASRA: .attribute 5, "rv64i2p1_a2p1_zaamo1p0_zalasr0p1_zalrsc1p0"
+; RV64ZALASR: .attribute 5, "rv64i2p1_zalasr0p9"
+; RV64ZALASRA: .attribute 5, "rv64i2p1_a2p1_zaamo1p0_zalasr0p9_zalrsc1p0"
; RV64ZICFILP: .attribute 5, "rv64i2p1_zicfilp1p0_zicsr2p0"
; RV64ZABHA: .attribute 5, "rv64i2p1_zaamo1p0_zabha1p0"
; RV64ZVBC32E: .attribute 5, "rv64i2p1_zicsr2p0_zvbc32e0p7_zve32x1p0_zvl32b1p0"
diff --git a/llvm/test/CodeGen/RISCV/float-imm.ll b/llvm/test/CodeGen/RISCV/float-imm.ll
index e4e3454..610c72b 100644
--- a/llvm/test/CodeGen/RISCV/float-imm.ll
+++ b/llvm/test/CodeGen/RISCV/float-imm.ll
@@ -4,11 +4,10 @@
; RUN: llc -mtriple=riscv64 -mattr=+f -verify-machineinstrs < %s \
; RUN: -target-abi=lp64f | FileCheck %s
; RUN: llc -mtriple=riscv32 -mattr=+zfinx -verify-machineinstrs < %s \
-; RUN: -target-abi=ilp32 | FileCheck --check-prefixes=CHECKZFINX,RV32ZFINX %s
+; RUN: -target-abi=ilp32 | FileCheck --check-prefixes=CHECKZFINX %s
; RUN: llc -mtriple=riscv64 -mattr=+zfinx -verify-machineinstrs < %s \
-; RUN: -target-abi=lp64 | FileCheck --check-prefixes=CHECKZFINX,RV64ZFINX %s
+; RUN: -target-abi=lp64 | FileCheck --check-prefixes=CHECKZFINX %s
-; TODO: constant pool shouldn't be necessary for RV64IF.
define float @float_imm() nounwind {
; CHECK-LABEL: float_imm:
; CHECK: # %bb.0:
@@ -69,6 +68,3 @@ define float @float_negative_zero(ptr %pf) nounwind {
; CHECKZFINX-NEXT: ret
ret float -0.0
}
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; RV32ZFINX: {{.*}}
-; RV64ZFINX: {{.*}}
diff --git a/llvm/test/CodeGen/RISCV/half-imm.ll b/llvm/test/CodeGen/RISCV/half-imm.ll
index 1dc0da8c..ec1a7a4 100644
--- a/llvm/test/CodeGen/RISCV/half-imm.ll
+++ b/llvm/test/CodeGen/RISCV/half-imm.ll
@@ -5,22 +5,21 @@
; RUN: -target-abi lp64f < %s | FileCheck %s
; RUN: llc -mtriple=riscv32 -mattr=+zhinx -verify-machineinstrs \
; RUN: -target-abi ilp32 < %s \
-; RUN: | FileCheck -check-prefix=RV32IZHINX %s
+; RUN: | FileCheck -check-prefixes=CHECKIZHINX %s
; RUN: llc -mtriple=riscv64 -mattr=+zhinx -verify-machineinstrs \
; RUN: -target-abi lp64 < %s \
-; RUN: | FileCheck -check-prefix=RV64IZHINX %s
+; RUN: | FileCheck -check-prefixes=CHECKIZHINX %s
; RUN: llc -mtriple=riscv32 -mattr=+zfhmin -verify-machineinstrs \
; RUN: -target-abi ilp32f < %s | FileCheck -check-prefixes=CHECKIZFHMIN %s
; RUN: llc -mtriple=riscv64 -mattr=+zfhmin -verify-machineinstrs \
; RUN: -target-abi lp64f < %s | FileCheck -check-prefixes=CHECKIZFHMIN %s
; RUN: llc -mtriple=riscv32 -mattr=+zhinxmin -verify-machineinstrs \
; RUN: -target-abi ilp32 < %s \
-; RUN: | FileCheck -check-prefixes=CHECKIZHINXMIN,RV32IZHINXMIN %s
+; RUN: | FileCheck -check-prefixes=CHECKIZHINXMIN %s
; RUN: llc -mtriple=riscv64 -mattr=+zhinxmin -verify-machineinstrs \
; RUN: -target-abi lp64 < %s \
-; RUN: | FileCheck -check-prefixes=CHECKIZHINXMIN,RV64IZHINXMIN %s
+; RUN: | FileCheck -check-prefixes=CHECKIZHINXMIN %s
-; TODO: constant pool shouldn't be necessary for RV32IZfh and RV64IZfh
define half @half_imm() nounwind {
; CHECK-LABEL: half_imm:
; CHECK: # %bb.0:
@@ -29,19 +28,12 @@ define half @half_imm() nounwind {
; CHECK-NEXT: fmv.h.x fa0, a0
; CHECK-NEXT: ret
;
-; RV32IZHINX-LABEL: half_imm:
-; RV32IZHINX: # %bb.0:
-; RV32IZHINX-NEXT: lui a0, 4
-; RV32IZHINX-NEXT: addi a0, a0, 512
-; RV32IZHINX-NEXT: # kill: def $x10_h killed $x10_h killed $x10
-; RV32IZHINX-NEXT: ret
-;
-; RV64IZHINX-LABEL: half_imm:
-; RV64IZHINX: # %bb.0:
-; RV64IZHINX-NEXT: lui a0, 4
-; RV64IZHINX-NEXT: addi a0, a0, 512
-; RV64IZHINX-NEXT: # kill: def $x10_h killed $x10_h killed $x10
-; RV64IZHINX-NEXT: ret
+; CHECKIZHINX-LABEL: half_imm:
+; CHECKIZHINX: # %bb.0:
+; CHECKIZHINX-NEXT: lui a0, 4
+; CHECKIZHINX-NEXT: addi a0, a0, 512
+; CHECKIZHINX-NEXT: # kill: def $x10_h killed $x10_h killed $x10
+; CHECKIZHINX-NEXT: ret
;
; CHECKIZFHMIN-LABEL: half_imm:
; CHECKIZFHMIN: # %bb.0:
@@ -68,19 +60,12 @@ define half @half_imm_op(half %a) nounwind {
; CHECK-NEXT: fadd.h fa0, fa0, fa5
; CHECK-NEXT: ret
;
-; RV32IZHINX-LABEL: half_imm_op:
-; RV32IZHINX: # %bb.0:
-; RV32IZHINX-NEXT: li a1, 15
-; RV32IZHINX-NEXT: slli a1, a1, 10
-; RV32IZHINX-NEXT: fadd.h a0, a0, a1
-; RV32IZHINX-NEXT: ret
-;
-; RV64IZHINX-LABEL: half_imm_op:
-; RV64IZHINX: # %bb.0:
-; RV64IZHINX-NEXT: li a1, 15
-; RV64IZHINX-NEXT: slli a1, a1, 10
-; RV64IZHINX-NEXT: fadd.h a0, a0, a1
-; RV64IZHINX-NEXT: ret
+; CHECKIZHINX-LABEL: half_imm_op:
+; CHECKIZHINX: # %bb.0:
+; CHECKIZHINX-NEXT: li a1, 15
+; CHECKIZHINX-NEXT: slli a1, a1, 10
+; CHECKIZHINX-NEXT: fadd.h a0, a0, a1
+; CHECKIZHINX-NEXT: ret
;
; CHECKIZFHMIN-LABEL: half_imm_op:
; CHECKIZFHMIN: # %bb.0:
@@ -108,15 +93,10 @@ define half @half_positive_zero(ptr %pf) nounwind {
; CHECK-NEXT: fmv.h.x fa0, zero
; CHECK-NEXT: ret
;
-; RV32IZHINX-LABEL: half_positive_zero:
-; RV32IZHINX: # %bb.0:
-; RV32IZHINX-NEXT: li a0, 0
-; RV32IZHINX-NEXT: ret
-;
-; RV64IZHINX-LABEL: half_positive_zero:
-; RV64IZHINX: # %bb.0:
-; RV64IZHINX-NEXT: li a0, 0
-; RV64IZHINX-NEXT: ret
+; CHECKIZHINX-LABEL: half_positive_zero:
+; CHECKIZHINX: # %bb.0:
+; CHECKIZHINX-NEXT: li a0, 0
+; CHECKIZHINX-NEXT: ret
;
; CHECKIZFHMIN-LABEL: half_positive_zero:
; CHECKIZFHMIN: # %bb.0:
@@ -137,15 +117,10 @@ define half @half_negative_zero(ptr %pf) nounwind {
; CHECK-NEXT: fmv.h.x fa0, a0
; CHECK-NEXT: ret
;
-; RV32IZHINX-LABEL: half_negative_zero:
-; RV32IZHINX: # %bb.0:
-; RV32IZHINX-NEXT: lui a0, 1048568
-; RV32IZHINX-NEXT: ret
-;
-; RV64IZHINX-LABEL: half_negative_zero:
-; RV64IZHINX: # %bb.0:
-; RV64IZHINX-NEXT: lui a0, 1048568
-; RV64IZHINX-NEXT: ret
+; CHECKIZHINX-LABEL: half_negative_zero:
+; CHECKIZHINX: # %bb.0:
+; CHECKIZHINX-NEXT: lui a0, 1048568
+; CHECKIZHINX-NEXT: ret
;
; CHECKIZFHMIN-LABEL: half_negative_zero:
; CHECKIZFHMIN: # %bb.0:
@@ -159,6 +134,3 @@ define half @half_negative_zero(ptr %pf) nounwind {
; CHECKIZHINXMIN-NEXT: ret
ret half -0.0
}
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; RV32IZHINXMIN: {{.*}}
-; RV64IZHINXMIN: {{.*}}
diff --git a/llvm/test/CodeGen/SPARC/atomicrmw-uinc-udec-wrap.ll b/llvm/test/CodeGen/SPARC/atomicrmw-uinc-udec-wrap.ll
index 380a4a0..d1f1c46 100644
--- a/llvm/test/CodeGen/SPARC/atomicrmw-uinc-udec-wrap.ll
+++ b/llvm/test/CodeGen/SPARC/atomicrmw-uinc-udec-wrap.ll
@@ -5,7 +5,7 @@ define i8 @atomicrmw_uinc_wrap_i8(ptr %ptr, i8 %val) {
; CHECK-LABEL: atomicrmw_uinc_wrap_i8:
; CHECK: .cfi_startproc
; CHECK-NEXT: ! %bb.0:
-; CHECK-NEXT: membar #LoadLoad | #StoreLoad | #LoadStore | #StoreStore
+; CHECK-NEXT: membar #LoadStore | #StoreStore
; CHECK-NEXT: and %o0, -4, %o2
; CHECK-NEXT: mov 3, %o3
; CHECK-NEXT: andn %o3, %o0, %o0
@@ -36,7 +36,7 @@ define i8 @atomicrmw_uinc_wrap_i8(ptr %ptr, i8 %val) {
; CHECK-NEXT: nop
; CHECK-NEXT: ! %bb.2: ! %atomicrmw.end
; CHECK-NEXT: srl %o4, %o0, %o0
-; CHECK-NEXT: membar #LoadLoad | #StoreLoad | #LoadStore | #StoreStore
+; CHECK-NEXT: membar #LoadLoad | #LoadStore
; CHECK-NEXT: retl
; CHECK-NEXT: nop
%result = atomicrmw uinc_wrap ptr %ptr, i8 %val seq_cst
@@ -47,7 +47,7 @@ define i16 @atomicrmw_uinc_wrap_i16(ptr %ptr, i16 %val) {
; CHECK-LABEL: atomicrmw_uinc_wrap_i16:
; CHECK: .cfi_startproc
; CHECK-NEXT: ! %bb.0:
-; CHECK-NEXT: membar #LoadLoad | #StoreLoad | #LoadStore | #StoreStore
+; CHECK-NEXT: membar #LoadStore | #StoreStore
; CHECK-NEXT: and %o0, -4, %o2
; CHECK-NEXT: and %o0, 3, %o0
; CHECK-NEXT: xor %o0, 2, %o0
@@ -79,7 +79,7 @@ define i16 @atomicrmw_uinc_wrap_i16(ptr %ptr, i16 %val) {
; CHECK-NEXT: nop
; CHECK-NEXT: ! %bb.2: ! %atomicrmw.end
; CHECK-NEXT: srl %o5, %o0, %o0
-; CHECK-NEXT: membar #LoadLoad | #StoreLoad | #LoadStore | #StoreStore
+; CHECK-NEXT: membar #LoadLoad | #LoadStore
; CHECK-NEXT: retl
; CHECK-NEXT: nop
%result = atomicrmw uinc_wrap ptr %ptr, i16 %val seq_cst
@@ -90,7 +90,7 @@ define i32 @atomicrmw_uinc_wrap_i32(ptr %ptr, i32 %val) {
; CHECK-LABEL: atomicrmw_uinc_wrap_i32:
; CHECK: .cfi_startproc
; CHECK-NEXT: ! %bb.0:
-; CHECK-NEXT: membar #LoadLoad | #StoreLoad | #LoadStore | #StoreStore
+; CHECK-NEXT: membar #LoadStore | #StoreStore
; CHECK-NEXT: ld [%o0], %o2
; CHECK-NEXT: .LBB2_1: ! %atomicrmw.start
; CHECK-NEXT: ! =>This Inner Loop Header: Depth=1
@@ -106,7 +106,7 @@ define i32 @atomicrmw_uinc_wrap_i32(ptr %ptr, i32 %val) {
; CHECK-NEXT: bne %icc, .LBB2_1
; CHECK-NEXT: nop
; CHECK-NEXT: ! %bb.2: ! %atomicrmw.end
-; CHECK-NEXT: membar #LoadLoad | #StoreLoad | #LoadStore | #StoreStore
+; CHECK-NEXT: membar #LoadLoad | #LoadStore
; CHECK-NEXT: retl
; CHECK-NEXT: mov %o2, %o0
%result = atomicrmw uinc_wrap ptr %ptr, i32 %val seq_cst
@@ -160,7 +160,7 @@ define i8 @atomicrmw_udec_wrap_i8(ptr %ptr, i8 %val) {
; CHECK-LABEL: atomicrmw_udec_wrap_i8:
; CHECK: .cfi_startproc
; CHECK-NEXT: ! %bb.0:
-; CHECK-NEXT: membar #LoadLoad | #StoreLoad | #LoadStore | #StoreStore
+; CHECK-NEXT: membar #LoadStore | #StoreStore
; CHECK-NEXT: and %o0, -4, %o2
; CHECK-NEXT: mov 3, %o3
; CHECK-NEXT: andn %o3, %o0, %o0
@@ -193,7 +193,7 @@ define i8 @atomicrmw_udec_wrap_i8(ptr %ptr, i8 %val) {
; CHECK-NEXT: nop
; CHECK-NEXT: ! %bb.2: ! %atomicrmw.end
; CHECK-NEXT: srl %o5, %o0, %o0
-; CHECK-NEXT: membar #LoadLoad | #StoreLoad | #LoadStore | #StoreStore
+; CHECK-NEXT: membar #LoadLoad | #LoadStore
; CHECK-NEXT: retl
; CHECK-NEXT: nop
%result = atomicrmw udec_wrap ptr %ptr, i8 %val seq_cst
@@ -204,7 +204,7 @@ define i16 @atomicrmw_udec_wrap_i16(ptr %ptr, i16 %val) {
; CHECK-LABEL: atomicrmw_udec_wrap_i16:
; CHECK: .cfi_startproc
; CHECK-NEXT: ! %bb.0:
-; CHECK-NEXT: membar #LoadLoad | #StoreLoad | #LoadStore | #StoreStore
+; CHECK-NEXT: membar #LoadStore | #StoreStore
; CHECK-NEXT: and %o0, -4, %o2
; CHECK-NEXT: and %o0, 3, %o0
; CHECK-NEXT: xor %o0, 2, %o0
@@ -238,7 +238,7 @@ define i16 @atomicrmw_udec_wrap_i16(ptr %ptr, i16 %val) {
; CHECK-NEXT: nop
; CHECK-NEXT: ! %bb.2: ! %atomicrmw.end
; CHECK-NEXT: srl %g2, %o0, %o0
-; CHECK-NEXT: membar #LoadLoad | #StoreLoad | #LoadStore | #StoreStore
+; CHECK-NEXT: membar #LoadLoad | #LoadStore
; CHECK-NEXT: retl
; CHECK-NEXT: nop
%result = atomicrmw udec_wrap ptr %ptr, i16 %val seq_cst
@@ -249,7 +249,7 @@ define i32 @atomicrmw_udec_wrap_i32(ptr %ptr, i32 %val) {
; CHECK-LABEL: atomicrmw_udec_wrap_i32:
; CHECK: .cfi_startproc
; CHECK-NEXT: ! %bb.0:
-; CHECK-NEXT: membar #LoadLoad | #StoreLoad | #LoadStore | #StoreStore
+; CHECK-NEXT: membar #LoadStore | #StoreStore
; CHECK-NEXT: ld [%o0], %o2
; CHECK-NEXT: .LBB6_1: ! %atomicrmw.start
; CHECK-NEXT: ! =>This Inner Loop Header: Depth=1
@@ -267,7 +267,7 @@ define i32 @atomicrmw_udec_wrap_i32(ptr %ptr, i32 %val) {
; CHECK-NEXT: bne %icc, .LBB6_1
; CHECK-NEXT: nop
; CHECK-NEXT: ! %bb.2: ! %atomicrmw.end
-; CHECK-NEXT: membar #LoadLoad | #StoreLoad | #LoadStore | #StoreStore
+; CHECK-NEXT: membar #LoadLoad | #LoadStore
; CHECK-NEXT: retl
; CHECK-NEXT: mov %o2, %o0
%result = atomicrmw udec_wrap ptr %ptr, i32 %val seq_cst
diff --git a/llvm/test/CodeGen/SPARC/atomics-ordering.ll b/llvm/test/CodeGen/SPARC/atomics-ordering.ll
new file mode 100644
index 0000000..7c13ac2
--- /dev/null
+++ b/llvm/test/CodeGen/SPARC/atomics-ordering.ll
@@ -0,0 +1,446 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=sparc -verify-machineinstrs | FileCheck %s --check-prefixes=SPARC32
+; RUN: llc < %s -mtriple=sparc -mcpu=leon4 -verify-machineinstrs | FileCheck %s --check-prefixes=SPARC32-LEON4
+; RUN: llc < %s -mtriple=sparc -mcpu=v9 -verify-machineinstrs | FileCheck %s --check-prefixes=SPARC32-V9
+; RUN: llc < %s -mtriple=sparcv9 -verify-machineinstrs | FileCheck %s --check-prefixes=SPARC64
+
+define i32 @load_acq(ptr %0) nounwind {
+; SPARC32-LABEL: load_acq:
+; SPARC32: ! %bb.0:
+; SPARC32-NEXT: save %sp, -96, %sp
+; SPARC32-NEXT: mov %i0, %o0
+; SPARC32-NEXT: call __atomic_load_4
+; SPARC32-NEXT: mov 2, %o1
+; SPARC32-NEXT: ret
+; SPARC32-NEXT: restore %g0, %o0, %o0
+;
+; SPARC32-LEON4-LABEL: load_acq:
+; SPARC32-LEON4: ! %bb.0:
+; SPARC32-LEON4-NEXT: retl
+; SPARC32-LEON4-NEXT: ld [%o0], %o0
+;
+; SPARC32-V9-LABEL: load_acq:
+; SPARC32-V9: ! %bb.0:
+; SPARC32-V9-NEXT: ld [%o0], %o0
+; SPARC32-V9-NEXT: membar #LoadLoad | #LoadStore
+; SPARC32-V9-NEXT: retl
+; SPARC32-V9-NEXT: nop
+;
+; SPARC64-LABEL: load_acq:
+; SPARC64: ! %bb.0:
+; SPARC64-NEXT: ld [%o0], %o0
+; SPARC64-NEXT: membar #LoadLoad | #LoadStore
+; SPARC64-NEXT: retl
+; SPARC64-NEXT: nop
+ %2 = load atomic i32, ptr %0 acquire, align 4
+ ret i32 %2
+}
+
+define i32 @load_sc(ptr %0) nounwind {
+; SPARC32-LABEL: load_sc:
+; SPARC32: ! %bb.0:
+; SPARC32-NEXT: save %sp, -96, %sp
+; SPARC32-NEXT: mov %i0, %o0
+; SPARC32-NEXT: call __atomic_load_4
+; SPARC32-NEXT: mov 5, %o1
+; SPARC32-NEXT: ret
+; SPARC32-NEXT: restore %g0, %o0, %o0
+;
+; SPARC32-LEON4-LABEL: load_sc:
+; SPARC32-LEON4: ! %bb.0:
+; SPARC32-LEON4-NEXT: retl
+; SPARC32-LEON4-NEXT: ld [%o0], %o0
+;
+; SPARC32-V9-LABEL: load_sc:
+; SPARC32-V9: ! %bb.0:
+; SPARC32-V9-NEXT: ld [%o0], %o0
+; SPARC32-V9-NEXT: membar #LoadLoad | #LoadStore
+; SPARC32-V9-NEXT: retl
+; SPARC32-V9-NEXT: nop
+;
+; SPARC64-LABEL: load_sc:
+; SPARC64: ! %bb.0:
+; SPARC64-NEXT: ld [%o0], %o0
+; SPARC64-NEXT: membar #LoadLoad | #LoadStore
+; SPARC64-NEXT: retl
+; SPARC64-NEXT: nop
+ %2 = load atomic i32, ptr %0 seq_cst, align 4
+ ret i32 %2
+}
+
+define void @store_rel(ptr %0, i32 %1) nounwind {
+; SPARC32-LABEL: store_rel:
+; SPARC32: ! %bb.0:
+; SPARC32-NEXT: save %sp, -96, %sp
+; SPARC32-NEXT: mov %i1, %o1
+; SPARC32-NEXT: mov %i0, %o0
+; SPARC32-NEXT: call __atomic_store_4
+; SPARC32-NEXT: mov 3, %o2
+; SPARC32-NEXT: ret
+; SPARC32-NEXT: restore
+;
+; SPARC32-LEON4-LABEL: store_rel:
+; SPARC32-LEON4: ! %bb.0:
+; SPARC32-LEON4-NEXT: stbar
+; SPARC32-LEON4-NEXT: retl
+; SPARC32-LEON4-NEXT: st %o1, [%o0]
+;
+; SPARC32-V9-LABEL: store_rel:
+; SPARC32-V9: ! %bb.0:
+; SPARC32-V9-NEXT: membar #LoadStore | #StoreStore
+; SPARC32-V9-NEXT: retl
+; SPARC32-V9-NEXT: st %o1, [%o0]
+;
+; SPARC64-LABEL: store_rel:
+; SPARC64: ! %bb.0:
+; SPARC64-NEXT: membar #LoadStore | #StoreStore
+; SPARC64-NEXT: retl
+; SPARC64-NEXT: st %o1, [%o0]
+ store atomic i32 %1, ptr %0 release, align 4
+ ret void
+}
+
+define void @store_sc(ptr %0, i32 %1) nounwind {
+; SPARC32-LABEL: store_sc:
+; SPARC32: ! %bb.0:
+; SPARC32-NEXT: save %sp, -96, %sp
+; SPARC32-NEXT: mov %i1, %o1
+; SPARC32-NEXT: mov %i0, %o0
+; SPARC32-NEXT: call __atomic_store_4
+; SPARC32-NEXT: mov 5, %o2
+; SPARC32-NEXT: ret
+; SPARC32-NEXT: restore
+;
+; SPARC32-LEON4-LABEL: store_sc:
+; SPARC32-LEON4: ! %bb.0:
+; SPARC32-LEON4-NEXT: stbar
+; SPARC32-LEON4-NEXT: st %o1, [%o0]
+; SPARC32-LEON4-NEXT: stbar
+; SPARC32-LEON4-NEXT: ldstub [%sp+-1], %g0
+; SPARC32-LEON4-NEXT: retl
+; SPARC32-LEON4-NEXT: nop
+;
+; SPARC32-V9-LABEL: store_sc:
+; SPARC32-V9: ! %bb.0:
+; SPARC32-V9-NEXT: membar #LoadStore | #StoreStore
+; SPARC32-V9-NEXT: st %o1, [%o0]
+; SPARC32-V9-NEXT: membar #LoadLoad | #StoreLoad | #LoadStore | #StoreStore
+; SPARC32-V9-NEXT: retl
+; SPARC32-V9-NEXT: nop
+;
+; SPARC64-LABEL: store_sc:
+; SPARC64: ! %bb.0:
+; SPARC64-NEXT: membar #LoadStore | #StoreStore
+; SPARC64-NEXT: st %o1, [%o0]
+; SPARC64-NEXT: membar #LoadLoad | #StoreLoad | #LoadStore | #StoreStore
+; SPARC64-NEXT: retl
+; SPARC64-NEXT: nop
+ store atomic i32 %1, ptr %0 seq_cst, align 4
+ ret void
+}
+
+define i32 @rmw_acq(ptr %0, i32 %1) nounwind {
+; SPARC32-LABEL: rmw_acq:
+; SPARC32: ! %bb.0:
+; SPARC32-NEXT: save %sp, -96, %sp
+; SPARC32-NEXT: mov %i1, %o1
+; SPARC32-NEXT: mov %i0, %o0
+; SPARC32-NEXT: call __atomic_exchange_4
+; SPARC32-NEXT: mov 2, %o2
+; SPARC32-NEXT: ret
+; SPARC32-NEXT: restore %g0, %o0, %o0
+;
+; SPARC32-LEON4-LABEL: rmw_acq:
+; SPARC32-LEON4: ! %bb.0:
+; SPARC32-LEON4-NEXT: swap [%o0], %o1
+; SPARC32-LEON4-NEXT: retl
+; SPARC32-LEON4-NEXT: mov %o1, %o0
+;
+; SPARC32-V9-LABEL: rmw_acq:
+; SPARC32-V9: ! %bb.0:
+; SPARC32-V9-NEXT: swap [%o0], %o1
+; SPARC32-V9-NEXT: membar #LoadLoad | #LoadStore
+; SPARC32-V9-NEXT: retl
+; SPARC32-V9-NEXT: mov %o1, %o0
+;
+; SPARC64-LABEL: rmw_acq:
+; SPARC64: ! %bb.0:
+; SPARC64-NEXT: swap [%o0], %o1
+; SPARC64-NEXT: membar #LoadLoad | #LoadStore
+; SPARC64-NEXT: retl
+; SPARC64-NEXT: mov %o1, %o0
+ %3 = atomicrmw xchg ptr %0, i32 %1 acquire, align 4
+ ret i32 %3
+}
+
+define i32 @rmw_rel(ptr %0, i32 %1) nounwind {
+; SPARC32-LABEL: rmw_rel:
+; SPARC32: ! %bb.0:
+; SPARC32-NEXT: save %sp, -96, %sp
+; SPARC32-NEXT: mov %i1, %o1
+; SPARC32-NEXT: mov %i0, %o0
+; SPARC32-NEXT: call __atomic_exchange_4
+; SPARC32-NEXT: mov 3, %o2
+; SPARC32-NEXT: ret
+; SPARC32-NEXT: restore %g0, %o0, %o0
+;
+; SPARC32-LEON4-LABEL: rmw_rel:
+; SPARC32-LEON4: ! %bb.0:
+; SPARC32-LEON4-NEXT: stbar
+; SPARC32-LEON4-NEXT: swap [%o0], %o1
+; SPARC32-LEON4-NEXT: retl
+; SPARC32-LEON4-NEXT: mov %o1, %o0
+;
+; SPARC32-V9-LABEL: rmw_rel:
+; SPARC32-V9: ! %bb.0:
+; SPARC32-V9-NEXT: membar #LoadStore | #StoreStore
+; SPARC32-V9-NEXT: swap [%o0], %o1
+; SPARC32-V9-NEXT: retl
+; SPARC32-V9-NEXT: mov %o1, %o0
+;
+; SPARC64-LABEL: rmw_rel:
+; SPARC64: ! %bb.0:
+; SPARC64-NEXT: membar #LoadStore | #StoreStore
+; SPARC64-NEXT: swap [%o0], %o1
+; SPARC64-NEXT: retl
+; SPARC64-NEXT: mov %o1, %o0
+ %3 = atomicrmw xchg ptr %0, i32 %1 release, align 4
+ ret i32 %3
+}
+
+define i32 @rmw_acq_rel(ptr %0, i32 %1) nounwind {
+; SPARC32-LABEL: rmw_acq_rel:
+; SPARC32: ! %bb.0:
+; SPARC32-NEXT: save %sp, -96, %sp
+; SPARC32-NEXT: mov %i1, %o1
+; SPARC32-NEXT: mov %i0, %o0
+; SPARC32-NEXT: call __atomic_exchange_4
+; SPARC32-NEXT: mov 4, %o2
+; SPARC32-NEXT: ret
+; SPARC32-NEXT: restore %g0, %o0, %o0
+;
+; SPARC32-LEON4-LABEL: rmw_acq_rel:
+; SPARC32-LEON4: ! %bb.0:
+; SPARC32-LEON4-NEXT: stbar
+; SPARC32-LEON4-NEXT: swap [%o0], %o1
+; SPARC32-LEON4-NEXT: retl
+; SPARC32-LEON4-NEXT: mov %o1, %o0
+;
+; SPARC32-V9-LABEL: rmw_acq_rel:
+; SPARC32-V9: ! %bb.0:
+; SPARC32-V9-NEXT: membar #LoadStore | #StoreStore
+; SPARC32-V9-NEXT: swap [%o0], %o1
+; SPARC32-V9-NEXT: membar #LoadLoad | #LoadStore
+; SPARC32-V9-NEXT: retl
+; SPARC32-V9-NEXT: mov %o1, %o0
+;
+; SPARC64-LABEL: rmw_acq_rel:
+; SPARC64: ! %bb.0:
+; SPARC64-NEXT: membar #LoadStore | #StoreStore
+; SPARC64-NEXT: swap [%o0], %o1
+; SPARC64-NEXT: membar #LoadLoad | #LoadStore
+; SPARC64-NEXT: retl
+; SPARC64-NEXT: mov %o1, %o0
+ %3 = atomicrmw xchg ptr %0, i32 %1 acq_rel, align 4
+ ret i32 %3
+}
+
+define i32 @rmw_sc(ptr %0, i32 %1) nounwind {
+; SPARC32-LABEL: rmw_sc:
+; SPARC32: ! %bb.0:
+; SPARC32-NEXT: save %sp, -96, %sp
+; SPARC32-NEXT: mov %i1, %o1
+; SPARC32-NEXT: mov %i0, %o0
+; SPARC32-NEXT: call __atomic_exchange_4
+; SPARC32-NEXT: mov 5, %o2
+; SPARC32-NEXT: ret
+; SPARC32-NEXT: restore %g0, %o0, %o0
+;
+; SPARC32-LEON4-LABEL: rmw_sc:
+; SPARC32-LEON4: ! %bb.0:
+; SPARC32-LEON4-NEXT: stbar
+; SPARC32-LEON4-NEXT: swap [%o0], %o1
+; SPARC32-LEON4-NEXT: retl
+; SPARC32-LEON4-NEXT: mov %o1, %o0
+;
+; SPARC32-V9-LABEL: rmw_sc:
+; SPARC32-V9: ! %bb.0:
+; SPARC32-V9-NEXT: membar #LoadStore | #StoreStore
+; SPARC32-V9-NEXT: swap [%o0], %o1
+; SPARC32-V9-NEXT: membar #LoadLoad | #LoadStore
+; SPARC32-V9-NEXT: retl
+; SPARC32-V9-NEXT: mov %o1, %o0
+;
+; SPARC64-LABEL: rmw_sc:
+; SPARC64: ! %bb.0:
+; SPARC64-NEXT: membar #LoadStore | #StoreStore
+; SPARC64-NEXT: swap [%o0], %o1
+; SPARC64-NEXT: membar #LoadLoad | #LoadStore
+; SPARC64-NEXT: retl
+; SPARC64-NEXT: mov %o1, %o0
+ %3 = atomicrmw xchg ptr %0, i32 %1 seq_cst, align 4
+ ret i32 %3
+}
+
+define i32 @cas_acq(ptr %0, i32 %1, i32 %2) nounwind {
+; SPARC32-LABEL: cas_acq:
+; SPARC32: ! %bb.0:
+; SPARC32-NEXT: save %sp, -96, %sp
+; SPARC32-NEXT: mov %i2, %o2
+; SPARC32-NEXT: mov %i0, %o0
+; SPARC32-NEXT: st %i1, [%fp+-4]
+; SPARC32-NEXT: add %fp, -4, %o1
+; SPARC32-NEXT: mov 2, %o3
+; SPARC32-NEXT: call __atomic_compare_exchange_4
+; SPARC32-NEXT: mov %o3, %o4
+; SPARC32-NEXT: ld [%fp+-4], %i0
+; SPARC32-NEXT: ret
+; SPARC32-NEXT: restore
+;
+; SPARC32-LEON4-LABEL: cas_acq:
+; SPARC32-LEON4: ! %bb.0:
+; SPARC32-LEON4-NEXT: casa [%o0] 10, %o1, %o2
+; SPARC32-LEON4-NEXT: retl
+; SPARC32-LEON4-NEXT: mov %o2, %o0
+;
+; SPARC32-V9-LABEL: cas_acq:
+; SPARC32-V9: ! %bb.0:
+; SPARC32-V9-NEXT: cas [%o0], %o1, %o2
+; SPARC32-V9-NEXT: membar #LoadLoad | #LoadStore
+; SPARC32-V9-NEXT: retl
+; SPARC32-V9-NEXT: mov %o2, %o0
+;
+; SPARC64-LABEL: cas_acq:
+; SPARC64: ! %bb.0:
+; SPARC64-NEXT: cas [%o0], %o1, %o2
+; SPARC64-NEXT: membar #LoadLoad | #LoadStore
+; SPARC64-NEXT: retl
+; SPARC64-NEXT: mov %o2, %o0
+ %4 = cmpxchg ptr %0, i32 %1, i32 %2 acquire acquire, align 4
+ %5 = extractvalue { i32, i1 } %4, 0
+ ret i32 %5
+}
+
+define i32 @cas_rel(ptr %0, i32 %1, i32 %2) nounwind {
+; SPARC32-LABEL: cas_rel:
+; SPARC32: ! %bb.0:
+; SPARC32-NEXT: save %sp, -96, %sp
+; SPARC32-NEXT: mov %i2, %o2
+; SPARC32-NEXT: mov %i0, %o0
+; SPARC32-NEXT: st %i1, [%fp+-4]
+; SPARC32-NEXT: add %fp, -4, %o1
+; SPARC32-NEXT: mov 3, %o3
+; SPARC32-NEXT: call __atomic_compare_exchange_4
+; SPARC32-NEXT: mov %g0, %o4
+; SPARC32-NEXT: ld [%fp+-4], %i0
+; SPARC32-NEXT: ret
+; SPARC32-NEXT: restore
+;
+; SPARC32-LEON4-LABEL: cas_rel:
+; SPARC32-LEON4: ! %bb.0:
+; SPARC32-LEON4-NEXT: stbar
+; SPARC32-LEON4-NEXT: casa [%o0] 10, %o1, %o2
+; SPARC32-LEON4-NEXT: retl
+; SPARC32-LEON4-NEXT: mov %o2, %o0
+;
+; SPARC32-V9-LABEL: cas_rel:
+; SPARC32-V9: ! %bb.0:
+; SPARC32-V9-NEXT: membar #LoadStore | #StoreStore
+; SPARC32-V9-NEXT: cas [%o0], %o1, %o2
+; SPARC32-V9-NEXT: retl
+; SPARC32-V9-NEXT: mov %o2, %o0
+;
+; SPARC64-LABEL: cas_rel:
+; SPARC64: ! %bb.0:
+; SPARC64-NEXT: membar #LoadStore | #StoreStore
+; SPARC64-NEXT: cas [%o0], %o1, %o2
+; SPARC64-NEXT: retl
+; SPARC64-NEXT: mov %o2, %o0
+ %4 = cmpxchg ptr %0, i32 %1, i32 %2 release monotonic, align 4
+ %5 = extractvalue { i32, i1 } %4, 0
+ ret i32 %5
+}
+
+define i32 @cas_acq_rel(ptr %0, i32 %1, i32 %2) nounwind {
+; SPARC32-LABEL: cas_acq_rel:
+; SPARC32: ! %bb.0:
+; SPARC32-NEXT: save %sp, -96, %sp
+; SPARC32-NEXT: mov %i2, %o2
+; SPARC32-NEXT: mov %i0, %o0
+; SPARC32-NEXT: st %i1, [%fp+-4]
+; SPARC32-NEXT: add %fp, -4, %o1
+; SPARC32-NEXT: mov 4, %o3
+; SPARC32-NEXT: call __atomic_compare_exchange_4
+; SPARC32-NEXT: mov 2, %o4
+; SPARC32-NEXT: ld [%fp+-4], %i0
+; SPARC32-NEXT: ret
+; SPARC32-NEXT: restore
+;
+; SPARC32-LEON4-LABEL: cas_acq_rel:
+; SPARC32-LEON4: ! %bb.0:
+; SPARC32-LEON4-NEXT: stbar
+; SPARC32-LEON4-NEXT: casa [%o0] 10, %o1, %o2
+; SPARC32-LEON4-NEXT: retl
+; SPARC32-LEON4-NEXT: mov %o2, %o0
+;
+; SPARC32-V9-LABEL: cas_acq_rel:
+; SPARC32-V9: ! %bb.0:
+; SPARC32-V9-NEXT: membar #LoadStore | #StoreStore
+; SPARC32-V9-NEXT: cas [%o0], %o1, %o2
+; SPARC32-V9-NEXT: membar #LoadLoad | #LoadStore
+; SPARC32-V9-NEXT: retl
+; SPARC32-V9-NEXT: mov %o2, %o0
+;
+; SPARC64-LABEL: cas_acq_rel:
+; SPARC64: ! %bb.0:
+; SPARC64-NEXT: membar #LoadStore | #StoreStore
+; SPARC64-NEXT: cas [%o0], %o1, %o2
+; SPARC64-NEXT: membar #LoadLoad | #LoadStore
+; SPARC64-NEXT: retl
+; SPARC64-NEXT: mov %o2, %o0
+ %4 = cmpxchg ptr %0, i32 %1, i32 %2 acq_rel acquire, align 4
+ %5 = extractvalue { i32, i1 } %4, 0
+ ret i32 %5
+}
+
+define i32 @cas_sc(ptr %0, i32 %1, i32 %2) nounwind {
+; SPARC32-LABEL: cas_sc:
+; SPARC32: ! %bb.0:
+; SPARC32-NEXT: save %sp, -96, %sp
+; SPARC32-NEXT: mov %i2, %o2
+; SPARC32-NEXT: mov %i0, %o0
+; SPARC32-NEXT: st %i1, [%fp+-4]
+; SPARC32-NEXT: add %fp, -4, %o1
+; SPARC32-NEXT: mov 5, %o3
+; SPARC32-NEXT: call __atomic_compare_exchange_4
+; SPARC32-NEXT: mov %o3, %o4
+; SPARC32-NEXT: ld [%fp+-4], %i0
+; SPARC32-NEXT: ret
+; SPARC32-NEXT: restore
+;
+; SPARC32-LEON4-LABEL: cas_sc:
+; SPARC32-LEON4: ! %bb.0:
+; SPARC32-LEON4-NEXT: stbar
+; SPARC32-LEON4-NEXT: casa [%o0] 10, %o1, %o2
+; SPARC32-LEON4-NEXT: retl
+; SPARC32-LEON4-NEXT: mov %o2, %o0
+;
+; SPARC32-V9-LABEL: cas_sc:
+; SPARC32-V9: ! %bb.0:
+; SPARC32-V9-NEXT: membar #LoadStore | #StoreStore
+; SPARC32-V9-NEXT: cas [%o0], %o1, %o2
+; SPARC32-V9-NEXT: membar #LoadLoad | #LoadStore
+; SPARC32-V9-NEXT: retl
+; SPARC32-V9-NEXT: mov %o2, %o0
+;
+; SPARC64-LABEL: cas_sc:
+; SPARC64: ! %bb.0:
+; SPARC64-NEXT: membar #LoadStore | #StoreStore
+; SPARC64-NEXT: cas [%o0], %o1, %o2
+; SPARC64-NEXT: membar #LoadLoad | #LoadStore
+; SPARC64-NEXT: retl
+; SPARC64-NEXT: mov %o2, %o0
+ %4 = cmpxchg ptr %0, i32 %1, i32 %2 seq_cst seq_cst, align 4
+ %5 = extractvalue { i32, i1 } %4, 0
+ ret i32 %5
+}
diff --git a/llvm/test/CodeGen/SPIRV/llc-pipeline.ll b/llvm/test/CodeGen/SPIRV/llc-pipeline.ll
new file mode 100644
index 0000000..3fff2a8
--- /dev/null
+++ b/llvm/test/CodeGen/SPIRV/llc-pipeline.ll
@@ -0,0 +1,214 @@
+; UNSUPPORTED:expensive_checks
+; RUN:llc -O0 -mtriple=spirv-- -disable-verify -debug-pass=Structure < %s 2>&1 \
+; RUN: | FileCheck -match-full-lines -strict-whitespace -check-prefix=SPIRV-O0 %s
+; RUN:llc -O1 -mtriple=spirv-- -disable-verify -debug-pass=Structure < %s 2>&1 \
+; RUN: | FileCheck -match-full-lines -strict-whitespace -check-prefix=SPIRV-Opt %s
+; RUN:llc -O2 -mtriple=spirv-- -disable-verify -debug-pass=Structure < %s 2>&1 \
+; RUN: | FileCheck -match-full-lines -strict-whitespace -check-prefix=SPIRV-Opt %s
+; RUN:llc -O3 -mtriple=spirv-- -disable-verify -debug-pass=Structure < %s 2>&1 \
+; RUN: | FileCheck -match-full-lines -strict-whitespace -check-prefix=SPIRV-Opt %s
+;
+; REQUIRES:asserts
+
+; SPIRV-O0:Target Library Information
+; SPIRV-O0-NEXT:Target Pass Configuration
+; SPIRV-O0-NEXT:Machine Module Information
+; SPIRV-O0-NEXT:Target Transform Information
+; SPIRV-O0-NEXT:Create Garbage Collector Module Metadata
+; SPIRV-O0-NEXT:Assumption Cache Tracker
+; SPIRV-O0-NEXT:Profile summary info
+; SPIRV-O0-NEXT:Machine Branch Probability Analysis
+; SPIRV-O0-NEXT: ModulePass Manager
+; SPIRV-O0-NEXT: Pre-ISel Intrinsic Lowering
+; SPIRV-O0-NEXT: FunctionPass Manager
+; SPIRV-O0-NEXT: Expand large div/rem
+; SPIRV-O0-NEXT: Expand fp
+; SPIRV-O0-NEXT: Lower Garbage Collection Instructions
+; SPIRV-O0-NEXT: Shadow Stack GC Lowering
+; SPIRV-O0-NEXT: Remove unreachable blocks from the CFG
+; SPIRV-O0-NEXT: Instrument function entry/exit with calls to e.g. mcount() (post inlining)
+; SPIRV-O0-NEXT: Scalarize Masked Memory Intrinsics
+; SPIRV-O0-NEXT: Expand reduction intrinsics
+; SPIRV-O0-NEXT: SPIR-V Regularizer
+; SPIRV-O0-NEXT: SPIRV prepare functions
+; SPIRV-O0-NEXT: FunctionPass Manager
+; SPIRV-O0-NEXT: Lower invoke and unwind, for unwindless code generators
+; SPIRV-O0-NEXT: Remove unreachable blocks from the CFG
+; SPIRV-O0-NEXT: SPIRV strip convergent intrinsics
+; SPIRV-O0-NEXT: SPIRV Legalize Implicit Binding
+; SPIRV-O0-NEXT: SPIRV CBuffer Access
+; SPIRV-O0-NEXT: SPIRV emit intrinsics
+; SPIRV-O0-NEXT: FunctionPass Manager
+; SPIRV-O0-NEXT: SPIRV legalize bitcast pass
+; SPIRV-O0-NEXT: Prepare callbr
+; SPIRV-O0-NEXT: Safe Stack instrumentation pass
+; SPIRV-O0-NEXT: Insert stack protectors
+; SPIRV-O0-NEXT: Analysis containing CSE Info
+; SPIRV-O0-NEXT: IRTranslator
+; SPIRV-O0-NEXT: Analysis for ComputingKnownBits
+; SPIRV-O0-NEXT: MachineDominator Tree Construction
+; SPIRV-O0-NEXT: SPIRVPreLegalizerCombiner
+; SPIRV-O0-NEXT: SPIRV pre legalizer
+; SPIRV-O0-NEXT: Analysis containing CSE Info
+; SPIRV-O0-NEXT: Legalizer
+; SPIRV-O0-NEXT: SPIRV post legalizer
+; SPIRV-O0-NEXT: Analysis for ComputingKnownBits
+; SPIRV-O0-NEXT: Dominator Tree Construction
+; SPIRV-O0-NEXT: Natural Loop Information
+; SPIRV-O0-NEXT: Lazy Branch Probability Analysis
+; SPIRV-O0-NEXT: Lazy Block Frequency Analysis
+; SPIRV-O0-NEXT: InstructionSelect
+; SPIRV-O0-NEXT: ResetMachineFunction
+; SPIRV-O0-NEXT: Finalize ISel and expand pseudo-instructions
+; SPIRV-O0-NEXT: Local Stack Slot Allocation
+; SPIRV-O0-NEXT: Remove Redundant DEBUG_VALUE analysis
+; SPIRV-O0-NEXT: Fixup Statepoint Caller Saved
+; SPIRV-O0-NEXT: Lazy Machine Block Frequency Analysis
+; SPIRV-O0-NEXT: Machine Optimization Remark Emitter
+; SPIRV-O0-NEXT: Prologue/Epilogue Insertion & Frame Finalization
+; SPIRV-O0-NEXT: Post-RA pseudo instruction expansion pass
+; SPIRV-O0-NEXT: Analyze Machine Code For Garbage Collection
+; SPIRV-O0-NEXT: Insert fentry calls
+; SPIRV-O0-NEXT: Insert XRay ops
+; SPIRV-O0-NEXT: Machine Sanitizer Binary Metadata
+; SPIRV-O0-NEXT: Lazy Machine Block Frequency Analysis
+; SPIRV-O0-NEXT: Machine Optimization Remark Emitter
+; SPIRV-O0-NEXT: Stack Frame Layout Analysis
+; SPIRV-O0-NEXT: SPIRV module analysis
+; SPIRV-O0-NEXT: FunctionPass Manager
+; SPIRV-O0-NEXT: Lazy Machine Block Frequency Analysis
+; SPIRV-O0-NEXT: Machine Optimization Remark Emitter
+; SPIRV-O0-NEXT: SPIRV Assembly Printer
+; SPIRV-O0-NEXT: Free MachineFunction
+
+; SPIRV-Opt:Target Library Information
+; SPIRV-Opt-NEXT:Target Pass Configuration
+; SPIRV-Opt-NEXT:Machine Module Information
+; SPIRV-Opt-NEXT:Target Transform Information
+; SPIRV-Opt-NEXT:Assumption Cache Tracker
+; SPIRV-Opt-NEXT:Type-Based Alias Analysis
+; SPIRV-Opt-NEXT:Scoped NoAlias Alias Analysis
+; SPIRV-Opt-NEXT:Profile summary info
+; SPIRV-Opt-NEXT:Create Garbage Collector Module Metadata
+; SPIRV-Opt-NEXT:Machine Branch Probability Analysis
+; SPIRV-Opt-NEXT: ModulePass Manager
+; SPIRV-Opt-NEXT: Pre-ISel Intrinsic Lowering
+; SPIRV-Opt-NEXT: FunctionPass Manager
+; SPIRV-Opt-NEXT: Expand large div/rem
+; SPIRV-Opt-NEXT: Expand fp
+; SPIRV-Opt-NEXT: Dominator Tree Construction
+; SPIRV-Opt-NEXT: Basic Alias Analysis (stateless AA impl)
+; SPIRV-Opt-NEXT: Natural Loop Information
+; SPIRV-Opt-NEXT: Canonicalize natural loops
+; SPIRV-Opt-NEXT: Scalar Evolution Analysis
+; SPIRV-Opt-NEXT: Loop Pass Manager
+; SPIRV-Opt-NEXT: Canonicalize Freeze Instructions in Loops
+; SPIRV-Opt-NEXT: Induction Variable Users
+; SPIRV-Opt-NEXT: Loop Strength Reduction
+; SPIRV-Opt-NEXT: Basic Alias Analysis (stateless AA impl)
+; SPIRV-Opt-NEXT: Function Alias Analysis Results
+; SPIRV-Opt-NEXT: Merge contiguous icmps into a memcmp
+; SPIRV-Opt-NEXT: Natural Loop Information
+; SPIRV-Opt-NEXT: Lazy Branch Probability Analysis
+; SPIRV-Opt-NEXT: Lazy Block Frequency Analysis
+; SPIRV-Opt-NEXT: Expand memcmp() to load/stores
+; SPIRV-Opt-NEXT: Lower Garbage Collection Instructions
+; SPIRV-Opt-NEXT: Shadow Stack GC Lowering
+; SPIRV-Opt-NEXT: Remove unreachable blocks from the CFG
+; SPIRV-Opt-NEXT: Natural Loop Information
+; SPIRV-Opt-NEXT: Post-Dominator Tree Construction
+; SPIRV-Opt-NEXT: Branch Probability Analysis
+; SPIRV-Opt-NEXT: Block Frequency Analysis
+; SPIRV-Opt-NEXT: Constant Hoisting
+; SPIRV-Opt-NEXT: Replace intrinsics with calls to vector library
+; SPIRV-Opt-NEXT: Lazy Branch Probability Analysis
+; SPIRV-Opt-NEXT: Lazy Block Frequency Analysis
+; SPIRV-Opt-NEXT: Optimization Remark Emitter
+; SPIRV-Opt-NEXT: Partially inline calls to library functions
+; SPIRV-Opt-NEXT: Instrument function entry/exit with calls to e.g. mcount() (post inlining)
+; SPIRV-Opt-NEXT: Scalarize Masked Memory Intrinsics
+; SPIRV-Opt-NEXT: Expand reduction intrinsics
+; SPIRV-Opt-NEXT: SPIR-V Regularizer
+; SPIRV-Opt-NEXT: SPIRV prepare functions
+; SPIRV-Opt-NEXT: FunctionPass Manager
+; SPIRV-Opt-NEXT: Dominator Tree Construction
+; SPIRV-Opt-NEXT: Natural Loop Information
+; SPIRV-Opt-NEXT: CodeGen Prepare
+; SPIRV-Opt-NEXT: Lower invoke and unwind, for unwindless code generators
+; SPIRV-Opt-NEXT: Remove unreachable blocks from the CFG
+; SPIRV-Opt-NEXT: SPIRV strip convergent intrinsics
+; SPIRV-Opt-NEXT: SPIRV Legalize Implicit Binding
+; SPIRV-Opt-NEXT: SPIRV CBuffer Access
+; SPIRV-Opt-NEXT: SPIRV emit intrinsics
+; SPIRV-Opt-NEXT: FunctionPass Manager
+; SPIRV-Opt-NEXT: SPIRV legalize bitcast pass
+; SPIRV-Opt-NEXT: Dominator Tree Construction
+; SPIRV-Opt-NEXT: Basic Alias Analysis (stateless AA impl)
+; SPIRV-Opt-NEXT: Function Alias Analysis Results
+; SPIRV-Opt-NEXT: ObjC ARC contraction
+; SPIRV-Opt-NEXT: Prepare callbr
+; SPIRV-Opt-NEXT: Safe Stack instrumentation pass
+; SPIRV-Opt-NEXT: Insert stack protectors
+; SPIRV-Opt-NEXT: Analysis containing CSE Info
+; SPIRV-Opt-NEXT: Natural Loop Information
+; SPIRV-Opt-NEXT: Post-Dominator Tree Construction
+; SPIRV-Opt-NEXT: Branch Probability Analysis
+; SPIRV-Opt-NEXT: Basic Alias Analysis (stateless AA impl)
+; SPIRV-Opt-NEXT: Function Alias Analysis Results
+; SPIRV-Opt-NEXT: IRTranslator
+; SPIRV-Opt-NEXT: Analysis for ComputingKnownBits
+; SPIRV-Opt-NEXT: MachineDominator Tree Construction
+; SPIRV-Opt-NEXT: SPIRVPreLegalizerCombiner
+; SPIRV-Opt-NEXT: SPIRV pre legalizer
+; SPIRV-Opt-NEXT: Analysis containing CSE Info
+; SPIRV-Opt-NEXT: Legalizer
+; SPIRV-Opt-NEXT: SPIRV post legalizer
+; SPIRV-Opt-NEXT: Analysis for ComputingKnownBits
+; SPIRV-Opt-NEXT: Lazy Branch Probability Analysis
+; SPIRV-Opt-NEXT: Lazy Block Frequency Analysis
+; SPIRV-Opt-NEXT: InstructionSelect
+; SPIRV-Opt-NEXT: ResetMachineFunction
+; SPIRV-Opt-NEXT: Finalize ISel and expand pseudo-instructions
+; SPIRV-Opt-NEXT: Lazy Machine Block Frequency Analysis
+; SPIRV-Opt-NEXT: Early Tail Duplication
+; SPIRV-Opt-NEXT: Optimize machine instruction PHIs
+; SPIRV-Opt-NEXT: Slot index numbering
+; SPIRV-Opt-NEXT: Merge disjoint stack slots
+; SPIRV-Opt-NEXT: Local Stack Slot Allocation
+; SPIRV-Opt-NEXT: Remove dead machine instructions
+; SPIRV-Opt-NEXT: MachineDominator Tree Construction
+; SPIRV-Opt-NEXT: Machine Natural Loop Construction
+; SPIRV-Opt-NEXT: Machine Block Frequency Analysis
+; SPIRV-Opt-NEXT: Early Machine Loop Invariant Code Motion
+; SPIRV-Opt-NEXT: MachineDominator Tree Construction
+; SPIRV-Opt-NEXT: Machine Block Frequency Analysis
+; SPIRV-Opt-NEXT: Machine Common Subexpression Elimination
+; SPIRV-Opt-NEXT: MachinePostDominator Tree Construction
+; SPIRV-Opt-NEXT: Machine Cycle Info Analysis
+; SPIRV-Opt-NEXT: Machine code sinking
+; SPIRV-Opt-NEXT: Peephole Optimizations
+; SPIRV-Opt-NEXT: Remove dead machine instructions
+; SPIRV-Opt-NEXT: Remove Redundant DEBUG_VALUE analysis
+; SPIRV-Opt-NEXT: Fixup Statepoint Caller Saved
+; SPIRV-Opt-NEXT: Lazy Machine Block Frequency Analysis
+; SPIRV-Opt-NEXT: Machine Optimization Remark Emitter
+; SPIRV-Opt-NEXT: Prologue/Epilogue Insertion & Frame Finalization
+; SPIRV-Opt-NEXT: Tail Duplication
+; SPIRV-Opt-NEXT: Post-RA pseudo instruction expansion pass
+; SPIRV-Opt-NEXT: Analyze Machine Code For Garbage Collection
+; SPIRV-Opt-NEXT: Insert fentry calls
+; SPIRV-Opt-NEXT: Insert XRay ops
+; SPIRV-Opt-NEXT: Machine Sanitizer Binary Metadata
+; SPIRV-Opt-NEXT: Lazy Machine Block Frequency Analysis
+; SPIRV-Opt-NEXT: Machine Optimization Remark Emitter
+; SPIRV-Opt-NEXT: Stack Frame Layout Analysis
+; SPIRV-Opt-NEXT: SPIRV module analysis
+; SPIRV-Opt-NEXT: FunctionPass Manager
+; SPIRV-Opt-NEXT: Lazy Machine Block Frequency Analysis
+; SPIRV-Opt-NEXT: Machine Optimization Remark Emitter
+; SPIRV-Opt-NEXT: SPIRV Assembly Printer
+; SPIRV-Opt-NEXT: Free MachineFunction
+
+define void @empty() {
+ ret void
+}
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/emptyblock.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/emptyblock.mir
index 021cb4c..8abe5c5 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/emptyblock.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/emptyblock.mir
@@ -8,7 +8,7 @@
--- |
%struct.DCT_InstanceTypeDef = type { ptr, i32, i32 }
-
+
; Function Attrs: nofree nounwind
define hidden arm_aapcs_vfpcc void @test(ptr nocapture readonly %S, ptr %pIn, ptr nocapture %pOut) {
entry:
@@ -41,7 +41,7 @@
%13 = call i32 @llvm.loop.decrement.reg.i32(i32 %8, i32 1)
%14 = icmp ne i32 %13, 0
br i1 %14, label %do.body, label %do.end
-
+
do.end: ; preds = %do.body
%15 = extractelement <4 x float> %11, i32 0
%16 = extractelement <4 x float> %11, i32 1
@@ -56,7 +56,7 @@
%sub4 = add i32 %1, -4
%cmp5201 = icmp ugt i32 %sub4, 1
br i1 %cmp5201, label %for.body.lr.ph, label %for.cond54.preheader
-
+
for.body.lr.ph: ; preds = %do.end
%scevgep = getelementptr float, ptr %pIn, i32 4
%20 = add i32 %0, 4
@@ -161,7 +161,7 @@
%63 = call i32 @llvm.loop.decrement.reg.i32(i32 %53, i32 1)
%64 = icmp ne i32 %63, 0
br i1 %64, label %do.body24, label %do.end33
-
+
do.end33: ; preds = %do.body24
%65 = bitcast ptr %lsr.iv27 to ptr
%66 = bitcast ptr %lsr.iv20 to ptr
@@ -254,7 +254,7 @@
%inc = add nuw i32 %k.1200, 1
%exitcond.not = icmp eq i32 %inc, %1
br i1 %exitcond.not, label %for.end72, label %for.body56
-
+
for.end72: ; preds = %do.end66, %for.cond54.preheader
ret void
}
@@ -428,28 +428,28 @@ body: |
renamable $lr = t2LoopDec killed renamable $lr, 1
t2LoopEnd renamable $lr, %bb.1, implicit-def dead $cpsr
tB %bb.2, 14 /* CC::al */, $noreg
-
+
bb.2.do.end:
successors: %bb.3(0x40000000), %bb.7(0x40000000)
liveins: $q0, $r2, $r3, $r4, $r5, $r11
-
- renamable $s4 = nnan ninf nsz arcp contract afn reassoc VADDS renamable $s0, renamable $s1, 14 /* CC::al */, $noreg
+
+ renamable $s4 = nnan ninf nsz arcp contract afn reassoc VADDS renamable $s0, renamable $s1, 14 /* CC::al */, $noreg, implicit $fpscr_rm
renamable $r0, dead $cpsr = tSUBi3 renamable $r3, 4, 14 /* CC::al */, $noreg
tSTRspi killed renamable $r3, $sp, 1, 14 /* CC::al */, $noreg :: (store (s32) into %stack.8)
- renamable $s4 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s4, renamable $s2, 14 /* CC::al */, $noreg
+ renamable $s4 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s4, renamable $s2, 14 /* CC::al */, $noreg, implicit $fpscr_rm
tSTRspi renamable $r0, $sp, 8, 14 /* CC::al */, $noreg :: (store (s32) into %stack.1)
- renamable $s0 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s4, killed renamable $s3, 14 /* CC::al */, $noreg, implicit $q0
+ renamable $s0 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s4, killed renamable $s3, 14 /* CC::al */, $noreg, implicit $q0, implicit $fpscr_rm
renamable $s2 = VLDRS renamable $r11, 0, 14 /* CC::al */, $noreg :: (load (s32) from %ir.2)
tCMPi8 killed renamable $r0, 2, 14 /* CC::al */, $noreg, implicit-def $cpsr
renamable $r0 = t2MOVi 1, 14 /* CC::al */, $noreg, $noreg
- renamable $s0 = nnan ninf nsz arcp contract afn reassoc VMULS killed renamable $s2, killed renamable $s0, 14 /* CC::al */, $noreg
+ renamable $s0 = nnan ninf nsz arcp contract afn reassoc VMULS killed renamable $s2, killed renamable $s0, 14 /* CC::al */, $noreg, implicit $fpscr_rm
VSTRS killed renamable $s0, renamable $r2, 0, 14 /* CC::al */, $noreg :: (store (s32) into %ir.pOut)
t2Bcc %bb.7, 3 /* CC::lo */, killed $cpsr
-
+
bb.3.for.body.lr.ph:
successors: %bb.4(0x80000000)
liveins: $r0, $r2, $r4, $r5, $r11
-
+
renamable $r6 = t2ADDri renamable $r5, 16, 14 /* CC::al */, $noreg, $noreg
renamable $r1, dead $cpsr = tSUBi3 renamable $r4, 4, 14 /* CC::al */, $noreg
tSTRspi killed renamable $r6, $sp, 4, 14 /* CC::al */, $noreg :: (store (s32) into %stack.5)
@@ -523,26 +523,26 @@ body: |
renamable $lr = t2LoopDec killed renamable $lr, 1
t2LoopEnd renamable $lr, %bb.5, implicit-def dead $cpsr
tB %bb.6, 14 /* CC::al */, $noreg
-
+
bb.6.do.end33:
successors: %bb.4(0x7c000000), %bb.7(0x04000000)
liveins: $q0, $q1, $q2, $q3, $r0, $r1, $r2, $r6, $r8, $r9, $r10, $r12
-
- renamable $s16 = nnan ninf nsz arcp contract afn reassoc VADDS renamable $s12, renamable $s13, 14 /* CC::al */, $noreg
- renamable $s18 = nnan ninf nsz arcp contract afn reassoc VADDS renamable $s8, renamable $s9, 14 /* CC::al */, $noreg
- renamable $s16 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s16, renamable $s14, 14 /* CC::al */, $noreg
- renamable $s18 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s18, renamable $s10, 14 /* CC::al */, $noreg
- renamable $s12 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s16, killed renamable $s15, 14 /* CC::al */, $noreg, implicit $q3
- renamable $s8 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s18, killed renamable $s11, 14 /* CC::al */, $noreg, implicit $q2
- renamable $s10 = nnan ninf nsz arcp contract afn reassoc VADDS renamable $s4, renamable $s5, 14 /* CC::al */, $noreg
- renamable $s14 = nnan ninf nsz arcp contract afn reassoc VADDS renamable $s0, renamable $s1, 14 /* CC::al */, $noreg
+
+ renamable $s16 = nnan ninf nsz arcp contract afn reassoc VADDS renamable $s12, renamable $s13, 14 /* CC::al */, $noreg, implicit $fpscr_rm
+ renamable $s18 = nnan ninf nsz arcp contract afn reassoc VADDS renamable $s8, renamable $s9, 14 /* CC::al */, $noreg, implicit $fpscr_rm
+ renamable $s16 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s16, renamable $s14, 14 /* CC::al */, $noreg, implicit $fpscr_rm
+ renamable $s18 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s18, renamable $s10, 14 /* CC::al */, $noreg, implicit $fpscr_rm
+ renamable $s12 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s16, killed renamable $s15, 14 /* CC::al */, $noreg, implicit $q3, implicit $fpscr_rm
+ renamable $s8 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s18, killed renamable $s11, 14 /* CC::al */, $noreg, implicit $q2, implicit $fpscr_rm
+ renamable $s10 = nnan ninf nsz arcp contract afn reassoc VADDS renamable $s4, renamable $s5, 14 /* CC::al */, $noreg, implicit $fpscr_rm
+ renamable $s14 = nnan ninf nsz arcp contract afn reassoc VADDS renamable $s0, renamable $s1, 14 /* CC::al */, $noreg, implicit $fpscr_rm
renamable $r7 = tLDRspi $sp, 9, 14 /* CC::al */, $noreg :: (load (s32) from %stack.0)
- renamable $s10 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s10, renamable $s6, 14 /* CC::al */, $noreg
- renamable $s14 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s14, renamable $s2, 14 /* CC::al */, $noreg
+ renamable $s10 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s10, renamable $s6, 14 /* CC::al */, $noreg, implicit $fpscr_rm
+ renamable $s14 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s14, renamable $s2, 14 /* CC::al */, $noreg, implicit $fpscr_rm
renamable $r3 = t2ADDrs renamable $r2, renamable $r0, 18, 14 /* CC::al */, $noreg, $noreg
renamable $r7 = t2ADDrs renamable $r2, killed renamable $r7, 18, 14 /* CC::al */, $noreg, $noreg
- renamable $s4 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s10, killed renamable $s7, 14 /* CC::al */, $noreg, implicit $q1
- renamable $s0 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s14, killed renamable $s3, 14 /* CC::al */, $noreg, implicit $q0
+ renamable $s4 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s10, killed renamable $s7, 14 /* CC::al */, $noreg, implicit $q1, implicit $fpscr_rm
+ renamable $s0 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s14, killed renamable $s3, 14 /* CC::al */, $noreg, implicit $q0, implicit $fpscr_rm
VSTRS killed renamable $s12, killed renamable $r3, 0, 14 /* CC::al */, $noreg :: (store (s32) into %ir.arrayidx37)
VSTRS killed renamable $s8, killed renamable $r7, 0, 14 /* CC::al */, $noreg :: (store (s32) into %ir.arrayidx42)
renamable $r3 = t2ADDrs renamable $r2, killed renamable $r8, 18, 14 /* CC::al */, $noreg, $noreg
@@ -597,7 +597,7 @@ body: |
bb.13:
successors: %bb.10(0x80000000)
liveins: $lr, $q0, $r0, $r1, $r2, $r3, $r4, $r5, $r6, $r7, $r11, $r12
-
+
bb.10.do.body59 (align 4):
successors: %bb.10(0x7c000000), %bb.11(0x04000000)
liveins: $lr, $q0, $r0, $r1, $r2, $r3, $r4, $r5, $r6, $r7, $r11, $r12
@@ -611,20 +611,20 @@ body: |
renamable $lr = t2LoopDec killed renamable $lr, 1
t2LoopEnd renamable $lr, %bb.10, implicit-def dead $cpsr
tB %bb.11, 14 /* CC::al */, $noreg
-
+
bb.11.do.end66:
successors: %bb.12(0x04000000), %bb.9(0x7c000000)
liveins: $q0, $r0, $r2, $r3, $r4, $r5, $r11, $r12
-
- renamable $s4 = nnan ninf nsz arcp contract afn reassoc VADDS renamable $s0, renamable $s1, 14 /* CC::al */, $noreg
+
+ renamable $s4 = nnan ninf nsz arcp contract afn reassoc VADDS renamable $s0, renamable $s1, 14 /* CC::al */, $noreg, implicit $fpscr_rm
renamable $r1 = t2ADDrs renamable $r2, renamable $r0, 18, 14 /* CC::al */, $noreg, $noreg
- renamable $s4 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s4, renamable $s2, 14 /* CC::al */, $noreg
+ renamable $s4 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s4, renamable $s2, 14 /* CC::al */, $noreg, implicit $fpscr_rm
renamable $r0, dead $cpsr = nuw tADDi8 killed renamable $r0, 1, 14 /* CC::al */, $noreg
- renamable $s0 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s4, killed renamable $s3, 14 /* CC::al */, $noreg, implicit $q0
+ renamable $s0 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s4, killed renamable $s3, 14 /* CC::al */, $noreg, implicit $q0, implicit $fpscr_rm
tCMPhir renamable $r0, renamable $r12, 14 /* CC::al */, $noreg, implicit-def $cpsr
VSTRS killed renamable $s0, killed renamable $r1, 0, 14 /* CC::al */, $noreg :: (store (s32) into %ir.arrayidx70)
tBcc %bb.9, 1 /* CC::ne */, killed $cpsr
-
+
bb.12.for.end72:
$sp = frame-destroy tADDspi $sp, 10, 14 /* CC::al */, $noreg
$sp = frame-destroy VLDMDIA_UPD $sp, 14 /* CC::al */, $noreg, def $d8, def $d9, def $d10, def $d11
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/it-block-mov.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/it-block-mov.mir
index 31e88ea..85b826a 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/it-block-mov.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/it-block-mov.mir
@@ -185,15 +185,15 @@ body: |
successors: %bb.5(0x80000000)
liveins: $q0, $r0, $r1, $r2, $r4
- renamable $s4 = nnan ninf nsz VADDS renamable $s0, renamable $s1, 14, $noreg
+ renamable $s4 = nnan ninf nsz VADDS renamable $s0, renamable $s1, 14, $noreg, implicit $fpscr_rm
$lr = tMOVr $r4, 14, $noreg
$r3 = tMOVr $r1, 14, $noreg
- renamable $s4 = nnan ninf nsz VADDS renamable $s2, killed renamable $s4, 14, $noreg
- renamable $s0 = nnan ninf nsz VADDS killed renamable $s3, killed renamable $s4, 14, $noreg, implicit $q0
+ renamable $s4 = nnan ninf nsz VADDS renamable $s2, killed renamable $s4, 14, $noreg, implicit $fpscr_rm
+ renamable $s0 = nnan ninf nsz VADDS killed renamable $s3, killed renamable $s4, 14, $noreg, implicit $q0, implicit $fpscr_rm
$s2 = VMOVSR $r1, 14, $noreg
renamable $s2 = VUITOS killed renamable $s2, 14, $noreg
$lr = t2DoLoopStart killed $r4
- renamable $s4 = nnan ninf nsz VDIVS killed renamable $s0, killed renamable $s2, 14, $noreg
+ renamable $s4 = nnan ninf nsz VDIVS killed renamable $s0, killed renamable $s2, 14, $noreg, implicit $fpscr_rm
renamable $q0 = MVE_VMOVimmi32 0, 0, $noreg, $noreg, undef renamable $q0
bb.5:
@@ -215,13 +215,13 @@ body: |
bb.6:
liveins: $q0, $r1, $r2
- renamable $s4 = nnan ninf nsz VADDS renamable $s0, renamable $s1, 14, $noreg
+ renamable $s4 = nnan ninf nsz VADDS renamable $s0, renamable $s1, 14, $noreg, implicit $fpscr_rm
renamable $r0, dead $cpsr = tSUBi3 killed renamable $r1, 1, 14, $noreg
- renamable $s4 = nnan ninf nsz VADDS renamable $s2, killed renamable $s4, 14, $noreg
- renamable $s0 = nnan ninf nsz VADDS killed renamable $s3, killed renamable $s4, 14, $noreg, implicit $q0
+ renamable $s4 = nnan ninf nsz VADDS renamable $s2, killed renamable $s4, 14, $noreg, implicit $fpscr_rm
+ renamable $s0 = nnan ninf nsz VADDS killed renamable $s3, killed renamable $s4, 14, $noreg, implicit $q0, implicit $fpscr_rm
$s2 = VMOVSR killed $r0, 14, $noreg
renamable $s2 = VUITOS killed renamable $s2, 14, $noreg
- renamable $s0 = nnan ninf nsz VDIVS killed renamable $s0, killed renamable $s2, 14, $noreg
+ renamable $s0 = nnan ninf nsz VDIVS killed renamable $s0, killed renamable $s2, 14, $noreg, implicit $fpscr_rm
VSTRS killed renamable $s0, killed renamable $r2, 0, 14, $noreg
tPOP_RET 14, $noreg, def $r4, def $pc
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/lstp-insertion-position.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/lstp-insertion-position.mir
index f5da7ac..780831c 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/lstp-insertion-position.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/lstp-insertion-position.mir
@@ -232,9 +232,9 @@ body: |
bb.3.middle.block:
liveins: $q1
- renamable $s0 = nnan ninf nsz arcp contract afn reassoc VADDS renamable $s6, renamable $s7, 14 /* CC::al */, $noreg
- renamable $s2 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s4, killed renamable $s5, 14 /* CC::al */, $noreg, implicit $q1
- renamable $s0 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s2, killed renamable $s0, 14 /* CC::al */, $noreg
+ renamable $s0 = nnan ninf nsz arcp contract afn reassoc VADDS renamable $s6, renamable $s7, 14 /* CC::al */, $noreg, implicit $fpscr_rm
+ renamable $s2 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s4, killed renamable $s5, 14 /* CC::al */, $noreg, implicit $q1, implicit $fpscr_rm
+ renamable $s0 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s2, killed renamable $s0, 14 /* CC::al */, $noreg, implicit $fpscr_rm
$sp = frame-destroy t2LDMIA_UPD $sp, 14 /* CC::al */, $noreg, def $r7, def $lr
tBX_RET 14 /* CC::al */, $noreg, implicit killed $s0
@@ -376,9 +376,9 @@ body: |
bb.3.middle.block:
liveins: $q1
- renamable $s0 = nnan ninf nsz arcp contract afn reassoc VADDS renamable $s6, renamable $s7, 14 /* CC::al */, $noreg
- renamable $s2 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s4, killed renamable $s5, 14 /* CC::al */, $noreg, implicit $q1
- renamable $s0 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s2, killed renamable $s0, 14 /* CC::al */, $noreg
+ renamable $s0 = nnan ninf nsz arcp contract afn reassoc VADDS renamable $s6, renamable $s7, 14 /* CC::al */, $noreg, implicit $fpscr_rm
+ renamable $s2 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s4, killed renamable $s5, 14 /* CC::al */, $noreg, implicit $q1, implicit $fpscr_rm
+ renamable $s0 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s2, killed renamable $s0, 14 /* CC::al */, $noreg, implicit $fpscr_rm
$sp = frame-destroy t2LDMIA_UPD $sp, 14 /* CC::al */, $noreg, def $r7, def $lr
tBX_RET 14 /* CC::al */, $noreg, implicit killed $s0
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-after-dlstp.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-after-dlstp.mir
index c331612..5dcd0a1 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-after-dlstp.mir
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mov-after-dlstp.mir
@@ -240,10 +240,10 @@ body: |
$s4 = VMOVSR $r1, 14 /* CC::al */, $noreg
$lr = tMOVr $r4, 14 /* CC::al */, $noreg
- renamable $s0 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s3, renamable $s3, 14 /* CC::al */, $noreg, implicit $q0
+ renamable $s0 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s3, renamable $s3, 14 /* CC::al */, $noreg, implicit $q0, implicit $fpscr_rm
$lr = t2DoLoopStart killed $r4
renamable $s4 = VUITOS killed renamable $s4, 14 /* CC::al */, $noreg
- renamable $s0 = nnan ninf nsz arcp contract afn reassoc VDIVS killed renamable $s0, killed renamable $s4, 14 /* CC::al */, $noreg
+ renamable $s0 = nnan ninf nsz arcp contract afn reassoc VDIVS killed renamable $s0, killed renamable $s4, 14 /* CC::al */, $noreg, implicit $fpscr_rm
renamable $r3 = VMOVRS killed renamable $s0, 14 /* CC::al */, $noreg
renamable $q0 = MVE_VMOVimmi32 0, 0, $noreg, $noreg, undef renamable $q0
renamable $q1 = MVE_VDUP32 killed renamable $r3, 0, $noreg, $noreg, undef renamable $q1
@@ -267,10 +267,10 @@ body: |
liveins: $q0, $r1, $r2
renamable $r0, dead $cpsr = tSUBi3 killed renamable $r1, 1, 14 /* CC::al */, $noreg
- renamable $s0 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s3, renamable $s3, 14 /* CC::al */, $noreg, implicit $q0
+ renamable $s0 = nnan ninf nsz arcp contract afn reassoc VADDS killed renamable $s3, renamable $s3, 14 /* CC::al */, $noreg, implicit $q0, implicit $fpscr_rm
$s2 = VMOVSR killed $r0, 14 /* CC::al */, $noreg
renamable $s2 = VUITOS killed renamable $s2, 14 /* CC::al */, $noreg
- renamable $s0 = nnan ninf nsz arcp contract afn reassoc VDIVS killed renamable $s0, killed renamable $s2, 14 /* CC::al */, $noreg
+ renamable $s0 = nnan ninf nsz arcp contract afn reassoc VDIVS killed renamable $s0, killed renamable $s2, 14 /* CC::al */, $noreg, implicit $fpscr_rm
VSTRS killed renamable $s0, killed renamable $r2, 0, 14 /* CC::al */, $noreg :: (store (s32) into %ir.pResult)
frame-destroy tPOP_RET 14 /* CC::al */, $noreg, def $r4, def $pc
diff --git a/llvm/test/CodeGen/Thumb2/pipeliner-inlineasm.mir b/llvm/test/CodeGen/Thumb2/pipeliner-inlineasm.mir
index 5221205..d9d2f25 100644
--- a/llvm/test/CodeGen/Thumb2/pipeliner-inlineasm.mir
+++ b/llvm/test/CodeGen/Thumb2/pipeliner-inlineasm.mir
@@ -96,7 +96,7 @@ body: |
; CHECK-NEXT: bb.6.for.body:
; CHECK-NEXT: successors: %bb.7(0x80000000), %bb.8(0x00000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[VMULS:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VMULS [[VLDRS3]], %30, 14 /* CC::al */, $noreg
+ ; CHECK-NEXT: [[VMULS:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VMULS [[VLDRS3]], %30, 14 /* CC::al */, $noreg, implicit $fpscr_rm
; CHECK-NEXT: [[t2ADDri2:%[0-9]+]]:rgpr = t2ADDri [[COPY7]], 4, 14 /* CC::al */, $noreg, $noreg
; CHECK-NEXT: [[VLDRS4:%[0-9]+]]:spr = VLDRS [[COPY7]], 1, 14 /* CC::al */, $noreg :: (load unknown-size from %ir.scevgep7, align 4)
; CHECK-NEXT: [[t2ADDri3:%[0-9]+]]:rgpr = t2ADDri [[COPY6]], 4, 14 /* CC::al */, $noreg, $noreg
@@ -119,13 +119,13 @@ body: |
; CHECK-NEXT: [[PHI4:%[0-9]+]]:spr = PHI [[VLDRS5]], %bb.6, %47, %bb.7
; CHECK-NEXT: [[PHI5:%[0-9]+]]:spr = PHI %40, %bb.6, %55, %bb.7
; CHECK-NEXT: [[PHI6:%[0-9]+]]:spr = PHI [[VMULS]], %bb.6, %45, %bb.7
- ; CHECK-NEXT: [[VMULS1:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VMULS [[PHI4]], [[PHI5]], 14 /* CC::al */, $noreg
+ ; CHECK-NEXT: [[VMULS1:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VMULS [[PHI4]], [[PHI5]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
; CHECK-NEXT: [[t2SUBri4:%[0-9]+]]:rgpr = t2SUBri [[PHI2]], 1, 14 /* CC::al */, $noreg, def $cpsr
; CHECK-NEXT: [[VLDRS6:%[0-9]+]]:spr = VLDRS [[PHI1]], 1, 14 /* CC::al */, $noreg :: (load unknown-size from %ir.scevgep3, align 4)
; CHECK-NEXT: [[VLDRS7:%[0-9]+]]:spr = VLDRS [[PHI]], 1, 14 /* CC::al */, $noreg :: (load unknown-size from %ir.scevgep7, align 4)
; CHECK-NEXT: [[t2ADDri4:%[0-9]+]]:rgpr = t2ADDri [[PHI]], 4, 14 /* CC::al */, $noreg, $noreg
; CHECK-NEXT: [[t2ADDri5:%[0-9]+]]:rgpr = t2ADDri [[PHI1]], 4, 14 /* CC::al */, $noreg, $noreg
- ; CHECK-NEXT: [[VADDS:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VADDS [[PHI6]], [[PHI3]], 14 /* CC::al */, $noreg
+ ; CHECK-NEXT: [[VADDS:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VADDS [[PHI6]], [[PHI3]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
; CHECK-NEXT: [[COPY11:%[0-9]+]]:gpr = COPY [[t2ADDri4]]
; CHECK-NEXT: [[COPY12:%[0-9]+]]:gpr = COPY [[t2ADDri5]]
; CHECK-NEXT: [[COPY13:%[0-9]+]]:gpr = COPY [[t2SUBri4]]
@@ -140,7 +140,7 @@ body: |
; CHECK-NEXT: [[PHI8:%[0-9]+]]:spr = PHI [[VLDRS5]], %bb.6, [[VLDRS6]], %bb.7
; CHECK-NEXT: [[PHI9:%[0-9]+]]:spr = PHI %40, %bb.6, %55, %bb.7
; CHECK-NEXT: [[PHI10:%[0-9]+]]:spr = PHI [[VMULS]], %bb.6, [[VMULS1]], %bb.7
- ; CHECK-NEXT: [[VADDS1:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VADDS [[PHI10]], [[PHI7]], 14 /* CC::al */, $noreg
+ ; CHECK-NEXT: [[VADDS1:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VADDS [[PHI10]], [[PHI7]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.9:
; CHECK-NEXT: successors: %bb.4(0x80000000)
@@ -148,8 +148,8 @@ body: |
; CHECK-NEXT: [[PHI11:%[0-9]+]]:spr = PHI [[VLDRS1]], %bb.5, [[VADDS1]], %bb.8
; CHECK-NEXT: [[PHI12:%[0-9]+]]:spr = PHI [[VLDRS3]], %bb.5, [[PHI8]], %bb.8
; CHECK-NEXT: [[PHI13:%[0-9]+]]:spr = PHI %30, %bb.5, [[PHI9]], %bb.8
- ; CHECK-NEXT: [[VMULS2:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VMULS [[PHI12]], [[PHI13]], 14 /* CC::al */, $noreg
- ; CHECK-NEXT: [[VADDS2:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VADDS [[VMULS2]], [[PHI11]], 14 /* CC::al */, $noreg
+ ; CHECK-NEXT: [[VMULS2:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VMULS [[PHI12]], [[PHI13]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
+ ; CHECK-NEXT: [[VADDS2:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VADDS [[VMULS2]], [[PHI11]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
; CHECK-NEXT: t2B %bb.4, 14 /* CC::al */, $noreg
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.4.for.end:
@@ -194,8 +194,8 @@ body: |
%20:rgpr = t2ADDri %3, 4, 14 /* CC::al */, $noreg, $noreg
%21:spr = VLDRS %3, 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep3)
INLINEASM &nop, 0 /* attdialect */, 196618 /* regdef:SPR */, def %25, 2147483657 /* reguse tiedto:$0 */, %19(tied-def 3)
- %22:spr = nnan ninf nsz arcp contract afn reassoc VMULS killed %21, killed %25, 14 /* CC::al */, $noreg
- %6:spr = nnan ninf nsz arcp contract afn reassoc VADDS killed %22, %5, 14 /* CC::al */, $noreg
+ %22:spr = nnan ninf nsz arcp contract afn reassoc VMULS killed %21, killed %25, 14 /* CC::al */, $noreg, implicit $fpscr_rm
+ %6:spr = nnan ninf nsz arcp contract afn reassoc VADDS killed %22, %5, 14 /* CC::al */, $noreg, implicit $fpscr_rm
%23:rgpr = t2SUBri %4, 1, 14 /* CC::al */, $noreg, def $cpsr
%7:gpr = COPY %23
%8:gpr = COPY %20
diff --git a/llvm/test/CodeGen/Thumb2/scavenge-lr.mir b/llvm/test/CodeGen/Thumb2/scavenge-lr.mir
index 5513bed..bfe55a5 100644
--- a/llvm/test/CodeGen/Thumb2/scavenge-lr.mir
+++ b/llvm/test/CodeGen/Thumb2/scavenge-lr.mir
@@ -147,10 +147,10 @@ body: |
$q5 = VLD1q64 $r3, 16, 14 /* CC::al */, $noreg :: (load (s128) from %ir.zzz..sroa_cast241, align 32)
$q1 = VMLAfq killed $q1, $q5, killed $q8, 14 /* CC::al */, $noreg
$s8 = VLDRS %const.0, 0, 14 /* CC::al */, $noreg :: (load (s32) from constant-pool)
- $s3 = VDIVS $s8, $s7, 14 /* CC::al */, $noreg, implicit-def $q0
- $s2 = VDIVS $s8, $s6, 14 /* CC::al */, $noreg, implicit killed $q0, implicit-def $q0
- $s1 = VDIVS $s8, $s5, 14 /* CC::al */, $noreg, implicit killed $q0, implicit-def $q0
- $s0 = VDIVS $s8, $s4, 14 /* CC::al */, $noreg, implicit killed $q1, implicit killed $q0, implicit-def $q0
+ $s3 = VDIVS $s8, $s7, 14 /* CC::al */, $noreg, implicit-def $q0, implicit $fpscr_rm
+ $s2 = VDIVS $s8, $s6, 14 /* CC::al */, $noreg, implicit killed $q0, implicit-def $q0, implicit $fpscr_rm
+ $s1 = VDIVS $s8, $s5, 14 /* CC::al */, $noreg, implicit killed $q0, implicit-def $q0, implicit $fpscr_rm
+ $s0 = VDIVS $s8, $s4, 14 /* CC::al */, $noreg, implicit killed $q1, implicit killed $q0, implicit-def $q0, implicit $fpscr_rm
$r7 = t2SUBri $r0, 64, 14 /* CC::al */, $noreg, $noreg
$q8 = VLD1q64 $r7, 16, 14 /* CC::al */, $noreg :: (load (s128) from %ir.yyy..sroa_cast244, align 32)
VSTMQIA $q8, %stack.1, 14 /* CC::al */, $noreg :: (store (s128) into %stack.1)
@@ -185,10 +185,10 @@ body: |
$r3 = VST1q32wb_fixed killed $r3, 16, killed $q10, 14 /* CC::al */, $noreg :: (store (s128) into %ir.zzz..sroa_cast241, align 32)
$q10 = VLD1q64 $r3, 16, 14 /* CC::al */, $noreg :: (load (s128) from %ir.zzz..sroa_cast241 + 16, basealign 32)
$q1 = VMLAfq killed $q1, $q10, killed $q8, 14 /* CC::al */, $noreg
- $s23 = VDIVS $s8, $s7, 14 /* CC::al */, $noreg, implicit-def $q5
- $s22 = VDIVS $s8, $s6, 14 /* CC::al */, $noreg, implicit killed $q5, implicit-def $q5
- $s21 = VDIVS $s8, $s5, 14 /* CC::al */, $noreg, implicit killed $q5, implicit-def $q5
- $s20 = VDIVS killed $s8, $s4, 14 /* CC::al */, $noreg, implicit killed $q1, implicit killed $q5, implicit-def $q5
+ $s23 = VDIVS $s8, $s7, 14 /* CC::al */, $noreg, implicit-def $q5, implicit $fpscr_rm
+ $s22 = VDIVS $s8, $s6, 14 /* CC::al */, $noreg, implicit killed $q5, implicit-def $q5, implicit $fpscr_rm
+ $s21 = VDIVS $s8, $s5, 14 /* CC::al */, $noreg, implicit killed $q5, implicit-def $q5, implicit $fpscr_rm
+ $s20 = VDIVS killed $s8, $s4, 14 /* CC::al */, $noreg, implicit killed $q1, implicit killed $q5, implicit-def $q5, implicit $fpscr_rm
VST1q64 killed $r5, 16, $q5, 14 /* CC::al */, $noreg :: (store (s128) into %ir.xxx..sroa_cast248 + 16, basealign 32)
VST1q64 killed $r6, 16, $q5, 14 /* CC::al */, $noreg :: (store (s128) into %ir.vvv..sroa_cast230 + 16, basealign 32)
$q8 = VLDMQIA %stack.0, 14 /* CC::al */, $noreg :: (load (s128) from %stack.0)
diff --git a/llvm/test/CodeGen/Thumb2/swp-exitbranchdir.mir b/llvm/test/CodeGen/Thumb2/swp-exitbranchdir.mir
index ba10045..20f044a 100644
--- a/llvm/test/CodeGen/Thumb2/swp-exitbranchdir.mir
+++ b/llvm/test/CodeGen/Thumb2/swp-exitbranchdir.mir
@@ -83,7 +83,7 @@ body: |
; CHECK-NEXT: [[VLDRS2:%[0-9]+]]:spr = VLDRS [[COPY4]], 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep7)
; CHECK-NEXT: [[t2ADDri1:%[0-9]+]]:rgpr = t2ADDri [[COPY3]], 4, 14 /* CC::al */, $noreg, $noreg
; CHECK-NEXT: [[VLDRS3:%[0-9]+]]:spr = VLDRS [[COPY3]], 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep3)
- ; CHECK-NEXT: [[VMULS:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VMULS [[VLDRS3]], [[VLDRS2]], 14 /* CC::al */, $noreg
+ ; CHECK-NEXT: [[VMULS:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VMULS [[VLDRS3]], [[VLDRS2]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
; CHECK-NEXT: [[t2SUBri2:%[0-9]+]]:rgpr = t2SUBri [[COPY]], 1, 14 /* CC::al */, $noreg, def $cpsr
; CHECK-NEXT: [[COPY5:%[0-9]+]]:gprnopc = COPY [[t2SUBri2]]
; CHECK-NEXT: [[COPY6:%[0-9]+]]:gprnopc = COPY [[t2ADDri1]]
@@ -98,7 +98,7 @@ body: |
; CHECK-NEXT: [[VLDRS4:%[0-9]+]]:spr = VLDRS [[COPY7]], 1, 14 /* CC::al */, $noreg :: (load unknown-size from %ir.scevgep7, align 4)
; CHECK-NEXT: [[t2ADDri3:%[0-9]+]]:rgpr = t2ADDri [[COPY6]], 4, 14 /* CC::al */, $noreg, $noreg
; CHECK-NEXT: [[VLDRS5:%[0-9]+]]:spr = VLDRS [[COPY6]], 1, 14 /* CC::al */, $noreg :: (load unknown-size from %ir.scevgep3, align 4)
- ; CHECK-NEXT: [[VMULS1:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VMULS [[VLDRS5]], [[VLDRS4]], 14 /* CC::al */, $noreg
+ ; CHECK-NEXT: [[VMULS1:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VMULS [[VLDRS5]], [[VLDRS4]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
; CHECK-NEXT: [[t2SUBri3:%[0-9]+]]:rgpr = t2SUBri [[COPY5]], 1, 14 /* CC::al */, $noreg, def $cpsr
; CHECK-NEXT: [[COPY8:%[0-9]+]]:gpr = COPY [[t2SUBri3]]
; CHECK-NEXT: [[COPY9:%[0-9]+]]:gpr = COPY [[t2ADDri3]]
@@ -115,7 +115,7 @@ body: |
; CHECK-NEXT: [[PHI3:%[0-9]+]]:spr = PHI [[VLDRS1]], %bb.6, %43, %bb.7
; CHECK-NEXT: [[PHI4:%[0-9]+]]:spr = PHI [[VMULS1]], %bb.6, %52, %bb.7
; CHECK-NEXT: [[PHI5:%[0-9]+]]:spr = PHI [[VMULS]], %bb.6, [[PHI4]], %bb.7
- ; CHECK-NEXT: [[VADDS:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VADDS [[PHI5]], [[PHI3]], 14 /* CC::al */, $noreg
+ ; CHECK-NEXT: [[VADDS:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VADDS [[PHI5]], [[PHI3]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
; CHECK-NEXT: [[t2SUBri4:%[0-9]+]]:rgpr = t2SUBri [[PHI2]], 1, 14 /* CC::al */, $noreg, def $cpsr
; CHECK-NEXT: [[VLDRS6:%[0-9]+]]:spr = VLDRS [[PHI1]], 1, 14 /* CC::al */, $noreg :: (load unknown-size from %ir.scevgep3, align 4)
; CHECK-NEXT: [[VLDRS7:%[0-9]+]]:spr = VLDRS [[PHI]], 1, 14 /* CC::al */, $noreg :: (load unknown-size from %ir.scevgep7, align 4)
@@ -124,7 +124,7 @@ body: |
; CHECK-NEXT: [[COPY11:%[0-9]+]]:gpr = COPY [[t2ADDri4]]
; CHECK-NEXT: [[COPY12:%[0-9]+]]:gpr = COPY [[t2ADDri5]]
; CHECK-NEXT: [[COPY13:%[0-9]+]]:gpr = COPY [[t2SUBri4]]
- ; CHECK-NEXT: [[VMULS2:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VMULS [[VLDRS6]], [[VLDRS7]], 14 /* CC::al */, $noreg
+ ; CHECK-NEXT: [[VMULS2:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VMULS [[VLDRS6]], [[VLDRS7]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
; CHECK-NEXT: t2Bcc %bb.8, 0 /* CC::eq */, $cpsr
; CHECK-NEXT: t2B %bb.7, 14 /* CC::al */, $noreg
; CHECK-NEXT: {{ $}}
@@ -134,14 +134,14 @@ body: |
; CHECK-NEXT: [[PHI6:%[0-9]+]]:spr = PHI [[VLDRS1]], %bb.6, [[VADDS]], %bb.7
; CHECK-NEXT: [[PHI7:%[0-9]+]]:spr = PHI [[VMULS1]], %bb.6, [[VMULS2]], %bb.7
; CHECK-NEXT: [[PHI8:%[0-9]+]]:spr = PHI [[VMULS]], %bb.6, [[PHI4]], %bb.7
- ; CHECK-NEXT: [[VADDS1:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VADDS [[PHI8]], [[PHI6]], 14 /* CC::al */, $noreg
+ ; CHECK-NEXT: [[VADDS1:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VADDS [[PHI8]], [[PHI6]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.9:
; CHECK-NEXT: successors: %bb.4(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[PHI9:%[0-9]+]]:spr = PHI [[VLDRS1]], %bb.5, [[VADDS1]], %bb.8
; CHECK-NEXT: [[PHI10:%[0-9]+]]:spr = PHI [[VMULS]], %bb.5, [[PHI7]], %bb.8
- ; CHECK-NEXT: [[VADDS2:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VADDS [[PHI10]], [[PHI9]], 14 /* CC::al */, $noreg
+ ; CHECK-NEXT: [[VADDS2:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VADDS [[PHI10]], [[PHI9]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
; CHECK-NEXT: t2B %bb.4, 14 /* CC::al */, $noreg
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.4.for.end:
@@ -185,8 +185,8 @@ body: |
%19:spr = VLDRS %2, 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep7)
%20:rgpr = t2ADDri %3, 4, 14 /* CC::al */, $noreg, $noreg
%21:spr = VLDRS %3, 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep3)
- %22:spr = nnan ninf nsz arcp contract afn reassoc VMULS killed %21, killed %19, 14 /* CC::al */, $noreg
- %6:spr = nnan ninf nsz arcp contract afn reassoc VADDS killed %22, %5, 14 /* CC::al */, $noreg
+ %22:spr = nnan ninf nsz arcp contract afn reassoc VMULS killed %21, killed %19, 14 /* CC::al */, $noreg, implicit $fpscr_rm
+ %6:spr = nnan ninf nsz arcp contract afn reassoc VADDS killed %22, %5, 14 /* CC::al */, $noreg, implicit $fpscr_rm
%23:rgpr = t2SUBri %4, 1, 14 /* CC::al */, $noreg, def $cpsr
%7:gpr = COPY %23
%8:gpr = COPY %20
diff --git a/llvm/test/CodeGen/Thumb2/swp-fixedii-le.mir b/llvm/test/CodeGen/Thumb2/swp-fixedii-le.mir
index 854c5b8..177c94e 100644
--- a/llvm/test/CodeGen/Thumb2/swp-fixedii-le.mir
+++ b/llvm/test/CodeGen/Thumb2/swp-fixedii-le.mir
@@ -84,7 +84,7 @@ body: |
; CHECK-NEXT: [[VLDRS2:%[0-9]+]]:spr = VLDRS [[COPY4]], 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep7)
; CHECK-NEXT: [[t2ADDri1:%[0-9]+]]:rgpr = t2ADDri [[COPY3]], 4, 14 /* CC::al */, $noreg, $noreg
; CHECK-NEXT: [[VLDRS3:%[0-9]+]]:spr = VLDRS [[COPY3]], 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep3)
- ; CHECK-NEXT: [[VMULS:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VMULS [[VLDRS3]], [[VLDRS2]], 14 /* CC::al */, $noreg
+ ; CHECK-NEXT: [[VMULS:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VMULS [[VLDRS3]], [[VLDRS2]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
; CHECK-NEXT: [[COPY5:%[0-9]+]]:gprlr = COPY [[t2DoLoopStart]]
; CHECK-NEXT: [[t2LoopDec:%[0-9]+]]:gprlr = t2LoopDec [[COPY5]], 1
; CHECK-NEXT: [[COPY6:%[0-9]+]]:gpr = COPY [[t2LoopDec]]
@@ -110,8 +110,8 @@ body: |
; CHECK-NEXT: [[t2ADDri3:%[0-9]+]]:rgpr = t2ADDri [[PHI1]], 4, 14 /* CC::al */, $noreg, $noreg
; CHECK-NEXT: [[COPY10:%[0-9]+]]:gpr = COPY [[t2ADDri2]]
; CHECK-NEXT: [[COPY11:%[0-9]+]]:gpr = COPY [[t2ADDri3]]
- ; CHECK-NEXT: [[VMULS1:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VMULS [[VLDRS4]], [[VLDRS5]], 14 /* CC::al */, $noreg
- ; CHECK-NEXT: [[VADDS:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VADDS [[PHI4]], [[PHI3]], 14 /* CC::al */, $noreg
+ ; CHECK-NEXT: [[VMULS1:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VMULS [[VLDRS4]], [[VLDRS5]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
+ ; CHECK-NEXT: [[VADDS:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VADDS [[PHI4]], [[PHI3]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
; CHECK-NEXT: [[COPY12:%[0-9]+]]:gpr = COPY [[t2LoopDec1]]
; CHECK-NEXT: t2LoopEnd [[t2LoopDec1]], %bb.6, implicit-def $cpsr
; CHECK-NEXT: t2B %bb.7, 14 /* CC::al */, $noreg
@@ -121,7 +121,7 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[PHI5:%[0-9]+]]:spr = PHI [[VLDRS1]], %bb.5, [[VADDS]], %bb.6
; CHECK-NEXT: [[PHI6:%[0-9]+]]:spr = PHI [[VMULS]], %bb.5, [[VMULS1]], %bb.6
- ; CHECK-NEXT: [[VADDS1:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VADDS [[PHI6]], [[PHI5]], 14 /* CC::al */, $noreg
+ ; CHECK-NEXT: [[VADDS1:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VADDS [[PHI6]], [[PHI5]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
; CHECK-NEXT: t2B %bb.4, 14 /* CC::al */, $noreg
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.4.for.end:
@@ -166,8 +166,8 @@ body: |
%19:spr = VLDRS %2, 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep7)
%20:rgpr = t2ADDri %3, 4, 14 /* CC::al */, $noreg, $noreg
%21:spr = VLDRS %3, 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep3)
- %22:spr = nnan ninf nsz arcp contract afn reassoc VMULS killed %21, killed %19, 14 /* CC::al */, $noreg
- %6:spr = nnan ninf nsz arcp contract afn reassoc VADDS killed %22, %5, 14 /* CC::al */, $noreg
+ %22:spr = nnan ninf nsz arcp contract afn reassoc VMULS killed %21, killed %19, 14 /* CC::al */, $noreg, implicit $fpscr_rm
+ %6:spr = nnan ninf nsz arcp contract afn reassoc VADDS killed %22, %5, 14 /* CC::al */, $noreg, implicit $fpscr_rm
%42:gprlr = COPY %4
%23:gprlr = t2LoopDec %42:gprlr, 1
%7:gpr = COPY %23
diff --git a/llvm/test/CodeGen/Thumb2/swp-fixedii.mir b/llvm/test/CodeGen/Thumb2/swp-fixedii.mir
index dd02703..7939717 100644
--- a/llvm/test/CodeGen/Thumb2/swp-fixedii.mir
+++ b/llvm/test/CodeGen/Thumb2/swp-fixedii.mir
@@ -83,7 +83,7 @@ body: |
; CHECK-NEXT: [[VLDRS2:%[0-9]+]]:spr = VLDRS [[COPY4]], 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep7)
; CHECK-NEXT: [[t2ADDri1:%[0-9]+]]:rgpr = t2ADDri [[COPY3]], 4, 14 /* CC::al */, $noreg, $noreg
; CHECK-NEXT: [[VLDRS3:%[0-9]+]]:spr = VLDRS [[COPY3]], 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep3)
- ; CHECK-NEXT: [[VMULS:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VMULS [[VLDRS3]], [[VLDRS2]], 14 /* CC::al */, $noreg
+ ; CHECK-NEXT: [[VMULS:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VMULS [[VLDRS3]], [[VLDRS2]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
; CHECK-NEXT: [[t2SUBri2:%[0-9]+]]:rgpr = t2SUBri [[COPY]], 1, 14 /* CC::al */, $noreg, def $cpsr
; CHECK-NEXT: [[COPY5:%[0-9]+]]:gprnopc = COPY [[t2SUBri2]]
; CHECK-NEXT: [[COPY6:%[0-9]+]]:gprnopc = COPY [[t2ADDri1]]
@@ -98,7 +98,7 @@ body: |
; CHECK-NEXT: [[VLDRS4:%[0-9]+]]:spr = VLDRS [[COPY7]], 1, 14 /* CC::al */, $noreg :: (load unknown-size from %ir.scevgep7, align 4)
; CHECK-NEXT: [[t2ADDri3:%[0-9]+]]:rgpr = t2ADDri [[COPY6]], 4, 14 /* CC::al */, $noreg, $noreg
; CHECK-NEXT: [[VLDRS5:%[0-9]+]]:spr = VLDRS [[COPY6]], 1, 14 /* CC::al */, $noreg :: (load unknown-size from %ir.scevgep3, align 4)
- ; CHECK-NEXT: [[VMULS1:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VMULS [[VLDRS5]], [[VLDRS4]], 14 /* CC::al */, $noreg
+ ; CHECK-NEXT: [[VMULS1:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VMULS [[VLDRS5]], [[VLDRS4]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
; CHECK-NEXT: [[t2SUBri3:%[0-9]+]]:rgpr = t2SUBri [[COPY5]], 1, 14 /* CC::al */, $noreg, def $cpsr
; CHECK-NEXT: [[COPY8:%[0-9]+]]:gpr = COPY [[t2SUBri3]]
; CHECK-NEXT: [[COPY9:%[0-9]+]]:gpr = COPY [[t2ADDri3]]
@@ -115,7 +115,7 @@ body: |
; CHECK-NEXT: [[PHI3:%[0-9]+]]:spr = PHI [[VLDRS1]], %bb.6, %43, %bb.7
; CHECK-NEXT: [[PHI4:%[0-9]+]]:spr = PHI [[VMULS1]], %bb.6, %52, %bb.7
; CHECK-NEXT: [[PHI5:%[0-9]+]]:spr = PHI [[VMULS]], %bb.6, [[PHI4]], %bb.7
- ; CHECK-NEXT: [[VADDS:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VADDS [[PHI5]], [[PHI3]], 14 /* CC::al */, $noreg
+ ; CHECK-NEXT: [[VADDS:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VADDS [[PHI5]], [[PHI3]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
; CHECK-NEXT: [[t2SUBri4:%[0-9]+]]:rgpr = t2SUBri [[PHI2]], 1, 14 /* CC::al */, $noreg, def $cpsr
; CHECK-NEXT: [[VLDRS6:%[0-9]+]]:spr = VLDRS [[PHI1]], 1, 14 /* CC::al */, $noreg :: (load unknown-size from %ir.scevgep3, align 4)
; CHECK-NEXT: [[VLDRS7:%[0-9]+]]:spr = VLDRS [[PHI]], 1, 14 /* CC::al */, $noreg :: (load unknown-size from %ir.scevgep7, align 4)
@@ -124,7 +124,7 @@ body: |
; CHECK-NEXT: [[COPY11:%[0-9]+]]:gpr = COPY [[t2ADDri4]]
; CHECK-NEXT: [[COPY12:%[0-9]+]]:gpr = COPY [[t2ADDri5]]
; CHECK-NEXT: [[COPY13:%[0-9]+]]:gpr = COPY [[t2SUBri4]]
- ; CHECK-NEXT: [[VMULS2:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VMULS [[VLDRS6]], [[VLDRS7]], 14 /* CC::al */, $noreg
+ ; CHECK-NEXT: [[VMULS2:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VMULS [[VLDRS6]], [[VLDRS7]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
; CHECK-NEXT: t2Bcc %bb.7, 1 /* CC::ne */, $cpsr
; CHECK-NEXT: t2B %bb.8, 14 /* CC::al */, $noreg
; CHECK-NEXT: {{ $}}
@@ -134,14 +134,14 @@ body: |
; CHECK-NEXT: [[PHI6:%[0-9]+]]:spr = PHI [[VLDRS1]], %bb.6, [[VADDS]], %bb.7
; CHECK-NEXT: [[PHI7:%[0-9]+]]:spr = PHI [[VMULS1]], %bb.6, [[VMULS2]], %bb.7
; CHECK-NEXT: [[PHI8:%[0-9]+]]:spr = PHI [[VMULS]], %bb.6, [[PHI4]], %bb.7
- ; CHECK-NEXT: [[VADDS1:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VADDS [[PHI8]], [[PHI6]], 14 /* CC::al */, $noreg
+ ; CHECK-NEXT: [[VADDS1:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VADDS [[PHI8]], [[PHI6]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.9:
; CHECK-NEXT: successors: %bb.4(0x80000000)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[PHI9:%[0-9]+]]:spr = PHI [[VLDRS1]], %bb.5, [[VADDS1]], %bb.8
; CHECK-NEXT: [[PHI10:%[0-9]+]]:spr = PHI [[VMULS]], %bb.5, [[PHI7]], %bb.8
- ; CHECK-NEXT: [[VADDS2:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VADDS [[PHI10]], [[PHI9]], 14 /* CC::al */, $noreg
+ ; CHECK-NEXT: [[VADDS2:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VADDS [[PHI10]], [[PHI9]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
; CHECK-NEXT: t2B %bb.4, 14 /* CC::al */, $noreg
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.4.for.end:
@@ -185,8 +185,8 @@ body: |
%19:spr = VLDRS %2, 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep7)
%20:rgpr = t2ADDri %3, 4, 14 /* CC::al */, $noreg, $noreg
%21:spr = VLDRS %3, 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep3)
- %22:spr = nnan ninf nsz arcp contract afn reassoc VMULS killed %21, killed %19, 14 /* CC::al */, $noreg
- %6:spr = nnan ninf nsz arcp contract afn reassoc VADDS killed %22, %5, 14 /* CC::al */, $noreg
+ %22:spr = nnan ninf nsz arcp contract afn reassoc VMULS killed %21, killed %19, 14 /* CC::al */, $noreg, implicit $fpscr_rm
+ %6:spr = nnan ninf nsz arcp contract afn reassoc VADDS killed %22, %5, 14 /* CC::al */, $noreg, implicit $fpscr_rm
%23:rgpr = t2SUBri %4, 1, 14 /* CC::al */, $noreg, def $cpsr
%7:gpr = COPY %23
%8:gpr = COPY %20
diff --git a/llvm/test/CodeGen/Thumb2/swp-regpressure.mir b/llvm/test/CodeGen/Thumb2/swp-regpressure.mir
index 2bcb0c9..955b53df 100644
--- a/llvm/test/CodeGen/Thumb2/swp-regpressure.mir
+++ b/llvm/test/CodeGen/Thumb2/swp-regpressure.mir
@@ -148,8 +148,8 @@ body: |
; CHECK-NEXT: [[VLDRS2:%[0-9]+]]:spr = VLDRS [[PHI]], 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep7)
; CHECK-NEXT: [[t2ADDri1:%[0-9]+]]:rgpr = t2ADDri [[PHI1]], 4, 14 /* CC::al */, $noreg, $noreg
; CHECK-NEXT: [[VLDRS3:%[0-9]+]]:spr = VLDRS [[PHI1]], 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep3)
- ; CHECK-NEXT: [[VMULS:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VMULS [[VLDRS3]], [[VLDRS2]], 14 /* CC::al */, $noreg
- ; CHECK-NEXT: [[VADDS:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VADDS [[VMULS]], [[PHI3]], 14 /* CC::al */, $noreg
+ ; CHECK-NEXT: [[VMULS:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VMULS [[VLDRS3]], [[VLDRS2]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
+ ; CHECK-NEXT: [[VADDS:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VADDS [[VMULS]], [[PHI3]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
; CHECK-NEXT: [[t2SUBri2:%[0-9]+]]:rgpr = t2SUBri [[PHI2]], 1, 14 /* CC::al */, $noreg, def $cpsr
; CHECK-NEXT: [[COPY5:%[0-9]+]]:gpr = COPY [[t2SUBri2]]
; CHECK-NEXT: [[COPY6:%[0-9]+]]:gpr = COPY [[t2ADDri1]]
@@ -236,8 +236,8 @@ body: |
%19:spr = VLDRS %2, 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep7)
%20:rgpr = t2ADDri %3, 4, 14 /* CC::al */, $noreg, $noreg
%21:spr = VLDRS %3, 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep3)
- %22:spr = nnan ninf nsz arcp contract afn reassoc VMULS killed %21, killed %19, 14 /* CC::al */, $noreg
- %6:spr = nnan ninf nsz arcp contract afn reassoc VADDS killed %22, %5, 14 /* CC::al */, $noreg
+ %22:spr = nnan ninf nsz arcp contract afn reassoc VMULS killed %21, killed %19, 14 /* CC::al */, $noreg, implicit $fpscr_rm
+ %6:spr = nnan ninf nsz arcp contract afn reassoc VADDS killed %22, %5, 14 /* CC::al */, $noreg, implicit $fpscr_rm
%23:rgpr = t2SUBri %4, 1, 14 /* CC::al */, $noreg, def $cpsr
%7:gpr = COPY %23
%8:gpr = COPY %20
@@ -314,24 +314,24 @@ body: |
; CHECK-NEXT: [[t2SUBri2:%[0-9]+]]:rgpr = t2SUBri [[COPY]], 1, 14 /* CC::al */, $noreg, def $cpsr
; CHECK-NEXT: [[COPY5:%[0-9]+]]:gprnopc = COPY [[t2SUBri2]]
; CHECK-NEXT: [[COPY6:%[0-9]+]]:rgpr = COPY [[COPY4]]
- ; CHECK-NEXT: dead %66:rgpr = COPY [[COPY4]]
- ; CHECK-NEXT: dead %67:rgpr = COPY [[COPY4]]
- ; CHECK-NEXT: dead %68:rgpr = COPY [[COPY4]]
- ; CHECK-NEXT: dead %69:rgpr = COPY [[COPY4]]
- ; CHECK-NEXT: dead %70:rgpr = COPY [[COPY4]]
- ; CHECK-NEXT: dead %71:rgpr = COPY [[COPY4]]
- ; CHECK-NEXT: dead %72:rgpr = COPY [[COPY4]]
- ; CHECK-NEXT: dead %73:rgpr = COPY [[COPY4]]
- ; CHECK-NEXT: dead %74:rgpr = COPY [[COPY4]]
- ; CHECK-NEXT: dead %75:rgpr = COPY [[COPY4]]
- ; CHECK-NEXT: dead %76:rgpr = COPY [[COPY4]]
- ; CHECK-NEXT: dead %77:rgpr = COPY [[COPY4]]
- ; CHECK-NEXT: dead %78:rgpr = COPY [[COPY4]]
- ; CHECK-NEXT: dead %79:rgpr = COPY [[COPY4]]
- ; CHECK-NEXT: dead %80:rgpr = COPY [[COPY4]]
- ; CHECK-NEXT: dead %81:rgpr = COPY [[COPY4]]
- ; CHECK-NEXT: dead %82:rgpr = COPY [[COPY4]]
- ; CHECK-NEXT: dead %83:rgpr = COPY [[COPY4]]
+ ; CHECK-NEXT: dead [[COPY7:%[0-9]+]]:rgpr = COPY [[COPY4]]
+ ; CHECK-NEXT: dead [[COPY8:%[0-9]+]]:rgpr = COPY [[COPY4]]
+ ; CHECK-NEXT: dead [[COPY9:%[0-9]+]]:rgpr = COPY [[COPY4]]
+ ; CHECK-NEXT: dead [[COPY10:%[0-9]+]]:rgpr = COPY [[COPY4]]
+ ; CHECK-NEXT: dead [[COPY11:%[0-9]+]]:rgpr = COPY [[COPY4]]
+ ; CHECK-NEXT: dead [[COPY12:%[0-9]+]]:rgpr = COPY [[COPY4]]
+ ; CHECK-NEXT: dead [[COPY13:%[0-9]+]]:rgpr = COPY [[COPY4]]
+ ; CHECK-NEXT: dead [[COPY14:%[0-9]+]]:rgpr = COPY [[COPY4]]
+ ; CHECK-NEXT: dead [[COPY15:%[0-9]+]]:rgpr = COPY [[COPY4]]
+ ; CHECK-NEXT: dead [[COPY16:%[0-9]+]]:rgpr = COPY [[COPY4]]
+ ; CHECK-NEXT: dead [[COPY17:%[0-9]+]]:rgpr = COPY [[COPY4]]
+ ; CHECK-NEXT: dead [[COPY18:%[0-9]+]]:rgpr = COPY [[COPY4]]
+ ; CHECK-NEXT: dead [[COPY19:%[0-9]+]]:rgpr = COPY [[COPY4]]
+ ; CHECK-NEXT: dead [[COPY20:%[0-9]+]]:rgpr = COPY [[COPY4]]
+ ; CHECK-NEXT: dead [[COPY21:%[0-9]+]]:rgpr = COPY [[COPY4]]
+ ; CHECK-NEXT: dead [[COPY22:%[0-9]+]]:rgpr = COPY [[COPY4]]
+ ; CHECK-NEXT: dead [[COPY23:%[0-9]+]]:rgpr = COPY [[COPY4]]
+ ; CHECK-NEXT: dead [[COPY24:%[0-9]+]]:rgpr = COPY [[COPY4]]
; CHECK-NEXT: t2Bcc %bb.9, 0 /* CC::eq */, $cpsr
; CHECK-NEXT: t2B %bb.6, 14 /* CC::al */, $noreg
; CHECK-NEXT: {{ $}}
@@ -342,82 +342,82 @@ body: |
; CHECK-NEXT: [[VLDRS2:%[0-9]+]]:spr = VLDRS [[COPY4]], 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep7)
; CHECK-NEXT: [[t2ADDri1:%[0-9]+]]:rgpr = t2ADDri [[COPY3]], 4, 14 /* CC::al */, $noreg, $noreg
; CHECK-NEXT: [[VLDRS3:%[0-9]+]]:spr = VLDRS [[COPY3]], 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep3)
- ; CHECK-NEXT: [[VMULS:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VMULS [[VLDRS3]], [[VLDRS2]], 14 /* CC::al */, $noreg
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:gpr = COPY [[t2ADDri1]]
- ; CHECK-NEXT: [[COPY8:%[0-9]+]]:gpr = COPY [[t2ADDri]]
+ ; CHECK-NEXT: [[VMULS:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VMULS [[VLDRS3]], [[VLDRS2]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
+ ; CHECK-NEXT: [[COPY25:%[0-9]+]]:gpr = COPY [[t2ADDri1]]
+ ; CHECK-NEXT: [[COPY26:%[0-9]+]]:gpr = COPY [[t2ADDri]]
; CHECK-NEXT: [[t2SUBri3:%[0-9]+]]:rgpr = t2SUBri [[COPY5]], 1, 14 /* CC::al */, $noreg, def $cpsr
- ; CHECK-NEXT: [[COPY9:%[0-9]+]]:gpr = COPY [[t2SUBri3]]
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:rgpr = COPY [[COPY6]]
- ; CHECK-NEXT: dead %94:rgpr = COPY [[COPY6]]
- ; CHECK-NEXT: dead %95:rgpr = COPY [[COPY6]]
- ; CHECK-NEXT: dead %96:rgpr = COPY [[COPY6]]
- ; CHECK-NEXT: dead %97:rgpr = COPY [[COPY6]]
- ; CHECK-NEXT: dead %98:rgpr = COPY [[COPY6]]
- ; CHECK-NEXT: dead %99:rgpr = COPY [[COPY6]]
- ; CHECK-NEXT: dead %100:rgpr = COPY [[COPY6]]
- ; CHECK-NEXT: dead %101:rgpr = COPY [[COPY6]]
- ; CHECK-NEXT: dead %102:rgpr = COPY [[COPY6]]
- ; CHECK-NEXT: dead %103:rgpr = COPY [[COPY6]]
- ; CHECK-NEXT: dead %104:rgpr = COPY [[COPY6]]
- ; CHECK-NEXT: dead %105:rgpr = COPY [[COPY6]]
- ; CHECK-NEXT: dead %106:rgpr = COPY [[COPY6]]
- ; CHECK-NEXT: dead %107:rgpr = COPY [[COPY6]]
- ; CHECK-NEXT: dead %108:rgpr = COPY [[COPY6]]
- ; CHECK-NEXT: dead %109:rgpr = COPY [[COPY6]]
- ; CHECK-NEXT: dead %110:rgpr = COPY [[COPY6]]
- ; CHECK-NEXT: dead %111:rgpr = COPY [[COPY6]]
+ ; CHECK-NEXT: [[COPY27:%[0-9]+]]:gpr = COPY [[t2SUBri3]]
+ ; CHECK-NEXT: [[COPY28:%[0-9]+]]:rgpr = COPY [[COPY6]]
+ ; CHECK-NEXT: dead [[COPY29:%[0-9]+]]:rgpr = COPY [[COPY6]]
+ ; CHECK-NEXT: dead [[COPY30:%[0-9]+]]:rgpr = COPY [[COPY6]]
+ ; CHECK-NEXT: dead [[COPY31:%[0-9]+]]:rgpr = COPY [[COPY6]]
+ ; CHECK-NEXT: dead [[COPY32:%[0-9]+]]:rgpr = COPY [[COPY6]]
+ ; CHECK-NEXT: dead [[COPY33:%[0-9]+]]:rgpr = COPY [[COPY6]]
+ ; CHECK-NEXT: dead [[COPY34:%[0-9]+]]:rgpr = COPY [[COPY6]]
+ ; CHECK-NEXT: dead [[COPY35:%[0-9]+]]:rgpr = COPY [[COPY6]]
+ ; CHECK-NEXT: dead [[COPY36:%[0-9]+]]:rgpr = COPY [[COPY6]]
+ ; CHECK-NEXT: dead [[COPY37:%[0-9]+]]:rgpr = COPY [[COPY6]]
+ ; CHECK-NEXT: dead [[COPY38:%[0-9]+]]:rgpr = COPY [[COPY6]]
+ ; CHECK-NEXT: dead [[COPY39:%[0-9]+]]:rgpr = COPY [[COPY6]]
+ ; CHECK-NEXT: dead [[COPY40:%[0-9]+]]:rgpr = COPY [[COPY6]]
+ ; CHECK-NEXT: dead [[COPY41:%[0-9]+]]:rgpr = COPY [[COPY6]]
+ ; CHECK-NEXT: dead [[COPY42:%[0-9]+]]:rgpr = COPY [[COPY6]]
+ ; CHECK-NEXT: dead [[COPY43:%[0-9]+]]:rgpr = COPY [[COPY6]]
+ ; CHECK-NEXT: dead [[COPY44:%[0-9]+]]:rgpr = COPY [[COPY6]]
+ ; CHECK-NEXT: dead [[COPY45:%[0-9]+]]:rgpr = COPY [[COPY6]]
+ ; CHECK-NEXT: dead [[COPY46:%[0-9]+]]:rgpr = COPY [[COPY6]]
; CHECK-NEXT: t2Bcc %bb.8, 0 /* CC::eq */, $cpsr
; CHECK-NEXT: t2B %bb.7, 14 /* CC::al */, $noreg
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.7.for.body:
; CHECK-NEXT: successors: %bb.8(0x04000000), %bb.7(0x7c000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[PHI:%[0-9]+]]:gprnopc = PHI [[COPY8]], %bb.6, %116, %bb.7
- ; CHECK-NEXT: [[PHI1:%[0-9]+]]:gprnopc = PHI [[COPY7]], %bb.6, %117, %bb.7
- ; CHECK-NEXT: [[PHI2:%[0-9]+]]:gprnopc = PHI [[COPY9]], %bb.6, %140, %bb.7
+ ; CHECK-NEXT: [[PHI:%[0-9]+]]:gprnopc = PHI [[COPY26]], %bb.6, %116, %bb.7
+ ; CHECK-NEXT: [[PHI1:%[0-9]+]]:gprnopc = PHI [[COPY25]], %bb.6, %117, %bb.7
+ ; CHECK-NEXT: [[PHI2:%[0-9]+]]:gprnopc = PHI [[COPY27]], %bb.6, %140, %bb.7
; CHECK-NEXT: [[PHI3:%[0-9]+]]:spr = PHI [[VLDRS1]], %bb.6, %137, %bb.7
- ; CHECK-NEXT: [[PHI4:%[0-9]+]]:gprnopc = PHI [[COPY10]], %bb.6, %139, %bb.7
+ ; CHECK-NEXT: [[PHI4:%[0-9]+]]:gprnopc = PHI [[COPY28]], %bb.6, %139, %bb.7
; CHECK-NEXT: [[PHI5:%[0-9]+]]:spr = PHI [[VMULS]], %bb.6, %118, %bb.7
; CHECK-NEXT: [[VLDRS4:%[0-9]+]]:spr = VLDRS [[PHI1]], 1, 14 /* CC::al */, $noreg :: (load unknown-size from %ir.scevgep3, align 4)
; CHECK-NEXT: [[VLDRS5:%[0-9]+]]:spr = VLDRS [[PHI]], 1, 14 /* CC::al */, $noreg :: (load unknown-size from %ir.scevgep7, align 4)
; CHECK-NEXT: [[t2ADDri2:%[0-9]+]]:rgpr = t2ADDri [[PHI]], 4, 14 /* CC::al */, $noreg, $noreg
; CHECK-NEXT: [[t2ADDri3:%[0-9]+]]:rgpr = t2ADDri [[PHI1]], 4, 14 /* CC::al */, $noreg, $noreg
- ; CHECK-NEXT: [[COPY11:%[0-9]+]]:gpr = COPY [[t2ADDri2]]
- ; CHECK-NEXT: [[COPY12:%[0-9]+]]:gpr = COPY [[t2ADDri3]]
- ; CHECK-NEXT: [[VMULS1:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VMULS [[VLDRS4]], [[VLDRS5]], 14 /* CC::al */, $noreg
- ; CHECK-NEXT: dead %119:rgpr = COPY [[PHI4]]
- ; CHECK-NEXT: dead %120:rgpr = COPY [[PHI4]]
- ; CHECK-NEXT: dead %121:rgpr = COPY [[PHI4]]
- ; CHECK-NEXT: dead %122:rgpr = COPY [[PHI4]]
- ; CHECK-NEXT: dead %123:rgpr = COPY [[PHI4]]
- ; CHECK-NEXT: dead %124:rgpr = COPY [[PHI4]]
- ; CHECK-NEXT: dead %125:rgpr = COPY [[PHI4]]
- ; CHECK-NEXT: dead %126:rgpr = COPY [[PHI4]]
- ; CHECK-NEXT: dead %127:rgpr = COPY [[PHI4]]
- ; CHECK-NEXT: dead %128:rgpr = COPY [[PHI4]]
- ; CHECK-NEXT: dead %129:rgpr = COPY [[PHI4]]
- ; CHECK-NEXT: dead %130:rgpr = COPY [[PHI4]]
- ; CHECK-NEXT: dead %131:rgpr = COPY [[PHI4]]
- ; CHECK-NEXT: dead %132:rgpr = COPY [[PHI4]]
- ; CHECK-NEXT: dead %133:rgpr = COPY [[PHI4]]
- ; CHECK-NEXT: dead %134:rgpr = COPY [[PHI4]]
- ; CHECK-NEXT: dead %135:rgpr = COPY [[PHI4]]
- ; CHECK-NEXT: dead %136:rgpr = COPY [[PHI4]]
- ; CHECK-NEXT: [[VADDS:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VADDS [[PHI5]], [[PHI3]], 14 /* CC::al */, $noreg
+ ; CHECK-NEXT: [[COPY47:%[0-9]+]]:gpr = COPY [[t2ADDri2]]
+ ; CHECK-NEXT: [[COPY48:%[0-9]+]]:gpr = COPY [[t2ADDri3]]
+ ; CHECK-NEXT: [[VMULS1:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VMULS [[VLDRS4]], [[VLDRS5]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
+ ; CHECK-NEXT: dead [[COPY49:%[0-9]+]]:rgpr = COPY [[PHI4]]
+ ; CHECK-NEXT: dead [[COPY50:%[0-9]+]]:rgpr = COPY [[PHI4]]
+ ; CHECK-NEXT: dead [[COPY51:%[0-9]+]]:rgpr = COPY [[PHI4]]
+ ; CHECK-NEXT: dead [[COPY52:%[0-9]+]]:rgpr = COPY [[PHI4]]
+ ; CHECK-NEXT: dead [[COPY53:%[0-9]+]]:rgpr = COPY [[PHI4]]
+ ; CHECK-NEXT: dead [[COPY54:%[0-9]+]]:rgpr = COPY [[PHI4]]
+ ; CHECK-NEXT: dead [[COPY55:%[0-9]+]]:rgpr = COPY [[PHI4]]
+ ; CHECK-NEXT: dead [[COPY56:%[0-9]+]]:rgpr = COPY [[PHI4]]
+ ; CHECK-NEXT: dead [[COPY57:%[0-9]+]]:rgpr = COPY [[PHI4]]
+ ; CHECK-NEXT: dead [[COPY58:%[0-9]+]]:rgpr = COPY [[PHI4]]
+ ; CHECK-NEXT: dead [[COPY59:%[0-9]+]]:rgpr = COPY [[PHI4]]
+ ; CHECK-NEXT: dead [[COPY60:%[0-9]+]]:rgpr = COPY [[PHI4]]
+ ; CHECK-NEXT: dead [[COPY61:%[0-9]+]]:rgpr = COPY [[PHI4]]
+ ; CHECK-NEXT: dead [[COPY62:%[0-9]+]]:rgpr = COPY [[PHI4]]
+ ; CHECK-NEXT: dead [[COPY63:%[0-9]+]]:rgpr = COPY [[PHI4]]
+ ; CHECK-NEXT: dead [[COPY64:%[0-9]+]]:rgpr = COPY [[PHI4]]
+ ; CHECK-NEXT: dead [[COPY65:%[0-9]+]]:rgpr = COPY [[PHI4]]
+ ; CHECK-NEXT: dead [[COPY66:%[0-9]+]]:rgpr = COPY [[PHI4]]
+ ; CHECK-NEXT: [[VADDS:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VADDS [[PHI5]], [[PHI3]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
; CHECK-NEXT: [[t2SUBri4:%[0-9]+]]:rgpr = t2SUBri [[PHI2]], 1, 14 /* CC::al */, $noreg, def $cpsr
- ; CHECK-NEXT: [[COPY13:%[0-9]+]]:rgpr = COPY [[PHI4]]
- ; CHECK-NEXT: [[COPY14:%[0-9]+]]:gpr = COPY [[t2SUBri4]]
+ ; CHECK-NEXT: [[COPY67:%[0-9]+]]:rgpr = COPY [[PHI4]]
+ ; CHECK-NEXT: [[COPY68:%[0-9]+]]:gpr = COPY [[t2SUBri4]]
; CHECK-NEXT: t2Bcc %bb.7, 1 /* CC::ne */, $cpsr
; CHECK-NEXT: t2B %bb.8, 14 /* CC::al */, $noreg
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.8:
; CHECK-NEXT: successors: %bb.9(0x80000000)
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[PHI6:%[0-9]+]]:gprnopc = PHI [[COPY8]], %bb.6, [[COPY11]], %bb.7
- ; CHECK-NEXT: [[PHI7:%[0-9]+]]:gprnopc = PHI [[COPY7]], %bb.6, [[COPY12]], %bb.7
+ ; CHECK-NEXT: [[PHI6:%[0-9]+]]:gprnopc = PHI [[COPY26]], %bb.6, [[COPY47]], %bb.7
+ ; CHECK-NEXT: [[PHI7:%[0-9]+]]:gprnopc = PHI [[COPY25]], %bb.6, [[COPY48]], %bb.7
; CHECK-NEXT: [[PHI8:%[0-9]+]]:spr = PHI [[VLDRS1]], %bb.6, [[VADDS]], %bb.7
; CHECK-NEXT: [[PHI9:%[0-9]+]]:spr = PHI [[VMULS]], %bb.6, [[VMULS1]], %bb.7
- ; CHECK-NEXT: [[VADDS1:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VADDS [[PHI9]], [[PHI8]], 14 /* CC::al */, $noreg
+ ; CHECK-NEXT: [[VADDS1:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VADDS [[PHI9]], [[PHI8]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.9:
; CHECK-NEXT: successors: %bb.4(0x80000000)
@@ -427,8 +427,8 @@ body: |
; CHECK-NEXT: [[PHI12:%[0-9]+]]:spr = PHI [[VLDRS1]], %bb.5, [[VADDS1]], %bb.8
; CHECK-NEXT: [[VLDRS6:%[0-9]+]]:spr = VLDRS [[PHI10]], 1, 14 /* CC::al */, $noreg :: (load unknown-size from %ir.scevgep7, align 4)
; CHECK-NEXT: [[VLDRS7:%[0-9]+]]:spr = VLDRS [[PHI11]], 1, 14 /* CC::al */, $noreg :: (load unknown-size from %ir.scevgep3, align 4)
- ; CHECK-NEXT: [[VMULS2:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VMULS [[VLDRS7]], [[VLDRS6]], 14 /* CC::al */, $noreg
- ; CHECK-NEXT: [[VADDS2:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VADDS [[VMULS2]], [[PHI12]], 14 /* CC::al */, $noreg
+ ; CHECK-NEXT: [[VMULS2:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VMULS [[VLDRS7]], [[VLDRS6]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
+ ; CHECK-NEXT: [[VADDS2:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VADDS [[VMULS2]], [[PHI12]], 14 /* CC::al */, $noreg, implicit $fpscr_rm
; CHECK-NEXT: t2B %bb.4, 14 /* CC::al */, $noreg
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.4.for.end:
@@ -491,8 +491,8 @@ body: |
%19:spr = VLDRS %2, 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep7)
%20:rgpr = t2ADDri %3, 4, 14 /* CC::al */, $noreg, $noreg
%21:spr = VLDRS %3, 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep3)
- %22:spr = nnan ninf nsz arcp contract afn reassoc VMULS killed %21, killed %19, 14 /* CC::al */, $noreg
- %6:spr = nnan ninf nsz arcp contract afn reassoc VADDS killed %22, %5, 14 /* CC::al */, $noreg
+ %22:spr = nnan ninf nsz arcp contract afn reassoc VMULS killed %21, killed %19, 14 /* CC::al */, $noreg, implicit $fpscr_rm
+ %6:spr = nnan ninf nsz arcp contract afn reassoc VADDS killed %22, %5, 14 /* CC::al */, $noreg, implicit $fpscr_rm
%23:rgpr = t2SUBri %4, 1, 14 /* CC::al */, $noreg, def $cpsr
%7:gpr = COPY %23
%8:gpr = COPY %20
diff --git a/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll b/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll
index 52f57dc..a8d37be 100644
--- a/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll
+++ b/llvm/test/CodeGen/WebAssembly/fpclamptosat_vec.ll
@@ -434,7 +434,6 @@ entry:
define <8 x i16> @stest_f16i16(<8 x half> %x) {
; CHECK-LABEL: stest_f16i16:
; CHECK: .functype stest_f16i16 (f32, f32, f32, f32, f32, f32, f32, f32) -> (v128)
-; CHECK-NEXT: .local v128, v128, v128
; CHECK-NEXT: # %bb.0: # %entry
; CHECK-NEXT: local.get 5
; CHECK-NEXT: call __truncsfhf2
@@ -474,15 +473,6 @@ define <8 x i16> @stest_f16i16(<8 x half> %x) {
; CHECK-NEXT: call __extendhfsf2
; CHECK-NEXT: i32.trunc_sat_f32_s
; CHECK-NEXT: i32x4.replace_lane 3
-; CHECK-NEXT: v128.const 32767, 32767, 32767, 32767
-; CHECK-NEXT: local.tee 8
-; CHECK-NEXT: i32x4.min_s
-; CHECK-NEXT: v128.const -32768, -32768, -32768, -32768
-; CHECK-NEXT: local.tee 9
-; CHECK-NEXT: i32x4.max_s
-; CHECK-NEXT: v128.const 65535, 65535, 65535, 65535
-; CHECK-NEXT: local.tee 10
-; CHECK-NEXT: v128.and
; CHECK-NEXT: local.get 4
; CHECK-NEXT: i32.trunc_sat_f32_s
; CHECK-NEXT: i32x4.splat
@@ -495,13 +485,7 @@ define <8 x i16> @stest_f16i16(<8 x half> %x) {
; CHECK-NEXT: local.get 7
; CHECK-NEXT: i32.trunc_sat_f32_s
; CHECK-NEXT: i32x4.replace_lane 3
-; CHECK-NEXT: local.get 8
-; CHECK-NEXT: i32x4.min_s
-; CHECK-NEXT: local.get 9
-; CHECK-NEXT: i32x4.max_s
-; CHECK-NEXT: local.get 10
-; CHECK-NEXT: v128.and
-; CHECK-NEXT: i16x8.narrow_i32x4_u
+; CHECK-NEXT: i16x8.narrow_i32x4_s
; CHECK-NEXT: # fallthrough-return
entry:
%conv = fptosi <8 x half> %x to <8 x i32>
@@ -516,7 +500,6 @@ entry:
define <8 x i16> @utest_f16i16(<8 x half> %x) {
; CHECK-LABEL: utest_f16i16:
; CHECK: .functype utest_f16i16 (f32, f32, f32, f32, f32, f32, f32, f32) -> (v128)
-; CHECK-NEXT: .local v128
; CHECK-NEXT: # %bb.0: # %entry
; CHECK-NEXT: local.get 5
; CHECK-NEXT: call __truncsfhf2
@@ -556,9 +539,6 @@ define <8 x i16> @utest_f16i16(<8 x half> %x) {
; CHECK-NEXT: call __extendhfsf2
; CHECK-NEXT: i32.trunc_sat_f32_u
; CHECK-NEXT: i32x4.replace_lane 3
-; CHECK-NEXT: v128.const 65535, 65535, 65535, 65535
-; CHECK-NEXT: local.tee 8
-; CHECK-NEXT: i32x4.min_u
; CHECK-NEXT: local.get 4
; CHECK-NEXT: i32.trunc_sat_f32_u
; CHECK-NEXT: i32x4.splat
@@ -571,8 +551,6 @@ define <8 x i16> @utest_f16i16(<8 x half> %x) {
; CHECK-NEXT: local.get 7
; CHECK-NEXT: i32.trunc_sat_f32_u
; CHECK-NEXT: i32x4.replace_lane 3
-; CHECK-NEXT: local.get 8
-; CHECK-NEXT: i32x4.min_u
; CHECK-NEXT: i16x8.narrow_i32x4_u
; CHECK-NEXT: # fallthrough-return
entry:
@@ -1861,7 +1839,6 @@ entry:
define <8 x i16> @stest_f16i16_mm(<8 x half> %x) {
; CHECK-LABEL: stest_f16i16_mm:
; CHECK: .functype stest_f16i16_mm (f32, f32, f32, f32, f32, f32, f32, f32) -> (v128)
-; CHECK-NEXT: .local v128, v128, v128
; CHECK-NEXT: # %bb.0: # %entry
; CHECK-NEXT: local.get 5
; CHECK-NEXT: call __truncsfhf2
@@ -1901,15 +1878,6 @@ define <8 x i16> @stest_f16i16_mm(<8 x half> %x) {
; CHECK-NEXT: call __extendhfsf2
; CHECK-NEXT: i32.trunc_sat_f32_s
; CHECK-NEXT: i32x4.replace_lane 3
-; CHECK-NEXT: v128.const 32767, 32767, 32767, 32767
-; CHECK-NEXT: local.tee 8
-; CHECK-NEXT: i32x4.min_s
-; CHECK-NEXT: v128.const -32768, -32768, -32768, -32768
-; CHECK-NEXT: local.tee 9
-; CHECK-NEXT: i32x4.max_s
-; CHECK-NEXT: v128.const 65535, 65535, 65535, 65535
-; CHECK-NEXT: local.tee 10
-; CHECK-NEXT: v128.and
; CHECK-NEXT: local.get 4
; CHECK-NEXT: i32.trunc_sat_f32_s
; CHECK-NEXT: i32x4.splat
@@ -1922,13 +1890,7 @@ define <8 x i16> @stest_f16i16_mm(<8 x half> %x) {
; CHECK-NEXT: local.get 7
; CHECK-NEXT: i32.trunc_sat_f32_s
; CHECK-NEXT: i32x4.replace_lane 3
-; CHECK-NEXT: local.get 8
-; CHECK-NEXT: i32x4.min_s
-; CHECK-NEXT: local.get 9
-; CHECK-NEXT: i32x4.max_s
-; CHECK-NEXT: local.get 10
-; CHECK-NEXT: v128.and
-; CHECK-NEXT: i16x8.narrow_i32x4_u
+; CHECK-NEXT: i16x8.narrow_i32x4_s
; CHECK-NEXT: # fallthrough-return
entry:
%conv = fptosi <8 x half> %x to <8 x i32>
@@ -1941,7 +1903,6 @@ entry:
define <8 x i16> @utest_f16i16_mm(<8 x half> %x) {
; CHECK-LABEL: utest_f16i16_mm:
; CHECK: .functype utest_f16i16_mm (f32, f32, f32, f32, f32, f32, f32, f32) -> (v128)
-; CHECK-NEXT: .local v128
; CHECK-NEXT: # %bb.0: # %entry
; CHECK-NEXT: local.get 5
; CHECK-NEXT: call __truncsfhf2
@@ -1981,9 +1942,6 @@ define <8 x i16> @utest_f16i16_mm(<8 x half> %x) {
; CHECK-NEXT: call __extendhfsf2
; CHECK-NEXT: i32.trunc_sat_f32_u
; CHECK-NEXT: i32x4.replace_lane 3
-; CHECK-NEXT: v128.const 65535, 65535, 65535, 65535
-; CHECK-NEXT: local.tee 8
-; CHECK-NEXT: i32x4.min_u
; CHECK-NEXT: local.get 4
; CHECK-NEXT: i32.trunc_sat_f32_u
; CHECK-NEXT: i32x4.splat
@@ -1996,8 +1954,6 @@ define <8 x i16> @utest_f16i16_mm(<8 x half> %x) {
; CHECK-NEXT: local.get 7
; CHECK-NEXT: i32.trunc_sat_f32_u
; CHECK-NEXT: i32x4.replace_lane 3
-; CHECK-NEXT: local.get 8
-; CHECK-NEXT: i32x4.min_u
; CHECK-NEXT: i16x8.narrow_i32x4_u
; CHECK-NEXT: # fallthrough-return
entry:
diff --git a/llvm/test/CodeGen/WebAssembly/saturating-truncation.ll b/llvm/test/CodeGen/WebAssembly/saturating-truncation.ll
new file mode 100644
index 0000000..f3f3ba9
--- /dev/null
+++ b/llvm/test/CodeGen/WebAssembly/saturating-truncation.ll
@@ -0,0 +1,87 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+
+; RUN: llc < %s -verify-machineinstrs -mattr=+simd128 | FileCheck %s
+
+target triple = "wasm32-unknown-unknown"
+
+declare <8 x i32> @llvm.smin.v8i32(<8 x i32>, <8 x i32>) #2
+declare <8 x i32> @llvm.smax.v8i32(<8 x i32>, <8 x i32>) #2
+
+define <16 x i8> @i16_signed(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: i16_signed:
+; CHECK: .functype i16_signed (v128, v128) -> (v128)
+; CHECK-NEXT: # %bb.0: # %bb2
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i8x16.narrow_i16x8_s
+; CHECK-NEXT: # fallthrough-return
+bb2:
+ %0 = shufflevector <8 x i16> %a, <8 x i16> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %1 = tail call <16 x i16> @llvm.smax.v16i16(<16 x i16> %0, <16 x i16> splat (i16 -128))
+ %2 = tail call <16 x i16> @llvm.smin.v16i16(<16 x i16> %1, <16 x i16> splat (i16 127))
+ %3 = trunc nsw <16 x i16> %2 to <16 x i8>
+ ret <16 x i8> %3
+ ret <16 x i8> %3
+}
+
+define <8 x i16> @i32_signed(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: i32_signed:
+; CHECK: .functype i32_signed (v128, v128) -> (v128)
+; CHECK-NEXT: # %bb.0: # %bb2
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i16x8.narrow_i32x4_s
+; CHECK-NEXT: # fallthrough-return
+bb2:
+ %0 = shufflevector <4 x i32> %a, <4 x i32> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %1 = tail call <8 x i32> @llvm.smax.v8i32(<8 x i32> %0, <8 x i32> splat (i32 -32768))
+ %2 = tail call <8 x i32> @llvm.smin.v8i32(<8 x i32> %1, <8 x i32> splat (i32 32767))
+ %3 = trunc nsw <8 x i32> %2 to <8 x i16>
+ ret <8 x i16> %3
+}
+
+define <8 x i16> @i32_signed_flipped(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: i32_signed_flipped:
+; CHECK: .functype i32_signed_flipped (v128, v128) -> (v128)
+; CHECK-NEXT: # %bb.0: # %bb2
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i16x8.narrow_i32x4_s
+; CHECK-NEXT: # fallthrough-return
+bb2:
+ %0 = shufflevector <4 x i32> %a, <4 x i32> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %1 = tail call <8 x i32> @llvm.smin.v8i32(<8 x i32> splat (i32 32767), <8 x i32> %0)
+ %2 = tail call <8 x i32> @llvm.smax.v8i32(<8 x i32> splat (i32 -32768), <8 x i32> %1)
+ %3 = trunc nsw <8 x i32> %2 to <8 x i16>
+ ret <8 x i16> %3
+}
+
+define <16 x i8> @i16_unsigned(<8 x i16> %a, <8 x i16> %b) {
+; CHECK-LABEL: i16_unsigned:
+; CHECK: .functype i16_unsigned (v128, v128) -> (v128)
+; CHECK-NEXT: # %bb.0: # %bb2
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i8x16.narrow_i16x8_u
+; CHECK-NEXT: # fallthrough-return
+bb2:
+ %0 = shufflevector <8 x i16> %a, <8 x i16> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ %1 = tail call <16 x i16> @llvm.umin.v16i16(<16 x i16> %0, <16 x i16> splat (i16 255))
+ %2 = trunc nuw <16 x i16> %1 to <16 x i8>
+ ret <16 x i8> %2
+}
+
+define <8 x i16> @i32_unsigned(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: i32_unsigned:
+; CHECK: .functype i32_unsigned (v128, v128) -> (v128)
+; CHECK-NEXT: # %bb.0: # %bb2
+; CHECK-NEXT: local.get 0
+; CHECK-NEXT: local.get 1
+; CHECK-NEXT: i16x8.narrow_i32x4_u
+; CHECK-NEXT: # fallthrough-return
+bb2:
+ %0 = shufflevector <4 x i32> %a, <4 x i32> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+ %1 = tail call <8 x i32> @llvm.umin.v8i32(<8 x i32> %0, <8 x i32> splat (i32 65535))
+ %2 = trunc nuw <8 x i32> %1 to <8 x i16>
+ ret <8 x i16> %2
+}
diff --git a/llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll b/llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll
index 172ff53..e562c4a 100644
--- a/llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-setcc-reductions.ll
@@ -132,4 +132,17 @@ define i32 @all_true_2_4_i32(<4 x i32> %v) {
ret i32 %conv3
}
+; Regression test for the intrinsic pattern matcher with nullary intrinsics
+define i64 @other_intrinsic() #0 {
+; CHECK-LABEL: other_intrinsic:
+; CHECK: .functype other_intrinsic () -> (i64)
+; CHECK-NEXT: # %bb.0: # %entry
+; CHECK-NEXT: global.get $push0=, __tls_align
+; CHECK-NEXT: return $pop0
+entry:
+ %0 = call i64 @llvm.wasm.tls.align.i64()
+ ret i64 %0
+}
+
+attributes #0 = { "target-features"="+atomics" }
diff --git a/llvm/test/CodeGen/X86/ptrtoaddr-fast-isel.ll b/llvm/test/CodeGen/X86/ptrtoaddr-fast-isel.ll
new file mode 100644
index 0000000..c302d41
--- /dev/null
+++ b/llvm/test/CodeGen/X86/ptrtoaddr-fast-isel.ll
@@ -0,0 +1,11 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=x86_64-linux-gnu -fast-isel -fast-isel-abort=1 < %s -o - | FileCheck %s
+
+define i64 @ptrtoaddr(ptr %p) {
+; CHECK-LABEL: ptrtoaddr:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movq %rdi, %rax
+; CHECK-NEXT: retq
+ %addr = ptrtoaddr ptr %p to i64
+ ret i64 %addr
+}