aboutsummaryrefslogtreecommitdiff
path: root/llvm/test/CodeGen
diff options
context:
space:
mode:
Diffstat (limited to 'llvm/test/CodeGen')
-rw-r--r--llvm/test/CodeGen/AArch64/aarch64-combine-fmul-fsub.mir58
-rw-r--r--llvm/test/CodeGen/AArch64/andcompare.ll3190
-rw-r--r--llvm/test/CodeGen/AArch64/andorbrcompare.ll532
-rw-r--r--llvm/test/CodeGen/AArch64/arm64-ccmp.ll852
-rw-r--r--llvm/test/CodeGen/AArch64/arm64-fml-combines.ll20
-rw-r--r--llvm/test/CodeGen/AArch64/cmp-chains.ll566
-rw-r--r--llvm/test/CodeGen/AArch64/dag-combine-select.ll88
-rw-r--r--llvm/test/CodeGen/AArch64/fcsel-zero.ll35
-rw-r--r--llvm/test/CodeGen/AArch64/fp16_intrinsic_scalar_1op.ll130
-rw-r--r--llvm/test/CodeGen/AArch64/fp16_intrinsic_scalar_2op.ll202
-rw-r--r--llvm/test/CodeGen/AArch64/late-taildup-computed-goto.ll162
-rw-r--r--llvm/test/CodeGen/AArch64/machine-combiner-reassociate.mir22
-rw-r--r--llvm/test/CodeGen/AArch64/machine-combiner.ll887
-rw-r--r--llvm/test/CodeGen/AArch64/machine-combiner.mir6
-rw-r--r--llvm/test/CodeGen/AArch64/preferred-function-alignment.ll7
-rw-r--r--llvm/test/CodeGen/AArch64/sched-past-vector-ldst.ll18
-rw-r--r--llvm/test/CodeGen/AArch64/sqrt-fastmath.ll53
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul-post-legalize.mir2772
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul-pre-legalize.mir2496
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul.ll891
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-unmerge-values.mir40
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f32.ll8
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f64.ll4
-rw-r--r--llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fptrunc.mir121
-rw-r--r--llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll3195
-rw-r--r--llvm/test/CodeGen/AMDGPU/fptrunc.ll1173
-rw-r--r--llvm/test/CodeGen/AMDGPU/gfx1250-no-scope-cu-stores.ll100
-rw-r--r--llvm/test/CodeGen/AMDGPU/gfx1250-scratch-scope-se.ll95
-rw-r--r--llvm/test/CodeGen/AMDGPU/global-load-xcnt.ll88
-rw-r--r--llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx942.ll4
-rw-r--r--llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx950.bf16.ll264
-rw-r--r--llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx950.ll1994
-rw-r--r--llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.scale.f32.16x16x128.f8f6f4.ll292
-rw-r--r--llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.scale.f32.32x32x64.f8f6f4.ll630
-rw-r--r--llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.xf32.gfx942.ll126
-rw-r--r--llvm/test/CodeGen/AMDGPU/llvm.amdgcn.smfmac.gfx950.ll340
-rw-r--r--llvm/test/CodeGen/AMDGPU/mad-mix-hi-bf16.ll1
-rw-r--r--llvm/test/CodeGen/AMDGPU/memory-legalizer-local-volatile.ll20
-rw-r--r--llvm/test/CodeGen/AMDGPU/partial-regcopy-and-spill-missed-at-regalloc.ll38
-rw-r--r--llvm/test/CodeGen/AMDGPU/ssubo.ll676
-rw-r--r--llvm/test/CodeGen/AMDGPU/uaddo.ll553
-rw-r--r--llvm/test/CodeGen/AMDGPU/usubo.ll499
-rw-r--r--llvm/test/CodeGen/AMDGPU/wwm-regalloc-error.ll2
-rw-r--r--llvm/test/CodeGen/ARM/fp16.ll4
-rw-r--r--llvm/test/CodeGen/ARM/preferred-function-alignment.ll8
-rw-r--r--llvm/test/CodeGen/BPF/BTF/map-def-2.ll61
-rw-r--r--llvm/test/CodeGen/BPF/BTF/map-def-3.ll42
-rw-r--r--llvm/test/CodeGen/BPF/BTF/map-def-nested-array.ll75
-rw-r--r--llvm/test/CodeGen/Hexagon/hvx-reuse-fi-base.ll5
-rw-r--r--llvm/test/CodeGen/LoongArch/lasx/build-vector.ll215
-rw-r--r--llvm/test/CodeGen/LoongArch/lsx/build-vector.ll74
-rw-r--r--llvm/test/CodeGen/MIR/AMDGPU/noalias-addrspace-expect-id.mir29
-rw-r--r--llvm/test/CodeGen/MIR/AMDGPU/noalias-addrspace-parse.mir36
-rw-r--r--llvm/test/CodeGen/MIR/AMDGPU/noalias-addrspace-undefine-matadata.mir28
-rw-r--r--llvm/test/CodeGen/MIR/X86/call-site-info-ambiguous-indirect-call-typeid.mir31
-rw-r--r--llvm/test/CodeGen/MIR/X86/call-site-info-direct-calls-typeid.mir54
-rw-r--r--llvm/test/CodeGen/MIR/X86/call-site-info-typeid.mir28
-rw-r--r--llvm/test/CodeGen/Mips/abiflags-soft-float.ll12
-rw-r--r--llvm/test/CodeGen/Mips/nan_lowering.ll25
-rw-r--r--llvm/test/CodeGen/Mips/qnan.ll14
-rw-r--r--llvm/test/CodeGen/NVPTX/aggregate-return.ll48
-rw-r--r--llvm/test/CodeGen/NVPTX/bf16x2-instructions.ll6
-rw-r--r--llvm/test/CodeGen/NVPTX/byval-const-global.ll8
-rw-r--r--llvm/test/CodeGen/NVPTX/call-with-alloca-buffer.ll10
-rw-r--r--llvm/test/CodeGen/NVPTX/call_bitcast_byval.ll16
-rw-r--r--llvm/test/CodeGen/NVPTX/combine-mad.ll4
-rw-r--r--llvm/test/CodeGen/NVPTX/compare-int.ll621
-rw-r--r--llvm/test/CodeGen/NVPTX/convert-call-to-indirect.ll172
-rw-r--r--llvm/test/CodeGen/NVPTX/dynamic_stackalloc.ll4
-rw-r--r--llvm/test/CodeGen/NVPTX/f16x2-instructions.ll12
-rw-r--r--llvm/test/CodeGen/NVPTX/f32x2-instructions.ll12
-rw-r--r--llvm/test/CodeGen/NVPTX/fma.ll8
-rw-r--r--llvm/test/CodeGen/NVPTX/forward-ld-param.ll2
-rw-r--r--llvm/test/CodeGen/NVPTX/i128-param.ll20
-rw-r--r--llvm/test/CodeGen/NVPTX/i16x2-instructions.ll12
-rw-r--r--llvm/test/CodeGen/NVPTX/i8x2-instructions.ll121
-rw-r--r--llvm/test/CodeGen/NVPTX/i8x4-instructions.ll30
-rw-r--r--llvm/test/CodeGen/NVPTX/idioms.ll2
-rw-r--r--llvm/test/CodeGen/NVPTX/indirect_byval.ll20
-rw-r--r--llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll33
-rw-r--r--llvm/test/CodeGen/NVPTX/lower-args.ll10
-rw-r--r--llvm/test/CodeGen/NVPTX/lower-byval-args.ll2
-rw-r--r--llvm/test/CodeGen/NVPTX/misched_func_call.ll15
-rw-r--r--llvm/test/CodeGen/NVPTX/param-add.ll8
-rw-r--r--llvm/test/CodeGen/NVPTX/param-load-store.ll280
-rw-r--r--llvm/test/CodeGen/NVPTX/param-overalign.ll4
-rw-r--r--llvm/test/CodeGen/NVPTX/param-vectorize-device.ll28
-rw-r--r--llvm/test/CodeGen/NVPTX/proxy-reg-erasure.mir4
-rw-r--r--llvm/test/CodeGen/NVPTX/st-param-imm.ll201
-rw-r--r--llvm/test/CodeGen/NVPTX/store-undef.ll4
-rw-r--r--llvm/test/CodeGen/NVPTX/tex-read-cuda.ll2
-rw-r--r--llvm/test/CodeGen/NVPTX/unaligned-param-load-store.ll594
-rw-r--r--llvm/test/CodeGen/NVPTX/vaargs.ll16
-rw-r--r--llvm/test/CodeGen/NVPTX/variadics-backend.ll32
-rw-r--r--llvm/test/CodeGen/PowerPC/froundeven-legalization.ll111
-rw-r--r--llvm/test/CodeGen/RISCV/rvv/vlseg-rv32.ll2923
-rw-r--r--llvm/test/CodeGen/RISCV/rvv/vlseg-rv64.ll2979
-rw-r--r--llvm/test/CodeGen/WebAssembly/lower-em-sjlj-alloca.ll129
-rw-r--r--llvm/test/CodeGen/WebAssembly/lower-em-sjlj-debuginfo.ll4
-rw-r--r--llvm/test/CodeGen/WebAssembly/lower-em-sjlj.ll6
-rw-r--r--llvm/test/CodeGen/WebAssembly/lower-wasm-ehsjlj.ll2
-rw-r--r--llvm/test/CodeGen/WebAssembly/lower-wasm-sjlj.ll6
-rw-r--r--llvm/test/CodeGen/WebAssembly/simd-arith.ll136
-rw-r--r--llvm/test/CodeGen/WebAssembly/simd-relaxed-fnma.ll145
-rw-r--r--llvm/test/CodeGen/WebAssembly/vector-reduce.ll56
-rw-r--r--llvm/test/CodeGen/X86/embed-bitcode.ll13
-rw-r--r--llvm/test/CodeGen/X86/isel-fpclass.ll526
-rw-r--r--llvm/test/CodeGen/X86/late-tail-dup-computed-goto.mir128
-rw-r--r--llvm/test/CodeGen/X86/swap.ll15
109 files changed, 19959 insertions, 13572 deletions
diff --git a/llvm/test/CodeGen/AArch64/aarch64-combine-fmul-fsub.mir b/llvm/test/CodeGen/AArch64/aarch64-combine-fmul-fsub.mir
index cf4f321..491d693 100644
--- a/llvm/test/CodeGen/AArch64/aarch64-combine-fmul-fsub.mir
+++ b/llvm/test/CodeGen/AArch64/aarch64-combine-fmul-fsub.mir
@@ -1,8 +1,8 @@
-# RUN: llc -run-pass=machine-combiner -o - -mtriple=aarch64-unknown-linux -mcpu=cortex-a57 -enable-unsafe-fp-math -machine-combiner-verify-pattern-order=true %s | FileCheck --check-prefixes=UNPROFITABLE,ALL %s
-# RUN: llc -run-pass=machine-combiner -o - -mtriple=aarch64-unknown-linux -mcpu=falkor -enable-unsafe-fp-math %s -machine-combiner-verify-pattern-order=true | FileCheck --check-prefixes=PROFITABLE,ALL %s
-# RUN: llc -run-pass=machine-combiner -o - -mtriple=aarch64-unknown-linux -mcpu=exynos-m3 -enable-unsafe-fp-math -machine-combiner-verify-pattern-order=true %s | FileCheck --check-prefixes=PROFITABLE,ALL %s
-# RUN: llc -run-pass=machine-combiner -o - -mtriple=aarch64-unknown-linux -mcpu=thunderx2t99 -enable-unsafe-fp-math -machine-combiner-verify-pattern-order=true %s | FileCheck --check-prefixes=PROFITABLE,ALL %s
-# RUN: llc -run-pass=machine-combiner -o - -mtriple=aarch64-unknown-linux -mcpu=thunderx3t110 -enable-unsafe-fp-math -machine-combiner-verify-pattern-order=true %s | FileCheck --check-prefixes=PROFITABLE,ALL %s
+# RUN: llc -run-pass=machine-combiner -o - -mtriple=aarch64-unknown-linux -mcpu=cortex-a57 -machine-combiner-verify-pattern-order=true %s | FileCheck --check-prefixes=UNPROFITABLE,ALL %s
+# RUN: llc -run-pass=machine-combiner -o - -mtriple=aarch64-unknown-linux -mcpu=falkor %s -machine-combiner-verify-pattern-order=true | FileCheck --check-prefixes=PROFITABLE,ALL %s
+# RUN: llc -run-pass=machine-combiner -o - -mtriple=aarch64-unknown-linux -mcpu=exynos-m3 -machine-combiner-verify-pattern-order=true %s | FileCheck --check-prefixes=PROFITABLE,ALL %s
+# RUN: llc -run-pass=machine-combiner -o - -mtriple=aarch64-unknown-linux -mcpu=thunderx2t99 -machine-combiner-verify-pattern-order=true %s | FileCheck --check-prefixes=PROFITABLE,ALL %s
+# RUN: llc -run-pass=machine-combiner -o - -mtriple=aarch64-unknown-linux -mcpu=thunderx3t110 -machine-combiner-verify-pattern-order=true %s | FileCheck --check-prefixes=PROFITABLE,ALL %s
#
name: f1_2s
registers:
@@ -16,18 +16,18 @@ body: |
%2:fpr64 = COPY $d2
%1:fpr64 = COPY $d1
%0:fpr64 = COPY $d0
- %3:fpr64 = FMULv2f32 %0, %1, implicit $fpcr
- %4:fpr64 = FSUBv2f32 killed %3, %2, implicit $fpcr
+ %3:fpr64 = contract FMULv2f32 %0, %1, implicit $fpcr
+ %4:fpr64 = contract FSUBv2f32 killed %3, %2, implicit $fpcr
$d0 = COPY %4
RET_ReallyLR implicit $d0
...
# UNPROFITABLE-LABEL: name: f1_2s
-# UNPROFITABLE: [[R1:%[0-9]+]]:fpr64 = FNEGv2f32 %2
+# UNPROFITABLE: [[R1:%[0-9]+]]:fpr64 = contract FNEGv2f32 %2
# UNPROFITABLE-NEXT: FMLAv2f32 killed [[R1]], %0, %1, implicit $fpcr
#
# PROFITABLE-LABEL: name: f1_2s
-# PROFITABLE: [[R1:%[0-9]+]]:fpr64 = FNEGv2f32 %2
+# PROFITABLE: [[R1:%[0-9]+]]:fpr64 = contract FNEGv2f32 %2
# PROFITABLE-NEXT: FMLAv2f32 killed [[R1]], %0, %1, implicit $fpcr
---
name: f1_4s
@@ -42,18 +42,18 @@ body: |
%2:fpr128 = COPY $q2
%1:fpr128 = COPY $q1
%0:fpr128 = COPY $q0
- %3:fpr128 = FMULv4f32 %0, %1, implicit $fpcr
- %4:fpr128 = FSUBv4f32 killed %3, %2, implicit $fpcr
+ %3:fpr128 = contract FMULv4f32 %0, %1, implicit $fpcr
+ %4:fpr128 = contract FSUBv4f32 killed %3, %2, implicit $fpcr
$q0 = COPY %4
RET_ReallyLR implicit $q0
...
# UNPROFITABLE-LABEL: name: f1_4s
-# UNPROFITABLE: [[R1:%[0-9]+]]:fpr128 = FMULv4f32 %0, %1, implicit $fpcr
+# UNPROFITABLE: [[R1:%[0-9]+]]:fpr128 = contract FMULv4f32 %0, %1, implicit $fpcr
# UNPROFITABLE-NEXT: FSUBv4f32 killed [[R1]], %2, implicit $fpcr
#
# PROFITABLE-LABEL: name: f1_4s
-# PROFITABLE: [[R1:%[0-9]+]]:fpr128 = FNEGv4f32 %2
+# PROFITABLE: [[R1:%[0-9]+]]:fpr128 = contract FNEGv4f32 %2
# PROFITABLE-NEXT: FMLAv4f32 killed [[R1]], %0, %1, implicit $fpcr
---
name: f1_2d
@@ -68,18 +68,18 @@ body: |
%2:fpr128 = COPY $q2
%1:fpr128 = COPY $q1
%0:fpr128 = COPY $q0
- %3:fpr128 = FMULv2f64 %0, %1, implicit $fpcr
- %4:fpr128 = FSUBv2f64 killed %3, %2, implicit $fpcr
+ %3:fpr128 = contract FMULv2f64 %0, %1, implicit $fpcr
+ %4:fpr128 = contract FSUBv2f64 killed %3, %2, implicit $fpcr
$q0 = COPY %4
RET_ReallyLR implicit $q0
...
# UNPROFITABLE-LABEL: name: f1_2d
-# UNPROFITABLE: %3:fpr128 = FMULv2f64 %0, %1, implicit $fpcr
+# UNPROFITABLE: %3:fpr128 = contract FMULv2f64 %0, %1, implicit $fpcr
# UNPROFITABLE-NEXT: FSUBv2f64 killed %3, %2, implicit $fpcr
#
# PROFITABLE-LABEL: name: f1_2d
-# PROFITABLE: [[R1:%[0-9]+]]:fpr128 = FNEGv2f64 %2
+# PROFITABLE: [[R1:%[0-9]+]]:fpr128 = contract FNEGv2f64 %2
# PROFITABLE-NEXT: FMLAv2f64 killed [[R1]], %0, %1, implicit $fpcr
---
name: f1_both_fmul_2s
@@ -97,15 +97,15 @@ body: |
%2:fpr64 = COPY $q2
%1:fpr64 = COPY $q1
%0:fpr64 = COPY $q0
- %4:fpr64 = FMULv2f32 %0, %1, implicit $fpcr
- %5:fpr64 = FMULv2f32 %2, %3, implicit $fpcr
- %6:fpr64 = FSUBv2f32 killed %4, %5, implicit $fpcr
+ %4:fpr64 = contract FMULv2f32 %0, %1, implicit $fpcr
+ %5:fpr64 = contract FMULv2f32 %2, %3, implicit $fpcr
+ %6:fpr64 = contract FSUBv2f32 killed %4, %5, implicit $fpcr
$q0 = COPY %6
RET_ReallyLR implicit $q0
...
# ALL-LABEL: name: f1_both_fmul_2s
-# ALL: %4:fpr64 = FMULv2f32 %0, %1, implicit $fpcr
+# ALL: %4:fpr64 = contract FMULv2f32 %0, %1, implicit $fpcr
# ALL-NEXT: FMLSv2f32 killed %4, %2, %3, implicit $fpcr
---
name: f1_both_fmul_4s
@@ -123,15 +123,15 @@ body: |
%2:fpr128 = COPY $q2
%1:fpr128 = COPY $q1
%0:fpr128 = COPY $q0
- %4:fpr128 = FMULv4f32 %0, %1, implicit $fpcr
- %5:fpr128 = FMULv4f32 %2, %3, implicit $fpcr
- %6:fpr128 = FSUBv4f32 killed %4, %5, implicit $fpcr
+ %4:fpr128 = contract FMULv4f32 %0, %1, implicit $fpcr
+ %5:fpr128 = contract FMULv4f32 %2, %3, implicit $fpcr
+ %6:fpr128 = contract FSUBv4f32 killed %4, %5, implicit $fpcr
$q0 = COPY %6
RET_ReallyLR implicit $q0
...
# ALL-LABEL: name: f1_both_fmul_4s
-# ALL: %4:fpr128 = FMULv4f32 %0, %1, implicit $fpcr
+# ALL: %4:fpr128 = contract FMULv4f32 %0, %1, implicit $fpcr
# ALL-NEXT: FMLSv4f32 killed %4, %2, %3, implicit $fpcr
---
name: f1_both_fmul_2d
@@ -149,14 +149,14 @@ body: |
%2:fpr128 = COPY $q2
%1:fpr128 = COPY $q1
%0:fpr128 = COPY $q0
- %4:fpr128 = FMULv2f64 %0, %1, implicit $fpcr
- %5:fpr128 = FMULv2f64 %2, %3, implicit $fpcr
- %6:fpr128 = FSUBv2f64 killed %4, %5, implicit $fpcr
+ %4:fpr128 = contract FMULv2f64 %0, %1, implicit $fpcr
+ %5:fpr128 = contract FMULv2f64 %2, %3, implicit $fpcr
+ %6:fpr128 = contract FSUBv2f64 killed %4, %5, implicit $fpcr
$q0 = COPY %6
RET_ReallyLR implicit $q0
...
# ALL-LABEL: name: f1_both_fmul_2d
-# ALL: %4:fpr128 = FMULv2f64 %0, %1, implicit $fpcr
+# ALL: %4:fpr128 = contract FMULv2f64 %0, %1, implicit $fpcr
# ALL-NEXT: FMLSv2f64 killed %4, %2, %3, implicit $fpcr
diff --git a/llvm/test/CodeGen/AArch64/andcompare.ll b/llvm/test/CodeGen/AArch64/andcompare.ll
index cbacd17..0e15b94 100644
--- a/llvm/test/CodeGen/AArch64/andcompare.ll
+++ b/llvm/test/CodeGen/AArch64/andcompare.ll
@@ -1,23 +1,23 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64-none-elf -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,SDISEL
-; RUN: llc -mtriple=aarch64-none-elf -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,GISEL
+; RUN: llc -mtriple=aarch64-none-elf -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc -mtriple=aarch64-none-elf -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI
define i32 @and_eq_eq(i32 %s0, i32 %s1, i32 %s2, i32 %s3) {
-; SDISEL-LABEL: and_eq_eq:
-; SDISEL: // %bb.0: // %entry
-; SDISEL-NEXT: cmp w0, w1
-; SDISEL-NEXT: ccmp w2, w3, #0, eq
-; SDISEL-NEXT: cset w0, eq
-; SDISEL-NEXT: ret
-;
-; GISEL-LABEL: and_eq_eq:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: cmp w0, w1
-; GISEL-NEXT: cset w8, eq
-; GISEL-NEXT: cmp w2, w3
-; GISEL-NEXT: cset w9, eq
-; GISEL-NEXT: and w0, w8, w9
-; GISEL-NEXT: ret
+; CHECK-SD-LABEL: and_eq_eq:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: ccmp w2, w3, #0, eq
+; CHECK-SD-NEXT: cset w0, eq
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: and_eq_eq:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: cset w8, eq
+; CHECK-GI-NEXT: cmp w2, w3
+; CHECK-GI-NEXT: cset w9, eq
+; CHECK-GI-NEXT: and w0, w8, w9
+; CHECK-GI-NEXT: ret
entry:
%c0 = icmp eq i32 %s0, %s1
%c1 = icmp eq i32 %s2, %s3
@@ -27,21 +27,21 @@ entry:
}
define i32 @and_eq_ne(i32 %s0, i32 %s1, i32 %s2, i32 %s3) {
-; SDISEL-LABEL: and_eq_ne:
-; SDISEL: // %bb.0: // %entry
-; SDISEL-NEXT: cmp w0, w1
-; SDISEL-NEXT: ccmp w2, w3, #4, eq
-; SDISEL-NEXT: cset w0, ne
-; SDISEL-NEXT: ret
-;
-; GISEL-LABEL: and_eq_ne:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: cmp w0, w1
-; GISEL-NEXT: cset w8, eq
-; GISEL-NEXT: cmp w2, w3
-; GISEL-NEXT: cset w9, ne
-; GISEL-NEXT: and w0, w8, w9
-; GISEL-NEXT: ret
+; CHECK-SD-LABEL: and_eq_ne:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: ccmp w2, w3, #4, eq
+; CHECK-SD-NEXT: cset w0, ne
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: and_eq_ne:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: cset w8, eq
+; CHECK-GI-NEXT: cmp w2, w3
+; CHECK-GI-NEXT: cset w9, ne
+; CHECK-GI-NEXT: and w0, w8, w9
+; CHECK-GI-NEXT: ret
entry:
%c0 = icmp eq i32 %s0, %s1
%c1 = icmp ne i32 %s2, %s3
@@ -51,21 +51,21 @@ entry:
}
define i32 @and_eq_ult(i32 %s0, i32 %s1, i32 %s2, i32 %s3) {
-; SDISEL-LABEL: and_eq_ult:
-; SDISEL: // %bb.0: // %entry
-; SDISEL-NEXT: cmp w0, w1
-; SDISEL-NEXT: ccmp w2, w3, #2, eq
-; SDISEL-NEXT: cset w0, lo
-; SDISEL-NEXT: ret
-;
-; GISEL-LABEL: and_eq_ult:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: cmp w0, w1
-; GISEL-NEXT: cset w8, eq
-; GISEL-NEXT: cmp w2, w3
-; GISEL-NEXT: cset w9, lo
-; GISEL-NEXT: and w0, w8, w9
-; GISEL-NEXT: ret
+; CHECK-SD-LABEL: and_eq_ult:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: ccmp w2, w3, #2, eq
+; CHECK-SD-NEXT: cset w0, lo
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: and_eq_ult:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: cset w8, eq
+; CHECK-GI-NEXT: cmp w2, w3
+; CHECK-GI-NEXT: cset w9, lo
+; CHECK-GI-NEXT: and w0, w8, w9
+; CHECK-GI-NEXT: ret
entry:
%c0 = icmp eq i32 %s0, %s1
%c1 = icmp ult i32 %s2, %s3
@@ -75,21 +75,21 @@ entry:
}
define i32 @and_eq_ule(i32 %s0, i32 %s1, i32 %s2, i32 %s3) {
-; SDISEL-LABEL: and_eq_ule:
-; SDISEL: // %bb.0: // %entry
-; SDISEL-NEXT: cmp w0, w1
-; SDISEL-NEXT: ccmp w2, w3, #2, eq
-; SDISEL-NEXT: cset w0, ls
-; SDISEL-NEXT: ret
-;
-; GISEL-LABEL: and_eq_ule:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: cmp w0, w1
-; GISEL-NEXT: cset w8, eq
-; GISEL-NEXT: cmp w2, w3
-; GISEL-NEXT: cset w9, ls
-; GISEL-NEXT: and w0, w8, w9
-; GISEL-NEXT: ret
+; CHECK-SD-LABEL: and_eq_ule:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: ccmp w2, w3, #2, eq
+; CHECK-SD-NEXT: cset w0, ls
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: and_eq_ule:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: cset w8, eq
+; CHECK-GI-NEXT: cmp w2, w3
+; CHECK-GI-NEXT: cset w9, ls
+; CHECK-GI-NEXT: and w0, w8, w9
+; CHECK-GI-NEXT: ret
entry:
%c0 = icmp eq i32 %s0, %s1
%c1 = icmp ule i32 %s2, %s3
@@ -99,21 +99,21 @@ entry:
}
define i32 @and_eq_ugt(i32 %s0, i32 %s1, i32 %s2, i32 %s3) {
-; SDISEL-LABEL: and_eq_ugt:
-; SDISEL: // %bb.0: // %entry
-; SDISEL-NEXT: cmp w0, w1
-; SDISEL-NEXT: ccmp w2, w3, #0, eq
-; SDISEL-NEXT: cset w0, hi
-; SDISEL-NEXT: ret
-;
-; GISEL-LABEL: and_eq_ugt:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: cmp w0, w1
-; GISEL-NEXT: cset w8, eq
-; GISEL-NEXT: cmp w2, w3
-; GISEL-NEXT: cset w9, hi
-; GISEL-NEXT: and w0, w8, w9
-; GISEL-NEXT: ret
+; CHECK-SD-LABEL: and_eq_ugt:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: ccmp w2, w3, #0, eq
+; CHECK-SD-NEXT: cset w0, hi
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: and_eq_ugt:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: cset w8, eq
+; CHECK-GI-NEXT: cmp w2, w3
+; CHECK-GI-NEXT: cset w9, hi
+; CHECK-GI-NEXT: and w0, w8, w9
+; CHECK-GI-NEXT: ret
entry:
%c0 = icmp eq i32 %s0, %s1
%c1 = icmp ugt i32 %s2, %s3
@@ -123,21 +123,21 @@ entry:
}
define i32 @and_eq_uge(i32 %s0, i32 %s1, i32 %s2, i32 %s3) {
-; SDISEL-LABEL: and_eq_uge:
-; SDISEL: // %bb.0: // %entry
-; SDISEL-NEXT: cmp w0, w1
-; SDISEL-NEXT: ccmp w2, w3, #0, eq
-; SDISEL-NEXT: cset w0, hs
-; SDISEL-NEXT: ret
-;
-; GISEL-LABEL: and_eq_uge:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: cmp w0, w1
-; GISEL-NEXT: cset w8, eq
-; GISEL-NEXT: cmp w2, w3
-; GISEL-NEXT: cset w9, hs
-; GISEL-NEXT: and w0, w8, w9
-; GISEL-NEXT: ret
+; CHECK-SD-LABEL: and_eq_uge:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: ccmp w2, w3, #0, eq
+; CHECK-SD-NEXT: cset w0, hs
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: and_eq_uge:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: cset w8, eq
+; CHECK-GI-NEXT: cmp w2, w3
+; CHECK-GI-NEXT: cset w9, hs
+; CHECK-GI-NEXT: and w0, w8, w9
+; CHECK-GI-NEXT: ret
entry:
%c0 = icmp eq i32 %s0, %s1
%c1 = icmp uge i32 %s2, %s3
@@ -147,21 +147,21 @@ entry:
}
define i32 @and_eq_slt(i32 %s0, i32 %s1, i32 %s2, i32 %s3) {
-; SDISEL-LABEL: and_eq_slt:
-; SDISEL: // %bb.0: // %entry
-; SDISEL-NEXT: cmp w0, w1
-; SDISEL-NEXT: ccmp w2, w3, #0, eq
-; SDISEL-NEXT: cset w0, lt
-; SDISEL-NEXT: ret
-;
-; GISEL-LABEL: and_eq_slt:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: cmp w0, w1
-; GISEL-NEXT: cset w8, eq
-; GISEL-NEXT: cmp w2, w3
-; GISEL-NEXT: cset w9, lt
-; GISEL-NEXT: and w0, w8, w9
-; GISEL-NEXT: ret
+; CHECK-SD-LABEL: and_eq_slt:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: ccmp w2, w3, #0, eq
+; CHECK-SD-NEXT: cset w0, lt
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: and_eq_slt:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: cset w8, eq
+; CHECK-GI-NEXT: cmp w2, w3
+; CHECK-GI-NEXT: cset w9, lt
+; CHECK-GI-NEXT: and w0, w8, w9
+; CHECK-GI-NEXT: ret
entry:
%c0 = icmp eq i32 %s0, %s1
%c1 = icmp slt i32 %s2, %s3
@@ -171,21 +171,21 @@ entry:
}
define i32 @and_eq_sle(i32 %s0, i32 %s1, i32 %s2, i32 %s3) {
-; SDISEL-LABEL: and_eq_sle:
-; SDISEL: // %bb.0: // %entry
-; SDISEL-NEXT: cmp w0, w1
-; SDISEL-NEXT: ccmp w2, w3, #0, eq
-; SDISEL-NEXT: cset w0, le
-; SDISEL-NEXT: ret
-;
-; GISEL-LABEL: and_eq_sle:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: cmp w0, w1
-; GISEL-NEXT: cset w8, eq
-; GISEL-NEXT: cmp w2, w3
-; GISEL-NEXT: cset w9, le
-; GISEL-NEXT: and w0, w8, w9
-; GISEL-NEXT: ret
+; CHECK-SD-LABEL: and_eq_sle:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: ccmp w2, w3, #0, eq
+; CHECK-SD-NEXT: cset w0, le
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: and_eq_sle:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: cset w8, eq
+; CHECK-GI-NEXT: cmp w2, w3
+; CHECK-GI-NEXT: cset w9, le
+; CHECK-GI-NEXT: and w0, w8, w9
+; CHECK-GI-NEXT: ret
entry:
%c0 = icmp eq i32 %s0, %s1
%c1 = icmp sle i32 %s2, %s3
@@ -195,21 +195,21 @@ entry:
}
define i32 @and_eq_sgt(i32 %s0, i32 %s1, i32 %s2, i32 %s3) {
-; SDISEL-LABEL: and_eq_sgt:
-; SDISEL: // %bb.0: // %entry
-; SDISEL-NEXT: cmp w0, w1
-; SDISEL-NEXT: ccmp w2, w3, #4, eq
-; SDISEL-NEXT: cset w0, gt
-; SDISEL-NEXT: ret
-;
-; GISEL-LABEL: and_eq_sgt:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: cmp w0, w1
-; GISEL-NEXT: cset w8, eq
-; GISEL-NEXT: cmp w2, w3
-; GISEL-NEXT: cset w9, gt
-; GISEL-NEXT: and w0, w8, w9
-; GISEL-NEXT: ret
+; CHECK-SD-LABEL: and_eq_sgt:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: ccmp w2, w3, #4, eq
+; CHECK-SD-NEXT: cset w0, gt
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: and_eq_sgt:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: cset w8, eq
+; CHECK-GI-NEXT: cmp w2, w3
+; CHECK-GI-NEXT: cset w9, gt
+; CHECK-GI-NEXT: and w0, w8, w9
+; CHECK-GI-NEXT: ret
entry:
%c0 = icmp eq i32 %s0, %s1
%c1 = icmp sgt i32 %s2, %s3
@@ -219,21 +219,21 @@ entry:
}
define i32 @and_eq_sge(i32 %s0, i32 %s1, i32 %s2, i32 %s3) {
-; SDISEL-LABEL: and_eq_sge:
-; SDISEL: // %bb.0: // %entry
-; SDISEL-NEXT: cmp w0, w1
-; SDISEL-NEXT: ccmp w2, w3, #8, eq
-; SDISEL-NEXT: cset w0, ge
-; SDISEL-NEXT: ret
-;
-; GISEL-LABEL: and_eq_sge:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: cmp w0, w1
-; GISEL-NEXT: cset w8, eq
-; GISEL-NEXT: cmp w2, w3
-; GISEL-NEXT: cset w9, ge
-; GISEL-NEXT: and w0, w8, w9
-; GISEL-NEXT: ret
+; CHECK-SD-LABEL: and_eq_sge:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: ccmp w2, w3, #8, eq
+; CHECK-SD-NEXT: cset w0, ge
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: and_eq_sge:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: cset w8, eq
+; CHECK-GI-NEXT: cmp w2, w3
+; CHECK-GI-NEXT: cset w9, ge
+; CHECK-GI-NEXT: and w0, w8, w9
+; CHECK-GI-NEXT: ret
entry:
%c0 = icmp eq i32 %s0, %s1
%c1 = icmp sge i32 %s2, %s3
@@ -243,21 +243,21 @@ entry:
}
define i32 @and_ne_eq(i32 %s0, i32 %s1, i32 %s2, i32 %s3) {
-; SDISEL-LABEL: and_ne_eq:
-; SDISEL: // %bb.0: // %entry
-; SDISEL-NEXT: cmp w0, w1
-; SDISEL-NEXT: ccmp w2, w3, #0, ne
-; SDISEL-NEXT: cset w0, eq
-; SDISEL-NEXT: ret
-;
-; GISEL-LABEL: and_ne_eq:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: cmp w0, w1
-; GISEL-NEXT: cset w8, ne
-; GISEL-NEXT: cmp w2, w3
-; GISEL-NEXT: cset w9, eq
-; GISEL-NEXT: and w0, w8, w9
-; GISEL-NEXT: ret
+; CHECK-SD-LABEL: and_ne_eq:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: ccmp w2, w3, #0, ne
+; CHECK-SD-NEXT: cset w0, eq
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: and_ne_eq:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: cset w8, ne
+; CHECK-GI-NEXT: cmp w2, w3
+; CHECK-GI-NEXT: cset w9, eq
+; CHECK-GI-NEXT: and w0, w8, w9
+; CHECK-GI-NEXT: ret
entry:
%c0 = icmp ne i32 %s0, %s1
%c1 = icmp eq i32 %s2, %s3
@@ -267,21 +267,21 @@ entry:
}
define i32 @and_ne_ne(i32 %s0, i32 %s1, i32 %s2, i32 %s3) {
-; SDISEL-LABEL: and_ne_ne:
-; SDISEL: // %bb.0: // %entry
-; SDISEL-NEXT: cmp w0, w1
-; SDISEL-NEXT: ccmp w2, w3, #4, ne
-; SDISEL-NEXT: cset w0, ne
-; SDISEL-NEXT: ret
-;
-; GISEL-LABEL: and_ne_ne:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: cmp w0, w1
-; GISEL-NEXT: cset w8, ne
-; GISEL-NEXT: cmp w2, w3
-; GISEL-NEXT: cset w9, ne
-; GISEL-NEXT: and w0, w8, w9
-; GISEL-NEXT: ret
+; CHECK-SD-LABEL: and_ne_ne:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: ccmp w2, w3, #4, ne
+; CHECK-SD-NEXT: cset w0, ne
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: and_ne_ne:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: cset w8, ne
+; CHECK-GI-NEXT: cmp w2, w3
+; CHECK-GI-NEXT: cset w9, ne
+; CHECK-GI-NEXT: and w0, w8, w9
+; CHECK-GI-NEXT: ret
entry:
%c0 = icmp ne i32 %s0, %s1
%c1 = icmp ne i32 %s2, %s3
@@ -291,21 +291,21 @@ entry:
}
define i32 @and_ne_ult(i32 %s0, i32 %s1, i32 %s2, i32 %s3) {
-; SDISEL-LABEL: and_ne_ult:
-; SDISEL: // %bb.0: // %entry
-; SDISEL-NEXT: cmp w0, w1
-; SDISEL-NEXT: ccmp w2, w3, #2, ne
-; SDISEL-NEXT: cset w0, lo
-; SDISEL-NEXT: ret
-;
-; GISEL-LABEL: and_ne_ult:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: cmp w0, w1
-; GISEL-NEXT: cset w8, ne
-; GISEL-NEXT: cmp w2, w3
-; GISEL-NEXT: cset w9, lo
-; GISEL-NEXT: and w0, w8, w9
-; GISEL-NEXT: ret
+; CHECK-SD-LABEL: and_ne_ult:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: ccmp w2, w3, #2, ne
+; CHECK-SD-NEXT: cset w0, lo
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: and_ne_ult:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: cset w8, ne
+; CHECK-GI-NEXT: cmp w2, w3
+; CHECK-GI-NEXT: cset w9, lo
+; CHECK-GI-NEXT: and w0, w8, w9
+; CHECK-GI-NEXT: ret
entry:
%c0 = icmp ne i32 %s0, %s1
%c1 = icmp ult i32 %s2, %s3
@@ -315,21 +315,21 @@ entry:
}
define i32 @and_ne_ule(i32 %s0, i32 %s1, i32 %s2, i32 %s3) {
-; SDISEL-LABEL: and_ne_ule:
-; SDISEL: // %bb.0: // %entry
-; SDISEL-NEXT: cmp w0, w1
-; SDISEL-NEXT: ccmp w2, w3, #2, ne
-; SDISEL-NEXT: cset w0, ls
-; SDISEL-NEXT: ret
-;
-; GISEL-LABEL: and_ne_ule:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: cmp w0, w1
-; GISEL-NEXT: cset w8, ne
-; GISEL-NEXT: cmp w2, w3
-; GISEL-NEXT: cset w9, ls
-; GISEL-NEXT: and w0, w8, w9
-; GISEL-NEXT: ret
+; CHECK-SD-LABEL: and_ne_ule:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: ccmp w2, w3, #2, ne
+; CHECK-SD-NEXT: cset w0, ls
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: and_ne_ule:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: cset w8, ne
+; CHECK-GI-NEXT: cmp w2, w3
+; CHECK-GI-NEXT: cset w9, ls
+; CHECK-GI-NEXT: and w0, w8, w9
+; CHECK-GI-NEXT: ret
entry:
%c0 = icmp ne i32 %s0, %s1
%c1 = icmp ule i32 %s2, %s3
@@ -339,21 +339,21 @@ entry:
}
define i32 @and_ne_ugt(i32 %s0, i32 %s1, i32 %s2, i32 %s3) {
-; SDISEL-LABEL: and_ne_ugt:
-; SDISEL: // %bb.0: // %entry
-; SDISEL-NEXT: cmp w0, w1
-; SDISEL-NEXT: ccmp w2, w3, #0, ne
-; SDISEL-NEXT: cset w0, hi
-; SDISEL-NEXT: ret
-;
-; GISEL-LABEL: and_ne_ugt:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: cmp w0, w1
-; GISEL-NEXT: cset w8, ne
-; GISEL-NEXT: cmp w2, w3
-; GISEL-NEXT: cset w9, hi
-; GISEL-NEXT: and w0, w8, w9
-; GISEL-NEXT: ret
+; CHECK-SD-LABEL: and_ne_ugt:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: ccmp w2, w3, #0, ne
+; CHECK-SD-NEXT: cset w0, hi
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: and_ne_ugt:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: cset w8, ne
+; CHECK-GI-NEXT: cmp w2, w3
+; CHECK-GI-NEXT: cset w9, hi
+; CHECK-GI-NEXT: and w0, w8, w9
+; CHECK-GI-NEXT: ret
entry:
%c0 = icmp ne i32 %s0, %s1
%c1 = icmp ugt i32 %s2, %s3
@@ -363,21 +363,21 @@ entry:
}
define i32 @and_ne_uge(i32 %s0, i32 %s1, i32 %s2, i32 %s3) {
-; SDISEL-LABEL: and_ne_uge:
-; SDISEL: // %bb.0: // %entry
-; SDISEL-NEXT: cmp w0, w1
-; SDISEL-NEXT: ccmp w2, w3, #0, ne
-; SDISEL-NEXT: cset w0, hs
-; SDISEL-NEXT: ret
-;
-; GISEL-LABEL: and_ne_uge:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: cmp w0, w1
-; GISEL-NEXT: cset w8, ne
-; GISEL-NEXT: cmp w2, w3
-; GISEL-NEXT: cset w9, hs
-; GISEL-NEXT: and w0, w8, w9
-; GISEL-NEXT: ret
+; CHECK-SD-LABEL: and_ne_uge:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: ccmp w2, w3, #0, ne
+; CHECK-SD-NEXT: cset w0, hs
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: and_ne_uge:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: cset w8, ne
+; CHECK-GI-NEXT: cmp w2, w3
+; CHECK-GI-NEXT: cset w9, hs
+; CHECK-GI-NEXT: and w0, w8, w9
+; CHECK-GI-NEXT: ret
entry:
%c0 = icmp ne i32 %s0, %s1
%c1 = icmp uge i32 %s2, %s3
@@ -387,21 +387,21 @@ entry:
}
define i32 @and_ne_slt(i32 %s0, i32 %s1, i32 %s2, i32 %s3) {
-; SDISEL-LABEL: and_ne_slt:
-; SDISEL: // %bb.0: // %entry
-; SDISEL-NEXT: cmp w0, w1
-; SDISEL-NEXT: ccmp w2, w3, #0, ne
-; SDISEL-NEXT: cset w0, lt
-; SDISEL-NEXT: ret
-;
-; GISEL-LABEL: and_ne_slt:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: cmp w0, w1
-; GISEL-NEXT: cset w8, ne
-; GISEL-NEXT: cmp w2, w3
-; GISEL-NEXT: cset w9, lt
-; GISEL-NEXT: and w0, w8, w9
-; GISEL-NEXT: ret
+; CHECK-SD-LABEL: and_ne_slt:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: ccmp w2, w3, #0, ne
+; CHECK-SD-NEXT: cset w0, lt
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: and_ne_slt:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: cset w8, ne
+; CHECK-GI-NEXT: cmp w2, w3
+; CHECK-GI-NEXT: cset w9, lt
+; CHECK-GI-NEXT: and w0, w8, w9
+; CHECK-GI-NEXT: ret
entry:
%c0 = icmp ne i32 %s0, %s1
%c1 = icmp slt i32 %s2, %s3
@@ -411,21 +411,21 @@ entry:
}
define i32 @and_ne_sle(i32 %s0, i32 %s1, i32 %s2, i32 %s3) {
-; SDISEL-LABEL: and_ne_sle:
-; SDISEL: // %bb.0: // %entry
-; SDISEL-NEXT: cmp w0, w1
-; SDISEL-NEXT: ccmp w2, w3, #0, ne
-; SDISEL-NEXT: cset w0, le
-; SDISEL-NEXT: ret
-;
-; GISEL-LABEL: and_ne_sle:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: cmp w0, w1
-; GISEL-NEXT: cset w8, ne
-; GISEL-NEXT: cmp w2, w3
-; GISEL-NEXT: cset w9, le
-; GISEL-NEXT: and w0, w8, w9
-; GISEL-NEXT: ret
+; CHECK-SD-LABEL: and_ne_sle:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: ccmp w2, w3, #0, ne
+; CHECK-SD-NEXT: cset w0, le
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: and_ne_sle:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: cset w8, ne
+; CHECK-GI-NEXT: cmp w2, w3
+; CHECK-GI-NEXT: cset w9, le
+; CHECK-GI-NEXT: and w0, w8, w9
+; CHECK-GI-NEXT: ret
entry:
%c0 = icmp ne i32 %s0, %s1
%c1 = icmp sle i32 %s2, %s3
@@ -435,21 +435,21 @@ entry:
}
define i32 @and_ne_sgt(i32 %s0, i32 %s1, i32 %s2, i32 %s3) {
-; SDISEL-LABEL: and_ne_sgt:
-; SDISEL: // %bb.0: // %entry
-; SDISEL-NEXT: cmp w0, w1
-; SDISEL-NEXT: ccmp w2, w3, #4, ne
-; SDISEL-NEXT: cset w0, gt
-; SDISEL-NEXT: ret
-;
-; GISEL-LABEL: and_ne_sgt:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: cmp w0, w1
-; GISEL-NEXT: cset w8, ne
-; GISEL-NEXT: cmp w2, w3
-; GISEL-NEXT: cset w9, gt
-; GISEL-NEXT: and w0, w8, w9
-; GISEL-NEXT: ret
+; CHECK-SD-LABEL: and_ne_sgt:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: ccmp w2, w3, #4, ne
+; CHECK-SD-NEXT: cset w0, gt
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: and_ne_sgt:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: cset w8, ne
+; CHECK-GI-NEXT: cmp w2, w3
+; CHECK-GI-NEXT: cset w9, gt
+; CHECK-GI-NEXT: and w0, w8, w9
+; CHECK-GI-NEXT: ret
entry:
%c0 = icmp ne i32 %s0, %s1
%c1 = icmp sgt i32 %s2, %s3
@@ -459,21 +459,21 @@ entry:
}
define i32 @and_ne_sge(i32 %s0, i32 %s1, i32 %s2, i32 %s3) {
-; SDISEL-LABEL: and_ne_sge:
-; SDISEL: // %bb.0: // %entry
-; SDISEL-NEXT: cmp w0, w1
-; SDISEL-NEXT: ccmp w2, w3, #8, ne
-; SDISEL-NEXT: cset w0, ge
-; SDISEL-NEXT: ret
-;
-; GISEL-LABEL: and_ne_sge:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: cmp w0, w1
-; GISEL-NEXT: cset w8, ne
-; GISEL-NEXT: cmp w2, w3
-; GISEL-NEXT: cset w9, ge
-; GISEL-NEXT: and w0, w8, w9
-; GISEL-NEXT: ret
+; CHECK-SD-LABEL: and_ne_sge:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: ccmp w2, w3, #8, ne
+; CHECK-SD-NEXT: cset w0, ge
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: and_ne_sge:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: cset w8, ne
+; CHECK-GI-NEXT: cmp w2, w3
+; CHECK-GI-NEXT: cset w9, ge
+; CHECK-GI-NEXT: and w0, w8, w9
+; CHECK-GI-NEXT: ret
entry:
%c0 = icmp ne i32 %s0, %s1
%c1 = icmp sge i32 %s2, %s3
@@ -483,21 +483,21 @@ entry:
}
define i32 @and_ult_eq(i32 %s0, i32 %s1, i32 %s2, i32 %s3) {
-; SDISEL-LABEL: and_ult_eq:
-; SDISEL: // %bb.0: // %entry
-; SDISEL-NEXT: cmp w0, w1
-; SDISEL-NEXT: ccmp w2, w3, #0, lo
-; SDISEL-NEXT: cset w0, eq
-; SDISEL-NEXT: ret
-;
-; GISEL-LABEL: and_ult_eq:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: cmp w0, w1
-; GISEL-NEXT: cset w8, lo
-; GISEL-NEXT: cmp w2, w3
-; GISEL-NEXT: cset w9, eq
-; GISEL-NEXT: and w0, w8, w9
-; GISEL-NEXT: ret
+; CHECK-SD-LABEL: and_ult_eq:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: ccmp w2, w3, #0, lo
+; CHECK-SD-NEXT: cset w0, eq
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: and_ult_eq:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: cset w8, lo
+; CHECK-GI-NEXT: cmp w2, w3
+; CHECK-GI-NEXT: cset w9, eq
+; CHECK-GI-NEXT: and w0, w8, w9
+; CHECK-GI-NEXT: ret
entry:
%c0 = icmp ult i32 %s0, %s1
%c1 = icmp eq i32 %s2, %s3
@@ -507,21 +507,21 @@ entry:
}
define i32 @and_ult_ne(i32 %s0, i32 %s1, i32 %s2, i32 %s3) {
-; SDISEL-LABEL: and_ult_ne:
-; SDISEL: // %bb.0: // %entry
-; SDISEL-NEXT: cmp w0, w1
-; SDISEL-NEXT: ccmp w2, w3, #4, lo
-; SDISEL-NEXT: cset w0, ne
-; SDISEL-NEXT: ret
-;
-; GISEL-LABEL: and_ult_ne:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: cmp w0, w1
-; GISEL-NEXT: cset w8, lo
-; GISEL-NEXT: cmp w2, w3
-; GISEL-NEXT: cset w9, ne
-; GISEL-NEXT: and w0, w8, w9
-; GISEL-NEXT: ret
+; CHECK-SD-LABEL: and_ult_ne:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: ccmp w2, w3, #4, lo
+; CHECK-SD-NEXT: cset w0, ne
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: and_ult_ne:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: cset w8, lo
+; CHECK-GI-NEXT: cmp w2, w3
+; CHECK-GI-NEXT: cset w9, ne
+; CHECK-GI-NEXT: and w0, w8, w9
+; CHECK-GI-NEXT: ret
entry:
%c0 = icmp ult i32 %s0, %s1
%c1 = icmp ne i32 %s2, %s3
@@ -531,21 +531,21 @@ entry:
}
define i32 @and_ult_ult(i32 %s0, i32 %s1, i32 %s2, i32 %s3) {
-; SDISEL-LABEL: and_ult_ult:
-; SDISEL: // %bb.0: // %entry
-; SDISEL-NEXT: cmp w0, w1
-; SDISEL-NEXT: ccmp w2, w3, #2, lo
-; SDISEL-NEXT: cset w0, lo
-; SDISEL-NEXT: ret
-;
-; GISEL-LABEL: and_ult_ult:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: cmp w0, w1
-; GISEL-NEXT: cset w8, lo
-; GISEL-NEXT: cmp w2, w3
-; GISEL-NEXT: cset w9, lo
-; GISEL-NEXT: and w0, w8, w9
-; GISEL-NEXT: ret
+; CHECK-SD-LABEL: and_ult_ult:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: ccmp w2, w3, #2, lo
+; CHECK-SD-NEXT: cset w0, lo
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: and_ult_ult:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: cset w8, lo
+; CHECK-GI-NEXT: cmp w2, w3
+; CHECK-GI-NEXT: cset w9, lo
+; CHECK-GI-NEXT: and w0, w8, w9
+; CHECK-GI-NEXT: ret
entry:
%c0 = icmp ult i32 %s0, %s1
%c1 = icmp ult i32 %s2, %s3
@@ -555,21 +555,21 @@ entry:
}
define i32 @and_ult_ule(i32 %s0, i32 %s1, i32 %s2, i32 %s3) {
-; SDISEL-LABEL: and_ult_ule:
-; SDISEL: // %bb.0: // %entry
-; SDISEL-NEXT: cmp w0, w1
-; SDISEL-NEXT: ccmp w2, w3, #2, lo
-; SDISEL-NEXT: cset w0, ls
-; SDISEL-NEXT: ret
-;
-; GISEL-LABEL: and_ult_ule:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: cmp w0, w1
-; GISEL-NEXT: cset w8, lo
-; GISEL-NEXT: cmp w2, w3
-; GISEL-NEXT: cset w9, ls
-; GISEL-NEXT: and w0, w8, w9
-; GISEL-NEXT: ret
+; CHECK-SD-LABEL: and_ult_ule:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: ccmp w2, w3, #2, lo
+; CHECK-SD-NEXT: cset w0, ls
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: and_ult_ule:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: cset w8, lo
+; CHECK-GI-NEXT: cmp w2, w3
+; CHECK-GI-NEXT: cset w9, ls
+; CHECK-GI-NEXT: and w0, w8, w9
+; CHECK-GI-NEXT: ret
entry:
%c0 = icmp ult i32 %s0, %s1
%c1 = icmp ule i32 %s2, %s3
@@ -579,21 +579,21 @@ entry:
}
define i32 @and_ult_ugt(i32 %s0, i32 %s1, i32 %s2, i32 %s3) {
-; SDISEL-LABEL: and_ult_ugt:
-; SDISEL: // %bb.0: // %entry
-; SDISEL-NEXT: cmp w0, w1
-; SDISEL-NEXT: ccmp w2, w3, #0, lo
-; SDISEL-NEXT: cset w0, hi
-; SDISEL-NEXT: ret
-;
-; GISEL-LABEL: and_ult_ugt:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: cmp w0, w1
-; GISEL-NEXT: cset w8, lo
-; GISEL-NEXT: cmp w2, w3
-; GISEL-NEXT: cset w9, hi
-; GISEL-NEXT: and w0, w8, w9
-; GISEL-NEXT: ret
+; CHECK-SD-LABEL: and_ult_ugt:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: ccmp w2, w3, #0, lo
+; CHECK-SD-NEXT: cset w0, hi
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: and_ult_ugt:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: cset w8, lo
+; CHECK-GI-NEXT: cmp w2, w3
+; CHECK-GI-NEXT: cset w9, hi
+; CHECK-GI-NEXT: and w0, w8, w9
+; CHECK-GI-NEXT: ret
entry:
%c0 = icmp ult i32 %s0, %s1
%c1 = icmp ugt i32 %s2, %s3
@@ -603,21 +603,21 @@ entry:
}
define i32 @and_ult_uge(i32 %s0, i32 %s1, i32 %s2, i32 %s3) {
-; SDISEL-LABEL: and_ult_uge:
-; SDISEL: // %bb.0: // %entry
-; SDISEL-NEXT: cmp w0, w1
-; SDISEL-NEXT: ccmp w2, w3, #0, lo
-; SDISEL-NEXT: cset w0, hs
-; SDISEL-NEXT: ret
-;
-; GISEL-LABEL: and_ult_uge:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: cmp w0, w1
-; GISEL-NEXT: cset w8, lo
-; GISEL-NEXT: cmp w2, w3
-; GISEL-NEXT: cset w9, hs
-; GISEL-NEXT: and w0, w8, w9
-; GISEL-NEXT: ret
+; CHECK-SD-LABEL: and_ult_uge:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: ccmp w2, w3, #0, lo
+; CHECK-SD-NEXT: cset w0, hs
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: and_ult_uge:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: cset w8, lo
+; CHECK-GI-NEXT: cmp w2, w3
+; CHECK-GI-NEXT: cset w9, hs
+; CHECK-GI-NEXT: and w0, w8, w9
+; CHECK-GI-NEXT: ret
entry:
%c0 = icmp ult i32 %s0, %s1
%c1 = icmp uge i32 %s2, %s3
@@ -627,21 +627,21 @@ entry:
}
define i32 @and_ult_slt(i32 %s0, i32 %s1, i32 %s2, i32 %s3) {
-; SDISEL-LABEL: and_ult_slt:
-; SDISEL: // %bb.0: // %entry
-; SDISEL-NEXT: cmp w0, w1
-; SDISEL-NEXT: ccmp w2, w3, #0, lo
-; SDISEL-NEXT: cset w0, lt
-; SDISEL-NEXT: ret
-;
-; GISEL-LABEL: and_ult_slt:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: cmp w0, w1
-; GISEL-NEXT: cset w8, lo
-; GISEL-NEXT: cmp w2, w3
-; GISEL-NEXT: cset w9, lt
-; GISEL-NEXT: and w0, w8, w9
-; GISEL-NEXT: ret
+; CHECK-SD-LABEL: and_ult_slt:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: ccmp w2, w3, #0, lo
+; CHECK-SD-NEXT: cset w0, lt
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: and_ult_slt:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: cset w8, lo
+; CHECK-GI-NEXT: cmp w2, w3
+; CHECK-GI-NEXT: cset w9, lt
+; CHECK-GI-NEXT: and w0, w8, w9
+; CHECK-GI-NEXT: ret
entry:
%c0 = icmp ult i32 %s0, %s1
%c1 = icmp slt i32 %s2, %s3
@@ -651,21 +651,21 @@ entry:
}
define i32 @and_ult_sle(i32 %s0, i32 %s1, i32 %s2, i32 %s3) {
-; SDISEL-LABEL: and_ult_sle:
-; SDISEL: // %bb.0: // %entry
-; SDISEL-NEXT: cmp w0, w1
-; SDISEL-NEXT: ccmp w2, w3, #0, lo
-; SDISEL-NEXT: cset w0, le
-; SDISEL-NEXT: ret
-;
-; GISEL-LABEL: and_ult_sle:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: cmp w0, w1
-; GISEL-NEXT: cset w8, lo
-; GISEL-NEXT: cmp w2, w3
-; GISEL-NEXT: cset w9, le
-; GISEL-NEXT: and w0, w8, w9
-; GISEL-NEXT: ret
+; CHECK-SD-LABEL: and_ult_sle:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: ccmp w2, w3, #0, lo
+; CHECK-SD-NEXT: cset w0, le
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: and_ult_sle:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: cset w8, lo
+; CHECK-GI-NEXT: cmp w2, w3
+; CHECK-GI-NEXT: cset w9, le
+; CHECK-GI-NEXT: and w0, w8, w9
+; CHECK-GI-NEXT: ret
entry:
%c0 = icmp ult i32 %s0, %s1
%c1 = icmp sle i32 %s2, %s3
@@ -675,21 +675,21 @@ entry:
}
define i32 @and_ult_sgt(i32 %s0, i32 %s1, i32 %s2, i32 %s3) {
-; SDISEL-LABEL: and_ult_sgt:
-; SDISEL: // %bb.0: // %entry
-; SDISEL-NEXT: cmp w0, w1
-; SDISEL-NEXT: ccmp w2, w3, #4, lo
-; SDISEL-NEXT: cset w0, gt
-; SDISEL-NEXT: ret
-;
-; GISEL-LABEL: and_ult_sgt:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: cmp w0, w1
-; GISEL-NEXT: cset w8, lo
-; GISEL-NEXT: cmp w2, w3
-; GISEL-NEXT: cset w9, gt
-; GISEL-NEXT: and w0, w8, w9
-; GISEL-NEXT: ret
+; CHECK-SD-LABEL: and_ult_sgt:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: ccmp w2, w3, #4, lo
+; CHECK-SD-NEXT: cset w0, gt
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: and_ult_sgt:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: cset w8, lo
+; CHECK-GI-NEXT: cmp w2, w3
+; CHECK-GI-NEXT: cset w9, gt
+; CHECK-GI-NEXT: and w0, w8, w9
+; CHECK-GI-NEXT: ret
entry:
%c0 = icmp ult i32 %s0, %s1
%c1 = icmp sgt i32 %s2, %s3
@@ -699,21 +699,21 @@ entry:
}
define i32 @and_ult_sge(i32 %s0, i32 %s1, i32 %s2, i32 %s3) {
-; SDISEL-LABEL: and_ult_sge:
-; SDISEL: // %bb.0: // %entry
-; SDISEL-NEXT: cmp w0, w1
-; SDISEL-NEXT: ccmp w2, w3, #8, lo
-; SDISEL-NEXT: cset w0, ge
-; SDISEL-NEXT: ret
-;
-; GISEL-LABEL: and_ult_sge:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: cmp w0, w1
-; GISEL-NEXT: cset w8, lo
-; GISEL-NEXT: cmp w2, w3
-; GISEL-NEXT: cset w9, ge
-; GISEL-NEXT: and w0, w8, w9
-; GISEL-NEXT: ret
+; CHECK-SD-LABEL: and_ult_sge:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: ccmp w2, w3, #8, lo
+; CHECK-SD-NEXT: cset w0, ge
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: and_ult_sge:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: cset w8, lo
+; CHECK-GI-NEXT: cmp w2, w3
+; CHECK-GI-NEXT: cset w9, ge
+; CHECK-GI-NEXT: and w0, w8, w9
+; CHECK-GI-NEXT: ret
entry:
%c0 = icmp ult i32 %s0, %s1
%c1 = icmp sge i32 %s2, %s3
@@ -723,21 +723,21 @@ entry:
}
define i32 @and_ule_eq(i32 %s0, i32 %s1, i32 %s2, i32 %s3) {
-; SDISEL-LABEL: and_ule_eq:
-; SDISEL: // %bb.0: // %entry
-; SDISEL-NEXT: cmp w0, w1
-; SDISEL-NEXT: ccmp w2, w3, #0, ls
-; SDISEL-NEXT: cset w0, eq
-; SDISEL-NEXT: ret
-;
-; GISEL-LABEL: and_ule_eq:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: cmp w0, w1
-; GISEL-NEXT: cset w8, ls
-; GISEL-NEXT: cmp w2, w3
-; GISEL-NEXT: cset w9, eq
-; GISEL-NEXT: and w0, w8, w9
-; GISEL-NEXT: ret
+; CHECK-SD-LABEL: and_ule_eq:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: ccmp w2, w3, #0, ls
+; CHECK-SD-NEXT: cset w0, eq
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: and_ule_eq:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: cset w8, ls
+; CHECK-GI-NEXT: cmp w2, w3
+; CHECK-GI-NEXT: cset w9, eq
+; CHECK-GI-NEXT: and w0, w8, w9
+; CHECK-GI-NEXT: ret
entry:
%c0 = icmp ule i32 %s0, %s1
%c1 = icmp eq i32 %s2, %s3
@@ -747,21 +747,21 @@ entry:
}
define i32 @and_ule_ne(i32 %s0, i32 %s1, i32 %s2, i32 %s3) {
-; SDISEL-LABEL: and_ule_ne:
-; SDISEL: // %bb.0: // %entry
-; SDISEL-NEXT: cmp w0, w1
-; SDISEL-NEXT: ccmp w2, w3, #4, ls
-; SDISEL-NEXT: cset w0, ne
-; SDISEL-NEXT: ret
-;
-; GISEL-LABEL: and_ule_ne:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: cmp w0, w1
-; GISEL-NEXT: cset w8, ls
-; GISEL-NEXT: cmp w2, w3
-; GISEL-NEXT: cset w9, ne
-; GISEL-NEXT: and w0, w8, w9
-; GISEL-NEXT: ret
+; CHECK-SD-LABEL: and_ule_ne:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: ccmp w2, w3, #4, ls
+; CHECK-SD-NEXT: cset w0, ne
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: and_ule_ne:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: cset w8, ls
+; CHECK-GI-NEXT: cmp w2, w3
+; CHECK-GI-NEXT: cset w9, ne
+; CHECK-GI-NEXT: and w0, w8, w9
+; CHECK-GI-NEXT: ret
entry:
%c0 = icmp ule i32 %s0, %s1
%c1 = icmp ne i32 %s2, %s3
@@ -771,21 +771,21 @@ entry:
}
define i32 @and_ule_ult(i32 %s0, i32 %s1, i32 %s2, i32 %s3) {
-; SDISEL-LABEL: and_ule_ult:
-; SDISEL: // %bb.0: // %entry
-; SDISEL-NEXT: cmp w0, w1
-; SDISEL-NEXT: ccmp w2, w3, #2, ls
-; SDISEL-NEXT: cset w0, lo
-; SDISEL-NEXT: ret
-;
-; GISEL-LABEL: and_ule_ult:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: cmp w0, w1
-; GISEL-NEXT: cset w8, ls
-; GISEL-NEXT: cmp w2, w3
-; GISEL-NEXT: cset w9, lo
-; GISEL-NEXT: and w0, w8, w9
-; GISEL-NEXT: ret
+; CHECK-SD-LABEL: and_ule_ult:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: ccmp w2, w3, #2, ls
+; CHECK-SD-NEXT: cset w0, lo
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: and_ule_ult:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: cset w8, ls
+; CHECK-GI-NEXT: cmp w2, w3
+; CHECK-GI-NEXT: cset w9, lo
+; CHECK-GI-NEXT: and w0, w8, w9
+; CHECK-GI-NEXT: ret
entry:
%c0 = icmp ule i32 %s0, %s1
%c1 = icmp ult i32 %s2, %s3
@@ -795,21 +795,21 @@ entry:
}
define i32 @and_ule_ule(i32 %s0, i32 %s1, i32 %s2, i32 %s3) {
-; SDISEL-LABEL: and_ule_ule:
-; SDISEL: // %bb.0: // %entry
-; SDISEL-NEXT: cmp w0, w1
-; SDISEL-NEXT: ccmp w2, w3, #2, ls
-; SDISEL-NEXT: cset w0, ls
-; SDISEL-NEXT: ret
-;
-; GISEL-LABEL: and_ule_ule:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: cmp w0, w1
-; GISEL-NEXT: cset w8, ls
-; GISEL-NEXT: cmp w2, w3
-; GISEL-NEXT: cset w9, ls
-; GISEL-NEXT: and w0, w8, w9
-; GISEL-NEXT: ret
+; CHECK-SD-LABEL: and_ule_ule:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: ccmp w2, w3, #2, ls
+; CHECK-SD-NEXT: cset w0, ls
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: and_ule_ule:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: cset w8, ls
+; CHECK-GI-NEXT: cmp w2, w3
+; CHECK-GI-NEXT: cset w9, ls
+; CHECK-GI-NEXT: and w0, w8, w9
+; CHECK-GI-NEXT: ret
entry:
%c0 = icmp ule i32 %s0, %s1
%c1 = icmp ule i32 %s2, %s3
@@ -819,21 +819,21 @@ entry:
}
define i32 @and_ule_ugt(i32 %s0, i32 %s1, i32 %s2, i32 %s3) {
-; SDISEL-LABEL: and_ule_ugt:
-; SDISEL: // %bb.0: // %entry
-; SDISEL-NEXT: cmp w0, w1
-; SDISEL-NEXT: ccmp w2, w3, #0, ls
-; SDISEL-NEXT: cset w0, hi
-; SDISEL-NEXT: ret
-;
-; GISEL-LABEL: and_ule_ugt:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: cmp w0, w1
-; GISEL-NEXT: cset w8, ls
-; GISEL-NEXT: cmp w2, w3
-; GISEL-NEXT: cset w9, hi
-; GISEL-NEXT: and w0, w8, w9
-; GISEL-NEXT: ret
+; CHECK-SD-LABEL: and_ule_ugt:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: ccmp w2, w3, #0, ls
+; CHECK-SD-NEXT: cset w0, hi
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: and_ule_ugt:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: cset w8, ls
+; CHECK-GI-NEXT: cmp w2, w3
+; CHECK-GI-NEXT: cset w9, hi
+; CHECK-GI-NEXT: and w0, w8, w9
+; CHECK-GI-NEXT: ret
entry:
%c0 = icmp ule i32 %s0, %s1
%c1 = icmp ugt i32 %s2, %s3
@@ -843,21 +843,21 @@ entry:
}
define i32 @and_ule_uge(i32 %s0, i32 %s1, i32 %s2, i32 %s3) {
-; SDISEL-LABEL: and_ule_uge:
-; SDISEL: // %bb.0: // %entry
-; SDISEL-NEXT: cmp w0, w1
-; SDISEL-NEXT: ccmp w2, w3, #0, ls
-; SDISEL-NEXT: cset w0, hs
-; SDISEL-NEXT: ret
-;
-; GISEL-LABEL: and_ule_uge:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: cmp w0, w1
-; GISEL-NEXT: cset w8, ls
-; GISEL-NEXT: cmp w2, w3
-; GISEL-NEXT: cset w9, hs
-; GISEL-NEXT: and w0, w8, w9
-; GISEL-NEXT: ret
+; CHECK-SD-LABEL: and_ule_uge:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: ccmp w2, w3, #0, ls
+; CHECK-SD-NEXT: cset w0, hs
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: and_ule_uge:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: cset w8, ls
+; CHECK-GI-NEXT: cmp w2, w3
+; CHECK-GI-NEXT: cset w9, hs
+; CHECK-GI-NEXT: and w0, w8, w9
+; CHECK-GI-NEXT: ret
entry:
%c0 = icmp ule i32 %s0, %s1
%c1 = icmp uge i32 %s2, %s3
@@ -867,21 +867,21 @@ entry:
}
define i32 @and_ule_slt(i32 %s0, i32 %s1, i32 %s2, i32 %s3) {
-; SDISEL-LABEL: and_ule_slt:
-; SDISEL: // %bb.0: // %entry
-; SDISEL-NEXT: cmp w0, w1
-; SDISEL-NEXT: ccmp w2, w3, #0, ls
-; SDISEL-NEXT: cset w0, lt
-; SDISEL-NEXT: ret
-;
-; GISEL-LABEL: and_ule_slt:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: cmp w0, w1
-; GISEL-NEXT: cset w8, ls
-; GISEL-NEXT: cmp w2, w3
-; GISEL-NEXT: cset w9, lt
-; GISEL-NEXT: and w0, w8, w9
-; GISEL-NEXT: ret
+; CHECK-SD-LABEL: and_ule_slt:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: ccmp w2, w3, #0, ls
+; CHECK-SD-NEXT: cset w0, lt
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: and_ule_slt:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: cset w8, ls
+; CHECK-GI-NEXT: cmp w2, w3
+; CHECK-GI-NEXT: cset w9, lt
+; CHECK-GI-NEXT: and w0, w8, w9
+; CHECK-GI-NEXT: ret
entry:
%c0 = icmp ule i32 %s0, %s1
%c1 = icmp slt i32 %s2, %s3
@@ -891,21 +891,21 @@ entry:
}
define i32 @and_ule_sle(i32 %s0, i32 %s1, i32 %s2, i32 %s3) {
-; SDISEL-LABEL: and_ule_sle:
-; SDISEL: // %bb.0: // %entry
-; SDISEL-NEXT: cmp w0, w1
-; SDISEL-NEXT: ccmp w2, w3, #0, ls
-; SDISEL-NEXT: cset w0, le
-; SDISEL-NEXT: ret
-;
-; GISEL-LABEL: and_ule_sle:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: cmp w0, w1
-; GISEL-NEXT: cset w8, ls
-; GISEL-NEXT: cmp w2, w3
-; GISEL-NEXT: cset w9, le
-; GISEL-NEXT: and w0, w8, w9
-; GISEL-NEXT: ret
+; CHECK-SD-LABEL: and_ule_sle:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: ccmp w2, w3, #0, ls
+; CHECK-SD-NEXT: cset w0, le
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: and_ule_sle:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: cset w8, ls
+; CHECK-GI-NEXT: cmp w2, w3
+; CHECK-GI-NEXT: cset w9, le
+; CHECK-GI-NEXT: and w0, w8, w9
+; CHECK-GI-NEXT: ret
entry:
%c0 = icmp ule i32 %s0, %s1
%c1 = icmp sle i32 %s2, %s3
@@ -915,21 +915,21 @@ entry:
}
define i32 @and_ule_sgt(i32 %s0, i32 %s1, i32 %s2, i32 %s3) {
-; SDISEL-LABEL: and_ule_sgt:
-; SDISEL: // %bb.0: // %entry
-; SDISEL-NEXT: cmp w0, w1
-; SDISEL-NEXT: ccmp w2, w3, #4, ls
-; SDISEL-NEXT: cset w0, gt
-; SDISEL-NEXT: ret
-;
-; GISEL-LABEL: and_ule_sgt:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: cmp w0, w1
-; GISEL-NEXT: cset w8, ls
-; GISEL-NEXT: cmp w2, w3
-; GISEL-NEXT: cset w9, gt
-; GISEL-NEXT: and w0, w8, w9
-; GISEL-NEXT: ret
+; CHECK-SD-LABEL: and_ule_sgt:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: ccmp w2, w3, #4, ls
+; CHECK-SD-NEXT: cset w0, gt
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: and_ule_sgt:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: cset w8, ls
+; CHECK-GI-NEXT: cmp w2, w3
+; CHECK-GI-NEXT: cset w9, gt
+; CHECK-GI-NEXT: and w0, w8, w9
+; CHECK-GI-NEXT: ret
entry:
%c0 = icmp ule i32 %s0, %s1
%c1 = icmp sgt i32 %s2, %s3
@@ -939,21 +939,21 @@ entry:
}
define i32 @and_ule_sge(i32 %s0, i32 %s1, i32 %s2, i32 %s3) {
-; SDISEL-LABEL: and_ule_sge:
-; SDISEL: // %bb.0: // %entry
-; SDISEL-NEXT: cmp w0, w1
-; SDISEL-NEXT: ccmp w2, w3, #8, ls
-; SDISEL-NEXT: cset w0, ge
-; SDISEL-NEXT: ret
-;
-; GISEL-LABEL: and_ule_sge:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: cmp w0, w1
-; GISEL-NEXT: cset w8, ls
-; GISEL-NEXT: cmp w2, w3
-; GISEL-NEXT: cset w9, ge
-; GISEL-NEXT: and w0, w8, w9
-; GISEL-NEXT: ret
+; CHECK-SD-LABEL: and_ule_sge:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: ccmp w2, w3, #8, ls
+; CHECK-SD-NEXT: cset w0, ge
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: and_ule_sge:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: cset w8, ls
+; CHECK-GI-NEXT: cmp w2, w3
+; CHECK-GI-NEXT: cset w9, ge
+; CHECK-GI-NEXT: and w0, w8, w9
+; CHECK-GI-NEXT: ret
entry:
%c0 = icmp ule i32 %s0, %s1
%c1 = icmp sge i32 %s2, %s3
@@ -963,21 +963,21 @@ entry:
}
define i32 @and_ugt_eq(i32 %s0, i32 %s1, i32 %s2, i32 %s3) {
-; SDISEL-LABEL: and_ugt_eq:
-; SDISEL: // %bb.0: // %entry
-; SDISEL-NEXT: cmp w0, w1
-; SDISEL-NEXT: ccmp w2, w3, #0, hi
-; SDISEL-NEXT: cset w0, eq
-; SDISEL-NEXT: ret
-;
-; GISEL-LABEL: and_ugt_eq:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: cmp w0, w1
-; GISEL-NEXT: cset w8, hi
-; GISEL-NEXT: cmp w2, w3
-; GISEL-NEXT: cset w9, eq
-; GISEL-NEXT: and w0, w8, w9
-; GISEL-NEXT: ret
+; CHECK-SD-LABEL: and_ugt_eq:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: ccmp w2, w3, #0, hi
+; CHECK-SD-NEXT: cset w0, eq
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: and_ugt_eq:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: cset w8, hi
+; CHECK-GI-NEXT: cmp w2, w3
+; CHECK-GI-NEXT: cset w9, eq
+; CHECK-GI-NEXT: and w0, w8, w9
+; CHECK-GI-NEXT: ret
entry:
%c0 = icmp ugt i32 %s0, %s1
%c1 = icmp eq i32 %s2, %s3
@@ -987,21 +987,21 @@ entry:
}
define i32 @and_ugt_ne(i32 %s0, i32 %s1, i32 %s2, i32 %s3) {
-; SDISEL-LABEL: and_ugt_ne:
-; SDISEL: // %bb.0: // %entry
-; SDISEL-NEXT: cmp w0, w1
-; SDISEL-NEXT: ccmp w2, w3, #4, hi
-; SDISEL-NEXT: cset w0, ne
-; SDISEL-NEXT: ret
-;
-; GISEL-LABEL: and_ugt_ne:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: cmp w0, w1
-; GISEL-NEXT: cset w8, hi
-; GISEL-NEXT: cmp w2, w3
-; GISEL-NEXT: cset w9, ne
-; GISEL-NEXT: and w0, w8, w9
-; GISEL-NEXT: ret
+; CHECK-SD-LABEL: and_ugt_ne:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: ccmp w2, w3, #4, hi
+; CHECK-SD-NEXT: cset w0, ne
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: and_ugt_ne:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: cset w8, hi
+; CHECK-GI-NEXT: cmp w2, w3
+; CHECK-GI-NEXT: cset w9, ne
+; CHECK-GI-NEXT: and w0, w8, w9
+; CHECK-GI-NEXT: ret
entry:
%c0 = icmp ugt i32 %s0, %s1
%c1 = icmp ne i32 %s2, %s3
@@ -1011,21 +1011,21 @@ entry:
}
define i32 @and_ugt_ult(i32 %s0, i32 %s1, i32 %s2, i32 %s3) {
-; SDISEL-LABEL: and_ugt_ult:
-; SDISEL: // %bb.0: // %entry
-; SDISEL-NEXT: cmp w0, w1
-; SDISEL-NEXT: ccmp w2, w3, #2, hi
-; SDISEL-NEXT: cset w0, lo
-; SDISEL-NEXT: ret
-;
-; GISEL-LABEL: and_ugt_ult:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: cmp w0, w1
-; GISEL-NEXT: cset w8, hi
-; GISEL-NEXT: cmp w2, w3
-; GISEL-NEXT: cset w9, lo
-; GISEL-NEXT: and w0, w8, w9
-; GISEL-NEXT: ret
+; CHECK-SD-LABEL: and_ugt_ult:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: ccmp w2, w3, #2, hi
+; CHECK-SD-NEXT: cset w0, lo
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: and_ugt_ult:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: cset w8, hi
+; CHECK-GI-NEXT: cmp w2, w3
+; CHECK-GI-NEXT: cset w9, lo
+; CHECK-GI-NEXT: and w0, w8, w9
+; CHECK-GI-NEXT: ret
entry:
%c0 = icmp ugt i32 %s0, %s1
%c1 = icmp ult i32 %s2, %s3
@@ -1035,21 +1035,21 @@ entry:
}
define i32 @and_ugt_ule(i32 %s0, i32 %s1, i32 %s2, i32 %s3) {
-; SDISEL-LABEL: and_ugt_ule:
-; SDISEL: // %bb.0: // %entry
-; SDISEL-NEXT: cmp w0, w1
-; SDISEL-NEXT: ccmp w2, w3, #2, hi
-; SDISEL-NEXT: cset w0, ls
-; SDISEL-NEXT: ret
-;
-; GISEL-LABEL: and_ugt_ule:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: cmp w0, w1
-; GISEL-NEXT: cset w8, hi
-; GISEL-NEXT: cmp w2, w3
-; GISEL-NEXT: cset w9, ls
-; GISEL-NEXT: and w0, w8, w9
-; GISEL-NEXT: ret
+; CHECK-SD-LABEL: and_ugt_ule:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: ccmp w2, w3, #2, hi
+; CHECK-SD-NEXT: cset w0, ls
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: and_ugt_ule:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: cset w8, hi
+; CHECK-GI-NEXT: cmp w2, w3
+; CHECK-GI-NEXT: cset w9, ls
+; CHECK-GI-NEXT: and w0, w8, w9
+; CHECK-GI-NEXT: ret
entry:
%c0 = icmp ugt i32 %s0, %s1
%c1 = icmp ule i32 %s2, %s3
@@ -1059,21 +1059,21 @@ entry:
}
define i32 @and_ugt_ugt(i32 %s0, i32 %s1, i32 %s2, i32 %s3) {
-; SDISEL-LABEL: and_ugt_ugt:
-; SDISEL: // %bb.0: // %entry
-; SDISEL-NEXT: cmp w0, w1
-; SDISEL-NEXT: ccmp w2, w3, #0, hi
-; SDISEL-NEXT: cset w0, hi
-; SDISEL-NEXT: ret
-;
-; GISEL-LABEL: and_ugt_ugt:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: cmp w0, w1
-; GISEL-NEXT: cset w8, hi
-; GISEL-NEXT: cmp w2, w3
-; GISEL-NEXT: cset w9, hi
-; GISEL-NEXT: and w0, w8, w9
-; GISEL-NEXT: ret
+; CHECK-SD-LABEL: and_ugt_ugt:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: ccmp w2, w3, #0, hi
+; CHECK-SD-NEXT: cset w0, hi
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: and_ugt_ugt:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: cset w8, hi
+; CHECK-GI-NEXT: cmp w2, w3
+; CHECK-GI-NEXT: cset w9, hi
+; CHECK-GI-NEXT: and w0, w8, w9
+; CHECK-GI-NEXT: ret
entry:
%c0 = icmp ugt i32 %s0, %s1
%c1 = icmp ugt i32 %s2, %s3
@@ -1083,21 +1083,21 @@ entry:
}
define i32 @and_ugt_uge(i32 %s0, i32 %s1, i32 %s2, i32 %s3) {
-; SDISEL-LABEL: and_ugt_uge:
-; SDISEL: // %bb.0: // %entry
-; SDISEL-NEXT: cmp w0, w1
-; SDISEL-NEXT: ccmp w2, w3, #0, hi
-; SDISEL-NEXT: cset w0, hs
-; SDISEL-NEXT: ret
-;
-; GISEL-LABEL: and_ugt_uge:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: cmp w0, w1
-; GISEL-NEXT: cset w8, hi
-; GISEL-NEXT: cmp w2, w3
-; GISEL-NEXT: cset w9, hs
-; GISEL-NEXT: and w0, w8, w9
-; GISEL-NEXT: ret
+; CHECK-SD-LABEL: and_ugt_uge:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: ccmp w2, w3, #0, hi
+; CHECK-SD-NEXT: cset w0, hs
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: and_ugt_uge:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: cset w8, hi
+; CHECK-GI-NEXT: cmp w2, w3
+; CHECK-GI-NEXT: cset w9, hs
+; CHECK-GI-NEXT: and w0, w8, w9
+; CHECK-GI-NEXT: ret
entry:
%c0 = icmp ugt i32 %s0, %s1
%c1 = icmp uge i32 %s2, %s3
@@ -1107,21 +1107,21 @@ entry:
}
define i32 @and_ugt_slt(i32 %s0, i32 %s1, i32 %s2, i32 %s3) {
-; SDISEL-LABEL: and_ugt_slt:
-; SDISEL: // %bb.0: // %entry
-; SDISEL-NEXT: cmp w0, w1
-; SDISEL-NEXT: ccmp w2, w3, #0, hi
-; SDISEL-NEXT: cset w0, lt
-; SDISEL-NEXT: ret
-;
-; GISEL-LABEL: and_ugt_slt:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: cmp w0, w1
-; GISEL-NEXT: cset w8, hi
-; GISEL-NEXT: cmp w2, w3
-; GISEL-NEXT: cset w9, lt
-; GISEL-NEXT: and w0, w8, w9
-; GISEL-NEXT: ret
+; CHECK-SD-LABEL: and_ugt_slt:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: ccmp w2, w3, #0, hi
+; CHECK-SD-NEXT: cset w0, lt
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: and_ugt_slt:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: cset w8, hi
+; CHECK-GI-NEXT: cmp w2, w3
+; CHECK-GI-NEXT: cset w9, lt
+; CHECK-GI-NEXT: and w0, w8, w9
+; CHECK-GI-NEXT: ret
entry:
%c0 = icmp ugt i32 %s0, %s1
%c1 = icmp slt i32 %s2, %s3
@@ -1131,21 +1131,21 @@ entry:
}
define i32 @and_ugt_sle(i32 %s0, i32 %s1, i32 %s2, i32 %s3) {
-; SDISEL-LABEL: and_ugt_sle:
-; SDISEL: // %bb.0: // %entry
-; SDISEL-NEXT: cmp w0, w1
-; SDISEL-NEXT: ccmp w2, w3, #0, hi
-; SDISEL-NEXT: cset w0, le
-; SDISEL-NEXT: ret
-;
-; GISEL-LABEL: and_ugt_sle:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: cmp w0, w1
-; GISEL-NEXT: cset w8, hi
-; GISEL-NEXT: cmp w2, w3
-; GISEL-NEXT: cset w9, le
-; GISEL-NEXT: and w0, w8, w9
-; GISEL-NEXT: ret
+; CHECK-SD-LABEL: and_ugt_sle:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: ccmp w2, w3, #0, hi
+; CHECK-SD-NEXT: cset w0, le
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: and_ugt_sle:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: cset w8, hi
+; CHECK-GI-NEXT: cmp w2, w3
+; CHECK-GI-NEXT: cset w9, le
+; CHECK-GI-NEXT: and w0, w8, w9
+; CHECK-GI-NEXT: ret
entry:
%c0 = icmp ugt i32 %s0, %s1
%c1 = icmp sle i32 %s2, %s3
@@ -1155,21 +1155,21 @@ entry:
}
define i32 @and_ugt_sgt(i32 %s0, i32 %s1, i32 %s2, i32 %s3) {
-; SDISEL-LABEL: and_ugt_sgt:
-; SDISEL: // %bb.0: // %entry
-; SDISEL-NEXT: cmp w0, w1
-; SDISEL-NEXT: ccmp w2, w3, #4, hi
-; SDISEL-NEXT: cset w0, gt
-; SDISEL-NEXT: ret
-;
-; GISEL-LABEL: and_ugt_sgt:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: cmp w0, w1
-; GISEL-NEXT: cset w8, hi
-; GISEL-NEXT: cmp w2, w3
-; GISEL-NEXT: cset w9, gt
-; GISEL-NEXT: and w0, w8, w9
-; GISEL-NEXT: ret
+; CHECK-SD-LABEL: and_ugt_sgt:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: ccmp w2, w3, #4, hi
+; CHECK-SD-NEXT: cset w0, gt
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: and_ugt_sgt:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: cset w8, hi
+; CHECK-GI-NEXT: cmp w2, w3
+; CHECK-GI-NEXT: cset w9, gt
+; CHECK-GI-NEXT: and w0, w8, w9
+; CHECK-GI-NEXT: ret
entry:
%c0 = icmp ugt i32 %s0, %s1
%c1 = icmp sgt i32 %s2, %s3
@@ -1179,21 +1179,21 @@ entry:
}
define i32 @and_ugt_sge(i32 %s0, i32 %s1, i32 %s2, i32 %s3) {
-; SDISEL-LABEL: and_ugt_sge:
-; SDISEL: // %bb.0: // %entry
-; SDISEL-NEXT: cmp w0, w1
-; SDISEL-NEXT: ccmp w2, w3, #8, hi
-; SDISEL-NEXT: cset w0, ge
-; SDISEL-NEXT: ret
-;
-; GISEL-LABEL: and_ugt_sge:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: cmp w0, w1
-; GISEL-NEXT: cset w8, hi
-; GISEL-NEXT: cmp w2, w3
-; GISEL-NEXT: cset w9, ge
-; GISEL-NEXT: and w0, w8, w9
-; GISEL-NEXT: ret
+; CHECK-SD-LABEL: and_ugt_sge:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: ccmp w2, w3, #8, hi
+; CHECK-SD-NEXT: cset w0, ge
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: and_ugt_sge:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: cset w8, hi
+; CHECK-GI-NEXT: cmp w2, w3
+; CHECK-GI-NEXT: cset w9, ge
+; CHECK-GI-NEXT: and w0, w8, w9
+; CHECK-GI-NEXT: ret
entry:
%c0 = icmp ugt i32 %s0, %s1
%c1 = icmp sge i32 %s2, %s3
@@ -1203,21 +1203,21 @@ entry:
}
define i32 @and_uge_eq(i32 %s0, i32 %s1, i32 %s2, i32 %s3) {
-; SDISEL-LABEL: and_uge_eq:
-; SDISEL: // %bb.0: // %entry
-; SDISEL-NEXT: cmp w0, w1
-; SDISEL-NEXT: ccmp w2, w3, #0, hs
-; SDISEL-NEXT: cset w0, eq
-; SDISEL-NEXT: ret
-;
-; GISEL-LABEL: and_uge_eq:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: cmp w0, w1
-; GISEL-NEXT: cset w8, hs
-; GISEL-NEXT: cmp w2, w3
-; GISEL-NEXT: cset w9, eq
-; GISEL-NEXT: and w0, w8, w9
-; GISEL-NEXT: ret
+; CHECK-SD-LABEL: and_uge_eq:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: ccmp w2, w3, #0, hs
+; CHECK-SD-NEXT: cset w0, eq
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: and_uge_eq:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: cset w8, hs
+; CHECK-GI-NEXT: cmp w2, w3
+; CHECK-GI-NEXT: cset w9, eq
+; CHECK-GI-NEXT: and w0, w8, w9
+; CHECK-GI-NEXT: ret
entry:
%c0 = icmp uge i32 %s0, %s1
%c1 = icmp eq i32 %s2, %s3
@@ -1227,21 +1227,21 @@ entry:
}
define i32 @and_uge_ne(i32 %s0, i32 %s1, i32 %s2, i32 %s3) {
-; SDISEL-LABEL: and_uge_ne:
-; SDISEL: // %bb.0: // %entry
-; SDISEL-NEXT: cmp w0, w1
-; SDISEL-NEXT: ccmp w2, w3, #4, hs
-; SDISEL-NEXT: cset w0, ne
-; SDISEL-NEXT: ret
-;
-; GISEL-LABEL: and_uge_ne:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: cmp w0, w1
-; GISEL-NEXT: cset w8, hs
-; GISEL-NEXT: cmp w2, w3
-; GISEL-NEXT: cset w9, ne
-; GISEL-NEXT: and w0, w8, w9
-; GISEL-NEXT: ret
+; CHECK-SD-LABEL: and_uge_ne:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: ccmp w2, w3, #4, hs
+; CHECK-SD-NEXT: cset w0, ne
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: and_uge_ne:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: cset w8, hs
+; CHECK-GI-NEXT: cmp w2, w3
+; CHECK-GI-NEXT: cset w9, ne
+; CHECK-GI-NEXT: and w0, w8, w9
+; CHECK-GI-NEXT: ret
entry:
%c0 = icmp uge i32 %s0, %s1
%c1 = icmp ne i32 %s2, %s3
@@ -1251,21 +1251,21 @@ entry:
}
define i32 @and_uge_ult(i32 %s0, i32 %s1, i32 %s2, i32 %s3) {
-; SDISEL-LABEL: and_uge_ult:
-; SDISEL: // %bb.0: // %entry
-; SDISEL-NEXT: cmp w0, w1
-; SDISEL-NEXT: ccmp w2, w3, #2, hs
-; SDISEL-NEXT: cset w0, lo
-; SDISEL-NEXT: ret
-;
-; GISEL-LABEL: and_uge_ult:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: cmp w0, w1
-; GISEL-NEXT: cset w8, hs
-; GISEL-NEXT: cmp w2, w3
-; GISEL-NEXT: cset w9, lo
-; GISEL-NEXT: and w0, w8, w9
-; GISEL-NEXT: ret
+; CHECK-SD-LABEL: and_uge_ult:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: ccmp w2, w3, #2, hs
+; CHECK-SD-NEXT: cset w0, lo
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: and_uge_ult:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: cset w8, hs
+; CHECK-GI-NEXT: cmp w2, w3
+; CHECK-GI-NEXT: cset w9, lo
+; CHECK-GI-NEXT: and w0, w8, w9
+; CHECK-GI-NEXT: ret
entry:
%c0 = icmp uge i32 %s0, %s1
%c1 = icmp ult i32 %s2, %s3
@@ -1275,21 +1275,21 @@ entry:
}
define i32 @and_uge_ule(i32 %s0, i32 %s1, i32 %s2, i32 %s3) {
-; SDISEL-LABEL: and_uge_ule:
-; SDISEL: // %bb.0: // %entry
-; SDISEL-NEXT: cmp w0, w1
-; SDISEL-NEXT: ccmp w2, w3, #2, hs
-; SDISEL-NEXT: cset w0, ls
-; SDISEL-NEXT: ret
-;
-; GISEL-LABEL: and_uge_ule:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: cmp w0, w1
-; GISEL-NEXT: cset w8, hs
-; GISEL-NEXT: cmp w2, w3
-; GISEL-NEXT: cset w9, ls
-; GISEL-NEXT: and w0, w8, w9
-; GISEL-NEXT: ret
+; CHECK-SD-LABEL: and_uge_ule:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: ccmp w2, w3, #2, hs
+; CHECK-SD-NEXT: cset w0, ls
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: and_uge_ule:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: cset w8, hs
+; CHECK-GI-NEXT: cmp w2, w3
+; CHECK-GI-NEXT: cset w9, ls
+; CHECK-GI-NEXT: and w0, w8, w9
+; CHECK-GI-NEXT: ret
entry:
%c0 = icmp uge i32 %s0, %s1
%c1 = icmp ule i32 %s2, %s3
@@ -1299,21 +1299,21 @@ entry:
}
define i32 @and_uge_ugt(i32 %s0, i32 %s1, i32 %s2, i32 %s3) {
-; SDISEL-LABEL: and_uge_ugt:
-; SDISEL: // %bb.0: // %entry
-; SDISEL-NEXT: cmp w0, w1
-; SDISEL-NEXT: ccmp w2, w3, #0, hs
-; SDISEL-NEXT: cset w0, hi
-; SDISEL-NEXT: ret
-;
-; GISEL-LABEL: and_uge_ugt:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: cmp w0, w1
-; GISEL-NEXT: cset w8, hs
-; GISEL-NEXT: cmp w2, w3
-; GISEL-NEXT: cset w9, hi
-; GISEL-NEXT: and w0, w8, w9
-; GISEL-NEXT: ret
+; CHECK-SD-LABEL: and_uge_ugt:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: ccmp w2, w3, #0, hs
+; CHECK-SD-NEXT: cset w0, hi
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: and_uge_ugt:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: cset w8, hs
+; CHECK-GI-NEXT: cmp w2, w3
+; CHECK-GI-NEXT: cset w9, hi
+; CHECK-GI-NEXT: and w0, w8, w9
+; CHECK-GI-NEXT: ret
entry:
%c0 = icmp uge i32 %s0, %s1
%c1 = icmp ugt i32 %s2, %s3
@@ -1323,21 +1323,21 @@ entry:
}
define i32 @and_uge_uge(i32 %s0, i32 %s1, i32 %s2, i32 %s3) {
-; SDISEL-LABEL: and_uge_uge:
-; SDISEL: // %bb.0: // %entry
-; SDISEL-NEXT: cmp w0, w1
-; SDISEL-NEXT: ccmp w2, w3, #0, hs
-; SDISEL-NEXT: cset w0, hs
-; SDISEL-NEXT: ret
-;
-; GISEL-LABEL: and_uge_uge:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: cmp w0, w1
-; GISEL-NEXT: cset w8, hs
-; GISEL-NEXT: cmp w2, w3
-; GISEL-NEXT: cset w9, hs
-; GISEL-NEXT: and w0, w8, w9
-; GISEL-NEXT: ret
+; CHECK-SD-LABEL: and_uge_uge:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: ccmp w2, w3, #0, hs
+; CHECK-SD-NEXT: cset w0, hs
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: and_uge_uge:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: cset w8, hs
+; CHECK-GI-NEXT: cmp w2, w3
+; CHECK-GI-NEXT: cset w9, hs
+; CHECK-GI-NEXT: and w0, w8, w9
+; CHECK-GI-NEXT: ret
entry:
%c0 = icmp uge i32 %s0, %s1
%c1 = icmp uge i32 %s2, %s3
@@ -1347,21 +1347,21 @@ entry:
}
define i32 @and_uge_slt(i32 %s0, i32 %s1, i32 %s2, i32 %s3) {
-; SDISEL-LABEL: and_uge_slt:
-; SDISEL: // %bb.0: // %entry
-; SDISEL-NEXT: cmp w0, w1
-; SDISEL-NEXT: ccmp w2, w3, #0, hs
-; SDISEL-NEXT: cset w0, lt
-; SDISEL-NEXT: ret
-;
-; GISEL-LABEL: and_uge_slt:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: cmp w0, w1
-; GISEL-NEXT: cset w8, hs
-; GISEL-NEXT: cmp w2, w3
-; GISEL-NEXT: cset w9, lt
-; GISEL-NEXT: and w0, w8, w9
-; GISEL-NEXT: ret
+; CHECK-SD-LABEL: and_uge_slt:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: ccmp w2, w3, #0, hs
+; CHECK-SD-NEXT: cset w0, lt
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: and_uge_slt:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: cset w8, hs
+; CHECK-GI-NEXT: cmp w2, w3
+; CHECK-GI-NEXT: cset w9, lt
+; CHECK-GI-NEXT: and w0, w8, w9
+; CHECK-GI-NEXT: ret
entry:
%c0 = icmp uge i32 %s0, %s1
%c1 = icmp slt i32 %s2, %s3
@@ -1371,21 +1371,21 @@ entry:
}
define i32 @and_uge_sle(i32 %s0, i32 %s1, i32 %s2, i32 %s3) {
-; SDISEL-LABEL: and_uge_sle:
-; SDISEL: // %bb.0: // %entry
-; SDISEL-NEXT: cmp w0, w1
-; SDISEL-NEXT: ccmp w2, w3, #0, hs
-; SDISEL-NEXT: cset w0, le
-; SDISEL-NEXT: ret
-;
-; GISEL-LABEL: and_uge_sle:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: cmp w0, w1
-; GISEL-NEXT: cset w8, hs
-; GISEL-NEXT: cmp w2, w3
-; GISEL-NEXT: cset w9, le
-; GISEL-NEXT: and w0, w8, w9
-; GISEL-NEXT: ret
+; CHECK-SD-LABEL: and_uge_sle:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: ccmp w2, w3, #0, hs
+; CHECK-SD-NEXT: cset w0, le
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: and_uge_sle:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: cset w8, hs
+; CHECK-GI-NEXT: cmp w2, w3
+; CHECK-GI-NEXT: cset w9, le
+; CHECK-GI-NEXT: and w0, w8, w9
+; CHECK-GI-NEXT: ret
entry:
%c0 = icmp uge i32 %s0, %s1
%c1 = icmp sle i32 %s2, %s3
@@ -1395,21 +1395,21 @@ entry:
}
define i32 @and_uge_sgt(i32 %s0, i32 %s1, i32 %s2, i32 %s3) {
-; SDISEL-LABEL: and_uge_sgt:
-; SDISEL: // %bb.0: // %entry
-; SDISEL-NEXT: cmp w0, w1
-; SDISEL-NEXT: ccmp w2, w3, #4, hs
-; SDISEL-NEXT: cset w0, gt
-; SDISEL-NEXT: ret
-;
-; GISEL-LABEL: and_uge_sgt:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: cmp w0, w1
-; GISEL-NEXT: cset w8, hs
-; GISEL-NEXT: cmp w2, w3
-; GISEL-NEXT: cset w9, gt
-; GISEL-NEXT: and w0, w8, w9
-; GISEL-NEXT: ret
+; CHECK-SD-LABEL: and_uge_sgt:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: ccmp w2, w3, #4, hs
+; CHECK-SD-NEXT: cset w0, gt
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: and_uge_sgt:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: cset w8, hs
+; CHECK-GI-NEXT: cmp w2, w3
+; CHECK-GI-NEXT: cset w9, gt
+; CHECK-GI-NEXT: and w0, w8, w9
+; CHECK-GI-NEXT: ret
entry:
%c0 = icmp uge i32 %s0, %s1
%c1 = icmp sgt i32 %s2, %s3
@@ -1419,21 +1419,21 @@ entry:
}
define i32 @and_uge_sge(i32 %s0, i32 %s1, i32 %s2, i32 %s3) {
-; SDISEL-LABEL: and_uge_sge:
-; SDISEL: // %bb.0: // %entry
-; SDISEL-NEXT: cmp w0, w1
-; SDISEL-NEXT: ccmp w2, w3, #8, hs
-; SDISEL-NEXT: cset w0, ge
-; SDISEL-NEXT: ret
-;
-; GISEL-LABEL: and_uge_sge:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: cmp w0, w1
-; GISEL-NEXT: cset w8, hs
-; GISEL-NEXT: cmp w2, w3
-; GISEL-NEXT: cset w9, ge
-; GISEL-NEXT: and w0, w8, w9
-; GISEL-NEXT: ret
+; CHECK-SD-LABEL: and_uge_sge:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: ccmp w2, w3, #8, hs
+; CHECK-SD-NEXT: cset w0, ge
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: and_uge_sge:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: cset w8, hs
+; CHECK-GI-NEXT: cmp w2, w3
+; CHECK-GI-NEXT: cset w9, ge
+; CHECK-GI-NEXT: and w0, w8, w9
+; CHECK-GI-NEXT: ret
entry:
%c0 = icmp uge i32 %s0, %s1
%c1 = icmp sge i32 %s2, %s3
@@ -1443,21 +1443,21 @@ entry:
}
define i32 @and_slt_eq(i32 %s0, i32 %s1, i32 %s2, i32 %s3) {
-; SDISEL-LABEL: and_slt_eq:
-; SDISEL: // %bb.0: // %entry
-; SDISEL-NEXT: cmp w0, w1
-; SDISEL-NEXT: ccmp w2, w3, #0, lt
-; SDISEL-NEXT: cset w0, eq
-; SDISEL-NEXT: ret
-;
-; GISEL-LABEL: and_slt_eq:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: cmp w0, w1
-; GISEL-NEXT: cset w8, lt
-; GISEL-NEXT: cmp w2, w3
-; GISEL-NEXT: cset w9, eq
-; GISEL-NEXT: and w0, w8, w9
-; GISEL-NEXT: ret
+; CHECK-SD-LABEL: and_slt_eq:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: ccmp w2, w3, #0, lt
+; CHECK-SD-NEXT: cset w0, eq
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: and_slt_eq:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: cset w8, lt
+; CHECK-GI-NEXT: cmp w2, w3
+; CHECK-GI-NEXT: cset w9, eq
+; CHECK-GI-NEXT: and w0, w8, w9
+; CHECK-GI-NEXT: ret
entry:
%c0 = icmp slt i32 %s0, %s1
%c1 = icmp eq i32 %s2, %s3
@@ -1467,21 +1467,21 @@ entry:
}
define i32 @and_slt_ne(i32 %s0, i32 %s1, i32 %s2, i32 %s3) {
-; SDISEL-LABEL: and_slt_ne:
-; SDISEL: // %bb.0: // %entry
-; SDISEL-NEXT: cmp w0, w1
-; SDISEL-NEXT: ccmp w2, w3, #4, lt
-; SDISEL-NEXT: cset w0, ne
-; SDISEL-NEXT: ret
-;
-; GISEL-LABEL: and_slt_ne:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: cmp w0, w1
-; GISEL-NEXT: cset w8, lt
-; GISEL-NEXT: cmp w2, w3
-; GISEL-NEXT: cset w9, ne
-; GISEL-NEXT: and w0, w8, w9
-; GISEL-NEXT: ret
+; CHECK-SD-LABEL: and_slt_ne:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: ccmp w2, w3, #4, lt
+; CHECK-SD-NEXT: cset w0, ne
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: and_slt_ne:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: cset w8, lt
+; CHECK-GI-NEXT: cmp w2, w3
+; CHECK-GI-NEXT: cset w9, ne
+; CHECK-GI-NEXT: and w0, w8, w9
+; CHECK-GI-NEXT: ret
entry:
%c0 = icmp slt i32 %s0, %s1
%c1 = icmp ne i32 %s2, %s3
@@ -1491,21 +1491,21 @@ entry:
}
define i32 @and_slt_ult(i32 %s0, i32 %s1, i32 %s2, i32 %s3) {
-; SDISEL-LABEL: and_slt_ult:
-; SDISEL: // %bb.0: // %entry
-; SDISEL-NEXT: cmp w0, w1
-; SDISEL-NEXT: ccmp w2, w3, #2, lt
-; SDISEL-NEXT: cset w0, lo
-; SDISEL-NEXT: ret
-;
-; GISEL-LABEL: and_slt_ult:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: cmp w0, w1
-; GISEL-NEXT: cset w8, lt
-; GISEL-NEXT: cmp w2, w3
-; GISEL-NEXT: cset w9, lo
-; GISEL-NEXT: and w0, w8, w9
-; GISEL-NEXT: ret
+; CHECK-SD-LABEL: and_slt_ult:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: ccmp w2, w3, #2, lt
+; CHECK-SD-NEXT: cset w0, lo
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: and_slt_ult:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: cset w8, lt
+; CHECK-GI-NEXT: cmp w2, w3
+; CHECK-GI-NEXT: cset w9, lo
+; CHECK-GI-NEXT: and w0, w8, w9
+; CHECK-GI-NEXT: ret
entry:
%c0 = icmp slt i32 %s0, %s1
%c1 = icmp ult i32 %s2, %s3
@@ -1515,21 +1515,21 @@ entry:
}
define i32 @and_slt_ule(i32 %s0, i32 %s1, i32 %s2, i32 %s3) {
-; SDISEL-LABEL: and_slt_ule:
-; SDISEL: // %bb.0: // %entry
-; SDISEL-NEXT: cmp w0, w1
-; SDISEL-NEXT: ccmp w2, w3, #2, lt
-; SDISEL-NEXT: cset w0, ls
-; SDISEL-NEXT: ret
-;
-; GISEL-LABEL: and_slt_ule:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: cmp w0, w1
-; GISEL-NEXT: cset w8, lt
-; GISEL-NEXT: cmp w2, w3
-; GISEL-NEXT: cset w9, ls
-; GISEL-NEXT: and w0, w8, w9
-; GISEL-NEXT: ret
+; CHECK-SD-LABEL: and_slt_ule:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: ccmp w2, w3, #2, lt
+; CHECK-SD-NEXT: cset w0, ls
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: and_slt_ule:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: cset w8, lt
+; CHECK-GI-NEXT: cmp w2, w3
+; CHECK-GI-NEXT: cset w9, ls
+; CHECK-GI-NEXT: and w0, w8, w9
+; CHECK-GI-NEXT: ret
entry:
%c0 = icmp slt i32 %s0, %s1
%c1 = icmp ule i32 %s2, %s3
@@ -1539,21 +1539,21 @@ entry:
}
define i32 @and_slt_ugt(i32 %s0, i32 %s1, i32 %s2, i32 %s3) {
-; SDISEL-LABEL: and_slt_ugt:
-; SDISEL: // %bb.0: // %entry
-; SDISEL-NEXT: cmp w0, w1
-; SDISEL-NEXT: ccmp w2, w3, #0, lt
-; SDISEL-NEXT: cset w0, hi
-; SDISEL-NEXT: ret
-;
-; GISEL-LABEL: and_slt_ugt:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: cmp w0, w1
-; GISEL-NEXT: cset w8, lt
-; GISEL-NEXT: cmp w2, w3
-; GISEL-NEXT: cset w9, hi
-; GISEL-NEXT: and w0, w8, w9
-; GISEL-NEXT: ret
+; CHECK-SD-LABEL: and_slt_ugt:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: ccmp w2, w3, #0, lt
+; CHECK-SD-NEXT: cset w0, hi
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: and_slt_ugt:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: cset w8, lt
+; CHECK-GI-NEXT: cmp w2, w3
+; CHECK-GI-NEXT: cset w9, hi
+; CHECK-GI-NEXT: and w0, w8, w9
+; CHECK-GI-NEXT: ret
entry:
%c0 = icmp slt i32 %s0, %s1
%c1 = icmp ugt i32 %s2, %s3
@@ -1563,21 +1563,21 @@ entry:
}
define i32 @and_slt_uge(i32 %s0, i32 %s1, i32 %s2, i32 %s3) {
-; SDISEL-LABEL: and_slt_uge:
-; SDISEL: // %bb.0: // %entry
-; SDISEL-NEXT: cmp w0, w1
-; SDISEL-NEXT: ccmp w2, w3, #0, lt
-; SDISEL-NEXT: cset w0, hs
-; SDISEL-NEXT: ret
-;
-; GISEL-LABEL: and_slt_uge:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: cmp w0, w1
-; GISEL-NEXT: cset w8, lt
-; GISEL-NEXT: cmp w2, w3
-; GISEL-NEXT: cset w9, hs
-; GISEL-NEXT: and w0, w8, w9
-; GISEL-NEXT: ret
+; CHECK-SD-LABEL: and_slt_uge:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: ccmp w2, w3, #0, lt
+; CHECK-SD-NEXT: cset w0, hs
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: and_slt_uge:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: cset w8, lt
+; CHECK-GI-NEXT: cmp w2, w3
+; CHECK-GI-NEXT: cset w9, hs
+; CHECK-GI-NEXT: and w0, w8, w9
+; CHECK-GI-NEXT: ret
entry:
%c0 = icmp slt i32 %s0, %s1
%c1 = icmp uge i32 %s2, %s3
@@ -1587,21 +1587,21 @@ entry:
}
define i32 @and_slt_slt(i32 %s0, i32 %s1, i32 %s2, i32 %s3) {
-; SDISEL-LABEL: and_slt_slt:
-; SDISEL: // %bb.0: // %entry
-; SDISEL-NEXT: cmp w0, w1
-; SDISEL-NEXT: ccmp w2, w3, #0, lt
-; SDISEL-NEXT: cset w0, lt
-; SDISEL-NEXT: ret
-;
-; GISEL-LABEL: and_slt_slt:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: cmp w0, w1
-; GISEL-NEXT: cset w8, lt
-; GISEL-NEXT: cmp w2, w3
-; GISEL-NEXT: cset w9, lt
-; GISEL-NEXT: and w0, w8, w9
-; GISEL-NEXT: ret
+; CHECK-SD-LABEL: and_slt_slt:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: ccmp w2, w3, #0, lt
+; CHECK-SD-NEXT: cset w0, lt
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: and_slt_slt:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: cset w8, lt
+; CHECK-GI-NEXT: cmp w2, w3
+; CHECK-GI-NEXT: cset w9, lt
+; CHECK-GI-NEXT: and w0, w8, w9
+; CHECK-GI-NEXT: ret
entry:
%c0 = icmp slt i32 %s0, %s1
%c1 = icmp slt i32 %s2, %s3
@@ -1611,21 +1611,21 @@ entry:
}
define i32 @and_slt_sle(i32 %s0, i32 %s1, i32 %s2, i32 %s3) {
-; SDISEL-LABEL: and_slt_sle:
-; SDISEL: // %bb.0: // %entry
-; SDISEL-NEXT: cmp w0, w1
-; SDISEL-NEXT: ccmp w2, w3, #0, lt
-; SDISEL-NEXT: cset w0, le
-; SDISEL-NEXT: ret
-;
-; GISEL-LABEL: and_slt_sle:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: cmp w0, w1
-; GISEL-NEXT: cset w8, lt
-; GISEL-NEXT: cmp w2, w3
-; GISEL-NEXT: cset w9, le
-; GISEL-NEXT: and w0, w8, w9
-; GISEL-NEXT: ret
+; CHECK-SD-LABEL: and_slt_sle:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: ccmp w2, w3, #0, lt
+; CHECK-SD-NEXT: cset w0, le
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: and_slt_sle:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: cset w8, lt
+; CHECK-GI-NEXT: cmp w2, w3
+; CHECK-GI-NEXT: cset w9, le
+; CHECK-GI-NEXT: and w0, w8, w9
+; CHECK-GI-NEXT: ret
entry:
%c0 = icmp slt i32 %s0, %s1
%c1 = icmp sle i32 %s2, %s3
@@ -1635,21 +1635,21 @@ entry:
}
define i32 @and_slt_sgt(i32 %s0, i32 %s1, i32 %s2, i32 %s3) {
-; SDISEL-LABEL: and_slt_sgt:
-; SDISEL: // %bb.0: // %entry
-; SDISEL-NEXT: cmp w0, w1
-; SDISEL-NEXT: ccmp w2, w3, #4, lt
-; SDISEL-NEXT: cset w0, gt
-; SDISEL-NEXT: ret
-;
-; GISEL-LABEL: and_slt_sgt:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: cmp w0, w1
-; GISEL-NEXT: cset w8, lt
-; GISEL-NEXT: cmp w2, w3
-; GISEL-NEXT: cset w9, gt
-; GISEL-NEXT: and w0, w8, w9
-; GISEL-NEXT: ret
+; CHECK-SD-LABEL: and_slt_sgt:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: ccmp w2, w3, #4, lt
+; CHECK-SD-NEXT: cset w0, gt
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: and_slt_sgt:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: cset w8, lt
+; CHECK-GI-NEXT: cmp w2, w3
+; CHECK-GI-NEXT: cset w9, gt
+; CHECK-GI-NEXT: and w0, w8, w9
+; CHECK-GI-NEXT: ret
entry:
%c0 = icmp slt i32 %s0, %s1
%c1 = icmp sgt i32 %s2, %s3
@@ -1659,21 +1659,21 @@ entry:
}
define i32 @and_slt_sge(i32 %s0, i32 %s1, i32 %s2, i32 %s3) {
-; SDISEL-LABEL: and_slt_sge:
-; SDISEL: // %bb.0: // %entry
-; SDISEL-NEXT: cmp w0, w1
-; SDISEL-NEXT: ccmp w2, w3, #8, lt
-; SDISEL-NEXT: cset w0, ge
-; SDISEL-NEXT: ret
-;
-; GISEL-LABEL: and_slt_sge:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: cmp w0, w1
-; GISEL-NEXT: cset w8, lt
-; GISEL-NEXT: cmp w2, w3
-; GISEL-NEXT: cset w9, ge
-; GISEL-NEXT: and w0, w8, w9
-; GISEL-NEXT: ret
+; CHECK-SD-LABEL: and_slt_sge:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: ccmp w2, w3, #8, lt
+; CHECK-SD-NEXT: cset w0, ge
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: and_slt_sge:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: cset w8, lt
+; CHECK-GI-NEXT: cmp w2, w3
+; CHECK-GI-NEXT: cset w9, ge
+; CHECK-GI-NEXT: and w0, w8, w9
+; CHECK-GI-NEXT: ret
entry:
%c0 = icmp slt i32 %s0, %s1
%c1 = icmp sge i32 %s2, %s3
@@ -1683,21 +1683,21 @@ entry:
}
define i32 @and_sle_eq(i32 %s0, i32 %s1, i32 %s2, i32 %s3) {
-; SDISEL-LABEL: and_sle_eq:
-; SDISEL: // %bb.0: // %entry
-; SDISEL-NEXT: cmp w0, w1
-; SDISEL-NEXT: ccmp w2, w3, #0, le
-; SDISEL-NEXT: cset w0, eq
-; SDISEL-NEXT: ret
-;
-; GISEL-LABEL: and_sle_eq:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: cmp w0, w1
-; GISEL-NEXT: cset w8, le
-; GISEL-NEXT: cmp w2, w3
-; GISEL-NEXT: cset w9, eq
-; GISEL-NEXT: and w0, w8, w9
-; GISEL-NEXT: ret
+; CHECK-SD-LABEL: and_sle_eq:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: ccmp w2, w3, #0, le
+; CHECK-SD-NEXT: cset w0, eq
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: and_sle_eq:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: cset w8, le
+; CHECK-GI-NEXT: cmp w2, w3
+; CHECK-GI-NEXT: cset w9, eq
+; CHECK-GI-NEXT: and w0, w8, w9
+; CHECK-GI-NEXT: ret
entry:
%c0 = icmp sle i32 %s0, %s1
%c1 = icmp eq i32 %s2, %s3
@@ -1707,21 +1707,21 @@ entry:
}
define i32 @and_sle_ne(i32 %s0, i32 %s1, i32 %s2, i32 %s3) {
-; SDISEL-LABEL: and_sle_ne:
-; SDISEL: // %bb.0: // %entry
-; SDISEL-NEXT: cmp w0, w1
-; SDISEL-NEXT: ccmp w2, w3, #4, le
-; SDISEL-NEXT: cset w0, ne
-; SDISEL-NEXT: ret
-;
-; GISEL-LABEL: and_sle_ne:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: cmp w0, w1
-; GISEL-NEXT: cset w8, le
-; GISEL-NEXT: cmp w2, w3
-; GISEL-NEXT: cset w9, ne
-; GISEL-NEXT: and w0, w8, w9
-; GISEL-NEXT: ret
+; CHECK-SD-LABEL: and_sle_ne:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: ccmp w2, w3, #4, le
+; CHECK-SD-NEXT: cset w0, ne
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: and_sle_ne:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: cset w8, le
+; CHECK-GI-NEXT: cmp w2, w3
+; CHECK-GI-NEXT: cset w9, ne
+; CHECK-GI-NEXT: and w0, w8, w9
+; CHECK-GI-NEXT: ret
entry:
%c0 = icmp sle i32 %s0, %s1
%c1 = icmp ne i32 %s2, %s3
@@ -1731,21 +1731,21 @@ entry:
}
define i32 @and_sle_ult(i32 %s0, i32 %s1, i32 %s2, i32 %s3) {
-; SDISEL-LABEL: and_sle_ult:
-; SDISEL: // %bb.0: // %entry
-; SDISEL-NEXT: cmp w0, w1
-; SDISEL-NEXT: ccmp w2, w3, #2, le
-; SDISEL-NEXT: cset w0, lo
-; SDISEL-NEXT: ret
-;
-; GISEL-LABEL: and_sle_ult:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: cmp w0, w1
-; GISEL-NEXT: cset w8, le
-; GISEL-NEXT: cmp w2, w3
-; GISEL-NEXT: cset w9, lo
-; GISEL-NEXT: and w0, w8, w9
-; GISEL-NEXT: ret
+; CHECK-SD-LABEL: and_sle_ult:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: ccmp w2, w3, #2, le
+; CHECK-SD-NEXT: cset w0, lo
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: and_sle_ult:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: cset w8, le
+; CHECK-GI-NEXT: cmp w2, w3
+; CHECK-GI-NEXT: cset w9, lo
+; CHECK-GI-NEXT: and w0, w8, w9
+; CHECK-GI-NEXT: ret
entry:
%c0 = icmp sle i32 %s0, %s1
%c1 = icmp ult i32 %s2, %s3
@@ -1755,21 +1755,21 @@ entry:
}
define i32 @and_sle_ule(i32 %s0, i32 %s1, i32 %s2, i32 %s3) {
-; SDISEL-LABEL: and_sle_ule:
-; SDISEL: // %bb.0: // %entry
-; SDISEL-NEXT: cmp w0, w1
-; SDISEL-NEXT: ccmp w2, w3, #2, le
-; SDISEL-NEXT: cset w0, ls
-; SDISEL-NEXT: ret
-;
-; GISEL-LABEL: and_sle_ule:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: cmp w0, w1
-; GISEL-NEXT: cset w8, le
-; GISEL-NEXT: cmp w2, w3
-; GISEL-NEXT: cset w9, ls
-; GISEL-NEXT: and w0, w8, w9
-; GISEL-NEXT: ret
+; CHECK-SD-LABEL: and_sle_ule:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: ccmp w2, w3, #2, le
+; CHECK-SD-NEXT: cset w0, ls
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: and_sle_ule:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: cset w8, le
+; CHECK-GI-NEXT: cmp w2, w3
+; CHECK-GI-NEXT: cset w9, ls
+; CHECK-GI-NEXT: and w0, w8, w9
+; CHECK-GI-NEXT: ret
entry:
%c0 = icmp sle i32 %s0, %s1
%c1 = icmp ule i32 %s2, %s3
@@ -1779,21 +1779,21 @@ entry:
}
define i32 @and_sle_ugt(i32 %s0, i32 %s1, i32 %s2, i32 %s3) {
-; SDISEL-LABEL: and_sle_ugt:
-; SDISEL: // %bb.0: // %entry
-; SDISEL-NEXT: cmp w0, w1
-; SDISEL-NEXT: ccmp w2, w3, #0, le
-; SDISEL-NEXT: cset w0, hi
-; SDISEL-NEXT: ret
-;
-; GISEL-LABEL: and_sle_ugt:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: cmp w0, w1
-; GISEL-NEXT: cset w8, le
-; GISEL-NEXT: cmp w2, w3
-; GISEL-NEXT: cset w9, hi
-; GISEL-NEXT: and w0, w8, w9
-; GISEL-NEXT: ret
+; CHECK-SD-LABEL: and_sle_ugt:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: ccmp w2, w3, #0, le
+; CHECK-SD-NEXT: cset w0, hi
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: and_sle_ugt:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: cset w8, le
+; CHECK-GI-NEXT: cmp w2, w3
+; CHECK-GI-NEXT: cset w9, hi
+; CHECK-GI-NEXT: and w0, w8, w9
+; CHECK-GI-NEXT: ret
entry:
%c0 = icmp sle i32 %s0, %s1
%c1 = icmp ugt i32 %s2, %s3
@@ -1803,21 +1803,21 @@ entry:
}
define i32 @and_sle_uge(i32 %s0, i32 %s1, i32 %s2, i32 %s3) {
-; SDISEL-LABEL: and_sle_uge:
-; SDISEL: // %bb.0: // %entry
-; SDISEL-NEXT: cmp w0, w1
-; SDISEL-NEXT: ccmp w2, w3, #0, le
-; SDISEL-NEXT: cset w0, hs
-; SDISEL-NEXT: ret
-;
-; GISEL-LABEL: and_sle_uge:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: cmp w0, w1
-; GISEL-NEXT: cset w8, le
-; GISEL-NEXT: cmp w2, w3
-; GISEL-NEXT: cset w9, hs
-; GISEL-NEXT: and w0, w8, w9
-; GISEL-NEXT: ret
+; CHECK-SD-LABEL: and_sle_uge:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: ccmp w2, w3, #0, le
+; CHECK-SD-NEXT: cset w0, hs
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: and_sle_uge:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: cset w8, le
+; CHECK-GI-NEXT: cmp w2, w3
+; CHECK-GI-NEXT: cset w9, hs
+; CHECK-GI-NEXT: and w0, w8, w9
+; CHECK-GI-NEXT: ret
entry:
%c0 = icmp sle i32 %s0, %s1
%c1 = icmp uge i32 %s2, %s3
@@ -1827,21 +1827,21 @@ entry:
}
define i32 @and_sle_slt(i32 %s0, i32 %s1, i32 %s2, i32 %s3) {
-; SDISEL-LABEL: and_sle_slt:
-; SDISEL: // %bb.0: // %entry
-; SDISEL-NEXT: cmp w0, w1
-; SDISEL-NEXT: ccmp w2, w3, #0, le
-; SDISEL-NEXT: cset w0, lt
-; SDISEL-NEXT: ret
-;
-; GISEL-LABEL: and_sle_slt:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: cmp w0, w1
-; GISEL-NEXT: cset w8, le
-; GISEL-NEXT: cmp w2, w3
-; GISEL-NEXT: cset w9, lt
-; GISEL-NEXT: and w0, w8, w9
-; GISEL-NEXT: ret
+; CHECK-SD-LABEL: and_sle_slt:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: ccmp w2, w3, #0, le
+; CHECK-SD-NEXT: cset w0, lt
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: and_sle_slt:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: cset w8, le
+; CHECK-GI-NEXT: cmp w2, w3
+; CHECK-GI-NEXT: cset w9, lt
+; CHECK-GI-NEXT: and w0, w8, w9
+; CHECK-GI-NEXT: ret
entry:
%c0 = icmp sle i32 %s0, %s1
%c1 = icmp slt i32 %s2, %s3
@@ -1851,21 +1851,21 @@ entry:
}
define i32 @and_sle_sle(i32 %s0, i32 %s1, i32 %s2, i32 %s3) {
-; SDISEL-LABEL: and_sle_sle:
-; SDISEL: // %bb.0: // %entry
-; SDISEL-NEXT: cmp w0, w1
-; SDISEL-NEXT: ccmp w2, w3, #0, le
-; SDISEL-NEXT: cset w0, le
-; SDISEL-NEXT: ret
-;
-; GISEL-LABEL: and_sle_sle:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: cmp w0, w1
-; GISEL-NEXT: cset w8, le
-; GISEL-NEXT: cmp w2, w3
-; GISEL-NEXT: cset w9, le
-; GISEL-NEXT: and w0, w8, w9
-; GISEL-NEXT: ret
+; CHECK-SD-LABEL: and_sle_sle:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: ccmp w2, w3, #0, le
+; CHECK-SD-NEXT: cset w0, le
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: and_sle_sle:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: cset w8, le
+; CHECK-GI-NEXT: cmp w2, w3
+; CHECK-GI-NEXT: cset w9, le
+; CHECK-GI-NEXT: and w0, w8, w9
+; CHECK-GI-NEXT: ret
entry:
%c0 = icmp sle i32 %s0, %s1
%c1 = icmp sle i32 %s2, %s3
@@ -1875,21 +1875,21 @@ entry:
}
define i32 @and_sle_sgt(i32 %s0, i32 %s1, i32 %s2, i32 %s3) {
-; SDISEL-LABEL: and_sle_sgt:
-; SDISEL: // %bb.0: // %entry
-; SDISEL-NEXT: cmp w0, w1
-; SDISEL-NEXT: ccmp w2, w3, #4, le
-; SDISEL-NEXT: cset w0, gt
-; SDISEL-NEXT: ret
-;
-; GISEL-LABEL: and_sle_sgt:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: cmp w0, w1
-; GISEL-NEXT: cset w8, le
-; GISEL-NEXT: cmp w2, w3
-; GISEL-NEXT: cset w9, gt
-; GISEL-NEXT: and w0, w8, w9
-; GISEL-NEXT: ret
+; CHECK-SD-LABEL: and_sle_sgt:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: ccmp w2, w3, #4, le
+; CHECK-SD-NEXT: cset w0, gt
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: and_sle_sgt:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: cset w8, le
+; CHECK-GI-NEXT: cmp w2, w3
+; CHECK-GI-NEXT: cset w9, gt
+; CHECK-GI-NEXT: and w0, w8, w9
+; CHECK-GI-NEXT: ret
entry:
%c0 = icmp sle i32 %s0, %s1
%c1 = icmp sgt i32 %s2, %s3
@@ -1899,21 +1899,21 @@ entry:
}
define i32 @and_sle_sge(i32 %s0, i32 %s1, i32 %s2, i32 %s3) {
-; SDISEL-LABEL: and_sle_sge:
-; SDISEL: // %bb.0: // %entry
-; SDISEL-NEXT: cmp w0, w1
-; SDISEL-NEXT: ccmp w2, w3, #8, le
-; SDISEL-NEXT: cset w0, ge
-; SDISEL-NEXT: ret
-;
-; GISEL-LABEL: and_sle_sge:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: cmp w0, w1
-; GISEL-NEXT: cset w8, le
-; GISEL-NEXT: cmp w2, w3
-; GISEL-NEXT: cset w9, ge
-; GISEL-NEXT: and w0, w8, w9
-; GISEL-NEXT: ret
+; CHECK-SD-LABEL: and_sle_sge:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: ccmp w2, w3, #8, le
+; CHECK-SD-NEXT: cset w0, ge
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: and_sle_sge:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: cset w8, le
+; CHECK-GI-NEXT: cmp w2, w3
+; CHECK-GI-NEXT: cset w9, ge
+; CHECK-GI-NEXT: and w0, w8, w9
+; CHECK-GI-NEXT: ret
entry:
%c0 = icmp sle i32 %s0, %s1
%c1 = icmp sge i32 %s2, %s3
@@ -1923,21 +1923,21 @@ entry:
}
define i32 @and_sgt_eq(i32 %s0, i32 %s1, i32 %s2, i32 %s3) {
-; SDISEL-LABEL: and_sgt_eq:
-; SDISEL: // %bb.0: // %entry
-; SDISEL-NEXT: cmp w0, w1
-; SDISEL-NEXT: ccmp w2, w3, #0, gt
-; SDISEL-NEXT: cset w0, eq
-; SDISEL-NEXT: ret
-;
-; GISEL-LABEL: and_sgt_eq:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: cmp w0, w1
-; GISEL-NEXT: cset w8, gt
-; GISEL-NEXT: cmp w2, w3
-; GISEL-NEXT: cset w9, eq
-; GISEL-NEXT: and w0, w8, w9
-; GISEL-NEXT: ret
+; CHECK-SD-LABEL: and_sgt_eq:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: ccmp w2, w3, #0, gt
+; CHECK-SD-NEXT: cset w0, eq
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: and_sgt_eq:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: cset w8, gt
+; CHECK-GI-NEXT: cmp w2, w3
+; CHECK-GI-NEXT: cset w9, eq
+; CHECK-GI-NEXT: and w0, w8, w9
+; CHECK-GI-NEXT: ret
entry:
%c0 = icmp sgt i32 %s0, %s1
%c1 = icmp eq i32 %s2, %s3
@@ -1947,21 +1947,21 @@ entry:
}
define i32 @and_sgt_ne(i32 %s0, i32 %s1, i32 %s2, i32 %s3) {
-; SDISEL-LABEL: and_sgt_ne:
-; SDISEL: // %bb.0: // %entry
-; SDISEL-NEXT: cmp w0, w1
-; SDISEL-NEXT: ccmp w2, w3, #4, gt
-; SDISEL-NEXT: cset w0, ne
-; SDISEL-NEXT: ret
-;
-; GISEL-LABEL: and_sgt_ne:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: cmp w0, w1
-; GISEL-NEXT: cset w8, gt
-; GISEL-NEXT: cmp w2, w3
-; GISEL-NEXT: cset w9, ne
-; GISEL-NEXT: and w0, w8, w9
-; GISEL-NEXT: ret
+; CHECK-SD-LABEL: and_sgt_ne:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: ccmp w2, w3, #4, gt
+; CHECK-SD-NEXT: cset w0, ne
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: and_sgt_ne:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: cset w8, gt
+; CHECK-GI-NEXT: cmp w2, w3
+; CHECK-GI-NEXT: cset w9, ne
+; CHECK-GI-NEXT: and w0, w8, w9
+; CHECK-GI-NEXT: ret
entry:
%c0 = icmp sgt i32 %s0, %s1
%c1 = icmp ne i32 %s2, %s3
@@ -1971,21 +1971,21 @@ entry:
}
define i32 @and_sgt_ult(i32 %s0, i32 %s1, i32 %s2, i32 %s3) {
-; SDISEL-LABEL: and_sgt_ult:
-; SDISEL: // %bb.0: // %entry
-; SDISEL-NEXT: cmp w0, w1
-; SDISEL-NEXT: ccmp w2, w3, #2, gt
-; SDISEL-NEXT: cset w0, lo
-; SDISEL-NEXT: ret
-;
-; GISEL-LABEL: and_sgt_ult:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: cmp w0, w1
-; GISEL-NEXT: cset w8, gt
-; GISEL-NEXT: cmp w2, w3
-; GISEL-NEXT: cset w9, lo
-; GISEL-NEXT: and w0, w8, w9
-; GISEL-NEXT: ret
+; CHECK-SD-LABEL: and_sgt_ult:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: ccmp w2, w3, #2, gt
+; CHECK-SD-NEXT: cset w0, lo
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: and_sgt_ult:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: cset w8, gt
+; CHECK-GI-NEXT: cmp w2, w3
+; CHECK-GI-NEXT: cset w9, lo
+; CHECK-GI-NEXT: and w0, w8, w9
+; CHECK-GI-NEXT: ret
entry:
%c0 = icmp sgt i32 %s0, %s1
%c1 = icmp ult i32 %s2, %s3
@@ -1995,21 +1995,21 @@ entry:
}
define i32 @and_sgt_ule(i32 %s0, i32 %s1, i32 %s2, i32 %s3) {
-; SDISEL-LABEL: and_sgt_ule:
-; SDISEL: // %bb.0: // %entry
-; SDISEL-NEXT: cmp w0, w1
-; SDISEL-NEXT: ccmp w2, w3, #2, gt
-; SDISEL-NEXT: cset w0, ls
-; SDISEL-NEXT: ret
-;
-; GISEL-LABEL: and_sgt_ule:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: cmp w0, w1
-; GISEL-NEXT: cset w8, gt
-; GISEL-NEXT: cmp w2, w3
-; GISEL-NEXT: cset w9, ls
-; GISEL-NEXT: and w0, w8, w9
-; GISEL-NEXT: ret
+; CHECK-SD-LABEL: and_sgt_ule:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: ccmp w2, w3, #2, gt
+; CHECK-SD-NEXT: cset w0, ls
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: and_sgt_ule:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: cset w8, gt
+; CHECK-GI-NEXT: cmp w2, w3
+; CHECK-GI-NEXT: cset w9, ls
+; CHECK-GI-NEXT: and w0, w8, w9
+; CHECK-GI-NEXT: ret
entry:
%c0 = icmp sgt i32 %s0, %s1
%c1 = icmp ule i32 %s2, %s3
@@ -2019,21 +2019,21 @@ entry:
}
define i32 @and_sgt_ugt(i32 %s0, i32 %s1, i32 %s2, i32 %s3) {
-; SDISEL-LABEL: and_sgt_ugt:
-; SDISEL: // %bb.0: // %entry
-; SDISEL-NEXT: cmp w0, w1
-; SDISEL-NEXT: ccmp w2, w3, #0, gt
-; SDISEL-NEXT: cset w0, hi
-; SDISEL-NEXT: ret
-;
-; GISEL-LABEL: and_sgt_ugt:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: cmp w0, w1
-; GISEL-NEXT: cset w8, gt
-; GISEL-NEXT: cmp w2, w3
-; GISEL-NEXT: cset w9, hi
-; GISEL-NEXT: and w0, w8, w9
-; GISEL-NEXT: ret
+; CHECK-SD-LABEL: and_sgt_ugt:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: ccmp w2, w3, #0, gt
+; CHECK-SD-NEXT: cset w0, hi
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: and_sgt_ugt:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: cset w8, gt
+; CHECK-GI-NEXT: cmp w2, w3
+; CHECK-GI-NEXT: cset w9, hi
+; CHECK-GI-NEXT: and w0, w8, w9
+; CHECK-GI-NEXT: ret
entry:
%c0 = icmp sgt i32 %s0, %s1
%c1 = icmp ugt i32 %s2, %s3
@@ -2043,21 +2043,21 @@ entry:
}
define i32 @and_sgt_uge(i32 %s0, i32 %s1, i32 %s2, i32 %s3) {
-; SDISEL-LABEL: and_sgt_uge:
-; SDISEL: // %bb.0: // %entry
-; SDISEL-NEXT: cmp w0, w1
-; SDISEL-NEXT: ccmp w2, w3, #0, gt
-; SDISEL-NEXT: cset w0, hs
-; SDISEL-NEXT: ret
-;
-; GISEL-LABEL: and_sgt_uge:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: cmp w0, w1
-; GISEL-NEXT: cset w8, gt
-; GISEL-NEXT: cmp w2, w3
-; GISEL-NEXT: cset w9, hs
-; GISEL-NEXT: and w0, w8, w9
-; GISEL-NEXT: ret
+; CHECK-SD-LABEL: and_sgt_uge:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: ccmp w2, w3, #0, gt
+; CHECK-SD-NEXT: cset w0, hs
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: and_sgt_uge:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: cset w8, gt
+; CHECK-GI-NEXT: cmp w2, w3
+; CHECK-GI-NEXT: cset w9, hs
+; CHECK-GI-NEXT: and w0, w8, w9
+; CHECK-GI-NEXT: ret
entry:
%c0 = icmp sgt i32 %s0, %s1
%c1 = icmp uge i32 %s2, %s3
@@ -2067,21 +2067,21 @@ entry:
}
define i32 @and_sgt_slt(i32 %s0, i32 %s1, i32 %s2, i32 %s3) {
-; SDISEL-LABEL: and_sgt_slt:
-; SDISEL: // %bb.0: // %entry
-; SDISEL-NEXT: cmp w0, w1
-; SDISEL-NEXT: ccmp w2, w3, #0, gt
-; SDISEL-NEXT: cset w0, lt
-; SDISEL-NEXT: ret
-;
-; GISEL-LABEL: and_sgt_slt:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: cmp w0, w1
-; GISEL-NEXT: cset w8, gt
-; GISEL-NEXT: cmp w2, w3
-; GISEL-NEXT: cset w9, lt
-; GISEL-NEXT: and w0, w8, w9
-; GISEL-NEXT: ret
+; CHECK-SD-LABEL: and_sgt_slt:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: ccmp w2, w3, #0, gt
+; CHECK-SD-NEXT: cset w0, lt
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: and_sgt_slt:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: cset w8, gt
+; CHECK-GI-NEXT: cmp w2, w3
+; CHECK-GI-NEXT: cset w9, lt
+; CHECK-GI-NEXT: and w0, w8, w9
+; CHECK-GI-NEXT: ret
entry:
%c0 = icmp sgt i32 %s0, %s1
%c1 = icmp slt i32 %s2, %s3
@@ -2091,21 +2091,21 @@ entry:
}
define i32 @and_sgt_sle(i32 %s0, i32 %s1, i32 %s2, i32 %s3) {
-; SDISEL-LABEL: and_sgt_sle:
-; SDISEL: // %bb.0: // %entry
-; SDISEL-NEXT: cmp w0, w1
-; SDISEL-NEXT: ccmp w2, w3, #0, gt
-; SDISEL-NEXT: cset w0, le
-; SDISEL-NEXT: ret
-;
-; GISEL-LABEL: and_sgt_sle:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: cmp w0, w1
-; GISEL-NEXT: cset w8, gt
-; GISEL-NEXT: cmp w2, w3
-; GISEL-NEXT: cset w9, le
-; GISEL-NEXT: and w0, w8, w9
-; GISEL-NEXT: ret
+; CHECK-SD-LABEL: and_sgt_sle:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: ccmp w2, w3, #0, gt
+; CHECK-SD-NEXT: cset w0, le
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: and_sgt_sle:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: cset w8, gt
+; CHECK-GI-NEXT: cmp w2, w3
+; CHECK-GI-NEXT: cset w9, le
+; CHECK-GI-NEXT: and w0, w8, w9
+; CHECK-GI-NEXT: ret
entry:
%c0 = icmp sgt i32 %s0, %s1
%c1 = icmp sle i32 %s2, %s3
@@ -2115,21 +2115,21 @@ entry:
}
define i32 @and_sgt_sgt(i32 %s0, i32 %s1, i32 %s2, i32 %s3) {
-; SDISEL-LABEL: and_sgt_sgt:
-; SDISEL: // %bb.0: // %entry
-; SDISEL-NEXT: cmp w0, w1
-; SDISEL-NEXT: ccmp w2, w3, #4, gt
-; SDISEL-NEXT: cset w0, gt
-; SDISEL-NEXT: ret
-;
-; GISEL-LABEL: and_sgt_sgt:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: cmp w0, w1
-; GISEL-NEXT: cset w8, gt
-; GISEL-NEXT: cmp w2, w3
-; GISEL-NEXT: cset w9, gt
-; GISEL-NEXT: and w0, w8, w9
-; GISEL-NEXT: ret
+; CHECK-SD-LABEL: and_sgt_sgt:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: ccmp w2, w3, #4, gt
+; CHECK-SD-NEXT: cset w0, gt
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: and_sgt_sgt:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: cset w8, gt
+; CHECK-GI-NEXT: cmp w2, w3
+; CHECK-GI-NEXT: cset w9, gt
+; CHECK-GI-NEXT: and w0, w8, w9
+; CHECK-GI-NEXT: ret
entry:
%c0 = icmp sgt i32 %s0, %s1
%c1 = icmp sgt i32 %s2, %s3
@@ -2139,21 +2139,21 @@ entry:
}
define i32 @and_sgt_sge(i32 %s0, i32 %s1, i32 %s2, i32 %s3) {
-; SDISEL-LABEL: and_sgt_sge:
-; SDISEL: // %bb.0: // %entry
-; SDISEL-NEXT: cmp w0, w1
-; SDISEL-NEXT: ccmp w2, w3, #8, gt
-; SDISEL-NEXT: cset w0, ge
-; SDISEL-NEXT: ret
-;
-; GISEL-LABEL: and_sgt_sge:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: cmp w0, w1
-; GISEL-NEXT: cset w8, gt
-; GISEL-NEXT: cmp w2, w3
-; GISEL-NEXT: cset w9, ge
-; GISEL-NEXT: and w0, w8, w9
-; GISEL-NEXT: ret
+; CHECK-SD-LABEL: and_sgt_sge:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: ccmp w2, w3, #8, gt
+; CHECK-SD-NEXT: cset w0, ge
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: and_sgt_sge:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: cset w8, gt
+; CHECK-GI-NEXT: cmp w2, w3
+; CHECK-GI-NEXT: cset w9, ge
+; CHECK-GI-NEXT: and w0, w8, w9
+; CHECK-GI-NEXT: ret
entry:
%c0 = icmp sgt i32 %s0, %s1
%c1 = icmp sge i32 %s2, %s3
@@ -2163,21 +2163,21 @@ entry:
}
define i32 @and_sge_eq(i32 %s0, i32 %s1, i32 %s2, i32 %s3) {
-; SDISEL-LABEL: and_sge_eq:
-; SDISEL: // %bb.0: // %entry
-; SDISEL-NEXT: cmp w0, w1
-; SDISEL-NEXT: ccmp w2, w3, #0, ge
-; SDISEL-NEXT: cset w0, eq
-; SDISEL-NEXT: ret
-;
-; GISEL-LABEL: and_sge_eq:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: cmp w0, w1
-; GISEL-NEXT: cset w8, ge
-; GISEL-NEXT: cmp w2, w3
-; GISEL-NEXT: cset w9, eq
-; GISEL-NEXT: and w0, w8, w9
-; GISEL-NEXT: ret
+; CHECK-SD-LABEL: and_sge_eq:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: ccmp w2, w3, #0, ge
+; CHECK-SD-NEXT: cset w0, eq
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: and_sge_eq:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: cset w8, ge
+; CHECK-GI-NEXT: cmp w2, w3
+; CHECK-GI-NEXT: cset w9, eq
+; CHECK-GI-NEXT: and w0, w8, w9
+; CHECK-GI-NEXT: ret
entry:
%c0 = icmp sge i32 %s0, %s1
%c1 = icmp eq i32 %s2, %s3
@@ -2187,21 +2187,21 @@ entry:
}
define i32 @and_sge_ne(i32 %s0, i32 %s1, i32 %s2, i32 %s3) {
-; SDISEL-LABEL: and_sge_ne:
-; SDISEL: // %bb.0: // %entry
-; SDISEL-NEXT: cmp w0, w1
-; SDISEL-NEXT: ccmp w2, w3, #4, ge
-; SDISEL-NEXT: cset w0, ne
-; SDISEL-NEXT: ret
-;
-; GISEL-LABEL: and_sge_ne:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: cmp w0, w1
-; GISEL-NEXT: cset w8, ge
-; GISEL-NEXT: cmp w2, w3
-; GISEL-NEXT: cset w9, ne
-; GISEL-NEXT: and w0, w8, w9
-; GISEL-NEXT: ret
+; CHECK-SD-LABEL: and_sge_ne:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: ccmp w2, w3, #4, ge
+; CHECK-SD-NEXT: cset w0, ne
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: and_sge_ne:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: cset w8, ge
+; CHECK-GI-NEXT: cmp w2, w3
+; CHECK-GI-NEXT: cset w9, ne
+; CHECK-GI-NEXT: and w0, w8, w9
+; CHECK-GI-NEXT: ret
entry:
%c0 = icmp sge i32 %s0, %s1
%c1 = icmp ne i32 %s2, %s3
@@ -2211,21 +2211,21 @@ entry:
}
define i32 @and_sge_ult(i32 %s0, i32 %s1, i32 %s2, i32 %s3) {
-; SDISEL-LABEL: and_sge_ult:
-; SDISEL: // %bb.0: // %entry
-; SDISEL-NEXT: cmp w0, w1
-; SDISEL-NEXT: ccmp w2, w3, #2, ge
-; SDISEL-NEXT: cset w0, lo
-; SDISEL-NEXT: ret
-;
-; GISEL-LABEL: and_sge_ult:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: cmp w0, w1
-; GISEL-NEXT: cset w8, ge
-; GISEL-NEXT: cmp w2, w3
-; GISEL-NEXT: cset w9, lo
-; GISEL-NEXT: and w0, w8, w9
-; GISEL-NEXT: ret
+; CHECK-SD-LABEL: and_sge_ult:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: ccmp w2, w3, #2, ge
+; CHECK-SD-NEXT: cset w0, lo
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: and_sge_ult:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: cset w8, ge
+; CHECK-GI-NEXT: cmp w2, w3
+; CHECK-GI-NEXT: cset w9, lo
+; CHECK-GI-NEXT: and w0, w8, w9
+; CHECK-GI-NEXT: ret
entry:
%c0 = icmp sge i32 %s0, %s1
%c1 = icmp ult i32 %s2, %s3
@@ -2235,21 +2235,21 @@ entry:
}
define i32 @and_sge_ule(i32 %s0, i32 %s1, i32 %s2, i32 %s3) {
-; SDISEL-LABEL: and_sge_ule:
-; SDISEL: // %bb.0: // %entry
-; SDISEL-NEXT: cmp w0, w1
-; SDISEL-NEXT: ccmp w2, w3, #2, ge
-; SDISEL-NEXT: cset w0, ls
-; SDISEL-NEXT: ret
-;
-; GISEL-LABEL: and_sge_ule:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: cmp w0, w1
-; GISEL-NEXT: cset w8, ge
-; GISEL-NEXT: cmp w2, w3
-; GISEL-NEXT: cset w9, ls
-; GISEL-NEXT: and w0, w8, w9
-; GISEL-NEXT: ret
+; CHECK-SD-LABEL: and_sge_ule:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: ccmp w2, w3, #2, ge
+; CHECK-SD-NEXT: cset w0, ls
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: and_sge_ule:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: cset w8, ge
+; CHECK-GI-NEXT: cmp w2, w3
+; CHECK-GI-NEXT: cset w9, ls
+; CHECK-GI-NEXT: and w0, w8, w9
+; CHECK-GI-NEXT: ret
entry:
%c0 = icmp sge i32 %s0, %s1
%c1 = icmp ule i32 %s2, %s3
@@ -2259,21 +2259,21 @@ entry:
}
define i32 @and_sge_ugt(i32 %s0, i32 %s1, i32 %s2, i32 %s3) {
-; SDISEL-LABEL: and_sge_ugt:
-; SDISEL: // %bb.0: // %entry
-; SDISEL-NEXT: cmp w0, w1
-; SDISEL-NEXT: ccmp w2, w3, #0, ge
-; SDISEL-NEXT: cset w0, hi
-; SDISEL-NEXT: ret
-;
-; GISEL-LABEL: and_sge_ugt:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: cmp w0, w1
-; GISEL-NEXT: cset w8, ge
-; GISEL-NEXT: cmp w2, w3
-; GISEL-NEXT: cset w9, hi
-; GISEL-NEXT: and w0, w8, w9
-; GISEL-NEXT: ret
+; CHECK-SD-LABEL: and_sge_ugt:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: ccmp w2, w3, #0, ge
+; CHECK-SD-NEXT: cset w0, hi
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: and_sge_ugt:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: cset w8, ge
+; CHECK-GI-NEXT: cmp w2, w3
+; CHECK-GI-NEXT: cset w9, hi
+; CHECK-GI-NEXT: and w0, w8, w9
+; CHECK-GI-NEXT: ret
entry:
%c0 = icmp sge i32 %s0, %s1
%c1 = icmp ugt i32 %s2, %s3
@@ -2283,21 +2283,21 @@ entry:
}
define i32 @and_sge_uge(i32 %s0, i32 %s1, i32 %s2, i32 %s3) {
-; SDISEL-LABEL: and_sge_uge:
-; SDISEL: // %bb.0: // %entry
-; SDISEL-NEXT: cmp w0, w1
-; SDISEL-NEXT: ccmp w2, w3, #0, ge
-; SDISEL-NEXT: cset w0, hs
-; SDISEL-NEXT: ret
-;
-; GISEL-LABEL: and_sge_uge:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: cmp w0, w1
-; GISEL-NEXT: cset w8, ge
-; GISEL-NEXT: cmp w2, w3
-; GISEL-NEXT: cset w9, hs
-; GISEL-NEXT: and w0, w8, w9
-; GISEL-NEXT: ret
+; CHECK-SD-LABEL: and_sge_uge:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: ccmp w2, w3, #0, ge
+; CHECK-SD-NEXT: cset w0, hs
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: and_sge_uge:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: cset w8, ge
+; CHECK-GI-NEXT: cmp w2, w3
+; CHECK-GI-NEXT: cset w9, hs
+; CHECK-GI-NEXT: and w0, w8, w9
+; CHECK-GI-NEXT: ret
entry:
%c0 = icmp sge i32 %s0, %s1
%c1 = icmp uge i32 %s2, %s3
@@ -2307,21 +2307,21 @@ entry:
}
define i32 @and_sge_slt(i32 %s0, i32 %s1, i32 %s2, i32 %s3) {
-; SDISEL-LABEL: and_sge_slt:
-; SDISEL: // %bb.0: // %entry
-; SDISEL-NEXT: cmp w0, w1
-; SDISEL-NEXT: ccmp w2, w3, #0, ge
-; SDISEL-NEXT: cset w0, lt
-; SDISEL-NEXT: ret
-;
-; GISEL-LABEL: and_sge_slt:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: cmp w0, w1
-; GISEL-NEXT: cset w8, ge
-; GISEL-NEXT: cmp w2, w3
-; GISEL-NEXT: cset w9, lt
-; GISEL-NEXT: and w0, w8, w9
-; GISEL-NEXT: ret
+; CHECK-SD-LABEL: and_sge_slt:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: ccmp w2, w3, #0, ge
+; CHECK-SD-NEXT: cset w0, lt
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: and_sge_slt:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: cset w8, ge
+; CHECK-GI-NEXT: cmp w2, w3
+; CHECK-GI-NEXT: cset w9, lt
+; CHECK-GI-NEXT: and w0, w8, w9
+; CHECK-GI-NEXT: ret
entry:
%c0 = icmp sge i32 %s0, %s1
%c1 = icmp slt i32 %s2, %s3
@@ -2331,21 +2331,21 @@ entry:
}
define i32 @and_sge_sle(i32 %s0, i32 %s1, i32 %s2, i32 %s3) {
-; SDISEL-LABEL: and_sge_sle:
-; SDISEL: // %bb.0: // %entry
-; SDISEL-NEXT: cmp w0, w1
-; SDISEL-NEXT: ccmp w2, w3, #0, ge
-; SDISEL-NEXT: cset w0, le
-; SDISEL-NEXT: ret
-;
-; GISEL-LABEL: and_sge_sle:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: cmp w0, w1
-; GISEL-NEXT: cset w8, ge
-; GISEL-NEXT: cmp w2, w3
-; GISEL-NEXT: cset w9, le
-; GISEL-NEXT: and w0, w8, w9
-; GISEL-NEXT: ret
+; CHECK-SD-LABEL: and_sge_sle:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: ccmp w2, w3, #0, ge
+; CHECK-SD-NEXT: cset w0, le
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: and_sge_sle:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: cset w8, ge
+; CHECK-GI-NEXT: cmp w2, w3
+; CHECK-GI-NEXT: cset w9, le
+; CHECK-GI-NEXT: and w0, w8, w9
+; CHECK-GI-NEXT: ret
entry:
%c0 = icmp sge i32 %s0, %s1
%c1 = icmp sle i32 %s2, %s3
@@ -2355,21 +2355,21 @@ entry:
}
define i32 @and_sge_sgt(i32 %s0, i32 %s1, i32 %s2, i32 %s3) {
-; SDISEL-LABEL: and_sge_sgt:
-; SDISEL: // %bb.0: // %entry
-; SDISEL-NEXT: cmp w0, w1
-; SDISEL-NEXT: ccmp w2, w3, #4, ge
-; SDISEL-NEXT: cset w0, gt
-; SDISEL-NEXT: ret
-;
-; GISEL-LABEL: and_sge_sgt:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: cmp w0, w1
-; GISEL-NEXT: cset w8, ge
-; GISEL-NEXT: cmp w2, w3
-; GISEL-NEXT: cset w9, gt
-; GISEL-NEXT: and w0, w8, w9
-; GISEL-NEXT: ret
+; CHECK-SD-LABEL: and_sge_sgt:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: ccmp w2, w3, #4, ge
+; CHECK-SD-NEXT: cset w0, gt
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: and_sge_sgt:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: cset w8, ge
+; CHECK-GI-NEXT: cmp w2, w3
+; CHECK-GI-NEXT: cset w9, gt
+; CHECK-GI-NEXT: and w0, w8, w9
+; CHECK-GI-NEXT: ret
entry:
%c0 = icmp sge i32 %s0, %s1
%c1 = icmp sgt i32 %s2, %s3
@@ -2379,21 +2379,21 @@ entry:
}
define i32 @and_sge_sge(i32 %s0, i32 %s1, i32 %s2, i32 %s3) {
-; SDISEL-LABEL: and_sge_sge:
-; SDISEL: // %bb.0: // %entry
-; SDISEL-NEXT: cmp w0, w1
-; SDISEL-NEXT: ccmp w2, w3, #8, ge
-; SDISEL-NEXT: cset w0, ge
-; SDISEL-NEXT: ret
-;
-; GISEL-LABEL: and_sge_sge:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: cmp w0, w1
-; GISEL-NEXT: cset w8, ge
-; GISEL-NEXT: cmp w2, w3
-; GISEL-NEXT: cset w9, ge
-; GISEL-NEXT: and w0, w8, w9
-; GISEL-NEXT: ret
+; CHECK-SD-LABEL: and_sge_sge:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: ccmp w2, w3, #8, ge
+; CHECK-SD-NEXT: cset w0, ge
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: and_sge_sge:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: cset w8, ge
+; CHECK-GI-NEXT: cmp w2, w3
+; CHECK-GI-NEXT: cset w9, ge
+; CHECK-GI-NEXT: and w0, w8, w9
+; CHECK-GI-NEXT: ret
entry:
%c0 = icmp sge i32 %s0, %s1
%c1 = icmp sge i32 %s2, %s3
@@ -2403,19 +2403,19 @@ entry:
}
define i32 @cmp_to_ands1(i32 %num) {
-; SDISEL-LABEL: cmp_to_ands1:
-; SDISEL: // %bb.0:
-; SDISEL-NEXT: and w8, w0, #0xff
-; SDISEL-NEXT: tst w0, #0xfe
-; SDISEL-NEXT: csel w0, w8, wzr, ne
-; SDISEL-NEXT: ret
-;
-; GISEL-LABEL: cmp_to_ands1:
-; GISEL: // %bb.0:
-; GISEL-NEXT: and w8, w0, #0xff
-; GISEL-NEXT: cmp w8, #1
-; GISEL-NEXT: csel w0, w8, wzr, hi
-; GISEL-NEXT: ret
+; CHECK-SD-LABEL: cmp_to_ands1:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: and w8, w0, #0xff
+; CHECK-SD-NEXT: tst w0, #0xfe
+; CHECK-SD-NEXT: csel w0, w8, wzr, ne
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: cmp_to_ands1:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: and w8, w0, #0xff
+; CHECK-GI-NEXT: cmp w8, #1
+; CHECK-GI-NEXT: csel w0, w8, wzr, hi
+; CHECK-GI-NEXT: ret
%and = and i32 %num, 255
%cmp = icmp ugt i32 %and, 1
%r = select i1 %cmp, i32 %and, i32 0
@@ -2423,19 +2423,19 @@ define i32 @cmp_to_ands1(i32 %num) {
}
define i32 @cmp_to_ands2(i32 %num) {
-; SDISEL-LABEL: cmp_to_ands2:
-; SDISEL: // %bb.0:
-; SDISEL-NEXT: and w8, w0, #0xfe
-; SDISEL-NEXT: tst w0, #0xc0
-; SDISEL-NEXT: csel w0, w8, wzr, ne
-; SDISEL-NEXT: ret
-;
-; GISEL-LABEL: cmp_to_ands2:
-; GISEL: // %bb.0:
-; GISEL-NEXT: and w8, w0, #0xfe
-; GISEL-NEXT: cmp w8, #63
-; GISEL-NEXT: csel w0, w8, wzr, hi
-; GISEL-NEXT: ret
+; CHECK-SD-LABEL: cmp_to_ands2:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: and w8, w0, #0xfe
+; CHECK-SD-NEXT: tst w0, #0xc0
+; CHECK-SD-NEXT: csel w0, w8, wzr, ne
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: cmp_to_ands2:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: and w8, w0, #0xfe
+; CHECK-GI-NEXT: cmp w8, #63
+; CHECK-GI-NEXT: csel w0, w8, wzr, hi
+; CHECK-GI-NEXT: ret
%and = and i32 %num, 254
%cmp = icmp ugt i32 %and, 63
%r = select i1 %cmp, i32 %and, i32 0
@@ -2443,19 +2443,19 @@ define i32 @cmp_to_ands2(i32 %num) {
}
define i32 @cmp_to_ands3(i32 %num, i32 %a) {
-; SDISEL-LABEL: cmp_to_ands3:
-; SDISEL: // %bb.0:
-; SDISEL-NEXT: tst w0, #0x10
-; SDISEL-NEXT: csel w0, w1, wzr, ne
-; SDISEL-NEXT: ret
-;
-; GISEL-LABEL: cmp_to_ands3:
-; GISEL: // %bb.0:
-; GISEL-NEXT: mov w8, #23 // =0x17
-; GISEL-NEXT: and w8, w0, w8
-; GISEL-NEXT: cmp w8, #7
-; GISEL-NEXT: csel w0, w1, wzr, hi
-; GISEL-NEXT: ret
+; CHECK-SD-LABEL: cmp_to_ands3:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: tst w0, #0x10
+; CHECK-SD-NEXT: csel w0, w1, wzr, ne
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: cmp_to_ands3:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: mov w8, #23 // =0x17
+; CHECK-GI-NEXT: and w8, w0, w8
+; CHECK-GI-NEXT: cmp w8, #7
+; CHECK-GI-NEXT: csel w0, w1, wzr, hi
+; CHECK-GI-NEXT: ret
%and = and i32 %num, 23
%cmp = icmp ugt i32 %and, 7
%r = select i1 %cmp, i32 %a, i32 0
@@ -2463,19 +2463,19 @@ define i32 @cmp_to_ands3(i32 %num, i32 %a) {
}
define i32 @cmp_to_ands4(i32 %num, i32 %a) {
-; SDISEL-LABEL: cmp_to_ands4:
-; SDISEL: // %bb.0:
-; SDISEL-NEXT: and w8, w0, #0x30
-; SDISEL-NEXT: tst w0, #0x20
-; SDISEL-NEXT: csel w0, w8, w1, eq
-; SDISEL-NEXT: ret
-;
-; GISEL-LABEL: cmp_to_ands4:
-; GISEL: // %bb.0:
-; GISEL-NEXT: and w8, w0, #0x30
-; GISEL-NEXT: cmp w8, #31
-; GISEL-NEXT: csel w0, w8, w1, ls
-; GISEL-NEXT: ret
+; CHECK-SD-LABEL: cmp_to_ands4:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: and w8, w0, #0x30
+; CHECK-SD-NEXT: tst w0, #0x20
+; CHECK-SD-NEXT: csel w0, w8, w1, eq
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: cmp_to_ands4:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: and w8, w0, #0x30
+; CHECK-GI-NEXT: cmp w8, #31
+; CHECK-GI-NEXT: csel w0, w8, w1, ls
+; CHECK-GI-NEXT: ret
%and = and i32 %num, 48
%cmp = icmp ule i32 %and, 31
%r = select i1 %cmp, i32 %and, i32 %a
@@ -2483,19 +2483,19 @@ define i32 @cmp_to_ands4(i32 %num, i32 %a) {
}
define i32 @cmp_to_ands5(i32 %num, i32 %a) {
-; SDISEL-LABEL: cmp_to_ands5:
-; SDISEL: // %bb.0:
-; SDISEL-NEXT: and w8, w0, #0xf8
-; SDISEL-NEXT: tst w0, #0xc0
-; SDISEL-NEXT: csel w0, w8, w1, eq
-; SDISEL-NEXT: ret
-;
-; GISEL-LABEL: cmp_to_ands5:
-; GISEL: // %bb.0:
-; GISEL-NEXT: and w8, w0, #0xf8
-; GISEL-NEXT: cmp w8, #64
-; GISEL-NEXT: csel w0, w8, w1, lo
-; GISEL-NEXT: ret
+; CHECK-SD-LABEL: cmp_to_ands5:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: and w8, w0, #0xf8
+; CHECK-SD-NEXT: tst w0, #0xc0
+; CHECK-SD-NEXT: csel w0, w8, w1, eq
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: cmp_to_ands5:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: and w8, w0, #0xf8
+; CHECK-GI-NEXT: cmp w8, #64
+; CHECK-GI-NEXT: csel w0, w8, w1, lo
+; CHECK-GI-NEXT: ret
%and = and i32 %num, 248
%cmp = icmp ult i32 %and, 64
%r = select i1 %cmp, i32 %and, i32 %a
@@ -2503,19 +2503,19 @@ define i32 @cmp_to_ands5(i32 %num, i32 %a) {
}
define i32 @cmp_to_ands6(i32 %num) {
-; SDISEL-LABEL: cmp_to_ands6:
-; SDISEL: // %bb.0:
-; SDISEL-NEXT: and w8, w0, #0xfe
-; SDISEL-NEXT: tst w0, #0xf0
-; SDISEL-NEXT: csel w0, w8, wzr, ne
-; SDISEL-NEXT: ret
-;
-; GISEL-LABEL: cmp_to_ands6:
-; GISEL: // %bb.0:
-; GISEL-NEXT: and w8, w0, #0xfe
-; GISEL-NEXT: cmp w8, #16
-; GISEL-NEXT: csel w0, w8, wzr, hs
-; GISEL-NEXT: ret
+; CHECK-SD-LABEL: cmp_to_ands6:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: and w8, w0, #0xfe
+; CHECK-SD-NEXT: tst w0, #0xf0
+; CHECK-SD-NEXT: csel w0, w8, wzr, ne
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: cmp_to_ands6:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: and w8, w0, #0xfe
+; CHECK-GI-NEXT: cmp w8, #16
+; CHECK-GI-NEXT: csel w0, w8, wzr, hs
+; CHECK-GI-NEXT: ret
%and = and i32 %num, 254
%cmp = icmp uge i32 %and, 16
%r = select i1 %cmp, i32 %and, i32 0
@@ -2523,21 +2523,21 @@ define i32 @cmp_to_ands6(i32 %num) {
}
define i1 @and_fcmp(float %0, float %1) {
-; SDISEL-LABEL: and_fcmp:
-; SDISEL: // %bb.0:
-; SDISEL-NEXT: fcmp s1, s1
-; SDISEL-NEXT: fccmp s0, s0, #0, vs
-; SDISEL-NEXT: cset w0, vs
-; SDISEL-NEXT: ret
-;
-; GISEL-LABEL: and_fcmp:
-; GISEL: // %bb.0:
-; GISEL-NEXT: fcmp s0, #0.0
-; GISEL-NEXT: cset w8, vs
-; GISEL-NEXT: fcmp s1, #0.0
-; GISEL-NEXT: cset w9, vs
-; GISEL-NEXT: and w0, w8, w9
-; GISEL-NEXT: ret
+; CHECK-SD-LABEL: and_fcmp:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: fcmp s1, s1
+; CHECK-SD-NEXT: fccmp s0, s0, #0, vs
+; CHECK-SD-NEXT: cset w0, vs
+; CHECK-SD-NEXT: ret
+;
+; CHECK-GI-LABEL: and_fcmp:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: fcmp s0, #0.0
+; CHECK-GI-NEXT: cset w8, vs
+; CHECK-GI-NEXT: fcmp s1, #0.0
+; CHECK-GI-NEXT: cset w9, vs
+; CHECK-GI-NEXT: and w0, w8, w9
+; CHECK-GI-NEXT: ret
%3 = fcmp uno float %0, 0.000000e+00
%4 = fcmp uno float %1, 0.000000e+00
diff --git a/llvm/test/CodeGen/AArch64/andorbrcompare.ll b/llvm/test/CodeGen/AArch64/andorbrcompare.ll
index 951a5cd..5bc06ec 100644
--- a/llvm/test/CodeGen/AArch64/andorbrcompare.ll
+++ b/llvm/test/CodeGen/AArch64/andorbrcompare.ll
@@ -1,44 +1,44 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64-none-elf -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,SDISEL
-; RUN: llc -mtriple=aarch64-none-elf -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,GISEL
+; RUN: llc -mtriple=aarch64-none-elf -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc -mtriple=aarch64-none-elf -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI
declare void @dummy()
define i32 @and_eq_ne_ult(i32 %s0, i32 %s1, i32 %s2, i32 %s3, i32 %s4, i32 %s5, ptr %p) {
-; SDISEL-LABEL: and_eq_ne_ult:
-; SDISEL: // %bb.0: // %entry
-; SDISEL-NEXT: cmp w2, w3
-; SDISEL-NEXT: ccmp w0, w1, #0, ne
-; SDISEL-NEXT: b.eq .LBB0_3
-; SDISEL-NEXT: // %bb.1: // %entry
-; SDISEL-NEXT: cmp w4, w5
-; SDISEL-NEXT: b.lo .LBB0_3
-; SDISEL-NEXT: // %bb.2:
-; SDISEL-NEXT: mov w0, wzr
-; SDISEL-NEXT: ret
-; SDISEL-NEXT: .LBB0_3: // %if
-; SDISEL-NEXT: mov w0, #1 // =0x1
-; SDISEL-NEXT: str w0, [x6]
-; SDISEL-NEXT: ret
+; CHECK-SD-LABEL: and_eq_ne_ult:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: cmp w2, w3
+; CHECK-SD-NEXT: ccmp w0, w1, #0, ne
+; CHECK-SD-NEXT: b.eq .LBB0_3
+; CHECK-SD-NEXT: // %bb.1: // %entry
+; CHECK-SD-NEXT: cmp w4, w5
+; CHECK-SD-NEXT: b.lo .LBB0_3
+; CHECK-SD-NEXT: // %bb.2:
+; CHECK-SD-NEXT: mov w0, wzr
+; CHECK-SD-NEXT: ret
+; CHECK-SD-NEXT: .LBB0_3: // %if
+; CHECK-SD-NEXT: mov w0, #1 // =0x1
+; CHECK-SD-NEXT: str w0, [x6]
+; CHECK-SD-NEXT: ret
;
-; GISEL-LABEL: and_eq_ne_ult:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: cmp w0, w1
-; GISEL-NEXT: cset w8, eq
-; GISEL-NEXT: cmp w2, w3
-; GISEL-NEXT: cset w9, ne
-; GISEL-NEXT: and w8, w8, w9
-; GISEL-NEXT: tbnz w8, #0, .LBB0_3
-; GISEL-NEXT: // %bb.1: // %entry
-; GISEL-NEXT: cmp w4, w5
-; GISEL-NEXT: mov w0, wzr
-; GISEL-NEXT: b.lo .LBB0_3
-; GISEL-NEXT: // %bb.2: // %common.ret
-; GISEL-NEXT: ret
-; GISEL-NEXT: .LBB0_3: // %if
-; GISEL-NEXT: mov w0, #1 // =0x1
-; GISEL-NEXT: str w0, [x6]
-; GISEL-NEXT: ret
+; CHECK-GI-LABEL: and_eq_ne_ult:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: cset w8, eq
+; CHECK-GI-NEXT: cmp w2, w3
+; CHECK-GI-NEXT: cset w9, ne
+; CHECK-GI-NEXT: and w8, w8, w9
+; CHECK-GI-NEXT: tbnz w8, #0, .LBB0_3
+; CHECK-GI-NEXT: // %bb.1: // %entry
+; CHECK-GI-NEXT: cmp w4, w5
+; CHECK-GI-NEXT: mov w0, wzr
+; CHECK-GI-NEXT: b.lo .LBB0_3
+; CHECK-GI-NEXT: // %bb.2: // %common.ret
+; CHECK-GI-NEXT: ret
+; CHECK-GI-NEXT: .LBB0_3: // %if
+; CHECK-GI-NEXT: mov w0, #1 // =0x1
+; CHECK-GI-NEXT: str w0, [x6]
+; CHECK-GI-NEXT: ret
entry:
%c0 = icmp eq i32 %s0, %s1
%c1 = icmp ne i32 %s2, %s3
@@ -56,40 +56,40 @@ else:
}
define i32 @and_ne_ult_ule(i32 %s0, i32 %s1, i32 %s2, i32 %s3, i32 %s4, i32 %s5, ptr %p) {
-; SDISEL-LABEL: and_ne_ult_ule:
-; SDISEL: // %bb.0: // %entry
-; SDISEL-NEXT: cmp w2, w3
-; SDISEL-NEXT: ccmp w0, w1, #4, lo
-; SDISEL-NEXT: b.ne .LBB1_3
-; SDISEL-NEXT: // %bb.1: // %entry
-; SDISEL-NEXT: cmp w4, w5
-; SDISEL-NEXT: b.ls .LBB1_3
-; SDISEL-NEXT: // %bb.2:
-; SDISEL-NEXT: mov w0, wzr
-; SDISEL-NEXT: ret
-; SDISEL-NEXT: .LBB1_3: // %if
-; SDISEL-NEXT: mov w0, #1 // =0x1
-; SDISEL-NEXT: str w0, [x6]
-; SDISEL-NEXT: ret
+; CHECK-SD-LABEL: and_ne_ult_ule:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: cmp w2, w3
+; CHECK-SD-NEXT: ccmp w0, w1, #4, lo
+; CHECK-SD-NEXT: b.ne .LBB1_3
+; CHECK-SD-NEXT: // %bb.1: // %entry
+; CHECK-SD-NEXT: cmp w4, w5
+; CHECK-SD-NEXT: b.ls .LBB1_3
+; CHECK-SD-NEXT: // %bb.2:
+; CHECK-SD-NEXT: mov w0, wzr
+; CHECK-SD-NEXT: ret
+; CHECK-SD-NEXT: .LBB1_3: // %if
+; CHECK-SD-NEXT: mov w0, #1 // =0x1
+; CHECK-SD-NEXT: str w0, [x6]
+; CHECK-SD-NEXT: ret
;
-; GISEL-LABEL: and_ne_ult_ule:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: cmp w0, w1
-; GISEL-NEXT: cset w8, ne
-; GISEL-NEXT: cmp w2, w3
-; GISEL-NEXT: cset w9, lo
-; GISEL-NEXT: and w8, w8, w9
-; GISEL-NEXT: tbnz w8, #0, .LBB1_3
-; GISEL-NEXT: // %bb.1: // %entry
-; GISEL-NEXT: cmp w4, w5
-; GISEL-NEXT: mov w0, wzr
-; GISEL-NEXT: b.ls .LBB1_3
-; GISEL-NEXT: // %bb.2: // %common.ret
-; GISEL-NEXT: ret
-; GISEL-NEXT: .LBB1_3: // %if
-; GISEL-NEXT: mov w0, #1 // =0x1
-; GISEL-NEXT: str w0, [x6]
-; GISEL-NEXT: ret
+; CHECK-GI-LABEL: and_ne_ult_ule:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: cset w8, ne
+; CHECK-GI-NEXT: cmp w2, w3
+; CHECK-GI-NEXT: cset w9, lo
+; CHECK-GI-NEXT: and w8, w8, w9
+; CHECK-GI-NEXT: tbnz w8, #0, .LBB1_3
+; CHECK-GI-NEXT: // %bb.1: // %entry
+; CHECK-GI-NEXT: cmp w4, w5
+; CHECK-GI-NEXT: mov w0, wzr
+; CHECK-GI-NEXT: b.ls .LBB1_3
+; CHECK-GI-NEXT: // %bb.2: // %common.ret
+; CHECK-GI-NEXT: ret
+; CHECK-GI-NEXT: .LBB1_3: // %if
+; CHECK-GI-NEXT: mov w0, #1 // =0x1
+; CHECK-GI-NEXT: str w0, [x6]
+; CHECK-GI-NEXT: ret
entry:
%c0 = icmp ne i32 %s0, %s1
%c1 = icmp ult i32 %s2, %s3
@@ -107,40 +107,40 @@ else:
}
define i32 @and_ult_ule_ugt(i32 %s0, i32 %s1, i32 %s2, i32 %s3, i32 %s4, i32 %s5, ptr %p) {
-; SDISEL-LABEL: and_ult_ule_ugt:
-; SDISEL: // %bb.0: // %entry
-; SDISEL-NEXT: cmp w2, w3
-; SDISEL-NEXT: ccmp w0, w1, #2, ls
-; SDISEL-NEXT: b.lo .LBB2_3
-; SDISEL-NEXT: // %bb.1: // %entry
-; SDISEL-NEXT: cmp w4, w5
-; SDISEL-NEXT: b.hi .LBB2_3
-; SDISEL-NEXT: // %bb.2:
-; SDISEL-NEXT: mov w0, wzr
-; SDISEL-NEXT: ret
-; SDISEL-NEXT: .LBB2_3: // %if
-; SDISEL-NEXT: mov w0, #1 // =0x1
-; SDISEL-NEXT: str w0, [x6]
-; SDISEL-NEXT: ret
+; CHECK-SD-LABEL: and_ult_ule_ugt:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: cmp w2, w3
+; CHECK-SD-NEXT: ccmp w0, w1, #2, ls
+; CHECK-SD-NEXT: b.lo .LBB2_3
+; CHECK-SD-NEXT: // %bb.1: // %entry
+; CHECK-SD-NEXT: cmp w4, w5
+; CHECK-SD-NEXT: b.hi .LBB2_3
+; CHECK-SD-NEXT: // %bb.2:
+; CHECK-SD-NEXT: mov w0, wzr
+; CHECK-SD-NEXT: ret
+; CHECK-SD-NEXT: .LBB2_3: // %if
+; CHECK-SD-NEXT: mov w0, #1 // =0x1
+; CHECK-SD-NEXT: str w0, [x6]
+; CHECK-SD-NEXT: ret
;
-; GISEL-LABEL: and_ult_ule_ugt:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: cmp w0, w1
-; GISEL-NEXT: cset w8, lo
-; GISEL-NEXT: cmp w2, w3
-; GISEL-NEXT: cset w9, ls
-; GISEL-NEXT: and w8, w8, w9
-; GISEL-NEXT: tbnz w8, #0, .LBB2_3
-; GISEL-NEXT: // %bb.1: // %entry
-; GISEL-NEXT: cmp w4, w5
-; GISEL-NEXT: mov w0, wzr
-; GISEL-NEXT: b.hi .LBB2_3
-; GISEL-NEXT: // %bb.2: // %common.ret
-; GISEL-NEXT: ret
-; GISEL-NEXT: .LBB2_3: // %if
-; GISEL-NEXT: mov w0, #1 // =0x1
-; GISEL-NEXT: str w0, [x6]
-; GISEL-NEXT: ret
+; CHECK-GI-LABEL: and_ult_ule_ugt:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: cset w8, lo
+; CHECK-GI-NEXT: cmp w2, w3
+; CHECK-GI-NEXT: cset w9, ls
+; CHECK-GI-NEXT: and w8, w8, w9
+; CHECK-GI-NEXT: tbnz w8, #0, .LBB2_3
+; CHECK-GI-NEXT: // %bb.1: // %entry
+; CHECK-GI-NEXT: cmp w4, w5
+; CHECK-GI-NEXT: mov w0, wzr
+; CHECK-GI-NEXT: b.hi .LBB2_3
+; CHECK-GI-NEXT: // %bb.2: // %common.ret
+; CHECK-GI-NEXT: ret
+; CHECK-GI-NEXT: .LBB2_3: // %if
+; CHECK-GI-NEXT: mov w0, #1 // =0x1
+; CHECK-GI-NEXT: str w0, [x6]
+; CHECK-GI-NEXT: ret
entry:
%c0 = icmp ult i32 %s0, %s1
%c1 = icmp ule i32 %s2, %s3
@@ -158,40 +158,40 @@ else:
}
define i32 @and_ule_ugt_uge(i32 %s0, i32 %s1, i32 %s2, i32 %s3, i32 %s4, i32 %s5, ptr %p) {
-; SDISEL-LABEL: and_ule_ugt_uge:
-; SDISEL: // %bb.0: // %entry
-; SDISEL-NEXT: cmp w2, w3
-; SDISEL-NEXT: ccmp w0, w1, #2, hi
-; SDISEL-NEXT: b.ls .LBB3_3
-; SDISEL-NEXT: // %bb.1: // %entry
-; SDISEL-NEXT: cmp w4, w5
-; SDISEL-NEXT: b.hs .LBB3_3
-; SDISEL-NEXT: // %bb.2:
-; SDISEL-NEXT: mov w0, wzr
-; SDISEL-NEXT: ret
-; SDISEL-NEXT: .LBB3_3: // %if
-; SDISEL-NEXT: mov w0, #1 // =0x1
-; SDISEL-NEXT: str w0, [x6]
-; SDISEL-NEXT: ret
+; CHECK-SD-LABEL: and_ule_ugt_uge:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: cmp w2, w3
+; CHECK-SD-NEXT: ccmp w0, w1, #2, hi
+; CHECK-SD-NEXT: b.ls .LBB3_3
+; CHECK-SD-NEXT: // %bb.1: // %entry
+; CHECK-SD-NEXT: cmp w4, w5
+; CHECK-SD-NEXT: b.hs .LBB3_3
+; CHECK-SD-NEXT: // %bb.2:
+; CHECK-SD-NEXT: mov w0, wzr
+; CHECK-SD-NEXT: ret
+; CHECK-SD-NEXT: .LBB3_3: // %if
+; CHECK-SD-NEXT: mov w0, #1 // =0x1
+; CHECK-SD-NEXT: str w0, [x6]
+; CHECK-SD-NEXT: ret
;
-; GISEL-LABEL: and_ule_ugt_uge:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: cmp w0, w1
-; GISEL-NEXT: cset w8, ls
-; GISEL-NEXT: cmp w2, w3
-; GISEL-NEXT: cset w9, hi
-; GISEL-NEXT: and w8, w8, w9
-; GISEL-NEXT: tbnz w8, #0, .LBB3_3
-; GISEL-NEXT: // %bb.1: // %entry
-; GISEL-NEXT: cmp w4, w5
-; GISEL-NEXT: mov w0, wzr
-; GISEL-NEXT: b.hs .LBB3_3
-; GISEL-NEXT: // %bb.2: // %common.ret
-; GISEL-NEXT: ret
-; GISEL-NEXT: .LBB3_3: // %if
-; GISEL-NEXT: mov w0, #1 // =0x1
-; GISEL-NEXT: str w0, [x6]
-; GISEL-NEXT: ret
+; CHECK-GI-LABEL: and_ule_ugt_uge:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: cset w8, ls
+; CHECK-GI-NEXT: cmp w2, w3
+; CHECK-GI-NEXT: cset w9, hi
+; CHECK-GI-NEXT: and w8, w8, w9
+; CHECK-GI-NEXT: tbnz w8, #0, .LBB3_3
+; CHECK-GI-NEXT: // %bb.1: // %entry
+; CHECK-GI-NEXT: cmp w4, w5
+; CHECK-GI-NEXT: mov w0, wzr
+; CHECK-GI-NEXT: b.hs .LBB3_3
+; CHECK-GI-NEXT: // %bb.2: // %common.ret
+; CHECK-GI-NEXT: ret
+; CHECK-GI-NEXT: .LBB3_3: // %if
+; CHECK-GI-NEXT: mov w0, #1 // =0x1
+; CHECK-GI-NEXT: str w0, [x6]
+; CHECK-GI-NEXT: ret
entry:
%c0 = icmp ule i32 %s0, %s1
%c1 = icmp ugt i32 %s2, %s3
@@ -209,40 +209,40 @@ else:
}
define i32 @and_ugt_uge_slt(i32 %s0, i32 %s1, i32 %s2, i32 %s3, i32 %s4, i32 %s5, ptr %p) {
-; SDISEL-LABEL: and_ugt_uge_slt:
-; SDISEL: // %bb.0: // %entry
-; SDISEL-NEXT: cmp w2, w3
-; SDISEL-NEXT: ccmp w0, w1, #0, hs
-; SDISEL-NEXT: b.hi .LBB4_3
-; SDISEL-NEXT: // %bb.1: // %entry
-; SDISEL-NEXT: cmp w4, w5
-; SDISEL-NEXT: b.lt .LBB4_3
-; SDISEL-NEXT: // %bb.2:
-; SDISEL-NEXT: mov w0, wzr
-; SDISEL-NEXT: ret
-; SDISEL-NEXT: .LBB4_3: // %if
-; SDISEL-NEXT: mov w0, #1 // =0x1
-; SDISEL-NEXT: str w0, [x6]
-; SDISEL-NEXT: ret
+; CHECK-SD-LABEL: and_ugt_uge_slt:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: cmp w2, w3
+; CHECK-SD-NEXT: ccmp w0, w1, #0, hs
+; CHECK-SD-NEXT: b.hi .LBB4_3
+; CHECK-SD-NEXT: // %bb.1: // %entry
+; CHECK-SD-NEXT: cmp w4, w5
+; CHECK-SD-NEXT: b.lt .LBB4_3
+; CHECK-SD-NEXT: // %bb.2:
+; CHECK-SD-NEXT: mov w0, wzr
+; CHECK-SD-NEXT: ret
+; CHECK-SD-NEXT: .LBB4_3: // %if
+; CHECK-SD-NEXT: mov w0, #1 // =0x1
+; CHECK-SD-NEXT: str w0, [x6]
+; CHECK-SD-NEXT: ret
;
-; GISEL-LABEL: and_ugt_uge_slt:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: cmp w0, w1
-; GISEL-NEXT: cset w8, hi
-; GISEL-NEXT: cmp w2, w3
-; GISEL-NEXT: cset w9, hs
-; GISEL-NEXT: and w8, w8, w9
-; GISEL-NEXT: tbnz w8, #0, .LBB4_3
-; GISEL-NEXT: // %bb.1: // %entry
-; GISEL-NEXT: cmp w4, w5
-; GISEL-NEXT: mov w0, wzr
-; GISEL-NEXT: b.lt .LBB4_3
-; GISEL-NEXT: // %bb.2: // %common.ret
-; GISEL-NEXT: ret
-; GISEL-NEXT: .LBB4_3: // %if
-; GISEL-NEXT: mov w0, #1 // =0x1
-; GISEL-NEXT: str w0, [x6]
-; GISEL-NEXT: ret
+; CHECK-GI-LABEL: and_ugt_uge_slt:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: cset w8, hi
+; CHECK-GI-NEXT: cmp w2, w3
+; CHECK-GI-NEXT: cset w9, hs
+; CHECK-GI-NEXT: and w8, w8, w9
+; CHECK-GI-NEXT: tbnz w8, #0, .LBB4_3
+; CHECK-GI-NEXT: // %bb.1: // %entry
+; CHECK-GI-NEXT: cmp w4, w5
+; CHECK-GI-NEXT: mov w0, wzr
+; CHECK-GI-NEXT: b.lt .LBB4_3
+; CHECK-GI-NEXT: // %bb.2: // %common.ret
+; CHECK-GI-NEXT: ret
+; CHECK-GI-NEXT: .LBB4_3: // %if
+; CHECK-GI-NEXT: mov w0, #1 // =0x1
+; CHECK-GI-NEXT: str w0, [x6]
+; CHECK-GI-NEXT: ret
entry:
%c0 = icmp ugt i32 %s0, %s1
%c1 = icmp uge i32 %s2, %s3
@@ -260,40 +260,40 @@ else:
}
define i32 @and_uge_slt_sle(i32 %s0, i32 %s1, i32 %s2, i32 %s3, i32 %s4, i32 %s5, ptr %p) {
-; SDISEL-LABEL: and_uge_slt_sle:
-; SDISEL: // %bb.0: // %entry
-; SDISEL-NEXT: cmp w2, w3
-; SDISEL-NEXT: ccmp w0, w1, #0, lt
-; SDISEL-NEXT: b.hs .LBB5_3
-; SDISEL-NEXT: // %bb.1: // %entry
-; SDISEL-NEXT: cmp w4, w5
-; SDISEL-NEXT: b.le .LBB5_3
-; SDISEL-NEXT: // %bb.2:
-; SDISEL-NEXT: mov w0, wzr
-; SDISEL-NEXT: ret
-; SDISEL-NEXT: .LBB5_3: // %if
-; SDISEL-NEXT: mov w0, #1 // =0x1
-; SDISEL-NEXT: str w0, [x6]
-; SDISEL-NEXT: ret
+; CHECK-SD-LABEL: and_uge_slt_sle:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: cmp w2, w3
+; CHECK-SD-NEXT: ccmp w0, w1, #0, lt
+; CHECK-SD-NEXT: b.hs .LBB5_3
+; CHECK-SD-NEXT: // %bb.1: // %entry
+; CHECK-SD-NEXT: cmp w4, w5
+; CHECK-SD-NEXT: b.le .LBB5_3
+; CHECK-SD-NEXT: // %bb.2:
+; CHECK-SD-NEXT: mov w0, wzr
+; CHECK-SD-NEXT: ret
+; CHECK-SD-NEXT: .LBB5_3: // %if
+; CHECK-SD-NEXT: mov w0, #1 // =0x1
+; CHECK-SD-NEXT: str w0, [x6]
+; CHECK-SD-NEXT: ret
;
-; GISEL-LABEL: and_uge_slt_sle:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: cmp w0, w1
-; GISEL-NEXT: cset w8, hs
-; GISEL-NEXT: cmp w2, w3
-; GISEL-NEXT: cset w9, lt
-; GISEL-NEXT: and w8, w8, w9
-; GISEL-NEXT: tbnz w8, #0, .LBB5_3
-; GISEL-NEXT: // %bb.1: // %entry
-; GISEL-NEXT: cmp w4, w5
-; GISEL-NEXT: mov w0, wzr
-; GISEL-NEXT: b.le .LBB5_3
-; GISEL-NEXT: // %bb.2: // %common.ret
-; GISEL-NEXT: ret
-; GISEL-NEXT: .LBB5_3: // %if
-; GISEL-NEXT: mov w0, #1 // =0x1
-; GISEL-NEXT: str w0, [x6]
-; GISEL-NEXT: ret
+; CHECK-GI-LABEL: and_uge_slt_sle:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: cset w8, hs
+; CHECK-GI-NEXT: cmp w2, w3
+; CHECK-GI-NEXT: cset w9, lt
+; CHECK-GI-NEXT: and w8, w8, w9
+; CHECK-GI-NEXT: tbnz w8, #0, .LBB5_3
+; CHECK-GI-NEXT: // %bb.1: // %entry
+; CHECK-GI-NEXT: cmp w4, w5
+; CHECK-GI-NEXT: mov w0, wzr
+; CHECK-GI-NEXT: b.le .LBB5_3
+; CHECK-GI-NEXT: // %bb.2: // %common.ret
+; CHECK-GI-NEXT: ret
+; CHECK-GI-NEXT: .LBB5_3: // %if
+; CHECK-GI-NEXT: mov w0, #1 // =0x1
+; CHECK-GI-NEXT: str w0, [x6]
+; CHECK-GI-NEXT: ret
entry:
%c0 = icmp uge i32 %s0, %s1
%c1 = icmp slt i32 %s2, %s3
@@ -311,40 +311,40 @@ else:
}
define i32 @and_slt_sle_sgt(i32 %s0, i32 %s1, i32 %s2, i32 %s3, i32 %s4, i32 %s5, ptr %p) {
-; SDISEL-LABEL: and_slt_sle_sgt:
-; SDISEL: // %bb.0: // %entry
-; SDISEL-NEXT: cmp w2, w3
-; SDISEL-NEXT: ccmp w0, w1, #0, le
-; SDISEL-NEXT: b.lt .LBB6_3
-; SDISEL-NEXT: // %bb.1: // %entry
-; SDISEL-NEXT: cmp w4, w5
-; SDISEL-NEXT: b.gt .LBB6_3
-; SDISEL-NEXT: // %bb.2:
-; SDISEL-NEXT: mov w0, wzr
-; SDISEL-NEXT: ret
-; SDISEL-NEXT: .LBB6_3: // %if
-; SDISEL-NEXT: mov w0, #1 // =0x1
-; SDISEL-NEXT: str w0, [x6]
-; SDISEL-NEXT: ret
+; CHECK-SD-LABEL: and_slt_sle_sgt:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: cmp w2, w3
+; CHECK-SD-NEXT: ccmp w0, w1, #0, le
+; CHECK-SD-NEXT: b.lt .LBB6_3
+; CHECK-SD-NEXT: // %bb.1: // %entry
+; CHECK-SD-NEXT: cmp w4, w5
+; CHECK-SD-NEXT: b.gt .LBB6_3
+; CHECK-SD-NEXT: // %bb.2:
+; CHECK-SD-NEXT: mov w0, wzr
+; CHECK-SD-NEXT: ret
+; CHECK-SD-NEXT: .LBB6_3: // %if
+; CHECK-SD-NEXT: mov w0, #1 // =0x1
+; CHECK-SD-NEXT: str w0, [x6]
+; CHECK-SD-NEXT: ret
;
-; GISEL-LABEL: and_slt_sle_sgt:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: cmp w0, w1
-; GISEL-NEXT: cset w8, lt
-; GISEL-NEXT: cmp w2, w3
-; GISEL-NEXT: cset w9, le
-; GISEL-NEXT: and w8, w8, w9
-; GISEL-NEXT: tbnz w8, #0, .LBB6_3
-; GISEL-NEXT: // %bb.1: // %entry
-; GISEL-NEXT: cmp w4, w5
-; GISEL-NEXT: mov w0, wzr
-; GISEL-NEXT: b.gt .LBB6_3
-; GISEL-NEXT: // %bb.2: // %common.ret
-; GISEL-NEXT: ret
-; GISEL-NEXT: .LBB6_3: // %if
-; GISEL-NEXT: mov w0, #1 // =0x1
-; GISEL-NEXT: str w0, [x6]
-; GISEL-NEXT: ret
+; CHECK-GI-LABEL: and_slt_sle_sgt:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: cset w8, lt
+; CHECK-GI-NEXT: cmp w2, w3
+; CHECK-GI-NEXT: cset w9, le
+; CHECK-GI-NEXT: and w8, w8, w9
+; CHECK-GI-NEXT: tbnz w8, #0, .LBB6_3
+; CHECK-GI-NEXT: // %bb.1: // %entry
+; CHECK-GI-NEXT: cmp w4, w5
+; CHECK-GI-NEXT: mov w0, wzr
+; CHECK-GI-NEXT: b.gt .LBB6_3
+; CHECK-GI-NEXT: // %bb.2: // %common.ret
+; CHECK-GI-NEXT: ret
+; CHECK-GI-NEXT: .LBB6_3: // %if
+; CHECK-GI-NEXT: mov w0, #1 // =0x1
+; CHECK-GI-NEXT: str w0, [x6]
+; CHECK-GI-NEXT: ret
entry:
%c0 = icmp slt i32 %s0, %s1
%c1 = icmp sle i32 %s2, %s3
@@ -362,40 +362,40 @@ else:
}
define i32 @and_sle_sgt_sge(i32 %s0, i32 %s1, i32 %s2, i32 %s3, i32 %s4, i32 %s5, ptr %p) {
-; SDISEL-LABEL: and_sle_sgt_sge:
-; SDISEL: // %bb.0: // %entry
-; SDISEL-NEXT: cmp w2, w3
-; SDISEL-NEXT: ccmp w0, w1, #0, gt
-; SDISEL-NEXT: b.le .LBB7_3
-; SDISEL-NEXT: // %bb.1: // %entry
-; SDISEL-NEXT: cmp w4, w5
-; SDISEL-NEXT: b.ge .LBB7_3
-; SDISEL-NEXT: // %bb.2:
-; SDISEL-NEXT: mov w0, wzr
-; SDISEL-NEXT: ret
-; SDISEL-NEXT: .LBB7_3: // %if
-; SDISEL-NEXT: mov w0, #1 // =0x1
-; SDISEL-NEXT: str w0, [x6]
-; SDISEL-NEXT: ret
+; CHECK-SD-LABEL: and_sle_sgt_sge:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: cmp w2, w3
+; CHECK-SD-NEXT: ccmp w0, w1, #0, gt
+; CHECK-SD-NEXT: b.le .LBB7_3
+; CHECK-SD-NEXT: // %bb.1: // %entry
+; CHECK-SD-NEXT: cmp w4, w5
+; CHECK-SD-NEXT: b.ge .LBB7_3
+; CHECK-SD-NEXT: // %bb.2:
+; CHECK-SD-NEXT: mov w0, wzr
+; CHECK-SD-NEXT: ret
+; CHECK-SD-NEXT: .LBB7_3: // %if
+; CHECK-SD-NEXT: mov w0, #1 // =0x1
+; CHECK-SD-NEXT: str w0, [x6]
+; CHECK-SD-NEXT: ret
;
-; GISEL-LABEL: and_sle_sgt_sge:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: cmp w0, w1
-; GISEL-NEXT: cset w8, le
-; GISEL-NEXT: cmp w2, w3
-; GISEL-NEXT: cset w9, gt
-; GISEL-NEXT: and w8, w8, w9
-; GISEL-NEXT: tbnz w8, #0, .LBB7_3
-; GISEL-NEXT: // %bb.1: // %entry
-; GISEL-NEXT: cmp w4, w5
-; GISEL-NEXT: mov w0, wzr
-; GISEL-NEXT: b.ge .LBB7_3
-; GISEL-NEXT: // %bb.2: // %common.ret
-; GISEL-NEXT: ret
-; GISEL-NEXT: .LBB7_3: // %if
-; GISEL-NEXT: mov w0, #1 // =0x1
-; GISEL-NEXT: str w0, [x6]
-; GISEL-NEXT: ret
+; CHECK-GI-LABEL: and_sle_sgt_sge:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: cset w8, le
+; CHECK-GI-NEXT: cmp w2, w3
+; CHECK-GI-NEXT: cset w9, gt
+; CHECK-GI-NEXT: and w8, w8, w9
+; CHECK-GI-NEXT: tbnz w8, #0, .LBB7_3
+; CHECK-GI-NEXT: // %bb.1: // %entry
+; CHECK-GI-NEXT: cmp w4, w5
+; CHECK-GI-NEXT: mov w0, wzr
+; CHECK-GI-NEXT: b.ge .LBB7_3
+; CHECK-GI-NEXT: // %bb.2: // %common.ret
+; CHECK-GI-NEXT: ret
+; CHECK-GI-NEXT: .LBB7_3: // %if
+; CHECK-GI-NEXT: mov w0, #1 // =0x1
+; CHECK-GI-NEXT: str w0, [x6]
+; CHECK-GI-NEXT: ret
entry:
%c0 = icmp sle i32 %s0, %s1
%c1 = icmp sgt i32 %s2, %s3
diff --git a/llvm/test/CodeGen/AArch64/arm64-ccmp.ll b/llvm/test/CodeGen/AArch64/arm64-ccmp.ll
index 06e957f..a546ffd 100644
--- a/llvm/test/CodeGen/AArch64/arm64-ccmp.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-ccmp.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -debugify-and-strip-all-safe -mcpu=cyclone -verify-machineinstrs -aarch64-enable-ccmp -aarch64-stress-ccmp | FileCheck %s --check-prefixes=CHECK,SDISEL
-; RUN: llc < %s -debugify-and-strip-all-safe -mcpu=cyclone -verify-machineinstrs -aarch64-enable-ccmp -aarch64-stress-ccmp -global-isel | FileCheck %s --check-prefixes=CHECK,GISEL
+; RUN: llc < %s -debugify-and-strip-all-safe -mcpu=cyclone -verify-machineinstrs -aarch64-enable-ccmp -aarch64-stress-ccmp | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc < %s -debugify-and-strip-all-safe -mcpu=cyclone -verify-machineinstrs -aarch64-enable-ccmp -aarch64-stress-ccmp -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI
target triple = "arm64-apple-ios"
define i32 @single_same(i32 %a, i32 %b) nounwind ssp {
@@ -32,31 +32,31 @@ if.end:
; Different condition codes for the two compares.
define i32 @single_different(i32 %a, i32 %b) nounwind ssp {
-; SDISEL-LABEL: single_different:
-; SDISEL: ; %bb.0: ; %entry
-; SDISEL-NEXT: cmp w0, #6
-; SDISEL-NEXT: ccmp w1, #17, #0, ge
-; SDISEL-NEXT: b.eq LBB1_2
-; SDISEL-NEXT: ; %bb.1: ; %if.then
-; SDISEL-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
-; SDISEL-NEXT: bl _foo
-; SDISEL-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
-; SDISEL-NEXT: LBB1_2: ; %if.end
-; SDISEL-NEXT: mov w0, #7 ; =0x7
-; SDISEL-NEXT: ret
+; CHECK-SD-LABEL: single_different:
+; CHECK-SD: ; %bb.0: ; %entry
+; CHECK-SD-NEXT: cmp w0, #6
+; CHECK-SD-NEXT: ccmp w1, #17, #0, ge
+; CHECK-SD-NEXT: b.eq LBB1_2
+; CHECK-SD-NEXT: ; %bb.1: ; %if.then
+; CHECK-SD-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
+; CHECK-SD-NEXT: bl _foo
+; CHECK-SD-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
+; CHECK-SD-NEXT: LBB1_2: ; %if.end
+; CHECK-SD-NEXT: mov w0, #7 ; =0x7
+; CHECK-SD-NEXT: ret
;
-; GISEL-LABEL: single_different:
-; GISEL: ; %bb.0: ; %entry
-; GISEL-NEXT: cmp w0, #5
-; GISEL-NEXT: ccmp w1, #17, #0, gt
-; GISEL-NEXT: b.eq LBB1_2
-; GISEL-NEXT: ; %bb.1: ; %if.then
-; GISEL-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
-; GISEL-NEXT: bl _foo
-; GISEL-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
-; GISEL-NEXT: LBB1_2: ; %if.end
-; GISEL-NEXT: mov w0, #7 ; =0x7
-; GISEL-NEXT: ret
+; CHECK-GI-LABEL: single_different:
+; CHECK-GI: ; %bb.0: ; %entry
+; CHECK-GI-NEXT: cmp w0, #5
+; CHECK-GI-NEXT: ccmp w1, #17, #0, gt
+; CHECK-GI-NEXT: b.eq LBB1_2
+; CHECK-GI-NEXT: ; %bb.1: ; %if.then
+; CHECK-GI-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
+; CHECK-GI-NEXT: bl _foo
+; CHECK-GI-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
+; CHECK-GI-NEXT: LBB1_2: ; %if.end
+; CHECK-GI-NEXT: mov w0, #7 ; =0x7
+; CHECK-GI-NEXT: ret
entry:
%cmp = icmp sle i32 %a, 5
%cmp1 = icmp ne i32 %b, 17
@@ -73,41 +73,41 @@ if.end:
; Second block clobbers the flags, can't convert (easily).
define i32 @single_flagclobber(i32 %a, i32 %b) nounwind ssp {
-; SDISEL-LABEL: single_flagclobber:
-; SDISEL: ; %bb.0: ; %entry
-; SDISEL-NEXT: cmp w0, #5
-; SDISEL-NEXT: b.eq LBB2_2
-; SDISEL-NEXT: ; %bb.1: ; %lor.lhs.false
-; SDISEL-NEXT: lsl w8, w1, #1
-; SDISEL-NEXT: cmp w1, #7
-; SDISEL-NEXT: csinc w8, w8, w1, lt
-; SDISEL-NEXT: cmp w8, #16
-; SDISEL-NEXT: b.gt LBB2_3
-; SDISEL-NEXT: LBB2_2: ; %if.then
-; SDISEL-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
-; SDISEL-NEXT: bl _foo
-; SDISEL-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
-; SDISEL-NEXT: LBB2_3: ; %if.end
-; SDISEL-NEXT: mov w0, #7 ; =0x7
-; SDISEL-NEXT: ret
+; CHECK-SD-LABEL: single_flagclobber:
+; CHECK-SD: ; %bb.0: ; %entry
+; CHECK-SD-NEXT: cmp w0, #5
+; CHECK-SD-NEXT: b.eq LBB2_2
+; CHECK-SD-NEXT: ; %bb.1: ; %lor.lhs.false
+; CHECK-SD-NEXT: lsl w8, w1, #1
+; CHECK-SD-NEXT: cmp w1, #7
+; CHECK-SD-NEXT: csinc w8, w8, w1, lt
+; CHECK-SD-NEXT: cmp w8, #16
+; CHECK-SD-NEXT: b.gt LBB2_3
+; CHECK-SD-NEXT: LBB2_2: ; %if.then
+; CHECK-SD-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
+; CHECK-SD-NEXT: bl _foo
+; CHECK-SD-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
+; CHECK-SD-NEXT: LBB2_3: ; %if.end
+; CHECK-SD-NEXT: mov w0, #7 ; =0x7
+; CHECK-SD-NEXT: ret
;
-; GISEL-LABEL: single_flagclobber:
-; GISEL: ; %bb.0: ; %entry
-; GISEL-NEXT: cmp w0, #5
-; GISEL-NEXT: b.eq LBB2_2
-; GISEL-NEXT: ; %bb.1: ; %lor.lhs.false
-; GISEL-NEXT: lsl w8, w1, #1
-; GISEL-NEXT: cmp w1, #7
-; GISEL-NEXT: csinc w8, w8, w1, lt
-; GISEL-NEXT: cmp w8, #17
-; GISEL-NEXT: b.ge LBB2_3
-; GISEL-NEXT: LBB2_2: ; %if.then
-; GISEL-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
-; GISEL-NEXT: bl _foo
-; GISEL-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
-; GISEL-NEXT: LBB2_3: ; %if.end
-; GISEL-NEXT: mov w0, #7 ; =0x7
-; GISEL-NEXT: ret
+; CHECK-GI-LABEL: single_flagclobber:
+; CHECK-GI: ; %bb.0: ; %entry
+; CHECK-GI-NEXT: cmp w0, #5
+; CHECK-GI-NEXT: b.eq LBB2_2
+; CHECK-GI-NEXT: ; %bb.1: ; %lor.lhs.false
+; CHECK-GI-NEXT: lsl w8, w1, #1
+; CHECK-GI-NEXT: cmp w1, #7
+; CHECK-GI-NEXT: csinc w8, w8, w1, lt
+; CHECK-GI-NEXT: cmp w8, #17
+; CHECK-GI-NEXT: b.ge LBB2_3
+; CHECK-GI-NEXT: LBB2_2: ; %if.then
+; CHECK-GI-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
+; CHECK-GI-NEXT: bl _foo
+; CHECK-GI-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
+; CHECK-GI-NEXT: LBB2_3: ; %if.end
+; CHECK-GI-NEXT: mov w0, #7 ; =0x7
+; CHECK-GI-NEXT: ret
entry:
%cmp = icmp eq i32 %a, 5
br i1 %cmp, label %if.then, label %lor.lhs.false
@@ -171,37 +171,37 @@ if.end: ; preds = %if.then, %lor.lhs.f
; The sdiv/udiv instructions do not trap when the divisor is zero, so they are
; safe to speculate.
define i32 @speculate_division(i32 %a, i32 %b) nounwind ssp {
-; SDISEL-LABEL: speculate_division:
-; SDISEL: ; %bb.0: ; %entry
-; SDISEL-NEXT: cmp w0, #1
-; SDISEL-NEXT: sdiv w8, w1, w0
-; SDISEL-NEXT: ccmp w8, #16, #0, ge
-; SDISEL-NEXT: b.le LBB4_2
-; SDISEL-NEXT: ; %bb.1: ; %if.end
-; SDISEL-NEXT: mov w0, #7 ; =0x7
-; SDISEL-NEXT: ret
-; SDISEL-NEXT: LBB4_2: ; %if.then
-; SDISEL-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
-; SDISEL-NEXT: bl _foo
-; SDISEL-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
-; SDISEL-NEXT: mov w0, #7 ; =0x7
-; SDISEL-NEXT: ret
+; CHECK-SD-LABEL: speculate_division:
+; CHECK-SD: ; %bb.0: ; %entry
+; CHECK-SD-NEXT: cmp w0, #1
+; CHECK-SD-NEXT: sdiv w8, w1, w0
+; CHECK-SD-NEXT: ccmp w8, #16, #0, ge
+; CHECK-SD-NEXT: b.le LBB4_2
+; CHECK-SD-NEXT: ; %bb.1: ; %if.end
+; CHECK-SD-NEXT: mov w0, #7 ; =0x7
+; CHECK-SD-NEXT: ret
+; CHECK-SD-NEXT: LBB4_2: ; %if.then
+; CHECK-SD-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
+; CHECK-SD-NEXT: bl _foo
+; CHECK-SD-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
+; CHECK-SD-NEXT: mov w0, #7 ; =0x7
+; CHECK-SD-NEXT: ret
;
-; GISEL-LABEL: speculate_division:
-; GISEL: ; %bb.0: ; %entry
-; GISEL-NEXT: cmp w0, #0
-; GISEL-NEXT: sdiv w8, w1, w0
-; GISEL-NEXT: ccmp w8, #17, #0, gt
-; GISEL-NEXT: b.lt LBB4_2
-; GISEL-NEXT: ; %bb.1: ; %if.end
-; GISEL-NEXT: mov w0, #7 ; =0x7
-; GISEL-NEXT: ret
-; GISEL-NEXT: LBB4_2: ; %if.then
-; GISEL-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
-; GISEL-NEXT: bl _foo
-; GISEL-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
-; GISEL-NEXT: mov w0, #7 ; =0x7
-; GISEL-NEXT: ret
+; CHECK-GI-LABEL: speculate_division:
+; CHECK-GI: ; %bb.0: ; %entry
+; CHECK-GI-NEXT: cmp w0, #0
+; CHECK-GI-NEXT: sdiv w8, w1, w0
+; CHECK-GI-NEXT: ccmp w8, #17, #0, gt
+; CHECK-GI-NEXT: b.lt LBB4_2
+; CHECK-GI-NEXT: ; %bb.1: ; %if.end
+; CHECK-GI-NEXT: mov w0, #7 ; =0x7
+; CHECK-GI-NEXT: ret
+; CHECK-GI-NEXT: LBB4_2: ; %if.then
+; CHECK-GI-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
+; CHECK-GI-NEXT: bl _foo
+; CHECK-GI-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
+; CHECK-GI-NEXT: mov w0, #7 ; =0x7
+; CHECK-GI-NEXT: ret
entry:
%cmp = icmp sgt i32 %a, 0
br i1 %cmp, label %land.lhs.true, label %if.end
@@ -221,41 +221,41 @@ if.end:
; Floating point compare.
define i32 @single_fcmp(i32 %a, float %b) nounwind ssp {
-; SDISEL-LABEL: single_fcmp:
-; SDISEL: ; %bb.0: ; %entry
-; SDISEL-NEXT: cmp w0, #1
-; SDISEL-NEXT: scvtf s1, w0
-; SDISEL-NEXT: fdiv s0, s0, s1
-; SDISEL-NEXT: fmov s1, #17.00000000
-; SDISEL-NEXT: fccmp s0, s1, #8, ge
-; SDISEL-NEXT: b.ge LBB5_2
-; SDISEL-NEXT: ; %bb.1: ; %if.end
-; SDISEL-NEXT: mov w0, #7 ; =0x7
-; SDISEL-NEXT: ret
-; SDISEL-NEXT: LBB5_2: ; %if.then
-; SDISEL-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
-; SDISEL-NEXT: bl _foo
-; SDISEL-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
-; SDISEL-NEXT: mov w0, #7 ; =0x7
-; SDISEL-NEXT: ret
+; CHECK-SD-LABEL: single_fcmp:
+; CHECK-SD: ; %bb.0: ; %entry
+; CHECK-SD-NEXT: cmp w0, #1
+; CHECK-SD-NEXT: scvtf s1, w0
+; CHECK-SD-NEXT: fdiv s0, s0, s1
+; CHECK-SD-NEXT: fmov s1, #17.00000000
+; CHECK-SD-NEXT: fccmp s0, s1, #8, ge
+; CHECK-SD-NEXT: b.ge LBB5_2
+; CHECK-SD-NEXT: ; %bb.1: ; %if.end
+; CHECK-SD-NEXT: mov w0, #7 ; =0x7
+; CHECK-SD-NEXT: ret
+; CHECK-SD-NEXT: LBB5_2: ; %if.then
+; CHECK-SD-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
+; CHECK-SD-NEXT: bl _foo
+; CHECK-SD-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
+; CHECK-SD-NEXT: mov w0, #7 ; =0x7
+; CHECK-SD-NEXT: ret
;
-; GISEL-LABEL: single_fcmp:
-; GISEL: ; %bb.0: ; %entry
-; GISEL-NEXT: cmp w0, #0
-; GISEL-NEXT: scvtf s1, w0
-; GISEL-NEXT: fdiv s0, s0, s1
-; GISEL-NEXT: fmov s1, #17.00000000
-; GISEL-NEXT: fccmp s0, s1, #8, gt
-; GISEL-NEXT: b.ge LBB5_2
-; GISEL-NEXT: ; %bb.1: ; %if.end
-; GISEL-NEXT: mov w0, #7 ; =0x7
-; GISEL-NEXT: ret
-; GISEL-NEXT: LBB5_2: ; %if.then
-; GISEL-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
-; GISEL-NEXT: bl _foo
-; GISEL-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
-; GISEL-NEXT: mov w0, #7 ; =0x7
-; GISEL-NEXT: ret
+; CHECK-GI-LABEL: single_fcmp:
+; CHECK-GI: ; %bb.0: ; %entry
+; CHECK-GI-NEXT: cmp w0, #0
+; CHECK-GI-NEXT: scvtf s1, w0
+; CHECK-GI-NEXT: fdiv s0, s0, s1
+; CHECK-GI-NEXT: fmov s1, #17.00000000
+; CHECK-GI-NEXT: fccmp s0, s1, #8, gt
+; CHECK-GI-NEXT: b.ge LBB5_2
+; CHECK-GI-NEXT: ; %bb.1: ; %if.end
+; CHECK-GI-NEXT: mov w0, #7 ; =0x7
+; CHECK-GI-NEXT: ret
+; CHECK-GI-NEXT: LBB5_2: ; %if.then
+; CHECK-GI-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
+; CHECK-GI-NEXT: bl _foo
+; CHECK-GI-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
+; CHECK-GI-NEXT: mov w0, #7 ; =0x7
+; CHECK-GI-NEXT: ret
entry:
%cmp = icmp sgt i32 %a, 0
br i1 %cmp, label %land.lhs.true, label %if.end
@@ -499,28 +499,28 @@ define float @select_or_float(i32 %w0, i32 %w1, float %x2, float %x3) {
}
define i64 @gccbug(i64 %x0, i64 %x1) {
-; SDISEL-LABEL: gccbug:
-; SDISEL: ; %bb.0:
-; SDISEL-NEXT: cmp x0, #2
-; SDISEL-NEXT: ccmp x0, #4, #4, ne
-; SDISEL-NEXT: ccmp x1, #0, #0, eq
-; SDISEL-NEXT: mov w8, #1 ; =0x1
-; SDISEL-NEXT: cinc x0, x8, eq
-; SDISEL-NEXT: ret
+; CHECK-SD-LABEL: gccbug:
+; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: cmp x0, #2
+; CHECK-SD-NEXT: ccmp x0, #4, #4, ne
+; CHECK-SD-NEXT: ccmp x1, #0, #0, eq
+; CHECK-SD-NEXT: mov w8, #1 ; =0x1
+; CHECK-SD-NEXT: cinc x0, x8, eq
+; CHECK-SD-NEXT: ret
;
-; GISEL-LABEL: gccbug:
-; GISEL: ; %bb.0:
-; GISEL-NEXT: cmp x1, #0
-; GISEL-NEXT: cset w8, eq
-; GISEL-NEXT: cmp x0, #2
-; GISEL-NEXT: cset w9, eq
-; GISEL-NEXT: cmp x0, #4
-; GISEL-NEXT: cset w10, eq
-; GISEL-NEXT: orr w9, w10, w9
-; GISEL-NEXT: and w8, w9, w8
-; GISEL-NEXT: and x8, x8, #0x1
-; GISEL-NEXT: add x0, x8, #1
-; GISEL-NEXT: ret
+; CHECK-GI-LABEL: gccbug:
+; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: cmp x1, #0
+; CHECK-GI-NEXT: cset w8, eq
+; CHECK-GI-NEXT: cmp x0, #2
+; CHECK-GI-NEXT: cset w9, eq
+; CHECK-GI-NEXT: cmp x0, #4
+; CHECK-GI-NEXT: cset w10, eq
+; CHECK-GI-NEXT: orr w9, w10, w9
+; CHECK-GI-NEXT: and w8, w9, w8
+; CHECK-GI-NEXT: and x8, x8, #0x1
+; CHECK-GI-NEXT: add x0, x8, #1
+; CHECK-GI-NEXT: ret
%cmp0 = icmp eq i64 %x1, 0
%cmp1 = icmp eq i64 %x0, 2
%cmp2 = icmp eq i64 %x0, 4
@@ -570,23 +570,23 @@ define i32 @select_andor(i32 %v1, i32 %v2, i32 %v3) {
}
define i32 @select_andor32(i32 %v1, i32 %v2, i32 %v3) {
-; SDISEL-LABEL: select_andor32:
-; SDISEL: ; %bb.0:
-; SDISEL-NEXT: cmp w1, w2
-; SDISEL-NEXT: mov w8, #32 ; =0x20
-; SDISEL-NEXT: ccmp w0, w8, #4, lt
-; SDISEL-NEXT: ccmp w0, w1, #0, eq
-; SDISEL-NEXT: csel w0, w0, w1, eq
-; SDISEL-NEXT: ret
+; CHECK-SD-LABEL: select_andor32:
+; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: cmp w1, w2
+; CHECK-SD-NEXT: mov w8, #32 ; =0x20
+; CHECK-SD-NEXT: ccmp w0, w8, #4, lt
+; CHECK-SD-NEXT: ccmp w0, w1, #0, eq
+; CHECK-SD-NEXT: csel w0, w0, w1, eq
+; CHECK-SD-NEXT: ret
;
-; GISEL-LABEL: select_andor32:
-; GISEL: ; %bb.0:
-; GISEL-NEXT: mov w8, #32 ; =0x20
-; GISEL-NEXT: cmp w1, w2
-; GISEL-NEXT: ccmp w0, w8, #4, lt
-; GISEL-NEXT: ccmp w0, w1, #0, eq
-; GISEL-NEXT: csel w0, w0, w1, eq
-; GISEL-NEXT: ret
+; CHECK-GI-LABEL: select_andor32:
+; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: mov w8, #32 ; =0x20
+; CHECK-GI-NEXT: cmp w1, w2
+; CHECK-GI-NEXT: ccmp w0, w8, #4, lt
+; CHECK-GI-NEXT: ccmp w0, w1, #0, eq
+; CHECK-GI-NEXT: csel w0, w0, w1, eq
+; CHECK-GI-NEXT: ret
%c0 = icmp eq i32 %v1, %v2
%c1 = icmp sge i32 %v2, %v3
%c2 = icmp eq i32 %v1, 32
@@ -597,22 +597,22 @@ define i32 @select_andor32(i32 %v1, i32 %v2, i32 %v3) {
}
define i64 @select_noccmp1(i64 %v1, i64 %v2, i64 %v3, i64 %r) {
-; SDISEL-LABEL: select_noccmp1:
-; SDISEL: ; %bb.0:
-; SDISEL-NEXT: cmp x0, #0
-; SDISEL-NEXT: ccmp x0, #13, #4, lt
-; SDISEL-NEXT: cset w8, gt
-; SDISEL-NEXT: cmp x2, #2
-; SDISEL-NEXT: ccmp x2, #4, #4, lt
-; SDISEL-NEXT: csinc w8, w8, wzr, le
-; SDISEL-NEXT: cmp w8, #0
-; SDISEL-NEXT: csel x0, xzr, x3, ne
-; SDISEL-NEXT: ret
+; CHECK-SD-LABEL: select_noccmp1:
+; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: cmp x0, #0
+; CHECK-SD-NEXT: ccmp x0, #13, #4, lt
+; CHECK-SD-NEXT: cset w8, gt
+; CHECK-SD-NEXT: cmp x2, #2
+; CHECK-SD-NEXT: ccmp x2, #4, #4, lt
+; CHECK-SD-NEXT: csinc w8, w8, wzr, le
+; CHECK-SD-NEXT: cmp w8, #0
+; CHECK-SD-NEXT: csel x0, xzr, x3, ne
+; CHECK-SD-NEXT: ret
;
-; GISEL-LABEL: select_noccmp1:
-; GISEL: ; %bb.0:
-; GISEL-NEXT: mov x0, x3
-; GISEL-NEXT: ret
+; CHECK-GI-LABEL: select_noccmp1:
+; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: mov x0, x3
+; CHECK-GI-NEXT: ret
%c0 = icmp slt i64 %v1, 0
%c1 = icmp sgt i64 %v1, 13
%c2 = icmp slt i64 %v3, 2
@@ -627,28 +627,28 @@ define i64 @select_noccmp1(i64 %v1, i64 %v2, i64 %v3, i64 %r) {
@g = global i32 0
define i64 @select_noccmp2(i64 %v1, i64 %v2, i64 %v3, i64 %r) {
-; SDISEL-LABEL: select_noccmp2:
-; SDISEL: ; %bb.0:
-; SDISEL-NEXT: cmp x0, #0
-; SDISEL-NEXT: ccmp x0, #13, #0, ge
-; SDISEL-NEXT: cset w8, gt
-; SDISEL-NEXT: cmp w8, #0
-; SDISEL-NEXT: csel x0, xzr, x3, ne
-; SDISEL-NEXT: sbfx w8, w8, #0, #1
-; SDISEL-NEXT: adrp x9, _g@PAGE
-; SDISEL-NEXT: str w8, [x9, _g@PAGEOFF]
-; SDISEL-NEXT: ret
+; CHECK-SD-LABEL: select_noccmp2:
+; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: cmp x0, #0
+; CHECK-SD-NEXT: ccmp x0, #13, #0, ge
+; CHECK-SD-NEXT: cset w8, gt
+; CHECK-SD-NEXT: cmp w8, #0
+; CHECK-SD-NEXT: csel x0, xzr, x3, ne
+; CHECK-SD-NEXT: sbfx w8, w8, #0, #1
+; CHECK-SD-NEXT: adrp x9, _g@PAGE
+; CHECK-SD-NEXT: str w8, [x9, _g@PAGEOFF]
+; CHECK-SD-NEXT: ret
;
-; GISEL-LABEL: select_noccmp2:
-; GISEL: ; %bb.0:
-; GISEL-NEXT: cmp x0, #14
-; GISEL-NEXT: cset w8, hs
-; GISEL-NEXT: tst w8, #0x1
-; GISEL-NEXT: csel x0, xzr, x3, ne
-; GISEL-NEXT: sbfx w8, w8, #0, #1
-; GISEL-NEXT: adrp x9, _g@PAGE
-; GISEL-NEXT: str w8, [x9, _g@PAGEOFF]
-; GISEL-NEXT: ret
+; CHECK-GI-LABEL: select_noccmp2:
+; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: cmp x0, #14
+; CHECK-GI-NEXT: cset w8, hs
+; CHECK-GI-NEXT: tst w8, #0x1
+; CHECK-GI-NEXT: csel x0, xzr, x3, ne
+; CHECK-GI-NEXT: sbfx w8, w8, #0, #1
+; CHECK-GI-NEXT: adrp x9, _g@PAGE
+; CHECK-GI-NEXT: str w8, [x9, _g@PAGEOFF]
+; CHECK-GI-NEXT: ret
%c0 = icmp slt i64 %v1, 0
%c1 = icmp sgt i64 %v1, 13
%or = or i1 %c0, %c1
@@ -661,33 +661,33 @@ define i64 @select_noccmp2(i64 %v1, i64 %v2, i64 %v3, i64 %r) {
; The following is not possible to implement with a single cmp;ccmp;csel
; sequence.
define i32 @select_noccmp3(i32 %v0, i32 %v1, i32 %v2) {
-; SDISEL-LABEL: select_noccmp3:
-; SDISEL: ; %bb.0:
-; SDISEL-NEXT: cmp w0, #0
-; SDISEL-NEXT: ccmp w0, #13, #0, ge
-; SDISEL-NEXT: cset w8, gt
-; SDISEL-NEXT: cmp w0, #22
-; SDISEL-NEXT: mov w9, #44 ; =0x2c
-; SDISEL-NEXT: ccmp w0, w9, #0, ge
-; SDISEL-NEXT: csel w8, wzr, w8, le
-; SDISEL-NEXT: cmp w0, #99
-; SDISEL-NEXT: mov w9, #77 ; =0x4d
-; SDISEL-NEXT: ccmp w0, w9, #4, ne
-; SDISEL-NEXT: cset w9, eq
-; SDISEL-NEXT: tst w8, w9
-; SDISEL-NEXT: csel w0, w1, w2, ne
-; SDISEL-NEXT: ret
+; CHECK-SD-LABEL: select_noccmp3:
+; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: cmp w0, #0
+; CHECK-SD-NEXT: ccmp w0, #13, #0, ge
+; CHECK-SD-NEXT: cset w8, gt
+; CHECK-SD-NEXT: cmp w0, #22
+; CHECK-SD-NEXT: mov w9, #44 ; =0x2c
+; CHECK-SD-NEXT: ccmp w0, w9, #0, ge
+; CHECK-SD-NEXT: csel w8, wzr, w8, le
+; CHECK-SD-NEXT: cmp w0, #99
+; CHECK-SD-NEXT: mov w9, #77 ; =0x4d
+; CHECK-SD-NEXT: ccmp w0, w9, #4, ne
+; CHECK-SD-NEXT: cset w9, eq
+; CHECK-SD-NEXT: tst w8, w9
+; CHECK-SD-NEXT: csel w0, w1, w2, ne
+; CHECK-SD-NEXT: ret
;
-; GISEL-LABEL: select_noccmp3:
-; GISEL: ; %bb.0:
-; GISEL-NEXT: mov w8, #99 ; =0x63
-; GISEL-NEXT: sub w9, w0, #45
-; GISEL-NEXT: cmp w0, #77
-; GISEL-NEXT: ccmp w0, w8, #4, ne
-; GISEL-NEXT: ccmn w9, #23, #2, eq
-; GISEL-NEXT: ccmp w0, #14, #0, lo
-; GISEL-NEXT: csel w0, w1, w2, hs
-; GISEL-NEXT: ret
+; CHECK-GI-LABEL: select_noccmp3:
+; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: mov w8, #99 ; =0x63
+; CHECK-GI-NEXT: sub w9, w0, #45
+; CHECK-GI-NEXT: cmp w0, #77
+; CHECK-GI-NEXT: ccmp w0, w8, #4, ne
+; CHECK-GI-NEXT: ccmn w9, #23, #2, eq
+; CHECK-GI-NEXT: ccmp w0, #14, #0, lo
+; CHECK-GI-NEXT: csel w0, w1, w2, hs
+; CHECK-GI-NEXT: ret
%c0 = icmp slt i32 %v0, 0
%c1 = icmp sgt i32 %v0, 13
%c2 = icmp slt i32 %v0, 22
@@ -864,27 +864,27 @@ define i32 @select_or_olt_ueq_ogt(double %v0, double %v1, double %v2, double %v3
; Verify that we correctly promote f16.
define i32 @half_select_and_olt_oge(half %v0, half %v1, half %v2, half %v3, i32 %a, i32 %b) #0 {
-; SDISEL-LABEL: half_select_and_olt_oge:
-; SDISEL: ; %bb.0:
-; SDISEL-NEXT: fcvt s1, h1
-; SDISEL-NEXT: fcvt s0, h0
-; SDISEL-NEXT: fcmp s0, s1
-; SDISEL-NEXT: fcvt s0, h3
-; SDISEL-NEXT: fcvt s1, h2
-; SDISEL-NEXT: fccmp s1, s0, #8, mi
-; SDISEL-NEXT: csel w0, w0, w1, ge
-; SDISEL-NEXT: ret
+; CHECK-SD-LABEL: half_select_and_olt_oge:
+; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: fcvt s1, h1
+; CHECK-SD-NEXT: fcvt s0, h0
+; CHECK-SD-NEXT: fcmp s0, s1
+; CHECK-SD-NEXT: fcvt s0, h3
+; CHECK-SD-NEXT: fcvt s1, h2
+; CHECK-SD-NEXT: fccmp s1, s0, #8, mi
+; CHECK-SD-NEXT: csel w0, w0, w1, ge
+; CHECK-SD-NEXT: ret
;
-; GISEL-LABEL: half_select_and_olt_oge:
-; GISEL: ; %bb.0:
-; GISEL-NEXT: fcvt s0, h0
-; GISEL-NEXT: fcvt s1, h1
-; GISEL-NEXT: fcvt s2, h2
-; GISEL-NEXT: fcvt s3, h3
-; GISEL-NEXT: fcmp s0, s1
-; GISEL-NEXT: fccmp s2, s3, #8, mi
-; GISEL-NEXT: csel w0, w0, w1, ge
-; GISEL-NEXT: ret
+; CHECK-GI-LABEL: half_select_and_olt_oge:
+; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: fcvt s0, h0
+; CHECK-GI-NEXT: fcvt s1, h1
+; CHECK-GI-NEXT: fcvt s2, h2
+; CHECK-GI-NEXT: fcvt s3, h3
+; CHECK-GI-NEXT: fcmp s0, s1
+; CHECK-GI-NEXT: fccmp s2, s3, #8, mi
+; CHECK-GI-NEXT: csel w0, w0, w1, ge
+; CHECK-GI-NEXT: ret
%c0 = fcmp olt half %v0, %v1
%c1 = fcmp oge half %v2, %v3
%cr = and i1 %c1, %c0
@@ -893,29 +893,29 @@ define i32 @half_select_and_olt_oge(half %v0, half %v1, half %v2, half %v3, i32
}
define i32 @half_select_and_olt_one(half %v0, half %v1, half %v2, half %v3, i32 %a, i32 %b) #0 {
-; SDISEL-LABEL: half_select_and_olt_one:
-; SDISEL: ; %bb.0:
-; SDISEL-NEXT: fcvt s1, h1
-; SDISEL-NEXT: fcvt s0, h0
-; SDISEL-NEXT: fcmp s0, s1
-; SDISEL-NEXT: fcvt s0, h3
-; SDISEL-NEXT: fcvt s1, h2
-; SDISEL-NEXT: fccmp s1, s0, #4, mi
-; SDISEL-NEXT: fccmp s1, s0, #1, ne
-; SDISEL-NEXT: csel w0, w0, w1, vc
-; SDISEL-NEXT: ret
+; CHECK-SD-LABEL: half_select_and_olt_one:
+; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: fcvt s1, h1
+; CHECK-SD-NEXT: fcvt s0, h0
+; CHECK-SD-NEXT: fcmp s0, s1
+; CHECK-SD-NEXT: fcvt s0, h3
+; CHECK-SD-NEXT: fcvt s1, h2
+; CHECK-SD-NEXT: fccmp s1, s0, #4, mi
+; CHECK-SD-NEXT: fccmp s1, s0, #1, ne
+; CHECK-SD-NEXT: csel w0, w0, w1, vc
+; CHECK-SD-NEXT: ret
;
-; GISEL-LABEL: half_select_and_olt_one:
-; GISEL: ; %bb.0:
-; GISEL-NEXT: fcvt s0, h0
-; GISEL-NEXT: fcvt s1, h1
-; GISEL-NEXT: fcvt s2, h2
-; GISEL-NEXT: fcvt s3, h3
-; GISEL-NEXT: fcmp s0, s1
-; GISEL-NEXT: fccmp s2, s3, #4, mi
-; GISEL-NEXT: fccmp s2, s3, #1, ne
-; GISEL-NEXT: csel w0, w0, w1, vc
-; GISEL-NEXT: ret
+; CHECK-GI-LABEL: half_select_and_olt_one:
+; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: fcvt s0, h0
+; CHECK-GI-NEXT: fcvt s1, h1
+; CHECK-GI-NEXT: fcvt s2, h2
+; CHECK-GI-NEXT: fcvt s3, h3
+; CHECK-GI-NEXT: fcmp s0, s1
+; CHECK-GI-NEXT: fccmp s2, s3, #4, mi
+; CHECK-GI-NEXT: fccmp s2, s3, #1, ne
+; CHECK-GI-NEXT: csel w0, w0, w1, vc
+; CHECK-GI-NEXT: ret
%c0 = fcmp olt half %v0, %v1
%c1 = fcmp one half %v2, %v3
%cr = and i1 %c1, %c0
@@ -926,51 +926,51 @@ define i32 @half_select_and_olt_one(half %v0, half %v1, half %v2, half %v3, i32
; Also verify that we don't try to generate f128 FCCMPs, using RT calls instead.
define i32 @f128_select_and_olt_oge(fp128 %v0, fp128 %v1, fp128 %v2, fp128 %v3, i32 %a, i32 %b) #0 {
-; SDISEL-LABEL: f128_select_and_olt_oge:
-; SDISEL: ; %bb.0:
-; SDISEL-NEXT: sub sp, sp, #80
-; SDISEL-NEXT: stp x22, x21, [sp, #32] ; 16-byte Folded Spill
-; SDISEL-NEXT: stp x20, x19, [sp, #48] ; 16-byte Folded Spill
-; SDISEL-NEXT: stp x29, x30, [sp, #64] ; 16-byte Folded Spill
-; SDISEL-NEXT: mov x19, x1
-; SDISEL-NEXT: mov x20, x0
-; SDISEL-NEXT: stp q2, q3, [sp] ; 32-byte Folded Spill
-; SDISEL-NEXT: bl ___lttf2
-; SDISEL-NEXT: cmp w0, #0
-; SDISEL-NEXT: cset w21, lt
-; SDISEL-NEXT: ldp q0, q1, [sp] ; 32-byte Folded Reload
-; SDISEL-NEXT: bl ___getf2
-; SDISEL-NEXT: cmp w0, #0
-; SDISEL-NEXT: cset w8, ge
-; SDISEL-NEXT: tst w8, w21
-; SDISEL-NEXT: csel w0, w20, w19, ne
-; SDISEL-NEXT: ldp x29, x30, [sp, #64] ; 16-byte Folded Reload
-; SDISEL-NEXT: ldp x20, x19, [sp, #48] ; 16-byte Folded Reload
-; SDISEL-NEXT: ldp x22, x21, [sp, #32] ; 16-byte Folded Reload
-; SDISEL-NEXT: add sp, sp, #80
-; SDISEL-NEXT: ret
+; CHECK-SD-LABEL: f128_select_and_olt_oge:
+; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: sub sp, sp, #80
+; CHECK-SD-NEXT: stp x22, x21, [sp, #32] ; 16-byte Folded Spill
+; CHECK-SD-NEXT: stp x20, x19, [sp, #48] ; 16-byte Folded Spill
+; CHECK-SD-NEXT: stp x29, x30, [sp, #64] ; 16-byte Folded Spill
+; CHECK-SD-NEXT: mov x19, x1
+; CHECK-SD-NEXT: mov x20, x0
+; CHECK-SD-NEXT: stp q2, q3, [sp] ; 32-byte Folded Spill
+; CHECK-SD-NEXT: bl ___lttf2
+; CHECK-SD-NEXT: cmp w0, #0
+; CHECK-SD-NEXT: cset w21, lt
+; CHECK-SD-NEXT: ldp q0, q1, [sp] ; 32-byte Folded Reload
+; CHECK-SD-NEXT: bl ___getf2
+; CHECK-SD-NEXT: cmp w0, #0
+; CHECK-SD-NEXT: cset w8, ge
+; CHECK-SD-NEXT: tst w8, w21
+; CHECK-SD-NEXT: csel w0, w20, w19, ne
+; CHECK-SD-NEXT: ldp x29, x30, [sp, #64] ; 16-byte Folded Reload
+; CHECK-SD-NEXT: ldp x20, x19, [sp, #48] ; 16-byte Folded Reload
+; CHECK-SD-NEXT: ldp x22, x21, [sp, #32] ; 16-byte Folded Reload
+; CHECK-SD-NEXT: add sp, sp, #80
+; CHECK-SD-NEXT: ret
;
-; GISEL-LABEL: f128_select_and_olt_oge:
-; GISEL: ; %bb.0:
-; GISEL-NEXT: sub sp, sp, #80
-; GISEL-NEXT: stp x22, x21, [sp, #32] ; 16-byte Folded Spill
-; GISEL-NEXT: stp x20, x19, [sp, #48] ; 16-byte Folded Spill
-; GISEL-NEXT: stp x29, x30, [sp, #64] ; 16-byte Folded Spill
-; GISEL-NEXT: stp q3, q2, [sp] ; 32-byte Folded Spill
-; GISEL-NEXT: mov x19, x0
-; GISEL-NEXT: mov x20, x1
-; GISEL-NEXT: bl ___lttf2
-; GISEL-NEXT: mov x21, x0
-; GISEL-NEXT: ldp q1, q0, [sp] ; 32-byte Folded Reload
-; GISEL-NEXT: bl ___getf2
-; GISEL-NEXT: cmp w21, #0
-; GISEL-NEXT: ccmp w0, #0, #8, lt
-; GISEL-NEXT: csel w0, w19, w20, ge
-; GISEL-NEXT: ldp x29, x30, [sp, #64] ; 16-byte Folded Reload
-; GISEL-NEXT: ldp x20, x19, [sp, #48] ; 16-byte Folded Reload
-; GISEL-NEXT: ldp x22, x21, [sp, #32] ; 16-byte Folded Reload
-; GISEL-NEXT: add sp, sp, #80
-; GISEL-NEXT: ret
+; CHECK-GI-LABEL: f128_select_and_olt_oge:
+; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: sub sp, sp, #80
+; CHECK-GI-NEXT: stp x22, x21, [sp, #32] ; 16-byte Folded Spill
+; CHECK-GI-NEXT: stp x20, x19, [sp, #48] ; 16-byte Folded Spill
+; CHECK-GI-NEXT: stp x29, x30, [sp, #64] ; 16-byte Folded Spill
+; CHECK-GI-NEXT: stp q3, q2, [sp] ; 32-byte Folded Spill
+; CHECK-GI-NEXT: mov x19, x0
+; CHECK-GI-NEXT: mov x20, x1
+; CHECK-GI-NEXT: bl ___lttf2
+; CHECK-GI-NEXT: mov x21, x0
+; CHECK-GI-NEXT: ldp q1, q0, [sp] ; 32-byte Folded Reload
+; CHECK-GI-NEXT: bl ___getf2
+; CHECK-GI-NEXT: cmp w21, #0
+; CHECK-GI-NEXT: ccmp w0, #0, #8, lt
+; CHECK-GI-NEXT: csel w0, w19, w20, ge
+; CHECK-GI-NEXT: ldp x29, x30, [sp, #64] ; 16-byte Folded Reload
+; CHECK-GI-NEXT: ldp x20, x19, [sp, #48] ; 16-byte Folded Reload
+; CHECK-GI-NEXT: ldp x22, x21, [sp, #32] ; 16-byte Folded Reload
+; CHECK-GI-NEXT: add sp, sp, #80
+; CHECK-GI-NEXT: ret
%c0 = fcmp olt fp128 %v0, %v1
%c1 = fcmp oge fp128 %v2, %v3
%cr = and i1 %c1, %c0
@@ -1048,46 +1048,46 @@ define i32 @deep_or2(i32 %a0, i32 %a1, i32 %a2, i32 %a3, i32 %x, i32 %y) {
; This test is trying to test that multiple ccmp's don't get created in a way
; that they would have multiple uses. It doesn't seem to.
define i32 @multiccmp(i32 %s0, i32 %s1, i32 %s2, i32 %s3, i32 %x, i32 %y) #0 {
-; SDISEL-LABEL: multiccmp:
-; SDISEL: ; %bb.0: ; %entry
-; SDISEL-NEXT: stp x22, x21, [sp, #-48]! ; 16-byte Folded Spill
-; SDISEL-NEXT: stp x20, x19, [sp, #16] ; 16-byte Folded Spill
-; SDISEL-NEXT: stp x29, x30, [sp, #32] ; 16-byte Folded Spill
-; SDISEL-NEXT: mov x19, x5
-; SDISEL-NEXT: cmp w0, w1
-; SDISEL-NEXT: cset w20, gt
-; SDISEL-NEXT: cmp w2, w3
-; SDISEL-NEXT: cset w21, ne
-; SDISEL-NEXT: tst w20, w21
-; SDISEL-NEXT: csel w0, w5, w4, ne
-; SDISEL-NEXT: bl _callee
-; SDISEL-NEXT: tst w20, w21
-; SDISEL-NEXT: csel w0, w0, w19, ne
-; SDISEL-NEXT: bl _callee
-; SDISEL-NEXT: ldp x29, x30, [sp, #32] ; 16-byte Folded Reload
-; SDISEL-NEXT: ldp x20, x19, [sp, #16] ; 16-byte Folded Reload
-; SDISEL-NEXT: ldp x22, x21, [sp], #48 ; 16-byte Folded Reload
-; SDISEL-NEXT: ret
+; CHECK-SD-LABEL: multiccmp:
+; CHECK-SD: ; %bb.0: ; %entry
+; CHECK-SD-NEXT: stp x22, x21, [sp, #-48]! ; 16-byte Folded Spill
+; CHECK-SD-NEXT: stp x20, x19, [sp, #16] ; 16-byte Folded Spill
+; CHECK-SD-NEXT: stp x29, x30, [sp, #32] ; 16-byte Folded Spill
+; CHECK-SD-NEXT: mov x19, x5
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: cset w20, gt
+; CHECK-SD-NEXT: cmp w2, w3
+; CHECK-SD-NEXT: cset w21, ne
+; CHECK-SD-NEXT: tst w20, w21
+; CHECK-SD-NEXT: csel w0, w5, w4, ne
+; CHECK-SD-NEXT: bl _callee
+; CHECK-SD-NEXT: tst w20, w21
+; CHECK-SD-NEXT: csel w0, w0, w19, ne
+; CHECK-SD-NEXT: bl _callee
+; CHECK-SD-NEXT: ldp x29, x30, [sp, #32] ; 16-byte Folded Reload
+; CHECK-SD-NEXT: ldp x20, x19, [sp, #16] ; 16-byte Folded Reload
+; CHECK-SD-NEXT: ldp x22, x21, [sp], #48 ; 16-byte Folded Reload
+; CHECK-SD-NEXT: ret
;
-; GISEL-LABEL: multiccmp:
-; GISEL: ; %bb.0: ; %entry
-; GISEL-NEXT: stp x20, x19, [sp, #-32]! ; 16-byte Folded Spill
-; GISEL-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill
-; GISEL-NEXT: mov x19, x5
-; GISEL-NEXT: cmp w0, w1
-; GISEL-NEXT: cset w8, gt
-; GISEL-NEXT: cmp w2, w3
-; GISEL-NEXT: cset w9, ne
-; GISEL-NEXT: and w20, w8, w9
-; GISEL-NEXT: tst w20, #0x1
-; GISEL-NEXT: csel w0, w5, w4, ne
-; GISEL-NEXT: bl _callee
-; GISEL-NEXT: tst w20, #0x1
-; GISEL-NEXT: csel w0, w0, w19, ne
-; GISEL-NEXT: bl _callee
-; GISEL-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
-; GISEL-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload
-; GISEL-NEXT: ret
+; CHECK-GI-LABEL: multiccmp:
+; CHECK-GI: ; %bb.0: ; %entry
+; CHECK-GI-NEXT: stp x20, x19, [sp, #-32]! ; 16-byte Folded Spill
+; CHECK-GI-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; CHECK-GI-NEXT: mov x19, x5
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: cset w8, gt
+; CHECK-GI-NEXT: cmp w2, w3
+; CHECK-GI-NEXT: cset w9, ne
+; CHECK-GI-NEXT: and w20, w8, w9
+; CHECK-GI-NEXT: tst w20, #0x1
+; CHECK-GI-NEXT: csel w0, w5, w4, ne
+; CHECK-GI-NEXT: bl _callee
+; CHECK-GI-NEXT: tst w20, #0x1
+; CHECK-GI-NEXT: csel w0, w0, w19, ne
+; CHECK-GI-NEXT: bl _callee
+; CHECK-GI-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
+; CHECK-GI-NEXT: ldp x20, x19, [sp], #32 ; 16-byte Folded Reload
+; CHECK-GI-NEXT: ret
entry:
%c0 = icmp sgt i32 %s0, %s1
%c1 = icmp ne i32 %s2, %s3
@@ -1100,57 +1100,57 @@ entry:
}
define i32 @multiccmp2(i32 %s0, i32 %s1, i32 %s2, i32 %s3, i32 %x, i32 %y) #0 {
-; SDISEL-LABEL: multiccmp2:
-; SDISEL: ; %bb.0: ; %entry
-; SDISEL-NEXT: stp x22, x21, [sp, #-48]! ; 16-byte Folded Spill
-; SDISEL-NEXT: stp x20, x19, [sp, #16] ; 16-byte Folded Spill
-; SDISEL-NEXT: stp x29, x30, [sp, #32] ; 16-byte Folded Spill
-; SDISEL-NEXT: mov x19, x5
-; SDISEL-NEXT: mov x20, x3
-; SDISEL-NEXT: mov x21, x0
-; SDISEL-NEXT: cmp w0, w1
-; SDISEL-NEXT: cset w8, gt
-; SDISEL-NEXT: cmp w2, w3
-; SDISEL-NEXT: cset w22, ne
-; SDISEL-NEXT: tst w8, w22
-; SDISEL-NEXT: csel w0, w5, w4, ne
-; SDISEL-NEXT: bl _callee
-; SDISEL-NEXT: cmp w21, w20
-; SDISEL-NEXT: cset w8, eq
-; SDISEL-NEXT: tst w22, w8
-; SDISEL-NEXT: csel w0, w0, w19, ne
-; SDISEL-NEXT: bl _callee
-; SDISEL-NEXT: ldp x29, x30, [sp, #32] ; 16-byte Folded Reload
-; SDISEL-NEXT: ldp x20, x19, [sp, #16] ; 16-byte Folded Reload
-; SDISEL-NEXT: ldp x22, x21, [sp], #48 ; 16-byte Folded Reload
-; SDISEL-NEXT: ret
+; CHECK-SD-LABEL: multiccmp2:
+; CHECK-SD: ; %bb.0: ; %entry
+; CHECK-SD-NEXT: stp x22, x21, [sp, #-48]! ; 16-byte Folded Spill
+; CHECK-SD-NEXT: stp x20, x19, [sp, #16] ; 16-byte Folded Spill
+; CHECK-SD-NEXT: stp x29, x30, [sp, #32] ; 16-byte Folded Spill
+; CHECK-SD-NEXT: mov x19, x5
+; CHECK-SD-NEXT: mov x20, x3
+; CHECK-SD-NEXT: mov x21, x0
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: cset w8, gt
+; CHECK-SD-NEXT: cmp w2, w3
+; CHECK-SD-NEXT: cset w22, ne
+; CHECK-SD-NEXT: tst w8, w22
+; CHECK-SD-NEXT: csel w0, w5, w4, ne
+; CHECK-SD-NEXT: bl _callee
+; CHECK-SD-NEXT: cmp w21, w20
+; CHECK-SD-NEXT: cset w8, eq
+; CHECK-SD-NEXT: tst w22, w8
+; CHECK-SD-NEXT: csel w0, w0, w19, ne
+; CHECK-SD-NEXT: bl _callee
+; CHECK-SD-NEXT: ldp x29, x30, [sp, #32] ; 16-byte Folded Reload
+; CHECK-SD-NEXT: ldp x20, x19, [sp, #16] ; 16-byte Folded Reload
+; CHECK-SD-NEXT: ldp x22, x21, [sp], #48 ; 16-byte Folded Reload
+; CHECK-SD-NEXT: ret
;
-; GISEL-LABEL: multiccmp2:
-; GISEL: ; %bb.0: ; %entry
-; GISEL-NEXT: stp x22, x21, [sp, #-48]! ; 16-byte Folded Spill
-; GISEL-NEXT: stp x20, x19, [sp, #16] ; 16-byte Folded Spill
-; GISEL-NEXT: stp x29, x30, [sp, #32] ; 16-byte Folded Spill
-; GISEL-NEXT: mov x19, x0
-; GISEL-NEXT: mov x20, x3
-; GISEL-NEXT: mov x21, x5
-; GISEL-NEXT: cmp w0, w1
-; GISEL-NEXT: cset w8, gt
-; GISEL-NEXT: cmp w2, w3
-; GISEL-NEXT: cset w22, ne
-; GISEL-NEXT: and w8, w8, w22
-; GISEL-NEXT: tst w8, #0x1
-; GISEL-NEXT: csel w0, w5, w4, ne
-; GISEL-NEXT: bl _callee
-; GISEL-NEXT: cmp w19, w20
-; GISEL-NEXT: cset w8, eq
-; GISEL-NEXT: and w8, w22, w8
-; GISEL-NEXT: tst w8, #0x1
-; GISEL-NEXT: csel w0, w0, w21, ne
-; GISEL-NEXT: bl _callee
-; GISEL-NEXT: ldp x29, x30, [sp, #32] ; 16-byte Folded Reload
-; GISEL-NEXT: ldp x20, x19, [sp, #16] ; 16-byte Folded Reload
-; GISEL-NEXT: ldp x22, x21, [sp], #48 ; 16-byte Folded Reload
-; GISEL-NEXT: ret
+; CHECK-GI-LABEL: multiccmp2:
+; CHECK-GI: ; %bb.0: ; %entry
+; CHECK-GI-NEXT: stp x22, x21, [sp, #-48]! ; 16-byte Folded Spill
+; CHECK-GI-NEXT: stp x20, x19, [sp, #16] ; 16-byte Folded Spill
+; CHECK-GI-NEXT: stp x29, x30, [sp, #32] ; 16-byte Folded Spill
+; CHECK-GI-NEXT: mov x19, x0
+; CHECK-GI-NEXT: mov x20, x3
+; CHECK-GI-NEXT: mov x21, x5
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: cset w8, gt
+; CHECK-GI-NEXT: cmp w2, w3
+; CHECK-GI-NEXT: cset w22, ne
+; CHECK-GI-NEXT: and w8, w8, w22
+; CHECK-GI-NEXT: tst w8, #0x1
+; CHECK-GI-NEXT: csel w0, w5, w4, ne
+; CHECK-GI-NEXT: bl _callee
+; CHECK-GI-NEXT: cmp w19, w20
+; CHECK-GI-NEXT: cset w8, eq
+; CHECK-GI-NEXT: and w8, w22, w8
+; CHECK-GI-NEXT: tst w8, #0x1
+; CHECK-GI-NEXT: csel w0, w0, w21, ne
+; CHECK-GI-NEXT: bl _callee
+; CHECK-GI-NEXT: ldp x29, x30, [sp, #32] ; 16-byte Folded Reload
+; CHECK-GI-NEXT: ldp x20, x19, [sp, #16] ; 16-byte Folded Reload
+; CHECK-GI-NEXT: ldp x22, x21, [sp], #48 ; 16-byte Folded Reload
+; CHECK-GI-NEXT: ret
entry:
%c0 = icmp sgt i32 %s0, %s1
%c1 = icmp ne i32 %s2, %s3
@@ -1168,21 +1168,21 @@ entry:
declare i32 @callee(i32)
define i1 @cmp_and_negative_const(i32 %0, i32 %1) {
-; SDISEL-LABEL: cmp_and_negative_const:
-; SDISEL: ; %bb.0:
-; SDISEL-NEXT: cmn w0, #1
-; SDISEL-NEXT: ccmn w1, #2, #0, eq
-; SDISEL-NEXT: cset w0, eq
-; SDISEL-NEXT: ret
+; CHECK-SD-LABEL: cmp_and_negative_const:
+; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: cmn w0, #1
+; CHECK-SD-NEXT: ccmn w1, #2, #0, eq
+; CHECK-SD-NEXT: cset w0, eq
+; CHECK-SD-NEXT: ret
;
-; GISEL-LABEL: cmp_and_negative_const:
-; GISEL: ; %bb.0:
-; GISEL-NEXT: cmn w0, #1
-; GISEL-NEXT: cset w8, eq
-; GISEL-NEXT: cmn w1, #2
-; GISEL-NEXT: cset w9, eq
-; GISEL-NEXT: and w0, w8, w9
-; GISEL-NEXT: ret
+; CHECK-GI-LABEL: cmp_and_negative_const:
+; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: cmn w0, #1
+; CHECK-GI-NEXT: cset w8, eq
+; CHECK-GI-NEXT: cmn w1, #2
+; CHECK-GI-NEXT: cset w9, eq
+; CHECK-GI-NEXT: and w0, w8, w9
+; CHECK-GI-NEXT: ret
%3 = icmp eq i32 %0, -1
%4 = icmp eq i32 %1, -2
%5 = and i1 %3, %4
@@ -1190,21 +1190,21 @@ define i1 @cmp_and_negative_const(i32 %0, i32 %1) {
}
define i1 @cmp_or_negative_const(i32 %a, i32 %b) {
-; SDISEL-LABEL: cmp_or_negative_const:
-; SDISEL: ; %bb.0:
-; SDISEL-NEXT: cmn w0, #1
-; SDISEL-NEXT: ccmn w1, #2, #4, ne
-; SDISEL-NEXT: cset w0, eq
-; SDISEL-NEXT: ret
+; CHECK-SD-LABEL: cmp_or_negative_const:
+; CHECK-SD: ; %bb.0:
+; CHECK-SD-NEXT: cmn w0, #1
+; CHECK-SD-NEXT: ccmn w1, #2, #4, ne
+; CHECK-SD-NEXT: cset w0, eq
+; CHECK-SD-NEXT: ret
;
-; GISEL-LABEL: cmp_or_negative_const:
-; GISEL: ; %bb.0:
-; GISEL-NEXT: cmn w0, #1
-; GISEL-NEXT: cset w8, eq
-; GISEL-NEXT: cmn w1, #2
-; GISEL-NEXT: cset w9, eq
-; GISEL-NEXT: orr w0, w8, w9
-; GISEL-NEXT: ret
+; CHECK-GI-LABEL: cmp_or_negative_const:
+; CHECK-GI: ; %bb.0:
+; CHECK-GI-NEXT: cmn w0, #1
+; CHECK-GI-NEXT: cset w8, eq
+; CHECK-GI-NEXT: cmn w1, #2
+; CHECK-GI-NEXT: cset w9, eq
+; CHECK-GI-NEXT: orr w0, w8, w9
+; CHECK-GI-NEXT: ret
%cmp = icmp eq i32 %a, -1
%cmp1 = icmp eq i32 %b, -2
%or.cond = or i1 %cmp, %cmp1
diff --git a/llvm/test/CodeGen/AArch64/arm64-fml-combines.ll b/llvm/test/CodeGen/AArch64/arm64-fml-combines.ll
index ce35810..60c48bf 100644
--- a/llvm/test/CodeGen/AArch64/arm64-fml-combines.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-fml-combines.ll
@@ -1,4 +1,4 @@
-; RUN: llc < %s -O3 -mtriple=arm64-apple-ios -enable-unsafe-fp-math -mattr=+fullfp16 | FileCheck %s
+; RUN: llc < %s -O3 -mtriple=arm64-apple-ios -mattr=+fullfp16 | FileCheck %s
; RUN: llc < %s -O3 -mtriple=arm64-apple-ios -fp-contract=fast -mattr=+fullfp16 | FileCheck %s
define void @foo_2d(ptr %src) {
@@ -130,9 +130,9 @@ for.end: ; preds = %for.body
; CHECK: fnmadd h0, h0, h1, h2
define half @test0(half %a, half %b, half %c) {
entry:
- %0 = fmul half %a, %b
- %mul = fsub half -0.000000e+00, %0
- %sub1 = fsub half %mul, %c
+ %0 = fmul contract half %a, %b
+ %mul = fsub contract half -0.000000e+00, %0
+ %sub1 = fsub contract half %mul, %c
ret half %sub1
}
@@ -140,9 +140,9 @@ entry:
; CHECK: fnmadd s0, s0, s1, s2
define float @test1(float %a, float %b, float %c) {
entry:
- %0 = fmul float %a, %b
- %mul = fsub float -0.000000e+00, %0
- %sub1 = fsub float %mul, %c
+ %0 = fmul contract float %a, %b
+ %mul = fsub contract float -0.000000e+00, %0
+ %sub1 = fsub contract float %mul, %c
ret float %sub1
}
@@ -150,9 +150,9 @@ entry:
; CHECK: fnmadd d0, d0, d1, d2
define double @test2(double %a, double %b, double %c) {
entry:
- %0 = fmul double %a, %b
- %mul = fsub double -0.000000e+00, %0
- %sub1 = fsub double %mul, %c
+ %0 = fmul contract double %a, %b
+ %mul = fsub contract double -0.000000e+00, %0
+ %sub1 = fsub contract double %mul, %c
ret double %sub1
}
diff --git a/llvm/test/CodeGen/AArch64/cmp-chains.ll b/llvm/test/CodeGen/AArch64/cmp-chains.ll
index 4b816df..3620444 100644
--- a/llvm/test/CodeGen/AArch64/cmp-chains.ll
+++ b/llvm/test/CodeGen/AArch64/cmp-chains.ll
@@ -1,26 +1,26 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s --check-prefixes=CHECK,SDISEL
-; RUN: llc < %s -mtriple=aarch64-- -global-isel | FileCheck %s --check-prefixes=CHECK,GISEL
+; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc < %s -mtriple=aarch64-- -global-isel | FileCheck %s --check-prefixes=CHECK,CHECK-GI
; Ensure chains of comparisons produce chains of `ccmp`
; (x0 < x1) && (x2 > x3)
define i32 @cmp_and2(i32 %0, i32 %1, i32 %2, i32 %3) {
-; SDISEL-LABEL: cmp_and2:
-; SDISEL: // %bb.0:
-; SDISEL-NEXT: cmp w0, w1
-; SDISEL-NEXT: ccmp w2, w3, #0, lo
-; SDISEL-NEXT: cset w0, hi
-; SDISEL-NEXT: ret
+; CHECK-SD-LABEL: cmp_and2:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: ccmp w2, w3, #0, lo
+; CHECK-SD-NEXT: cset w0, hi
+; CHECK-SD-NEXT: ret
;
-; GISEL-LABEL: cmp_and2:
-; GISEL: // %bb.0:
-; GISEL-NEXT: cmp w0, w1
-; GISEL-NEXT: cset w8, lo
-; GISEL-NEXT: cmp w2, w3
-; GISEL-NEXT: cset w9, hi
-; GISEL-NEXT: and w0, w8, w9
-; GISEL-NEXT: ret
+; CHECK-GI-LABEL: cmp_and2:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: cset w8, lo
+; CHECK-GI-NEXT: cmp w2, w3
+; CHECK-GI-NEXT: cset w9, hi
+; CHECK-GI-NEXT: and w0, w8, w9
+; CHECK-GI-NEXT: ret
%5 = icmp ult i32 %0, %1
%6 = icmp ugt i32 %2, %3
%7 = select i1 %5, i1 %6, i1 false
@@ -30,25 +30,25 @@ define i32 @cmp_and2(i32 %0, i32 %1, i32 %2, i32 %3) {
; (x0 < x1) && (x2 > x3) && (x4 != x5)
define i32 @cmp_and3(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5) {
-; SDISEL-LABEL: cmp_and3:
-; SDISEL: // %bb.0:
-; SDISEL-NEXT: cmp w0, w1
-; SDISEL-NEXT: ccmp w2, w3, #0, lo
-; SDISEL-NEXT: ccmp w4, w5, #4, hi
-; SDISEL-NEXT: cset w0, ne
-; SDISEL-NEXT: ret
+; CHECK-SD-LABEL: cmp_and3:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: ccmp w2, w3, #0, lo
+; CHECK-SD-NEXT: ccmp w4, w5, #4, hi
+; CHECK-SD-NEXT: cset w0, ne
+; CHECK-SD-NEXT: ret
;
-; GISEL-LABEL: cmp_and3:
-; GISEL: // %bb.0:
-; GISEL-NEXT: cmp w0, w1
-; GISEL-NEXT: cset w8, lo
-; GISEL-NEXT: cmp w2, w3
-; GISEL-NEXT: cset w9, hi
-; GISEL-NEXT: cmp w4, w5
-; GISEL-NEXT: and w8, w8, w9
-; GISEL-NEXT: cset w9, ne
-; GISEL-NEXT: and w0, w8, w9
-; GISEL-NEXT: ret
+; CHECK-GI-LABEL: cmp_and3:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: cset w8, lo
+; CHECK-GI-NEXT: cmp w2, w3
+; CHECK-GI-NEXT: cset w9, hi
+; CHECK-GI-NEXT: cmp w4, w5
+; CHECK-GI-NEXT: and w8, w8, w9
+; CHECK-GI-NEXT: cset w9, ne
+; CHECK-GI-NEXT: and w0, w8, w9
+; CHECK-GI-NEXT: ret
%7 = icmp ult i32 %0, %1
%8 = icmp ugt i32 %2, %3
%9 = select i1 %7, i1 %8, i1 false
@@ -60,29 +60,29 @@ define i32 @cmp_and3(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5) {
; (x0 < x1) && (x2 > x3) && (x4 != x5) && (x6 == x7)
define i32 @cmp_and4(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7) {
-; SDISEL-LABEL: cmp_and4:
-; SDISEL: // %bb.0:
-; SDISEL-NEXT: cmp w2, w3
-; SDISEL-NEXT: ccmp w0, w1, #2, hi
-; SDISEL-NEXT: ccmp w4, w5, #4, lo
-; SDISEL-NEXT: ccmp w6, w7, #0, ne
-; SDISEL-NEXT: cset w0, eq
-; SDISEL-NEXT: ret
+; CHECK-SD-LABEL: cmp_and4:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: cmp w2, w3
+; CHECK-SD-NEXT: ccmp w0, w1, #2, hi
+; CHECK-SD-NEXT: ccmp w4, w5, #4, lo
+; CHECK-SD-NEXT: ccmp w6, w7, #0, ne
+; CHECK-SD-NEXT: cset w0, eq
+; CHECK-SD-NEXT: ret
;
-; GISEL-LABEL: cmp_and4:
-; GISEL: // %bb.0:
-; GISEL-NEXT: cmp w2, w3
-; GISEL-NEXT: cset w8, hi
-; GISEL-NEXT: cmp w0, w1
-; GISEL-NEXT: cset w9, lo
-; GISEL-NEXT: cmp w4, w5
-; GISEL-NEXT: cset w10, ne
-; GISEL-NEXT: cmp w6, w7
-; GISEL-NEXT: and w8, w8, w9
-; GISEL-NEXT: cset w11, eq
-; GISEL-NEXT: and w9, w10, w11
-; GISEL-NEXT: and w0, w8, w9
-; GISEL-NEXT: ret
+; CHECK-GI-LABEL: cmp_and4:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: cmp w2, w3
+; CHECK-GI-NEXT: cset w8, hi
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: cset w9, lo
+; CHECK-GI-NEXT: cmp w4, w5
+; CHECK-GI-NEXT: cset w10, ne
+; CHECK-GI-NEXT: cmp w6, w7
+; CHECK-GI-NEXT: and w8, w8, w9
+; CHECK-GI-NEXT: cset w11, eq
+; CHECK-GI-NEXT: and w9, w10, w11
+; CHECK-GI-NEXT: and w0, w8, w9
+; CHECK-GI-NEXT: ret
%9 = icmp ugt i32 %2, %3
%10 = icmp ult i32 %0, %1
%11 = select i1 %9, i1 %10, i1 false
@@ -96,22 +96,22 @@ define i32 @cmp_and4(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32
; (x0 < x1) || (x2 > x3)
define i32 @cmp_or2(i32 %0, i32 %1, i32 %2, i32 %3) {
-; SDISEL-LABEL: cmp_or2:
-; SDISEL: // %bb.0:
-; SDISEL-NEXT: cmp w0, w1
-; SDISEL-NEXT: ccmp w2, w3, #0, hs
-; SDISEL-NEXT: cset w0, ne
-; SDISEL-NEXT: ret
+; CHECK-SD-LABEL: cmp_or2:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: ccmp w2, w3, #0, hs
+; CHECK-SD-NEXT: cset w0, ne
+; CHECK-SD-NEXT: ret
;
-; GISEL-LABEL: cmp_or2:
-; GISEL: // %bb.0:
-; GISEL-NEXT: cmp w0, w1
-; GISEL-NEXT: cset w8, lo
-; GISEL-NEXT: cmp w2, w3
-; GISEL-NEXT: cset w9, ne
-; GISEL-NEXT: orr w8, w8, w9
-; GISEL-NEXT: and w0, w8, #0x1
-; GISEL-NEXT: ret
+; CHECK-GI-LABEL: cmp_or2:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: cset w8, lo
+; CHECK-GI-NEXT: cmp w2, w3
+; CHECK-GI-NEXT: cset w9, ne
+; CHECK-GI-NEXT: orr w8, w8, w9
+; CHECK-GI-NEXT: and w0, w8, #0x1
+; CHECK-GI-NEXT: ret
%5 = icmp ult i32 %0, %1
%6 = icmp ne i32 %2, %3
%7 = select i1 %5, i1 true, i1 %6
@@ -121,26 +121,26 @@ define i32 @cmp_or2(i32 %0, i32 %1, i32 %2, i32 %3) {
; (x0 < x1) || (x2 > x3) || (x4 != x5)
define i32 @cmp_or3(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5) {
-; SDISEL-LABEL: cmp_or3:
-; SDISEL: // %bb.0:
-; SDISEL-NEXT: cmp w0, w1
-; SDISEL-NEXT: ccmp w2, w3, #2, hs
-; SDISEL-NEXT: ccmp w4, w5, #0, ls
-; SDISEL-NEXT: cset w0, ne
-; SDISEL-NEXT: ret
+; CHECK-SD-LABEL: cmp_or3:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: ccmp w2, w3, #2, hs
+; CHECK-SD-NEXT: ccmp w4, w5, #0, ls
+; CHECK-SD-NEXT: cset w0, ne
+; CHECK-SD-NEXT: ret
;
-; GISEL-LABEL: cmp_or3:
-; GISEL: // %bb.0:
-; GISEL-NEXT: cmp w0, w1
-; GISEL-NEXT: cset w8, lo
-; GISEL-NEXT: cmp w2, w3
-; GISEL-NEXT: cset w9, hi
-; GISEL-NEXT: cmp w4, w5
-; GISEL-NEXT: orr w8, w8, w9
-; GISEL-NEXT: cset w9, ne
-; GISEL-NEXT: orr w8, w8, w9
-; GISEL-NEXT: and w0, w8, #0x1
-; GISEL-NEXT: ret
+; CHECK-GI-LABEL: cmp_or3:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: cset w8, lo
+; CHECK-GI-NEXT: cmp w2, w3
+; CHECK-GI-NEXT: cset w9, hi
+; CHECK-GI-NEXT: cmp w4, w5
+; CHECK-GI-NEXT: orr w8, w8, w9
+; CHECK-GI-NEXT: cset w9, ne
+; CHECK-GI-NEXT: orr w8, w8, w9
+; CHECK-GI-NEXT: and w0, w8, #0x1
+; CHECK-GI-NEXT: ret
%7 = icmp ult i32 %0, %1
%8 = icmp ugt i32 %2, %3
%9 = select i1 %7, i1 true, i1 %8
@@ -152,30 +152,30 @@ define i32 @cmp_or3(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5) {
; (x0 < x1) || (x2 > x3) || (x4 != x5) || (x6 == x7)
define i32 @cmp_or4(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32 %7) {
-; SDISEL-LABEL: cmp_or4:
-; SDISEL: // %bb.0:
-; SDISEL-NEXT: cmp w0, w1
-; SDISEL-NEXT: ccmp w2, w3, #2, hs
-; SDISEL-NEXT: ccmp w4, w5, #0, ls
-; SDISEL-NEXT: ccmp w6, w7, #4, eq
-; SDISEL-NEXT: cset w0, eq
-; SDISEL-NEXT: ret
+; CHECK-SD-LABEL: cmp_or4:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: cmp w0, w1
+; CHECK-SD-NEXT: ccmp w2, w3, #2, hs
+; CHECK-SD-NEXT: ccmp w4, w5, #0, ls
+; CHECK-SD-NEXT: ccmp w6, w7, #4, eq
+; CHECK-SD-NEXT: cset w0, eq
+; CHECK-SD-NEXT: ret
;
-; GISEL-LABEL: cmp_or4:
-; GISEL: // %bb.0:
-; GISEL-NEXT: cmp w0, w1
-; GISEL-NEXT: cset w8, lo
-; GISEL-NEXT: cmp w2, w3
-; GISEL-NEXT: cset w9, hi
-; GISEL-NEXT: cmp w4, w5
-; GISEL-NEXT: cset w10, ne
-; GISEL-NEXT: cmp w6, w7
-; GISEL-NEXT: orr w8, w8, w9
-; GISEL-NEXT: cset w11, eq
-; GISEL-NEXT: orr w9, w10, w11
-; GISEL-NEXT: orr w8, w8, w9
-; GISEL-NEXT: and w0, w8, #0x1
-; GISEL-NEXT: ret
+; CHECK-GI-LABEL: cmp_or4:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: cmp w0, w1
+; CHECK-GI-NEXT: cset w8, lo
+; CHECK-GI-NEXT: cmp w2, w3
+; CHECK-GI-NEXT: cset w9, hi
+; CHECK-GI-NEXT: cmp w4, w5
+; CHECK-GI-NEXT: cset w10, ne
+; CHECK-GI-NEXT: cmp w6, w7
+; CHECK-GI-NEXT: orr w8, w8, w9
+; CHECK-GI-NEXT: cset w11, eq
+; CHECK-GI-NEXT: orr w9, w10, w11
+; CHECK-GI-NEXT: orr w8, w8, w9
+; CHECK-GI-NEXT: and w0, w8, #0x1
+; CHECK-GI-NEXT: ret
%9 = icmp ult i32 %0, %1
%10 = icmp ugt i32 %2, %3
%11 = select i1 %9, i1 true, i1 %10
@@ -189,22 +189,22 @@ define i32 @cmp_or4(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4, i32 %5, i32 %6, i32
; (x0 != 0) || (x1 != 0)
define i32 @true_or2(i32 %0, i32 %1) {
-; SDISEL-LABEL: true_or2:
-; SDISEL: // %bb.0:
-; SDISEL-NEXT: orr w8, w0, w1
-; SDISEL-NEXT: cmp w8, #0
-; SDISEL-NEXT: cset w0, ne
-; SDISEL-NEXT: ret
+; CHECK-SD-LABEL: true_or2:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: orr w8, w0, w1
+; CHECK-SD-NEXT: cmp w8, #0
+; CHECK-SD-NEXT: cset w0, ne
+; CHECK-SD-NEXT: ret
;
-; GISEL-LABEL: true_or2:
-; GISEL: // %bb.0:
-; GISEL-NEXT: cmp w0, #0
-; GISEL-NEXT: cset w8, ne
-; GISEL-NEXT: cmp w1, #0
-; GISEL-NEXT: cset w9, ne
-; GISEL-NEXT: orr w8, w8, w9
-; GISEL-NEXT: and w0, w8, #0x1
-; GISEL-NEXT: ret
+; CHECK-GI-LABEL: true_or2:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: cmp w0, #0
+; CHECK-GI-NEXT: cset w8, ne
+; CHECK-GI-NEXT: cmp w1, #0
+; CHECK-GI-NEXT: cset w9, ne
+; CHECK-GI-NEXT: orr w8, w8, w9
+; CHECK-GI-NEXT: and w0, w8, #0x1
+; CHECK-GI-NEXT: ret
%3 = icmp ne i32 %0, 0
%4 = icmp ne i32 %1, 0
%5 = select i1 %3, i1 true, i1 %4
@@ -214,26 +214,26 @@ define i32 @true_or2(i32 %0, i32 %1) {
; (x0 != 0) || (x1 != 0) || (x2 != 0)
define i32 @true_or3(i32 %0, i32 %1, i32 %2) {
-; SDISEL-LABEL: true_or3:
-; SDISEL: // %bb.0:
-; SDISEL-NEXT: orr w8, w0, w1
-; SDISEL-NEXT: orr w8, w8, w2
-; SDISEL-NEXT: cmp w8, #0
-; SDISEL-NEXT: cset w0, ne
-; SDISEL-NEXT: ret
+; CHECK-SD-LABEL: true_or3:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: orr w8, w0, w1
+; CHECK-SD-NEXT: orr w8, w8, w2
+; CHECK-SD-NEXT: cmp w8, #0
+; CHECK-SD-NEXT: cset w0, ne
+; CHECK-SD-NEXT: ret
;
-; GISEL-LABEL: true_or3:
-; GISEL: // %bb.0:
-; GISEL-NEXT: cmp w0, #0
-; GISEL-NEXT: cset w8, ne
-; GISEL-NEXT: cmp w1, #0
-; GISEL-NEXT: cset w9, ne
-; GISEL-NEXT: cmp w2, #0
-; GISEL-NEXT: orr w8, w8, w9
-; GISEL-NEXT: cset w9, ne
-; GISEL-NEXT: orr w8, w8, w9
-; GISEL-NEXT: and w0, w8, #0x1
-; GISEL-NEXT: ret
+; CHECK-GI-LABEL: true_or3:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: cmp w0, #0
+; CHECK-GI-NEXT: cset w8, ne
+; CHECK-GI-NEXT: cmp w1, #0
+; CHECK-GI-NEXT: cset w9, ne
+; CHECK-GI-NEXT: cmp w2, #0
+; CHECK-GI-NEXT: orr w8, w8, w9
+; CHECK-GI-NEXT: cset w9, ne
+; CHECK-GI-NEXT: orr w8, w8, w9
+; CHECK-GI-NEXT: and w0, w8, #0x1
+; CHECK-GI-NEXT: ret
%4 = icmp ne i32 %0, 0
%5 = icmp ne i32 %1, 0
%6 = select i1 %4, i1 true, i1 %5
@@ -260,22 +260,22 @@ define i32 @neg_range_int(i32 %a, i32 %b, i32 %c) {
; (b > -(d | 1) && a < c)
define i32 @neg_range_int_comp(i32 %a, i32 %b, i32 %c, i32 %d) {
-; SDISEL-LABEL: neg_range_int_comp:
-; SDISEL: // %bb.0:
-; SDISEL-NEXT: orr w8, w3, #0x1
-; SDISEL-NEXT: cmp w0, w2
-; SDISEL-NEXT: ccmn w1, w8, #4, lt
-; SDISEL-NEXT: csel w0, w1, w0, gt
-; SDISEL-NEXT: ret
+; CHECK-SD-LABEL: neg_range_int_comp:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: orr w8, w3, #0x1
+; CHECK-SD-NEXT: cmp w0, w2
+; CHECK-SD-NEXT: ccmn w1, w8, #4, lt
+; CHECK-SD-NEXT: csel w0, w1, w0, gt
+; CHECK-SD-NEXT: ret
;
-; GISEL-LABEL: neg_range_int_comp:
-; GISEL: // %bb.0:
-; GISEL-NEXT: orr w8, w3, #0x1
-; GISEL-NEXT: cmp w0, w2
-; GISEL-NEXT: neg w8, w8
-; GISEL-NEXT: ccmp w1, w8, #4, lt
-; GISEL-NEXT: csel w0, w1, w0, gt
-; GISEL-NEXT: ret
+; CHECK-GI-LABEL: neg_range_int_comp:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: orr w8, w3, #0x1
+; CHECK-GI-NEXT: cmp w0, w2
+; CHECK-GI-NEXT: neg w8, w8
+; CHECK-GI-NEXT: ccmp w1, w8, #4, lt
+; CHECK-GI-NEXT: csel w0, w1, w0, gt
+; CHECK-GI-NEXT: ret
%dor = or i32 %d, 1
%negd = sub i32 0, %dor
%cmp = icmp sgt i32 %b, %negd
@@ -287,22 +287,22 @@ define i32 @neg_range_int_comp(i32 %a, i32 %b, i32 %c, i32 %d) {
; (b >u -(d | 1) && a < c)
define i32 @neg_range_int_comp_u(i32 %a, i32 %b, i32 %c, i32 %d) {
-; SDISEL-LABEL: neg_range_int_comp_u:
-; SDISEL: // %bb.0:
-; SDISEL-NEXT: orr w8, w3, #0x1
-; SDISEL-NEXT: cmp w0, w2
-; SDISEL-NEXT: ccmn w1, w8, #0, lt
-; SDISEL-NEXT: csel w0, w1, w0, hi
-; SDISEL-NEXT: ret
+; CHECK-SD-LABEL: neg_range_int_comp_u:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: orr w8, w3, #0x1
+; CHECK-SD-NEXT: cmp w0, w2
+; CHECK-SD-NEXT: ccmn w1, w8, #0, lt
+; CHECK-SD-NEXT: csel w0, w1, w0, hi
+; CHECK-SD-NEXT: ret
;
-; GISEL-LABEL: neg_range_int_comp_u:
-; GISEL: // %bb.0:
-; GISEL-NEXT: orr w8, w3, #0x1
-; GISEL-NEXT: cmp w0, w2
-; GISEL-NEXT: neg w8, w8
-; GISEL-NEXT: ccmp w1, w8, #0, lt
-; GISEL-NEXT: csel w0, w1, w0, hi
-; GISEL-NEXT: ret
+; CHECK-GI-LABEL: neg_range_int_comp_u:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: orr w8, w3, #0x1
+; CHECK-GI-NEXT: cmp w0, w2
+; CHECK-GI-NEXT: neg w8, w8
+; CHECK-GI-NEXT: ccmp w1, w8, #0, lt
+; CHECK-GI-NEXT: csel w0, w1, w0, hi
+; CHECK-GI-NEXT: ret
%dor = or i32 %d, 1
%negd = sub i32 0, %dor
%cmp = icmp ugt i32 %b, %negd
@@ -314,22 +314,22 @@ define i32 @neg_range_int_comp_u(i32 %a, i32 %b, i32 %c, i32 %d) {
; (b > -(d | 1) && a u < c)
define i32 @neg_range_int_comp_ua(i32 %a, i32 %b, i32 %c, i32 %d) {
-; SDISEL-LABEL: neg_range_int_comp_ua:
-; SDISEL: // %bb.0:
-; SDISEL-NEXT: orr w8, w3, #0x1
-; SDISEL-NEXT: cmp w0, w2
-; SDISEL-NEXT: ccmn w1, w8, #4, lo
-; SDISEL-NEXT: csel w0, w1, w0, gt
-; SDISEL-NEXT: ret
+; CHECK-SD-LABEL: neg_range_int_comp_ua:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: orr w8, w3, #0x1
+; CHECK-SD-NEXT: cmp w0, w2
+; CHECK-SD-NEXT: ccmn w1, w8, #4, lo
+; CHECK-SD-NEXT: csel w0, w1, w0, gt
+; CHECK-SD-NEXT: ret
;
-; GISEL-LABEL: neg_range_int_comp_ua:
-; GISEL: // %bb.0:
-; GISEL-NEXT: orr w8, w3, #0x1
-; GISEL-NEXT: cmp w0, w2
-; GISEL-NEXT: neg w8, w8
-; GISEL-NEXT: ccmp w1, w8, #4, lo
-; GISEL-NEXT: csel w0, w1, w0, gt
-; GISEL-NEXT: ret
+; CHECK-GI-LABEL: neg_range_int_comp_ua:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: orr w8, w3, #0x1
+; CHECK-GI-NEXT: cmp w0, w2
+; CHECK-GI-NEXT: neg w8, w8
+; CHECK-GI-NEXT: ccmp w1, w8, #4, lo
+; CHECK-GI-NEXT: csel w0, w1, w0, gt
+; CHECK-GI-NEXT: ret
%dor = or i32 %d, 1
%negd = sub i32 0, %dor
%cmp = icmp sgt i32 %b, %negd
@@ -341,19 +341,19 @@ define i32 @neg_range_int_comp_ua(i32 %a, i32 %b, i32 %c, i32 %d) {
; (b <= -3 && a > c)
define i32 @neg_range_int_2(i32 %a, i32 %b, i32 %c) {
-; SDISEL-LABEL: neg_range_int_2:
-; SDISEL: // %bb.0:
-; SDISEL-NEXT: cmp w0, w2
-; SDISEL-NEXT: ccmn w1, #4, #4, gt
-; SDISEL-NEXT: csel w0, w1, w0, gt
-; SDISEL-NEXT: ret
+; CHECK-SD-LABEL: neg_range_int_2:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: cmp w0, w2
+; CHECK-SD-NEXT: ccmn w1, #4, #4, gt
+; CHECK-SD-NEXT: csel w0, w1, w0, gt
+; CHECK-SD-NEXT: ret
;
-; GISEL-LABEL: neg_range_int_2:
-; GISEL: // %bb.0:
-; GISEL-NEXT: cmp w0, w2
-; GISEL-NEXT: ccmn w1, #3, #8, gt
-; GISEL-NEXT: csel w0, w1, w0, ge
-; GISEL-NEXT: ret
+; CHECK-GI-LABEL: neg_range_int_2:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: cmp w0, w2
+; CHECK-GI-NEXT: ccmn w1, #3, #8, gt
+; CHECK-GI-NEXT: csel w0, w1, w0, ge
+; CHECK-GI-NEXT: ret
%cmp = icmp sge i32 %b, -3
%cmp1 = icmp sgt i32 %a, %c
%or.cond = and i1 %cmp, %cmp1
@@ -363,22 +363,22 @@ define i32 @neg_range_int_2(i32 %a, i32 %b, i32 %c) {
; (b < -(d | 1) && a >= c)
define i32 @neg_range_int_comp2(i32 %a, i32 %b, i32 %c, i32 %d) {
-; SDISEL-LABEL: neg_range_int_comp2:
-; SDISEL: // %bb.0:
-; SDISEL-NEXT: orr w8, w3, #0x1
-; SDISEL-NEXT: cmp w0, w2
-; SDISEL-NEXT: ccmn w1, w8, #0, ge
-; SDISEL-NEXT: csel w0, w1, w0, lt
-; SDISEL-NEXT: ret
+; CHECK-SD-LABEL: neg_range_int_comp2:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: orr w8, w3, #0x1
+; CHECK-SD-NEXT: cmp w0, w2
+; CHECK-SD-NEXT: ccmn w1, w8, #0, ge
+; CHECK-SD-NEXT: csel w0, w1, w0, lt
+; CHECK-SD-NEXT: ret
;
-; GISEL-LABEL: neg_range_int_comp2:
-; GISEL: // %bb.0:
-; GISEL-NEXT: orr w8, w3, #0x1
-; GISEL-NEXT: cmp w0, w2
-; GISEL-NEXT: neg w8, w8
-; GISEL-NEXT: ccmp w1, w8, #0, ge
-; GISEL-NEXT: csel w0, w1, w0, lt
-; GISEL-NEXT: ret
+; CHECK-GI-LABEL: neg_range_int_comp2:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: orr w8, w3, #0x1
+; CHECK-GI-NEXT: cmp w0, w2
+; CHECK-GI-NEXT: neg w8, w8
+; CHECK-GI-NEXT: ccmp w1, w8, #0, ge
+; CHECK-GI-NEXT: csel w0, w1, w0, lt
+; CHECK-GI-NEXT: ret
%dor = or i32 %d, 1
%negd = sub i32 0, %dor
%cmp = icmp slt i32 %b, %negd
@@ -390,22 +390,22 @@ define i32 @neg_range_int_comp2(i32 %a, i32 %b, i32 %c, i32 %d) {
; (b <u -(d | 1) && a > c)
define i32 @neg_range_int_comp_u2(i32 %a, i32 %b, i32 %c, i32 %d) {
-; SDISEL-LABEL: neg_range_int_comp_u2:
-; SDISEL: // %bb.0:
-; SDISEL-NEXT: orr w8, w3, #0x1
-; SDISEL-NEXT: cmp w0, w2
-; SDISEL-NEXT: ccmn w1, w8, #2, gt
-; SDISEL-NEXT: csel w0, w1, w0, lo
-; SDISEL-NEXT: ret
+; CHECK-SD-LABEL: neg_range_int_comp_u2:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: orr w8, w3, #0x1
+; CHECK-SD-NEXT: cmp w0, w2
+; CHECK-SD-NEXT: ccmn w1, w8, #2, gt
+; CHECK-SD-NEXT: csel w0, w1, w0, lo
+; CHECK-SD-NEXT: ret
;
-; GISEL-LABEL: neg_range_int_comp_u2:
-; GISEL: // %bb.0:
-; GISEL-NEXT: orr w8, w3, #0x1
-; GISEL-NEXT: cmp w0, w2
-; GISEL-NEXT: neg w8, w8
-; GISEL-NEXT: ccmp w1, w8, #2, gt
-; GISEL-NEXT: csel w0, w1, w0, lo
-; GISEL-NEXT: ret
+; CHECK-GI-LABEL: neg_range_int_comp_u2:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: orr w8, w3, #0x1
+; CHECK-GI-NEXT: cmp w0, w2
+; CHECK-GI-NEXT: neg w8, w8
+; CHECK-GI-NEXT: ccmp w1, w8, #2, gt
+; CHECK-GI-NEXT: csel w0, w1, w0, lo
+; CHECK-GI-NEXT: ret
%dor = or i32 %d, 1
%negd = sub i32 0, %dor
%cmp = icmp ult i32 %b, %negd
@@ -417,22 +417,22 @@ define i32 @neg_range_int_comp_u2(i32 %a, i32 %b, i32 %c, i32 %d) {
; (b > -(d | 1) && a u > c)
define i32 @neg_range_int_comp_ua2(i32 %a, i32 %b, i32 %c, i32 %d) {
-; SDISEL-LABEL: neg_range_int_comp_ua2:
-; SDISEL: // %bb.0:
-; SDISEL-NEXT: orr w8, w3, #0x1
-; SDISEL-NEXT: cmp w0, w2
-; SDISEL-NEXT: ccmn w1, w8, #4, hi
-; SDISEL-NEXT: csel w0, w1, w0, gt
-; SDISEL-NEXT: ret
+; CHECK-SD-LABEL: neg_range_int_comp_ua2:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: orr w8, w3, #0x1
+; CHECK-SD-NEXT: cmp w0, w2
+; CHECK-SD-NEXT: ccmn w1, w8, #4, hi
+; CHECK-SD-NEXT: csel w0, w1, w0, gt
+; CHECK-SD-NEXT: ret
;
-; GISEL-LABEL: neg_range_int_comp_ua2:
-; GISEL: // %bb.0:
-; GISEL-NEXT: orr w8, w3, #0x1
-; GISEL-NEXT: cmp w0, w2
-; GISEL-NEXT: neg w8, w8
-; GISEL-NEXT: ccmp w1, w8, #4, hi
-; GISEL-NEXT: csel w0, w1, w0, gt
-; GISEL-NEXT: ret
+; CHECK-GI-LABEL: neg_range_int_comp_ua2:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: orr w8, w3, #0x1
+; CHECK-GI-NEXT: cmp w0, w2
+; CHECK-GI-NEXT: neg w8, w8
+; CHECK-GI-NEXT: ccmp w1, w8, #4, hi
+; CHECK-GI-NEXT: csel w0, w1, w0, gt
+; CHECK-GI-NEXT: ret
%dor = or i32 %d, 1
%negd = sub i32 0, %dor
%cmp = icmp sgt i32 %b, %negd
@@ -444,22 +444,22 @@ define i32 @neg_range_int_comp_ua2(i32 %a, i32 %b, i32 %c, i32 %d) {
; (b > -(d | 1) && a u == c)
define i32 @neg_range_int_comp_ua3(i32 %a, i32 %b, i32 %c, i32 %d) {
-; SDISEL-LABEL: neg_range_int_comp_ua3:
-; SDISEL: // %bb.0:
-; SDISEL-NEXT: orr w8, w3, #0x1
-; SDISEL-NEXT: cmp w0, w2
-; SDISEL-NEXT: ccmn w1, w8, #4, eq
-; SDISEL-NEXT: csel w0, w1, w0, gt
-; SDISEL-NEXT: ret
+; CHECK-SD-LABEL: neg_range_int_comp_ua3:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: orr w8, w3, #0x1
+; CHECK-SD-NEXT: cmp w0, w2
+; CHECK-SD-NEXT: ccmn w1, w8, #4, eq
+; CHECK-SD-NEXT: csel w0, w1, w0, gt
+; CHECK-SD-NEXT: ret
;
-; GISEL-LABEL: neg_range_int_comp_ua3:
-; GISEL: // %bb.0:
-; GISEL-NEXT: orr w8, w3, #0x1
-; GISEL-NEXT: cmp w0, w2
-; GISEL-NEXT: neg w8, w8
-; GISEL-NEXT: ccmp w1, w8, #4, eq
-; GISEL-NEXT: csel w0, w1, w0, gt
-; GISEL-NEXT: ret
+; CHECK-GI-LABEL: neg_range_int_comp_ua3:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: orr w8, w3, #0x1
+; CHECK-GI-NEXT: cmp w0, w2
+; CHECK-GI-NEXT: neg w8, w8
+; CHECK-GI-NEXT: ccmp w1, w8, #4, eq
+; CHECK-GI-NEXT: csel w0, w1, w0, gt
+; CHECK-GI-NEXT: ret
%dor = or i32 %d, 1
%negd = sub i32 0, %dor
%cmp = icmp sgt i32 %b, %negd
@@ -471,26 +471,26 @@ define i32 @neg_range_int_comp_ua3(i32 %a, i32 %b, i32 %c, i32 %d) {
; -(a | 1) > (b | 3) && a < c
define i32 @neg_range_int_c(i32 %a, i32 %b, i32 %c) {
-; SDISEL-LABEL: neg_range_int_c:
-; SDISEL: // %bb.0: // %entry
-; SDISEL-NEXT: orr w8, w0, #0x1
-; SDISEL-NEXT: orr w9, w1, #0x3
-; SDISEL-NEXT: cmn w9, w8
-; SDISEL-NEXT: ccmp w2, w0, #2, lo
-; SDISEL-NEXT: cset w0, lo
-; SDISEL-NEXT: ret
+; CHECK-SD-LABEL: neg_range_int_c:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: orr w8, w0, #0x1
+; CHECK-SD-NEXT: orr w9, w1, #0x3
+; CHECK-SD-NEXT: cmn w9, w8
+; CHECK-SD-NEXT: ccmp w2, w0, #2, lo
+; CHECK-SD-NEXT: cset w0, lo
+; CHECK-SD-NEXT: ret
;
-; GISEL-LABEL: neg_range_int_c:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: orr w8, w0, #0x1
-; GISEL-NEXT: orr w9, w1, #0x3
-; GISEL-NEXT: neg w8, w8
-; GISEL-NEXT: cmp w9, w8
-; GISEL-NEXT: cset w8, lo
-; GISEL-NEXT: cmp w2, w0
-; GISEL-NEXT: cset w9, lo
-; GISEL-NEXT: and w0, w8, w9
-; GISEL-NEXT: ret
+; CHECK-GI-LABEL: neg_range_int_c:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: orr w8, w0, #0x1
+; CHECK-GI-NEXT: orr w9, w1, #0x3
+; CHECK-GI-NEXT: neg w8, w8
+; CHECK-GI-NEXT: cmp w9, w8
+; CHECK-GI-NEXT: cset w8, lo
+; CHECK-GI-NEXT: cmp w2, w0
+; CHECK-GI-NEXT: cset w9, lo
+; CHECK-GI-NEXT: and w0, w8, w9
+; CHECK-GI-NEXT: ret
entry:
%or = or i32 %a, 1
%sub = sub i32 0, %or
diff --git a/llvm/test/CodeGen/AArch64/dag-combine-select.ll b/llvm/test/CodeGen/AArch64/dag-combine-select.ll
index 56208f1..02b0077 100644
--- a/llvm/test/CodeGen/AArch64/dag-combine-select.ll
+++ b/llvm/test/CodeGen/AArch64/dag-combine-select.ll
@@ -1,26 +1,26 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple arm64-none-eabi -o - %s | FileCheck %s --check-prefixes=CHECK,SDISEL
-; RUN: llc -mtriple arm64-none-eabi -global-isel -o - %s | FileCheck %s --check-prefixes=CHECK,GISEL
+; RUN: llc -mtriple arm64-none-eabi -o - %s | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc -mtriple arm64-none-eabi -global-isel -o - %s | FileCheck %s --check-prefixes=CHECK,CHECK-GI
@out = internal global i32 0, align 4
; Ensure that we transform select(C0, x, select(C1, x, y)) towards
; select(C0 | C1, x, y) so we can use CMP;CCMP for the implementation.
define i32 @test0(i32 %v0, i32 %v1, i32 %v2) {
-; SDISEL-LABEL: test0:
-; SDISEL: // %bb.0:
-; SDISEL-NEXT: cmp w0, #7
-; SDISEL-NEXT: ccmp w1, #0, #0, ne
-; SDISEL-NEXT: csel w0, w1, w2, gt
-; SDISEL-NEXT: ret
+; CHECK-SD-LABEL: test0:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: cmp w0, #7
+; CHECK-SD-NEXT: ccmp w1, #0, #0, ne
+; CHECK-SD-NEXT: csel w0, w1, w2, gt
+; CHECK-SD-NEXT: ret
;
-; GISEL-LABEL: test0:
-; GISEL: // %bb.0:
-; GISEL-NEXT: cmp w0, #7
-; GISEL-NEXT: csel w8, w1, w2, eq
-; GISEL-NEXT: cmp w1, #0
-; GISEL-NEXT: csel w0, w1, w8, gt
-; GISEL-NEXT: ret
+; CHECK-GI-LABEL: test0:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: cmp w0, #7
+; CHECK-GI-NEXT: csel w8, w1, w2, eq
+; CHECK-GI-NEXT: cmp w1, #0
+; CHECK-GI-NEXT: csel w0, w1, w8, gt
+; CHECK-GI-NEXT: ret
%cmp1 = icmp eq i32 %v0, 7
%cmp2 = icmp sgt i32 %v1, 0
%sel0 = select i1 %cmp1, i32 %v1, i32 %v2
@@ -32,36 +32,36 @@ define i32 @test0(i32 %v0, i32 %v1, i32 %v2) {
; sequences. This case should be transformed to select(C0, select(C1, x, y), y)
; anyway to get CSE effects.
define void @test1(i32 %bitset, i32 %val0, i32 %val1) {
-; SDISEL-LABEL: test1:
-; SDISEL: // %bb.0:
-; SDISEL-NEXT: cmp w0, #7
-; SDISEL-NEXT: adrp x9, out
-; SDISEL-NEXT: csel w8, w1, w2, eq
-; SDISEL-NEXT: cmp w8, #13
-; SDISEL-NEXT: csel w8, w1, w2, lo
-; SDISEL-NEXT: cmp w0, #42
-; SDISEL-NEXT: csel w10, w1, w8, eq
-; SDISEL-NEXT: str w8, [x9, :lo12:out]
-; SDISEL-NEXT: str w10, [x9, :lo12:out]
-; SDISEL-NEXT: ret
+; CHECK-SD-LABEL: test1:
+; CHECK-SD: // %bb.0:
+; CHECK-SD-NEXT: cmp w0, #7
+; CHECK-SD-NEXT: adrp x9, out
+; CHECK-SD-NEXT: csel w8, w1, w2, eq
+; CHECK-SD-NEXT: cmp w8, #13
+; CHECK-SD-NEXT: csel w8, w1, w2, lo
+; CHECK-SD-NEXT: cmp w0, #42
+; CHECK-SD-NEXT: csel w10, w1, w8, eq
+; CHECK-SD-NEXT: str w8, [x9, :lo12:out]
+; CHECK-SD-NEXT: str w10, [x9, :lo12:out]
+; CHECK-SD-NEXT: ret
;
-; GISEL-LABEL: test1:
-; GISEL: // %bb.0:
-; GISEL-NEXT: cmp w0, #7
-; GISEL-NEXT: csel w8, w1, w2, eq
-; GISEL-NEXT: cmp w8, #13
-; GISEL-NEXT: cset w8, lo
-; GISEL-NEXT: tst w8, #0x1
-; GISEL-NEXT: csel w9, w1, w2, ne
-; GISEL-NEXT: cmp w0, #42
-; GISEL-NEXT: cset w10, eq
-; GISEL-NEXT: orr w8, w10, w8
-; GISEL-NEXT: tst w8, #0x1
-; GISEL-NEXT: adrp x8, out
-; GISEL-NEXT: csel w10, w1, w2, ne
-; GISEL-NEXT: str w9, [x8, :lo12:out]
-; GISEL-NEXT: str w10, [x8, :lo12:out]
-; GISEL-NEXT: ret
+; CHECK-GI-LABEL: test1:
+; CHECK-GI: // %bb.0:
+; CHECK-GI-NEXT: cmp w0, #7
+; CHECK-GI-NEXT: csel w8, w1, w2, eq
+; CHECK-GI-NEXT: cmp w8, #13
+; CHECK-GI-NEXT: cset w8, lo
+; CHECK-GI-NEXT: tst w8, #0x1
+; CHECK-GI-NEXT: csel w9, w1, w2, ne
+; CHECK-GI-NEXT: cmp w0, #42
+; CHECK-GI-NEXT: cset w10, eq
+; CHECK-GI-NEXT: orr w8, w10, w8
+; CHECK-GI-NEXT: tst w8, #0x1
+; CHECK-GI-NEXT: adrp x8, out
+; CHECK-GI-NEXT: csel w10, w1, w2, ne
+; CHECK-GI-NEXT: str w9, [x8, :lo12:out]
+; CHECK-GI-NEXT: str w10, [x8, :lo12:out]
+; CHECK-GI-NEXT: ret
%cmp1 = icmp eq i32 %bitset, 7
%cond = select i1 %cmp1, i32 %val0, i32 %val1
%cmp5 = icmp ult i32 %cond, 13
diff --git a/llvm/test/CodeGen/AArch64/fcsel-zero.ll b/llvm/test/CodeGen/AArch64/fcsel-zero.ll
index 3fbcd10..3db588b 100644
--- a/llvm/test/CodeGen/AArch64/fcsel-zero.ll
+++ b/llvm/test/CodeGen/AArch64/fcsel-zero.ll
@@ -2,8 +2,8 @@
; RUN: llc -mtriple=aarch64-linux-gnu -o - < %s | FileCheck %s
-define float @foeq(float %a, float %b) #0 {
- %t = fcmp oeq float %a, 0.0
+define float @foeq(float %a, float %b) {
+ %t = fcmp nsz oeq float %a, 0.0
%v = select i1 %t, float 0.0, float %b
ret float %v
; CHECK-LABEL: foeq
@@ -11,8 +11,8 @@ define float @foeq(float %a, float %b) #0 {
; CHECK-NEXT: fcsel {{s[0-9]+}}, [[R]], {{s[0-9]+}}, eq
}
-define float @fueq(float %a, float %b) #0 {
- %t = fcmp ueq float %a, 0.0
+define float @fueq(float %a, float %b) {
+ %t = fcmp nsz ueq float %a, 0.0
%v = select i1 %t, float 0.0, float %b
ret float %v
; CHECK-LABEL: fueq
@@ -21,8 +21,8 @@ define float @fueq(float %a, float %b) #0 {
; CHECK-NEXT: fcsel {{s[0-9]+}}, [[R]], {{s[0-9]+}}, vs
}
-define float @fone(float %a, float %b) #0 {
- %t = fcmp one float %a, 0.0
+define float @fone(float %a, float %b) {
+ %t = fcmp nsz one float %a, 0.0
%v = select i1 %t, float %b, float 0.0
ret float %v
; CHECK-LABEL: fone
@@ -31,8 +31,8 @@ define float @fone(float %a, float %b) #0 {
; CHECK-NEXT: fcsel {{s[0-9]+}}, {{s[0-9]+}}, [[R]], gt
}
-define float @fune(float %a, float %b) #0 {
- %t = fcmp une float %a, 0.0
+define float @fune(float %a, float %b) {
+ %t = fcmp nsz une float %a, 0.0
%v = select i1 %t, float %b, float 0.0
ret float %v
; CHECK-LABEL: fune
@@ -40,8 +40,8 @@ define float @fune(float %a, float %b) #0 {
; CHECK-NEXT: fcsel {{s[0-9]+}}, {{s[0-9]+}}, [[R]], ne
}
-define double @doeq(double %a, double %b) #0 {
- %t = fcmp oeq double %a, 0.0
+define double @doeq(double %a, double %b) {
+ %t = fcmp nsz oeq double %a, 0.0
%v = select i1 %t, double 0.0, double %b
ret double %v
; CHECK-LABEL: doeq
@@ -49,8 +49,8 @@ define double @doeq(double %a, double %b) #0 {
; CHECK-NEXT: fcsel {{d[0-9]+}}, [[R]], {{d[0-9]+}}, eq
}
-define double @dueq(double %a, double %b) #0 {
- %t = fcmp ueq double %a, 0.0
+define double @dueq(double %a, double %b) {
+ %t = fcmp nsz ueq double %a, 0.0
%v = select i1 %t, double 0.0, double %b
ret double %v
; CHECK-LABEL: dueq
@@ -59,8 +59,8 @@ define double @dueq(double %a, double %b) #0 {
; CHECK-NEXT: fcsel {{d[0-9]+}}, [[R]], {{d[0-9]+}}, vs
}
-define double @done(double %a, double %b) #0 {
- %t = fcmp one double %a, 0.0
+define double @done(double %a, double %b) {
+ %t = fcmp nsz one double %a, 0.0
%v = select i1 %t, double %b, double 0.0
ret double %v
; CHECK-LABEL: done
@@ -69,14 +69,11 @@ define double @done(double %a, double %b) #0 {
; CHECK-NEXT: fcsel {{d[0-9]+}}, {{d[0-9]+}}, [[R]], gt
}
-define double @dune(double %a, double %b) #0 {
- %t = fcmp une double %a, 0.0
+define double @dune(double %a, double %b) {
+ %t = fcmp nsz une double %a, 0.0
%v = select i1 %t, double %b, double 0.0
ret double %v
; CHECK-LABEL: dune
; CHECK: fcmp [[R:d[0-9]+]], #0.0
; CHECK-NEXT: fcsel {{d[0-9]+}}, {{d[0-9]+}}, [[R]], ne
}
-
-attributes #0 = { nounwind "unsafe-fp-math"="true" }
-
diff --git a/llvm/test/CodeGen/AArch64/fp16_intrinsic_scalar_1op.ll b/llvm/test/CodeGen/AArch64/fp16_intrinsic_scalar_1op.ll
index 1b98954..b056460 100644
--- a/llvm/test/CodeGen/AArch64/fp16_intrinsic_scalar_1op.ll
+++ b/llvm/test/CodeGen/AArch64/fp16_intrinsic_scalar_1op.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=aarch64 -global-isel=0 -mattr=+v8.2a,+fullfp16 | FileCheck %s --check-prefixes=CHECK,SDISEL
-; RUN: llc < %s -mtriple=aarch64 -global-isel=1 -mattr=+v8.2a,+fullfp16 | FileCheck %s --check-prefixes=CHECK,GISEL
+; RUN: llc < %s -mtriple=aarch64 -global-isel=0 -mattr=+v8.2a,+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc < %s -mtriple=aarch64 -global-isel=1 -mattr=+v8.2a,+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
declare i64 @llvm.aarch64.neon.fcvtpu.i64.f16(half)
declare i32 @llvm.aarch64.neon.fcvtpu.i32.f16(half)
@@ -27,18 +27,18 @@ declare half @llvm.aarch64.neon.frecpx.f16(half)
declare half @llvm.aarch64.neon.frecpe.f16(half)
define dso_local i16 @t2(half %a) {
-; SDISEL-LABEL: t2:
-; SDISEL: // %bb.0: // %entry
-; SDISEL-NEXT: fcmp h0, #0.0
-; SDISEL-NEXT: csetm w0, eq
-; SDISEL-NEXT: ret
+; CHECK-SD-LABEL: t2:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: fcmp h0, #0.0
+; CHECK-SD-NEXT: csetm w0, eq
+; CHECK-SD-NEXT: ret
;
-; GISEL-LABEL: t2:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: fcmp h0, #0.0
-; GISEL-NEXT: cset w8, eq
-; GISEL-NEXT: sbfx w0, w8, #0, #1
-; GISEL-NEXT: ret
+; CHECK-GI-LABEL: t2:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: fcmp h0, #0.0
+; CHECK-GI-NEXT: cset w8, eq
+; CHECK-GI-NEXT: sbfx w0, w8, #0, #1
+; CHECK-GI-NEXT: ret
entry:
%0 = fcmp oeq half %a, 0xH0000
%vceqz = sext i1 %0 to i16
@@ -46,18 +46,18 @@ entry:
}
define dso_local i16 @t3(half %a) {
-; SDISEL-LABEL: t3:
-; SDISEL: // %bb.0: // %entry
-; SDISEL-NEXT: fcmp h0, #0.0
-; SDISEL-NEXT: csetm w0, ge
-; SDISEL-NEXT: ret
+; CHECK-SD-LABEL: t3:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: fcmp h0, #0.0
+; CHECK-SD-NEXT: csetm w0, ge
+; CHECK-SD-NEXT: ret
;
-; GISEL-LABEL: t3:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: fcmp h0, #0.0
-; GISEL-NEXT: cset w8, ge
-; GISEL-NEXT: sbfx w0, w8, #0, #1
-; GISEL-NEXT: ret
+; CHECK-GI-LABEL: t3:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: fcmp h0, #0.0
+; CHECK-GI-NEXT: cset w8, ge
+; CHECK-GI-NEXT: sbfx w0, w8, #0, #1
+; CHECK-GI-NEXT: ret
entry:
%0 = fcmp oge half %a, 0xH0000
%vcgez = sext i1 %0 to i16
@@ -65,18 +65,18 @@ entry:
}
define dso_local i16 @t4(half %a) {
-; SDISEL-LABEL: t4:
-; SDISEL: // %bb.0: // %entry
-; SDISEL-NEXT: fcmp h0, #0.0
-; SDISEL-NEXT: csetm w0, gt
-; SDISEL-NEXT: ret
+; CHECK-SD-LABEL: t4:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: fcmp h0, #0.0
+; CHECK-SD-NEXT: csetm w0, gt
+; CHECK-SD-NEXT: ret
;
-; GISEL-LABEL: t4:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: fcmp h0, #0.0
-; GISEL-NEXT: cset w8, gt
-; GISEL-NEXT: sbfx w0, w8, #0, #1
-; GISEL-NEXT: ret
+; CHECK-GI-LABEL: t4:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: fcmp h0, #0.0
+; CHECK-GI-NEXT: cset w8, gt
+; CHECK-GI-NEXT: sbfx w0, w8, #0, #1
+; CHECK-GI-NEXT: ret
entry:
%0 = fcmp ogt half %a, 0xH0000
%vcgtz = sext i1 %0 to i16
@@ -84,18 +84,18 @@ entry:
}
define dso_local i16 @t5(half %a) {
-; SDISEL-LABEL: t5:
-; SDISEL: // %bb.0: // %entry
-; SDISEL-NEXT: fcmp h0, #0.0
-; SDISEL-NEXT: csetm w0, ls
-; SDISEL-NEXT: ret
+; CHECK-SD-LABEL: t5:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: fcmp h0, #0.0
+; CHECK-SD-NEXT: csetm w0, ls
+; CHECK-SD-NEXT: ret
;
-; GISEL-LABEL: t5:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: fcmp h0, #0.0
-; GISEL-NEXT: cset w8, ls
-; GISEL-NEXT: sbfx w0, w8, #0, #1
-; GISEL-NEXT: ret
+; CHECK-GI-LABEL: t5:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: fcmp h0, #0.0
+; CHECK-GI-NEXT: cset w8, ls
+; CHECK-GI-NEXT: sbfx w0, w8, #0, #1
+; CHECK-GI-NEXT: ret
entry:
%0 = fcmp ole half %a, 0xH0000
%vclez = sext i1 %0 to i16
@@ -103,18 +103,18 @@ entry:
}
define dso_local i16 @t6(half %a) {
-; SDISEL-LABEL: t6:
-; SDISEL: // %bb.0: // %entry
-; SDISEL-NEXT: fcmp h0, #0.0
-; SDISEL-NEXT: csetm w0, mi
-; SDISEL-NEXT: ret
+; CHECK-SD-LABEL: t6:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: fcmp h0, #0.0
+; CHECK-SD-NEXT: csetm w0, mi
+; CHECK-SD-NEXT: ret
;
-; GISEL-LABEL: t6:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: fcmp h0, #0.0
-; GISEL-NEXT: cset w8, mi
-; GISEL-NEXT: sbfx w0, w8, #0, #1
-; GISEL-NEXT: ret
+; CHECK-GI-LABEL: t6:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: fcmp h0, #0.0
+; CHECK-GI-NEXT: cset w8, mi
+; CHECK-GI-NEXT: sbfx w0, w8, #0, #1
+; CHECK-GI-NEXT: ret
entry:
%0 = fcmp olt half %a, 0xH0000
%vcltz = sext i1 %0 to i16
@@ -172,15 +172,15 @@ entry:
}
define dso_local i16 @t16(half %a) {
-; SDISEL-LABEL: t16:
-; SDISEL: // %bb.0: // %entry
-; SDISEL-NEXT: fcvtzs w0, h0
-; SDISEL-NEXT: ret
+; CHECK-SD-LABEL: t16:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: fcvtzs w0, h0
+; CHECK-SD-NEXT: ret
;
-; GISEL-LABEL: t16:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: fcvtzu w0, h0
-; GISEL-NEXT: ret
+; CHECK-GI-LABEL: t16:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: fcvtzu w0, h0
+; CHECK-GI-NEXT: ret
entry:
%0 = fptoui half %a to i16
ret i16 %0
diff --git a/llvm/test/CodeGen/AArch64/fp16_intrinsic_scalar_2op.ll b/llvm/test/CodeGen/AArch64/fp16_intrinsic_scalar_2op.ll
index 5b08ef2..da70599 100644
--- a/llvm/test/CodeGen/AArch64/fp16_intrinsic_scalar_2op.ll
+++ b/llvm/test/CodeGen/AArch64/fp16_intrinsic_scalar_2op.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=aarch64 -global-isel=0 -mattr=+v8.2a,+fullfp16 | FileCheck %s --check-prefixes=CHECK,SDISEL
-; RUN: llc < %s -mtriple=aarch64 -global-isel=1 -mattr=+v8.2a,+fullfp16 | FileCheck %s --check-prefixes=CHECK,GISEL
+; RUN: llc < %s -mtriple=aarch64 -global-isel=0 -mattr=+v8.2a,+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-SD
+; RUN: llc < %s -mtriple=aarch64 -global-isel=1 -mattr=+v8.2a,+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-GI
declare half @llvm.aarch64.sisd.fabd.f16(half, half)
@@ -35,18 +35,18 @@ entry:
}
define dso_local i16 @t_vceqh_f16(half %a, half %b) {
-; SDISEL-LABEL: t_vceqh_f16:
-; SDISEL: // %bb.0: // %entry
-; SDISEL-NEXT: fcmp h0, h1
-; SDISEL-NEXT: csetm w0, eq
-; SDISEL-NEXT: ret
+; CHECK-SD-LABEL: t_vceqh_f16:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: fcmp h0, h1
+; CHECK-SD-NEXT: csetm w0, eq
+; CHECK-SD-NEXT: ret
;
-; GISEL-LABEL: t_vceqh_f16:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: fcmp h0, h1
-; GISEL-NEXT: cset w8, eq
-; GISEL-NEXT: sbfx w0, w8, #0, #1
-; GISEL-NEXT: ret
+; CHECK-GI-LABEL: t_vceqh_f16:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: fcmp h0, h1
+; CHECK-GI-NEXT: cset w8, eq
+; CHECK-GI-NEXT: sbfx w0, w8, #0, #1
+; CHECK-GI-NEXT: ret
entry:
%0 = fcmp oeq half %a, %b
%vcmpd = sext i1 %0 to i16
@@ -54,18 +54,18 @@ entry:
}
define dso_local i16 @t_vcgeh_f16(half %a, half %b) {
-; SDISEL-LABEL: t_vcgeh_f16:
-; SDISEL: // %bb.0: // %entry
-; SDISEL-NEXT: fcmp h0, h1
-; SDISEL-NEXT: csetm w0, ge
-; SDISEL-NEXT: ret
+; CHECK-SD-LABEL: t_vcgeh_f16:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: fcmp h0, h1
+; CHECK-SD-NEXT: csetm w0, ge
+; CHECK-SD-NEXT: ret
;
-; GISEL-LABEL: t_vcgeh_f16:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: fcmp h0, h1
-; GISEL-NEXT: cset w8, ge
-; GISEL-NEXT: sbfx w0, w8, #0, #1
-; GISEL-NEXT: ret
+; CHECK-GI-LABEL: t_vcgeh_f16:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: fcmp h0, h1
+; CHECK-GI-NEXT: cset w8, ge
+; CHECK-GI-NEXT: sbfx w0, w8, #0, #1
+; CHECK-GI-NEXT: ret
entry:
%0 = fcmp oge half %a, %b
%vcmpd = sext i1 %0 to i16
@@ -73,18 +73,18 @@ entry:
}
define dso_local i16 @t_vcgth_f16(half %a, half %b) {
-; SDISEL-LABEL: t_vcgth_f16:
-; SDISEL: // %bb.0: // %entry
-; SDISEL-NEXT: fcmp h0, h1
-; SDISEL-NEXT: csetm w0, gt
-; SDISEL-NEXT: ret
+; CHECK-SD-LABEL: t_vcgth_f16:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: fcmp h0, h1
+; CHECK-SD-NEXT: csetm w0, gt
+; CHECK-SD-NEXT: ret
;
-; GISEL-LABEL: t_vcgth_f16:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: fcmp h0, h1
-; GISEL-NEXT: cset w8, gt
-; GISEL-NEXT: sbfx w0, w8, #0, #1
-; GISEL-NEXT: ret
+; CHECK-GI-LABEL: t_vcgth_f16:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: fcmp h0, h1
+; CHECK-GI-NEXT: cset w8, gt
+; CHECK-GI-NEXT: sbfx w0, w8, #0, #1
+; CHECK-GI-NEXT: ret
entry:
%0 = fcmp ogt half %a, %b
%vcmpd = sext i1 %0 to i16
@@ -92,18 +92,18 @@ entry:
}
define dso_local i16 @t_vcleh_f16(half %a, half %b) {
-; SDISEL-LABEL: t_vcleh_f16:
-; SDISEL: // %bb.0: // %entry
-; SDISEL-NEXT: fcmp h0, h1
-; SDISEL-NEXT: csetm w0, ls
-; SDISEL-NEXT: ret
+; CHECK-SD-LABEL: t_vcleh_f16:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: fcmp h0, h1
+; CHECK-SD-NEXT: csetm w0, ls
+; CHECK-SD-NEXT: ret
;
-; GISEL-LABEL: t_vcleh_f16:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: fcmp h0, h1
-; GISEL-NEXT: cset w8, ls
-; GISEL-NEXT: sbfx w0, w8, #0, #1
-; GISEL-NEXT: ret
+; CHECK-GI-LABEL: t_vcleh_f16:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: fcmp h0, h1
+; CHECK-GI-NEXT: cset w8, ls
+; CHECK-GI-NEXT: sbfx w0, w8, #0, #1
+; CHECK-GI-NEXT: ret
entry:
%0 = fcmp ole half %a, %b
%vcmpd = sext i1 %0 to i16
@@ -111,18 +111,18 @@ entry:
}
define dso_local i16 @t_vclth_f16(half %a, half %b) {
-; SDISEL-LABEL: t_vclth_f16:
-; SDISEL: // %bb.0: // %entry
-; SDISEL-NEXT: fcmp h0, h1
-; SDISEL-NEXT: csetm w0, mi
-; SDISEL-NEXT: ret
+; CHECK-SD-LABEL: t_vclth_f16:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: fcmp h0, h1
+; CHECK-SD-NEXT: csetm w0, mi
+; CHECK-SD-NEXT: ret
;
-; GISEL-LABEL: t_vclth_f16:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: fcmp h0, h1
-; GISEL-NEXT: cset w8, mi
-; GISEL-NEXT: sbfx w0, w8, #0, #1
-; GISEL-NEXT: ret
+; CHECK-GI-LABEL: t_vclth_f16:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: fcmp h0, h1
+; CHECK-GI-NEXT: cset w8, mi
+; CHECK-GI-NEXT: sbfx w0, w8, #0, #1
+; CHECK-GI-NEXT: ret
entry:
%0 = fcmp olt half %a, %b
%vcmpd = sext i1 %0 to i16
@@ -187,18 +187,18 @@ declare half @llvm.aarch64.neon.vcvtfxu2fp.f16.i32(i32, i32) #1
declare i32 @llvm.aarch64.neon.vcvtfp2fxu.i32.f16(half, i32) #1
define dso_local half @test_vcvth_n_f16_s16_1(i16 %a) {
-; SDISEL-LABEL: test_vcvth_n_f16_s16_1:
-; SDISEL: // %bb.0: // %entry
-; SDISEL-NEXT: fmov s0, w0
-; SDISEL-NEXT: scvtf h0, h0, #1
-; SDISEL-NEXT: ret
+; CHECK-SD-LABEL: test_vcvth_n_f16_s16_1:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: fmov s0, w0
+; CHECK-SD-NEXT: scvtf h0, h0, #1
+; CHECK-SD-NEXT: ret
;
-; GISEL-LABEL: test_vcvth_n_f16_s16_1:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: sxth w8, w0
-; GISEL-NEXT: fmov s0, w8
-; GISEL-NEXT: scvtf h0, h0, #1
-; GISEL-NEXT: ret
+; CHECK-GI-LABEL: test_vcvth_n_f16_s16_1:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: sxth w8, w0
+; CHECK-GI-NEXT: fmov s0, w8
+; CHECK-GI-NEXT: scvtf h0, h0, #1
+; CHECK-GI-NEXT: ret
entry:
%sext = sext i16 %a to i32
%fcvth_n = tail call half @llvm.aarch64.neon.vcvtfxs2fp.f16.i32(i32 %sext, i32 1)
@@ -206,18 +206,18 @@ entry:
}
define dso_local half @test_vcvth_n_f16_s16_16(i16 %a) {
-; SDISEL-LABEL: test_vcvth_n_f16_s16_16:
-; SDISEL: // %bb.0: // %entry
-; SDISEL-NEXT: fmov s0, w0
-; SDISEL-NEXT: scvtf h0, h0, #16
-; SDISEL-NEXT: ret
+; CHECK-SD-LABEL: test_vcvth_n_f16_s16_16:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: fmov s0, w0
+; CHECK-SD-NEXT: scvtf h0, h0, #16
+; CHECK-SD-NEXT: ret
;
-; GISEL-LABEL: test_vcvth_n_f16_s16_16:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: sxth w8, w0
-; GISEL-NEXT: fmov s0, w8
-; GISEL-NEXT: scvtf h0, h0, #16
-; GISEL-NEXT: ret
+; CHECK-GI-LABEL: test_vcvth_n_f16_s16_16:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: sxth w8, w0
+; CHECK-GI-NEXT: fmov s0, w8
+; CHECK-GI-NEXT: scvtf h0, h0, #16
+; CHECK-GI-NEXT: ret
entry:
%sext = sext i16 %a to i32
%fcvth_n = tail call half @llvm.aarch64.neon.vcvtfxs2fp.f16.i32(i32 %sext, i32 16)
@@ -315,18 +315,18 @@ entry:
}
define dso_local half @test_vcvth_n_f16_u16_1(i16 %a) {
-; SDISEL-LABEL: test_vcvth_n_f16_u16_1:
-; SDISEL: // %bb.0: // %entry
-; SDISEL-NEXT: fmov s0, w0
-; SDISEL-NEXT: ucvtf h0, h0, #1
-; SDISEL-NEXT: ret
+; CHECK-SD-LABEL: test_vcvth_n_f16_u16_1:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: fmov s0, w0
+; CHECK-SD-NEXT: ucvtf h0, h0, #1
+; CHECK-SD-NEXT: ret
;
-; GISEL-LABEL: test_vcvth_n_f16_u16_1:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: and w8, w0, #0xffff
-; GISEL-NEXT: fmov s0, w8
-; GISEL-NEXT: ucvtf h0, h0, #1
-; GISEL-NEXT: ret
+; CHECK-GI-LABEL: test_vcvth_n_f16_u16_1:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: and w8, w0, #0xffff
+; CHECK-GI-NEXT: fmov s0, w8
+; CHECK-GI-NEXT: ucvtf h0, h0, #1
+; CHECK-GI-NEXT: ret
entry:
%0 = zext i16 %a to i32
%fcvth_n = tail call half @llvm.aarch64.neon.vcvtfxu2fp.f16.i32(i32 %0, i32 1)
@@ -334,18 +334,18 @@ entry:
}
define dso_local half @test_vcvth_n_f16_u16_16(i16 %a) {
-; SDISEL-LABEL: test_vcvth_n_f16_u16_16:
-; SDISEL: // %bb.0: // %entry
-; SDISEL-NEXT: fmov s0, w0
-; SDISEL-NEXT: ucvtf h0, h0, #16
-; SDISEL-NEXT: ret
+; CHECK-SD-LABEL: test_vcvth_n_f16_u16_16:
+; CHECK-SD: // %bb.0: // %entry
+; CHECK-SD-NEXT: fmov s0, w0
+; CHECK-SD-NEXT: ucvtf h0, h0, #16
+; CHECK-SD-NEXT: ret
;
-; GISEL-LABEL: test_vcvth_n_f16_u16_16:
-; GISEL: // %bb.0: // %entry
-; GISEL-NEXT: and w8, w0, #0xffff
-; GISEL-NEXT: fmov s0, w8
-; GISEL-NEXT: ucvtf h0, h0, #16
-; GISEL-NEXT: ret
+; CHECK-GI-LABEL: test_vcvth_n_f16_u16_16:
+; CHECK-GI: // %bb.0: // %entry
+; CHECK-GI-NEXT: and w8, w0, #0xffff
+; CHECK-GI-NEXT: fmov s0, w8
+; CHECK-GI-NEXT: ucvtf h0, h0, #16
+; CHECK-GI-NEXT: ret
entry:
%0 = zext i16 %a to i32
%fcvth_n = tail call half @llvm.aarch64.neon.vcvtfxu2fp.f16.i32(i32 %0, i32 16)
diff --git a/llvm/test/CodeGen/AArch64/late-taildup-computed-goto.ll b/llvm/test/CodeGen/AArch64/late-taildup-computed-goto.ll
new file mode 100644
index 0000000..c4a027c
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/late-taildup-computed-goto.ll
@@ -0,0 +1,162 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -tail-dup-pred-size=2 -tail-dup-succ-size=2 -o - %s | FileCheck %s
+
+target triple = "arm64-apple-macosx13.0.0"
+
+@opcode.targets = local_unnamed_addr constant [6 x ptr] [ptr blockaddress(@test_interp, %op1.bb), ptr blockaddress(@test_interp, %op6.bb), ptr blockaddress(@test_interp, %loop.header), ptr blockaddress(@test_interp, %op2.bb), ptr blockaddress(@test_interp, %op4.bb), ptr blockaddress(@test_interp, %op5.bb)]
+
+define void @test_interp(ptr %frame, ptr %dst) {
+; CHECK-LABEL: test_interp:
+; CHECK: ; %bb.0: ; %entry
+; CHECK-NEXT: stp x24, x23, [sp, #-64]! ; 16-byte Folded Spill
+; CHECK-NEXT: stp x22, x21, [sp, #16] ; 16-byte Folded Spill
+; CHECK-NEXT: stp x20, x19, [sp, #32] ; 16-byte Folded Spill
+; CHECK-NEXT: stp x29, x30, [sp, #48] ; 16-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: .cfi_offset w30, -8
+; CHECK-NEXT: .cfi_offset w29, -16
+; CHECK-NEXT: .cfi_offset w19, -24
+; CHECK-NEXT: .cfi_offset w20, -32
+; CHECK-NEXT: .cfi_offset w21, -40
+; CHECK-NEXT: .cfi_offset w22, -48
+; CHECK-NEXT: .cfi_offset w23, -56
+; CHECK-NEXT: .cfi_offset w24, -64
+; CHECK-NEXT: Lloh0:
+; CHECK-NEXT: adrp x21, _opcode.targets@PAGE
+; CHECK-NEXT: Lloh1:
+; CHECK-NEXT: add x21, x21, _opcode.targets@PAGEOFF
+; CHECK-NEXT: mov x22, xzr
+; CHECK-NEXT: add x8, x21, xzr, lsl #3
+; CHECK-NEXT: mov x19, x1
+; CHECK-NEXT: mov x20, x0
+; CHECK-NEXT: add x23, x22, #1
+; CHECK-NEXT: br x8
+; CHECK-NEXT: Ltmp0: ; Block address taken
+; CHECK-NEXT: LBB0_1: ; %loop.header
+; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: add x8, x21, x23, lsl #3
+; CHECK-NEXT: mov x20, xzr
+; CHECK-NEXT: mov x22, xzr
+; CHECK-NEXT: add x23, x23, #1
+; CHECK-NEXT: br x8
+; CHECK-NEXT: Ltmp1: ; Block address taken
+; CHECK-NEXT: LBB0_2: ; %op1.bb
+; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: str xzr, [x19]
+; CHECK-NEXT: mov w8, #1 ; =0x1
+; CHECK-NEXT: ldr x0, [x20, #-8]!
+; CHECK-NEXT: ldr x9, [x0, #8]
+; CHECK-NEXT: str x8, [x0]
+; CHECK-NEXT: ldr x8, [x9, #48]
+; CHECK-NEXT: blr x8
+; CHECK-NEXT: add x8, x21, x23, lsl #3
+; CHECK-NEXT: add x23, x23, #1
+; CHECK-NEXT: br x8
+; CHECK-NEXT: Ltmp2: ; Block address taken
+; CHECK-NEXT: LBB0_3: ; %op2.bb
+; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: add x8, x21, x23, lsl #3
+; CHECK-NEXT: mov x20, xzr
+; CHECK-NEXT: add x23, x23, #1
+; CHECK-NEXT: str x22, [x19]
+; CHECK-NEXT: mov x22, xzr
+; CHECK-NEXT: br x8
+; CHECK-NEXT: Ltmp3: ; Block address taken
+; CHECK-NEXT: LBB0_4: ; %op4.bb
+; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: str x22, [x19]
+; CHECK-NEXT: add x10, x21, x23, lsl #3
+; CHECK-NEXT: add x23, x23, #1
+; CHECK-NEXT: ldur x8, [x22, #12]
+; CHECK-NEXT: ldur x9, [x20, #-8]
+; CHECK-NEXT: add x22, x22, #20
+; CHECK-NEXT: stp x8, x9, [x20, #-8]
+; CHECK-NEXT: add x20, x20, #8
+; CHECK-NEXT: br x10
+; CHECK-NEXT: Ltmp4: ; Block address taken
+; CHECK-NEXT: LBB0_5: ; %op5.bb
+; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: str x22, [x19]
+; CHECK-NEXT: add x10, x21, x23, lsl #3
+; CHECK-NEXT: add x23, x23, #1
+; CHECK-NEXT: ldur x8, [x22, #12]
+; CHECK-NEXT: ldur x9, [x20, #-8]
+; CHECK-NEXT: add x22, x22, #20
+; CHECK-NEXT: stp x8, x9, [x20, #-8]
+; CHECK-NEXT: add x20, x20, #8
+; CHECK-NEXT: br x10
+; CHECK-NEXT: Ltmp5: ; Block address taken
+; CHECK-NEXT: LBB0_6: ; %op6.bb
+; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: ldr x0, [x20, #-8]!
+; CHECK-NEXT: mov w8, #1 ; =0x1
+; CHECK-NEXT: ldr x9, [x0, #8]
+; CHECK-NEXT: str x8, [x0]
+; CHECK-NEXT: ldr x8, [x9, #48]
+; CHECK-NEXT: blr x8
+; CHECK-NEXT: add x8, x21, x23, lsl #3
+; CHECK-NEXT: add x23, x23, #1
+; CHECK-NEXT: br x8
+; CHECK-NEXT: .loh AdrpAdd Lloh0, Lloh1
+entry:
+ br label %loop.header
+
+loop.header:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %op1.bb ], [ %iv.next, %op2.bb ], [ %iv.next, %op4.bb ], [ %iv.next, %op5.bb ], [ %iv.next, %op6.bb ], [ %iv.next, %loop.header ]
+ %stack.pointer = phi ptr [ %frame, %entry ], [ %stack.8, %op1.bb ], [ null, %op2.bb ], [ %stack.next, %op4.bb ], [ %stack.next.2, %op5.bb ], [ %stack.4, %op6.bb ], [ null, %loop.header ]
+ %next.instr = phi ptr [ null, %entry ], [ %next.instr, %op1.bb ], [ null, %op2.bb ], [ %next.instr.20, %op4.bb ], [ %next.instr.21, %op5.bb ], [ %next.instr, %op6.bb ], [ null, %loop.header ]
+ %iv.next = add i64 %iv, 1
+ %next_op = getelementptr [6 x ptr], ptr @opcode.targets, i64 0, i64 %iv
+ indirectbr ptr %next_op, [label %op1.bb, label %op6.bb, label %loop.header, label %op2.bb, label %op4.bb, label %op5.bb]
+
+op1.bb:
+ store ptr null, ptr %dst, align 8
+ %stack.8 = getelementptr i8, ptr %stack.pointer, i64 -8
+ %l.0 = load ptr, ptr %stack.8, align 8
+ store i64 1, ptr %l.0, align 8
+ %gep.0 = getelementptr i8, ptr %l.0, i64 8
+ %l.1 = load ptr, ptr %gep.0, align 8
+ %gep.1 = getelementptr i8, ptr %l.1, i64 48
+ %l.2 = load ptr, ptr %gep.1, align 8
+ tail call void %l.2(ptr nonnull %l.0)
+ br label %loop.header
+
+op2.bb:
+ store ptr %next.instr, ptr %dst, align 8
+ br label %loop.header
+
+op4.bb:
+ store ptr %next.instr, ptr %dst, align 8
+ %next.instr.20 = getelementptr i8, ptr %next.instr, i64 20
+ %stack.2 = getelementptr i8, ptr %stack.pointer, i64 -8
+ %l.3 = load ptr, ptr %stack.2, align 8
+ %next.instr.12 = getelementptr i8, ptr %next.instr, i64 12
+ %next.instr.12.val = load ptr, ptr %next.instr.12, align 2
+ store ptr %next.instr.12.val, ptr %stack.2, align 8
+ store ptr %l.3, ptr %stack.pointer, align 8
+ %stack.next = getelementptr i8, ptr %stack.pointer, i64 8
+ br label %loop.header
+
+op5.bb:
+ store ptr %next.instr, ptr %dst, align 8
+ %next.instr.21 = getelementptr i8, ptr %next.instr, i64 20
+ %stack.3 = getelementptr i8, ptr %stack.pointer, i64 -8
+ %l.4 = load ptr, ptr %stack.3, align 8
+ %next.instr.2 = getelementptr i8, ptr %next.instr, i64 12
+ %next.instr.2.val = load ptr, ptr %next.instr.2, align 2
+ store ptr %next.instr.2.val, ptr %stack.3, align 8
+ store ptr %l.4, ptr %stack.pointer, align 8
+ %stack.next.2 = getelementptr i8, ptr %stack.pointer, i64 8
+ br label %loop.header
+
+op6.bb:
+ %stack.4 = getelementptr i8, ptr %stack.pointer, i64 -8
+ %l.5 = load ptr, ptr %stack.4, align 8
+ store i64 1, ptr %l.5, align 8
+ %gep.5 = getelementptr i8, ptr %l.5, i64 8
+ %l.6 = load ptr, ptr %gep.5, align 8
+ %gep.6 = getelementptr i8, ptr %l.6, i64 48
+ %l.7 = load ptr, ptr %gep.6, align 8
+ tail call void %l.7(ptr nonnull %l.5)
+ br label %loop.header
+}
diff --git a/llvm/test/CodeGen/AArch64/machine-combiner-reassociate.mir b/llvm/test/CodeGen/AArch64/machine-combiner-reassociate.mir
index 525f6dd..184c9ef 100644
--- a/llvm/test/CodeGen/AArch64/machine-combiner-reassociate.mir
+++ b/llvm/test/CodeGen/AArch64/machine-combiner-reassociate.mir
@@ -1,14 +1,11 @@
-# RUN: llc -run-pass=machine-combiner -mtriple=aarch64-unknown-linux-gnu %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SAFE
-# RUN: llc -run-pass=machine-combiner -mtriple=aarch64-unknown-linux-gnu -enable-unsafe-fp-math %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-UNSAFE
+# RUN: llc -run-pass=machine-combiner -mtriple=aarch64-unknown-linux-gnu %s -o - | FileCheck %s
# fadd without the reassoc flags can be reassociate only when unsafe fp math is
# enabled.
# CHECK-LABEL: name: fadd_no_reassoc
# CHECK: [[ADD1:%[0-9]+]]:fpr32 = FADDSrr %0, %1, implicit $fpcr
-# CHECK-SAFE-NEXT: [[ADD2:%[0-9]+]]:fpr32 = FADDSrr killed [[ADD1]], %2, implicit $fpcr
-# CHECK-SAFE-NEXT: [[ADD3:%[0-9]+]]:fpr32 = FADDSrr killed [[ADD2]], %3, implicit $fpcr
-# CHECK-UNSAFE-NEXT: [[ADD2:%[0-9]+]]:fpr32 = FADDSrr %2, %3, implicit $fpcr
-# CHECK-UNSAFE-NEXT: [[ADD3:%[0-9]+]]:fpr32 = FADDSrr killed [[ADD1]], killed [[ADD2]], implicit $fpcr
+# CHECK: [[ADD2:%[0-9]+]]:fpr32 = FADDSrr killed [[ADD1]], %2, implicit $fpcr
+# CHECK: [[ADD3:%[0-9]+]]:fpr32 = FADDSrr killed [[ADD2]], %3, implicit $fpcr
---
name: fadd_no_reassoc
alignment: 4
@@ -49,10 +46,9 @@ body: |
# the reassoc flag is ignored.
# CHECK-LABEL: name: fadd_reassoc
# CHECK: [[ADD1:%[0-9]+]]:fpr32 = reassoc FADDSrr %0, %1, implicit $fpcr
-# CHECK-SAFE-NEXT: [[ADD2:%[0-9]+]]:fpr32 = reassoc FADDSrr killed [[ADD1]], %2, implicit $fpcr
-# CHECK-SAFE-NEXT: [[ADD3:%[0-9]+]]:fpr32 = reassoc FADDSrr killed [[ADD2]], %3, implicit $fpcr
-# CHECK-UNSAFE-NEXT: [[ADD2:%[0-9]+]]:fpr32 = reassoc FADDSrr %2, %3, implicit $fpcr
-# CHECK-UNSAFE-NEXT: [[ADD3:%[0-9]+]]:fpr32 = reassoc FADDSrr killed [[ADD1]], killed [[ADD2]], implicit $fpcr
+# CHECK: [[ADD2:%[0-9]+]]:fpr32 = reassoc FADDSrr killed [[ADD1]], %2, implicit $fpcr
+# CHECK: [[ADD3:%[0-9]+]]:fpr32 = reassoc FADDSrr killed [[ADD2]], %3, implicit $fpcr
+
---
name: fadd_reassoc
alignment: 4
@@ -92,10 +88,8 @@ body: |
# Check that flags on the instructions are preserved after reassociation.
# CHECK-LABEL: name: fadd_flags
# CHECK: [[ADD1:%[0-9]+]]:fpr32 = nnan ninf nsz FADDSrr %0, %1, implicit $fpcr
-# CHECK-SAFE-NEXT: [[ADD2:%[0-9]+]]:fpr32 = nnan nsz FADDSrr killed [[ADD1]], %2, implicit $fpcr
-# CHECK-SAFE-NEXT: [[ADD3:%[0-9]+]]:fpr32 = ninf nsz FADDSrr killed [[ADD2]], %3, implicit $fpcr
-# CHECK-UNSAFE-NEXT: [[ADD2:%[0-9]+]]:fpr32 = nsz FADDSrr %2, %3, implicit $fpcr
-# CHECK-UNSAFE-NEXT: [[ADD3:%[0-9]+]]:fpr32 = nsz FADDSrr killed [[ADD1]], killed [[ADD2]], implicit $fpcr
+# CHECK: [[ADD2:%[0-9]+]]:fpr32 = nnan nsz FADDSrr killed [[ADD1]], %2, implicit $fpcr
+# CHECK: [[ADD3:%[0-9]+]]:fpr32 = ninf nsz FADDSrr killed [[ADD2]], %3, implicit $fpcr
---
name: fadd_flags
alignment: 4
diff --git a/llvm/test/CodeGen/AArch64/machine-combiner.ll b/llvm/test/CodeGen/AArch64/machine-combiner.ll
index ec61fee..65afd92 100644
--- a/llvm/test/CodeGen/AArch64/machine-combiner.ll
+++ b/llvm/test/CodeGen/AArch64/machine-combiner.ll
@@ -1,29 +1,21 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=aarch64-gnu-linux -mcpu=neoverse-n2 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-STD
-; RUN: llc -mtriple=aarch64-gnu-linux -mcpu=neoverse-n2 -enable-unsafe-fp-math < %s | FileCheck %s --check-prefixes=CHECK,CHECK-UNSAFE
+; RUN: llc -mtriple=aarch64-gnu-linux -mcpu=neoverse-n2 < %s | FileCheck %s
; Incremental updates of the instruction depths should be enough for this test
; case.
-; RUN: llc -mtriple=aarch64-gnu-linux -mcpu=neoverse-n2 -enable-unsafe-fp-math \
-; RUN: -machine-combiner-inc-threshold=0 -machine-combiner-verify-pattern-order=true < %s | FileCheck %s --check-prefixes=CHECK,CHECK-UNSAFE
+; RUN: llc -mtriple=aarch64-gnu-linux -mcpu=neoverse-n2 \
+; RUN: -machine-combiner-inc-threshold=0 -machine-combiner-verify-pattern-order=true < %s | FileCheck %s
; Verify that the first two adds are independent regardless of how the inputs are
; commuted. The destination registers are used as source registers for the third add.
define float @reassociate_adds1(float %x0, float %x1, float %x2, float %x3) {
-; CHECK-STD-LABEL: reassociate_adds1:
-; CHECK-STD: // %bb.0:
-; CHECK-STD-NEXT: fadd s0, s0, s1
-; CHECK-STD-NEXT: fadd s0, s0, s2
-; CHECK-STD-NEXT: fadd s0, s0, s3
-; CHECK-STD-NEXT: ret
-;
-; CHECK-UNSAFE-LABEL: reassociate_adds1:
-; CHECK-UNSAFE: // %bb.0:
-; CHECK-UNSAFE-NEXT: fadd s0, s0, s1
-; CHECK-UNSAFE-NEXT: fadd s1, s2, s3
-; CHECK-UNSAFE-NEXT: fadd s0, s0, s1
-; CHECK-UNSAFE-NEXT: ret
+; CHECK-LABEL: reassociate_adds1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fadd s0, s0, s1
+; CHECK-NEXT: fadd s0, s0, s2
+; CHECK-NEXT: fadd s0, s0, s3
+; CHECK-NEXT: ret
%t0 = fadd float %x0, %x1
%t1 = fadd float %t0, %x2
%t2 = fadd float %t1, %x3
@@ -44,110 +36,110 @@ define float @reassociate_adds1_fast(float %x0, float %x1, float %x2, float %x3)
}
define float @reassociate_adds1_reassoc(float %x0, float %x1, float %x2, float %x3) {
-; CHECK-STD-LABEL: reassociate_adds1_reassoc:
-; CHECK-STD: // %bb.0:
-; CHECK-STD-NEXT: fadd s0, s0, s1
-; CHECK-STD-NEXT: fadd s0, s0, s2
-; CHECK-STD-NEXT: fadd s0, s0, s3
-; CHECK-STD-NEXT: ret
-;
-; CHECK-UNSAFE-LABEL: reassociate_adds1_reassoc:
-; CHECK-UNSAFE: // %bb.0:
-; CHECK-UNSAFE-NEXT: fadd s0, s0, s1
-; CHECK-UNSAFE-NEXT: fadd s1, s2, s3
-; CHECK-UNSAFE-NEXT: fadd s0, s0, s1
-; CHECK-UNSAFE-NEXT: ret
- %t0 = fadd reassoc float %x0, %x1
- %t1 = fadd reassoc float %t0, %x2
- %t2 = fadd reassoc float %t1, %x3
+; CHECK-LABEL: reassociate_adds1_reassoc:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fadd s0, s0, s1
+; CHECK-NEXT: fadd s1, s2, s3
+; CHECK-NEXT: fadd s0, s0, s1
+; CHECK-NEXT: ret
+ %t0 = fadd reassoc nsz float %x0, %x1
+ %t1 = fadd reassoc nsz float %t0, %x2
+ %t2 = fadd reassoc nsz float %t1, %x3
ret float %t2
}
define float @reassociate_adds2(float %x0, float %x1, float %x2, float %x3) {
-; CHECK-STD-LABEL: reassociate_adds2:
-; CHECK-STD: // %bb.0:
-; CHECK-STD-NEXT: fadd s0, s0, s1
-; CHECK-STD-NEXT: fadd s0, s2, s0
-; CHECK-STD-NEXT: fadd s0, s0, s3
-; CHECK-STD-NEXT: ret
-;
-; CHECK-UNSAFE-LABEL: reassociate_adds2:
-; CHECK-UNSAFE: // %bb.0:
-; CHECK-UNSAFE-NEXT: fadd s0, s0, s1
-; CHECK-UNSAFE-NEXT: fadd s1, s2, s3
-; CHECK-UNSAFE-NEXT: fadd s0, s1, s0
-; CHECK-UNSAFE-NEXT: ret
+; CHECK-LABEL: reassociate_adds2:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fadd s0, s0, s1
+; CHECK-NEXT: fadd s0, s2, s0
+; CHECK-NEXT: fadd s0, s0, s3
+; CHECK-NEXT: ret
%t0 = fadd float %x0, %x1
%t1 = fadd float %x2, %t0
%t2 = fadd float %t1, %x3
ret float %t2
}
+define float @reassociate_adds2_reassoc(float %x0, float %x1, float %x2, float %x3) {
+; CHECK-LABEL: reassociate_adds2_reassoc:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fadd s0, s0, s1
+; CHECK-NEXT: fadd s1, s2, s3
+; CHECK-NEXT: fadd s0, s1, s0
+; CHECK-NEXT: ret
+ %t0 = fadd reassoc nsz float %x0, %x1
+ %t1 = fadd reassoc nsz float %x2, %t0
+ %t2 = fadd reassoc nsz float %t1, %x3
+ ret float %t2
+}
+
define float @reassociate_adds3(float %x0, float %x1, float %x2, float %x3) {
-; CHECK-STD-LABEL: reassociate_adds3:
-; CHECK-STD: // %bb.0:
-; CHECK-STD-NEXT: fadd s0, s0, s1
-; CHECK-STD-NEXT: fadd s0, s0, s2
-; CHECK-STD-NEXT: fadd s0, s3, s0
-; CHECK-STD-NEXT: ret
-;
-; CHECK-UNSAFE-LABEL: reassociate_adds3:
-; CHECK-UNSAFE: // %bb.0:
-; CHECK-UNSAFE-NEXT: fadd s0, s0, s1
-; CHECK-UNSAFE-NEXT: fadd s1, s3, s2
-; CHECK-UNSAFE-NEXT: fadd s0, s1, s0
-; CHECK-UNSAFE-NEXT: ret
+; CHECK-LABEL: reassociate_adds3:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fadd s0, s0, s1
+; CHECK-NEXT: fadd s0, s0, s2
+; CHECK-NEXT: fadd s0, s3, s0
+; CHECK-NEXT: ret
%t0 = fadd float %x0, %x1
%t1 = fadd float %t0, %x2
%t2 = fadd float %x3, %t1
ret float %t2
}
+define float @reassociate_adds3_reassoc(float %x0, float %x1, float %x2, float %x3) {
+; CHECK-LABEL: reassociate_adds3_reassoc:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fadd s0, s0, s1
+; CHECK-NEXT: fadd s1, s3, s2
+; CHECK-NEXT: fadd s0, s1, s0
+; CHECK-NEXT: ret
+ %t0 = fadd reassoc nsz float %x0, %x1
+ %t1 = fadd reassoc nsz float %t0, %x2
+ %t2 = fadd reassoc nsz float %x3, %t1
+ ret float %t2
+}
+
define float @reassociate_adds4(float %x0, float %x1, float %x2, float %x3) {
-; CHECK-STD-LABEL: reassociate_adds4:
-; CHECK-STD: // %bb.0:
-; CHECK-STD-NEXT: fadd s0, s0, s1
-; CHECK-STD-NEXT: fadd s0, s2, s0
-; CHECK-STD-NEXT: fadd s0, s3, s0
-; CHECK-STD-NEXT: ret
-;
-; CHECK-UNSAFE-LABEL: reassociate_adds4:
-; CHECK-UNSAFE: // %bb.0:
-; CHECK-UNSAFE-NEXT: fadd s0, s0, s1
-; CHECK-UNSAFE-NEXT: fadd s1, s3, s2
-; CHECK-UNSAFE-NEXT: fadd s0, s1, s0
-; CHECK-UNSAFE-NEXT: ret
+; CHECK-LABEL: reassociate_adds4:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fadd s0, s0, s1
+; CHECK-NEXT: fadd s0, s2, s0
+; CHECK-NEXT: fadd s0, s3, s0
+; CHECK-NEXT: ret
%t0 = fadd float %x0, %x1
%t1 = fadd float %x2, %t0
%t2 = fadd float %x3, %t1
ret float %t2
}
+define float @reassociate_adds4_reassoc(float %x0, float %x1, float %x2, float %x3) {
+; CHECK-LABEL: reassociate_adds4_reassoc:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fadd s0, s0, s1
+; CHECK-NEXT: fadd s1, s3, s2
+; CHECK-NEXT: fadd s0, s1, s0
+; CHECK-NEXT: ret
+ %t0 = fadd reassoc nsz float %x0, %x1
+ %t1 = fadd reassoc nsz float %x2, %t0
+ %t2 = fadd reassoc nsz float %x3, %t1
+ ret float %t2
+}
+
; Verify that we reassociate some of these ops. The optimal balanced tree of adds is not
; produced because that would cost more compile time.
define float @reassociate_adds5(float %x0, float %x1, float %x2, float %x3, float %x4, float %x5, float %x6, float %x7) {
-; CHECK-STD-LABEL: reassociate_adds5:
-; CHECK-STD: // %bb.0:
-; CHECK-STD-NEXT: fadd s0, s0, s1
-; CHECK-STD-NEXT: fadd s0, s0, s2
-; CHECK-STD-NEXT: fadd s0, s0, s3
-; CHECK-STD-NEXT: fadd s0, s0, s4
-; CHECK-STD-NEXT: fadd s0, s0, s5
-; CHECK-STD-NEXT: fadd s0, s0, s6
-; CHECK-STD-NEXT: fadd s0, s0, s7
-; CHECK-STD-NEXT: ret
-;
-; CHECK-UNSAFE-LABEL: reassociate_adds5:
-; CHECK-UNSAFE: // %bb.0:
-; CHECK-UNSAFE-NEXT: fadd s0, s0, s1
-; CHECK-UNSAFE-NEXT: fadd s1, s2, s3
-; CHECK-UNSAFE-NEXT: fadd s0, s0, s1
-; CHECK-UNSAFE-NEXT: fadd s1, s4, s5
-; CHECK-UNSAFE-NEXT: fadd s1, s1, s6
-; CHECK-UNSAFE-NEXT: fadd s0, s0, s1
-; CHECK-UNSAFE-NEXT: fadd s0, s0, s7
-; CHECK-UNSAFE-NEXT: ret
+; CHECK-LABEL: reassociate_adds5:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fadd s0, s0, s1
+; CHECK-NEXT: fadd s0, s0, s2
+; CHECK-NEXT: fadd s0, s0, s3
+; CHECK-NEXT: fadd s0, s0, s4
+; CHECK-NEXT: fadd s0, s0, s5
+; CHECK-NEXT: fadd s0, s0, s6
+; CHECK-NEXT: fadd s0, s0, s7
+; CHECK-NEXT: ret
%t0 = fadd float %x0, %x1
%t1 = fadd float %t0, %x2
%t2 = fadd float %t1, %x3
@@ -158,141 +150,198 @@ define float @reassociate_adds5(float %x0, float %x1, float %x2, float %x3, floa
ret float %t6
}
+define float @reassociate_adds5_reassoc(float %x0, float %x1, float %x2, float %x3, float %x4, float %x5, float %x6, float %x7) {
+; CHECK-LABEL: reassociate_adds5_reassoc:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fadd s0, s0, s1
+; CHECK-NEXT: fadd s1, s2, s3
+; CHECK-NEXT: fadd s0, s0, s1
+; CHECK-NEXT: fadd s1, s4, s5
+; CHECK-NEXT: fadd s1, s1, s6
+; CHECK-NEXT: fadd s0, s0, s1
+; CHECK-NEXT: fadd s0, s0, s7
+; CHECK-NEXT: ret
+ %t0 = fadd reassoc nsz float %x0, %x1
+ %t1 = fadd reassoc nsz float %t0, %x2
+ %t2 = fadd reassoc nsz float %t1, %x3
+ %t3 = fadd reassoc nsz float %t2, %x4
+ %t4 = fadd reassoc nsz float %t3, %x5
+ %t5 = fadd reassoc nsz float %t4, %x6
+ %t6 = fadd reassoc nsz float %t5, %x7
+ ret float %t6
+}
+
; Verify that we only need two associative operations to reassociate the operands.
; Also, we should reassociate such that the result of the high latency division
; is used by the final 'add' rather than reassociating the %x3 operand with the
; division. The latter reassociation would not improve anything.
define float @reassociate_adds6(float %x0, float %x1, float %x2, float %x3) {
-; CHECK-STD-LABEL: reassociate_adds6:
-; CHECK-STD: // %bb.0:
-; CHECK-STD-NEXT: fdiv s0, s0, s1
-; CHECK-STD-NEXT: fadd s0, s2, s0
-; CHECK-STD-NEXT: fadd s0, s3, s0
-; CHECK-STD-NEXT: ret
-;
-; CHECK-UNSAFE-LABEL: reassociate_adds6:
-; CHECK-UNSAFE: // %bb.0:
-; CHECK-UNSAFE-NEXT: fdiv s0, s0, s1
-; CHECK-UNSAFE-NEXT: fadd s1, s3, s2
-; CHECK-UNSAFE-NEXT: fadd s0, s1, s0
-; CHECK-UNSAFE-NEXT: ret
+; CHECK-LABEL: reassociate_adds6:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fdiv s0, s0, s1
+; CHECK-NEXT: fadd s0, s2, s0
+; CHECK-NEXT: fadd s0, s3, s0
+; CHECK-NEXT: ret
%t0 = fdiv float %x0, %x1
%t1 = fadd float %x2, %t0
%t2 = fadd float %x3, %t1
ret float %t2
}
+define float @reassociate_adds6_reassoc(float %x0, float %x1, float %x2, float %x3) {
+; CHECK-LABEL: reassociate_adds6_reassoc:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fdiv s0, s0, s1
+; CHECK-NEXT: fadd s1, s3, s2
+; CHECK-NEXT: fadd s0, s1, s0
+; CHECK-NEXT: ret
+ %t0 = fdiv reassoc nsz float %x0, %x1
+ %t1 = fadd reassoc nsz float %x2, %t0
+ %t2 = fadd reassoc nsz float %x3, %t1
+ ret float %t2
+}
+
; Verify that scalar single-precision multiplies are reassociated.
define float @reassociate_muls1(float %x0, float %x1, float %x2, float %x3) {
-; CHECK-STD-LABEL: reassociate_muls1:
-; CHECK-STD: // %bb.0:
-; CHECK-STD-NEXT: fdiv s0, s0, s1
-; CHECK-STD-NEXT: fmul s0, s2, s0
-; CHECK-STD-NEXT: fmul s0, s3, s0
-; CHECK-STD-NEXT: ret
-;
-; CHECK-UNSAFE-LABEL: reassociate_muls1:
-; CHECK-UNSAFE: // %bb.0:
-; CHECK-UNSAFE-NEXT: fdiv s0, s0, s1
-; CHECK-UNSAFE-NEXT: fmul s1, s3, s2
-; CHECK-UNSAFE-NEXT: fmul s0, s1, s0
-; CHECK-UNSAFE-NEXT: ret
+; CHECK-LABEL: reassociate_muls1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fdiv s0, s0, s1
+; CHECK-NEXT: fmul s0, s2, s0
+; CHECK-NEXT: fmul s0, s3, s0
+; CHECK-NEXT: ret
%t0 = fdiv float %x0, %x1
%t1 = fmul float %x2, %t0
%t2 = fmul float %x3, %t1
ret float %t2
}
+define float @reassociate_muls1_reassoc(float %x0, float %x1, float %x2, float %x3) {
+; CHECK-LABEL: reassociate_muls1_reassoc:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fdiv s0, s0, s1
+; CHECK-NEXT: fmul s1, s3, s2
+; CHECK-NEXT: fmul s0, s1, s0
+; CHECK-NEXT: ret
+ %t0 = fdiv reassoc nsz float %x0, %x1
+ %t1 = fmul reassoc nsz float %x2, %t0
+ %t2 = fmul reassoc nsz float %x3, %t1
+ ret float %t2
+}
+
; Verify that scalar double-precision adds are reassociated.
define double @reassociate_adds_double(double %x0, double %x1, double %x2, double %x3) {
-; CHECK-STD-LABEL: reassociate_adds_double:
-; CHECK-STD: // %bb.0:
-; CHECK-STD-NEXT: fdiv d0, d0, d1
-; CHECK-STD-NEXT: fadd d0, d2, d0
-; CHECK-STD-NEXT: fadd d0, d3, d0
-; CHECK-STD-NEXT: ret
-;
-; CHECK-UNSAFE-LABEL: reassociate_adds_double:
-; CHECK-UNSAFE: // %bb.0:
-; CHECK-UNSAFE-NEXT: fdiv d0, d0, d1
-; CHECK-UNSAFE-NEXT: fadd d1, d3, d2
-; CHECK-UNSAFE-NEXT: fadd d0, d1, d0
-; CHECK-UNSAFE-NEXT: ret
+; CHECK-LABEL: reassociate_adds_double:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fdiv d0, d0, d1
+; CHECK-NEXT: fadd d0, d2, d0
+; CHECK-NEXT: fadd d0, d3, d0
+; CHECK-NEXT: ret
%t0 = fdiv double %x0, %x1
%t1 = fadd double %x2, %t0
%t2 = fadd double %x3, %t1
ret double %t2
}
+define double @reassociate_adds_double_reassoc(double %x0, double %x1, double %x2, double %x3) {
+; CHECK-LABEL: reassociate_adds_double_reassoc:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fdiv d0, d0, d1
+; CHECK-NEXT: fadd d1, d3, d2
+; CHECK-NEXT: fadd d0, d1, d0
+; CHECK-NEXT: ret
+ %t0 = fdiv reassoc nsz double %x0, %x1
+ %t1 = fadd reassoc nsz double %x2, %t0
+ %t2 = fadd reassoc nsz double %x3, %t1
+ ret double %t2
+}
+
; Verify that scalar double-precision multiplies are reassociated.
define double @reassociate_muls_double(double %x0, double %x1, double %x2, double %x3) {
-; CHECK-STD-LABEL: reassociate_muls_double:
-; CHECK-STD: // %bb.0:
-; CHECK-STD-NEXT: fdiv d0, d0, d1
-; CHECK-STD-NEXT: fmul d0, d2, d0
-; CHECK-STD-NEXT: fmul d0, d3, d0
-; CHECK-STD-NEXT: ret
-;
-; CHECK-UNSAFE-LABEL: reassociate_muls_double:
-; CHECK-UNSAFE: // %bb.0:
-; CHECK-UNSAFE-NEXT: fdiv d0, d0, d1
-; CHECK-UNSAFE-NEXT: fmul d1, d3, d2
-; CHECK-UNSAFE-NEXT: fmul d0, d1, d0
-; CHECK-UNSAFE-NEXT: ret
+; CHECK-LABEL: reassociate_muls_double:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fdiv d0, d0, d1
+; CHECK-NEXT: fmul d0, d2, d0
+; CHECK-NEXT: fmul d0, d3, d0
+; CHECK-NEXT: ret
%t0 = fdiv double %x0, %x1
%t1 = fmul double %x2, %t0
%t2 = fmul double %x3, %t1
ret double %t2
}
+define double @reassociate_muls_double_reassoc(double %x0, double %x1, double %x2, double %x3) {
+; CHECK-LABEL: reassociate_muls_double_reassoc:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fdiv d0, d0, d1
+; CHECK-NEXT: fmul d1, d3, d2
+; CHECK-NEXT: fmul d0, d1, d0
+; CHECK-NEXT: ret
+ %t0 = fdiv reassoc nsz double %x0, %x1
+ %t1 = fmul reassoc nsz double %x2, %t0
+ %t2 = fmul reassoc nsz double %x3, %t1
+ ret double %t2
+}
+
; Verify that scalar half-precision adds are reassociated.
define half @reassociate_adds_half(half %x0, half %x1, half %x2, half %x3) {
-; CHECK-STD-LABEL: reassociate_adds_half:
-; CHECK-STD: // %bb.0:
-; CHECK-STD-NEXT: fdiv h0, h0, h1
-; CHECK-STD-NEXT: fadd h0, h2, h0
-; CHECK-STD-NEXT: fadd h0, h3, h0
-; CHECK-STD-NEXT: ret
-;
-; CHECK-UNSAFE-LABEL: reassociate_adds_half:
-; CHECK-UNSAFE: // %bb.0:
-; CHECK-UNSAFE-NEXT: fdiv h0, h0, h1
-; CHECK-UNSAFE-NEXT: fadd h1, h3, h2
-; CHECK-UNSAFE-NEXT: fadd h0, h1, h0
-; CHECK-UNSAFE-NEXT: ret
+; CHECK-LABEL: reassociate_adds_half:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fdiv h0, h0, h1
+; CHECK-NEXT: fadd h0, h2, h0
+; CHECK-NEXT: fadd h0, h3, h0
+; CHECK-NEXT: ret
%t0 = fdiv half %x0, %x1
%t1 = fadd half %x2, %t0
%t2 = fadd half %x3, %t1
ret half %t2
}
+define half @reassociate_adds_half_reassoc(half %x0, half %x1, half %x2, half %x3) {
+; CHECK-LABEL: reassociate_adds_half_reassoc:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fdiv h0, h0, h1
+; CHECK-NEXT: fadd h1, h3, h2
+; CHECK-NEXT: fadd h0, h1, h0
+; CHECK-NEXT: ret
+ %t0 = fdiv reassoc nsz half %x0, %x1
+ %t1 = fadd reassoc nsz half %x2, %t0
+ %t2 = fadd reassoc nsz half %x3, %t1
+ ret half %t2
+}
+
; Verify that scalar half-precision multiplies are reassociated.
define half @reassociate_muls_half(half %x0, half %x1, half %x2, half %x3) {
-; CHECK-STD-LABEL: reassociate_muls_half:
-; CHECK-STD: // %bb.0:
-; CHECK-STD-NEXT: fdiv h0, h0, h1
-; CHECK-STD-NEXT: fmul h0, h2, h0
-; CHECK-STD-NEXT: fmul h0, h3, h0
-; CHECK-STD-NEXT: ret
-;
-; CHECK-UNSAFE-LABEL: reassociate_muls_half:
-; CHECK-UNSAFE: // %bb.0:
-; CHECK-UNSAFE-NEXT: fdiv h0, h0, h1
-; CHECK-UNSAFE-NEXT: fmul h1, h3, h2
-; CHECK-UNSAFE-NEXT: fmul h0, h1, h0
-; CHECK-UNSAFE-NEXT: ret
+; CHECK-LABEL: reassociate_muls_half:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fdiv h0, h0, h1
+; CHECK-NEXT: fmul h0, h2, h0
+; CHECK-NEXT: fmul h0, h3, h0
+; CHECK-NEXT: ret
%t0 = fdiv half %x0, %x1
%t1 = fmul half %x2, %t0
%t2 = fmul half %x3, %t1
ret half %t2
}
+define half @reassociate_muls_half_reassoc(half %x0, half %x1, half %x2, half %x3) {
+; CHECK-LABEL: reassociate_muls_half_reassoc:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fdiv h0, h0, h1
+; CHECK-NEXT: fmul h1, h3, h2
+; CHECK-NEXT: fmul h0, h1, h0
+; CHECK-NEXT: ret
+ %t0 = fdiv reassoc nsz half %x0, %x1
+ %t1 = fmul reassoc nsz half %x2, %t0
+ %t2 = fmul reassoc nsz half %x3, %t1
+ ret half %t2
+}
+
; Verify that scalar integer adds are reassociated.
define i32 @reassociate_adds_i32(i32 %x0, i32 %x1, i32 %x2, i32 %x3) {
@@ -365,173 +414,222 @@ define i32 @reassociate_xors_i32(i32 %x0, i32 %x1, i32 %x2, i32 %x3) {
; Verify that we reassociate vector instructions too.
define <4 x float> @vector_reassociate_adds1(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
-; CHECK-STD-LABEL: vector_reassociate_adds1:
-; CHECK-STD: // %bb.0:
-; CHECK-STD-NEXT: fadd v0.4s, v0.4s, v1.4s
-; CHECK-STD-NEXT: fadd v0.4s, v0.4s, v2.4s
-; CHECK-STD-NEXT: fadd v0.4s, v0.4s, v3.4s
-; CHECK-STD-NEXT: ret
-;
-; CHECK-UNSAFE-LABEL: vector_reassociate_adds1:
-; CHECK-UNSAFE: // %bb.0:
-; CHECK-UNSAFE-NEXT: fadd v0.4s, v0.4s, v1.4s
-; CHECK-UNSAFE-NEXT: fadd v1.4s, v2.4s, v3.4s
-; CHECK-UNSAFE-NEXT: fadd v0.4s, v0.4s, v1.4s
-; CHECK-UNSAFE-NEXT: ret
+; CHECK-LABEL: vector_reassociate_adds1:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fadd v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: fadd v0.4s, v0.4s, v2.4s
+; CHECK-NEXT: fadd v0.4s, v0.4s, v3.4s
+; CHECK-NEXT: ret
%t0 = fadd <4 x float> %x0, %x1
%t1 = fadd <4 x float> %t0, %x2
%t2 = fadd <4 x float> %t1, %x3
ret <4 x float> %t2
}
+define <4 x float> @vector_reassociate_adds1_reassoc(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
+; CHECK-LABEL: vector_reassociate_adds1_reassoc:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fadd v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: fadd v1.4s, v2.4s, v3.4s
+; CHECK-NEXT: fadd v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: ret
+ %t0 = fadd reassoc nsz <4 x float> %x0, %x1
+ %t1 = fadd reassoc nsz <4 x float> %t0, %x2
+ %t2 = fadd reassoc nsz <4 x float> %t1, %x3
+ ret <4 x float> %t2
+}
+
define <4 x float> @vector_reassociate_adds2(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
-; CHECK-STD-LABEL: vector_reassociate_adds2:
-; CHECK-STD: // %bb.0:
-; CHECK-STD-NEXT: fadd v0.4s, v0.4s, v1.4s
-; CHECK-STD-NEXT: fadd v0.4s, v2.4s, v0.4s
-; CHECK-STD-NEXT: fadd v0.4s, v0.4s, v3.4s
-; CHECK-STD-NEXT: ret
-;
-; CHECK-UNSAFE-LABEL: vector_reassociate_adds2:
-; CHECK-UNSAFE: // %bb.0:
-; CHECK-UNSAFE-NEXT: fadd v0.4s, v0.4s, v1.4s
-; CHECK-UNSAFE-NEXT: fadd v1.4s, v2.4s, v3.4s
-; CHECK-UNSAFE-NEXT: fadd v0.4s, v1.4s, v0.4s
-; CHECK-UNSAFE-NEXT: ret
+; CHECK-LABEL: vector_reassociate_adds2:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fadd v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: fadd v0.4s, v2.4s, v0.4s
+; CHECK-NEXT: fadd v0.4s, v0.4s, v3.4s
+; CHECK-NEXT: ret
%t0 = fadd <4 x float> %x0, %x1
%t1 = fadd <4 x float> %x2, %t0
%t2 = fadd <4 x float> %t1, %x3
ret <4 x float> %t2
}
+define <4 x float> @vector_reassociate_adds2_reassoc(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
+; CHECK-LABEL: vector_reassociate_adds2_reassoc:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fadd v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: fadd v1.4s, v2.4s, v3.4s
+; CHECK-NEXT: fadd v0.4s, v1.4s, v0.4s
+; CHECK-NEXT: ret
+ %t0 = fadd reassoc nsz <4 x float> %x0, %x1
+ %t1 = fadd reassoc nsz <4 x float> %x2, %t0
+ %t2 = fadd reassoc nsz <4 x float> %t1, %x3
+ ret <4 x float> %t2
+}
+
define <4 x float> @vector_reassociate_adds3(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
-; CHECK-STD-LABEL: vector_reassociate_adds3:
-; CHECK-STD: // %bb.0:
-; CHECK-STD-NEXT: fadd v0.4s, v0.4s, v1.4s
-; CHECK-STD-NEXT: fadd v0.4s, v0.4s, v2.4s
-; CHECK-STD-NEXT: fadd v0.4s, v3.4s, v0.4s
-; CHECK-STD-NEXT: ret
-;
-; CHECK-UNSAFE-LABEL: vector_reassociate_adds3:
-; CHECK-UNSAFE: // %bb.0:
-; CHECK-UNSAFE-NEXT: fadd v0.4s, v0.4s, v1.4s
-; CHECK-UNSAFE-NEXT: fadd v1.4s, v3.4s, v2.4s
-; CHECK-UNSAFE-NEXT: fadd v0.4s, v1.4s, v0.4s
-; CHECK-UNSAFE-NEXT: ret
+; CHECK-LABEL: vector_reassociate_adds3:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fadd v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: fadd v0.4s, v0.4s, v2.4s
+; CHECK-NEXT: fadd v0.4s, v3.4s, v0.4s
+; CHECK-NEXT: ret
%t0 = fadd <4 x float> %x0, %x1
%t1 = fadd <4 x float> %t0, %x2
%t2 = fadd <4 x float> %x3, %t1
ret <4 x float> %t2
}
+define <4 x float> @vector_reassociate_adds3_reassoc(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
+; CHECK-LABEL: vector_reassociate_adds3_reassoc:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fadd v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: fadd v1.4s, v3.4s, v2.4s
+; CHECK-NEXT: fadd v0.4s, v1.4s, v0.4s
+; CHECK-NEXT: ret
+ %t0 = fadd reassoc nsz <4 x float> %x0, %x1
+ %t1 = fadd reassoc nsz <4 x float> %t0, %x2
+ %t2 = fadd reassoc nsz <4 x float> %x3, %t1
+ ret <4 x float> %t2
+}
+
define <4 x float> @vector_reassociate_adds4(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
-; CHECK-STD-LABEL: vector_reassociate_adds4:
-; CHECK-STD: // %bb.0:
-; CHECK-STD-NEXT: fadd v0.4s, v0.4s, v1.4s
-; CHECK-STD-NEXT: fadd v0.4s, v2.4s, v0.4s
-; CHECK-STD-NEXT: fadd v0.4s, v3.4s, v0.4s
-; CHECK-STD-NEXT: ret
-;
-; CHECK-UNSAFE-LABEL: vector_reassociate_adds4:
-; CHECK-UNSAFE: // %bb.0:
-; CHECK-UNSAFE-NEXT: fadd v0.4s, v0.4s, v1.4s
-; CHECK-UNSAFE-NEXT: fadd v1.4s, v3.4s, v2.4s
-; CHECK-UNSAFE-NEXT: fadd v0.4s, v1.4s, v0.4s
-; CHECK-UNSAFE-NEXT: ret
+; CHECK-LABEL: vector_reassociate_adds4:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fadd v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: fadd v0.4s, v2.4s, v0.4s
+; CHECK-NEXT: fadd v0.4s, v3.4s, v0.4s
+; CHECK-NEXT: ret
%t0 = fadd <4 x float> %x0, %x1
%t1 = fadd <4 x float> %x2, %t0
%t2 = fadd <4 x float> %x3, %t1
ret <4 x float> %t2
}
+define <4 x float> @vector_reassociate_adds4_reassoc(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
+; CHECK-LABEL: vector_reassociate_adds4_reassoc:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fadd v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: fadd v1.4s, v3.4s, v2.4s
+; CHECK-NEXT: fadd v0.4s, v1.4s, v0.4s
+; CHECK-NEXT: ret
+ %t0 = fadd reassoc nsz <4 x float> %x0, %x1
+ %t1 = fadd reassoc nsz <4 x float> %x2, %t0
+ %t2 = fadd reassoc nsz <4 x float> %x3, %t1
+ ret <4 x float> %t2
+}
+
; Verify that 64-bit vector half-precision adds are reassociated.
define <4 x half> @reassociate_adds_v4f16(<4 x half> %x0, <4 x half> %x1, <4 x half> %x2, <4 x half> %x3) {
-; CHECK-STD-LABEL: reassociate_adds_v4f16:
-; CHECK-STD: // %bb.0:
-; CHECK-STD-NEXT: fadd v0.4h, v0.4h, v1.4h
-; CHECK-STD-NEXT: fadd v0.4h, v2.4h, v0.4h
-; CHECK-STD-NEXT: fadd v0.4h, v3.4h, v0.4h
-; CHECK-STD-NEXT: ret
-;
-; CHECK-UNSAFE-LABEL: reassociate_adds_v4f16:
-; CHECK-UNSAFE: // %bb.0:
-; CHECK-UNSAFE-NEXT: fadd v0.4h, v0.4h, v1.4h
-; CHECK-UNSAFE-NEXT: fadd v1.4h, v3.4h, v2.4h
-; CHECK-UNSAFE-NEXT: fadd v0.4h, v1.4h, v0.4h
-; CHECK-UNSAFE-NEXT: ret
+; CHECK-LABEL: reassociate_adds_v4f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fadd v0.4h, v0.4h, v1.4h
+; CHECK-NEXT: fadd v0.4h, v2.4h, v0.4h
+; CHECK-NEXT: fadd v0.4h, v3.4h, v0.4h
+; CHECK-NEXT: ret
%t0 = fadd <4 x half> %x0, %x1
%t1 = fadd <4 x half> %x2, %t0
%t2 = fadd <4 x half> %x3, %t1
ret <4 x half> %t2
}
+define <4 x half> @reassociate_adds_v4f16_reassoc(<4 x half> %x0, <4 x half> %x1, <4 x half> %x2, <4 x half> %x3) {
+; CHECK-LABEL: reassociate_adds_v4f16_reassoc:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fadd v0.4h, v0.4h, v1.4h
+; CHECK-NEXT: fadd v1.4h, v3.4h, v2.4h
+; CHECK-NEXT: fadd v0.4h, v1.4h, v0.4h
+; CHECK-NEXT: ret
+ %t0 = fadd reassoc nsz <4 x half> %x0, %x1
+ %t1 = fadd reassoc nsz <4 x half> %x2, %t0
+ %t2 = fadd reassoc nsz <4 x half> %x3, %t1
+ ret <4 x half> %t2
+}
+
; Verify that 128-bit vector half-precision multiplies are reassociated.
define <8 x half> @reassociate_muls_v8f16(<8 x half> %x0, <8 x half> %x1, <8 x half> %x2, <8 x half> %x3) {
-; CHECK-STD-LABEL: reassociate_muls_v8f16:
-; CHECK-STD: // %bb.0:
-; CHECK-STD-NEXT: fadd v0.8h, v0.8h, v1.8h
-; CHECK-STD-NEXT: fmul v0.8h, v2.8h, v0.8h
-; CHECK-STD-NEXT: fmul v0.8h, v3.8h, v0.8h
-; CHECK-STD-NEXT: ret
-;
-; CHECK-UNSAFE-LABEL: reassociate_muls_v8f16:
-; CHECK-UNSAFE: // %bb.0:
-; CHECK-UNSAFE-NEXT: fadd v0.8h, v0.8h, v1.8h
-; CHECK-UNSAFE-NEXT: fmul v1.8h, v3.8h, v2.8h
-; CHECK-UNSAFE-NEXT: fmul v0.8h, v1.8h, v0.8h
-; CHECK-UNSAFE-NEXT: ret
+; CHECK-LABEL: reassociate_muls_v8f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fadd v0.8h, v0.8h, v1.8h
+; CHECK-NEXT: fmul v0.8h, v2.8h, v0.8h
+; CHECK-NEXT: fmul v0.8h, v3.8h, v0.8h
+; CHECK-NEXT: ret
%t0 = fadd <8 x half> %x0, %x1
%t1 = fmul <8 x half> %x2, %t0
%t2 = fmul <8 x half> %x3, %t1
ret <8 x half> %t2
}
+define <8 x half> @reassociate_muls_v8f16_reassoc(<8 x half> %x0, <8 x half> %x1, <8 x half> %x2, <8 x half> %x3) {
+; CHECK-LABEL: reassociate_muls_v8f16_reassoc:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fadd v0.8h, v0.8h, v1.8h
+; CHECK-NEXT: fmul v1.8h, v3.8h, v2.8h
+; CHECK-NEXT: fmul v0.8h, v1.8h, v0.8h
+; CHECK-NEXT: ret
+ %t0 = fadd reassoc nsz <8 x half> %x0, %x1
+ %t1 = fmul reassoc nsz <8 x half> %x2, %t0
+ %t2 = fmul reassoc nsz <8 x half> %x3, %t1
+ ret <8 x half> %t2
+}
+
; Verify that 128-bit vector single-precision multiplies are reassociated.
define <4 x float> @reassociate_muls_v4f32(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
-; CHECK-STD-LABEL: reassociate_muls_v4f32:
-; CHECK-STD: // %bb.0:
-; CHECK-STD-NEXT: fadd v0.4s, v0.4s, v1.4s
-; CHECK-STD-NEXT: fmul v0.4s, v2.4s, v0.4s
-; CHECK-STD-NEXT: fmul v0.4s, v3.4s, v0.4s
-; CHECK-STD-NEXT: ret
-;
-; CHECK-UNSAFE-LABEL: reassociate_muls_v4f32:
-; CHECK-UNSAFE: // %bb.0:
-; CHECK-UNSAFE-NEXT: fadd v0.4s, v0.4s, v1.4s
-; CHECK-UNSAFE-NEXT: fmul v1.4s, v3.4s, v2.4s
-; CHECK-UNSAFE-NEXT: fmul v0.4s, v1.4s, v0.4s
-; CHECK-UNSAFE-NEXT: ret
+; CHECK-LABEL: reassociate_muls_v4f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fadd v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: fmul v0.4s, v2.4s, v0.4s
+; CHECK-NEXT: fmul v0.4s, v3.4s, v0.4s
+; CHECK-NEXT: ret
%t0 = fadd <4 x float> %x0, %x1
%t1 = fmul <4 x float> %x2, %t0
%t2 = fmul <4 x float> %x3, %t1
ret <4 x float> %t2
}
+define <4 x float> @reassociate_muls_v4f32_reassoc(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
+; CHECK-LABEL: reassociate_muls_v4f32_reassoc:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fadd v0.4s, v0.4s, v1.4s
+; CHECK-NEXT: fmul v1.4s, v3.4s, v2.4s
+; CHECK-NEXT: fmul v0.4s, v1.4s, v0.4s
+; CHECK-NEXT: ret
+ %t0 = fadd reassoc nsz <4 x float> %x0, %x1
+ %t1 = fmul reassoc nsz <4 x float> %x2, %t0
+ %t2 = fmul reassoc nsz <4 x float> %x3, %t1
+ ret <4 x float> %t2
+}
+
; Verify that 128-bit vector double-precision multiplies are reassociated.
define <2 x double> @reassociate_muls_v2f64(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, <2 x double> %x3) {
-; CHECK-STD-LABEL: reassociate_muls_v2f64:
-; CHECK-STD: // %bb.0:
-; CHECK-STD-NEXT: fadd v0.2d, v0.2d, v1.2d
-; CHECK-STD-NEXT: fmul v0.2d, v2.2d, v0.2d
-; CHECK-STD-NEXT: fmul v0.2d, v3.2d, v0.2d
-; CHECK-STD-NEXT: ret
-;
-; CHECK-UNSAFE-LABEL: reassociate_muls_v2f64:
-; CHECK-UNSAFE: // %bb.0:
-; CHECK-UNSAFE-NEXT: fadd v0.2d, v0.2d, v1.2d
-; CHECK-UNSAFE-NEXT: fmul v1.2d, v3.2d, v2.2d
-; CHECK-UNSAFE-NEXT: fmul v0.2d, v1.2d, v0.2d
-; CHECK-UNSAFE-NEXT: ret
+; CHECK-LABEL: reassociate_muls_v2f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fadd v0.2d, v0.2d, v1.2d
+; CHECK-NEXT: fmul v0.2d, v2.2d, v0.2d
+; CHECK-NEXT: fmul v0.2d, v3.2d, v0.2d
+; CHECK-NEXT: ret
%t0 = fadd <2 x double> %x0, %x1
%t1 = fmul <2 x double> %x2, %t0
%t2 = fmul <2 x double> %x3, %t1
ret <2 x double> %t2
}
+define <2 x double> @reassociate_muls_v2f64_reassoc(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, <2 x double> %x3) {
+; CHECK-LABEL: reassociate_muls_v2f64_reassoc:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fadd v0.2d, v0.2d, v1.2d
+; CHECK-NEXT: fmul v1.2d, v3.2d, v2.2d
+; CHECK-NEXT: fmul v0.2d, v1.2d, v0.2d
+; CHECK-NEXT: ret
+ %t0 = fadd reassoc nsz <2 x double> %x0, %x1
+ %t1 = fmul reassoc nsz <2 x double> %x2, %t0
+ %t2 = fmul reassoc nsz <2 x double> %x3, %t1
+ ret <2 x double> %t2
+}
+
+
; Verify that vector integer arithmetic operations are reassociated.
define <2 x i32> @reassociate_muls_v2i32(<2 x i32> %x0, <2 x i32> %x1, <2 x i32> %x2, <2 x i32> %x3) {
@@ -606,65 +704,83 @@ define <4 x i32> @reassociate_xors_v4i32(<4 x i32> %x0, <4 x i32> %x1, <4 x i32>
; Verify that scalable vector FP arithmetic operations are reassociated.
define <vscale x 8 x half> @reassociate_adds_nxv4f16(<vscale x 8 x half> %x0, <vscale x 8 x half> %x1, <vscale x 8 x half> %x2, <vscale x 8 x half> %x3) {
-; CHECK-STD-LABEL: reassociate_adds_nxv4f16:
-; CHECK-STD: // %bb.0:
-; CHECK-STD-NEXT: fadd z0.h, z0.h, z1.h
-; CHECK-STD-NEXT: fadd z0.h, z2.h, z0.h
-; CHECK-STD-NEXT: fadd z0.h, z3.h, z0.h
-; CHECK-STD-NEXT: ret
-;
-; CHECK-UNSAFE-LABEL: reassociate_adds_nxv4f16:
-; CHECK-UNSAFE: // %bb.0:
-; CHECK-UNSAFE-NEXT: fadd z0.h, z0.h, z1.h
-; CHECK-UNSAFE-NEXT: fadd z1.h, z3.h, z2.h
-; CHECK-UNSAFE-NEXT: fadd z0.h, z1.h, z0.h
-; CHECK-UNSAFE-NEXT: ret
+; CHECK-LABEL: reassociate_adds_nxv4f16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fadd z0.h, z0.h, z1.h
+; CHECK-NEXT: fadd z0.h, z2.h, z0.h
+; CHECK-NEXT: fadd z0.h, z3.h, z0.h
+; CHECK-NEXT: ret
%t0 = fadd reassoc <vscale x 8 x half> %x0, %x1
%t1 = fadd reassoc <vscale x 8 x half> %x2, %t0
%t2 = fadd reassoc <vscale x 8 x half> %x3, %t1
ret <vscale x 8 x half> %t2
}
+define <vscale x 8 x half> @reassociate_adds_nxv4f16_nsz(<vscale x 8 x half> %x0, <vscale x 8 x half> %x1, <vscale x 8 x half> %x2, <vscale x 8 x half> %x3) {
+; CHECK-LABEL: reassociate_adds_nxv4f16_nsz:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fadd z0.h, z0.h, z1.h
+; CHECK-NEXT: fadd z1.h, z3.h, z2.h
+; CHECK-NEXT: fadd z0.h, z1.h, z0.h
+; CHECK-NEXT: ret
+ %t0 = fadd reassoc nsz <vscale x 8 x half> %x0, %x1
+ %t1 = fadd reassoc nsz <vscale x 8 x half> %x2, %t0
+ %t2 = fadd reassoc nsz <vscale x 8 x half> %x3, %t1
+ ret <vscale x 8 x half> %t2
+}
+
define <vscale x 4 x float> @reassociate_adds_nxv4f32(<vscale x 4 x float> %x0, <vscale x 4 x float> %x1, <vscale x 4 x float> %x2, <vscale x 4 x float> %x3) {
-; CHECK-STD-LABEL: reassociate_adds_nxv4f32:
-; CHECK-STD: // %bb.0:
-; CHECK-STD-NEXT: fadd z0.s, z0.s, z1.s
-; CHECK-STD-NEXT: fadd z0.s, z2.s, z0.s
-; CHECK-STD-NEXT: fadd z0.s, z3.s, z0.s
-; CHECK-STD-NEXT: ret
-;
-; CHECK-UNSAFE-LABEL: reassociate_adds_nxv4f32:
-; CHECK-UNSAFE: // %bb.0:
-; CHECK-UNSAFE-NEXT: fadd z0.s, z0.s, z1.s
-; CHECK-UNSAFE-NEXT: fadd z1.s, z3.s, z2.s
-; CHECK-UNSAFE-NEXT: fadd z0.s, z1.s, z0.s
-; CHECK-UNSAFE-NEXT: ret
+; CHECK-LABEL: reassociate_adds_nxv4f32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fadd z0.s, z0.s, z1.s
+; CHECK-NEXT: fadd z0.s, z2.s, z0.s
+; CHECK-NEXT: fadd z0.s, z3.s, z0.s
+; CHECK-NEXT: ret
%t0 = fadd reassoc <vscale x 4 x float> %x0, %x1
%t1 = fadd reassoc <vscale x 4 x float> %x2, %t0
%t2 = fadd reassoc <vscale x 4 x float> %x3, %t1
ret <vscale x 4 x float> %t2
}
+define <vscale x 4 x float> @reassociate_adds_nxv4f32_nsz(<vscale x 4 x float> %x0, <vscale x 4 x float> %x1, <vscale x 4 x float> %x2, <vscale x 4 x float> %x3) {
+; CHECK-LABEL: reassociate_adds_nxv4f32_nsz:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fadd z0.s, z0.s, z1.s
+; CHECK-NEXT: fadd z1.s, z3.s, z2.s
+; CHECK-NEXT: fadd z0.s, z1.s, z0.s
+; CHECK-NEXT: ret
+ %t0 = fadd reassoc nsz <vscale x 4 x float> %x0, %x1
+ %t1 = fadd reassoc nsz <vscale x 4 x float> %x2, %t0
+ %t2 = fadd reassoc nsz <vscale x 4 x float> %x3, %t1
+ ret <vscale x 4 x float> %t2
+}
+
define <vscale x 2 x double> @reassociate_muls_nxv2f64(<vscale x 2 x double> %x0, <vscale x 2 x double> %x1, <vscale x 2 x double> %x2, <vscale x 2 x double> %x3) {
-; CHECK-STD-LABEL: reassociate_muls_nxv2f64:
-; CHECK-STD: // %bb.0:
-; CHECK-STD-NEXT: fmul z0.d, z0.d, z1.d
-; CHECK-STD-NEXT: fmul z0.d, z2.d, z0.d
-; CHECK-STD-NEXT: fmul z0.d, z3.d, z0.d
-; CHECK-STD-NEXT: ret
-;
-; CHECK-UNSAFE-LABEL: reassociate_muls_nxv2f64:
-; CHECK-UNSAFE: // %bb.0:
-; CHECK-UNSAFE-NEXT: fmul z0.d, z0.d, z1.d
-; CHECK-UNSAFE-NEXT: fmul z1.d, z3.d, z2.d
-; CHECK-UNSAFE-NEXT: fmul z0.d, z1.d, z0.d
-; CHECK-UNSAFE-NEXT: ret
+; CHECK-LABEL: reassociate_muls_nxv2f64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fmul z0.d, z0.d, z1.d
+; CHECK-NEXT: fmul z0.d, z2.d, z0.d
+; CHECK-NEXT: fmul z0.d, z3.d, z0.d
+; CHECK-NEXT: ret
%t0 = fmul reassoc <vscale x 2 x double> %x0, %x1
%t1 = fmul reassoc <vscale x 2 x double> %x2, %t0
%t2 = fmul reassoc <vscale x 2 x double> %x3, %t1
ret <vscale x 2 x double> %t2
}
+define <vscale x 2 x double> @reassociate_muls_nxv2f64_nsz(<vscale x 2 x double> %x0, <vscale x 2 x double> %x1, <vscale x 2 x double> %x2, <vscale x 2 x double> %x3) {
+; CHECK-LABEL: reassociate_muls_nxv2f64_nsz:
+; CHECK: // %bb.0:
+; CHECK-NEXT: fmul z0.d, z0.d, z1.d
+; CHECK-NEXT: fmul z1.d, z3.d, z2.d
+; CHECK-NEXT: fmul z0.d, z1.d, z0.d
+; CHECK-NEXT: ret
+ %t0 = fmul reassoc nsz <vscale x 2 x double> %x0, %x1
+ %t1 = fmul reassoc nsz <vscale x 2 x double> %x2, %t0
+ %t2 = fmul reassoc nsz <vscale x 2 x double> %x3, %t1
+ ret <vscale x 2 x double> %t2
+}
+
; Verify that scalable vector integer arithmetic operations are reassociated.
define <vscale x 16 x i8> @reassociate_muls_nxv16i8(<vscale x 16 x i8> %x0, <vscale x 16 x i8> %x1, <vscale x 16 x i8> %x2, <vscale x 16 x i8> %x3) {
@@ -753,55 +869,30 @@ define <vscale x 8 x i16> @reassociate_ors_nxv8i16(<vscale x 8 x i16> %x0, <vsca
declare double @bar()
define double @reassociate_adds_from_calls() {
-; CHECK-STD-LABEL: reassociate_adds_from_calls:
-; CHECK-STD: // %bb.0:
-; CHECK-STD-NEXT: str d10, [sp, #-32]! // 8-byte Folded Spill
-; CHECK-STD-NEXT: stp d9, d8, [sp, #8] // 16-byte Folded Spill
-; CHECK-STD-NEXT: str x30, [sp, #24] // 8-byte Folded Spill
-; CHECK-STD-NEXT: .cfi_def_cfa_offset 32
-; CHECK-STD-NEXT: .cfi_offset w30, -8
-; CHECK-STD-NEXT: .cfi_offset b8, -16
-; CHECK-STD-NEXT: .cfi_offset b9, -24
-; CHECK-STD-NEXT: .cfi_offset b10, -32
-; CHECK-STD-NEXT: bl bar
-; CHECK-STD-NEXT: fmov d8, d0
-; CHECK-STD-NEXT: bl bar
-; CHECK-STD-NEXT: fmov d9, d0
-; CHECK-STD-NEXT: bl bar
-; CHECK-STD-NEXT: fmov d10, d0
-; CHECK-STD-NEXT: bl bar
-; CHECK-STD-NEXT: fadd d1, d8, d9
-; CHECK-STD-NEXT: ldp d9, d8, [sp, #8] // 16-byte Folded Reload
-; CHECK-STD-NEXT: ldr x30, [sp, #24] // 8-byte Folded Reload
-; CHECK-STD-NEXT: fadd d1, d1, d10
-; CHECK-STD-NEXT: fadd d0, d1, d0
-; CHECK-STD-NEXT: ldr d10, [sp], #32 // 8-byte Folded Reload
-; CHECK-STD-NEXT: ret
-;
-; CHECK-UNSAFE-LABEL: reassociate_adds_from_calls:
-; CHECK-UNSAFE: // %bb.0:
-; CHECK-UNSAFE-NEXT: str d10, [sp, #-32]! // 8-byte Folded Spill
-; CHECK-UNSAFE-NEXT: stp d9, d8, [sp, #8] // 16-byte Folded Spill
-; CHECK-UNSAFE-NEXT: str x30, [sp, #24] // 8-byte Folded Spill
-; CHECK-UNSAFE-NEXT: .cfi_def_cfa_offset 32
-; CHECK-UNSAFE-NEXT: .cfi_offset w30, -8
-; CHECK-UNSAFE-NEXT: .cfi_offset b8, -16
-; CHECK-UNSAFE-NEXT: .cfi_offset b9, -24
-; CHECK-UNSAFE-NEXT: .cfi_offset b10, -32
-; CHECK-UNSAFE-NEXT: bl bar
-; CHECK-UNSAFE-NEXT: fmov d8, d0
-; CHECK-UNSAFE-NEXT: bl bar
-; CHECK-UNSAFE-NEXT: fmov d9, d0
-; CHECK-UNSAFE-NEXT: bl bar
-; CHECK-UNSAFE-NEXT: fmov d10, d0
-; CHECK-UNSAFE-NEXT: bl bar
-; CHECK-UNSAFE-NEXT: fadd d1, d8, d9
-; CHECK-UNSAFE-NEXT: ldp d9, d8, [sp, #8] // 16-byte Folded Reload
-; CHECK-UNSAFE-NEXT: ldr x30, [sp, #24] // 8-byte Folded Reload
-; CHECK-UNSAFE-NEXT: fadd d0, d10, d0
-; CHECK-UNSAFE-NEXT: fadd d0, d1, d0
-; CHECK-UNSAFE-NEXT: ldr d10, [sp], #32 // 8-byte Folded Reload
-; CHECK-UNSAFE-NEXT: ret
+; CHECK-LABEL: reassociate_adds_from_calls:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str d10, [sp, #-32]! // 8-byte Folded Spill
+; CHECK-NEXT: stp d9, d8, [sp, #8] // 16-byte Folded Spill
+; CHECK-NEXT: str x30, [sp, #24] // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: .cfi_offset w30, -8
+; CHECK-NEXT: .cfi_offset b8, -16
+; CHECK-NEXT: .cfi_offset b9, -24
+; CHECK-NEXT: .cfi_offset b10, -32
+; CHECK-NEXT: bl bar
+; CHECK-NEXT: fmov d8, d0
+; CHECK-NEXT: bl bar
+; CHECK-NEXT: fmov d9, d0
+; CHECK-NEXT: bl bar
+; CHECK-NEXT: fmov d10, d0
+; CHECK-NEXT: bl bar
+; CHECK-NEXT: fadd d1, d8, d9
+; CHECK-NEXT: ldp d9, d8, [sp, #8] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x30, [sp, #24] // 8-byte Folded Reload
+; CHECK-NEXT: fadd d1, d1, d10
+; CHECK-NEXT: fadd d0, d1, d0
+; CHECK-NEXT: ldr d10, [sp], #32 // 8-byte Folded Reload
+; CHECK-NEXT: ret
%x0 = call double @bar()
%x1 = call double @bar()
%x2 = call double @bar()
@@ -812,6 +903,41 @@ define double @reassociate_adds_from_calls() {
ret double %t2
}
+define double @reassociate_adds_from_calls_reassoc() {
+; CHECK-LABEL: reassociate_adds_from_calls_reassoc:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str d10, [sp, #-32]! // 8-byte Folded Spill
+; CHECK-NEXT: stp d9, d8, [sp, #8] // 16-byte Folded Spill
+; CHECK-NEXT: str x30, [sp, #24] // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: .cfi_offset w30, -8
+; CHECK-NEXT: .cfi_offset b8, -16
+; CHECK-NEXT: .cfi_offset b9, -24
+; CHECK-NEXT: .cfi_offset b10, -32
+; CHECK-NEXT: bl bar
+; CHECK-NEXT: fmov d8, d0
+; CHECK-NEXT: bl bar
+; CHECK-NEXT: fmov d9, d0
+; CHECK-NEXT: bl bar
+; CHECK-NEXT: fmov d10, d0
+; CHECK-NEXT: bl bar
+; CHECK-NEXT: fadd d1, d8, d9
+; CHECK-NEXT: ldp d9, d8, [sp, #8] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x30, [sp, #24] // 8-byte Folded Reload
+; CHECK-NEXT: fadd d0, d10, d0
+; CHECK-NEXT: fadd d0, d1, d0
+; CHECK-NEXT: ldr d10, [sp], #32 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ %x0 = call reassoc nsz double @bar()
+ %x1 = call reassoc nsz double @bar()
+ %x2 = call reassoc nsz double @bar()
+ %x3 = call reassoc nsz double @bar()
+ %t0 = fadd reassoc nsz double %x0, %x1
+ %t1 = fadd reassoc nsz double %t0, %x2
+ %t2 = fadd reassoc nsz double %t1, %x3
+ ret double %t2
+}
+
define double @already_reassociated() {
; CHECK-LABEL: already_reassociated:
; CHECK: // %bb.0:
@@ -846,3 +972,38 @@ define double @already_reassociated() {
%t2 = fadd double %t0, %t1
ret double %t2
}
+
+define double @already_reassociated_reassoc() {
+; CHECK-LABEL: already_reassociated_reassoc:
+; CHECK: // %bb.0:
+; CHECK-NEXT: str d10, [sp, #-32]! // 8-byte Folded Spill
+; CHECK-NEXT: stp d9, d8, [sp, #8] // 16-byte Folded Spill
+; CHECK-NEXT: str x30, [sp, #24] // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: .cfi_offset w30, -8
+; CHECK-NEXT: .cfi_offset b8, -16
+; CHECK-NEXT: .cfi_offset b9, -24
+; CHECK-NEXT: .cfi_offset b10, -32
+; CHECK-NEXT: bl bar
+; CHECK-NEXT: fmov d8, d0
+; CHECK-NEXT: bl bar
+; CHECK-NEXT: fmov d9, d0
+; CHECK-NEXT: bl bar
+; CHECK-NEXT: fmov d10, d0
+; CHECK-NEXT: bl bar
+; CHECK-NEXT: fadd d1, d8, d9
+; CHECK-NEXT: ldp d9, d8, [sp, #8] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x30, [sp, #24] // 8-byte Folded Reload
+; CHECK-NEXT: fadd d0, d10, d0
+; CHECK-NEXT: fadd d0, d1, d0
+; CHECK-NEXT: ldr d10, [sp], #32 // 8-byte Folded Reload
+; CHECK-NEXT: ret
+ %x0 = call reassoc nsz double @bar()
+ %x1 = call reassoc nsz double @bar()
+ %x2 = call reassoc nsz double @bar()
+ %x3 = call reassoc nsz double @bar()
+ %t0 = fadd reassoc nsz double %x0, %x1
+ %t1 = fadd reassoc nsz double %x2, %x3
+ %t2 = fadd reassoc nsz double %t0, %t1
+ ret double %t2
+}
diff --git a/llvm/test/CodeGen/AArch64/machine-combiner.mir b/llvm/test/CodeGen/AArch64/machine-combiner.mir
index b967aaa..a0e1280 100644
--- a/llvm/test/CodeGen/AArch64/machine-combiner.mir
+++ b/llvm/test/CodeGen/AArch64/machine-combiner.mir
@@ -1,4 +1,4 @@
-# RUN: llc -mtriple=aarch64-none-linux-gnu -mcpu=cortex-a57 -enable-unsafe-fp-math \
+# RUN: llc -mtriple=aarch64-none-linux-gnu -mcpu=cortex-a57 \
# RUN: -run-pass machine-combiner -machine-combiner-inc-threshold=0 \
# RUN: -machine-combiner-verify-pattern-order=true -verify-machineinstrs -o - %s | FileCheck %s
---
@@ -36,8 +36,8 @@ body: |
%6 = ADDWrr %3, killed %5
%7 = SCVTFUWDri killed %6, implicit $fpcr
; CHECK: FMADDDrrr %7, %7, %0, implicit $fpcr
- %8 = FMULDrr %7, %7, implicit $fpcr
- %9 = FADDDrr %0, killed %8, implicit $fpcr
+ %8 = contract FMULDrr %7, %7, implicit $fpcr
+ %9 = contract FADDDrr %0, killed %8, implicit $fpcr
$d0 = COPY %9
RET_ReallyLR implicit $d0
diff --git a/llvm/test/CodeGen/AArch64/preferred-function-alignment.ll b/llvm/test/CodeGen/AArch64/preferred-function-alignment.ll
index 05f4fb1..a6cb712 100644
--- a/llvm/test/CodeGen/AArch64/preferred-function-alignment.ll
+++ b/llvm/test/CodeGen/AArch64/preferred-function-alignment.ll
@@ -40,3 +40,10 @@ define void @test_optsize() optsize {
; CHECK-LABEL: test_optsize
; CHECK-NEXT: .p2align 2
+
+define void @test_minsize() minsize {
+ ret void
+}
+
+; CHECK-LABEL: test_minsize
+; CHECK-NEXT: .p2align 2
diff --git a/llvm/test/CodeGen/AArch64/sched-past-vector-ldst.ll b/llvm/test/CodeGen/AArch64/sched-past-vector-ldst.ll
index cd53833..fc5012c 100644
--- a/llvm/test/CodeGen/AArch64/sched-past-vector-ldst.ll
+++ b/llvm/test/CodeGen/AArch64/sched-past-vector-ldst.ll
@@ -23,21 +23,21 @@ entry:
%scevgep = getelementptr %Struct, ptr %this, i64 0, i32 2, i64 8, i32 0
%vec1 = tail call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0(ptr %scevgep)
%ev1 = extractvalue { <4 x float>, <4 x float> } %vec1, 1
- %fm1 = fmul <4 x float> %f, %ev1
- %av1 = fadd <4 x float> %f, %fm1
+ %fm1 = fmul contract <4 x float> %f, %ev1
+ %av1 = fadd contract <4 x float> %f, %fm1
%ev2 = extractvalue { <4 x float>, <4 x float> } %vec1, 0
- %fm2 = fmul <4 x float> %f, %ev2
- %av2 = fadd <4 x float> %f, %fm2
+ %fm2 = fmul contract <4 x float> %f, %ev2
+ %av2 = fadd contract <4 x float> %f, %fm2
%scevgep2 = getelementptr %Struct, ptr %this, i64 0, i32 3, i64 8, i32 0
tail call void @llvm.aarch64.neon.st2.v4f32.p0(<4 x float> %av2, <4 x float> %av1, ptr %scevgep2)
%scevgep3 = getelementptr %Struct, ptr %this, i64 0, i32 2, i64 12, i32 0
%vec2 = tail call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0(ptr %scevgep3)
%ev3 = extractvalue { <4 x float>, <4 x float> } %vec2, 1
- %fm3 = fmul <4 x float> %f, %ev3
- %av3 = fadd <4 x float> %f, %fm3
+ %fm3 = fmul contract <4 x float> %f, %ev3
+ %av3 = fadd contract <4 x float> %f, %fm3
%ev4 = extractvalue { <4 x float>, <4 x float> } %vec2, 0
- %fm4 = fmul <4 x float> %f, %ev4
- %av4 = fadd <4 x float> %f, %fm4
+ %fm4 = fmul contract <4 x float> %f, %ev4
+ %av4 = fadd contract <4 x float> %f, %fm4
%scevgep4 = getelementptr %Struct, ptr %this, i64 0, i32 3, i64 12, i32 0
tail call void @llvm.aarch64.neon.st2.v4f32.p0(<4 x float> %av4, <4 x float> %av3, ptr %scevgep4)
ret void
@@ -49,6 +49,6 @@ declare { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0(ptr) #2
; Function Attrs: nounwind
declare void @llvm.aarch64.neon.st2.v4f32.p0(<4 x float>, <4 x float>, ptr nocapture) #1
-attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="true" "use-soft-float"="false" }
+attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "stack-protector-buffer-size"="8" "use-soft-float"="false" }
attributes #1 = { nounwind }
attributes #2 = { nounwind readonly }
diff --git a/llvm/test/CodeGen/AArch64/sqrt-fastmath.ll b/llvm/test/CodeGen/AArch64/sqrt-fastmath.ll
index f73b4bd..e29993d 100644
--- a/llvm/test/CodeGen/AArch64/sqrt-fastmath.ll
+++ b/llvm/test/CodeGen/AArch64/sqrt-fastmath.ll
@@ -2,15 +2,15 @@
; RUN: llc < %s -mtriple=aarch64-unknown-linux-gnu -mattr=+neon,-use-reciprocal-square-root | FileCheck %s --check-prefix=FAULT
; RUN: llc < %s -mtriple=aarch64-unknown-linux-gnu -mattr=+neon,+use-reciprocal-square-root | FileCheck %s
-declare float @llvm.sqrt.f32(float) #0
-declare <2 x float> @llvm.sqrt.v2f32(<2 x float>) #0
-declare <4 x float> @llvm.sqrt.v4f32(<4 x float>) #0
-declare <8 x float> @llvm.sqrt.v8f32(<8 x float>) #0
-declare double @llvm.sqrt.f64(double) #0
-declare <2 x double> @llvm.sqrt.v2f64(<2 x double>) #0
-declare <4 x double> @llvm.sqrt.v4f64(<4 x double>) #0
+declare float @llvm.sqrt.f32(float)
+declare <2 x float> @llvm.sqrt.v2f32(<2 x float>)
+declare <4 x float> @llvm.sqrt.v4f32(<4 x float>)
+declare <8 x float> @llvm.sqrt.v8f32(<8 x float>)
+declare double @llvm.sqrt.f64(double)
+declare <2 x double> @llvm.sqrt.v2f64(<2 x double>)
+declare <4 x double> @llvm.sqrt.v4f64(<4 x double>)
-define float @fsqrt(float %a) #0 {
+define float @fsqrt(float %a) {
; FAULT-LABEL: fsqrt:
; FAULT: // %bb.0:
; FAULT-NEXT: fsqrt s0, s0
@@ -33,7 +33,7 @@ define float @fsqrt(float %a) #0 {
ret float %1
}
-define float @fsqrt_ieee_denorms(float %a) #1 {
+define float @fsqrt_ieee_denorms(float %a) #0 {
; FAULT-LABEL: fsqrt_ieee_denorms:
; FAULT: // %bb.0:
; FAULT-NEXT: fsqrt s0, s0
@@ -56,7 +56,7 @@ define float @fsqrt_ieee_denorms(float %a) #1 {
ret float %1
}
-define <2 x float> @f2sqrt(<2 x float> %a) #0 {
+define <2 x float> @f2sqrt(<2 x float> %a) {
; FAULT-LABEL: f2sqrt:
; FAULT: // %bb.0:
; FAULT-NEXT: fsqrt v0.2s, v0.2s
@@ -79,7 +79,7 @@ define <2 x float> @f2sqrt(<2 x float> %a) #0 {
ret <2 x float> %1
}
-define <4 x float> @f4sqrt(<4 x float> %a) #0 {
+define <4 x float> @f4sqrt(<4 x float> %a) {
; FAULT-LABEL: f4sqrt:
; FAULT: // %bb.0:
; FAULT-NEXT: fsqrt v0.4s, v0.4s
@@ -102,7 +102,7 @@ define <4 x float> @f4sqrt(<4 x float> %a) #0 {
ret <4 x float> %1
}
-define <8 x float> @f8sqrt(<8 x float> %a) #0 {
+define <8 x float> @f8sqrt(<8 x float> %a) {
; FAULT-LABEL: f8sqrt:
; FAULT: // %bb.0:
; FAULT-NEXT: fsqrt v0.4s, v0.4s
@@ -136,7 +136,7 @@ define <8 x float> @f8sqrt(<8 x float> %a) #0 {
ret <8 x float> %1
}
-define double @dsqrt(double %a) #0 {
+define double @dsqrt(double %a) {
; FAULT-LABEL: dsqrt:
; FAULT: // %bb.0:
; FAULT-NEXT: fsqrt d0, d0
@@ -162,7 +162,7 @@ define double @dsqrt(double %a) #0 {
ret double %1
}
-define double @dsqrt_ieee_denorms(double %a) #1 {
+define double @dsqrt_ieee_denorms(double %a) #0 {
; FAULT-LABEL: dsqrt_ieee_denorms:
; FAULT: // %bb.0:
; FAULT-NEXT: fsqrt d0, d0
@@ -188,7 +188,7 @@ define double @dsqrt_ieee_denorms(double %a) #1 {
ret double %1
}
-define <2 x double> @d2sqrt(<2 x double> %a) #0 {
+define <2 x double> @d2sqrt(<2 x double> %a) {
; FAULT-LABEL: d2sqrt:
; FAULT: // %bb.0:
; FAULT-NEXT: fsqrt v0.2d, v0.2d
@@ -214,7 +214,7 @@ define <2 x double> @d2sqrt(<2 x double> %a) #0 {
ret <2 x double> %1
}
-define <4 x double> @d4sqrt(<4 x double> %a) #0 {
+define <4 x double> @d4sqrt(<4 x double> %a) {
; FAULT-LABEL: d4sqrt:
; FAULT: // %bb.0:
; FAULT-NEXT: fsqrt v0.2d, v0.2d
@@ -254,7 +254,7 @@ define <4 x double> @d4sqrt(<4 x double> %a) #0 {
ret <4 x double> %1
}
-define float @frsqrt(float %a) #0 {
+define float @frsqrt(float %a) {
; FAULT-LABEL: frsqrt:
; FAULT: // %bb.0:
; FAULT-NEXT: fsqrt s0, s0
@@ -277,7 +277,7 @@ define float @frsqrt(float %a) #0 {
ret float %2
}
-define <2 x float> @f2rsqrt(<2 x float> %a) #0 {
+define <2 x float> @f2rsqrt(<2 x float> %a) {
; FAULT-LABEL: f2rsqrt:
; FAULT: // %bb.0:
; FAULT-NEXT: fsqrt v0.2s, v0.2s
@@ -300,7 +300,7 @@ define <2 x float> @f2rsqrt(<2 x float> %a) #0 {
ret <2 x float> %2
}
-define <4 x float> @f4rsqrt(<4 x float> %a) #0 {
+define <4 x float> @f4rsqrt(<4 x float> %a) {
; FAULT-LABEL: f4rsqrt:
; FAULT: // %bb.0:
; FAULT-NEXT: fsqrt v0.4s, v0.4s
@@ -323,7 +323,7 @@ define <4 x float> @f4rsqrt(<4 x float> %a) #0 {
ret <4 x float> %2
}
-define <8 x float> @f8rsqrt(<8 x float> %a) #0 {
+define <8 x float> @f8rsqrt(<8 x float> %a) {
; FAULT-LABEL: f8rsqrt:
; FAULT: // %bb.0:
; FAULT-NEXT: fsqrt v0.4s, v0.4s
@@ -355,7 +355,7 @@ define <8 x float> @f8rsqrt(<8 x float> %a) #0 {
ret <8 x float> %2
}
-define double @drsqrt(double %a) #0 {
+define double @drsqrt(double %a) {
; FAULT-LABEL: drsqrt:
; FAULT: // %bb.0:
; FAULT-NEXT: fsqrt d0, d0
@@ -381,7 +381,7 @@ define double @drsqrt(double %a) #0 {
ret double %2
}
-define <2 x double> @d2rsqrt(<2 x double> %a) #0 {
+define <2 x double> @d2rsqrt(<2 x double> %a) {
; FAULT-LABEL: d2rsqrt:
; FAULT: // %bb.0:
; FAULT-NEXT: fsqrt v0.2d, v0.2d
@@ -462,8 +462,8 @@ define double @sqrt_fdiv_common_operand(double %x) nounwind {
; CHECK-NEXT: fmul d1, d1, d2
; CHECK-NEXT: fmul d2, d1, d1
; CHECK-NEXT: frsqrts d2, d0, d2
-; CHECK-NEXT: fmul d1, d1, d2
; CHECK-NEXT: fmul d0, d0, d1
+; CHECK-NEXT: fmul d0, d0, d2
; CHECK-NEXT: ret
%sqrt = call fast double @llvm.sqrt.f64(double %x)
%r = fdiv fast double %x, %sqrt
@@ -487,8 +487,8 @@ define <2 x double> @sqrt_fdiv_common_operand_vec(<2 x double> %x) nounwind {
; CHECK-NEXT: fmul v1.2d, v1.2d, v2.2d
; CHECK-NEXT: fmul v2.2d, v1.2d, v1.2d
; CHECK-NEXT: frsqrts v2.2d, v0.2d, v2.2d
-; CHECK-NEXT: fmul v1.2d, v1.2d, v2.2d
; CHECK-NEXT: fmul v0.2d, v0.2d, v1.2d
+; CHECK-NEXT: fmul v0.2d, v0.2d, v2.2d
; CHECK-NEXT: ret
%sqrt = call <2 x double> @llvm.sqrt.v2f64(<2 x double> %x)
%r = fdiv arcp nsz reassoc <2 x double> %x, %sqrt
@@ -513,9 +513,9 @@ define double @sqrt_fdiv_common_operand_extra_use(double %x, ptr %p) nounwind {
; CHECK-NEXT: frsqrts d2, d0, d2
; CHECK-NEXT: fmul d1, d1, d2
; CHECK-NEXT: fmul d2, d1, d1
+; CHECK-NEXT: fmul d1, d0, d1
; CHECK-NEXT: frsqrts d2, d0, d2
; CHECK-NEXT: fmul d1, d1, d2
-; CHECK-NEXT: fmul d1, d0, d1
; CHECK-NEXT: fcsel d2, d0, d1, eq
; CHECK-NEXT: fmov d0, d1
; CHECK-NEXT: str d2, [x0]
@@ -671,5 +671,4 @@ define double @sqrt_simplify_before_recip_4_uses(double %x, ptr %p1, ptr %p2, pt
ret double %sqrt_fast
}
-attributes #0 = { "unsafe-fp-math"="true" }
-attributes #1 = { "unsafe-fp-math"="true" "denormal-fp-math"="ieee" }
+attributes #0 = { "denormal-fp-math"="ieee" }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul-post-legalize.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul-post-legalize.mir
index 789385d..b770d43 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul-post-legalize.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul-post-legalize.mir
@@ -1,12 +1,8 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 -run-pass=amdgpu-postlegalizer-combiner %s -o - | FileCheck -check-prefix=GFX9 %s
-# RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 -run-pass=amdgpu-postlegalizer-combiner -fp-contract=fast %s -o - | FileCheck -check-prefix=GFX9-CONTRACT %s
# RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 -run-pass=amdgpu-postlegalizer-combiner --denormal-fp-math=preserve-sign %s -o - | FileCheck -check-prefix=GFX9-DENORM %s
-# RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 -run-pass=amdgpu-postlegalizer-combiner -enable-unsafe-fp-math %s -o - | FileCheck -check-prefix=GFX9-UNSAFE %s
# RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 -run-pass=amdgpu-postlegalizer-combiner %s -o - | FileCheck -check-prefix=GFX10 %s
-# RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 -run-pass=amdgpu-postlegalizer-combiner -fp-contract=fast %s -o - | FileCheck -check-prefix=GFX10-CONTRACT %s
# RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 -run-pass=amdgpu-postlegalizer-combiner --denormal-fp-math=preserve-sign %s -o - | FileCheck -check-prefix=GFX10-DENORM %s
-# RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 -run-pass=amdgpu-postlegalizer-combiner -enable-unsafe-fp-math %s -o - | FileCheck -check-prefix=GFX10-UNSAFE %s
---
name: test_f32_add_mul
@@ -24,15 +20,7 @@ body: |
; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[COPY2]]
; GFX9-NEXT: $vgpr0 = COPY [[FADD]](s32)
; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
- ; GFX9-CONTRACT-LABEL: name: test_f32_add_mul
- ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2
- ; GFX9-CONTRACT-NEXT: {{ $}}
- ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GFX9-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]]
- ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[FMA]](s32)
- ; GFX9-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
+ ;
; GFX9-DENORM-LABEL: name: test_f32_add_mul
; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2
; GFX9-DENORM-NEXT: {{ $}}
@@ -43,15 +31,7 @@ body: |
; GFX9-DENORM-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[COPY2]]
; GFX9-DENORM-NEXT: $vgpr0 = COPY [[FADD]](s32)
; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
- ; GFX9-UNSAFE-LABEL: name: test_f32_add_mul
- ; GFX9-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2
- ; GFX9-UNSAFE-NEXT: {{ $}}
- ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GFX9-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]]
- ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[FMA]](s32)
- ; GFX9-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
+ ;
; GFX10-LABEL: name: test_f32_add_mul
; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2
; GFX10-NEXT: {{ $}}
@@ -62,15 +42,7 @@ body: |
; GFX10-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[COPY2]]
; GFX10-NEXT: $vgpr0 = COPY [[FADD]](s32)
; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
- ; GFX10-CONTRACT-LABEL: name: test_f32_add_mul
- ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2
- ; GFX10-CONTRACT-NEXT: {{ $}}
- ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GFX10-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]]
- ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[FMA]](s32)
- ; GFX10-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
+ ;
; GFX10-DENORM-LABEL: name: test_f32_add_mul
; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2
; GFX10-DENORM-NEXT: {{ $}}
@@ -81,15 +53,6 @@ body: |
; GFX10-DENORM-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[COPY2]]
; GFX10-DENORM-NEXT: $vgpr0 = COPY [[FADD]](s32)
; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
- ; GFX10-UNSAFE-LABEL: name: test_f32_add_mul
- ; GFX10-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2
- ; GFX10-UNSAFE-NEXT: {{ $}}
- ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GFX10-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]]
- ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[FMA]](s32)
- ; GFX10-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(s32) = COPY $vgpr2
@@ -100,6 +63,60 @@ body: |
...
---
+name: test_f32_add_mul_contract
+body: |
+ bb.1.entry:
+ liveins: $vgpr0, $vgpr1, $vgpr2
+
+ ; GFX9-LABEL: name: test_f32_add_mul_contract
+ ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]]
+ ; GFX9-NEXT: $vgpr0 = COPY [[FMA]](s32)
+ ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
+ ;
+ ; GFX9-DENORM-LABEL: name: test_f32_add_mul_contract
+ ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2
+ ; GFX9-DENORM-NEXT: {{ $}}
+ ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX9-DENORM-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]]
+ ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[FMA]](s32)
+ ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
+ ;
+ ; GFX10-LABEL: name: test_f32_add_mul_contract
+ ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]]
+ ; GFX10-NEXT: $vgpr0 = COPY [[FMA]](s32)
+ ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
+ ;
+ ; GFX10-DENORM-LABEL: name: test_f32_add_mul_contract
+ ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2
+ ; GFX10-DENORM-NEXT: {{ $}}
+ ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX10-DENORM-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]]
+ ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[FMA]](s32)
+ ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = COPY $vgpr1
+ %2:_(s32) = COPY $vgpr2
+ %4:_(s32) = contract G_FMUL %0, %1
+ %5:_(s32) = contract G_FADD %4, %2
+ $vgpr0 = COPY %5(s32)
+ S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
+...
+
+---
name: test_f32_add_mul_rhs
body: |
bb.1.entry:
@@ -115,15 +132,7 @@ body: |
; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[COPY2]], [[FMUL]]
; GFX9-NEXT: $vgpr0 = COPY [[FADD]](s32)
; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
- ; GFX9-CONTRACT-LABEL: name: test_f32_add_mul_rhs
- ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2
- ; GFX9-CONTRACT-NEXT: {{ $}}
- ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GFX9-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]]
- ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[FMA]](s32)
- ; GFX9-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
+ ;
; GFX9-DENORM-LABEL: name: test_f32_add_mul_rhs
; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2
; GFX9-DENORM-NEXT: {{ $}}
@@ -134,15 +143,7 @@ body: |
; GFX9-DENORM-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[COPY2]], [[FMUL]]
; GFX9-DENORM-NEXT: $vgpr0 = COPY [[FADD]](s32)
; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
- ; GFX9-UNSAFE-LABEL: name: test_f32_add_mul_rhs
- ; GFX9-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2
- ; GFX9-UNSAFE-NEXT: {{ $}}
- ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GFX9-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]]
- ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[FMA]](s32)
- ; GFX9-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
+ ;
; GFX10-LABEL: name: test_f32_add_mul_rhs
; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2
; GFX10-NEXT: {{ $}}
@@ -153,15 +154,7 @@ body: |
; GFX10-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[COPY2]], [[FMUL]]
; GFX10-NEXT: $vgpr0 = COPY [[FADD]](s32)
; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
- ; GFX10-CONTRACT-LABEL: name: test_f32_add_mul_rhs
- ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2
- ; GFX10-CONTRACT-NEXT: {{ $}}
- ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GFX10-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]]
- ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[FMA]](s32)
- ; GFX10-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
+ ;
; GFX10-DENORM-LABEL: name: test_f32_add_mul_rhs
; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2
; GFX10-DENORM-NEXT: {{ $}}
@@ -172,15 +165,6 @@ body: |
; GFX10-DENORM-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[COPY2]], [[FMUL]]
; GFX10-DENORM-NEXT: $vgpr0 = COPY [[FADD]](s32)
; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
- ; GFX10-UNSAFE-LABEL: name: test_f32_add_mul_rhs
- ; GFX10-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2
- ; GFX10-UNSAFE-NEXT: {{ $}}
- ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GFX10-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]]
- ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[FMA]](s32)
- ; GFX10-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(s32) = COPY $vgpr2
@@ -191,6 +175,60 @@ body: |
...
---
+name: test_f32_add_mul_rhs_contract
+body: |
+ bb.1.entry:
+ liveins: $vgpr0, $vgpr1, $vgpr2
+
+ ; GFX9-LABEL: name: test_f32_add_mul_rhs_contract
+ ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]]
+ ; GFX9-NEXT: $vgpr0 = COPY [[FMA]](s32)
+ ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
+ ;
+ ; GFX9-DENORM-LABEL: name: test_f32_add_mul_rhs_contract
+ ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2
+ ; GFX9-DENORM-NEXT: {{ $}}
+ ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX9-DENORM-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]]
+ ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[FMA]](s32)
+ ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
+ ;
+ ; GFX10-LABEL: name: test_f32_add_mul_rhs_contract
+ ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]]
+ ; GFX10-NEXT: $vgpr0 = COPY [[FMA]](s32)
+ ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
+ ;
+ ; GFX10-DENORM-LABEL: name: test_f32_add_mul_rhs_contract
+ ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2
+ ; GFX10-DENORM-NEXT: {{ $}}
+ ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX10-DENORM-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]]
+ ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[FMA]](s32)
+ ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = COPY $vgpr1
+ %2:_(s32) = COPY $vgpr2
+ %4:_(s32) = contract G_FMUL %0, %1
+ %5:_(s32) = contract G_FADD %2, %4
+ $vgpr0 = COPY %5(s32)
+ S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
+...
+
+---
name: test_add_mul_multiple_defs_z
body: |
bb.1.entry:
@@ -209,18 +247,7 @@ body: |
; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UV1]]
; GFX9-NEXT: $vgpr0 = COPY [[FADD]](s32)
- ; GFX9-CONTRACT-LABEL: name: test_add_mul_multiple_defs_z
- ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
- ; GFX9-CONTRACT-NEXT: {{ $}}
- ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GFX9-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; GFX9-CONTRACT-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
- ; GFX9-CONTRACT-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[MV]](p1) :: (load (<2 x s32>), addrspace 1)
- ; GFX9-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
- ; GFX9-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[UV1]]
- ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[FMA]](s32)
+ ;
; GFX9-DENORM-LABEL: name: test_add_mul_multiple_defs_z
; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
; GFX9-DENORM-NEXT: {{ $}}
@@ -234,18 +261,7 @@ body: |
; GFX9-DENORM-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
; GFX9-DENORM-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UV1]]
; GFX9-DENORM-NEXT: $vgpr0 = COPY [[FADD]](s32)
- ; GFX9-UNSAFE-LABEL: name: test_add_mul_multiple_defs_z
- ; GFX9-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
- ; GFX9-UNSAFE-NEXT: {{ $}}
- ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GFX9-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; GFX9-UNSAFE-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
- ; GFX9-UNSAFE-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[MV]](p1) :: (load (<2 x s32>), addrspace 1)
- ; GFX9-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
- ; GFX9-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[UV1]]
- ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[FMA]](s32)
+ ;
; GFX10-LABEL: name: test_add_mul_multiple_defs_z
; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
; GFX10-NEXT: {{ $}}
@@ -259,18 +275,7 @@ body: |
; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
; GFX10-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UV1]]
; GFX10-NEXT: $vgpr0 = COPY [[FADD]](s32)
- ; GFX10-CONTRACT-LABEL: name: test_add_mul_multiple_defs_z
- ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
- ; GFX10-CONTRACT-NEXT: {{ $}}
- ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GFX10-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; GFX10-CONTRACT-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
- ; GFX10-CONTRACT-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[MV]](p1) :: (load (<2 x s32>), addrspace 1)
- ; GFX10-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
- ; GFX10-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[UV1]]
- ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[FMA]](s32)
+ ;
; GFX10-DENORM-LABEL: name: test_add_mul_multiple_defs_z
; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
; GFX10-DENORM-NEXT: {{ $}}
@@ -284,18 +289,6 @@ body: |
; GFX10-DENORM-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
; GFX10-DENORM-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UV1]]
; GFX10-DENORM-NEXT: $vgpr0 = COPY [[FADD]](s32)
- ; GFX10-UNSAFE-LABEL: name: test_add_mul_multiple_defs_z
- ; GFX10-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
- ; GFX10-UNSAFE-NEXT: {{ $}}
- ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GFX10-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; GFX10-UNSAFE-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
- ; GFX10-UNSAFE-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[MV]](p1) :: (load (<2 x s32>), addrspace 1)
- ; GFX10-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
- ; GFX10-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[UV1]]
- ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[FMA]](s32)
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%4:_(s32) = COPY $vgpr2
@@ -310,6 +303,76 @@ body: |
...
---
+name: test_add_mul_multiple_defs_z_contract
+body: |
+ bb.1.entry:
+ liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+
+ ; GFX9-LABEL: name: test_add_mul_multiple_defs_z_contract
+ ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX9-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+ ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[MV]](p1) :: (load (<2 x s32>), addrspace 1)
+ ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
+ ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[UV1]]
+ ; GFX9-NEXT: $vgpr0 = COPY [[FMA]](s32)
+ ;
+ ; GFX9-DENORM-LABEL: name: test_add_mul_multiple_defs_z_contract
+ ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; GFX9-DENORM-NEXT: {{ $}}
+ ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX9-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX9-DENORM-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+ ; GFX9-DENORM-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[MV]](p1) :: (load (<2 x s32>), addrspace 1)
+ ; GFX9-DENORM-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
+ ; GFX9-DENORM-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[UV1]]
+ ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[FMA]](s32)
+ ;
+ ; GFX10-LABEL: name: test_add_mul_multiple_defs_z_contract
+ ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX10-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+ ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[MV]](p1) :: (load (<2 x s32>), addrspace 1)
+ ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
+ ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[UV1]]
+ ; GFX10-NEXT: $vgpr0 = COPY [[FMA]](s32)
+ ;
+ ; GFX10-DENORM-LABEL: name: test_add_mul_multiple_defs_z_contract
+ ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; GFX10-DENORM-NEXT: {{ $}}
+ ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX10-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX10-DENORM-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+ ; GFX10-DENORM-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[MV]](p1) :: (load (<2 x s32>), addrspace 1)
+ ; GFX10-DENORM-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
+ ; GFX10-DENORM-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[UV1]]
+ ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[FMA]](s32)
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = COPY $vgpr1
+ %4:_(s32) = COPY $vgpr2
+ %5:_(s32) = COPY $vgpr3
+ %2:_(p1) = G_MERGE_VALUES %4(s32), %5(s32)
+ %6:_(s32) = contract G_FMUL %0, %1
+ %7:_(<2 x s32>) = G_LOAD %2(p1) :: (load (<2 x s32>), addrspace 1)
+ %12:_(s32), %13:_(s32) = G_UNMERGE_VALUES %7(<2 x s32>)
+ %8:_(s32) = COPY %13(s32)
+ %10:_(s32) = contract G_FADD %6, %8
+ $vgpr0 = COPY %10(s32)
+...
+
+---
name: test_add_mul_rhs_multiple_defs_z
body: |
bb.1.entry:
@@ -328,18 +391,7 @@ body: |
; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[UV1]], [[FMUL]]
; GFX9-NEXT: $vgpr0 = COPY [[FADD]](s32)
- ; GFX9-CONTRACT-LABEL: name: test_add_mul_rhs_multiple_defs_z
- ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
- ; GFX9-CONTRACT-NEXT: {{ $}}
- ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GFX9-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; GFX9-CONTRACT-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
- ; GFX9-CONTRACT-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[MV]](p1) :: (load (<2 x s32>), addrspace 1)
- ; GFX9-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
- ; GFX9-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[UV1]]
- ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[FMA]](s32)
+ ;
; GFX9-DENORM-LABEL: name: test_add_mul_rhs_multiple_defs_z
; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
; GFX9-DENORM-NEXT: {{ $}}
@@ -353,18 +405,7 @@ body: |
; GFX9-DENORM-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
; GFX9-DENORM-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[UV1]], [[FMUL]]
; GFX9-DENORM-NEXT: $vgpr0 = COPY [[FADD]](s32)
- ; GFX9-UNSAFE-LABEL: name: test_add_mul_rhs_multiple_defs_z
- ; GFX9-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
- ; GFX9-UNSAFE-NEXT: {{ $}}
- ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GFX9-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; GFX9-UNSAFE-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
- ; GFX9-UNSAFE-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[MV]](p1) :: (load (<2 x s32>), addrspace 1)
- ; GFX9-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
- ; GFX9-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[UV1]]
- ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[FMA]](s32)
+ ;
; GFX10-LABEL: name: test_add_mul_rhs_multiple_defs_z
; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
; GFX10-NEXT: {{ $}}
@@ -378,18 +419,7 @@ body: |
; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
; GFX10-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[UV1]], [[FMUL]]
; GFX10-NEXT: $vgpr0 = COPY [[FADD]](s32)
- ; GFX10-CONTRACT-LABEL: name: test_add_mul_rhs_multiple_defs_z
- ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
- ; GFX10-CONTRACT-NEXT: {{ $}}
- ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GFX10-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; GFX10-CONTRACT-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
- ; GFX10-CONTRACT-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[MV]](p1) :: (load (<2 x s32>), addrspace 1)
- ; GFX10-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
- ; GFX10-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[UV1]]
- ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[FMA]](s32)
+ ;
; GFX10-DENORM-LABEL: name: test_add_mul_rhs_multiple_defs_z
; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
; GFX10-DENORM-NEXT: {{ $}}
@@ -403,18 +433,6 @@ body: |
; GFX10-DENORM-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
; GFX10-DENORM-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[UV1]], [[FMUL]]
; GFX10-DENORM-NEXT: $vgpr0 = COPY [[FADD]](s32)
- ; GFX10-UNSAFE-LABEL: name: test_add_mul_rhs_multiple_defs_z
- ; GFX10-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
- ; GFX10-UNSAFE-NEXT: {{ $}}
- ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GFX10-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; GFX10-UNSAFE-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
- ; GFX10-UNSAFE-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[MV]](p1) :: (load (<2 x s32>), addrspace 1)
- ; GFX10-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
- ; GFX10-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[UV1]]
- ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[FMA]](s32)
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%4:_(s32) = COPY $vgpr2
@@ -429,6 +447,76 @@ body: |
...
---
+name: test_add_mul_rhs_multiple_defs_z_contract
+body: |
+ bb.1.entry:
+ liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+
+ ; GFX9-LABEL: name: test_add_mul_rhs_multiple_defs_z_contract
+ ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX9-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+ ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[MV]](p1) :: (load (<2 x s32>), addrspace 1)
+ ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
+ ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[UV1]]
+ ; GFX9-NEXT: $vgpr0 = COPY [[FMA]](s32)
+ ;
+ ; GFX9-DENORM-LABEL: name: test_add_mul_rhs_multiple_defs_z_contract
+ ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; GFX9-DENORM-NEXT: {{ $}}
+ ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX9-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX9-DENORM-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+ ; GFX9-DENORM-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[MV]](p1) :: (load (<2 x s32>), addrspace 1)
+ ; GFX9-DENORM-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
+ ; GFX9-DENORM-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[UV1]]
+ ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[FMA]](s32)
+ ;
+ ; GFX10-LABEL: name: test_add_mul_rhs_multiple_defs_z_contract
+ ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX10-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+ ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[MV]](p1) :: (load (<2 x s32>), addrspace 1)
+ ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
+ ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[UV1]]
+ ; GFX10-NEXT: $vgpr0 = COPY [[FMA]](s32)
+ ;
+ ; GFX10-DENORM-LABEL: name: test_add_mul_rhs_multiple_defs_z_contract
+ ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3
+ ; GFX10-DENORM-NEXT: {{ $}}
+ ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX10-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX10-DENORM-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+ ; GFX10-DENORM-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[MV]](p1) :: (load (<2 x s32>), addrspace 1)
+ ; GFX10-DENORM-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>)
+ ; GFX10-DENORM-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[UV1]]
+ ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[FMA]](s32)
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = COPY $vgpr1
+ %4:_(s32) = COPY $vgpr2
+ %5:_(s32) = COPY $vgpr3
+ %2:_(p1) = G_MERGE_VALUES %4(s32), %5(s32)
+ %6:_(s32) = contract G_FMUL %0, %1
+ %7:_(<2 x s32>) = G_LOAD %2(p1) :: (load (<2 x s32>), addrspace 1)
+ %12:_(s32), %13:_(s32) = G_UNMERGE_VALUES %7(<2 x s32>)
+ %8:_(s32) = COPY %13(s32)
+ %10:_(s32) = contract G_FADD %8, %6
+ $vgpr0 = COPY %10(s32)
+...
+
+---
name: test_half_add_mul
body: |
bb.1.entry:
@@ -448,19 +536,7 @@ body: |
; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16)
; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
- ; GFX9-CONTRACT-LABEL: name: test_half_add_mul
- ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2
- ; GFX9-CONTRACT-NEXT: {{ $}}
- ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GFX9-CONTRACT-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
- ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GFX9-CONTRACT-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
- ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GFX9-CONTRACT-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
- ; GFX9-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]]
- ; GFX9-CONTRACT-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16)
- ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
- ; GFX9-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
+ ;
; GFX9-DENORM-LABEL: name: test_half_add_mul
; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2
; GFX9-DENORM-NEXT: {{ $}}
@@ -475,19 +551,7 @@ body: |
; GFX9-DENORM-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16)
; GFX9-DENORM-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
- ; GFX9-UNSAFE-LABEL: name: test_half_add_mul
- ; GFX9-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2
- ; GFX9-UNSAFE-NEXT: {{ $}}
- ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GFX9-UNSAFE-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
- ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GFX9-UNSAFE-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
- ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GFX9-UNSAFE-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
- ; GFX9-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]]
- ; GFX9-UNSAFE-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16)
- ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
- ; GFX9-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
+ ;
; GFX10-LABEL: name: test_half_add_mul
; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2
; GFX10-NEXT: {{ $}}
@@ -502,19 +566,7 @@ body: |
; GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16)
; GFX10-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
- ; GFX10-CONTRACT-LABEL: name: test_half_add_mul
- ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2
- ; GFX10-CONTRACT-NEXT: {{ $}}
- ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GFX10-CONTRACT-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
- ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GFX10-CONTRACT-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
- ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GFX10-CONTRACT-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
- ; GFX10-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]]
- ; GFX10-CONTRACT-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16)
- ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
- ; GFX10-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
+ ;
; GFX10-DENORM-LABEL: name: test_half_add_mul
; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2
; GFX10-DENORM-NEXT: {{ $}}
@@ -529,19 +581,6 @@ body: |
; GFX10-DENORM-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16)
; GFX10-DENORM-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
- ; GFX10-UNSAFE-LABEL: name: test_half_add_mul
- ; GFX10-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2
- ; GFX10-UNSAFE-NEXT: {{ $}}
- ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GFX10-UNSAFE-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
- ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GFX10-UNSAFE-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
- ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GFX10-UNSAFE-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
- ; GFX10-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]]
- ; GFX10-UNSAFE-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16)
- ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
- ; GFX10-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
%4:_(s32) = COPY $vgpr0
%0:_(s16) = G_TRUNC %4(s32)
%5:_(s32) = COPY $vgpr1
@@ -556,6 +595,80 @@ body: |
...
---
+name: test_half_add_mul_contract
+body: |
+ bb.1.entry:
+ liveins: $vgpr0, $vgpr1, $vgpr2
+
+ ; GFX9-LABEL: name: test_half_add_mul_contract
+ ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+ ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+ ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
+ ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]]
+ ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16)
+ ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
+ ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
+ ;
+ ; GFX9-DENORM-LABEL: name: test_half_add_mul_contract
+ ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2
+ ; GFX9-DENORM-NEXT: {{ $}}
+ ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX9-DENORM-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+ ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX9-DENORM-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+ ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX9-DENORM-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
+ ; GFX9-DENORM-NEXT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]]
+ ; GFX9-DENORM-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16)
+ ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
+ ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
+ ;
+ ; GFX10-LABEL: name: test_half_add_mul_contract
+ ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+ ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+ ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
+ ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]]
+ ; GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16)
+ ; GFX10-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
+ ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
+ ;
+ ; GFX10-DENORM-LABEL: name: test_half_add_mul_contract
+ ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2
+ ; GFX10-DENORM-NEXT: {{ $}}
+ ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX10-DENORM-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+ ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX10-DENORM-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+ ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX10-DENORM-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
+ ; GFX10-DENORM-NEXT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]]
+ ; GFX10-DENORM-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16)
+ ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
+ ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
+ %4:_(s32) = COPY $vgpr0
+ %0:_(s16) = G_TRUNC %4(s32)
+ %5:_(s32) = COPY $vgpr1
+ %1:_(s16) = G_TRUNC %5(s32)
+ %6:_(s32) = COPY $vgpr2
+ %2:_(s16) = G_TRUNC %6(s32)
+ %7:_(s16) = contract G_FMUL %0, %1
+ %8:_(s16) = contract G_FADD %7, %2
+ %10:_(s32) = G_ANYEXT %8(s16)
+ $vgpr0 = COPY %10(s32)
+ S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
+...
+
+---
name: test_half_add_mul_rhs
body: |
bb.1.entry:
@@ -575,19 +688,7 @@ body: |
; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16)
; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
- ; GFX9-CONTRACT-LABEL: name: test_half_add_mul_rhs
- ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2
- ; GFX9-CONTRACT-NEXT: {{ $}}
- ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GFX9-CONTRACT-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
- ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GFX9-CONTRACT-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
- ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GFX9-CONTRACT-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
- ; GFX9-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]]
- ; GFX9-CONTRACT-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16)
- ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
- ; GFX9-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
+ ;
; GFX9-DENORM-LABEL: name: test_half_add_mul_rhs
; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2
; GFX9-DENORM-NEXT: {{ $}}
@@ -602,19 +703,7 @@ body: |
; GFX9-DENORM-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16)
; GFX9-DENORM-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
- ; GFX9-UNSAFE-LABEL: name: test_half_add_mul_rhs
- ; GFX9-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2
- ; GFX9-UNSAFE-NEXT: {{ $}}
- ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GFX9-UNSAFE-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
- ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GFX9-UNSAFE-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
- ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GFX9-UNSAFE-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
- ; GFX9-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]]
- ; GFX9-UNSAFE-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16)
- ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
- ; GFX9-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
+ ;
; GFX10-LABEL: name: test_half_add_mul_rhs
; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2
; GFX10-NEXT: {{ $}}
@@ -629,19 +718,7 @@ body: |
; GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16)
; GFX10-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
- ; GFX10-CONTRACT-LABEL: name: test_half_add_mul_rhs
- ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2
- ; GFX10-CONTRACT-NEXT: {{ $}}
- ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GFX10-CONTRACT-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
- ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GFX10-CONTRACT-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
- ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GFX10-CONTRACT-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
- ; GFX10-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]]
- ; GFX10-CONTRACT-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16)
- ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
- ; GFX10-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
+ ;
; GFX10-DENORM-LABEL: name: test_half_add_mul_rhs
; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2
; GFX10-DENORM-NEXT: {{ $}}
@@ -656,19 +733,6 @@ body: |
; GFX10-DENORM-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16)
; GFX10-DENORM-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
- ; GFX10-UNSAFE-LABEL: name: test_half_add_mul_rhs
- ; GFX10-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2
- ; GFX10-UNSAFE-NEXT: {{ $}}
- ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GFX10-UNSAFE-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
- ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GFX10-UNSAFE-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
- ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GFX10-UNSAFE-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
- ; GFX10-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]]
- ; GFX10-UNSAFE-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16)
- ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
- ; GFX10-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
%4:_(s32) = COPY $vgpr0
%0:_(s16) = G_TRUNC %4(s32)
%5:_(s32) = COPY $vgpr1
@@ -683,6 +747,80 @@ body: |
...
---
+name: test_half_add_mul_rhs_contract
+body: |
+ bb.1.entry:
+ liveins: $vgpr0, $vgpr1, $vgpr2
+
+ ; GFX9-LABEL: name: test_half_add_mul_rhs_contract
+ ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+ ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+ ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
+ ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]]
+ ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16)
+ ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
+ ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
+ ;
+ ; GFX9-DENORM-LABEL: name: test_half_add_mul_rhs_contract
+ ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2
+ ; GFX9-DENORM-NEXT: {{ $}}
+ ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX9-DENORM-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+ ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX9-DENORM-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+ ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX9-DENORM-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
+ ; GFX9-DENORM-NEXT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]]
+ ; GFX9-DENORM-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16)
+ ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
+ ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
+ ;
+ ; GFX10-LABEL: name: test_half_add_mul_rhs_contract
+ ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+ ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+ ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
+ ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]]
+ ; GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16)
+ ; GFX10-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
+ ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
+ ;
+ ; GFX10-DENORM-LABEL: name: test_half_add_mul_rhs_contract
+ ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2
+ ; GFX10-DENORM-NEXT: {{ $}}
+ ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX10-DENORM-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+ ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX10-DENORM-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+ ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX10-DENORM-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
+ ; GFX10-DENORM-NEXT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]]
+ ; GFX10-DENORM-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16)
+ ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
+ ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
+ %4:_(s32) = COPY $vgpr0
+ %0:_(s16) = G_TRUNC %4(s32)
+ %5:_(s32) = COPY $vgpr1
+ %1:_(s16) = G_TRUNC %5(s32)
+ %6:_(s32) = COPY $vgpr2
+ %2:_(s16) = G_TRUNC %6(s32)
+ %7:_(s16) = contract G_FMUL %0, %1
+ %8:_(s16) = contract G_FADD %2, %7
+ %10:_(s32) = G_ANYEXT %8(s16)
+ $vgpr0 = COPY %10(s32)
+ S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
+...
+
+---
name: test_double_add_mul
body: |
bb.1.entry:
@@ -706,23 +844,7 @@ body: |
; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32)
; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32)
; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
- ; GFX9-CONTRACT-LABEL: name: test_double_add_mul
- ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
- ; GFX9-CONTRACT-NEXT: {{ $}}
- ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GFX9-CONTRACT-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
- ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GFX9-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; GFX9-CONTRACT-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
- ; GFX9-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
- ; GFX9-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
- ; GFX9-CONTRACT-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
- ; GFX9-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]]
- ; GFX9-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64)
- ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](s32)
- ; GFX9-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](s32)
- ; GFX9-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
+ ;
; GFX9-DENORM-LABEL: name: test_double_add_mul
; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
; GFX9-DENORM-NEXT: {{ $}}
@@ -741,23 +863,7 @@ body: |
; GFX9-DENORM-NEXT: $vgpr0 = COPY [[UV]](s32)
; GFX9-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32)
; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
- ; GFX9-UNSAFE-LABEL: name: test_double_add_mul
- ; GFX9-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
- ; GFX9-UNSAFE-NEXT: {{ $}}
- ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GFX9-UNSAFE-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
- ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GFX9-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; GFX9-UNSAFE-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
- ; GFX9-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
- ; GFX9-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
- ; GFX9-UNSAFE-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
- ; GFX9-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]]
- ; GFX9-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64)
- ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](s32)
- ; GFX9-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](s32)
- ; GFX9-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
+ ;
; GFX10-LABEL: name: test_double_add_mul
; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
; GFX10-NEXT: {{ $}}
@@ -776,23 +882,7 @@ body: |
; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32)
; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32)
; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
- ; GFX10-CONTRACT-LABEL: name: test_double_add_mul
- ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
- ; GFX10-CONTRACT-NEXT: {{ $}}
- ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GFX10-CONTRACT-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
- ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GFX10-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; GFX10-CONTRACT-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
- ; GFX10-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
- ; GFX10-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
- ; GFX10-CONTRACT-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
- ; GFX10-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]]
- ; GFX10-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64)
- ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](s32)
- ; GFX10-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](s32)
- ; GFX10-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
+ ;
; GFX10-DENORM-LABEL: name: test_double_add_mul
; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
; GFX10-DENORM-NEXT: {{ $}}
@@ -811,23 +901,6 @@ body: |
; GFX10-DENORM-NEXT: $vgpr0 = COPY [[UV]](s32)
; GFX10-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32)
; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
- ; GFX10-UNSAFE-LABEL: name: test_double_add_mul
- ; GFX10-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
- ; GFX10-UNSAFE-NEXT: {{ $}}
- ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GFX10-UNSAFE-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
- ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GFX10-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; GFX10-UNSAFE-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
- ; GFX10-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
- ; GFX10-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
- ; GFX10-UNSAFE-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
- ; GFX10-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]]
- ; GFX10-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64)
- ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](s32)
- ; GFX10-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](s32)
- ; GFX10-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
%4:_(s32) = COPY $vgpr0
%5:_(s32) = COPY $vgpr1
%0:_(s64) = G_MERGE_VALUES %4(s32), %5(s32)
@@ -846,6 +919,101 @@ body: |
...
---
+name: test_double_add_mul_contract
+body: |
+ bb.1.entry:
+ liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
+
+ ; GFX9-LABEL: name: test_double_add_mul_contract
+ ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+ ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+ ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
+ ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]]
+ ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64)
+ ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32)
+ ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
+ ;
+ ; GFX9-DENORM-LABEL: name: test_double_add_mul_contract
+ ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
+ ; GFX9-DENORM-NEXT: {{ $}}
+ ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX9-DENORM-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+ ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX9-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX9-DENORM-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+ ; GFX9-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; GFX9-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; GFX9-DENORM-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
+ ; GFX9-DENORM-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]]
+ ; GFX9-DENORM-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64)
+ ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[UV]](s32)
+ ; GFX9-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
+ ;
+ ; GFX10-LABEL: name: test_double_add_mul_contract
+ ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+ ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+ ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
+ ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]]
+ ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64)
+ ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32)
+ ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
+ ;
+ ; GFX10-DENORM-LABEL: name: test_double_add_mul_contract
+ ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
+ ; GFX10-DENORM-NEXT: {{ $}}
+ ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX10-DENORM-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+ ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX10-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX10-DENORM-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+ ; GFX10-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; GFX10-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; GFX10-DENORM-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
+ ; GFX10-DENORM-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]]
+ ; GFX10-DENORM-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64)
+ ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[UV]](s32)
+ ; GFX10-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
+ %4:_(s32) = COPY $vgpr0
+ %5:_(s32) = COPY $vgpr1
+ %0:_(s64) = G_MERGE_VALUES %4(s32), %5(s32)
+ %6:_(s32) = COPY $vgpr2
+ %7:_(s32) = COPY $vgpr3
+ %1:_(s64) = G_MERGE_VALUES %6(s32), %7(s32)
+ %8:_(s32) = COPY $vgpr4
+ %9:_(s32) = COPY $vgpr5
+ %2:_(s64) = G_MERGE_VALUES %8(s32), %9(s32)
+ %10:_(s64) = contract G_FMUL %0, %1
+ %11:_(s64) = contract G_FADD %10, %2
+ %13:_(s32), %14:_(s32) = G_UNMERGE_VALUES %11(s64)
+ $vgpr0 = COPY %13(s32)
+ $vgpr1 = COPY %14(s32)
+ S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
+...
+
+
+---
name: test_double_add_mul_rhs
body: |
bb.1.entry:
@@ -869,23 +1037,7 @@ body: |
; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32)
; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32)
; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
- ; GFX9-CONTRACT-LABEL: name: test_double_add_mul_rhs
- ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
- ; GFX9-CONTRACT-NEXT: {{ $}}
- ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GFX9-CONTRACT-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
- ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GFX9-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; GFX9-CONTRACT-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
- ; GFX9-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
- ; GFX9-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
- ; GFX9-CONTRACT-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
- ; GFX9-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]]
- ; GFX9-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64)
- ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](s32)
- ; GFX9-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](s32)
- ; GFX9-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
+ ;
; GFX9-DENORM-LABEL: name: test_double_add_mul_rhs
; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
; GFX9-DENORM-NEXT: {{ $}}
@@ -904,23 +1056,7 @@ body: |
; GFX9-DENORM-NEXT: $vgpr0 = COPY [[UV]](s32)
; GFX9-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32)
; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
- ; GFX9-UNSAFE-LABEL: name: test_double_add_mul_rhs
- ; GFX9-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
- ; GFX9-UNSAFE-NEXT: {{ $}}
- ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GFX9-UNSAFE-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
- ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GFX9-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; GFX9-UNSAFE-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
- ; GFX9-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
- ; GFX9-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
- ; GFX9-UNSAFE-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
- ; GFX9-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]]
- ; GFX9-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64)
- ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](s32)
- ; GFX9-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](s32)
- ; GFX9-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
+ ;
; GFX10-LABEL: name: test_double_add_mul_rhs
; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
; GFX10-NEXT: {{ $}}
@@ -939,23 +1075,7 @@ body: |
; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32)
; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32)
; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
- ; GFX10-CONTRACT-LABEL: name: test_double_add_mul_rhs
- ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
- ; GFX10-CONTRACT-NEXT: {{ $}}
- ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GFX10-CONTRACT-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
- ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GFX10-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; GFX10-CONTRACT-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
- ; GFX10-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
- ; GFX10-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
- ; GFX10-CONTRACT-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
- ; GFX10-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]]
- ; GFX10-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64)
- ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](s32)
- ; GFX10-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](s32)
- ; GFX10-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
+ ;
; GFX10-DENORM-LABEL: name: test_double_add_mul_rhs
; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
; GFX10-DENORM-NEXT: {{ $}}
@@ -974,23 +1094,6 @@ body: |
; GFX10-DENORM-NEXT: $vgpr0 = COPY [[UV]](s32)
; GFX10-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32)
; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
- ; GFX10-UNSAFE-LABEL: name: test_double_add_mul_rhs
- ; GFX10-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
- ; GFX10-UNSAFE-NEXT: {{ $}}
- ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GFX10-UNSAFE-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
- ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GFX10-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; GFX10-UNSAFE-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
- ; GFX10-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
- ; GFX10-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
- ; GFX10-UNSAFE-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
- ; GFX10-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]]
- ; GFX10-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64)
- ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](s32)
- ; GFX10-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](s32)
- ; GFX10-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
%4:_(s32) = COPY $vgpr0
%5:_(s32) = COPY $vgpr1
%0:_(s64) = G_MERGE_VALUES %4(s32), %5(s32)
@@ -1009,6 +1112,100 @@ body: |
...
---
+name: test_double_add_mul_rhs_contract
+body: |
+ bb.1.entry:
+ liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
+
+ ; GFX9-LABEL: name: test_double_add_mul_rhs_contract
+ ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+ ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+ ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
+ ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]]
+ ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64)
+ ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32)
+ ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
+ ;
+ ; GFX9-DENORM-LABEL: name: test_double_add_mul_rhs_contract
+ ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
+ ; GFX9-DENORM-NEXT: {{ $}}
+ ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX9-DENORM-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+ ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX9-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX9-DENORM-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+ ; GFX9-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; GFX9-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; GFX9-DENORM-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
+ ; GFX9-DENORM-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]]
+ ; GFX9-DENORM-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64)
+ ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[UV]](s32)
+ ; GFX9-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
+ ;
+ ; GFX10-LABEL: name: test_double_add_mul_rhs_contract
+ ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+ ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+ ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
+ ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]]
+ ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64)
+ ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32)
+ ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
+ ;
+ ; GFX10-DENORM-LABEL: name: test_double_add_mul_rhs_contract
+ ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
+ ; GFX10-DENORM-NEXT: {{ $}}
+ ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX10-DENORM-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+ ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX10-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX10-DENORM-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+ ; GFX10-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; GFX10-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; GFX10-DENORM-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
+ ; GFX10-DENORM-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]]
+ ; GFX10-DENORM-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64)
+ ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[UV]](s32)
+ ; GFX10-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
+ %4:_(s32) = COPY $vgpr0
+ %5:_(s32) = COPY $vgpr1
+ %0:_(s64) = G_MERGE_VALUES %4(s32), %5(s32)
+ %6:_(s32) = COPY $vgpr2
+ %7:_(s32) = COPY $vgpr3
+ %1:_(s64) = G_MERGE_VALUES %6(s32), %7(s32)
+ %8:_(s32) = COPY $vgpr4
+ %9:_(s32) = COPY $vgpr5
+ %2:_(s64) = G_MERGE_VALUES %8(s32), %9(s32)
+ %10:_(s64) = contract G_FMUL %0, %1
+ %11:_(s64) = contract G_FADD %2, %10
+ %13:_(s32), %14:_(s32) = G_UNMERGE_VALUES %11(s64)
+ $vgpr0 = COPY %13(s32)
+ $vgpr1 = COPY %14(s32)
+ S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
+...
+
+---
name: test_4xfloat_add_mul
body: |
bb.1.entry:
@@ -1040,32 +1237,7 @@ body: |
; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32)
; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32)
; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
- ; GFX9-CONTRACT-LABEL: name: test_4xfloat_add_mul
- ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10
- ; GFX9-CONTRACT-NEXT: {{ $}}
- ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GFX9-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; GFX9-CONTRACT-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
- ; GFX9-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
- ; GFX9-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
- ; GFX9-CONTRACT-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
- ; GFX9-CONTRACT-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
- ; GFX9-CONTRACT-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
- ; GFX9-CONTRACT-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
- ; GFX9-CONTRACT-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
- ; GFX9-CONTRACT-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
- ; GFX9-CONTRACT-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
- ; GFX9-CONTRACT-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
- ; GFX9-CONTRACT-NEXT: [[FMUL:%[0-9]+]]:_(<4 x s32>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
- ; GFX9-CONTRACT-NEXT: [[FADD:%[0-9]+]]:_(<4 x s32>) = G_FADD [[FMUL]], [[BUILD_VECTOR2]]
- ; GFX9-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s32>)
- ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](s32)
- ; GFX9-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](s32)
- ; GFX9-CONTRACT-NEXT: $vgpr2 = COPY [[UV2]](s32)
- ; GFX9-CONTRACT-NEXT: $vgpr3 = COPY [[UV3]](s32)
- ; GFX9-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+ ;
; GFX9-DENORM-LABEL: name: test_4xfloat_add_mul
; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10
; GFX9-DENORM-NEXT: {{ $}}
@@ -1092,32 +1264,7 @@ body: |
; GFX9-DENORM-NEXT: $vgpr2 = COPY [[UV2]](s32)
; GFX9-DENORM-NEXT: $vgpr3 = COPY [[UV3]](s32)
; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
- ; GFX9-UNSAFE-LABEL: name: test_4xfloat_add_mul
- ; GFX9-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10
- ; GFX9-UNSAFE-NEXT: {{ $}}
- ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GFX9-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
- ; GFX9-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
- ; GFX9-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
- ; GFX9-UNSAFE-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
- ; GFX9-UNSAFE-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
- ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
- ; GFX9-UNSAFE-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
- ; GFX9-UNSAFE-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
- ; GFX9-UNSAFE-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
- ; GFX9-UNSAFE-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
- ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
- ; GFX9-UNSAFE-NEXT: [[FMUL:%[0-9]+]]:_(<4 x s32>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
- ; GFX9-UNSAFE-NEXT: [[FADD:%[0-9]+]]:_(<4 x s32>) = G_FADD [[FMUL]], [[BUILD_VECTOR2]]
- ; GFX9-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s32>)
- ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](s32)
- ; GFX9-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](s32)
- ; GFX9-UNSAFE-NEXT: $vgpr2 = COPY [[UV2]](s32)
- ; GFX9-UNSAFE-NEXT: $vgpr3 = COPY [[UV3]](s32)
- ; GFX9-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+ ;
; GFX10-LABEL: name: test_4xfloat_add_mul
; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10
; GFX10-NEXT: {{ $}}
@@ -1144,32 +1291,7 @@ body: |
; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32)
; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32)
; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
- ; GFX10-CONTRACT-LABEL: name: test_4xfloat_add_mul
- ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10
- ; GFX10-CONTRACT-NEXT: {{ $}}
- ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GFX10-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; GFX10-CONTRACT-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
- ; GFX10-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
- ; GFX10-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
- ; GFX10-CONTRACT-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
- ; GFX10-CONTRACT-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
- ; GFX10-CONTRACT-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
- ; GFX10-CONTRACT-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
- ; GFX10-CONTRACT-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
- ; GFX10-CONTRACT-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
- ; GFX10-CONTRACT-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
- ; GFX10-CONTRACT-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
- ; GFX10-CONTRACT-NEXT: [[FMUL:%[0-9]+]]:_(<4 x s32>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
- ; GFX10-CONTRACT-NEXT: [[FADD:%[0-9]+]]:_(<4 x s32>) = G_FADD [[FMUL]], [[BUILD_VECTOR2]]
- ; GFX10-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s32>)
- ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](s32)
- ; GFX10-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](s32)
- ; GFX10-CONTRACT-NEXT: $vgpr2 = COPY [[UV2]](s32)
- ; GFX10-CONTRACT-NEXT: $vgpr3 = COPY [[UV3]](s32)
- ; GFX10-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+ ;
; GFX10-DENORM-LABEL: name: test_4xfloat_add_mul
; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10
; GFX10-DENORM-NEXT: {{ $}}
@@ -1196,32 +1318,6 @@ body: |
; GFX10-DENORM-NEXT: $vgpr2 = COPY [[UV2]](s32)
; GFX10-DENORM-NEXT: $vgpr3 = COPY [[UV3]](s32)
; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
- ; GFX10-UNSAFE-LABEL: name: test_4xfloat_add_mul
- ; GFX10-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10
- ; GFX10-UNSAFE-NEXT: {{ $}}
- ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GFX10-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; GFX10-UNSAFE-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
- ; GFX10-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
- ; GFX10-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
- ; GFX10-UNSAFE-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
- ; GFX10-UNSAFE-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
- ; GFX10-UNSAFE-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
- ; GFX10-UNSAFE-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
- ; GFX10-UNSAFE-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
- ; GFX10-UNSAFE-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
- ; GFX10-UNSAFE-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
- ; GFX10-UNSAFE-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
- ; GFX10-UNSAFE-NEXT: [[FMUL:%[0-9]+]]:_(<4 x s32>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
- ; GFX10-UNSAFE-NEXT: [[FADD:%[0-9]+]]:_(<4 x s32>) = G_FADD [[FMUL]], [[BUILD_VECTOR2]]
- ; GFX10-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s32>)
- ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](s32)
- ; GFX10-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](s32)
- ; GFX10-UNSAFE-NEXT: $vgpr2 = COPY [[UV2]](s32)
- ; GFX10-UNSAFE-NEXT: $vgpr3 = COPY [[UV3]](s32)
- ; GFX10-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
%4:_(s32) = COPY $vgpr0
%5:_(s32) = COPY $vgpr1
%6:_(s32) = COPY $vgpr2
@@ -1248,6 +1344,144 @@ body: |
...
---
+name: test_4xfloat_add_mul_contract
+body: |
+ bb.1.entry:
+ liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10
+
+ ; GFX9-LABEL: name: test_4xfloat_add_mul_contract
+ ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
+ ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+ ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+ ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
+ ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
+ ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
+ ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
+ ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(<4 x s32>) = contract G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
+ ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(<4 x s32>) = contract G_FADD [[FMUL]], [[BUILD_VECTOR2]]
+ ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s32>)
+ ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32)
+ ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+ ;
+ ; GFX9-DENORM-LABEL: name: test_4xfloat_add_mul_contract
+ ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10
+ ; GFX9-DENORM-NEXT: {{ $}}
+ ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX9-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX9-DENORM-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
+ ; GFX9-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; GFX9-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; GFX9-DENORM-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+ ; GFX9-DENORM-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+ ; GFX9-DENORM-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX9-DENORM-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
+ ; GFX9-DENORM-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
+ ; GFX9-DENORM-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
+ ; GFX9-DENORM-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
+ ; GFX9-DENORM-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX9-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(<4 x s32>) = contract G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
+ ; GFX9-DENORM-NEXT: [[FADD:%[0-9]+]]:_(<4 x s32>) = contract G_FADD [[FMUL]], [[BUILD_VECTOR2]]
+ ; GFX9-DENORM-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s32>)
+ ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[UV]](s32)
+ ; GFX9-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX9-DENORM-NEXT: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX9-DENORM-NEXT: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+ ;
+ ; GFX10-LABEL: name: test_4xfloat_add_mul_contract
+ ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
+ ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+ ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+ ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
+ ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
+ ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
+ ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
+ ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(<4 x s32>) = contract G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
+ ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(<4 x s32>) = contract G_FADD [[FMUL]], [[BUILD_VECTOR2]]
+ ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s32>)
+ ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32)
+ ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+ ;
+ ; GFX10-DENORM-LABEL: name: test_4xfloat_add_mul_contract
+ ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10
+ ; GFX10-DENORM-NEXT: {{ $}}
+ ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX10-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX10-DENORM-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
+ ; GFX10-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; GFX10-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; GFX10-DENORM-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+ ; GFX10-DENORM-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+ ; GFX10-DENORM-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX10-DENORM-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
+ ; GFX10-DENORM-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
+ ; GFX10-DENORM-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
+ ; GFX10-DENORM-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
+ ; GFX10-DENORM-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX10-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(<4 x s32>) = contract G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
+ ; GFX10-DENORM-NEXT: [[FADD:%[0-9]+]]:_(<4 x s32>) = contract G_FADD [[FMUL]], [[BUILD_VECTOR2]]
+ ; GFX10-DENORM-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s32>)
+ ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[UV]](s32)
+ ; GFX10-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX10-DENORM-NEXT: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX10-DENORM-NEXT: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+ %4:_(s32) = COPY $vgpr0
+ %5:_(s32) = COPY $vgpr1
+ %6:_(s32) = COPY $vgpr2
+ %7:_(s32) = COPY $vgpr3
+ %0:_(<4 x s32>) = G_BUILD_VECTOR %4(s32), %5(s32), %6(s32), %7(s32)
+ %8:_(s32) = COPY $vgpr4
+ %9:_(s32) = COPY $vgpr5
+ %10:_(s32) = COPY $vgpr6
+ %11:_(s32) = COPY $vgpr7
+ %1:_(<4 x s32>) = G_BUILD_VECTOR %8(s32), %9(s32), %10(s32), %11(s32)
+ %12:_(s32) = COPY $vgpr8
+ %13:_(s32) = COPY $vgpr9
+ %14:_(s32) = COPY $vgpr10
+ %15:_(s32) = COPY $vgpr11
+ %2:_(<4 x s32>) = G_BUILD_VECTOR %12(s32), %13(s32), %14(s32), %15(s32)
+ %16:_(<4 x s32>) = contract G_FMUL %0, %1
+ %17:_(<4 x s32>) = contract G_FADD %16, %2
+ %19:_(s32), %20:_(s32), %21:_(s32), %22:_(s32) = G_UNMERGE_VALUES %17(<4 x s32>)
+ $vgpr0 = COPY %19(s32)
+ $vgpr1 = COPY %20(s32)
+ $vgpr2 = COPY %21(s32)
+ $vgpr3 = COPY %22(s32)
+ S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+...
+
+---
name: test_3xfloat_add_mul_rhs
body: |
bb.1.entry:
@@ -1275,28 +1509,7 @@ body: |
; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32)
; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32)
; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
- ; GFX9-CONTRACT-LABEL: name: test_3xfloat_add_mul_rhs
- ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8
- ; GFX9-CONTRACT-NEXT: {{ $}}
- ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GFX9-CONTRACT-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
- ; GFX9-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; GFX9-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
- ; GFX9-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
- ; GFX9-CONTRACT-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32)
- ; GFX9-CONTRACT-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
- ; GFX9-CONTRACT-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
- ; GFX9-CONTRACT-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
- ; GFX9-CONTRACT-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32)
- ; GFX9-CONTRACT-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s32>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
- ; GFX9-CONTRACT-NEXT: [[FADD:%[0-9]+]]:_(<3 x s32>) = G_FADD [[BUILD_VECTOR2]], [[FMUL]]
- ; GFX9-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s32>)
- ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](s32)
- ; GFX9-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](s32)
- ; GFX9-CONTRACT-NEXT: $vgpr2 = COPY [[UV2]](s32)
- ; GFX9-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
+ ;
; GFX9-DENORM-LABEL: name: test_3xfloat_add_mul_rhs
; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8
; GFX9-DENORM-NEXT: {{ $}}
@@ -1319,28 +1532,7 @@ body: |
; GFX9-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32)
; GFX9-DENORM-NEXT: $vgpr2 = COPY [[UV2]](s32)
; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
- ; GFX9-UNSAFE-LABEL: name: test_3xfloat_add_mul_rhs
- ; GFX9-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8
- ; GFX9-UNSAFE-NEXT: {{ $}}
- ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
- ; GFX9-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; GFX9-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
- ; GFX9-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
- ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32)
- ; GFX9-UNSAFE-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
- ; GFX9-UNSAFE-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
- ; GFX9-UNSAFE-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
- ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32)
- ; GFX9-UNSAFE-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s32>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
- ; GFX9-UNSAFE-NEXT: [[FADD:%[0-9]+]]:_(<3 x s32>) = G_FADD [[BUILD_VECTOR2]], [[FMUL]]
- ; GFX9-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s32>)
- ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](s32)
- ; GFX9-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](s32)
- ; GFX9-UNSAFE-NEXT: $vgpr2 = COPY [[UV2]](s32)
- ; GFX9-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
+ ;
; GFX10-LABEL: name: test_3xfloat_add_mul_rhs
; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8
; GFX10-NEXT: {{ $}}
@@ -1363,28 +1555,7 @@ body: |
; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32)
; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32)
; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
- ; GFX10-CONTRACT-LABEL: name: test_3xfloat_add_mul_rhs
- ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8
- ; GFX10-CONTRACT-NEXT: {{ $}}
- ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GFX10-CONTRACT-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
- ; GFX10-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; GFX10-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
- ; GFX10-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
- ; GFX10-CONTRACT-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32)
- ; GFX10-CONTRACT-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
- ; GFX10-CONTRACT-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
- ; GFX10-CONTRACT-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
- ; GFX10-CONTRACT-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32)
- ; GFX10-CONTRACT-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s32>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
- ; GFX10-CONTRACT-NEXT: [[FADD:%[0-9]+]]:_(<3 x s32>) = G_FADD [[BUILD_VECTOR2]], [[FMUL]]
- ; GFX10-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s32>)
- ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](s32)
- ; GFX10-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](s32)
- ; GFX10-CONTRACT-NEXT: $vgpr2 = COPY [[UV2]](s32)
- ; GFX10-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
+ ;
; GFX10-DENORM-LABEL: name: test_3xfloat_add_mul_rhs
; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8
; GFX10-DENORM-NEXT: {{ $}}
@@ -1407,28 +1578,6 @@ body: |
; GFX10-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32)
; GFX10-DENORM-NEXT: $vgpr2 = COPY [[UV2]](s32)
; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
- ; GFX10-UNSAFE-LABEL: name: test_3xfloat_add_mul_rhs
- ; GFX10-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8
- ; GFX10-UNSAFE-NEXT: {{ $}}
- ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GFX10-UNSAFE-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
- ; GFX10-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; GFX10-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
- ; GFX10-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
- ; GFX10-UNSAFE-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32)
- ; GFX10-UNSAFE-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
- ; GFX10-UNSAFE-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
- ; GFX10-UNSAFE-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
- ; GFX10-UNSAFE-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32)
- ; GFX10-UNSAFE-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s32>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
- ; GFX10-UNSAFE-NEXT: [[FADD:%[0-9]+]]:_(<3 x s32>) = G_FADD [[BUILD_VECTOR2]], [[FMUL]]
- ; GFX10-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s32>)
- ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](s32)
- ; GFX10-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](s32)
- ; GFX10-UNSAFE-NEXT: $vgpr2 = COPY [[UV2]](s32)
- ; GFX10-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
%4:_(s32) = COPY $vgpr0
%5:_(s32) = COPY $vgpr1
%6:_(s32) = COPY $vgpr2
@@ -1451,6 +1600,124 @@ body: |
...
---
+name: test_3xfloat_add_mul_rhs_contract
+body: |
+ bb.1.entry:
+ liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8
+
+ ; GFX9-LABEL: name: test_3xfloat_add_mul_rhs_contract
+ ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
+ ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32)
+ ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+ ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+ ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
+ ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32)
+ ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s32>) = contract G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
+ ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(<3 x s32>) = contract G_FADD [[BUILD_VECTOR2]], [[FMUL]]
+ ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s32>)
+ ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32)
+ ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
+ ;
+ ; GFX9-DENORM-LABEL: name: test_3xfloat_add_mul_rhs_contract
+ ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8
+ ; GFX9-DENORM-NEXT: {{ $}}
+ ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX9-DENORM-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
+ ; GFX9-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX9-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; GFX9-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; GFX9-DENORM-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32)
+ ; GFX9-DENORM-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+ ; GFX9-DENORM-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+ ; GFX9-DENORM-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
+ ; GFX9-DENORM-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32)
+ ; GFX9-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s32>) = contract G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
+ ; GFX9-DENORM-NEXT: [[FADD:%[0-9]+]]:_(<3 x s32>) = contract G_FADD [[BUILD_VECTOR2]], [[FMUL]]
+ ; GFX9-DENORM-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s32>)
+ ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[UV]](s32)
+ ; GFX9-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX9-DENORM-NEXT: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
+ ;
+ ; GFX10-LABEL: name: test_3xfloat_add_mul_rhs_contract
+ ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
+ ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32)
+ ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+ ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+ ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
+ ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32)
+ ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s32>) = contract G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
+ ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(<3 x s32>) = contract G_FADD [[BUILD_VECTOR2]], [[FMUL]]
+ ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s32>)
+ ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32)
+ ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
+ ;
+ ; GFX10-DENORM-LABEL: name: test_3xfloat_add_mul_rhs_contract
+ ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8
+ ; GFX10-DENORM-NEXT: {{ $}}
+ ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX10-DENORM-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
+ ; GFX10-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX10-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; GFX10-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; GFX10-DENORM-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32)
+ ; GFX10-DENORM-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+ ; GFX10-DENORM-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+ ; GFX10-DENORM-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
+ ; GFX10-DENORM-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32)
+ ; GFX10-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s32>) = contract G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
+ ; GFX10-DENORM-NEXT: [[FADD:%[0-9]+]]:_(<3 x s32>) = contract G_FADD [[BUILD_VECTOR2]], [[FMUL]]
+ ; GFX10-DENORM-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s32>)
+ ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[UV]](s32)
+ ; GFX10-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX10-DENORM-NEXT: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
+ %4:_(s32) = COPY $vgpr0
+ %5:_(s32) = COPY $vgpr1
+ %6:_(s32) = COPY $vgpr2
+ %0:_(<3 x s32>) = G_BUILD_VECTOR %4(s32), %5(s32), %6(s32)
+ %7:_(s32) = COPY $vgpr3
+ %8:_(s32) = COPY $vgpr4
+ %9:_(s32) = COPY $vgpr5
+ %1:_(<3 x s32>) = G_BUILD_VECTOR %7(s32), %8(s32), %9(s32)
+ %10:_(s32) = COPY $vgpr6
+ %11:_(s32) = COPY $vgpr7
+ %12:_(s32) = COPY $vgpr8
+ %2:_(<3 x s32>) = G_BUILD_VECTOR %10(s32), %11(s32), %12(s32)
+ %13:_(<3 x s32>) = contract G_FMUL %0, %1
+ %14:_(<3 x s32>) = contract G_FADD %2, %13
+ %16:_(s32), %17:_(s32), %18:_(s32) = G_UNMERGE_VALUES %14(<3 x s32>)
+ $vgpr0 = COPY %16(s32)
+ $vgpr1 = COPY %17(s32)
+ $vgpr2 = COPY %18(s32)
+ S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
+...
+
+---
name: test_4xhalf_add_mul
body: |
bb.1.entry:
@@ -1474,24 +1741,7 @@ body: |
; GFX9-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>)
; GFX9-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>)
; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
- ; GFX9-CONTRACT-LABEL: name: test_4xhalf_add_mul
- ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
- ; GFX9-CONTRACT-NEXT: {{ $}}
- ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
- ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
- ; GFX9-CONTRACT-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>)
- ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
- ; GFX9-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3
- ; GFX9-CONTRACT-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>)
- ; GFX9-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
- ; GFX9-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
- ; GFX9-CONTRACT-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>)
- ; GFX9-CONTRACT-NEXT: [[FMUL:%[0-9]+]]:_(<4 x s16>) = G_FMUL [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]]
- ; GFX9-CONTRACT-NEXT: [[FADD:%[0-9]+]]:_(<4 x s16>) = G_FADD [[FMUL]], [[CONCAT_VECTORS2]]
- ; GFX9-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FADD]](<4 x s16>)
- ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>)
- ; GFX9-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>)
- ; GFX9-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
+ ;
; GFX9-DENORM-LABEL: name: test_4xhalf_add_mul
; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
; GFX9-DENORM-NEXT: {{ $}}
@@ -1510,24 +1760,7 @@ body: |
; GFX9-DENORM-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>)
; GFX9-DENORM-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>)
; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
- ; GFX9-UNSAFE-LABEL: name: test_4xhalf_add_mul
- ; GFX9-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
- ; GFX9-UNSAFE-NEXT: {{ $}}
- ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
- ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
- ; GFX9-UNSAFE-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>)
- ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
- ; GFX9-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3
- ; GFX9-UNSAFE-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>)
- ; GFX9-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
- ; GFX9-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
- ; GFX9-UNSAFE-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>)
- ; GFX9-UNSAFE-NEXT: [[FMUL:%[0-9]+]]:_(<4 x s16>) = G_FMUL [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]]
- ; GFX9-UNSAFE-NEXT: [[FADD:%[0-9]+]]:_(<4 x s16>) = G_FADD [[FMUL]], [[CONCAT_VECTORS2]]
- ; GFX9-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FADD]](<4 x s16>)
- ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>)
- ; GFX9-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>)
- ; GFX9-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
+ ;
; GFX10-LABEL: name: test_4xhalf_add_mul
; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
; GFX10-NEXT: {{ $}}
@@ -1546,24 +1779,7 @@ body: |
; GFX10-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>)
; GFX10-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>)
; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
- ; GFX10-CONTRACT-LABEL: name: test_4xhalf_add_mul
- ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
- ; GFX10-CONTRACT-NEXT: {{ $}}
- ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
- ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
- ; GFX10-CONTRACT-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>)
- ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
- ; GFX10-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3
- ; GFX10-CONTRACT-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>)
- ; GFX10-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
- ; GFX10-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
- ; GFX10-CONTRACT-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>)
- ; GFX10-CONTRACT-NEXT: [[FMUL:%[0-9]+]]:_(<4 x s16>) = G_FMUL [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]]
- ; GFX10-CONTRACT-NEXT: [[FADD:%[0-9]+]]:_(<4 x s16>) = G_FADD [[FMUL]], [[CONCAT_VECTORS2]]
- ; GFX10-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FADD]](<4 x s16>)
- ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>)
- ; GFX10-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>)
- ; GFX10-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
+ ;
; GFX10-DENORM-LABEL: name: test_4xhalf_add_mul
; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
; GFX10-DENORM-NEXT: {{ $}}
@@ -1582,24 +1798,6 @@ body: |
; GFX10-DENORM-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>)
; GFX10-DENORM-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>)
; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
- ; GFX10-UNSAFE-LABEL: name: test_4xhalf_add_mul
- ; GFX10-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
- ; GFX10-UNSAFE-NEXT: {{ $}}
- ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
- ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
- ; GFX10-UNSAFE-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>)
- ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
- ; GFX10-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3
- ; GFX10-UNSAFE-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>)
- ; GFX10-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
- ; GFX10-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
- ; GFX10-UNSAFE-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>)
- ; GFX10-UNSAFE-NEXT: [[FMUL:%[0-9]+]]:_(<4 x s16>) = G_FMUL [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]]
- ; GFX10-UNSAFE-NEXT: [[FADD:%[0-9]+]]:_(<4 x s16>) = G_FADD [[FMUL]], [[CONCAT_VECTORS2]]
- ; GFX10-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FADD]](<4 x s16>)
- ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>)
- ; GFX10-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>)
- ; GFX10-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
%4:_(<2 x s16>) = COPY $vgpr0
%5:_(<2 x s16>) = COPY $vgpr1
%0:_(<4 x s16>) = G_CONCAT_VECTORS %4(<2 x s16>), %5(<2 x s16>)
@@ -1618,6 +1816,105 @@ body: |
...
---
+name: test_4xhalf_add_mul_contract
+body: |
+ bb.1.entry:
+ liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
+
+ ; GFX9-LABEL: name: test_4xhalf_add_mul_contract
+ ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+ ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
+ ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>)
+ ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
+ ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3
+ ; GFX9-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>)
+ ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
+ ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
+ ; GFX9-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>)
+ ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(<4 x s16>) = contract G_FMUL [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]]
+ ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(<4 x s16>) = contract G_FADD [[FMUL]], [[CONCAT_VECTORS2]]
+ ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FADD]](<4 x s16>)
+ ; GFX9-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>)
+ ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>)
+ ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
+ ;
+ ; GFX9-DENORM-LABEL: name: test_4xhalf_add_mul_contract
+ ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
+ ; GFX9-DENORM-NEXT: {{ $}}
+ ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+ ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
+ ; GFX9-DENORM-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>)
+ ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
+ ; GFX9-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3
+ ; GFX9-DENORM-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>)
+ ; GFX9-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
+ ; GFX9-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
+ ; GFX9-DENORM-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>)
+ ; GFX9-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(<4 x s16>) = contract G_FMUL [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]]
+ ; GFX9-DENORM-NEXT: [[FADD:%[0-9]+]]:_(<4 x s16>) = contract G_FADD [[FMUL]], [[CONCAT_VECTORS2]]
+ ; GFX9-DENORM-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FADD]](<4 x s16>)
+ ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>)
+ ; GFX9-DENORM-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>)
+ ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
+ ;
+ ; GFX10-LABEL: name: test_4xhalf_add_mul_contract
+ ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+ ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
+ ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>)
+ ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
+ ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3
+ ; GFX10-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>)
+ ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
+ ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
+ ; GFX10-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>)
+ ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(<4 x s16>) = contract G_FMUL [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]]
+ ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(<4 x s16>) = contract G_FADD [[FMUL]], [[CONCAT_VECTORS2]]
+ ; GFX10-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FADD]](<4 x s16>)
+ ; GFX10-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>)
+ ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>)
+ ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
+ ;
+ ; GFX10-DENORM-LABEL: name: test_4xhalf_add_mul_contract
+ ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
+ ; GFX10-DENORM-NEXT: {{ $}}
+ ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+ ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
+ ; GFX10-DENORM-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>)
+ ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
+ ; GFX10-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3
+ ; GFX10-DENORM-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>)
+ ; GFX10-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
+ ; GFX10-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
+ ; GFX10-DENORM-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>)
+ ; GFX10-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(<4 x s16>) = contract G_FMUL [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]]
+ ; GFX10-DENORM-NEXT: [[FADD:%[0-9]+]]:_(<4 x s16>) = contract G_FADD [[FMUL]], [[CONCAT_VECTORS2]]
+ ; GFX10-DENORM-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FADD]](<4 x s16>)
+ ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>)
+ ; GFX10-DENORM-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>)
+ ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
+ %4:_(<2 x s16>) = COPY $vgpr0
+ %5:_(<2 x s16>) = COPY $vgpr1
+ %0:_(<4 x s16>) = G_CONCAT_VECTORS %4(<2 x s16>), %5(<2 x s16>)
+ %6:_(<2 x s16>) = COPY $vgpr2
+ %7:_(<2 x s16>) = COPY $vgpr3
+ %1:_(<4 x s16>) = G_CONCAT_VECTORS %6(<2 x s16>), %7(<2 x s16>)
+ %8:_(<2 x s16>) = COPY $vgpr4
+ %9:_(<2 x s16>) = COPY $vgpr5
+ %2:_(<4 x s16>) = G_CONCAT_VECTORS %8(<2 x s16>), %9(<2 x s16>)
+ %10:_(<4 x s16>) = contract G_FMUL %0, %1
+ %11:_(<4 x s16>) = contract G_FADD %10, %2
+ %13:_(<2 x s16>), %14:_(<2 x s16>) = G_UNMERGE_VALUES %11(<4 x s16>)
+ $vgpr0 = COPY %13(<2 x s16>)
+ $vgpr1 = COPY %14(<2 x s16>)
+ S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
+...
+
+
+---
name: test_3xhalf_add_mul_rhs
body: |
bb.1.entry:
@@ -1648,31 +1945,6 @@ body: |
; GFX9-NEXT: $vgpr1 = COPY [[UV7]](<2 x s16>)
; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
;
- ; GFX9-CONTRACT-LABEL: name: test_3xhalf_add_mul_rhs
- ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
- ; GFX9-CONTRACT-NEXT: {{ $}}
- ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
- ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
- ; GFX9-CONTRACT-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
- ; GFX9-CONTRACT-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>)
- ; GFX9-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
- ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
- ; GFX9-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3
- ; GFX9-CONTRACT-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[DEF]](<2 x s16>)
- ; GFX9-CONTRACT-NEXT: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x s16>)
- ; GFX9-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
- ; GFX9-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
- ; GFX9-CONTRACT-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[DEF]](<2 x s16>)
- ; GFX9-CONTRACT-NEXT: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x s16>)
- ; GFX9-CONTRACT-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s16>) = G_FMUL [[UV]], [[UV2]]
- ; GFX9-CONTRACT-NEXT: [[FADD:%[0-9]+]]:_(<3 x s16>) = G_FADD [[UV4]], [[FMUL]]
- ; GFX9-CONTRACT-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF
- ; GFX9-CONTRACT-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FADD]](<3 x s16>), [[DEF1]](<3 x s16>)
- ; GFX9-CONTRACT-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>)
- ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[UV6]](<2 x s16>)
- ; GFX9-CONTRACT-NEXT: $vgpr1 = COPY [[UV7]](<2 x s16>)
- ; GFX9-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
- ;
; GFX9-DENORM-LABEL: name: test_3xhalf_add_mul_rhs
; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
; GFX9-DENORM-NEXT: {{ $}}
@@ -1698,31 +1970,6 @@ body: |
; GFX9-DENORM-NEXT: $vgpr1 = COPY [[UV7]](<2 x s16>)
; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
;
- ; GFX9-UNSAFE-LABEL: name: test_3xhalf_add_mul_rhs
- ; GFX9-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
- ; GFX9-UNSAFE-NEXT: {{ $}}
- ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
- ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
- ; GFX9-UNSAFE-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
- ; GFX9-UNSAFE-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>)
- ; GFX9-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
- ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
- ; GFX9-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3
- ; GFX9-UNSAFE-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[DEF]](<2 x s16>)
- ; GFX9-UNSAFE-NEXT: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x s16>)
- ; GFX9-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
- ; GFX9-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
- ; GFX9-UNSAFE-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[DEF]](<2 x s16>)
- ; GFX9-UNSAFE-NEXT: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x s16>)
- ; GFX9-UNSAFE-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s16>) = G_FMUL [[UV]], [[UV2]]
- ; GFX9-UNSAFE-NEXT: [[FADD:%[0-9]+]]:_(<3 x s16>) = G_FADD [[UV4]], [[FMUL]]
- ; GFX9-UNSAFE-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF
- ; GFX9-UNSAFE-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FADD]](<3 x s16>), [[DEF1]](<3 x s16>)
- ; GFX9-UNSAFE-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>)
- ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[UV6]](<2 x s16>)
- ; GFX9-UNSAFE-NEXT: $vgpr1 = COPY [[UV7]](<2 x s16>)
- ; GFX9-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
- ;
; GFX10-LABEL: name: test_3xhalf_add_mul_rhs
; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
; GFX10-NEXT: {{ $}}
@@ -1748,31 +1995,6 @@ body: |
; GFX10-NEXT: $vgpr1 = COPY [[UV7]](<2 x s16>)
; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
;
- ; GFX10-CONTRACT-LABEL: name: test_3xhalf_add_mul_rhs
- ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
- ; GFX10-CONTRACT-NEXT: {{ $}}
- ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
- ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
- ; GFX10-CONTRACT-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
- ; GFX10-CONTRACT-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>)
- ; GFX10-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
- ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
- ; GFX10-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3
- ; GFX10-CONTRACT-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[DEF]](<2 x s16>)
- ; GFX10-CONTRACT-NEXT: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x s16>)
- ; GFX10-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
- ; GFX10-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
- ; GFX10-CONTRACT-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[DEF]](<2 x s16>)
- ; GFX10-CONTRACT-NEXT: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x s16>)
- ; GFX10-CONTRACT-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s16>) = G_FMUL [[UV]], [[UV2]]
- ; GFX10-CONTRACT-NEXT: [[FADD:%[0-9]+]]:_(<3 x s16>) = G_FADD [[UV4]], [[FMUL]]
- ; GFX10-CONTRACT-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF
- ; GFX10-CONTRACT-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FADD]](<3 x s16>), [[DEF1]](<3 x s16>)
- ; GFX10-CONTRACT-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>)
- ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[UV6]](<2 x s16>)
- ; GFX10-CONTRACT-NEXT: $vgpr1 = COPY [[UV7]](<2 x s16>)
- ; GFX10-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
- ;
; GFX10-DENORM-LABEL: name: test_3xhalf_add_mul_rhs
; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
; GFX10-DENORM-NEXT: {{ $}}
@@ -1797,31 +2019,6 @@ body: |
; GFX10-DENORM-NEXT: $vgpr0 = COPY [[UV6]](<2 x s16>)
; GFX10-DENORM-NEXT: $vgpr1 = COPY [[UV7]](<2 x s16>)
; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
- ;
- ; GFX10-UNSAFE-LABEL: name: test_3xhalf_add_mul_rhs
- ; GFX10-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
- ; GFX10-UNSAFE-NEXT: {{ $}}
- ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
- ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
- ; GFX10-UNSAFE-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
- ; GFX10-UNSAFE-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>)
- ; GFX10-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
- ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
- ; GFX10-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3
- ; GFX10-UNSAFE-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[DEF]](<2 x s16>)
- ; GFX10-UNSAFE-NEXT: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x s16>)
- ; GFX10-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
- ; GFX10-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
- ; GFX10-UNSAFE-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[DEF]](<2 x s16>)
- ; GFX10-UNSAFE-NEXT: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x s16>)
- ; GFX10-UNSAFE-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s16>) = G_FMUL [[UV]], [[UV2]]
- ; GFX10-UNSAFE-NEXT: [[FADD:%[0-9]+]]:_(<3 x s16>) = G_FADD [[UV4]], [[FMUL]]
- ; GFX10-UNSAFE-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF
- ; GFX10-UNSAFE-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FADD]](<3 x s16>), [[DEF1]](<3 x s16>)
- ; GFX10-UNSAFE-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>)
- ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[UV6]](<2 x s16>)
- ; GFX10-UNSAFE-NEXT: $vgpr1 = COPY [[UV7]](<2 x s16>)
- ; GFX10-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
%4:_(<2 x s16>) = COPY $vgpr0
%5:_(<2 x s16>) = COPY $vgpr1
%10:_(<2 x s16>) = G_IMPLICIT_DEF
@@ -1846,6 +2043,134 @@ body: |
...
---
+name: test_3xhalf_add_mul_rhs_contract
+body: |
+ bb.1.entry:
+ liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
+
+ ; GFX9-LABEL: name: test_3xhalf_add_mul_rhs_contract
+ ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+ ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
+ ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+ ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>)
+ ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
+ ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
+ ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3
+ ; GFX9-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[DEF]](<2 x s16>)
+ ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x s16>)
+ ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
+ ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
+ ; GFX9-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[DEF]](<2 x s16>)
+ ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x s16>)
+ ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s16>) = contract G_FMUL [[UV]], [[UV2]]
+ ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(<3 x s16>) = contract G_FADD [[UV4]], [[FMUL]]
+ ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF
+ ; GFX9-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FADD]](<3 x s16>), [[DEF1]](<3 x s16>)
+ ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>)
+ ; GFX9-NEXT: $vgpr0 = COPY [[UV6]](<2 x s16>)
+ ; GFX9-NEXT: $vgpr1 = COPY [[UV7]](<2 x s16>)
+ ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
+ ;
+ ; GFX9-DENORM-LABEL: name: test_3xhalf_add_mul_rhs_contract
+ ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
+ ; GFX9-DENORM-NEXT: {{ $}}
+ ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+ ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
+ ; GFX9-DENORM-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+ ; GFX9-DENORM-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>)
+ ; GFX9-DENORM-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
+ ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
+ ; GFX9-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3
+ ; GFX9-DENORM-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[DEF]](<2 x s16>)
+ ; GFX9-DENORM-NEXT: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x s16>)
+ ; GFX9-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
+ ; GFX9-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
+ ; GFX9-DENORM-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[DEF]](<2 x s16>)
+ ; GFX9-DENORM-NEXT: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x s16>)
+ ; GFX9-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s16>) = contract G_FMUL [[UV]], [[UV2]]
+ ; GFX9-DENORM-NEXT: [[FADD:%[0-9]+]]:_(<3 x s16>) = contract G_FADD [[UV4]], [[FMUL]]
+ ; GFX9-DENORM-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF
+ ; GFX9-DENORM-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FADD]](<3 x s16>), [[DEF1]](<3 x s16>)
+ ; GFX9-DENORM-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>)
+ ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[UV6]](<2 x s16>)
+ ; GFX9-DENORM-NEXT: $vgpr1 = COPY [[UV7]](<2 x s16>)
+ ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
+ ;
+ ; GFX10-LABEL: name: test_3xhalf_add_mul_rhs_contract
+ ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+ ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
+ ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+ ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>)
+ ; GFX10-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
+ ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
+ ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3
+ ; GFX10-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[DEF]](<2 x s16>)
+ ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x s16>)
+ ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
+ ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
+ ; GFX10-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[DEF]](<2 x s16>)
+ ; GFX10-NEXT: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x s16>)
+ ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s16>) = contract G_FMUL [[UV]], [[UV2]]
+ ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(<3 x s16>) = contract G_FADD [[UV4]], [[FMUL]]
+ ; GFX10-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF
+ ; GFX10-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FADD]](<3 x s16>), [[DEF1]](<3 x s16>)
+ ; GFX10-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>)
+ ; GFX10-NEXT: $vgpr0 = COPY [[UV6]](<2 x s16>)
+ ; GFX10-NEXT: $vgpr1 = COPY [[UV7]](<2 x s16>)
+ ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
+ ;
+ ; GFX10-DENORM-LABEL: name: test_3xhalf_add_mul_rhs_contract
+ ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
+ ; GFX10-DENORM-NEXT: {{ $}}
+ ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+ ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
+ ; GFX10-DENORM-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+ ; GFX10-DENORM-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>)
+ ; GFX10-DENORM-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
+ ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
+ ; GFX10-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3
+ ; GFX10-DENORM-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[DEF]](<2 x s16>)
+ ; GFX10-DENORM-NEXT: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x s16>)
+ ; GFX10-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
+ ; GFX10-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
+ ; GFX10-DENORM-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[DEF]](<2 x s16>)
+ ; GFX10-DENORM-NEXT: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x s16>)
+ ; GFX10-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s16>) = contract G_FMUL [[UV]], [[UV2]]
+ ; GFX10-DENORM-NEXT: [[FADD:%[0-9]+]]:_(<3 x s16>) = contract G_FADD [[UV4]], [[FMUL]]
+ ; GFX10-DENORM-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF
+ ; GFX10-DENORM-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FADD]](<3 x s16>), [[DEF1]](<3 x s16>)
+ ; GFX10-DENORM-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>)
+ ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[UV6]](<2 x s16>)
+ ; GFX10-DENORM-NEXT: $vgpr1 = COPY [[UV7]](<2 x s16>)
+ ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
+ %4:_(<2 x s16>) = COPY $vgpr0
+ %5:_(<2 x s16>) = COPY $vgpr1
+ %10:_(<2 x s16>) = G_IMPLICIT_DEF
+ %11:_(<6 x s16>) = G_CONCAT_VECTORS %4(<2 x s16>), %5(<2 x s16>), %10(<2 x s16>)
+ %0:_(<3 x s16>), %12:_(<3 x s16>) = G_UNMERGE_VALUES %11(<6 x s16>)
+ %6:_(<2 x s16>) = COPY $vgpr2
+ %7:_(<2 x s16>) = COPY $vgpr3
+ %13:_(<6 x s16>) = G_CONCAT_VECTORS %6(<2 x s16>), %7(<2 x s16>), %10(<2 x s16>)
+ %1:_(<3 x s16>), %14:_(<3 x s16>) = G_UNMERGE_VALUES %13(<6 x s16>)
+ %8:_(<2 x s16>) = COPY $vgpr4
+ %9:_(<2 x s16>) = COPY $vgpr5
+ %15:_(<6 x s16>) = G_CONCAT_VECTORS %8(<2 x s16>), %9(<2 x s16>), %10(<2 x s16>)
+ %2:_(<3 x s16>), %16:_(<3 x s16>) = G_UNMERGE_VALUES %15(<6 x s16>)
+ %17:_(<3 x s16>) = contract G_FMUL %0, %1
+ %18:_(<3 x s16>) = contract G_FADD %2, %17
+ %22:_(<3 x s16>) = G_IMPLICIT_DEF
+ %23:_(<6 x s16>) = G_CONCAT_VECTORS %18(<3 x s16>), %22(<3 x s16>)
+ %20:_(<2 x s16>), %21:_(<2 x s16>), %24:_(<2 x s16>) = G_UNMERGE_VALUES %23(<6 x s16>)
+ $vgpr0 = COPY %20(<2 x s16>)
+ $vgpr1 = COPY %21(<2 x s16>)
+ S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
+...
+
+---
name: test_4xdouble_add_mul
body: |
bb.1.entry:
@@ -1905,60 +2230,7 @@ body: |
; GFX9-NEXT: $vgpr6 = COPY [[UV6]](s32)
; GFX9-NEXT: $vgpr7 = COPY [[UV7]](s32)
; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
- ; GFX9-CONTRACT-LABEL: name: test_4xdouble_add_mul
- ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23
- ; GFX9-CONTRACT-NEXT: {{ $}}
- ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GFX9-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; GFX9-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
- ; GFX9-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
- ; GFX9-CONTRACT-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
- ; GFX9-CONTRACT-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
- ; GFX9-CONTRACT-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
- ; GFX9-CONTRACT-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
- ; GFX9-CONTRACT-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
- ; GFX9-CONTRACT-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32)
- ; GFX9-CONTRACT-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64)
- ; GFX9-CONTRACT-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
- ; GFX9-CONTRACT-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
- ; GFX9-CONTRACT-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
- ; GFX9-CONTRACT-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
- ; GFX9-CONTRACT-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
- ; GFX9-CONTRACT-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
- ; GFX9-CONTRACT-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
- ; GFX9-CONTRACT-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
- ; GFX9-CONTRACT-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32)
- ; GFX9-CONTRACT-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32)
- ; GFX9-CONTRACT-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32)
- ; GFX9-CONTRACT-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32)
- ; GFX9-CONTRACT-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64)
- ; GFX9-CONTRACT-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16
- ; GFX9-CONTRACT-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17
- ; GFX9-CONTRACT-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18
- ; GFX9-CONTRACT-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19
- ; GFX9-CONTRACT-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20
- ; GFX9-CONTRACT-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21
- ; GFX9-CONTRACT-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22
- ; GFX9-CONTRACT-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23
- ; GFX9-CONTRACT-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32)
- ; GFX9-CONTRACT-NEXT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY18]](s32), [[COPY19]](s32)
- ; GFX9-CONTRACT-NEXT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32)
- ; GFX9-CONTRACT-NEXT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32)
- ; GFX9-CONTRACT-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64)
- ; GFX9-CONTRACT-NEXT: [[FMUL:%[0-9]+]]:_(<4 x s64>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
- ; GFX9-CONTRACT-NEXT: [[FADD:%[0-9]+]]:_(<4 x s64>) = G_FADD [[FMUL]], [[BUILD_VECTOR2]]
- ; GFX9-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s64>)
- ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](s32)
- ; GFX9-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](s32)
- ; GFX9-CONTRACT-NEXT: $vgpr2 = COPY [[UV2]](s32)
- ; GFX9-CONTRACT-NEXT: $vgpr3 = COPY [[UV3]](s32)
- ; GFX9-CONTRACT-NEXT: $vgpr4 = COPY [[UV4]](s32)
- ; GFX9-CONTRACT-NEXT: $vgpr5 = COPY [[UV5]](s32)
- ; GFX9-CONTRACT-NEXT: $vgpr6 = COPY [[UV6]](s32)
- ; GFX9-CONTRACT-NEXT: $vgpr7 = COPY [[UV7]](s32)
- ; GFX9-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
+ ;
; GFX9-DENORM-LABEL: name: test_4xdouble_add_mul
; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23
; GFX9-DENORM-NEXT: {{ $}}
@@ -2013,60 +2285,7 @@ body: |
; GFX9-DENORM-NEXT: $vgpr6 = COPY [[UV6]](s32)
; GFX9-DENORM-NEXT: $vgpr7 = COPY [[UV7]](s32)
; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
- ; GFX9-UNSAFE-LABEL: name: test_4xdouble_add_mul
- ; GFX9-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23
- ; GFX9-UNSAFE-NEXT: {{ $}}
- ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GFX9-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; GFX9-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
- ; GFX9-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
- ; GFX9-UNSAFE-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
- ; GFX9-UNSAFE-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
- ; GFX9-UNSAFE-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
- ; GFX9-UNSAFE-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
- ; GFX9-UNSAFE-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
- ; GFX9-UNSAFE-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32)
- ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64)
- ; GFX9-UNSAFE-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
- ; GFX9-UNSAFE-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
- ; GFX9-UNSAFE-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
- ; GFX9-UNSAFE-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
- ; GFX9-UNSAFE-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
- ; GFX9-UNSAFE-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
- ; GFX9-UNSAFE-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
- ; GFX9-UNSAFE-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
- ; GFX9-UNSAFE-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32)
- ; GFX9-UNSAFE-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32)
- ; GFX9-UNSAFE-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32)
- ; GFX9-UNSAFE-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32)
- ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64)
- ; GFX9-UNSAFE-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16
- ; GFX9-UNSAFE-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17
- ; GFX9-UNSAFE-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18
- ; GFX9-UNSAFE-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19
- ; GFX9-UNSAFE-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20
- ; GFX9-UNSAFE-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21
- ; GFX9-UNSAFE-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22
- ; GFX9-UNSAFE-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23
- ; GFX9-UNSAFE-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32)
- ; GFX9-UNSAFE-NEXT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY18]](s32), [[COPY19]](s32)
- ; GFX9-UNSAFE-NEXT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32)
- ; GFX9-UNSAFE-NEXT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32)
- ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64)
- ; GFX9-UNSAFE-NEXT: [[FMUL:%[0-9]+]]:_(<4 x s64>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
- ; GFX9-UNSAFE-NEXT: [[FADD:%[0-9]+]]:_(<4 x s64>) = G_FADD [[FMUL]], [[BUILD_VECTOR2]]
- ; GFX9-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s64>)
- ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](s32)
- ; GFX9-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](s32)
- ; GFX9-UNSAFE-NEXT: $vgpr2 = COPY [[UV2]](s32)
- ; GFX9-UNSAFE-NEXT: $vgpr3 = COPY [[UV3]](s32)
- ; GFX9-UNSAFE-NEXT: $vgpr4 = COPY [[UV4]](s32)
- ; GFX9-UNSAFE-NEXT: $vgpr5 = COPY [[UV5]](s32)
- ; GFX9-UNSAFE-NEXT: $vgpr6 = COPY [[UV6]](s32)
- ; GFX9-UNSAFE-NEXT: $vgpr7 = COPY [[UV7]](s32)
- ; GFX9-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
+ ;
; GFX10-LABEL: name: test_4xdouble_add_mul
; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23
; GFX10-NEXT: {{ $}}
@@ -2121,60 +2340,7 @@ body: |
; GFX10-NEXT: $vgpr6 = COPY [[UV6]](s32)
; GFX10-NEXT: $vgpr7 = COPY [[UV7]](s32)
; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
- ; GFX10-CONTRACT-LABEL: name: test_4xdouble_add_mul
- ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23
- ; GFX10-CONTRACT-NEXT: {{ $}}
- ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GFX10-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; GFX10-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
- ; GFX10-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
- ; GFX10-CONTRACT-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
- ; GFX10-CONTRACT-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
- ; GFX10-CONTRACT-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
- ; GFX10-CONTRACT-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
- ; GFX10-CONTRACT-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
- ; GFX10-CONTRACT-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32)
- ; GFX10-CONTRACT-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64)
- ; GFX10-CONTRACT-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
- ; GFX10-CONTRACT-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
- ; GFX10-CONTRACT-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
- ; GFX10-CONTRACT-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
- ; GFX10-CONTRACT-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
- ; GFX10-CONTRACT-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
- ; GFX10-CONTRACT-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
- ; GFX10-CONTRACT-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
- ; GFX10-CONTRACT-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32)
- ; GFX10-CONTRACT-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32)
- ; GFX10-CONTRACT-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32)
- ; GFX10-CONTRACT-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32)
- ; GFX10-CONTRACT-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64)
- ; GFX10-CONTRACT-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16
- ; GFX10-CONTRACT-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17
- ; GFX10-CONTRACT-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18
- ; GFX10-CONTRACT-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19
- ; GFX10-CONTRACT-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20
- ; GFX10-CONTRACT-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21
- ; GFX10-CONTRACT-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22
- ; GFX10-CONTRACT-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23
- ; GFX10-CONTRACT-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32)
- ; GFX10-CONTRACT-NEXT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY18]](s32), [[COPY19]](s32)
- ; GFX10-CONTRACT-NEXT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32)
- ; GFX10-CONTRACT-NEXT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32)
- ; GFX10-CONTRACT-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64)
- ; GFX10-CONTRACT-NEXT: [[FMUL:%[0-9]+]]:_(<4 x s64>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
- ; GFX10-CONTRACT-NEXT: [[FADD:%[0-9]+]]:_(<4 x s64>) = G_FADD [[FMUL]], [[BUILD_VECTOR2]]
- ; GFX10-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s64>)
- ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](s32)
- ; GFX10-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](s32)
- ; GFX10-CONTRACT-NEXT: $vgpr2 = COPY [[UV2]](s32)
- ; GFX10-CONTRACT-NEXT: $vgpr3 = COPY [[UV3]](s32)
- ; GFX10-CONTRACT-NEXT: $vgpr4 = COPY [[UV4]](s32)
- ; GFX10-CONTRACT-NEXT: $vgpr5 = COPY [[UV5]](s32)
- ; GFX10-CONTRACT-NEXT: $vgpr6 = COPY [[UV6]](s32)
- ; GFX10-CONTRACT-NEXT: $vgpr7 = COPY [[UV7]](s32)
- ; GFX10-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
+ ;
; GFX10-DENORM-LABEL: name: test_4xdouble_add_mul
; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23
; GFX10-DENORM-NEXT: {{ $}}
@@ -2229,60 +2395,6 @@ body: |
; GFX10-DENORM-NEXT: $vgpr6 = COPY [[UV6]](s32)
; GFX10-DENORM-NEXT: $vgpr7 = COPY [[UV7]](s32)
; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
- ; GFX10-UNSAFE-LABEL: name: test_4xdouble_add_mul
- ; GFX10-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23
- ; GFX10-UNSAFE-NEXT: {{ $}}
- ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GFX10-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; GFX10-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
- ; GFX10-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
- ; GFX10-UNSAFE-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
- ; GFX10-UNSAFE-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
- ; GFX10-UNSAFE-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
- ; GFX10-UNSAFE-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
- ; GFX10-UNSAFE-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
- ; GFX10-UNSAFE-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32)
- ; GFX10-UNSAFE-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64)
- ; GFX10-UNSAFE-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
- ; GFX10-UNSAFE-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
- ; GFX10-UNSAFE-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
- ; GFX10-UNSAFE-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
- ; GFX10-UNSAFE-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
- ; GFX10-UNSAFE-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
- ; GFX10-UNSAFE-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
- ; GFX10-UNSAFE-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
- ; GFX10-UNSAFE-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32)
- ; GFX10-UNSAFE-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32)
- ; GFX10-UNSAFE-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32)
- ; GFX10-UNSAFE-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32)
- ; GFX10-UNSAFE-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64)
- ; GFX10-UNSAFE-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16
- ; GFX10-UNSAFE-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17
- ; GFX10-UNSAFE-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18
- ; GFX10-UNSAFE-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19
- ; GFX10-UNSAFE-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20
- ; GFX10-UNSAFE-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21
- ; GFX10-UNSAFE-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22
- ; GFX10-UNSAFE-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23
- ; GFX10-UNSAFE-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32)
- ; GFX10-UNSAFE-NEXT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY18]](s32), [[COPY19]](s32)
- ; GFX10-UNSAFE-NEXT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32)
- ; GFX10-UNSAFE-NEXT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32)
- ; GFX10-UNSAFE-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64)
- ; GFX10-UNSAFE-NEXT: [[FMUL:%[0-9]+]]:_(<4 x s64>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
- ; GFX10-UNSAFE-NEXT: [[FADD:%[0-9]+]]:_(<4 x s64>) = G_FADD [[FMUL]], [[BUILD_VECTOR2]]
- ; GFX10-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s64>)
- ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](s32)
- ; GFX10-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](s32)
- ; GFX10-UNSAFE-NEXT: $vgpr2 = COPY [[UV2]](s32)
- ; GFX10-UNSAFE-NEXT: $vgpr3 = COPY [[UV3]](s32)
- ; GFX10-UNSAFE-NEXT: $vgpr4 = COPY [[UV4]](s32)
- ; GFX10-UNSAFE-NEXT: $vgpr5 = COPY [[UV5]](s32)
- ; GFX10-UNSAFE-NEXT: $vgpr6 = COPY [[UV6]](s32)
- ; GFX10-UNSAFE-NEXT: $vgpr7 = COPY [[UV7]](s32)
- ; GFX10-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
%4:_(s32) = COPY $vgpr0
%5:_(s32) = COPY $vgpr1
%6:_(s32) = COPY $vgpr2
@@ -2337,6 +2449,284 @@ body: |
...
---
+name: test_4xdouble_add_mul_contract
+body: |
+ bb.1.entry:
+ liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23
+
+ ; GFX9-LABEL: name: test_4xdouble_add_mul_contract
+ ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+ ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+ ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+ ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+ ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
+ ; GFX9-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64)
+ ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
+ ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
+ ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
+ ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
+ ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
+ ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
+ ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
+ ; GFX9-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
+ ; GFX9-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32)
+ ; GFX9-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX9-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32)
+ ; GFX9-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64)
+ ; GFX9-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16
+ ; GFX9-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17
+ ; GFX9-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18
+ ; GFX9-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19
+ ; GFX9-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20
+ ; GFX9-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21
+ ; GFX9-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22
+ ; GFX9-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23
+ ; GFX9-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32)
+ ; GFX9-NEXT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY18]](s32), [[COPY19]](s32)
+ ; GFX9-NEXT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32)
+ ; GFX9-NEXT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64)
+ ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(<4 x s64>) = contract G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
+ ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(<4 x s64>) = contract G_FADD [[FMUL]], [[BUILD_VECTOR2]]
+ ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s64>)
+ ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32)
+ ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX9-NEXT: $vgpr4 = COPY [[UV4]](s32)
+ ; GFX9-NEXT: $vgpr5 = COPY [[UV5]](s32)
+ ; GFX9-NEXT: $vgpr6 = COPY [[UV6]](s32)
+ ; GFX9-NEXT: $vgpr7 = COPY [[UV7]](s32)
+ ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
+ ;
+ ; GFX9-DENORM-LABEL: name: test_4xdouble_add_mul_contract
+ ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23
+ ; GFX9-DENORM-NEXT: {{ $}}
+ ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX9-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX9-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; GFX9-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; GFX9-DENORM-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+ ; GFX9-DENORM-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+ ; GFX9-DENORM-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+ ; GFX9-DENORM-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+ ; GFX9-DENORM-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
+ ; GFX9-DENORM-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX9-DENORM-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64)
+ ; GFX9-DENORM-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
+ ; GFX9-DENORM-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
+ ; GFX9-DENORM-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
+ ; GFX9-DENORM-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
+ ; GFX9-DENORM-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
+ ; GFX9-DENORM-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
+ ; GFX9-DENORM-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
+ ; GFX9-DENORM-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
+ ; GFX9-DENORM-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32)
+ ; GFX9-DENORM-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX9-DENORM-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32)
+ ; GFX9-DENORM-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32)
+ ; GFX9-DENORM-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64)
+ ; GFX9-DENORM-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16
+ ; GFX9-DENORM-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17
+ ; GFX9-DENORM-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18
+ ; GFX9-DENORM-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19
+ ; GFX9-DENORM-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20
+ ; GFX9-DENORM-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21
+ ; GFX9-DENORM-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22
+ ; GFX9-DENORM-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23
+ ; GFX9-DENORM-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32)
+ ; GFX9-DENORM-NEXT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY18]](s32), [[COPY19]](s32)
+ ; GFX9-DENORM-NEXT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32)
+ ; GFX9-DENORM-NEXT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32)
+ ; GFX9-DENORM-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64)
+ ; GFX9-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(<4 x s64>) = contract G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
+ ; GFX9-DENORM-NEXT: [[FADD:%[0-9]+]]:_(<4 x s64>) = contract G_FADD [[FMUL]], [[BUILD_VECTOR2]]
+ ; GFX9-DENORM-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s64>)
+ ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[UV]](s32)
+ ; GFX9-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX9-DENORM-NEXT: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX9-DENORM-NEXT: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX9-DENORM-NEXT: $vgpr4 = COPY [[UV4]](s32)
+ ; GFX9-DENORM-NEXT: $vgpr5 = COPY [[UV5]](s32)
+ ; GFX9-DENORM-NEXT: $vgpr6 = COPY [[UV6]](s32)
+ ; GFX9-DENORM-NEXT: $vgpr7 = COPY [[UV7]](s32)
+ ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
+ ;
+ ; GFX10-LABEL: name: test_4xdouble_add_mul_contract
+ ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+ ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+ ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+ ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+ ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
+ ; GFX10-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64)
+ ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
+ ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
+ ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
+ ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
+ ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
+ ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
+ ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
+ ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
+ ; GFX10-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32)
+ ; GFX10-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX10-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32)
+ ; GFX10-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32)
+ ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64)
+ ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16
+ ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17
+ ; GFX10-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18
+ ; GFX10-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19
+ ; GFX10-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20
+ ; GFX10-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21
+ ; GFX10-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22
+ ; GFX10-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23
+ ; GFX10-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32)
+ ; GFX10-NEXT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY18]](s32), [[COPY19]](s32)
+ ; GFX10-NEXT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32)
+ ; GFX10-NEXT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32)
+ ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64)
+ ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(<4 x s64>) = contract G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
+ ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(<4 x s64>) = contract G_FADD [[FMUL]], [[BUILD_VECTOR2]]
+ ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s64>)
+ ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32)
+ ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX10-NEXT: $vgpr4 = COPY [[UV4]](s32)
+ ; GFX10-NEXT: $vgpr5 = COPY [[UV5]](s32)
+ ; GFX10-NEXT: $vgpr6 = COPY [[UV6]](s32)
+ ; GFX10-NEXT: $vgpr7 = COPY [[UV7]](s32)
+ ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
+ ;
+ ; GFX10-DENORM-LABEL: name: test_4xdouble_add_mul_contract
+ ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23
+ ; GFX10-DENORM-NEXT: {{ $}}
+ ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX10-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX10-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; GFX10-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; GFX10-DENORM-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+ ; GFX10-DENORM-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+ ; GFX10-DENORM-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+ ; GFX10-DENORM-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+ ; GFX10-DENORM-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
+ ; GFX10-DENORM-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX10-DENORM-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64)
+ ; GFX10-DENORM-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
+ ; GFX10-DENORM-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
+ ; GFX10-DENORM-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
+ ; GFX10-DENORM-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
+ ; GFX10-DENORM-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
+ ; GFX10-DENORM-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
+ ; GFX10-DENORM-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
+ ; GFX10-DENORM-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
+ ; GFX10-DENORM-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32)
+ ; GFX10-DENORM-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX10-DENORM-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32)
+ ; GFX10-DENORM-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32)
+ ; GFX10-DENORM-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64)
+ ; GFX10-DENORM-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16
+ ; GFX10-DENORM-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17
+ ; GFX10-DENORM-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18
+ ; GFX10-DENORM-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19
+ ; GFX10-DENORM-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20
+ ; GFX10-DENORM-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21
+ ; GFX10-DENORM-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22
+ ; GFX10-DENORM-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23
+ ; GFX10-DENORM-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32)
+ ; GFX10-DENORM-NEXT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY18]](s32), [[COPY19]](s32)
+ ; GFX10-DENORM-NEXT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32)
+ ; GFX10-DENORM-NEXT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32)
+ ; GFX10-DENORM-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64)
+ ; GFX10-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(<4 x s64>) = contract G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
+ ; GFX10-DENORM-NEXT: [[FADD:%[0-9]+]]:_(<4 x s64>) = contract G_FADD [[FMUL]], [[BUILD_VECTOR2]]
+ ; GFX10-DENORM-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s64>)
+ ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[UV]](s32)
+ ; GFX10-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX10-DENORM-NEXT: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX10-DENORM-NEXT: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX10-DENORM-NEXT: $vgpr4 = COPY [[UV4]](s32)
+ ; GFX10-DENORM-NEXT: $vgpr5 = COPY [[UV5]](s32)
+ ; GFX10-DENORM-NEXT: $vgpr6 = COPY [[UV6]](s32)
+ ; GFX10-DENORM-NEXT: $vgpr7 = COPY [[UV7]](s32)
+ ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
+ %4:_(s32) = COPY $vgpr0
+ %5:_(s32) = COPY $vgpr1
+ %6:_(s32) = COPY $vgpr2
+ %7:_(s32) = COPY $vgpr3
+ %8:_(s32) = COPY $vgpr4
+ %9:_(s32) = COPY $vgpr5
+ %10:_(s32) = COPY $vgpr6
+ %11:_(s32) = COPY $vgpr7
+ %28:_(s64) = G_MERGE_VALUES %4(s32), %5(s32)
+ %29:_(s64) = G_MERGE_VALUES %6(s32), %7(s32)
+ %30:_(s64) = G_MERGE_VALUES %8(s32), %9(s32)
+ %31:_(s64) = G_MERGE_VALUES %10(s32), %11(s32)
+ %0:_(<4 x s64>) = G_BUILD_VECTOR %28(s64), %29(s64), %30(s64), %31(s64)
+ %12:_(s32) = COPY $vgpr8
+ %13:_(s32) = COPY $vgpr9
+ %14:_(s32) = COPY $vgpr10
+ %15:_(s32) = COPY $vgpr11
+ %16:_(s32) = COPY $vgpr12
+ %17:_(s32) = COPY $vgpr13
+ %18:_(s32) = COPY $vgpr14
+ %19:_(s32) = COPY $vgpr15
+ %32:_(s64) = G_MERGE_VALUES %12(s32), %13(s32)
+ %33:_(s64) = G_MERGE_VALUES %14(s32), %15(s32)
+ %34:_(s64) = G_MERGE_VALUES %16(s32), %17(s32)
+ %35:_(s64) = G_MERGE_VALUES %18(s32), %19(s32)
+ %1:_(<4 x s64>) = G_BUILD_VECTOR %32(s64), %33(s64), %34(s64), %35(s64)
+ %20:_(s32) = COPY $vgpr16
+ %21:_(s32) = COPY $vgpr17
+ %22:_(s32) = COPY $vgpr18
+ %23:_(s32) = COPY $vgpr19
+ %24:_(s32) = COPY $vgpr20
+ %25:_(s32) = COPY $vgpr21
+ %26:_(s32) = COPY $vgpr22
+ %27:_(s32) = COPY $vgpr23
+ %36:_(s64) = G_MERGE_VALUES %20(s32), %21(s32)
+ %37:_(s64) = G_MERGE_VALUES %22(s32), %23(s32)
+ %38:_(s64) = G_MERGE_VALUES %24(s32), %25(s32)
+ %39:_(s64) = G_MERGE_VALUES %26(s32), %27(s32)
+ %2:_(<4 x s64>) = G_BUILD_VECTOR %36(s64), %37(s64), %38(s64), %39(s64)
+ %40:_(<4 x s64>) = contract G_FMUL %0, %1
+ %41:_(<4 x s64>) = contract G_FADD %40, %2
+ %43:_(s32), %44:_(s32), %45:_(s32), %46:_(s32), %47:_(s32), %48:_(s32), %49:_(s32), %50:_(s32) = G_UNMERGE_VALUES %41(<4 x s64>)
+ $vgpr0 = COPY %43(s32)
+ $vgpr1 = COPY %44(s32)
+ $vgpr2 = COPY %45(s32)
+ $vgpr3 = COPY %46(s32)
+ $vgpr4 = COPY %47(s32)
+ $vgpr5 = COPY %48(s32)
+ $vgpr6 = COPY %49(s32)
+ $vgpr7 = COPY %50(s32)
+ S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
+...
+
+---
name: test_3xdouble_add_mul_rhs
body: |
bb.1.entry:
@@ -2385,49 +2775,7 @@ body: |
; GFX9-NEXT: $vgpr4 = COPY [[UV4]](s32)
; GFX9-NEXT: $vgpr5 = COPY [[UV5]](s32)
; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5
- ; GFX9-CONTRACT-LABEL: name: test_3xdouble_add_mul_rhs
- ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17
- ; GFX9-CONTRACT-NEXT: {{ $}}
- ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GFX9-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; GFX9-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
- ; GFX9-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
- ; GFX9-CONTRACT-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
- ; GFX9-CONTRACT-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
- ; GFX9-CONTRACT-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
- ; GFX9-CONTRACT-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64)
- ; GFX9-CONTRACT-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
- ; GFX9-CONTRACT-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
- ; GFX9-CONTRACT-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
- ; GFX9-CONTRACT-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
- ; GFX9-CONTRACT-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
- ; GFX9-CONTRACT-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
- ; GFX9-CONTRACT-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32)
- ; GFX9-CONTRACT-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32)
- ; GFX9-CONTRACT-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32)
- ; GFX9-CONTRACT-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV3]](s64), [[MV4]](s64), [[MV5]](s64)
- ; GFX9-CONTRACT-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
- ; GFX9-CONTRACT-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
- ; GFX9-CONTRACT-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
- ; GFX9-CONTRACT-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
- ; GFX9-CONTRACT-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16
- ; GFX9-CONTRACT-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17
- ; GFX9-CONTRACT-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32)
- ; GFX9-CONTRACT-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32)
- ; GFX9-CONTRACT-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32)
- ; GFX9-CONTRACT-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV6]](s64), [[MV7]](s64), [[MV8]](s64)
- ; GFX9-CONTRACT-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s64>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
- ; GFX9-CONTRACT-NEXT: [[FADD:%[0-9]+]]:_(<3 x s64>) = G_FADD [[BUILD_VECTOR2]], [[FMUL]]
- ; GFX9-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s64>)
- ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](s32)
- ; GFX9-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](s32)
- ; GFX9-CONTRACT-NEXT: $vgpr2 = COPY [[UV2]](s32)
- ; GFX9-CONTRACT-NEXT: $vgpr3 = COPY [[UV3]](s32)
- ; GFX9-CONTRACT-NEXT: $vgpr4 = COPY [[UV4]](s32)
- ; GFX9-CONTRACT-NEXT: $vgpr5 = COPY [[UV5]](s32)
- ; GFX9-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5
+ ;
; GFX9-DENORM-LABEL: name: test_3xdouble_add_mul_rhs
; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17
; GFX9-DENORM-NEXT: {{ $}}
@@ -2471,49 +2819,7 @@ body: |
; GFX9-DENORM-NEXT: $vgpr4 = COPY [[UV4]](s32)
; GFX9-DENORM-NEXT: $vgpr5 = COPY [[UV5]](s32)
; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5
- ; GFX9-UNSAFE-LABEL: name: test_3xdouble_add_mul_rhs
- ; GFX9-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17
- ; GFX9-UNSAFE-NEXT: {{ $}}
- ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GFX9-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; GFX9-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
- ; GFX9-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
- ; GFX9-UNSAFE-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
- ; GFX9-UNSAFE-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
- ; GFX9-UNSAFE-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
- ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64)
- ; GFX9-UNSAFE-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
- ; GFX9-UNSAFE-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
- ; GFX9-UNSAFE-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
- ; GFX9-UNSAFE-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
- ; GFX9-UNSAFE-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
- ; GFX9-UNSAFE-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
- ; GFX9-UNSAFE-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32)
- ; GFX9-UNSAFE-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32)
- ; GFX9-UNSAFE-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32)
- ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV3]](s64), [[MV4]](s64), [[MV5]](s64)
- ; GFX9-UNSAFE-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
- ; GFX9-UNSAFE-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
- ; GFX9-UNSAFE-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
- ; GFX9-UNSAFE-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
- ; GFX9-UNSAFE-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16
- ; GFX9-UNSAFE-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17
- ; GFX9-UNSAFE-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32)
- ; GFX9-UNSAFE-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32)
- ; GFX9-UNSAFE-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32)
- ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV6]](s64), [[MV7]](s64), [[MV8]](s64)
- ; GFX9-UNSAFE-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s64>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
- ; GFX9-UNSAFE-NEXT: [[FADD:%[0-9]+]]:_(<3 x s64>) = G_FADD [[BUILD_VECTOR2]], [[FMUL]]
- ; GFX9-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s64>)
- ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](s32)
- ; GFX9-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](s32)
- ; GFX9-UNSAFE-NEXT: $vgpr2 = COPY [[UV2]](s32)
- ; GFX9-UNSAFE-NEXT: $vgpr3 = COPY [[UV3]](s32)
- ; GFX9-UNSAFE-NEXT: $vgpr4 = COPY [[UV4]](s32)
- ; GFX9-UNSAFE-NEXT: $vgpr5 = COPY [[UV5]](s32)
- ; GFX9-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5
+ ;
; GFX10-LABEL: name: test_3xdouble_add_mul_rhs
; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17
; GFX10-NEXT: {{ $}}
@@ -2557,49 +2863,7 @@ body: |
; GFX10-NEXT: $vgpr4 = COPY [[UV4]](s32)
; GFX10-NEXT: $vgpr5 = COPY [[UV5]](s32)
; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5
- ; GFX10-CONTRACT-LABEL: name: test_3xdouble_add_mul_rhs
- ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17
- ; GFX10-CONTRACT-NEXT: {{ $}}
- ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GFX10-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; GFX10-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
- ; GFX10-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
- ; GFX10-CONTRACT-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
- ; GFX10-CONTRACT-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
- ; GFX10-CONTRACT-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
- ; GFX10-CONTRACT-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64)
- ; GFX10-CONTRACT-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
- ; GFX10-CONTRACT-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
- ; GFX10-CONTRACT-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
- ; GFX10-CONTRACT-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
- ; GFX10-CONTRACT-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
- ; GFX10-CONTRACT-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
- ; GFX10-CONTRACT-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32)
- ; GFX10-CONTRACT-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32)
- ; GFX10-CONTRACT-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32)
- ; GFX10-CONTRACT-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV3]](s64), [[MV4]](s64), [[MV5]](s64)
- ; GFX10-CONTRACT-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
- ; GFX10-CONTRACT-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
- ; GFX10-CONTRACT-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
- ; GFX10-CONTRACT-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
- ; GFX10-CONTRACT-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16
- ; GFX10-CONTRACT-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17
- ; GFX10-CONTRACT-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32)
- ; GFX10-CONTRACT-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32)
- ; GFX10-CONTRACT-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32)
- ; GFX10-CONTRACT-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV6]](s64), [[MV7]](s64), [[MV8]](s64)
- ; GFX10-CONTRACT-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s64>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
- ; GFX10-CONTRACT-NEXT: [[FADD:%[0-9]+]]:_(<3 x s64>) = G_FADD [[BUILD_VECTOR2]], [[FMUL]]
- ; GFX10-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s64>)
- ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](s32)
- ; GFX10-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](s32)
- ; GFX10-CONTRACT-NEXT: $vgpr2 = COPY [[UV2]](s32)
- ; GFX10-CONTRACT-NEXT: $vgpr3 = COPY [[UV3]](s32)
- ; GFX10-CONTRACT-NEXT: $vgpr4 = COPY [[UV4]](s32)
- ; GFX10-CONTRACT-NEXT: $vgpr5 = COPY [[UV5]](s32)
- ; GFX10-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5
+ ;
; GFX10-DENORM-LABEL: name: test_3xdouble_add_mul_rhs
; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17
; GFX10-DENORM-NEXT: {{ $}}
@@ -2643,49 +2907,6 @@ body: |
; GFX10-DENORM-NEXT: $vgpr4 = COPY [[UV4]](s32)
; GFX10-DENORM-NEXT: $vgpr5 = COPY [[UV5]](s32)
; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5
- ; GFX10-UNSAFE-LABEL: name: test_3xdouble_add_mul_rhs
- ; GFX10-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17
- ; GFX10-UNSAFE-NEXT: {{ $}}
- ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GFX10-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; GFX10-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
- ; GFX10-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
- ; GFX10-UNSAFE-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
- ; GFX10-UNSAFE-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
- ; GFX10-UNSAFE-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
- ; GFX10-UNSAFE-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64)
- ; GFX10-UNSAFE-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
- ; GFX10-UNSAFE-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
- ; GFX10-UNSAFE-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
- ; GFX10-UNSAFE-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
- ; GFX10-UNSAFE-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
- ; GFX10-UNSAFE-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
- ; GFX10-UNSAFE-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32)
- ; GFX10-UNSAFE-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32)
- ; GFX10-UNSAFE-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32)
- ; GFX10-UNSAFE-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV3]](s64), [[MV4]](s64), [[MV5]](s64)
- ; GFX10-UNSAFE-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
- ; GFX10-UNSAFE-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
- ; GFX10-UNSAFE-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
- ; GFX10-UNSAFE-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
- ; GFX10-UNSAFE-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16
- ; GFX10-UNSAFE-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17
- ; GFX10-UNSAFE-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32)
- ; GFX10-UNSAFE-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32)
- ; GFX10-UNSAFE-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32)
- ; GFX10-UNSAFE-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV6]](s64), [[MV7]](s64), [[MV8]](s64)
- ; GFX10-UNSAFE-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s64>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
- ; GFX10-UNSAFE-NEXT: [[FADD:%[0-9]+]]:_(<3 x s64>) = G_FADD [[BUILD_VECTOR2]], [[FMUL]]
- ; GFX10-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s64>)
- ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](s32)
- ; GFX10-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](s32)
- ; GFX10-UNSAFE-NEXT: $vgpr2 = COPY [[UV2]](s32)
- ; GFX10-UNSAFE-NEXT: $vgpr3 = COPY [[UV3]](s32)
- ; GFX10-UNSAFE-NEXT: $vgpr4 = COPY [[UV4]](s32)
- ; GFX10-UNSAFE-NEXT: $vgpr5 = COPY [[UV5]](s32)
- ; GFX10-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5
%4:_(s32) = COPY $vgpr0
%5:_(s32) = COPY $vgpr1
%6:_(s32) = COPY $vgpr2
@@ -2727,3 +2948,226 @@ body: |
$vgpr5 = COPY %39(s32)
S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5
...
+
+---
+name: test_3xdouble_add_mul_rhs_contract
+body: |
+ bb.1.entry:
+ liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17
+
+ ; GFX9-LABEL: name: test_3xdouble_add_mul_rhs_contract
+ ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+ ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+ ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64)
+ ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+ ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+ ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
+ ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
+ ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
+ ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
+ ; GFX9-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX9-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32)
+ ; GFX9-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV3]](s64), [[MV4]](s64), [[MV5]](s64)
+ ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
+ ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
+ ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
+ ; GFX9-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
+ ; GFX9-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16
+ ; GFX9-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17
+ ; GFX9-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32)
+ ; GFX9-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32)
+ ; GFX9-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV6]](s64), [[MV7]](s64), [[MV8]](s64)
+ ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s64>) = contract G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
+ ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(<3 x s64>) = contract G_FADD [[BUILD_VECTOR2]], [[FMUL]]
+ ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s64>)
+ ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32)
+ ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX9-NEXT: $vgpr4 = COPY [[UV4]](s32)
+ ; GFX9-NEXT: $vgpr5 = COPY [[UV5]](s32)
+ ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5
+ ;
+ ; GFX9-DENORM-LABEL: name: test_3xdouble_add_mul_rhs_contract
+ ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17
+ ; GFX9-DENORM-NEXT: {{ $}}
+ ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX9-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX9-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; GFX9-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; GFX9-DENORM-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+ ; GFX9-DENORM-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+ ; GFX9-DENORM-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
+ ; GFX9-DENORM-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64)
+ ; GFX9-DENORM-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+ ; GFX9-DENORM-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+ ; GFX9-DENORM-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
+ ; GFX9-DENORM-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
+ ; GFX9-DENORM-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
+ ; GFX9-DENORM-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
+ ; GFX9-DENORM-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX9-DENORM-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32)
+ ; GFX9-DENORM-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX9-DENORM-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV3]](s64), [[MV4]](s64), [[MV5]](s64)
+ ; GFX9-DENORM-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
+ ; GFX9-DENORM-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
+ ; GFX9-DENORM-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
+ ; GFX9-DENORM-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
+ ; GFX9-DENORM-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16
+ ; GFX9-DENORM-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17
+ ; GFX9-DENORM-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32)
+ ; GFX9-DENORM-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32)
+ ; GFX9-DENORM-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32)
+ ; GFX9-DENORM-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV6]](s64), [[MV7]](s64), [[MV8]](s64)
+ ; GFX9-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s64>) = contract G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
+ ; GFX9-DENORM-NEXT: [[FADD:%[0-9]+]]:_(<3 x s64>) = contract G_FADD [[BUILD_VECTOR2]], [[FMUL]]
+ ; GFX9-DENORM-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s64>)
+ ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[UV]](s32)
+ ; GFX9-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX9-DENORM-NEXT: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX9-DENORM-NEXT: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX9-DENORM-NEXT: $vgpr4 = COPY [[UV4]](s32)
+ ; GFX9-DENORM-NEXT: $vgpr5 = COPY [[UV5]](s32)
+ ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5
+ ;
+ ; GFX10-LABEL: name: test_3xdouble_add_mul_rhs_contract
+ ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+ ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+ ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
+ ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64)
+ ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+ ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+ ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
+ ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
+ ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
+ ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
+ ; GFX10-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX10-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32)
+ ; GFX10-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV3]](s64), [[MV4]](s64), [[MV5]](s64)
+ ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
+ ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
+ ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
+ ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
+ ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16
+ ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17
+ ; GFX10-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32)
+ ; GFX10-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32)
+ ; GFX10-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32)
+ ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV6]](s64), [[MV7]](s64), [[MV8]](s64)
+ ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s64>) = contract G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
+ ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(<3 x s64>) = contract G_FADD [[BUILD_VECTOR2]], [[FMUL]]
+ ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s64>)
+ ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32)
+ ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX10-NEXT: $vgpr4 = COPY [[UV4]](s32)
+ ; GFX10-NEXT: $vgpr5 = COPY [[UV5]](s32)
+ ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5
+ ;
+ ; GFX10-DENORM-LABEL: name: test_3xdouble_add_mul_rhs_contract
+ ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17
+ ; GFX10-DENORM-NEXT: {{ $}}
+ ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX10-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX10-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; GFX10-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; GFX10-DENORM-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+ ; GFX10-DENORM-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+ ; GFX10-DENORM-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
+ ; GFX10-DENORM-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64)
+ ; GFX10-DENORM-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+ ; GFX10-DENORM-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+ ; GFX10-DENORM-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
+ ; GFX10-DENORM-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
+ ; GFX10-DENORM-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
+ ; GFX10-DENORM-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
+ ; GFX10-DENORM-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX10-DENORM-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32)
+ ; GFX10-DENORM-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX10-DENORM-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV3]](s64), [[MV4]](s64), [[MV5]](s64)
+ ; GFX10-DENORM-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
+ ; GFX10-DENORM-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
+ ; GFX10-DENORM-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
+ ; GFX10-DENORM-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
+ ; GFX10-DENORM-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16
+ ; GFX10-DENORM-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17
+ ; GFX10-DENORM-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32)
+ ; GFX10-DENORM-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32)
+ ; GFX10-DENORM-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32)
+ ; GFX10-DENORM-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV6]](s64), [[MV7]](s64), [[MV8]](s64)
+ ; GFX10-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s64>) = contract G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
+ ; GFX10-DENORM-NEXT: [[FADD:%[0-9]+]]:_(<3 x s64>) = contract G_FADD [[BUILD_VECTOR2]], [[FMUL]]
+ ; GFX10-DENORM-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s64>)
+ ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[UV]](s32)
+ ; GFX10-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX10-DENORM-NEXT: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX10-DENORM-NEXT: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX10-DENORM-NEXT: $vgpr4 = COPY [[UV4]](s32)
+ ; GFX10-DENORM-NEXT: $vgpr5 = COPY [[UV5]](s32)
+ ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5
+ %4:_(s32) = COPY $vgpr0
+ %5:_(s32) = COPY $vgpr1
+ %6:_(s32) = COPY $vgpr2
+ %7:_(s32) = COPY $vgpr3
+ %8:_(s32) = COPY $vgpr4
+ %9:_(s32) = COPY $vgpr5
+ %22:_(s64) = G_MERGE_VALUES %4(s32), %5(s32)
+ %23:_(s64) = G_MERGE_VALUES %6(s32), %7(s32)
+ %24:_(s64) = G_MERGE_VALUES %8(s32), %9(s32)
+ %0:_(<3 x s64>) = G_BUILD_VECTOR %22(s64), %23(s64), %24(s64)
+ %10:_(s32) = COPY $vgpr6
+ %11:_(s32) = COPY $vgpr7
+ %12:_(s32) = COPY $vgpr8
+ %13:_(s32) = COPY $vgpr9
+ %14:_(s32) = COPY $vgpr10
+ %15:_(s32) = COPY $vgpr11
+ %25:_(s64) = G_MERGE_VALUES %10(s32), %11(s32)
+ %26:_(s64) = G_MERGE_VALUES %12(s32), %13(s32)
+ %27:_(s64) = G_MERGE_VALUES %14(s32), %15(s32)
+ %1:_(<3 x s64>) = G_BUILD_VECTOR %25(s64), %26(s64), %27(s64)
+ %16:_(s32) = COPY $vgpr12
+ %17:_(s32) = COPY $vgpr13
+ %18:_(s32) = COPY $vgpr14
+ %19:_(s32) = COPY $vgpr15
+ %20:_(s32) = COPY $vgpr16
+ %21:_(s32) = COPY $vgpr17
+ %28:_(s64) = G_MERGE_VALUES %16(s32), %17(s32)
+ %29:_(s64) = G_MERGE_VALUES %18(s32), %19(s32)
+ %30:_(s64) = G_MERGE_VALUES %20(s32), %21(s32)
+ %2:_(<3 x s64>) = G_BUILD_VECTOR %28(s64), %29(s64), %30(s64)
+ %31:_(<3 x s64>) = contract G_FMUL %0, %1
+ %32:_(<3 x s64>) = contract G_FADD %2, %31
+ %34:_(s32), %35:_(s32), %36:_(s32), %37:_(s32), %38:_(s32), %39:_(s32) = G_UNMERGE_VALUES %32(<3 x s64>)
+ $vgpr0 = COPY %34(s32)
+ $vgpr1 = COPY %35(s32)
+ $vgpr2 = COPY %36(s32)
+ $vgpr3 = COPY %37(s32)
+ $vgpr4 = COPY %38(s32)
+ $vgpr5 = COPY %39(s32)
+ S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5
+...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul-pre-legalize.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul-pre-legalize.mir
index 42e53be..8f9fc67 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul-pre-legalize.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul-pre-legalize.mir
@@ -1,12 +1,8 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 -run-pass=amdgpu-prelegalizer-combiner %s -o - | FileCheck -check-prefix=GFX9 %s
-# RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 -run-pass=amdgpu-prelegalizer-combiner -fp-contract=fast %s -o - | FileCheck -check-prefix=GFX9-CONTRACT %s
# RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 -run-pass=amdgpu-prelegalizer-combiner --denormal-fp-math=preserve-sign %s -o - | FileCheck -check-prefix=GFX9-DENORM %s
-# RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 -run-pass=amdgpu-prelegalizer-combiner -enable-unsafe-fp-math %s -o - | FileCheck -check-prefix=GFX9-UNSAFE %s
# RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 -run-pass=amdgpu-prelegalizer-combiner %s -o - | FileCheck -check-prefix=GFX10 %s
-# RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 -run-pass=amdgpu-prelegalizer-combiner -fp-contract=fast %s -o - | FileCheck -check-prefix=GFX10-CONTRACT %s
# RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 -run-pass=amdgpu-prelegalizer-combiner --denormal-fp-math=preserve-sign %s -o - | FileCheck -check-prefix=GFX10-DENORM %s
-# RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 -run-pass=amdgpu-prelegalizer-combiner -enable-unsafe-fp-math %s -o - | FileCheck -check-prefix=GFX10-UNSAFE %s
---
name: test_f32_add_mul
@@ -25,16 +21,6 @@ body: |
; GFX9-NEXT: $vgpr0 = COPY [[FADD]](s32)
; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
;
- ; GFX9-CONTRACT-LABEL: name: test_f32_add_mul
- ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2
- ; GFX9-CONTRACT-NEXT: {{ $}}
- ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GFX9-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]]
- ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[FMA]](s32)
- ; GFX9-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
- ;
; GFX9-DENORM-LABEL: name: test_f32_add_mul
; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2
; GFX9-DENORM-NEXT: {{ $}}
@@ -46,16 +32,6 @@ body: |
; GFX9-DENORM-NEXT: $vgpr0 = COPY [[FADD]](s32)
; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
;
- ; GFX9-UNSAFE-LABEL: name: test_f32_add_mul
- ; GFX9-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2
- ; GFX9-UNSAFE-NEXT: {{ $}}
- ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GFX9-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]]
- ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[FMA]](s32)
- ; GFX9-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
- ;
; GFX10-LABEL: name: test_f32_add_mul
; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2
; GFX10-NEXT: {{ $}}
@@ -67,16 +43,6 @@ body: |
; GFX10-NEXT: $vgpr0 = COPY [[FADD]](s32)
; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
;
- ; GFX10-CONTRACT-LABEL: name: test_f32_add_mul
- ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2
- ; GFX10-CONTRACT-NEXT: {{ $}}
- ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GFX10-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]]
- ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[FMA]](s32)
- ; GFX10-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
- ;
; GFX10-DENORM-LABEL: name: test_f32_add_mul
; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2
; GFX10-DENORM-NEXT: {{ $}}
@@ -87,16 +53,6 @@ body: |
; GFX10-DENORM-NEXT: [[FADD:%[0-9]+]]:_(s32) = reassoc G_FADD [[FMUL]], [[COPY2]]
; GFX10-DENORM-NEXT: $vgpr0 = COPY [[FADD]](s32)
; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
- ;
- ; GFX10-UNSAFE-LABEL: name: test_f32_add_mul
- ; GFX10-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2
- ; GFX10-UNSAFE-NEXT: {{ $}}
- ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GFX10-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]]
- ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[FMA]](s32)
- ; GFX10-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(s32) = COPY $vgpr2
@@ -107,6 +63,60 @@ body: |
...
---
+name: test_f32_add_mul_contract
+body: |
+ bb.1.entry:
+ liveins: $vgpr0, $vgpr1, $vgpr2
+
+ ; GFX9-LABEL: name: test_f32_add_mul_contract
+ ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]]
+ ; GFX9-NEXT: $vgpr0 = COPY [[FMA]](s32)
+ ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
+ ;
+ ; GFX9-DENORM-LABEL: name: test_f32_add_mul_contract
+ ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2
+ ; GFX9-DENORM-NEXT: {{ $}}
+ ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX9-DENORM-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]]
+ ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[FMA]](s32)
+ ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
+ ;
+ ; GFX10-LABEL: name: test_f32_add_mul_contract
+ ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]]
+ ; GFX10-NEXT: $vgpr0 = COPY [[FMA]](s32)
+ ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
+ ;
+ ; GFX10-DENORM-LABEL: name: test_f32_add_mul_contract
+ ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2
+ ; GFX10-DENORM-NEXT: {{ $}}
+ ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX10-DENORM-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]]
+ ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[FMA]](s32)
+ ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = COPY $vgpr1
+ %2:_(s32) = COPY $vgpr2
+ %4:_(s32) = reassoc contract G_FMUL %0, %1
+ %5:_(s32) = reassoc contract G_FADD %4, %2
+ $vgpr0 = COPY %5(s32)
+ S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
+...
+
+---
name: test_f32_add_mul_rhs
body: |
bb.1.entry:
@@ -123,16 +133,6 @@ body: |
; GFX9-NEXT: $vgpr0 = COPY [[FADD]](s32)
; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
;
- ; GFX9-CONTRACT-LABEL: name: test_f32_add_mul_rhs
- ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2
- ; GFX9-CONTRACT-NEXT: {{ $}}
- ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GFX9-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]]
- ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[FMA]](s32)
- ; GFX9-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
- ;
; GFX9-DENORM-LABEL: name: test_f32_add_mul_rhs
; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2
; GFX9-DENORM-NEXT: {{ $}}
@@ -144,16 +144,6 @@ body: |
; GFX9-DENORM-NEXT: $vgpr0 = COPY [[FADD]](s32)
; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
;
- ; GFX9-UNSAFE-LABEL: name: test_f32_add_mul_rhs
- ; GFX9-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2
- ; GFX9-UNSAFE-NEXT: {{ $}}
- ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GFX9-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]]
- ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[FMA]](s32)
- ; GFX9-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
- ;
; GFX10-LABEL: name: test_f32_add_mul_rhs
; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2
; GFX10-NEXT: {{ $}}
@@ -165,16 +155,6 @@ body: |
; GFX10-NEXT: $vgpr0 = COPY [[FADD]](s32)
; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
;
- ; GFX10-CONTRACT-LABEL: name: test_f32_add_mul_rhs
- ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2
- ; GFX10-CONTRACT-NEXT: {{ $}}
- ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GFX10-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]]
- ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[FMA]](s32)
- ; GFX10-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
- ;
; GFX10-DENORM-LABEL: name: test_f32_add_mul_rhs
; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2
; GFX10-DENORM-NEXT: {{ $}}
@@ -185,16 +165,6 @@ body: |
; GFX10-DENORM-NEXT: [[FADD:%[0-9]+]]:_(s32) = reassoc G_FADD [[COPY2]], [[FMUL]]
; GFX10-DENORM-NEXT: $vgpr0 = COPY [[FADD]](s32)
; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
- ;
- ; GFX10-UNSAFE-LABEL: name: test_f32_add_mul_rhs
- ; GFX10-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2
- ; GFX10-UNSAFE-NEXT: {{ $}}
- ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GFX10-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]]
- ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[FMA]](s32)
- ; GFX10-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
%0:_(s32) = COPY $vgpr0
%1:_(s32) = COPY $vgpr1
%2:_(s32) = COPY $vgpr2
@@ -205,6 +175,60 @@ body: |
...
---
+name: test_f32_add_mul_rhs_contract
+body: |
+ bb.1.entry:
+ liveins: $vgpr0, $vgpr1, $vgpr2
+
+ ; GFX9-LABEL: name: test_f32_add_mul_rhs_contract
+ ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]]
+ ; GFX9-NEXT: $vgpr0 = COPY [[FMA]](s32)
+ ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
+ ;
+ ; GFX9-DENORM-LABEL: name: test_f32_add_mul_rhs_contract
+ ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2
+ ; GFX9-DENORM-NEXT: {{ $}}
+ ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX9-DENORM-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]]
+ ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[FMA]](s32)
+ ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
+ ;
+ ; GFX10-LABEL: name: test_f32_add_mul_rhs_contract
+ ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]]
+ ; GFX10-NEXT: $vgpr0 = COPY [[FMA]](s32)
+ ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
+ ;
+ ; GFX10-DENORM-LABEL: name: test_f32_add_mul_rhs_contract
+ ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2
+ ; GFX10-DENORM-NEXT: {{ $}}
+ ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX10-DENORM-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]]
+ ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[FMA]](s32)
+ ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
+ %0:_(s32) = COPY $vgpr0
+ %1:_(s32) = COPY $vgpr1
+ %2:_(s32) = COPY $vgpr2
+ %4:_(s32) = reassoc contract G_FMUL %0, %1
+ %5:_(s32) = reassoc contract G_FADD %2, %4
+ $vgpr0 = COPY %5(s32)
+ S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
+...
+
+---
name: test_half_add_mul
body: |
bb.1.entry:
@@ -225,20 +249,6 @@ body: |
; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
;
- ; GFX9-CONTRACT-LABEL: name: test_half_add_mul
- ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2
- ; GFX9-CONTRACT-NEXT: {{ $}}
- ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GFX9-CONTRACT-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
- ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GFX9-CONTRACT-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
- ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GFX9-CONTRACT-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
- ; GFX9-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]]
- ; GFX9-CONTRACT-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16)
- ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
- ; GFX9-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
- ;
; GFX9-DENORM-LABEL: name: test_half_add_mul
; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2
; GFX9-DENORM-NEXT: {{ $}}
@@ -254,20 +264,6 @@ body: |
; GFX9-DENORM-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
;
- ; GFX9-UNSAFE-LABEL: name: test_half_add_mul
- ; GFX9-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2
- ; GFX9-UNSAFE-NEXT: {{ $}}
- ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GFX9-UNSAFE-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
- ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GFX9-UNSAFE-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
- ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GFX9-UNSAFE-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
- ; GFX9-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]]
- ; GFX9-UNSAFE-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16)
- ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
- ; GFX9-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
- ;
; GFX10-LABEL: name: test_half_add_mul
; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2
; GFX10-NEXT: {{ $}}
@@ -283,20 +279,6 @@ body: |
; GFX10-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
;
- ; GFX10-CONTRACT-LABEL: name: test_half_add_mul
- ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2
- ; GFX10-CONTRACT-NEXT: {{ $}}
- ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GFX10-CONTRACT-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
- ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GFX10-CONTRACT-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
- ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GFX10-CONTRACT-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
- ; GFX10-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]]
- ; GFX10-CONTRACT-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16)
- ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
- ; GFX10-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
- ;
; GFX10-DENORM-LABEL: name: test_half_add_mul
; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2
; GFX10-DENORM-NEXT: {{ $}}
@@ -311,20 +293,6 @@ body: |
; GFX10-DENORM-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16)
; GFX10-DENORM-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
- ;
- ; GFX10-UNSAFE-LABEL: name: test_half_add_mul
- ; GFX10-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2
- ; GFX10-UNSAFE-NEXT: {{ $}}
- ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GFX10-UNSAFE-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
- ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GFX10-UNSAFE-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
- ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GFX10-UNSAFE-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
- ; GFX10-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]]
- ; GFX10-UNSAFE-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16)
- ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
- ; GFX10-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
%4:_(s32) = COPY $vgpr0
%0:_(s16) = G_TRUNC %4(s32)
%5:_(s32) = COPY $vgpr1
@@ -339,6 +307,81 @@ body: |
...
---
+name: test_half_add_mul_contract
+body: |
+ bb.1.entry:
+ liveins: $vgpr0, $vgpr1, $vgpr2
+
+ ; GFX9-LABEL: name: test_half_add_mul_contract
+ ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+ ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+ ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
+ ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]]
+ ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16)
+ ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
+ ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
+ ;
+ ; GFX9-DENORM-LABEL: name: test_half_add_mul_contract
+ ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2
+ ; GFX9-DENORM-NEXT: {{ $}}
+ ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX9-DENORM-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+ ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX9-DENORM-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+ ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX9-DENORM-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
+ ; GFX9-DENORM-NEXT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]]
+ ; GFX9-DENORM-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16)
+ ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
+ ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
+ ;
+ ; GFX10-LABEL: name: test_half_add_mul_contract
+ ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+ ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+ ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
+ ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]]
+ ; GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16)
+ ; GFX10-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
+ ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
+ ;
+ ; GFX10-DENORM-LABEL: name: test_half_add_mul_contract
+ ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2
+ ; GFX10-DENORM-NEXT: {{ $}}
+ ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX10-DENORM-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+ ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX10-DENORM-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+ ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX10-DENORM-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
+ ; GFX10-DENORM-NEXT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]]
+ ; GFX10-DENORM-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16)
+ ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
+ ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
+ %4:_(s32) = COPY $vgpr0
+ %0:_(s16) = G_TRUNC %4(s32)
+ %5:_(s32) = COPY $vgpr1
+ %1:_(s16) = G_TRUNC %5(s32)
+ %6:_(s32) = COPY $vgpr2
+ %2:_(s16) = G_TRUNC %6(s32)
+ %7:_(s16) = reassoc contract G_FMUL %0, %1
+ %8:_(s16) = reassoc contract G_FADD %7, %2
+ %10:_(s32) = G_ANYEXT %8(s16)
+ $vgpr0 = COPY %10(s32)
+ S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
+...
+
+
+---
name: test_half_add_mul_rhs
body: |
bb.1.entry:
@@ -359,20 +402,6 @@ body: |
; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
;
- ; GFX9-CONTRACT-LABEL: name: test_half_add_mul_rhs
- ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2
- ; GFX9-CONTRACT-NEXT: {{ $}}
- ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GFX9-CONTRACT-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
- ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GFX9-CONTRACT-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
- ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GFX9-CONTRACT-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
- ; GFX9-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]]
- ; GFX9-CONTRACT-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16)
- ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
- ; GFX9-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
- ;
; GFX9-DENORM-LABEL: name: test_half_add_mul_rhs
; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2
; GFX9-DENORM-NEXT: {{ $}}
@@ -388,20 +417,6 @@ body: |
; GFX9-DENORM-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
;
- ; GFX9-UNSAFE-LABEL: name: test_half_add_mul_rhs
- ; GFX9-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2
- ; GFX9-UNSAFE-NEXT: {{ $}}
- ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GFX9-UNSAFE-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
- ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GFX9-UNSAFE-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
- ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GFX9-UNSAFE-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
- ; GFX9-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]]
- ; GFX9-UNSAFE-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16)
- ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
- ; GFX9-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
- ;
; GFX10-LABEL: name: test_half_add_mul_rhs
; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2
; GFX10-NEXT: {{ $}}
@@ -417,20 +432,6 @@ body: |
; GFX10-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
;
- ; GFX10-CONTRACT-LABEL: name: test_half_add_mul_rhs
- ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2
- ; GFX10-CONTRACT-NEXT: {{ $}}
- ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GFX10-CONTRACT-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
- ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GFX10-CONTRACT-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
- ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GFX10-CONTRACT-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
- ; GFX10-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]]
- ; GFX10-CONTRACT-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16)
- ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
- ; GFX10-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
- ;
; GFX10-DENORM-LABEL: name: test_half_add_mul_rhs
; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2
; GFX10-DENORM-NEXT: {{ $}}
@@ -445,20 +446,84 @@ body: |
; GFX10-DENORM-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16)
; GFX10-DENORM-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
+ %4:_(s32) = COPY $vgpr0
+ %0:_(s16) = G_TRUNC %4(s32)
+ %5:_(s32) = COPY $vgpr1
+ %1:_(s16) = G_TRUNC %5(s32)
+ %6:_(s32) = COPY $vgpr2
+ %2:_(s16) = G_TRUNC %6(s32)
+ %7:_(s16) = reassoc G_FMUL %0, %1
+ %8:_(s16) = reassoc G_FADD %2, %7
+ %10:_(s32) = G_ANYEXT %8(s16)
+ $vgpr0 = COPY %10(s32)
+ S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
+...
+
+---
+name: test_half_add_mul_rhs_contract
+body: |
+ bb.1.entry:
+ liveins: $vgpr0, $vgpr1, $vgpr2
+
+ ; GFX9-LABEL: name: test_half_add_mul_rhs_contract
+ ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+ ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+ ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
+ ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s16) = reassoc G_FMUL [[TRUNC]], [[TRUNC1]]
+ ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s16) = reassoc G_FADD [[TRUNC2]], [[FMUL]]
+ ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16)
+ ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
+ ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
+ ;
+ ; GFX9-DENORM-LABEL: name: test_half_add_mul_rhs_contract
+ ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2
+ ; GFX9-DENORM-NEXT: {{ $}}
+ ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX9-DENORM-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+ ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX9-DENORM-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+ ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX9-DENORM-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
+ ; GFX9-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(s16) = reassoc G_FMUL [[TRUNC]], [[TRUNC1]]
+ ; GFX9-DENORM-NEXT: [[FADD:%[0-9]+]]:_(s16) = reassoc G_FADD [[TRUNC2]], [[FMUL]]
+ ; GFX9-DENORM-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16)
+ ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
+ ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
;
- ; GFX10-UNSAFE-LABEL: name: test_half_add_mul_rhs
- ; GFX10-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2
- ; GFX10-UNSAFE-NEXT: {{ $}}
- ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GFX10-UNSAFE-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
- ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GFX10-UNSAFE-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
- ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GFX10-UNSAFE-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
- ; GFX10-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]]
- ; GFX10-UNSAFE-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16)
- ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
- ; GFX10-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
+ ; GFX10-LABEL: name: test_half_add_mul_rhs_contract
+ ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+ ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+ ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
+ ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s16) = reassoc G_FMUL [[TRUNC]], [[TRUNC1]]
+ ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(s16) = reassoc G_FADD [[TRUNC2]], [[FMUL]]
+ ; GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16)
+ ; GFX10-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
+ ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
+ ;
+ ; GFX10-DENORM-LABEL: name: test_half_add_mul_rhs_contract
+ ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2
+ ; GFX10-DENORM-NEXT: {{ $}}
+ ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX10-DENORM-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32)
+ ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX10-DENORM-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+ ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX10-DENORM-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32)
+ ; GFX10-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(s16) = reassoc G_FMUL [[TRUNC]], [[TRUNC1]]
+ ; GFX10-DENORM-NEXT: [[FADD:%[0-9]+]]:_(s16) = reassoc G_FADD [[TRUNC2]], [[FMUL]]
+ ; GFX10-DENORM-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16)
+ ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[ANYEXT]](s32)
+ ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
%4:_(s32) = COPY $vgpr0
%0:_(s16) = G_TRUNC %4(s32)
%5:_(s32) = COPY $vgpr1
@@ -497,24 +562,6 @@ body: |
; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32)
; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
;
- ; GFX9-CONTRACT-LABEL: name: test_double_add_mul
- ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
- ; GFX9-CONTRACT-NEXT: {{ $}}
- ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GFX9-CONTRACT-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
- ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GFX9-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; GFX9-CONTRACT-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
- ; GFX9-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
- ; GFX9-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
- ; GFX9-CONTRACT-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
- ; GFX9-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]]
- ; GFX9-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64)
- ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](s32)
- ; GFX9-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](s32)
- ; GFX9-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
- ;
; GFX9-DENORM-LABEL: name: test_double_add_mul
; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
; GFX9-DENORM-NEXT: {{ $}}
@@ -534,24 +581,6 @@ body: |
; GFX9-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32)
; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
;
- ; GFX9-UNSAFE-LABEL: name: test_double_add_mul
- ; GFX9-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
- ; GFX9-UNSAFE-NEXT: {{ $}}
- ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GFX9-UNSAFE-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
- ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GFX9-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; GFX9-UNSAFE-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
- ; GFX9-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
- ; GFX9-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
- ; GFX9-UNSAFE-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
- ; GFX9-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]]
- ; GFX9-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64)
- ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](s32)
- ; GFX9-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](s32)
- ; GFX9-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
- ;
; GFX10-LABEL: name: test_double_add_mul
; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
; GFX10-NEXT: {{ $}}
@@ -571,24 +600,6 @@ body: |
; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32)
; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
;
- ; GFX10-CONTRACT-LABEL: name: test_double_add_mul
- ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
- ; GFX10-CONTRACT-NEXT: {{ $}}
- ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GFX10-CONTRACT-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
- ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GFX10-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; GFX10-CONTRACT-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
- ; GFX10-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
- ; GFX10-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
- ; GFX10-CONTRACT-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
- ; GFX10-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]]
- ; GFX10-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64)
- ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](s32)
- ; GFX10-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](s32)
- ; GFX10-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
- ;
; GFX10-DENORM-LABEL: name: test_double_add_mul
; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
; GFX10-DENORM-NEXT: {{ $}}
@@ -607,24 +618,6 @@ body: |
; GFX10-DENORM-NEXT: $vgpr0 = COPY [[UV]](s32)
; GFX10-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32)
; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
- ;
- ; GFX10-UNSAFE-LABEL: name: test_double_add_mul
- ; GFX10-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
- ; GFX10-UNSAFE-NEXT: {{ $}}
- ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GFX10-UNSAFE-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
- ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GFX10-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; GFX10-UNSAFE-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
- ; GFX10-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
- ; GFX10-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
- ; GFX10-UNSAFE-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
- ; GFX10-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]]
- ; GFX10-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64)
- ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](s32)
- ; GFX10-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](s32)
- ; GFX10-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
%4:_(s32) = COPY $vgpr0
%5:_(s32) = COPY $vgpr1
%0:_(s64) = G_MERGE_VALUES %4(s32), %5(s32)
@@ -643,6 +636,100 @@ body: |
...
---
+name: test_double_add_mul_contract
+body: |
+ bb.1.entry:
+ liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
+
+ ; GFX9-LABEL: name: test_double_add_mul_contract
+ ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+ ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+ ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
+ ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]]
+ ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64)
+ ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32)
+ ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
+ ;
+ ; GFX9-DENORM-LABEL: name: test_double_add_mul_contract
+ ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
+ ; GFX9-DENORM-NEXT: {{ $}}
+ ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX9-DENORM-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+ ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX9-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX9-DENORM-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+ ; GFX9-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; GFX9-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; GFX9-DENORM-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
+ ; GFX9-DENORM-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]]
+ ; GFX9-DENORM-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64)
+ ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[UV]](s32)
+ ; GFX9-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
+ ;
+ ; GFX10-LABEL: name: test_double_add_mul_contract
+ ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+ ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+ ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
+ ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]]
+ ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64)
+ ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32)
+ ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
+ ;
+ ; GFX10-DENORM-LABEL: name: test_double_add_mul_contract
+ ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
+ ; GFX10-DENORM-NEXT: {{ $}}
+ ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX10-DENORM-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+ ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX10-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX10-DENORM-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+ ; GFX10-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; GFX10-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; GFX10-DENORM-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
+ ; GFX10-DENORM-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]]
+ ; GFX10-DENORM-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64)
+ ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[UV]](s32)
+ ; GFX10-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
+ %4:_(s32) = COPY $vgpr0
+ %5:_(s32) = COPY $vgpr1
+ %0:_(s64) = G_MERGE_VALUES %4(s32), %5(s32)
+ %6:_(s32) = COPY $vgpr2
+ %7:_(s32) = COPY $vgpr3
+ %1:_(s64) = G_MERGE_VALUES %6(s32), %7(s32)
+ %8:_(s32) = COPY $vgpr4
+ %9:_(s32) = COPY $vgpr5
+ %2:_(s64) = G_MERGE_VALUES %8(s32), %9(s32)
+ %10:_(s64) = reassoc contract G_FMUL %0, %1
+ %11:_(s64) = reassoc contract G_FADD %10, %2
+ %13:_(s32), %14:_(s32) = G_UNMERGE_VALUES %11(s64)
+ $vgpr0 = COPY %13(s32)
+ $vgpr1 = COPY %14(s32)
+ S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
+...
+
+---
name: test_double_add_mul_rhs
body: |
bb.1.entry:
@@ -667,24 +754,6 @@ body: |
; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32)
; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
;
- ; GFX9-CONTRACT-LABEL: name: test_double_add_mul_rhs
- ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
- ; GFX9-CONTRACT-NEXT: {{ $}}
- ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GFX9-CONTRACT-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
- ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GFX9-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; GFX9-CONTRACT-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
- ; GFX9-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
- ; GFX9-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
- ; GFX9-CONTRACT-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
- ; GFX9-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]]
- ; GFX9-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64)
- ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](s32)
- ; GFX9-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](s32)
- ; GFX9-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
- ;
; GFX9-DENORM-LABEL: name: test_double_add_mul_rhs
; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
; GFX9-DENORM-NEXT: {{ $}}
@@ -704,24 +773,6 @@ body: |
; GFX9-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32)
; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
;
- ; GFX9-UNSAFE-LABEL: name: test_double_add_mul_rhs
- ; GFX9-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
- ; GFX9-UNSAFE-NEXT: {{ $}}
- ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GFX9-UNSAFE-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
- ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GFX9-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; GFX9-UNSAFE-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
- ; GFX9-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
- ; GFX9-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
- ; GFX9-UNSAFE-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
- ; GFX9-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]]
- ; GFX9-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64)
- ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](s32)
- ; GFX9-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](s32)
- ; GFX9-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
- ;
; GFX10-LABEL: name: test_double_add_mul_rhs
; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
; GFX10-NEXT: {{ $}}
@@ -741,24 +792,6 @@ body: |
; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32)
; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
;
- ; GFX10-CONTRACT-LABEL: name: test_double_add_mul_rhs
- ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
- ; GFX10-CONTRACT-NEXT: {{ $}}
- ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GFX10-CONTRACT-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
- ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GFX10-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; GFX10-CONTRACT-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
- ; GFX10-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
- ; GFX10-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
- ; GFX10-CONTRACT-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
- ; GFX10-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]]
- ; GFX10-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64)
- ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](s32)
- ; GFX10-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](s32)
- ; GFX10-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
- ;
; GFX10-DENORM-LABEL: name: test_double_add_mul_rhs
; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
; GFX10-DENORM-NEXT: {{ $}}
@@ -777,24 +810,6 @@ body: |
; GFX10-DENORM-NEXT: $vgpr0 = COPY [[UV]](s32)
; GFX10-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32)
; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
- ;
- ; GFX10-UNSAFE-LABEL: name: test_double_add_mul_rhs
- ; GFX10-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
- ; GFX10-UNSAFE-NEXT: {{ $}}
- ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GFX10-UNSAFE-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
- ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GFX10-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; GFX10-UNSAFE-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
- ; GFX10-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
- ; GFX10-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
- ; GFX10-UNSAFE-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
- ; GFX10-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]]
- ; GFX10-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64)
- ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](s32)
- ; GFX10-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](s32)
- ; GFX10-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
%4:_(s32) = COPY $vgpr0
%5:_(s32) = COPY $vgpr1
%0:_(s64) = G_MERGE_VALUES %4(s32), %5(s32)
@@ -813,6 +828,100 @@ body: |
...
---
+name: test_double_add_mul_rhs_contract
+body: |
+ bb.1.entry:
+ liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
+
+ ; GFX9-LABEL: name: test_double_add_mul_rhs_contract
+ ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+ ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+ ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
+ ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]]
+ ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64)
+ ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32)
+ ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
+ ;
+ ; GFX9-DENORM-LABEL: name: test_double_add_mul_rhs_contract
+ ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
+ ; GFX9-DENORM-NEXT: {{ $}}
+ ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX9-DENORM-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+ ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX9-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX9-DENORM-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+ ; GFX9-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; GFX9-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; GFX9-DENORM-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
+ ; GFX9-DENORM-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]]
+ ; GFX9-DENORM-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64)
+ ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[UV]](s32)
+ ; GFX9-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
+ ;
+ ; GFX10-LABEL: name: test_double_add_mul_rhs_contract
+ ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+ ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+ ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
+ ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]]
+ ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64)
+ ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32)
+ ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
+ ;
+ ; GFX10-DENORM-LABEL: name: test_double_add_mul_rhs_contract
+ ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
+ ; GFX10-DENORM-NEXT: {{ $}}
+ ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX10-DENORM-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+ ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX10-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX10-DENORM-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+ ; GFX10-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; GFX10-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; GFX10-DENORM-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
+ ; GFX10-DENORM-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]]
+ ; GFX10-DENORM-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64)
+ ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[UV]](s32)
+ ; GFX10-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
+ %4:_(s32) = COPY $vgpr0
+ %5:_(s32) = COPY $vgpr1
+ %0:_(s64) = G_MERGE_VALUES %4(s32), %5(s32)
+ %6:_(s32) = COPY $vgpr2
+ %7:_(s32) = COPY $vgpr3
+ %1:_(s64) = G_MERGE_VALUES %6(s32), %7(s32)
+ %8:_(s32) = COPY $vgpr4
+ %9:_(s32) = COPY $vgpr5
+ %2:_(s64) = G_MERGE_VALUES %8(s32), %9(s32)
+ %10:_(s64) = reassoc contract G_FMUL %0, %1
+ %11:_(s64) = reassoc contract G_FADD %2, %10
+ %13:_(s32), %14:_(s32) = G_UNMERGE_VALUES %11(s64)
+ $vgpr0 = COPY %13(s32)
+ $vgpr1 = COPY %14(s32)
+ S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
+...
+
+---
name: test_4xfloat_add_mul
body: |
bb.1.entry:
@@ -845,32 +954,6 @@ body: |
; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32)
; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
;
- ; GFX9-CONTRACT-LABEL: name: test_4xfloat_add_mul
- ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11
- ; GFX9-CONTRACT-NEXT: {{ $}}
- ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GFX9-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; GFX9-CONTRACT-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
- ; GFX9-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
- ; GFX9-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
- ; GFX9-CONTRACT-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
- ; GFX9-CONTRACT-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
- ; GFX9-CONTRACT-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
- ; GFX9-CONTRACT-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
- ; GFX9-CONTRACT-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
- ; GFX9-CONTRACT-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
- ; GFX9-CONTRACT-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
- ; GFX9-CONTRACT-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
- ; GFX9-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(<4 x s32>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]]
- ; GFX9-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<4 x s32>)
- ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](s32)
- ; GFX9-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](s32)
- ; GFX9-CONTRACT-NEXT: $vgpr2 = COPY [[UV2]](s32)
- ; GFX9-CONTRACT-NEXT: $vgpr3 = COPY [[UV3]](s32)
- ; GFX9-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
- ;
; GFX9-DENORM-LABEL: name: test_4xfloat_add_mul
; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11
; GFX9-DENORM-NEXT: {{ $}}
@@ -898,32 +981,6 @@ body: |
; GFX9-DENORM-NEXT: $vgpr3 = COPY [[UV3]](s32)
; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
;
- ; GFX9-UNSAFE-LABEL: name: test_4xfloat_add_mul
- ; GFX9-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11
- ; GFX9-UNSAFE-NEXT: {{ $}}
- ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GFX9-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
- ; GFX9-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
- ; GFX9-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
- ; GFX9-UNSAFE-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
- ; GFX9-UNSAFE-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
- ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
- ; GFX9-UNSAFE-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
- ; GFX9-UNSAFE-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
- ; GFX9-UNSAFE-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
- ; GFX9-UNSAFE-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
- ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
- ; GFX9-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(<4 x s32>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]]
- ; GFX9-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<4 x s32>)
- ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](s32)
- ; GFX9-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](s32)
- ; GFX9-UNSAFE-NEXT: $vgpr2 = COPY [[UV2]](s32)
- ; GFX9-UNSAFE-NEXT: $vgpr3 = COPY [[UV3]](s32)
- ; GFX9-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
- ;
; GFX10-LABEL: name: test_4xfloat_add_mul
; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11
; GFX10-NEXT: {{ $}}
@@ -951,32 +1008,6 @@ body: |
; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32)
; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
;
- ; GFX10-CONTRACT-LABEL: name: test_4xfloat_add_mul
- ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11
- ; GFX10-CONTRACT-NEXT: {{ $}}
- ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GFX10-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; GFX10-CONTRACT-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
- ; GFX10-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
- ; GFX10-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
- ; GFX10-CONTRACT-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
- ; GFX10-CONTRACT-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
- ; GFX10-CONTRACT-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
- ; GFX10-CONTRACT-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
- ; GFX10-CONTRACT-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
- ; GFX10-CONTRACT-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
- ; GFX10-CONTRACT-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
- ; GFX10-CONTRACT-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
- ; GFX10-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(<4 x s32>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]]
- ; GFX10-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<4 x s32>)
- ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](s32)
- ; GFX10-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](s32)
- ; GFX10-CONTRACT-NEXT: $vgpr2 = COPY [[UV2]](s32)
- ; GFX10-CONTRACT-NEXT: $vgpr3 = COPY [[UV3]](s32)
- ; GFX10-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
- ;
; GFX10-DENORM-LABEL: name: test_4xfloat_add_mul
; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11
; GFX10-DENORM-NEXT: {{ $}}
@@ -1003,32 +1034,6 @@ body: |
; GFX10-DENORM-NEXT: $vgpr2 = COPY [[UV2]](s32)
; GFX10-DENORM-NEXT: $vgpr3 = COPY [[UV3]](s32)
; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
- ;
- ; GFX10-UNSAFE-LABEL: name: test_4xfloat_add_mul
- ; GFX10-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11
- ; GFX10-UNSAFE-NEXT: {{ $}}
- ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GFX10-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; GFX10-UNSAFE-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
- ; GFX10-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
- ; GFX10-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
- ; GFX10-UNSAFE-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
- ; GFX10-UNSAFE-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
- ; GFX10-UNSAFE-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
- ; GFX10-UNSAFE-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
- ; GFX10-UNSAFE-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
- ; GFX10-UNSAFE-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
- ; GFX10-UNSAFE-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
- ; GFX10-UNSAFE-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
- ; GFX10-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(<4 x s32>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]]
- ; GFX10-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<4 x s32>)
- ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](s32)
- ; GFX10-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](s32)
- ; GFX10-UNSAFE-NEXT: $vgpr2 = COPY [[UV2]](s32)
- ; GFX10-UNSAFE-NEXT: $vgpr3 = COPY [[UV3]](s32)
- ; GFX10-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
%4:_(s32) = COPY $vgpr0
%5:_(s32) = COPY $vgpr1
%6:_(s32) = COPY $vgpr2
@@ -1055,6 +1060,140 @@ body: |
...
---
+name: test_4xfloat_add_mul_contract
+body: |
+ bb.1.entry:
+ liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11
+
+ ; GFX9-LABEL: name: test_4xfloat_add_mul_contract
+ ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
+ ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+ ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+ ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
+ ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
+ ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
+ ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
+ ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(<4 x s32>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]]
+ ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<4 x s32>)
+ ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32)
+ ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+ ;
+ ; GFX9-DENORM-LABEL: name: test_4xfloat_add_mul_contract
+ ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11
+ ; GFX9-DENORM-NEXT: {{ $}}
+ ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX9-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX9-DENORM-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
+ ; GFX9-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; GFX9-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; GFX9-DENORM-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+ ; GFX9-DENORM-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+ ; GFX9-DENORM-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX9-DENORM-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
+ ; GFX9-DENORM-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
+ ; GFX9-DENORM-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
+ ; GFX9-DENORM-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
+ ; GFX9-DENORM-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX9-DENORM-NEXT: [[FMA:%[0-9]+]]:_(<4 x s32>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]]
+ ; GFX9-DENORM-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<4 x s32>)
+ ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[UV]](s32)
+ ; GFX9-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX9-DENORM-NEXT: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX9-DENORM-NEXT: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+ ;
+ ; GFX10-LABEL: name: test_4xfloat_add_mul_contract
+ ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
+ ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+ ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+ ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
+ ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
+ ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
+ ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
+ ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(<4 x s32>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]]
+ ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<4 x s32>)
+ ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32)
+ ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+ ;
+ ; GFX10-DENORM-LABEL: name: test_4xfloat_add_mul_contract
+ ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11
+ ; GFX10-DENORM-NEXT: {{ $}}
+ ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX10-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX10-DENORM-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32)
+ ; GFX10-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; GFX10-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; GFX10-DENORM-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+ ; GFX10-DENORM-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+ ; GFX10-DENORM-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX10-DENORM-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
+ ; GFX10-DENORM-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
+ ; GFX10-DENORM-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
+ ; GFX10-DENORM-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
+ ; GFX10-DENORM-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX10-DENORM-NEXT: [[FMA:%[0-9]+]]:_(<4 x s32>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]]
+ ; GFX10-DENORM-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<4 x s32>)
+ ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[UV]](s32)
+ ; GFX10-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX10-DENORM-NEXT: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX10-DENORM-NEXT: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+ %4:_(s32) = COPY $vgpr0
+ %5:_(s32) = COPY $vgpr1
+ %6:_(s32) = COPY $vgpr2
+ %7:_(s32) = COPY $vgpr3
+ %0:_(<4 x s32>) = G_BUILD_VECTOR %4(s32), %5(s32), %6(s32), %7(s32)
+ %8:_(s32) = COPY $vgpr4
+ %9:_(s32) = COPY $vgpr5
+ %10:_(s32) = COPY $vgpr6
+ %11:_(s32) = COPY $vgpr7
+ %1:_(<4 x s32>) = G_BUILD_VECTOR %8(s32), %9(s32), %10(s32), %11(s32)
+ %12:_(s32) = COPY $vgpr8
+ %13:_(s32) = COPY $vgpr9
+ %14:_(s32) = COPY $vgpr10
+ %15:_(s32) = COPY $vgpr11
+ %2:_(<4 x s32>) = G_BUILD_VECTOR %12(s32), %13(s32), %14(s32), %15(s32)
+ %16:_(<4 x s32>) = reassoc contract G_FMUL %0, %1
+ %17:_(<4 x s32>) = reassoc contract G_FADD %16, %2
+ %19:_(s32), %20:_(s32), %21:_(s32), %22:_(s32) = G_UNMERGE_VALUES %17(<4 x s32>)
+ $vgpr0 = COPY %19(s32)
+ $vgpr1 = COPY %20(s32)
+ $vgpr2 = COPY %21(s32)
+ $vgpr3 = COPY %22(s32)
+ S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3
+...
+
+---
name: test_3xfloat_add_mul_rhs
body: |
bb.1.entry:
@@ -1083,28 +1222,6 @@ body: |
; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32)
; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
;
- ; GFX9-CONTRACT-LABEL: name: test_3xfloat_add_mul_rhs
- ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8
- ; GFX9-CONTRACT-NEXT: {{ $}}
- ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GFX9-CONTRACT-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
- ; GFX9-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; GFX9-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
- ; GFX9-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
- ; GFX9-CONTRACT-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32)
- ; GFX9-CONTRACT-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
- ; GFX9-CONTRACT-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
- ; GFX9-CONTRACT-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
- ; GFX9-CONTRACT-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32)
- ; GFX9-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(<3 x s32>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]]
- ; GFX9-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<3 x s32>)
- ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](s32)
- ; GFX9-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](s32)
- ; GFX9-CONTRACT-NEXT: $vgpr2 = COPY [[UV2]](s32)
- ; GFX9-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
- ;
; GFX9-DENORM-LABEL: name: test_3xfloat_add_mul_rhs
; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8
; GFX9-DENORM-NEXT: {{ $}}
@@ -1128,28 +1245,6 @@ body: |
; GFX9-DENORM-NEXT: $vgpr2 = COPY [[UV2]](s32)
; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
;
- ; GFX9-UNSAFE-LABEL: name: test_3xfloat_add_mul_rhs
- ; GFX9-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8
- ; GFX9-UNSAFE-NEXT: {{ $}}
- ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
- ; GFX9-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; GFX9-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
- ; GFX9-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
- ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32)
- ; GFX9-UNSAFE-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
- ; GFX9-UNSAFE-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
- ; GFX9-UNSAFE-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
- ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32)
- ; GFX9-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(<3 x s32>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]]
- ; GFX9-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<3 x s32>)
- ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](s32)
- ; GFX9-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](s32)
- ; GFX9-UNSAFE-NEXT: $vgpr2 = COPY [[UV2]](s32)
- ; GFX9-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
- ;
; GFX10-LABEL: name: test_3xfloat_add_mul_rhs
; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8
; GFX10-NEXT: {{ $}}
@@ -1173,28 +1268,6 @@ body: |
; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32)
; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
;
- ; GFX10-CONTRACT-LABEL: name: test_3xfloat_add_mul_rhs
- ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8
- ; GFX10-CONTRACT-NEXT: {{ $}}
- ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GFX10-CONTRACT-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
- ; GFX10-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; GFX10-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
- ; GFX10-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
- ; GFX10-CONTRACT-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32)
- ; GFX10-CONTRACT-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
- ; GFX10-CONTRACT-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
- ; GFX10-CONTRACT-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
- ; GFX10-CONTRACT-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32)
- ; GFX10-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(<3 x s32>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]]
- ; GFX10-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<3 x s32>)
- ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](s32)
- ; GFX10-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](s32)
- ; GFX10-CONTRACT-NEXT: $vgpr2 = COPY [[UV2]](s32)
- ; GFX10-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
- ;
; GFX10-DENORM-LABEL: name: test_3xfloat_add_mul_rhs
; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8
; GFX10-DENORM-NEXT: {{ $}}
@@ -1217,28 +1290,124 @@ body: |
; GFX10-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32)
; GFX10-DENORM-NEXT: $vgpr2 = COPY [[UV2]](s32)
; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
+ %4:_(s32) = COPY $vgpr0
+ %5:_(s32) = COPY $vgpr1
+ %6:_(s32) = COPY $vgpr2
+ %0:_(<3 x s32>) = G_BUILD_VECTOR %4(s32), %5(s32), %6(s32)
+ %7:_(s32) = COPY $vgpr3
+ %8:_(s32) = COPY $vgpr4
+ %9:_(s32) = COPY $vgpr5
+ %1:_(<3 x s32>) = G_BUILD_VECTOR %7(s32), %8(s32), %9(s32)
+ %10:_(s32) = COPY $vgpr6
+ %11:_(s32) = COPY $vgpr7
+ %12:_(s32) = COPY $vgpr8
+ %2:_(<3 x s32>) = G_BUILD_VECTOR %10(s32), %11(s32), %12(s32)
+ %13:_(<3 x s32>) = reassoc G_FMUL %0, %1
+ %14:_(<3 x s32>) = reassoc G_FADD %2, %13
+ %16:_(s32), %17:_(s32), %18:_(s32) = G_UNMERGE_VALUES %14(<3 x s32>)
+ $vgpr0 = COPY %16(s32)
+ $vgpr1 = COPY %17(s32)
+ $vgpr2 = COPY %18(s32)
+ S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
+...
+
+---
+name: test_3xfloat_add_mul_rhs_contract
+body: |
+ bb.1.entry:
+ liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8
+
+ ; GFX9-LABEL: name: test_3xfloat_add_mul_rhs_contract
+ ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
+ ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32)
+ ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+ ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+ ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
+ ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32)
+ ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s32>) = reassoc G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
+ ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(<3 x s32>) = reassoc G_FADD [[BUILD_VECTOR2]], [[FMUL]]
+ ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s32>)
+ ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32)
+ ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
+ ;
+ ; GFX9-DENORM-LABEL: name: test_3xfloat_add_mul_rhs_contract
+ ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8
+ ; GFX9-DENORM-NEXT: {{ $}}
+ ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX9-DENORM-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
+ ; GFX9-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX9-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; GFX9-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; GFX9-DENORM-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32)
+ ; GFX9-DENORM-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+ ; GFX9-DENORM-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+ ; GFX9-DENORM-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
+ ; GFX9-DENORM-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32)
+ ; GFX9-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s32>) = reassoc G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
+ ; GFX9-DENORM-NEXT: [[FADD:%[0-9]+]]:_(<3 x s32>) = reassoc G_FADD [[BUILD_VECTOR2]], [[FMUL]]
+ ; GFX9-DENORM-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s32>)
+ ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[UV]](s32)
+ ; GFX9-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX9-DENORM-NEXT: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
+ ;
+ ; GFX10-LABEL: name: test_3xfloat_add_mul_rhs_contract
+ ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
+ ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32)
+ ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+ ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+ ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
+ ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32)
+ ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s32>) = reassoc G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
+ ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(<3 x s32>) = reassoc G_FADD [[BUILD_VECTOR2]], [[FMUL]]
+ ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s32>)
+ ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32)
+ ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
;
- ; GFX10-UNSAFE-LABEL: name: test_3xfloat_add_mul_rhs
- ; GFX10-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8
- ; GFX10-UNSAFE-NEXT: {{ $}}
- ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GFX10-UNSAFE-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
- ; GFX10-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; GFX10-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
- ; GFX10-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
- ; GFX10-UNSAFE-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32)
- ; GFX10-UNSAFE-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
- ; GFX10-UNSAFE-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
- ; GFX10-UNSAFE-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
- ; GFX10-UNSAFE-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32)
- ; GFX10-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(<3 x s32>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]]
- ; GFX10-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<3 x s32>)
- ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](s32)
- ; GFX10-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](s32)
- ; GFX10-UNSAFE-NEXT: $vgpr2 = COPY [[UV2]](s32)
- ; GFX10-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
+ ; GFX10-DENORM-LABEL: name: test_3xfloat_add_mul_rhs_contract
+ ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8
+ ; GFX10-DENORM-NEXT: {{ $}}
+ ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX10-DENORM-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32)
+ ; GFX10-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX10-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; GFX10-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; GFX10-DENORM-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32)
+ ; GFX10-DENORM-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+ ; GFX10-DENORM-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+ ; GFX10-DENORM-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
+ ; GFX10-DENORM-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32)
+ ; GFX10-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s32>) = reassoc G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
+ ; GFX10-DENORM-NEXT: [[FADD:%[0-9]+]]:_(<3 x s32>) = reassoc G_FADD [[BUILD_VECTOR2]], [[FMUL]]
+ ; GFX10-DENORM-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s32>)
+ ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[UV]](s32)
+ ; GFX10-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX10-DENORM-NEXT: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2
%4:_(s32) = COPY $vgpr0
%5:_(s32) = COPY $vgpr1
%6:_(s32) = COPY $vgpr2
@@ -1285,24 +1454,6 @@ body: |
; GFX9-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>)
; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
;
- ; GFX9-CONTRACT-LABEL: name: test_4xhalf_add_mul
- ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
- ; GFX9-CONTRACT-NEXT: {{ $}}
- ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
- ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
- ; GFX9-CONTRACT-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>)
- ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
- ; GFX9-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3
- ; GFX9-CONTRACT-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>)
- ; GFX9-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
- ; GFX9-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
- ; GFX9-CONTRACT-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>)
- ; GFX9-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(<4 x s16>) = G_FMA [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]], [[CONCAT_VECTORS2]]
- ; GFX9-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FMA]](<4 x s16>)
- ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>)
- ; GFX9-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>)
- ; GFX9-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
- ;
; GFX9-DENORM-LABEL: name: test_4xhalf_add_mul
; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
; GFX9-DENORM-NEXT: {{ $}}
@@ -1322,24 +1473,6 @@ body: |
; GFX9-DENORM-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>)
; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
;
- ; GFX9-UNSAFE-LABEL: name: test_4xhalf_add_mul
- ; GFX9-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
- ; GFX9-UNSAFE-NEXT: {{ $}}
- ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
- ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
- ; GFX9-UNSAFE-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>)
- ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
- ; GFX9-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3
- ; GFX9-UNSAFE-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>)
- ; GFX9-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
- ; GFX9-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
- ; GFX9-UNSAFE-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>)
- ; GFX9-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(<4 x s16>) = G_FMA [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]], [[CONCAT_VECTORS2]]
- ; GFX9-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FMA]](<4 x s16>)
- ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>)
- ; GFX9-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>)
- ; GFX9-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
- ;
; GFX10-LABEL: name: test_4xhalf_add_mul
; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
; GFX10-NEXT: {{ $}}
@@ -1359,24 +1492,6 @@ body: |
; GFX10-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>)
; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
;
- ; GFX10-CONTRACT-LABEL: name: test_4xhalf_add_mul
- ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
- ; GFX10-CONTRACT-NEXT: {{ $}}
- ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
- ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
- ; GFX10-CONTRACT-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>)
- ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
- ; GFX10-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3
- ; GFX10-CONTRACT-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>)
- ; GFX10-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
- ; GFX10-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
- ; GFX10-CONTRACT-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>)
- ; GFX10-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(<4 x s16>) = G_FMA [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]], [[CONCAT_VECTORS2]]
- ; GFX10-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FMA]](<4 x s16>)
- ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>)
- ; GFX10-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>)
- ; GFX10-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
- ;
; GFX10-DENORM-LABEL: name: test_4xhalf_add_mul
; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
; GFX10-DENORM-NEXT: {{ $}}
@@ -1395,24 +1510,6 @@ body: |
; GFX10-DENORM-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>)
; GFX10-DENORM-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>)
; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
- ;
- ; GFX10-UNSAFE-LABEL: name: test_4xhalf_add_mul
- ; GFX10-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
- ; GFX10-UNSAFE-NEXT: {{ $}}
- ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
- ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
- ; GFX10-UNSAFE-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>)
- ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
- ; GFX10-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3
- ; GFX10-UNSAFE-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>)
- ; GFX10-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
- ; GFX10-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
- ; GFX10-UNSAFE-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>)
- ; GFX10-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(<4 x s16>) = G_FMA [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]], [[CONCAT_VECTORS2]]
- ; GFX10-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FMA]](<4 x s16>)
- ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>)
- ; GFX10-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>)
- ; GFX10-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
%4:_(<2 x s16>) = COPY $vgpr0
%5:_(<2 x s16>) = COPY $vgpr1
%0:_(<4 x s16>) = G_CONCAT_VECTORS %4(<2 x s16>), %5(<2 x s16>)
@@ -1431,6 +1528,100 @@ body: |
...
---
+name: test_4xhalf_add_mul_contract
+body: |
+ bb.1.entry:
+ liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
+
+ ; GFX9-LABEL: name: test_4xhalf_add_mul_contract
+ ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+ ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
+ ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>)
+ ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
+ ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3
+ ; GFX9-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>)
+ ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
+ ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
+ ; GFX9-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>)
+ ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(<4 x s16>) = G_FMA [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]], [[CONCAT_VECTORS2]]
+ ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FMA]](<4 x s16>)
+ ; GFX9-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>)
+ ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>)
+ ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
+ ;
+ ; GFX9-DENORM-LABEL: name: test_4xhalf_add_mul_contract
+ ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
+ ; GFX9-DENORM-NEXT: {{ $}}
+ ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+ ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
+ ; GFX9-DENORM-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>)
+ ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
+ ; GFX9-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3
+ ; GFX9-DENORM-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>)
+ ; GFX9-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
+ ; GFX9-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
+ ; GFX9-DENORM-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>)
+ ; GFX9-DENORM-NEXT: [[FMA:%[0-9]+]]:_(<4 x s16>) = G_FMA [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]], [[CONCAT_VECTORS2]]
+ ; GFX9-DENORM-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FMA]](<4 x s16>)
+ ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>)
+ ; GFX9-DENORM-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>)
+ ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
+ ;
+ ; GFX10-LABEL: name: test_4xhalf_add_mul_contract
+ ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+ ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
+ ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>)
+ ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
+ ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3
+ ; GFX10-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>)
+ ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
+ ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
+ ; GFX10-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>)
+ ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(<4 x s16>) = G_FMA [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]], [[CONCAT_VECTORS2]]
+ ; GFX10-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FMA]](<4 x s16>)
+ ; GFX10-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>)
+ ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>)
+ ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
+ ;
+ ; GFX10-DENORM-LABEL: name: test_4xhalf_add_mul_contract
+ ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
+ ; GFX10-DENORM-NEXT: {{ $}}
+ ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+ ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
+ ; GFX10-DENORM-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>)
+ ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
+ ; GFX10-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3
+ ; GFX10-DENORM-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>)
+ ; GFX10-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
+ ; GFX10-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
+ ; GFX10-DENORM-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>)
+ ; GFX10-DENORM-NEXT: [[FMA:%[0-9]+]]:_(<4 x s16>) = G_FMA [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]], [[CONCAT_VECTORS2]]
+ ; GFX10-DENORM-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FMA]](<4 x s16>)
+ ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>)
+ ; GFX10-DENORM-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>)
+ ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
+ %4:_(<2 x s16>) = COPY $vgpr0
+ %5:_(<2 x s16>) = COPY $vgpr1
+ %0:_(<4 x s16>) = G_CONCAT_VECTORS %4(<2 x s16>), %5(<2 x s16>)
+ %6:_(<2 x s16>) = COPY $vgpr2
+ %7:_(<2 x s16>) = COPY $vgpr3
+ %1:_(<4 x s16>) = G_CONCAT_VECTORS %6(<2 x s16>), %7(<2 x s16>)
+ %8:_(<2 x s16>) = COPY $vgpr4
+ %9:_(<2 x s16>) = COPY $vgpr5
+ %2:_(<4 x s16>) = G_CONCAT_VECTORS %8(<2 x s16>), %9(<2 x s16>)
+ %10:_(<4 x s16>) = reassoc contract G_FMUL %0, %1
+ %11:_(<4 x s16>) = reassoc contract G_FADD %10, %2
+ %13:_(<2 x s16>), %14:_(<2 x s16>) = G_UNMERGE_VALUES %11(<4 x s16>)
+ $vgpr0 = COPY %13(<2 x s16>)
+ $vgpr1 = COPY %14(<2 x s16>)
+ S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
+...
+
+---
name: test_3xhalf_add_mul_rhs
body: |
bb.1.entry:
@@ -1461,30 +1652,6 @@ body: |
; GFX9-NEXT: $vgpr1 = COPY [[UV7]](<2 x s16>)
; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
;
- ; GFX9-CONTRACT-LABEL: name: test_3xhalf_add_mul_rhs
- ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
- ; GFX9-CONTRACT-NEXT: {{ $}}
- ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
- ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
- ; GFX9-CONTRACT-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
- ; GFX9-CONTRACT-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>)
- ; GFX9-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
- ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
- ; GFX9-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3
- ; GFX9-CONTRACT-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[DEF]](<2 x s16>)
- ; GFX9-CONTRACT-NEXT: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x s16>)
- ; GFX9-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
- ; GFX9-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
- ; GFX9-CONTRACT-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[DEF]](<2 x s16>)
- ; GFX9-CONTRACT-NEXT: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x s16>)
- ; GFX9-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(<3 x s16>) = G_FMA [[UV]], [[UV2]], [[UV4]]
- ; GFX9-CONTRACT-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF
- ; GFX9-CONTRACT-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FMA]](<3 x s16>), [[DEF1]](<3 x s16>)
- ; GFX9-CONTRACT-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>)
- ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[UV6]](<2 x s16>)
- ; GFX9-CONTRACT-NEXT: $vgpr1 = COPY [[UV7]](<2 x s16>)
- ; GFX9-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
- ;
; GFX9-DENORM-LABEL: name: test_3xhalf_add_mul_rhs
; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
; GFX9-DENORM-NEXT: {{ $}}
@@ -1510,30 +1677,6 @@ body: |
; GFX9-DENORM-NEXT: $vgpr1 = COPY [[UV7]](<2 x s16>)
; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
;
- ; GFX9-UNSAFE-LABEL: name: test_3xhalf_add_mul_rhs
- ; GFX9-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
- ; GFX9-UNSAFE-NEXT: {{ $}}
- ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
- ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
- ; GFX9-UNSAFE-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
- ; GFX9-UNSAFE-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>)
- ; GFX9-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
- ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
- ; GFX9-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3
- ; GFX9-UNSAFE-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[DEF]](<2 x s16>)
- ; GFX9-UNSAFE-NEXT: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x s16>)
- ; GFX9-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
- ; GFX9-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
- ; GFX9-UNSAFE-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[DEF]](<2 x s16>)
- ; GFX9-UNSAFE-NEXT: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x s16>)
- ; GFX9-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(<3 x s16>) = G_FMA [[UV]], [[UV2]], [[UV4]]
- ; GFX9-UNSAFE-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF
- ; GFX9-UNSAFE-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FMA]](<3 x s16>), [[DEF1]](<3 x s16>)
- ; GFX9-UNSAFE-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>)
- ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[UV6]](<2 x s16>)
- ; GFX9-UNSAFE-NEXT: $vgpr1 = COPY [[UV7]](<2 x s16>)
- ; GFX9-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
- ;
; GFX10-LABEL: name: test_3xhalf_add_mul_rhs
; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
; GFX10-NEXT: {{ $}}
@@ -1559,30 +1702,6 @@ body: |
; GFX10-NEXT: $vgpr1 = COPY [[UV7]](<2 x s16>)
; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
;
- ; GFX10-CONTRACT-LABEL: name: test_3xhalf_add_mul_rhs
- ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
- ; GFX10-CONTRACT-NEXT: {{ $}}
- ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
- ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
- ; GFX10-CONTRACT-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
- ; GFX10-CONTRACT-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>)
- ; GFX10-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
- ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
- ; GFX10-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3
- ; GFX10-CONTRACT-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[DEF]](<2 x s16>)
- ; GFX10-CONTRACT-NEXT: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x s16>)
- ; GFX10-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
- ; GFX10-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
- ; GFX10-CONTRACT-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[DEF]](<2 x s16>)
- ; GFX10-CONTRACT-NEXT: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x s16>)
- ; GFX10-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(<3 x s16>) = G_FMA [[UV]], [[UV2]], [[UV4]]
- ; GFX10-CONTRACT-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF
- ; GFX10-CONTRACT-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FMA]](<3 x s16>), [[DEF1]](<3 x s16>)
- ; GFX10-CONTRACT-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>)
- ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[UV6]](<2 x s16>)
- ; GFX10-CONTRACT-NEXT: $vgpr1 = COPY [[UV7]](<2 x s16>)
- ; GFX10-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
- ;
; GFX10-DENORM-LABEL: name: test_3xhalf_add_mul_rhs
; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
; GFX10-DENORM-NEXT: {{ $}}
@@ -1607,30 +1726,6 @@ body: |
; GFX10-DENORM-NEXT: $vgpr0 = COPY [[UV6]](<2 x s16>)
; GFX10-DENORM-NEXT: $vgpr1 = COPY [[UV7]](<2 x s16>)
; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
- ;
- ; GFX10-UNSAFE-LABEL: name: test_3xhalf_add_mul_rhs
- ; GFX10-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
- ; GFX10-UNSAFE-NEXT: {{ $}}
- ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
- ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
- ; GFX10-UNSAFE-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
- ; GFX10-UNSAFE-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>)
- ; GFX10-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
- ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
- ; GFX10-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3
- ; GFX10-UNSAFE-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[DEF]](<2 x s16>)
- ; GFX10-UNSAFE-NEXT: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x s16>)
- ; GFX10-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
- ; GFX10-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
- ; GFX10-UNSAFE-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[DEF]](<2 x s16>)
- ; GFX10-UNSAFE-NEXT: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x s16>)
- ; GFX10-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(<3 x s16>) = G_FMA [[UV]], [[UV2]], [[UV4]]
- ; GFX10-UNSAFE-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF
- ; GFX10-UNSAFE-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FMA]](<3 x s16>), [[DEF1]](<3 x s16>)
- ; GFX10-UNSAFE-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>)
- ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[UV6]](<2 x s16>)
- ; GFX10-UNSAFE-NEXT: $vgpr1 = COPY [[UV7]](<2 x s16>)
- ; GFX10-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
%4:_(<2 x s16>) = COPY $vgpr0
%5:_(<2 x s16>) = COPY $vgpr1
%10:_(<2 x s16>) = G_IMPLICIT_DEF
@@ -1655,6 +1750,130 @@ body: |
...
---
+name: test_3xhalf_add_mul_rhs_contract
+body: |
+ bb.1.entry:
+ liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
+
+ ; GFX9-LABEL: name: test_3xhalf_add_mul_rhs_contract
+ ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+ ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
+ ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+ ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>)
+ ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
+ ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
+ ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3
+ ; GFX9-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[DEF]](<2 x s16>)
+ ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x s16>)
+ ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
+ ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
+ ; GFX9-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[DEF]](<2 x s16>)
+ ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x s16>)
+ ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(<3 x s16>) = G_FMA [[UV]], [[UV2]], [[UV4]]
+ ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF
+ ; GFX9-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FMA]](<3 x s16>), [[DEF1]](<3 x s16>)
+ ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>)
+ ; GFX9-NEXT: $vgpr0 = COPY [[UV6]](<2 x s16>)
+ ; GFX9-NEXT: $vgpr1 = COPY [[UV7]](<2 x s16>)
+ ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
+ ;
+ ; GFX9-DENORM-LABEL: name: test_3xhalf_add_mul_rhs_contract
+ ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
+ ; GFX9-DENORM-NEXT: {{ $}}
+ ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+ ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
+ ; GFX9-DENORM-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+ ; GFX9-DENORM-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>)
+ ; GFX9-DENORM-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
+ ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
+ ; GFX9-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3
+ ; GFX9-DENORM-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[DEF]](<2 x s16>)
+ ; GFX9-DENORM-NEXT: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x s16>)
+ ; GFX9-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
+ ; GFX9-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
+ ; GFX9-DENORM-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[DEF]](<2 x s16>)
+ ; GFX9-DENORM-NEXT: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x s16>)
+ ; GFX9-DENORM-NEXT: [[FMA:%[0-9]+]]:_(<3 x s16>) = G_FMA [[UV]], [[UV2]], [[UV4]]
+ ; GFX9-DENORM-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF
+ ; GFX9-DENORM-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FMA]](<3 x s16>), [[DEF1]](<3 x s16>)
+ ; GFX9-DENORM-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>)
+ ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[UV6]](<2 x s16>)
+ ; GFX9-DENORM-NEXT: $vgpr1 = COPY [[UV7]](<2 x s16>)
+ ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
+ ;
+ ; GFX10-LABEL: name: test_3xhalf_add_mul_rhs_contract
+ ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+ ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
+ ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+ ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>)
+ ; GFX10-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
+ ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
+ ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3
+ ; GFX10-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[DEF]](<2 x s16>)
+ ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x s16>)
+ ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
+ ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
+ ; GFX10-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[DEF]](<2 x s16>)
+ ; GFX10-NEXT: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x s16>)
+ ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(<3 x s16>) = G_FMA [[UV]], [[UV2]], [[UV4]]
+ ; GFX10-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF
+ ; GFX10-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FMA]](<3 x s16>), [[DEF1]](<3 x s16>)
+ ; GFX10-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>)
+ ; GFX10-NEXT: $vgpr0 = COPY [[UV6]](<2 x s16>)
+ ; GFX10-NEXT: $vgpr1 = COPY [[UV7]](<2 x s16>)
+ ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
+ ;
+ ; GFX10-DENORM-LABEL: name: test_3xhalf_add_mul_rhs_contract
+ ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
+ ; GFX10-DENORM-NEXT: {{ $}}
+ ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+ ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
+ ; GFX10-DENORM-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+ ; GFX10-DENORM-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>)
+ ; GFX10-DENORM-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
+ ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
+ ; GFX10-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3
+ ; GFX10-DENORM-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[DEF]](<2 x s16>)
+ ; GFX10-DENORM-NEXT: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x s16>)
+ ; GFX10-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
+ ; GFX10-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
+ ; GFX10-DENORM-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[DEF]](<2 x s16>)
+ ; GFX10-DENORM-NEXT: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x s16>)
+ ; GFX10-DENORM-NEXT: [[FMA:%[0-9]+]]:_(<3 x s16>) = G_FMA [[UV]], [[UV2]], [[UV4]]
+ ; GFX10-DENORM-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF
+ ; GFX10-DENORM-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FMA]](<3 x s16>), [[DEF1]](<3 x s16>)
+ ; GFX10-DENORM-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>)
+ ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[UV6]](<2 x s16>)
+ ; GFX10-DENORM-NEXT: $vgpr1 = COPY [[UV7]](<2 x s16>)
+ ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
+ %4:_(<2 x s16>) = COPY $vgpr0
+ %5:_(<2 x s16>) = COPY $vgpr1
+ %10:_(<2 x s16>) = G_IMPLICIT_DEF
+ %11:_(<6 x s16>) = G_CONCAT_VECTORS %4(<2 x s16>), %5(<2 x s16>), %10(<2 x s16>)
+ %0:_(<3 x s16>), %12:_(<3 x s16>) = G_UNMERGE_VALUES %11(<6 x s16>)
+ %6:_(<2 x s16>) = COPY $vgpr2
+ %7:_(<2 x s16>) = COPY $vgpr3
+ %13:_(<6 x s16>) = G_CONCAT_VECTORS %6(<2 x s16>), %7(<2 x s16>), %10(<2 x s16>)
+ %1:_(<3 x s16>), %14:_(<3 x s16>) = G_UNMERGE_VALUES %13(<6 x s16>)
+ %8:_(<2 x s16>) = COPY $vgpr4
+ %9:_(<2 x s16>) = COPY $vgpr5
+ %15:_(<6 x s16>) = G_CONCAT_VECTORS %8(<2 x s16>), %9(<2 x s16>), %10(<2 x s16>)
+ %2:_(<3 x s16>), %16:_(<3 x s16>) = G_UNMERGE_VALUES %15(<6 x s16>)
+ %17:_(<3 x s16>) = reassoc contract G_FMUL %0, %1
+ %18:_(<3 x s16>) = reassoc contract G_FADD %2, %17
+ %22:_(<3 x s16>) = G_IMPLICIT_DEF
+ %23:_(<6 x s16>) = G_CONCAT_VECTORS %18(<3 x s16>), %22(<3 x s16>)
+ %20:_(<2 x s16>), %21:_(<2 x s16>), %24:_(<2 x s16>) = G_UNMERGE_VALUES %23(<6 x s16>)
+ $vgpr0 = COPY %20(<2 x s16>)
+ $vgpr1 = COPY %21(<2 x s16>)
+ S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
+...
+
+---
name: test_4xdouble_add_mul
body: |
bb.1.entry:
@@ -1715,60 +1934,6 @@ body: |
; GFX9-NEXT: $vgpr7 = COPY [[UV7]](s32)
; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
;
- ; GFX9-CONTRACT-LABEL: name: test_4xdouble_add_mul
- ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23
- ; GFX9-CONTRACT-NEXT: {{ $}}
- ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GFX9-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; GFX9-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
- ; GFX9-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
- ; GFX9-CONTRACT-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
- ; GFX9-CONTRACT-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
- ; GFX9-CONTRACT-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
- ; GFX9-CONTRACT-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
- ; GFX9-CONTRACT-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
- ; GFX9-CONTRACT-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32)
- ; GFX9-CONTRACT-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64)
- ; GFX9-CONTRACT-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
- ; GFX9-CONTRACT-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
- ; GFX9-CONTRACT-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
- ; GFX9-CONTRACT-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
- ; GFX9-CONTRACT-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
- ; GFX9-CONTRACT-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
- ; GFX9-CONTRACT-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
- ; GFX9-CONTRACT-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
- ; GFX9-CONTRACT-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32)
- ; GFX9-CONTRACT-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32)
- ; GFX9-CONTRACT-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32)
- ; GFX9-CONTRACT-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32)
- ; GFX9-CONTRACT-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64)
- ; GFX9-CONTRACT-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16
- ; GFX9-CONTRACT-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17
- ; GFX9-CONTRACT-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18
- ; GFX9-CONTRACT-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19
- ; GFX9-CONTRACT-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20
- ; GFX9-CONTRACT-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21
- ; GFX9-CONTRACT-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22
- ; GFX9-CONTRACT-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23
- ; GFX9-CONTRACT-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32)
- ; GFX9-CONTRACT-NEXT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY18]](s32), [[COPY19]](s32)
- ; GFX9-CONTRACT-NEXT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32)
- ; GFX9-CONTRACT-NEXT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32)
- ; GFX9-CONTRACT-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64)
- ; GFX9-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(<4 x s64>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]]
- ; GFX9-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<4 x s64>)
- ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](s32)
- ; GFX9-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](s32)
- ; GFX9-CONTRACT-NEXT: $vgpr2 = COPY [[UV2]](s32)
- ; GFX9-CONTRACT-NEXT: $vgpr3 = COPY [[UV3]](s32)
- ; GFX9-CONTRACT-NEXT: $vgpr4 = COPY [[UV4]](s32)
- ; GFX9-CONTRACT-NEXT: $vgpr5 = COPY [[UV5]](s32)
- ; GFX9-CONTRACT-NEXT: $vgpr6 = COPY [[UV6]](s32)
- ; GFX9-CONTRACT-NEXT: $vgpr7 = COPY [[UV7]](s32)
- ; GFX9-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
- ;
; GFX9-DENORM-LABEL: name: test_4xdouble_add_mul
; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23
; GFX9-DENORM-NEXT: {{ $}}
@@ -1824,60 +1989,6 @@ body: |
; GFX9-DENORM-NEXT: $vgpr7 = COPY [[UV7]](s32)
; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
;
- ; GFX9-UNSAFE-LABEL: name: test_4xdouble_add_mul
- ; GFX9-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23
- ; GFX9-UNSAFE-NEXT: {{ $}}
- ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GFX9-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; GFX9-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
- ; GFX9-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
- ; GFX9-UNSAFE-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
- ; GFX9-UNSAFE-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
- ; GFX9-UNSAFE-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
- ; GFX9-UNSAFE-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
- ; GFX9-UNSAFE-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
- ; GFX9-UNSAFE-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32)
- ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64)
- ; GFX9-UNSAFE-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
- ; GFX9-UNSAFE-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
- ; GFX9-UNSAFE-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
- ; GFX9-UNSAFE-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
- ; GFX9-UNSAFE-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
- ; GFX9-UNSAFE-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
- ; GFX9-UNSAFE-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
- ; GFX9-UNSAFE-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
- ; GFX9-UNSAFE-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32)
- ; GFX9-UNSAFE-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32)
- ; GFX9-UNSAFE-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32)
- ; GFX9-UNSAFE-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32)
- ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64)
- ; GFX9-UNSAFE-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16
- ; GFX9-UNSAFE-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17
- ; GFX9-UNSAFE-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18
- ; GFX9-UNSAFE-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19
- ; GFX9-UNSAFE-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20
- ; GFX9-UNSAFE-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21
- ; GFX9-UNSAFE-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22
- ; GFX9-UNSAFE-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23
- ; GFX9-UNSAFE-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32)
- ; GFX9-UNSAFE-NEXT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY18]](s32), [[COPY19]](s32)
- ; GFX9-UNSAFE-NEXT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32)
- ; GFX9-UNSAFE-NEXT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32)
- ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64)
- ; GFX9-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(<4 x s64>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]]
- ; GFX9-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<4 x s64>)
- ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](s32)
- ; GFX9-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](s32)
- ; GFX9-UNSAFE-NEXT: $vgpr2 = COPY [[UV2]](s32)
- ; GFX9-UNSAFE-NEXT: $vgpr3 = COPY [[UV3]](s32)
- ; GFX9-UNSAFE-NEXT: $vgpr4 = COPY [[UV4]](s32)
- ; GFX9-UNSAFE-NEXT: $vgpr5 = COPY [[UV5]](s32)
- ; GFX9-UNSAFE-NEXT: $vgpr6 = COPY [[UV6]](s32)
- ; GFX9-UNSAFE-NEXT: $vgpr7 = COPY [[UV7]](s32)
- ; GFX9-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
- ;
; GFX10-LABEL: name: test_4xdouble_add_mul
; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23
; GFX10-NEXT: {{ $}}
@@ -1933,60 +2044,6 @@ body: |
; GFX10-NEXT: $vgpr7 = COPY [[UV7]](s32)
; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
;
- ; GFX10-CONTRACT-LABEL: name: test_4xdouble_add_mul
- ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23
- ; GFX10-CONTRACT-NEXT: {{ $}}
- ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GFX10-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; GFX10-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
- ; GFX10-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
- ; GFX10-CONTRACT-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
- ; GFX10-CONTRACT-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
- ; GFX10-CONTRACT-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
- ; GFX10-CONTRACT-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
- ; GFX10-CONTRACT-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
- ; GFX10-CONTRACT-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32)
- ; GFX10-CONTRACT-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64)
- ; GFX10-CONTRACT-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
- ; GFX10-CONTRACT-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
- ; GFX10-CONTRACT-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
- ; GFX10-CONTRACT-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
- ; GFX10-CONTRACT-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
- ; GFX10-CONTRACT-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
- ; GFX10-CONTRACT-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
- ; GFX10-CONTRACT-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
- ; GFX10-CONTRACT-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32)
- ; GFX10-CONTRACT-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32)
- ; GFX10-CONTRACT-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32)
- ; GFX10-CONTRACT-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32)
- ; GFX10-CONTRACT-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64)
- ; GFX10-CONTRACT-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16
- ; GFX10-CONTRACT-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17
- ; GFX10-CONTRACT-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18
- ; GFX10-CONTRACT-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19
- ; GFX10-CONTRACT-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20
- ; GFX10-CONTRACT-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21
- ; GFX10-CONTRACT-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22
- ; GFX10-CONTRACT-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23
- ; GFX10-CONTRACT-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32)
- ; GFX10-CONTRACT-NEXT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY18]](s32), [[COPY19]](s32)
- ; GFX10-CONTRACT-NEXT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32)
- ; GFX10-CONTRACT-NEXT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32)
- ; GFX10-CONTRACT-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64)
- ; GFX10-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(<4 x s64>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]]
- ; GFX10-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<4 x s64>)
- ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](s32)
- ; GFX10-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](s32)
- ; GFX10-CONTRACT-NEXT: $vgpr2 = COPY [[UV2]](s32)
- ; GFX10-CONTRACT-NEXT: $vgpr3 = COPY [[UV3]](s32)
- ; GFX10-CONTRACT-NEXT: $vgpr4 = COPY [[UV4]](s32)
- ; GFX10-CONTRACT-NEXT: $vgpr5 = COPY [[UV5]](s32)
- ; GFX10-CONTRACT-NEXT: $vgpr6 = COPY [[UV6]](s32)
- ; GFX10-CONTRACT-NEXT: $vgpr7 = COPY [[UV7]](s32)
- ; GFX10-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
- ;
; GFX10-DENORM-LABEL: name: test_4xdouble_add_mul
; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23
; GFX10-DENORM-NEXT: {{ $}}
@@ -2041,60 +2098,6 @@ body: |
; GFX10-DENORM-NEXT: $vgpr6 = COPY [[UV6]](s32)
; GFX10-DENORM-NEXT: $vgpr7 = COPY [[UV7]](s32)
; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
- ;
- ; GFX10-UNSAFE-LABEL: name: test_4xdouble_add_mul
- ; GFX10-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23
- ; GFX10-UNSAFE-NEXT: {{ $}}
- ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GFX10-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; GFX10-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
- ; GFX10-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
- ; GFX10-UNSAFE-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
- ; GFX10-UNSAFE-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
- ; GFX10-UNSAFE-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
- ; GFX10-UNSAFE-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
- ; GFX10-UNSAFE-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
- ; GFX10-UNSAFE-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32)
- ; GFX10-UNSAFE-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64)
- ; GFX10-UNSAFE-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
- ; GFX10-UNSAFE-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
- ; GFX10-UNSAFE-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
- ; GFX10-UNSAFE-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
- ; GFX10-UNSAFE-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
- ; GFX10-UNSAFE-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
- ; GFX10-UNSAFE-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
- ; GFX10-UNSAFE-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
- ; GFX10-UNSAFE-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32)
- ; GFX10-UNSAFE-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32)
- ; GFX10-UNSAFE-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32)
- ; GFX10-UNSAFE-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32)
- ; GFX10-UNSAFE-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64)
- ; GFX10-UNSAFE-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16
- ; GFX10-UNSAFE-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17
- ; GFX10-UNSAFE-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18
- ; GFX10-UNSAFE-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19
- ; GFX10-UNSAFE-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20
- ; GFX10-UNSAFE-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21
- ; GFX10-UNSAFE-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22
- ; GFX10-UNSAFE-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23
- ; GFX10-UNSAFE-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32)
- ; GFX10-UNSAFE-NEXT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY18]](s32), [[COPY19]](s32)
- ; GFX10-UNSAFE-NEXT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32)
- ; GFX10-UNSAFE-NEXT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32)
- ; GFX10-UNSAFE-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64)
- ; GFX10-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(<4 x s64>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]]
- ; GFX10-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<4 x s64>)
- ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](s32)
- ; GFX10-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](s32)
- ; GFX10-UNSAFE-NEXT: $vgpr2 = COPY [[UV2]](s32)
- ; GFX10-UNSAFE-NEXT: $vgpr3 = COPY [[UV3]](s32)
- ; GFX10-UNSAFE-NEXT: $vgpr4 = COPY [[UV4]](s32)
- ; GFX10-UNSAFE-NEXT: $vgpr5 = COPY [[UV5]](s32)
- ; GFX10-UNSAFE-NEXT: $vgpr6 = COPY [[UV6]](s32)
- ; GFX10-UNSAFE-NEXT: $vgpr7 = COPY [[UV7]](s32)
- ; GFX10-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
%4:_(s32) = COPY $vgpr0
%5:_(s32) = COPY $vgpr1
%6:_(s32) = COPY $vgpr2
@@ -2149,6 +2152,280 @@ body: |
...
---
+name: test_4xdouble_add_mul_contract
+body: |
+ bb.1.entry:
+ liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23
+
+ ; GFX9-LABEL: name: test_4xdouble_add_mul_contract
+ ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+ ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+ ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+ ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+ ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
+ ; GFX9-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64)
+ ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
+ ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
+ ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
+ ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
+ ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
+ ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
+ ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
+ ; GFX9-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
+ ; GFX9-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32)
+ ; GFX9-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX9-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32)
+ ; GFX9-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64)
+ ; GFX9-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16
+ ; GFX9-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17
+ ; GFX9-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18
+ ; GFX9-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19
+ ; GFX9-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20
+ ; GFX9-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21
+ ; GFX9-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22
+ ; GFX9-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23
+ ; GFX9-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32)
+ ; GFX9-NEXT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY18]](s32), [[COPY19]](s32)
+ ; GFX9-NEXT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32)
+ ; GFX9-NEXT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64)
+ ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(<4 x s64>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]]
+ ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<4 x s64>)
+ ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32)
+ ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX9-NEXT: $vgpr4 = COPY [[UV4]](s32)
+ ; GFX9-NEXT: $vgpr5 = COPY [[UV5]](s32)
+ ; GFX9-NEXT: $vgpr6 = COPY [[UV6]](s32)
+ ; GFX9-NEXT: $vgpr7 = COPY [[UV7]](s32)
+ ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
+ ;
+ ; GFX9-DENORM-LABEL: name: test_4xdouble_add_mul_contract
+ ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23
+ ; GFX9-DENORM-NEXT: {{ $}}
+ ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX9-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX9-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; GFX9-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; GFX9-DENORM-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+ ; GFX9-DENORM-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+ ; GFX9-DENORM-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+ ; GFX9-DENORM-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+ ; GFX9-DENORM-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
+ ; GFX9-DENORM-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX9-DENORM-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64)
+ ; GFX9-DENORM-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
+ ; GFX9-DENORM-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
+ ; GFX9-DENORM-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
+ ; GFX9-DENORM-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
+ ; GFX9-DENORM-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
+ ; GFX9-DENORM-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
+ ; GFX9-DENORM-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
+ ; GFX9-DENORM-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
+ ; GFX9-DENORM-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32)
+ ; GFX9-DENORM-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX9-DENORM-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32)
+ ; GFX9-DENORM-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32)
+ ; GFX9-DENORM-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64)
+ ; GFX9-DENORM-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16
+ ; GFX9-DENORM-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17
+ ; GFX9-DENORM-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18
+ ; GFX9-DENORM-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19
+ ; GFX9-DENORM-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20
+ ; GFX9-DENORM-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21
+ ; GFX9-DENORM-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22
+ ; GFX9-DENORM-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23
+ ; GFX9-DENORM-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32)
+ ; GFX9-DENORM-NEXT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY18]](s32), [[COPY19]](s32)
+ ; GFX9-DENORM-NEXT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32)
+ ; GFX9-DENORM-NEXT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32)
+ ; GFX9-DENORM-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64)
+ ; GFX9-DENORM-NEXT: [[FMA:%[0-9]+]]:_(<4 x s64>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]]
+ ; GFX9-DENORM-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<4 x s64>)
+ ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[UV]](s32)
+ ; GFX9-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX9-DENORM-NEXT: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX9-DENORM-NEXT: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX9-DENORM-NEXT: $vgpr4 = COPY [[UV4]](s32)
+ ; GFX9-DENORM-NEXT: $vgpr5 = COPY [[UV5]](s32)
+ ; GFX9-DENORM-NEXT: $vgpr6 = COPY [[UV6]](s32)
+ ; GFX9-DENORM-NEXT: $vgpr7 = COPY [[UV7]](s32)
+ ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
+ ;
+ ; GFX10-LABEL: name: test_4xdouble_add_mul_contract
+ ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+ ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+ ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+ ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+ ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
+ ; GFX10-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64)
+ ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
+ ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
+ ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
+ ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
+ ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
+ ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
+ ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
+ ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
+ ; GFX10-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32)
+ ; GFX10-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX10-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32)
+ ; GFX10-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32)
+ ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64)
+ ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16
+ ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17
+ ; GFX10-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18
+ ; GFX10-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19
+ ; GFX10-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20
+ ; GFX10-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21
+ ; GFX10-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22
+ ; GFX10-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23
+ ; GFX10-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32)
+ ; GFX10-NEXT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY18]](s32), [[COPY19]](s32)
+ ; GFX10-NEXT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32)
+ ; GFX10-NEXT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32)
+ ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64)
+ ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(<4 x s64>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]]
+ ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<4 x s64>)
+ ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32)
+ ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX10-NEXT: $vgpr4 = COPY [[UV4]](s32)
+ ; GFX10-NEXT: $vgpr5 = COPY [[UV5]](s32)
+ ; GFX10-NEXT: $vgpr6 = COPY [[UV6]](s32)
+ ; GFX10-NEXT: $vgpr7 = COPY [[UV7]](s32)
+ ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
+ ;
+ ; GFX10-DENORM-LABEL: name: test_4xdouble_add_mul_contract
+ ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23
+ ; GFX10-DENORM-NEXT: {{ $}}
+ ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX10-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX10-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; GFX10-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; GFX10-DENORM-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+ ; GFX10-DENORM-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+ ; GFX10-DENORM-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+ ; GFX10-DENORM-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+ ; GFX10-DENORM-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
+ ; GFX10-DENORM-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX10-DENORM-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64)
+ ; GFX10-DENORM-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
+ ; GFX10-DENORM-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
+ ; GFX10-DENORM-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
+ ; GFX10-DENORM-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
+ ; GFX10-DENORM-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
+ ; GFX10-DENORM-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
+ ; GFX10-DENORM-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
+ ; GFX10-DENORM-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
+ ; GFX10-DENORM-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32)
+ ; GFX10-DENORM-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX10-DENORM-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32)
+ ; GFX10-DENORM-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32)
+ ; GFX10-DENORM-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64)
+ ; GFX10-DENORM-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16
+ ; GFX10-DENORM-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17
+ ; GFX10-DENORM-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18
+ ; GFX10-DENORM-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19
+ ; GFX10-DENORM-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20
+ ; GFX10-DENORM-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21
+ ; GFX10-DENORM-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22
+ ; GFX10-DENORM-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23
+ ; GFX10-DENORM-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32)
+ ; GFX10-DENORM-NEXT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY18]](s32), [[COPY19]](s32)
+ ; GFX10-DENORM-NEXT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32)
+ ; GFX10-DENORM-NEXT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32)
+ ; GFX10-DENORM-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64)
+ ; GFX10-DENORM-NEXT: [[FMA:%[0-9]+]]:_(<4 x s64>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]]
+ ; GFX10-DENORM-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<4 x s64>)
+ ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[UV]](s32)
+ ; GFX10-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX10-DENORM-NEXT: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX10-DENORM-NEXT: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX10-DENORM-NEXT: $vgpr4 = COPY [[UV4]](s32)
+ ; GFX10-DENORM-NEXT: $vgpr5 = COPY [[UV5]](s32)
+ ; GFX10-DENORM-NEXT: $vgpr6 = COPY [[UV6]](s32)
+ ; GFX10-DENORM-NEXT: $vgpr7 = COPY [[UV7]](s32)
+ ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
+ %4:_(s32) = COPY $vgpr0
+ %5:_(s32) = COPY $vgpr1
+ %6:_(s32) = COPY $vgpr2
+ %7:_(s32) = COPY $vgpr3
+ %8:_(s32) = COPY $vgpr4
+ %9:_(s32) = COPY $vgpr5
+ %10:_(s32) = COPY $vgpr6
+ %11:_(s32) = COPY $vgpr7
+ %28:_(s64) = G_MERGE_VALUES %4(s32), %5(s32)
+ %29:_(s64) = G_MERGE_VALUES %6(s32), %7(s32)
+ %30:_(s64) = G_MERGE_VALUES %8(s32), %9(s32)
+ %31:_(s64) = G_MERGE_VALUES %10(s32), %11(s32)
+ %0:_(<4 x s64>) = G_BUILD_VECTOR %28(s64), %29(s64), %30(s64), %31(s64)
+ %12:_(s32) = COPY $vgpr8
+ %13:_(s32) = COPY $vgpr9
+ %14:_(s32) = COPY $vgpr10
+ %15:_(s32) = COPY $vgpr11
+ %16:_(s32) = COPY $vgpr12
+ %17:_(s32) = COPY $vgpr13
+ %18:_(s32) = COPY $vgpr14
+ %19:_(s32) = COPY $vgpr15
+ %32:_(s64) = G_MERGE_VALUES %12(s32), %13(s32)
+ %33:_(s64) = G_MERGE_VALUES %14(s32), %15(s32)
+ %34:_(s64) = G_MERGE_VALUES %16(s32), %17(s32)
+ %35:_(s64) = G_MERGE_VALUES %18(s32), %19(s32)
+ %1:_(<4 x s64>) = G_BUILD_VECTOR %32(s64), %33(s64), %34(s64), %35(s64)
+ %20:_(s32) = COPY $vgpr16
+ %21:_(s32) = COPY $vgpr17
+ %22:_(s32) = COPY $vgpr18
+ %23:_(s32) = COPY $vgpr19
+ %24:_(s32) = COPY $vgpr20
+ %25:_(s32) = COPY $vgpr21
+ %26:_(s32) = COPY $vgpr22
+ %27:_(s32) = COPY $vgpr23
+ %36:_(s64) = G_MERGE_VALUES %20(s32), %21(s32)
+ %37:_(s64) = G_MERGE_VALUES %22(s32), %23(s32)
+ %38:_(s64) = G_MERGE_VALUES %24(s32), %25(s32)
+ %39:_(s64) = G_MERGE_VALUES %26(s32), %27(s32)
+ %2:_(<4 x s64>) = G_BUILD_VECTOR %36(s64), %37(s64), %38(s64), %39(s64)
+ %40:_(<4 x s64>) = reassoc contract G_FMUL %0, %1
+ %41:_(<4 x s64>) = reassoc contract G_FADD %40, %2
+ %43:_(s32), %44:_(s32), %45:_(s32), %46:_(s32), %47:_(s32), %48:_(s32), %49:_(s32), %50:_(s32) = G_UNMERGE_VALUES %41(<4 x s64>)
+ $vgpr0 = COPY %43(s32)
+ $vgpr1 = COPY %44(s32)
+ $vgpr2 = COPY %45(s32)
+ $vgpr3 = COPY %46(s32)
+ $vgpr4 = COPY %47(s32)
+ $vgpr5 = COPY %48(s32)
+ $vgpr6 = COPY %49(s32)
+ $vgpr7 = COPY %50(s32)
+ S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
+...
+
+---
name: test_3xdouble_add_mul_rhs
body: |
bb.1.entry:
@@ -2198,49 +2475,6 @@ body: |
; GFX9-NEXT: $vgpr5 = COPY [[UV5]](s32)
; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5
;
- ; GFX9-CONTRACT-LABEL: name: test_3xdouble_add_mul_rhs
- ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17
- ; GFX9-CONTRACT-NEXT: {{ $}}
- ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GFX9-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; GFX9-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
- ; GFX9-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
- ; GFX9-CONTRACT-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
- ; GFX9-CONTRACT-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
- ; GFX9-CONTRACT-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
- ; GFX9-CONTRACT-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64)
- ; GFX9-CONTRACT-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
- ; GFX9-CONTRACT-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
- ; GFX9-CONTRACT-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
- ; GFX9-CONTRACT-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
- ; GFX9-CONTRACT-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
- ; GFX9-CONTRACT-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
- ; GFX9-CONTRACT-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32)
- ; GFX9-CONTRACT-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32)
- ; GFX9-CONTRACT-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32)
- ; GFX9-CONTRACT-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV3]](s64), [[MV4]](s64), [[MV5]](s64)
- ; GFX9-CONTRACT-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
- ; GFX9-CONTRACT-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
- ; GFX9-CONTRACT-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
- ; GFX9-CONTRACT-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
- ; GFX9-CONTRACT-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16
- ; GFX9-CONTRACT-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17
- ; GFX9-CONTRACT-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32)
- ; GFX9-CONTRACT-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32)
- ; GFX9-CONTRACT-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32)
- ; GFX9-CONTRACT-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV6]](s64), [[MV7]](s64), [[MV8]](s64)
- ; GFX9-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(<3 x s64>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]]
- ; GFX9-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<3 x s64>)
- ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](s32)
- ; GFX9-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](s32)
- ; GFX9-CONTRACT-NEXT: $vgpr2 = COPY [[UV2]](s32)
- ; GFX9-CONTRACT-NEXT: $vgpr3 = COPY [[UV3]](s32)
- ; GFX9-CONTRACT-NEXT: $vgpr4 = COPY [[UV4]](s32)
- ; GFX9-CONTRACT-NEXT: $vgpr5 = COPY [[UV5]](s32)
- ; GFX9-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5
- ;
; GFX9-DENORM-LABEL: name: test_3xdouble_add_mul_rhs
; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17
; GFX9-DENORM-NEXT: {{ $}}
@@ -2285,49 +2519,6 @@ body: |
; GFX9-DENORM-NEXT: $vgpr5 = COPY [[UV5]](s32)
; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5
;
- ; GFX9-UNSAFE-LABEL: name: test_3xdouble_add_mul_rhs
- ; GFX9-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17
- ; GFX9-UNSAFE-NEXT: {{ $}}
- ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GFX9-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; GFX9-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
- ; GFX9-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
- ; GFX9-UNSAFE-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
- ; GFX9-UNSAFE-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
- ; GFX9-UNSAFE-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
- ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64)
- ; GFX9-UNSAFE-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
- ; GFX9-UNSAFE-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
- ; GFX9-UNSAFE-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
- ; GFX9-UNSAFE-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
- ; GFX9-UNSAFE-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
- ; GFX9-UNSAFE-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
- ; GFX9-UNSAFE-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32)
- ; GFX9-UNSAFE-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32)
- ; GFX9-UNSAFE-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32)
- ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV3]](s64), [[MV4]](s64), [[MV5]](s64)
- ; GFX9-UNSAFE-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
- ; GFX9-UNSAFE-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
- ; GFX9-UNSAFE-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
- ; GFX9-UNSAFE-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
- ; GFX9-UNSAFE-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16
- ; GFX9-UNSAFE-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17
- ; GFX9-UNSAFE-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32)
- ; GFX9-UNSAFE-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32)
- ; GFX9-UNSAFE-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32)
- ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV6]](s64), [[MV7]](s64), [[MV8]](s64)
- ; GFX9-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(<3 x s64>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]]
- ; GFX9-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<3 x s64>)
- ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](s32)
- ; GFX9-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](s32)
- ; GFX9-UNSAFE-NEXT: $vgpr2 = COPY [[UV2]](s32)
- ; GFX9-UNSAFE-NEXT: $vgpr3 = COPY [[UV3]](s32)
- ; GFX9-UNSAFE-NEXT: $vgpr4 = COPY [[UV4]](s32)
- ; GFX9-UNSAFE-NEXT: $vgpr5 = COPY [[UV5]](s32)
- ; GFX9-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5
- ;
; GFX10-LABEL: name: test_3xdouble_add_mul_rhs
; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17
; GFX10-NEXT: {{ $}}
@@ -2372,49 +2563,6 @@ body: |
; GFX10-NEXT: $vgpr5 = COPY [[UV5]](s32)
; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5
;
- ; GFX10-CONTRACT-LABEL: name: test_3xdouble_add_mul_rhs
- ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17
- ; GFX10-CONTRACT-NEXT: {{ $}}
- ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GFX10-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; GFX10-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
- ; GFX10-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
- ; GFX10-CONTRACT-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
- ; GFX10-CONTRACT-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
- ; GFX10-CONTRACT-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
- ; GFX10-CONTRACT-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64)
- ; GFX10-CONTRACT-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
- ; GFX10-CONTRACT-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
- ; GFX10-CONTRACT-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
- ; GFX10-CONTRACT-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
- ; GFX10-CONTRACT-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
- ; GFX10-CONTRACT-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
- ; GFX10-CONTRACT-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32)
- ; GFX10-CONTRACT-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32)
- ; GFX10-CONTRACT-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32)
- ; GFX10-CONTRACT-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV3]](s64), [[MV4]](s64), [[MV5]](s64)
- ; GFX10-CONTRACT-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
- ; GFX10-CONTRACT-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
- ; GFX10-CONTRACT-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
- ; GFX10-CONTRACT-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
- ; GFX10-CONTRACT-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16
- ; GFX10-CONTRACT-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17
- ; GFX10-CONTRACT-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32)
- ; GFX10-CONTRACT-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32)
- ; GFX10-CONTRACT-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32)
- ; GFX10-CONTRACT-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV6]](s64), [[MV7]](s64), [[MV8]](s64)
- ; GFX10-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(<3 x s64>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]]
- ; GFX10-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<3 x s64>)
- ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](s32)
- ; GFX10-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](s32)
- ; GFX10-CONTRACT-NEXT: $vgpr2 = COPY [[UV2]](s32)
- ; GFX10-CONTRACT-NEXT: $vgpr3 = COPY [[UV3]](s32)
- ; GFX10-CONTRACT-NEXT: $vgpr4 = COPY [[UV4]](s32)
- ; GFX10-CONTRACT-NEXT: $vgpr5 = COPY [[UV5]](s32)
- ; GFX10-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5
- ;
; GFX10-DENORM-LABEL: name: test_3xdouble_add_mul_rhs
; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17
; GFX10-DENORM-NEXT: {{ $}}
@@ -2458,49 +2606,6 @@ body: |
; GFX10-DENORM-NEXT: $vgpr4 = COPY [[UV4]](s32)
; GFX10-DENORM-NEXT: $vgpr5 = COPY [[UV5]](s32)
; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5
- ;
- ; GFX10-UNSAFE-LABEL: name: test_3xdouble_add_mul_rhs
- ; GFX10-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17
- ; GFX10-UNSAFE-NEXT: {{ $}}
- ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
- ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
- ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
- ; GFX10-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
- ; GFX10-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
- ; GFX10-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
- ; GFX10-UNSAFE-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
- ; GFX10-UNSAFE-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
- ; GFX10-UNSAFE-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
- ; GFX10-UNSAFE-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64)
- ; GFX10-UNSAFE-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
- ; GFX10-UNSAFE-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
- ; GFX10-UNSAFE-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
- ; GFX10-UNSAFE-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
- ; GFX10-UNSAFE-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
- ; GFX10-UNSAFE-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
- ; GFX10-UNSAFE-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32)
- ; GFX10-UNSAFE-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32)
- ; GFX10-UNSAFE-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32)
- ; GFX10-UNSAFE-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV3]](s64), [[MV4]](s64), [[MV5]](s64)
- ; GFX10-UNSAFE-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
- ; GFX10-UNSAFE-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
- ; GFX10-UNSAFE-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
- ; GFX10-UNSAFE-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
- ; GFX10-UNSAFE-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16
- ; GFX10-UNSAFE-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17
- ; GFX10-UNSAFE-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32)
- ; GFX10-UNSAFE-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32)
- ; GFX10-UNSAFE-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32)
- ; GFX10-UNSAFE-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV6]](s64), [[MV7]](s64), [[MV8]](s64)
- ; GFX10-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(<3 x s64>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]]
- ; GFX10-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<3 x s64>)
- ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](s32)
- ; GFX10-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](s32)
- ; GFX10-UNSAFE-NEXT: $vgpr2 = COPY [[UV2]](s32)
- ; GFX10-UNSAFE-NEXT: $vgpr3 = COPY [[UV3]](s32)
- ; GFX10-UNSAFE-NEXT: $vgpr4 = COPY [[UV4]](s32)
- ; GFX10-UNSAFE-NEXT: $vgpr5 = COPY [[UV5]](s32)
- ; GFX10-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5
%4:_(s32) = COPY $vgpr0
%5:_(s32) = COPY $vgpr1
%6:_(s32) = COPY $vgpr2
@@ -2542,3 +2647,222 @@ body: |
$vgpr5 = COPY %39(s32)
S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5
...
+
+---
+name: test_3xdouble_add_mul_rhs_contract
+body: |
+ bb.1.entry:
+ liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17
+
+ ; GFX9-LABEL: name: test_3xdouble_add_mul_rhs_contract
+ ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+ ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+ ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64)
+ ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+ ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+ ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
+ ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
+ ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
+ ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
+ ; GFX9-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX9-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32)
+ ; GFX9-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV3]](s64), [[MV4]](s64), [[MV5]](s64)
+ ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
+ ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
+ ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
+ ; GFX9-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
+ ; GFX9-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16
+ ; GFX9-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17
+ ; GFX9-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32)
+ ; GFX9-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32)
+ ; GFX9-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV6]](s64), [[MV7]](s64), [[MV8]](s64)
+ ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(<3 x s64>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]]
+ ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<3 x s64>)
+ ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32)
+ ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX9-NEXT: $vgpr4 = COPY [[UV4]](s32)
+ ; GFX9-NEXT: $vgpr5 = COPY [[UV5]](s32)
+ ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5
+ ;
+ ; GFX9-DENORM-LABEL: name: test_3xdouble_add_mul_rhs_contract
+ ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17
+ ; GFX9-DENORM-NEXT: {{ $}}
+ ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX9-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX9-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; GFX9-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; GFX9-DENORM-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+ ; GFX9-DENORM-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+ ; GFX9-DENORM-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
+ ; GFX9-DENORM-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64)
+ ; GFX9-DENORM-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+ ; GFX9-DENORM-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+ ; GFX9-DENORM-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
+ ; GFX9-DENORM-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
+ ; GFX9-DENORM-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
+ ; GFX9-DENORM-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
+ ; GFX9-DENORM-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX9-DENORM-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32)
+ ; GFX9-DENORM-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX9-DENORM-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV3]](s64), [[MV4]](s64), [[MV5]](s64)
+ ; GFX9-DENORM-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
+ ; GFX9-DENORM-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
+ ; GFX9-DENORM-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
+ ; GFX9-DENORM-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
+ ; GFX9-DENORM-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16
+ ; GFX9-DENORM-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17
+ ; GFX9-DENORM-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32)
+ ; GFX9-DENORM-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32)
+ ; GFX9-DENORM-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32)
+ ; GFX9-DENORM-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV6]](s64), [[MV7]](s64), [[MV8]](s64)
+ ; GFX9-DENORM-NEXT: [[FMA:%[0-9]+]]:_(<3 x s64>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]]
+ ; GFX9-DENORM-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<3 x s64>)
+ ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[UV]](s32)
+ ; GFX9-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX9-DENORM-NEXT: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX9-DENORM-NEXT: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX9-DENORM-NEXT: $vgpr4 = COPY [[UV4]](s32)
+ ; GFX9-DENORM-NEXT: $vgpr5 = COPY [[UV5]](s32)
+ ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5
+ ;
+ ; GFX10-LABEL: name: test_3xdouble_add_mul_rhs_contract
+ ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+ ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+ ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
+ ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64)
+ ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+ ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+ ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
+ ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
+ ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
+ ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
+ ; GFX10-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX10-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32)
+ ; GFX10-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV3]](s64), [[MV4]](s64), [[MV5]](s64)
+ ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
+ ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
+ ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
+ ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
+ ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16
+ ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17
+ ; GFX10-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32)
+ ; GFX10-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32)
+ ; GFX10-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32)
+ ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV6]](s64), [[MV7]](s64), [[MV8]](s64)
+ ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(<3 x s64>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]]
+ ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<3 x s64>)
+ ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32)
+ ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX10-NEXT: $vgpr4 = COPY [[UV4]](s32)
+ ; GFX10-NEXT: $vgpr5 = COPY [[UV5]](s32)
+ ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5
+ ;
+ ; GFX10-DENORM-LABEL: name: test_3xdouble_add_mul_rhs_contract
+ ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17
+ ; GFX10-DENORM-NEXT: {{ $}}
+ ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX10-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX10-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; GFX10-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; GFX10-DENORM-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+ ; GFX10-DENORM-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+ ; GFX10-DENORM-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
+ ; GFX10-DENORM-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64)
+ ; GFX10-DENORM-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+ ; GFX10-DENORM-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+ ; GFX10-DENORM-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
+ ; GFX10-DENORM-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
+ ; GFX10-DENORM-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
+ ; GFX10-DENORM-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
+ ; GFX10-DENORM-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX10-DENORM-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32)
+ ; GFX10-DENORM-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX10-DENORM-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV3]](s64), [[MV4]](s64), [[MV5]](s64)
+ ; GFX10-DENORM-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
+ ; GFX10-DENORM-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
+ ; GFX10-DENORM-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
+ ; GFX10-DENORM-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
+ ; GFX10-DENORM-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16
+ ; GFX10-DENORM-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17
+ ; GFX10-DENORM-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32)
+ ; GFX10-DENORM-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32)
+ ; GFX10-DENORM-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32)
+ ; GFX10-DENORM-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV6]](s64), [[MV7]](s64), [[MV8]](s64)
+ ; GFX10-DENORM-NEXT: [[FMA:%[0-9]+]]:_(<3 x s64>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]]
+ ; GFX10-DENORM-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<3 x s64>)
+ ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[UV]](s32)
+ ; GFX10-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX10-DENORM-NEXT: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX10-DENORM-NEXT: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX10-DENORM-NEXT: $vgpr4 = COPY [[UV4]](s32)
+ ; GFX10-DENORM-NEXT: $vgpr5 = COPY [[UV5]](s32)
+ ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5
+ %4:_(s32) = COPY $vgpr0
+ %5:_(s32) = COPY $vgpr1
+ %6:_(s32) = COPY $vgpr2
+ %7:_(s32) = COPY $vgpr3
+ %8:_(s32) = COPY $vgpr4
+ %9:_(s32) = COPY $vgpr5
+ %22:_(s64) = G_MERGE_VALUES %4(s32), %5(s32)
+ %23:_(s64) = G_MERGE_VALUES %6(s32), %7(s32)
+ %24:_(s64) = G_MERGE_VALUES %8(s32), %9(s32)
+ %0:_(<3 x s64>) = G_BUILD_VECTOR %22(s64), %23(s64), %24(s64)
+ %10:_(s32) = COPY $vgpr6
+ %11:_(s32) = COPY $vgpr7
+ %12:_(s32) = COPY $vgpr8
+ %13:_(s32) = COPY $vgpr9
+ %14:_(s32) = COPY $vgpr10
+ %15:_(s32) = COPY $vgpr11
+ %25:_(s64) = G_MERGE_VALUES %10(s32), %11(s32)
+ %26:_(s64) = G_MERGE_VALUES %12(s32), %13(s32)
+ %27:_(s64) = G_MERGE_VALUES %14(s32), %15(s32)
+ %1:_(<3 x s64>) = G_BUILD_VECTOR %25(s64), %26(s64), %27(s64)
+ %16:_(s32) = COPY $vgpr12
+ %17:_(s32) = COPY $vgpr13
+ %18:_(s32) = COPY $vgpr14
+ %19:_(s32) = COPY $vgpr15
+ %20:_(s32) = COPY $vgpr16
+ %21:_(s32) = COPY $vgpr17
+ %28:_(s64) = G_MERGE_VALUES %16(s32), %17(s32)
+ %29:_(s64) = G_MERGE_VALUES %18(s32), %19(s32)
+ %30:_(s64) = G_MERGE_VALUES %20(s32), %21(s32)
+ %2:_(<3 x s64>) = G_BUILD_VECTOR %28(s64), %29(s64), %30(s64)
+ %31:_(<3 x s64>) = reassoc contract G_FMUL %0, %1
+ %32:_(<3 x s64>) = reassoc contract G_FADD %2, %31
+ %34:_(s32), %35:_(s32), %36:_(s32), %37:_(s32), %38:_(s32), %39:_(s32) = G_UNMERGE_VALUES %32(<3 x s64>)
+ $vgpr0 = COPY %34(s32)
+ $vgpr1 = COPY %35(s32)
+ $vgpr2 = COPY %36(s32)
+ $vgpr3 = COPY %37(s32)
+ $vgpr4 = COPY %38(s32)
+ $vgpr5 = COPY %39(s32)
+ S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5
+...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul.ll
index 24dd535..3f6e3d8 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul.ll
@@ -2,11 +2,9 @@
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 -fp-contract=fast < %s | FileCheck -check-prefix=GFX9-CONTRACT %s
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 --denormal-fp-math=preserve-sign < %s | FileCheck -check-prefix=GFX9-DENORM %s
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GFX9-UNSAFE %s
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 -fp-contract=fast < %s | FileCheck -check-prefix=GFX10-CONTRACT %s
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 --denormal-fp-math=preserve-sign < %s | FileCheck -check-prefix=GFX10-DENORM %s
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GFX10-UNSAFE %s
define float @test_f32_add_mul(float %x, float %y, float %z) {
; GFX9-LABEL: test_f32_add_mul:
@@ -28,12 +26,6 @@ define float @test_f32_add_mul(float %x, float %y, float %z) {
; GFX9-DENORM-NEXT: v_mad_f32 v0, v0, v1, v2
; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-UNSAFE-LABEL: test_f32_add_mul:
-; GFX9-UNSAFE: ; %bb.0: ; %.entry
-; GFX9-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-UNSAFE-NEXT: v_fma_f32 v0, v0, v1, v2
-; GFX9-UNSAFE-NEXT: s_setpc_b64 s[30:31]
-;
; GFX10-LABEL: test_f32_add_mul:
; GFX10: ; %bb.0: ; %.entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -52,7 +44,6 @@ define float @test_f32_add_mul(float %x, float %y, float %z) {
; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-DENORM-NEXT: v_mad_f32 v0, v0, v1, v2
; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31]
-;
; GFX10-UNSAFE-LABEL: test_f32_add_mul:
; GFX10-UNSAFE: ; %bb.0: ; %.entry
; GFX10-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -64,6 +55,58 @@ define float @test_f32_add_mul(float %x, float %y, float %z) {
ret float %b
}
+define float @test_f32_add_mul_contract(float %x, float %y, float %z) {
+; GFX9-LABEL: test_f32_add_mul_contract:
+; GFX9: ; %bb.0: ; %.entry
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_fma_f32 v0, v0, v1, v2
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-CONTRACT-LABEL: test_f32_add_mul_contract:
+; GFX9-CONTRACT: ; %bb.0: ; %.entry
+; GFX9-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-CONTRACT-NEXT: v_fma_f32 v0, v0, v1, v2
+; GFX9-CONTRACT-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-DENORM-LABEL: test_f32_add_mul_contract:
+; GFX9-DENORM: ; %bb.0: ; %.entry
+; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-DENORM-NEXT: v_mad_f32 v0, v0, v1, v2
+; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: test_f32_add_mul_contract:
+; GFX10: ; %bb.0: ; %.entry
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_fma_f32 v0, v0, v1, v2
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-CONTRACT-LABEL: test_f32_add_mul_contract:
+; GFX10-CONTRACT: ; %bb.0: ; %.entry
+; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-CONTRACT-NEXT: v_fma_f32 v0, v0, v1, v2
+; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-DENORM-LABEL: test_f32_add_mul_contract:
+; GFX10-DENORM: ; %bb.0: ; %.entry
+; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-DENORM-NEXT: v_fma_f32 v0, v0, v1, v2
+; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31]
+; GFX9-UNSAFE-LABEL: test_f32_add_mul_contract:
+; GFX9-UNSAFE: ; %bb.0: ; %.entry
+; GFX9-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-UNSAFE-NEXT: v_fma_f32 v0, v0, v1, v2
+; GFX9-UNSAFE-NEXT: s_setpc_b64 s[30:31]
+; GFX10-UNSAFE-LABEL: test_f32_add_mul_contract:
+; GFX10-UNSAFE: ; %bb.0: ; %.entry
+; GFX10-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-UNSAFE-NEXT: v_fma_f32 v0, v0, v1, v2
+; GFX10-UNSAFE-NEXT: s_setpc_b64 s[30:31]
+.entry:
+ %a = fmul contract float %x, %y
+ %b = fadd contract float %a, %z
+ ret float %b
+}
+
define float @test_f32_add_mul_rhs(float %x, float %y, float %z) {
; GFX9-LABEL: test_f32_add_mul_rhs:
; GFX9: ; %bb.0: ; %.entry
@@ -84,12 +127,6 @@ define float @test_f32_add_mul_rhs(float %x, float %y, float %z) {
; GFX9-DENORM-NEXT: v_mad_f32 v0, v0, v1, v2
; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-UNSAFE-LABEL: test_f32_add_mul_rhs:
-; GFX9-UNSAFE: ; %bb.0: ; %.entry
-; GFX9-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-UNSAFE-NEXT: v_fma_f32 v0, v0, v1, v2
-; GFX9-UNSAFE-NEXT: s_setpc_b64 s[30:31]
-;
; GFX10-LABEL: test_f32_add_mul_rhs:
; GFX10: ; %bb.0: ; %.entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -108,7 +145,6 @@ define float @test_f32_add_mul_rhs(float %x, float %y, float %z) {
; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-DENORM-NEXT: v_mad_f32 v0, v0, v1, v2
; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31]
-;
; GFX10-UNSAFE-LABEL: test_f32_add_mul_rhs:
; GFX10-UNSAFE: ; %bb.0: ; %.entry
; GFX10-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -120,6 +156,58 @@ define float @test_f32_add_mul_rhs(float %x, float %y, float %z) {
ret float %b
}
+define float @test_f32_add_mul_rhs_contract(float %x, float %y, float %z) {
+; GFX9-LABEL: test_f32_add_mul_rhs_contract:
+; GFX9: ; %bb.0: ; %.entry
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_fma_f32 v0, v0, v1, v2
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-CONTRACT-LABEL: test_f32_add_mul_rhs_contract:
+; GFX9-CONTRACT: ; %bb.0: ; %.entry
+; GFX9-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-CONTRACT-NEXT: v_fma_f32 v0, v0, v1, v2
+; GFX9-CONTRACT-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-DENORM-LABEL: test_f32_add_mul_rhs_contract:
+; GFX9-DENORM: ; %bb.0: ; %.entry
+; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-DENORM-NEXT: v_mad_f32 v0, v0, v1, v2
+; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: test_f32_add_mul_rhs_contract:
+; GFX10: ; %bb.0: ; %.entry
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_fma_f32 v0, v0, v1, v2
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-CONTRACT-LABEL: test_f32_add_mul_rhs_contract:
+; GFX10-CONTRACT: ; %bb.0: ; %.entry
+; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-CONTRACT-NEXT: v_fma_f32 v0, v0, v1, v2
+; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-DENORM-LABEL: test_f32_add_mul_rhs_contract:
+; GFX10-DENORM: ; %bb.0: ; %.entry
+; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-DENORM-NEXT: v_fma_f32 v0, v0, v1, v2
+; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31]
+; GFX9-UNSAFE-LABEL: test_f32_add_mul_rhs_contract:
+; GFX9-UNSAFE: ; %bb.0: ; %.entry
+; GFX9-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-UNSAFE-NEXT: v_fma_f32 v0, v0, v1, v2
+; GFX9-UNSAFE-NEXT: s_setpc_b64 s[30:31]
+; GFX10-UNSAFE-LABEL: test_f32_add_mul_rhs_contract:
+; GFX10-UNSAFE: ; %bb.0: ; %.entry
+; GFX10-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-UNSAFE-NEXT: v_fma_f32 v0, v0, v1, v2
+; GFX10-UNSAFE-NEXT: s_setpc_b64 s[30:31]
+.entry:
+ %a = fmul contract float %x, %y
+ %b = fadd contract float %z, %a
+ ret float %b
+}
+
define float @test_add_mul_multiple_defs_z(float %x, float %y, ptr addrspace(1) %vec_ptr) {
; GFX9-LABEL: test_add_mul_multiple_defs_z:
; GFX9: ; %bb.0: ; %.entry
@@ -147,14 +235,6 @@ define float @test_add_mul_multiple_defs_z(float %x, float %y, ptr addrspace(1)
; GFX9-DENORM-NEXT: v_mov_b32_e32 v0, v2
; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-UNSAFE-LABEL: test_add_mul_multiple_defs_z:
-; GFX9-UNSAFE: ; %bb.0: ; %.entry
-; GFX9-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-UNSAFE-NEXT: global_load_dword v2, v[2:3], off offset:4
-; GFX9-UNSAFE-NEXT: s_waitcnt vmcnt(0)
-; GFX9-UNSAFE-NEXT: v_fma_f32 v0, v0, v1, v2
-; GFX9-UNSAFE-NEXT: s_setpc_b64 s[30:31]
-;
; GFX10-LABEL: test_add_mul_multiple_defs_z:
; GFX10: ; %bb.0: ; %.entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -181,7 +261,6 @@ define float @test_add_mul_multiple_defs_z(float %x, float %y, ptr addrspace(1)
; GFX10-DENORM-NEXT: v_mac_f32_e32 v2, v0, v1
; GFX10-DENORM-NEXT: v_mov_b32_e32 v0, v2
; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31]
-;
; GFX10-UNSAFE-LABEL: test_add_mul_multiple_defs_z:
; GFX10-UNSAFE: ; %bb.0: ; %.entry
; GFX10-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -198,17 +277,16 @@ define float @test_add_mul_multiple_defs_z(float %x, float %y, ptr addrspace(1)
ret float %b
}
-define float @test_add_mul_rhs_multiple_defs_z(float %x, float %y, ptr addrspace(1) %vec_ptr) {
-; GFX9-LABEL: test_add_mul_rhs_multiple_defs_z:
+define float @test_add_mul_multiple_defs_z_contract(float %x, float %y, ptr addrspace(1) %vec_ptr) {
+; GFX9-LABEL: test_add_mul_multiple_defs_z_contract:
; GFX9: ; %bb.0: ; %.entry
; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-NEXT: global_load_dword v2, v[2:3], off offset:4
-; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1
; GFX9-NEXT: s_waitcnt vmcnt(0)
-; GFX9-NEXT: v_add_f32_e32 v0, v2, v0
+; GFX9-NEXT: v_fma_f32 v0, v0, v1, v2
; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-CONTRACT-LABEL: test_add_mul_rhs_multiple_defs_z:
+; GFX9-CONTRACT-LABEL: test_add_mul_multiple_defs_z_contract:
; GFX9-CONTRACT: ; %bb.0: ; %.entry
; GFX9-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-CONTRACT-NEXT: global_load_dword v2, v[2:3], off offset:4
@@ -216,7 +294,7 @@ define float @test_add_mul_rhs_multiple_defs_z(float %x, float %y, ptr addrspace
; GFX9-CONTRACT-NEXT: v_fma_f32 v0, v0, v1, v2
; GFX9-CONTRACT-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-DENORM-LABEL: test_add_mul_rhs_multiple_defs_z:
+; GFX9-DENORM-LABEL: test_add_mul_multiple_defs_z_contract:
; GFX9-DENORM: ; %bb.0: ; %.entry
; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-DENORM-NEXT: global_load_dword v2, v[2:3], off offset:4
@@ -225,13 +303,81 @@ define float @test_add_mul_rhs_multiple_defs_z(float %x, float %y, ptr addrspace
; GFX9-DENORM-NEXT: v_mov_b32_e32 v0, v2
; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-UNSAFE-LABEL: test_add_mul_rhs_multiple_defs_z:
+; GFX10-LABEL: test_add_mul_multiple_defs_z_contract:
+; GFX10: ; %bb.0: ; %.entry
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: global_load_dword v2, v[2:3], off offset:4
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: v_fmac_f32_e32 v2, v0, v1
+; GFX10-NEXT: v_mov_b32_e32 v0, v2
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-CONTRACT-LABEL: test_add_mul_multiple_defs_z_contract:
+; GFX10-CONTRACT: ; %bb.0: ; %.entry
+; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-CONTRACT-NEXT: global_load_dword v2, v[2:3], off offset:4
+; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0)
+; GFX10-CONTRACT-NEXT: v_fmac_f32_e32 v2, v0, v1
+; GFX10-CONTRACT-NEXT: v_mov_b32_e32 v0, v2
+; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-DENORM-LABEL: test_add_mul_multiple_defs_z_contract:
+; GFX10-DENORM: ; %bb.0: ; %.entry
+; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-DENORM-NEXT: global_load_dword v2, v[2:3], off offset:4
+; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0)
+; GFX10-DENORM-NEXT: v_fmac_f32_e32 v2, v0, v1
+; GFX10-DENORM-NEXT: v_mov_b32_e32 v0, v2
+; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31]
+; GFX9-UNSAFE-LABEL: test_add_mul_multiple_defs_z_contract:
; GFX9-UNSAFE: ; %bb.0: ; %.entry
; GFX9-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-UNSAFE-NEXT: global_load_dword v2, v[2:3], off offset:4
; GFX9-UNSAFE-NEXT: s_waitcnt vmcnt(0)
; GFX9-UNSAFE-NEXT: v_fma_f32 v0, v0, v1, v2
; GFX9-UNSAFE-NEXT: s_setpc_b64 s[30:31]
+; GFX10-UNSAFE-LABEL: test_add_mul_multiple_defs_z_contract:
+; GFX10-UNSAFE: ; %bb.0: ; %.entry
+; GFX10-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-UNSAFE-NEXT: global_load_dword v2, v[2:3], off offset:4
+; GFX10-UNSAFE-NEXT: s_waitcnt vmcnt(0)
+; GFX10-UNSAFE-NEXT: v_fmac_f32_e32 v2, v0, v1
+; GFX10-UNSAFE-NEXT: v_mov_b32_e32 v0, v2
+; GFX10-UNSAFE-NEXT: s_setpc_b64 s[30:31]
+.entry:
+ %a = fmul contract float %x, %y
+ %vec = load <2 x float>, ptr addrspace(1) %vec_ptr
+ %z = extractelement <2 x float> %vec, i64 1
+ %b = fadd contract float %a, %z
+ ret float %b
+}
+
+define float @test_add_mul_rhs_multiple_defs_z(float %x, float %y, ptr addrspace(1) %vec_ptr) {
+; GFX9-LABEL: test_add_mul_rhs_multiple_defs_z:
+; GFX9: ; %bb.0: ; %.entry
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: global_load_dword v2, v[2:3], off offset:4
+; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: v_add_f32_e32 v0, v2, v0
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-CONTRACT-LABEL: test_add_mul_rhs_multiple_defs_z:
+; GFX9-CONTRACT: ; %bb.0: ; %.entry
+; GFX9-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-CONTRACT-NEXT: global_load_dword v2, v[2:3], off offset:4
+; GFX9-CONTRACT-NEXT: s_waitcnt vmcnt(0)
+; GFX9-CONTRACT-NEXT: v_fma_f32 v0, v0, v1, v2
+; GFX9-CONTRACT-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-DENORM-LABEL: test_add_mul_rhs_multiple_defs_z:
+; GFX9-DENORM: ; %bb.0: ; %.entry
+; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-DENORM-NEXT: global_load_dword v2, v[2:3], off offset:4
+; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0)
+; GFX9-DENORM-NEXT: v_mac_f32_e32 v2, v0, v1
+; GFX9-DENORM-NEXT: v_mov_b32_e32 v0, v2
+; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31]
;
; GFX10-LABEL: test_add_mul_rhs_multiple_defs_z:
; GFX10: ; %bb.0: ; %.entry
@@ -259,7 +405,6 @@ define float @test_add_mul_rhs_multiple_defs_z(float %x, float %y, ptr addrspace
; GFX10-DENORM-NEXT: v_mac_f32_e32 v2, v0, v1
; GFX10-DENORM-NEXT: v_mov_b32_e32 v0, v2
; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31]
-;
; GFX10-UNSAFE-LABEL: test_add_mul_rhs_multiple_defs_z:
; GFX10-UNSAFE: ; %bb.0: ; %.entry
; GFX10-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -296,12 +441,6 @@ define half @test_half_add_mul(half %x, half %y, half %z) {
; GFX9-DENORM-NEXT: v_mad_legacy_f16 v0, v0, v1, v2
; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-UNSAFE-LABEL: test_half_add_mul:
-; GFX9-UNSAFE: ; %bb.0: ; %.entry
-; GFX9-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-UNSAFE-NEXT: v_fma_f16 v0, v0, v1, v2
-; GFX9-UNSAFE-NEXT: s_setpc_b64 s[30:31]
-;
; GFX10-LABEL: test_half_add_mul:
; GFX10: ; %bb.0: ; %.entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -321,7 +460,6 @@ define half @test_half_add_mul(half %x, half %y, half %z) {
; GFX10-DENORM-NEXT: v_mul_f16_e32 v0, v0, v1
; GFX10-DENORM-NEXT: v_add_f16_e32 v0, v0, v2
; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31]
-;
; GFX10-UNSAFE-LABEL: test_half_add_mul:
; GFX10-UNSAFE: ; %bb.0: ; %.entry
; GFX10-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -333,6 +471,59 @@ define half @test_half_add_mul(half %x, half %y, half %z) {
ret half %b
}
+define half @test_half_add_mul_contract(half %x, half %y, half %z) {
+; GFX9-LABEL: test_half_add_mul_contract:
+; GFX9: ; %bb.0: ; %.entry
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_fma_f16 v0, v0, v1, v2
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-CONTRACT-LABEL: test_half_add_mul_contract:
+; GFX9-CONTRACT: ; %bb.0: ; %.entry
+; GFX9-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-CONTRACT-NEXT: v_fma_f16 v0, v0, v1, v2
+; GFX9-CONTRACT-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-DENORM-LABEL: test_half_add_mul_contract:
+; GFX9-DENORM: ; %bb.0: ; %.entry
+; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-DENORM-NEXT: v_mad_legacy_f16 v0, v0, v1, v2
+; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: test_half_add_mul_contract:
+; GFX10: ; %bb.0: ; %.entry
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_fma_f16 v0, v0, v1, v2
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-CONTRACT-LABEL: test_half_add_mul_contract:
+; GFX10-CONTRACT: ; %bb.0: ; %.entry
+; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-CONTRACT-NEXT: v_fma_f16 v0, v0, v1, v2
+; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-DENORM-LABEL: test_half_add_mul_contract:
+; GFX10-DENORM: ; %bb.0: ; %.entry
+; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-DENORM-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX10-DENORM-NEXT: v_add_f16_e32 v0, v0, v2
+; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31]
+; GFX9-UNSAFE-LABEL: test_half_add_mul_contract:
+; GFX9-UNSAFE: ; %bb.0: ; %.entry
+; GFX9-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-UNSAFE-NEXT: v_fma_f16 v0, v0, v1, v2
+; GFX9-UNSAFE-NEXT: s_setpc_b64 s[30:31]
+; GFX10-UNSAFE-LABEL: test_half_add_mul_contract:
+; GFX10-UNSAFE: ; %bb.0: ; %.entry
+; GFX10-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-UNSAFE-NEXT: v_fma_f16 v0, v0, v1, v2
+; GFX10-UNSAFE-NEXT: s_setpc_b64 s[30:31]
+.entry:
+ %a = fmul contract half %x, %y
+ %b = fadd contract half %a, %z
+ ret half %b
+}
+
define half @test_half_add_mul_rhs(half %x, half %y, half %z) {
; GFX9-LABEL: test_half_add_mul_rhs:
; GFX9: ; %bb.0: ; %.entry
@@ -353,12 +544,6 @@ define half @test_half_add_mul_rhs(half %x, half %y, half %z) {
; GFX9-DENORM-NEXT: v_mad_legacy_f16 v0, v0, v1, v2
; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-UNSAFE-LABEL: test_half_add_mul_rhs:
-; GFX9-UNSAFE: ; %bb.0: ; %.entry
-; GFX9-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-UNSAFE-NEXT: v_fma_f16 v0, v0, v1, v2
-; GFX9-UNSAFE-NEXT: s_setpc_b64 s[30:31]
-;
; GFX10-LABEL: test_half_add_mul_rhs:
; GFX10: ; %bb.0: ; %.entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -378,7 +563,6 @@ define half @test_half_add_mul_rhs(half %x, half %y, half %z) {
; GFX10-DENORM-NEXT: v_mul_f16_e32 v0, v0, v1
; GFX10-DENORM-NEXT: v_add_f16_e32 v0, v2, v0
; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31]
-;
; GFX10-UNSAFE-LABEL: test_half_add_mul_rhs:
; GFX10-UNSAFE: ; %bb.0: ; %.entry
; GFX10-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -390,6 +574,59 @@ define half @test_half_add_mul_rhs(half %x, half %y, half %z) {
ret half %b
}
+define half @test_half_add_mul_rhs_contract(half %x, half %y, half %z) {
+; GFX9-LABEL: test_half_add_mul_rhs_contract:
+; GFX9: ; %bb.0: ; %.entry
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_fma_f16 v0, v0, v1, v2
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-CONTRACT-LABEL: test_half_add_mul_rhs_contract:
+; GFX9-CONTRACT: ; %bb.0: ; %.entry
+; GFX9-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-CONTRACT-NEXT: v_fma_f16 v0, v0, v1, v2
+; GFX9-CONTRACT-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-DENORM-LABEL: test_half_add_mul_rhs_contract:
+; GFX9-DENORM: ; %bb.0: ; %.entry
+; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-DENORM-NEXT: v_mad_legacy_f16 v0, v0, v1, v2
+; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: test_half_add_mul_rhs_contract:
+; GFX10: ; %bb.0: ; %.entry
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_fma_f16 v0, v0, v1, v2
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-CONTRACT-LABEL: test_half_add_mul_rhs_contract:
+; GFX10-CONTRACT: ; %bb.0: ; %.entry
+; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-CONTRACT-NEXT: v_fma_f16 v0, v0, v1, v2
+; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-DENORM-LABEL: test_half_add_mul_rhs_contract:
+; GFX10-DENORM: ; %bb.0: ; %.entry
+; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-DENORM-NEXT: v_mul_f16_e32 v0, v0, v1
+; GFX10-DENORM-NEXT: v_add_f16_e32 v0, v2, v0
+; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31]
+; GFX9-UNSAFE-LABEL: test_half_add_mul_rhs_contract:
+; GFX9-UNSAFE: ; %bb.0: ; %.entry
+; GFX9-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-UNSAFE-NEXT: v_fma_f16 v0, v0, v1, v2
+; GFX9-UNSAFE-NEXT: s_setpc_b64 s[30:31]
+; GFX10-UNSAFE-LABEL: test_half_add_mul_rhs_contract:
+; GFX10-UNSAFE: ; %bb.0: ; %.entry
+; GFX10-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-UNSAFE-NEXT: v_fma_f16 v0, v0, v1, v2
+; GFX10-UNSAFE-NEXT: s_setpc_b64 s[30:31]
+.entry:
+ %a = fmul contract half %x, %y
+ %b = fadd contract half %z, %a
+ ret half %b
+}
+
define double @test_double_add_mul(double %x, double %y, double %z) {
; GFX9-LABEL: test_double_add_mul:
; GFX9: ; %bb.0: ; %.entry
@@ -411,12 +648,6 @@ define double @test_double_add_mul(double %x, double %y, double %z) {
; GFX9-DENORM-NEXT: v_add_f64 v[0:1], v[0:1], v[4:5]
; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-UNSAFE-LABEL: test_double_add_mul:
-; GFX9-UNSAFE: ; %bb.0: ; %.entry
-; GFX9-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-UNSAFE-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
-; GFX9-UNSAFE-NEXT: s_setpc_b64 s[30:31]
-;
; GFX10-LABEL: test_double_add_mul:
; GFX10: ; %bb.0: ; %.entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -436,15 +667,61 @@ define double @test_double_add_mul(double %x, double %y, double %z) {
; GFX10-DENORM-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
; GFX10-DENORM-NEXT: v_add_f64 v[0:1], v[0:1], v[4:5]
; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31]
+.entry:
+ %a = fmul double %x, %y
+ %b = fadd double %a, %z
+ ret double %b
+}
+
+define double @test_double_add_mul_contract(double %x, double %y, double %z) {
+; GFX9-LABEL: test_double_add_mul_contract:
+; GFX9: ; %bb.0: ; %.entry
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-CONTRACT-LABEL: test_double_add_mul_contract:
+; GFX9-CONTRACT: ; %bb.0: ; %.entry
+; GFX9-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-CONTRACT-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
+; GFX9-CONTRACT-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-DENORM-LABEL: test_double_add_mul_contract:
+; GFX9-DENORM: ; %bb.0: ; %.entry
+; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-DENORM-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
+; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31]
;
+; GFX10-LABEL: test_double_add_mul_contract:
+; GFX10: ; %bb.0: ; %.entry
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-CONTRACT-LABEL: test_double_add_mul_contract:
+; GFX10-CONTRACT: ; %bb.0: ; %.entry
+; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-CONTRACT-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
+; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-DENORM-LABEL: test_double_add_mul_contract:
+; GFX10-DENORM: ; %bb.0: ; %.entry
+; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-DENORM-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
+; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31]
+; GFX9-UNSAFE-LABEL: test_double_add_mul_contract:
+; GFX9-UNSAFE: ; %bb.0: ; %.entry
+; GFX9-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-UNSAFE-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
+; GFX9-UNSAFE-NEXT: s_setpc_b64 s[30:31]
; GFX10-UNSAFE-LABEL: test_double_add_mul:
; GFX10-UNSAFE: ; %bb.0: ; %.entry
; GFX10-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-UNSAFE-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
; GFX10-UNSAFE-NEXT: s_setpc_b64 s[30:31]
.entry:
- %a = fmul double %x, %y
- %b = fadd double %a, %z
+ %a = fmul contract double %x, %y
+ %b = fadd contract double %a, %z
ret double %b
}
@@ -469,12 +746,6 @@ define double @test_double_add_mul_rhs(double %x, double %y, double %z) {
; GFX9-DENORM-NEXT: v_add_f64 v[0:1], v[4:5], v[0:1]
; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-UNSAFE-LABEL: test_double_add_mul_rhs:
-; GFX9-UNSAFE: ; %bb.0: ; %.entry
-; GFX9-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-UNSAFE-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
-; GFX9-UNSAFE-NEXT: s_setpc_b64 s[30:31]
-;
; GFX10-LABEL: test_double_add_mul_rhs:
; GFX10: ; %bb.0: ; %.entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -494,15 +765,61 @@ define double @test_double_add_mul_rhs(double %x, double %y, double %z) {
; GFX10-DENORM-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3]
; GFX10-DENORM-NEXT: v_add_f64 v[0:1], v[4:5], v[0:1]
; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31]
+.entry:
+ %a = fmul double %x, %y
+ %b = fadd double %z, %a
+ ret double %b
+}
+
+define double @test_double_add_mul_rhs_contract(double %x, double %y, double %z) {
+; GFX9-LABEL: test_double_add_mul_rhs_contract:
+; GFX9: ; %bb.0: ; %.entry
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-CONTRACT-LABEL: test_double_add_mul_rhs_contract:
+; GFX9-CONTRACT: ; %bb.0: ; %.entry
+; GFX9-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-CONTRACT-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
+; GFX9-CONTRACT-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-DENORM-LABEL: test_double_add_mul_rhs_contract:
+; GFX9-DENORM: ; %bb.0: ; %.entry
+; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-DENORM-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
+; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31]
;
-; GFX10-UNSAFE-LABEL: test_double_add_mul_rhs:
+; GFX10-LABEL: test_double_add_mul_rhs_contract:
+; GFX10: ; %bb.0: ; %.entry
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-CONTRACT-LABEL: test_double_add_mul_rhs_contract:
+; GFX10-CONTRACT: ; %bb.0: ; %.entry
+; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-CONTRACT-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
+; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-DENORM-LABEL: test_double_add_mul_rhs_contract:
+; GFX10-DENORM: ; %bb.0: ; %.entry
+; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-DENORM-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
+; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31]
+; GFX9-UNSAFE-LABEL: test_double_add_mul_rhs_contract:
+; GFX9-UNSAFE: ; %bb.0: ; %.entry
+; GFX9-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-UNSAFE-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
+; GFX9-UNSAFE-NEXT: s_setpc_b64 s[30:31]
+; GFX10-UNSAFE-LABEL: test_double_add_mul_rhs_contract:
; GFX10-UNSAFE: ; %bb.0: ; %.entry
; GFX10-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-UNSAFE-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
; GFX10-UNSAFE-NEXT: s_setpc_b64 s[30:31]
.entry:
- %a = fmul double %x, %y
- %b = fadd double %z, %a
+ %a = fmul contract double %x, %y
+ %b = fadd contract double %z, %a
ret double %b
}
@@ -538,15 +855,6 @@ define <4 x float> @test_4xfloat_add_mul(<4 x float> %x, <4 x float> %y, <4 x fl
; GFX9-DENORM-NEXT: v_mad_f32 v3, v3, v7, v11
; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-UNSAFE-LABEL: test_4xfloat_add_mul:
-; GFX9-UNSAFE: ; %bb.0: ; %.entry
-; GFX9-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-UNSAFE-NEXT: v_fma_f32 v0, v0, v4, v8
-; GFX9-UNSAFE-NEXT: v_fma_f32 v1, v1, v5, v9
-; GFX9-UNSAFE-NEXT: v_fma_f32 v2, v2, v6, v10
-; GFX9-UNSAFE-NEXT: v_fma_f32 v3, v3, v7, v11
-; GFX9-UNSAFE-NEXT: s_setpc_b64 s[30:31]
-;
; GFX10-LABEL: test_4xfloat_add_mul:
; GFX10: ; %bb.0: ; %.entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -577,8 +885,75 @@ define <4 x float> @test_4xfloat_add_mul(<4 x float> %x, <4 x float> %y, <4 x fl
; GFX10-DENORM-NEXT: v_mad_f32 v2, v2, v6, v10
; GFX10-DENORM-NEXT: v_mad_f32 v3, v3, v7, v11
; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31]
+.entry:
+ %a = fmul <4 x float> %x, %y
+ %b = fadd <4 x float> %a, %z
+ ret <4 x float> %b
+}
+
+define <4 x float> @test_4xfloat_add_mul_contract(<4 x float> %x, <4 x float> %y, <4 x float> %z) {
+; GFX9-LABEL: test_4xfloat_add_mul_contract:
+; GFX9: ; %bb.0: ; %.entry
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_fma_f32 v0, v0, v4, v8
+; GFX9-NEXT: v_fma_f32 v1, v1, v5, v9
+; GFX9-NEXT: v_fma_f32 v2, v2, v6, v10
+; GFX9-NEXT: v_fma_f32 v3, v3, v7, v11
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-CONTRACT-LABEL: test_4xfloat_add_mul_contract:
+; GFX9-CONTRACT: ; %bb.0: ; %.entry
+; GFX9-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-CONTRACT-NEXT: v_fma_f32 v0, v0, v4, v8
+; GFX9-CONTRACT-NEXT: v_fma_f32 v1, v1, v5, v9
+; GFX9-CONTRACT-NEXT: v_fma_f32 v2, v2, v6, v10
+; GFX9-CONTRACT-NEXT: v_fma_f32 v3, v3, v7, v11
+; GFX9-CONTRACT-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-DENORM-LABEL: test_4xfloat_add_mul_contract:
+; GFX9-DENORM: ; %bb.0: ; %.entry
+; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-DENORM-NEXT: v_mad_f32 v0, v0, v4, v8
+; GFX9-DENORM-NEXT: v_mad_f32 v1, v1, v5, v9
+; GFX9-DENORM-NEXT: v_mad_f32 v2, v2, v6, v10
+; GFX9-DENORM-NEXT: v_mad_f32 v3, v3, v7, v11
+; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31]
;
-; GFX10-UNSAFE-LABEL: test_4xfloat_add_mul:
+; GFX10-LABEL: test_4xfloat_add_mul_contract:
+; GFX10: ; %bb.0: ; %.entry
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_fma_f32 v0, v0, v4, v8
+; GFX10-NEXT: v_fma_f32 v1, v1, v5, v9
+; GFX10-NEXT: v_fma_f32 v2, v2, v6, v10
+; GFX10-NEXT: v_fma_f32 v3, v3, v7, v11
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-CONTRACT-LABEL: test_4xfloat_add_mul_contract:
+; GFX10-CONTRACT: ; %bb.0: ; %.entry
+; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-CONTRACT-NEXT: v_fma_f32 v0, v0, v4, v8
+; GFX10-CONTRACT-NEXT: v_fma_f32 v1, v1, v5, v9
+; GFX10-CONTRACT-NEXT: v_fma_f32 v2, v2, v6, v10
+; GFX10-CONTRACT-NEXT: v_fma_f32 v3, v3, v7, v11
+; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-DENORM-LABEL: test_4xfloat_add_mul_contract:
+; GFX10-DENORM: ; %bb.0: ; %.entry
+; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-DENORM-NEXT: v_fma_f32 v0, v0, v4, v8
+; GFX10-DENORM-NEXT: v_fma_f32 v1, v1, v5, v9
+; GFX10-DENORM-NEXT: v_fma_f32 v2, v2, v6, v10
+; GFX10-DENORM-NEXT: v_fma_f32 v3, v3, v7, v11
+; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31]
+; GFX9-UNSAFE-LABEL: test_4xfloat_add_mul_contract:
+; GFX9-UNSAFE: ; %bb.0: ; %.entry
+; GFX9-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-UNSAFE-NEXT: v_fma_f32 v0, v0, v4, v8
+; GFX9-UNSAFE-NEXT: v_fma_f32 v1, v1, v5, v9
+; GFX9-UNSAFE-NEXT: v_fma_f32 v2, v2, v6, v10
+; GFX9-UNSAFE-NEXT: v_fma_f32 v3, v3, v7, v11
+; GFX9-UNSAFE-NEXT: s_setpc_b64 s[30:31]
+; GFX10-UNSAFE-LABEL: test_4xfloat_add_mul_contract:
; GFX10-UNSAFE: ; %bb.0: ; %.entry
; GFX10-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-UNSAFE-NEXT: v_fma_f32 v0, v0, v4, v8
@@ -587,8 +962,8 @@ define <4 x float> @test_4xfloat_add_mul(<4 x float> %x, <4 x float> %y, <4 x fl
; GFX10-UNSAFE-NEXT: v_fma_f32 v3, v3, v7, v11
; GFX10-UNSAFE-NEXT: s_setpc_b64 s[30:31]
.entry:
- %a = fmul <4 x float> %x, %y
- %b = fadd <4 x float> %a, %z
+ %a = fmul contract <4 x float> %x, %y
+ %b = fadd contract <4 x float> %a, %z
ret <4 x float> %b
}
@@ -620,14 +995,6 @@ define <3 x float> @test_3xfloat_add_mul_rhs(<3 x float> %x, <3 x float> %y, <3
; GFX9-DENORM-NEXT: v_mad_f32 v2, v2, v5, v8
; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-UNSAFE-LABEL: test_3xfloat_add_mul_rhs:
-; GFX9-UNSAFE: ; %bb.0: ; %.entry
-; GFX9-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-UNSAFE-NEXT: v_fma_f32 v0, v0, v3, v6
-; GFX9-UNSAFE-NEXT: v_fma_f32 v1, v1, v4, v7
-; GFX9-UNSAFE-NEXT: v_fma_f32 v2, v2, v5, v8
-; GFX9-UNSAFE-NEXT: s_setpc_b64 s[30:31]
-;
; GFX10-LABEL: test_3xfloat_add_mul_rhs:
; GFX10: ; %bb.0: ; %.entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -654,8 +1021,68 @@ define <3 x float> @test_3xfloat_add_mul_rhs(<3 x float> %x, <3 x float> %y, <3
; GFX10-DENORM-NEXT: v_mad_f32 v1, v1, v4, v7
; GFX10-DENORM-NEXT: v_mad_f32 v2, v2, v5, v8
; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31]
+.entry:
+ %a = fmul <3 x float> %x, %y
+ %b = fadd <3 x float> %z, %a
+ ret <3 x float> %b
+}
+
+define <3 x float> @test_3xfloat_add_mul_rhs_contract(<3 x float> %x, <3 x float> %y, <3 x float> %z) {
+; GFX9-LABEL: test_3xfloat_add_mul_rhs_contract:
+; GFX9: ; %bb.0: ; %.entry
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_fma_f32 v0, v0, v3, v6
+; GFX9-NEXT: v_fma_f32 v1, v1, v4, v7
+; GFX9-NEXT: v_fma_f32 v2, v2, v5, v8
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-CONTRACT-LABEL: test_3xfloat_add_mul_rhs_contract:
+; GFX9-CONTRACT: ; %bb.0: ; %.entry
+; GFX9-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-CONTRACT-NEXT: v_fma_f32 v0, v0, v3, v6
+; GFX9-CONTRACT-NEXT: v_fma_f32 v1, v1, v4, v7
+; GFX9-CONTRACT-NEXT: v_fma_f32 v2, v2, v5, v8
+; GFX9-CONTRACT-NEXT: s_setpc_b64 s[30:31]
;
-; GFX10-UNSAFE-LABEL: test_3xfloat_add_mul_rhs:
+; GFX9-DENORM-LABEL: test_3xfloat_add_mul_rhs_contract:
+; GFX9-DENORM: ; %bb.0: ; %.entry
+; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-DENORM-NEXT: v_mad_f32 v0, v0, v3, v6
+; GFX9-DENORM-NEXT: v_mad_f32 v1, v1, v4, v7
+; GFX9-DENORM-NEXT: v_mad_f32 v2, v2, v5, v8
+; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: test_3xfloat_add_mul_rhs_contract:
+; GFX10: ; %bb.0: ; %.entry
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_fma_f32 v0, v0, v3, v6
+; GFX10-NEXT: v_fma_f32 v1, v1, v4, v7
+; GFX10-NEXT: v_fma_f32 v2, v2, v5, v8
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-CONTRACT-LABEL: test_3xfloat_add_mul_rhs_contract:
+; GFX10-CONTRACT: ; %bb.0: ; %.entry
+; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-CONTRACT-NEXT: v_fma_f32 v0, v0, v3, v6
+; GFX10-CONTRACT-NEXT: v_fma_f32 v1, v1, v4, v7
+; GFX10-CONTRACT-NEXT: v_fma_f32 v2, v2, v5, v8
+; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-DENORM-LABEL: test_3xfloat_add_mul_rhs_contract:
+; GFX10-DENORM: ; %bb.0: ; %.entry
+; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-DENORM-NEXT: v_fma_f32 v0, v0, v3, v6
+; GFX10-DENORM-NEXT: v_fma_f32 v1, v1, v4, v7
+; GFX10-DENORM-NEXT: v_fma_f32 v2, v2, v5, v8
+; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31]
+; GFX9-UNSAFE-LABEL: test_3xfloat_add_mul_rhs_contract:
+; GFX9-UNSAFE: ; %bb.0: ; %.entry
+; GFX9-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-UNSAFE-NEXT: v_fma_f32 v0, v0, v3, v6
+; GFX9-UNSAFE-NEXT: v_fma_f32 v1, v1, v4, v7
+; GFX9-UNSAFE-NEXT: v_fma_f32 v2, v2, v5, v8
+; GFX9-UNSAFE-NEXT: s_setpc_b64 s[30:31]
+; GFX10-UNSAFE-LABEL: test_3xfloat_add_mul_rhs_contract:
; GFX10-UNSAFE: ; %bb.0: ; %.entry
; GFX10-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-UNSAFE-NEXT: v_fma_f32 v0, v0, v3, v6
@@ -663,8 +1090,8 @@ define <3 x float> @test_3xfloat_add_mul_rhs(<3 x float> %x, <3 x float> %y, <3
; GFX10-UNSAFE-NEXT: v_fma_f32 v2, v2, v5, v8
; GFX10-UNSAFE-NEXT: s_setpc_b64 s[30:31]
.entry:
- %a = fmul <3 x float> %x, %y
- %b = fadd <3 x float> %z, %a
+ %a = fmul contract <3 x float> %x, %y
+ %b = fadd contract <3 x float> %z, %a
ret <3 x float> %b
}
@@ -694,13 +1121,6 @@ define <4 x half> @test_4xhalf_add_mul(<4 x half> %x, <4 x half> %y, <4 x half>
; GFX9-DENORM-NEXT: v_pk_add_f16 v1, v1, v5
; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-UNSAFE-LABEL: test_4xhalf_add_mul:
-; GFX9-UNSAFE: ; %bb.0: ; %.entry
-; GFX9-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-UNSAFE-NEXT: v_pk_fma_f16 v0, v0, v2, v4
-; GFX9-UNSAFE-NEXT: v_pk_fma_f16 v1, v1, v3, v5
-; GFX9-UNSAFE-NEXT: s_setpc_b64 s[30:31]
-;
; GFX10-LABEL: test_4xhalf_add_mul:
; GFX10: ; %bb.0: ; %.entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -725,7 +1145,6 @@ define <4 x half> @test_4xhalf_add_mul(<4 x half> %x, <4 x half> %y, <4 x half>
; GFX10-DENORM-NEXT: v_pk_add_f16 v0, v0, v4
; GFX10-DENORM-NEXT: v_pk_add_f16 v1, v1, v5
; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31]
-;
; GFX10-UNSAFE-LABEL: test_4xhalf_add_mul:
; GFX10-UNSAFE: ; %bb.0: ; %.entry
; GFX10-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -738,6 +1157,70 @@ define <4 x half> @test_4xhalf_add_mul(<4 x half> %x, <4 x half> %y, <4 x half>
ret <4 x half> %b
}
+define <4 x half> @test_4xhalf_add_mul_contract(<4 x half> %x, <4 x half> %y, <4 x half> %z) {
+; GFX9-LABEL: test_4xhalf_add_mul_contract:
+; GFX9: ; %bb.0: ; %.entry
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_pk_fma_f16 v0, v0, v2, v4
+; GFX9-NEXT: v_pk_fma_f16 v1, v1, v3, v5
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-CONTRACT-LABEL: test_4xhalf_add_mul_contract:
+; GFX9-CONTRACT: ; %bb.0: ; %.entry
+; GFX9-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-CONTRACT-NEXT: v_pk_fma_f16 v0, v0, v2, v4
+; GFX9-CONTRACT-NEXT: v_pk_fma_f16 v1, v1, v3, v5
+; GFX9-CONTRACT-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-DENORM-LABEL: test_4xhalf_add_mul_contract:
+; GFX9-DENORM: ; %bb.0: ; %.entry
+; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-DENORM-NEXT: v_pk_mul_f16 v0, v0, v2
+; GFX9-DENORM-NEXT: v_pk_mul_f16 v1, v1, v3
+; GFX9-DENORM-NEXT: v_pk_add_f16 v0, v0, v4
+; GFX9-DENORM-NEXT: v_pk_add_f16 v1, v1, v5
+; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: test_4xhalf_add_mul_contract:
+; GFX10: ; %bb.0: ; %.entry
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_pk_fma_f16 v0, v0, v2, v4
+; GFX10-NEXT: v_pk_fma_f16 v1, v1, v3, v5
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-CONTRACT-LABEL: test_4xhalf_add_mul_contract:
+; GFX10-CONTRACT: ; %bb.0: ; %.entry
+; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-CONTRACT-NEXT: v_pk_fma_f16 v0, v0, v2, v4
+; GFX10-CONTRACT-NEXT: v_pk_fma_f16 v1, v1, v3, v5
+; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-DENORM-LABEL: test_4xhalf_add_mul_contract:
+; GFX10-DENORM: ; %bb.0: ; %.entry
+; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-DENORM-NEXT: v_pk_mul_f16 v0, v0, v2
+; GFX10-DENORM-NEXT: v_pk_mul_f16 v1, v1, v3
+; GFX10-DENORM-NEXT: v_pk_add_f16 v0, v0, v4
+; GFX10-DENORM-NEXT: v_pk_add_f16 v1, v1, v5
+; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31]
+; GFX9-UNSAFE-LABEL: test_4xhalf_add_mul_contract:
+; GFX9-UNSAFE: ; %bb.0: ; %.entry
+; GFX9-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-UNSAFE-NEXT: v_pk_fma_f16 v0, v0, v2, v4
+; GFX9-UNSAFE-NEXT: v_pk_fma_f16 v1, v1, v3, v5
+; GFX9-UNSAFE-NEXT: s_setpc_b64 s[30:31]
+; GFX10-UNSAFE-LABEL: test_4xhalf_add_mul_contract:
+; GFX10-UNSAFE: ; %bb.0: ; %.entry
+; GFX10-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-UNSAFE-NEXT: v_pk_fma_f16 v0, v0, v2, v4
+; GFX10-UNSAFE-NEXT: v_pk_fma_f16 v1, v1, v3, v5
+; GFX10-UNSAFE-NEXT: s_setpc_b64 s[30:31]
+.entry:
+ %a = fmul contract <4 x half> %x, %y
+ %b = fadd contract <4 x half> %a, %z
+ ret <4 x half> %b
+}
+
define <3 x half> @test_3xhalf_add_mul_rhs(<3 x half> %x, <3 x half> %y, <3 x half> %z) {
; GFX9-LABEL: test_3xhalf_add_mul_rhs:
; GFX9: ; %bb.0: ; %.entry
@@ -764,13 +1247,6 @@ define <3 x half> @test_3xhalf_add_mul_rhs(<3 x half> %x, <3 x half> %y, <3 x ha
; GFX9-DENORM-NEXT: v_pk_add_f16 v1, v5, v1
; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-UNSAFE-LABEL: test_3xhalf_add_mul_rhs:
-; GFX9-UNSAFE: ; %bb.0: ; %.entry
-; GFX9-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-UNSAFE-NEXT: v_pk_fma_f16 v0, v0, v2, v4
-; GFX9-UNSAFE-NEXT: v_pk_fma_f16 v1, v1, v3, v5
-; GFX9-UNSAFE-NEXT: s_setpc_b64 s[30:31]
-;
; GFX10-LABEL: test_3xhalf_add_mul_rhs:
; GFX10: ; %bb.0: ; %.entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -795,16 +1271,73 @@ define <3 x half> @test_3xhalf_add_mul_rhs(<3 x half> %x, <3 x half> %y, <3 x ha
; GFX10-DENORM-NEXT: v_pk_add_f16 v0, v4, v0
; GFX10-DENORM-NEXT: v_pk_add_f16 v1, v5, v1
; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31]
+.entry:
+ %a = fmul <3 x half> %x, %y
+ %b = fadd <3 x half> %z, %a
+ ret <3 x half> %b
+}
+
+define <3 x half> @test_3xhalf_add_mul_rhs_contract(<3 x half> %x, <3 x half> %y, <3 x half> %z) {
+; GFX9-LABEL: test_3xhalf_add_mul_rhs_contract:
+; GFX9: ; %bb.0: ; %.entry
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_pk_fma_f16 v0, v0, v2, v4
+; GFX9-NEXT: v_pk_fma_f16 v1, v1, v3, v5
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-CONTRACT-LABEL: test_3xhalf_add_mul_rhs_contract:
+; GFX9-CONTRACT: ; %bb.0: ; %.entry
+; GFX9-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-CONTRACT-NEXT: v_pk_fma_f16 v0, v0, v2, v4
+; GFX9-CONTRACT-NEXT: v_pk_fma_f16 v1, v1, v3, v5
+; GFX9-CONTRACT-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-DENORM-LABEL: test_3xhalf_add_mul_rhs_contract:
+; GFX9-DENORM: ; %bb.0: ; %.entry
+; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-DENORM-NEXT: v_pk_mul_f16 v0, v0, v2
+; GFX9-DENORM-NEXT: v_pk_mul_f16 v1, v1, v3
+; GFX9-DENORM-NEXT: v_pk_add_f16 v0, v4, v0
+; GFX9-DENORM-NEXT: v_pk_add_f16 v1, v5, v1
+; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: test_3xhalf_add_mul_rhs_contract:
+; GFX10: ; %bb.0: ; %.entry
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_pk_fma_f16 v0, v0, v2, v4
+; GFX10-NEXT: v_pk_fma_f16 v1, v1, v3, v5
+; GFX10-NEXT: s_setpc_b64 s[30:31]
;
-; GFX10-UNSAFE-LABEL: test_3xhalf_add_mul_rhs:
+; GFX10-CONTRACT-LABEL: test_3xhalf_add_mul_rhs_contract:
+; GFX10-CONTRACT: ; %bb.0: ; %.entry
+; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-CONTRACT-NEXT: v_pk_fma_f16 v0, v0, v2, v4
+; GFX10-CONTRACT-NEXT: v_pk_fma_f16 v1, v1, v3, v5
+; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-DENORM-LABEL: test_3xhalf_add_mul_rhs_contract:
+; GFX10-DENORM: ; %bb.0: ; %.entry
+; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-DENORM-NEXT: v_pk_mul_f16 v0, v0, v2
+; GFX10-DENORM-NEXT: v_pk_mul_f16 v1, v1, v3
+; GFX10-DENORM-NEXT: v_pk_add_f16 v0, v4, v0
+; GFX10-DENORM-NEXT: v_pk_add_f16 v1, v5, v1
+; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31]
+; GFX9-UNSAFE-LABEL: test_3xhalf_add_mul_rhs_contract:
+; GFX9-UNSAFE: ; %bb.0: ; %.entry
+; GFX9-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-UNSAFE-NEXT: v_pk_fma_f16 v0, v0, v2, v4
+; GFX9-UNSAFE-NEXT: v_pk_fma_f16 v1, v1, v3, v5
+; GFX9-UNSAFE-NEXT: s_setpc_b64 s[30:31]
+; GFX10-UNSAFE-LABEL: test_3xhalf_add_mul_rhs_contract:
; GFX10-UNSAFE: ; %bb.0: ; %.entry
; GFX10-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX10-UNSAFE-NEXT: v_pk_fma_f16 v0, v0, v2, v4
; GFX10-UNSAFE-NEXT: v_pk_fma_f16 v1, v1, v3, v5
; GFX10-UNSAFE-NEXT: s_setpc_b64 s[30:31]
.entry:
- %a = fmul <3 x half> %x, %y
- %b = fadd <3 x half> %z, %a
+ %a = fmul contract <3 x half> %x, %y
+ %b = fadd contract <3 x half> %z, %a
ret <3 x half> %b
}
@@ -844,15 +1377,6 @@ define <4 x double> @test_4xdouble_add_mul(<4 x double> %x, <4 x double> %y, <4
; GFX9-DENORM-NEXT: v_add_f64 v[6:7], v[6:7], v[22:23]
; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-UNSAFE-LABEL: test_4xdouble_add_mul:
-; GFX9-UNSAFE: ; %bb.0: ; %.entry
-; GFX9-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-UNSAFE-NEXT: v_fma_f64 v[0:1], v[0:1], v[8:9], v[16:17]
-; GFX9-UNSAFE-NEXT: v_fma_f64 v[2:3], v[2:3], v[10:11], v[18:19]
-; GFX9-UNSAFE-NEXT: v_fma_f64 v[4:5], v[4:5], v[12:13], v[20:21]
-; GFX9-UNSAFE-NEXT: v_fma_f64 v[6:7], v[6:7], v[14:15], v[22:23]
-; GFX9-UNSAFE-NEXT: s_setpc_b64 s[30:31]
-;
; GFX10-LABEL: test_4xdouble_add_mul:
; GFX10: ; %bb.0: ; %.entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -887,7 +1411,14 @@ define <4 x double> @test_4xdouble_add_mul(<4 x double> %x, <4 x double> %y, <4
; GFX10-DENORM-NEXT: v_add_f64 v[4:5], v[4:5], v[20:21]
; GFX10-DENORM-NEXT: v_add_f64 v[6:7], v[6:7], v[22:23]
; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31]
-;
+; GFX9-UNSAFE-LABEL: test_4xdouble_add_mul:
+; GFX9-UNSAFE: ; %bb.0: ; %.entry
+; GFX9-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-UNSAFE-NEXT: v_fma_f64 v[0:1], v[0:1], v[8:9], v[16:17]
+; GFX9-UNSAFE-NEXT: v_fma_f64 v[2:3], v[2:3], v[10:11], v[18:19]
+; GFX9-UNSAFE-NEXT: v_fma_f64 v[4:5], v[4:5], v[12:13], v[20:21]
+; GFX9-UNSAFE-NEXT: v_fma_f64 v[6:7], v[6:7], v[14:15], v[22:23]
+; GFX9-UNSAFE-NEXT: s_setpc_b64 s[30:31]
; GFX10-UNSAFE-LABEL: test_4xdouble_add_mul:
; GFX10-UNSAFE: ; %bb.0: ; %.entry
; GFX10-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -902,6 +1433,66 @@ define <4 x double> @test_4xdouble_add_mul(<4 x double> %x, <4 x double> %y, <4
ret <4 x double> %b
}
+define <4 x double> @test_4xdouble_add_mul_contract(<4 x double> %x, <4 x double> %y, <4 x double> %z) {
+; GFX9-LABEL: test_4xdouble_add_mul_contract:
+; GFX9: ; %bb.0: ; %.entry
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_fma_f64 v[0:1], v[0:1], v[8:9], v[16:17]
+; GFX9-NEXT: v_fma_f64 v[2:3], v[2:3], v[10:11], v[18:19]
+; GFX9-NEXT: v_fma_f64 v[4:5], v[4:5], v[12:13], v[20:21]
+; GFX9-NEXT: v_fma_f64 v[6:7], v[6:7], v[14:15], v[22:23]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-CONTRACT-LABEL: test_4xdouble_add_mul_contract:
+; GFX9-CONTRACT: ; %bb.0: ; %.entry
+; GFX9-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-CONTRACT-NEXT: v_fma_f64 v[0:1], v[0:1], v[8:9], v[16:17]
+; GFX9-CONTRACT-NEXT: v_fma_f64 v[2:3], v[2:3], v[10:11], v[18:19]
+; GFX9-CONTRACT-NEXT: v_fma_f64 v[4:5], v[4:5], v[12:13], v[20:21]
+; GFX9-CONTRACT-NEXT: v_fma_f64 v[6:7], v[6:7], v[14:15], v[22:23]
+; GFX9-CONTRACT-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-DENORM-LABEL: test_4xdouble_add_mul_contract:
+; GFX9-DENORM: ; %bb.0: ; %.entry
+; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-DENORM-NEXT: v_fma_f64 v[0:1], v[0:1], v[8:9], v[16:17]
+; GFX9-DENORM-NEXT: v_fma_f64 v[2:3], v[2:3], v[10:11], v[18:19]
+; GFX9-DENORM-NEXT: v_fma_f64 v[4:5], v[4:5], v[12:13], v[20:21]
+; GFX9-DENORM-NEXT: v_fma_f64 v[6:7], v[6:7], v[14:15], v[22:23]
+; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: test_4xdouble_add_mul_contract:
+; GFX10: ; %bb.0: ; %.entry
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_fma_f64 v[0:1], v[0:1], v[8:9], v[16:17]
+; GFX10-NEXT: v_fma_f64 v[2:3], v[2:3], v[10:11], v[18:19]
+; GFX10-NEXT: v_fma_f64 v[4:5], v[4:5], v[12:13], v[20:21]
+; GFX10-NEXT: v_fma_f64 v[6:7], v[6:7], v[14:15], v[22:23]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-CONTRACT-LABEL: test_4xdouble_add_mul_contract:
+; GFX10-CONTRACT: ; %bb.0: ; %.entry
+; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-CONTRACT-NEXT: v_fma_f64 v[0:1], v[0:1], v[8:9], v[16:17]
+; GFX10-CONTRACT-NEXT: v_fma_f64 v[2:3], v[2:3], v[10:11], v[18:19]
+; GFX10-CONTRACT-NEXT: v_fma_f64 v[4:5], v[4:5], v[12:13], v[20:21]
+; GFX10-CONTRACT-NEXT: v_fma_f64 v[6:7], v[6:7], v[14:15], v[22:23]
+; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-DENORM-LABEL: test_4xdouble_add_mul_contract:
+; GFX10-DENORM: ; %bb.0: ; %.entry
+; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-DENORM-NEXT: v_fma_f64 v[0:1], v[0:1], v[8:9], v[16:17]
+; GFX10-DENORM-NEXT: v_fma_f64 v[2:3], v[2:3], v[10:11], v[18:19]
+; GFX10-DENORM-NEXT: v_fma_f64 v[4:5], v[4:5], v[12:13], v[20:21]
+; GFX10-DENORM-NEXT: v_fma_f64 v[6:7], v[6:7], v[14:15], v[22:23]
+; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31]
+.entry:
+ %a = fmul contract <4 x double> %x, %y
+ %b = fadd contract <4 x double> %a, %z
+ ret <4 x double> %b
+}
+
define <3 x double> @test_3xdouble_add_mul_rhs(<3 x double> %x, <3 x double> %y, <3 x double> %z) {
; GFX9-LABEL: test_3xdouble_add_mul_rhs:
; GFX9: ; %bb.0: ; %.entry
@@ -933,14 +1524,6 @@ define <3 x double> @test_3xdouble_add_mul_rhs(<3 x double> %x, <3 x double> %y,
; GFX9-DENORM-NEXT: v_add_f64 v[4:5], v[16:17], v[4:5]
; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-UNSAFE-LABEL: test_3xdouble_add_mul_rhs:
-; GFX9-UNSAFE: ; %bb.0: ; %.entry
-; GFX9-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-UNSAFE-NEXT: v_fma_f64 v[0:1], v[0:1], v[6:7], v[12:13]
-; GFX9-UNSAFE-NEXT: v_fma_f64 v[2:3], v[2:3], v[8:9], v[14:15]
-; GFX9-UNSAFE-NEXT: v_fma_f64 v[4:5], v[4:5], v[10:11], v[16:17]
-; GFX9-UNSAFE-NEXT: s_setpc_b64 s[30:31]
-;
; GFX10-LABEL: test_3xdouble_add_mul_rhs:
; GFX10: ; %bb.0: ; %.entry
; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -970,7 +1553,13 @@ define <3 x double> @test_3xdouble_add_mul_rhs(<3 x double> %x, <3 x double> %y,
; GFX10-DENORM-NEXT: v_add_f64 v[2:3], v[14:15], v[2:3]
; GFX10-DENORM-NEXT: v_add_f64 v[4:5], v[16:17], v[4:5]
; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31]
-;
+; GFX9-UNSAFE-LABEL: test_3xdouble_add_mul_rhs:
+; GFX9-UNSAFE: ; %bb.0: ; %.entry
+; GFX9-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-UNSAFE-NEXT: v_fma_f64 v[0:1], v[0:1], v[6:7], v[12:13]
+; GFX9-UNSAFE-NEXT: v_fma_f64 v[2:3], v[2:3], v[8:9], v[14:15]
+; GFX9-UNSAFE-NEXT: v_fma_f64 v[4:5], v[4:5], v[10:11], v[16:17]
+; GFX9-UNSAFE-NEXT: s_setpc_b64 s[30:31]
; GFX10-UNSAFE-LABEL: test_3xdouble_add_mul_rhs:
; GFX10-UNSAFE: ; %bb.0: ; %.entry
; GFX10-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
@@ -983,3 +1572,57 @@ define <3 x double> @test_3xdouble_add_mul_rhs(<3 x double> %x, <3 x double> %y,
%b = fadd <3 x double> %z, %a
ret <3 x double> %b
}
+
+define <3 x double> @test_3xdouble_add_mul_rhs_contract(<3 x double> %x, <3 x double> %y, <3 x double> %z) {
+; GFX9-LABEL: test_3xdouble_add_mul_rhs_contract:
+; GFX9: ; %bb.0: ; %.entry
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_fma_f64 v[0:1], v[0:1], v[6:7], v[12:13]
+; GFX9-NEXT: v_fma_f64 v[2:3], v[2:3], v[8:9], v[14:15]
+; GFX9-NEXT: v_fma_f64 v[4:5], v[4:5], v[10:11], v[16:17]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-CONTRACT-LABEL: test_3xdouble_add_mul_rhs_contract:
+; GFX9-CONTRACT: ; %bb.0: ; %.entry
+; GFX9-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-CONTRACT-NEXT: v_fma_f64 v[0:1], v[0:1], v[6:7], v[12:13]
+; GFX9-CONTRACT-NEXT: v_fma_f64 v[2:3], v[2:3], v[8:9], v[14:15]
+; GFX9-CONTRACT-NEXT: v_fma_f64 v[4:5], v[4:5], v[10:11], v[16:17]
+; GFX9-CONTRACT-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-DENORM-LABEL: test_3xdouble_add_mul_rhs_contract:
+; GFX9-DENORM: ; %bb.0: ; %.entry
+; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-DENORM-NEXT: v_fma_f64 v[0:1], v[0:1], v[6:7], v[12:13]
+; GFX9-DENORM-NEXT: v_fma_f64 v[2:3], v[2:3], v[8:9], v[14:15]
+; GFX9-DENORM-NEXT: v_fma_f64 v[4:5], v[4:5], v[10:11], v[16:17]
+; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: test_3xdouble_add_mul_rhs_contract:
+; GFX10: ; %bb.0: ; %.entry
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_fma_f64 v[0:1], v[0:1], v[6:7], v[12:13]
+; GFX10-NEXT: v_fma_f64 v[2:3], v[2:3], v[8:9], v[14:15]
+; GFX10-NEXT: v_fma_f64 v[4:5], v[4:5], v[10:11], v[16:17]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-CONTRACT-LABEL: test_3xdouble_add_mul_rhs_contract:
+; GFX10-CONTRACT: ; %bb.0: ; %.entry
+; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-CONTRACT-NEXT: v_fma_f64 v[0:1], v[0:1], v[6:7], v[12:13]
+; GFX10-CONTRACT-NEXT: v_fma_f64 v[2:3], v[2:3], v[8:9], v[14:15]
+; GFX10-CONTRACT-NEXT: v_fma_f64 v[4:5], v[4:5], v[10:11], v[16:17]
+; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-DENORM-LABEL: test_3xdouble_add_mul_rhs_contract:
+; GFX10-DENORM: ; %bb.0: ; %.entry
+; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-DENORM-NEXT: v_fma_f64 v[0:1], v[0:1], v[6:7], v[12:13]
+; GFX10-DENORM-NEXT: v_fma_f64 v[2:3], v[2:3], v[8:9], v[14:15]
+; GFX10-DENORM-NEXT: v_fma_f64 v[4:5], v[4:5], v[10:11], v[16:17]
+; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31]
+.entry:
+ %a = fmul contract <3 x double> %x, %y
+ %b = fadd contract <3 x double> %z, %a
+ ret <3 x double> %b
+}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-unmerge-values.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-unmerge-values.mir
index 2845a63..d9ac9a7 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-unmerge-values.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-unmerge-values.mir
@@ -24,8 +24,8 @@ body: |
%ptr:_(p1) = COPY $vgpr2_vgpr3
%vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
%el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
- %6:_(s32) = G_FMUL %0, %1
- %7:_(s32) = G_FADD %6, %el1
+ %6:_(s32) = contract G_FMUL %0, %1
+ %7:_(s32) = contract G_FADD %6, %el1
$vgpr0 = COPY %7(s32)
...
@@ -54,8 +54,8 @@ body: |
%ptr:_(p1) = COPY $vgpr2_vgpr3
%vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
%el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
- %6:_(s32) = G_FMUL %0, %1
- %7:_(s32) = G_FADD %el1, %6
+ %6:_(s32) = contract G_FMUL %0, %1
+ %7:_(s32) = contract G_FADD %el1, %6
$vgpr0 = COPY %7(s32)
...
@@ -233,10 +233,10 @@ body: |
%7:_(s16) = G_TRUNC %6(s32)
%8:_(s32) = COPY $vgpr5
%9:_(s16) = G_TRUNC %8(s32)
- %10:_(s16) = G_FMUL %7, %9
+ %10:_(s16) = contract G_FMUL %7, %9
%11:_(s32) = G_FPEXT %10(s16)
%12:_(s32) = G_FMA %0, %1, %11
- %13:_(s32) = G_FADD %12, %el1
+ %13:_(s32) = contract G_FADD %12, %el1
$vgpr0 = COPY %13(s32)
...
@@ -282,11 +282,11 @@ body: |
%9:_(s16) = G_TRUNC %8(s32)
%10:_(s32) = COPY $vgpr5
%11:_(s16) = G_TRUNC %10(s32)
- %12:_(s16) = G_FMUL %9, %11
- %13:_(s16) = G_FMUL %1, %3
- %14:_(s16) = G_FADD %13, %12
+ %12:_(s16) = contract G_FMUL %9, %11
+ %13:_(s16) = contract G_FMUL %1, %3
+ %14:_(s16) = contract G_FADD %13, %12
%15:_(s32) = G_FPEXT %14(s16)
- %16:_(s32) = G_FADD %15, %el1
+ %16:_(s32) = contract G_FADD %15, %el1
$vgpr0 = COPY %16(s32)
...
@@ -326,10 +326,10 @@ body: |
%7:_(s16) = G_TRUNC %6(s32)
%8:_(s32) = COPY $vgpr5
%9:_(s16) = G_TRUNC %8(s32)
- %10:_(s16) = G_FMUL %7, %9
+ %10:_(s16) = contract G_FMUL %7, %9
%11:_(s32) = G_FPEXT %10(s16)
%12:_(s32) = G_FMA %4, %5, %11
- %13:_(s32) = G_FADD %el1, %12
+ %13:_(s32) = contract G_FADD %el1, %12
$vgpr0 = COPY %13(s32)
...
@@ -375,11 +375,11 @@ body: |
%9:_(s16) = G_TRUNC %8(s32)
%10:_(s32) = COPY $vgpr5
%11:_(s16) = G_TRUNC %10(s32)
- %12:_(s16) = G_FMUL %9, %11
- %13:_(s16) = G_FMUL %5, %7
- %14:_(s16) = G_FADD %13, %12
+ %12:_(s16) = contract G_FMUL %9, %11
+ %13:_(s16) = contract G_FMUL %5, %7
+ %14:_(s16) = contract G_FADD %13, %12
%15:_(s32) = G_FPEXT %14(s16)
- %16:_(s32) = G_FADD %el1, %15
+ %16:_(s32) = contract G_FADD %el1, %15
$vgpr0 = COPY %16(s32)
...
@@ -409,8 +409,8 @@ body: |
%ptr:_(p1) = COPY $vgpr0_vgpr1
%vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
%el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
- %6:_(s32) = G_FMUL %0, %1
- %7:_(s32) = G_FSUB %6, %el1
+ %6:_(s32) = contract G_FMUL %0, %1
+ %7:_(s32) = contract G_FSUB %6, %el1
$vgpr0 = COPY %7(s32)
...
@@ -440,7 +440,7 @@ body: |
%ptr:_(p1) = COPY $vgpr2_vgpr3
%vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1)
%el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>)
- %6:_(s32) = G_FMUL %0, %1
- %7:_(s32) = G_FSUB %el1, %6
+ %6:_(s32) = contract G_FMUL %0, %1
+ %7:_(s32) = contract G_FSUB %el1, %6
$vgpr0 = COPY %7(s32)
...
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f32.ll
index c4d57ac..da25ac0 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f32.ll
@@ -12,7 +12,7 @@ define amdgpu_ps void @flat_atomic_fadd_f32_no_rtn_intrinsic(ptr %ptr, float %da
; GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
; GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX942-NEXT: FLAT_ATOMIC_ADD_F32 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr)
+ ; GFX942-NEXT: FLAT_ATOMIC_ADD_F32 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr, !noalias.addrspace !0)
; GFX942-NEXT: S_ENDPGM 0
;
; GFX11-LABEL: name: flat_atomic_fadd_f32_no_rtn_intrinsic
@@ -23,7 +23,7 @@ define amdgpu_ps void @flat_atomic_fadd_f32_no_rtn_intrinsic(ptr %ptr, float %da
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX11-NEXT: FLAT_ATOMIC_ADD_F32 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr)
+ ; GFX11-NEXT: FLAT_ATOMIC_ADD_F32 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr, !noalias.addrspace !0)
; GFX11-NEXT: S_ENDPGM 0
%ret = call float @llvm.amdgcn.flat.atomic.fadd.f32.p1.f32(ptr %ptr, float %data)
ret void
@@ -38,7 +38,7 @@ define amdgpu_ps float @flat_atomic_fadd_f32_rtn_intrinsic(ptr %ptr, float %data
; GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
; GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX942-NEXT: [[FLAT_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_F32_RTN [[REG_SEQUENCE]], [[COPY2]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr)
+ ; GFX942-NEXT: [[FLAT_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_F32_RTN [[REG_SEQUENCE]], [[COPY2]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr, !noalias.addrspace !0)
; GFX942-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_F32_RTN]]
; GFX942-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
;
@@ -50,7 +50,7 @@ define amdgpu_ps float @flat_atomic_fadd_f32_rtn_intrinsic(ptr %ptr, float %data
; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
- ; GFX11-NEXT: [[FLAT_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_F32_RTN [[REG_SEQUENCE]], [[COPY2]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr)
+ ; GFX11-NEXT: [[FLAT_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_F32_RTN [[REG_SEQUENCE]], [[COPY2]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr, !noalias.addrspace !0)
; GFX11-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_F32_RTN]]
; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0
%ret = call float @llvm.amdgcn.flat.atomic.fadd.f32.p1.f32(ptr %ptr, float %data)
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f64.ll
index c82ae2fb..bf36979 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f64.ll
@@ -13,7 +13,7 @@ define amdgpu_ps void @flat_atomic_fadd_f64_no_rtn_atomicrmw(ptr %ptr, double %d
; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: FLAT_ATOMIC_ADD_F64 [[REG_SEQUENCE]], [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr)
+ ; GFX90A_GFX942-NEXT: FLAT_ATOMIC_ADD_F64 [[REG_SEQUENCE]], [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr, !noalias.addrspace !0)
; GFX90A_GFX942-NEXT: S_ENDPGM 0
%ret = atomicrmw fadd ptr %ptr, double %data syncscope("wavefront") monotonic, !noalias.addrspace !1, !amdgpu.no.fine.grained.memory !0
ret void
@@ -30,7 +30,7 @@ define amdgpu_ps double @flat_atomic_fadd_f64_rtn_atomicrmw(ptr %ptr, double %da
; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3
; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1
- ; GFX90A_GFX942-NEXT: [[FLAT_ATOMIC_ADD_F64_RTN:%[0-9]+]]:vreg_64_align2 = FLAT_ATOMIC_ADD_F64_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr)
+ ; GFX90A_GFX942-NEXT: [[FLAT_ATOMIC_ADD_F64_RTN:%[0-9]+]]:vreg_64_align2 = FLAT_ATOMIC_ADD_F64_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr, !noalias.addrspace !0)
; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[FLAT_ATOMIC_ADD_F64_RTN]].sub0
; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[FLAT_ATOMIC_ADD_F64_RTN]].sub1
; GFX90A_GFX942-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fptrunc.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fptrunc.mir
index f513de8..477ef32 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fptrunc.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fptrunc.mir
@@ -385,117 +385,16 @@ body: |
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>)
- ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64)
- ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 20
- ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32)
- ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2047
- ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]]
- ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1008
- ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[AND]], [[C2]]
- ; CHECK-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8
- ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C3]](s32)
- ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4094
- ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C4]]
- ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 511
- ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C5]]
- ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[UV2]]
- ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
- ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[OR]](s32), [[C6]]
- ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1)
- ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[ZEXT]]
- ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 512
- ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[OR1]](s32), [[C6]]
- ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[C7]], [[C6]]
- ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 31744
- ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SELECT]], [[C8]]
- ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 12
- ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ADD]], [[C9]](s32)
- ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL]]
- ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
- ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C10]], [[ADD]]
- ; CHECK-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SUB]], [[C6]]
- ; CHECK-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 13
- ; CHECK-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SMAX]], [[C11]]
- ; CHECK-NEXT: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 4096
- ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[C12]]
- ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[OR4]], [[SMIN]](s32)
- ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR2]], [[SMIN]](s32)
- ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL1]](s32), [[OR4]]
- ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP2]](s1)
- ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[LSHR2]], [[ZEXT1]]
- ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[ADD]](s32), [[C10]]
- ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[OR5]], [[OR3]]
- ; CHECK-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 7
- ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SELECT1]], [[C13]]
- ; CHECK-NEXT: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 2
- ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[SELECT1]], [[C14]](s32)
- ; CHECK-NEXT: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 3
- ; CHECK-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AND3]](s32), [[C15]]
- ; CHECK-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP4]](s1)
- ; CHECK-NEXT: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 5
- ; CHECK-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[AND3]](s32), [[C16]]
- ; CHECK-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP5]](s1)
- ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[ZEXT3]]
- ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[LSHR3]], [[OR6]]
- ; CHECK-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 30
- ; CHECK-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[ADD]](s32), [[C17]]
- ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP6]](s1), [[C8]], [[ADD1]]
- ; CHECK-NEXT: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 1039
- ; CHECK-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C18]]
- ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP7]](s1), [[OR2]], [[SELECT2]]
- ; CHECK-NEXT: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
- ; CHECK-NEXT: [[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C19]](s32)
- ; CHECK-NEXT: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 32768
- ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C20]]
- ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SELECT3]]
- ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64)
- ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32)
- ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C1]]
- ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[AND5]], [[C2]]
- ; CHECK-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C3]](s32)
- ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C4]]
- ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[UV5]], [[C5]]
- ; CHECK-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[UV4]]
- ; CHECK-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[OR8]](s32), [[C6]]
- ; CHECK-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP8]](s1)
- ; CHECK-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[ZEXT4]]
- ; CHECK-NEXT: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[OR9]](s32), [[C6]]
- ; CHECK-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP9]](s1), [[C7]], [[C6]]
- ; CHECK-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SELECT4]], [[C8]]
- ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ADD2]], [[C9]](s32)
- ; CHECK-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL2]]
- ; CHECK-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C10]], [[ADD2]]
- ; CHECK-NEXT: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SUB1]], [[C6]]
- ; CHECK-NEXT: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SMAX1]], [[C11]]
- ; CHECK-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[C12]]
- ; CHECK-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[OR12]], [[SMIN1]](s32)
- ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR7]], [[SMIN1]](s32)
- ; CHECK-NEXT: [[ICMP10:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL3]](s32), [[OR12]]
- ; CHECK-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP10]](s1)
- ; CHECK-NEXT: [[OR13:%[0-9]+]]:_(s32) = G_OR [[LSHR7]], [[ZEXT5]]
- ; CHECK-NEXT: [[ICMP11:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[ADD2]](s32), [[C10]]
- ; CHECK-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP11]](s1), [[OR13]], [[OR11]]
- ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[SELECT5]], [[C13]]
- ; CHECK-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[SELECT5]], [[C14]](s32)
- ; CHECK-NEXT: [[ICMP12:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AND8]](s32), [[C15]]
- ; CHECK-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP12]](s1)
- ; CHECK-NEXT: [[ICMP13:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[AND8]](s32), [[C16]]
- ; CHECK-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP13]](s1)
- ; CHECK-NEXT: [[OR14:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[ZEXT7]]
- ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[LSHR8]], [[OR14]]
- ; CHECK-NEXT: [[ICMP14:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[ADD2]](s32), [[C17]]
- ; CHECK-NEXT: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[ICMP14]](s1), [[C8]], [[ADD3]]
- ; CHECK-NEXT: [[ICMP15:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[ADD2]](s32), [[C18]]
- ; CHECK-NEXT: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[ICMP15]](s1), [[OR10]], [[SELECT6]]
- ; CHECK-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C19]](s32)
- ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C20]]
- ; CHECK-NEXT: [[OR15:%[0-9]+]]:_(s32) = G_OR [[AND9]], [[SELECT7]]
- ; CHECK-NEXT: [[C21:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535
- ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[OR7]], [[C21]]
- ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[OR15]], [[C21]]
- ; CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C19]](s32)
- ; CHECK-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL4]]
- ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR16]](s32)
+ ; CHECK-NEXT: [[FPTRUNC:%[0-9]+]]:_(s32) = afn G_FPTRUNC [[UV]](s64)
+ ; CHECK-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = afn G_FPTRUNC [[FPTRUNC]](s32)
+ ; CHECK-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s32) = afn G_FPTRUNC [[UV1]](s64)
+ ; CHECK-NEXT: [[FPTRUNC3:%[0-9]+]]:_(s16) = afn G_FPTRUNC [[FPTRUNC2]](s32)
+ ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16)
+ ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC3]](s16)
+ ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
+ ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32)
+ ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]]
+ ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32)
; CHECK-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>)
%0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3
%1:_(<2 x s16>) = afn G_FPTRUNC %0
diff --git a/llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll b/llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll
index d0b41e1..57b4857 100644
--- a/llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll
@@ -1,16 +1,16 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
-; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -global-isel=0 -enable-unsafe-fp-math < %s | FileCheck -enable-var-scope -check-prefixes=SI-SDAG %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -global-isel=1 -enable-unsafe-fp-math < %s | FileCheck -check-prefixes=SI-GISEL %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=fiji -global-isel=0 -mattr=-flat-for-global -enable-unsafe-fp-math < %s | FileCheck -enable-var-scope -check-prefixes=VI-SDAG %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=fiji -global-isel=1 -mattr=-flat-for-global -enable-unsafe-fp-math < %s | FileCheck -enable-var-scope -check-prefixes=VI-GISEL %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx900 -global-isel=0 -mattr=-flat-for-global -denormal-fp-math=preserve-sign -enable-unsafe-fp-math < %s | FileCheck -enable-var-scope -check-prefixes=GFX9-SDAG %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx900 -global-isel=1 -mattr=-flat-for-global -denormal-fp-math=preserve-sign -enable-unsafe-fp-math < %s | FileCheck -enable-var-scope -check-prefixes=GFX9-GISEL %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx950 -global-isel=0 -mattr=-flat-for-global -denormal-fp-math=preserve-sign -enable-unsafe-fp-math < %s | FileCheck -enable-var-scope -check-prefixes=GFX950-SDAG %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx950 -global-isel=1 -mattr=-flat-for-global -denormal-fp-math=preserve-sign -enable-unsafe-fp-math < %s | FileCheck -enable-var-scope -check-prefixes=GFX950-GISEL %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -global-isel=0 -mattr=-flat-for-global,+real-true16 -denormal-fp-math=preserve-sign -enable-unsafe-fp-math < %s | FileCheck -enable-var-scope -check-prefixes=GFX11-SDAG-TRUE16 %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -global-isel=0 -mattr=-flat-for-global,-real-true16 -denormal-fp-math=preserve-sign -enable-unsafe-fp-math < %s | FileCheck -enable-var-scope -check-prefixes=GFX11-SDAG-FAKE16 %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -global-isel=1 -mattr=-flat-for-global,+real-true16 -denormal-fp-math=preserve-sign -enable-unsafe-fp-math < %s | FileCheck -enable-var-scope -check-prefixes=GFX11-GISEL-TRUE16 %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -global-isel=1 -mattr=-flat-for-global,-real-true16 -denormal-fp-math=preserve-sign -enable-unsafe-fp-math < %s | FileCheck -enable-var-scope -check-prefixes=GFX11-GISEL-FAKE16 %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -global-isel=0 < %s | FileCheck -enable-var-scope -check-prefixes=SI-SDAG %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -global-isel=1 < %s | FileCheck -check-prefixes=SI-GISEL %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=fiji -global-isel=0 -mattr=-flat-for-global < %s | FileCheck -enable-var-scope -check-prefixes=VI-SDAG %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=fiji -global-isel=1 -mattr=-flat-for-global < %s | FileCheck -enable-var-scope -check-prefixes=VI-GISEL %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx900 -global-isel=0 -mattr=-flat-for-global -denormal-fp-math=preserve-sign < %s | FileCheck -enable-var-scope -check-prefixes=GFX9-SDAG %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx900 -global-isel=1 -mattr=-flat-for-global -denormal-fp-math=preserve-sign < %s | FileCheck -enable-var-scope -check-prefixes=GFX9-GISEL %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx950 -global-isel=0 -mattr=-flat-for-global -denormal-fp-math=preserve-sign < %s | FileCheck -enable-var-scope -check-prefixes=GFX950-SDAG %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx950 -global-isel=1 -mattr=-flat-for-global -denormal-fp-math=preserve-sign < %s | FileCheck -enable-var-scope -check-prefixes=GFX950-GISEL %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -global-isel=0 -mattr=-flat-for-global,+real-true16 -denormal-fp-math=preserve-sign < %s | FileCheck -enable-var-scope -check-prefixes=GFX11-SDAG-TRUE16 %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -global-isel=0 -mattr=-flat-for-global,-real-true16 -denormal-fp-math=preserve-sign < %s | FileCheck -enable-var-scope -check-prefixes=GFX11-SDAG-FAKE16 %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -global-isel=1 -mattr=-flat-for-global,+real-true16 -denormal-fp-math=preserve-sign < %s | FileCheck -enable-var-scope -check-prefixes=GFX11-GISEL-TRUE16 %s
+; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -global-isel=1 -mattr=-flat-for-global,-real-true16 -denormal-fp-math=preserve-sign < %s | FileCheck -enable-var-scope -check-prefixes=GFX11-GISEL-FAKE16 %s
define amdgpu_kernel void @fptrunc_f32_to_f16(
; SI-SDAG-LABEL: fptrunc_f32_to_f16:
@@ -201,8 +201,8 @@ entry:
ret void
}
-define amdgpu_kernel void @fptrunc_f64_to_f16(
-; SI-SDAG-LABEL: fptrunc_f64_to_f16:
+define amdgpu_kernel void @fptrunc_f32_to_f16_afn(ptr addrspace(1) %r,
+; SI-SDAG-LABEL: fptrunc_f32_to_f16_afn:
; SI-SDAG: ; %bb.0: ; %entry
; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000
@@ -212,29 +212,27 @@ define amdgpu_kernel void @fptrunc_f64_to_f16(
; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; SI-SDAG-NEXT: s_mov_b32 s8, s2
; SI-SDAG-NEXT: s_mov_b32 s9, s3
-; SI-SDAG-NEXT: buffer_load_dwordx2 v[0:1], off, s[8:11], 0
+; SI-SDAG-NEXT: buffer_load_dword v0, off, s[8:11], 0
; SI-SDAG-NEXT: s_mov_b32 s4, s0
; SI-SDAG-NEXT: s_mov_b32 s5, s1
; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
-; SI-SDAG-NEXT: v_cvt_f32_f64_e32 v0, v[0:1]
; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
; SI-SDAG-NEXT: buffer_store_short v0, off, s[4:7], 0
; SI-SDAG-NEXT: s_endpgm
;
-; SI-GISEL-LABEL: fptrunc_f64_to_f16:
+; SI-GISEL-LABEL: fptrunc_f32_to_f16_afn:
; SI-GISEL: ; %bb.0: ; %entry
; SI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; SI-GISEL-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x0
+; SI-GISEL-NEXT: s_load_dword s3, s[2:3], 0x0
; SI-GISEL-NEXT: s_mov_b32 s2, -1
; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; SI-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[4:5]
-; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
+; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, s3
; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000
; SI-GISEL-NEXT: buffer_store_short v0, off, s[0:3], 0
; SI-GISEL-NEXT: s_endpgm
;
-; VI-SDAG-LABEL: fptrunc_f64_to_f16:
+; VI-SDAG-LABEL: fptrunc_f32_to_f16_afn:
; VI-SDAG: ; %bb.0: ; %entry
; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; VI-SDAG-NEXT: s_mov_b32 s7, 0xf000
@@ -244,29 +242,27 @@ define amdgpu_kernel void @fptrunc_f64_to_f16(
; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; VI-SDAG-NEXT: s_mov_b32 s8, s2
; VI-SDAG-NEXT: s_mov_b32 s9, s3
-; VI-SDAG-NEXT: buffer_load_dwordx2 v[0:1], off, s[8:11], 0
+; VI-SDAG-NEXT: buffer_load_dword v0, off, s[8:11], 0
; VI-SDAG-NEXT: s_mov_b32 s4, s0
; VI-SDAG-NEXT: s_mov_b32 s5, s1
; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
-; VI-SDAG-NEXT: v_cvt_f32_f64_e32 v0, v[0:1]
; VI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
; VI-SDAG-NEXT: buffer_store_short v0, off, s[4:7], 0
; VI-SDAG-NEXT: s_endpgm
;
-; VI-GISEL-LABEL: fptrunc_f64_to_f16:
+; VI-GISEL-LABEL: fptrunc_f32_to_f16_afn:
; VI-GISEL: ; %bb.0: ; %entry
; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; VI-GISEL-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0
+; VI-GISEL-NEXT: s_load_dword s2, s[2:3], 0x0
+; VI-GISEL-NEXT: s_mov_b32 s3, 0xf000
; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; VI-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[2:3]
+; VI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, s2
; VI-GISEL-NEXT: s_mov_b32 s2, -1
-; VI-GISEL-NEXT: s_mov_b32 s3, 0xf000
-; VI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
; VI-GISEL-NEXT: buffer_store_short v0, off, s[0:3], 0
; VI-GISEL-NEXT: s_endpgm
;
-; GFX9-SDAG-LABEL: fptrunc_f64_to_f16:
+; GFX9-SDAG-LABEL: fptrunc_f32_to_f16_afn:
; GFX9-SDAG: ; %bb.0: ; %entry
; GFX9-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX9-SDAG-NEXT: s_mov_b32 s7, 0xf000
@@ -276,29 +272,27 @@ define amdgpu_kernel void @fptrunc_f64_to_f16(
; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; GFX9-SDAG-NEXT: s_mov_b32 s8, s2
; GFX9-SDAG-NEXT: s_mov_b32 s9, s3
-; GFX9-SDAG-NEXT: buffer_load_dwordx2 v[0:1], off, s[8:11], 0
+; GFX9-SDAG-NEXT: buffer_load_dword v0, off, s[8:11], 0
; GFX9-SDAG-NEXT: s_mov_b32 s4, s0
; GFX9-SDAG-NEXT: s_mov_b32 s5, s1
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX9-SDAG-NEXT: v_cvt_f32_f64_e32 v0, v[0:1]
; GFX9-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX9-SDAG-NEXT: buffer_store_short v0, off, s[4:7], 0
; GFX9-SDAG-NEXT: s_endpgm
;
-; GFX9-GISEL-LABEL: fptrunc_f64_to_f16:
+; GFX9-GISEL-LABEL: fptrunc_f32_to_f16_afn:
; GFX9-GISEL: ; %bb.0: ; %entry
; GFX9-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-GISEL-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0
+; GFX9-GISEL-NEXT: s_load_dword s2, s[2:3], 0x0
+; GFX9-GISEL-NEXT: s_mov_b32 s3, 0xf000
; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[2:3]
+; GFX9-GISEL-NEXT: v_cvt_f16_f32_e32 v0, s2
; GFX9-GISEL-NEXT: s_mov_b32 s2, -1
-; GFX9-GISEL-NEXT: s_mov_b32 s3, 0xf000
-; GFX9-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX9-GISEL-NEXT: buffer_store_short v0, off, s[0:3], 0
; GFX9-GISEL-NEXT: s_endpgm
;
-; GFX950-SDAG-LABEL: fptrunc_f64_to_f16:
+; GFX950-SDAG-LABEL: fptrunc_f32_to_f16_afn:
; GFX950-SDAG: ; %bb.0: ; %entry
; GFX950-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX950-SDAG-NEXT: s_mov_b32 s7, 0xf000
@@ -308,23 +302,541 @@ define amdgpu_kernel void @fptrunc_f64_to_f16(
; GFX950-SDAG-NEXT: s_waitcnt lgkmcnt(0)
; GFX950-SDAG-NEXT: s_mov_b32 s8, s2
; GFX950-SDAG-NEXT: s_mov_b32 s9, s3
-; GFX950-SDAG-NEXT: buffer_load_dwordx2 v[0:1], off, s[8:11], 0
+; GFX950-SDAG-NEXT: buffer_load_dword v0, off, s[8:11], 0
; GFX950-SDAG-NEXT: s_mov_b32 s4, s0
; GFX950-SDAG-NEXT: s_mov_b32 s5, s1
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX950-SDAG-NEXT: v_cvt_f32_f64_e32 v0, v[0:1]
; GFX950-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX950-SDAG-NEXT: buffer_store_short v0, off, s[4:7], 0
; GFX950-SDAG-NEXT: s_endpgm
;
+; GFX950-GISEL-LABEL: fptrunc_f32_to_f16_afn:
+; GFX950-GISEL: ; %bb.0: ; %entry
+; GFX950-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX950-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX950-GISEL-NEXT: s_load_dword s2, s[2:3], 0x0
+; GFX950-GISEL-NEXT: s_mov_b32 s3, 0xf000
+; GFX950-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX950-GISEL-NEXT: v_cvt_f16_f32_e32 v0, s2
+; GFX950-GISEL-NEXT: s_mov_b32 s2, -1
+; GFX950-GISEL-NEXT: buffer_store_short v0, off, s[0:3], 0
+; GFX950-GISEL-NEXT: s_endpgm
+;
+; GFX11-SDAG-TRUE16-LABEL: fptrunc_f32_to_f16_afn:
+; GFX11-SDAG-TRUE16: ; %bb.0: ; %entry
+; GFX11-SDAG-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s6, -1
+; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s7, 0x31016000
+; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s10, s6
+; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s11, s7
+; GFX11-SDAG-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s8, s2
+; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s9, s3
+; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s4, s0
+; GFX11-SDAG-TRUE16-NEXT: buffer_load_b32 v0, off, s[8:11], 0
+; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s5, s1
+; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-SDAG-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.l, v0
+; GFX11-SDAG-TRUE16-NEXT: buffer_store_b16 v0, off, s[4:7], 0
+; GFX11-SDAG-TRUE16-NEXT: s_endpgm
+;
+; GFX11-SDAG-FAKE16-LABEL: fptrunc_f32_to_f16_afn:
+; GFX11-SDAG-FAKE16: ; %bb.0: ; %entry
+; GFX11-SDAG-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s6, -1
+; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s7, 0x31016000
+; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s10, s6
+; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s11, s7
+; GFX11-SDAG-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s8, s2
+; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s9, s3
+; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s4, s0
+; GFX11-SDAG-FAKE16-NEXT: buffer_load_b32 v0, off, s[8:11], 0
+; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s5, s1
+; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-SDAG-FAKE16-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX11-SDAG-FAKE16-NEXT: buffer_store_b16 v0, off, s[4:7], 0
+; GFX11-SDAG-FAKE16-NEXT: s_endpgm
+;
+; GFX11-GISEL-TRUE16-LABEL: fptrunc_f32_to_f16_afn:
+; GFX11-GISEL-TRUE16: ; %bb.0: ; %entry
+; GFX11-GISEL-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-GISEL-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-TRUE16-NEXT: s_load_b32 s2, s[2:3], 0x0
+; GFX11-GISEL-TRUE16-NEXT: s_mov_b32 s3, 0x31016000
+; GFX11-GISEL-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.l, s2
+; GFX11-GISEL-TRUE16-NEXT: s_mov_b32 s2, -1
+; GFX11-GISEL-TRUE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0
+; GFX11-GISEL-TRUE16-NEXT: s_endpgm
+;
+; GFX11-GISEL-FAKE16-LABEL: fptrunc_f32_to_f16_afn:
+; GFX11-GISEL-FAKE16: ; %bb.0: ; %entry
+; GFX11-GISEL-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-GISEL-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-FAKE16-NEXT: s_load_b32 s2, s[2:3], 0x0
+; GFX11-GISEL-FAKE16-NEXT: s_mov_b32 s3, 0x31016000
+; GFX11-GISEL-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-FAKE16-NEXT: v_cvt_f16_f32_e32 v0, s2
+; GFX11-GISEL-FAKE16-NEXT: s_mov_b32 s2, -1
+; GFX11-GISEL-FAKE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0
+; GFX11-GISEL-FAKE16-NEXT: s_endpgm
+ ptr addrspace(1) %a) {
+entry:
+ %a.val = load float, ptr addrspace(1) %a
+ %r.val = fptrunc afn float %a.val to half
+ store half %r.val, ptr addrspace(1) %r
+ ret void
+}
+
+define amdgpu_kernel void @fptrunc_f64_to_f16(
+; SI-SDAG-LABEL: fptrunc_f64_to_f16:
+; SI-SDAG: ; %bb.0: ; %entry
+; SI-SDAG-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x9
+; SI-SDAG-NEXT: s_mov_b32 s3, 0xf000
+; SI-SDAG-NEXT: s_mov_b32 s2, -1
+; SI-SDAG-NEXT: s_mov_b32 s10, s2
+; SI-SDAG-NEXT: s_mov_b32 s11, s3
+; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; SI-SDAG-NEXT: s_mov_b32 s8, s6
+; SI-SDAG-NEXT: s_mov_b32 s9, s7
+; SI-SDAG-NEXT: buffer_load_dwordx2 v[0:1], off, s[8:11], 0
+; SI-SDAG-NEXT: s_movk_i32 s0, 0x7e00
+; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
+; SI-SDAG-NEXT: v_readfirstlane_b32 s1, v1
+; SI-SDAG-NEXT: s_and_b32 s6, s1, 0x1ff
+; SI-SDAG-NEXT: s_lshr_b32 s7, s1, 8
+; SI-SDAG-NEXT: s_bfe_u32 s8, s1, 0xb0014
+; SI-SDAG-NEXT: v_or_b32_e32 v0, s6, v0
+; SI-SDAG-NEXT: s_and_b32 s6, s7, 0xffe
+; SI-SDAG-NEXT: s_sub_i32 s7, 0x3f1, s8
+; SI-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
+; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; SI-SDAG-NEXT: v_med3_i32 v1, s7, 0, 13
+; SI-SDAG-NEXT: v_readfirstlane_b32 s7, v0
+; SI-SDAG-NEXT: v_readfirstlane_b32 s9, v1
+; SI-SDAG-NEXT: s_or_b32 s6, s6, s7
+; SI-SDAG-NEXT: s_or_b32 s7, s6, 0x1000
+; SI-SDAG-NEXT: s_lshr_b32 s10, s7, s9
+; SI-SDAG-NEXT: s_lshl_b32 s9, s10, s9
+; SI-SDAG-NEXT: s_cmp_lg_u32 s9, s7
+; SI-SDAG-NEXT: s_cselect_b32 s7, 1, 0
+; SI-SDAG-NEXT: s_addk_i32 s8, 0xfc10
+; SI-SDAG-NEXT: s_or_b32 s7, s10, s7
+; SI-SDAG-NEXT: s_lshl_b32 s9, s8, 12
+; SI-SDAG-NEXT: s_or_b32 s9, s6, s9
+; SI-SDAG-NEXT: s_cmp_lt_i32 s8, 1
+; SI-SDAG-NEXT: s_cselect_b32 s7, s7, s9
+; SI-SDAG-NEXT: s_and_b32 s9, s7, 7
+; SI-SDAG-NEXT: s_cmp_gt_i32 s9, 5
+; SI-SDAG-NEXT: s_cselect_b32 s10, 1, 0
+; SI-SDAG-NEXT: s_cmp_eq_u32 s9, 3
+; SI-SDAG-NEXT: s_cselect_b32 s9, 1, 0
+; SI-SDAG-NEXT: s_lshr_b32 s7, s7, 2
+; SI-SDAG-NEXT: s_or_b32 s9, s9, s10
+; SI-SDAG-NEXT: s_add_i32 s7, s7, s9
+; SI-SDAG-NEXT: s_cmp_lt_i32 s8, 31
+; SI-SDAG-NEXT: s_cselect_b32 s7, s7, 0x7c00
+; SI-SDAG-NEXT: s_cmp_lg_u32 s6, 0
+; SI-SDAG-NEXT: s_cselect_b32 s0, s0, 0x7c00
+; SI-SDAG-NEXT: s_cmpk_eq_i32 s8, 0x40f
+; SI-SDAG-NEXT: s_cselect_b32 s0, s0, s7
+; SI-SDAG-NEXT: s_lshr_b32 s1, s1, 16
+; SI-SDAG-NEXT: s_and_b32 s1, s1, 0x8000
+; SI-SDAG-NEXT: s_or_b32 s6, s1, s0
+; SI-SDAG-NEXT: s_mov_b32 s0, s4
+; SI-SDAG-NEXT: s_mov_b32 s1, s5
+; SI-SDAG-NEXT: v_mov_b32_e32 v0, s6
+; SI-SDAG-NEXT: buffer_store_short v0, off, s[0:3], 0
+; SI-SDAG-NEXT: s_endpgm
+;
+; SI-GISEL-LABEL: fptrunc_f64_to_f16:
+; SI-GISEL: ; %bb.0: ; %entry
+; SI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
+; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; SI-GISEL-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x0
+; SI-GISEL-NEXT: s_mov_b32 s2, -1
+; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; SI-GISEL-NEXT: s_bfe_u32 s3, s5, 0xb0014
+; SI-GISEL-NEXT: s_lshr_b32 s6, s5, 8
+; SI-GISEL-NEXT: s_and_b32 s7, s5, 0x1ff
+; SI-GISEL-NEXT: s_addk_i32 s3, 0xfc10
+; SI-GISEL-NEXT: s_and_b32 s6, s6, 0xffe
+; SI-GISEL-NEXT: s_or_b32 s4, s7, s4
+; SI-GISEL-NEXT: s_cmp_lg_u32 s4, 0
+; SI-GISEL-NEXT: s_cselect_b32 s4, 1, 0
+; SI-GISEL-NEXT: s_or_b32 s4, s6, s4
+; SI-GISEL-NEXT: s_cmp_lg_u32 s4, 0
+; SI-GISEL-NEXT: s_cselect_b32 s6, 1, 0
+; SI-GISEL-NEXT: s_lshl_b32 s6, s6, 9
+; SI-GISEL-NEXT: s_lshl_b32 s7, s3, 12
+; SI-GISEL-NEXT: s_sub_i32 s8, 1, s3
+; SI-GISEL-NEXT: s_or_b32 s9, s4, 0x1000
+; SI-GISEL-NEXT: s_or_b32 s6, s6, 0x7c00
+; SI-GISEL-NEXT: s_or_b32 s4, s4, s7
+; SI-GISEL-NEXT: s_max_i32 s7, s8, 0
+; SI-GISEL-NEXT: s_min_i32 s7, s7, 13
+; SI-GISEL-NEXT: s_lshr_b32 s8, s9, s7
+; SI-GISEL-NEXT: s_lshl_b32 s7, s8, s7
+; SI-GISEL-NEXT: s_cmp_lg_u32 s7, s9
+; SI-GISEL-NEXT: s_cselect_b32 s7, 1, 0
+; SI-GISEL-NEXT: s_or_b32 s7, s8, s7
+; SI-GISEL-NEXT: s_cmp_lt_i32 s3, 1
+; SI-GISEL-NEXT: s_cselect_b32 s4, s7, s4
+; SI-GISEL-NEXT: s_and_b32 s7, s4, 7
+; SI-GISEL-NEXT: s_lshr_b32 s4, s4, 2
+; SI-GISEL-NEXT: s_cmp_eq_u32 s7, 3
+; SI-GISEL-NEXT: s_cselect_b32 s8, 1, 0
+; SI-GISEL-NEXT: s_cmp_gt_i32 s7, 5
+; SI-GISEL-NEXT: s_cselect_b32 s7, 1, 0
+; SI-GISEL-NEXT: s_or_b32 s7, s8, s7
+; SI-GISEL-NEXT: s_add_i32 s4, s4, s7
+; SI-GISEL-NEXT: s_cmp_gt_i32 s3, 30
+; SI-GISEL-NEXT: s_cselect_b32 s4, 0x7c00, s4
+; SI-GISEL-NEXT: s_cmpk_eq_i32 s3, 0x40f
+; SI-GISEL-NEXT: s_cselect_b32 s3, s6, s4
+; SI-GISEL-NEXT: s_lshr_b32 s4, s5, 16
+; SI-GISEL-NEXT: s_and_b32 s4, s4, 0x8000
+; SI-GISEL-NEXT: s_or_b32 s4, s4, s3
+; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000
+; SI-GISEL-NEXT: v_mov_b32_e32 v0, s4
+; SI-GISEL-NEXT: buffer_store_short v0, off, s[0:3], 0
+; SI-GISEL-NEXT: s_endpgm
+;
+; VI-SDAG-LABEL: fptrunc_f64_to_f16:
+; VI-SDAG: ; %bb.0: ; %entry
+; VI-SDAG-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x24
+; VI-SDAG-NEXT: s_mov_b32 s3, 0xf000
+; VI-SDAG-NEXT: s_mov_b32 s2, -1
+; VI-SDAG-NEXT: s_mov_b32 s10, s2
+; VI-SDAG-NEXT: s_mov_b32 s11, s3
+; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; VI-SDAG-NEXT: s_mov_b32 s8, s6
+; VI-SDAG-NEXT: s_mov_b32 s9, s7
+; VI-SDAG-NEXT: buffer_load_dwordx2 v[0:1], off, s[8:11], 0
+; VI-SDAG-NEXT: s_mov_b32 s0, s4
+; VI-SDAG-NEXT: s_mov_b32 s1, s5
+; VI-SDAG-NEXT: s_movk_i32 s6, 0x7e00
+; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
+; VI-SDAG-NEXT: v_readfirstlane_b32 s4, v1
+; VI-SDAG-NEXT: s_and_b32 s5, s4, 0x1ff
+; VI-SDAG-NEXT: v_or_b32_e32 v0, s5, v0
+; VI-SDAG-NEXT: s_lshr_b32 s7, s4, 8
+; VI-SDAG-NEXT: s_bfe_u32 s8, s4, 0xb0014
+; VI-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
+; VI-SDAG-NEXT: s_and_b32 s5, s7, 0xffe
+; VI-SDAG-NEXT: s_sub_i32 s7, 0x3f1, s8
+; VI-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; VI-SDAG-NEXT: v_med3_i32 v1, s7, 0, 13
+; VI-SDAG-NEXT: v_readfirstlane_b32 s7, v0
+; VI-SDAG-NEXT: s_or_b32 s5, s5, s7
+; VI-SDAG-NEXT: v_readfirstlane_b32 s9, v1
+; VI-SDAG-NEXT: s_or_b32 s7, s5, 0x1000
+; VI-SDAG-NEXT: s_lshr_b32 s10, s7, s9
+; VI-SDAG-NEXT: s_lshl_b32 s9, s10, s9
+; VI-SDAG-NEXT: s_cmp_lg_u32 s9, s7
+; VI-SDAG-NEXT: s_cselect_b32 s7, 1, 0
+; VI-SDAG-NEXT: s_addk_i32 s8, 0xfc10
+; VI-SDAG-NEXT: s_lshl_b32 s9, s8, 12
+; VI-SDAG-NEXT: s_or_b32 s7, s10, s7
+; VI-SDAG-NEXT: s_or_b32 s9, s5, s9
+; VI-SDAG-NEXT: s_cmp_lt_i32 s8, 1
+; VI-SDAG-NEXT: s_cselect_b32 s7, s7, s9
+; VI-SDAG-NEXT: s_and_b32 s9, s7, 7
+; VI-SDAG-NEXT: s_cmp_gt_i32 s9, 5
+; VI-SDAG-NEXT: s_cselect_b32 s10, 1, 0
+; VI-SDAG-NEXT: s_cmp_eq_u32 s9, 3
+; VI-SDAG-NEXT: s_cselect_b32 s9, 1, 0
+; VI-SDAG-NEXT: s_lshr_b32 s7, s7, 2
+; VI-SDAG-NEXT: s_or_b32 s9, s9, s10
+; VI-SDAG-NEXT: s_add_i32 s7, s7, s9
+; VI-SDAG-NEXT: s_cmp_lt_i32 s8, 31
+; VI-SDAG-NEXT: s_cselect_b32 s7, s7, 0x7c00
+; VI-SDAG-NEXT: s_cmp_lg_u32 s5, 0
+; VI-SDAG-NEXT: s_cselect_b32 s5, s6, 0x7c00
+; VI-SDAG-NEXT: s_cmpk_eq_i32 s8, 0x40f
+; VI-SDAG-NEXT: s_cselect_b32 s5, s5, s7
+; VI-SDAG-NEXT: s_lshr_b32 s4, s4, 16
+; VI-SDAG-NEXT: s_and_b32 s4, s4, 0x8000
+; VI-SDAG-NEXT: s_or_b32 s4, s4, s5
+; VI-SDAG-NEXT: v_mov_b32_e32 v0, s4
+; VI-SDAG-NEXT: buffer_store_short v0, off, s[0:3], 0
+; VI-SDAG-NEXT: s_endpgm
+;
+; VI-GISEL-LABEL: fptrunc_f64_to_f16:
+; VI-GISEL: ; %bb.0: ; %entry
+; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; VI-GISEL-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0
+; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; VI-GISEL-NEXT: s_bfe_u32 s4, s3, 0xb0014
+; VI-GISEL-NEXT: s_lshr_b32 s5, s3, 8
+; VI-GISEL-NEXT: s_and_b32 s6, s3, 0x1ff
+; VI-GISEL-NEXT: s_addk_i32 s4, 0xfc10
+; VI-GISEL-NEXT: s_and_b32 s5, s5, 0xffe
+; VI-GISEL-NEXT: s_or_b32 s2, s6, s2
+; VI-GISEL-NEXT: s_cmp_lg_u32 s2, 0
+; VI-GISEL-NEXT: s_cselect_b32 s2, 1, 0
+; VI-GISEL-NEXT: s_or_b32 s2, s5, s2
+; VI-GISEL-NEXT: s_cmp_lg_u32 s2, 0
+; VI-GISEL-NEXT: s_cselect_b32 s5, 1, 0
+; VI-GISEL-NEXT: s_sub_i32 s7, 1, s4
+; VI-GISEL-NEXT: s_lshl_b32 s6, s4, 12
+; VI-GISEL-NEXT: s_max_i32 s7, s7, 0
+; VI-GISEL-NEXT: s_or_b32 s6, s2, s6
+; VI-GISEL-NEXT: s_min_i32 s7, s7, 13
+; VI-GISEL-NEXT: s_bitset1_b32 s2, 12
+; VI-GISEL-NEXT: s_lshl_b32 s5, s5, 9
+; VI-GISEL-NEXT: s_lshr_b32 s8, s2, s7
+; VI-GISEL-NEXT: s_or_b32 s5, s5, 0x7c00
+; VI-GISEL-NEXT: s_lshl_b32 s7, s8, s7
+; VI-GISEL-NEXT: s_cmp_lg_u32 s7, s2
+; VI-GISEL-NEXT: s_cselect_b32 s2, 1, 0
+; VI-GISEL-NEXT: s_or_b32 s2, s8, s2
+; VI-GISEL-NEXT: s_cmp_lt_i32 s4, 1
+; VI-GISEL-NEXT: s_cselect_b32 s2, s2, s6
+; VI-GISEL-NEXT: s_and_b32 s6, s2, 7
+; VI-GISEL-NEXT: s_lshr_b32 s2, s2, 2
+; VI-GISEL-NEXT: s_cmp_eq_u32 s6, 3
+; VI-GISEL-NEXT: s_cselect_b32 s7, 1, 0
+; VI-GISEL-NEXT: s_cmp_gt_i32 s6, 5
+; VI-GISEL-NEXT: s_cselect_b32 s6, 1, 0
+; VI-GISEL-NEXT: s_or_b32 s6, s7, s6
+; VI-GISEL-NEXT: s_add_i32 s2, s2, s6
+; VI-GISEL-NEXT: s_cmp_gt_i32 s4, 30
+; VI-GISEL-NEXT: s_cselect_b32 s2, 0x7c00, s2
+; VI-GISEL-NEXT: s_cmpk_eq_i32 s4, 0x40f
+; VI-GISEL-NEXT: s_cselect_b32 s2, s5, s2
+; VI-GISEL-NEXT: s_lshr_b32 s3, s3, 16
+; VI-GISEL-NEXT: s_and_b32 s3, s3, 0x8000
+; VI-GISEL-NEXT: s_or_b32 s2, s3, s2
+; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2
+; VI-GISEL-NEXT: s_mov_b32 s2, -1
+; VI-GISEL-NEXT: s_mov_b32 s3, 0xf000
+; VI-GISEL-NEXT: buffer_store_short v0, off, s[0:3], 0
+; VI-GISEL-NEXT: s_endpgm
+;
+; GFX9-SDAG-LABEL: fptrunc_f64_to_f16:
+; GFX9-SDAG: ; %bb.0: ; %entry
+; GFX9-SDAG-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x24
+; GFX9-SDAG-NEXT: s_mov_b32 s3, 0xf000
+; GFX9-SDAG-NEXT: s_mov_b32 s2, -1
+; GFX9-SDAG-NEXT: s_mov_b32 s6, s2
+; GFX9-SDAG-NEXT: s_mov_b32 s7, s3
+; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-SDAG-NEXT: s_mov_b32 s4, s10
+; GFX9-SDAG-NEXT: s_mov_b32 s5, s11
+; GFX9-SDAG-NEXT: buffer_load_dwordx2 v[0:1], off, s[4:7], 0
+; GFX9-SDAG-NEXT: s_mov_b32 s0, s8
+; GFX9-SDAG-NEXT: s_mov_b32 s1, s9
+; GFX9-SDAG-NEXT: s_movk_i32 s4, 0x7e00
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-SDAG-NEXT: v_readfirstlane_b32 s5, v1
+; GFX9-SDAG-NEXT: s_and_b32 s6, s5, 0x1ff
+; GFX9-SDAG-NEXT: v_or_b32_e32 v0, s6, v0
+; GFX9-SDAG-NEXT: s_lshr_b32 s7, s5, 8
+; GFX9-SDAG-NEXT: s_bfe_u32 s8, s5, 0xb0014
+; GFX9-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
+; GFX9-SDAG-NEXT: s_and_b32 s6, s7, 0xffe
+; GFX9-SDAG-NEXT: s_sub_i32 s7, 0x3f1, s8
+; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX9-SDAG-NEXT: v_med3_i32 v1, s7, 0, 13
+; GFX9-SDAG-NEXT: v_readfirstlane_b32 s7, v0
+; GFX9-SDAG-NEXT: s_or_b32 s6, s6, s7
+; GFX9-SDAG-NEXT: v_readfirstlane_b32 s9, v1
+; GFX9-SDAG-NEXT: s_or_b32 s7, s6, 0x1000
+; GFX9-SDAG-NEXT: s_lshr_b32 s10, s7, s9
+; GFX9-SDAG-NEXT: s_lshl_b32 s9, s10, s9
+; GFX9-SDAG-NEXT: s_cmp_lg_u32 s9, s7
+; GFX9-SDAG-NEXT: s_cselect_b32 s7, 1, 0
+; GFX9-SDAG-NEXT: s_addk_i32 s8, 0xfc10
+; GFX9-SDAG-NEXT: s_lshl_b32 s9, s8, 12
+; GFX9-SDAG-NEXT: s_or_b32 s7, s10, s7
+; GFX9-SDAG-NEXT: s_or_b32 s9, s6, s9
+; GFX9-SDAG-NEXT: s_cmp_lt_i32 s8, 1
+; GFX9-SDAG-NEXT: s_cselect_b32 s7, s7, s9
+; GFX9-SDAG-NEXT: s_and_b32 s9, s7, 7
+; GFX9-SDAG-NEXT: s_cmp_gt_i32 s9, 5
+; GFX9-SDAG-NEXT: s_cselect_b32 s10, 1, 0
+; GFX9-SDAG-NEXT: s_cmp_eq_u32 s9, 3
+; GFX9-SDAG-NEXT: s_cselect_b32 s9, 1, 0
+; GFX9-SDAG-NEXT: s_lshr_b32 s7, s7, 2
+; GFX9-SDAG-NEXT: s_or_b32 s9, s9, s10
+; GFX9-SDAG-NEXT: s_add_i32 s7, s7, s9
+; GFX9-SDAG-NEXT: s_cmp_lt_i32 s8, 31
+; GFX9-SDAG-NEXT: s_cselect_b32 s7, s7, 0x7c00
+; GFX9-SDAG-NEXT: s_cmp_lg_u32 s6, 0
+; GFX9-SDAG-NEXT: s_cselect_b32 s4, s4, 0x7c00
+; GFX9-SDAG-NEXT: s_cmpk_eq_i32 s8, 0x40f
+; GFX9-SDAG-NEXT: s_cselect_b32 s4, s4, s7
+; GFX9-SDAG-NEXT: s_lshr_b32 s5, s5, 16
+; GFX9-SDAG-NEXT: s_and_b32 s5, s5, 0x8000
+; GFX9-SDAG-NEXT: s_or_b32 s4, s5, s4
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, s4
+; GFX9-SDAG-NEXT: buffer_store_short v0, off, s[0:3], 0
+; GFX9-SDAG-NEXT: s_endpgm
+;
+; GFX9-GISEL-LABEL: fptrunc_f64_to_f16:
+; GFX9-GISEL: ; %bb.0: ; %entry
+; GFX9-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-GISEL-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0
+; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-GISEL-NEXT: s_bfe_u32 s4, s3, 0xb0014
+; GFX9-GISEL-NEXT: s_lshr_b32 s5, s3, 8
+; GFX9-GISEL-NEXT: s_and_b32 s6, s3, 0x1ff
+; GFX9-GISEL-NEXT: s_addk_i32 s4, 0xfc10
+; GFX9-GISEL-NEXT: s_and_b32 s5, s5, 0xffe
+; GFX9-GISEL-NEXT: s_or_b32 s2, s6, s2
+; GFX9-GISEL-NEXT: s_cmp_lg_u32 s2, 0
+; GFX9-GISEL-NEXT: s_cselect_b32 s2, 1, 0
+; GFX9-GISEL-NEXT: s_or_b32 s2, s5, s2
+; GFX9-GISEL-NEXT: s_cmp_lg_u32 s2, 0
+; GFX9-GISEL-NEXT: s_cselect_b32 s5, 1, 0
+; GFX9-GISEL-NEXT: s_sub_i32 s7, 1, s4
+; GFX9-GISEL-NEXT: s_lshl_b32 s6, s4, 12
+; GFX9-GISEL-NEXT: s_max_i32 s7, s7, 0
+; GFX9-GISEL-NEXT: s_or_b32 s6, s2, s6
+; GFX9-GISEL-NEXT: s_min_i32 s7, s7, 13
+; GFX9-GISEL-NEXT: s_bitset1_b32 s2, 12
+; GFX9-GISEL-NEXT: s_lshl_b32 s5, s5, 9
+; GFX9-GISEL-NEXT: s_lshr_b32 s8, s2, s7
+; GFX9-GISEL-NEXT: s_or_b32 s5, s5, 0x7c00
+; GFX9-GISEL-NEXT: s_lshl_b32 s7, s8, s7
+; GFX9-GISEL-NEXT: s_cmp_lg_u32 s7, s2
+; GFX9-GISEL-NEXT: s_cselect_b32 s2, 1, 0
+; GFX9-GISEL-NEXT: s_or_b32 s2, s8, s2
+; GFX9-GISEL-NEXT: s_cmp_lt_i32 s4, 1
+; GFX9-GISEL-NEXT: s_cselect_b32 s2, s2, s6
+; GFX9-GISEL-NEXT: s_and_b32 s6, s2, 7
+; GFX9-GISEL-NEXT: s_lshr_b32 s2, s2, 2
+; GFX9-GISEL-NEXT: s_cmp_eq_u32 s6, 3
+; GFX9-GISEL-NEXT: s_cselect_b32 s7, 1, 0
+; GFX9-GISEL-NEXT: s_cmp_gt_i32 s6, 5
+; GFX9-GISEL-NEXT: s_cselect_b32 s6, 1, 0
+; GFX9-GISEL-NEXT: s_or_b32 s6, s7, s6
+; GFX9-GISEL-NEXT: s_add_i32 s2, s2, s6
+; GFX9-GISEL-NEXT: s_cmp_gt_i32 s4, 30
+; GFX9-GISEL-NEXT: s_cselect_b32 s2, 0x7c00, s2
+; GFX9-GISEL-NEXT: s_cmpk_eq_i32 s4, 0x40f
+; GFX9-GISEL-NEXT: s_cselect_b32 s2, s5, s2
+; GFX9-GISEL-NEXT: s_lshr_b32 s3, s3, 16
+; GFX9-GISEL-NEXT: s_and_b32 s3, s3, 0x8000
+; GFX9-GISEL-NEXT: s_or_b32 s2, s3, s2
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX9-GISEL-NEXT: s_mov_b32 s2, -1
+; GFX9-GISEL-NEXT: s_mov_b32 s3, 0xf000
+; GFX9-GISEL-NEXT: buffer_store_short v0, off, s[0:3], 0
+; GFX9-GISEL-NEXT: s_endpgm
+;
+; GFX950-SDAG-LABEL: fptrunc_f64_to_f16:
+; GFX950-SDAG: ; %bb.0: ; %entry
+; GFX950-SDAG-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x24
+; GFX950-SDAG-NEXT: s_mov_b32 s3, 0xf000
+; GFX950-SDAG-NEXT: s_mov_b32 s2, -1
+; GFX950-SDAG-NEXT: s_mov_b32 s6, s2
+; GFX950-SDAG-NEXT: s_mov_b32 s7, s3
+; GFX950-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX950-SDAG-NEXT: s_mov_b32 s4, s10
+; GFX950-SDAG-NEXT: s_mov_b32 s5, s11
+; GFX950-SDAG-NEXT: buffer_load_dwordx2 v[0:1], off, s[4:7], 0
+; GFX950-SDAG-NEXT: s_mov_b32 s0, s8
+; GFX950-SDAG-NEXT: s_mov_b32 s1, s9
+; GFX950-SDAG-NEXT: s_movk_i32 s4, 0x7e00
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: v_readfirstlane_b32 s5, v1
+; GFX950-SDAG-NEXT: s_and_b32 s6, s5, 0x1ff
+; GFX950-SDAG-NEXT: v_or_b32_e32 v0, s6, v0
+; GFX950-SDAG-NEXT: s_lshr_b32 s7, s5, 8
+; GFX950-SDAG-NEXT: s_bfe_u32 s8, s5, 0xb0014
+; GFX950-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
+; GFX950-SDAG-NEXT: s_and_b32 s6, s7, 0xffe
+; GFX950-SDAG-NEXT: s_sub_i32 s7, 0x3f1, s8
+; GFX950-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX950-SDAG-NEXT: v_med3_i32 v1, s7, 0, 13
+; GFX950-SDAG-NEXT: v_readfirstlane_b32 s7, v0
+; GFX950-SDAG-NEXT: s_or_b32 s6, s6, s7
+; GFX950-SDAG-NEXT: v_readfirstlane_b32 s9, v1
+; GFX950-SDAG-NEXT: s_or_b32 s7, s6, 0x1000
+; GFX950-SDAG-NEXT: s_lshr_b32 s10, s7, s9
+; GFX950-SDAG-NEXT: s_lshl_b32 s9, s10, s9
+; GFX950-SDAG-NEXT: s_cmp_lg_u32 s9, s7
+; GFX950-SDAG-NEXT: s_cselect_b32 s7, 1, 0
+; GFX950-SDAG-NEXT: s_addk_i32 s8, 0xfc10
+; GFX950-SDAG-NEXT: s_lshl_b32 s9, s8, 12
+; GFX950-SDAG-NEXT: s_or_b32 s7, s10, s7
+; GFX950-SDAG-NEXT: s_or_b32 s9, s6, s9
+; GFX950-SDAG-NEXT: s_cmp_lt_i32 s8, 1
+; GFX950-SDAG-NEXT: s_cselect_b32 s7, s7, s9
+; GFX950-SDAG-NEXT: s_and_b32 s9, s7, 7
+; GFX950-SDAG-NEXT: s_cmp_gt_i32 s9, 5
+; GFX950-SDAG-NEXT: s_cselect_b32 s10, 1, 0
+; GFX950-SDAG-NEXT: s_cmp_eq_u32 s9, 3
+; GFX950-SDAG-NEXT: s_cselect_b32 s9, 1, 0
+; GFX950-SDAG-NEXT: s_lshr_b32 s7, s7, 2
+; GFX950-SDAG-NEXT: s_or_b32 s9, s9, s10
+; GFX950-SDAG-NEXT: s_add_i32 s7, s7, s9
+; GFX950-SDAG-NEXT: s_cmp_lt_i32 s8, 31
+; GFX950-SDAG-NEXT: s_cselect_b32 s7, s7, 0x7c00
+; GFX950-SDAG-NEXT: s_cmp_lg_u32 s6, 0
+; GFX950-SDAG-NEXT: s_cselect_b32 s4, s4, 0x7c00
+; GFX950-SDAG-NEXT: s_cmpk_eq_i32 s8, 0x40f
+; GFX950-SDAG-NEXT: s_cselect_b32 s4, s4, s7
+; GFX950-SDAG-NEXT: s_lshr_b32 s5, s5, 16
+; GFX950-SDAG-NEXT: s_and_b32 s5, s5, 0x8000
+; GFX950-SDAG-NEXT: s_or_b32 s4, s5, s4
+; GFX950-SDAG-NEXT: v_mov_b32_e32 v0, s4
+; GFX950-SDAG-NEXT: buffer_store_short v0, off, s[0:3], 0
+; GFX950-SDAG-NEXT: s_endpgm
+;
; GFX950-GISEL-LABEL: fptrunc_f64_to_f16:
; GFX950-GISEL: ; %bb.0: ; %entry
; GFX950-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX950-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GFX950-GISEL-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0
; GFX950-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX950-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[2:3]
-; GFX950-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX950-GISEL-NEXT: s_bfe_u32 s4, s3, 0xb0014
+; GFX950-GISEL-NEXT: s_lshr_b32 s5, s3, 8
+; GFX950-GISEL-NEXT: s_and_b32 s6, s3, 0x1ff
+; GFX950-GISEL-NEXT: s_addk_i32 s4, 0xfc10
+; GFX950-GISEL-NEXT: s_and_b32 s5, s5, 0xffe
+; GFX950-GISEL-NEXT: s_or_b32 s2, s6, s2
+; GFX950-GISEL-NEXT: s_cmp_lg_u32 s2, 0
+; GFX950-GISEL-NEXT: s_cselect_b32 s2, 1, 0
+; GFX950-GISEL-NEXT: s_or_b32 s2, s5, s2
+; GFX950-GISEL-NEXT: s_cmp_lg_u32 s2, 0
+; GFX950-GISEL-NEXT: s_cselect_b32 s5, 1, 0
+; GFX950-GISEL-NEXT: s_sub_i32 s7, 1, s4
+; GFX950-GISEL-NEXT: s_lshl_b32 s6, s4, 12
+; GFX950-GISEL-NEXT: s_max_i32 s7, s7, 0
+; GFX950-GISEL-NEXT: s_or_b32 s6, s2, s6
+; GFX950-GISEL-NEXT: s_min_i32 s7, s7, 13
+; GFX950-GISEL-NEXT: s_bitset1_b32 s2, 12
+; GFX950-GISEL-NEXT: s_lshl_b32 s5, s5, 9
+; GFX950-GISEL-NEXT: s_lshr_b32 s8, s2, s7
+; GFX950-GISEL-NEXT: s_or_b32 s5, s5, 0x7c00
+; GFX950-GISEL-NEXT: s_lshl_b32 s7, s8, s7
+; GFX950-GISEL-NEXT: s_cmp_lg_u32 s7, s2
+; GFX950-GISEL-NEXT: s_cselect_b32 s2, 1, 0
+; GFX950-GISEL-NEXT: s_or_b32 s2, s8, s2
+; GFX950-GISEL-NEXT: s_cmp_lt_i32 s4, 1
+; GFX950-GISEL-NEXT: s_cselect_b32 s2, s2, s6
+; GFX950-GISEL-NEXT: s_and_b32 s6, s2, 7
+; GFX950-GISEL-NEXT: s_lshr_b32 s2, s2, 2
+; GFX950-GISEL-NEXT: s_cmp_eq_u32 s6, 3
+; GFX950-GISEL-NEXT: s_cselect_b32 s7, 1, 0
+; GFX950-GISEL-NEXT: s_cmp_gt_i32 s6, 5
+; GFX950-GISEL-NEXT: s_cselect_b32 s6, 1, 0
+; GFX950-GISEL-NEXT: s_or_b32 s6, s7, s6
+; GFX950-GISEL-NEXT: s_add_i32 s2, s2, s6
+; GFX950-GISEL-NEXT: s_cmp_gt_i32 s4, 30
+; GFX950-GISEL-NEXT: s_cselect_b32 s2, 0x7c00, s2
+; GFX950-GISEL-NEXT: s_cmpk_eq_i32 s4, 0x40f
+; GFX950-GISEL-NEXT: s_cselect_b32 s2, s5, s2
+; GFX950-GISEL-NEXT: s_lshr_b32 s3, s3, 16
+; GFX950-GISEL-NEXT: s_and_b32 s3, s3, 0x8000
+; GFX950-GISEL-NEXT: s_or_b32 s2, s3, s2
+; GFX950-GISEL-NEXT: v_mov_b32_e32 v0, s2
; GFX950-GISEL-NEXT: s_mov_b32 s2, -1
; GFX950-GISEL-NEXT: s_mov_b32 s3, 0xf000
; GFX950-GISEL-NEXT: buffer_store_short v0, off, s[0:3], 0
@@ -340,13 +852,60 @@ define amdgpu_kernel void @fptrunc_f64_to_f16(
; GFX11-SDAG-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s8, s2
; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s9, s3
-; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s4, s0
; GFX11-SDAG-TRUE16-NEXT: buffer_load_b64 v[0:1], off, s[8:11], 0
-; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s5, s1
; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0)
-; GFX11-SDAG-TRUE16-NEXT: v_cvt_f32_f64_e32 v0, v[0:1]
-; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SDAG-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.l, v0
+; GFX11-SDAG-TRUE16-NEXT: v_readfirstlane_b32 s2, v1
+; GFX11-SDAG-TRUE16-NEXT: s_and_b32 s3, s2, 0x1ff
+; GFX11-SDAG-TRUE16-NEXT: s_lshr_b32 s5, s2, 8
+; GFX11-SDAG-TRUE16-NEXT: v_or_b32_e32 v0, s3, v0
+; GFX11-SDAG-TRUE16-NEXT: s_bfe_u32 s3, s2, 0xb0014
+; GFX11-SDAG-TRUE16-NEXT: s_and_b32 s5, s5, 0xffe
+; GFX11-SDAG-TRUE16-NEXT: s_sub_i32 s4, 0x3f1, s3
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
+; GFX11-SDAG-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-SDAG-TRUE16-NEXT: v_med3_i32 v1, s4, 0, 13
+; GFX11-SDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX11-SDAG-TRUE16-NEXT: v_readfirstlane_b32 s8, v1
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
+; GFX11-SDAG-TRUE16-NEXT: v_readfirstlane_b32 s4, v0
+; GFX11-SDAG-TRUE16-NEXT: s_or_b32 s4, s5, s4
+; GFX11-SDAG-TRUE16-NEXT: s_or_b32 s5, s4, 0x1000
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-SDAG-TRUE16-NEXT: s_lshr_b32 s9, s5, s8
+; GFX11-SDAG-TRUE16-NEXT: s_lshl_b32 s8, s9, s8
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_4) | instid1(SALU_CYCLE_1)
+; GFX11-SDAG-TRUE16-NEXT: s_cmp_lg_u32 s8, s5
+; GFX11-SDAG-TRUE16-NEXT: s_cselect_b32 s5, 1, 0
+; GFX11-SDAG-TRUE16-NEXT: s_addk_i32 s3, 0xfc10
+; GFX11-SDAG-TRUE16-NEXT: s_or_b32 s5, s9, s5
+; GFX11-SDAG-TRUE16-NEXT: s_lshl_b32 s8, s3, 12
+; GFX11-SDAG-TRUE16-NEXT: s_or_b32 s8, s4, s8
+; GFX11-SDAG-TRUE16-NEXT: s_cmp_lt_i32 s3, 1
+; GFX11-SDAG-TRUE16-NEXT: s_cselect_b32 s5, s5, s8
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-SDAG-TRUE16-NEXT: s_and_b32 s8, s5, 7
+; GFX11-SDAG-TRUE16-NEXT: s_cmp_gt_i32 s8, 5
+; GFX11-SDAG-TRUE16-NEXT: s_cselect_b32 s9, 1, 0
+; GFX11-SDAG-TRUE16-NEXT: s_cmp_eq_u32 s8, 3
+; GFX11-SDAG-TRUE16-NEXT: s_cselect_b32 s8, 1, 0
+; GFX11-SDAG-TRUE16-NEXT: s_lshr_b32 s5, s5, 2
+; GFX11-SDAG-TRUE16-NEXT: s_or_b32 s8, s8, s9
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-SDAG-TRUE16-NEXT: s_add_i32 s5, s5, s8
+; GFX11-SDAG-TRUE16-NEXT: s_cmp_lt_i32 s3, 31
+; GFX11-SDAG-TRUE16-NEXT: s_movk_i32 s8, 0x7e00
+; GFX11-SDAG-TRUE16-NEXT: s_cselect_b32 s5, s5, 0x7c00
+; GFX11-SDAG-TRUE16-NEXT: s_cmp_lg_u32 s4, 0
+; GFX11-SDAG-TRUE16-NEXT: s_cselect_b32 s4, s8, 0x7c00
+; GFX11-SDAG-TRUE16-NEXT: s_cmpk_eq_i32 s3, 0x40f
+; GFX11-SDAG-TRUE16-NEXT: s_cselect_b32 s3, s4, s5
+; GFX11-SDAG-TRUE16-NEXT: s_lshr_b32 s2, s2, 16
+; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s4, s0
+; GFX11-SDAG-TRUE16-NEXT: s_and_b32 s2, s2, 0x8000
+; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s5, s1
+; GFX11-SDAG-TRUE16-NEXT: s_or_b32 s2, s2, s3
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b32_e32 v0, s2
; GFX11-SDAG-TRUE16-NEXT: buffer_store_b16 v0, off, s[4:7], 0
; GFX11-SDAG-TRUE16-NEXT: s_endpgm
;
@@ -360,13 +919,60 @@ define amdgpu_kernel void @fptrunc_f64_to_f16(
; GFX11-SDAG-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s8, s2
; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s9, s3
-; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s4, s0
; GFX11-SDAG-FAKE16-NEXT: buffer_load_b64 v[0:1], off, s[8:11], 0
-; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s5, s1
; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0)
-; GFX11-SDAG-FAKE16-NEXT: v_cvt_f32_f64_e32 v0, v[0:1]
-; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SDAG-FAKE16-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX11-SDAG-FAKE16-NEXT: v_readfirstlane_b32 s2, v1
+; GFX11-SDAG-FAKE16-NEXT: s_and_b32 s3, s2, 0x1ff
+; GFX11-SDAG-FAKE16-NEXT: s_lshr_b32 s5, s2, 8
+; GFX11-SDAG-FAKE16-NEXT: v_or_b32_e32 v0, s3, v0
+; GFX11-SDAG-FAKE16-NEXT: s_bfe_u32 s3, s2, 0xb0014
+; GFX11-SDAG-FAKE16-NEXT: s_and_b32 s5, s5, 0xffe
+; GFX11-SDAG-FAKE16-NEXT: s_sub_i32 s4, 0x3f1, s3
+; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
+; GFX11-SDAG-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-SDAG-FAKE16-NEXT: v_med3_i32 v1, s4, 0, 13
+; GFX11-SDAG-FAKE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT: v_readfirstlane_b32 s8, v1
+; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
+; GFX11-SDAG-FAKE16-NEXT: v_readfirstlane_b32 s4, v0
+; GFX11-SDAG-FAKE16-NEXT: s_or_b32 s4, s5, s4
+; GFX11-SDAG-FAKE16-NEXT: s_or_b32 s5, s4, 0x1000
+; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-SDAG-FAKE16-NEXT: s_lshr_b32 s9, s5, s8
+; GFX11-SDAG-FAKE16-NEXT: s_lshl_b32 s8, s9, s8
+; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_4) | instid1(SALU_CYCLE_1)
+; GFX11-SDAG-FAKE16-NEXT: s_cmp_lg_u32 s8, s5
+; GFX11-SDAG-FAKE16-NEXT: s_cselect_b32 s5, 1, 0
+; GFX11-SDAG-FAKE16-NEXT: s_addk_i32 s3, 0xfc10
+; GFX11-SDAG-FAKE16-NEXT: s_or_b32 s5, s9, s5
+; GFX11-SDAG-FAKE16-NEXT: s_lshl_b32 s8, s3, 12
+; GFX11-SDAG-FAKE16-NEXT: s_or_b32 s8, s4, s8
+; GFX11-SDAG-FAKE16-NEXT: s_cmp_lt_i32 s3, 1
+; GFX11-SDAG-FAKE16-NEXT: s_cselect_b32 s5, s5, s8
+; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-SDAG-FAKE16-NEXT: s_and_b32 s8, s5, 7
+; GFX11-SDAG-FAKE16-NEXT: s_cmp_gt_i32 s8, 5
+; GFX11-SDAG-FAKE16-NEXT: s_cselect_b32 s9, 1, 0
+; GFX11-SDAG-FAKE16-NEXT: s_cmp_eq_u32 s8, 3
+; GFX11-SDAG-FAKE16-NEXT: s_cselect_b32 s8, 1, 0
+; GFX11-SDAG-FAKE16-NEXT: s_lshr_b32 s5, s5, 2
+; GFX11-SDAG-FAKE16-NEXT: s_or_b32 s8, s8, s9
+; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-SDAG-FAKE16-NEXT: s_add_i32 s5, s5, s8
+; GFX11-SDAG-FAKE16-NEXT: s_cmp_lt_i32 s3, 31
+; GFX11-SDAG-FAKE16-NEXT: s_movk_i32 s8, 0x7e00
+; GFX11-SDAG-FAKE16-NEXT: s_cselect_b32 s5, s5, 0x7c00
+; GFX11-SDAG-FAKE16-NEXT: s_cmp_lg_u32 s4, 0
+; GFX11-SDAG-FAKE16-NEXT: s_cselect_b32 s4, s8, 0x7c00
+; GFX11-SDAG-FAKE16-NEXT: s_cmpk_eq_i32 s3, 0x40f
+; GFX11-SDAG-FAKE16-NEXT: s_cselect_b32 s3, s4, s5
+; GFX11-SDAG-FAKE16-NEXT: s_lshr_b32 s2, s2, 16
+; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s4, s0
+; GFX11-SDAG-FAKE16-NEXT: s_and_b32 s2, s2, 0x8000
+; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s5, s1
+; GFX11-SDAG-FAKE16-NEXT: s_or_b32 s2, s2, s3
+; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-SDAG-FAKE16-NEXT: v_mov_b32_e32 v0, s2
; GFX11-SDAG-FAKE16-NEXT: buffer_store_b16 v0, off, s[4:7], 0
; GFX11-SDAG-FAKE16-NEXT: s_endpgm
;
@@ -376,6 +982,555 @@ define amdgpu_kernel void @fptrunc_f64_to_f16(
; GFX11-GISEL-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-GISEL-TRUE16-NEXT: s_load_b64 s[2:3], s[2:3], 0x0
; GFX11-GISEL-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-TRUE16-NEXT: s_and_b32 s6, s3, 0x1ff
+; GFX11-GISEL-TRUE16-NEXT: s_bfe_u32 s4, s3, 0xb0014
+; GFX11-GISEL-TRUE16-NEXT: s_lshr_b32 s5, s3, 8
+; GFX11-GISEL-TRUE16-NEXT: s_or_b32 s2, s6, s2
+; GFX11-GISEL-TRUE16-NEXT: s_addk_i32 s4, 0xfc10
+; GFX11-GISEL-TRUE16-NEXT: s_and_b32 s5, s5, 0xffe
+; GFX11-GISEL-TRUE16-NEXT: s_cmp_lg_u32 s2, 0
+; GFX11-GISEL-TRUE16-NEXT: s_cselect_b32 s2, 1, 0
+; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-GISEL-TRUE16-NEXT: s_or_b32 s2, s5, s2
+; GFX11-GISEL-TRUE16-NEXT: s_cmp_lg_u32 s2, 0
+; GFX11-GISEL-TRUE16-NEXT: s_cselect_b32 s5, 1, 0
+; GFX11-GISEL-TRUE16-NEXT: s_sub_i32 s6, 1, s4
+; GFX11-GISEL-TRUE16-NEXT: s_or_b32 s8, s2, 0x1000
+; GFX11-GISEL-TRUE16-NEXT: s_max_i32 s6, s6, 0
+; GFX11-GISEL-TRUE16-NEXT: s_lshl_b32 s7, s4, 12
+; GFX11-GISEL-TRUE16-NEXT: s_min_i32 s6, s6, 13
+; GFX11-GISEL-TRUE16-NEXT: s_lshl_b32 s5, s5, 9
+; GFX11-GISEL-TRUE16-NEXT: s_lshr_b32 s9, s8, s6
+; GFX11-GISEL-TRUE16-NEXT: s_or_b32 s2, s2, s7
+; GFX11-GISEL-TRUE16-NEXT: s_lshl_b32 s6, s9, s6
+; GFX11-GISEL-TRUE16-NEXT: s_or_b32 s5, s5, 0x7c00
+; GFX11-GISEL-TRUE16-NEXT: s_cmp_lg_u32 s6, s8
+; GFX11-GISEL-TRUE16-NEXT: s_cselect_b32 s6, 1, 0
+; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
+; GFX11-GISEL-TRUE16-NEXT: s_or_b32 s6, s9, s6
+; GFX11-GISEL-TRUE16-NEXT: s_cmp_lt_i32 s4, 1
+; GFX11-GISEL-TRUE16-NEXT: s_cselect_b32 s2, s6, s2
+; GFX11-GISEL-TRUE16-NEXT: s_and_b32 s6, s2, 7
+; GFX11-GISEL-TRUE16-NEXT: s_lshr_b32 s2, s2, 2
+; GFX11-GISEL-TRUE16-NEXT: s_cmp_eq_u32 s6, 3
+; GFX11-GISEL-TRUE16-NEXT: s_cselect_b32 s7, 1, 0
+; GFX11-GISEL-TRUE16-NEXT: s_cmp_gt_i32 s6, 5
+; GFX11-GISEL-TRUE16-NEXT: s_cselect_b32 s6, 1, 0
+; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-GISEL-TRUE16-NEXT: s_or_b32 s6, s7, s6
+; GFX11-GISEL-TRUE16-NEXT: s_add_i32 s2, s2, s6
+; GFX11-GISEL-TRUE16-NEXT: s_cmp_gt_i32 s4, 30
+; GFX11-GISEL-TRUE16-NEXT: s_cselect_b32 s2, 0x7c00, s2
+; GFX11-GISEL-TRUE16-NEXT: s_cmpk_eq_i32 s4, 0x40f
+; GFX11-GISEL-TRUE16-NEXT: s_cselect_b32 s2, s5, s2
+; GFX11-GISEL-TRUE16-NEXT: s_lshr_b32 s3, s3, 16
+; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-GISEL-TRUE16-NEXT: s_and_b32 s3, s3, 0x8000
+; GFX11-GISEL-TRUE16-NEXT: s_or_b32 s2, s3, s2
+; GFX11-GISEL-TRUE16-NEXT: s_mov_b32 s3, 0x31016000
+; GFX11-GISEL-TRUE16-NEXT: v_mov_b32_e32 v0, s2
+; GFX11-GISEL-TRUE16-NEXT: s_mov_b32 s2, -1
+; GFX11-GISEL-TRUE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0
+; GFX11-GISEL-TRUE16-NEXT: s_endpgm
+;
+; GFX11-GISEL-FAKE16-LABEL: fptrunc_f64_to_f16:
+; GFX11-GISEL-FAKE16: ; %bb.0: ; %entry
+; GFX11-GISEL-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-GISEL-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-FAKE16-NEXT: s_load_b64 s[2:3], s[2:3], 0x0
+; GFX11-GISEL-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-FAKE16-NEXT: s_and_b32 s6, s3, 0x1ff
+; GFX11-GISEL-FAKE16-NEXT: s_bfe_u32 s4, s3, 0xb0014
+; GFX11-GISEL-FAKE16-NEXT: s_lshr_b32 s5, s3, 8
+; GFX11-GISEL-FAKE16-NEXT: s_or_b32 s2, s6, s2
+; GFX11-GISEL-FAKE16-NEXT: s_addk_i32 s4, 0xfc10
+; GFX11-GISEL-FAKE16-NEXT: s_and_b32 s5, s5, 0xffe
+; GFX11-GISEL-FAKE16-NEXT: s_cmp_lg_u32 s2, 0
+; GFX11-GISEL-FAKE16-NEXT: s_cselect_b32 s2, 1, 0
+; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-GISEL-FAKE16-NEXT: s_or_b32 s2, s5, s2
+; GFX11-GISEL-FAKE16-NEXT: s_cmp_lg_u32 s2, 0
+; GFX11-GISEL-FAKE16-NEXT: s_cselect_b32 s5, 1, 0
+; GFX11-GISEL-FAKE16-NEXT: s_sub_i32 s6, 1, s4
+; GFX11-GISEL-FAKE16-NEXT: s_or_b32 s8, s2, 0x1000
+; GFX11-GISEL-FAKE16-NEXT: s_max_i32 s6, s6, 0
+; GFX11-GISEL-FAKE16-NEXT: s_lshl_b32 s7, s4, 12
+; GFX11-GISEL-FAKE16-NEXT: s_min_i32 s6, s6, 13
+; GFX11-GISEL-FAKE16-NEXT: s_lshl_b32 s5, s5, 9
+; GFX11-GISEL-FAKE16-NEXT: s_lshr_b32 s9, s8, s6
+; GFX11-GISEL-FAKE16-NEXT: s_or_b32 s2, s2, s7
+; GFX11-GISEL-FAKE16-NEXT: s_lshl_b32 s6, s9, s6
+; GFX11-GISEL-FAKE16-NEXT: s_or_b32 s5, s5, 0x7c00
+; GFX11-GISEL-FAKE16-NEXT: s_cmp_lg_u32 s6, s8
+; GFX11-GISEL-FAKE16-NEXT: s_cselect_b32 s6, 1, 0
+; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
+; GFX11-GISEL-FAKE16-NEXT: s_or_b32 s6, s9, s6
+; GFX11-GISEL-FAKE16-NEXT: s_cmp_lt_i32 s4, 1
+; GFX11-GISEL-FAKE16-NEXT: s_cselect_b32 s2, s6, s2
+; GFX11-GISEL-FAKE16-NEXT: s_and_b32 s6, s2, 7
+; GFX11-GISEL-FAKE16-NEXT: s_lshr_b32 s2, s2, 2
+; GFX11-GISEL-FAKE16-NEXT: s_cmp_eq_u32 s6, 3
+; GFX11-GISEL-FAKE16-NEXT: s_cselect_b32 s7, 1, 0
+; GFX11-GISEL-FAKE16-NEXT: s_cmp_gt_i32 s6, 5
+; GFX11-GISEL-FAKE16-NEXT: s_cselect_b32 s6, 1, 0
+; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-GISEL-FAKE16-NEXT: s_or_b32 s6, s7, s6
+; GFX11-GISEL-FAKE16-NEXT: s_add_i32 s2, s2, s6
+; GFX11-GISEL-FAKE16-NEXT: s_cmp_gt_i32 s4, 30
+; GFX11-GISEL-FAKE16-NEXT: s_cselect_b32 s2, 0x7c00, s2
+; GFX11-GISEL-FAKE16-NEXT: s_cmpk_eq_i32 s4, 0x40f
+; GFX11-GISEL-FAKE16-NEXT: s_cselect_b32 s2, s5, s2
+; GFX11-GISEL-FAKE16-NEXT: s_lshr_b32 s3, s3, 16
+; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-GISEL-FAKE16-NEXT: s_and_b32 s3, s3, 0x8000
+; GFX11-GISEL-FAKE16-NEXT: s_or_b32 s2, s3, s2
+; GFX11-GISEL-FAKE16-NEXT: s_mov_b32 s3, 0x31016000
+; GFX11-GISEL-FAKE16-NEXT: v_mov_b32_e32 v0, s2
+; GFX11-GISEL-FAKE16-NEXT: s_mov_b32 s2, -1
+; GFX11-GISEL-FAKE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0
+; GFX11-GISEL-FAKE16-NEXT: s_endpgm
+ ptr addrspace(1) %r,
+ ptr addrspace(1) %a) {
+entry:
+ %a.val = load double, ptr addrspace(1) %a
+ %r.val = fptrunc double %a.val to half
+ store half %r.val, ptr addrspace(1) %r
+ ret void
+}
+
+define amdgpu_kernel void @fptrunc_f64_to_f16_afn(
+; SI-SDAG-LABEL: fptrunc_f64_to_f16_afn:
+; SI-SDAG: ; %bb.0: ; %entry
+; SI-SDAG-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x9
+; SI-SDAG-NEXT: s_mov_b32 s3, 0xf000
+; SI-SDAG-NEXT: s_mov_b32 s2, -1
+; SI-SDAG-NEXT: s_mov_b32 s10, s2
+; SI-SDAG-NEXT: s_mov_b32 s11, s3
+; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; SI-SDAG-NEXT: s_mov_b32 s8, s6
+; SI-SDAG-NEXT: s_mov_b32 s9, s7
+; SI-SDAG-NEXT: buffer_load_dwordx2 v[0:1], off, s[8:11], 0
+; SI-SDAG-NEXT: s_movk_i32 s0, 0x7e00
+; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
+; SI-SDAG-NEXT: v_readfirstlane_b32 s1, v1
+; SI-SDAG-NEXT: s_and_b32 s6, s1, 0x1ff
+; SI-SDAG-NEXT: s_lshr_b32 s7, s1, 8
+; SI-SDAG-NEXT: s_bfe_u32 s8, s1, 0xb0014
+; SI-SDAG-NEXT: v_or_b32_e32 v0, s6, v0
+; SI-SDAG-NEXT: s_and_b32 s6, s7, 0xffe
+; SI-SDAG-NEXT: s_sub_i32 s7, 0x3f1, s8
+; SI-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
+; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; SI-SDAG-NEXT: v_med3_i32 v1, s7, 0, 13
+; SI-SDAG-NEXT: v_readfirstlane_b32 s7, v0
+; SI-SDAG-NEXT: v_readfirstlane_b32 s9, v1
+; SI-SDAG-NEXT: s_or_b32 s6, s6, s7
+; SI-SDAG-NEXT: s_or_b32 s7, s6, 0x1000
+; SI-SDAG-NEXT: s_lshr_b32 s10, s7, s9
+; SI-SDAG-NEXT: s_lshl_b32 s9, s10, s9
+; SI-SDAG-NEXT: s_cmp_lg_u32 s9, s7
+; SI-SDAG-NEXT: s_cselect_b32 s7, 1, 0
+; SI-SDAG-NEXT: s_addk_i32 s8, 0xfc10
+; SI-SDAG-NEXT: s_or_b32 s7, s10, s7
+; SI-SDAG-NEXT: s_lshl_b32 s9, s8, 12
+; SI-SDAG-NEXT: s_or_b32 s9, s6, s9
+; SI-SDAG-NEXT: s_cmp_lt_i32 s8, 1
+; SI-SDAG-NEXT: s_cselect_b32 s7, s7, s9
+; SI-SDAG-NEXT: s_and_b32 s9, s7, 7
+; SI-SDAG-NEXT: s_cmp_gt_i32 s9, 5
+; SI-SDAG-NEXT: s_cselect_b32 s10, 1, 0
+; SI-SDAG-NEXT: s_cmp_eq_u32 s9, 3
+; SI-SDAG-NEXT: s_cselect_b32 s9, 1, 0
+; SI-SDAG-NEXT: s_lshr_b32 s7, s7, 2
+; SI-SDAG-NEXT: s_or_b32 s9, s9, s10
+; SI-SDAG-NEXT: s_add_i32 s7, s7, s9
+; SI-SDAG-NEXT: s_cmp_lt_i32 s8, 31
+; SI-SDAG-NEXT: s_cselect_b32 s7, s7, 0x7c00
+; SI-SDAG-NEXT: s_cmp_lg_u32 s6, 0
+; SI-SDAG-NEXT: s_cselect_b32 s0, s0, 0x7c00
+; SI-SDAG-NEXT: s_cmpk_eq_i32 s8, 0x40f
+; SI-SDAG-NEXT: s_cselect_b32 s0, s0, s7
+; SI-SDAG-NEXT: s_lshr_b32 s1, s1, 16
+; SI-SDAG-NEXT: s_and_b32 s1, s1, 0x8000
+; SI-SDAG-NEXT: s_or_b32 s6, s1, s0
+; SI-SDAG-NEXT: s_mov_b32 s0, s4
+; SI-SDAG-NEXT: s_mov_b32 s1, s5
+; SI-SDAG-NEXT: v_mov_b32_e32 v0, s6
+; SI-SDAG-NEXT: buffer_store_short v0, off, s[0:3], 0
+; SI-SDAG-NEXT: s_endpgm
+;
+; SI-GISEL-LABEL: fptrunc_f64_to_f16_afn:
+; SI-GISEL: ; %bb.0: ; %entry
+; SI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
+; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; SI-GISEL-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x0
+; SI-GISEL-NEXT: s_mov_b32 s2, -1
+; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; SI-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[4:5]
+; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
+; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000
+; SI-GISEL-NEXT: buffer_store_short v0, off, s[0:3], 0
+; SI-GISEL-NEXT: s_endpgm
+;
+; VI-SDAG-LABEL: fptrunc_f64_to_f16_afn:
+; VI-SDAG: ; %bb.0: ; %entry
+; VI-SDAG-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x24
+; VI-SDAG-NEXT: s_mov_b32 s3, 0xf000
+; VI-SDAG-NEXT: s_mov_b32 s2, -1
+; VI-SDAG-NEXT: s_mov_b32 s10, s2
+; VI-SDAG-NEXT: s_mov_b32 s11, s3
+; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; VI-SDAG-NEXT: s_mov_b32 s8, s6
+; VI-SDAG-NEXT: s_mov_b32 s9, s7
+; VI-SDAG-NEXT: buffer_load_dwordx2 v[0:1], off, s[8:11], 0
+; VI-SDAG-NEXT: s_mov_b32 s0, s4
+; VI-SDAG-NEXT: s_mov_b32 s1, s5
+; VI-SDAG-NEXT: s_movk_i32 s6, 0x7e00
+; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
+; VI-SDAG-NEXT: v_readfirstlane_b32 s4, v1
+; VI-SDAG-NEXT: s_and_b32 s5, s4, 0x1ff
+; VI-SDAG-NEXT: v_or_b32_e32 v0, s5, v0
+; VI-SDAG-NEXT: s_lshr_b32 s7, s4, 8
+; VI-SDAG-NEXT: s_bfe_u32 s8, s4, 0xb0014
+; VI-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
+; VI-SDAG-NEXT: s_and_b32 s5, s7, 0xffe
+; VI-SDAG-NEXT: s_sub_i32 s7, 0x3f1, s8
+; VI-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; VI-SDAG-NEXT: v_med3_i32 v1, s7, 0, 13
+; VI-SDAG-NEXT: v_readfirstlane_b32 s7, v0
+; VI-SDAG-NEXT: s_or_b32 s5, s5, s7
+; VI-SDAG-NEXT: v_readfirstlane_b32 s9, v1
+; VI-SDAG-NEXT: s_or_b32 s7, s5, 0x1000
+; VI-SDAG-NEXT: s_lshr_b32 s10, s7, s9
+; VI-SDAG-NEXT: s_lshl_b32 s9, s10, s9
+; VI-SDAG-NEXT: s_cmp_lg_u32 s9, s7
+; VI-SDAG-NEXT: s_cselect_b32 s7, 1, 0
+; VI-SDAG-NEXT: s_addk_i32 s8, 0xfc10
+; VI-SDAG-NEXT: s_lshl_b32 s9, s8, 12
+; VI-SDAG-NEXT: s_or_b32 s7, s10, s7
+; VI-SDAG-NEXT: s_or_b32 s9, s5, s9
+; VI-SDAG-NEXT: s_cmp_lt_i32 s8, 1
+; VI-SDAG-NEXT: s_cselect_b32 s7, s7, s9
+; VI-SDAG-NEXT: s_and_b32 s9, s7, 7
+; VI-SDAG-NEXT: s_cmp_gt_i32 s9, 5
+; VI-SDAG-NEXT: s_cselect_b32 s10, 1, 0
+; VI-SDAG-NEXT: s_cmp_eq_u32 s9, 3
+; VI-SDAG-NEXT: s_cselect_b32 s9, 1, 0
+; VI-SDAG-NEXT: s_lshr_b32 s7, s7, 2
+; VI-SDAG-NEXT: s_or_b32 s9, s9, s10
+; VI-SDAG-NEXT: s_add_i32 s7, s7, s9
+; VI-SDAG-NEXT: s_cmp_lt_i32 s8, 31
+; VI-SDAG-NEXT: s_cselect_b32 s7, s7, 0x7c00
+; VI-SDAG-NEXT: s_cmp_lg_u32 s5, 0
+; VI-SDAG-NEXT: s_cselect_b32 s5, s6, 0x7c00
+; VI-SDAG-NEXT: s_cmpk_eq_i32 s8, 0x40f
+; VI-SDAG-NEXT: s_cselect_b32 s5, s5, s7
+; VI-SDAG-NEXT: s_lshr_b32 s4, s4, 16
+; VI-SDAG-NEXT: s_and_b32 s4, s4, 0x8000
+; VI-SDAG-NEXT: s_or_b32 s4, s4, s5
+; VI-SDAG-NEXT: v_mov_b32_e32 v0, s4
+; VI-SDAG-NEXT: buffer_store_short v0, off, s[0:3], 0
+; VI-SDAG-NEXT: s_endpgm
+;
+; VI-GISEL-LABEL: fptrunc_f64_to_f16_afn:
+; VI-GISEL: ; %bb.0: ; %entry
+; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; VI-GISEL-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0
+; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; VI-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[2:3]
+; VI-GISEL-NEXT: s_mov_b32 s2, -1
+; VI-GISEL-NEXT: s_mov_b32 s3, 0xf000
+; VI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
+; VI-GISEL-NEXT: buffer_store_short v0, off, s[0:3], 0
+; VI-GISEL-NEXT: s_endpgm
+;
+; GFX9-SDAG-LABEL: fptrunc_f64_to_f16_afn:
+; GFX9-SDAG: ; %bb.0: ; %entry
+; GFX9-SDAG-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x24
+; GFX9-SDAG-NEXT: s_mov_b32 s3, 0xf000
+; GFX9-SDAG-NEXT: s_mov_b32 s2, -1
+; GFX9-SDAG-NEXT: s_mov_b32 s6, s2
+; GFX9-SDAG-NEXT: s_mov_b32 s7, s3
+; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-SDAG-NEXT: s_mov_b32 s4, s10
+; GFX9-SDAG-NEXT: s_mov_b32 s5, s11
+; GFX9-SDAG-NEXT: buffer_load_dwordx2 v[0:1], off, s[4:7], 0
+; GFX9-SDAG-NEXT: s_mov_b32 s0, s8
+; GFX9-SDAG-NEXT: s_mov_b32 s1, s9
+; GFX9-SDAG-NEXT: s_movk_i32 s4, 0x7e00
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-SDAG-NEXT: v_readfirstlane_b32 s5, v1
+; GFX9-SDAG-NEXT: s_and_b32 s6, s5, 0x1ff
+; GFX9-SDAG-NEXT: v_or_b32_e32 v0, s6, v0
+; GFX9-SDAG-NEXT: s_lshr_b32 s7, s5, 8
+; GFX9-SDAG-NEXT: s_bfe_u32 s8, s5, 0xb0014
+; GFX9-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
+; GFX9-SDAG-NEXT: s_and_b32 s6, s7, 0xffe
+; GFX9-SDAG-NEXT: s_sub_i32 s7, 0x3f1, s8
+; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX9-SDAG-NEXT: v_med3_i32 v1, s7, 0, 13
+; GFX9-SDAG-NEXT: v_readfirstlane_b32 s7, v0
+; GFX9-SDAG-NEXT: s_or_b32 s6, s6, s7
+; GFX9-SDAG-NEXT: v_readfirstlane_b32 s9, v1
+; GFX9-SDAG-NEXT: s_or_b32 s7, s6, 0x1000
+; GFX9-SDAG-NEXT: s_lshr_b32 s10, s7, s9
+; GFX9-SDAG-NEXT: s_lshl_b32 s9, s10, s9
+; GFX9-SDAG-NEXT: s_cmp_lg_u32 s9, s7
+; GFX9-SDAG-NEXT: s_cselect_b32 s7, 1, 0
+; GFX9-SDAG-NEXT: s_addk_i32 s8, 0xfc10
+; GFX9-SDAG-NEXT: s_lshl_b32 s9, s8, 12
+; GFX9-SDAG-NEXT: s_or_b32 s7, s10, s7
+; GFX9-SDAG-NEXT: s_or_b32 s9, s6, s9
+; GFX9-SDAG-NEXT: s_cmp_lt_i32 s8, 1
+; GFX9-SDAG-NEXT: s_cselect_b32 s7, s7, s9
+; GFX9-SDAG-NEXT: s_and_b32 s9, s7, 7
+; GFX9-SDAG-NEXT: s_cmp_gt_i32 s9, 5
+; GFX9-SDAG-NEXT: s_cselect_b32 s10, 1, 0
+; GFX9-SDAG-NEXT: s_cmp_eq_u32 s9, 3
+; GFX9-SDAG-NEXT: s_cselect_b32 s9, 1, 0
+; GFX9-SDAG-NEXT: s_lshr_b32 s7, s7, 2
+; GFX9-SDAG-NEXT: s_or_b32 s9, s9, s10
+; GFX9-SDAG-NEXT: s_add_i32 s7, s7, s9
+; GFX9-SDAG-NEXT: s_cmp_lt_i32 s8, 31
+; GFX9-SDAG-NEXT: s_cselect_b32 s7, s7, 0x7c00
+; GFX9-SDAG-NEXT: s_cmp_lg_u32 s6, 0
+; GFX9-SDAG-NEXT: s_cselect_b32 s4, s4, 0x7c00
+; GFX9-SDAG-NEXT: s_cmpk_eq_i32 s8, 0x40f
+; GFX9-SDAG-NEXT: s_cselect_b32 s4, s4, s7
+; GFX9-SDAG-NEXT: s_lshr_b32 s5, s5, 16
+; GFX9-SDAG-NEXT: s_and_b32 s5, s5, 0x8000
+; GFX9-SDAG-NEXT: s_or_b32 s4, s5, s4
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, s4
+; GFX9-SDAG-NEXT: buffer_store_short v0, off, s[0:3], 0
+; GFX9-SDAG-NEXT: s_endpgm
+;
+; GFX9-GISEL-LABEL: fptrunc_f64_to_f16_afn:
+; GFX9-GISEL: ; %bb.0: ; %entry
+; GFX9-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-GISEL-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0
+; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[2:3]
+; GFX9-GISEL-NEXT: s_mov_b32 s2, -1
+; GFX9-GISEL-NEXT: s_mov_b32 s3, 0xf000
+; GFX9-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX9-GISEL-NEXT: buffer_store_short v0, off, s[0:3], 0
+; GFX9-GISEL-NEXT: s_endpgm
+;
+; GFX950-SDAG-LABEL: fptrunc_f64_to_f16_afn:
+; GFX950-SDAG: ; %bb.0: ; %entry
+; GFX950-SDAG-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x24
+; GFX950-SDAG-NEXT: s_mov_b32 s3, 0xf000
+; GFX950-SDAG-NEXT: s_mov_b32 s2, -1
+; GFX950-SDAG-NEXT: s_mov_b32 s6, s2
+; GFX950-SDAG-NEXT: s_mov_b32 s7, s3
+; GFX950-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX950-SDAG-NEXT: s_mov_b32 s4, s10
+; GFX950-SDAG-NEXT: s_mov_b32 s5, s11
+; GFX950-SDAG-NEXT: buffer_load_dwordx2 v[0:1], off, s[4:7], 0
+; GFX950-SDAG-NEXT: s_mov_b32 s0, s8
+; GFX950-SDAG-NEXT: s_mov_b32 s1, s9
+; GFX950-SDAG-NEXT: s_movk_i32 s4, 0x7e00
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: v_readfirstlane_b32 s5, v1
+; GFX950-SDAG-NEXT: s_and_b32 s6, s5, 0x1ff
+; GFX950-SDAG-NEXT: v_or_b32_e32 v0, s6, v0
+; GFX950-SDAG-NEXT: s_lshr_b32 s7, s5, 8
+; GFX950-SDAG-NEXT: s_bfe_u32 s8, s5, 0xb0014
+; GFX950-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
+; GFX950-SDAG-NEXT: s_and_b32 s6, s7, 0xffe
+; GFX950-SDAG-NEXT: s_sub_i32 s7, 0x3f1, s8
+; GFX950-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX950-SDAG-NEXT: v_med3_i32 v1, s7, 0, 13
+; GFX950-SDAG-NEXT: v_readfirstlane_b32 s7, v0
+; GFX950-SDAG-NEXT: s_or_b32 s6, s6, s7
+; GFX950-SDAG-NEXT: v_readfirstlane_b32 s9, v1
+; GFX950-SDAG-NEXT: s_or_b32 s7, s6, 0x1000
+; GFX950-SDAG-NEXT: s_lshr_b32 s10, s7, s9
+; GFX950-SDAG-NEXT: s_lshl_b32 s9, s10, s9
+; GFX950-SDAG-NEXT: s_cmp_lg_u32 s9, s7
+; GFX950-SDAG-NEXT: s_cselect_b32 s7, 1, 0
+; GFX950-SDAG-NEXT: s_addk_i32 s8, 0xfc10
+; GFX950-SDAG-NEXT: s_lshl_b32 s9, s8, 12
+; GFX950-SDAG-NEXT: s_or_b32 s7, s10, s7
+; GFX950-SDAG-NEXT: s_or_b32 s9, s6, s9
+; GFX950-SDAG-NEXT: s_cmp_lt_i32 s8, 1
+; GFX950-SDAG-NEXT: s_cselect_b32 s7, s7, s9
+; GFX950-SDAG-NEXT: s_and_b32 s9, s7, 7
+; GFX950-SDAG-NEXT: s_cmp_gt_i32 s9, 5
+; GFX950-SDAG-NEXT: s_cselect_b32 s10, 1, 0
+; GFX950-SDAG-NEXT: s_cmp_eq_u32 s9, 3
+; GFX950-SDAG-NEXT: s_cselect_b32 s9, 1, 0
+; GFX950-SDAG-NEXT: s_lshr_b32 s7, s7, 2
+; GFX950-SDAG-NEXT: s_or_b32 s9, s9, s10
+; GFX950-SDAG-NEXT: s_add_i32 s7, s7, s9
+; GFX950-SDAG-NEXT: s_cmp_lt_i32 s8, 31
+; GFX950-SDAG-NEXT: s_cselect_b32 s7, s7, 0x7c00
+; GFX950-SDAG-NEXT: s_cmp_lg_u32 s6, 0
+; GFX950-SDAG-NEXT: s_cselect_b32 s4, s4, 0x7c00
+; GFX950-SDAG-NEXT: s_cmpk_eq_i32 s8, 0x40f
+; GFX950-SDAG-NEXT: s_cselect_b32 s4, s4, s7
+; GFX950-SDAG-NEXT: s_lshr_b32 s5, s5, 16
+; GFX950-SDAG-NEXT: s_and_b32 s5, s5, 0x8000
+; GFX950-SDAG-NEXT: s_or_b32 s4, s5, s4
+; GFX950-SDAG-NEXT: v_mov_b32_e32 v0, s4
+; GFX950-SDAG-NEXT: buffer_store_short v0, off, s[0:3], 0
+; GFX950-SDAG-NEXT: s_endpgm
+;
+; GFX950-GISEL-LABEL: fptrunc_f64_to_f16_afn:
+; GFX950-GISEL: ; %bb.0: ; %entry
+; GFX950-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX950-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX950-GISEL-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0
+; GFX950-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX950-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[2:3]
+; GFX950-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX950-GISEL-NEXT: s_mov_b32 s2, -1
+; GFX950-GISEL-NEXT: s_mov_b32 s3, 0xf000
+; GFX950-GISEL-NEXT: buffer_store_short v0, off, s[0:3], 0
+; GFX950-GISEL-NEXT: s_endpgm
+;
+; GFX11-SDAG-TRUE16-LABEL: fptrunc_f64_to_f16_afn:
+; GFX11-SDAG-TRUE16: ; %bb.0: ; %entry
+; GFX11-SDAG-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s6, -1
+; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s7, 0x31016000
+; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s10, s6
+; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s11, s7
+; GFX11-SDAG-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s8, s2
+; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s9, s3
+; GFX11-SDAG-TRUE16-NEXT: buffer_load_b64 v[0:1], off, s[8:11], 0
+; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-SDAG-TRUE16-NEXT: v_readfirstlane_b32 s2, v1
+; GFX11-SDAG-TRUE16-NEXT: s_and_b32 s3, s2, 0x1ff
+; GFX11-SDAG-TRUE16-NEXT: s_lshr_b32 s5, s2, 8
+; GFX11-SDAG-TRUE16-NEXT: v_or_b32_e32 v0, s3, v0
+; GFX11-SDAG-TRUE16-NEXT: s_bfe_u32 s3, s2, 0xb0014
+; GFX11-SDAG-TRUE16-NEXT: s_and_b32 s5, s5, 0xffe
+; GFX11-SDAG-TRUE16-NEXT: s_sub_i32 s4, 0x3f1, s3
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
+; GFX11-SDAG-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-SDAG-TRUE16-NEXT: v_med3_i32 v1, s4, 0, 13
+; GFX11-SDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX11-SDAG-TRUE16-NEXT: v_readfirstlane_b32 s8, v1
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
+; GFX11-SDAG-TRUE16-NEXT: v_readfirstlane_b32 s4, v0
+; GFX11-SDAG-TRUE16-NEXT: s_or_b32 s4, s5, s4
+; GFX11-SDAG-TRUE16-NEXT: s_or_b32 s5, s4, 0x1000
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-SDAG-TRUE16-NEXT: s_lshr_b32 s9, s5, s8
+; GFX11-SDAG-TRUE16-NEXT: s_lshl_b32 s8, s9, s8
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_4) | instid1(SALU_CYCLE_1)
+; GFX11-SDAG-TRUE16-NEXT: s_cmp_lg_u32 s8, s5
+; GFX11-SDAG-TRUE16-NEXT: s_cselect_b32 s5, 1, 0
+; GFX11-SDAG-TRUE16-NEXT: s_addk_i32 s3, 0xfc10
+; GFX11-SDAG-TRUE16-NEXT: s_or_b32 s5, s9, s5
+; GFX11-SDAG-TRUE16-NEXT: s_lshl_b32 s8, s3, 12
+; GFX11-SDAG-TRUE16-NEXT: s_or_b32 s8, s4, s8
+; GFX11-SDAG-TRUE16-NEXT: s_cmp_lt_i32 s3, 1
+; GFX11-SDAG-TRUE16-NEXT: s_cselect_b32 s5, s5, s8
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-SDAG-TRUE16-NEXT: s_and_b32 s8, s5, 7
+; GFX11-SDAG-TRUE16-NEXT: s_cmp_gt_i32 s8, 5
+; GFX11-SDAG-TRUE16-NEXT: s_cselect_b32 s9, 1, 0
+; GFX11-SDAG-TRUE16-NEXT: s_cmp_eq_u32 s8, 3
+; GFX11-SDAG-TRUE16-NEXT: s_cselect_b32 s8, 1, 0
+; GFX11-SDAG-TRUE16-NEXT: s_lshr_b32 s5, s5, 2
+; GFX11-SDAG-TRUE16-NEXT: s_or_b32 s8, s8, s9
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-SDAG-TRUE16-NEXT: s_add_i32 s5, s5, s8
+; GFX11-SDAG-TRUE16-NEXT: s_cmp_lt_i32 s3, 31
+; GFX11-SDAG-TRUE16-NEXT: s_movk_i32 s8, 0x7e00
+; GFX11-SDAG-TRUE16-NEXT: s_cselect_b32 s5, s5, 0x7c00
+; GFX11-SDAG-TRUE16-NEXT: s_cmp_lg_u32 s4, 0
+; GFX11-SDAG-TRUE16-NEXT: s_cselect_b32 s4, s8, 0x7c00
+; GFX11-SDAG-TRUE16-NEXT: s_cmpk_eq_i32 s3, 0x40f
+; GFX11-SDAG-TRUE16-NEXT: s_cselect_b32 s3, s4, s5
+; GFX11-SDAG-TRUE16-NEXT: s_lshr_b32 s2, s2, 16
+; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s4, s0
+; GFX11-SDAG-TRUE16-NEXT: s_and_b32 s2, s2, 0x8000
+; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s5, s1
+; GFX11-SDAG-TRUE16-NEXT: s_or_b32 s2, s2, s3
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b32_e32 v0, s2
+; GFX11-SDAG-TRUE16-NEXT: buffer_store_b16 v0, off, s[4:7], 0
+; GFX11-SDAG-TRUE16-NEXT: s_endpgm
+;
+; GFX11-SDAG-FAKE16-LABEL: fptrunc_f64_to_f16_afn:
+; GFX11-SDAG-FAKE16: ; %bb.0: ; %entry
+; GFX11-SDAG-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s6, -1
+; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s7, 0x31016000
+; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s10, s6
+; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s11, s7
+; GFX11-SDAG-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s8, s2
+; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s9, s3
+; GFX11-SDAG-FAKE16-NEXT: buffer_load_b64 v[0:1], off, s[8:11], 0
+; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-SDAG-FAKE16-NEXT: v_readfirstlane_b32 s2, v1
+; GFX11-SDAG-FAKE16-NEXT: s_and_b32 s3, s2, 0x1ff
+; GFX11-SDAG-FAKE16-NEXT: s_lshr_b32 s5, s2, 8
+; GFX11-SDAG-FAKE16-NEXT: v_or_b32_e32 v0, s3, v0
+; GFX11-SDAG-FAKE16-NEXT: s_bfe_u32 s3, s2, 0xb0014
+; GFX11-SDAG-FAKE16-NEXT: s_and_b32 s5, s5, 0xffe
+; GFX11-SDAG-FAKE16-NEXT: s_sub_i32 s4, 0x3f1, s3
+; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
+; GFX11-SDAG-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-SDAG-FAKE16-NEXT: v_med3_i32 v1, s4, 0, 13
+; GFX11-SDAG-FAKE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT: v_readfirstlane_b32 s8, v1
+; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
+; GFX11-SDAG-FAKE16-NEXT: v_readfirstlane_b32 s4, v0
+; GFX11-SDAG-FAKE16-NEXT: s_or_b32 s4, s5, s4
+; GFX11-SDAG-FAKE16-NEXT: s_or_b32 s5, s4, 0x1000
+; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-SDAG-FAKE16-NEXT: s_lshr_b32 s9, s5, s8
+; GFX11-SDAG-FAKE16-NEXT: s_lshl_b32 s8, s9, s8
+; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_4) | instid1(SALU_CYCLE_1)
+; GFX11-SDAG-FAKE16-NEXT: s_cmp_lg_u32 s8, s5
+; GFX11-SDAG-FAKE16-NEXT: s_cselect_b32 s5, 1, 0
+; GFX11-SDAG-FAKE16-NEXT: s_addk_i32 s3, 0xfc10
+; GFX11-SDAG-FAKE16-NEXT: s_or_b32 s5, s9, s5
+; GFX11-SDAG-FAKE16-NEXT: s_lshl_b32 s8, s3, 12
+; GFX11-SDAG-FAKE16-NEXT: s_or_b32 s8, s4, s8
+; GFX11-SDAG-FAKE16-NEXT: s_cmp_lt_i32 s3, 1
+; GFX11-SDAG-FAKE16-NEXT: s_cselect_b32 s5, s5, s8
+; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-SDAG-FAKE16-NEXT: s_and_b32 s8, s5, 7
+; GFX11-SDAG-FAKE16-NEXT: s_cmp_gt_i32 s8, 5
+; GFX11-SDAG-FAKE16-NEXT: s_cselect_b32 s9, 1, 0
+; GFX11-SDAG-FAKE16-NEXT: s_cmp_eq_u32 s8, 3
+; GFX11-SDAG-FAKE16-NEXT: s_cselect_b32 s8, 1, 0
+; GFX11-SDAG-FAKE16-NEXT: s_lshr_b32 s5, s5, 2
+; GFX11-SDAG-FAKE16-NEXT: s_or_b32 s8, s8, s9
+; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-SDAG-FAKE16-NEXT: s_add_i32 s5, s5, s8
+; GFX11-SDAG-FAKE16-NEXT: s_cmp_lt_i32 s3, 31
+; GFX11-SDAG-FAKE16-NEXT: s_movk_i32 s8, 0x7e00
+; GFX11-SDAG-FAKE16-NEXT: s_cselect_b32 s5, s5, 0x7c00
+; GFX11-SDAG-FAKE16-NEXT: s_cmp_lg_u32 s4, 0
+; GFX11-SDAG-FAKE16-NEXT: s_cselect_b32 s4, s8, 0x7c00
+; GFX11-SDAG-FAKE16-NEXT: s_cmpk_eq_i32 s3, 0x40f
+; GFX11-SDAG-FAKE16-NEXT: s_cselect_b32 s3, s4, s5
+; GFX11-SDAG-FAKE16-NEXT: s_lshr_b32 s2, s2, 16
+; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s4, s0
+; GFX11-SDAG-FAKE16-NEXT: s_and_b32 s2, s2, 0x8000
+; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s5, s1
+; GFX11-SDAG-FAKE16-NEXT: s_or_b32 s2, s2, s3
+; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-SDAG-FAKE16-NEXT: v_mov_b32_e32 v0, s2
+; GFX11-SDAG-FAKE16-NEXT: buffer_store_b16 v0, off, s[4:7], 0
+; GFX11-SDAG-FAKE16-NEXT: s_endpgm
+;
+; GFX11-GISEL-TRUE16-LABEL: fptrunc_f64_to_f16_afn:
+; GFX11-GISEL-TRUE16: ; %bb.0: ; %entry
+; GFX11-GISEL-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-GISEL-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-TRUE16-NEXT: s_load_b64 s[2:3], s[2:3], 0x0
+; GFX11-GISEL-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-GISEL-TRUE16-NEXT: v_cvt_f32_f64_e32 v0, s[2:3]
; GFX11-GISEL-TRUE16-NEXT: s_mov_b32 s2, -1
; GFX11-GISEL-TRUE16-NEXT: s_mov_b32 s3, 0x31016000
@@ -384,7 +1539,7 @@ define amdgpu_kernel void @fptrunc_f64_to_f16(
; GFX11-GISEL-TRUE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0
; GFX11-GISEL-TRUE16-NEXT: s_endpgm
;
-; GFX11-GISEL-FAKE16-LABEL: fptrunc_f64_to_f16:
+; GFX11-GISEL-FAKE16-LABEL: fptrunc_f64_to_f16_afn:
; GFX11-GISEL-FAKE16: ; %bb.0: ; %entry
; GFX11-GISEL-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX11-GISEL-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
@@ -401,7 +1556,7 @@ define amdgpu_kernel void @fptrunc_f64_to_f16(
ptr addrspace(1) %a) {
entry:
%a.val = load double, ptr addrspace(1) %a
- %r.val = fptrunc double %a.val to half
+ %r.val = fptrunc afn double %a.val to half
store half %r.val, ptr addrspace(1) %r
ret void
}
@@ -626,25 +1781,106 @@ entry:
define amdgpu_kernel void @fptrunc_v2f64_to_v2f16(
; SI-SDAG-LABEL: fptrunc_v2f64_to_v2f16:
; SI-SDAG: ; %bb.0: ; %entry
-; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
-; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000
-; SI-SDAG-NEXT: s_mov_b32 s6, -1
-; SI-SDAG-NEXT: s_mov_b32 s10, s6
-; SI-SDAG-NEXT: s_mov_b32 s11, s7
+; SI-SDAG-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x9
+; SI-SDAG-NEXT: s_mov_b32 s3, 0xf000
+; SI-SDAG-NEXT: s_mov_b32 s2, -1
+; SI-SDAG-NEXT: s_mov_b32 s10, s2
+; SI-SDAG-NEXT: s_mov_b32 s11, s3
; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; SI-SDAG-NEXT: s_mov_b32 s8, s2
-; SI-SDAG-NEXT: s_mov_b32 s9, s3
+; SI-SDAG-NEXT: s_mov_b32 s8, s6
+; SI-SDAG-NEXT: s_mov_b32 s9, s7
; SI-SDAG-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0
-; SI-SDAG-NEXT: s_mov_b32 s4, s0
-; SI-SDAG-NEXT: s_mov_b32 s5, s1
+; SI-SDAG-NEXT: s_movk_i32 s0, 0x7e00
; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
-; SI-SDAG-NEXT: v_cvt_f32_f64_e32 v2, v[2:3]
-; SI-SDAG-NEXT: v_cvt_f32_f64_e32 v0, v[0:1]
-; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v2
-; SI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v1
-; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
-; SI-SDAG-NEXT: v_or_b32_e32 v0, v0, v1
-; SI-SDAG-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; SI-SDAG-NEXT: v_readfirstlane_b32 s1, v3
+; SI-SDAG-NEXT: v_readfirstlane_b32 s6, v1
+; SI-SDAG-NEXT: s_and_b32 s7, s1, 0x1ff
+; SI-SDAG-NEXT: s_lshr_b32 s8, s1, 8
+; SI-SDAG-NEXT: s_bfe_u32 s9, s1, 0xb0014
+; SI-SDAG-NEXT: v_or_b32_e32 v1, s7, v2
+; SI-SDAG-NEXT: s_and_b32 s7, s8, 0xffe
+; SI-SDAG-NEXT: s_sub_i32 s8, 0x3f1, s9
+; SI-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1
+; SI-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; SI-SDAG-NEXT: v_med3_i32 v2, s8, 0, 13
+; SI-SDAG-NEXT: v_readfirstlane_b32 s8, v1
+; SI-SDAG-NEXT: v_readfirstlane_b32 s10, v2
+; SI-SDAG-NEXT: s_or_b32 s7, s7, s8
+; SI-SDAG-NEXT: s_or_b32 s8, s7, 0x1000
+; SI-SDAG-NEXT: s_lshr_b32 s11, s8, s10
+; SI-SDAG-NEXT: s_lshl_b32 s10, s11, s10
+; SI-SDAG-NEXT: s_cmp_lg_u32 s10, s8
+; SI-SDAG-NEXT: s_cselect_b32 s8, 1, 0
+; SI-SDAG-NEXT: s_addk_i32 s9, 0xfc10
+; SI-SDAG-NEXT: s_or_b32 s8, s11, s8
+; SI-SDAG-NEXT: s_lshl_b32 s10, s9, 12
+; SI-SDAG-NEXT: s_or_b32 s10, s7, s10
+; SI-SDAG-NEXT: s_cmp_lt_i32 s9, 1
+; SI-SDAG-NEXT: s_cselect_b32 s8, s8, s10
+; SI-SDAG-NEXT: s_and_b32 s10, s8, 7
+; SI-SDAG-NEXT: s_cmp_gt_i32 s10, 5
+; SI-SDAG-NEXT: s_cselect_b32 s11, 1, 0
+; SI-SDAG-NEXT: s_cmp_eq_u32 s10, 3
+; SI-SDAG-NEXT: s_cselect_b32 s10, 1, 0
+; SI-SDAG-NEXT: s_lshr_b32 s8, s8, 2
+; SI-SDAG-NEXT: s_or_b32 s10, s10, s11
+; SI-SDAG-NEXT: s_add_i32 s8, s8, s10
+; SI-SDAG-NEXT: s_cmp_lt_i32 s9, 31
+; SI-SDAG-NEXT: s_cselect_b32 s8, s8, 0x7c00
+; SI-SDAG-NEXT: s_cmp_lg_u32 s7, 0
+; SI-SDAG-NEXT: s_cselect_b32 s7, s0, 0x7c00
+; SI-SDAG-NEXT: s_cmpk_eq_i32 s9, 0x40f
+; SI-SDAG-NEXT: s_cselect_b32 s7, s7, s8
+; SI-SDAG-NEXT: s_lshr_b32 s1, s1, 16
+; SI-SDAG-NEXT: s_and_b32 s8, s6, 0x1ff
+; SI-SDAG-NEXT: s_lshr_b32 s9, s6, 8
+; SI-SDAG-NEXT: s_bfe_u32 s10, s6, 0xb0014
+; SI-SDAG-NEXT: s_and_b32 s1, s1, 0x8000
+; SI-SDAG-NEXT: v_or_b32_e32 v0, s8, v0
+; SI-SDAG-NEXT: s_and_b32 s8, s9, 0xffe
+; SI-SDAG-NEXT: s_sub_i32 s9, 0x3f1, s10
+; SI-SDAG-NEXT: s_or_b32 s1, s1, s7
+; SI-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
+; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; SI-SDAG-NEXT: v_med3_i32 v1, s9, 0, 13
+; SI-SDAG-NEXT: s_lshl_b32 s1, s1, 16
+; SI-SDAG-NEXT: v_readfirstlane_b32 s7, v0
+; SI-SDAG-NEXT: v_readfirstlane_b32 s9, v1
+; SI-SDAG-NEXT: s_or_b32 s7, s8, s7
+; SI-SDAG-NEXT: s_or_b32 s8, s7, 0x1000
+; SI-SDAG-NEXT: s_lshr_b32 s11, s8, s9
+; SI-SDAG-NEXT: s_lshl_b32 s9, s11, s9
+; SI-SDAG-NEXT: s_cmp_lg_u32 s9, s8
+; SI-SDAG-NEXT: s_cselect_b32 s8, 1, 0
+; SI-SDAG-NEXT: s_addk_i32 s10, 0xfc10
+; SI-SDAG-NEXT: s_or_b32 s8, s11, s8
+; SI-SDAG-NEXT: s_lshl_b32 s9, s10, 12
+; SI-SDAG-NEXT: s_or_b32 s9, s7, s9
+; SI-SDAG-NEXT: s_cmp_lt_i32 s10, 1
+; SI-SDAG-NEXT: s_cselect_b32 s8, s8, s9
+; SI-SDAG-NEXT: s_and_b32 s9, s8, 7
+; SI-SDAG-NEXT: s_cmp_gt_i32 s9, 5
+; SI-SDAG-NEXT: s_cselect_b32 s11, 1, 0
+; SI-SDAG-NEXT: s_cmp_eq_u32 s9, 3
+; SI-SDAG-NEXT: s_cselect_b32 s9, 1, 0
+; SI-SDAG-NEXT: s_lshr_b32 s8, s8, 2
+; SI-SDAG-NEXT: s_or_b32 s9, s9, s11
+; SI-SDAG-NEXT: s_add_i32 s8, s8, s9
+; SI-SDAG-NEXT: s_cmp_lt_i32 s10, 31
+; SI-SDAG-NEXT: s_cselect_b32 s8, s8, 0x7c00
+; SI-SDAG-NEXT: s_cmp_lg_u32 s7, 0
+; SI-SDAG-NEXT: s_cselect_b32 s0, s0, 0x7c00
+; SI-SDAG-NEXT: s_cmpk_eq_i32 s10, 0x40f
+; SI-SDAG-NEXT: s_cselect_b32 s0, s0, s8
+; SI-SDAG-NEXT: s_lshr_b32 s6, s6, 16
+; SI-SDAG-NEXT: s_and_b32 s6, s6, 0x8000
+; SI-SDAG-NEXT: s_or_b32 s0, s6, s0
+; SI-SDAG-NEXT: s_and_b32 s0, s0, 0xffff
+; SI-SDAG-NEXT: s_or_b32 s6, s0, s1
+; SI-SDAG-NEXT: s_mov_b32 s0, s4
+; SI-SDAG-NEXT: s_mov_b32 s1, s5
+; SI-SDAG-NEXT: v_mov_b32_e32 v0, s6
+; SI-SDAG-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SI-SDAG-NEXT: s_endpgm
;
; SI-GISEL-LABEL: fptrunc_v2f64_to_v2f16:
@@ -654,6 +1890,1251 @@ define amdgpu_kernel void @fptrunc_v2f64_to_v2f16(
; SI-GISEL-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0
; SI-GISEL-NEXT: s_mov_b32 s2, -1
; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; SI-GISEL-NEXT: s_bfe_u32 s3, s5, 0xb0014
+; SI-GISEL-NEXT: s_lshr_b32 s8, s5, 8
+; SI-GISEL-NEXT: s_and_b32 s9, s5, 0x1ff
+; SI-GISEL-NEXT: s_addk_i32 s3, 0xfc10
+; SI-GISEL-NEXT: s_and_b32 s8, s8, 0xffe
+; SI-GISEL-NEXT: s_or_b32 s4, s9, s4
+; SI-GISEL-NEXT: s_cmp_lg_u32 s4, 0
+; SI-GISEL-NEXT: s_cselect_b32 s4, 1, 0
+; SI-GISEL-NEXT: s_or_b32 s4, s8, s4
+; SI-GISEL-NEXT: s_cmp_lg_u32 s4, 0
+; SI-GISEL-NEXT: s_cselect_b32 s8, 1, 0
+; SI-GISEL-NEXT: s_lshl_b32 s8, s8, 9
+; SI-GISEL-NEXT: s_lshl_b32 s9, s3, 12
+; SI-GISEL-NEXT: s_sub_i32 s10, 1, s3
+; SI-GISEL-NEXT: s_or_b32 s11, s4, 0x1000
+; SI-GISEL-NEXT: s_or_b32 s8, s8, 0x7c00
+; SI-GISEL-NEXT: s_or_b32 s4, s4, s9
+; SI-GISEL-NEXT: s_max_i32 s9, s10, 0
+; SI-GISEL-NEXT: s_min_i32 s9, s9, 13
+; SI-GISEL-NEXT: s_lshr_b32 s10, s11, s9
+; SI-GISEL-NEXT: s_lshl_b32 s9, s10, s9
+; SI-GISEL-NEXT: s_cmp_lg_u32 s9, s11
+; SI-GISEL-NEXT: s_cselect_b32 s9, 1, 0
+; SI-GISEL-NEXT: s_or_b32 s9, s10, s9
+; SI-GISEL-NEXT: s_cmp_lt_i32 s3, 1
+; SI-GISEL-NEXT: s_cselect_b32 s4, s9, s4
+; SI-GISEL-NEXT: s_and_b32 s9, s4, 7
+; SI-GISEL-NEXT: s_lshr_b32 s4, s4, 2
+; SI-GISEL-NEXT: s_cmp_eq_u32 s9, 3
+; SI-GISEL-NEXT: s_cselect_b32 s10, 1, 0
+; SI-GISEL-NEXT: s_cmp_gt_i32 s9, 5
+; SI-GISEL-NEXT: s_cselect_b32 s9, 1, 0
+; SI-GISEL-NEXT: s_or_b32 s9, s10, s9
+; SI-GISEL-NEXT: s_add_i32 s4, s4, s9
+; SI-GISEL-NEXT: s_cmp_gt_i32 s3, 30
+; SI-GISEL-NEXT: s_cselect_b32 s4, 0x7c00, s4
+; SI-GISEL-NEXT: s_cmpk_eq_i32 s3, 0x40f
+; SI-GISEL-NEXT: s_cselect_b32 s3, s8, s4
+; SI-GISEL-NEXT: s_lshr_b32 s4, s5, 16
+; SI-GISEL-NEXT: s_bfe_u32 s5, s7, 0xb0014
+; SI-GISEL-NEXT: s_lshr_b32 s8, s7, 8
+; SI-GISEL-NEXT: s_and_b32 s9, s7, 0x1ff
+; SI-GISEL-NEXT: s_and_b32 s4, s4, 0x8000
+; SI-GISEL-NEXT: s_addk_i32 s5, 0xfc10
+; SI-GISEL-NEXT: s_and_b32 s8, s8, 0xffe
+; SI-GISEL-NEXT: s_or_b32 s6, s9, s6
+; SI-GISEL-NEXT: s_or_b32 s3, s4, s3
+; SI-GISEL-NEXT: s_cmp_lg_u32 s6, 0
+; SI-GISEL-NEXT: s_cselect_b32 s4, 1, 0
+; SI-GISEL-NEXT: s_or_b32 s4, s8, s4
+; SI-GISEL-NEXT: s_cmp_lg_u32 s4, 0
+; SI-GISEL-NEXT: s_cselect_b32 s6, 1, 0
+; SI-GISEL-NEXT: s_lshl_b32 s6, s6, 9
+; SI-GISEL-NEXT: s_lshl_b32 s8, s5, 12
+; SI-GISEL-NEXT: s_sub_i32 s9, 1, s5
+; SI-GISEL-NEXT: s_or_b32 s10, s4, 0x1000
+; SI-GISEL-NEXT: s_or_b32 s6, s6, 0x7c00
+; SI-GISEL-NEXT: s_or_b32 s4, s4, s8
+; SI-GISEL-NEXT: s_max_i32 s8, s9, 0
+; SI-GISEL-NEXT: s_min_i32 s8, s8, 13
+; SI-GISEL-NEXT: s_lshr_b32 s9, s10, s8
+; SI-GISEL-NEXT: s_lshl_b32 s8, s9, s8
+; SI-GISEL-NEXT: s_cmp_lg_u32 s8, s10
+; SI-GISEL-NEXT: s_cselect_b32 s8, 1, 0
+; SI-GISEL-NEXT: s_or_b32 s8, s9, s8
+; SI-GISEL-NEXT: s_cmp_lt_i32 s5, 1
+; SI-GISEL-NEXT: s_cselect_b32 s4, s8, s4
+; SI-GISEL-NEXT: s_and_b32 s8, s4, 7
+; SI-GISEL-NEXT: s_lshr_b32 s4, s4, 2
+; SI-GISEL-NEXT: s_cmp_eq_u32 s8, 3
+; SI-GISEL-NEXT: s_cselect_b32 s9, 1, 0
+; SI-GISEL-NEXT: s_cmp_gt_i32 s8, 5
+; SI-GISEL-NEXT: s_cselect_b32 s8, 1, 0
+; SI-GISEL-NEXT: s_or_b32 s8, s9, s8
+; SI-GISEL-NEXT: s_add_i32 s4, s4, s8
+; SI-GISEL-NEXT: s_cmp_gt_i32 s5, 30
+; SI-GISEL-NEXT: s_cselect_b32 s4, 0x7c00, s4
+; SI-GISEL-NEXT: s_cmpk_eq_i32 s5, 0x40f
+; SI-GISEL-NEXT: s_cselect_b32 s4, s6, s4
+; SI-GISEL-NEXT: s_lshr_b32 s5, s7, 16
+; SI-GISEL-NEXT: s_and_b32 s3, s3, 0xffff
+; SI-GISEL-NEXT: s_and_b32 s5, s5, 0x8000
+; SI-GISEL-NEXT: s_or_b32 s4, s5, s4
+; SI-GISEL-NEXT: s_and_b32 s4, s4, 0xffff
+; SI-GISEL-NEXT: s_lshl_b32 s4, s4, 16
+; SI-GISEL-NEXT: s_or_b32 s4, s3, s4
+; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000
+; SI-GISEL-NEXT: v_mov_b32_e32 v0, s4
+; SI-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; SI-GISEL-NEXT: s_endpgm
+;
+; VI-SDAG-LABEL: fptrunc_v2f64_to_v2f16:
+; VI-SDAG: ; %bb.0: ; %entry
+; VI-SDAG-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x24
+; VI-SDAG-NEXT: s_mov_b32 s3, 0xf000
+; VI-SDAG-NEXT: s_mov_b32 s2, -1
+; VI-SDAG-NEXT: s_mov_b32 s10, s2
+; VI-SDAG-NEXT: s_mov_b32 s11, s3
+; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; VI-SDAG-NEXT: s_mov_b32 s8, s6
+; VI-SDAG-NEXT: s_mov_b32 s9, s7
+; VI-SDAG-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0
+; VI-SDAG-NEXT: s_mov_b32 s0, s4
+; VI-SDAG-NEXT: s_mov_b32 s1, s5
+; VI-SDAG-NEXT: s_movk_i32 s6, 0x7e00
+; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
+; VI-SDAG-NEXT: v_readfirstlane_b32 s4, v3
+; VI-SDAG-NEXT: s_and_b32 s7, s4, 0x1ff
+; VI-SDAG-NEXT: v_readfirstlane_b32 s5, v1
+; VI-SDAG-NEXT: v_or_b32_e32 v1, s7, v2
+; VI-SDAG-NEXT: s_lshr_b32 s8, s4, 8
+; VI-SDAG-NEXT: s_bfe_u32 s9, s4, 0xb0014
+; VI-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1
+; VI-SDAG-NEXT: s_and_b32 s7, s8, 0xffe
+; VI-SDAG-NEXT: s_sub_i32 s8, 0x3f1, s9
+; VI-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; VI-SDAG-NEXT: v_med3_i32 v2, s8, 0, 13
+; VI-SDAG-NEXT: v_readfirstlane_b32 s8, v1
+; VI-SDAG-NEXT: s_or_b32 s7, s7, s8
+; VI-SDAG-NEXT: v_readfirstlane_b32 s10, v2
+; VI-SDAG-NEXT: s_or_b32 s8, s7, 0x1000
+; VI-SDAG-NEXT: s_lshr_b32 s11, s8, s10
+; VI-SDAG-NEXT: s_lshl_b32 s10, s11, s10
+; VI-SDAG-NEXT: s_cmp_lg_u32 s10, s8
+; VI-SDAG-NEXT: s_cselect_b32 s8, 1, 0
+; VI-SDAG-NEXT: s_addk_i32 s9, 0xfc10
+; VI-SDAG-NEXT: s_lshl_b32 s10, s9, 12
+; VI-SDAG-NEXT: s_or_b32 s8, s11, s8
+; VI-SDAG-NEXT: s_or_b32 s10, s7, s10
+; VI-SDAG-NEXT: s_cmp_lt_i32 s9, 1
+; VI-SDAG-NEXT: s_cselect_b32 s8, s8, s10
+; VI-SDAG-NEXT: s_and_b32 s10, s8, 7
+; VI-SDAG-NEXT: s_cmp_gt_i32 s10, 5
+; VI-SDAG-NEXT: s_cselect_b32 s11, 1, 0
+; VI-SDAG-NEXT: s_cmp_eq_u32 s10, 3
+; VI-SDAG-NEXT: s_cselect_b32 s10, 1, 0
+; VI-SDAG-NEXT: s_lshr_b32 s8, s8, 2
+; VI-SDAG-NEXT: s_or_b32 s10, s10, s11
+; VI-SDAG-NEXT: s_add_i32 s8, s8, s10
+; VI-SDAG-NEXT: s_cmp_lt_i32 s9, 31
+; VI-SDAG-NEXT: s_cselect_b32 s8, s8, 0x7c00
+; VI-SDAG-NEXT: s_cmp_lg_u32 s7, 0
+; VI-SDAG-NEXT: s_cselect_b32 s7, s6, 0x7c00
+; VI-SDAG-NEXT: s_cmpk_eq_i32 s9, 0x40f
+; VI-SDAG-NEXT: s_cselect_b32 s7, s7, s8
+; VI-SDAG-NEXT: s_and_b32 s8, s5, 0x1ff
+; VI-SDAG-NEXT: v_or_b32_e32 v0, s8, v0
+; VI-SDAG-NEXT: s_lshr_b32 s4, s4, 16
+; VI-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
+; VI-SDAG-NEXT: s_lshr_b32 s9, s5, 8
+; VI-SDAG-NEXT: s_bfe_u32 s10, s5, 0xb0014
+; VI-SDAG-NEXT: s_and_b32 s4, s4, 0x8000
+; VI-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; VI-SDAG-NEXT: s_and_b32 s8, s9, 0xffe
+; VI-SDAG-NEXT: s_sub_i32 s9, 0x3f1, s10
+; VI-SDAG-NEXT: s_or_b32 s4, s4, s7
+; VI-SDAG-NEXT: v_readfirstlane_b32 s7, v0
+; VI-SDAG-NEXT: v_med3_i32 v1, s9, 0, 13
+; VI-SDAG-NEXT: s_or_b32 s7, s8, s7
+; VI-SDAG-NEXT: v_readfirstlane_b32 s9, v1
+; VI-SDAG-NEXT: s_or_b32 s8, s7, 0x1000
+; VI-SDAG-NEXT: s_lshr_b32 s11, s8, s9
+; VI-SDAG-NEXT: s_lshl_b32 s4, s4, 16
+; VI-SDAG-NEXT: s_lshl_b32 s9, s11, s9
+; VI-SDAG-NEXT: s_cmp_lg_u32 s9, s8
+; VI-SDAG-NEXT: s_cselect_b32 s8, 1, 0
+; VI-SDAG-NEXT: s_addk_i32 s10, 0xfc10
+; VI-SDAG-NEXT: s_lshl_b32 s9, s10, 12
+; VI-SDAG-NEXT: s_or_b32 s8, s11, s8
+; VI-SDAG-NEXT: s_or_b32 s9, s7, s9
+; VI-SDAG-NEXT: s_cmp_lt_i32 s10, 1
+; VI-SDAG-NEXT: s_cselect_b32 s8, s8, s9
+; VI-SDAG-NEXT: s_and_b32 s9, s8, 7
+; VI-SDAG-NEXT: s_cmp_gt_i32 s9, 5
+; VI-SDAG-NEXT: s_cselect_b32 s11, 1, 0
+; VI-SDAG-NEXT: s_cmp_eq_u32 s9, 3
+; VI-SDAG-NEXT: s_cselect_b32 s9, 1, 0
+; VI-SDAG-NEXT: s_lshr_b32 s8, s8, 2
+; VI-SDAG-NEXT: s_or_b32 s9, s9, s11
+; VI-SDAG-NEXT: s_add_i32 s8, s8, s9
+; VI-SDAG-NEXT: s_cmp_lt_i32 s10, 31
+; VI-SDAG-NEXT: s_cselect_b32 s8, s8, 0x7c00
+; VI-SDAG-NEXT: s_cmp_lg_u32 s7, 0
+; VI-SDAG-NEXT: s_cselect_b32 s6, s6, 0x7c00
+; VI-SDAG-NEXT: s_cmpk_eq_i32 s10, 0x40f
+; VI-SDAG-NEXT: s_cselect_b32 s6, s6, s8
+; VI-SDAG-NEXT: s_lshr_b32 s5, s5, 16
+; VI-SDAG-NEXT: s_and_b32 s5, s5, 0x8000
+; VI-SDAG-NEXT: s_or_b32 s5, s5, s6
+; VI-SDAG-NEXT: s_and_b32 s5, s5, 0xffff
+; VI-SDAG-NEXT: s_or_b32 s4, s5, s4
+; VI-SDAG-NEXT: v_mov_b32_e32 v0, s4
+; VI-SDAG-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; VI-SDAG-NEXT: s_endpgm
+;
+; VI-GISEL-LABEL: fptrunc_v2f64_to_v2f16:
+; VI-GISEL: ; %bb.0: ; %entry
+; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; VI-GISEL-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0
+; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; VI-GISEL-NEXT: s_bfe_u32 s2, s5, 0xb0014
+; VI-GISEL-NEXT: s_lshr_b32 s3, s5, 8
+; VI-GISEL-NEXT: s_and_b32 s8, s5, 0x1ff
+; VI-GISEL-NEXT: s_addk_i32 s2, 0xfc10
+; VI-GISEL-NEXT: s_and_b32 s3, s3, 0xffe
+; VI-GISEL-NEXT: s_or_b32 s4, s8, s4
+; VI-GISEL-NEXT: s_cmp_lg_u32 s4, 0
+; VI-GISEL-NEXT: s_cselect_b32 s4, 1, 0
+; VI-GISEL-NEXT: s_or_b32 s3, s3, s4
+; VI-GISEL-NEXT: s_cmp_lg_u32 s3, 0
+; VI-GISEL-NEXT: s_cselect_b32 s4, 1, 0
+; VI-GISEL-NEXT: s_sub_i32 s9, 1, s2
+; VI-GISEL-NEXT: s_lshl_b32 s8, s2, 12
+; VI-GISEL-NEXT: s_max_i32 s9, s9, 0
+; VI-GISEL-NEXT: s_or_b32 s8, s3, s8
+; VI-GISEL-NEXT: s_min_i32 s9, s9, 13
+; VI-GISEL-NEXT: s_bitset1_b32 s3, 12
+; VI-GISEL-NEXT: s_lshl_b32 s4, s4, 9
+; VI-GISEL-NEXT: s_lshr_b32 s10, s3, s9
+; VI-GISEL-NEXT: s_or_b32 s4, s4, 0x7c00
+; VI-GISEL-NEXT: s_lshl_b32 s9, s10, s9
+; VI-GISEL-NEXT: s_cmp_lg_u32 s9, s3
+; VI-GISEL-NEXT: s_cselect_b32 s3, 1, 0
+; VI-GISEL-NEXT: s_or_b32 s3, s10, s3
+; VI-GISEL-NEXT: s_cmp_lt_i32 s2, 1
+; VI-GISEL-NEXT: s_cselect_b32 s3, s3, s8
+; VI-GISEL-NEXT: s_and_b32 s8, s3, 7
+; VI-GISEL-NEXT: s_lshr_b32 s3, s3, 2
+; VI-GISEL-NEXT: s_cmp_eq_u32 s8, 3
+; VI-GISEL-NEXT: s_cselect_b32 s9, 1, 0
+; VI-GISEL-NEXT: s_cmp_gt_i32 s8, 5
+; VI-GISEL-NEXT: s_cselect_b32 s8, 1, 0
+; VI-GISEL-NEXT: s_or_b32 s8, s9, s8
+; VI-GISEL-NEXT: s_add_i32 s3, s3, s8
+; VI-GISEL-NEXT: s_cmp_gt_i32 s2, 30
+; VI-GISEL-NEXT: s_cselect_b32 s3, 0x7c00, s3
+; VI-GISEL-NEXT: s_cmpk_eq_i32 s2, 0x40f
+; VI-GISEL-NEXT: s_cselect_b32 s2, s4, s3
+; VI-GISEL-NEXT: s_lshr_b32 s3, s5, 16
+; VI-GISEL-NEXT: s_and_b32 s3, s3, 0x8000
+; VI-GISEL-NEXT: s_or_b32 s2, s3, s2
+; VI-GISEL-NEXT: s_bfe_u32 s3, s7, 0xb0014
+; VI-GISEL-NEXT: s_lshr_b32 s4, s7, 8
+; VI-GISEL-NEXT: s_and_b32 s5, s7, 0x1ff
+; VI-GISEL-NEXT: s_addk_i32 s3, 0xfc10
+; VI-GISEL-NEXT: s_and_b32 s4, s4, 0xffe
+; VI-GISEL-NEXT: s_or_b32 s5, s5, s6
+; VI-GISEL-NEXT: s_cmp_lg_u32 s5, 0
+; VI-GISEL-NEXT: s_cselect_b32 s5, 1, 0
+; VI-GISEL-NEXT: s_or_b32 s4, s4, s5
+; VI-GISEL-NEXT: s_cmp_lg_u32 s4, 0
+; VI-GISEL-NEXT: s_cselect_b32 s5, 1, 0
+; VI-GISEL-NEXT: s_sub_i32 s8, 1, s3
+; VI-GISEL-NEXT: s_lshl_b32 s6, s3, 12
+; VI-GISEL-NEXT: s_max_i32 s8, s8, 0
+; VI-GISEL-NEXT: s_or_b32 s6, s4, s6
+; VI-GISEL-NEXT: s_min_i32 s8, s8, 13
+; VI-GISEL-NEXT: s_bitset1_b32 s4, 12
+; VI-GISEL-NEXT: s_lshl_b32 s5, s5, 9
+; VI-GISEL-NEXT: s_lshr_b32 s9, s4, s8
+; VI-GISEL-NEXT: s_or_b32 s5, s5, 0x7c00
+; VI-GISEL-NEXT: s_lshl_b32 s8, s9, s8
+; VI-GISEL-NEXT: s_cmp_lg_u32 s8, s4
+; VI-GISEL-NEXT: s_cselect_b32 s4, 1, 0
+; VI-GISEL-NEXT: s_or_b32 s4, s9, s4
+; VI-GISEL-NEXT: s_cmp_lt_i32 s3, 1
+; VI-GISEL-NEXT: s_cselect_b32 s4, s4, s6
+; VI-GISEL-NEXT: s_and_b32 s6, s4, 7
+; VI-GISEL-NEXT: s_lshr_b32 s4, s4, 2
+; VI-GISEL-NEXT: s_cmp_eq_u32 s6, 3
+; VI-GISEL-NEXT: s_cselect_b32 s8, 1, 0
+; VI-GISEL-NEXT: s_cmp_gt_i32 s6, 5
+; VI-GISEL-NEXT: s_cselect_b32 s6, 1, 0
+; VI-GISEL-NEXT: s_or_b32 s6, s8, s6
+; VI-GISEL-NEXT: s_add_i32 s4, s4, s6
+; VI-GISEL-NEXT: s_cmp_gt_i32 s3, 30
+; VI-GISEL-NEXT: s_cselect_b32 s4, 0x7c00, s4
+; VI-GISEL-NEXT: s_cmpk_eq_i32 s3, 0x40f
+; VI-GISEL-NEXT: s_cselect_b32 s3, s5, s4
+; VI-GISEL-NEXT: s_lshr_b32 s4, s7, 16
+; VI-GISEL-NEXT: s_and_b32 s4, s4, 0x8000
+; VI-GISEL-NEXT: s_or_b32 s3, s4, s3
+; VI-GISEL-NEXT: s_and_b32 s3, s3, 0xffff
+; VI-GISEL-NEXT: s_and_b32 s2, s2, 0xffff
+; VI-GISEL-NEXT: s_lshl_b32 s3, s3, 16
+; VI-GISEL-NEXT: s_or_b32 s2, s2, s3
+; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2
+; VI-GISEL-NEXT: s_mov_b32 s2, -1
+; VI-GISEL-NEXT: s_mov_b32 s3, 0xf000
+; VI-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; VI-GISEL-NEXT: s_endpgm
+;
+; GFX9-SDAG-LABEL: fptrunc_v2f64_to_v2f16:
+; GFX9-SDAG: ; %bb.0: ; %entry
+; GFX9-SDAG-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x24
+; GFX9-SDAG-NEXT: s_mov_b32 s3, 0xf000
+; GFX9-SDAG-NEXT: s_mov_b32 s2, -1
+; GFX9-SDAG-NEXT: s_mov_b32 s6, s2
+; GFX9-SDAG-NEXT: s_mov_b32 s7, s3
+; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-SDAG-NEXT: s_mov_b32 s4, s10
+; GFX9-SDAG-NEXT: s_mov_b32 s5, s11
+; GFX9-SDAG-NEXT: buffer_load_dwordx4 v[0:3], off, s[4:7], 0
+; GFX9-SDAG-NEXT: s_mov_b32 s0, s8
+; GFX9-SDAG-NEXT: s_mov_b32 s1, s9
+; GFX9-SDAG-NEXT: s_movk_i32 s4, 0x7e00
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-SDAG-NEXT: v_readfirstlane_b32 s5, v3
+; GFX9-SDAG-NEXT: s_and_b32 s7, s5, 0x1ff
+; GFX9-SDAG-NEXT: v_readfirstlane_b32 s6, v1
+; GFX9-SDAG-NEXT: v_or_b32_e32 v1, s7, v2
+; GFX9-SDAG-NEXT: s_lshr_b32 s8, s5, 8
+; GFX9-SDAG-NEXT: s_bfe_u32 s9, s5, 0xb0014
+; GFX9-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1
+; GFX9-SDAG-NEXT: s_and_b32 s7, s8, 0xffe
+; GFX9-SDAG-NEXT: s_sub_i32 s8, 0x3f1, s9
+; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GFX9-SDAG-NEXT: v_med3_i32 v2, s8, 0, 13
+; GFX9-SDAG-NEXT: v_readfirstlane_b32 s8, v1
+; GFX9-SDAG-NEXT: s_or_b32 s7, s7, s8
+; GFX9-SDAG-NEXT: v_readfirstlane_b32 s10, v2
+; GFX9-SDAG-NEXT: s_or_b32 s8, s7, 0x1000
+; GFX9-SDAG-NEXT: s_lshr_b32 s11, s8, s10
+; GFX9-SDAG-NEXT: s_lshl_b32 s10, s11, s10
+; GFX9-SDAG-NEXT: s_cmp_lg_u32 s10, s8
+; GFX9-SDAG-NEXT: s_cselect_b32 s8, 1, 0
+; GFX9-SDAG-NEXT: s_addk_i32 s9, 0xfc10
+; GFX9-SDAG-NEXT: s_lshl_b32 s10, s9, 12
+; GFX9-SDAG-NEXT: s_or_b32 s8, s11, s8
+; GFX9-SDAG-NEXT: s_or_b32 s10, s7, s10
+; GFX9-SDAG-NEXT: s_cmp_lt_i32 s9, 1
+; GFX9-SDAG-NEXT: s_cselect_b32 s8, s8, s10
+; GFX9-SDAG-NEXT: s_and_b32 s10, s8, 7
+; GFX9-SDAG-NEXT: s_cmp_gt_i32 s10, 5
+; GFX9-SDAG-NEXT: s_cselect_b32 s11, 1, 0
+; GFX9-SDAG-NEXT: s_cmp_eq_u32 s10, 3
+; GFX9-SDAG-NEXT: s_cselect_b32 s10, 1, 0
+; GFX9-SDAG-NEXT: s_lshr_b32 s8, s8, 2
+; GFX9-SDAG-NEXT: s_or_b32 s10, s10, s11
+; GFX9-SDAG-NEXT: s_add_i32 s8, s8, s10
+; GFX9-SDAG-NEXT: s_cmp_lt_i32 s9, 31
+; GFX9-SDAG-NEXT: s_cselect_b32 s8, s8, 0x7c00
+; GFX9-SDAG-NEXT: s_cmp_lg_u32 s7, 0
+; GFX9-SDAG-NEXT: s_cselect_b32 s7, s4, 0x7c00
+; GFX9-SDAG-NEXT: s_cmpk_eq_i32 s9, 0x40f
+; GFX9-SDAG-NEXT: s_cselect_b32 s7, s7, s8
+; GFX9-SDAG-NEXT: s_and_b32 s8, s6, 0x1ff
+; GFX9-SDAG-NEXT: v_or_b32_e32 v0, s8, v0
+; GFX9-SDAG-NEXT: s_lshr_b32 s5, s5, 16
+; GFX9-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
+; GFX9-SDAG-NEXT: s_lshr_b32 s9, s6, 8
+; GFX9-SDAG-NEXT: s_bfe_u32 s10, s6, 0xb0014
+; GFX9-SDAG-NEXT: s_and_b32 s5, s5, 0x8000
+; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX9-SDAG-NEXT: s_and_b32 s8, s9, 0xffe
+; GFX9-SDAG-NEXT: s_sub_i32 s9, 0x3f1, s10
+; GFX9-SDAG-NEXT: s_or_b32 s5, s5, s7
+; GFX9-SDAG-NEXT: v_readfirstlane_b32 s7, v0
+; GFX9-SDAG-NEXT: v_med3_i32 v1, s9, 0, 13
+; GFX9-SDAG-NEXT: s_or_b32 s7, s8, s7
+; GFX9-SDAG-NEXT: v_readfirstlane_b32 s9, v1
+; GFX9-SDAG-NEXT: s_or_b32 s8, s7, 0x1000
+; GFX9-SDAG-NEXT: s_lshr_b32 s11, s8, s9
+; GFX9-SDAG-NEXT: s_lshl_b32 s9, s11, s9
+; GFX9-SDAG-NEXT: s_cmp_lg_u32 s9, s8
+; GFX9-SDAG-NEXT: s_cselect_b32 s8, 1, 0
+; GFX9-SDAG-NEXT: s_addk_i32 s10, 0xfc10
+; GFX9-SDAG-NEXT: s_lshl_b32 s9, s10, 12
+; GFX9-SDAG-NEXT: s_or_b32 s8, s11, s8
+; GFX9-SDAG-NEXT: s_or_b32 s9, s7, s9
+; GFX9-SDAG-NEXT: s_cmp_lt_i32 s10, 1
+; GFX9-SDAG-NEXT: s_cselect_b32 s8, s8, s9
+; GFX9-SDAG-NEXT: s_and_b32 s9, s8, 7
+; GFX9-SDAG-NEXT: s_cmp_gt_i32 s9, 5
+; GFX9-SDAG-NEXT: s_cselect_b32 s11, 1, 0
+; GFX9-SDAG-NEXT: s_cmp_eq_u32 s9, 3
+; GFX9-SDAG-NEXT: s_cselect_b32 s9, 1, 0
+; GFX9-SDAG-NEXT: s_lshr_b32 s8, s8, 2
+; GFX9-SDAG-NEXT: s_or_b32 s9, s9, s11
+; GFX9-SDAG-NEXT: s_add_i32 s8, s8, s9
+; GFX9-SDAG-NEXT: s_cmp_lt_i32 s10, 31
+; GFX9-SDAG-NEXT: s_cselect_b32 s8, s8, 0x7c00
+; GFX9-SDAG-NEXT: s_cmp_lg_u32 s7, 0
+; GFX9-SDAG-NEXT: s_cselect_b32 s4, s4, 0x7c00
+; GFX9-SDAG-NEXT: s_cmpk_eq_i32 s10, 0x40f
+; GFX9-SDAG-NEXT: s_cselect_b32 s4, s4, s8
+; GFX9-SDAG-NEXT: s_lshr_b32 s6, s6, 16
+; GFX9-SDAG-NEXT: s_and_b32 s6, s6, 0x8000
+; GFX9-SDAG-NEXT: s_or_b32 s4, s6, s4
+; GFX9-SDAG-NEXT: s_pack_ll_b32_b16 s4, s4, s5
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, s4
+; GFX9-SDAG-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GFX9-SDAG-NEXT: s_endpgm
+;
+; GFX9-GISEL-LABEL: fptrunc_v2f64_to_v2f16:
+; GFX9-GISEL: ; %bb.0: ; %entry
+; GFX9-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-GISEL-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0
+; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-GISEL-NEXT: s_bfe_u32 s2, s5, 0xb0014
+; GFX9-GISEL-NEXT: s_lshr_b32 s3, s5, 8
+; GFX9-GISEL-NEXT: s_and_b32 s8, s5, 0x1ff
+; GFX9-GISEL-NEXT: s_addk_i32 s2, 0xfc10
+; GFX9-GISEL-NEXT: s_and_b32 s3, s3, 0xffe
+; GFX9-GISEL-NEXT: s_or_b32 s4, s8, s4
+; GFX9-GISEL-NEXT: s_cmp_lg_u32 s4, 0
+; GFX9-GISEL-NEXT: s_cselect_b32 s4, 1, 0
+; GFX9-GISEL-NEXT: s_or_b32 s3, s3, s4
+; GFX9-GISEL-NEXT: s_cmp_lg_u32 s3, 0
+; GFX9-GISEL-NEXT: s_cselect_b32 s4, 1, 0
+; GFX9-GISEL-NEXT: s_sub_i32 s9, 1, s2
+; GFX9-GISEL-NEXT: s_lshl_b32 s8, s2, 12
+; GFX9-GISEL-NEXT: s_max_i32 s9, s9, 0
+; GFX9-GISEL-NEXT: s_or_b32 s8, s3, s8
+; GFX9-GISEL-NEXT: s_min_i32 s9, s9, 13
+; GFX9-GISEL-NEXT: s_bitset1_b32 s3, 12
+; GFX9-GISEL-NEXT: s_lshl_b32 s4, s4, 9
+; GFX9-GISEL-NEXT: s_lshr_b32 s10, s3, s9
+; GFX9-GISEL-NEXT: s_or_b32 s4, s4, 0x7c00
+; GFX9-GISEL-NEXT: s_lshl_b32 s9, s10, s9
+; GFX9-GISEL-NEXT: s_cmp_lg_u32 s9, s3
+; GFX9-GISEL-NEXT: s_cselect_b32 s3, 1, 0
+; GFX9-GISEL-NEXT: s_or_b32 s3, s10, s3
+; GFX9-GISEL-NEXT: s_cmp_lt_i32 s2, 1
+; GFX9-GISEL-NEXT: s_cselect_b32 s3, s3, s8
+; GFX9-GISEL-NEXT: s_and_b32 s8, s3, 7
+; GFX9-GISEL-NEXT: s_lshr_b32 s3, s3, 2
+; GFX9-GISEL-NEXT: s_cmp_eq_u32 s8, 3
+; GFX9-GISEL-NEXT: s_cselect_b32 s9, 1, 0
+; GFX9-GISEL-NEXT: s_cmp_gt_i32 s8, 5
+; GFX9-GISEL-NEXT: s_cselect_b32 s8, 1, 0
+; GFX9-GISEL-NEXT: s_or_b32 s8, s9, s8
+; GFX9-GISEL-NEXT: s_add_i32 s3, s3, s8
+; GFX9-GISEL-NEXT: s_cmp_gt_i32 s2, 30
+; GFX9-GISEL-NEXT: s_cselect_b32 s3, 0x7c00, s3
+; GFX9-GISEL-NEXT: s_cmpk_eq_i32 s2, 0x40f
+; GFX9-GISEL-NEXT: s_cselect_b32 s2, s4, s3
+; GFX9-GISEL-NEXT: s_lshr_b32 s3, s5, 16
+; GFX9-GISEL-NEXT: s_and_b32 s3, s3, 0x8000
+; GFX9-GISEL-NEXT: s_or_b32 s2, s3, s2
+; GFX9-GISEL-NEXT: s_bfe_u32 s3, s7, 0xb0014
+; GFX9-GISEL-NEXT: s_lshr_b32 s4, s7, 8
+; GFX9-GISEL-NEXT: s_and_b32 s5, s7, 0x1ff
+; GFX9-GISEL-NEXT: s_addk_i32 s3, 0xfc10
+; GFX9-GISEL-NEXT: s_and_b32 s4, s4, 0xffe
+; GFX9-GISEL-NEXT: s_or_b32 s5, s5, s6
+; GFX9-GISEL-NEXT: s_cmp_lg_u32 s5, 0
+; GFX9-GISEL-NEXT: s_cselect_b32 s5, 1, 0
+; GFX9-GISEL-NEXT: s_or_b32 s4, s4, s5
+; GFX9-GISEL-NEXT: s_cmp_lg_u32 s4, 0
+; GFX9-GISEL-NEXT: s_cselect_b32 s5, 1, 0
+; GFX9-GISEL-NEXT: s_sub_i32 s8, 1, s3
+; GFX9-GISEL-NEXT: s_lshl_b32 s6, s3, 12
+; GFX9-GISEL-NEXT: s_max_i32 s8, s8, 0
+; GFX9-GISEL-NEXT: s_or_b32 s6, s4, s6
+; GFX9-GISEL-NEXT: s_min_i32 s8, s8, 13
+; GFX9-GISEL-NEXT: s_bitset1_b32 s4, 12
+; GFX9-GISEL-NEXT: s_lshl_b32 s5, s5, 9
+; GFX9-GISEL-NEXT: s_lshr_b32 s9, s4, s8
+; GFX9-GISEL-NEXT: s_or_b32 s5, s5, 0x7c00
+; GFX9-GISEL-NEXT: s_lshl_b32 s8, s9, s8
+; GFX9-GISEL-NEXT: s_cmp_lg_u32 s8, s4
+; GFX9-GISEL-NEXT: s_cselect_b32 s4, 1, 0
+; GFX9-GISEL-NEXT: s_or_b32 s4, s9, s4
+; GFX9-GISEL-NEXT: s_cmp_lt_i32 s3, 1
+; GFX9-GISEL-NEXT: s_cselect_b32 s4, s4, s6
+; GFX9-GISEL-NEXT: s_and_b32 s6, s4, 7
+; GFX9-GISEL-NEXT: s_lshr_b32 s4, s4, 2
+; GFX9-GISEL-NEXT: s_cmp_eq_u32 s6, 3
+; GFX9-GISEL-NEXT: s_cselect_b32 s8, 1, 0
+; GFX9-GISEL-NEXT: s_cmp_gt_i32 s6, 5
+; GFX9-GISEL-NEXT: s_cselect_b32 s6, 1, 0
+; GFX9-GISEL-NEXT: s_or_b32 s6, s8, s6
+; GFX9-GISEL-NEXT: s_add_i32 s4, s4, s6
+; GFX9-GISEL-NEXT: s_cmp_gt_i32 s3, 30
+; GFX9-GISEL-NEXT: s_cselect_b32 s4, 0x7c00, s4
+; GFX9-GISEL-NEXT: s_cmpk_eq_i32 s3, 0x40f
+; GFX9-GISEL-NEXT: s_cselect_b32 s3, s5, s4
+; GFX9-GISEL-NEXT: s_lshr_b32 s4, s7, 16
+; GFX9-GISEL-NEXT: s_and_b32 s4, s4, 0x8000
+; GFX9-GISEL-NEXT: s_or_b32 s3, s4, s3
+; GFX9-GISEL-NEXT: s_pack_ll_b32_b16 s2, s2, s3
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX9-GISEL-NEXT: s_mov_b32 s2, -1
+; GFX9-GISEL-NEXT: s_mov_b32 s3, 0xf000
+; GFX9-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GFX9-GISEL-NEXT: s_endpgm
+;
+; GFX950-SDAG-LABEL: fptrunc_v2f64_to_v2f16:
+; GFX950-SDAG: ; %bb.0: ; %entry
+; GFX950-SDAG-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x24
+; GFX950-SDAG-NEXT: s_mov_b32 s3, 0xf000
+; GFX950-SDAG-NEXT: s_mov_b32 s2, -1
+; GFX950-SDAG-NEXT: s_mov_b32 s6, s2
+; GFX950-SDAG-NEXT: s_mov_b32 s7, s3
+; GFX950-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX950-SDAG-NEXT: s_mov_b32 s4, s10
+; GFX950-SDAG-NEXT: s_mov_b32 s5, s11
+; GFX950-SDAG-NEXT: buffer_load_dwordx4 v[0:3], off, s[4:7], 0
+; GFX950-SDAG-NEXT: s_mov_b32 s0, s8
+; GFX950-SDAG-NEXT: s_mov_b32 s1, s9
+; GFX950-SDAG-NEXT: s_movk_i32 s4, 0x7e00
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: v_readfirstlane_b32 s5, v3
+; GFX950-SDAG-NEXT: s_and_b32 s7, s5, 0x1ff
+; GFX950-SDAG-NEXT: v_readfirstlane_b32 s6, v1
+; GFX950-SDAG-NEXT: v_or_b32_e32 v1, s7, v2
+; GFX950-SDAG-NEXT: s_lshr_b32 s8, s5, 8
+; GFX950-SDAG-NEXT: s_bfe_u32 s9, s5, 0xb0014
+; GFX950-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1
+; GFX950-SDAG-NEXT: s_and_b32 s7, s8, 0xffe
+; GFX950-SDAG-NEXT: s_sub_i32 s8, 0x3f1, s9
+; GFX950-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GFX950-SDAG-NEXT: v_med3_i32 v2, s8, 0, 13
+; GFX950-SDAG-NEXT: v_readfirstlane_b32 s8, v1
+; GFX950-SDAG-NEXT: s_or_b32 s7, s7, s8
+; GFX950-SDAG-NEXT: v_readfirstlane_b32 s10, v2
+; GFX950-SDAG-NEXT: s_or_b32 s8, s7, 0x1000
+; GFX950-SDAG-NEXT: s_lshr_b32 s11, s8, s10
+; GFX950-SDAG-NEXT: s_lshl_b32 s10, s11, s10
+; GFX950-SDAG-NEXT: s_cmp_lg_u32 s10, s8
+; GFX950-SDAG-NEXT: s_cselect_b32 s8, 1, 0
+; GFX950-SDAG-NEXT: s_addk_i32 s9, 0xfc10
+; GFX950-SDAG-NEXT: s_lshl_b32 s10, s9, 12
+; GFX950-SDAG-NEXT: s_or_b32 s8, s11, s8
+; GFX950-SDAG-NEXT: s_or_b32 s10, s7, s10
+; GFX950-SDAG-NEXT: s_cmp_lt_i32 s9, 1
+; GFX950-SDAG-NEXT: s_cselect_b32 s8, s8, s10
+; GFX950-SDAG-NEXT: s_and_b32 s10, s8, 7
+; GFX950-SDAG-NEXT: s_cmp_gt_i32 s10, 5
+; GFX950-SDAG-NEXT: s_cselect_b32 s11, 1, 0
+; GFX950-SDAG-NEXT: s_cmp_eq_u32 s10, 3
+; GFX950-SDAG-NEXT: s_cselect_b32 s10, 1, 0
+; GFX950-SDAG-NEXT: s_lshr_b32 s8, s8, 2
+; GFX950-SDAG-NEXT: s_or_b32 s10, s10, s11
+; GFX950-SDAG-NEXT: s_add_i32 s8, s8, s10
+; GFX950-SDAG-NEXT: s_cmp_lt_i32 s9, 31
+; GFX950-SDAG-NEXT: s_cselect_b32 s8, s8, 0x7c00
+; GFX950-SDAG-NEXT: s_cmp_lg_u32 s7, 0
+; GFX950-SDAG-NEXT: s_cselect_b32 s7, s4, 0x7c00
+; GFX950-SDAG-NEXT: s_cmpk_eq_i32 s9, 0x40f
+; GFX950-SDAG-NEXT: s_cselect_b32 s7, s7, s8
+; GFX950-SDAG-NEXT: s_and_b32 s8, s6, 0x1ff
+; GFX950-SDAG-NEXT: v_or_b32_e32 v0, s8, v0
+; GFX950-SDAG-NEXT: s_lshr_b32 s5, s5, 16
+; GFX950-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
+; GFX950-SDAG-NEXT: s_lshr_b32 s9, s6, 8
+; GFX950-SDAG-NEXT: s_bfe_u32 s10, s6, 0xb0014
+; GFX950-SDAG-NEXT: s_and_b32 s5, s5, 0x8000
+; GFX950-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX950-SDAG-NEXT: s_and_b32 s8, s9, 0xffe
+; GFX950-SDAG-NEXT: s_sub_i32 s9, 0x3f1, s10
+; GFX950-SDAG-NEXT: s_or_b32 s5, s5, s7
+; GFX950-SDAG-NEXT: v_readfirstlane_b32 s7, v0
+; GFX950-SDAG-NEXT: v_med3_i32 v1, s9, 0, 13
+; GFX950-SDAG-NEXT: s_or_b32 s7, s8, s7
+; GFX950-SDAG-NEXT: v_readfirstlane_b32 s9, v1
+; GFX950-SDAG-NEXT: s_or_b32 s8, s7, 0x1000
+; GFX950-SDAG-NEXT: s_lshr_b32 s11, s8, s9
+; GFX950-SDAG-NEXT: s_lshl_b32 s9, s11, s9
+; GFX950-SDAG-NEXT: s_cmp_lg_u32 s9, s8
+; GFX950-SDAG-NEXT: s_cselect_b32 s8, 1, 0
+; GFX950-SDAG-NEXT: s_addk_i32 s10, 0xfc10
+; GFX950-SDAG-NEXT: s_lshl_b32 s9, s10, 12
+; GFX950-SDAG-NEXT: s_or_b32 s8, s11, s8
+; GFX950-SDAG-NEXT: s_or_b32 s9, s7, s9
+; GFX950-SDAG-NEXT: s_cmp_lt_i32 s10, 1
+; GFX950-SDAG-NEXT: s_cselect_b32 s8, s8, s9
+; GFX950-SDAG-NEXT: s_and_b32 s9, s8, 7
+; GFX950-SDAG-NEXT: s_cmp_gt_i32 s9, 5
+; GFX950-SDAG-NEXT: s_cselect_b32 s11, 1, 0
+; GFX950-SDAG-NEXT: s_cmp_eq_u32 s9, 3
+; GFX950-SDAG-NEXT: s_cselect_b32 s9, 1, 0
+; GFX950-SDAG-NEXT: s_lshr_b32 s8, s8, 2
+; GFX950-SDAG-NEXT: s_or_b32 s9, s9, s11
+; GFX950-SDAG-NEXT: s_add_i32 s8, s8, s9
+; GFX950-SDAG-NEXT: s_cmp_lt_i32 s10, 31
+; GFX950-SDAG-NEXT: s_cselect_b32 s8, s8, 0x7c00
+; GFX950-SDAG-NEXT: s_cmp_lg_u32 s7, 0
+; GFX950-SDAG-NEXT: s_cselect_b32 s4, s4, 0x7c00
+; GFX950-SDAG-NEXT: s_cmpk_eq_i32 s10, 0x40f
+; GFX950-SDAG-NEXT: s_cselect_b32 s4, s4, s8
+; GFX950-SDAG-NEXT: s_lshr_b32 s6, s6, 16
+; GFX950-SDAG-NEXT: s_and_b32 s6, s6, 0x8000
+; GFX950-SDAG-NEXT: s_or_b32 s4, s6, s4
+; GFX950-SDAG-NEXT: s_pack_ll_b32_b16 s4, s4, s5
+; GFX950-SDAG-NEXT: v_mov_b32_e32 v0, s4
+; GFX950-SDAG-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GFX950-SDAG-NEXT: s_endpgm
+;
+; GFX950-GISEL-LABEL: fptrunc_v2f64_to_v2f16:
+; GFX950-GISEL: ; %bb.0: ; %entry
+; GFX950-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX950-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX950-GISEL-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0
+; GFX950-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX950-GISEL-NEXT: s_bfe_u32 s2, s5, 0xb0014
+; GFX950-GISEL-NEXT: s_lshr_b32 s3, s5, 8
+; GFX950-GISEL-NEXT: s_and_b32 s8, s5, 0x1ff
+; GFX950-GISEL-NEXT: s_addk_i32 s2, 0xfc10
+; GFX950-GISEL-NEXT: s_and_b32 s3, s3, 0xffe
+; GFX950-GISEL-NEXT: s_or_b32 s4, s8, s4
+; GFX950-GISEL-NEXT: s_cmp_lg_u32 s4, 0
+; GFX950-GISEL-NEXT: s_cselect_b32 s4, 1, 0
+; GFX950-GISEL-NEXT: s_or_b32 s3, s3, s4
+; GFX950-GISEL-NEXT: s_cmp_lg_u32 s3, 0
+; GFX950-GISEL-NEXT: s_cselect_b32 s4, 1, 0
+; GFX950-GISEL-NEXT: s_sub_i32 s9, 1, s2
+; GFX950-GISEL-NEXT: s_lshl_b32 s8, s2, 12
+; GFX950-GISEL-NEXT: s_max_i32 s9, s9, 0
+; GFX950-GISEL-NEXT: s_or_b32 s8, s3, s8
+; GFX950-GISEL-NEXT: s_min_i32 s9, s9, 13
+; GFX950-GISEL-NEXT: s_bitset1_b32 s3, 12
+; GFX950-GISEL-NEXT: s_lshl_b32 s4, s4, 9
+; GFX950-GISEL-NEXT: s_lshr_b32 s10, s3, s9
+; GFX950-GISEL-NEXT: s_or_b32 s4, s4, 0x7c00
+; GFX950-GISEL-NEXT: s_lshl_b32 s9, s10, s9
+; GFX950-GISEL-NEXT: s_cmp_lg_u32 s9, s3
+; GFX950-GISEL-NEXT: s_cselect_b32 s3, 1, 0
+; GFX950-GISEL-NEXT: s_or_b32 s3, s10, s3
+; GFX950-GISEL-NEXT: s_cmp_lt_i32 s2, 1
+; GFX950-GISEL-NEXT: s_cselect_b32 s3, s3, s8
+; GFX950-GISEL-NEXT: s_and_b32 s8, s3, 7
+; GFX950-GISEL-NEXT: s_lshr_b32 s3, s3, 2
+; GFX950-GISEL-NEXT: s_cmp_eq_u32 s8, 3
+; GFX950-GISEL-NEXT: s_cselect_b32 s9, 1, 0
+; GFX950-GISEL-NEXT: s_cmp_gt_i32 s8, 5
+; GFX950-GISEL-NEXT: s_cselect_b32 s8, 1, 0
+; GFX950-GISEL-NEXT: s_or_b32 s8, s9, s8
+; GFX950-GISEL-NEXT: s_add_i32 s3, s3, s8
+; GFX950-GISEL-NEXT: s_cmp_gt_i32 s2, 30
+; GFX950-GISEL-NEXT: s_cselect_b32 s3, 0x7c00, s3
+; GFX950-GISEL-NEXT: s_cmpk_eq_i32 s2, 0x40f
+; GFX950-GISEL-NEXT: s_cselect_b32 s2, s4, s3
+; GFX950-GISEL-NEXT: s_lshr_b32 s3, s5, 16
+; GFX950-GISEL-NEXT: s_and_b32 s3, s3, 0x8000
+; GFX950-GISEL-NEXT: s_or_b32 s2, s3, s2
+; GFX950-GISEL-NEXT: s_bfe_u32 s3, s7, 0xb0014
+; GFX950-GISEL-NEXT: s_lshr_b32 s4, s7, 8
+; GFX950-GISEL-NEXT: s_and_b32 s5, s7, 0x1ff
+; GFX950-GISEL-NEXT: s_addk_i32 s3, 0xfc10
+; GFX950-GISEL-NEXT: s_and_b32 s4, s4, 0xffe
+; GFX950-GISEL-NEXT: s_or_b32 s5, s5, s6
+; GFX950-GISEL-NEXT: s_cmp_lg_u32 s5, 0
+; GFX950-GISEL-NEXT: s_cselect_b32 s5, 1, 0
+; GFX950-GISEL-NEXT: s_or_b32 s4, s4, s5
+; GFX950-GISEL-NEXT: s_cmp_lg_u32 s4, 0
+; GFX950-GISEL-NEXT: s_cselect_b32 s5, 1, 0
+; GFX950-GISEL-NEXT: s_sub_i32 s8, 1, s3
+; GFX950-GISEL-NEXT: s_lshl_b32 s6, s3, 12
+; GFX950-GISEL-NEXT: s_max_i32 s8, s8, 0
+; GFX950-GISEL-NEXT: s_or_b32 s6, s4, s6
+; GFX950-GISEL-NEXT: s_min_i32 s8, s8, 13
+; GFX950-GISEL-NEXT: s_bitset1_b32 s4, 12
+; GFX950-GISEL-NEXT: s_lshl_b32 s5, s5, 9
+; GFX950-GISEL-NEXT: s_lshr_b32 s9, s4, s8
+; GFX950-GISEL-NEXT: s_or_b32 s5, s5, 0x7c00
+; GFX950-GISEL-NEXT: s_lshl_b32 s8, s9, s8
+; GFX950-GISEL-NEXT: s_cmp_lg_u32 s8, s4
+; GFX950-GISEL-NEXT: s_cselect_b32 s4, 1, 0
+; GFX950-GISEL-NEXT: s_or_b32 s4, s9, s4
+; GFX950-GISEL-NEXT: s_cmp_lt_i32 s3, 1
+; GFX950-GISEL-NEXT: s_cselect_b32 s4, s4, s6
+; GFX950-GISEL-NEXT: s_and_b32 s6, s4, 7
+; GFX950-GISEL-NEXT: s_lshr_b32 s4, s4, 2
+; GFX950-GISEL-NEXT: s_cmp_eq_u32 s6, 3
+; GFX950-GISEL-NEXT: s_cselect_b32 s8, 1, 0
+; GFX950-GISEL-NEXT: s_cmp_gt_i32 s6, 5
+; GFX950-GISEL-NEXT: s_cselect_b32 s6, 1, 0
+; GFX950-GISEL-NEXT: s_or_b32 s6, s8, s6
+; GFX950-GISEL-NEXT: s_add_i32 s4, s4, s6
+; GFX950-GISEL-NEXT: s_cmp_gt_i32 s3, 30
+; GFX950-GISEL-NEXT: s_cselect_b32 s4, 0x7c00, s4
+; GFX950-GISEL-NEXT: s_cmpk_eq_i32 s3, 0x40f
+; GFX950-GISEL-NEXT: s_cselect_b32 s3, s5, s4
+; GFX950-GISEL-NEXT: s_lshr_b32 s4, s7, 16
+; GFX950-GISEL-NEXT: s_and_b32 s4, s4, 0x8000
+; GFX950-GISEL-NEXT: s_or_b32 s3, s4, s3
+; GFX950-GISEL-NEXT: s_pack_ll_b32_b16 s2, s2, s3
+; GFX950-GISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX950-GISEL-NEXT: s_mov_b32 s2, -1
+; GFX950-GISEL-NEXT: s_mov_b32 s3, 0xf000
+; GFX950-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GFX950-GISEL-NEXT: s_endpgm
+;
+; GFX11-SDAG-TRUE16-LABEL: fptrunc_v2f64_to_v2f16:
+; GFX11-SDAG-TRUE16: ; %bb.0: ; %entry
+; GFX11-SDAG-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s6, -1
+; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s7, 0x31016000
+; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s10, s6
+; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s11, s7
+; GFX11-SDAG-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s8, s2
+; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s9, s3
+; GFX11-SDAG-TRUE16-NEXT: buffer_load_b128 v[0:3], off, s[8:11], 0
+; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-SDAG-TRUE16-NEXT: v_readfirstlane_b32 s2, v3
+; GFX11-SDAG-TRUE16-NEXT: s_and_b32 s3, s2, 0x1ff
+; GFX11-SDAG-TRUE16-NEXT: s_lshr_b32 s5, s2, 8
+; GFX11-SDAG-TRUE16-NEXT: v_or_b32_e32 v2, s3, v2
+; GFX11-SDAG-TRUE16-NEXT: s_bfe_u32 s3, s2, 0xb0014
+; GFX11-SDAG-TRUE16-NEXT: s_and_b32 s5, s5, 0xffe
+; GFX11-SDAG-TRUE16-NEXT: s_sub_i32 s4, 0x3f1, s3
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
+; GFX11-SDAG-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v2
+; GFX11-SDAG-TRUE16-NEXT: v_med3_i32 v3, s4, 0, 13
+; GFX11-SDAG-TRUE16-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo
+; GFX11-SDAG-TRUE16-NEXT: v_readfirstlane_b32 s8, v3
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
+; GFX11-SDAG-TRUE16-NEXT: v_readfirstlane_b32 s4, v2
+; GFX11-SDAG-TRUE16-NEXT: s_or_b32 s4, s5, s4
+; GFX11-SDAG-TRUE16-NEXT: s_or_b32 s5, s4, 0x1000
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-SDAG-TRUE16-NEXT: s_lshr_b32 s9, s5, s8
+; GFX11-SDAG-TRUE16-NEXT: s_lshl_b32 s8, s9, s8
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_4) | instid1(SALU_CYCLE_1)
+; GFX11-SDAG-TRUE16-NEXT: s_cmp_lg_u32 s8, s5
+; GFX11-SDAG-TRUE16-NEXT: s_cselect_b32 s5, 1, 0
+; GFX11-SDAG-TRUE16-NEXT: s_addk_i32 s3, 0xfc10
+; GFX11-SDAG-TRUE16-NEXT: s_or_b32 s5, s9, s5
+; GFX11-SDAG-TRUE16-NEXT: s_lshl_b32 s8, s3, 12
+; GFX11-SDAG-TRUE16-NEXT: s_or_b32 s8, s4, s8
+; GFX11-SDAG-TRUE16-NEXT: s_cmp_lt_i32 s3, 1
+; GFX11-SDAG-TRUE16-NEXT: s_cselect_b32 s5, s5, s8
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-SDAG-TRUE16-NEXT: s_and_b32 s8, s5, 7
+; GFX11-SDAG-TRUE16-NEXT: s_cmp_gt_i32 s8, 5
+; GFX11-SDAG-TRUE16-NEXT: s_cselect_b32 s9, 1, 0
+; GFX11-SDAG-TRUE16-NEXT: s_cmp_eq_u32 s8, 3
+; GFX11-SDAG-TRUE16-NEXT: s_cselect_b32 s8, 1, 0
+; GFX11-SDAG-TRUE16-NEXT: s_lshr_b32 s5, s5, 2
+; GFX11-SDAG-TRUE16-NEXT: s_or_b32 s8, s8, s9
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-SDAG-TRUE16-NEXT: s_add_i32 s5, s5, s8
+; GFX11-SDAG-TRUE16-NEXT: s_cmp_lt_i32 s3, 31
+; GFX11-SDAG-TRUE16-NEXT: s_movk_i32 s8, 0x7e00
+; GFX11-SDAG-TRUE16-NEXT: s_cselect_b32 s5, s5, 0x7c00
+; GFX11-SDAG-TRUE16-NEXT: s_cmp_lg_u32 s4, 0
+; GFX11-SDAG-TRUE16-NEXT: v_readfirstlane_b32 s4, v1
+; GFX11-SDAG-TRUE16-NEXT: s_cselect_b32 s9, s8, 0x7c00
+; GFX11-SDAG-TRUE16-NEXT: s_cmpk_eq_i32 s3, 0x40f
+; GFX11-SDAG-TRUE16-NEXT: s_cselect_b32 s3, s9, s5
+; GFX11-SDAG-TRUE16-NEXT: s_and_b32 s5, s4, 0x1ff
+; GFX11-SDAG-TRUE16-NEXT: s_lshr_b32 s10, s4, 8
+; GFX11-SDAG-TRUE16-NEXT: v_or_b32_e32 v0, s5, v0
+; GFX11-SDAG-TRUE16-NEXT: s_bfe_u32 s5, s4, 0xb0014
+; GFX11-SDAG-TRUE16-NEXT: s_and_b32 s10, s10, 0xffe
+; GFX11-SDAG-TRUE16-NEXT: s_sub_i32 s9, 0x3f1, s5
+; GFX11-SDAG-TRUE16-NEXT: s_lshr_b32 s2, s2, 16
+; GFX11-SDAG-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-SDAG-TRUE16-NEXT: v_med3_i32 v1, s9, 0, 13
+; GFX11-SDAG-TRUE16-NEXT: s_and_b32 s2, s2, 0x8000
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-SDAG-TRUE16-NEXT: s_or_b32 s2, s2, s3
+; GFX11-SDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX11-SDAG-TRUE16-NEXT: v_readfirstlane_b32 s11, v1
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
+; GFX11-SDAG-TRUE16-NEXT: v_readfirstlane_b32 s9, v0
+; GFX11-SDAG-TRUE16-NEXT: s_or_b32 s9, s10, s9
+; GFX11-SDAG-TRUE16-NEXT: s_or_b32 s10, s9, 0x1000
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-SDAG-TRUE16-NEXT: s_lshr_b32 s12, s10, s11
+; GFX11-SDAG-TRUE16-NEXT: s_lshl_b32 s11, s12, s11
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_4) | instid1(SALU_CYCLE_1)
+; GFX11-SDAG-TRUE16-NEXT: s_cmp_lg_u32 s11, s10
+; GFX11-SDAG-TRUE16-NEXT: s_cselect_b32 s3, 1, 0
+; GFX11-SDAG-TRUE16-NEXT: s_addk_i32 s5, 0xfc10
+; GFX11-SDAG-TRUE16-NEXT: s_or_b32 s3, s12, s3
+; GFX11-SDAG-TRUE16-NEXT: s_lshl_b32 s10, s5, 12
+; GFX11-SDAG-TRUE16-NEXT: s_or_b32 s10, s9, s10
+; GFX11-SDAG-TRUE16-NEXT: s_cmp_lt_i32 s5, 1
+; GFX11-SDAG-TRUE16-NEXT: s_cselect_b32 s3, s3, s10
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-SDAG-TRUE16-NEXT: s_and_b32 s10, s3, 7
+; GFX11-SDAG-TRUE16-NEXT: s_cmp_gt_i32 s10, 5
+; GFX11-SDAG-TRUE16-NEXT: s_cselect_b32 s11, 1, 0
+; GFX11-SDAG-TRUE16-NEXT: s_cmp_eq_u32 s10, 3
+; GFX11-SDAG-TRUE16-NEXT: s_cselect_b32 s10, 1, 0
+; GFX11-SDAG-TRUE16-NEXT: s_lshr_b32 s3, s3, 2
+; GFX11-SDAG-TRUE16-NEXT: s_or_b32 s10, s10, s11
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-SDAG-TRUE16-NEXT: s_add_i32 s3, s3, s10
+; GFX11-SDAG-TRUE16-NEXT: s_cmp_lt_i32 s5, 31
+; GFX11-SDAG-TRUE16-NEXT: s_cselect_b32 s3, s3, 0x7c00
+; GFX11-SDAG-TRUE16-NEXT: s_cmp_lg_u32 s9, 0
+; GFX11-SDAG-TRUE16-NEXT: s_cselect_b32 s8, s8, 0x7c00
+; GFX11-SDAG-TRUE16-NEXT: s_cmpk_eq_i32 s5, 0x40f
+; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s5, s1
+; GFX11-SDAG-TRUE16-NEXT: s_cselect_b32 s3, s8, s3
+; GFX11-SDAG-TRUE16-NEXT: s_lshr_b32 s4, s4, 16
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-SDAG-TRUE16-NEXT: s_and_b32 s4, s4, 0x8000
+; GFX11-SDAG-TRUE16-NEXT: s_or_b32 s3, s4, s3
+; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s4, s0
+; GFX11-SDAG-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s3, s2
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b32_e32 v0, s2
+; GFX11-SDAG-TRUE16-NEXT: buffer_store_b32 v0, off, s[4:7], 0
+; GFX11-SDAG-TRUE16-NEXT: s_endpgm
+;
+; GFX11-SDAG-FAKE16-LABEL: fptrunc_v2f64_to_v2f16:
+; GFX11-SDAG-FAKE16: ; %bb.0: ; %entry
+; GFX11-SDAG-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s6, -1
+; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s7, 0x31016000
+; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s10, s6
+; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s11, s7
+; GFX11-SDAG-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s8, s2
+; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s9, s3
+; GFX11-SDAG-FAKE16-NEXT: buffer_load_b128 v[0:3], off, s[8:11], 0
+; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-SDAG-FAKE16-NEXT: v_readfirstlane_b32 s2, v3
+; GFX11-SDAG-FAKE16-NEXT: s_and_b32 s3, s2, 0x1ff
+; GFX11-SDAG-FAKE16-NEXT: s_lshr_b32 s5, s2, 8
+; GFX11-SDAG-FAKE16-NEXT: v_or_b32_e32 v2, s3, v2
+; GFX11-SDAG-FAKE16-NEXT: s_bfe_u32 s3, s2, 0xb0014
+; GFX11-SDAG-FAKE16-NEXT: s_and_b32 s5, s5, 0xffe
+; GFX11-SDAG-FAKE16-NEXT: s_sub_i32 s4, 0x3f1, s3
+; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
+; GFX11-SDAG-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v2
+; GFX11-SDAG-FAKE16-NEXT: v_med3_i32 v3, s4, 0, 13
+; GFX11-SDAG-FAKE16-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT: v_readfirstlane_b32 s8, v3
+; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
+; GFX11-SDAG-FAKE16-NEXT: v_readfirstlane_b32 s4, v2
+; GFX11-SDAG-FAKE16-NEXT: s_or_b32 s4, s5, s4
+; GFX11-SDAG-FAKE16-NEXT: s_or_b32 s5, s4, 0x1000
+; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-SDAG-FAKE16-NEXT: s_lshr_b32 s9, s5, s8
+; GFX11-SDAG-FAKE16-NEXT: s_lshl_b32 s8, s9, s8
+; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_4) | instid1(SALU_CYCLE_1)
+; GFX11-SDAG-FAKE16-NEXT: s_cmp_lg_u32 s8, s5
+; GFX11-SDAG-FAKE16-NEXT: s_cselect_b32 s5, 1, 0
+; GFX11-SDAG-FAKE16-NEXT: s_addk_i32 s3, 0xfc10
+; GFX11-SDAG-FAKE16-NEXT: s_or_b32 s5, s9, s5
+; GFX11-SDAG-FAKE16-NEXT: s_lshl_b32 s8, s3, 12
+; GFX11-SDAG-FAKE16-NEXT: s_or_b32 s8, s4, s8
+; GFX11-SDAG-FAKE16-NEXT: s_cmp_lt_i32 s3, 1
+; GFX11-SDAG-FAKE16-NEXT: s_cselect_b32 s5, s5, s8
+; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-SDAG-FAKE16-NEXT: s_and_b32 s8, s5, 7
+; GFX11-SDAG-FAKE16-NEXT: s_cmp_gt_i32 s8, 5
+; GFX11-SDAG-FAKE16-NEXT: s_cselect_b32 s9, 1, 0
+; GFX11-SDAG-FAKE16-NEXT: s_cmp_eq_u32 s8, 3
+; GFX11-SDAG-FAKE16-NEXT: s_cselect_b32 s8, 1, 0
+; GFX11-SDAG-FAKE16-NEXT: s_lshr_b32 s5, s5, 2
+; GFX11-SDAG-FAKE16-NEXT: s_or_b32 s8, s8, s9
+; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-SDAG-FAKE16-NEXT: s_add_i32 s5, s5, s8
+; GFX11-SDAG-FAKE16-NEXT: s_cmp_lt_i32 s3, 31
+; GFX11-SDAG-FAKE16-NEXT: s_movk_i32 s8, 0x7e00
+; GFX11-SDAG-FAKE16-NEXT: s_cselect_b32 s5, s5, 0x7c00
+; GFX11-SDAG-FAKE16-NEXT: s_cmp_lg_u32 s4, 0
+; GFX11-SDAG-FAKE16-NEXT: v_readfirstlane_b32 s4, v1
+; GFX11-SDAG-FAKE16-NEXT: s_cselect_b32 s9, s8, 0x7c00
+; GFX11-SDAG-FAKE16-NEXT: s_cmpk_eq_i32 s3, 0x40f
+; GFX11-SDAG-FAKE16-NEXT: s_cselect_b32 s3, s9, s5
+; GFX11-SDAG-FAKE16-NEXT: s_and_b32 s5, s4, 0x1ff
+; GFX11-SDAG-FAKE16-NEXT: s_lshr_b32 s10, s4, 8
+; GFX11-SDAG-FAKE16-NEXT: v_or_b32_e32 v0, s5, v0
+; GFX11-SDAG-FAKE16-NEXT: s_bfe_u32 s5, s4, 0xb0014
+; GFX11-SDAG-FAKE16-NEXT: s_and_b32 s10, s10, 0xffe
+; GFX11-SDAG-FAKE16-NEXT: s_sub_i32 s9, 0x3f1, s5
+; GFX11-SDAG-FAKE16-NEXT: s_lshr_b32 s2, s2, 16
+; GFX11-SDAG-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-SDAG-FAKE16-NEXT: v_med3_i32 v1, s9, 0, 13
+; GFX11-SDAG-FAKE16-NEXT: s_and_b32 s2, s2, 0x8000
+; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-SDAG-FAKE16-NEXT: s_or_b32 s2, s2, s3
+; GFX11-SDAG-FAKE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT: v_readfirstlane_b32 s11, v1
+; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
+; GFX11-SDAG-FAKE16-NEXT: v_readfirstlane_b32 s9, v0
+; GFX11-SDAG-FAKE16-NEXT: s_or_b32 s9, s10, s9
+; GFX11-SDAG-FAKE16-NEXT: s_or_b32 s10, s9, 0x1000
+; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-SDAG-FAKE16-NEXT: s_lshr_b32 s12, s10, s11
+; GFX11-SDAG-FAKE16-NEXT: s_lshl_b32 s11, s12, s11
+; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_4) | instid1(SALU_CYCLE_1)
+; GFX11-SDAG-FAKE16-NEXT: s_cmp_lg_u32 s11, s10
+; GFX11-SDAG-FAKE16-NEXT: s_cselect_b32 s3, 1, 0
+; GFX11-SDAG-FAKE16-NEXT: s_addk_i32 s5, 0xfc10
+; GFX11-SDAG-FAKE16-NEXT: s_or_b32 s3, s12, s3
+; GFX11-SDAG-FAKE16-NEXT: s_lshl_b32 s10, s5, 12
+; GFX11-SDAG-FAKE16-NEXT: s_or_b32 s10, s9, s10
+; GFX11-SDAG-FAKE16-NEXT: s_cmp_lt_i32 s5, 1
+; GFX11-SDAG-FAKE16-NEXT: s_cselect_b32 s3, s3, s10
+; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-SDAG-FAKE16-NEXT: s_and_b32 s10, s3, 7
+; GFX11-SDAG-FAKE16-NEXT: s_cmp_gt_i32 s10, 5
+; GFX11-SDAG-FAKE16-NEXT: s_cselect_b32 s11, 1, 0
+; GFX11-SDAG-FAKE16-NEXT: s_cmp_eq_u32 s10, 3
+; GFX11-SDAG-FAKE16-NEXT: s_cselect_b32 s10, 1, 0
+; GFX11-SDAG-FAKE16-NEXT: s_lshr_b32 s3, s3, 2
+; GFX11-SDAG-FAKE16-NEXT: s_or_b32 s10, s10, s11
+; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-SDAG-FAKE16-NEXT: s_add_i32 s3, s3, s10
+; GFX11-SDAG-FAKE16-NEXT: s_cmp_lt_i32 s5, 31
+; GFX11-SDAG-FAKE16-NEXT: s_cselect_b32 s3, s3, 0x7c00
+; GFX11-SDAG-FAKE16-NEXT: s_cmp_lg_u32 s9, 0
+; GFX11-SDAG-FAKE16-NEXT: s_cselect_b32 s8, s8, 0x7c00
+; GFX11-SDAG-FAKE16-NEXT: s_cmpk_eq_i32 s5, 0x40f
+; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s5, s1
+; GFX11-SDAG-FAKE16-NEXT: s_cselect_b32 s3, s8, s3
+; GFX11-SDAG-FAKE16-NEXT: s_lshr_b32 s4, s4, 16
+; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-SDAG-FAKE16-NEXT: s_and_b32 s4, s4, 0x8000
+; GFX11-SDAG-FAKE16-NEXT: s_or_b32 s3, s4, s3
+; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s4, s0
+; GFX11-SDAG-FAKE16-NEXT: s_pack_ll_b32_b16 s2, s3, s2
+; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-SDAG-FAKE16-NEXT: v_mov_b32_e32 v0, s2
+; GFX11-SDAG-FAKE16-NEXT: buffer_store_b32 v0, off, s[4:7], 0
+; GFX11-SDAG-FAKE16-NEXT: s_endpgm
+;
+; GFX11-GISEL-TRUE16-LABEL: fptrunc_v2f64_to_v2f16:
+; GFX11-GISEL-TRUE16: ; %bb.0: ; %entry
+; GFX11-GISEL-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-GISEL-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-TRUE16-NEXT: s_load_b128 s[4:7], s[2:3], 0x0
+; GFX11-GISEL-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-TRUE16-NEXT: s_and_b32 s8, s5, 0x1ff
+; GFX11-GISEL-TRUE16-NEXT: s_bfe_u32 s2, s5, 0xb0014
+; GFX11-GISEL-TRUE16-NEXT: s_lshr_b32 s3, s5, 8
+; GFX11-GISEL-TRUE16-NEXT: s_or_b32 s4, s8, s4
+; GFX11-GISEL-TRUE16-NEXT: s_addk_i32 s2, 0xfc10
+; GFX11-GISEL-TRUE16-NEXT: s_and_b32 s3, s3, 0xffe
+; GFX11-GISEL-TRUE16-NEXT: s_cmp_lg_u32 s4, 0
+; GFX11-GISEL-TRUE16-NEXT: s_cselect_b32 s4, 1, 0
+; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-GISEL-TRUE16-NEXT: s_or_b32 s3, s3, s4
+; GFX11-GISEL-TRUE16-NEXT: s_cmp_lg_u32 s3, 0
+; GFX11-GISEL-TRUE16-NEXT: s_cselect_b32 s4, 1, 0
+; GFX11-GISEL-TRUE16-NEXT: s_sub_i32 s8, 1, s2
+; GFX11-GISEL-TRUE16-NEXT: s_or_b32 s10, s3, 0x1000
+; GFX11-GISEL-TRUE16-NEXT: s_max_i32 s8, s8, 0
+; GFX11-GISEL-TRUE16-NEXT: s_lshl_b32 s9, s2, 12
+; GFX11-GISEL-TRUE16-NEXT: s_min_i32 s8, s8, 13
+; GFX11-GISEL-TRUE16-NEXT: s_lshl_b32 s4, s4, 9
+; GFX11-GISEL-TRUE16-NEXT: s_lshr_b32 s11, s10, s8
+; GFX11-GISEL-TRUE16-NEXT: s_or_b32 s3, s3, s9
+; GFX11-GISEL-TRUE16-NEXT: s_lshl_b32 s8, s11, s8
+; GFX11-GISEL-TRUE16-NEXT: s_or_b32 s4, s4, 0x7c00
+; GFX11-GISEL-TRUE16-NEXT: s_cmp_lg_u32 s8, s10
+; GFX11-GISEL-TRUE16-NEXT: s_cselect_b32 s8, 1, 0
+; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
+; GFX11-GISEL-TRUE16-NEXT: s_or_b32 s8, s11, s8
+; GFX11-GISEL-TRUE16-NEXT: s_cmp_lt_i32 s2, 1
+; GFX11-GISEL-TRUE16-NEXT: s_cselect_b32 s3, s8, s3
+; GFX11-GISEL-TRUE16-NEXT: s_and_b32 s8, s3, 7
+; GFX11-GISEL-TRUE16-NEXT: s_lshr_b32 s3, s3, 2
+; GFX11-GISEL-TRUE16-NEXT: s_cmp_eq_u32 s8, 3
+; GFX11-GISEL-TRUE16-NEXT: s_cselect_b32 s9, 1, 0
+; GFX11-GISEL-TRUE16-NEXT: s_cmp_gt_i32 s8, 5
+; GFX11-GISEL-TRUE16-NEXT: s_cselect_b32 s8, 1, 0
+; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-GISEL-TRUE16-NEXT: s_or_b32 s8, s9, s8
+; GFX11-GISEL-TRUE16-NEXT: s_add_i32 s3, s3, s8
+; GFX11-GISEL-TRUE16-NEXT: s_cmp_gt_i32 s2, 30
+; GFX11-GISEL-TRUE16-NEXT: s_cselect_b32 s3, 0x7c00, s3
+; GFX11-GISEL-TRUE16-NEXT: s_cmpk_eq_i32 s2, 0x40f
+; GFX11-GISEL-TRUE16-NEXT: s_cselect_b32 s2, s4, s3
+; GFX11-GISEL-TRUE16-NEXT: s_lshr_b32 s3, s5, 16
+; GFX11-GISEL-TRUE16-NEXT: s_and_b32 s8, s7, 0x1ff
+; GFX11-GISEL-TRUE16-NEXT: s_bfe_u32 s4, s7, 0xb0014
+; GFX11-GISEL-TRUE16-NEXT: s_lshr_b32 s5, s7, 8
+; GFX11-GISEL-TRUE16-NEXT: s_and_b32 s3, s3, 0x8000
+; GFX11-GISEL-TRUE16-NEXT: s_or_b32 s6, s8, s6
+; GFX11-GISEL-TRUE16-NEXT: s_addk_i32 s4, 0xfc10
+; GFX11-GISEL-TRUE16-NEXT: s_and_b32 s5, s5, 0xffe
+; GFX11-GISEL-TRUE16-NEXT: s_or_b32 s2, s3, s2
+; GFX11-GISEL-TRUE16-NEXT: s_cmp_lg_u32 s6, 0
+; GFX11-GISEL-TRUE16-NEXT: s_cselect_b32 s3, 1, 0
+; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-GISEL-TRUE16-NEXT: s_or_b32 s3, s5, s3
+; GFX11-GISEL-TRUE16-NEXT: s_cmp_lg_u32 s3, 0
+; GFX11-GISEL-TRUE16-NEXT: s_cselect_b32 s5, 1, 0
+; GFX11-GISEL-TRUE16-NEXT: s_sub_i32 s6, 1, s4
+; GFX11-GISEL-TRUE16-NEXT: s_or_b32 s9, s3, 0x1000
+; GFX11-GISEL-TRUE16-NEXT: s_max_i32 s6, s6, 0
+; GFX11-GISEL-TRUE16-NEXT: s_lshl_b32 s8, s4, 12
+; GFX11-GISEL-TRUE16-NEXT: s_min_i32 s6, s6, 13
+; GFX11-GISEL-TRUE16-NEXT: s_lshl_b32 s5, s5, 9
+; GFX11-GISEL-TRUE16-NEXT: s_lshr_b32 s10, s9, s6
+; GFX11-GISEL-TRUE16-NEXT: s_or_b32 s3, s3, s8
+; GFX11-GISEL-TRUE16-NEXT: s_lshl_b32 s6, s10, s6
+; GFX11-GISEL-TRUE16-NEXT: s_or_b32 s5, s5, 0x7c00
+; GFX11-GISEL-TRUE16-NEXT: s_cmp_lg_u32 s6, s9
+; GFX11-GISEL-TRUE16-NEXT: s_cselect_b32 s6, 1, 0
+; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
+; GFX11-GISEL-TRUE16-NEXT: s_or_b32 s6, s10, s6
+; GFX11-GISEL-TRUE16-NEXT: s_cmp_lt_i32 s4, 1
+; GFX11-GISEL-TRUE16-NEXT: s_cselect_b32 s3, s6, s3
+; GFX11-GISEL-TRUE16-NEXT: s_and_b32 s6, s3, 7
+; GFX11-GISEL-TRUE16-NEXT: s_lshr_b32 s3, s3, 2
+; GFX11-GISEL-TRUE16-NEXT: s_cmp_eq_u32 s6, 3
+; GFX11-GISEL-TRUE16-NEXT: s_cselect_b32 s8, 1, 0
+; GFX11-GISEL-TRUE16-NEXT: s_cmp_gt_i32 s6, 5
+; GFX11-GISEL-TRUE16-NEXT: s_cselect_b32 s6, 1, 0
+; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-GISEL-TRUE16-NEXT: s_or_b32 s6, s8, s6
+; GFX11-GISEL-TRUE16-NEXT: s_add_i32 s3, s3, s6
+; GFX11-GISEL-TRUE16-NEXT: s_cmp_gt_i32 s4, 30
+; GFX11-GISEL-TRUE16-NEXT: s_cselect_b32 s3, 0x7c00, s3
+; GFX11-GISEL-TRUE16-NEXT: s_cmpk_eq_i32 s4, 0x40f
+; GFX11-GISEL-TRUE16-NEXT: s_cselect_b32 s3, s5, s3
+; GFX11-GISEL-TRUE16-NEXT: s_lshr_b32 s4, s7, 16
+; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-GISEL-TRUE16-NEXT: s_and_b32 s4, s4, 0x8000
+; GFX11-GISEL-TRUE16-NEXT: s_or_b32 s3, s4, s3
+; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-GISEL-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s2, s3
+; GFX11-GISEL-TRUE16-NEXT: s_mov_b32 s3, 0x31016000
+; GFX11-GISEL-TRUE16-NEXT: v_mov_b32_e32 v0, s2
+; GFX11-GISEL-TRUE16-NEXT: s_mov_b32 s2, -1
+; GFX11-GISEL-TRUE16-NEXT: buffer_store_b32 v0, off, s[0:3], 0
+; GFX11-GISEL-TRUE16-NEXT: s_endpgm
+;
+; GFX11-GISEL-FAKE16-LABEL: fptrunc_v2f64_to_v2f16:
+; GFX11-GISEL-FAKE16: ; %bb.0: ; %entry
+; GFX11-GISEL-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-GISEL-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-FAKE16-NEXT: s_load_b128 s[4:7], s[2:3], 0x0
+; GFX11-GISEL-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-FAKE16-NEXT: s_and_b32 s8, s5, 0x1ff
+; GFX11-GISEL-FAKE16-NEXT: s_bfe_u32 s2, s5, 0xb0014
+; GFX11-GISEL-FAKE16-NEXT: s_lshr_b32 s3, s5, 8
+; GFX11-GISEL-FAKE16-NEXT: s_or_b32 s4, s8, s4
+; GFX11-GISEL-FAKE16-NEXT: s_addk_i32 s2, 0xfc10
+; GFX11-GISEL-FAKE16-NEXT: s_and_b32 s3, s3, 0xffe
+; GFX11-GISEL-FAKE16-NEXT: s_cmp_lg_u32 s4, 0
+; GFX11-GISEL-FAKE16-NEXT: s_cselect_b32 s4, 1, 0
+; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-GISEL-FAKE16-NEXT: s_or_b32 s3, s3, s4
+; GFX11-GISEL-FAKE16-NEXT: s_cmp_lg_u32 s3, 0
+; GFX11-GISEL-FAKE16-NEXT: s_cselect_b32 s4, 1, 0
+; GFX11-GISEL-FAKE16-NEXT: s_sub_i32 s8, 1, s2
+; GFX11-GISEL-FAKE16-NEXT: s_or_b32 s10, s3, 0x1000
+; GFX11-GISEL-FAKE16-NEXT: s_max_i32 s8, s8, 0
+; GFX11-GISEL-FAKE16-NEXT: s_lshl_b32 s9, s2, 12
+; GFX11-GISEL-FAKE16-NEXT: s_min_i32 s8, s8, 13
+; GFX11-GISEL-FAKE16-NEXT: s_lshl_b32 s4, s4, 9
+; GFX11-GISEL-FAKE16-NEXT: s_lshr_b32 s11, s10, s8
+; GFX11-GISEL-FAKE16-NEXT: s_or_b32 s3, s3, s9
+; GFX11-GISEL-FAKE16-NEXT: s_lshl_b32 s8, s11, s8
+; GFX11-GISEL-FAKE16-NEXT: s_or_b32 s4, s4, 0x7c00
+; GFX11-GISEL-FAKE16-NEXT: s_cmp_lg_u32 s8, s10
+; GFX11-GISEL-FAKE16-NEXT: s_cselect_b32 s8, 1, 0
+; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
+; GFX11-GISEL-FAKE16-NEXT: s_or_b32 s8, s11, s8
+; GFX11-GISEL-FAKE16-NEXT: s_cmp_lt_i32 s2, 1
+; GFX11-GISEL-FAKE16-NEXT: s_cselect_b32 s3, s8, s3
+; GFX11-GISEL-FAKE16-NEXT: s_and_b32 s8, s3, 7
+; GFX11-GISEL-FAKE16-NEXT: s_lshr_b32 s3, s3, 2
+; GFX11-GISEL-FAKE16-NEXT: s_cmp_eq_u32 s8, 3
+; GFX11-GISEL-FAKE16-NEXT: s_cselect_b32 s9, 1, 0
+; GFX11-GISEL-FAKE16-NEXT: s_cmp_gt_i32 s8, 5
+; GFX11-GISEL-FAKE16-NEXT: s_cselect_b32 s8, 1, 0
+; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-GISEL-FAKE16-NEXT: s_or_b32 s8, s9, s8
+; GFX11-GISEL-FAKE16-NEXT: s_add_i32 s3, s3, s8
+; GFX11-GISEL-FAKE16-NEXT: s_cmp_gt_i32 s2, 30
+; GFX11-GISEL-FAKE16-NEXT: s_cselect_b32 s3, 0x7c00, s3
+; GFX11-GISEL-FAKE16-NEXT: s_cmpk_eq_i32 s2, 0x40f
+; GFX11-GISEL-FAKE16-NEXT: s_cselect_b32 s2, s4, s3
+; GFX11-GISEL-FAKE16-NEXT: s_lshr_b32 s3, s5, 16
+; GFX11-GISEL-FAKE16-NEXT: s_and_b32 s8, s7, 0x1ff
+; GFX11-GISEL-FAKE16-NEXT: s_bfe_u32 s4, s7, 0xb0014
+; GFX11-GISEL-FAKE16-NEXT: s_lshr_b32 s5, s7, 8
+; GFX11-GISEL-FAKE16-NEXT: s_and_b32 s3, s3, 0x8000
+; GFX11-GISEL-FAKE16-NEXT: s_or_b32 s6, s8, s6
+; GFX11-GISEL-FAKE16-NEXT: s_addk_i32 s4, 0xfc10
+; GFX11-GISEL-FAKE16-NEXT: s_and_b32 s5, s5, 0xffe
+; GFX11-GISEL-FAKE16-NEXT: s_or_b32 s2, s3, s2
+; GFX11-GISEL-FAKE16-NEXT: s_cmp_lg_u32 s6, 0
+; GFX11-GISEL-FAKE16-NEXT: s_cselect_b32 s3, 1, 0
+; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-GISEL-FAKE16-NEXT: s_or_b32 s3, s5, s3
+; GFX11-GISEL-FAKE16-NEXT: s_cmp_lg_u32 s3, 0
+; GFX11-GISEL-FAKE16-NEXT: s_cselect_b32 s5, 1, 0
+; GFX11-GISEL-FAKE16-NEXT: s_sub_i32 s6, 1, s4
+; GFX11-GISEL-FAKE16-NEXT: s_or_b32 s9, s3, 0x1000
+; GFX11-GISEL-FAKE16-NEXT: s_max_i32 s6, s6, 0
+; GFX11-GISEL-FAKE16-NEXT: s_lshl_b32 s8, s4, 12
+; GFX11-GISEL-FAKE16-NEXT: s_min_i32 s6, s6, 13
+; GFX11-GISEL-FAKE16-NEXT: s_lshl_b32 s5, s5, 9
+; GFX11-GISEL-FAKE16-NEXT: s_lshr_b32 s10, s9, s6
+; GFX11-GISEL-FAKE16-NEXT: s_or_b32 s3, s3, s8
+; GFX11-GISEL-FAKE16-NEXT: s_lshl_b32 s6, s10, s6
+; GFX11-GISEL-FAKE16-NEXT: s_or_b32 s5, s5, 0x7c00
+; GFX11-GISEL-FAKE16-NEXT: s_cmp_lg_u32 s6, s9
+; GFX11-GISEL-FAKE16-NEXT: s_cselect_b32 s6, 1, 0
+; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
+; GFX11-GISEL-FAKE16-NEXT: s_or_b32 s6, s10, s6
+; GFX11-GISEL-FAKE16-NEXT: s_cmp_lt_i32 s4, 1
+; GFX11-GISEL-FAKE16-NEXT: s_cselect_b32 s3, s6, s3
+; GFX11-GISEL-FAKE16-NEXT: s_and_b32 s6, s3, 7
+; GFX11-GISEL-FAKE16-NEXT: s_lshr_b32 s3, s3, 2
+; GFX11-GISEL-FAKE16-NEXT: s_cmp_eq_u32 s6, 3
+; GFX11-GISEL-FAKE16-NEXT: s_cselect_b32 s8, 1, 0
+; GFX11-GISEL-FAKE16-NEXT: s_cmp_gt_i32 s6, 5
+; GFX11-GISEL-FAKE16-NEXT: s_cselect_b32 s6, 1, 0
+; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-GISEL-FAKE16-NEXT: s_or_b32 s6, s8, s6
+; GFX11-GISEL-FAKE16-NEXT: s_add_i32 s3, s3, s6
+; GFX11-GISEL-FAKE16-NEXT: s_cmp_gt_i32 s4, 30
+; GFX11-GISEL-FAKE16-NEXT: s_cselect_b32 s3, 0x7c00, s3
+; GFX11-GISEL-FAKE16-NEXT: s_cmpk_eq_i32 s4, 0x40f
+; GFX11-GISEL-FAKE16-NEXT: s_cselect_b32 s3, s5, s3
+; GFX11-GISEL-FAKE16-NEXT: s_lshr_b32 s4, s7, 16
+; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-GISEL-FAKE16-NEXT: s_and_b32 s4, s4, 0x8000
+; GFX11-GISEL-FAKE16-NEXT: s_or_b32 s3, s4, s3
+; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-GISEL-FAKE16-NEXT: s_pack_ll_b32_b16 s2, s2, s3
+; GFX11-GISEL-FAKE16-NEXT: s_mov_b32 s3, 0x31016000
+; GFX11-GISEL-FAKE16-NEXT: v_mov_b32_e32 v0, s2
+; GFX11-GISEL-FAKE16-NEXT: s_mov_b32 s2, -1
+; GFX11-GISEL-FAKE16-NEXT: buffer_store_b32 v0, off, s[0:3], 0
+; GFX11-GISEL-FAKE16-NEXT: s_endpgm
+ ptr addrspace(1) %r,
+ ptr addrspace(1) %a) {
+entry:
+ %a.val = load <2 x double>, ptr addrspace(1) %a
+ %r.val = fptrunc <2 x double> %a.val to <2 x half>
+ store <2 x half> %r.val, ptr addrspace(1) %r
+ ret void
+}
+
+define amdgpu_kernel void @fptrunc_v2f64_to_v2f16_afn(
+; SI-SDAG-LABEL: fptrunc_v2f64_to_v2f16_afn:
+; SI-SDAG: ; %bb.0: ; %entry
+; SI-SDAG-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x9
+; SI-SDAG-NEXT: s_mov_b32 s3, 0xf000
+; SI-SDAG-NEXT: s_mov_b32 s2, -1
+; SI-SDAG-NEXT: s_mov_b32 s10, s2
+; SI-SDAG-NEXT: s_mov_b32 s11, s3
+; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; SI-SDAG-NEXT: s_mov_b32 s8, s6
+; SI-SDAG-NEXT: s_mov_b32 s9, s7
+; SI-SDAG-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0
+; SI-SDAG-NEXT: s_movk_i32 s0, 0x7e00
+; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
+; SI-SDAG-NEXT: v_readfirstlane_b32 s1, v3
+; SI-SDAG-NEXT: v_readfirstlane_b32 s6, v1
+; SI-SDAG-NEXT: s_and_b32 s7, s1, 0x1ff
+; SI-SDAG-NEXT: s_lshr_b32 s8, s1, 8
+; SI-SDAG-NEXT: s_bfe_u32 s9, s1, 0xb0014
+; SI-SDAG-NEXT: v_or_b32_e32 v1, s7, v2
+; SI-SDAG-NEXT: s_and_b32 s7, s8, 0xffe
+; SI-SDAG-NEXT: s_sub_i32 s8, 0x3f1, s9
+; SI-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1
+; SI-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; SI-SDAG-NEXT: v_med3_i32 v2, s8, 0, 13
+; SI-SDAG-NEXT: v_readfirstlane_b32 s8, v1
+; SI-SDAG-NEXT: v_readfirstlane_b32 s10, v2
+; SI-SDAG-NEXT: s_or_b32 s7, s7, s8
+; SI-SDAG-NEXT: s_or_b32 s8, s7, 0x1000
+; SI-SDAG-NEXT: s_lshr_b32 s11, s8, s10
+; SI-SDAG-NEXT: s_lshl_b32 s10, s11, s10
+; SI-SDAG-NEXT: s_cmp_lg_u32 s10, s8
+; SI-SDAG-NEXT: s_cselect_b32 s8, 1, 0
+; SI-SDAG-NEXT: s_addk_i32 s9, 0xfc10
+; SI-SDAG-NEXT: s_or_b32 s8, s11, s8
+; SI-SDAG-NEXT: s_lshl_b32 s10, s9, 12
+; SI-SDAG-NEXT: s_or_b32 s10, s7, s10
+; SI-SDAG-NEXT: s_cmp_lt_i32 s9, 1
+; SI-SDAG-NEXT: s_cselect_b32 s8, s8, s10
+; SI-SDAG-NEXT: s_and_b32 s10, s8, 7
+; SI-SDAG-NEXT: s_cmp_gt_i32 s10, 5
+; SI-SDAG-NEXT: s_cselect_b32 s11, 1, 0
+; SI-SDAG-NEXT: s_cmp_eq_u32 s10, 3
+; SI-SDAG-NEXT: s_cselect_b32 s10, 1, 0
+; SI-SDAG-NEXT: s_lshr_b32 s8, s8, 2
+; SI-SDAG-NEXT: s_or_b32 s10, s10, s11
+; SI-SDAG-NEXT: s_add_i32 s8, s8, s10
+; SI-SDAG-NEXT: s_cmp_lt_i32 s9, 31
+; SI-SDAG-NEXT: s_cselect_b32 s8, s8, 0x7c00
+; SI-SDAG-NEXT: s_cmp_lg_u32 s7, 0
+; SI-SDAG-NEXT: s_cselect_b32 s7, s0, 0x7c00
+; SI-SDAG-NEXT: s_cmpk_eq_i32 s9, 0x40f
+; SI-SDAG-NEXT: s_cselect_b32 s7, s7, s8
+; SI-SDAG-NEXT: s_lshr_b32 s1, s1, 16
+; SI-SDAG-NEXT: s_and_b32 s8, s6, 0x1ff
+; SI-SDAG-NEXT: s_lshr_b32 s9, s6, 8
+; SI-SDAG-NEXT: s_bfe_u32 s10, s6, 0xb0014
+; SI-SDAG-NEXT: s_and_b32 s1, s1, 0x8000
+; SI-SDAG-NEXT: v_or_b32_e32 v0, s8, v0
+; SI-SDAG-NEXT: s_and_b32 s8, s9, 0xffe
+; SI-SDAG-NEXT: s_sub_i32 s9, 0x3f1, s10
+; SI-SDAG-NEXT: s_or_b32 s1, s1, s7
+; SI-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
+; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; SI-SDAG-NEXT: v_med3_i32 v1, s9, 0, 13
+; SI-SDAG-NEXT: s_lshl_b32 s1, s1, 16
+; SI-SDAG-NEXT: v_readfirstlane_b32 s7, v0
+; SI-SDAG-NEXT: v_readfirstlane_b32 s9, v1
+; SI-SDAG-NEXT: s_or_b32 s7, s8, s7
+; SI-SDAG-NEXT: s_or_b32 s8, s7, 0x1000
+; SI-SDAG-NEXT: s_lshr_b32 s11, s8, s9
+; SI-SDAG-NEXT: s_lshl_b32 s9, s11, s9
+; SI-SDAG-NEXT: s_cmp_lg_u32 s9, s8
+; SI-SDAG-NEXT: s_cselect_b32 s8, 1, 0
+; SI-SDAG-NEXT: s_addk_i32 s10, 0xfc10
+; SI-SDAG-NEXT: s_or_b32 s8, s11, s8
+; SI-SDAG-NEXT: s_lshl_b32 s9, s10, 12
+; SI-SDAG-NEXT: s_or_b32 s9, s7, s9
+; SI-SDAG-NEXT: s_cmp_lt_i32 s10, 1
+; SI-SDAG-NEXT: s_cselect_b32 s8, s8, s9
+; SI-SDAG-NEXT: s_and_b32 s9, s8, 7
+; SI-SDAG-NEXT: s_cmp_gt_i32 s9, 5
+; SI-SDAG-NEXT: s_cselect_b32 s11, 1, 0
+; SI-SDAG-NEXT: s_cmp_eq_u32 s9, 3
+; SI-SDAG-NEXT: s_cselect_b32 s9, 1, 0
+; SI-SDAG-NEXT: s_lshr_b32 s8, s8, 2
+; SI-SDAG-NEXT: s_or_b32 s9, s9, s11
+; SI-SDAG-NEXT: s_add_i32 s8, s8, s9
+; SI-SDAG-NEXT: s_cmp_lt_i32 s10, 31
+; SI-SDAG-NEXT: s_cselect_b32 s8, s8, 0x7c00
+; SI-SDAG-NEXT: s_cmp_lg_u32 s7, 0
+; SI-SDAG-NEXT: s_cselect_b32 s0, s0, 0x7c00
+; SI-SDAG-NEXT: s_cmpk_eq_i32 s10, 0x40f
+; SI-SDAG-NEXT: s_cselect_b32 s0, s0, s8
+; SI-SDAG-NEXT: s_lshr_b32 s6, s6, 16
+; SI-SDAG-NEXT: s_and_b32 s6, s6, 0x8000
+; SI-SDAG-NEXT: s_or_b32 s0, s6, s0
+; SI-SDAG-NEXT: s_and_b32 s0, s0, 0xffff
+; SI-SDAG-NEXT: s_or_b32 s6, s0, s1
+; SI-SDAG-NEXT: s_mov_b32 s0, s4
+; SI-SDAG-NEXT: s_mov_b32 s1, s5
+; SI-SDAG-NEXT: v_mov_b32_e32 v0, s6
+; SI-SDAG-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; SI-SDAG-NEXT: s_endpgm
+;
+; SI-GISEL-LABEL: fptrunc_v2f64_to_v2f16_afn:
+; SI-GISEL: ; %bb.0: ; %entry
+; SI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
+; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; SI-GISEL-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0
+; SI-GISEL-NEXT: s_mov_b32 s2, -1
+; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
; SI-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[4:5]
; SI-GISEL-NEXT: v_cvt_f32_f64_e32 v1, s[6:7]
; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
@@ -664,29 +3145,111 @@ define amdgpu_kernel void @fptrunc_v2f64_to_v2f16(
; SI-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0
; SI-GISEL-NEXT: s_endpgm
;
-; VI-SDAG-LABEL: fptrunc_v2f64_to_v2f16:
+; VI-SDAG-LABEL: fptrunc_v2f64_to_v2f16_afn:
; VI-SDAG: ; %bb.0: ; %entry
-; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
-; VI-SDAG-NEXT: s_mov_b32 s7, 0xf000
-; VI-SDAG-NEXT: s_mov_b32 s6, -1
-; VI-SDAG-NEXT: s_mov_b32 s10, s6
-; VI-SDAG-NEXT: s_mov_b32 s11, s7
+; VI-SDAG-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x24
+; VI-SDAG-NEXT: s_mov_b32 s3, 0xf000
+; VI-SDAG-NEXT: s_mov_b32 s2, -1
+; VI-SDAG-NEXT: s_mov_b32 s10, s2
+; VI-SDAG-NEXT: s_mov_b32 s11, s3
; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; VI-SDAG-NEXT: s_mov_b32 s8, s2
-; VI-SDAG-NEXT: s_mov_b32 s9, s3
+; VI-SDAG-NEXT: s_mov_b32 s8, s6
+; VI-SDAG-NEXT: s_mov_b32 s9, s7
; VI-SDAG-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0
-; VI-SDAG-NEXT: s_mov_b32 s4, s0
-; VI-SDAG-NEXT: s_mov_b32 s5, s1
+; VI-SDAG-NEXT: s_mov_b32 s0, s4
+; VI-SDAG-NEXT: s_mov_b32 s1, s5
+; VI-SDAG-NEXT: s_movk_i32 s6, 0x7e00
; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
-; VI-SDAG-NEXT: v_cvt_f32_f64_e32 v2, v[2:3]
-; VI-SDAG-NEXT: v_cvt_f32_f64_e32 v0, v[0:1]
-; VI-SDAG-NEXT: v_cvt_f16_f32_sdwa v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD
-; VI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
-; VI-SDAG-NEXT: v_or_b32_e32 v0, v0, v1
-; VI-SDAG-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; VI-SDAG-NEXT: v_readfirstlane_b32 s4, v3
+; VI-SDAG-NEXT: s_and_b32 s7, s4, 0x1ff
+; VI-SDAG-NEXT: v_readfirstlane_b32 s5, v1
+; VI-SDAG-NEXT: v_or_b32_e32 v1, s7, v2
+; VI-SDAG-NEXT: s_lshr_b32 s8, s4, 8
+; VI-SDAG-NEXT: s_bfe_u32 s9, s4, 0xb0014
+; VI-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1
+; VI-SDAG-NEXT: s_and_b32 s7, s8, 0xffe
+; VI-SDAG-NEXT: s_sub_i32 s8, 0x3f1, s9
+; VI-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; VI-SDAG-NEXT: v_med3_i32 v2, s8, 0, 13
+; VI-SDAG-NEXT: v_readfirstlane_b32 s8, v1
+; VI-SDAG-NEXT: s_or_b32 s7, s7, s8
+; VI-SDAG-NEXT: v_readfirstlane_b32 s10, v2
+; VI-SDAG-NEXT: s_or_b32 s8, s7, 0x1000
+; VI-SDAG-NEXT: s_lshr_b32 s11, s8, s10
+; VI-SDAG-NEXT: s_lshl_b32 s10, s11, s10
+; VI-SDAG-NEXT: s_cmp_lg_u32 s10, s8
+; VI-SDAG-NEXT: s_cselect_b32 s8, 1, 0
+; VI-SDAG-NEXT: s_addk_i32 s9, 0xfc10
+; VI-SDAG-NEXT: s_lshl_b32 s10, s9, 12
+; VI-SDAG-NEXT: s_or_b32 s8, s11, s8
+; VI-SDAG-NEXT: s_or_b32 s10, s7, s10
+; VI-SDAG-NEXT: s_cmp_lt_i32 s9, 1
+; VI-SDAG-NEXT: s_cselect_b32 s8, s8, s10
+; VI-SDAG-NEXT: s_and_b32 s10, s8, 7
+; VI-SDAG-NEXT: s_cmp_gt_i32 s10, 5
+; VI-SDAG-NEXT: s_cselect_b32 s11, 1, 0
+; VI-SDAG-NEXT: s_cmp_eq_u32 s10, 3
+; VI-SDAG-NEXT: s_cselect_b32 s10, 1, 0
+; VI-SDAG-NEXT: s_lshr_b32 s8, s8, 2
+; VI-SDAG-NEXT: s_or_b32 s10, s10, s11
+; VI-SDAG-NEXT: s_add_i32 s8, s8, s10
+; VI-SDAG-NEXT: s_cmp_lt_i32 s9, 31
+; VI-SDAG-NEXT: s_cselect_b32 s8, s8, 0x7c00
+; VI-SDAG-NEXT: s_cmp_lg_u32 s7, 0
+; VI-SDAG-NEXT: s_cselect_b32 s7, s6, 0x7c00
+; VI-SDAG-NEXT: s_cmpk_eq_i32 s9, 0x40f
+; VI-SDAG-NEXT: s_cselect_b32 s7, s7, s8
+; VI-SDAG-NEXT: s_and_b32 s8, s5, 0x1ff
+; VI-SDAG-NEXT: v_or_b32_e32 v0, s8, v0
+; VI-SDAG-NEXT: s_lshr_b32 s4, s4, 16
+; VI-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
+; VI-SDAG-NEXT: s_lshr_b32 s9, s5, 8
+; VI-SDAG-NEXT: s_bfe_u32 s10, s5, 0xb0014
+; VI-SDAG-NEXT: s_and_b32 s4, s4, 0x8000
+; VI-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; VI-SDAG-NEXT: s_and_b32 s8, s9, 0xffe
+; VI-SDAG-NEXT: s_sub_i32 s9, 0x3f1, s10
+; VI-SDAG-NEXT: s_or_b32 s4, s4, s7
+; VI-SDAG-NEXT: v_readfirstlane_b32 s7, v0
+; VI-SDAG-NEXT: v_med3_i32 v1, s9, 0, 13
+; VI-SDAG-NEXT: s_or_b32 s7, s8, s7
+; VI-SDAG-NEXT: v_readfirstlane_b32 s9, v1
+; VI-SDAG-NEXT: s_or_b32 s8, s7, 0x1000
+; VI-SDAG-NEXT: s_lshr_b32 s11, s8, s9
+; VI-SDAG-NEXT: s_lshl_b32 s4, s4, 16
+; VI-SDAG-NEXT: s_lshl_b32 s9, s11, s9
+; VI-SDAG-NEXT: s_cmp_lg_u32 s9, s8
+; VI-SDAG-NEXT: s_cselect_b32 s8, 1, 0
+; VI-SDAG-NEXT: s_addk_i32 s10, 0xfc10
+; VI-SDAG-NEXT: s_lshl_b32 s9, s10, 12
+; VI-SDAG-NEXT: s_or_b32 s8, s11, s8
+; VI-SDAG-NEXT: s_or_b32 s9, s7, s9
+; VI-SDAG-NEXT: s_cmp_lt_i32 s10, 1
+; VI-SDAG-NEXT: s_cselect_b32 s8, s8, s9
+; VI-SDAG-NEXT: s_and_b32 s9, s8, 7
+; VI-SDAG-NEXT: s_cmp_gt_i32 s9, 5
+; VI-SDAG-NEXT: s_cselect_b32 s11, 1, 0
+; VI-SDAG-NEXT: s_cmp_eq_u32 s9, 3
+; VI-SDAG-NEXT: s_cselect_b32 s9, 1, 0
+; VI-SDAG-NEXT: s_lshr_b32 s8, s8, 2
+; VI-SDAG-NEXT: s_or_b32 s9, s9, s11
+; VI-SDAG-NEXT: s_add_i32 s8, s8, s9
+; VI-SDAG-NEXT: s_cmp_lt_i32 s10, 31
+; VI-SDAG-NEXT: s_cselect_b32 s8, s8, 0x7c00
+; VI-SDAG-NEXT: s_cmp_lg_u32 s7, 0
+; VI-SDAG-NEXT: s_cselect_b32 s6, s6, 0x7c00
+; VI-SDAG-NEXT: s_cmpk_eq_i32 s10, 0x40f
+; VI-SDAG-NEXT: s_cselect_b32 s6, s6, s8
+; VI-SDAG-NEXT: s_lshr_b32 s5, s5, 16
+; VI-SDAG-NEXT: s_and_b32 s5, s5, 0x8000
+; VI-SDAG-NEXT: s_or_b32 s5, s5, s6
+; VI-SDAG-NEXT: s_and_b32 s5, s5, 0xffff
+; VI-SDAG-NEXT: s_or_b32 s4, s5, s4
+; VI-SDAG-NEXT: v_mov_b32_e32 v0, s4
+; VI-SDAG-NEXT: buffer_store_dword v0, off, s[0:3], 0
; VI-SDAG-NEXT: s_endpgm
;
-; VI-GISEL-LABEL: fptrunc_v2f64_to_v2f16:
+; VI-GISEL-LABEL: fptrunc_v2f64_to_v2f16_afn:
; VI-GISEL: ; %bb.0: ; %entry
; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
@@ -702,29 +3265,109 @@ define amdgpu_kernel void @fptrunc_v2f64_to_v2f16(
; VI-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0
; VI-GISEL-NEXT: s_endpgm
;
-; GFX9-SDAG-LABEL: fptrunc_v2f64_to_v2f16:
+; GFX9-SDAG-LABEL: fptrunc_v2f64_to_v2f16_afn:
; GFX9-SDAG: ; %bb.0: ; %entry
-; GFX9-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX9-SDAG-NEXT: s_mov_b32 s7, 0xf000
-; GFX9-SDAG-NEXT: s_mov_b32 s6, -1
-; GFX9-SDAG-NEXT: s_mov_b32 s10, s6
-; GFX9-SDAG-NEXT: s_mov_b32 s11, s7
+; GFX9-SDAG-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x24
+; GFX9-SDAG-NEXT: s_mov_b32 s3, 0xf000
+; GFX9-SDAG-NEXT: s_mov_b32 s2, -1
+; GFX9-SDAG-NEXT: s_mov_b32 s6, s2
+; GFX9-SDAG-NEXT: s_mov_b32 s7, s3
; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX9-SDAG-NEXT: s_mov_b32 s8, s2
-; GFX9-SDAG-NEXT: s_mov_b32 s9, s3
-; GFX9-SDAG-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0
-; GFX9-SDAG-NEXT: s_mov_b32 s4, s0
-; GFX9-SDAG-NEXT: s_mov_b32 s5, s1
+; GFX9-SDAG-NEXT: s_mov_b32 s4, s10
+; GFX9-SDAG-NEXT: s_mov_b32 s5, s11
+; GFX9-SDAG-NEXT: buffer_load_dwordx4 v[0:3], off, s[4:7], 0
+; GFX9-SDAG-NEXT: s_mov_b32 s0, s8
+; GFX9-SDAG-NEXT: s_mov_b32 s1, s9
+; GFX9-SDAG-NEXT: s_movk_i32 s4, 0x7e00
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX9-SDAG-NEXT: v_cvt_f32_f64_e32 v2, v[2:3]
-; GFX9-SDAG-NEXT: v_cvt_f32_f64_e32 v0, v[0:1]
-; GFX9-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v2
-; GFX9-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX9-SDAG-NEXT: v_pack_b32_f16 v0, v0, v1
-; GFX9-SDAG-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; GFX9-SDAG-NEXT: v_readfirstlane_b32 s5, v3
+; GFX9-SDAG-NEXT: s_and_b32 s7, s5, 0x1ff
+; GFX9-SDAG-NEXT: v_readfirstlane_b32 s6, v1
+; GFX9-SDAG-NEXT: v_or_b32_e32 v1, s7, v2
+; GFX9-SDAG-NEXT: s_lshr_b32 s8, s5, 8
+; GFX9-SDAG-NEXT: s_bfe_u32 s9, s5, 0xb0014
+; GFX9-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1
+; GFX9-SDAG-NEXT: s_and_b32 s7, s8, 0xffe
+; GFX9-SDAG-NEXT: s_sub_i32 s8, 0x3f1, s9
+; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GFX9-SDAG-NEXT: v_med3_i32 v2, s8, 0, 13
+; GFX9-SDAG-NEXT: v_readfirstlane_b32 s8, v1
+; GFX9-SDAG-NEXT: s_or_b32 s7, s7, s8
+; GFX9-SDAG-NEXT: v_readfirstlane_b32 s10, v2
+; GFX9-SDAG-NEXT: s_or_b32 s8, s7, 0x1000
+; GFX9-SDAG-NEXT: s_lshr_b32 s11, s8, s10
+; GFX9-SDAG-NEXT: s_lshl_b32 s10, s11, s10
+; GFX9-SDAG-NEXT: s_cmp_lg_u32 s10, s8
+; GFX9-SDAG-NEXT: s_cselect_b32 s8, 1, 0
+; GFX9-SDAG-NEXT: s_addk_i32 s9, 0xfc10
+; GFX9-SDAG-NEXT: s_lshl_b32 s10, s9, 12
+; GFX9-SDAG-NEXT: s_or_b32 s8, s11, s8
+; GFX9-SDAG-NEXT: s_or_b32 s10, s7, s10
+; GFX9-SDAG-NEXT: s_cmp_lt_i32 s9, 1
+; GFX9-SDAG-NEXT: s_cselect_b32 s8, s8, s10
+; GFX9-SDAG-NEXT: s_and_b32 s10, s8, 7
+; GFX9-SDAG-NEXT: s_cmp_gt_i32 s10, 5
+; GFX9-SDAG-NEXT: s_cselect_b32 s11, 1, 0
+; GFX9-SDAG-NEXT: s_cmp_eq_u32 s10, 3
+; GFX9-SDAG-NEXT: s_cselect_b32 s10, 1, 0
+; GFX9-SDAG-NEXT: s_lshr_b32 s8, s8, 2
+; GFX9-SDAG-NEXT: s_or_b32 s10, s10, s11
+; GFX9-SDAG-NEXT: s_add_i32 s8, s8, s10
+; GFX9-SDAG-NEXT: s_cmp_lt_i32 s9, 31
+; GFX9-SDAG-NEXT: s_cselect_b32 s8, s8, 0x7c00
+; GFX9-SDAG-NEXT: s_cmp_lg_u32 s7, 0
+; GFX9-SDAG-NEXT: s_cselect_b32 s7, s4, 0x7c00
+; GFX9-SDAG-NEXT: s_cmpk_eq_i32 s9, 0x40f
+; GFX9-SDAG-NEXT: s_cselect_b32 s7, s7, s8
+; GFX9-SDAG-NEXT: s_and_b32 s8, s6, 0x1ff
+; GFX9-SDAG-NEXT: v_or_b32_e32 v0, s8, v0
+; GFX9-SDAG-NEXT: s_lshr_b32 s5, s5, 16
+; GFX9-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
+; GFX9-SDAG-NEXT: s_lshr_b32 s9, s6, 8
+; GFX9-SDAG-NEXT: s_bfe_u32 s10, s6, 0xb0014
+; GFX9-SDAG-NEXT: s_and_b32 s5, s5, 0x8000
+; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX9-SDAG-NEXT: s_and_b32 s8, s9, 0xffe
+; GFX9-SDAG-NEXT: s_sub_i32 s9, 0x3f1, s10
+; GFX9-SDAG-NEXT: s_or_b32 s5, s5, s7
+; GFX9-SDAG-NEXT: v_readfirstlane_b32 s7, v0
+; GFX9-SDAG-NEXT: v_med3_i32 v1, s9, 0, 13
+; GFX9-SDAG-NEXT: s_or_b32 s7, s8, s7
+; GFX9-SDAG-NEXT: v_readfirstlane_b32 s9, v1
+; GFX9-SDAG-NEXT: s_or_b32 s8, s7, 0x1000
+; GFX9-SDAG-NEXT: s_lshr_b32 s11, s8, s9
+; GFX9-SDAG-NEXT: s_lshl_b32 s9, s11, s9
+; GFX9-SDAG-NEXT: s_cmp_lg_u32 s9, s8
+; GFX9-SDAG-NEXT: s_cselect_b32 s8, 1, 0
+; GFX9-SDAG-NEXT: s_addk_i32 s10, 0xfc10
+; GFX9-SDAG-NEXT: s_lshl_b32 s9, s10, 12
+; GFX9-SDAG-NEXT: s_or_b32 s8, s11, s8
+; GFX9-SDAG-NEXT: s_or_b32 s9, s7, s9
+; GFX9-SDAG-NEXT: s_cmp_lt_i32 s10, 1
+; GFX9-SDAG-NEXT: s_cselect_b32 s8, s8, s9
+; GFX9-SDAG-NEXT: s_and_b32 s9, s8, 7
+; GFX9-SDAG-NEXT: s_cmp_gt_i32 s9, 5
+; GFX9-SDAG-NEXT: s_cselect_b32 s11, 1, 0
+; GFX9-SDAG-NEXT: s_cmp_eq_u32 s9, 3
+; GFX9-SDAG-NEXT: s_cselect_b32 s9, 1, 0
+; GFX9-SDAG-NEXT: s_lshr_b32 s8, s8, 2
+; GFX9-SDAG-NEXT: s_or_b32 s9, s9, s11
+; GFX9-SDAG-NEXT: s_add_i32 s8, s8, s9
+; GFX9-SDAG-NEXT: s_cmp_lt_i32 s10, 31
+; GFX9-SDAG-NEXT: s_cselect_b32 s8, s8, 0x7c00
+; GFX9-SDAG-NEXT: s_cmp_lg_u32 s7, 0
+; GFX9-SDAG-NEXT: s_cselect_b32 s4, s4, 0x7c00
+; GFX9-SDAG-NEXT: s_cmpk_eq_i32 s10, 0x40f
+; GFX9-SDAG-NEXT: s_cselect_b32 s4, s4, s8
+; GFX9-SDAG-NEXT: s_lshr_b32 s6, s6, 16
+; GFX9-SDAG-NEXT: s_and_b32 s6, s6, 0x8000
+; GFX9-SDAG-NEXT: s_or_b32 s4, s6, s4
+; GFX9-SDAG-NEXT: s_pack_ll_b32_b16 s4, s4, s5
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, s4
+; GFX9-SDAG-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX9-SDAG-NEXT: s_endpgm
;
-; GFX9-GISEL-LABEL: fptrunc_v2f64_to_v2f16:
+; GFX9-GISEL-LABEL: fptrunc_v2f64_to_v2f16_afn:
; GFX9-GISEL: ; %bb.0: ; %entry
; GFX9-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
@@ -740,27 +3383,109 @@ define amdgpu_kernel void @fptrunc_v2f64_to_v2f16(
; GFX9-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX9-GISEL-NEXT: s_endpgm
;
-; GFX950-SDAG-LABEL: fptrunc_v2f64_to_v2f16:
+; GFX950-SDAG-LABEL: fptrunc_v2f64_to_v2f16_afn:
; GFX950-SDAG: ; %bb.0: ; %entry
-; GFX950-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX950-SDAG-NEXT: s_mov_b32 s7, 0xf000
-; GFX950-SDAG-NEXT: s_mov_b32 s6, -1
-; GFX950-SDAG-NEXT: s_mov_b32 s10, s6
-; GFX950-SDAG-NEXT: s_mov_b32 s11, s7
+; GFX950-SDAG-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x24
+; GFX950-SDAG-NEXT: s_mov_b32 s3, 0xf000
+; GFX950-SDAG-NEXT: s_mov_b32 s2, -1
+; GFX950-SDAG-NEXT: s_mov_b32 s6, s2
+; GFX950-SDAG-NEXT: s_mov_b32 s7, s3
; GFX950-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; GFX950-SDAG-NEXT: s_mov_b32 s8, s2
-; GFX950-SDAG-NEXT: s_mov_b32 s9, s3
-; GFX950-SDAG-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0
-; GFX950-SDAG-NEXT: s_mov_b32 s4, s0
-; GFX950-SDAG-NEXT: s_mov_b32 s5, s1
+; GFX950-SDAG-NEXT: s_mov_b32 s4, s10
+; GFX950-SDAG-NEXT: s_mov_b32 s5, s11
+; GFX950-SDAG-NEXT: buffer_load_dwordx4 v[0:3], off, s[4:7], 0
+; GFX950-SDAG-NEXT: s_mov_b32 s0, s8
+; GFX950-SDAG-NEXT: s_mov_b32 s1, s9
+; GFX950-SDAG-NEXT: s_movk_i32 s4, 0x7e00
; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
-; GFX950-SDAG-NEXT: v_cvt_f32_f64_e32 v2, v[2:3]
-; GFX950-SDAG-NEXT: v_cvt_f32_f64_e32 v0, v[0:1]
-; GFX950-SDAG-NEXT: v_cvt_pk_f16_f32 v0, v0, v2
-; GFX950-SDAG-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; GFX950-SDAG-NEXT: v_readfirstlane_b32 s5, v3
+; GFX950-SDAG-NEXT: s_and_b32 s7, s5, 0x1ff
+; GFX950-SDAG-NEXT: v_readfirstlane_b32 s6, v1
+; GFX950-SDAG-NEXT: v_or_b32_e32 v1, s7, v2
+; GFX950-SDAG-NEXT: s_lshr_b32 s8, s5, 8
+; GFX950-SDAG-NEXT: s_bfe_u32 s9, s5, 0xb0014
+; GFX950-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1
+; GFX950-SDAG-NEXT: s_and_b32 s7, s8, 0xffe
+; GFX950-SDAG-NEXT: s_sub_i32 s8, 0x3f1, s9
+; GFX950-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GFX950-SDAG-NEXT: v_med3_i32 v2, s8, 0, 13
+; GFX950-SDAG-NEXT: v_readfirstlane_b32 s8, v1
+; GFX950-SDAG-NEXT: s_or_b32 s7, s7, s8
+; GFX950-SDAG-NEXT: v_readfirstlane_b32 s10, v2
+; GFX950-SDAG-NEXT: s_or_b32 s8, s7, 0x1000
+; GFX950-SDAG-NEXT: s_lshr_b32 s11, s8, s10
+; GFX950-SDAG-NEXT: s_lshl_b32 s10, s11, s10
+; GFX950-SDAG-NEXT: s_cmp_lg_u32 s10, s8
+; GFX950-SDAG-NEXT: s_cselect_b32 s8, 1, 0
+; GFX950-SDAG-NEXT: s_addk_i32 s9, 0xfc10
+; GFX950-SDAG-NEXT: s_lshl_b32 s10, s9, 12
+; GFX950-SDAG-NEXT: s_or_b32 s8, s11, s8
+; GFX950-SDAG-NEXT: s_or_b32 s10, s7, s10
+; GFX950-SDAG-NEXT: s_cmp_lt_i32 s9, 1
+; GFX950-SDAG-NEXT: s_cselect_b32 s8, s8, s10
+; GFX950-SDAG-NEXT: s_and_b32 s10, s8, 7
+; GFX950-SDAG-NEXT: s_cmp_gt_i32 s10, 5
+; GFX950-SDAG-NEXT: s_cselect_b32 s11, 1, 0
+; GFX950-SDAG-NEXT: s_cmp_eq_u32 s10, 3
+; GFX950-SDAG-NEXT: s_cselect_b32 s10, 1, 0
+; GFX950-SDAG-NEXT: s_lshr_b32 s8, s8, 2
+; GFX950-SDAG-NEXT: s_or_b32 s10, s10, s11
+; GFX950-SDAG-NEXT: s_add_i32 s8, s8, s10
+; GFX950-SDAG-NEXT: s_cmp_lt_i32 s9, 31
+; GFX950-SDAG-NEXT: s_cselect_b32 s8, s8, 0x7c00
+; GFX950-SDAG-NEXT: s_cmp_lg_u32 s7, 0
+; GFX950-SDAG-NEXT: s_cselect_b32 s7, s4, 0x7c00
+; GFX950-SDAG-NEXT: s_cmpk_eq_i32 s9, 0x40f
+; GFX950-SDAG-NEXT: s_cselect_b32 s7, s7, s8
+; GFX950-SDAG-NEXT: s_and_b32 s8, s6, 0x1ff
+; GFX950-SDAG-NEXT: v_or_b32_e32 v0, s8, v0
+; GFX950-SDAG-NEXT: s_lshr_b32 s5, s5, 16
+; GFX950-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
+; GFX950-SDAG-NEXT: s_lshr_b32 s9, s6, 8
+; GFX950-SDAG-NEXT: s_bfe_u32 s10, s6, 0xb0014
+; GFX950-SDAG-NEXT: s_and_b32 s5, s5, 0x8000
+; GFX950-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX950-SDAG-NEXT: s_and_b32 s8, s9, 0xffe
+; GFX950-SDAG-NEXT: s_sub_i32 s9, 0x3f1, s10
+; GFX950-SDAG-NEXT: s_or_b32 s5, s5, s7
+; GFX950-SDAG-NEXT: v_readfirstlane_b32 s7, v0
+; GFX950-SDAG-NEXT: v_med3_i32 v1, s9, 0, 13
+; GFX950-SDAG-NEXT: s_or_b32 s7, s8, s7
+; GFX950-SDAG-NEXT: v_readfirstlane_b32 s9, v1
+; GFX950-SDAG-NEXT: s_or_b32 s8, s7, 0x1000
+; GFX950-SDAG-NEXT: s_lshr_b32 s11, s8, s9
+; GFX950-SDAG-NEXT: s_lshl_b32 s9, s11, s9
+; GFX950-SDAG-NEXT: s_cmp_lg_u32 s9, s8
+; GFX950-SDAG-NEXT: s_cselect_b32 s8, 1, 0
+; GFX950-SDAG-NEXT: s_addk_i32 s10, 0xfc10
+; GFX950-SDAG-NEXT: s_lshl_b32 s9, s10, 12
+; GFX950-SDAG-NEXT: s_or_b32 s8, s11, s8
+; GFX950-SDAG-NEXT: s_or_b32 s9, s7, s9
+; GFX950-SDAG-NEXT: s_cmp_lt_i32 s10, 1
+; GFX950-SDAG-NEXT: s_cselect_b32 s8, s8, s9
+; GFX950-SDAG-NEXT: s_and_b32 s9, s8, 7
+; GFX950-SDAG-NEXT: s_cmp_gt_i32 s9, 5
+; GFX950-SDAG-NEXT: s_cselect_b32 s11, 1, 0
+; GFX950-SDAG-NEXT: s_cmp_eq_u32 s9, 3
+; GFX950-SDAG-NEXT: s_cselect_b32 s9, 1, 0
+; GFX950-SDAG-NEXT: s_lshr_b32 s8, s8, 2
+; GFX950-SDAG-NEXT: s_or_b32 s9, s9, s11
+; GFX950-SDAG-NEXT: s_add_i32 s8, s8, s9
+; GFX950-SDAG-NEXT: s_cmp_lt_i32 s10, 31
+; GFX950-SDAG-NEXT: s_cselect_b32 s8, s8, 0x7c00
+; GFX950-SDAG-NEXT: s_cmp_lg_u32 s7, 0
+; GFX950-SDAG-NEXT: s_cselect_b32 s4, s4, 0x7c00
+; GFX950-SDAG-NEXT: s_cmpk_eq_i32 s10, 0x40f
+; GFX950-SDAG-NEXT: s_cselect_b32 s4, s4, s8
+; GFX950-SDAG-NEXT: s_lshr_b32 s6, s6, 16
+; GFX950-SDAG-NEXT: s_and_b32 s6, s6, 0x8000
+; GFX950-SDAG-NEXT: s_or_b32 s4, s6, s4
+; GFX950-SDAG-NEXT: s_pack_ll_b32_b16 s4, s4, s5
+; GFX950-SDAG-NEXT: v_mov_b32_e32 v0, s4
+; GFX950-SDAG-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX950-SDAG-NEXT: s_endpgm
;
-; GFX950-GISEL-LABEL: fptrunc_v2f64_to_v2f16:
+; GFX950-GISEL-LABEL: fptrunc_v2f64_to_v2f16_afn:
; GFX950-GISEL: ; %bb.0: ; %entry
; GFX950-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
; GFX950-GISEL-NEXT: s_waitcnt lgkmcnt(0)
@@ -776,7 +3501,7 @@ define amdgpu_kernel void @fptrunc_v2f64_to_v2f16(
; GFX950-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0
; GFX950-GISEL-NEXT: s_endpgm
;
-; GFX11-SDAG-TRUE16-LABEL: fptrunc_v2f64_to_v2f16:
+; GFX11-SDAG-TRUE16-LABEL: fptrunc_v2f64_to_v2f16_afn:
; GFX11-SDAG-TRUE16: ; %bb.0: ; %entry
; GFX11-SDAG-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s6, -1
@@ -786,21 +3511,113 @@ define amdgpu_kernel void @fptrunc_v2f64_to_v2f16(
; GFX11-SDAG-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s8, s2
; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s9, s3
-; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s4, s0
; GFX11-SDAG-TRUE16-NEXT: buffer_load_b128 v[0:3], off, s[8:11], 0
-; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s5, s1
; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0)
-; GFX11-SDAG-TRUE16-NEXT: v_cvt_f32_f64_e32 v2, v[2:3]
-; GFX11-SDAG-TRUE16-NEXT: v_cvt_f32_f64_e32 v1, v[0:1]
-; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SDAG-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.l, v2
-; GFX11-SDAG-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.h, v1
-; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SDAG-TRUE16-NEXT: v_pack_b32_f16 v0, v0.h, v0.l
+; GFX11-SDAG-TRUE16-NEXT: v_readfirstlane_b32 s2, v3
+; GFX11-SDAG-TRUE16-NEXT: s_and_b32 s3, s2, 0x1ff
+; GFX11-SDAG-TRUE16-NEXT: s_lshr_b32 s5, s2, 8
+; GFX11-SDAG-TRUE16-NEXT: v_or_b32_e32 v2, s3, v2
+; GFX11-SDAG-TRUE16-NEXT: s_bfe_u32 s3, s2, 0xb0014
+; GFX11-SDAG-TRUE16-NEXT: s_and_b32 s5, s5, 0xffe
+; GFX11-SDAG-TRUE16-NEXT: s_sub_i32 s4, 0x3f1, s3
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
+; GFX11-SDAG-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v2
+; GFX11-SDAG-TRUE16-NEXT: v_med3_i32 v3, s4, 0, 13
+; GFX11-SDAG-TRUE16-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo
+; GFX11-SDAG-TRUE16-NEXT: v_readfirstlane_b32 s8, v3
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
+; GFX11-SDAG-TRUE16-NEXT: v_readfirstlane_b32 s4, v2
+; GFX11-SDAG-TRUE16-NEXT: s_or_b32 s4, s5, s4
+; GFX11-SDAG-TRUE16-NEXT: s_or_b32 s5, s4, 0x1000
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-SDAG-TRUE16-NEXT: s_lshr_b32 s9, s5, s8
+; GFX11-SDAG-TRUE16-NEXT: s_lshl_b32 s8, s9, s8
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_4) | instid1(SALU_CYCLE_1)
+; GFX11-SDAG-TRUE16-NEXT: s_cmp_lg_u32 s8, s5
+; GFX11-SDAG-TRUE16-NEXT: s_cselect_b32 s5, 1, 0
+; GFX11-SDAG-TRUE16-NEXT: s_addk_i32 s3, 0xfc10
+; GFX11-SDAG-TRUE16-NEXT: s_or_b32 s5, s9, s5
+; GFX11-SDAG-TRUE16-NEXT: s_lshl_b32 s8, s3, 12
+; GFX11-SDAG-TRUE16-NEXT: s_or_b32 s8, s4, s8
+; GFX11-SDAG-TRUE16-NEXT: s_cmp_lt_i32 s3, 1
+; GFX11-SDAG-TRUE16-NEXT: s_cselect_b32 s5, s5, s8
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-SDAG-TRUE16-NEXT: s_and_b32 s8, s5, 7
+; GFX11-SDAG-TRUE16-NEXT: s_cmp_gt_i32 s8, 5
+; GFX11-SDAG-TRUE16-NEXT: s_cselect_b32 s9, 1, 0
+; GFX11-SDAG-TRUE16-NEXT: s_cmp_eq_u32 s8, 3
+; GFX11-SDAG-TRUE16-NEXT: s_cselect_b32 s8, 1, 0
+; GFX11-SDAG-TRUE16-NEXT: s_lshr_b32 s5, s5, 2
+; GFX11-SDAG-TRUE16-NEXT: s_or_b32 s8, s8, s9
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-SDAG-TRUE16-NEXT: s_add_i32 s5, s5, s8
+; GFX11-SDAG-TRUE16-NEXT: s_cmp_lt_i32 s3, 31
+; GFX11-SDAG-TRUE16-NEXT: s_movk_i32 s8, 0x7e00
+; GFX11-SDAG-TRUE16-NEXT: s_cselect_b32 s5, s5, 0x7c00
+; GFX11-SDAG-TRUE16-NEXT: s_cmp_lg_u32 s4, 0
+; GFX11-SDAG-TRUE16-NEXT: v_readfirstlane_b32 s4, v1
+; GFX11-SDAG-TRUE16-NEXT: s_cselect_b32 s9, s8, 0x7c00
+; GFX11-SDAG-TRUE16-NEXT: s_cmpk_eq_i32 s3, 0x40f
+; GFX11-SDAG-TRUE16-NEXT: s_cselect_b32 s3, s9, s5
+; GFX11-SDAG-TRUE16-NEXT: s_and_b32 s5, s4, 0x1ff
+; GFX11-SDAG-TRUE16-NEXT: s_lshr_b32 s10, s4, 8
+; GFX11-SDAG-TRUE16-NEXT: v_or_b32_e32 v0, s5, v0
+; GFX11-SDAG-TRUE16-NEXT: s_bfe_u32 s5, s4, 0xb0014
+; GFX11-SDAG-TRUE16-NEXT: s_and_b32 s10, s10, 0xffe
+; GFX11-SDAG-TRUE16-NEXT: s_sub_i32 s9, 0x3f1, s5
+; GFX11-SDAG-TRUE16-NEXT: s_lshr_b32 s2, s2, 16
+; GFX11-SDAG-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-SDAG-TRUE16-NEXT: v_med3_i32 v1, s9, 0, 13
+; GFX11-SDAG-TRUE16-NEXT: s_and_b32 s2, s2, 0x8000
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-SDAG-TRUE16-NEXT: s_or_b32 s2, s2, s3
+; GFX11-SDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX11-SDAG-TRUE16-NEXT: v_readfirstlane_b32 s11, v1
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
+; GFX11-SDAG-TRUE16-NEXT: v_readfirstlane_b32 s9, v0
+; GFX11-SDAG-TRUE16-NEXT: s_or_b32 s9, s10, s9
+; GFX11-SDAG-TRUE16-NEXT: s_or_b32 s10, s9, 0x1000
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-SDAG-TRUE16-NEXT: s_lshr_b32 s12, s10, s11
+; GFX11-SDAG-TRUE16-NEXT: s_lshl_b32 s11, s12, s11
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_4) | instid1(SALU_CYCLE_1)
+; GFX11-SDAG-TRUE16-NEXT: s_cmp_lg_u32 s11, s10
+; GFX11-SDAG-TRUE16-NEXT: s_cselect_b32 s3, 1, 0
+; GFX11-SDAG-TRUE16-NEXT: s_addk_i32 s5, 0xfc10
+; GFX11-SDAG-TRUE16-NEXT: s_or_b32 s3, s12, s3
+; GFX11-SDAG-TRUE16-NEXT: s_lshl_b32 s10, s5, 12
+; GFX11-SDAG-TRUE16-NEXT: s_or_b32 s10, s9, s10
+; GFX11-SDAG-TRUE16-NEXT: s_cmp_lt_i32 s5, 1
+; GFX11-SDAG-TRUE16-NEXT: s_cselect_b32 s3, s3, s10
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-SDAG-TRUE16-NEXT: s_and_b32 s10, s3, 7
+; GFX11-SDAG-TRUE16-NEXT: s_cmp_gt_i32 s10, 5
+; GFX11-SDAG-TRUE16-NEXT: s_cselect_b32 s11, 1, 0
+; GFX11-SDAG-TRUE16-NEXT: s_cmp_eq_u32 s10, 3
+; GFX11-SDAG-TRUE16-NEXT: s_cselect_b32 s10, 1, 0
+; GFX11-SDAG-TRUE16-NEXT: s_lshr_b32 s3, s3, 2
+; GFX11-SDAG-TRUE16-NEXT: s_or_b32 s10, s10, s11
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-SDAG-TRUE16-NEXT: s_add_i32 s3, s3, s10
+; GFX11-SDAG-TRUE16-NEXT: s_cmp_lt_i32 s5, 31
+; GFX11-SDAG-TRUE16-NEXT: s_cselect_b32 s3, s3, 0x7c00
+; GFX11-SDAG-TRUE16-NEXT: s_cmp_lg_u32 s9, 0
+; GFX11-SDAG-TRUE16-NEXT: s_cselect_b32 s8, s8, 0x7c00
+; GFX11-SDAG-TRUE16-NEXT: s_cmpk_eq_i32 s5, 0x40f
+; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s5, s1
+; GFX11-SDAG-TRUE16-NEXT: s_cselect_b32 s3, s8, s3
+; GFX11-SDAG-TRUE16-NEXT: s_lshr_b32 s4, s4, 16
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-SDAG-TRUE16-NEXT: s_and_b32 s4, s4, 0x8000
+; GFX11-SDAG-TRUE16-NEXT: s_or_b32 s3, s4, s3
+; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s4, s0
+; GFX11-SDAG-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s3, s2
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b32_e32 v0, s2
; GFX11-SDAG-TRUE16-NEXT: buffer_store_b32 v0, off, s[4:7], 0
; GFX11-SDAG-TRUE16-NEXT: s_endpgm
;
-; GFX11-SDAG-FAKE16-LABEL: fptrunc_v2f64_to_v2f16:
+; GFX11-SDAG-FAKE16-LABEL: fptrunc_v2f64_to_v2f16_afn:
; GFX11-SDAG-FAKE16: ; %bb.0: ; %entry
; GFX11-SDAG-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s6, -1
@@ -810,21 +3627,113 @@ define amdgpu_kernel void @fptrunc_v2f64_to_v2f16(
; GFX11-SDAG-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s8, s2
; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s9, s3
-; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s4, s0
; GFX11-SDAG-FAKE16-NEXT: buffer_load_b128 v[0:3], off, s[8:11], 0
-; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s5, s1
; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0)
-; GFX11-SDAG-FAKE16-NEXT: v_cvt_f32_f64_e32 v2, v[2:3]
-; GFX11-SDAG-FAKE16-NEXT: v_cvt_f32_f64_e32 v0, v[0:1]
-; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-SDAG-FAKE16-NEXT: v_cvt_f16_f32_e32 v1, v2
-; GFX11-SDAG-FAKE16-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX11-SDAG-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1
+; GFX11-SDAG-FAKE16-NEXT: v_readfirstlane_b32 s2, v3
+; GFX11-SDAG-FAKE16-NEXT: s_and_b32 s3, s2, 0x1ff
+; GFX11-SDAG-FAKE16-NEXT: s_lshr_b32 s5, s2, 8
+; GFX11-SDAG-FAKE16-NEXT: v_or_b32_e32 v2, s3, v2
+; GFX11-SDAG-FAKE16-NEXT: s_bfe_u32 s3, s2, 0xb0014
+; GFX11-SDAG-FAKE16-NEXT: s_and_b32 s5, s5, 0xffe
+; GFX11-SDAG-FAKE16-NEXT: s_sub_i32 s4, 0x3f1, s3
+; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
+; GFX11-SDAG-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v2
+; GFX11-SDAG-FAKE16-NEXT: v_med3_i32 v3, s4, 0, 13
+; GFX11-SDAG-FAKE16-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT: v_readfirstlane_b32 s8, v3
+; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
+; GFX11-SDAG-FAKE16-NEXT: v_readfirstlane_b32 s4, v2
+; GFX11-SDAG-FAKE16-NEXT: s_or_b32 s4, s5, s4
+; GFX11-SDAG-FAKE16-NEXT: s_or_b32 s5, s4, 0x1000
+; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-SDAG-FAKE16-NEXT: s_lshr_b32 s9, s5, s8
+; GFX11-SDAG-FAKE16-NEXT: s_lshl_b32 s8, s9, s8
+; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_4) | instid1(SALU_CYCLE_1)
+; GFX11-SDAG-FAKE16-NEXT: s_cmp_lg_u32 s8, s5
+; GFX11-SDAG-FAKE16-NEXT: s_cselect_b32 s5, 1, 0
+; GFX11-SDAG-FAKE16-NEXT: s_addk_i32 s3, 0xfc10
+; GFX11-SDAG-FAKE16-NEXT: s_or_b32 s5, s9, s5
+; GFX11-SDAG-FAKE16-NEXT: s_lshl_b32 s8, s3, 12
+; GFX11-SDAG-FAKE16-NEXT: s_or_b32 s8, s4, s8
+; GFX11-SDAG-FAKE16-NEXT: s_cmp_lt_i32 s3, 1
+; GFX11-SDAG-FAKE16-NEXT: s_cselect_b32 s5, s5, s8
+; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-SDAG-FAKE16-NEXT: s_and_b32 s8, s5, 7
+; GFX11-SDAG-FAKE16-NEXT: s_cmp_gt_i32 s8, 5
+; GFX11-SDAG-FAKE16-NEXT: s_cselect_b32 s9, 1, 0
+; GFX11-SDAG-FAKE16-NEXT: s_cmp_eq_u32 s8, 3
+; GFX11-SDAG-FAKE16-NEXT: s_cselect_b32 s8, 1, 0
+; GFX11-SDAG-FAKE16-NEXT: s_lshr_b32 s5, s5, 2
+; GFX11-SDAG-FAKE16-NEXT: s_or_b32 s8, s8, s9
+; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-SDAG-FAKE16-NEXT: s_add_i32 s5, s5, s8
+; GFX11-SDAG-FAKE16-NEXT: s_cmp_lt_i32 s3, 31
+; GFX11-SDAG-FAKE16-NEXT: s_movk_i32 s8, 0x7e00
+; GFX11-SDAG-FAKE16-NEXT: s_cselect_b32 s5, s5, 0x7c00
+; GFX11-SDAG-FAKE16-NEXT: s_cmp_lg_u32 s4, 0
+; GFX11-SDAG-FAKE16-NEXT: v_readfirstlane_b32 s4, v1
+; GFX11-SDAG-FAKE16-NEXT: s_cselect_b32 s9, s8, 0x7c00
+; GFX11-SDAG-FAKE16-NEXT: s_cmpk_eq_i32 s3, 0x40f
+; GFX11-SDAG-FAKE16-NEXT: s_cselect_b32 s3, s9, s5
+; GFX11-SDAG-FAKE16-NEXT: s_and_b32 s5, s4, 0x1ff
+; GFX11-SDAG-FAKE16-NEXT: s_lshr_b32 s10, s4, 8
+; GFX11-SDAG-FAKE16-NEXT: v_or_b32_e32 v0, s5, v0
+; GFX11-SDAG-FAKE16-NEXT: s_bfe_u32 s5, s4, 0xb0014
+; GFX11-SDAG-FAKE16-NEXT: s_and_b32 s10, s10, 0xffe
+; GFX11-SDAG-FAKE16-NEXT: s_sub_i32 s9, 0x3f1, s5
+; GFX11-SDAG-FAKE16-NEXT: s_lshr_b32 s2, s2, 16
+; GFX11-SDAG-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-SDAG-FAKE16-NEXT: v_med3_i32 v1, s9, 0, 13
+; GFX11-SDAG-FAKE16-NEXT: s_and_b32 s2, s2, 0x8000
+; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-SDAG-FAKE16-NEXT: s_or_b32 s2, s2, s3
+; GFX11-SDAG-FAKE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT: v_readfirstlane_b32 s11, v1
+; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
+; GFX11-SDAG-FAKE16-NEXT: v_readfirstlane_b32 s9, v0
+; GFX11-SDAG-FAKE16-NEXT: s_or_b32 s9, s10, s9
+; GFX11-SDAG-FAKE16-NEXT: s_or_b32 s10, s9, 0x1000
+; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-SDAG-FAKE16-NEXT: s_lshr_b32 s12, s10, s11
+; GFX11-SDAG-FAKE16-NEXT: s_lshl_b32 s11, s12, s11
+; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_4) | instid1(SALU_CYCLE_1)
+; GFX11-SDAG-FAKE16-NEXT: s_cmp_lg_u32 s11, s10
+; GFX11-SDAG-FAKE16-NEXT: s_cselect_b32 s3, 1, 0
+; GFX11-SDAG-FAKE16-NEXT: s_addk_i32 s5, 0xfc10
+; GFX11-SDAG-FAKE16-NEXT: s_or_b32 s3, s12, s3
+; GFX11-SDAG-FAKE16-NEXT: s_lshl_b32 s10, s5, 12
+; GFX11-SDAG-FAKE16-NEXT: s_or_b32 s10, s9, s10
+; GFX11-SDAG-FAKE16-NEXT: s_cmp_lt_i32 s5, 1
+; GFX11-SDAG-FAKE16-NEXT: s_cselect_b32 s3, s3, s10
+; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-SDAG-FAKE16-NEXT: s_and_b32 s10, s3, 7
+; GFX11-SDAG-FAKE16-NEXT: s_cmp_gt_i32 s10, 5
+; GFX11-SDAG-FAKE16-NEXT: s_cselect_b32 s11, 1, 0
+; GFX11-SDAG-FAKE16-NEXT: s_cmp_eq_u32 s10, 3
+; GFX11-SDAG-FAKE16-NEXT: s_cselect_b32 s10, 1, 0
+; GFX11-SDAG-FAKE16-NEXT: s_lshr_b32 s3, s3, 2
+; GFX11-SDAG-FAKE16-NEXT: s_or_b32 s10, s10, s11
+; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-SDAG-FAKE16-NEXT: s_add_i32 s3, s3, s10
+; GFX11-SDAG-FAKE16-NEXT: s_cmp_lt_i32 s5, 31
+; GFX11-SDAG-FAKE16-NEXT: s_cselect_b32 s3, s3, 0x7c00
+; GFX11-SDAG-FAKE16-NEXT: s_cmp_lg_u32 s9, 0
+; GFX11-SDAG-FAKE16-NEXT: s_cselect_b32 s8, s8, 0x7c00
+; GFX11-SDAG-FAKE16-NEXT: s_cmpk_eq_i32 s5, 0x40f
+; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s5, s1
+; GFX11-SDAG-FAKE16-NEXT: s_cselect_b32 s3, s8, s3
+; GFX11-SDAG-FAKE16-NEXT: s_lshr_b32 s4, s4, 16
+; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-SDAG-FAKE16-NEXT: s_and_b32 s4, s4, 0x8000
+; GFX11-SDAG-FAKE16-NEXT: s_or_b32 s3, s4, s3
+; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s4, s0
+; GFX11-SDAG-FAKE16-NEXT: s_pack_ll_b32_b16 s2, s3, s2
+; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-SDAG-FAKE16-NEXT: v_mov_b32_e32 v0, s2
; GFX11-SDAG-FAKE16-NEXT: buffer_store_b32 v0, off, s[4:7], 0
; GFX11-SDAG-FAKE16-NEXT: s_endpgm
;
-; GFX11-GISEL-TRUE16-LABEL: fptrunc_v2f64_to_v2f16:
+; GFX11-GISEL-TRUE16-LABEL: fptrunc_v2f64_to_v2f16_afn:
; GFX11-GISEL-TRUE16: ; %bb.0: ; %entry
; GFX11-GISEL-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX11-GISEL-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
@@ -842,7 +3751,7 @@ define amdgpu_kernel void @fptrunc_v2f64_to_v2f16(
; GFX11-GISEL-TRUE16-NEXT: buffer_store_b32 v0, off, s[0:3], 0
; GFX11-GISEL-TRUE16-NEXT: s_endpgm
;
-; GFX11-GISEL-FAKE16-LABEL: fptrunc_v2f64_to_v2f16:
+; GFX11-GISEL-FAKE16-LABEL: fptrunc_v2f64_to_v2f16_afn:
; GFX11-GISEL-FAKE16: ; %bb.0: ; %entry
; GFX11-GISEL-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX11-GISEL-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
@@ -863,7 +3772,7 @@ define amdgpu_kernel void @fptrunc_v2f64_to_v2f16(
ptr addrspace(1) %a) {
entry:
%a.val = load <2 x double>, ptr addrspace(1) %a
- %r.val = fptrunc <2 x double> %a.val to <2 x half>
+ %r.val = fptrunc afn <2 x double> %a.val to <2 x half>
store <2 x half> %r.val, ptr addrspace(1) %r
ret void
}
diff --git a/llvm/test/CodeGen/AMDGPU/fptrunc.ll b/llvm/test/CodeGen/AMDGPU/fptrunc.ll
index 2bd3659..4f8eab1 100644
--- a/llvm/test/CodeGen/AMDGPU/fptrunc.ll
+++ b/llvm/test/CodeGen/AMDGPU/fptrunc.ll
@@ -3,17 +3,15 @@
; RUN: llc -mtriple=amdgcn -mcpu=tonga -global-isel=0 -mattr=-flat-for-global < %s | FileCheck -check-prefixes=VI-SDAG,VI-SAFE-SDAG %s
; RUN: llc -mtriple=amdgcn -mcpu=tonga -global-isel=1 -mattr=-flat-for-global < %s | FileCheck -check-prefixes=VI-GISEL,VI-SAFE-GISEL %s
; RUN: llc -mtriple=amdgcn -mcpu=tonga -global-isel=0 -mattr=-flat-for-global -enable-unsafe-fp-math < %s | FileCheck -check-prefixes=VI-SDAG,VI-UNSAFE-SDAG %s
-; RUN: llc -mtriple=amdgcn -mcpu=tonga -global-isel=1 -mattr=-flat-for-global -enable-unsafe-fp-math < %s | FileCheck -check-prefixes=VI-GISEL,VI-UNSAFE-GISEL %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1030 -global-isel=0 -mattr=-flat-for-global < %s | FileCheck -check-prefixes=GFX10-SDAG,GFX10-SAFE-SDAG %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1030 -global-isel=1 -mattr=-flat-for-global < %s | FileCheck -check-prefixes=GFX10-GISEL,GFX10-SAFE-GISEL %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1030 -global-isel=0 -mattr=-flat-for-global -enable-unsafe-fp-math < %s | FileCheck -check-prefixes=GFX10-SDAG,GFX10-UNSAFE-SDAG %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1030 -global-isel=1 -mattr=-flat-for-global -enable-unsafe-fp-math < %s | FileCheck -check-prefixes=GFX10-GISEL,GFX10-UNSAFE-GISEL %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=0 -mattr=-flat-for-global < %s | FileCheck -check-prefixes=GFX11-SDAG,GFX11-SAFE-SDAG %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=1 -mattr=-flat-for-global < %s | FileCheck -check-prefixes=GFX11-GISEL,GFX11-SAFE-GISEL %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=0 -mattr=-flat-for-global,+real-true16 -enable-unsafe-fp-math < %s | FileCheck -check-prefixes=GFX11-SDAG,GFX11-UNSAFE-DAG-TRUE16 %s
; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=0 -mattr=-flat-for-global,-real-true16 -enable-unsafe-fp-math < %s | FileCheck -check-prefixes=GFX11-SDAG,GFX11-UNSAFE-DAG-FAKE16 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=1 -mattr=-flat-for-global,+real-true16 -enable-unsafe-fp-math < %s | FileCheck -check-prefixes=GFX11-GISEL,GFX11-UNSAFE-GISEL-TRUE16 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=1 -mattr=-flat-for-global,-real-true16 -enable-unsafe-fp-math < %s | FileCheck -check-prefixes=GFX11-GISEL,GFX11-UNSAFE-GISEL-FAKE16 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=1 -mattr=-flat-for-global,+real-true16 < %s | FileCheck -check-prefixes=GFX11-GISEL,GFX11-UNSAFE-GISEL-TRUE16 %s
+; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=1 -mattr=-flat-for-global,-real-true16 < %s | FileCheck -check-prefixes=GFX11-GISEL,GFX11-UNSAFE-GISEL-FAKE16 %s
define amdgpu_kernel void @fptrunc_f64_to_f32(ptr addrspace(1) %out, double %in) {
; SI-LABEL: fptrunc_f64_to_f32:
@@ -94,6 +92,85 @@ define amdgpu_kernel void @fptrunc_f64_to_f32(ptr addrspace(1) %out, double %in)
ret void
}
+define amdgpu_kernel void @fptrunc_f64_to_f32_afn(ptr addrspace(1) %out, double %in) {
+; SI-LABEL: fptrunc_f64_to_f32_afn:
+; SI: ; %bb.0:
+; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
+; SI-NEXT: s_mov_b32 s7, 0xf000
+; SI-NEXT: s_mov_b32 s6, -1
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: s_mov_b32 s4, s0
+; SI-NEXT: s_mov_b32 s5, s1
+; SI-NEXT: v_cvt_f32_f64_e32 v0, s[2:3]
+; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; SI-NEXT: s_endpgm
+;
+; VI-SDAG-LABEL: fptrunc_f64_to_f32_afn:
+; VI-SDAG: ; %bb.0:
+; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; VI-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; VI-SDAG-NEXT: s_mov_b32 s6, -1
+; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; VI-SDAG-NEXT: v_cvt_f32_f64_e32 v0, s[2:3]
+; VI-SDAG-NEXT: s_mov_b32 s4, s0
+; VI-SDAG-NEXT: s_mov_b32 s5, s1
+; VI-SDAG-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; VI-SDAG-NEXT: s_endpgm
+;
+; VI-GISEL-LABEL: fptrunc_f64_to_f32_afn:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; VI-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[2:3]
+; VI-GISEL-NEXT: s_mov_b32 s2, -1
+; VI-GISEL-NEXT: s_mov_b32 s3, 0xf000
+; VI-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; VI-GISEL-NEXT: s_endpgm
+;
+; GFX10-SDAG-LABEL: fptrunc_f64_to_f32_afn:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX10-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_cvt_f32_f64_e32 v0, s[2:3]
+; GFX10-SDAG-NEXT: s_mov_b32 s3, 0x31016000
+; GFX10-SDAG-NEXT: s_mov_b32 s2, -1
+; GFX10-SDAG-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GFX10-SDAG-NEXT: s_endpgm
+;
+; GFX10-GISEL-LABEL: fptrunc_f64_to_f32_afn:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[2:3]
+; GFX10-GISEL-NEXT: s_mov_b32 s2, -1
+; GFX10-GISEL-NEXT: s_mov_b32 s3, 0x31016000
+; GFX10-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GFX10-GISEL-NEXT: s_endpgm
+;
+; GFX11-SDAG-LABEL: fptrunc_f64_to_f32_afn:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_cvt_f32_f64_e32 v0, s[2:3]
+; GFX11-SDAG-NEXT: s_mov_b32 s3, 0x31016000
+; GFX11-SDAG-NEXT: s_mov_b32 s2, -1
+; GFX11-SDAG-NEXT: buffer_store_b32 v0, off, s[0:3], 0
+; GFX11-SDAG-NEXT: s_endpgm
+;
+; GFX11-GISEL-LABEL: fptrunc_f64_to_f32_afn:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[2:3]
+; GFX11-GISEL-NEXT: s_mov_b32 s2, -1
+; GFX11-GISEL-NEXT: s_mov_b32 s3, 0x31016000
+; GFX11-GISEL-NEXT: buffer_store_b32 v0, off, s[0:3], 0
+; GFX11-GISEL-NEXT: s_endpgm
+ %result = fptrunc afn double %in to float
+ store float %result, ptr addrspace(1) %out
+ ret void
+}
+
define amdgpu_kernel void @fptrunc_f64_to_f16(ptr addrspace(1) %out, double %in) {
; SI-LABEL: fptrunc_f64_to_f16:
; SI: ; %bb.0:
@@ -203,56 +280,56 @@ define amdgpu_kernel void @fptrunc_f64_to_f16(ptr addrspace(1) %out, double %in)
; VI-SAFE-SDAG-NEXT: buffer_store_short v0, off, s[0:3], 0
; VI-SAFE-SDAG-NEXT: s_endpgm
;
-; VI-SAFE-GISEL-LABEL: fptrunc_f64_to_f16:
-; VI-SAFE-GISEL: ; %bb.0:
-; VI-SAFE-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
-; VI-SAFE-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; VI-SAFE-GISEL-NEXT: s_bfe_u32 s4, s3, 0xb0014
-; VI-SAFE-GISEL-NEXT: s_lshr_b32 s5, s3, 8
-; VI-SAFE-GISEL-NEXT: s_and_b32 s6, s3, 0x1ff
-; VI-SAFE-GISEL-NEXT: s_addk_i32 s4, 0xfc10
-; VI-SAFE-GISEL-NEXT: s_and_b32 s5, s5, 0xffe
-; VI-SAFE-GISEL-NEXT: s_or_b32 s2, s6, s2
-; VI-SAFE-GISEL-NEXT: s_cmp_lg_u32 s2, 0
-; VI-SAFE-GISEL-NEXT: s_cselect_b32 s2, 1, 0
-; VI-SAFE-GISEL-NEXT: s_or_b32 s2, s5, s2
-; VI-SAFE-GISEL-NEXT: s_cmp_lg_u32 s2, 0
-; VI-SAFE-GISEL-NEXT: s_cselect_b32 s5, 1, 0
-; VI-SAFE-GISEL-NEXT: s_sub_i32 s7, 1, s4
-; VI-SAFE-GISEL-NEXT: s_lshl_b32 s6, s4, 12
-; VI-SAFE-GISEL-NEXT: s_max_i32 s7, s7, 0
-; VI-SAFE-GISEL-NEXT: s_or_b32 s6, s2, s6
-; VI-SAFE-GISEL-NEXT: s_min_i32 s7, s7, 13
-; VI-SAFE-GISEL-NEXT: s_bitset1_b32 s2, 12
-; VI-SAFE-GISEL-NEXT: s_lshl_b32 s5, s5, 9
-; VI-SAFE-GISEL-NEXT: s_lshr_b32 s8, s2, s7
-; VI-SAFE-GISEL-NEXT: s_or_b32 s5, s5, 0x7c00
-; VI-SAFE-GISEL-NEXT: s_lshl_b32 s7, s8, s7
-; VI-SAFE-GISEL-NEXT: s_cmp_lg_u32 s7, s2
-; VI-SAFE-GISEL-NEXT: s_cselect_b32 s2, 1, 0
-; VI-SAFE-GISEL-NEXT: s_or_b32 s2, s8, s2
-; VI-SAFE-GISEL-NEXT: s_cmp_lt_i32 s4, 1
-; VI-SAFE-GISEL-NEXT: s_cselect_b32 s2, s2, s6
-; VI-SAFE-GISEL-NEXT: s_and_b32 s6, s2, 7
-; VI-SAFE-GISEL-NEXT: s_lshr_b32 s2, s2, 2
-; VI-SAFE-GISEL-NEXT: s_cmp_eq_u32 s6, 3
-; VI-SAFE-GISEL-NEXT: s_cselect_b32 s7, 1, 0
-; VI-SAFE-GISEL-NEXT: s_cmp_gt_i32 s6, 5
-; VI-SAFE-GISEL-NEXT: s_cselect_b32 s6, 1, 0
-; VI-SAFE-GISEL-NEXT: s_or_b32 s6, s7, s6
-; VI-SAFE-GISEL-NEXT: s_add_i32 s2, s2, s6
-; VI-SAFE-GISEL-NEXT: s_cmp_gt_i32 s4, 30
-; VI-SAFE-GISEL-NEXT: s_cselect_b32 s2, 0x7c00, s2
-; VI-SAFE-GISEL-NEXT: s_cmpk_eq_i32 s4, 0x40f
-; VI-SAFE-GISEL-NEXT: s_cselect_b32 s2, s5, s2
-; VI-SAFE-GISEL-NEXT: s_lshr_b32 s3, s3, 16
-; VI-SAFE-GISEL-NEXT: s_and_b32 s3, s3, 0x8000
-; VI-SAFE-GISEL-NEXT: s_or_b32 s2, s3, s2
-; VI-SAFE-GISEL-NEXT: v_mov_b32_e32 v0, s2
-; VI-SAFE-GISEL-NEXT: s_mov_b32 s2, -1
-; VI-SAFE-GISEL-NEXT: s_mov_b32 s3, 0xf000
-; VI-SAFE-GISEL-NEXT: buffer_store_short v0, off, s[0:3], 0
-; VI-SAFE-GISEL-NEXT: s_endpgm
+; VI-GISEL-LABEL: fptrunc_f64_to_f16:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; VI-GISEL-NEXT: s_bfe_u32 s4, s3, 0xb0014
+; VI-GISEL-NEXT: s_lshr_b32 s5, s3, 8
+; VI-GISEL-NEXT: s_and_b32 s6, s3, 0x1ff
+; VI-GISEL-NEXT: s_addk_i32 s4, 0xfc10
+; VI-GISEL-NEXT: s_and_b32 s5, s5, 0xffe
+; VI-GISEL-NEXT: s_or_b32 s2, s6, s2
+; VI-GISEL-NEXT: s_cmp_lg_u32 s2, 0
+; VI-GISEL-NEXT: s_cselect_b32 s2, 1, 0
+; VI-GISEL-NEXT: s_or_b32 s2, s5, s2
+; VI-GISEL-NEXT: s_cmp_lg_u32 s2, 0
+; VI-GISEL-NEXT: s_cselect_b32 s5, 1, 0
+; VI-GISEL-NEXT: s_sub_i32 s7, 1, s4
+; VI-GISEL-NEXT: s_lshl_b32 s6, s4, 12
+; VI-GISEL-NEXT: s_max_i32 s7, s7, 0
+; VI-GISEL-NEXT: s_or_b32 s6, s2, s6
+; VI-GISEL-NEXT: s_min_i32 s7, s7, 13
+; VI-GISEL-NEXT: s_bitset1_b32 s2, 12
+; VI-GISEL-NEXT: s_lshl_b32 s5, s5, 9
+; VI-GISEL-NEXT: s_lshr_b32 s8, s2, s7
+; VI-GISEL-NEXT: s_or_b32 s5, s5, 0x7c00
+; VI-GISEL-NEXT: s_lshl_b32 s7, s8, s7
+; VI-GISEL-NEXT: s_cmp_lg_u32 s7, s2
+; VI-GISEL-NEXT: s_cselect_b32 s2, 1, 0
+; VI-GISEL-NEXT: s_or_b32 s2, s8, s2
+; VI-GISEL-NEXT: s_cmp_lt_i32 s4, 1
+; VI-GISEL-NEXT: s_cselect_b32 s2, s2, s6
+; VI-GISEL-NEXT: s_and_b32 s6, s2, 7
+; VI-GISEL-NEXT: s_lshr_b32 s2, s2, 2
+; VI-GISEL-NEXT: s_cmp_eq_u32 s6, 3
+; VI-GISEL-NEXT: s_cselect_b32 s7, 1, 0
+; VI-GISEL-NEXT: s_cmp_gt_i32 s6, 5
+; VI-GISEL-NEXT: s_cselect_b32 s6, 1, 0
+; VI-GISEL-NEXT: s_or_b32 s6, s7, s6
+; VI-GISEL-NEXT: s_add_i32 s2, s2, s6
+; VI-GISEL-NEXT: s_cmp_gt_i32 s4, 30
+; VI-GISEL-NEXT: s_cselect_b32 s2, 0x7c00, s2
+; VI-GISEL-NEXT: s_cmpk_eq_i32 s4, 0x40f
+; VI-GISEL-NEXT: s_cselect_b32 s2, s5, s2
+; VI-GISEL-NEXT: s_lshr_b32 s3, s3, 16
+; VI-GISEL-NEXT: s_and_b32 s3, s3, 0x8000
+; VI-GISEL-NEXT: s_or_b32 s2, s3, s2
+; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2
+; VI-GISEL-NEXT: s_mov_b32 s2, -1
+; VI-GISEL-NEXT: s_mov_b32 s3, 0xf000
+; VI-GISEL-NEXT: buffer_store_short v0, off, s[0:3], 0
+; VI-GISEL-NEXT: s_endpgm
;
; VI-UNSAFE-SDAG-LABEL: fptrunc_f64_to_f16:
; VI-UNSAFE-SDAG: ; %bb.0:
@@ -265,17 +342,6 @@ define amdgpu_kernel void @fptrunc_f64_to_f16(ptr addrspace(1) %out, double %in)
; VI-UNSAFE-SDAG-NEXT: buffer_store_short v0, off, s[0:3], 0
; VI-UNSAFE-SDAG-NEXT: s_endpgm
;
-; VI-UNSAFE-GISEL-LABEL: fptrunc_f64_to_f16:
-; VI-UNSAFE-GISEL: ; %bb.0:
-; VI-UNSAFE-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
-; VI-UNSAFE-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; VI-UNSAFE-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[2:3]
-; VI-UNSAFE-GISEL-NEXT: s_mov_b32 s2, -1
-; VI-UNSAFE-GISEL-NEXT: s_mov_b32 s3, 0xf000
-; VI-UNSAFE-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
-; VI-UNSAFE-GISEL-NEXT: buffer_store_short v0, off, s[0:3], 0
-; VI-UNSAFE-GISEL-NEXT: s_endpgm
-;
; GFX10-SAFE-SDAG-LABEL: fptrunc_f64_to_f16:
; GFX10-SAFE-SDAG: ; %bb.0:
; GFX10-SAFE-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
@@ -328,56 +394,56 @@ define amdgpu_kernel void @fptrunc_f64_to_f16(ptr addrspace(1) %out, double %in)
; GFX10-SAFE-SDAG-NEXT: buffer_store_short v0, off, s[0:3], 0
; GFX10-SAFE-SDAG-NEXT: s_endpgm
;
-; GFX10-SAFE-GISEL-LABEL: fptrunc_f64_to_f16:
-; GFX10-SAFE-GISEL: ; %bb.0:
-; GFX10-SAFE-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX10-SAFE-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-SAFE-GISEL-NEXT: s_and_b32 s6, s3, 0x1ff
-; GFX10-SAFE-GISEL-NEXT: s_bfe_u32 s4, s3, 0xb0014
-; GFX10-SAFE-GISEL-NEXT: s_lshr_b32 s5, s3, 8
-; GFX10-SAFE-GISEL-NEXT: s_or_b32 s2, s6, s2
-; GFX10-SAFE-GISEL-NEXT: s_addk_i32 s4, 0xfc10
-; GFX10-SAFE-GISEL-NEXT: s_and_b32 s5, s5, 0xffe
-; GFX10-SAFE-GISEL-NEXT: s_cmp_lg_u32 s2, 0
-; GFX10-SAFE-GISEL-NEXT: s_cselect_b32 s2, 1, 0
-; GFX10-SAFE-GISEL-NEXT: s_or_b32 s2, s5, s2
-; GFX10-SAFE-GISEL-NEXT: s_cmp_lg_u32 s2, 0
-; GFX10-SAFE-GISEL-NEXT: s_cselect_b32 s5, 1, 0
-; GFX10-SAFE-GISEL-NEXT: s_sub_i32 s6, 1, s4
-; GFX10-SAFE-GISEL-NEXT: s_or_b32 s8, s2, 0x1000
-; GFX10-SAFE-GISEL-NEXT: s_max_i32 s6, s6, 0
-; GFX10-SAFE-GISEL-NEXT: s_lshl_b32 s7, s4, 12
-; GFX10-SAFE-GISEL-NEXT: s_min_i32 s6, s6, 13
-; GFX10-SAFE-GISEL-NEXT: s_lshl_b32 s5, s5, 9
-; GFX10-SAFE-GISEL-NEXT: s_lshr_b32 s9, s8, s6
-; GFX10-SAFE-GISEL-NEXT: s_or_b32 s2, s2, s7
-; GFX10-SAFE-GISEL-NEXT: s_lshl_b32 s6, s9, s6
-; GFX10-SAFE-GISEL-NEXT: s_or_b32 s5, s5, 0x7c00
-; GFX10-SAFE-GISEL-NEXT: s_cmp_lg_u32 s6, s8
-; GFX10-SAFE-GISEL-NEXT: s_cselect_b32 s6, 1, 0
-; GFX10-SAFE-GISEL-NEXT: s_or_b32 s6, s9, s6
-; GFX10-SAFE-GISEL-NEXT: s_cmp_lt_i32 s4, 1
-; GFX10-SAFE-GISEL-NEXT: s_cselect_b32 s2, s6, s2
-; GFX10-SAFE-GISEL-NEXT: s_and_b32 s6, s2, 7
-; GFX10-SAFE-GISEL-NEXT: s_lshr_b32 s2, s2, 2
-; GFX10-SAFE-GISEL-NEXT: s_cmp_eq_u32 s6, 3
-; GFX10-SAFE-GISEL-NEXT: s_cselect_b32 s7, 1, 0
-; GFX10-SAFE-GISEL-NEXT: s_cmp_gt_i32 s6, 5
-; GFX10-SAFE-GISEL-NEXT: s_cselect_b32 s6, 1, 0
-; GFX10-SAFE-GISEL-NEXT: s_or_b32 s6, s7, s6
-; GFX10-SAFE-GISEL-NEXT: s_add_i32 s2, s2, s6
-; GFX10-SAFE-GISEL-NEXT: s_cmp_gt_i32 s4, 30
-; GFX10-SAFE-GISEL-NEXT: s_cselect_b32 s2, 0x7c00, s2
-; GFX10-SAFE-GISEL-NEXT: s_cmpk_eq_i32 s4, 0x40f
-; GFX10-SAFE-GISEL-NEXT: s_cselect_b32 s2, s5, s2
-; GFX10-SAFE-GISEL-NEXT: s_lshr_b32 s3, s3, 16
-; GFX10-SAFE-GISEL-NEXT: s_and_b32 s3, s3, 0x8000
-; GFX10-SAFE-GISEL-NEXT: s_or_b32 s2, s3, s2
-; GFX10-SAFE-GISEL-NEXT: s_mov_b32 s3, 0x31016000
-; GFX10-SAFE-GISEL-NEXT: v_mov_b32_e32 v0, s2
-; GFX10-SAFE-GISEL-NEXT: s_mov_b32 s2, -1
-; GFX10-SAFE-GISEL-NEXT: buffer_store_short v0, off, s[0:3], 0
-; GFX10-SAFE-GISEL-NEXT: s_endpgm
+; GFX10-GISEL-LABEL: fptrunc_f64_to_f16:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-GISEL-NEXT: s_and_b32 s6, s3, 0x1ff
+; GFX10-GISEL-NEXT: s_bfe_u32 s4, s3, 0xb0014
+; GFX10-GISEL-NEXT: s_lshr_b32 s5, s3, 8
+; GFX10-GISEL-NEXT: s_or_b32 s2, s6, s2
+; GFX10-GISEL-NEXT: s_addk_i32 s4, 0xfc10
+; GFX10-GISEL-NEXT: s_and_b32 s5, s5, 0xffe
+; GFX10-GISEL-NEXT: s_cmp_lg_u32 s2, 0
+; GFX10-GISEL-NEXT: s_cselect_b32 s2, 1, 0
+; GFX10-GISEL-NEXT: s_or_b32 s2, s5, s2
+; GFX10-GISEL-NEXT: s_cmp_lg_u32 s2, 0
+; GFX10-GISEL-NEXT: s_cselect_b32 s5, 1, 0
+; GFX10-GISEL-NEXT: s_sub_i32 s6, 1, s4
+; GFX10-GISEL-NEXT: s_or_b32 s8, s2, 0x1000
+; GFX10-GISEL-NEXT: s_max_i32 s6, s6, 0
+; GFX10-GISEL-NEXT: s_lshl_b32 s7, s4, 12
+; GFX10-GISEL-NEXT: s_min_i32 s6, s6, 13
+; GFX10-GISEL-NEXT: s_lshl_b32 s5, s5, 9
+; GFX10-GISEL-NEXT: s_lshr_b32 s9, s8, s6
+; GFX10-GISEL-NEXT: s_or_b32 s2, s2, s7
+; GFX10-GISEL-NEXT: s_lshl_b32 s6, s9, s6
+; GFX10-GISEL-NEXT: s_or_b32 s5, s5, 0x7c00
+; GFX10-GISEL-NEXT: s_cmp_lg_u32 s6, s8
+; GFX10-GISEL-NEXT: s_cselect_b32 s6, 1, 0
+; GFX10-GISEL-NEXT: s_or_b32 s6, s9, s6
+; GFX10-GISEL-NEXT: s_cmp_lt_i32 s4, 1
+; GFX10-GISEL-NEXT: s_cselect_b32 s2, s6, s2
+; GFX10-GISEL-NEXT: s_and_b32 s6, s2, 7
+; GFX10-GISEL-NEXT: s_lshr_b32 s2, s2, 2
+; GFX10-GISEL-NEXT: s_cmp_eq_u32 s6, 3
+; GFX10-GISEL-NEXT: s_cselect_b32 s7, 1, 0
+; GFX10-GISEL-NEXT: s_cmp_gt_i32 s6, 5
+; GFX10-GISEL-NEXT: s_cselect_b32 s6, 1, 0
+; GFX10-GISEL-NEXT: s_or_b32 s6, s7, s6
+; GFX10-GISEL-NEXT: s_add_i32 s2, s2, s6
+; GFX10-GISEL-NEXT: s_cmp_gt_i32 s4, 30
+; GFX10-GISEL-NEXT: s_cselect_b32 s2, 0x7c00, s2
+; GFX10-GISEL-NEXT: s_cmpk_eq_i32 s4, 0x40f
+; GFX10-GISEL-NEXT: s_cselect_b32 s2, s5, s2
+; GFX10-GISEL-NEXT: s_lshr_b32 s3, s3, 16
+; GFX10-GISEL-NEXT: s_and_b32 s3, s3, 0x8000
+; GFX10-GISEL-NEXT: s_or_b32 s2, s3, s2
+; GFX10-GISEL-NEXT: s_mov_b32 s3, 0x31016000
+; GFX10-GISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX10-GISEL-NEXT: s_mov_b32 s2, -1
+; GFX10-GISEL-NEXT: buffer_store_short v0, off, s[0:3], 0
+; GFX10-GISEL-NEXT: s_endpgm
;
; GFX10-UNSAFE-SDAG-LABEL: fptrunc_f64_to_f16:
; GFX10-UNSAFE-SDAG: ; %bb.0:
@@ -390,17 +456,6 @@ define amdgpu_kernel void @fptrunc_f64_to_f16(ptr addrspace(1) %out, double %in)
; GFX10-UNSAFE-SDAG-NEXT: buffer_store_short v0, off, s[0:3], 0
; GFX10-UNSAFE-SDAG-NEXT: s_endpgm
;
-; GFX10-UNSAFE-GISEL-LABEL: fptrunc_f64_to_f16:
-; GFX10-UNSAFE-GISEL: ; %bb.0:
-; GFX10-UNSAFE-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
-; GFX10-UNSAFE-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-UNSAFE-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[2:3]
-; GFX10-UNSAFE-GISEL-NEXT: s_mov_b32 s2, -1
-; GFX10-UNSAFE-GISEL-NEXT: s_mov_b32 s3, 0x31016000
-; GFX10-UNSAFE-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
-; GFX10-UNSAFE-GISEL-NEXT: buffer_store_short v0, off, s[0:3], 0
-; GFX10-UNSAFE-GISEL-NEXT: s_endpgm
-;
; GFX11-SAFE-SDAG-LABEL: fptrunc_f64_to_f16:
; GFX11-SAFE-SDAG: ; %bb.0:
; GFX11-SAFE-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
@@ -461,62 +516,368 @@ define amdgpu_kernel void @fptrunc_f64_to_f16(ptr addrspace(1) %out, double %in)
; GFX11-SAFE-SDAG-NEXT: buffer_store_b16 v0, off, s[0:3], 0
; GFX11-SAFE-SDAG-NEXT: s_endpgm
;
-; GFX11-SAFE-GISEL-LABEL: fptrunc_f64_to_f16:
+; GFX11-GISEL-LABEL: fptrunc_f64_to_f16:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-NEXT: s_and_b32 s6, s3, 0x1ff
+; GFX11-GISEL-NEXT: s_bfe_u32 s4, s3, 0xb0014
+; GFX11-GISEL-NEXT: s_lshr_b32 s5, s3, 8
+; GFX11-GISEL-NEXT: s_or_b32 s2, s6, s2
+; GFX11-GISEL-NEXT: s_addk_i32 s4, 0xfc10
+; GFX11-GISEL-NEXT: s_and_b32 s5, s5, 0xffe
+; GFX11-GISEL-NEXT: s_cmp_lg_u32 s2, 0
+; GFX11-GISEL-NEXT: s_cselect_b32 s2, 1, 0
+; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-GISEL-NEXT: s_or_b32 s2, s5, s2
+; GFX11-GISEL-NEXT: s_cmp_lg_u32 s2, 0
+; GFX11-GISEL-NEXT: s_cselect_b32 s5, 1, 0
+; GFX11-GISEL-NEXT: s_sub_i32 s6, 1, s4
+; GFX11-GISEL-NEXT: s_or_b32 s8, s2, 0x1000
+; GFX11-GISEL-NEXT: s_max_i32 s6, s6, 0
+; GFX11-GISEL-NEXT: s_lshl_b32 s7, s4, 12
+; GFX11-GISEL-NEXT: s_min_i32 s6, s6, 13
+; GFX11-GISEL-NEXT: s_lshl_b32 s5, s5, 9
+; GFX11-GISEL-NEXT: s_lshr_b32 s9, s8, s6
+; GFX11-GISEL-NEXT: s_or_b32 s2, s2, s7
+; GFX11-GISEL-NEXT: s_lshl_b32 s6, s9, s6
+; GFX11-GISEL-NEXT: s_or_b32 s5, s5, 0x7c00
+; GFX11-GISEL-NEXT: s_cmp_lg_u32 s6, s8
+; GFX11-GISEL-NEXT: s_cselect_b32 s6, 1, 0
+; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
+; GFX11-GISEL-NEXT: s_or_b32 s6, s9, s6
+; GFX11-GISEL-NEXT: s_cmp_lt_i32 s4, 1
+; GFX11-GISEL-NEXT: s_cselect_b32 s2, s6, s2
+; GFX11-GISEL-NEXT: s_and_b32 s6, s2, 7
+; GFX11-GISEL-NEXT: s_lshr_b32 s2, s2, 2
+; GFX11-GISEL-NEXT: s_cmp_eq_u32 s6, 3
+; GFX11-GISEL-NEXT: s_cselect_b32 s7, 1, 0
+; GFX11-GISEL-NEXT: s_cmp_gt_i32 s6, 5
+; GFX11-GISEL-NEXT: s_cselect_b32 s6, 1, 0
+; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-GISEL-NEXT: s_or_b32 s6, s7, s6
+; GFX11-GISEL-NEXT: s_add_i32 s2, s2, s6
+; GFX11-GISEL-NEXT: s_cmp_gt_i32 s4, 30
+; GFX11-GISEL-NEXT: s_cselect_b32 s2, 0x7c00, s2
+; GFX11-GISEL-NEXT: s_cmpk_eq_i32 s4, 0x40f
+; GFX11-GISEL-NEXT: s_cselect_b32 s2, s5, s2
+; GFX11-GISEL-NEXT: s_lshr_b32 s3, s3, 16
+; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-GISEL-NEXT: s_and_b32 s3, s3, 0x8000
+; GFX11-GISEL-NEXT: s_or_b32 s2, s3, s2
+; GFX11-GISEL-NEXT: s_mov_b32 s3, 0x31016000
+; GFX11-GISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX11-GISEL-NEXT: s_mov_b32 s2, -1
+; GFX11-GISEL-NEXT: buffer_store_b16 v0, off, s[0:3], 0
+; GFX11-GISEL-NEXT: s_endpgm
+;
+; GFX11-UNSAFE-DAG-TRUE16-LABEL: fptrunc_f64_to_f16:
+; GFX11-UNSAFE-DAG-TRUE16: ; %bb.0:
+; GFX11-UNSAFE-DAG-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-UNSAFE-DAG-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-UNSAFE-DAG-TRUE16-NEXT: v_cvt_f32_f64_e32 v0, s[2:3]
+; GFX11-UNSAFE-DAG-TRUE16-NEXT: s_mov_b32 s3, 0x31016000
+; GFX11-UNSAFE-DAG-TRUE16-NEXT: s_mov_b32 s2, -1
+; GFX11-UNSAFE-DAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-UNSAFE-DAG-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.l, v0
+; GFX11-UNSAFE-DAG-TRUE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0
+; GFX11-UNSAFE-DAG-TRUE16-NEXT: s_endpgm
+;
+; GFX11-UNSAFE-DAG-FAKE16-LABEL: fptrunc_f64_to_f16:
+; GFX11-UNSAFE-DAG-FAKE16: ; %bb.0:
+; GFX11-UNSAFE-DAG-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-UNSAFE-DAG-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-UNSAFE-DAG-FAKE16-NEXT: v_cvt_f32_f64_e32 v0, s[2:3]
+; GFX11-UNSAFE-DAG-FAKE16-NEXT: s_mov_b32 s3, 0x31016000
+; GFX11-UNSAFE-DAG-FAKE16-NEXT: s_mov_b32 s2, -1
+; GFX11-UNSAFE-DAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-UNSAFE-DAG-FAKE16-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX11-UNSAFE-DAG-FAKE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0
+; GFX11-UNSAFE-DAG-FAKE16-NEXT: s_endpgm
+ %result = fptrunc double %in to half
+ %result_i16 = bitcast half %result to i16
+ store i16 %result_i16, ptr addrspace(1) %out
+ ret void
+}
+
+define amdgpu_kernel void @fptrunc_f64_to_f16_afn(ptr addrspace(1) %out, double %in) {
+; SI-LABEL: fptrunc_f64_to_f16_afn:
+; SI: ; %bb.0:
+; SI-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x9
+; SI-NEXT: s_mov_b32 s3, 0xf000
+; SI-NEXT: s_movk_i32 s2, 0x7e00
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: s_lshr_b32 s0, s7, 8
+; SI-NEXT: s_and_b32 s1, s7, 0x1ff
+; SI-NEXT: s_and_b32 s8, s0, 0xffe
+; SI-NEXT: s_or_b32 s0, s1, s6
+; SI-NEXT: s_cmp_lg_u32 s0, 0
+; SI-NEXT: s_cselect_b64 s[0:1], -1, 0
+; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
+; SI-NEXT: s_bfe_u32 s0, s7, 0xb0014
+; SI-NEXT: v_readfirstlane_b32 s1, v0
+; SI-NEXT: s_sub_i32 s6, 0x3f1, s0
+; SI-NEXT: s_or_b32 s1, s8, s1
+; SI-NEXT: v_med3_i32 v0, s6, 0, 13
+; SI-NEXT: s_or_b32 s6, s1, 0x1000
+; SI-NEXT: v_readfirstlane_b32 s8, v0
+; SI-NEXT: s_lshr_b32 s9, s6, s8
+; SI-NEXT: s_lshl_b32 s8, s9, s8
+; SI-NEXT: s_cmp_lg_u32 s8, s6
+; SI-NEXT: s_cselect_b32 s6, 1, 0
+; SI-NEXT: s_addk_i32 s0, 0xfc10
+; SI-NEXT: s_or_b32 s6, s9, s6
+; SI-NEXT: s_lshl_b32 s8, s0, 12
+; SI-NEXT: s_or_b32 s8, s1, s8
+; SI-NEXT: s_cmp_lt_i32 s0, 1
+; SI-NEXT: s_cselect_b32 s6, s6, s8
+; SI-NEXT: s_and_b32 s8, s6, 7
+; SI-NEXT: s_cmp_gt_i32 s8, 5
+; SI-NEXT: s_cselect_b32 s9, 1, 0
+; SI-NEXT: s_cmp_eq_u32 s8, 3
+; SI-NEXT: s_cselect_b32 s8, 1, 0
+; SI-NEXT: s_lshr_b32 s6, s6, 2
+; SI-NEXT: s_or_b32 s8, s8, s9
+; SI-NEXT: s_add_i32 s6, s6, s8
+; SI-NEXT: s_cmp_lt_i32 s0, 31
+; SI-NEXT: s_cselect_b32 s6, s6, 0x7c00
+; SI-NEXT: s_cmp_lg_u32 s1, 0
+; SI-NEXT: s_cselect_b32 s1, s2, 0x7c00
+; SI-NEXT: s_cmpk_eq_i32 s0, 0x40f
+; SI-NEXT: s_cselect_b32 s0, s1, s6
+; SI-NEXT: s_lshr_b32 s1, s7, 16
+; SI-NEXT: s_and_b32 s1, s1, 0x8000
+; SI-NEXT: s_or_b32 s6, s1, s0
+; SI-NEXT: s_mov_b32 s2, -1
+; SI-NEXT: s_mov_b32 s0, s4
+; SI-NEXT: s_mov_b32 s1, s5
+; SI-NEXT: v_mov_b32_e32 v0, s6
+; SI-NEXT: buffer_store_short v0, off, s[0:3], 0
+; SI-NEXT: s_endpgm
+;
+; VI-SAFE-SDAG-LABEL: fptrunc_f64_to_f16_afn:
+; VI-SAFE-SDAG: ; %bb.0:
+; VI-SAFE-SDAG-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x24
+; VI-SAFE-SDAG-NEXT: s_mov_b32 s3, 0xf000
+; VI-SAFE-SDAG-NEXT: s_mov_b32 s2, -1
+; VI-SAFE-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; VI-SAFE-SDAG-NEXT: s_mov_b32 s0, s4
+; VI-SAFE-SDAG-NEXT: s_lshr_b32 s4, s7, 8
+; VI-SAFE-SDAG-NEXT: s_and_b32 s8, s4, 0xffe
+; VI-SAFE-SDAG-NEXT: s_and_b32 s4, s7, 0x1ff
+; VI-SAFE-SDAG-NEXT: s_or_b32 s4, s4, s6
+; VI-SAFE-SDAG-NEXT: s_cmp_lg_u32 s4, 0
+; VI-SAFE-SDAG-NEXT: s_mov_b32 s1, s5
+; VI-SAFE-SDAG-NEXT: s_cselect_b64 s[4:5], -1, 0
+; VI-SAFE-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
+; VI-SAFE-SDAG-NEXT: v_readfirstlane_b32 s4, v0
+; VI-SAFE-SDAG-NEXT: s_bfe_u32 s6, s7, 0xb0014
+; VI-SAFE-SDAG-NEXT: s_or_b32 s4, s8, s4
+; VI-SAFE-SDAG-NEXT: s_sub_i32 s8, 0x3f1, s6
+; VI-SAFE-SDAG-NEXT: v_med3_i32 v0, s8, 0, 13
+; VI-SAFE-SDAG-NEXT: s_or_b32 s5, s4, 0x1000
+; VI-SAFE-SDAG-NEXT: v_readfirstlane_b32 s8, v0
+; VI-SAFE-SDAG-NEXT: s_lshr_b32 s9, s5, s8
+; VI-SAFE-SDAG-NEXT: s_lshl_b32 s8, s9, s8
+; VI-SAFE-SDAG-NEXT: s_cmp_lg_u32 s8, s5
+; VI-SAFE-SDAG-NEXT: s_cselect_b32 s5, 1, 0
+; VI-SAFE-SDAG-NEXT: s_addk_i32 s6, 0xfc10
+; VI-SAFE-SDAG-NEXT: s_lshl_b32 s8, s6, 12
+; VI-SAFE-SDAG-NEXT: s_or_b32 s5, s9, s5
+; VI-SAFE-SDAG-NEXT: s_or_b32 s8, s4, s8
+; VI-SAFE-SDAG-NEXT: s_cmp_lt_i32 s6, 1
+; VI-SAFE-SDAG-NEXT: s_cselect_b32 s5, s5, s8
+; VI-SAFE-SDAG-NEXT: s_and_b32 s8, s5, 7
+; VI-SAFE-SDAG-NEXT: s_cmp_gt_i32 s8, 5
+; VI-SAFE-SDAG-NEXT: s_cselect_b32 s9, 1, 0
+; VI-SAFE-SDAG-NEXT: s_cmp_eq_u32 s8, 3
+; VI-SAFE-SDAG-NEXT: s_cselect_b32 s8, 1, 0
+; VI-SAFE-SDAG-NEXT: s_or_b32 s8, s8, s9
+; VI-SAFE-SDAG-NEXT: s_lshr_b32 s5, s5, 2
+; VI-SAFE-SDAG-NEXT: s_add_i32 s5, s5, s8
+; VI-SAFE-SDAG-NEXT: s_cmp_lt_i32 s6, 31
+; VI-SAFE-SDAG-NEXT: s_cselect_b32 s5, s5, 0x7c00
+; VI-SAFE-SDAG-NEXT: s_cmp_lg_u32 s4, 0
+; VI-SAFE-SDAG-NEXT: s_movk_i32 s4, 0x7e00
+; VI-SAFE-SDAG-NEXT: s_cselect_b32 s4, s4, 0x7c00
+; VI-SAFE-SDAG-NEXT: s_cmpk_eq_i32 s6, 0x40f
+; VI-SAFE-SDAG-NEXT: s_cselect_b32 s4, s4, s5
+; VI-SAFE-SDAG-NEXT: s_lshr_b32 s5, s7, 16
+; VI-SAFE-SDAG-NEXT: s_and_b32 s5, s5, 0x8000
+; VI-SAFE-SDAG-NEXT: s_or_b32 s4, s5, s4
+; VI-SAFE-SDAG-NEXT: v_mov_b32_e32 v0, s4
+; VI-SAFE-SDAG-NEXT: buffer_store_short v0, off, s[0:3], 0
+; VI-SAFE-SDAG-NEXT: s_endpgm
+;
+; VI-GISEL-LABEL: fptrunc_f64_to_f16_afn:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; VI-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[2:3]
+; VI-GISEL-NEXT: s_mov_b32 s2, -1
+; VI-GISEL-NEXT: s_mov_b32 s3, 0xf000
+; VI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
+; VI-GISEL-NEXT: buffer_store_short v0, off, s[0:3], 0
+; VI-GISEL-NEXT: s_endpgm
+;
+; VI-UNSAFE-SDAG-LABEL: fptrunc_f64_to_f16_afn:
+; VI-UNSAFE-SDAG: ; %bb.0:
+; VI-UNSAFE-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; VI-UNSAFE-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; VI-UNSAFE-SDAG-NEXT: v_cvt_f32_f64_e32 v0, s[2:3]
+; VI-UNSAFE-SDAG-NEXT: s_mov_b32 s3, 0xf000
+; VI-UNSAFE-SDAG-NEXT: s_mov_b32 s2, -1
+; VI-UNSAFE-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; VI-UNSAFE-SDAG-NEXT: buffer_store_short v0, off, s[0:3], 0
+; VI-UNSAFE-SDAG-NEXT: s_endpgm
+;
+; GFX10-SAFE-SDAG-LABEL: fptrunc_f64_to_f16_afn:
+; GFX10-SAFE-SDAG: ; %bb.0:
+; GFX10-SAFE-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX10-SAFE-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-SAFE-SDAG-NEXT: s_and_b32 s4, s3, 0x1ff
+; GFX10-SAFE-SDAG-NEXT: s_lshr_b32 s5, s3, 8
+; GFX10-SAFE-SDAG-NEXT: s_or_b32 s2, s4, s2
+; GFX10-SAFE-SDAG-NEXT: s_and_b32 s4, s5, 0xffe
+; GFX10-SAFE-SDAG-NEXT: s_cmp_lg_u32 s2, 0
+; GFX10-SAFE-SDAG-NEXT: s_cselect_b32 s2, -1, 0
+; GFX10-SAFE-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, s2
+; GFX10-SAFE-SDAG-NEXT: s_bfe_u32 s2, s3, 0xb0014
+; GFX10-SAFE-SDAG-NEXT: s_sub_i32 s5, 0x3f1, s2
+; GFX10-SAFE-SDAG-NEXT: v_med3_i32 v1, s5, 0, 13
+; GFX10-SAFE-SDAG-NEXT: v_readfirstlane_b32 s5, v0
+; GFX10-SAFE-SDAG-NEXT: v_readfirstlane_b32 s6, v1
+; GFX10-SAFE-SDAG-NEXT: s_or_b32 s4, s4, s5
+; GFX10-SAFE-SDAG-NEXT: s_or_b32 s5, s4, 0x1000
+; GFX10-SAFE-SDAG-NEXT: s_lshr_b32 s7, s5, s6
+; GFX10-SAFE-SDAG-NEXT: s_lshl_b32 s6, s7, s6
+; GFX10-SAFE-SDAG-NEXT: s_cmp_lg_u32 s6, s5
+; GFX10-SAFE-SDAG-NEXT: s_cselect_b32 s5, 1, 0
+; GFX10-SAFE-SDAG-NEXT: s_addk_i32 s2, 0xfc10
+; GFX10-SAFE-SDAG-NEXT: s_or_b32 s5, s7, s5
+; GFX10-SAFE-SDAG-NEXT: s_lshl_b32 s6, s2, 12
+; GFX10-SAFE-SDAG-NEXT: s_or_b32 s6, s4, s6
+; GFX10-SAFE-SDAG-NEXT: s_cmp_lt_i32 s2, 1
+; GFX10-SAFE-SDAG-NEXT: s_cselect_b32 s5, s5, s6
+; GFX10-SAFE-SDAG-NEXT: s_and_b32 s6, s5, 7
+; GFX10-SAFE-SDAG-NEXT: s_cmp_gt_i32 s6, 5
+; GFX10-SAFE-SDAG-NEXT: s_cselect_b32 s7, 1, 0
+; GFX10-SAFE-SDAG-NEXT: s_cmp_eq_u32 s6, 3
+; GFX10-SAFE-SDAG-NEXT: s_cselect_b32 s6, 1, 0
+; GFX10-SAFE-SDAG-NEXT: s_lshr_b32 s5, s5, 2
+; GFX10-SAFE-SDAG-NEXT: s_or_b32 s6, s6, s7
+; GFX10-SAFE-SDAG-NEXT: s_add_i32 s5, s5, s6
+; GFX10-SAFE-SDAG-NEXT: s_cmp_lt_i32 s2, 31
+; GFX10-SAFE-SDAG-NEXT: s_movk_i32 s6, 0x7e00
+; GFX10-SAFE-SDAG-NEXT: s_cselect_b32 s5, s5, 0x7c00
+; GFX10-SAFE-SDAG-NEXT: s_cmp_lg_u32 s4, 0
+; GFX10-SAFE-SDAG-NEXT: s_cselect_b32 s4, s6, 0x7c00
+; GFX10-SAFE-SDAG-NEXT: s_cmpk_eq_i32 s2, 0x40f
+; GFX10-SAFE-SDAG-NEXT: s_cselect_b32 s2, s4, s5
+; GFX10-SAFE-SDAG-NEXT: s_lshr_b32 s3, s3, 16
+; GFX10-SAFE-SDAG-NEXT: s_and_b32 s3, s3, 0x8000
+; GFX10-SAFE-SDAG-NEXT: s_or_b32 s2, s3, s2
+; GFX10-SAFE-SDAG-NEXT: s_mov_b32 s3, 0x31016000
+; GFX10-SAFE-SDAG-NEXT: v_mov_b32_e32 v0, s2
+; GFX10-SAFE-SDAG-NEXT: s_mov_b32 s2, -1
+; GFX10-SAFE-SDAG-NEXT: buffer_store_short v0, off, s[0:3], 0
+; GFX10-SAFE-SDAG-NEXT: s_endpgm
+;
+; GFX10-GISEL-LABEL: fptrunc_f64_to_f16_afn:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[2:3]
+; GFX10-GISEL-NEXT: s_mov_b32 s2, -1
+; GFX10-GISEL-NEXT: s_mov_b32 s3, 0x31016000
+; GFX10-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX10-GISEL-NEXT: buffer_store_short v0, off, s[0:3], 0
+; GFX10-GISEL-NEXT: s_endpgm
+;
+; GFX10-UNSAFE-SDAG-LABEL: fptrunc_f64_to_f16_afn:
+; GFX10-UNSAFE-SDAG: ; %bb.0:
+; GFX10-UNSAFE-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX10-UNSAFE-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-UNSAFE-SDAG-NEXT: v_cvt_f32_f64_e32 v0, s[2:3]
+; GFX10-UNSAFE-SDAG-NEXT: s_mov_b32 s3, 0x31016000
+; GFX10-UNSAFE-SDAG-NEXT: s_mov_b32 s2, -1
+; GFX10-UNSAFE-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX10-UNSAFE-SDAG-NEXT: buffer_store_short v0, off, s[0:3], 0
+; GFX10-UNSAFE-SDAG-NEXT: s_endpgm
+;
+; GFX11-SAFE-SDAG-LABEL: fptrunc_f64_to_f16_afn:
+; GFX11-SAFE-SDAG: ; %bb.0:
+; GFX11-SAFE-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-SAFE-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-SAFE-SDAG-NEXT: s_and_b32 s4, s3, 0x1ff
+; GFX11-SAFE-SDAG-NEXT: s_lshr_b32 s5, s3, 8
+; GFX11-SAFE-SDAG-NEXT: s_or_b32 s2, s4, s2
+; GFX11-SAFE-SDAG-NEXT: s_and_b32 s4, s5, 0xffe
+; GFX11-SAFE-SDAG-NEXT: s_cmp_lg_u32 s2, 0
+; GFX11-SAFE-SDAG-NEXT: s_cselect_b32 s2, -1, 0
+; GFX11-SAFE-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
+; GFX11-SAFE-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, s2
+; GFX11-SAFE-SDAG-NEXT: s_bfe_u32 s2, s3, 0xb0014
+; GFX11-SAFE-SDAG-NEXT: s_sub_i32 s5, 0x3f1, s2
+; GFX11-SAFE-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-SAFE-SDAG-NEXT: v_med3_i32 v1, s5, 0, 13
+; GFX11-SAFE-SDAG-NEXT: v_readfirstlane_b32 s5, v0
+; GFX11-SAFE-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
+; GFX11-SAFE-SDAG-NEXT: v_readfirstlane_b32 s6, v1
+; GFX11-SAFE-SDAG-NEXT: s_or_b32 s4, s4, s5
+; GFX11-SAFE-SDAG-NEXT: s_or_b32 s5, s4, 0x1000
+; GFX11-SAFE-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-SAFE-SDAG-NEXT: s_lshr_b32 s7, s5, s6
+; GFX11-SAFE-SDAG-NEXT: s_lshl_b32 s6, s7, s6
+; GFX11-SAFE-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_4) | instid1(SALU_CYCLE_1)
+; GFX11-SAFE-SDAG-NEXT: s_cmp_lg_u32 s6, s5
+; GFX11-SAFE-SDAG-NEXT: s_cselect_b32 s5, 1, 0
+; GFX11-SAFE-SDAG-NEXT: s_addk_i32 s2, 0xfc10
+; GFX11-SAFE-SDAG-NEXT: s_or_b32 s5, s7, s5
+; GFX11-SAFE-SDAG-NEXT: s_lshl_b32 s6, s2, 12
+; GFX11-SAFE-SDAG-NEXT: s_or_b32 s6, s4, s6
+; GFX11-SAFE-SDAG-NEXT: s_cmp_lt_i32 s2, 1
+; GFX11-SAFE-SDAG-NEXT: s_cselect_b32 s5, s5, s6
+; GFX11-SAFE-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-SAFE-SDAG-NEXT: s_and_b32 s6, s5, 7
+; GFX11-SAFE-SDAG-NEXT: s_cmp_gt_i32 s6, 5
+; GFX11-SAFE-SDAG-NEXT: s_cselect_b32 s7, 1, 0
+; GFX11-SAFE-SDAG-NEXT: s_cmp_eq_u32 s6, 3
+; GFX11-SAFE-SDAG-NEXT: s_cselect_b32 s6, 1, 0
+; GFX11-SAFE-SDAG-NEXT: s_lshr_b32 s5, s5, 2
+; GFX11-SAFE-SDAG-NEXT: s_or_b32 s6, s6, s7
+; GFX11-SAFE-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-SAFE-SDAG-NEXT: s_add_i32 s5, s5, s6
+; GFX11-SAFE-SDAG-NEXT: s_cmp_lt_i32 s2, 31
+; GFX11-SAFE-SDAG-NEXT: s_movk_i32 s6, 0x7e00
+; GFX11-SAFE-SDAG-NEXT: s_cselect_b32 s5, s5, 0x7c00
+; GFX11-SAFE-SDAG-NEXT: s_cmp_lg_u32 s4, 0
+; GFX11-SAFE-SDAG-NEXT: s_cselect_b32 s4, s6, 0x7c00
+; GFX11-SAFE-SDAG-NEXT: s_cmpk_eq_i32 s2, 0x40f
+; GFX11-SAFE-SDAG-NEXT: s_cselect_b32 s2, s4, s5
+; GFX11-SAFE-SDAG-NEXT: s_lshr_b32 s3, s3, 16
+; GFX11-SAFE-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-SAFE-SDAG-NEXT: s_and_b32 s3, s3, 0x8000
+; GFX11-SAFE-SDAG-NEXT: s_or_b32 s2, s3, s2
+; GFX11-SAFE-SDAG-NEXT: s_mov_b32 s3, 0x31016000
+; GFX11-SAFE-SDAG-NEXT: v_mov_b32_e32 v0, s2
+; GFX11-SAFE-SDAG-NEXT: s_mov_b32 s2, -1
+; GFX11-SAFE-SDAG-NEXT: buffer_store_b16 v0, off, s[0:3], 0
+; GFX11-SAFE-SDAG-NEXT: s_endpgm
+;
+; GFX11-SAFE-GISEL-LABEL: fptrunc_f64_to_f16_afn:
; GFX11-SAFE-GISEL: ; %bb.0:
; GFX11-SAFE-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX11-SAFE-GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-SAFE-GISEL-NEXT: s_and_b32 s6, s3, 0x1ff
-; GFX11-SAFE-GISEL-NEXT: s_bfe_u32 s4, s3, 0xb0014
-; GFX11-SAFE-GISEL-NEXT: s_lshr_b32 s5, s3, 8
-; GFX11-SAFE-GISEL-NEXT: s_or_b32 s2, s6, s2
-; GFX11-SAFE-GISEL-NEXT: s_addk_i32 s4, 0xfc10
-; GFX11-SAFE-GISEL-NEXT: s_and_b32 s5, s5, 0xffe
-; GFX11-SAFE-GISEL-NEXT: s_cmp_lg_u32 s2, 0
-; GFX11-SAFE-GISEL-NEXT: s_cselect_b32 s2, 1, 0
-; GFX11-SAFE-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX11-SAFE-GISEL-NEXT: s_or_b32 s2, s5, s2
-; GFX11-SAFE-GISEL-NEXT: s_cmp_lg_u32 s2, 0
-; GFX11-SAFE-GISEL-NEXT: s_cselect_b32 s5, 1, 0
-; GFX11-SAFE-GISEL-NEXT: s_sub_i32 s6, 1, s4
-; GFX11-SAFE-GISEL-NEXT: s_or_b32 s8, s2, 0x1000
-; GFX11-SAFE-GISEL-NEXT: s_max_i32 s6, s6, 0
-; GFX11-SAFE-GISEL-NEXT: s_lshl_b32 s7, s4, 12
-; GFX11-SAFE-GISEL-NEXT: s_min_i32 s6, s6, 13
-; GFX11-SAFE-GISEL-NEXT: s_lshl_b32 s5, s5, 9
-; GFX11-SAFE-GISEL-NEXT: s_lshr_b32 s9, s8, s6
-; GFX11-SAFE-GISEL-NEXT: s_or_b32 s2, s2, s7
-; GFX11-SAFE-GISEL-NEXT: s_lshl_b32 s6, s9, s6
-; GFX11-SAFE-GISEL-NEXT: s_or_b32 s5, s5, 0x7c00
-; GFX11-SAFE-GISEL-NEXT: s_cmp_lg_u32 s6, s8
-; GFX11-SAFE-GISEL-NEXT: s_cselect_b32 s6, 1, 0
-; GFX11-SAFE-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
-; GFX11-SAFE-GISEL-NEXT: s_or_b32 s6, s9, s6
-; GFX11-SAFE-GISEL-NEXT: s_cmp_lt_i32 s4, 1
-; GFX11-SAFE-GISEL-NEXT: s_cselect_b32 s2, s6, s2
-; GFX11-SAFE-GISEL-NEXT: s_and_b32 s6, s2, 7
-; GFX11-SAFE-GISEL-NEXT: s_lshr_b32 s2, s2, 2
-; GFX11-SAFE-GISEL-NEXT: s_cmp_eq_u32 s6, 3
-; GFX11-SAFE-GISEL-NEXT: s_cselect_b32 s7, 1, 0
-; GFX11-SAFE-GISEL-NEXT: s_cmp_gt_i32 s6, 5
-; GFX11-SAFE-GISEL-NEXT: s_cselect_b32 s6, 1, 0
-; GFX11-SAFE-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX11-SAFE-GISEL-NEXT: s_or_b32 s6, s7, s6
-; GFX11-SAFE-GISEL-NEXT: s_add_i32 s2, s2, s6
-; GFX11-SAFE-GISEL-NEXT: s_cmp_gt_i32 s4, 30
-; GFX11-SAFE-GISEL-NEXT: s_cselect_b32 s2, 0x7c00, s2
-; GFX11-SAFE-GISEL-NEXT: s_cmpk_eq_i32 s4, 0x40f
-; GFX11-SAFE-GISEL-NEXT: s_cselect_b32 s2, s5, s2
-; GFX11-SAFE-GISEL-NEXT: s_lshr_b32 s3, s3, 16
-; GFX11-SAFE-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
-; GFX11-SAFE-GISEL-NEXT: s_and_b32 s3, s3, 0x8000
-; GFX11-SAFE-GISEL-NEXT: s_or_b32 s2, s3, s2
-; GFX11-SAFE-GISEL-NEXT: s_mov_b32 s3, 0x31016000
-; GFX11-SAFE-GISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX11-SAFE-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[2:3]
; GFX11-SAFE-GISEL-NEXT: s_mov_b32 s2, -1
+; GFX11-SAFE-GISEL-NEXT: s_mov_b32 s3, 0x31016000
+; GFX11-SAFE-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-SAFE-GISEL-NEXT: v_cvt_f16_f32_e32 v0.l, v0
; GFX11-SAFE-GISEL-NEXT: buffer_store_b16 v0, off, s[0:3], 0
; GFX11-SAFE-GISEL-NEXT: s_endpgm
;
-; GFX11-UNSAFE-DAG-TRUE16-LABEL: fptrunc_f64_to_f16:
+; GFX11-UNSAFE-DAG-TRUE16-LABEL: fptrunc_f64_to_f16_afn:
; GFX11-UNSAFE-DAG-TRUE16: ; %bb.0:
; GFX11-UNSAFE-DAG-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX11-UNSAFE-DAG-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
@@ -528,7 +889,7 @@ define amdgpu_kernel void @fptrunc_f64_to_f16(ptr addrspace(1) %out, double %in)
; GFX11-UNSAFE-DAG-TRUE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0
; GFX11-UNSAFE-DAG-TRUE16-NEXT: s_endpgm
;
-; GFX11-UNSAFE-DAG-FAKE16-LABEL: fptrunc_f64_to_f16:
+; GFX11-UNSAFE-DAG-FAKE16-LABEL: fptrunc_f64_to_f16_afn:
; GFX11-UNSAFE-DAG-FAKE16: ; %bb.0:
; GFX11-UNSAFE-DAG-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX11-UNSAFE-DAG-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
@@ -540,7 +901,7 @@ define amdgpu_kernel void @fptrunc_f64_to_f16(ptr addrspace(1) %out, double %in)
; GFX11-UNSAFE-DAG-FAKE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0
; GFX11-UNSAFE-DAG-FAKE16-NEXT: s_endpgm
;
-; GFX11-UNSAFE-GISEL-TRUE16-LABEL: fptrunc_f64_to_f16:
+; GFX11-UNSAFE-GISEL-TRUE16-LABEL: fptrunc_f64_to_f16_afn:
; GFX11-UNSAFE-GISEL-TRUE16: ; %bb.0:
; GFX11-UNSAFE-GISEL-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX11-UNSAFE-GISEL-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
@@ -552,7 +913,7 @@ define amdgpu_kernel void @fptrunc_f64_to_f16(ptr addrspace(1) %out, double %in)
; GFX11-UNSAFE-GISEL-TRUE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0
; GFX11-UNSAFE-GISEL-TRUE16-NEXT: s_endpgm
;
-; GFX11-UNSAFE-GISEL-FAKE16-LABEL: fptrunc_f64_to_f16:
+; GFX11-UNSAFE-GISEL-FAKE16-LABEL: fptrunc_f64_to_f16_afn:
; GFX11-UNSAFE-GISEL-FAKE16: ; %bb.0:
; GFX11-UNSAFE-GISEL-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
; GFX11-UNSAFE-GISEL-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
@@ -563,7 +924,7 @@ define amdgpu_kernel void @fptrunc_f64_to_f16(ptr addrspace(1) %out, double %in)
; GFX11-UNSAFE-GISEL-FAKE16-NEXT: v_cvt_f16_f32_e32 v0, v0
; GFX11-UNSAFE-GISEL-FAKE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0
; GFX11-UNSAFE-GISEL-FAKE16-NEXT: s_endpgm
- %result = fptrunc double %in to half
+ %result = fptrunc afn double %in to half
%result_i16 = bitcast half %result to i16
store i16 %result_i16, ptr addrspace(1) %out
ret void
@@ -662,6 +1023,99 @@ define amdgpu_kernel void @fptrunc_v2f64_to_v2f32(ptr addrspace(1) %out, <2 x do
ret void
}
+define amdgpu_kernel void @fptrunc_v2f64_to_v2f32_afn(ptr addrspace(1) %out, <2 x double> %in) {
+; SI-LABEL: fptrunc_v2f64_to_v2f32_afn:
+; SI: ; %bb.0:
+; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0xd
+; SI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x9
+; SI-NEXT: s_mov_b32 s7, 0xf000
+; SI-NEXT: s_mov_b32 s6, -1
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: v_cvt_f32_f64_e32 v1, s[2:3]
+; SI-NEXT: v_cvt_f32_f64_e32 v0, s[0:1]
+; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
+; SI-NEXT: s_endpgm
+;
+; VI-SDAG-LABEL: fptrunc_v2f64_to_v2f32_afn:
+; VI-SDAG: ; %bb.0:
+; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x34
+; VI-SDAG-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x24
+; VI-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; VI-SDAG-NEXT: s_mov_b32 s6, -1
+; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; VI-SDAG-NEXT: v_cvt_f32_f64_e32 v1, s[2:3]
+; VI-SDAG-NEXT: v_cvt_f32_f64_e32 v0, s[0:1]
+; VI-SDAG-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
+; VI-SDAG-NEXT: s_endpgm
+;
+; VI-GISEL-LABEL: fptrunc_v2f64_to_v2f32_afn:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x34
+; VI-GISEL-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x24
+; VI-GISEL-NEXT: s_mov_b32 s6, -1
+; VI-GISEL-NEXT: s_mov_b32 s7, 0xf000
+; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; VI-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[0:1]
+; VI-GISEL-NEXT: v_cvt_f32_f64_e32 v1, s[2:3]
+; VI-GISEL-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
+; VI-GISEL-NEXT: s_endpgm
+;
+; GFX10-SDAG-LABEL: fptrunc_v2f64_to_v2f32_afn:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x34
+; GFX10-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_cvt_f32_f64_e32 v1, s[2:3]
+; GFX10-SDAG-NEXT: v_cvt_f32_f64_e32 v0, s[0:1]
+; GFX10-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX10-SDAG-NEXT: s_mov_b32 s3, 0x31016000
+; GFX10-SDAG-NEXT: s_mov_b32 s2, -1
+; GFX10-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-SDAG-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
+; GFX10-SDAG-NEXT: s_endpgm
+;
+; GFX10-GISEL-LABEL: fptrunc_v2f64_to_v2f32_afn:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x34
+; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[0:1]
+; GFX10-GISEL-NEXT: v_cvt_f32_f64_e32 v1, s[2:3]
+; GFX10-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX10-GISEL-NEXT: s_mov_b32 s2, -1
+; GFX10-GISEL-NEXT: s_mov_b32 s3, 0x31016000
+; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-GISEL-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
+; GFX10-GISEL-NEXT: s_endpgm
+;
+; GFX11-SDAG-LABEL: fptrunc_v2f64_to_v2f32_afn:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x34
+; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_cvt_f32_f64_e32 v1, s[2:3]
+; GFX11-SDAG-NEXT: v_cvt_f32_f64_e32 v0, s[0:1]
+; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX11-SDAG-NEXT: s_mov_b32 s3, 0x31016000
+; GFX11-SDAG-NEXT: s_mov_b32 s2, -1
+; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-SDAG-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0
+; GFX11-SDAG-NEXT: s_endpgm
+;
+; GFX11-GISEL-LABEL: fptrunc_v2f64_to_v2f32_afn:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x34
+; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[0:1]
+; GFX11-GISEL-NEXT: v_cvt_f32_f64_e32 v1, s[2:3]
+; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX11-GISEL-NEXT: s_mov_b32 s2, -1
+; GFX11-GISEL-NEXT: s_mov_b32 s3, 0x31016000
+; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0
+; GFX11-GISEL-NEXT: s_endpgm
+ %result = fptrunc afn <2 x double> %in to <2 x float>
+ store <2 x float> %result, ptr addrspace(1) %out
+ ret void
+}
+
define amdgpu_kernel void @fptrunc_v3f64_to_v3f32(ptr addrspace(1) %out, <3 x double> %in) {
; SI-LABEL: fptrunc_v3f64_to_v3f32:
; SI: ; %bb.0:
@@ -769,6 +1223,113 @@ define amdgpu_kernel void @fptrunc_v3f64_to_v3f32(ptr addrspace(1) %out, <3 x do
ret void
}
+define amdgpu_kernel void @fptrunc_v3f64_to_v3f32_afn(ptr addrspace(1) %out, <3 x double> %in) {
+; SI-LABEL: fptrunc_v3f64_to_v3f32_afn:
+; SI: ; %bb.0:
+; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; SI-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x11
+; SI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x15
+; SI-NEXT: s_mov_b32 s3, 0xf000
+; SI-NEXT: s_mov_b32 s2, -1
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: v_cvt_f32_f64_e32 v1, s[10:11]
+; SI-NEXT: v_cvt_f32_f64_e32 v0, s[8:9]
+; SI-NEXT: v_cvt_f32_f64_e32 v2, s[4:5]
+; SI-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:8
+; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0
+; SI-NEXT: s_endpgm
+;
+; VI-SDAG-LABEL: fptrunc_v3f64_to_v3f32_afn:
+; VI-SDAG: ; %bb.0:
+; VI-SDAG-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x54
+; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x44
+; VI-SDAG-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x24
+; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; VI-SDAG-NEXT: v_cvt_f32_f64_e32 v2, s[6:7]
+; VI-SDAG-NEXT: v_cvt_f32_f64_e32 v1, s[2:3]
+; VI-SDAG-NEXT: v_cvt_f32_f64_e32 v0, s[0:1]
+; VI-SDAG-NEXT: s_mov_b32 s7, 0xf000
+; VI-SDAG-NEXT: s_mov_b32 s6, -1
+; VI-SDAG-NEXT: buffer_store_dwordx3 v[0:2], off, s[4:7], 0
+; VI-SDAG-NEXT: s_endpgm
+;
+; VI-GISEL-LABEL: fptrunc_v3f64_to_v3f32_afn:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x44
+; VI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; VI-GISEL-NEXT: s_mov_b32 s2, -1
+; VI-GISEL-NEXT: s_mov_b32 s3, 0xf000
+; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; VI-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[8:9]
+; VI-GISEL-NEXT: v_cvt_f32_f64_e32 v1, s[10:11]
+; VI-GISEL-NEXT: v_cvt_f32_f64_e32 v2, s[12:13]
+; VI-GISEL-NEXT: buffer_store_dwordx3 v[0:2], off, s[0:3], 0
+; VI-GISEL-NEXT: s_endpgm
+;
+; GFX10-SDAG-LABEL: fptrunc_v3f64_to_v3f32_afn:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_clause 0x1
+; GFX10-SDAG-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x54
+; GFX10-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x44
+; GFX10-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_cvt_f32_f64_e32 v2, s[6:7]
+; GFX10-SDAG-NEXT: v_cvt_f32_f64_e32 v1, s[2:3]
+; GFX10-SDAG-NEXT: v_cvt_f32_f64_e32 v0, s[0:1]
+; GFX10-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX10-SDAG-NEXT: s_mov_b32 s3, 0x31016000
+; GFX10-SDAG-NEXT: s_mov_b32 s2, -1
+; GFX10-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-SDAG-NEXT: buffer_store_dwordx3 v[0:2], off, s[0:3], 0
+; GFX10-SDAG-NEXT: s_endpgm
+;
+; GFX10-GISEL-LABEL: fptrunc_v3f64_to_v3f32_afn:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_clause 0x1
+; GFX10-GISEL-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x44
+; GFX10-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX10-GISEL-NEXT: s_mov_b32 s2, -1
+; GFX10-GISEL-NEXT: s_mov_b32 s3, 0x31016000
+; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[8:9]
+; GFX10-GISEL-NEXT: v_cvt_f32_f64_e32 v1, s[10:11]
+; GFX10-GISEL-NEXT: v_cvt_f32_f64_e32 v2, s[12:13]
+; GFX10-GISEL-NEXT: buffer_store_dwordx3 v[0:2], off, s[0:3], 0
+; GFX10-GISEL-NEXT: s_endpgm
+;
+; GFX11-SDAG-LABEL: fptrunc_v3f64_to_v3f32_afn:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_clause 0x1
+; GFX11-SDAG-NEXT: s_load_b64 s[6:7], s[4:5], 0x54
+; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x44
+; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_cvt_f32_f64_e32 v2, s[6:7]
+; GFX11-SDAG-NEXT: v_cvt_f32_f64_e32 v1, s[2:3]
+; GFX11-SDAG-NEXT: v_cvt_f32_f64_e32 v0, s[0:1]
+; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX11-SDAG-NEXT: s_mov_b32 s3, 0x31016000
+; GFX11-SDAG-NEXT: s_mov_b32 s2, -1
+; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-SDAG-NEXT: buffer_store_b96 v[0:2], off, s[0:3], 0
+; GFX11-SDAG-NEXT: s_endpgm
+;
+; GFX11-GISEL-LABEL: fptrunc_v3f64_to_v3f32_afn:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_clause 0x1
+; GFX11-GISEL-NEXT: s_load_b256 s[8:15], s[4:5], 0x44
+; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX11-GISEL-NEXT: s_mov_b32 s2, -1
+; GFX11-GISEL-NEXT: s_mov_b32 s3, 0x31016000
+; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[8:9]
+; GFX11-GISEL-NEXT: v_cvt_f32_f64_e32 v1, s[10:11]
+; GFX11-GISEL-NEXT: v_cvt_f32_f64_e32 v2, s[12:13]
+; GFX11-GISEL-NEXT: buffer_store_b96 v[0:2], off, s[0:3], 0
+; GFX11-GISEL-NEXT: s_endpgm
+ %result = fptrunc afn <3 x double> %in to <3 x float>
+ store <3 x float> %result, ptr addrspace(1) %out
+ ret void
+}
+
define amdgpu_kernel void @fptrunc_v4f64_to_v4f32(ptr addrspace(1) %out, <4 x double> %in) {
; SI-LABEL: fptrunc_v4f64_to_v4f32:
; SI: ; %bb.0:
@@ -876,6 +1437,113 @@ define amdgpu_kernel void @fptrunc_v4f64_to_v4f32(ptr addrspace(1) %out, <4 x do
ret void
}
+define amdgpu_kernel void @fptrunc_v4f64_to_v4f32_afn(ptr addrspace(1) %out, <4 x double> %in) {
+; SI-LABEL: fptrunc_v4f64_to_v4f32_afn:
+; SI: ; %bb.0:
+; SI-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x11
+; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; SI-NEXT: s_mov_b32 s3, 0xf000
+; SI-NEXT: s_mov_b32 s2, -1
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: v_cvt_f32_f64_e32 v3, s[14:15]
+; SI-NEXT: v_cvt_f32_f64_e32 v2, s[12:13]
+; SI-NEXT: v_cvt_f32_f64_e32 v1, s[10:11]
+; SI-NEXT: v_cvt_f32_f64_e32 v0, s[8:9]
+; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
+; SI-NEXT: s_endpgm
+;
+; VI-SDAG-LABEL: fptrunc_v4f64_to_v4f32_afn:
+; VI-SDAG: ; %bb.0:
+; VI-SDAG-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x44
+; VI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; VI-SDAG-NEXT: s_mov_b32 s3, 0xf000
+; VI-SDAG-NEXT: s_mov_b32 s2, -1
+; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; VI-SDAG-NEXT: v_cvt_f32_f64_e32 v3, s[14:15]
+; VI-SDAG-NEXT: v_cvt_f32_f64_e32 v2, s[12:13]
+; VI-SDAG-NEXT: v_cvt_f32_f64_e32 v1, s[10:11]
+; VI-SDAG-NEXT: v_cvt_f32_f64_e32 v0, s[8:9]
+; VI-SDAG-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
+; VI-SDAG-NEXT: s_endpgm
+;
+; VI-GISEL-LABEL: fptrunc_v4f64_to_v4f32_afn:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x44
+; VI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; VI-GISEL-NEXT: s_mov_b32 s2, -1
+; VI-GISEL-NEXT: s_mov_b32 s3, 0xf000
+; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; VI-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[8:9]
+; VI-GISEL-NEXT: v_cvt_f32_f64_e32 v1, s[10:11]
+; VI-GISEL-NEXT: v_cvt_f32_f64_e32 v2, s[12:13]
+; VI-GISEL-NEXT: v_cvt_f32_f64_e32 v3, s[14:15]
+; VI-GISEL-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
+; VI-GISEL-NEXT: s_endpgm
+;
+; GFX10-SDAG-LABEL: fptrunc_v4f64_to_v4f32_afn:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_clause 0x1
+; GFX10-SDAG-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x44
+; GFX10-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX10-SDAG-NEXT: s_mov_b32 s3, 0x31016000
+; GFX10-SDAG-NEXT: s_mov_b32 s2, -1
+; GFX10-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_cvt_f32_f64_e32 v3, s[14:15]
+; GFX10-SDAG-NEXT: v_cvt_f32_f64_e32 v2, s[12:13]
+; GFX10-SDAG-NEXT: v_cvt_f32_f64_e32 v1, s[10:11]
+; GFX10-SDAG-NEXT: v_cvt_f32_f64_e32 v0, s[8:9]
+; GFX10-SDAG-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
+; GFX10-SDAG-NEXT: s_endpgm
+;
+; GFX10-GISEL-LABEL: fptrunc_v4f64_to_v4f32_afn:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_clause 0x1
+; GFX10-GISEL-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x44
+; GFX10-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX10-GISEL-NEXT: s_mov_b32 s2, -1
+; GFX10-GISEL-NEXT: s_mov_b32 s3, 0x31016000
+; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[8:9]
+; GFX10-GISEL-NEXT: v_cvt_f32_f64_e32 v1, s[10:11]
+; GFX10-GISEL-NEXT: v_cvt_f32_f64_e32 v2, s[12:13]
+; GFX10-GISEL-NEXT: v_cvt_f32_f64_e32 v3, s[14:15]
+; GFX10-GISEL-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
+; GFX10-GISEL-NEXT: s_endpgm
+;
+; GFX11-SDAG-LABEL: fptrunc_v4f64_to_v4f32_afn:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_clause 0x1
+; GFX11-SDAG-NEXT: s_load_b256 s[8:15], s[4:5], 0x44
+; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX11-SDAG-NEXT: s_mov_b32 s3, 0x31016000
+; GFX11-SDAG-NEXT: s_mov_b32 s2, -1
+; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_cvt_f32_f64_e32 v3, s[14:15]
+; GFX11-SDAG-NEXT: v_cvt_f32_f64_e32 v2, s[12:13]
+; GFX11-SDAG-NEXT: v_cvt_f32_f64_e32 v1, s[10:11]
+; GFX11-SDAG-NEXT: v_cvt_f32_f64_e32 v0, s[8:9]
+; GFX11-SDAG-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
+; GFX11-SDAG-NEXT: s_endpgm
+;
+; GFX11-GISEL-LABEL: fptrunc_v4f64_to_v4f32_afn:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_clause 0x1
+; GFX11-GISEL-NEXT: s_load_b256 s[8:15], s[4:5], 0x44
+; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX11-GISEL-NEXT: s_mov_b32 s2, -1
+; GFX11-GISEL-NEXT: s_mov_b32 s3, 0x31016000
+; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[8:9]
+; GFX11-GISEL-NEXT: v_cvt_f32_f64_e32 v1, s[10:11]
+; GFX11-GISEL-NEXT: v_cvt_f32_f64_e32 v2, s[12:13]
+; GFX11-GISEL-NEXT: v_cvt_f32_f64_e32 v3, s[14:15]
+; GFX11-GISEL-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
+; GFX11-GISEL-NEXT: s_endpgm
+ %result = fptrunc afn <4 x double> %in to <4 x float>
+ store <4 x float> %result, ptr addrspace(1) %out
+ ret void
+}
+
define amdgpu_kernel void @fptrunc_v8f64_to_v8f32(ptr addrspace(1) %out, <8 x double> %in) {
; SI-LABEL: fptrunc_v8f64_to_v8f32:
; SI: ; %bb.0:
@@ -1019,3 +1687,150 @@ define amdgpu_kernel void @fptrunc_v8f64_to_v8f32(ptr addrspace(1) %out, <8 x do
store <8 x float> %result, ptr addrspace(1) %out
ret void
}
+
+define amdgpu_kernel void @fptrunc_v8f64_to_v8f32_afn(ptr addrspace(1) %out, <8 x double> %in) {
+; SI-LABEL: fptrunc_v8f64_to_v8f32_afn:
+; SI: ; %bb.0:
+; SI-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x19
+; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9
+; SI-NEXT: s_mov_b32 s3, 0xf000
+; SI-NEXT: s_mov_b32 s2, -1
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: v_cvt_f32_f64_e32 v3, s[14:15]
+; SI-NEXT: v_cvt_f32_f64_e32 v2, s[12:13]
+; SI-NEXT: v_cvt_f32_f64_e32 v1, s[10:11]
+; SI-NEXT: v_cvt_f32_f64_e32 v0, s[8:9]
+; SI-NEXT: v_cvt_f32_f64_e32 v7, s[22:23]
+; SI-NEXT: v_cvt_f32_f64_e32 v6, s[20:21]
+; SI-NEXT: v_cvt_f32_f64_e32 v5, s[18:19]
+; SI-NEXT: v_cvt_f32_f64_e32 v4, s[16:17]
+; SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16
+; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
+; SI-NEXT: s_endpgm
+;
+; VI-SDAG-LABEL: fptrunc_v8f64_to_v8f32_afn:
+; VI-SDAG: ; %bb.0:
+; VI-SDAG-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x64
+; VI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; VI-SDAG-NEXT: s_mov_b32 s3, 0xf000
+; VI-SDAG-NEXT: s_mov_b32 s2, -1
+; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; VI-SDAG-NEXT: v_cvt_f32_f64_e32 v7, s[22:23]
+; VI-SDAG-NEXT: v_cvt_f32_f64_e32 v6, s[20:21]
+; VI-SDAG-NEXT: v_cvt_f32_f64_e32 v5, s[18:19]
+; VI-SDAG-NEXT: v_cvt_f32_f64_e32 v4, s[16:17]
+; VI-SDAG-NEXT: v_cvt_f32_f64_e32 v3, s[14:15]
+; VI-SDAG-NEXT: v_cvt_f32_f64_e32 v2, s[12:13]
+; VI-SDAG-NEXT: v_cvt_f32_f64_e32 v1, s[10:11]
+; VI-SDAG-NEXT: v_cvt_f32_f64_e32 v0, s[8:9]
+; VI-SDAG-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16
+; VI-SDAG-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
+; VI-SDAG-NEXT: s_endpgm
+;
+; VI-GISEL-LABEL: fptrunc_v8f64_to_v8f32_afn:
+; VI-GISEL: ; %bb.0:
+; VI-GISEL-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x64
+; VI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; VI-GISEL-NEXT: s_mov_b32 s2, -1
+; VI-GISEL-NEXT: s_mov_b32 s3, 0xf000
+; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; VI-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[8:9]
+; VI-GISEL-NEXT: v_cvt_f32_f64_e32 v1, s[10:11]
+; VI-GISEL-NEXT: v_cvt_f32_f64_e32 v2, s[12:13]
+; VI-GISEL-NEXT: v_cvt_f32_f64_e32 v3, s[14:15]
+; VI-GISEL-NEXT: v_cvt_f32_f64_e32 v4, s[16:17]
+; VI-GISEL-NEXT: v_cvt_f32_f64_e32 v5, s[18:19]
+; VI-GISEL-NEXT: v_cvt_f32_f64_e32 v6, s[20:21]
+; VI-GISEL-NEXT: v_cvt_f32_f64_e32 v7, s[22:23]
+; VI-GISEL-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
+; VI-GISEL-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16
+; VI-GISEL-NEXT: s_endpgm
+;
+; GFX10-SDAG-LABEL: fptrunc_v8f64_to_v8f32_afn:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_clause 0x1
+; GFX10-SDAG-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x64
+; GFX10-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX10-SDAG-NEXT: s_mov_b32 s3, 0x31016000
+; GFX10-SDAG-NEXT: s_mov_b32 s2, -1
+; GFX10-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_cvt_f32_f64_e32 v7, s[22:23]
+; GFX10-SDAG-NEXT: v_cvt_f32_f64_e32 v6, s[20:21]
+; GFX10-SDAG-NEXT: v_cvt_f32_f64_e32 v5, s[18:19]
+; GFX10-SDAG-NEXT: v_cvt_f32_f64_e32 v4, s[16:17]
+; GFX10-SDAG-NEXT: v_cvt_f32_f64_e32 v3, s[14:15]
+; GFX10-SDAG-NEXT: v_cvt_f32_f64_e32 v2, s[12:13]
+; GFX10-SDAG-NEXT: v_cvt_f32_f64_e32 v1, s[10:11]
+; GFX10-SDAG-NEXT: v_cvt_f32_f64_e32 v0, s[8:9]
+; GFX10-SDAG-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16
+; GFX10-SDAG-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
+; GFX10-SDAG-NEXT: s_endpgm
+;
+; GFX10-GISEL-LABEL: fptrunc_v8f64_to_v8f32_afn:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_clause 0x1
+; GFX10-GISEL-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x64
+; GFX10-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24
+; GFX10-GISEL-NEXT: s_mov_b32 s2, -1
+; GFX10-GISEL-NEXT: s_mov_b32 s3, 0x31016000
+; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[8:9]
+; GFX10-GISEL-NEXT: v_cvt_f32_f64_e32 v1, s[10:11]
+; GFX10-GISEL-NEXT: v_cvt_f32_f64_e32 v2, s[12:13]
+; GFX10-GISEL-NEXT: v_cvt_f32_f64_e32 v3, s[14:15]
+; GFX10-GISEL-NEXT: v_cvt_f32_f64_e32 v4, s[16:17]
+; GFX10-GISEL-NEXT: v_cvt_f32_f64_e32 v5, s[18:19]
+; GFX10-GISEL-NEXT: v_cvt_f32_f64_e32 v6, s[20:21]
+; GFX10-GISEL-NEXT: v_cvt_f32_f64_e32 v7, s[22:23]
+; GFX10-GISEL-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0
+; GFX10-GISEL-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16
+; GFX10-GISEL-NEXT: s_endpgm
+;
+; GFX11-SDAG-LABEL: fptrunc_v8f64_to_v8f32_afn:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_clause 0x1
+; GFX11-SDAG-NEXT: s_load_b512 s[8:23], s[4:5], 0x64
+; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX11-SDAG-NEXT: s_mov_b32 s3, 0x31016000
+; GFX11-SDAG-NEXT: s_mov_b32 s2, -1
+; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_cvt_f32_f64_e32 v7, s[22:23]
+; GFX11-SDAG-NEXT: v_cvt_f32_f64_e32 v6, s[20:21]
+; GFX11-SDAG-NEXT: v_cvt_f32_f64_e32 v5, s[18:19]
+; GFX11-SDAG-NEXT: v_cvt_f32_f64_e32 v4, s[16:17]
+; GFX11-SDAG-NEXT: v_cvt_f32_f64_e32 v3, s[14:15]
+; GFX11-SDAG-NEXT: v_cvt_f32_f64_e32 v2, s[12:13]
+; GFX11-SDAG-NEXT: v_cvt_f32_f64_e32 v1, s[10:11]
+; GFX11-SDAG-NEXT: v_cvt_f32_f64_e32 v0, s[8:9]
+; GFX11-SDAG-NEXT: s_clause 0x1
+; GFX11-SDAG-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0 offset:16
+; GFX11-SDAG-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
+; GFX11-SDAG-NEXT: s_endpgm
+;
+; GFX11-GISEL-LABEL: fptrunc_v8f64_to_v8f32_afn:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_clause 0x1
+; GFX11-GISEL-NEXT: s_load_b512 s[8:23], s[4:5], 0x64
+; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX11-GISEL-NEXT: s_mov_b32 s2, -1
+; GFX11-GISEL-NEXT: s_mov_b32 s3, 0x31016000
+; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[8:9]
+; GFX11-GISEL-NEXT: v_cvt_f32_f64_e32 v1, s[10:11]
+; GFX11-GISEL-NEXT: v_cvt_f32_f64_e32 v2, s[12:13]
+; GFX11-GISEL-NEXT: v_cvt_f32_f64_e32 v3, s[14:15]
+; GFX11-GISEL-NEXT: v_cvt_f32_f64_e32 v4, s[16:17]
+; GFX11-GISEL-NEXT: v_cvt_f32_f64_e32 v5, s[18:19]
+; GFX11-GISEL-NEXT: v_cvt_f32_f64_e32 v6, s[20:21]
+; GFX11-GISEL-NEXT: v_cvt_f32_f64_e32 v7, s[22:23]
+; GFX11-GISEL-NEXT: s_clause 0x1
+; GFX11-GISEL-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
+; GFX11-GISEL-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0 offset:16
+; GFX11-GISEL-NEXT: s_endpgm
+ %result = fptrunc <8 x double> %in to <8 x float>
+ store <8 x float> %result, ptr addrspace(1) %out
+ ret void
+}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GFX10-SAFE-GISEL: {{.*}}
+; VI-SAFE-GISEL: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/gfx1250-no-scope-cu-stores.ll b/llvm/test/CodeGen/AMDGPU/gfx1250-no-scope-cu-stores.ll
new file mode 100644
index 0000000..d13d76f
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/gfx1250-no-scope-cu-stores.ll
@@ -0,0 +1,100 @@
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -O3 -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GCN,CU %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -O3 -mcpu=gfx1250 -mattr=-cu-stores < %s | FileCheck --check-prefixes=GCN,NOCU %s
+
+; Check that if -cu-stores is used, we use SCOPE_SE minimum on all stores.
+
+; GCN: flat_store:
+; CU: flat_store_b32 v{{.*}}, v{{.*}}, s{{.*}} scope:SCOPE_SE
+; NOCU: flat_store_b32 v{{.*}}, v{{.*}}, s{{.*}} scope:SCOPE_SE
+; GCN: .amdhsa_kernel flat_store
+; CU: .amdhsa_uses_cu_stores 1
+; NOCU: .amdhsa_uses_cu_stores 0
+define amdgpu_kernel void @flat_store(ptr %dst, i32 %val) {
+entry:
+ store i32 %val, ptr %dst
+ ret void
+}
+
+; GCN: global_store:
+; CU: global_store_b32 v{{.*}}, v{{.*}}, s{{.*}}{{$}}
+; NOCU: global_store_b32 v{{.*}}, v{{.*}}, s{{.*}} scope:SCOPE_SE
+; GCN: .amdhsa_kernel global_store
+; CU: .amdhsa_uses_cu_stores 1
+; NOCU: .amdhsa_uses_cu_stores 0
+define amdgpu_kernel void @global_store(ptr addrspace(1) %dst, i32 %val) {
+entry:
+ store i32 %val, ptr addrspace(1) %dst
+ ret void
+}
+
+; GCN: local_store:
+; CU: ds_store_b32 v{{.*}}, v{{.*}}{{$}}
+; NOCU: ds_store_b32 v{{.*}}, v{{.*}}{{$}}
+; GCN: .amdhsa_kernel local_store
+; CU: .amdhsa_uses_cu_stores 1
+; NOCU: .amdhsa_uses_cu_stores 0
+define amdgpu_kernel void @local_store(ptr addrspace(3) %dst, i32 %val) {
+entry:
+ store i32 %val, ptr addrspace(3) %dst
+ ret void
+}
+
+; GCN: scratch_store:
+; CU: scratch_store_b32 off, v{{.*}}, s{{.*}} scope:SCOPE_SE
+; NOCU: scratch_store_b32 off, v{{.*}}, s{{.*}} scope:SCOPE_SE
+; GCN: .amdhsa_kernel scratch_store
+; CU: .amdhsa_uses_cu_stores 1
+; NOCU: .amdhsa_uses_cu_stores 0
+define amdgpu_kernel void @scratch_store(ptr addrspace(5) %dst, i32 %val) {
+entry:
+ store i32 %val, ptr addrspace(5) %dst
+ ret void
+}
+
+; GCN: flat_atomic_store:
+; CU: flat_store_b32 v{{.*}}, v{{.*}}, s{{.*}} scope:SCOPE_SE
+; NOCU: flat_store_b32 v{{.*}}, v{{.*}}, s{{.*}} scope:SCOPE_SE
+; GCN: .amdhsa_kernel flat_atomic_store
+; CU: .amdhsa_uses_cu_stores 1
+; NOCU: .amdhsa_uses_cu_stores 0
+define amdgpu_kernel void @flat_atomic_store(ptr %dst, i32 %val) {
+entry:
+ store atomic i32 %val, ptr %dst syncscope("wavefront") unordered, align 4
+ ret void
+}
+
+; GCN: global_atomic_store:
+; CU: global_store_b32 v{{.*}}, v{{.*}}, s{{.*}}{{$}}
+; NOCU: global_store_b32 v{{.*}}, v{{.*}}, s{{.*}} scope:SCOPE_SE
+; GCN: .amdhsa_kernel global_atomic_store
+; CU: .amdhsa_uses_cu_stores 1
+; NOCU: .amdhsa_uses_cu_stores 0
+define amdgpu_kernel void @global_atomic_store(ptr addrspace(1) %dst, i32 %val) {
+entry:
+ store atomic i32 %val, ptr addrspace(1) %dst syncscope("wavefront") unordered, align 4
+ ret void
+}
+
+; GCN: local_atomic_store:
+; CU: ds_store_b32 v{{.*}}, v{{.*}}{{$}}
+; NOCU: ds_store_b32 v{{.*}}, v{{.*}}{{$}}
+; GCN: .amdhsa_kernel local_atomic_store
+; CU: .amdhsa_uses_cu_stores 1
+; NOCU: .amdhsa_uses_cu_stores 0
+define amdgpu_kernel void @local_atomic_store(ptr addrspace(3) %dst, i32 %val) {
+entry:
+ store atomic i32 %val, ptr addrspace(3) %dst syncscope("wavefront") unordered, align 4
+ ret void
+}
+
+; GCN: scratch_atomic_store:
+; CU: scratch_store_b32 off, v{{.*}}, s{{.*}} scope:SCOPE_SE
+; NOCU: scratch_store_b32 off, v{{.*}}, s{{.*}} scope:SCOPE_SE
+; GCN: .amdhsa_kernel scratch_atomic_store
+; CU: .amdhsa_uses_cu_stores 1
+; NOCU: .amdhsa_uses_cu_stores 0
+define amdgpu_kernel void @scratch_atomic_store(ptr addrspace(5) %dst, i32 %val) {
+entry:
+ store atomic i32 %val, ptr addrspace(5) %dst syncscope("wavefront") unordered, align 4
+ ret void
+}
diff --git a/llvm/test/CodeGen/AMDGPU/gfx1250-scratch-scope-se.ll b/llvm/test/CodeGen/AMDGPU/gfx1250-scratch-scope-se.ll
new file mode 100644
index 0000000..d1e82a0
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/gfx1250-scratch-scope-se.ll
@@ -0,0 +1,95 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 < %s | FileCheck -check-prefixes=GCN,GCN-SDAG %s
+; RUN: llc -global-isel -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1250 < %s | FileCheck -check-prefixes=GCN,GCN-GISEL %s
+
+; Test that stores that may hit scratch are correctly promoted to SCOPE_SE.
+
+define void @test_scratch_store(ptr addrspace(5) %ptr, i32 %val) {
+; GCN-LABEL: test_scratch_store:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_wait_loadcnt_dscnt 0x0
+; GCN-NEXT: s_wait_kmcnt 0x0
+; GCN-NEXT: scratch_store_b32 v0, v1, off scope:SCOPE_SE
+; GCN-NEXT: s_set_pc_i64 s[30:31]
+ store i32 %val, ptr addrspace(5) %ptr
+ ret void
+}
+
+define void @test_unknown_flat_store(ptr %ptr, i32 %val) {
+; GCN-LABEL: test_unknown_flat_store:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_wait_loadcnt_dscnt 0x0
+; GCN-NEXT: s_wait_kmcnt 0x0
+; GCN-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_SE
+; GCN-NEXT: s_wait_dscnt 0x0
+; GCN-NEXT: s_set_pc_i64 s[30:31]
+ store i32 %val, ptr %ptr
+ ret void
+}
+
+define void @test_flat_store_no_scratch_alloc(ptr %ptr, i32 %val) #0 {
+; GCN-LABEL: test_flat_store_no_scratch_alloc:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_wait_loadcnt_dscnt 0x0
+; GCN-NEXT: s_wait_kmcnt 0x0
+; GCN-NEXT: flat_store_b32 v[0:1], v2
+; GCN-NEXT: s_wait_dscnt 0x0
+; GCN-NEXT: s_set_pc_i64 s[30:31]
+ store i32 %val, ptr %ptr
+ ret void
+}
+
+; TODO: handle
+define void @test_flat_store_noalias_addrspace(ptr %ptr, i32 %val) {
+; GCN-LABEL: test_flat_store_noalias_addrspace:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_wait_loadcnt_dscnt 0x0
+; GCN-NEXT: s_wait_kmcnt 0x0
+; GCN-NEXT: flat_store_b32 v[0:1], v2 scope:SCOPE_SE
+; GCN-NEXT: s_wait_dscnt 0x0
+; GCN-NEXT: s_set_pc_i64 s[30:31]
+ store i32 %val, ptr %ptr, !noalias.addrspace !{i32 5, i32 6}
+ ret void
+}
+
+; TODO: would be nice to handle too
+define void @test_flat_store_select(ptr addrspace(1) %a, ptr addrspace(3) %b, i1 %cond, i32 %val) {
+; GCN-SDAG-LABEL: test_flat_store_select:
+; GCN-SDAG: ; %bb.0:
+; GCN-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
+; GCN-SDAG-NEXT: s_wait_kmcnt 0x0
+; GCN-SDAG-NEXT: v_cmp_ne_u32_e32 vcc_lo, -1, v2
+; GCN-SDAG-NEXT: v_and_b32_e32 v3, 1, v3
+; GCN-SDAG-NEXT: s_mov_b64 s[0:1], src_shared_base
+; GCN-SDAG-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc_lo
+; GCN-SDAG-NEXT: v_cndmask_b32_e64 v5, 0, s1, vcc_lo
+; GCN-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GCN-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 1, v3
+; GCN-SDAG-NEXT: v_dual_cndmask_b32 v1, v5, v1 :: v_dual_cndmask_b32 v0, v2, v0
+; GCN-SDAG-NEXT: flat_store_b32 v[0:1], v4 scope:SCOPE_SE
+; GCN-SDAG-NEXT: s_wait_dscnt 0x0
+; GCN-SDAG-NEXT: s_set_pc_i64 s[30:31]
+;
+; GCN-GISEL-LABEL: test_flat_store_select:
+; GCN-GISEL: ; %bb.0:
+; GCN-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
+; GCN-GISEL-NEXT: s_wait_kmcnt 0x0
+; GCN-GISEL-NEXT: v_cmp_ne_u32_e32 vcc_lo, -1, v2
+; GCN-GISEL-NEXT: v_and_b32_e32 v3, 1, v3
+; GCN-GISEL-NEXT: s_mov_b64 s[0:1], src_shared_base
+; GCN-GISEL-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc_lo
+; GCN-GISEL-NEXT: v_cndmask_b32_e64 v5, 0, s1, vcc_lo
+; GCN-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GCN-GISEL-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v3
+; GCN-GISEL-NEXT: v_dual_cndmask_b32 v0, v2, v0 :: v_dual_cndmask_b32 v1, v5, v1
+; GCN-GISEL-NEXT: flat_store_b32 v[0:1], v4 scope:SCOPE_SE
+; GCN-GISEL-NEXT: s_wait_dscnt 0x0
+; GCN-GISEL-NEXT: s_set_pc_i64 s[30:31]
+ %a.ascast = addrspacecast ptr addrspace(1) %a to ptr
+ %b.ascast = addrspacecast ptr addrspace(3) %b to ptr
+ %ptr = select i1 %cond, ptr %a.ascast, ptr %b.ascast
+ store i32 %val, ptr %ptr
+ ret void
+}
+
+attributes #0 = { "amdgpu-no-flat-scratch-init" }
diff --git a/llvm/test/CodeGen/AMDGPU/global-load-xcnt.ll b/llvm/test/CodeGen/AMDGPU/global-load-xcnt.ll
index fd644a3..3a898a9 100644
--- a/llvm/test/CodeGen/AMDGPU/global-load-xcnt.ll
+++ b/llvm/test/CodeGen/AMDGPU/global-load-xcnt.ll
@@ -124,27 +124,27 @@ define i32 @test_v64i32_load_store(ptr addrspace(1) %ptr, i32 %idx, ptr addrspac
; GCN-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GCN-SDAG-NEXT: s_wait_kmcnt 0x0
; GCN-SDAG-NEXT: s_clause 0xd
-; GCN-SDAG-NEXT: scratch_store_b32 off, v40, s32 offset:52
-; GCN-SDAG-NEXT: scratch_store_b32 off, v41, s32 offset:48
-; GCN-SDAG-NEXT: scratch_store_b32 off, v42, s32 offset:44
-; GCN-SDAG-NEXT: scratch_store_b32 off, v43, s32 offset:40
-; GCN-SDAG-NEXT: scratch_store_b32 off, v44, s32 offset:36
-; GCN-SDAG-NEXT: scratch_store_b32 off, v45, s32 offset:32
-; GCN-SDAG-NEXT: scratch_store_b32 off, v56, s32 offset:28
-; GCN-SDAG-NEXT: scratch_store_b32 off, v57, s32 offset:24
-; GCN-SDAG-NEXT: scratch_store_b32 off, v58, s32 offset:20
-; GCN-SDAG-NEXT: scratch_store_b32 off, v59, s32 offset:16
-; GCN-SDAG-NEXT: scratch_store_b32 off, v60, s32 offset:12
-; GCN-SDAG-NEXT: scratch_store_b32 off, v61, s32 offset:8
-; GCN-SDAG-NEXT: scratch_store_b32 off, v62, s32 offset:4
-; GCN-SDAG-NEXT: scratch_store_b32 off, v63, s32
+; GCN-SDAG-NEXT: scratch_store_b32 off, v40, s32 offset:52 scope:SCOPE_SE
+; GCN-SDAG-NEXT: scratch_store_b32 off, v41, s32 offset:48 scope:SCOPE_SE
+; GCN-SDAG-NEXT: scratch_store_b32 off, v42, s32 offset:44 scope:SCOPE_SE
+; GCN-SDAG-NEXT: scratch_store_b32 off, v43, s32 offset:40 scope:SCOPE_SE
+; GCN-SDAG-NEXT: scratch_store_b32 off, v44, s32 offset:36 scope:SCOPE_SE
+; GCN-SDAG-NEXT: scratch_store_b32 off, v45, s32 offset:32 scope:SCOPE_SE
+; GCN-SDAG-NEXT: scratch_store_b32 off, v56, s32 offset:28 scope:SCOPE_SE
+; GCN-SDAG-NEXT: scratch_store_b32 off, v57, s32 offset:24 scope:SCOPE_SE
+; GCN-SDAG-NEXT: scratch_store_b32 off, v58, s32 offset:20 scope:SCOPE_SE
+; GCN-SDAG-NEXT: scratch_store_b32 off, v59, s32 offset:16 scope:SCOPE_SE
+; GCN-SDAG-NEXT: scratch_store_b32 off, v60, s32 offset:12 scope:SCOPE_SE
+; GCN-SDAG-NEXT: scratch_store_b32 off, v61, s32 offset:8 scope:SCOPE_SE
+; GCN-SDAG-NEXT: scratch_store_b32 off, v62, s32 offset:4 scope:SCOPE_SE
+; GCN-SDAG-NEXT: scratch_store_b32 off, v63, s32 scope:SCOPE_SE
; GCN-SDAG-NEXT: global_load_b128 v[6:9], v[0:1], off offset:224
; GCN-SDAG-NEXT: v_dual_mov_b32 v5, v4 :: v_dual_mov_b32 v4, v3
; GCN-SDAG-NEXT: s_wait_loadcnt 0x0
-; GCN-SDAG-NEXT: scratch_store_b128 off, v[6:9], s32 offset:56 ; 16-byte Folded Spill
+; GCN-SDAG-NEXT: scratch_store_b128 off, v[6:9], s32 offset:56 scope:SCOPE_SE ; 16-byte Folded Spill
; GCN-SDAG-NEXT: global_load_b128 v[6:9], v[0:1], off offset:240
; GCN-SDAG-NEXT: s_wait_loadcnt 0x0
-; GCN-SDAG-NEXT: scratch_store_b128 off, v[6:9], s32 offset:72 ; 16-byte Folded Spill
+; GCN-SDAG-NEXT: scratch_store_b128 off, v[6:9], s32 offset:72 scope:SCOPE_SE ; 16-byte Folded Spill
; GCN-SDAG-NEXT: s_clause 0xd
; GCN-SDAG-NEXT: global_load_b128 v[10:13], v[0:1], off offset:192
; GCN-SDAG-NEXT: global_load_b128 v[14:17], v[0:1], off offset:208
@@ -206,27 +206,27 @@ define i32 @test_v64i32_load_store(ptr addrspace(1) %ptr, i32 %idx, ptr addrspac
; GCN-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GCN-GISEL-NEXT: s_wait_kmcnt 0x0
; GCN-GISEL-NEXT: s_clause 0xf
-; GCN-GISEL-NEXT: scratch_store_b32 off, v40, s32 offset:60
-; GCN-GISEL-NEXT: scratch_store_b32 off, v41, s32 offset:56
-; GCN-GISEL-NEXT: scratch_store_b32 off, v42, s32 offset:52
-; GCN-GISEL-NEXT: scratch_store_b32 off, v43, s32 offset:48
-; GCN-GISEL-NEXT: scratch_store_b32 off, v44, s32 offset:44
-; GCN-GISEL-NEXT: scratch_store_b32 off, v45, s32 offset:40
-; GCN-GISEL-NEXT: scratch_store_b32 off, v46, s32 offset:36
-; GCN-GISEL-NEXT: scratch_store_b32 off, v47, s32 offset:32
-; GCN-GISEL-NEXT: scratch_store_b32 off, v56, s32 offset:28
-; GCN-GISEL-NEXT: scratch_store_b32 off, v57, s32 offset:24
-; GCN-GISEL-NEXT: scratch_store_b32 off, v58, s32 offset:20
-; GCN-GISEL-NEXT: scratch_store_b32 off, v59, s32 offset:16
-; GCN-GISEL-NEXT: scratch_store_b32 off, v60, s32 offset:12
-; GCN-GISEL-NEXT: scratch_store_b32 off, v61, s32 offset:8
-; GCN-GISEL-NEXT: scratch_store_b32 off, v62, s32 offset:4
-; GCN-GISEL-NEXT: scratch_store_b32 off, v63, s32
+; GCN-GISEL-NEXT: scratch_store_b32 off, v40, s32 offset:60 scope:SCOPE_SE
+; GCN-GISEL-NEXT: scratch_store_b32 off, v41, s32 offset:56 scope:SCOPE_SE
+; GCN-GISEL-NEXT: scratch_store_b32 off, v42, s32 offset:52 scope:SCOPE_SE
+; GCN-GISEL-NEXT: scratch_store_b32 off, v43, s32 offset:48 scope:SCOPE_SE
+; GCN-GISEL-NEXT: scratch_store_b32 off, v44, s32 offset:44 scope:SCOPE_SE
+; GCN-GISEL-NEXT: scratch_store_b32 off, v45, s32 offset:40 scope:SCOPE_SE
+; GCN-GISEL-NEXT: scratch_store_b32 off, v46, s32 offset:36 scope:SCOPE_SE
+; GCN-GISEL-NEXT: scratch_store_b32 off, v47, s32 offset:32 scope:SCOPE_SE
+; GCN-GISEL-NEXT: scratch_store_b32 off, v56, s32 offset:28 scope:SCOPE_SE
+; GCN-GISEL-NEXT: scratch_store_b32 off, v57, s32 offset:24 scope:SCOPE_SE
+; GCN-GISEL-NEXT: scratch_store_b32 off, v58, s32 offset:20 scope:SCOPE_SE
+; GCN-GISEL-NEXT: scratch_store_b32 off, v59, s32 offset:16 scope:SCOPE_SE
+; GCN-GISEL-NEXT: scratch_store_b32 off, v60, s32 offset:12 scope:SCOPE_SE
+; GCN-GISEL-NEXT: scratch_store_b32 off, v61, s32 offset:8 scope:SCOPE_SE
+; GCN-GISEL-NEXT: scratch_store_b32 off, v62, s32 offset:4 scope:SCOPE_SE
+; GCN-GISEL-NEXT: scratch_store_b32 off, v63, s32 scope:SCOPE_SE
; GCN-GISEL-NEXT: s_wait_xcnt 0x8
; GCN-GISEL-NEXT: v_dual_mov_b32 v46, v3 :: v_dual_mov_b32 v47, v4
; GCN-GISEL-NEXT: global_load_b128 v[2:5], v[0:1], off offset:32
; GCN-GISEL-NEXT: s_wait_loadcnt 0x0
-; GCN-GISEL-NEXT: scratch_store_b128 off, v[2:5], s32 offset:80 ; 16-byte Folded Spill
+; GCN-GISEL-NEXT: scratch_store_b128 off, v[2:5], s32 offset:80 scope:SCOPE_SE ; 16-byte Folded Spill
; GCN-GISEL-NEXT: s_clause 0xe
; GCN-GISEL-NEXT: global_load_b128 v[6:9], v[0:1], off offset:48
; GCN-GISEL-NEXT: global_load_b128 v[10:13], v[0:1], off offset:64
@@ -244,7 +244,7 @@ define i32 @test_v64i32_load_store(ptr addrspace(1) %ptr, i32 %idx, ptr addrspac
; GCN-GISEL-NEXT: global_load_b128 v[60:63], v[0:1], off offset:16
; GCN-GISEL-NEXT: global_load_b128 v[0:3], v[0:1], off offset:240
; GCN-GISEL-NEXT: s_wait_loadcnt 0x0
-; GCN-GISEL-NEXT: scratch_store_b128 off, v[0:3], s32 offset:64 ; 16-byte Folded Spill
+; GCN-GISEL-NEXT: scratch_store_b128 off, v[0:3], s32 offset:64 scope:SCOPE_SE ; 16-byte Folded Spill
; GCN-GISEL-NEXT: scratch_load_b128 v[0:3], off, s32 offset:80 th:TH_LOAD_LU ; 16-byte Folded Reload
; GCN-GISEL-NEXT: s_wait_loadcnt 0x0
; GCN-GISEL-NEXT: s_clause 0xe
@@ -299,10 +299,10 @@ define i64 @test_v16i64_load_store(ptr addrspace(1) %ptr_a, ptr addrspace(1) %pt
; GCN-SDAG-NEXT: s_wait_loadcnt_dscnt 0x0
; GCN-SDAG-NEXT: s_wait_kmcnt 0x0
; GCN-SDAG-NEXT: s_clause 0x3
-; GCN-SDAG-NEXT: scratch_store_b32 off, v40, s32 offset:12
-; GCN-SDAG-NEXT: scratch_store_b32 off, v41, s32 offset:8
-; GCN-SDAG-NEXT: scratch_store_b32 off, v42, s32 offset:4
-; GCN-SDAG-NEXT: scratch_store_b32 off, v43, s32
+; GCN-SDAG-NEXT: scratch_store_b32 off, v40, s32 offset:12 scope:SCOPE_SE
+; GCN-SDAG-NEXT: scratch_store_b32 off, v41, s32 offset:8 scope:SCOPE_SE
+; GCN-SDAG-NEXT: scratch_store_b32 off, v42, s32 offset:4 scope:SCOPE_SE
+; GCN-SDAG-NEXT: scratch_store_b32 off, v43, s32 scope:SCOPE_SE
; GCN-SDAG-NEXT: s_clause 0x7
; GCN-SDAG-NEXT: global_load_b128 v[10:13], v[0:1], off offset:112
; GCN-SDAG-NEXT: global_load_b128 v[18:21], v[0:1], off offset:96
@@ -385,12 +385,12 @@ define i64 @test_v16i64_load_store(ptr addrspace(1) %ptr_a, ptr addrspace(1) %pt
; GCN-GISEL-NEXT: s_wait_loadcnt_dscnt 0x0
; GCN-GISEL-NEXT: s_wait_kmcnt 0x0
; GCN-GISEL-NEXT: s_clause 0x5
-; GCN-GISEL-NEXT: scratch_store_b32 off, v40, s32 offset:20
-; GCN-GISEL-NEXT: scratch_store_b32 off, v41, s32 offset:16
-; GCN-GISEL-NEXT: scratch_store_b32 off, v42, s32 offset:12
-; GCN-GISEL-NEXT: scratch_store_b32 off, v43, s32 offset:8
-; GCN-GISEL-NEXT: scratch_store_b32 off, v44, s32 offset:4
-; GCN-GISEL-NEXT: scratch_store_b32 off, v45, s32
+; GCN-GISEL-NEXT: scratch_store_b32 off, v40, s32 offset:20 scope:SCOPE_SE
+; GCN-GISEL-NEXT: scratch_store_b32 off, v41, s32 offset:16 scope:SCOPE_SE
+; GCN-GISEL-NEXT: scratch_store_b32 off, v42, s32 offset:12 scope:SCOPE_SE
+; GCN-GISEL-NEXT: scratch_store_b32 off, v43, s32 offset:8 scope:SCOPE_SE
+; GCN-GISEL-NEXT: scratch_store_b32 off, v44, s32 offset:4 scope:SCOPE_SE
+; GCN-GISEL-NEXT: scratch_store_b32 off, v45, s32 scope:SCOPE_SE
; GCN-GISEL-NEXT: s_clause 0x7
; GCN-GISEL-NEXT: global_load_b128 v[6:9], v[0:1], off offset:80
; GCN-GISEL-NEXT: global_load_b128 v[10:13], v[0:1], off
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx942.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx942.ll
index 7d85d34..beda16c 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx942.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx942.ll
@@ -1,13 +1,9 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx942 < %s | FileCheck --check-prefixes=GFX942,GFX942-VGPRCD,GFX942-SDAG,GFX942-VGPRCD-SDAG %s
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx942 < %s | FileCheck --check-prefixes=GFX942,GFX942-VGPRCD,GFX942-GISEL,GFX942-VGPRCD-GISEL %s
-; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx942 -stress-regalloc=10 < %s | FileCheck --check-prefixes=GFX942,GFX942-AGPRCD,GFX942-SDAG,GFX942-AGPRCD-SDAG %s
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx942 -stress-regalloc=10 < %s | FileCheck --check-prefixes=GFX942,GFX942-AGPRCD,GFX942-GISEL,GFX942-AGPRCD-GISEL %s
; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx950 < %s | FileCheck --check-prefixes=GFX950,GFX950-VGPRCD,GFX950-SDAG,GFX950-VGPRCD-SDAG %s
; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx950 < %s | FileCheck --check-prefixes=GFX950,GFX950-VGPRCD,GFX950-GISEL,GFX950-VGPRCD-GISEL %s
-; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx950 -stress-regalloc=10 < %s | FileCheck --check-prefixes=GFX950,GFX950-AGPRCD,GFX950-SDAG,GFX950-AGPRCD-SDAG %s
-; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx950 -stress-regalloc=10 < %s | FileCheck --check-prefixes=GFX950,GFX950-AGPRCD,GFX950-GISEL,GFX950-AGPRCD-GISEL %s
declare <4 x i32> @llvm.amdgcn.mfma.i32.16x16x32.i8(i64, i64, <4 x i32>, i32, i32, i32)
declare <16 x i32> @llvm.amdgcn.mfma.i32.32x32x16.i8(i64, i64, <16 x i32>, i32, i32, i32)
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx950.bf16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx950.bf16.ll
index d358837..8081a15 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx950.bf16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx950.bf16.ll
@@ -252,62 +252,55 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_bf16__vgprcd(<8 x bfloat> %arg
; GCN-NEXT: s_load_dwordx8 s[24:31], s[4:5], 0x24
; GCN-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x64
; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xa4
-; GCN-NEXT: v_mov_b32_e32 v8, 0
+; GCN-NEXT: v_mov_b32_e32 v44, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-NEXT: v_mov_b64_e32 v[0:1], s[24:25]
-; GCN-NEXT: v_mov_b64_e32 v[2:3], s[26:27]
-; GCN-NEXT: v_mov_b64_e32 v[4:5], s[28:29]
-; GCN-NEXT: v_accvgpr_write_b32 a31, s23
-; GCN-NEXT: v_mov_b64_e32 v[6:7], s[30:31]
-; GCN-NEXT: v_accvgpr_write_b32 a30, s22
-; GCN-NEXT: v_accvgpr_write_b32 a29, s21
-; GCN-NEXT: v_accvgpr_write_b32 a28, s20
-; GCN-NEXT: v_accvgpr_write_b32 a27, s19
-; GCN-NEXT: v_accvgpr_write_b32 a26, s18
-; GCN-NEXT: v_accvgpr_write_b32 a25, s17
-; GCN-NEXT: v_accvgpr_write_b32 a24, s16
-; GCN-NEXT: v_accvgpr_write_b32 a23, s15
-; GCN-NEXT: v_accvgpr_write_b32 a22, s14
-; GCN-NEXT: v_accvgpr_write_b32 a21, s13
-; GCN-NEXT: v_accvgpr_write_b32 a20, s12
-; GCN-NEXT: v_accvgpr_write_b32 a19, s11
-; GCN-NEXT: v_accvgpr_write_b32 a18, s10
-; GCN-NEXT: v_accvgpr_write_b32 a17, s9
-; GCN-NEXT: v_accvgpr_write_b32 a16, s8
-; GCN-NEXT: v_mov_b32_e32 v10, s20
-; GCN-NEXT: v_mov_b32_e32 v11, s21
-; GCN-NEXT: v_mfma_f32_32x32x16_bf16 a[0:15], v[0:3], v[4:7], a[16:31]
-; GCN-NEXT: v_mov_b32_e32 v12, s22
-; GCN-NEXT: v_mov_b32_e32 v13, s23
-; GCN-NEXT: v_mov_b32_e32 v0, s16
-; GCN-NEXT: v_mov_b32_e32 v1, s17
-; GCN-NEXT: v_mov_b32_e32 v2, s18
-; GCN-NEXT: v_mov_b32_e32 v3, s19
-; GCN-NEXT: global_store_dwordx4 v8, v[10:13], s[0:1] offset:48 sc0 sc1
-; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:32 sc0 sc1
+; GCN-NEXT: v_mov_b64_e32 v[34:35], s[26:27]
+; GCN-NEXT: v_mov_b64_e32 v[32:33], s[24:25]
+; GCN-NEXT: v_mov_b64_e32 v[38:39], s[30:31]
+; GCN-NEXT: v_mov_b64_e32 v[30:31], s[22:23]
+; GCN-NEXT: v_mov_b64_e32 v[36:37], s[28:29]
+; GCN-NEXT: v_mov_b64_e32 v[28:29], s[20:21]
+; GCN-NEXT: v_mov_b64_e32 v[26:27], s[18:19]
+; GCN-NEXT: v_mov_b64_e32 v[24:25], s[16:17]
+; GCN-NEXT: v_mov_b64_e32 v[22:23], s[14:15]
+; GCN-NEXT: v_mov_b64_e32 v[20:21], s[12:13]
+; GCN-NEXT: v_mov_b64_e32 v[18:19], s[10:11]
+; GCN-NEXT: v_mov_b64_e32 v[16:17], s[8:9]
+; GCN-NEXT: v_mov_b32_e32 v40, s20
+; GCN-NEXT: v_mov_b32_e32 v41, s21
+; GCN-NEXT: v_mfma_f32_32x32x16_bf16 v[0:15], v[32:35], v[36:39], v[16:31]
+; GCN-NEXT: v_mov_b32_e32 v42, s22
+; GCN-NEXT: v_mov_b32_e32 v43, s23
+; GCN-NEXT: global_store_dwordx4 v44, v[40:43], s[0:1] offset:48 sc0 sc1
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: s_nop 2
+; GCN-NEXT: v_mov_b32_e32 v16, s16
+; GCN-NEXT: v_mov_b32_e32 v17, s17
+; GCN-NEXT: v_mov_b32_e32 v18, s18
+; GCN-NEXT: v_mov_b32_e32 v19, s19
+; GCN-NEXT: global_store_dwordx4 v44, v[16:19], s[0:1] offset:32 sc0 sc1
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_nop 0
-; GCN-NEXT: v_mov_b32_e32 v0, s12
-; GCN-NEXT: v_mov_b32_e32 v1, s13
-; GCN-NEXT: v_mov_b32_e32 v2, s14
-; GCN-NEXT: v_mov_b32_e32 v3, s15
-; GCN-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1
+; GCN-NEXT: v_mov_b32_e32 v16, s12
+; GCN-NEXT: v_mov_b32_e32 v17, s13
+; GCN-NEXT: v_mov_b32_e32 v18, s14
+; GCN-NEXT: v_mov_b32_e32 v19, s15
+; GCN-NEXT: global_store_dwordx4 v44, v[16:19], s[0:1] offset:16 sc0 sc1
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_nop 0
-; GCN-NEXT: v_mov_b32_e32 v0, s8
-; GCN-NEXT: v_mov_b32_e32 v1, s9
-; GCN-NEXT: v_mov_b32_e32 v2, s10
-; GCN-NEXT: v_mov_b32_e32 v3, s11
-; GCN-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1
+; GCN-NEXT: v_mov_b32_e32 v16, s8
+; GCN-NEXT: v_mov_b32_e32 v17, s9
+; GCN-NEXT: v_mov_b32_e32 v18, s10
+; GCN-NEXT: v_mov_b32_e32 v19, s11
+; GCN-NEXT: global_store_dwordx4 v44, v[16:19], s[0:1] sc0 sc1
; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: global_store_dwordx4 v8, a[8:11], s[0:1] offset:32 sc0 sc1
+; GCN-NEXT: global_store_dwordx4 v44, v[8:11], s[0:1] offset:32 sc0 sc1
; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: global_store_dwordx4 v8, a[12:15], s[0:1] offset:48 sc0 sc1
+; GCN-NEXT: global_store_dwordx4 v44, v[12:15], s[0:1] offset:48 sc0 sc1
; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: global_store_dwordx4 v8, a[0:3], s[0:1] sc0 sc1
+; GCN-NEXT: global_store_dwordx4 v44, v[0:3], s[0:1] sc0 sc1
; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: global_store_dwordx4 v8, a[4:7], s[0:1] offset:16 sc0 sc1
+; GCN-NEXT: global_store_dwordx4 v44, v[4:7], s[0:1] offset:16 sc0 sc1
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_endpgm
%result = call <16 x float> @llvm.amdgcn.mfma.f32.32x32x16.bf16(<8 x bfloat> %arg0, <8 x bfloat> %arg1, <16 x float> %arg2, i32 0, i32 0, i32 0)
@@ -322,62 +315,55 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_bf16__vgprcd__flags(<8 x bfloa
; GCN-NEXT: s_load_dwordx8 s[24:31], s[4:5], 0x24
; GCN-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x64
; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xa4
-; GCN-NEXT: v_mov_b32_e32 v8, 0
+; GCN-NEXT: v_mov_b32_e32 v44, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-NEXT: v_mov_b64_e32 v[0:1], s[24:25]
-; GCN-NEXT: v_mov_b64_e32 v[2:3], s[26:27]
-; GCN-NEXT: v_mov_b64_e32 v[4:5], s[28:29]
-; GCN-NEXT: v_accvgpr_write_b32 a31, s23
-; GCN-NEXT: v_mov_b64_e32 v[6:7], s[30:31]
-; GCN-NEXT: v_accvgpr_write_b32 a30, s22
-; GCN-NEXT: v_accvgpr_write_b32 a29, s21
-; GCN-NEXT: v_accvgpr_write_b32 a28, s20
-; GCN-NEXT: v_accvgpr_write_b32 a27, s19
-; GCN-NEXT: v_accvgpr_write_b32 a26, s18
-; GCN-NEXT: v_accvgpr_write_b32 a25, s17
-; GCN-NEXT: v_accvgpr_write_b32 a24, s16
-; GCN-NEXT: v_accvgpr_write_b32 a23, s15
-; GCN-NEXT: v_accvgpr_write_b32 a22, s14
-; GCN-NEXT: v_accvgpr_write_b32 a21, s13
-; GCN-NEXT: v_accvgpr_write_b32 a20, s12
-; GCN-NEXT: v_accvgpr_write_b32 a19, s11
-; GCN-NEXT: v_accvgpr_write_b32 a18, s10
-; GCN-NEXT: v_accvgpr_write_b32 a17, s9
-; GCN-NEXT: v_accvgpr_write_b32 a16, s8
-; GCN-NEXT: v_mov_b32_e32 v10, s20
-; GCN-NEXT: v_mov_b32_e32 v11, s21
-; GCN-NEXT: v_mfma_f32_32x32x16_bf16 a[0:15], v[0:3], v[4:7], a[16:31] cbsz:1 abid:2 blgp:3
-; GCN-NEXT: v_mov_b32_e32 v12, s22
-; GCN-NEXT: v_mov_b32_e32 v13, s23
-; GCN-NEXT: v_mov_b32_e32 v0, s16
-; GCN-NEXT: v_mov_b32_e32 v1, s17
-; GCN-NEXT: v_mov_b32_e32 v2, s18
-; GCN-NEXT: v_mov_b32_e32 v3, s19
-; GCN-NEXT: global_store_dwordx4 v8, v[10:13], s[0:1] offset:48 sc0 sc1
-; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:32 sc0 sc1
+; GCN-NEXT: v_mov_b64_e32 v[34:35], s[26:27]
+; GCN-NEXT: v_mov_b64_e32 v[32:33], s[24:25]
+; GCN-NEXT: v_mov_b64_e32 v[38:39], s[30:31]
+; GCN-NEXT: v_mov_b64_e32 v[30:31], s[22:23]
+; GCN-NEXT: v_mov_b64_e32 v[36:37], s[28:29]
+; GCN-NEXT: v_mov_b64_e32 v[28:29], s[20:21]
+; GCN-NEXT: v_mov_b64_e32 v[26:27], s[18:19]
+; GCN-NEXT: v_mov_b64_e32 v[24:25], s[16:17]
+; GCN-NEXT: v_mov_b64_e32 v[22:23], s[14:15]
+; GCN-NEXT: v_mov_b64_e32 v[20:21], s[12:13]
+; GCN-NEXT: v_mov_b64_e32 v[18:19], s[10:11]
+; GCN-NEXT: v_mov_b64_e32 v[16:17], s[8:9]
+; GCN-NEXT: v_mov_b32_e32 v40, s20
+; GCN-NEXT: v_mov_b32_e32 v41, s21
+; GCN-NEXT: v_mfma_f32_32x32x16_bf16 v[0:15], v[32:35], v[36:39], v[16:31] cbsz:1 abid:2 blgp:3
+; GCN-NEXT: v_mov_b32_e32 v42, s22
+; GCN-NEXT: v_mov_b32_e32 v43, s23
+; GCN-NEXT: global_store_dwordx4 v44, v[40:43], s[0:1] offset:48 sc0 sc1
+; GCN-NEXT: s_waitcnt vmcnt(0)
+; GCN-NEXT: s_nop 2
+; GCN-NEXT: v_mov_b32_e32 v16, s16
+; GCN-NEXT: v_mov_b32_e32 v17, s17
+; GCN-NEXT: v_mov_b32_e32 v18, s18
+; GCN-NEXT: v_mov_b32_e32 v19, s19
+; GCN-NEXT: global_store_dwordx4 v44, v[16:19], s[0:1] offset:32 sc0 sc1
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_nop 0
-; GCN-NEXT: v_mov_b32_e32 v0, s12
-; GCN-NEXT: v_mov_b32_e32 v1, s13
-; GCN-NEXT: v_mov_b32_e32 v2, s14
-; GCN-NEXT: v_mov_b32_e32 v3, s15
-; GCN-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1
+; GCN-NEXT: v_mov_b32_e32 v16, s12
+; GCN-NEXT: v_mov_b32_e32 v17, s13
+; GCN-NEXT: v_mov_b32_e32 v18, s14
+; GCN-NEXT: v_mov_b32_e32 v19, s15
+; GCN-NEXT: global_store_dwordx4 v44, v[16:19], s[0:1] offset:16 sc0 sc1
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_nop 0
-; GCN-NEXT: v_mov_b32_e32 v0, s8
-; GCN-NEXT: v_mov_b32_e32 v1, s9
-; GCN-NEXT: v_mov_b32_e32 v2, s10
-; GCN-NEXT: v_mov_b32_e32 v3, s11
-; GCN-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1
+; GCN-NEXT: v_mov_b32_e32 v16, s8
+; GCN-NEXT: v_mov_b32_e32 v17, s9
+; GCN-NEXT: v_mov_b32_e32 v18, s10
+; GCN-NEXT: v_mov_b32_e32 v19, s11
+; GCN-NEXT: global_store_dwordx4 v44, v[16:19], s[0:1] sc0 sc1
; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: global_store_dwordx4 v8, a[8:11], s[0:1] offset:32 sc0 sc1
+; GCN-NEXT: global_store_dwordx4 v44, v[8:11], s[0:1] offset:32 sc0 sc1
; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: global_store_dwordx4 v8, a[12:15], s[0:1] offset:48 sc0 sc1
+; GCN-NEXT: global_store_dwordx4 v44, v[12:15], s[0:1] offset:48 sc0 sc1
; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: global_store_dwordx4 v8, a[0:3], s[0:1] sc0 sc1
+; GCN-NEXT: global_store_dwordx4 v44, v[0:3], s[0:1] sc0 sc1
; GCN-NEXT: s_waitcnt vmcnt(0)
-; GCN-NEXT: global_store_dwordx4 v8, a[4:7], s[0:1] offset:16 sc0 sc1
+; GCN-NEXT: global_store_dwordx4 v44, v[4:7], s[0:1] offset:16 sc0 sc1
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_endpgm
%result = call <16 x float> @llvm.amdgcn.mfma.f32.32x32x16.bf16(<8 x bfloat> %arg0, <8 x bfloat> %arg1, <16 x float> %arg2, i32 1, i32 2, i32 3)
@@ -393,35 +379,27 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_bf16__vgprcd_mac(<8 x bfloat>
; GCN-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x64
; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xa4
; GCN-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-NEXT: v_mov_b64_e32 v[0:1], s[24:25]
-; GCN-NEXT: v_mov_b64_e32 v[2:3], s[26:27]
-; GCN-NEXT: v_mov_b64_e32 v[4:5], s[28:29]
-; GCN-NEXT: v_accvgpr_write_b32 a0, s8
-; GCN-NEXT: v_mov_b64_e32 v[6:7], s[30:31]
-; GCN-NEXT: v_accvgpr_write_b32 a1, s9
-; GCN-NEXT: v_accvgpr_write_b32 a2, s10
-; GCN-NEXT: v_accvgpr_write_b32 a3, s11
-; GCN-NEXT: v_accvgpr_write_b32 a4, s12
-; GCN-NEXT: v_accvgpr_write_b32 a5, s13
-; GCN-NEXT: v_accvgpr_write_b32 a6, s14
-; GCN-NEXT: v_accvgpr_write_b32 a7, s15
-; GCN-NEXT: v_accvgpr_write_b32 a8, s16
-; GCN-NEXT: v_accvgpr_write_b32 a9, s17
-; GCN-NEXT: v_accvgpr_write_b32 a10, s18
-; GCN-NEXT: v_accvgpr_write_b32 a11, s19
-; GCN-NEXT: v_accvgpr_write_b32 a12, s20
-; GCN-NEXT: v_accvgpr_write_b32 a13, s21
-; GCN-NEXT: v_accvgpr_write_b32 a14, s22
-; GCN-NEXT: v_accvgpr_write_b32 a15, s23
+; GCN-NEXT: v_mov_b64_e32 v[16:17], s[24:25]
+; GCN-NEXT: v_mov_b64_e32 v[18:19], s[26:27]
+; GCN-NEXT: v_mov_b64_e32 v[20:21], s[28:29]
+; GCN-NEXT: v_mov_b64_e32 v[0:1], s[8:9]
+; GCN-NEXT: v_mov_b64_e32 v[22:23], s[30:31]
+; GCN-NEXT: v_mov_b64_e32 v[2:3], s[10:11]
+; GCN-NEXT: v_mov_b64_e32 v[4:5], s[12:13]
+; GCN-NEXT: v_mov_b64_e32 v[6:7], s[14:15]
+; GCN-NEXT: v_mov_b64_e32 v[8:9], s[16:17]
+; GCN-NEXT: v_mov_b64_e32 v[10:11], s[18:19]
+; GCN-NEXT: v_mov_b64_e32 v[12:13], s[20:21]
+; GCN-NEXT: v_mov_b64_e32 v[14:15], s[22:23]
; GCN-NEXT: s_nop 1
-; GCN-NEXT: v_mfma_f32_32x32x16_bf16 a[0:15], v[0:3], v[4:7], a[0:15]
-; GCN-NEXT: v_mov_b32_e32 v0, 0
+; GCN-NEXT: v_mfma_f32_32x32x16_bf16 v[0:15], v[16:19], v[20:23], v[0:15]
+; GCN-NEXT: v_mov_b32_e32 v16, 0
; GCN-NEXT: s_nop 7
; GCN-NEXT: s_nop 2
-; GCN-NEXT: global_store_dwordx4 v0, a[12:15], s[0:1] offset:48
-; GCN-NEXT: global_store_dwordx4 v0, a[8:11], s[0:1] offset:32
-; GCN-NEXT: global_store_dwordx4 v0, a[4:7], s[0:1] offset:16
-; GCN-NEXT: global_store_dwordx4 v0, a[0:3], s[0:1]
+; GCN-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] offset:48
+; GCN-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] offset:32
+; GCN-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16
+; GCN-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1]
; GCN-NEXT: s_endpgm
%result = call <16 x float> @llvm.amdgcn.mfma.f32.32x32x16.bf16(<8 x bfloat> %arg0, <8 x bfloat> %arg1, <16 x float> %arg2, i32 0, i32 0, i32 0)
store <16 x float> %result, ptr addrspace(1) %out
@@ -435,40 +413,32 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_bf16__vgprcd_mac_flags(<8 x bf
; GCN-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x64
; GCN-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xa4
; GCN-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-NEXT: v_mov_b64_e32 v[0:1], s[24:25]
-; GCN-NEXT: v_mov_b64_e32 v[2:3], s[26:27]
-; GCN-NEXT: v_mov_b64_e32 v[4:5], s[28:29]
-; GCN-NEXT: v_accvgpr_write_b32 a0, s8
-; GCN-NEXT: v_mov_b64_e32 v[6:7], s[30:31]
-; GCN-NEXT: v_accvgpr_write_b32 a1, s9
-; GCN-NEXT: v_accvgpr_write_b32 a2, s10
-; GCN-NEXT: v_accvgpr_write_b32 a3, s11
-; GCN-NEXT: v_accvgpr_write_b32 a4, s12
-; GCN-NEXT: v_accvgpr_write_b32 a5, s13
-; GCN-NEXT: v_accvgpr_write_b32 a6, s14
-; GCN-NEXT: v_accvgpr_write_b32 a7, s15
-; GCN-NEXT: v_accvgpr_write_b32 a8, s16
-; GCN-NEXT: v_accvgpr_write_b32 a9, s17
-; GCN-NEXT: v_accvgpr_write_b32 a10, s18
-; GCN-NEXT: v_accvgpr_write_b32 a11, s19
-; GCN-NEXT: v_accvgpr_write_b32 a12, s20
-; GCN-NEXT: v_accvgpr_write_b32 a13, s21
-; GCN-NEXT: v_accvgpr_write_b32 a14, s22
-; GCN-NEXT: v_accvgpr_write_b32 a15, s23
+; GCN-NEXT: v_mov_b64_e32 v[16:17], s[24:25]
+; GCN-NEXT: v_mov_b64_e32 v[18:19], s[26:27]
+; GCN-NEXT: v_mov_b64_e32 v[20:21], s[28:29]
+; GCN-NEXT: v_mov_b64_e32 v[0:1], s[8:9]
+; GCN-NEXT: v_mov_b64_e32 v[22:23], s[30:31]
+; GCN-NEXT: v_mov_b64_e32 v[2:3], s[10:11]
+; GCN-NEXT: v_mov_b64_e32 v[4:5], s[12:13]
+; GCN-NEXT: v_mov_b64_e32 v[6:7], s[14:15]
+; GCN-NEXT: v_mov_b64_e32 v[8:9], s[16:17]
+; GCN-NEXT: v_mov_b64_e32 v[10:11], s[18:19]
+; GCN-NEXT: v_mov_b64_e32 v[12:13], s[20:21]
+; GCN-NEXT: v_mov_b64_e32 v[14:15], s[22:23]
; GCN-NEXT: s_nop 1
-; GCN-NEXT: v_mfma_f32_32x32x16_bf16 a[0:15], v[0:3], v[4:7], a[0:15] cbsz:3 abid:2 blgp:1
-; GCN-NEXT: v_mov_b32_e32 v0, 0
+; GCN-NEXT: v_mfma_f32_32x32x16_bf16 v[0:15], v[16:19], v[20:23], v[0:15] cbsz:3 abid:2 blgp:1
+; GCN-NEXT: v_mov_b32_e32 v16, 0
; GCN-NEXT: s_nop 7
; GCN-NEXT: s_nop 2
-; GCN-NEXT: global_store_dwordx4 v0, a[12:15], s[0:1] offset:48
-; GCN-NEXT: global_store_dwordx4 v0, a[8:11], s[0:1] offset:32
-; GCN-NEXT: global_store_dwordx4 v0, a[4:7], s[0:1] offset:16
-; GCN-NEXT: global_store_dwordx4 v0, a[0:3], s[0:1]
+; GCN-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] offset:48
+; GCN-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] offset:32
+; GCN-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16
+; GCN-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1]
; GCN-NEXT: s_endpgm
%result = call <16 x float> @llvm.amdgcn.mfma.f32.32x32x16.bf16(<8 x bfloat> %arg0, <8 x bfloat> %arg1, <16 x float> %arg2, i32 3, i32 2, i32 1)
store <16 x float> %result, ptr addrspace(1) %out
ret void
}
-attributes #0 = { "amdgpu-flat-work-group-size"="512,512" }
+attributes #0 = { "amdgpu-flat-work-group-size"="512,512" "amdgpu-agpr-alloc"="0,0" }
attributes #1 = { "amdgpu-flat-work-group-size"="1,64" }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx950.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx950.ll
index 21465be..d81ec1c 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx950.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.gfx950.ll
@@ -141,20 +141,18 @@ define amdgpu_kernel void @test_mfma_f32_16x16x32_f16_no_agpr__vgprcd(ptr addrsp
; SDAG-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x34
; SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x54
; SDAG-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24
-; SDAG-NEXT: v_mov_b32_e32 v8, 0
+; SDAG-NEXT: v_mov_b32_e32 v12, 0
; SDAG-NEXT: s_waitcnt lgkmcnt(0)
; SDAG-NEXT: v_mov_b64_e32 v[0:1], s[8:9]
; SDAG-NEXT: v_mov_b64_e32 v[2:3], s[10:11]
; SDAG-NEXT: v_mov_b64_e32 v[4:5], s[12:13]
-; SDAG-NEXT: v_accvgpr_write_b32 a0, s0
+; SDAG-NEXT: v_mov_b64_e32 v[10:11], s[2:3]
; SDAG-NEXT: v_mov_b64_e32 v[6:7], s[14:15]
-; SDAG-NEXT: v_accvgpr_write_b32 a1, s1
-; SDAG-NEXT: v_accvgpr_write_b32 a2, s2
-; SDAG-NEXT: v_accvgpr_write_b32 a3, s3
+; SDAG-NEXT: v_mov_b64_e32 v[8:9], s[0:1]
; SDAG-NEXT: s_nop 1
-; SDAG-NEXT: v_mfma_f32_16x16x32_f16 a[0:3], v[0:3], v[4:7], a[0:3]
+; SDAG-NEXT: v_mfma_f32_16x16x32_f16 v[0:3], v[0:3], v[4:7], v[8:11]
; SDAG-NEXT: s_nop 7
-; SDAG-NEXT: global_store_dwordx4 v8, a[0:3], s[6:7]
+; SDAG-NEXT: global_store_dwordx4 v12, v[0:3], s[6:7]
; SDAG-NEXT: s_endpgm
;
; GISEL-LABEL: test_mfma_f32_16x16x32_f16_no_agpr__vgprcd:
@@ -166,16 +164,14 @@ define amdgpu_kernel void @test_mfma_f32_16x16x32_f16_no_agpr__vgprcd(ptr addrsp
; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[8:9]
; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[10:11]
; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[12:13]
-; GISEL-NEXT: v_accvgpr_write_b32 a0, s0
+; GISEL-NEXT: v_mov_b64_e32 v[10:11], s[2:3]
; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[14:15]
-; GISEL-NEXT: v_accvgpr_write_b32 a1, s1
-; GISEL-NEXT: v_accvgpr_write_b32 a2, s2
-; GISEL-NEXT: v_accvgpr_write_b32 a3, s3
+; GISEL-NEXT: v_mov_b64_e32 v[8:9], s[0:1]
; GISEL-NEXT: s_nop 1
-; GISEL-NEXT: v_mfma_f32_16x16x32_f16 a[0:3], v[0:3], v[4:7], a[0:3]
-; GISEL-NEXT: v_mov_b32_e32 v0, 0
+; GISEL-NEXT: v_mfma_f32_16x16x32_f16 v[0:3], v[0:3], v[4:7], v[8:11]
+; GISEL-NEXT: v_mov_b32_e32 v4, 0
; GISEL-NEXT: s_nop 6
-; GISEL-NEXT: global_store_dwordx4 v0, a[0:3], s[6:7]
+; GISEL-NEXT: global_store_dwordx4 v4, v[0:3], s[6:7]
; GISEL-NEXT: s_endpgm
;
; HEURRC-LABEL: test_mfma_f32_16x16x32_f16_no_agpr__vgprcd:
@@ -183,20 +179,18 @@ define amdgpu_kernel void @test_mfma_f32_16x16x32_f16_no_agpr__vgprcd(ptr addrsp
; HEURRC-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x34
; HEURRC-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x54
; HEURRC-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24
-; HEURRC-NEXT: v_mov_b32_e32 v8, 0
+; HEURRC-NEXT: v_mov_b32_e32 v12, 0
; HEURRC-NEXT: s_waitcnt lgkmcnt(0)
; HEURRC-NEXT: v_mov_b64_e32 v[0:1], s[8:9]
; HEURRC-NEXT: v_mov_b64_e32 v[2:3], s[10:11]
; HEURRC-NEXT: v_mov_b64_e32 v[4:5], s[12:13]
-; HEURRC-NEXT: v_accvgpr_write_b32 a0, s0
+; HEURRC-NEXT: v_mov_b64_e32 v[10:11], s[2:3]
; HEURRC-NEXT: v_mov_b64_e32 v[6:7], s[14:15]
-; HEURRC-NEXT: v_accvgpr_write_b32 a1, s1
-; HEURRC-NEXT: v_accvgpr_write_b32 a2, s2
-; HEURRC-NEXT: v_accvgpr_write_b32 a3, s3
+; HEURRC-NEXT: v_mov_b64_e32 v[8:9], s[0:1]
; HEURRC-NEXT: s_nop 1
-; HEURRC-NEXT: v_mfma_f32_16x16x32_f16 a[0:3], v[0:3], v[4:7], a[0:3]
+; HEURRC-NEXT: v_mfma_f32_16x16x32_f16 v[0:3], v[0:3], v[4:7], v[8:11]
; HEURRC-NEXT: s_nop 7
-; HEURRC-NEXT: global_store_dwordx4 v8, a[0:3], s[6:7]
+; HEURRC-NEXT: global_store_dwordx4 v12, v[0:3], s[6:7]
; HEURRC-NEXT: s_endpgm
;
; VGPRRC-LABEL: test_mfma_f32_16x16x32_f16_no_agpr__vgprcd:
@@ -266,20 +260,18 @@ define amdgpu_kernel void @test_mfma_f32_16x16x32_f16_no_agpr__vgprcd__flags(ptr
; SDAG-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x34
; SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x54
; SDAG-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24
-; SDAG-NEXT: v_mov_b32_e32 v8, 0
+; SDAG-NEXT: v_mov_b32_e32 v12, 0
; SDAG-NEXT: s_waitcnt lgkmcnt(0)
; SDAG-NEXT: v_mov_b64_e32 v[0:1], s[8:9]
; SDAG-NEXT: v_mov_b64_e32 v[2:3], s[10:11]
; SDAG-NEXT: v_mov_b64_e32 v[4:5], s[12:13]
-; SDAG-NEXT: v_accvgpr_write_b32 a0, s0
+; SDAG-NEXT: v_mov_b64_e32 v[10:11], s[2:3]
; SDAG-NEXT: v_mov_b64_e32 v[6:7], s[14:15]
-; SDAG-NEXT: v_accvgpr_write_b32 a1, s1
-; SDAG-NEXT: v_accvgpr_write_b32 a2, s2
-; SDAG-NEXT: v_accvgpr_write_b32 a3, s3
+; SDAG-NEXT: v_mov_b64_e32 v[8:9], s[0:1]
; SDAG-NEXT: s_nop 1
-; SDAG-NEXT: v_mfma_f32_16x16x32_f16 a[0:3], v[0:3], v[4:7], a[0:3] cbsz:3 abid:2 blgp:1
+; SDAG-NEXT: v_mfma_f32_16x16x32_f16 v[0:3], v[0:3], v[4:7], v[8:11] cbsz:3 abid:2 blgp:1
; SDAG-NEXT: s_nop 7
-; SDAG-NEXT: global_store_dwordx4 v8, a[0:3], s[6:7]
+; SDAG-NEXT: global_store_dwordx4 v12, v[0:3], s[6:7]
; SDAG-NEXT: s_endpgm
;
; GISEL-LABEL: test_mfma_f32_16x16x32_f16_no_agpr__vgprcd__flags:
@@ -291,16 +283,14 @@ define amdgpu_kernel void @test_mfma_f32_16x16x32_f16_no_agpr__vgprcd__flags(ptr
; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[8:9]
; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[10:11]
; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[12:13]
-; GISEL-NEXT: v_accvgpr_write_b32 a0, s0
+; GISEL-NEXT: v_mov_b64_e32 v[10:11], s[2:3]
; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[14:15]
-; GISEL-NEXT: v_accvgpr_write_b32 a1, s1
-; GISEL-NEXT: v_accvgpr_write_b32 a2, s2
-; GISEL-NEXT: v_accvgpr_write_b32 a3, s3
+; GISEL-NEXT: v_mov_b64_e32 v[8:9], s[0:1]
; GISEL-NEXT: s_nop 1
-; GISEL-NEXT: v_mfma_f32_16x16x32_f16 a[0:3], v[0:3], v[4:7], a[0:3] cbsz:3 abid:2 blgp:1
-; GISEL-NEXT: v_mov_b32_e32 v0, 0
+; GISEL-NEXT: v_mfma_f32_16x16x32_f16 v[0:3], v[0:3], v[4:7], v[8:11] cbsz:3 abid:2 blgp:1
+; GISEL-NEXT: v_mov_b32_e32 v4, 0
; GISEL-NEXT: s_nop 6
-; GISEL-NEXT: global_store_dwordx4 v0, a[0:3], s[6:7]
+; GISEL-NEXT: global_store_dwordx4 v4, v[0:3], s[6:7]
; GISEL-NEXT: s_endpgm
;
; HEURRC-LABEL: test_mfma_f32_16x16x32_f16_no_agpr__vgprcd__flags:
@@ -308,20 +298,18 @@ define amdgpu_kernel void @test_mfma_f32_16x16x32_f16_no_agpr__vgprcd__flags(ptr
; HEURRC-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x34
; HEURRC-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x54
; HEURRC-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24
-; HEURRC-NEXT: v_mov_b32_e32 v8, 0
+; HEURRC-NEXT: v_mov_b32_e32 v12, 0
; HEURRC-NEXT: s_waitcnt lgkmcnt(0)
; HEURRC-NEXT: v_mov_b64_e32 v[0:1], s[8:9]
; HEURRC-NEXT: v_mov_b64_e32 v[2:3], s[10:11]
; HEURRC-NEXT: v_mov_b64_e32 v[4:5], s[12:13]
-; HEURRC-NEXT: v_accvgpr_write_b32 a0, s0
+; HEURRC-NEXT: v_mov_b64_e32 v[10:11], s[2:3]
; HEURRC-NEXT: v_mov_b64_e32 v[6:7], s[14:15]
-; HEURRC-NEXT: v_accvgpr_write_b32 a1, s1
-; HEURRC-NEXT: v_accvgpr_write_b32 a2, s2
-; HEURRC-NEXT: v_accvgpr_write_b32 a3, s3
+; HEURRC-NEXT: v_mov_b64_e32 v[8:9], s[0:1]
; HEURRC-NEXT: s_nop 1
-; HEURRC-NEXT: v_mfma_f32_16x16x32_f16 a[0:3], v[0:3], v[4:7], a[0:3] cbsz:3 abid:2 blgp:1
+; HEURRC-NEXT: v_mfma_f32_16x16x32_f16 v[0:3], v[0:3], v[4:7], v[8:11] cbsz:3 abid:2 blgp:1
; HEURRC-NEXT: s_nop 7
-; HEURRC-NEXT: global_store_dwordx4 v8, a[0:3], s[6:7]
+; HEURRC-NEXT: global_store_dwordx4 v12, v[0:3], s[6:7]
; HEURRC-NEXT: s_endpgm
;
; VGPRRC-LABEL: test_mfma_f32_16x16x32_f16_no_agpr__vgprcd__flags:
@@ -1505,62 +1493,55 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_f16__vgprcd(<8 x half> %arg0,
; SDAG-NEXT: s_load_dwordx8 s[24:31], s[4:5], 0x24
; SDAG-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x64
; SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xa4
-; SDAG-NEXT: v_mov_b32_e32 v8, 0
+; SDAG-NEXT: v_mov_b32_e32 v44, 0
; SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; SDAG-NEXT: v_mov_b64_e32 v[0:1], s[24:25]
-; SDAG-NEXT: v_mov_b64_e32 v[2:3], s[26:27]
-; SDAG-NEXT: v_mov_b64_e32 v[4:5], s[28:29]
-; SDAG-NEXT: v_accvgpr_write_b32 a31, s23
-; SDAG-NEXT: v_mov_b64_e32 v[6:7], s[30:31]
-; SDAG-NEXT: v_accvgpr_write_b32 a30, s22
-; SDAG-NEXT: v_accvgpr_write_b32 a29, s21
-; SDAG-NEXT: v_accvgpr_write_b32 a28, s20
-; SDAG-NEXT: v_accvgpr_write_b32 a27, s19
-; SDAG-NEXT: v_accvgpr_write_b32 a26, s18
-; SDAG-NEXT: v_accvgpr_write_b32 a25, s17
-; SDAG-NEXT: v_accvgpr_write_b32 a24, s16
-; SDAG-NEXT: v_accvgpr_write_b32 a23, s15
-; SDAG-NEXT: v_accvgpr_write_b32 a22, s14
-; SDAG-NEXT: v_accvgpr_write_b32 a21, s13
-; SDAG-NEXT: v_accvgpr_write_b32 a20, s12
-; SDAG-NEXT: v_accvgpr_write_b32 a19, s11
-; SDAG-NEXT: v_accvgpr_write_b32 a18, s10
-; SDAG-NEXT: v_accvgpr_write_b32 a17, s9
-; SDAG-NEXT: v_accvgpr_write_b32 a16, s8
-; SDAG-NEXT: v_mov_b32_e32 v10, s20
-; SDAG-NEXT: v_mov_b32_e32 v11, s21
-; SDAG-NEXT: v_mfma_f32_32x32x16_f16 a[0:15], v[0:3], v[4:7], a[16:31]
-; SDAG-NEXT: v_mov_b32_e32 v12, s22
-; SDAG-NEXT: v_mov_b32_e32 v13, s23
-; SDAG-NEXT: v_mov_b32_e32 v0, s16
-; SDAG-NEXT: v_mov_b32_e32 v1, s17
-; SDAG-NEXT: v_mov_b32_e32 v2, s18
-; SDAG-NEXT: v_mov_b32_e32 v3, s19
-; SDAG-NEXT: global_store_dwordx4 v8, v[10:13], s[0:1] offset:48 sc0 sc1
+; SDAG-NEXT: v_mov_b64_e32 v[34:35], s[26:27]
+; SDAG-NEXT: v_mov_b64_e32 v[32:33], s[24:25]
+; SDAG-NEXT: v_mov_b64_e32 v[38:39], s[30:31]
+; SDAG-NEXT: v_mov_b64_e32 v[30:31], s[22:23]
+; SDAG-NEXT: v_mov_b64_e32 v[36:37], s[28:29]
+; SDAG-NEXT: v_mov_b64_e32 v[28:29], s[20:21]
+; SDAG-NEXT: v_mov_b64_e32 v[26:27], s[18:19]
+; SDAG-NEXT: v_mov_b64_e32 v[24:25], s[16:17]
+; SDAG-NEXT: v_mov_b64_e32 v[22:23], s[14:15]
+; SDAG-NEXT: v_mov_b64_e32 v[20:21], s[12:13]
+; SDAG-NEXT: v_mov_b64_e32 v[18:19], s[10:11]
+; SDAG-NEXT: v_mov_b64_e32 v[16:17], s[8:9]
+; SDAG-NEXT: v_mov_b32_e32 v40, s20
+; SDAG-NEXT: v_mov_b32_e32 v41, s21
+; SDAG-NEXT: v_mfma_f32_32x32x16_f16 v[0:15], v[32:35], v[36:39], v[16:31]
+; SDAG-NEXT: v_mov_b32_e32 v42, s22
+; SDAG-NEXT: v_mov_b32_e32 v43, s23
+; SDAG-NEXT: global_store_dwordx4 v44, v[40:43], s[0:1] offset:48 sc0 sc1
; SDAG-NEXT: s_waitcnt vmcnt(0)
-; SDAG-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:32 sc0 sc1
+; SDAG-NEXT: s_nop 2
+; SDAG-NEXT: v_mov_b32_e32 v16, s16
+; SDAG-NEXT: v_mov_b32_e32 v17, s17
+; SDAG-NEXT: v_mov_b32_e32 v18, s18
+; SDAG-NEXT: v_mov_b32_e32 v19, s19
+; SDAG-NEXT: global_store_dwordx4 v44, v[16:19], s[0:1] offset:32 sc0 sc1
; SDAG-NEXT: s_waitcnt vmcnt(0)
; SDAG-NEXT: s_nop 0
-; SDAG-NEXT: v_mov_b32_e32 v0, s12
-; SDAG-NEXT: v_mov_b32_e32 v1, s13
-; SDAG-NEXT: v_mov_b32_e32 v2, s14
-; SDAG-NEXT: v_mov_b32_e32 v3, s15
-; SDAG-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1
+; SDAG-NEXT: v_mov_b32_e32 v16, s12
+; SDAG-NEXT: v_mov_b32_e32 v17, s13
+; SDAG-NEXT: v_mov_b32_e32 v18, s14
+; SDAG-NEXT: v_mov_b32_e32 v19, s15
+; SDAG-NEXT: global_store_dwordx4 v44, v[16:19], s[0:1] offset:16 sc0 sc1
; SDAG-NEXT: s_waitcnt vmcnt(0)
; SDAG-NEXT: s_nop 0
-; SDAG-NEXT: v_mov_b32_e32 v0, s8
-; SDAG-NEXT: v_mov_b32_e32 v1, s9
-; SDAG-NEXT: v_mov_b32_e32 v2, s10
-; SDAG-NEXT: v_mov_b32_e32 v3, s11
-; SDAG-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1
+; SDAG-NEXT: v_mov_b32_e32 v16, s8
+; SDAG-NEXT: v_mov_b32_e32 v17, s9
+; SDAG-NEXT: v_mov_b32_e32 v18, s10
+; SDAG-NEXT: v_mov_b32_e32 v19, s11
+; SDAG-NEXT: global_store_dwordx4 v44, v[16:19], s[0:1] sc0 sc1
; SDAG-NEXT: s_waitcnt vmcnt(0)
-; SDAG-NEXT: global_store_dwordx4 v8, a[8:11], s[0:1] offset:32 sc0 sc1
+; SDAG-NEXT: global_store_dwordx4 v44, v[8:11], s[0:1] offset:32 sc0 sc1
; SDAG-NEXT: s_waitcnt vmcnt(0)
-; SDAG-NEXT: global_store_dwordx4 v8, a[12:15], s[0:1] offset:48 sc0 sc1
+; SDAG-NEXT: global_store_dwordx4 v44, v[12:15], s[0:1] offset:48 sc0 sc1
; SDAG-NEXT: s_waitcnt vmcnt(0)
-; SDAG-NEXT: global_store_dwordx4 v8, a[0:3], s[0:1] sc0 sc1
+; SDAG-NEXT: global_store_dwordx4 v44, v[0:3], s[0:1] sc0 sc1
; SDAG-NEXT: s_waitcnt vmcnt(0)
-; SDAG-NEXT: global_store_dwordx4 v8, a[4:7], s[0:1] offset:16 sc0 sc1
+; SDAG-NEXT: global_store_dwordx4 v44, v[4:7], s[0:1] offset:16 sc0 sc1
; SDAG-NEXT: s_waitcnt vmcnt(0)
; SDAG-NEXT: s_endpgm
;
@@ -1569,52 +1550,44 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_f16__vgprcd(<8 x half> %arg0,
; GISEL-NEXT: s_load_dwordx8 s[24:31], s[4:5], 0x24
; GISEL-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x64
; GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xa4
-; GISEL-NEXT: v_mov_b32_e32 v24, 0
+; GISEL-NEXT: v_mov_b32_e32 v56, 0
; GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[24:25]
-; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[26:27]
-; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[28:29]
-; GISEL-NEXT: v_accvgpr_write_b32 a0, s8
-; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[30:31]
-; GISEL-NEXT: v_accvgpr_write_b32 a1, s9
-; GISEL-NEXT: v_accvgpr_write_b32 a2, s10
-; GISEL-NEXT: v_accvgpr_write_b32 a3, s11
-; GISEL-NEXT: v_accvgpr_write_b32 a4, s12
-; GISEL-NEXT: v_accvgpr_write_b32 a5, s13
-; GISEL-NEXT: v_accvgpr_write_b32 a6, s14
-; GISEL-NEXT: v_accvgpr_write_b32 a7, s15
-; GISEL-NEXT: v_accvgpr_write_b32 a8, s16
-; GISEL-NEXT: v_accvgpr_write_b32 a9, s17
-; GISEL-NEXT: v_accvgpr_write_b32 a10, s18
-; GISEL-NEXT: v_accvgpr_write_b32 a11, s19
-; GISEL-NEXT: v_accvgpr_write_b32 a12, s20
-; GISEL-NEXT: v_accvgpr_write_b32 a13, s21
-; GISEL-NEXT: v_accvgpr_write_b32 a14, s22
-; GISEL-NEXT: v_accvgpr_write_b32 a15, s23
-; GISEL-NEXT: v_mov_b64_e32 v[8:9], s[8:9]
-; GISEL-NEXT: v_mov_b64_e32 v[10:11], s[10:11]
-; GISEL-NEXT: v_mfma_f32_32x32x16_f16 a[16:31], v[0:3], v[4:7], a[0:15]
-; GISEL-NEXT: v_mov_b64_e32 v[12:13], s[12:13]
-; GISEL-NEXT: v_mov_b64_e32 v[16:17], s[16:17]
-; GISEL-NEXT: v_mov_b64_e32 v[20:21], s[20:21]
-; GISEL-NEXT: v_mov_b64_e32 v[14:15], s[14:15]
-; GISEL-NEXT: v_mov_b64_e32 v[18:19], s[18:19]
-; GISEL-NEXT: v_mov_b64_e32 v[22:23], s[22:23]
-; GISEL-NEXT: global_store_dwordx4 v24, v[8:11], s[0:1] sc0 sc1
+; GISEL-NEXT: v_mov_b64_e32 v[34:35], s[26:27]
+; GISEL-NEXT: v_mov_b64_e32 v[32:33], s[24:25]
+; GISEL-NEXT: v_mov_b64_e32 v[38:39], s[30:31]
+; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[8:9]
+; GISEL-NEXT: v_mov_b64_e32 v[36:37], s[28:29]
+; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[10:11]
+; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[12:13]
+; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[14:15]
+; GISEL-NEXT: v_mov_b64_e32 v[8:9], s[16:17]
+; GISEL-NEXT: v_mov_b64_e32 v[10:11], s[18:19]
+; GISEL-NEXT: v_mov_b64_e32 v[12:13], s[20:21]
+; GISEL-NEXT: v_mov_b64_e32 v[14:15], s[22:23]
+; GISEL-NEXT: v_mov_b64_e32 v[42:43], s[10:11]
+; GISEL-NEXT: v_mov_b64_e32 v[40:41], s[8:9]
+; GISEL-NEXT: v_mfma_f32_32x32x16_f16 v[16:31], v[32:35], v[36:39], v[0:15]
+; GISEL-NEXT: v_mov_b64_e32 v[46:47], s[14:15]
+; GISEL-NEXT: v_mov_b64_e32 v[50:51], s[18:19]
+; GISEL-NEXT: v_mov_b64_e32 v[54:55], s[22:23]
+; GISEL-NEXT: v_mov_b64_e32 v[44:45], s[12:13]
+; GISEL-NEXT: v_mov_b64_e32 v[48:49], s[16:17]
+; GISEL-NEXT: v_mov_b64_e32 v[52:53], s[20:21]
+; GISEL-NEXT: global_store_dwordx4 v56, v[40:43], s[0:1] sc0 sc1
; GISEL-NEXT: s_waitcnt vmcnt(0)
-; GISEL-NEXT: global_store_dwordx4 v24, v[12:15], s[0:1] offset:16 sc0 sc1
+; GISEL-NEXT: global_store_dwordx4 v56, v[44:47], s[0:1] offset:16 sc0 sc1
; GISEL-NEXT: s_waitcnt vmcnt(0)
-; GISEL-NEXT: global_store_dwordx4 v24, v[16:19], s[0:1] offset:32 sc0 sc1
+; GISEL-NEXT: global_store_dwordx4 v56, v[48:51], s[0:1] offset:32 sc0 sc1
; GISEL-NEXT: s_waitcnt vmcnt(0)
-; GISEL-NEXT: global_store_dwordx4 v24, v[20:23], s[0:1] offset:48 sc0 sc1
+; GISEL-NEXT: global_store_dwordx4 v56, v[52:55], s[0:1] offset:48 sc0 sc1
; GISEL-NEXT: s_waitcnt vmcnt(0)
-; GISEL-NEXT: global_store_dwordx4 v24, a[16:19], s[0:1] sc0 sc1
+; GISEL-NEXT: global_store_dwordx4 v56, v[16:19], s[0:1] sc0 sc1
; GISEL-NEXT: s_waitcnt vmcnt(0)
-; GISEL-NEXT: global_store_dwordx4 v24, a[20:23], s[0:1] offset:16 sc0 sc1
+; GISEL-NEXT: global_store_dwordx4 v56, v[20:23], s[0:1] offset:16 sc0 sc1
; GISEL-NEXT: s_waitcnt vmcnt(0)
-; GISEL-NEXT: global_store_dwordx4 v24, a[24:27], s[0:1] offset:32 sc0 sc1
+; GISEL-NEXT: global_store_dwordx4 v56, v[24:27], s[0:1] offset:32 sc0 sc1
; GISEL-NEXT: s_waitcnt vmcnt(0)
-; GISEL-NEXT: global_store_dwordx4 v24, a[28:31], s[0:1] offset:48 sc0 sc1
+; GISEL-NEXT: global_store_dwordx4 v56, v[28:31], s[0:1] offset:48 sc0 sc1
; GISEL-NEXT: s_waitcnt vmcnt(0)
; GISEL-NEXT: s_endpgm
;
@@ -1623,62 +1596,55 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_f16__vgprcd(<8 x half> %arg0,
; HEURRC-NEXT: s_load_dwordx8 s[24:31], s[4:5], 0x24
; HEURRC-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x64
; HEURRC-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xa4
-; HEURRC-NEXT: v_mov_b32_e32 v8, 0
+; HEURRC-NEXT: v_mov_b32_e32 v44, 0
; HEURRC-NEXT: s_waitcnt lgkmcnt(0)
-; HEURRC-NEXT: v_mov_b64_e32 v[0:1], s[24:25]
-; HEURRC-NEXT: v_mov_b64_e32 v[2:3], s[26:27]
-; HEURRC-NEXT: v_mov_b64_e32 v[4:5], s[28:29]
-; HEURRC-NEXT: v_accvgpr_write_b32 a31, s23
-; HEURRC-NEXT: v_mov_b64_e32 v[6:7], s[30:31]
-; HEURRC-NEXT: v_accvgpr_write_b32 a30, s22
-; HEURRC-NEXT: v_accvgpr_write_b32 a29, s21
-; HEURRC-NEXT: v_accvgpr_write_b32 a28, s20
-; HEURRC-NEXT: v_accvgpr_write_b32 a27, s19
-; HEURRC-NEXT: v_accvgpr_write_b32 a26, s18
-; HEURRC-NEXT: v_accvgpr_write_b32 a25, s17
-; HEURRC-NEXT: v_accvgpr_write_b32 a24, s16
-; HEURRC-NEXT: v_accvgpr_write_b32 a23, s15
-; HEURRC-NEXT: v_accvgpr_write_b32 a22, s14
-; HEURRC-NEXT: v_accvgpr_write_b32 a21, s13
-; HEURRC-NEXT: v_accvgpr_write_b32 a20, s12
-; HEURRC-NEXT: v_accvgpr_write_b32 a19, s11
-; HEURRC-NEXT: v_accvgpr_write_b32 a18, s10
-; HEURRC-NEXT: v_accvgpr_write_b32 a17, s9
-; HEURRC-NEXT: v_accvgpr_write_b32 a16, s8
-; HEURRC-NEXT: v_mov_b32_e32 v10, s20
-; HEURRC-NEXT: v_mov_b32_e32 v11, s21
-; HEURRC-NEXT: v_mfma_f32_32x32x16_f16 a[0:15], v[0:3], v[4:7], a[16:31]
-; HEURRC-NEXT: v_mov_b32_e32 v12, s22
-; HEURRC-NEXT: v_mov_b32_e32 v13, s23
-; HEURRC-NEXT: v_mov_b32_e32 v0, s16
-; HEURRC-NEXT: v_mov_b32_e32 v1, s17
-; HEURRC-NEXT: v_mov_b32_e32 v2, s18
-; HEURRC-NEXT: v_mov_b32_e32 v3, s19
-; HEURRC-NEXT: global_store_dwordx4 v8, v[10:13], s[0:1] offset:48 sc0 sc1
+; HEURRC-NEXT: v_mov_b64_e32 v[34:35], s[26:27]
+; HEURRC-NEXT: v_mov_b64_e32 v[32:33], s[24:25]
+; HEURRC-NEXT: v_mov_b64_e32 v[38:39], s[30:31]
+; HEURRC-NEXT: v_mov_b64_e32 v[30:31], s[22:23]
+; HEURRC-NEXT: v_mov_b64_e32 v[36:37], s[28:29]
+; HEURRC-NEXT: v_mov_b64_e32 v[28:29], s[20:21]
+; HEURRC-NEXT: v_mov_b64_e32 v[26:27], s[18:19]
+; HEURRC-NEXT: v_mov_b64_e32 v[24:25], s[16:17]
+; HEURRC-NEXT: v_mov_b64_e32 v[22:23], s[14:15]
+; HEURRC-NEXT: v_mov_b64_e32 v[20:21], s[12:13]
+; HEURRC-NEXT: v_mov_b64_e32 v[18:19], s[10:11]
+; HEURRC-NEXT: v_mov_b64_e32 v[16:17], s[8:9]
+; HEURRC-NEXT: v_mov_b32_e32 v40, s20
+; HEURRC-NEXT: v_mov_b32_e32 v41, s21
+; HEURRC-NEXT: v_mfma_f32_32x32x16_f16 v[0:15], v[32:35], v[36:39], v[16:31]
+; HEURRC-NEXT: v_mov_b32_e32 v42, s22
+; HEURRC-NEXT: v_mov_b32_e32 v43, s23
+; HEURRC-NEXT: global_store_dwordx4 v44, v[40:43], s[0:1] offset:48 sc0 sc1
; HEURRC-NEXT: s_waitcnt vmcnt(0)
-; HEURRC-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:32 sc0 sc1
+; HEURRC-NEXT: s_nop 2
+; HEURRC-NEXT: v_mov_b32_e32 v16, s16
+; HEURRC-NEXT: v_mov_b32_e32 v17, s17
+; HEURRC-NEXT: v_mov_b32_e32 v18, s18
+; HEURRC-NEXT: v_mov_b32_e32 v19, s19
+; HEURRC-NEXT: global_store_dwordx4 v44, v[16:19], s[0:1] offset:32 sc0 sc1
; HEURRC-NEXT: s_waitcnt vmcnt(0)
; HEURRC-NEXT: s_nop 0
-; HEURRC-NEXT: v_mov_b32_e32 v0, s12
-; HEURRC-NEXT: v_mov_b32_e32 v1, s13
-; HEURRC-NEXT: v_mov_b32_e32 v2, s14
-; HEURRC-NEXT: v_mov_b32_e32 v3, s15
-; HEURRC-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1
+; HEURRC-NEXT: v_mov_b32_e32 v16, s12
+; HEURRC-NEXT: v_mov_b32_e32 v17, s13
+; HEURRC-NEXT: v_mov_b32_e32 v18, s14
+; HEURRC-NEXT: v_mov_b32_e32 v19, s15
+; HEURRC-NEXT: global_store_dwordx4 v44, v[16:19], s[0:1] offset:16 sc0 sc1
; HEURRC-NEXT: s_waitcnt vmcnt(0)
; HEURRC-NEXT: s_nop 0
-; HEURRC-NEXT: v_mov_b32_e32 v0, s8
-; HEURRC-NEXT: v_mov_b32_e32 v1, s9
-; HEURRC-NEXT: v_mov_b32_e32 v2, s10
-; HEURRC-NEXT: v_mov_b32_e32 v3, s11
-; HEURRC-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1
+; HEURRC-NEXT: v_mov_b32_e32 v16, s8
+; HEURRC-NEXT: v_mov_b32_e32 v17, s9
+; HEURRC-NEXT: v_mov_b32_e32 v18, s10
+; HEURRC-NEXT: v_mov_b32_e32 v19, s11
+; HEURRC-NEXT: global_store_dwordx4 v44, v[16:19], s[0:1] sc0 sc1
; HEURRC-NEXT: s_waitcnt vmcnt(0)
-; HEURRC-NEXT: global_store_dwordx4 v8, a[8:11], s[0:1] offset:32 sc0 sc1
+; HEURRC-NEXT: global_store_dwordx4 v44, v[8:11], s[0:1] offset:32 sc0 sc1
; HEURRC-NEXT: s_waitcnt vmcnt(0)
-; HEURRC-NEXT: global_store_dwordx4 v8, a[12:15], s[0:1] offset:48 sc0 sc1
+; HEURRC-NEXT: global_store_dwordx4 v44, v[12:15], s[0:1] offset:48 sc0 sc1
; HEURRC-NEXT: s_waitcnt vmcnt(0)
-; HEURRC-NEXT: global_store_dwordx4 v8, a[0:3], s[0:1] sc0 sc1
+; HEURRC-NEXT: global_store_dwordx4 v44, v[0:3], s[0:1] sc0 sc1
; HEURRC-NEXT: s_waitcnt vmcnt(0)
-; HEURRC-NEXT: global_store_dwordx4 v8, a[4:7], s[0:1] offset:16 sc0 sc1
+; HEURRC-NEXT: global_store_dwordx4 v44, v[4:7], s[0:1] offset:16 sc0 sc1
; HEURRC-NEXT: s_waitcnt vmcnt(0)
; HEURRC-NEXT: s_endpgm
;
@@ -1687,7 +1653,7 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_f16__vgprcd(<8 x half> %arg0,
; VGPRRC-NEXT: s_load_dwordx8 s[24:31], s[4:5], 0x24
; VGPRRC-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x64
; VGPRRC-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xa4
-; VGPRRC-NEXT: v_mov_b32_e32 v40, 0
+; VGPRRC-NEXT: v_mov_b32_e32 v44, 0
; VGPRRC-NEXT: s_waitcnt lgkmcnt(0)
; VGPRRC-NEXT: v_mov_b64_e32 v[34:35], s[26:27]
; VGPRRC-NEXT: v_mov_b64_e32 v[32:33], s[24:25]
@@ -1701,41 +1667,41 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_f16__vgprcd(<8 x half> %arg0,
; VGPRRC-NEXT: v_mov_b64_e32 v[20:21], s[12:13]
; VGPRRC-NEXT: v_mov_b64_e32 v[18:19], s[10:11]
; VGPRRC-NEXT: v_mov_b64_e32 v[16:17], s[8:9]
-; VGPRRC-NEXT: v_mov_b32_e32 v42, s20
-; VGPRRC-NEXT: v_mov_b32_e32 v43, s21
+; VGPRRC-NEXT: v_mov_b32_e32 v40, s20
+; VGPRRC-NEXT: v_mov_b32_e32 v41, s21
; VGPRRC-NEXT: v_mfma_f32_32x32x16_f16 v[0:15], v[32:35], v[36:39], v[16:31]
-; VGPRRC-NEXT: v_mov_b32_e32 v44, s22
-; VGPRRC-NEXT: v_mov_b32_e32 v45, s23
-; VGPRRC-NEXT: global_store_dwordx4 v40, v[42:45], s[0:1] offset:48 sc0 sc1
+; VGPRRC-NEXT: v_mov_b32_e32 v42, s22
+; VGPRRC-NEXT: v_mov_b32_e32 v43, s23
+; VGPRRC-NEXT: global_store_dwordx4 v44, v[40:43], s[0:1] offset:48 sc0 sc1
; VGPRRC-NEXT: s_waitcnt vmcnt(0)
; VGPRRC-NEXT: s_nop 2
; VGPRRC-NEXT: v_mov_b32_e32 v16, s16
; VGPRRC-NEXT: v_mov_b32_e32 v17, s17
; VGPRRC-NEXT: v_mov_b32_e32 v18, s18
; VGPRRC-NEXT: v_mov_b32_e32 v19, s19
-; VGPRRC-NEXT: global_store_dwordx4 v40, v[16:19], s[0:1] offset:32 sc0 sc1
+; VGPRRC-NEXT: global_store_dwordx4 v44, v[16:19], s[0:1] offset:32 sc0 sc1
; VGPRRC-NEXT: s_waitcnt vmcnt(0)
; VGPRRC-NEXT: s_nop 0
; VGPRRC-NEXT: v_mov_b32_e32 v16, s12
; VGPRRC-NEXT: v_mov_b32_e32 v17, s13
; VGPRRC-NEXT: v_mov_b32_e32 v18, s14
; VGPRRC-NEXT: v_mov_b32_e32 v19, s15
-; VGPRRC-NEXT: global_store_dwordx4 v40, v[16:19], s[0:1] offset:16 sc0 sc1
+; VGPRRC-NEXT: global_store_dwordx4 v44, v[16:19], s[0:1] offset:16 sc0 sc1
; VGPRRC-NEXT: s_waitcnt vmcnt(0)
; VGPRRC-NEXT: s_nop 0
; VGPRRC-NEXT: v_mov_b32_e32 v16, s8
; VGPRRC-NEXT: v_mov_b32_e32 v17, s9
; VGPRRC-NEXT: v_mov_b32_e32 v18, s10
; VGPRRC-NEXT: v_mov_b32_e32 v19, s11
-; VGPRRC-NEXT: global_store_dwordx4 v40, v[16:19], s[0:1] sc0 sc1
+; VGPRRC-NEXT: global_store_dwordx4 v44, v[16:19], s[0:1] sc0 sc1
; VGPRRC-NEXT: s_waitcnt vmcnt(0)
-; VGPRRC-NEXT: global_store_dwordx4 v40, v[8:11], s[0:1] offset:32 sc0 sc1
+; VGPRRC-NEXT: global_store_dwordx4 v44, v[8:11], s[0:1] offset:32 sc0 sc1
; VGPRRC-NEXT: s_waitcnt vmcnt(0)
-; VGPRRC-NEXT: global_store_dwordx4 v40, v[12:15], s[0:1] offset:48 sc0 sc1
+; VGPRRC-NEXT: global_store_dwordx4 v44, v[12:15], s[0:1] offset:48 sc0 sc1
; VGPRRC-NEXT: s_waitcnt vmcnt(0)
-; VGPRRC-NEXT: global_store_dwordx4 v40, v[0:3], s[0:1] sc0 sc1
+; VGPRRC-NEXT: global_store_dwordx4 v44, v[0:3], s[0:1] sc0 sc1
; VGPRRC-NEXT: s_waitcnt vmcnt(0)
-; VGPRRC-NEXT: global_store_dwordx4 v40, v[4:7], s[0:1] offset:16 sc0 sc1
+; VGPRRC-NEXT: global_store_dwordx4 v44, v[4:7], s[0:1] offset:16 sc0 sc1
; VGPRRC-NEXT: s_waitcnt vmcnt(0)
; VGPRRC-NEXT: s_endpgm
; AGPR-LABEL: test_mfma_f32_32x32x16_f16__vgprcd:
@@ -1869,62 +1835,55 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_f16__vgprcd__flags(<8 x half>
; SDAG-NEXT: s_load_dwordx8 s[24:31], s[4:5], 0x24
; SDAG-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x64
; SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xa4
-; SDAG-NEXT: v_mov_b32_e32 v8, 0
+; SDAG-NEXT: v_mov_b32_e32 v44, 0
; SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; SDAG-NEXT: v_mov_b64_e32 v[0:1], s[24:25]
-; SDAG-NEXT: v_mov_b64_e32 v[2:3], s[26:27]
-; SDAG-NEXT: v_mov_b64_e32 v[4:5], s[28:29]
-; SDAG-NEXT: v_accvgpr_write_b32 a31, s23
-; SDAG-NEXT: v_mov_b64_e32 v[6:7], s[30:31]
-; SDAG-NEXT: v_accvgpr_write_b32 a30, s22
-; SDAG-NEXT: v_accvgpr_write_b32 a29, s21
-; SDAG-NEXT: v_accvgpr_write_b32 a28, s20
-; SDAG-NEXT: v_accvgpr_write_b32 a27, s19
-; SDAG-NEXT: v_accvgpr_write_b32 a26, s18
-; SDAG-NEXT: v_accvgpr_write_b32 a25, s17
-; SDAG-NEXT: v_accvgpr_write_b32 a24, s16
-; SDAG-NEXT: v_accvgpr_write_b32 a23, s15
-; SDAG-NEXT: v_accvgpr_write_b32 a22, s14
-; SDAG-NEXT: v_accvgpr_write_b32 a21, s13
-; SDAG-NEXT: v_accvgpr_write_b32 a20, s12
-; SDAG-NEXT: v_accvgpr_write_b32 a19, s11
-; SDAG-NEXT: v_accvgpr_write_b32 a18, s10
-; SDAG-NEXT: v_accvgpr_write_b32 a17, s9
-; SDAG-NEXT: v_accvgpr_write_b32 a16, s8
-; SDAG-NEXT: v_mov_b32_e32 v10, s20
-; SDAG-NEXT: v_mov_b32_e32 v11, s21
-; SDAG-NEXT: v_mfma_f32_32x32x16_f16 a[0:15], v[0:3], v[4:7], a[16:31] cbsz:1 abid:2 blgp:3
-; SDAG-NEXT: v_mov_b32_e32 v12, s22
-; SDAG-NEXT: v_mov_b32_e32 v13, s23
-; SDAG-NEXT: v_mov_b32_e32 v0, s16
-; SDAG-NEXT: v_mov_b32_e32 v1, s17
-; SDAG-NEXT: v_mov_b32_e32 v2, s18
-; SDAG-NEXT: v_mov_b32_e32 v3, s19
-; SDAG-NEXT: global_store_dwordx4 v8, v[10:13], s[0:1] offset:48 sc0 sc1
+; SDAG-NEXT: v_mov_b64_e32 v[34:35], s[26:27]
+; SDAG-NEXT: v_mov_b64_e32 v[32:33], s[24:25]
+; SDAG-NEXT: v_mov_b64_e32 v[38:39], s[30:31]
+; SDAG-NEXT: v_mov_b64_e32 v[30:31], s[22:23]
+; SDAG-NEXT: v_mov_b64_e32 v[36:37], s[28:29]
+; SDAG-NEXT: v_mov_b64_e32 v[28:29], s[20:21]
+; SDAG-NEXT: v_mov_b64_e32 v[26:27], s[18:19]
+; SDAG-NEXT: v_mov_b64_e32 v[24:25], s[16:17]
+; SDAG-NEXT: v_mov_b64_e32 v[22:23], s[14:15]
+; SDAG-NEXT: v_mov_b64_e32 v[20:21], s[12:13]
+; SDAG-NEXT: v_mov_b64_e32 v[18:19], s[10:11]
+; SDAG-NEXT: v_mov_b64_e32 v[16:17], s[8:9]
+; SDAG-NEXT: v_mov_b32_e32 v40, s20
+; SDAG-NEXT: v_mov_b32_e32 v41, s21
+; SDAG-NEXT: v_mfma_f32_32x32x16_f16 v[0:15], v[32:35], v[36:39], v[16:31] cbsz:1 abid:2 blgp:3
+; SDAG-NEXT: v_mov_b32_e32 v42, s22
+; SDAG-NEXT: v_mov_b32_e32 v43, s23
+; SDAG-NEXT: global_store_dwordx4 v44, v[40:43], s[0:1] offset:48 sc0 sc1
; SDAG-NEXT: s_waitcnt vmcnt(0)
-; SDAG-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:32 sc0 sc1
+; SDAG-NEXT: s_nop 2
+; SDAG-NEXT: v_mov_b32_e32 v16, s16
+; SDAG-NEXT: v_mov_b32_e32 v17, s17
+; SDAG-NEXT: v_mov_b32_e32 v18, s18
+; SDAG-NEXT: v_mov_b32_e32 v19, s19
+; SDAG-NEXT: global_store_dwordx4 v44, v[16:19], s[0:1] offset:32 sc0 sc1
; SDAG-NEXT: s_waitcnt vmcnt(0)
; SDAG-NEXT: s_nop 0
-; SDAG-NEXT: v_mov_b32_e32 v0, s12
-; SDAG-NEXT: v_mov_b32_e32 v1, s13
-; SDAG-NEXT: v_mov_b32_e32 v2, s14
-; SDAG-NEXT: v_mov_b32_e32 v3, s15
-; SDAG-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1
+; SDAG-NEXT: v_mov_b32_e32 v16, s12
+; SDAG-NEXT: v_mov_b32_e32 v17, s13
+; SDAG-NEXT: v_mov_b32_e32 v18, s14
+; SDAG-NEXT: v_mov_b32_e32 v19, s15
+; SDAG-NEXT: global_store_dwordx4 v44, v[16:19], s[0:1] offset:16 sc0 sc1
; SDAG-NEXT: s_waitcnt vmcnt(0)
; SDAG-NEXT: s_nop 0
-; SDAG-NEXT: v_mov_b32_e32 v0, s8
-; SDAG-NEXT: v_mov_b32_e32 v1, s9
-; SDAG-NEXT: v_mov_b32_e32 v2, s10
-; SDAG-NEXT: v_mov_b32_e32 v3, s11
-; SDAG-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1
+; SDAG-NEXT: v_mov_b32_e32 v16, s8
+; SDAG-NEXT: v_mov_b32_e32 v17, s9
+; SDAG-NEXT: v_mov_b32_e32 v18, s10
+; SDAG-NEXT: v_mov_b32_e32 v19, s11
+; SDAG-NEXT: global_store_dwordx4 v44, v[16:19], s[0:1] sc0 sc1
; SDAG-NEXT: s_waitcnt vmcnt(0)
-; SDAG-NEXT: global_store_dwordx4 v8, a[8:11], s[0:1] offset:32 sc0 sc1
+; SDAG-NEXT: global_store_dwordx4 v44, v[8:11], s[0:1] offset:32 sc0 sc1
; SDAG-NEXT: s_waitcnt vmcnt(0)
-; SDAG-NEXT: global_store_dwordx4 v8, a[12:15], s[0:1] offset:48 sc0 sc1
+; SDAG-NEXT: global_store_dwordx4 v44, v[12:15], s[0:1] offset:48 sc0 sc1
; SDAG-NEXT: s_waitcnt vmcnt(0)
-; SDAG-NEXT: global_store_dwordx4 v8, a[0:3], s[0:1] sc0 sc1
+; SDAG-NEXT: global_store_dwordx4 v44, v[0:3], s[0:1] sc0 sc1
; SDAG-NEXT: s_waitcnt vmcnt(0)
-; SDAG-NEXT: global_store_dwordx4 v8, a[4:7], s[0:1] offset:16 sc0 sc1
+; SDAG-NEXT: global_store_dwordx4 v44, v[4:7], s[0:1] offset:16 sc0 sc1
; SDAG-NEXT: s_waitcnt vmcnt(0)
; SDAG-NEXT: s_endpgm
;
@@ -1933,52 +1892,44 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_f16__vgprcd__flags(<8 x half>
; GISEL-NEXT: s_load_dwordx8 s[24:31], s[4:5], 0x24
; GISEL-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x64
; GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xa4
-; GISEL-NEXT: v_mov_b32_e32 v24, 0
+; GISEL-NEXT: v_mov_b32_e32 v56, 0
; GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[24:25]
-; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[26:27]
-; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[28:29]
-; GISEL-NEXT: v_accvgpr_write_b32 a0, s8
-; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[30:31]
-; GISEL-NEXT: v_accvgpr_write_b32 a1, s9
-; GISEL-NEXT: v_accvgpr_write_b32 a2, s10
-; GISEL-NEXT: v_accvgpr_write_b32 a3, s11
-; GISEL-NEXT: v_accvgpr_write_b32 a4, s12
-; GISEL-NEXT: v_accvgpr_write_b32 a5, s13
-; GISEL-NEXT: v_accvgpr_write_b32 a6, s14
-; GISEL-NEXT: v_accvgpr_write_b32 a7, s15
-; GISEL-NEXT: v_accvgpr_write_b32 a8, s16
-; GISEL-NEXT: v_accvgpr_write_b32 a9, s17
-; GISEL-NEXT: v_accvgpr_write_b32 a10, s18
-; GISEL-NEXT: v_accvgpr_write_b32 a11, s19
-; GISEL-NEXT: v_accvgpr_write_b32 a12, s20
-; GISEL-NEXT: v_accvgpr_write_b32 a13, s21
-; GISEL-NEXT: v_accvgpr_write_b32 a14, s22
-; GISEL-NEXT: v_accvgpr_write_b32 a15, s23
-; GISEL-NEXT: v_mov_b64_e32 v[8:9], s[8:9]
-; GISEL-NEXT: v_mov_b64_e32 v[10:11], s[10:11]
-; GISEL-NEXT: v_mfma_f32_32x32x16_f16 a[16:31], v[0:3], v[4:7], a[0:15] cbsz:1 abid:2 blgp:3
-; GISEL-NEXT: v_mov_b64_e32 v[12:13], s[12:13]
-; GISEL-NEXT: v_mov_b64_e32 v[16:17], s[16:17]
-; GISEL-NEXT: v_mov_b64_e32 v[20:21], s[20:21]
-; GISEL-NEXT: v_mov_b64_e32 v[14:15], s[14:15]
-; GISEL-NEXT: v_mov_b64_e32 v[18:19], s[18:19]
-; GISEL-NEXT: v_mov_b64_e32 v[22:23], s[22:23]
-; GISEL-NEXT: global_store_dwordx4 v24, v[8:11], s[0:1] sc0 sc1
+; GISEL-NEXT: v_mov_b64_e32 v[34:35], s[26:27]
+; GISEL-NEXT: v_mov_b64_e32 v[32:33], s[24:25]
+; GISEL-NEXT: v_mov_b64_e32 v[38:39], s[30:31]
+; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[8:9]
+; GISEL-NEXT: v_mov_b64_e32 v[36:37], s[28:29]
+; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[10:11]
+; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[12:13]
+; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[14:15]
+; GISEL-NEXT: v_mov_b64_e32 v[8:9], s[16:17]
+; GISEL-NEXT: v_mov_b64_e32 v[10:11], s[18:19]
+; GISEL-NEXT: v_mov_b64_e32 v[12:13], s[20:21]
+; GISEL-NEXT: v_mov_b64_e32 v[14:15], s[22:23]
+; GISEL-NEXT: v_mov_b64_e32 v[42:43], s[10:11]
+; GISEL-NEXT: v_mov_b64_e32 v[40:41], s[8:9]
+; GISEL-NEXT: v_mfma_f32_32x32x16_f16 v[16:31], v[32:35], v[36:39], v[0:15] cbsz:1 abid:2 blgp:3
+; GISEL-NEXT: v_mov_b64_e32 v[46:47], s[14:15]
+; GISEL-NEXT: v_mov_b64_e32 v[50:51], s[18:19]
+; GISEL-NEXT: v_mov_b64_e32 v[54:55], s[22:23]
+; GISEL-NEXT: v_mov_b64_e32 v[44:45], s[12:13]
+; GISEL-NEXT: v_mov_b64_e32 v[48:49], s[16:17]
+; GISEL-NEXT: v_mov_b64_e32 v[52:53], s[20:21]
+; GISEL-NEXT: global_store_dwordx4 v56, v[40:43], s[0:1] sc0 sc1
; GISEL-NEXT: s_waitcnt vmcnt(0)
-; GISEL-NEXT: global_store_dwordx4 v24, v[12:15], s[0:1] offset:16 sc0 sc1
+; GISEL-NEXT: global_store_dwordx4 v56, v[44:47], s[0:1] offset:16 sc0 sc1
; GISEL-NEXT: s_waitcnt vmcnt(0)
-; GISEL-NEXT: global_store_dwordx4 v24, v[16:19], s[0:1] offset:32 sc0 sc1
+; GISEL-NEXT: global_store_dwordx4 v56, v[48:51], s[0:1] offset:32 sc0 sc1
; GISEL-NEXT: s_waitcnt vmcnt(0)
-; GISEL-NEXT: global_store_dwordx4 v24, v[20:23], s[0:1] offset:48 sc0 sc1
+; GISEL-NEXT: global_store_dwordx4 v56, v[52:55], s[0:1] offset:48 sc0 sc1
; GISEL-NEXT: s_waitcnt vmcnt(0)
-; GISEL-NEXT: global_store_dwordx4 v24, a[16:19], s[0:1] sc0 sc1
+; GISEL-NEXT: global_store_dwordx4 v56, v[16:19], s[0:1] sc0 sc1
; GISEL-NEXT: s_waitcnt vmcnt(0)
-; GISEL-NEXT: global_store_dwordx4 v24, a[20:23], s[0:1] offset:16 sc0 sc1
+; GISEL-NEXT: global_store_dwordx4 v56, v[20:23], s[0:1] offset:16 sc0 sc1
; GISEL-NEXT: s_waitcnt vmcnt(0)
-; GISEL-NEXT: global_store_dwordx4 v24, a[24:27], s[0:1] offset:32 sc0 sc1
+; GISEL-NEXT: global_store_dwordx4 v56, v[24:27], s[0:1] offset:32 sc0 sc1
; GISEL-NEXT: s_waitcnt vmcnt(0)
-; GISEL-NEXT: global_store_dwordx4 v24, a[28:31], s[0:1] offset:48 sc0 sc1
+; GISEL-NEXT: global_store_dwordx4 v56, v[28:31], s[0:1] offset:48 sc0 sc1
; GISEL-NEXT: s_waitcnt vmcnt(0)
; GISEL-NEXT: s_endpgm
;
@@ -1987,62 +1938,55 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_f16__vgprcd__flags(<8 x half>
; HEURRC-NEXT: s_load_dwordx8 s[24:31], s[4:5], 0x24
; HEURRC-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x64
; HEURRC-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xa4
-; HEURRC-NEXT: v_mov_b32_e32 v8, 0
+; HEURRC-NEXT: v_mov_b32_e32 v44, 0
; HEURRC-NEXT: s_waitcnt lgkmcnt(0)
-; HEURRC-NEXT: v_mov_b64_e32 v[0:1], s[24:25]
-; HEURRC-NEXT: v_mov_b64_e32 v[2:3], s[26:27]
-; HEURRC-NEXT: v_mov_b64_e32 v[4:5], s[28:29]
-; HEURRC-NEXT: v_accvgpr_write_b32 a31, s23
-; HEURRC-NEXT: v_mov_b64_e32 v[6:7], s[30:31]
-; HEURRC-NEXT: v_accvgpr_write_b32 a30, s22
-; HEURRC-NEXT: v_accvgpr_write_b32 a29, s21
-; HEURRC-NEXT: v_accvgpr_write_b32 a28, s20
-; HEURRC-NEXT: v_accvgpr_write_b32 a27, s19
-; HEURRC-NEXT: v_accvgpr_write_b32 a26, s18
-; HEURRC-NEXT: v_accvgpr_write_b32 a25, s17
-; HEURRC-NEXT: v_accvgpr_write_b32 a24, s16
-; HEURRC-NEXT: v_accvgpr_write_b32 a23, s15
-; HEURRC-NEXT: v_accvgpr_write_b32 a22, s14
-; HEURRC-NEXT: v_accvgpr_write_b32 a21, s13
-; HEURRC-NEXT: v_accvgpr_write_b32 a20, s12
-; HEURRC-NEXT: v_accvgpr_write_b32 a19, s11
-; HEURRC-NEXT: v_accvgpr_write_b32 a18, s10
-; HEURRC-NEXT: v_accvgpr_write_b32 a17, s9
-; HEURRC-NEXT: v_accvgpr_write_b32 a16, s8
-; HEURRC-NEXT: v_mov_b32_e32 v10, s20
-; HEURRC-NEXT: v_mov_b32_e32 v11, s21
-; HEURRC-NEXT: v_mfma_f32_32x32x16_f16 a[0:15], v[0:3], v[4:7], a[16:31] cbsz:1 abid:2 blgp:3
-; HEURRC-NEXT: v_mov_b32_e32 v12, s22
-; HEURRC-NEXT: v_mov_b32_e32 v13, s23
-; HEURRC-NEXT: v_mov_b32_e32 v0, s16
-; HEURRC-NEXT: v_mov_b32_e32 v1, s17
-; HEURRC-NEXT: v_mov_b32_e32 v2, s18
-; HEURRC-NEXT: v_mov_b32_e32 v3, s19
-; HEURRC-NEXT: global_store_dwordx4 v8, v[10:13], s[0:1] offset:48 sc0 sc1
+; HEURRC-NEXT: v_mov_b64_e32 v[34:35], s[26:27]
+; HEURRC-NEXT: v_mov_b64_e32 v[32:33], s[24:25]
+; HEURRC-NEXT: v_mov_b64_e32 v[38:39], s[30:31]
+; HEURRC-NEXT: v_mov_b64_e32 v[30:31], s[22:23]
+; HEURRC-NEXT: v_mov_b64_e32 v[36:37], s[28:29]
+; HEURRC-NEXT: v_mov_b64_e32 v[28:29], s[20:21]
+; HEURRC-NEXT: v_mov_b64_e32 v[26:27], s[18:19]
+; HEURRC-NEXT: v_mov_b64_e32 v[24:25], s[16:17]
+; HEURRC-NEXT: v_mov_b64_e32 v[22:23], s[14:15]
+; HEURRC-NEXT: v_mov_b64_e32 v[20:21], s[12:13]
+; HEURRC-NEXT: v_mov_b64_e32 v[18:19], s[10:11]
+; HEURRC-NEXT: v_mov_b64_e32 v[16:17], s[8:9]
+; HEURRC-NEXT: v_mov_b32_e32 v40, s20
+; HEURRC-NEXT: v_mov_b32_e32 v41, s21
+; HEURRC-NEXT: v_mfma_f32_32x32x16_f16 v[0:15], v[32:35], v[36:39], v[16:31] cbsz:1 abid:2 blgp:3
+; HEURRC-NEXT: v_mov_b32_e32 v42, s22
+; HEURRC-NEXT: v_mov_b32_e32 v43, s23
+; HEURRC-NEXT: global_store_dwordx4 v44, v[40:43], s[0:1] offset:48 sc0 sc1
; HEURRC-NEXT: s_waitcnt vmcnt(0)
-; HEURRC-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:32 sc0 sc1
+; HEURRC-NEXT: s_nop 2
+; HEURRC-NEXT: v_mov_b32_e32 v16, s16
+; HEURRC-NEXT: v_mov_b32_e32 v17, s17
+; HEURRC-NEXT: v_mov_b32_e32 v18, s18
+; HEURRC-NEXT: v_mov_b32_e32 v19, s19
+; HEURRC-NEXT: global_store_dwordx4 v44, v[16:19], s[0:1] offset:32 sc0 sc1
; HEURRC-NEXT: s_waitcnt vmcnt(0)
; HEURRC-NEXT: s_nop 0
-; HEURRC-NEXT: v_mov_b32_e32 v0, s12
-; HEURRC-NEXT: v_mov_b32_e32 v1, s13
-; HEURRC-NEXT: v_mov_b32_e32 v2, s14
-; HEURRC-NEXT: v_mov_b32_e32 v3, s15
-; HEURRC-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] offset:16 sc0 sc1
+; HEURRC-NEXT: v_mov_b32_e32 v16, s12
+; HEURRC-NEXT: v_mov_b32_e32 v17, s13
+; HEURRC-NEXT: v_mov_b32_e32 v18, s14
+; HEURRC-NEXT: v_mov_b32_e32 v19, s15
+; HEURRC-NEXT: global_store_dwordx4 v44, v[16:19], s[0:1] offset:16 sc0 sc1
; HEURRC-NEXT: s_waitcnt vmcnt(0)
; HEURRC-NEXT: s_nop 0
-; HEURRC-NEXT: v_mov_b32_e32 v0, s8
-; HEURRC-NEXT: v_mov_b32_e32 v1, s9
-; HEURRC-NEXT: v_mov_b32_e32 v2, s10
-; HEURRC-NEXT: v_mov_b32_e32 v3, s11
-; HEURRC-NEXT: global_store_dwordx4 v8, v[0:3], s[0:1] sc0 sc1
+; HEURRC-NEXT: v_mov_b32_e32 v16, s8
+; HEURRC-NEXT: v_mov_b32_e32 v17, s9
+; HEURRC-NEXT: v_mov_b32_e32 v18, s10
+; HEURRC-NEXT: v_mov_b32_e32 v19, s11
+; HEURRC-NEXT: global_store_dwordx4 v44, v[16:19], s[0:1] sc0 sc1
; HEURRC-NEXT: s_waitcnt vmcnt(0)
-; HEURRC-NEXT: global_store_dwordx4 v8, a[8:11], s[0:1] offset:32 sc0 sc1
+; HEURRC-NEXT: global_store_dwordx4 v44, v[8:11], s[0:1] offset:32 sc0 sc1
; HEURRC-NEXT: s_waitcnt vmcnt(0)
-; HEURRC-NEXT: global_store_dwordx4 v8, a[12:15], s[0:1] offset:48 sc0 sc1
+; HEURRC-NEXT: global_store_dwordx4 v44, v[12:15], s[0:1] offset:48 sc0 sc1
; HEURRC-NEXT: s_waitcnt vmcnt(0)
-; HEURRC-NEXT: global_store_dwordx4 v8, a[0:3], s[0:1] sc0 sc1
+; HEURRC-NEXT: global_store_dwordx4 v44, v[0:3], s[0:1] sc0 sc1
; HEURRC-NEXT: s_waitcnt vmcnt(0)
-; HEURRC-NEXT: global_store_dwordx4 v8, a[4:7], s[0:1] offset:16 sc0 sc1
+; HEURRC-NEXT: global_store_dwordx4 v44, v[4:7], s[0:1] offset:16 sc0 sc1
; HEURRC-NEXT: s_waitcnt vmcnt(0)
; HEURRC-NEXT: s_endpgm
;
@@ -2051,7 +1995,7 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_f16__vgprcd__flags(<8 x half>
; VGPRRC-NEXT: s_load_dwordx8 s[24:31], s[4:5], 0x24
; VGPRRC-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x64
; VGPRRC-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xa4
-; VGPRRC-NEXT: v_mov_b32_e32 v40, 0
+; VGPRRC-NEXT: v_mov_b32_e32 v44, 0
; VGPRRC-NEXT: s_waitcnt lgkmcnt(0)
; VGPRRC-NEXT: v_mov_b64_e32 v[34:35], s[26:27]
; VGPRRC-NEXT: v_mov_b64_e32 v[32:33], s[24:25]
@@ -2065,41 +2009,41 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_f16__vgprcd__flags(<8 x half>
; VGPRRC-NEXT: v_mov_b64_e32 v[20:21], s[12:13]
; VGPRRC-NEXT: v_mov_b64_e32 v[18:19], s[10:11]
; VGPRRC-NEXT: v_mov_b64_e32 v[16:17], s[8:9]
-; VGPRRC-NEXT: v_mov_b32_e32 v42, s20
-; VGPRRC-NEXT: v_mov_b32_e32 v43, s21
+; VGPRRC-NEXT: v_mov_b32_e32 v40, s20
+; VGPRRC-NEXT: v_mov_b32_e32 v41, s21
; VGPRRC-NEXT: v_mfma_f32_32x32x16_f16 v[0:15], v[32:35], v[36:39], v[16:31] cbsz:1 abid:2 blgp:3
-; VGPRRC-NEXT: v_mov_b32_e32 v44, s22
-; VGPRRC-NEXT: v_mov_b32_e32 v45, s23
-; VGPRRC-NEXT: global_store_dwordx4 v40, v[42:45], s[0:1] offset:48 sc0 sc1
+; VGPRRC-NEXT: v_mov_b32_e32 v42, s22
+; VGPRRC-NEXT: v_mov_b32_e32 v43, s23
+; VGPRRC-NEXT: global_store_dwordx4 v44, v[40:43], s[0:1] offset:48 sc0 sc1
; VGPRRC-NEXT: s_waitcnt vmcnt(0)
; VGPRRC-NEXT: s_nop 2
; VGPRRC-NEXT: v_mov_b32_e32 v16, s16
; VGPRRC-NEXT: v_mov_b32_e32 v17, s17
; VGPRRC-NEXT: v_mov_b32_e32 v18, s18
; VGPRRC-NEXT: v_mov_b32_e32 v19, s19
-; VGPRRC-NEXT: global_store_dwordx4 v40, v[16:19], s[0:1] offset:32 sc0 sc1
+; VGPRRC-NEXT: global_store_dwordx4 v44, v[16:19], s[0:1] offset:32 sc0 sc1
; VGPRRC-NEXT: s_waitcnt vmcnt(0)
; VGPRRC-NEXT: s_nop 0
; VGPRRC-NEXT: v_mov_b32_e32 v16, s12
; VGPRRC-NEXT: v_mov_b32_e32 v17, s13
; VGPRRC-NEXT: v_mov_b32_e32 v18, s14
; VGPRRC-NEXT: v_mov_b32_e32 v19, s15
-; VGPRRC-NEXT: global_store_dwordx4 v40, v[16:19], s[0:1] offset:16 sc0 sc1
+; VGPRRC-NEXT: global_store_dwordx4 v44, v[16:19], s[0:1] offset:16 sc0 sc1
; VGPRRC-NEXT: s_waitcnt vmcnt(0)
; VGPRRC-NEXT: s_nop 0
; VGPRRC-NEXT: v_mov_b32_e32 v16, s8
; VGPRRC-NEXT: v_mov_b32_e32 v17, s9
; VGPRRC-NEXT: v_mov_b32_e32 v18, s10
; VGPRRC-NEXT: v_mov_b32_e32 v19, s11
-; VGPRRC-NEXT: global_store_dwordx4 v40, v[16:19], s[0:1] sc0 sc1
+; VGPRRC-NEXT: global_store_dwordx4 v44, v[16:19], s[0:1] sc0 sc1
; VGPRRC-NEXT: s_waitcnt vmcnt(0)
-; VGPRRC-NEXT: global_store_dwordx4 v40, v[8:11], s[0:1] offset:32 sc0 sc1
+; VGPRRC-NEXT: global_store_dwordx4 v44, v[8:11], s[0:1] offset:32 sc0 sc1
; VGPRRC-NEXT: s_waitcnt vmcnt(0)
-; VGPRRC-NEXT: global_store_dwordx4 v40, v[12:15], s[0:1] offset:48 sc0 sc1
+; VGPRRC-NEXT: global_store_dwordx4 v44, v[12:15], s[0:1] offset:48 sc0 sc1
; VGPRRC-NEXT: s_waitcnt vmcnt(0)
-; VGPRRC-NEXT: global_store_dwordx4 v40, v[0:3], s[0:1] sc0 sc1
+; VGPRRC-NEXT: global_store_dwordx4 v44, v[0:3], s[0:1] sc0 sc1
; VGPRRC-NEXT: s_waitcnt vmcnt(0)
-; VGPRRC-NEXT: global_store_dwordx4 v40, v[4:7], s[0:1] offset:16 sc0 sc1
+; VGPRRC-NEXT: global_store_dwordx4 v44, v[4:7], s[0:1] offset:16 sc0 sc1
; VGPRRC-NEXT: s_waitcnt vmcnt(0)
; VGPRRC-NEXT: s_endpgm
; AGPR-LABEL: test_mfma_f32_32x32x16_f16__vgprcd__flags:
@@ -2234,35 +2178,27 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_f16__vgprcd_mac(<8 x half> %ar
; SDAG-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x64
; SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xa4
; SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; SDAG-NEXT: v_mov_b64_e32 v[0:1], s[24:25]
-; SDAG-NEXT: v_mov_b64_e32 v[2:3], s[26:27]
-; SDAG-NEXT: v_mov_b64_e32 v[4:5], s[28:29]
-; SDAG-NEXT: v_accvgpr_write_b32 a0, s8
-; SDAG-NEXT: v_mov_b64_e32 v[6:7], s[30:31]
-; SDAG-NEXT: v_accvgpr_write_b32 a1, s9
-; SDAG-NEXT: v_accvgpr_write_b32 a2, s10
-; SDAG-NEXT: v_accvgpr_write_b32 a3, s11
-; SDAG-NEXT: v_accvgpr_write_b32 a4, s12
-; SDAG-NEXT: v_accvgpr_write_b32 a5, s13
-; SDAG-NEXT: v_accvgpr_write_b32 a6, s14
-; SDAG-NEXT: v_accvgpr_write_b32 a7, s15
-; SDAG-NEXT: v_accvgpr_write_b32 a8, s16
-; SDAG-NEXT: v_accvgpr_write_b32 a9, s17
-; SDAG-NEXT: v_accvgpr_write_b32 a10, s18
-; SDAG-NEXT: v_accvgpr_write_b32 a11, s19
-; SDAG-NEXT: v_accvgpr_write_b32 a12, s20
-; SDAG-NEXT: v_accvgpr_write_b32 a13, s21
-; SDAG-NEXT: v_accvgpr_write_b32 a14, s22
-; SDAG-NEXT: v_accvgpr_write_b32 a15, s23
+; SDAG-NEXT: v_mov_b64_e32 v[16:17], s[24:25]
+; SDAG-NEXT: v_mov_b64_e32 v[18:19], s[26:27]
+; SDAG-NEXT: v_mov_b64_e32 v[20:21], s[28:29]
+; SDAG-NEXT: v_mov_b64_e32 v[0:1], s[8:9]
+; SDAG-NEXT: v_mov_b64_e32 v[22:23], s[30:31]
+; SDAG-NEXT: v_mov_b64_e32 v[2:3], s[10:11]
+; SDAG-NEXT: v_mov_b64_e32 v[4:5], s[12:13]
+; SDAG-NEXT: v_mov_b64_e32 v[6:7], s[14:15]
+; SDAG-NEXT: v_mov_b64_e32 v[8:9], s[16:17]
+; SDAG-NEXT: v_mov_b64_e32 v[10:11], s[18:19]
+; SDAG-NEXT: v_mov_b64_e32 v[12:13], s[20:21]
+; SDAG-NEXT: v_mov_b64_e32 v[14:15], s[22:23]
; SDAG-NEXT: s_nop 1
-; SDAG-NEXT: v_mfma_f32_32x32x16_f16 a[0:15], v[0:3], v[4:7], a[0:15]
-; SDAG-NEXT: v_mov_b32_e32 v0, 0
+; SDAG-NEXT: v_mfma_f32_32x32x16_f16 v[0:15], v[16:19], v[20:23], v[0:15]
+; SDAG-NEXT: v_mov_b32_e32 v16, 0
; SDAG-NEXT: s_nop 7
; SDAG-NEXT: s_nop 2
-; SDAG-NEXT: global_store_dwordx4 v0, a[12:15], s[0:1] offset:48
-; SDAG-NEXT: global_store_dwordx4 v0, a[8:11], s[0:1] offset:32
-; SDAG-NEXT: global_store_dwordx4 v0, a[4:7], s[0:1] offset:16
-; SDAG-NEXT: global_store_dwordx4 v0, a[0:3], s[0:1]
+; SDAG-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] offset:48
+; SDAG-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] offset:32
+; SDAG-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16
+; SDAG-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1]
; SDAG-NEXT: s_endpgm
;
; GISEL-LABEL: test_mfma_f32_32x32x16_f16__vgprcd_mac:
@@ -2271,35 +2207,27 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_f16__vgprcd_mac(<8 x half> %ar
; GISEL-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x64
; GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xa4
; GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[24:25]
-; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[26:27]
-; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[28:29]
-; GISEL-NEXT: v_accvgpr_write_b32 a0, s8
-; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[30:31]
-; GISEL-NEXT: v_accvgpr_write_b32 a1, s9
-; GISEL-NEXT: v_accvgpr_write_b32 a2, s10
-; GISEL-NEXT: v_accvgpr_write_b32 a3, s11
-; GISEL-NEXT: v_accvgpr_write_b32 a4, s12
-; GISEL-NEXT: v_accvgpr_write_b32 a5, s13
-; GISEL-NEXT: v_accvgpr_write_b32 a6, s14
-; GISEL-NEXT: v_accvgpr_write_b32 a7, s15
-; GISEL-NEXT: v_accvgpr_write_b32 a8, s16
-; GISEL-NEXT: v_accvgpr_write_b32 a9, s17
-; GISEL-NEXT: v_accvgpr_write_b32 a10, s18
-; GISEL-NEXT: v_accvgpr_write_b32 a11, s19
-; GISEL-NEXT: v_accvgpr_write_b32 a12, s20
-; GISEL-NEXT: v_accvgpr_write_b32 a13, s21
-; GISEL-NEXT: v_accvgpr_write_b32 a14, s22
-; GISEL-NEXT: v_accvgpr_write_b32 a15, s23
+; GISEL-NEXT: v_mov_b64_e32 v[16:17], s[24:25]
+; GISEL-NEXT: v_mov_b64_e32 v[18:19], s[26:27]
+; GISEL-NEXT: v_mov_b64_e32 v[20:21], s[28:29]
+; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[8:9]
+; GISEL-NEXT: v_mov_b64_e32 v[22:23], s[30:31]
+; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[10:11]
+; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[12:13]
+; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[14:15]
+; GISEL-NEXT: v_mov_b64_e32 v[8:9], s[16:17]
+; GISEL-NEXT: v_mov_b64_e32 v[10:11], s[18:19]
+; GISEL-NEXT: v_mov_b64_e32 v[12:13], s[20:21]
+; GISEL-NEXT: v_mov_b64_e32 v[14:15], s[22:23]
; GISEL-NEXT: s_nop 1
-; GISEL-NEXT: v_mfma_f32_32x32x16_f16 a[0:15], v[0:3], v[4:7], a[0:15]
-; GISEL-NEXT: v_mov_b32_e32 v0, 0
+; GISEL-NEXT: v_mfma_f32_32x32x16_f16 v[0:15], v[16:19], v[20:23], v[0:15]
+; GISEL-NEXT: v_mov_b32_e32 v16, 0
; GISEL-NEXT: s_nop 7
; GISEL-NEXT: s_nop 2
-; GISEL-NEXT: global_store_dwordx4 v0, a[0:3], s[0:1]
-; GISEL-NEXT: global_store_dwordx4 v0, a[4:7], s[0:1] offset:16
-; GISEL-NEXT: global_store_dwordx4 v0, a[8:11], s[0:1] offset:32
-; GISEL-NEXT: global_store_dwordx4 v0, a[12:15], s[0:1] offset:48
+; GISEL-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1]
+; GISEL-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16
+; GISEL-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] offset:32
+; GISEL-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] offset:48
; GISEL-NEXT: s_endpgm
;
; HEURRC-LABEL: test_mfma_f32_32x32x16_f16__vgprcd_mac:
@@ -2308,35 +2236,27 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_f16__vgprcd_mac(<8 x half> %ar
; HEURRC-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x64
; HEURRC-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xa4
; HEURRC-NEXT: s_waitcnt lgkmcnt(0)
-; HEURRC-NEXT: v_mov_b64_e32 v[0:1], s[24:25]
-; HEURRC-NEXT: v_mov_b64_e32 v[2:3], s[26:27]
-; HEURRC-NEXT: v_mov_b64_e32 v[4:5], s[28:29]
-; HEURRC-NEXT: v_accvgpr_write_b32 a0, s8
-; HEURRC-NEXT: v_mov_b64_e32 v[6:7], s[30:31]
-; HEURRC-NEXT: v_accvgpr_write_b32 a1, s9
-; HEURRC-NEXT: v_accvgpr_write_b32 a2, s10
-; HEURRC-NEXT: v_accvgpr_write_b32 a3, s11
-; HEURRC-NEXT: v_accvgpr_write_b32 a4, s12
-; HEURRC-NEXT: v_accvgpr_write_b32 a5, s13
-; HEURRC-NEXT: v_accvgpr_write_b32 a6, s14
-; HEURRC-NEXT: v_accvgpr_write_b32 a7, s15
-; HEURRC-NEXT: v_accvgpr_write_b32 a8, s16
-; HEURRC-NEXT: v_accvgpr_write_b32 a9, s17
-; HEURRC-NEXT: v_accvgpr_write_b32 a10, s18
-; HEURRC-NEXT: v_accvgpr_write_b32 a11, s19
-; HEURRC-NEXT: v_accvgpr_write_b32 a12, s20
-; HEURRC-NEXT: v_accvgpr_write_b32 a13, s21
-; HEURRC-NEXT: v_accvgpr_write_b32 a14, s22
-; HEURRC-NEXT: v_accvgpr_write_b32 a15, s23
+; HEURRC-NEXT: v_mov_b64_e32 v[16:17], s[24:25]
+; HEURRC-NEXT: v_mov_b64_e32 v[18:19], s[26:27]
+; HEURRC-NEXT: v_mov_b64_e32 v[20:21], s[28:29]
+; HEURRC-NEXT: v_mov_b64_e32 v[0:1], s[8:9]
+; HEURRC-NEXT: v_mov_b64_e32 v[22:23], s[30:31]
+; HEURRC-NEXT: v_mov_b64_e32 v[2:3], s[10:11]
+; HEURRC-NEXT: v_mov_b64_e32 v[4:5], s[12:13]
+; HEURRC-NEXT: v_mov_b64_e32 v[6:7], s[14:15]
+; HEURRC-NEXT: v_mov_b64_e32 v[8:9], s[16:17]
+; HEURRC-NEXT: v_mov_b64_e32 v[10:11], s[18:19]
+; HEURRC-NEXT: v_mov_b64_e32 v[12:13], s[20:21]
+; HEURRC-NEXT: v_mov_b64_e32 v[14:15], s[22:23]
; HEURRC-NEXT: s_nop 1
-; HEURRC-NEXT: v_mfma_f32_32x32x16_f16 a[0:15], v[0:3], v[4:7], a[0:15]
-; HEURRC-NEXT: v_mov_b32_e32 v0, 0
+; HEURRC-NEXT: v_mfma_f32_32x32x16_f16 v[0:15], v[16:19], v[20:23], v[0:15]
+; HEURRC-NEXT: v_mov_b32_e32 v16, 0
; HEURRC-NEXT: s_nop 7
; HEURRC-NEXT: s_nop 2
-; HEURRC-NEXT: global_store_dwordx4 v0, a[12:15], s[0:1] offset:48
-; HEURRC-NEXT: global_store_dwordx4 v0, a[8:11], s[0:1] offset:32
-; HEURRC-NEXT: global_store_dwordx4 v0, a[4:7], s[0:1] offset:16
-; HEURRC-NEXT: global_store_dwordx4 v0, a[0:3], s[0:1]
+; HEURRC-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] offset:48
+; HEURRC-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] offset:32
+; HEURRC-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16
+; HEURRC-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1]
; HEURRC-NEXT: s_endpgm
;
; VGPRRC-LABEL: test_mfma_f32_32x32x16_f16__vgprcd_mac:
@@ -2443,35 +2363,27 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_f16__vgprcd_mac_flags(<8 x hal
; SDAG-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x64
; SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xa4
; SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; SDAG-NEXT: v_mov_b64_e32 v[0:1], s[24:25]
-; SDAG-NEXT: v_mov_b64_e32 v[2:3], s[26:27]
-; SDAG-NEXT: v_mov_b64_e32 v[4:5], s[28:29]
-; SDAG-NEXT: v_accvgpr_write_b32 a0, s8
-; SDAG-NEXT: v_mov_b64_e32 v[6:7], s[30:31]
-; SDAG-NEXT: v_accvgpr_write_b32 a1, s9
-; SDAG-NEXT: v_accvgpr_write_b32 a2, s10
-; SDAG-NEXT: v_accvgpr_write_b32 a3, s11
-; SDAG-NEXT: v_accvgpr_write_b32 a4, s12
-; SDAG-NEXT: v_accvgpr_write_b32 a5, s13
-; SDAG-NEXT: v_accvgpr_write_b32 a6, s14
-; SDAG-NEXT: v_accvgpr_write_b32 a7, s15
-; SDAG-NEXT: v_accvgpr_write_b32 a8, s16
-; SDAG-NEXT: v_accvgpr_write_b32 a9, s17
-; SDAG-NEXT: v_accvgpr_write_b32 a10, s18
-; SDAG-NEXT: v_accvgpr_write_b32 a11, s19
-; SDAG-NEXT: v_accvgpr_write_b32 a12, s20
-; SDAG-NEXT: v_accvgpr_write_b32 a13, s21
-; SDAG-NEXT: v_accvgpr_write_b32 a14, s22
-; SDAG-NEXT: v_accvgpr_write_b32 a15, s23
+; SDAG-NEXT: v_mov_b64_e32 v[16:17], s[24:25]
+; SDAG-NEXT: v_mov_b64_e32 v[18:19], s[26:27]
+; SDAG-NEXT: v_mov_b64_e32 v[20:21], s[28:29]
+; SDAG-NEXT: v_mov_b64_e32 v[0:1], s[8:9]
+; SDAG-NEXT: v_mov_b64_e32 v[22:23], s[30:31]
+; SDAG-NEXT: v_mov_b64_e32 v[2:3], s[10:11]
+; SDAG-NEXT: v_mov_b64_e32 v[4:5], s[12:13]
+; SDAG-NEXT: v_mov_b64_e32 v[6:7], s[14:15]
+; SDAG-NEXT: v_mov_b64_e32 v[8:9], s[16:17]
+; SDAG-NEXT: v_mov_b64_e32 v[10:11], s[18:19]
+; SDAG-NEXT: v_mov_b64_e32 v[12:13], s[20:21]
+; SDAG-NEXT: v_mov_b64_e32 v[14:15], s[22:23]
; SDAG-NEXT: s_nop 1
-; SDAG-NEXT: v_mfma_f32_32x32x16_f16 a[0:15], v[0:3], v[4:7], a[0:15] cbsz:3 abid:2 blgp:1
-; SDAG-NEXT: v_mov_b32_e32 v0, 0
+; SDAG-NEXT: v_mfma_f32_32x32x16_f16 v[0:15], v[16:19], v[20:23], v[0:15] cbsz:3 abid:2 blgp:1
+; SDAG-NEXT: v_mov_b32_e32 v16, 0
; SDAG-NEXT: s_nop 7
; SDAG-NEXT: s_nop 2
-; SDAG-NEXT: global_store_dwordx4 v0, a[12:15], s[0:1] offset:48
-; SDAG-NEXT: global_store_dwordx4 v0, a[8:11], s[0:1] offset:32
-; SDAG-NEXT: global_store_dwordx4 v0, a[4:7], s[0:1] offset:16
-; SDAG-NEXT: global_store_dwordx4 v0, a[0:3], s[0:1]
+; SDAG-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] offset:48
+; SDAG-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] offset:32
+; SDAG-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16
+; SDAG-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1]
; SDAG-NEXT: s_endpgm
;
; GISEL-LABEL: test_mfma_f32_32x32x16_f16__vgprcd_mac_flags:
@@ -2480,35 +2392,27 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_f16__vgprcd_mac_flags(<8 x hal
; GISEL-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x64
; GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xa4
; GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[24:25]
-; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[26:27]
-; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[28:29]
-; GISEL-NEXT: v_accvgpr_write_b32 a0, s8
-; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[30:31]
-; GISEL-NEXT: v_accvgpr_write_b32 a1, s9
-; GISEL-NEXT: v_accvgpr_write_b32 a2, s10
-; GISEL-NEXT: v_accvgpr_write_b32 a3, s11
-; GISEL-NEXT: v_accvgpr_write_b32 a4, s12
-; GISEL-NEXT: v_accvgpr_write_b32 a5, s13
-; GISEL-NEXT: v_accvgpr_write_b32 a6, s14
-; GISEL-NEXT: v_accvgpr_write_b32 a7, s15
-; GISEL-NEXT: v_accvgpr_write_b32 a8, s16
-; GISEL-NEXT: v_accvgpr_write_b32 a9, s17
-; GISEL-NEXT: v_accvgpr_write_b32 a10, s18
-; GISEL-NEXT: v_accvgpr_write_b32 a11, s19
-; GISEL-NEXT: v_accvgpr_write_b32 a12, s20
-; GISEL-NEXT: v_accvgpr_write_b32 a13, s21
-; GISEL-NEXT: v_accvgpr_write_b32 a14, s22
-; GISEL-NEXT: v_accvgpr_write_b32 a15, s23
+; GISEL-NEXT: v_mov_b64_e32 v[16:17], s[24:25]
+; GISEL-NEXT: v_mov_b64_e32 v[18:19], s[26:27]
+; GISEL-NEXT: v_mov_b64_e32 v[20:21], s[28:29]
+; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[8:9]
+; GISEL-NEXT: v_mov_b64_e32 v[22:23], s[30:31]
+; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[10:11]
+; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[12:13]
+; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[14:15]
+; GISEL-NEXT: v_mov_b64_e32 v[8:9], s[16:17]
+; GISEL-NEXT: v_mov_b64_e32 v[10:11], s[18:19]
+; GISEL-NEXT: v_mov_b64_e32 v[12:13], s[20:21]
+; GISEL-NEXT: v_mov_b64_e32 v[14:15], s[22:23]
; GISEL-NEXT: s_nop 1
-; GISEL-NEXT: v_mfma_f32_32x32x16_f16 a[0:15], v[0:3], v[4:7], a[0:15] cbsz:3 abid:2 blgp:1
-; GISEL-NEXT: v_mov_b32_e32 v0, 0
+; GISEL-NEXT: v_mfma_f32_32x32x16_f16 v[0:15], v[16:19], v[20:23], v[0:15] cbsz:3 abid:2 blgp:1
+; GISEL-NEXT: v_mov_b32_e32 v16, 0
; GISEL-NEXT: s_nop 7
; GISEL-NEXT: s_nop 2
-; GISEL-NEXT: global_store_dwordx4 v0, a[0:3], s[0:1]
-; GISEL-NEXT: global_store_dwordx4 v0, a[4:7], s[0:1] offset:16
-; GISEL-NEXT: global_store_dwordx4 v0, a[8:11], s[0:1] offset:32
-; GISEL-NEXT: global_store_dwordx4 v0, a[12:15], s[0:1] offset:48
+; GISEL-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1]
+; GISEL-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16
+; GISEL-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] offset:32
+; GISEL-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] offset:48
; GISEL-NEXT: s_endpgm
;
; HEURRC-LABEL: test_mfma_f32_32x32x16_f16__vgprcd_mac_flags:
@@ -2517,35 +2421,27 @@ define amdgpu_kernel void @test_mfma_f32_32x32x16_f16__vgprcd_mac_flags(<8 x hal
; HEURRC-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x64
; HEURRC-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xa4
; HEURRC-NEXT: s_waitcnt lgkmcnt(0)
-; HEURRC-NEXT: v_mov_b64_e32 v[0:1], s[24:25]
-; HEURRC-NEXT: v_mov_b64_e32 v[2:3], s[26:27]
-; HEURRC-NEXT: v_mov_b64_e32 v[4:5], s[28:29]
-; HEURRC-NEXT: v_accvgpr_write_b32 a0, s8
-; HEURRC-NEXT: v_mov_b64_e32 v[6:7], s[30:31]
-; HEURRC-NEXT: v_accvgpr_write_b32 a1, s9
-; HEURRC-NEXT: v_accvgpr_write_b32 a2, s10
-; HEURRC-NEXT: v_accvgpr_write_b32 a3, s11
-; HEURRC-NEXT: v_accvgpr_write_b32 a4, s12
-; HEURRC-NEXT: v_accvgpr_write_b32 a5, s13
-; HEURRC-NEXT: v_accvgpr_write_b32 a6, s14
-; HEURRC-NEXT: v_accvgpr_write_b32 a7, s15
-; HEURRC-NEXT: v_accvgpr_write_b32 a8, s16
-; HEURRC-NEXT: v_accvgpr_write_b32 a9, s17
-; HEURRC-NEXT: v_accvgpr_write_b32 a10, s18
-; HEURRC-NEXT: v_accvgpr_write_b32 a11, s19
-; HEURRC-NEXT: v_accvgpr_write_b32 a12, s20
-; HEURRC-NEXT: v_accvgpr_write_b32 a13, s21
-; HEURRC-NEXT: v_accvgpr_write_b32 a14, s22
-; HEURRC-NEXT: v_accvgpr_write_b32 a15, s23
+; HEURRC-NEXT: v_mov_b64_e32 v[16:17], s[24:25]
+; HEURRC-NEXT: v_mov_b64_e32 v[18:19], s[26:27]
+; HEURRC-NEXT: v_mov_b64_e32 v[20:21], s[28:29]
+; HEURRC-NEXT: v_mov_b64_e32 v[0:1], s[8:9]
+; HEURRC-NEXT: v_mov_b64_e32 v[22:23], s[30:31]
+; HEURRC-NEXT: v_mov_b64_e32 v[2:3], s[10:11]
+; HEURRC-NEXT: v_mov_b64_e32 v[4:5], s[12:13]
+; HEURRC-NEXT: v_mov_b64_e32 v[6:7], s[14:15]
+; HEURRC-NEXT: v_mov_b64_e32 v[8:9], s[16:17]
+; HEURRC-NEXT: v_mov_b64_e32 v[10:11], s[18:19]
+; HEURRC-NEXT: v_mov_b64_e32 v[12:13], s[20:21]
+; HEURRC-NEXT: v_mov_b64_e32 v[14:15], s[22:23]
; HEURRC-NEXT: s_nop 1
-; HEURRC-NEXT: v_mfma_f32_32x32x16_f16 a[0:15], v[0:3], v[4:7], a[0:15] cbsz:3 abid:2 blgp:1
-; HEURRC-NEXT: v_mov_b32_e32 v0, 0
+; HEURRC-NEXT: v_mfma_f32_32x32x16_f16 v[0:15], v[16:19], v[20:23], v[0:15] cbsz:3 abid:2 blgp:1
+; HEURRC-NEXT: v_mov_b32_e32 v16, 0
; HEURRC-NEXT: s_nop 7
; HEURRC-NEXT: s_nop 2
-; HEURRC-NEXT: global_store_dwordx4 v0, a[12:15], s[0:1] offset:48
-; HEURRC-NEXT: global_store_dwordx4 v0, a[8:11], s[0:1] offset:32
-; HEURRC-NEXT: global_store_dwordx4 v0, a[4:7], s[0:1] offset:16
-; HEURRC-NEXT: global_store_dwordx4 v0, a[0:3], s[0:1]
+; HEURRC-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] offset:48
+; HEURRC-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] offset:32
+; HEURRC-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16
+; HEURRC-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1]
; HEURRC-NEXT: s_endpgm
;
; VGPRRC-LABEL: test_mfma_f32_32x32x16_f16__vgprcd_mac_flags:
@@ -2781,24 +2677,24 @@ define amdgpu_kernel void @test_mfma_i32_16x16x64_i8_no_agpr__vgprcd(ptr addrspa
; SDAG-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x34
; SDAG-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24
; SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x54
-; SDAG-NEXT: v_mov_b32_e32 v0, 0
+; SDAG-NEXT: v_mov_b32_e32 v12, 0
; SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; SDAG-NEXT: v_mov_b32_e32 v2, s8
-; SDAG-NEXT: v_mov_b32_e32 v3, s9
-; SDAG-NEXT: v_mov_b32_e32 v4, s10
-; SDAG-NEXT: v_mov_b32_e32 v5, s11
-; SDAG-NEXT: v_mov_b32_e32 v6, s12
-; SDAG-NEXT: v_mov_b32_e32 v7, s13
-; SDAG-NEXT: v_mov_b32_e32 v8, s14
-; SDAG-NEXT: v_mov_b32_e32 v9, s15
-; SDAG-NEXT: v_accvgpr_write_b32 a0, s0
-; SDAG-NEXT: v_accvgpr_write_b32 a1, s1
-; SDAG-NEXT: v_accvgpr_write_b32 a2, s2
-; SDAG-NEXT: v_accvgpr_write_b32 a3, s3
+; SDAG-NEXT: v_mov_b32_e32 v0, s8
+; SDAG-NEXT: v_mov_b32_e32 v1, s9
+; SDAG-NEXT: v_mov_b32_e32 v2, s10
+; SDAG-NEXT: v_mov_b32_e32 v3, s11
+; SDAG-NEXT: v_mov_b32_e32 v4, s12
+; SDAG-NEXT: v_mov_b32_e32 v5, s13
+; SDAG-NEXT: v_mov_b32_e32 v6, s14
+; SDAG-NEXT: v_mov_b32_e32 v7, s15
+; SDAG-NEXT: v_mov_b32_e32 v8, s0
+; SDAG-NEXT: v_mov_b32_e32 v9, s1
+; SDAG-NEXT: v_mov_b32_e32 v10, s2
+; SDAG-NEXT: v_mov_b32_e32 v11, s3
; SDAG-NEXT: s_nop 1
-; SDAG-NEXT: v_mfma_i32_16x16x64_i8 a[0:3], v[2:5], v[6:9], a[0:3]
+; SDAG-NEXT: v_mfma_i32_16x16x64_i8 v[0:3], v[0:3], v[4:7], v[8:11]
; SDAG-NEXT: s_nop 7
-; SDAG-NEXT: global_store_dwordx4 v0, a[0:3], s[6:7]
+; SDAG-NEXT: global_store_dwordx4 v12, v[0:3], s[6:7]
; SDAG-NEXT: s_endpgm
;
; GISEL-LABEL: test_mfma_i32_16x16x64_i8_no_agpr__vgprcd:
@@ -2810,16 +2706,14 @@ define amdgpu_kernel void @test_mfma_i32_16x16x64_i8_no_agpr__vgprcd(ptr addrspa
; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[8:9]
; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[10:11]
; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[12:13]
-; GISEL-NEXT: v_accvgpr_write_b32 a0, s0
+; GISEL-NEXT: v_mov_b64_e32 v[10:11], s[2:3]
; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[14:15]
-; GISEL-NEXT: v_accvgpr_write_b32 a1, s1
-; GISEL-NEXT: v_accvgpr_write_b32 a2, s2
-; GISEL-NEXT: v_accvgpr_write_b32 a3, s3
+; GISEL-NEXT: v_mov_b64_e32 v[8:9], s[0:1]
; GISEL-NEXT: s_nop 1
-; GISEL-NEXT: v_mfma_i32_16x16x64_i8 a[0:3], v[0:3], v[4:7], a[0:3]
-; GISEL-NEXT: v_mov_b32_e32 v0, 0
+; GISEL-NEXT: v_mfma_i32_16x16x64_i8 v[0:3], v[0:3], v[4:7], v[8:11]
+; GISEL-NEXT: v_mov_b32_e32 v4, 0
; GISEL-NEXT: s_nop 6
-; GISEL-NEXT: global_store_dwordx4 v0, a[0:3], s[6:7]
+; GISEL-NEXT: global_store_dwordx4 v4, v[0:3], s[6:7]
; GISEL-NEXT: s_endpgm
;
; HEURRC-LABEL: test_mfma_i32_16x16x64_i8_no_agpr__vgprcd:
@@ -2827,24 +2721,24 @@ define amdgpu_kernel void @test_mfma_i32_16x16x64_i8_no_agpr__vgprcd(ptr addrspa
; HEURRC-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x34
; HEURRC-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24
; HEURRC-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x54
-; HEURRC-NEXT: v_mov_b32_e32 v0, 0
+; HEURRC-NEXT: v_mov_b32_e32 v12, 0
; HEURRC-NEXT: s_waitcnt lgkmcnt(0)
-; HEURRC-NEXT: v_mov_b32_e32 v2, s8
-; HEURRC-NEXT: v_mov_b32_e32 v3, s9
-; HEURRC-NEXT: v_mov_b32_e32 v4, s10
-; HEURRC-NEXT: v_mov_b32_e32 v5, s11
-; HEURRC-NEXT: v_mov_b32_e32 v6, s12
-; HEURRC-NEXT: v_mov_b32_e32 v7, s13
-; HEURRC-NEXT: v_mov_b32_e32 v8, s14
-; HEURRC-NEXT: v_mov_b32_e32 v9, s15
-; HEURRC-NEXT: v_accvgpr_write_b32 a0, s0
-; HEURRC-NEXT: v_accvgpr_write_b32 a1, s1
-; HEURRC-NEXT: v_accvgpr_write_b32 a2, s2
-; HEURRC-NEXT: v_accvgpr_write_b32 a3, s3
+; HEURRC-NEXT: v_mov_b32_e32 v0, s8
+; HEURRC-NEXT: v_mov_b32_e32 v1, s9
+; HEURRC-NEXT: v_mov_b32_e32 v2, s10
+; HEURRC-NEXT: v_mov_b32_e32 v3, s11
+; HEURRC-NEXT: v_mov_b32_e32 v4, s12
+; HEURRC-NEXT: v_mov_b32_e32 v5, s13
+; HEURRC-NEXT: v_mov_b32_e32 v6, s14
+; HEURRC-NEXT: v_mov_b32_e32 v7, s15
+; HEURRC-NEXT: v_mov_b32_e32 v8, s0
+; HEURRC-NEXT: v_mov_b32_e32 v9, s1
+; HEURRC-NEXT: v_mov_b32_e32 v10, s2
+; HEURRC-NEXT: v_mov_b32_e32 v11, s3
; HEURRC-NEXT: s_nop 1
-; HEURRC-NEXT: v_mfma_i32_16x16x64_i8 a[0:3], v[2:5], v[6:9], a[0:3]
+; HEURRC-NEXT: v_mfma_i32_16x16x64_i8 v[0:3], v[0:3], v[4:7], v[8:11]
; HEURRC-NEXT: s_nop 7
-; HEURRC-NEXT: global_store_dwordx4 v0, a[0:3], s[6:7]
+; HEURRC-NEXT: global_store_dwordx4 v12, v[0:3], s[6:7]
; HEURRC-NEXT: s_endpgm
;
; VGPRRC-LABEL: test_mfma_i32_16x16x64_i8_no_agpr__vgprcd:
@@ -2852,24 +2746,24 @@ define amdgpu_kernel void @test_mfma_i32_16x16x64_i8_no_agpr__vgprcd(ptr addrspa
; VGPRRC-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x34
; VGPRRC-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24
; VGPRRC-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x54
-; VGPRRC-NEXT: v_mov_b32_e32 v4, 0
+; VGPRRC-NEXT: v_mov_b32_e32 v12, 0
; VGPRRC-NEXT: s_waitcnt lgkmcnt(0)
-; VGPRRC-NEXT: v_mov_b32_e32 v6, s8
-; VGPRRC-NEXT: v_mov_b32_e32 v7, s9
-; VGPRRC-NEXT: v_mov_b32_e32 v8, s10
-; VGPRRC-NEXT: v_mov_b32_e32 v9, s11
-; VGPRRC-NEXT: v_mov_b32_e32 v10, s12
-; VGPRRC-NEXT: v_mov_b32_e32 v11, s13
-; VGPRRC-NEXT: v_mov_b32_e32 v12, s14
-; VGPRRC-NEXT: v_mov_b32_e32 v13, s15
-; VGPRRC-NEXT: v_mov_b32_e32 v0, s0
-; VGPRRC-NEXT: v_mov_b32_e32 v1, s1
-; VGPRRC-NEXT: v_mov_b32_e32 v2, s2
-; VGPRRC-NEXT: v_mov_b32_e32 v3, s3
+; VGPRRC-NEXT: v_mov_b32_e32 v0, s8
+; VGPRRC-NEXT: v_mov_b32_e32 v1, s9
+; VGPRRC-NEXT: v_mov_b32_e32 v2, s10
+; VGPRRC-NEXT: v_mov_b32_e32 v3, s11
+; VGPRRC-NEXT: v_mov_b32_e32 v4, s12
+; VGPRRC-NEXT: v_mov_b32_e32 v5, s13
+; VGPRRC-NEXT: v_mov_b32_e32 v6, s14
+; VGPRRC-NEXT: v_mov_b32_e32 v7, s15
+; VGPRRC-NEXT: v_mov_b32_e32 v8, s0
+; VGPRRC-NEXT: v_mov_b32_e32 v9, s1
+; VGPRRC-NEXT: v_mov_b32_e32 v10, s2
+; VGPRRC-NEXT: v_mov_b32_e32 v11, s3
; VGPRRC-NEXT: s_nop 1
-; VGPRRC-NEXT: v_mfma_i32_16x16x64_i8 v[0:3], v[6:9], v[10:13], v[0:3]
+; VGPRRC-NEXT: v_mfma_i32_16x16x64_i8 v[0:3], v[0:3], v[4:7], v[8:11]
; VGPRRC-NEXT: s_nop 7
-; VGPRRC-NEXT: global_store_dwordx4 v4, v[0:3], s[6:7]
+; VGPRRC-NEXT: global_store_dwordx4 v12, v[0:3], s[6:7]
; VGPRRC-NEXT: s_endpgm
; AGPR-LABEL: test_mfma_i32_16x16x64_i8_no_agpr__vgprcd:
; AGPR: ; %bb.0:
@@ -2930,24 +2824,24 @@ define amdgpu_kernel void @test_mfma_i32_16x16x64_i8_no_agpr__vgprcd__flags(ptr
; SDAG-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x34
; SDAG-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24
; SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x54
-; SDAG-NEXT: v_mov_b32_e32 v0, 0
+; SDAG-NEXT: v_mov_b32_e32 v12, 0
; SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; SDAG-NEXT: v_mov_b32_e32 v2, s8
-; SDAG-NEXT: v_mov_b32_e32 v3, s9
-; SDAG-NEXT: v_mov_b32_e32 v4, s10
-; SDAG-NEXT: v_mov_b32_e32 v5, s11
-; SDAG-NEXT: v_mov_b32_e32 v6, s12
-; SDAG-NEXT: v_mov_b32_e32 v7, s13
-; SDAG-NEXT: v_mov_b32_e32 v8, s14
-; SDAG-NEXT: v_mov_b32_e32 v9, s15
-; SDAG-NEXT: v_accvgpr_write_b32 a0, s0
-; SDAG-NEXT: v_accvgpr_write_b32 a1, s1
-; SDAG-NEXT: v_accvgpr_write_b32 a2, s2
-; SDAG-NEXT: v_accvgpr_write_b32 a3, s3
+; SDAG-NEXT: v_mov_b32_e32 v0, s8
+; SDAG-NEXT: v_mov_b32_e32 v1, s9
+; SDAG-NEXT: v_mov_b32_e32 v2, s10
+; SDAG-NEXT: v_mov_b32_e32 v3, s11
+; SDAG-NEXT: v_mov_b32_e32 v4, s12
+; SDAG-NEXT: v_mov_b32_e32 v5, s13
+; SDAG-NEXT: v_mov_b32_e32 v6, s14
+; SDAG-NEXT: v_mov_b32_e32 v7, s15
+; SDAG-NEXT: v_mov_b32_e32 v8, s0
+; SDAG-NEXT: v_mov_b32_e32 v9, s1
+; SDAG-NEXT: v_mov_b32_e32 v10, s2
+; SDAG-NEXT: v_mov_b32_e32 v11, s3
; SDAG-NEXT: s_nop 1
-; SDAG-NEXT: v_mfma_i32_16x16x64_i8 a[0:3], v[2:5], v[6:9], a[0:3] cbsz:3 abid:2 blgp:1
+; SDAG-NEXT: v_mfma_i32_16x16x64_i8 v[0:3], v[0:3], v[4:7], v[8:11] cbsz:3 abid:2 blgp:1
; SDAG-NEXT: s_nop 7
-; SDAG-NEXT: global_store_dwordx4 v0, a[0:3], s[6:7]
+; SDAG-NEXT: global_store_dwordx4 v12, v[0:3], s[6:7]
; SDAG-NEXT: s_endpgm
;
; GISEL-LABEL: test_mfma_i32_16x16x64_i8_no_agpr__vgprcd__flags:
@@ -2959,16 +2853,14 @@ define amdgpu_kernel void @test_mfma_i32_16x16x64_i8_no_agpr__vgprcd__flags(ptr
; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[8:9]
; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[10:11]
; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[12:13]
-; GISEL-NEXT: v_accvgpr_write_b32 a0, s0
+; GISEL-NEXT: v_mov_b64_e32 v[10:11], s[2:3]
; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[14:15]
-; GISEL-NEXT: v_accvgpr_write_b32 a1, s1
-; GISEL-NEXT: v_accvgpr_write_b32 a2, s2
-; GISEL-NEXT: v_accvgpr_write_b32 a3, s3
+; GISEL-NEXT: v_mov_b64_e32 v[8:9], s[0:1]
; GISEL-NEXT: s_nop 1
-; GISEL-NEXT: v_mfma_i32_16x16x64_i8 a[0:3], v[0:3], v[4:7], a[0:3] cbsz:3 abid:2 blgp:1
-; GISEL-NEXT: v_mov_b32_e32 v0, 0
+; GISEL-NEXT: v_mfma_i32_16x16x64_i8 v[0:3], v[0:3], v[4:7], v[8:11] cbsz:3 abid:2 blgp:1
+; GISEL-NEXT: v_mov_b32_e32 v4, 0
; GISEL-NEXT: s_nop 6
-; GISEL-NEXT: global_store_dwordx4 v0, a[0:3], s[6:7]
+; GISEL-NEXT: global_store_dwordx4 v4, v[0:3], s[6:7]
; GISEL-NEXT: s_endpgm
;
; HEURRC-LABEL: test_mfma_i32_16x16x64_i8_no_agpr__vgprcd__flags:
@@ -2976,24 +2868,24 @@ define amdgpu_kernel void @test_mfma_i32_16x16x64_i8_no_agpr__vgprcd__flags(ptr
; HEURRC-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x34
; HEURRC-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24
; HEURRC-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x54
-; HEURRC-NEXT: v_mov_b32_e32 v0, 0
+; HEURRC-NEXT: v_mov_b32_e32 v12, 0
; HEURRC-NEXT: s_waitcnt lgkmcnt(0)
-; HEURRC-NEXT: v_mov_b32_e32 v2, s8
-; HEURRC-NEXT: v_mov_b32_e32 v3, s9
-; HEURRC-NEXT: v_mov_b32_e32 v4, s10
-; HEURRC-NEXT: v_mov_b32_e32 v5, s11
-; HEURRC-NEXT: v_mov_b32_e32 v6, s12
-; HEURRC-NEXT: v_mov_b32_e32 v7, s13
-; HEURRC-NEXT: v_mov_b32_e32 v8, s14
-; HEURRC-NEXT: v_mov_b32_e32 v9, s15
-; HEURRC-NEXT: v_accvgpr_write_b32 a0, s0
-; HEURRC-NEXT: v_accvgpr_write_b32 a1, s1
-; HEURRC-NEXT: v_accvgpr_write_b32 a2, s2
-; HEURRC-NEXT: v_accvgpr_write_b32 a3, s3
+; HEURRC-NEXT: v_mov_b32_e32 v0, s8
+; HEURRC-NEXT: v_mov_b32_e32 v1, s9
+; HEURRC-NEXT: v_mov_b32_e32 v2, s10
+; HEURRC-NEXT: v_mov_b32_e32 v3, s11
+; HEURRC-NEXT: v_mov_b32_e32 v4, s12
+; HEURRC-NEXT: v_mov_b32_e32 v5, s13
+; HEURRC-NEXT: v_mov_b32_e32 v6, s14
+; HEURRC-NEXT: v_mov_b32_e32 v7, s15
+; HEURRC-NEXT: v_mov_b32_e32 v8, s0
+; HEURRC-NEXT: v_mov_b32_e32 v9, s1
+; HEURRC-NEXT: v_mov_b32_e32 v10, s2
+; HEURRC-NEXT: v_mov_b32_e32 v11, s3
; HEURRC-NEXT: s_nop 1
-; HEURRC-NEXT: v_mfma_i32_16x16x64_i8 a[0:3], v[2:5], v[6:9], a[0:3] cbsz:3 abid:2 blgp:1
+; HEURRC-NEXT: v_mfma_i32_16x16x64_i8 v[0:3], v[0:3], v[4:7], v[8:11] cbsz:3 abid:2 blgp:1
; HEURRC-NEXT: s_nop 7
-; HEURRC-NEXT: global_store_dwordx4 v0, a[0:3], s[6:7]
+; HEURRC-NEXT: global_store_dwordx4 v12, v[0:3], s[6:7]
; HEURRC-NEXT: s_endpgm
;
; VGPRRC-LABEL: test_mfma_i32_16x16x64_i8_no_agpr__vgprcd__flags:
@@ -3001,24 +2893,24 @@ define amdgpu_kernel void @test_mfma_i32_16x16x64_i8_no_agpr__vgprcd__flags(ptr
; VGPRRC-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x34
; VGPRRC-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24
; VGPRRC-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x54
-; VGPRRC-NEXT: v_mov_b32_e32 v4, 0
+; VGPRRC-NEXT: v_mov_b32_e32 v12, 0
; VGPRRC-NEXT: s_waitcnt lgkmcnt(0)
-; VGPRRC-NEXT: v_mov_b32_e32 v6, s8
-; VGPRRC-NEXT: v_mov_b32_e32 v7, s9
-; VGPRRC-NEXT: v_mov_b32_e32 v8, s10
-; VGPRRC-NEXT: v_mov_b32_e32 v9, s11
-; VGPRRC-NEXT: v_mov_b32_e32 v10, s12
-; VGPRRC-NEXT: v_mov_b32_e32 v11, s13
-; VGPRRC-NEXT: v_mov_b32_e32 v12, s14
-; VGPRRC-NEXT: v_mov_b32_e32 v13, s15
-; VGPRRC-NEXT: v_mov_b32_e32 v0, s0
-; VGPRRC-NEXT: v_mov_b32_e32 v1, s1
-; VGPRRC-NEXT: v_mov_b32_e32 v2, s2
-; VGPRRC-NEXT: v_mov_b32_e32 v3, s3
+; VGPRRC-NEXT: v_mov_b32_e32 v0, s8
+; VGPRRC-NEXT: v_mov_b32_e32 v1, s9
+; VGPRRC-NEXT: v_mov_b32_e32 v2, s10
+; VGPRRC-NEXT: v_mov_b32_e32 v3, s11
+; VGPRRC-NEXT: v_mov_b32_e32 v4, s12
+; VGPRRC-NEXT: v_mov_b32_e32 v5, s13
+; VGPRRC-NEXT: v_mov_b32_e32 v6, s14
+; VGPRRC-NEXT: v_mov_b32_e32 v7, s15
+; VGPRRC-NEXT: v_mov_b32_e32 v8, s0
+; VGPRRC-NEXT: v_mov_b32_e32 v9, s1
+; VGPRRC-NEXT: v_mov_b32_e32 v10, s2
+; VGPRRC-NEXT: v_mov_b32_e32 v11, s3
; VGPRRC-NEXT: s_nop 1
-; VGPRRC-NEXT: v_mfma_i32_16x16x64_i8 v[0:3], v[6:9], v[10:13], v[0:3] cbsz:3 abid:2 blgp:1
+; VGPRRC-NEXT: v_mfma_i32_16x16x64_i8 v[0:3], v[0:3], v[4:7], v[8:11] cbsz:3 abid:2 blgp:1
; VGPRRC-NEXT: s_nop 7
-; VGPRRC-NEXT: global_store_dwordx4 v4, v[0:3], s[6:7]
+; VGPRRC-NEXT: global_store_dwordx4 v12, v[0:3], s[6:7]
; VGPRRC-NEXT: s_endpgm
; AGPR-LABEL: test_mfma_i32_16x16x64_i8_no_agpr__vgprcd__flags:
; AGPR: ; %bb.0:
@@ -4246,70 +4138,63 @@ define amdgpu_kernel void @test_mfma_i32_32x32x32_i8__vgprcd(<4 x i32> %arg0, <4
; SDAG: ; %bb.0:
; SDAG-NEXT: s_load_dwordx8 s[20:27], s[4:5], 0x24
; SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xa4
-; SDAG-NEXT: v_mov_b32_e32 v0, 0
+; SDAG-NEXT: v_mov_b32_e32 v40, 0
; SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; SDAG-NEXT: v_mov_b32_e32 v2, s20
-; SDAG-NEXT: v_mov_b32_e32 v3, s21
-; SDAG-NEXT: v_mov_b32_e32 v4, s22
-; SDAG-NEXT: v_mov_b32_e32 v5, s23
+; SDAG-NEXT: v_mov_b32_e32 v32, s20
+; SDAG-NEXT: v_mov_b32_e32 v33, s21
+; SDAG-NEXT: v_mov_b32_e32 v34, s22
+; SDAG-NEXT: v_mov_b32_e32 v35, s23
; SDAG-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x64
-; SDAG-NEXT: v_mov_b32_e32 v6, s24
-; SDAG-NEXT: v_mov_b32_e32 v7, s25
-; SDAG-NEXT: v_mov_b32_e32 v8, s26
-; SDAG-NEXT: v_mov_b32_e32 v9, s27
+; SDAG-NEXT: v_mov_b32_e32 v36, s24
+; SDAG-NEXT: v_mov_b32_e32 v37, s25
+; SDAG-NEXT: v_mov_b32_e32 v38, s26
+; SDAG-NEXT: v_mov_b32_e32 v39, s27
; SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; SDAG-NEXT: v_accvgpr_write_b32 a31, s23
-; SDAG-NEXT: v_accvgpr_write_b32 a30, s22
-; SDAG-NEXT: v_accvgpr_write_b32 a29, s21
-; SDAG-NEXT: v_accvgpr_write_b32 a28, s20
-; SDAG-NEXT: v_accvgpr_write_b32 a27, s19
-; SDAG-NEXT: v_accvgpr_write_b32 a26, s18
-; SDAG-NEXT: v_accvgpr_write_b32 a25, s17
-; SDAG-NEXT: v_accvgpr_write_b32 a24, s16
-; SDAG-NEXT: v_accvgpr_write_b32 a23, s15
-; SDAG-NEXT: v_accvgpr_write_b32 a22, s14
-; SDAG-NEXT: v_accvgpr_write_b32 a21, s13
-; SDAG-NEXT: v_accvgpr_write_b32 a20, s12
-; SDAG-NEXT: v_accvgpr_write_b32 a19, s11
-; SDAG-NEXT: v_accvgpr_write_b32 a18, s10
-; SDAG-NEXT: v_accvgpr_write_b32 a17, s9
-; SDAG-NEXT: v_accvgpr_write_b32 a16, s8
+; SDAG-NEXT: v_mov_b64_e32 v[30:31], s[22:23]
+; SDAG-NEXT: v_mov_b64_e32 v[28:29], s[20:21]
+; SDAG-NEXT: v_mov_b64_e32 v[26:27], s[18:19]
+; SDAG-NEXT: v_mov_b64_e32 v[24:25], s[16:17]
+; SDAG-NEXT: v_mov_b64_e32 v[22:23], s[14:15]
+; SDAG-NEXT: v_mov_b64_e32 v[20:21], s[12:13]
+; SDAG-NEXT: v_mov_b64_e32 v[18:19], s[10:11]
+; SDAG-NEXT: v_mov_b64_e32 v[16:17], s[8:9]
; SDAG-NEXT: s_nop 1
-; SDAG-NEXT: v_mfma_i32_32x32x32_i8 a[0:15], v[2:5], v[6:9], a[16:31]
-; SDAG-NEXT: v_mov_b32_e32 v2, s20
-; SDAG-NEXT: v_mov_b32_e32 v3, s21
-; SDAG-NEXT: v_mov_b32_e32 v4, s22
-; SDAG-NEXT: v_mov_b32_e32 v5, s23
-; SDAG-NEXT: global_store_dwordx4 v0, v[2:5], s[0:1] offset:48 sc0 sc1
+; SDAG-NEXT: v_mfma_i32_32x32x32_i8 v[0:15], v[32:35], v[36:39], v[16:31]
+; SDAG-NEXT: s_nop 6
+; SDAG-NEXT: v_mov_b32_e32 v16, s20
+; SDAG-NEXT: v_mov_b32_e32 v17, s21
+; SDAG-NEXT: v_mov_b32_e32 v18, s22
+; SDAG-NEXT: v_mov_b32_e32 v19, s23
+; SDAG-NEXT: global_store_dwordx4 v40, v[16:19], s[0:1] offset:48 sc0 sc1
; SDAG-NEXT: s_waitcnt vmcnt(0)
; SDAG-NEXT: s_nop 0
-; SDAG-NEXT: v_mov_b32_e32 v2, s16
-; SDAG-NEXT: v_mov_b32_e32 v3, s17
-; SDAG-NEXT: v_mov_b32_e32 v4, s18
-; SDAG-NEXT: v_mov_b32_e32 v5, s19
-; SDAG-NEXT: global_store_dwordx4 v0, v[2:5], s[0:1] offset:32 sc0 sc1
+; SDAG-NEXT: v_mov_b32_e32 v16, s16
+; SDAG-NEXT: v_mov_b32_e32 v17, s17
+; SDAG-NEXT: v_mov_b32_e32 v18, s18
+; SDAG-NEXT: v_mov_b32_e32 v19, s19
+; SDAG-NEXT: global_store_dwordx4 v40, v[16:19], s[0:1] offset:32 sc0 sc1
; SDAG-NEXT: s_waitcnt vmcnt(0)
; SDAG-NEXT: s_nop 0
-; SDAG-NEXT: v_mov_b32_e32 v2, s12
-; SDAG-NEXT: v_mov_b32_e32 v3, s13
-; SDAG-NEXT: v_mov_b32_e32 v4, s14
-; SDAG-NEXT: v_mov_b32_e32 v5, s15
-; SDAG-NEXT: global_store_dwordx4 v0, v[2:5], s[0:1] offset:16 sc0 sc1
+; SDAG-NEXT: v_mov_b32_e32 v16, s12
+; SDAG-NEXT: v_mov_b32_e32 v17, s13
+; SDAG-NEXT: v_mov_b32_e32 v18, s14
+; SDAG-NEXT: v_mov_b32_e32 v19, s15
+; SDAG-NEXT: global_store_dwordx4 v40, v[16:19], s[0:1] offset:16 sc0 sc1
; SDAG-NEXT: s_waitcnt vmcnt(0)
; SDAG-NEXT: s_nop 0
-; SDAG-NEXT: v_mov_b32_e32 v2, s8
-; SDAG-NEXT: v_mov_b32_e32 v3, s9
-; SDAG-NEXT: v_mov_b32_e32 v4, s10
-; SDAG-NEXT: v_mov_b32_e32 v5, s11
-; SDAG-NEXT: global_store_dwordx4 v0, v[2:5], s[0:1] sc0 sc1
+; SDAG-NEXT: v_mov_b32_e32 v16, s8
+; SDAG-NEXT: v_mov_b32_e32 v17, s9
+; SDAG-NEXT: v_mov_b32_e32 v18, s10
+; SDAG-NEXT: v_mov_b32_e32 v19, s11
+; SDAG-NEXT: global_store_dwordx4 v40, v[16:19], s[0:1] sc0 sc1
; SDAG-NEXT: s_waitcnt vmcnt(0)
-; SDAG-NEXT: global_store_dwordx4 v0, a[8:11], s[0:1] offset:32 sc0 sc1
+; SDAG-NEXT: global_store_dwordx4 v40, v[8:11], s[0:1] offset:32 sc0 sc1
; SDAG-NEXT: s_waitcnt vmcnt(0)
-; SDAG-NEXT: global_store_dwordx4 v0, a[12:15], s[0:1] offset:48 sc0 sc1
+; SDAG-NEXT: global_store_dwordx4 v40, v[12:15], s[0:1] offset:48 sc0 sc1
; SDAG-NEXT: s_waitcnt vmcnt(0)
-; SDAG-NEXT: global_store_dwordx4 v0, a[0:3], s[0:1] sc0 sc1
+; SDAG-NEXT: global_store_dwordx4 v40, v[0:3], s[0:1] sc0 sc1
; SDAG-NEXT: s_waitcnt vmcnt(0)
-; SDAG-NEXT: global_store_dwordx4 v0, a[4:7], s[0:1] offset:16 sc0 sc1
+; SDAG-NEXT: global_store_dwordx4 v40, v[4:7], s[0:1] offset:16 sc0 sc1
; SDAG-NEXT: s_waitcnt vmcnt(0)
; SDAG-NEXT: s_endpgm
;
@@ -4318,52 +4203,44 @@ define amdgpu_kernel void @test_mfma_i32_32x32x32_i8__vgprcd(<4 x i32> %arg0, <4
; GISEL-NEXT: s_load_dwordx8 s[24:31], s[4:5], 0x24
; GISEL-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x64
; GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xa4
-; GISEL-NEXT: v_mov_b32_e32 v24, 0
+; GISEL-NEXT: v_mov_b32_e32 v56, 0
; GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[24:25]
-; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[26:27]
-; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[28:29]
-; GISEL-NEXT: v_accvgpr_write_b32 a0, s8
-; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[30:31]
-; GISEL-NEXT: v_accvgpr_write_b32 a1, s9
-; GISEL-NEXT: v_accvgpr_write_b32 a2, s10
-; GISEL-NEXT: v_accvgpr_write_b32 a3, s11
-; GISEL-NEXT: v_accvgpr_write_b32 a4, s12
-; GISEL-NEXT: v_accvgpr_write_b32 a5, s13
-; GISEL-NEXT: v_accvgpr_write_b32 a6, s14
-; GISEL-NEXT: v_accvgpr_write_b32 a7, s15
-; GISEL-NEXT: v_accvgpr_write_b32 a8, s16
-; GISEL-NEXT: v_accvgpr_write_b32 a9, s17
-; GISEL-NEXT: v_accvgpr_write_b32 a10, s18
-; GISEL-NEXT: v_accvgpr_write_b32 a11, s19
-; GISEL-NEXT: v_accvgpr_write_b32 a12, s20
-; GISEL-NEXT: v_accvgpr_write_b32 a13, s21
-; GISEL-NEXT: v_accvgpr_write_b32 a14, s22
-; GISEL-NEXT: v_accvgpr_write_b32 a15, s23
-; GISEL-NEXT: v_mov_b64_e32 v[8:9], s[8:9]
-; GISEL-NEXT: v_mov_b64_e32 v[10:11], s[10:11]
-; GISEL-NEXT: v_mfma_i32_32x32x32_i8 a[16:31], v[0:3], v[4:7], a[0:15]
-; GISEL-NEXT: v_mov_b64_e32 v[12:13], s[12:13]
-; GISEL-NEXT: v_mov_b64_e32 v[16:17], s[16:17]
-; GISEL-NEXT: v_mov_b64_e32 v[20:21], s[20:21]
-; GISEL-NEXT: v_mov_b64_e32 v[14:15], s[14:15]
-; GISEL-NEXT: v_mov_b64_e32 v[18:19], s[18:19]
-; GISEL-NEXT: v_mov_b64_e32 v[22:23], s[22:23]
-; GISEL-NEXT: global_store_dwordx4 v24, v[8:11], s[0:1] sc0 sc1
+; GISEL-NEXT: v_mov_b64_e32 v[34:35], s[26:27]
+; GISEL-NEXT: v_mov_b64_e32 v[32:33], s[24:25]
+; GISEL-NEXT: v_mov_b64_e32 v[38:39], s[30:31]
+; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[8:9]
+; GISEL-NEXT: v_mov_b64_e32 v[36:37], s[28:29]
+; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[10:11]
+; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[12:13]
+; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[14:15]
+; GISEL-NEXT: v_mov_b64_e32 v[8:9], s[16:17]
+; GISEL-NEXT: v_mov_b64_e32 v[10:11], s[18:19]
+; GISEL-NEXT: v_mov_b64_e32 v[12:13], s[20:21]
+; GISEL-NEXT: v_mov_b64_e32 v[14:15], s[22:23]
+; GISEL-NEXT: v_mov_b64_e32 v[42:43], s[10:11]
+; GISEL-NEXT: v_mov_b64_e32 v[40:41], s[8:9]
+; GISEL-NEXT: v_mfma_i32_32x32x32_i8 v[16:31], v[32:35], v[36:39], v[0:15]
+; GISEL-NEXT: v_mov_b64_e32 v[46:47], s[14:15]
+; GISEL-NEXT: v_mov_b64_e32 v[50:51], s[18:19]
+; GISEL-NEXT: v_mov_b64_e32 v[54:55], s[22:23]
+; GISEL-NEXT: v_mov_b64_e32 v[44:45], s[12:13]
+; GISEL-NEXT: v_mov_b64_e32 v[48:49], s[16:17]
+; GISEL-NEXT: v_mov_b64_e32 v[52:53], s[20:21]
+; GISEL-NEXT: global_store_dwordx4 v56, v[40:43], s[0:1] sc0 sc1
; GISEL-NEXT: s_waitcnt vmcnt(0)
-; GISEL-NEXT: global_store_dwordx4 v24, v[12:15], s[0:1] offset:16 sc0 sc1
+; GISEL-NEXT: global_store_dwordx4 v56, v[44:47], s[0:1] offset:16 sc0 sc1
; GISEL-NEXT: s_waitcnt vmcnt(0)
-; GISEL-NEXT: global_store_dwordx4 v24, v[16:19], s[0:1] offset:32 sc0 sc1
+; GISEL-NEXT: global_store_dwordx4 v56, v[48:51], s[0:1] offset:32 sc0 sc1
; GISEL-NEXT: s_waitcnt vmcnt(0)
-; GISEL-NEXT: global_store_dwordx4 v24, v[20:23], s[0:1] offset:48 sc0 sc1
+; GISEL-NEXT: global_store_dwordx4 v56, v[52:55], s[0:1] offset:48 sc0 sc1
; GISEL-NEXT: s_waitcnt vmcnt(0)
-; GISEL-NEXT: global_store_dwordx4 v24, a[16:19], s[0:1] sc0 sc1
+; GISEL-NEXT: global_store_dwordx4 v56, v[16:19], s[0:1] sc0 sc1
; GISEL-NEXT: s_waitcnt vmcnt(0)
-; GISEL-NEXT: global_store_dwordx4 v24, a[20:23], s[0:1] offset:16 sc0 sc1
+; GISEL-NEXT: global_store_dwordx4 v56, v[20:23], s[0:1] offset:16 sc0 sc1
; GISEL-NEXT: s_waitcnt vmcnt(0)
-; GISEL-NEXT: global_store_dwordx4 v24, a[24:27], s[0:1] offset:32 sc0 sc1
+; GISEL-NEXT: global_store_dwordx4 v56, v[24:27], s[0:1] offset:32 sc0 sc1
; GISEL-NEXT: s_waitcnt vmcnt(0)
-; GISEL-NEXT: global_store_dwordx4 v24, a[28:31], s[0:1] offset:48 sc0 sc1
+; GISEL-NEXT: global_store_dwordx4 v56, v[28:31], s[0:1] offset:48 sc0 sc1
; GISEL-NEXT: s_waitcnt vmcnt(0)
; GISEL-NEXT: s_endpgm
;
@@ -4371,70 +4248,63 @@ define amdgpu_kernel void @test_mfma_i32_32x32x32_i8__vgprcd(<4 x i32> %arg0, <4
; HEURRC: ; %bb.0:
; HEURRC-NEXT: s_load_dwordx8 s[20:27], s[4:5], 0x24
; HEURRC-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xa4
-; HEURRC-NEXT: v_mov_b32_e32 v0, 0
+; HEURRC-NEXT: v_mov_b32_e32 v40, 0
; HEURRC-NEXT: s_waitcnt lgkmcnt(0)
-; HEURRC-NEXT: v_mov_b32_e32 v2, s20
-; HEURRC-NEXT: v_mov_b32_e32 v3, s21
-; HEURRC-NEXT: v_mov_b32_e32 v4, s22
-; HEURRC-NEXT: v_mov_b32_e32 v5, s23
+; HEURRC-NEXT: v_mov_b32_e32 v32, s20
+; HEURRC-NEXT: v_mov_b32_e32 v33, s21
+; HEURRC-NEXT: v_mov_b32_e32 v34, s22
+; HEURRC-NEXT: v_mov_b32_e32 v35, s23
; HEURRC-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x64
-; HEURRC-NEXT: v_mov_b32_e32 v6, s24
-; HEURRC-NEXT: v_mov_b32_e32 v7, s25
-; HEURRC-NEXT: v_mov_b32_e32 v8, s26
-; HEURRC-NEXT: v_mov_b32_e32 v9, s27
+; HEURRC-NEXT: v_mov_b32_e32 v36, s24
+; HEURRC-NEXT: v_mov_b32_e32 v37, s25
+; HEURRC-NEXT: v_mov_b32_e32 v38, s26
+; HEURRC-NEXT: v_mov_b32_e32 v39, s27
; HEURRC-NEXT: s_waitcnt lgkmcnt(0)
-; HEURRC-NEXT: v_accvgpr_write_b32 a31, s23
-; HEURRC-NEXT: v_accvgpr_write_b32 a30, s22
-; HEURRC-NEXT: v_accvgpr_write_b32 a29, s21
-; HEURRC-NEXT: v_accvgpr_write_b32 a28, s20
-; HEURRC-NEXT: v_accvgpr_write_b32 a27, s19
-; HEURRC-NEXT: v_accvgpr_write_b32 a26, s18
-; HEURRC-NEXT: v_accvgpr_write_b32 a25, s17
-; HEURRC-NEXT: v_accvgpr_write_b32 a24, s16
-; HEURRC-NEXT: v_accvgpr_write_b32 a23, s15
-; HEURRC-NEXT: v_accvgpr_write_b32 a22, s14
-; HEURRC-NEXT: v_accvgpr_write_b32 a21, s13
-; HEURRC-NEXT: v_accvgpr_write_b32 a20, s12
-; HEURRC-NEXT: v_accvgpr_write_b32 a19, s11
-; HEURRC-NEXT: v_accvgpr_write_b32 a18, s10
-; HEURRC-NEXT: v_accvgpr_write_b32 a17, s9
-; HEURRC-NEXT: v_accvgpr_write_b32 a16, s8
+; HEURRC-NEXT: v_mov_b64_e32 v[30:31], s[22:23]
+; HEURRC-NEXT: v_mov_b64_e32 v[28:29], s[20:21]
+; HEURRC-NEXT: v_mov_b64_e32 v[26:27], s[18:19]
+; HEURRC-NEXT: v_mov_b64_e32 v[24:25], s[16:17]
+; HEURRC-NEXT: v_mov_b64_e32 v[22:23], s[14:15]
+; HEURRC-NEXT: v_mov_b64_e32 v[20:21], s[12:13]
+; HEURRC-NEXT: v_mov_b64_e32 v[18:19], s[10:11]
+; HEURRC-NEXT: v_mov_b64_e32 v[16:17], s[8:9]
; HEURRC-NEXT: s_nop 1
-; HEURRC-NEXT: v_mfma_i32_32x32x32_i8 a[0:15], v[2:5], v[6:9], a[16:31]
-; HEURRC-NEXT: v_mov_b32_e32 v2, s20
-; HEURRC-NEXT: v_mov_b32_e32 v3, s21
-; HEURRC-NEXT: v_mov_b32_e32 v4, s22
-; HEURRC-NEXT: v_mov_b32_e32 v5, s23
-; HEURRC-NEXT: global_store_dwordx4 v0, v[2:5], s[0:1] offset:48 sc0 sc1
+; HEURRC-NEXT: v_mfma_i32_32x32x32_i8 v[0:15], v[32:35], v[36:39], v[16:31]
+; HEURRC-NEXT: s_nop 6
+; HEURRC-NEXT: v_mov_b32_e32 v16, s20
+; HEURRC-NEXT: v_mov_b32_e32 v17, s21
+; HEURRC-NEXT: v_mov_b32_e32 v18, s22
+; HEURRC-NEXT: v_mov_b32_e32 v19, s23
+; HEURRC-NEXT: global_store_dwordx4 v40, v[16:19], s[0:1] offset:48 sc0 sc1
; HEURRC-NEXT: s_waitcnt vmcnt(0)
; HEURRC-NEXT: s_nop 0
-; HEURRC-NEXT: v_mov_b32_e32 v2, s16
-; HEURRC-NEXT: v_mov_b32_e32 v3, s17
-; HEURRC-NEXT: v_mov_b32_e32 v4, s18
-; HEURRC-NEXT: v_mov_b32_e32 v5, s19
-; HEURRC-NEXT: global_store_dwordx4 v0, v[2:5], s[0:1] offset:32 sc0 sc1
+; HEURRC-NEXT: v_mov_b32_e32 v16, s16
+; HEURRC-NEXT: v_mov_b32_e32 v17, s17
+; HEURRC-NEXT: v_mov_b32_e32 v18, s18
+; HEURRC-NEXT: v_mov_b32_e32 v19, s19
+; HEURRC-NEXT: global_store_dwordx4 v40, v[16:19], s[0:1] offset:32 sc0 sc1
; HEURRC-NEXT: s_waitcnt vmcnt(0)
; HEURRC-NEXT: s_nop 0
-; HEURRC-NEXT: v_mov_b32_e32 v2, s12
-; HEURRC-NEXT: v_mov_b32_e32 v3, s13
-; HEURRC-NEXT: v_mov_b32_e32 v4, s14
-; HEURRC-NEXT: v_mov_b32_e32 v5, s15
-; HEURRC-NEXT: global_store_dwordx4 v0, v[2:5], s[0:1] offset:16 sc0 sc1
+; HEURRC-NEXT: v_mov_b32_e32 v16, s12
+; HEURRC-NEXT: v_mov_b32_e32 v17, s13
+; HEURRC-NEXT: v_mov_b32_e32 v18, s14
+; HEURRC-NEXT: v_mov_b32_e32 v19, s15
+; HEURRC-NEXT: global_store_dwordx4 v40, v[16:19], s[0:1] offset:16 sc0 sc1
; HEURRC-NEXT: s_waitcnt vmcnt(0)
; HEURRC-NEXT: s_nop 0
-; HEURRC-NEXT: v_mov_b32_e32 v2, s8
-; HEURRC-NEXT: v_mov_b32_e32 v3, s9
-; HEURRC-NEXT: v_mov_b32_e32 v4, s10
-; HEURRC-NEXT: v_mov_b32_e32 v5, s11
-; HEURRC-NEXT: global_store_dwordx4 v0, v[2:5], s[0:1] sc0 sc1
+; HEURRC-NEXT: v_mov_b32_e32 v16, s8
+; HEURRC-NEXT: v_mov_b32_e32 v17, s9
+; HEURRC-NEXT: v_mov_b32_e32 v18, s10
+; HEURRC-NEXT: v_mov_b32_e32 v19, s11
+; HEURRC-NEXT: global_store_dwordx4 v40, v[16:19], s[0:1] sc0 sc1
; HEURRC-NEXT: s_waitcnt vmcnt(0)
-; HEURRC-NEXT: global_store_dwordx4 v0, a[8:11], s[0:1] offset:32 sc0 sc1
+; HEURRC-NEXT: global_store_dwordx4 v40, v[8:11], s[0:1] offset:32 sc0 sc1
; HEURRC-NEXT: s_waitcnt vmcnt(0)
-; HEURRC-NEXT: global_store_dwordx4 v0, a[12:15], s[0:1] offset:48 sc0 sc1
+; HEURRC-NEXT: global_store_dwordx4 v40, v[12:15], s[0:1] offset:48 sc0 sc1
; HEURRC-NEXT: s_waitcnt vmcnt(0)
-; HEURRC-NEXT: global_store_dwordx4 v0, a[0:3], s[0:1] sc0 sc1
+; HEURRC-NEXT: global_store_dwordx4 v40, v[0:3], s[0:1] sc0 sc1
; HEURRC-NEXT: s_waitcnt vmcnt(0)
-; HEURRC-NEXT: global_store_dwordx4 v0, a[4:7], s[0:1] offset:16 sc0 sc1
+; HEURRC-NEXT: global_store_dwordx4 v40, v[4:7], s[0:1] offset:16 sc0 sc1
; HEURRC-NEXT: s_waitcnt vmcnt(0)
; HEURRC-NEXT: s_endpgm
;
@@ -4442,17 +4312,17 @@ define amdgpu_kernel void @test_mfma_i32_32x32x32_i8__vgprcd(<4 x i32> %arg0, <4
; VGPRRC: ; %bb.0:
; VGPRRC-NEXT: s_load_dwordx8 s[20:27], s[4:5], 0x24
; VGPRRC-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xa4
-; VGPRRC-NEXT: v_mov_b32_e32 v32, 0
+; VGPRRC-NEXT: v_mov_b32_e32 v40, 0
; VGPRRC-NEXT: s_waitcnt lgkmcnt(0)
-; VGPRRC-NEXT: v_mov_b32_e32 v34, s20
-; VGPRRC-NEXT: v_mov_b32_e32 v35, s21
-; VGPRRC-NEXT: v_mov_b32_e32 v36, s22
-; VGPRRC-NEXT: v_mov_b32_e32 v37, s23
+; VGPRRC-NEXT: v_mov_b32_e32 v32, s20
+; VGPRRC-NEXT: v_mov_b32_e32 v33, s21
+; VGPRRC-NEXT: v_mov_b32_e32 v34, s22
+; VGPRRC-NEXT: v_mov_b32_e32 v35, s23
; VGPRRC-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x64
-; VGPRRC-NEXT: v_mov_b32_e32 v38, s24
-; VGPRRC-NEXT: v_mov_b32_e32 v39, s25
-; VGPRRC-NEXT: v_mov_b32_e32 v40, s26
-; VGPRRC-NEXT: v_mov_b32_e32 v41, s27
+; VGPRRC-NEXT: v_mov_b32_e32 v36, s24
+; VGPRRC-NEXT: v_mov_b32_e32 v37, s25
+; VGPRRC-NEXT: v_mov_b32_e32 v38, s26
+; VGPRRC-NEXT: v_mov_b32_e32 v39, s27
; VGPRRC-NEXT: s_waitcnt lgkmcnt(0)
; VGPRRC-NEXT: v_mov_b64_e32 v[30:31], s[22:23]
; VGPRRC-NEXT: v_mov_b64_e32 v[28:29], s[20:21]
@@ -4463,42 +4333,42 @@ define amdgpu_kernel void @test_mfma_i32_32x32x32_i8__vgprcd(<4 x i32> %arg0, <4
; VGPRRC-NEXT: v_mov_b64_e32 v[18:19], s[10:11]
; VGPRRC-NEXT: v_mov_b64_e32 v[16:17], s[8:9]
; VGPRRC-NEXT: s_nop 1
-; VGPRRC-NEXT: v_mfma_i32_32x32x32_i8 v[0:15], v[34:37], v[38:41], v[16:31]
+; VGPRRC-NEXT: v_mfma_i32_32x32x32_i8 v[0:15], v[32:35], v[36:39], v[16:31]
; VGPRRC-NEXT: s_nop 6
; VGPRRC-NEXT: v_mov_b32_e32 v16, s20
; VGPRRC-NEXT: v_mov_b32_e32 v17, s21
; VGPRRC-NEXT: v_mov_b32_e32 v18, s22
; VGPRRC-NEXT: v_mov_b32_e32 v19, s23
-; VGPRRC-NEXT: global_store_dwordx4 v32, v[16:19], s[0:1] offset:48 sc0 sc1
+; VGPRRC-NEXT: global_store_dwordx4 v40, v[16:19], s[0:1] offset:48 sc0 sc1
; VGPRRC-NEXT: s_waitcnt vmcnt(0)
; VGPRRC-NEXT: s_nop 0
; VGPRRC-NEXT: v_mov_b32_e32 v16, s16
; VGPRRC-NEXT: v_mov_b32_e32 v17, s17
; VGPRRC-NEXT: v_mov_b32_e32 v18, s18
; VGPRRC-NEXT: v_mov_b32_e32 v19, s19
-; VGPRRC-NEXT: global_store_dwordx4 v32, v[16:19], s[0:1] offset:32 sc0 sc1
+; VGPRRC-NEXT: global_store_dwordx4 v40, v[16:19], s[0:1] offset:32 sc0 sc1
; VGPRRC-NEXT: s_waitcnt vmcnt(0)
; VGPRRC-NEXT: s_nop 0
; VGPRRC-NEXT: v_mov_b32_e32 v16, s12
; VGPRRC-NEXT: v_mov_b32_e32 v17, s13
; VGPRRC-NEXT: v_mov_b32_e32 v18, s14
; VGPRRC-NEXT: v_mov_b32_e32 v19, s15
-; VGPRRC-NEXT: global_store_dwordx4 v32, v[16:19], s[0:1] offset:16 sc0 sc1
+; VGPRRC-NEXT: global_store_dwordx4 v40, v[16:19], s[0:1] offset:16 sc0 sc1
; VGPRRC-NEXT: s_waitcnt vmcnt(0)
; VGPRRC-NEXT: s_nop 0
; VGPRRC-NEXT: v_mov_b32_e32 v16, s8
; VGPRRC-NEXT: v_mov_b32_e32 v17, s9
; VGPRRC-NEXT: v_mov_b32_e32 v18, s10
; VGPRRC-NEXT: v_mov_b32_e32 v19, s11
-; VGPRRC-NEXT: global_store_dwordx4 v32, v[16:19], s[0:1] sc0 sc1
+; VGPRRC-NEXT: global_store_dwordx4 v40, v[16:19], s[0:1] sc0 sc1
; VGPRRC-NEXT: s_waitcnt vmcnt(0)
-; VGPRRC-NEXT: global_store_dwordx4 v32, v[8:11], s[0:1] offset:32 sc0 sc1
+; VGPRRC-NEXT: global_store_dwordx4 v40, v[8:11], s[0:1] offset:32 sc0 sc1
; VGPRRC-NEXT: s_waitcnt vmcnt(0)
-; VGPRRC-NEXT: global_store_dwordx4 v32, v[12:15], s[0:1] offset:48 sc0 sc1
+; VGPRRC-NEXT: global_store_dwordx4 v40, v[12:15], s[0:1] offset:48 sc0 sc1
; VGPRRC-NEXT: s_waitcnt vmcnt(0)
-; VGPRRC-NEXT: global_store_dwordx4 v32, v[0:3], s[0:1] sc0 sc1
+; VGPRRC-NEXT: global_store_dwordx4 v40, v[0:3], s[0:1] sc0 sc1
; VGPRRC-NEXT: s_waitcnt vmcnt(0)
-; VGPRRC-NEXT: global_store_dwordx4 v32, v[4:7], s[0:1] offset:16 sc0 sc1
+; VGPRRC-NEXT: global_store_dwordx4 v40, v[4:7], s[0:1] offset:16 sc0 sc1
; VGPRRC-NEXT: s_waitcnt vmcnt(0)
; VGPRRC-NEXT: s_endpgm
; AGPR-LABEL: test_mfma_i32_32x32x32_i8__vgprcd:
@@ -4645,70 +4515,63 @@ define amdgpu_kernel void @test_mfma_i32_32x32x32_i8__vgprcd__flags(<4 x i32> %a
; SDAG: ; %bb.0:
; SDAG-NEXT: s_load_dwordx8 s[20:27], s[4:5], 0x24
; SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xa4
-; SDAG-NEXT: v_mov_b32_e32 v0, 0
+; SDAG-NEXT: v_mov_b32_e32 v40, 0
; SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; SDAG-NEXT: v_mov_b32_e32 v2, s20
-; SDAG-NEXT: v_mov_b32_e32 v3, s21
-; SDAG-NEXT: v_mov_b32_e32 v4, s22
-; SDAG-NEXT: v_mov_b32_e32 v5, s23
+; SDAG-NEXT: v_mov_b32_e32 v32, s20
+; SDAG-NEXT: v_mov_b32_e32 v33, s21
+; SDAG-NEXT: v_mov_b32_e32 v34, s22
+; SDAG-NEXT: v_mov_b32_e32 v35, s23
; SDAG-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x64
-; SDAG-NEXT: v_mov_b32_e32 v6, s24
-; SDAG-NEXT: v_mov_b32_e32 v7, s25
-; SDAG-NEXT: v_mov_b32_e32 v8, s26
-; SDAG-NEXT: v_mov_b32_e32 v9, s27
+; SDAG-NEXT: v_mov_b32_e32 v36, s24
+; SDAG-NEXT: v_mov_b32_e32 v37, s25
+; SDAG-NEXT: v_mov_b32_e32 v38, s26
+; SDAG-NEXT: v_mov_b32_e32 v39, s27
; SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; SDAG-NEXT: v_accvgpr_write_b32 a31, s23
-; SDAG-NEXT: v_accvgpr_write_b32 a30, s22
-; SDAG-NEXT: v_accvgpr_write_b32 a29, s21
-; SDAG-NEXT: v_accvgpr_write_b32 a28, s20
-; SDAG-NEXT: v_accvgpr_write_b32 a27, s19
-; SDAG-NEXT: v_accvgpr_write_b32 a26, s18
-; SDAG-NEXT: v_accvgpr_write_b32 a25, s17
-; SDAG-NEXT: v_accvgpr_write_b32 a24, s16
-; SDAG-NEXT: v_accvgpr_write_b32 a23, s15
-; SDAG-NEXT: v_accvgpr_write_b32 a22, s14
-; SDAG-NEXT: v_accvgpr_write_b32 a21, s13
-; SDAG-NEXT: v_accvgpr_write_b32 a20, s12
-; SDAG-NEXT: v_accvgpr_write_b32 a19, s11
-; SDAG-NEXT: v_accvgpr_write_b32 a18, s10
-; SDAG-NEXT: v_accvgpr_write_b32 a17, s9
-; SDAG-NEXT: v_accvgpr_write_b32 a16, s8
+; SDAG-NEXT: v_mov_b64_e32 v[30:31], s[22:23]
+; SDAG-NEXT: v_mov_b64_e32 v[28:29], s[20:21]
+; SDAG-NEXT: v_mov_b64_e32 v[26:27], s[18:19]
+; SDAG-NEXT: v_mov_b64_e32 v[24:25], s[16:17]
+; SDAG-NEXT: v_mov_b64_e32 v[22:23], s[14:15]
+; SDAG-NEXT: v_mov_b64_e32 v[20:21], s[12:13]
+; SDAG-NEXT: v_mov_b64_e32 v[18:19], s[10:11]
+; SDAG-NEXT: v_mov_b64_e32 v[16:17], s[8:9]
; SDAG-NEXT: s_nop 1
-; SDAG-NEXT: v_mfma_i32_32x32x32_i8 a[0:15], v[2:5], v[6:9], a[16:31] cbsz:1 abid:2 blgp:3
-; SDAG-NEXT: v_mov_b32_e32 v2, s20
-; SDAG-NEXT: v_mov_b32_e32 v3, s21
-; SDAG-NEXT: v_mov_b32_e32 v4, s22
-; SDAG-NEXT: v_mov_b32_e32 v5, s23
-; SDAG-NEXT: global_store_dwordx4 v0, v[2:5], s[0:1] offset:48 sc0 sc1
+; SDAG-NEXT: v_mfma_i32_32x32x32_i8 v[0:15], v[32:35], v[36:39], v[16:31] cbsz:1 abid:2 blgp:3
+; SDAG-NEXT: s_nop 6
+; SDAG-NEXT: v_mov_b32_e32 v16, s20
+; SDAG-NEXT: v_mov_b32_e32 v17, s21
+; SDAG-NEXT: v_mov_b32_e32 v18, s22
+; SDAG-NEXT: v_mov_b32_e32 v19, s23
+; SDAG-NEXT: global_store_dwordx4 v40, v[16:19], s[0:1] offset:48 sc0 sc1
; SDAG-NEXT: s_waitcnt vmcnt(0)
; SDAG-NEXT: s_nop 0
-; SDAG-NEXT: v_mov_b32_e32 v2, s16
-; SDAG-NEXT: v_mov_b32_e32 v3, s17
-; SDAG-NEXT: v_mov_b32_e32 v4, s18
-; SDAG-NEXT: v_mov_b32_e32 v5, s19
-; SDAG-NEXT: global_store_dwordx4 v0, v[2:5], s[0:1] offset:32 sc0 sc1
+; SDAG-NEXT: v_mov_b32_e32 v16, s16
+; SDAG-NEXT: v_mov_b32_e32 v17, s17
+; SDAG-NEXT: v_mov_b32_e32 v18, s18
+; SDAG-NEXT: v_mov_b32_e32 v19, s19
+; SDAG-NEXT: global_store_dwordx4 v40, v[16:19], s[0:1] offset:32 sc0 sc1
; SDAG-NEXT: s_waitcnt vmcnt(0)
; SDAG-NEXT: s_nop 0
-; SDAG-NEXT: v_mov_b32_e32 v2, s12
-; SDAG-NEXT: v_mov_b32_e32 v3, s13
-; SDAG-NEXT: v_mov_b32_e32 v4, s14
-; SDAG-NEXT: v_mov_b32_e32 v5, s15
-; SDAG-NEXT: global_store_dwordx4 v0, v[2:5], s[0:1] offset:16 sc0 sc1
+; SDAG-NEXT: v_mov_b32_e32 v16, s12
+; SDAG-NEXT: v_mov_b32_e32 v17, s13
+; SDAG-NEXT: v_mov_b32_e32 v18, s14
+; SDAG-NEXT: v_mov_b32_e32 v19, s15
+; SDAG-NEXT: global_store_dwordx4 v40, v[16:19], s[0:1] offset:16 sc0 sc1
; SDAG-NEXT: s_waitcnt vmcnt(0)
; SDAG-NEXT: s_nop 0
-; SDAG-NEXT: v_mov_b32_e32 v2, s8
-; SDAG-NEXT: v_mov_b32_e32 v3, s9
-; SDAG-NEXT: v_mov_b32_e32 v4, s10
-; SDAG-NEXT: v_mov_b32_e32 v5, s11
-; SDAG-NEXT: global_store_dwordx4 v0, v[2:5], s[0:1] sc0 sc1
+; SDAG-NEXT: v_mov_b32_e32 v16, s8
+; SDAG-NEXT: v_mov_b32_e32 v17, s9
+; SDAG-NEXT: v_mov_b32_e32 v18, s10
+; SDAG-NEXT: v_mov_b32_e32 v19, s11
+; SDAG-NEXT: global_store_dwordx4 v40, v[16:19], s[0:1] sc0 sc1
; SDAG-NEXT: s_waitcnt vmcnt(0)
-; SDAG-NEXT: global_store_dwordx4 v0, a[8:11], s[0:1] offset:32 sc0 sc1
+; SDAG-NEXT: global_store_dwordx4 v40, v[8:11], s[0:1] offset:32 sc0 sc1
; SDAG-NEXT: s_waitcnt vmcnt(0)
-; SDAG-NEXT: global_store_dwordx4 v0, a[12:15], s[0:1] offset:48 sc0 sc1
+; SDAG-NEXT: global_store_dwordx4 v40, v[12:15], s[0:1] offset:48 sc0 sc1
; SDAG-NEXT: s_waitcnt vmcnt(0)
-; SDAG-NEXT: global_store_dwordx4 v0, a[0:3], s[0:1] sc0 sc1
+; SDAG-NEXT: global_store_dwordx4 v40, v[0:3], s[0:1] sc0 sc1
; SDAG-NEXT: s_waitcnt vmcnt(0)
-; SDAG-NEXT: global_store_dwordx4 v0, a[4:7], s[0:1] offset:16 sc0 sc1
+; SDAG-NEXT: global_store_dwordx4 v40, v[4:7], s[0:1] offset:16 sc0 sc1
; SDAG-NEXT: s_waitcnt vmcnt(0)
; SDAG-NEXT: s_endpgm
;
@@ -4717,52 +4580,44 @@ define amdgpu_kernel void @test_mfma_i32_32x32x32_i8__vgprcd__flags(<4 x i32> %a
; GISEL-NEXT: s_load_dwordx8 s[24:31], s[4:5], 0x24
; GISEL-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x64
; GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xa4
-; GISEL-NEXT: v_mov_b32_e32 v24, 0
+; GISEL-NEXT: v_mov_b32_e32 v56, 0
; GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[24:25]
-; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[26:27]
-; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[28:29]
-; GISEL-NEXT: v_accvgpr_write_b32 a0, s8
-; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[30:31]
-; GISEL-NEXT: v_accvgpr_write_b32 a1, s9
-; GISEL-NEXT: v_accvgpr_write_b32 a2, s10
-; GISEL-NEXT: v_accvgpr_write_b32 a3, s11
-; GISEL-NEXT: v_accvgpr_write_b32 a4, s12
-; GISEL-NEXT: v_accvgpr_write_b32 a5, s13
-; GISEL-NEXT: v_accvgpr_write_b32 a6, s14
-; GISEL-NEXT: v_accvgpr_write_b32 a7, s15
-; GISEL-NEXT: v_accvgpr_write_b32 a8, s16
-; GISEL-NEXT: v_accvgpr_write_b32 a9, s17
-; GISEL-NEXT: v_accvgpr_write_b32 a10, s18
-; GISEL-NEXT: v_accvgpr_write_b32 a11, s19
-; GISEL-NEXT: v_accvgpr_write_b32 a12, s20
-; GISEL-NEXT: v_accvgpr_write_b32 a13, s21
-; GISEL-NEXT: v_accvgpr_write_b32 a14, s22
-; GISEL-NEXT: v_accvgpr_write_b32 a15, s23
-; GISEL-NEXT: v_mov_b64_e32 v[8:9], s[8:9]
-; GISEL-NEXT: v_mov_b64_e32 v[10:11], s[10:11]
-; GISEL-NEXT: v_mfma_i32_32x32x32_i8 a[16:31], v[0:3], v[4:7], a[0:15] cbsz:1 abid:2 blgp:3
-; GISEL-NEXT: v_mov_b64_e32 v[12:13], s[12:13]
-; GISEL-NEXT: v_mov_b64_e32 v[16:17], s[16:17]
-; GISEL-NEXT: v_mov_b64_e32 v[20:21], s[20:21]
-; GISEL-NEXT: v_mov_b64_e32 v[14:15], s[14:15]
-; GISEL-NEXT: v_mov_b64_e32 v[18:19], s[18:19]
-; GISEL-NEXT: v_mov_b64_e32 v[22:23], s[22:23]
-; GISEL-NEXT: global_store_dwordx4 v24, v[8:11], s[0:1] sc0 sc1
+; GISEL-NEXT: v_mov_b64_e32 v[34:35], s[26:27]
+; GISEL-NEXT: v_mov_b64_e32 v[32:33], s[24:25]
+; GISEL-NEXT: v_mov_b64_e32 v[38:39], s[30:31]
+; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[8:9]
+; GISEL-NEXT: v_mov_b64_e32 v[36:37], s[28:29]
+; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[10:11]
+; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[12:13]
+; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[14:15]
+; GISEL-NEXT: v_mov_b64_e32 v[8:9], s[16:17]
+; GISEL-NEXT: v_mov_b64_e32 v[10:11], s[18:19]
+; GISEL-NEXT: v_mov_b64_e32 v[12:13], s[20:21]
+; GISEL-NEXT: v_mov_b64_e32 v[14:15], s[22:23]
+; GISEL-NEXT: v_mov_b64_e32 v[42:43], s[10:11]
+; GISEL-NEXT: v_mov_b64_e32 v[40:41], s[8:9]
+; GISEL-NEXT: v_mfma_i32_32x32x32_i8 v[16:31], v[32:35], v[36:39], v[0:15] cbsz:1 abid:2 blgp:3
+; GISEL-NEXT: v_mov_b64_e32 v[46:47], s[14:15]
+; GISEL-NEXT: v_mov_b64_e32 v[50:51], s[18:19]
+; GISEL-NEXT: v_mov_b64_e32 v[54:55], s[22:23]
+; GISEL-NEXT: v_mov_b64_e32 v[44:45], s[12:13]
+; GISEL-NEXT: v_mov_b64_e32 v[48:49], s[16:17]
+; GISEL-NEXT: v_mov_b64_e32 v[52:53], s[20:21]
+; GISEL-NEXT: global_store_dwordx4 v56, v[40:43], s[0:1] sc0 sc1
; GISEL-NEXT: s_waitcnt vmcnt(0)
-; GISEL-NEXT: global_store_dwordx4 v24, v[12:15], s[0:1] offset:16 sc0 sc1
+; GISEL-NEXT: global_store_dwordx4 v56, v[44:47], s[0:1] offset:16 sc0 sc1
; GISEL-NEXT: s_waitcnt vmcnt(0)
-; GISEL-NEXT: global_store_dwordx4 v24, v[16:19], s[0:1] offset:32 sc0 sc1
+; GISEL-NEXT: global_store_dwordx4 v56, v[48:51], s[0:1] offset:32 sc0 sc1
; GISEL-NEXT: s_waitcnt vmcnt(0)
-; GISEL-NEXT: global_store_dwordx4 v24, v[20:23], s[0:1] offset:48 sc0 sc1
+; GISEL-NEXT: global_store_dwordx4 v56, v[52:55], s[0:1] offset:48 sc0 sc1
; GISEL-NEXT: s_waitcnt vmcnt(0)
-; GISEL-NEXT: global_store_dwordx4 v24, a[16:19], s[0:1] sc0 sc1
+; GISEL-NEXT: global_store_dwordx4 v56, v[16:19], s[0:1] sc0 sc1
; GISEL-NEXT: s_waitcnt vmcnt(0)
-; GISEL-NEXT: global_store_dwordx4 v24, a[20:23], s[0:1] offset:16 sc0 sc1
+; GISEL-NEXT: global_store_dwordx4 v56, v[20:23], s[0:1] offset:16 sc0 sc1
; GISEL-NEXT: s_waitcnt vmcnt(0)
-; GISEL-NEXT: global_store_dwordx4 v24, a[24:27], s[0:1] offset:32 sc0 sc1
+; GISEL-NEXT: global_store_dwordx4 v56, v[24:27], s[0:1] offset:32 sc0 sc1
; GISEL-NEXT: s_waitcnt vmcnt(0)
-; GISEL-NEXT: global_store_dwordx4 v24, a[28:31], s[0:1] offset:48 sc0 sc1
+; GISEL-NEXT: global_store_dwordx4 v56, v[28:31], s[0:1] offset:48 sc0 sc1
; GISEL-NEXT: s_waitcnt vmcnt(0)
; GISEL-NEXT: s_endpgm
;
@@ -4770,70 +4625,63 @@ define amdgpu_kernel void @test_mfma_i32_32x32x32_i8__vgprcd__flags(<4 x i32> %a
; HEURRC: ; %bb.0:
; HEURRC-NEXT: s_load_dwordx8 s[20:27], s[4:5], 0x24
; HEURRC-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xa4
-; HEURRC-NEXT: v_mov_b32_e32 v0, 0
+; HEURRC-NEXT: v_mov_b32_e32 v40, 0
; HEURRC-NEXT: s_waitcnt lgkmcnt(0)
-; HEURRC-NEXT: v_mov_b32_e32 v2, s20
-; HEURRC-NEXT: v_mov_b32_e32 v3, s21
-; HEURRC-NEXT: v_mov_b32_e32 v4, s22
-; HEURRC-NEXT: v_mov_b32_e32 v5, s23
+; HEURRC-NEXT: v_mov_b32_e32 v32, s20
+; HEURRC-NEXT: v_mov_b32_e32 v33, s21
+; HEURRC-NEXT: v_mov_b32_e32 v34, s22
+; HEURRC-NEXT: v_mov_b32_e32 v35, s23
; HEURRC-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x64
-; HEURRC-NEXT: v_mov_b32_e32 v6, s24
-; HEURRC-NEXT: v_mov_b32_e32 v7, s25
-; HEURRC-NEXT: v_mov_b32_e32 v8, s26
-; HEURRC-NEXT: v_mov_b32_e32 v9, s27
+; HEURRC-NEXT: v_mov_b32_e32 v36, s24
+; HEURRC-NEXT: v_mov_b32_e32 v37, s25
+; HEURRC-NEXT: v_mov_b32_e32 v38, s26
+; HEURRC-NEXT: v_mov_b32_e32 v39, s27
; HEURRC-NEXT: s_waitcnt lgkmcnt(0)
-; HEURRC-NEXT: v_accvgpr_write_b32 a31, s23
-; HEURRC-NEXT: v_accvgpr_write_b32 a30, s22
-; HEURRC-NEXT: v_accvgpr_write_b32 a29, s21
-; HEURRC-NEXT: v_accvgpr_write_b32 a28, s20
-; HEURRC-NEXT: v_accvgpr_write_b32 a27, s19
-; HEURRC-NEXT: v_accvgpr_write_b32 a26, s18
-; HEURRC-NEXT: v_accvgpr_write_b32 a25, s17
-; HEURRC-NEXT: v_accvgpr_write_b32 a24, s16
-; HEURRC-NEXT: v_accvgpr_write_b32 a23, s15
-; HEURRC-NEXT: v_accvgpr_write_b32 a22, s14
-; HEURRC-NEXT: v_accvgpr_write_b32 a21, s13
-; HEURRC-NEXT: v_accvgpr_write_b32 a20, s12
-; HEURRC-NEXT: v_accvgpr_write_b32 a19, s11
-; HEURRC-NEXT: v_accvgpr_write_b32 a18, s10
-; HEURRC-NEXT: v_accvgpr_write_b32 a17, s9
-; HEURRC-NEXT: v_accvgpr_write_b32 a16, s8
+; HEURRC-NEXT: v_mov_b64_e32 v[30:31], s[22:23]
+; HEURRC-NEXT: v_mov_b64_e32 v[28:29], s[20:21]
+; HEURRC-NEXT: v_mov_b64_e32 v[26:27], s[18:19]
+; HEURRC-NEXT: v_mov_b64_e32 v[24:25], s[16:17]
+; HEURRC-NEXT: v_mov_b64_e32 v[22:23], s[14:15]
+; HEURRC-NEXT: v_mov_b64_e32 v[20:21], s[12:13]
+; HEURRC-NEXT: v_mov_b64_e32 v[18:19], s[10:11]
+; HEURRC-NEXT: v_mov_b64_e32 v[16:17], s[8:9]
; HEURRC-NEXT: s_nop 1
-; HEURRC-NEXT: v_mfma_i32_32x32x32_i8 a[0:15], v[2:5], v[6:9], a[16:31] cbsz:1 abid:2 blgp:3
-; HEURRC-NEXT: v_mov_b32_e32 v2, s20
-; HEURRC-NEXT: v_mov_b32_e32 v3, s21
-; HEURRC-NEXT: v_mov_b32_e32 v4, s22
-; HEURRC-NEXT: v_mov_b32_e32 v5, s23
-; HEURRC-NEXT: global_store_dwordx4 v0, v[2:5], s[0:1] offset:48 sc0 sc1
+; HEURRC-NEXT: v_mfma_i32_32x32x32_i8 v[0:15], v[32:35], v[36:39], v[16:31] cbsz:1 abid:2 blgp:3
+; HEURRC-NEXT: s_nop 6
+; HEURRC-NEXT: v_mov_b32_e32 v16, s20
+; HEURRC-NEXT: v_mov_b32_e32 v17, s21
+; HEURRC-NEXT: v_mov_b32_e32 v18, s22
+; HEURRC-NEXT: v_mov_b32_e32 v19, s23
+; HEURRC-NEXT: global_store_dwordx4 v40, v[16:19], s[0:1] offset:48 sc0 sc1
; HEURRC-NEXT: s_waitcnt vmcnt(0)
; HEURRC-NEXT: s_nop 0
-; HEURRC-NEXT: v_mov_b32_e32 v2, s16
-; HEURRC-NEXT: v_mov_b32_e32 v3, s17
-; HEURRC-NEXT: v_mov_b32_e32 v4, s18
-; HEURRC-NEXT: v_mov_b32_e32 v5, s19
-; HEURRC-NEXT: global_store_dwordx4 v0, v[2:5], s[0:1] offset:32 sc0 sc1
+; HEURRC-NEXT: v_mov_b32_e32 v16, s16
+; HEURRC-NEXT: v_mov_b32_e32 v17, s17
+; HEURRC-NEXT: v_mov_b32_e32 v18, s18
+; HEURRC-NEXT: v_mov_b32_e32 v19, s19
+; HEURRC-NEXT: global_store_dwordx4 v40, v[16:19], s[0:1] offset:32 sc0 sc1
; HEURRC-NEXT: s_waitcnt vmcnt(0)
; HEURRC-NEXT: s_nop 0
-; HEURRC-NEXT: v_mov_b32_e32 v2, s12
-; HEURRC-NEXT: v_mov_b32_e32 v3, s13
-; HEURRC-NEXT: v_mov_b32_e32 v4, s14
-; HEURRC-NEXT: v_mov_b32_e32 v5, s15
-; HEURRC-NEXT: global_store_dwordx4 v0, v[2:5], s[0:1] offset:16 sc0 sc1
+; HEURRC-NEXT: v_mov_b32_e32 v16, s12
+; HEURRC-NEXT: v_mov_b32_e32 v17, s13
+; HEURRC-NEXT: v_mov_b32_e32 v18, s14
+; HEURRC-NEXT: v_mov_b32_e32 v19, s15
+; HEURRC-NEXT: global_store_dwordx4 v40, v[16:19], s[0:1] offset:16 sc0 sc1
; HEURRC-NEXT: s_waitcnt vmcnt(0)
; HEURRC-NEXT: s_nop 0
-; HEURRC-NEXT: v_mov_b32_e32 v2, s8
-; HEURRC-NEXT: v_mov_b32_e32 v3, s9
-; HEURRC-NEXT: v_mov_b32_e32 v4, s10
-; HEURRC-NEXT: v_mov_b32_e32 v5, s11
-; HEURRC-NEXT: global_store_dwordx4 v0, v[2:5], s[0:1] sc0 sc1
+; HEURRC-NEXT: v_mov_b32_e32 v16, s8
+; HEURRC-NEXT: v_mov_b32_e32 v17, s9
+; HEURRC-NEXT: v_mov_b32_e32 v18, s10
+; HEURRC-NEXT: v_mov_b32_e32 v19, s11
+; HEURRC-NEXT: global_store_dwordx4 v40, v[16:19], s[0:1] sc0 sc1
; HEURRC-NEXT: s_waitcnt vmcnt(0)
-; HEURRC-NEXT: global_store_dwordx4 v0, a[8:11], s[0:1] offset:32 sc0 sc1
+; HEURRC-NEXT: global_store_dwordx4 v40, v[8:11], s[0:1] offset:32 sc0 sc1
; HEURRC-NEXT: s_waitcnt vmcnt(0)
-; HEURRC-NEXT: global_store_dwordx4 v0, a[12:15], s[0:1] offset:48 sc0 sc1
+; HEURRC-NEXT: global_store_dwordx4 v40, v[12:15], s[0:1] offset:48 sc0 sc1
; HEURRC-NEXT: s_waitcnt vmcnt(0)
-; HEURRC-NEXT: global_store_dwordx4 v0, a[0:3], s[0:1] sc0 sc1
+; HEURRC-NEXT: global_store_dwordx4 v40, v[0:3], s[0:1] sc0 sc1
; HEURRC-NEXT: s_waitcnt vmcnt(0)
-; HEURRC-NEXT: global_store_dwordx4 v0, a[4:7], s[0:1] offset:16 sc0 sc1
+; HEURRC-NEXT: global_store_dwordx4 v40, v[4:7], s[0:1] offset:16 sc0 sc1
; HEURRC-NEXT: s_waitcnt vmcnt(0)
; HEURRC-NEXT: s_endpgm
;
@@ -4841,17 +4689,17 @@ define amdgpu_kernel void @test_mfma_i32_32x32x32_i8__vgprcd__flags(<4 x i32> %a
; VGPRRC: ; %bb.0:
; VGPRRC-NEXT: s_load_dwordx8 s[20:27], s[4:5], 0x24
; VGPRRC-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xa4
-; VGPRRC-NEXT: v_mov_b32_e32 v32, 0
+; VGPRRC-NEXT: v_mov_b32_e32 v40, 0
; VGPRRC-NEXT: s_waitcnt lgkmcnt(0)
-; VGPRRC-NEXT: v_mov_b32_e32 v34, s20
-; VGPRRC-NEXT: v_mov_b32_e32 v35, s21
-; VGPRRC-NEXT: v_mov_b32_e32 v36, s22
-; VGPRRC-NEXT: v_mov_b32_e32 v37, s23
+; VGPRRC-NEXT: v_mov_b32_e32 v32, s20
+; VGPRRC-NEXT: v_mov_b32_e32 v33, s21
+; VGPRRC-NEXT: v_mov_b32_e32 v34, s22
+; VGPRRC-NEXT: v_mov_b32_e32 v35, s23
; VGPRRC-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x64
-; VGPRRC-NEXT: v_mov_b32_e32 v38, s24
-; VGPRRC-NEXT: v_mov_b32_e32 v39, s25
-; VGPRRC-NEXT: v_mov_b32_e32 v40, s26
-; VGPRRC-NEXT: v_mov_b32_e32 v41, s27
+; VGPRRC-NEXT: v_mov_b32_e32 v36, s24
+; VGPRRC-NEXT: v_mov_b32_e32 v37, s25
+; VGPRRC-NEXT: v_mov_b32_e32 v38, s26
+; VGPRRC-NEXT: v_mov_b32_e32 v39, s27
; VGPRRC-NEXT: s_waitcnt lgkmcnt(0)
; VGPRRC-NEXT: v_mov_b64_e32 v[30:31], s[22:23]
; VGPRRC-NEXT: v_mov_b64_e32 v[28:29], s[20:21]
@@ -4862,42 +4710,42 @@ define amdgpu_kernel void @test_mfma_i32_32x32x32_i8__vgprcd__flags(<4 x i32> %a
; VGPRRC-NEXT: v_mov_b64_e32 v[18:19], s[10:11]
; VGPRRC-NEXT: v_mov_b64_e32 v[16:17], s[8:9]
; VGPRRC-NEXT: s_nop 1
-; VGPRRC-NEXT: v_mfma_i32_32x32x32_i8 v[0:15], v[34:37], v[38:41], v[16:31] cbsz:1 abid:2 blgp:3
+; VGPRRC-NEXT: v_mfma_i32_32x32x32_i8 v[0:15], v[32:35], v[36:39], v[16:31] cbsz:1 abid:2 blgp:3
; VGPRRC-NEXT: s_nop 6
; VGPRRC-NEXT: v_mov_b32_e32 v16, s20
; VGPRRC-NEXT: v_mov_b32_e32 v17, s21
; VGPRRC-NEXT: v_mov_b32_e32 v18, s22
; VGPRRC-NEXT: v_mov_b32_e32 v19, s23
-; VGPRRC-NEXT: global_store_dwordx4 v32, v[16:19], s[0:1] offset:48 sc0 sc1
+; VGPRRC-NEXT: global_store_dwordx4 v40, v[16:19], s[0:1] offset:48 sc0 sc1
; VGPRRC-NEXT: s_waitcnt vmcnt(0)
; VGPRRC-NEXT: s_nop 0
; VGPRRC-NEXT: v_mov_b32_e32 v16, s16
; VGPRRC-NEXT: v_mov_b32_e32 v17, s17
; VGPRRC-NEXT: v_mov_b32_e32 v18, s18
; VGPRRC-NEXT: v_mov_b32_e32 v19, s19
-; VGPRRC-NEXT: global_store_dwordx4 v32, v[16:19], s[0:1] offset:32 sc0 sc1
+; VGPRRC-NEXT: global_store_dwordx4 v40, v[16:19], s[0:1] offset:32 sc0 sc1
; VGPRRC-NEXT: s_waitcnt vmcnt(0)
; VGPRRC-NEXT: s_nop 0
; VGPRRC-NEXT: v_mov_b32_e32 v16, s12
; VGPRRC-NEXT: v_mov_b32_e32 v17, s13
; VGPRRC-NEXT: v_mov_b32_e32 v18, s14
; VGPRRC-NEXT: v_mov_b32_e32 v19, s15
-; VGPRRC-NEXT: global_store_dwordx4 v32, v[16:19], s[0:1] offset:16 sc0 sc1
+; VGPRRC-NEXT: global_store_dwordx4 v40, v[16:19], s[0:1] offset:16 sc0 sc1
; VGPRRC-NEXT: s_waitcnt vmcnt(0)
; VGPRRC-NEXT: s_nop 0
; VGPRRC-NEXT: v_mov_b32_e32 v16, s8
; VGPRRC-NEXT: v_mov_b32_e32 v17, s9
; VGPRRC-NEXT: v_mov_b32_e32 v18, s10
; VGPRRC-NEXT: v_mov_b32_e32 v19, s11
-; VGPRRC-NEXT: global_store_dwordx4 v32, v[16:19], s[0:1] sc0 sc1
+; VGPRRC-NEXT: global_store_dwordx4 v40, v[16:19], s[0:1] sc0 sc1
; VGPRRC-NEXT: s_waitcnt vmcnt(0)
-; VGPRRC-NEXT: global_store_dwordx4 v32, v[8:11], s[0:1] offset:32 sc0 sc1
+; VGPRRC-NEXT: global_store_dwordx4 v40, v[8:11], s[0:1] offset:32 sc0 sc1
; VGPRRC-NEXT: s_waitcnt vmcnt(0)
-; VGPRRC-NEXT: global_store_dwordx4 v32, v[12:15], s[0:1] offset:48 sc0 sc1
+; VGPRRC-NEXT: global_store_dwordx4 v40, v[12:15], s[0:1] offset:48 sc0 sc1
; VGPRRC-NEXT: s_waitcnt vmcnt(0)
-; VGPRRC-NEXT: global_store_dwordx4 v32, v[0:3], s[0:1] sc0 sc1
+; VGPRRC-NEXT: global_store_dwordx4 v40, v[0:3], s[0:1] sc0 sc1
; VGPRRC-NEXT: s_waitcnt vmcnt(0)
-; VGPRRC-NEXT: global_store_dwordx4 v32, v[4:7], s[0:1] offset:16 sc0 sc1
+; VGPRRC-NEXT: global_store_dwordx4 v40, v[4:7], s[0:1] offset:16 sc0 sc1
; VGPRRC-NEXT: s_waitcnt vmcnt(0)
; VGPRRC-NEXT: s_endpgm
; AGPR-LABEL: test_mfma_i32_32x32x32_i8__vgprcd__flags:
@@ -5045,41 +4893,33 @@ define amdgpu_kernel void @test_mfma_i32_32x32x32_i8__vgprcd_mac(<4 x i32> %arg0
; SDAG-NEXT: s_load_dwordx8 s[20:27], s[4:5], 0x24
; SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xa4
; SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; SDAG-NEXT: v_mov_b32_e32 v0, s20
-; SDAG-NEXT: v_mov_b32_e32 v1, s21
-; SDAG-NEXT: v_mov_b32_e32 v2, s22
-; SDAG-NEXT: v_mov_b32_e32 v3, s23
+; SDAG-NEXT: v_mov_b32_e32 v16, s20
+; SDAG-NEXT: v_mov_b32_e32 v17, s21
+; SDAG-NEXT: v_mov_b32_e32 v18, s22
+; SDAG-NEXT: v_mov_b32_e32 v19, s23
; SDAG-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x64
-; SDAG-NEXT: v_mov_b32_e32 v4, s24
-; SDAG-NEXT: v_mov_b32_e32 v5, s25
-; SDAG-NEXT: v_mov_b32_e32 v6, s26
-; SDAG-NEXT: v_mov_b32_e32 v7, s27
+; SDAG-NEXT: v_mov_b32_e32 v20, s24
+; SDAG-NEXT: v_mov_b32_e32 v21, s25
+; SDAG-NEXT: v_mov_b32_e32 v22, s26
+; SDAG-NEXT: v_mov_b32_e32 v23, s27
; SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; SDAG-NEXT: v_accvgpr_write_b32 a0, s8
-; SDAG-NEXT: v_accvgpr_write_b32 a1, s9
-; SDAG-NEXT: v_accvgpr_write_b32 a2, s10
-; SDAG-NEXT: v_accvgpr_write_b32 a3, s11
-; SDAG-NEXT: v_accvgpr_write_b32 a4, s12
-; SDAG-NEXT: v_accvgpr_write_b32 a5, s13
-; SDAG-NEXT: v_accvgpr_write_b32 a6, s14
-; SDAG-NEXT: v_accvgpr_write_b32 a7, s15
-; SDAG-NEXT: v_accvgpr_write_b32 a8, s16
-; SDAG-NEXT: v_accvgpr_write_b32 a9, s17
-; SDAG-NEXT: v_accvgpr_write_b32 a10, s18
-; SDAG-NEXT: v_accvgpr_write_b32 a11, s19
-; SDAG-NEXT: v_accvgpr_write_b32 a12, s20
-; SDAG-NEXT: v_accvgpr_write_b32 a13, s21
-; SDAG-NEXT: v_accvgpr_write_b32 a14, s22
-; SDAG-NEXT: v_accvgpr_write_b32 a15, s23
+; SDAG-NEXT: v_mov_b64_e32 v[0:1], s[8:9]
+; SDAG-NEXT: v_mov_b64_e32 v[2:3], s[10:11]
+; SDAG-NEXT: v_mov_b64_e32 v[4:5], s[12:13]
+; SDAG-NEXT: v_mov_b64_e32 v[6:7], s[14:15]
+; SDAG-NEXT: v_mov_b64_e32 v[8:9], s[16:17]
+; SDAG-NEXT: v_mov_b64_e32 v[10:11], s[18:19]
+; SDAG-NEXT: v_mov_b64_e32 v[12:13], s[20:21]
+; SDAG-NEXT: v_mov_b64_e32 v[14:15], s[22:23]
; SDAG-NEXT: s_nop 1
-; SDAG-NEXT: v_mfma_i32_32x32x32_i8 a[0:15], v[0:3], v[4:7], a[0:15]
-; SDAG-NEXT: v_mov_b32_e32 v0, 0
+; SDAG-NEXT: v_mfma_i32_32x32x32_i8 v[0:15], v[16:19], v[20:23], v[0:15]
+; SDAG-NEXT: v_mov_b32_e32 v16, 0
; SDAG-NEXT: s_nop 7
; SDAG-NEXT: s_nop 2
-; SDAG-NEXT: global_store_dwordx4 v0, a[12:15], s[0:1] offset:48
-; SDAG-NEXT: global_store_dwordx4 v0, a[8:11], s[0:1] offset:32
-; SDAG-NEXT: global_store_dwordx4 v0, a[4:7], s[0:1] offset:16
-; SDAG-NEXT: global_store_dwordx4 v0, a[0:3], s[0:1]
+; SDAG-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] offset:48
+; SDAG-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] offset:32
+; SDAG-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16
+; SDAG-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1]
; SDAG-NEXT: s_endpgm
;
; GISEL-LABEL: test_mfma_i32_32x32x32_i8__vgprcd_mac:
@@ -5088,35 +4928,27 @@ define amdgpu_kernel void @test_mfma_i32_32x32x32_i8__vgprcd_mac(<4 x i32> %arg0
; GISEL-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x64
; GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xa4
; GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[24:25]
-; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[26:27]
-; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[28:29]
-; GISEL-NEXT: v_accvgpr_write_b32 a0, s8
-; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[30:31]
-; GISEL-NEXT: v_accvgpr_write_b32 a1, s9
-; GISEL-NEXT: v_accvgpr_write_b32 a2, s10
-; GISEL-NEXT: v_accvgpr_write_b32 a3, s11
-; GISEL-NEXT: v_accvgpr_write_b32 a4, s12
-; GISEL-NEXT: v_accvgpr_write_b32 a5, s13
-; GISEL-NEXT: v_accvgpr_write_b32 a6, s14
-; GISEL-NEXT: v_accvgpr_write_b32 a7, s15
-; GISEL-NEXT: v_accvgpr_write_b32 a8, s16
-; GISEL-NEXT: v_accvgpr_write_b32 a9, s17
-; GISEL-NEXT: v_accvgpr_write_b32 a10, s18
-; GISEL-NEXT: v_accvgpr_write_b32 a11, s19
-; GISEL-NEXT: v_accvgpr_write_b32 a12, s20
-; GISEL-NEXT: v_accvgpr_write_b32 a13, s21
-; GISEL-NEXT: v_accvgpr_write_b32 a14, s22
-; GISEL-NEXT: v_accvgpr_write_b32 a15, s23
+; GISEL-NEXT: v_mov_b64_e32 v[16:17], s[24:25]
+; GISEL-NEXT: v_mov_b64_e32 v[18:19], s[26:27]
+; GISEL-NEXT: v_mov_b64_e32 v[20:21], s[28:29]
+; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[8:9]
+; GISEL-NEXT: v_mov_b64_e32 v[22:23], s[30:31]
+; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[10:11]
+; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[12:13]
+; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[14:15]
+; GISEL-NEXT: v_mov_b64_e32 v[8:9], s[16:17]
+; GISEL-NEXT: v_mov_b64_e32 v[10:11], s[18:19]
+; GISEL-NEXT: v_mov_b64_e32 v[12:13], s[20:21]
+; GISEL-NEXT: v_mov_b64_e32 v[14:15], s[22:23]
; GISEL-NEXT: s_nop 1
-; GISEL-NEXT: v_mfma_i32_32x32x32_i8 a[0:15], v[0:3], v[4:7], a[0:15]
-; GISEL-NEXT: v_mov_b32_e32 v0, 0
+; GISEL-NEXT: v_mfma_i32_32x32x32_i8 v[0:15], v[16:19], v[20:23], v[0:15]
+; GISEL-NEXT: v_mov_b32_e32 v16, 0
; GISEL-NEXT: s_nop 7
; GISEL-NEXT: s_nop 2
-; GISEL-NEXT: global_store_dwordx4 v0, a[0:3], s[0:1]
-; GISEL-NEXT: global_store_dwordx4 v0, a[4:7], s[0:1] offset:16
-; GISEL-NEXT: global_store_dwordx4 v0, a[8:11], s[0:1] offset:32
-; GISEL-NEXT: global_store_dwordx4 v0, a[12:15], s[0:1] offset:48
+; GISEL-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1]
+; GISEL-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16
+; GISEL-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] offset:32
+; GISEL-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] offset:48
; GISEL-NEXT: s_endpgm
;
; HEURRC-LABEL: test_mfma_i32_32x32x32_i8__vgprcd_mac:
@@ -5124,41 +4956,33 @@ define amdgpu_kernel void @test_mfma_i32_32x32x32_i8__vgprcd_mac(<4 x i32> %arg0
; HEURRC-NEXT: s_load_dwordx8 s[20:27], s[4:5], 0x24
; HEURRC-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xa4
; HEURRC-NEXT: s_waitcnt lgkmcnt(0)
-; HEURRC-NEXT: v_mov_b32_e32 v0, s20
-; HEURRC-NEXT: v_mov_b32_e32 v1, s21
-; HEURRC-NEXT: v_mov_b32_e32 v2, s22
-; HEURRC-NEXT: v_mov_b32_e32 v3, s23
+; HEURRC-NEXT: v_mov_b32_e32 v16, s20
+; HEURRC-NEXT: v_mov_b32_e32 v17, s21
+; HEURRC-NEXT: v_mov_b32_e32 v18, s22
+; HEURRC-NEXT: v_mov_b32_e32 v19, s23
; HEURRC-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x64
-; HEURRC-NEXT: v_mov_b32_e32 v4, s24
-; HEURRC-NEXT: v_mov_b32_e32 v5, s25
-; HEURRC-NEXT: v_mov_b32_e32 v6, s26
-; HEURRC-NEXT: v_mov_b32_e32 v7, s27
+; HEURRC-NEXT: v_mov_b32_e32 v20, s24
+; HEURRC-NEXT: v_mov_b32_e32 v21, s25
+; HEURRC-NEXT: v_mov_b32_e32 v22, s26
+; HEURRC-NEXT: v_mov_b32_e32 v23, s27
; HEURRC-NEXT: s_waitcnt lgkmcnt(0)
-; HEURRC-NEXT: v_accvgpr_write_b32 a0, s8
-; HEURRC-NEXT: v_accvgpr_write_b32 a1, s9
-; HEURRC-NEXT: v_accvgpr_write_b32 a2, s10
-; HEURRC-NEXT: v_accvgpr_write_b32 a3, s11
-; HEURRC-NEXT: v_accvgpr_write_b32 a4, s12
-; HEURRC-NEXT: v_accvgpr_write_b32 a5, s13
-; HEURRC-NEXT: v_accvgpr_write_b32 a6, s14
-; HEURRC-NEXT: v_accvgpr_write_b32 a7, s15
-; HEURRC-NEXT: v_accvgpr_write_b32 a8, s16
-; HEURRC-NEXT: v_accvgpr_write_b32 a9, s17
-; HEURRC-NEXT: v_accvgpr_write_b32 a10, s18
-; HEURRC-NEXT: v_accvgpr_write_b32 a11, s19
-; HEURRC-NEXT: v_accvgpr_write_b32 a12, s20
-; HEURRC-NEXT: v_accvgpr_write_b32 a13, s21
-; HEURRC-NEXT: v_accvgpr_write_b32 a14, s22
-; HEURRC-NEXT: v_accvgpr_write_b32 a15, s23
+; HEURRC-NEXT: v_mov_b64_e32 v[0:1], s[8:9]
+; HEURRC-NEXT: v_mov_b64_e32 v[2:3], s[10:11]
+; HEURRC-NEXT: v_mov_b64_e32 v[4:5], s[12:13]
+; HEURRC-NEXT: v_mov_b64_e32 v[6:7], s[14:15]
+; HEURRC-NEXT: v_mov_b64_e32 v[8:9], s[16:17]
+; HEURRC-NEXT: v_mov_b64_e32 v[10:11], s[18:19]
+; HEURRC-NEXT: v_mov_b64_e32 v[12:13], s[20:21]
+; HEURRC-NEXT: v_mov_b64_e32 v[14:15], s[22:23]
; HEURRC-NEXT: s_nop 1
-; HEURRC-NEXT: v_mfma_i32_32x32x32_i8 a[0:15], v[0:3], v[4:7], a[0:15]
-; HEURRC-NEXT: v_mov_b32_e32 v0, 0
+; HEURRC-NEXT: v_mfma_i32_32x32x32_i8 v[0:15], v[16:19], v[20:23], v[0:15]
+; HEURRC-NEXT: v_mov_b32_e32 v16, 0
; HEURRC-NEXT: s_nop 7
; HEURRC-NEXT: s_nop 2
-; HEURRC-NEXT: global_store_dwordx4 v0, a[12:15], s[0:1] offset:48
-; HEURRC-NEXT: global_store_dwordx4 v0, a[8:11], s[0:1] offset:32
-; HEURRC-NEXT: global_store_dwordx4 v0, a[4:7], s[0:1] offset:16
-; HEURRC-NEXT: global_store_dwordx4 v0, a[0:3], s[0:1]
+; HEURRC-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] offset:48
+; HEURRC-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] offset:32
+; HEURRC-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16
+; HEURRC-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1]
; HEURRC-NEXT: s_endpgm
;
; VGPRRC-LABEL: test_mfma_i32_32x32x32_i8__vgprcd_mac:
@@ -5279,41 +5103,33 @@ define amdgpu_kernel void @test_mfma_i32_32x32x32_i8__vgprcd_mac_flags(<4 x i32>
; SDAG-NEXT: s_load_dwordx8 s[20:27], s[4:5], 0x24
; SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xa4
; SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; SDAG-NEXT: v_mov_b32_e32 v0, s20
-; SDAG-NEXT: v_mov_b32_e32 v1, s21
-; SDAG-NEXT: v_mov_b32_e32 v2, s22
-; SDAG-NEXT: v_mov_b32_e32 v3, s23
+; SDAG-NEXT: v_mov_b32_e32 v16, s20
+; SDAG-NEXT: v_mov_b32_e32 v17, s21
+; SDAG-NEXT: v_mov_b32_e32 v18, s22
+; SDAG-NEXT: v_mov_b32_e32 v19, s23
; SDAG-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x64
-; SDAG-NEXT: v_mov_b32_e32 v4, s24
-; SDAG-NEXT: v_mov_b32_e32 v5, s25
-; SDAG-NEXT: v_mov_b32_e32 v6, s26
-; SDAG-NEXT: v_mov_b32_e32 v7, s27
+; SDAG-NEXT: v_mov_b32_e32 v20, s24
+; SDAG-NEXT: v_mov_b32_e32 v21, s25
+; SDAG-NEXT: v_mov_b32_e32 v22, s26
+; SDAG-NEXT: v_mov_b32_e32 v23, s27
; SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; SDAG-NEXT: v_accvgpr_write_b32 a0, s8
-; SDAG-NEXT: v_accvgpr_write_b32 a1, s9
-; SDAG-NEXT: v_accvgpr_write_b32 a2, s10
-; SDAG-NEXT: v_accvgpr_write_b32 a3, s11
-; SDAG-NEXT: v_accvgpr_write_b32 a4, s12
-; SDAG-NEXT: v_accvgpr_write_b32 a5, s13
-; SDAG-NEXT: v_accvgpr_write_b32 a6, s14
-; SDAG-NEXT: v_accvgpr_write_b32 a7, s15
-; SDAG-NEXT: v_accvgpr_write_b32 a8, s16
-; SDAG-NEXT: v_accvgpr_write_b32 a9, s17
-; SDAG-NEXT: v_accvgpr_write_b32 a10, s18
-; SDAG-NEXT: v_accvgpr_write_b32 a11, s19
-; SDAG-NEXT: v_accvgpr_write_b32 a12, s20
-; SDAG-NEXT: v_accvgpr_write_b32 a13, s21
-; SDAG-NEXT: v_accvgpr_write_b32 a14, s22
-; SDAG-NEXT: v_accvgpr_write_b32 a15, s23
+; SDAG-NEXT: v_mov_b64_e32 v[0:1], s[8:9]
+; SDAG-NEXT: v_mov_b64_e32 v[2:3], s[10:11]
+; SDAG-NEXT: v_mov_b64_e32 v[4:5], s[12:13]
+; SDAG-NEXT: v_mov_b64_e32 v[6:7], s[14:15]
+; SDAG-NEXT: v_mov_b64_e32 v[8:9], s[16:17]
+; SDAG-NEXT: v_mov_b64_e32 v[10:11], s[18:19]
+; SDAG-NEXT: v_mov_b64_e32 v[12:13], s[20:21]
+; SDAG-NEXT: v_mov_b64_e32 v[14:15], s[22:23]
; SDAG-NEXT: s_nop 1
-; SDAG-NEXT: v_mfma_i32_32x32x32_i8 a[0:15], v[0:3], v[4:7], a[0:15] cbsz:3 abid:2 blgp:1
-; SDAG-NEXT: v_mov_b32_e32 v0, 0
+; SDAG-NEXT: v_mfma_i32_32x32x32_i8 v[0:15], v[16:19], v[20:23], v[0:15] cbsz:3 abid:2 blgp:1
+; SDAG-NEXT: v_mov_b32_e32 v16, 0
; SDAG-NEXT: s_nop 7
; SDAG-NEXT: s_nop 2
-; SDAG-NEXT: global_store_dwordx4 v0, a[12:15], s[0:1] offset:48
-; SDAG-NEXT: global_store_dwordx4 v0, a[8:11], s[0:1] offset:32
-; SDAG-NEXT: global_store_dwordx4 v0, a[4:7], s[0:1] offset:16
-; SDAG-NEXT: global_store_dwordx4 v0, a[0:3], s[0:1]
+; SDAG-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] offset:48
+; SDAG-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] offset:32
+; SDAG-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16
+; SDAG-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1]
; SDAG-NEXT: s_endpgm
;
; GISEL-LABEL: test_mfma_i32_32x32x32_i8__vgprcd_mac_flags:
@@ -5322,35 +5138,27 @@ define amdgpu_kernel void @test_mfma_i32_32x32x32_i8__vgprcd_mac_flags(<4 x i32>
; GISEL-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x64
; GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xa4
; GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[24:25]
-; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[26:27]
-; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[28:29]
-; GISEL-NEXT: v_accvgpr_write_b32 a0, s8
-; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[30:31]
-; GISEL-NEXT: v_accvgpr_write_b32 a1, s9
-; GISEL-NEXT: v_accvgpr_write_b32 a2, s10
-; GISEL-NEXT: v_accvgpr_write_b32 a3, s11
-; GISEL-NEXT: v_accvgpr_write_b32 a4, s12
-; GISEL-NEXT: v_accvgpr_write_b32 a5, s13
-; GISEL-NEXT: v_accvgpr_write_b32 a6, s14
-; GISEL-NEXT: v_accvgpr_write_b32 a7, s15
-; GISEL-NEXT: v_accvgpr_write_b32 a8, s16
-; GISEL-NEXT: v_accvgpr_write_b32 a9, s17
-; GISEL-NEXT: v_accvgpr_write_b32 a10, s18
-; GISEL-NEXT: v_accvgpr_write_b32 a11, s19
-; GISEL-NEXT: v_accvgpr_write_b32 a12, s20
-; GISEL-NEXT: v_accvgpr_write_b32 a13, s21
-; GISEL-NEXT: v_accvgpr_write_b32 a14, s22
-; GISEL-NEXT: v_accvgpr_write_b32 a15, s23
+; GISEL-NEXT: v_mov_b64_e32 v[16:17], s[24:25]
+; GISEL-NEXT: v_mov_b64_e32 v[18:19], s[26:27]
+; GISEL-NEXT: v_mov_b64_e32 v[20:21], s[28:29]
+; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[8:9]
+; GISEL-NEXT: v_mov_b64_e32 v[22:23], s[30:31]
+; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[10:11]
+; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[12:13]
+; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[14:15]
+; GISEL-NEXT: v_mov_b64_e32 v[8:9], s[16:17]
+; GISEL-NEXT: v_mov_b64_e32 v[10:11], s[18:19]
+; GISEL-NEXT: v_mov_b64_e32 v[12:13], s[20:21]
+; GISEL-NEXT: v_mov_b64_e32 v[14:15], s[22:23]
; GISEL-NEXT: s_nop 1
-; GISEL-NEXT: v_mfma_i32_32x32x32_i8 a[0:15], v[0:3], v[4:7], a[0:15] cbsz:3 abid:2 blgp:1
-; GISEL-NEXT: v_mov_b32_e32 v0, 0
+; GISEL-NEXT: v_mfma_i32_32x32x32_i8 v[0:15], v[16:19], v[20:23], v[0:15] cbsz:3 abid:2 blgp:1
+; GISEL-NEXT: v_mov_b32_e32 v16, 0
; GISEL-NEXT: s_nop 7
; GISEL-NEXT: s_nop 2
-; GISEL-NEXT: global_store_dwordx4 v0, a[0:3], s[0:1]
-; GISEL-NEXT: global_store_dwordx4 v0, a[4:7], s[0:1] offset:16
-; GISEL-NEXT: global_store_dwordx4 v0, a[8:11], s[0:1] offset:32
-; GISEL-NEXT: global_store_dwordx4 v0, a[12:15], s[0:1] offset:48
+; GISEL-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1]
+; GISEL-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16
+; GISEL-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] offset:32
+; GISEL-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] offset:48
; GISEL-NEXT: s_endpgm
;
; HEURRC-LABEL: test_mfma_i32_32x32x32_i8__vgprcd_mac_flags:
@@ -5358,41 +5166,33 @@ define amdgpu_kernel void @test_mfma_i32_32x32x32_i8__vgprcd_mac_flags(<4 x i32>
; HEURRC-NEXT: s_load_dwordx8 s[20:27], s[4:5], 0x24
; HEURRC-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xa4
; HEURRC-NEXT: s_waitcnt lgkmcnt(0)
-; HEURRC-NEXT: v_mov_b32_e32 v0, s20
-; HEURRC-NEXT: v_mov_b32_e32 v1, s21
-; HEURRC-NEXT: v_mov_b32_e32 v2, s22
-; HEURRC-NEXT: v_mov_b32_e32 v3, s23
+; HEURRC-NEXT: v_mov_b32_e32 v16, s20
+; HEURRC-NEXT: v_mov_b32_e32 v17, s21
+; HEURRC-NEXT: v_mov_b32_e32 v18, s22
+; HEURRC-NEXT: v_mov_b32_e32 v19, s23
; HEURRC-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x64
-; HEURRC-NEXT: v_mov_b32_e32 v4, s24
-; HEURRC-NEXT: v_mov_b32_e32 v5, s25
-; HEURRC-NEXT: v_mov_b32_e32 v6, s26
-; HEURRC-NEXT: v_mov_b32_e32 v7, s27
+; HEURRC-NEXT: v_mov_b32_e32 v20, s24
+; HEURRC-NEXT: v_mov_b32_e32 v21, s25
+; HEURRC-NEXT: v_mov_b32_e32 v22, s26
+; HEURRC-NEXT: v_mov_b32_e32 v23, s27
; HEURRC-NEXT: s_waitcnt lgkmcnt(0)
-; HEURRC-NEXT: v_accvgpr_write_b32 a0, s8
-; HEURRC-NEXT: v_accvgpr_write_b32 a1, s9
-; HEURRC-NEXT: v_accvgpr_write_b32 a2, s10
-; HEURRC-NEXT: v_accvgpr_write_b32 a3, s11
-; HEURRC-NEXT: v_accvgpr_write_b32 a4, s12
-; HEURRC-NEXT: v_accvgpr_write_b32 a5, s13
-; HEURRC-NEXT: v_accvgpr_write_b32 a6, s14
-; HEURRC-NEXT: v_accvgpr_write_b32 a7, s15
-; HEURRC-NEXT: v_accvgpr_write_b32 a8, s16
-; HEURRC-NEXT: v_accvgpr_write_b32 a9, s17
-; HEURRC-NEXT: v_accvgpr_write_b32 a10, s18
-; HEURRC-NEXT: v_accvgpr_write_b32 a11, s19
-; HEURRC-NEXT: v_accvgpr_write_b32 a12, s20
-; HEURRC-NEXT: v_accvgpr_write_b32 a13, s21
-; HEURRC-NEXT: v_accvgpr_write_b32 a14, s22
-; HEURRC-NEXT: v_accvgpr_write_b32 a15, s23
+; HEURRC-NEXT: v_mov_b64_e32 v[0:1], s[8:9]
+; HEURRC-NEXT: v_mov_b64_e32 v[2:3], s[10:11]
+; HEURRC-NEXT: v_mov_b64_e32 v[4:5], s[12:13]
+; HEURRC-NEXT: v_mov_b64_e32 v[6:7], s[14:15]
+; HEURRC-NEXT: v_mov_b64_e32 v[8:9], s[16:17]
+; HEURRC-NEXT: v_mov_b64_e32 v[10:11], s[18:19]
+; HEURRC-NEXT: v_mov_b64_e32 v[12:13], s[20:21]
+; HEURRC-NEXT: v_mov_b64_e32 v[14:15], s[22:23]
; HEURRC-NEXT: s_nop 1
-; HEURRC-NEXT: v_mfma_i32_32x32x32_i8 a[0:15], v[0:3], v[4:7], a[0:15] cbsz:3 abid:2 blgp:1
-; HEURRC-NEXT: v_mov_b32_e32 v0, 0
+; HEURRC-NEXT: v_mfma_i32_32x32x32_i8 v[0:15], v[16:19], v[20:23], v[0:15] cbsz:3 abid:2 blgp:1
+; HEURRC-NEXT: v_mov_b32_e32 v16, 0
; HEURRC-NEXT: s_nop 7
; HEURRC-NEXT: s_nop 2
-; HEURRC-NEXT: global_store_dwordx4 v0, a[12:15], s[0:1] offset:48
-; HEURRC-NEXT: global_store_dwordx4 v0, a[8:11], s[0:1] offset:32
-; HEURRC-NEXT: global_store_dwordx4 v0, a[4:7], s[0:1] offset:16
-; HEURRC-NEXT: global_store_dwordx4 v0, a[0:3], s[0:1]
+; HEURRC-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] offset:48
+; HEURRC-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] offset:32
+; HEURRC-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16
+; HEURRC-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1]
; HEURRC-NEXT: s_endpgm
;
; VGPRRC-LABEL: test_mfma_i32_32x32x32_i8__vgprcd_mac_flags:
@@ -5643,20 +5443,18 @@ define amdgpu_kernel void @test_mfma_f32_16x16x32_bf16_no_agpr__vgprcd(ptr addrs
; GCN-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x34
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x54
; GCN-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24
-; GCN-NEXT: v_mov_b32_e32 v8, 0
+; GCN-NEXT: v_mov_b32_e32 v12, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_mov_b64_e32 v[0:1], s[8:9]
; GCN-NEXT: v_mov_b64_e32 v[2:3], s[10:11]
; GCN-NEXT: v_mov_b64_e32 v[4:5], s[12:13]
-; GCN-NEXT: v_accvgpr_write_b32 a0, s0
+; GCN-NEXT: v_mov_b64_e32 v[10:11], s[2:3]
; GCN-NEXT: v_mov_b64_e32 v[6:7], s[14:15]
-; GCN-NEXT: v_accvgpr_write_b32 a1, s1
-; GCN-NEXT: v_accvgpr_write_b32 a2, s2
-; GCN-NEXT: v_accvgpr_write_b32 a3, s3
+; GCN-NEXT: v_mov_b64_e32 v[8:9], s[0:1]
; GCN-NEXT: s_nop 1
-; GCN-NEXT: v_mfma_f32_16x16x32_bf16 a[0:3], v[0:3], v[4:7], a[0:3]
+; GCN-NEXT: v_mfma_f32_16x16x32_bf16 v[0:3], v[0:3], v[4:7], v[8:11]
; GCN-NEXT: s_nop 7
-; GCN-NEXT: global_store_dwordx4 v8, a[0:3], s[6:7]
+; GCN-NEXT: global_store_dwordx4 v12, v[0:3], s[6:7]
; GCN-NEXT: s_endpgm
;
; HEURRC-LABEL: test_mfma_f32_16x16x32_bf16_no_agpr__vgprcd:
@@ -5664,20 +5462,18 @@ define amdgpu_kernel void @test_mfma_f32_16x16x32_bf16_no_agpr__vgprcd(ptr addrs
; HEURRC-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x34
; HEURRC-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x54
; HEURRC-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24
-; HEURRC-NEXT: v_mov_b32_e32 v8, 0
+; HEURRC-NEXT: v_mov_b32_e32 v12, 0
; HEURRC-NEXT: s_waitcnt lgkmcnt(0)
; HEURRC-NEXT: v_mov_b64_e32 v[0:1], s[8:9]
; HEURRC-NEXT: v_mov_b64_e32 v[2:3], s[10:11]
; HEURRC-NEXT: v_mov_b64_e32 v[4:5], s[12:13]
-; HEURRC-NEXT: v_accvgpr_write_b32 a0, s0
+; HEURRC-NEXT: v_mov_b64_e32 v[10:11], s[2:3]
; HEURRC-NEXT: v_mov_b64_e32 v[6:7], s[14:15]
-; HEURRC-NEXT: v_accvgpr_write_b32 a1, s1
-; HEURRC-NEXT: v_accvgpr_write_b32 a2, s2
-; HEURRC-NEXT: v_accvgpr_write_b32 a3, s3
+; HEURRC-NEXT: v_mov_b64_e32 v[8:9], s[0:1]
; HEURRC-NEXT: s_nop 1
-; HEURRC-NEXT: v_mfma_f32_16x16x32_bf16 a[0:3], v[0:3], v[4:7], a[0:3]
+; HEURRC-NEXT: v_mfma_f32_16x16x32_bf16 v[0:3], v[0:3], v[4:7], v[8:11]
; HEURRC-NEXT: s_nop 7
-; HEURRC-NEXT: global_store_dwordx4 v8, a[0:3], s[6:7]
+; HEURRC-NEXT: global_store_dwordx4 v12, v[0:3], s[6:7]
; HEURRC-NEXT: s_endpgm
;
; VGPRRC-LABEL: test_mfma_f32_16x16x32_bf16_no_agpr__vgprcd:
@@ -5747,20 +5543,18 @@ define amdgpu_kernel void @test_mfma_f32_16x16x32_bf16_no_agpr__vgprcd__flags(pt
; GCN-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x34
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x54
; GCN-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24
-; GCN-NEXT: v_mov_b32_e32 v8, 0
+; GCN-NEXT: v_mov_b32_e32 v12, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_mov_b64_e32 v[0:1], s[8:9]
; GCN-NEXT: v_mov_b64_e32 v[2:3], s[10:11]
; GCN-NEXT: v_mov_b64_e32 v[4:5], s[12:13]
-; GCN-NEXT: v_accvgpr_write_b32 a0, s0
+; GCN-NEXT: v_mov_b64_e32 v[10:11], s[2:3]
; GCN-NEXT: v_mov_b64_e32 v[6:7], s[14:15]
-; GCN-NEXT: v_accvgpr_write_b32 a1, s1
-; GCN-NEXT: v_accvgpr_write_b32 a2, s2
-; GCN-NEXT: v_accvgpr_write_b32 a3, s3
+; GCN-NEXT: v_mov_b64_e32 v[8:9], s[0:1]
; GCN-NEXT: s_nop 1
-; GCN-NEXT: v_mfma_f32_16x16x32_bf16 a[0:3], v[0:3], v[4:7], a[0:3] cbsz:3 abid:2 blgp:1
+; GCN-NEXT: v_mfma_f32_16x16x32_bf16 v[0:3], v[0:3], v[4:7], v[8:11] cbsz:3 abid:2 blgp:1
; GCN-NEXT: s_nop 7
-; GCN-NEXT: global_store_dwordx4 v8, a[0:3], s[6:7]
+; GCN-NEXT: global_store_dwordx4 v12, v[0:3], s[6:7]
; GCN-NEXT: s_endpgm
;
; HEURRC-LABEL: test_mfma_f32_16x16x32_bf16_no_agpr__vgprcd__flags:
@@ -5768,20 +5562,18 @@ define amdgpu_kernel void @test_mfma_f32_16x16x32_bf16_no_agpr__vgprcd__flags(pt
; HEURRC-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x34
; HEURRC-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x54
; HEURRC-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24
-; HEURRC-NEXT: v_mov_b32_e32 v8, 0
+; HEURRC-NEXT: v_mov_b32_e32 v12, 0
; HEURRC-NEXT: s_waitcnt lgkmcnt(0)
; HEURRC-NEXT: v_mov_b64_e32 v[0:1], s[8:9]
; HEURRC-NEXT: v_mov_b64_e32 v[2:3], s[10:11]
; HEURRC-NEXT: v_mov_b64_e32 v[4:5], s[12:13]
-; HEURRC-NEXT: v_accvgpr_write_b32 a0, s0
+; HEURRC-NEXT: v_mov_b64_e32 v[10:11], s[2:3]
; HEURRC-NEXT: v_mov_b64_e32 v[6:7], s[14:15]
-; HEURRC-NEXT: v_accvgpr_write_b32 a1, s1
-; HEURRC-NEXT: v_accvgpr_write_b32 a2, s2
-; HEURRC-NEXT: v_accvgpr_write_b32 a3, s3
+; HEURRC-NEXT: v_mov_b64_e32 v[8:9], s[0:1]
; HEURRC-NEXT: s_nop 1
-; HEURRC-NEXT: v_mfma_f32_16x16x32_bf16 a[0:3], v[0:3], v[4:7], a[0:3] cbsz:3 abid:2 blgp:1
+; HEURRC-NEXT: v_mfma_f32_16x16x32_bf16 v[0:3], v[0:3], v[4:7], v[8:11] cbsz:3 abid:2 blgp:1
; HEURRC-NEXT: s_nop 7
-; HEURRC-NEXT: global_store_dwordx4 v8, a[0:3], s[6:7]
+; HEURRC-NEXT: global_store_dwordx4 v12, v[0:3], s[6:7]
; HEURRC-NEXT: s_endpgm
;
; VGPRRC-LABEL: test_mfma_f32_16x16x32_bf16_no_agpr__vgprcd__flags:
@@ -5845,5 +5637,5 @@ define amdgpu_kernel void @test_mfma_f32_16x16x32_bf16_no_agpr__vgprcd__flags(pt
ret void
}
-attributes #0 = { "amdgpu-flat-work-group-size"="512,512" }
+attributes #0 = { "amdgpu-flat-work-group-size"="512,512" "amdgpu-agpr-alloc"="0,0" }
attributes #1 = { "amdgpu-flat-work-group-size"="1,64" }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.scale.f32.16x16x128.f8f6f4.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.scale.f32.16x16x128.f8f6f4.ll
index 37809da..f78ea92 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.scale.f32.16x16x128.f8f6f4.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.scale.f32.16x16x128.f8f6f4.ll
@@ -1895,36 +1895,36 @@ define amdgpu_kernel void @test_mfma_scale_f32_16x16x128_f8f6f4__vgprcd(<8 x i32
; SDAG-LABEL: test_mfma_scale_f32_16x16x128_f8f6f4__vgprcd:
; SDAG: ; %bb.0:
; SDAG-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x0
-; SDAG-NEXT: v_mov_b32_e32 v0, 0
+; SDAG-NEXT: v_mov_b32_e32 v20, 0
; SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; SDAG-NEXT: v_mov_b32_e32 v2, s8
-; SDAG-NEXT: v_mov_b32_e32 v3, s9
-; SDAG-NEXT: v_mov_b32_e32 v4, s10
-; SDAG-NEXT: v_mov_b32_e32 v5, s11
-; SDAG-NEXT: v_mov_b32_e32 v6, s12
-; SDAG-NEXT: v_mov_b32_e32 v7, s13
-; SDAG-NEXT: v_mov_b32_e32 v8, s14
-; SDAG-NEXT: v_mov_b32_e32 v9, s15
+; SDAG-NEXT: v_mov_b32_e32 v0, s8
+; SDAG-NEXT: v_mov_b32_e32 v1, s9
+; SDAG-NEXT: v_mov_b32_e32 v2, s10
+; SDAG-NEXT: v_mov_b32_e32 v3, s11
+; SDAG-NEXT: v_mov_b32_e32 v4, s12
+; SDAG-NEXT: v_mov_b32_e32 v5, s13
+; SDAG-NEXT: v_mov_b32_e32 v6, s14
+; SDAG-NEXT: v_mov_b32_e32 v7, s15
; SDAG-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x40
-; SDAG-NEXT: v_mov_b32_e32 v10, s16
-; SDAG-NEXT: v_mov_b32_e32 v11, s17
-; SDAG-NEXT: v_mov_b32_e32 v12, s18
-; SDAG-NEXT: v_mov_b32_e32 v13, s19
-; SDAG-NEXT: v_mov_b32_e32 v14, s20
-; SDAG-NEXT: v_mov_b32_e32 v15, s21
-; SDAG-NEXT: v_mov_b32_e32 v16, s22
-; SDAG-NEXT: v_mov_b32_e32 v17, s23
+; SDAG-NEXT: v_mov_b32_e32 v8, s16
+; SDAG-NEXT: v_mov_b32_e32 v9, s17
+; SDAG-NEXT: v_mov_b32_e32 v10, s18
+; SDAG-NEXT: v_mov_b32_e32 v11, s19
+; SDAG-NEXT: v_mov_b32_e32 v12, s20
+; SDAG-NEXT: v_mov_b32_e32 v13, s21
+; SDAG-NEXT: v_mov_b32_e32 v14, s22
+; SDAG-NEXT: v_mov_b32_e32 v15, s23
; SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; SDAG-NEXT: v_accvgpr_write_b32 a0, s8
-; SDAG-NEXT: v_accvgpr_write_b32 a1, s9
-; SDAG-NEXT: v_accvgpr_write_b32 a2, s10
-; SDAG-NEXT: v_accvgpr_write_b32 a3, s11
-; SDAG-NEXT: v_mov_b32_e32 v1, s13
+; SDAG-NEXT: v_mov_b32_e32 v16, s8
+; SDAG-NEXT: v_mov_b32_e32 v17, s9
+; SDAG-NEXT: v_mov_b32_e32 v18, s10
+; SDAG-NEXT: v_mov_b32_e32 v19, s11
+; SDAG-NEXT: v_mov_b32_e32 v21, s13
; SDAG-NEXT: s_nop 1
-; SDAG-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[2:9], v[10:17], a[0:3], s12, v1 op_sel:[1,1,0] op_sel_hi:[1,0,0] blgp:2
+; SDAG-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[0:7], v[8:15], v[16:19], s12, v21 op_sel:[1,1,0] op_sel_hi:[1,0,0] blgp:2
; SDAG-NEXT: s_nop 7
; SDAG-NEXT: s_nop 3
-; SDAG-NEXT: global_store_dwordx4 v0, a[0:3], s[14:15]
+; SDAG-NEXT: global_store_dwordx4 v20, v[0:3], s[14:15]
; SDAG-NEXT: s_endpgm
;
; GISEL-LABEL: test_mfma_scale_f32_16x16x128_f8f6f4__vgprcd:
@@ -1937,20 +1937,18 @@ define amdgpu_kernel void @test_mfma_scale_f32_16x16x128_f8f6f4__vgprcd(<8 x i32
; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[12:13]
; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[14:15]
; GISEL-NEXT: v_mov_b64_e32 v[8:9], s[16:17]
-; GISEL-NEXT: v_accvgpr_write_b32 a0, s24
+; GISEL-NEXT: v_mov_b64_e32 v[16:17], s[24:25]
; GISEL-NEXT: v_mov_b64_e32 v[10:11], s[18:19]
; GISEL-NEXT: v_mov_b64_e32 v[12:13], s[20:21]
; GISEL-NEXT: v_mov_b64_e32 v[14:15], s[22:23]
-; GISEL-NEXT: v_accvgpr_write_b32 a1, s25
-; GISEL-NEXT: v_accvgpr_write_b32 a2, s26
-; GISEL-NEXT: v_accvgpr_write_b32 a3, s27
-; GISEL-NEXT: v_mov_b32_e32 v16, s29
+; GISEL-NEXT: v_mov_b64_e32 v[18:19], s[26:27]
+; GISEL-NEXT: v_mov_b32_e32 v20, s29
; GISEL-NEXT: s_nop 1
-; GISEL-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], s28, v16 op_sel:[1,1,0] op_sel_hi:[1,0,0] blgp:2
-; GISEL-NEXT: v_mov_b32_e32 v0, 0
+; GISEL-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[0:7], v[8:15], v[16:19], s28, v20 op_sel:[1,1,0] op_sel_hi:[1,0,0] blgp:2
+; GISEL-NEXT: v_mov_b32_e32 v4, 0
; GISEL-NEXT: s_nop 7
; GISEL-NEXT: s_nop 2
-; GISEL-NEXT: global_store_dwordx4 v0, a[0:3], s[30:31]
+; GISEL-NEXT: global_store_dwordx4 v4, v[0:3], s[30:31]
; GISEL-NEXT: s_endpgm
%result = call <4 x float> @llvm.amdgcn.mfma.scale.f32.16x16x128.f8f6f4.v8i32.v8i32(<8 x i32> %arg0, <8 x i32> %arg1, <4 x float> %arg2, i32 0, i32 2, i32 3, i32 %scale0, i32 1, i32 %scale1)
store <4 x float> %result, ptr addrspace(1) %ptr, align 16
@@ -1964,40 +1962,38 @@ define amdgpu_kernel void @test_mfma_scale_f32_16x16x128_f8f6f4__vgprcd___scaleA
; SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x40
; SDAG-NEXT: s_movk_i32 s6, 0x41
; SDAG-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x50
-; SDAG-NEXT: v_mov_b32_e32 v0, 0
+; SDAG-NEXT: v_mov_b32_e32 v20, 0
; SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; SDAG-NEXT: v_mov_b32_e32 v2, s8
-; SDAG-NEXT: v_mov_b32_e32 v3, s9
-; SDAG-NEXT: v_mov_b32_e32 v4, s10
-; SDAG-NEXT: v_mov_b32_e32 v5, s11
-; SDAG-NEXT: v_mov_b32_e32 v6, s12
-; SDAG-NEXT: v_mov_b32_e32 v7, s13
-; SDAG-NEXT: v_mov_b32_e32 v8, s14
-; SDAG-NEXT: v_mov_b32_e32 v9, s15
-; SDAG-NEXT: v_accvgpr_write_b32 a0, s0
-; SDAG-NEXT: v_mov_b32_e32 v10, s16
-; SDAG-NEXT: v_mov_b32_e32 v11, s17
-; SDAG-NEXT: v_mov_b32_e32 v12, s18
-; SDAG-NEXT: v_mov_b32_e32 v13, s19
-; SDAG-NEXT: v_mov_b32_e32 v14, s20
-; SDAG-NEXT: v_mov_b32_e32 v15, s21
-; SDAG-NEXT: v_mov_b32_e32 v16, s22
-; SDAG-NEXT: v_mov_b32_e32 v17, s23
-; SDAG-NEXT: v_accvgpr_write_b32 a1, s1
-; SDAG-NEXT: v_accvgpr_write_b32 a2, s2
-; SDAG-NEXT: v_accvgpr_write_b32 a3, s3
+; SDAG-NEXT: v_mov_b32_e32 v0, s8
+; SDAG-NEXT: v_mov_b32_e32 v1, s9
+; SDAG-NEXT: v_mov_b32_e32 v2, s10
+; SDAG-NEXT: v_mov_b32_e32 v3, s11
+; SDAG-NEXT: v_mov_b32_e32 v4, s12
+; SDAG-NEXT: v_mov_b32_e32 v5, s13
+; SDAG-NEXT: v_mov_b32_e32 v6, s14
+; SDAG-NEXT: v_mov_b32_e32 v7, s15
+; SDAG-NEXT: v_mov_b64_e32 v[18:19], s[2:3]
+; SDAG-NEXT: v_mov_b32_e32 v8, s16
+; SDAG-NEXT: v_mov_b32_e32 v9, s17
+; SDAG-NEXT: v_mov_b32_e32 v10, s18
+; SDAG-NEXT: v_mov_b32_e32 v11, s19
+; SDAG-NEXT: v_mov_b32_e32 v12, s20
+; SDAG-NEXT: v_mov_b32_e32 v13, s21
+; SDAG-NEXT: v_mov_b32_e32 v14, s22
+; SDAG-NEXT: v_mov_b32_e32 v15, s23
+; SDAG-NEXT: v_mov_b64_e32 v[16:17], s[0:1]
; SDAG-NEXT: s_nop 1
-; SDAG-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[2:9], v[10:17], a[0:3], s6, -2 op_sel:[1,1,0] op_sel_hi:[1,0,0]
+; SDAG-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[0:7], v[8:15], v[16:19], s6, -2 op_sel:[1,1,0] op_sel_hi:[1,0,0]
; SDAG-NEXT: s_nop 7
; SDAG-NEXT: s_nop 3
-; SDAG-NEXT: global_store_dwordx4 v0, a[0:3], s[4:5]
+; SDAG-NEXT: global_store_dwordx4 v20, v[0:3], s[4:5]
; SDAG-NEXT: s_endpgm
;
; GISEL-LABEL: test_mfma_scale_f32_16x16x128_f8f6f4__vgprcd___scaleA_kimm__scaleB__inlineimm:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x0
; GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x40
-; GISEL-NEXT: v_mov_b32_e32 v16, 0x41
+; GISEL-NEXT: v_mov_b32_e32 v20, 0x41
; GISEL-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x50
; GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[8:9]
@@ -2005,19 +2001,17 @@ define amdgpu_kernel void @test_mfma_scale_f32_16x16x128_f8f6f4__vgprcd___scaleA
; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[12:13]
; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[14:15]
; GISEL-NEXT: v_mov_b64_e32 v[8:9], s[16:17]
-; GISEL-NEXT: v_accvgpr_write_b32 a0, s0
+; GISEL-NEXT: v_mov_b64_e32 v[18:19], s[2:3]
; GISEL-NEXT: v_mov_b64_e32 v[10:11], s[18:19]
; GISEL-NEXT: v_mov_b64_e32 v[12:13], s[20:21]
; GISEL-NEXT: v_mov_b64_e32 v[14:15], s[22:23]
-; GISEL-NEXT: v_accvgpr_write_b32 a1, s1
-; GISEL-NEXT: v_accvgpr_write_b32 a2, s2
-; GISEL-NEXT: v_accvgpr_write_b32 a3, s3
+; GISEL-NEXT: v_mov_b64_e32 v[16:17], s[0:1]
; GISEL-NEXT: s_nop 1
-; GISEL-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v16, -2 op_sel:[1,1,0] op_sel_hi:[1,0,0]
-; GISEL-NEXT: v_mov_b32_e32 v0, 0
+; GISEL-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[0:7], v[8:15], v[16:19], v20, -2 op_sel:[1,1,0] op_sel_hi:[1,0,0]
+; GISEL-NEXT: v_mov_b32_e32 v4, 0
; GISEL-NEXT: s_nop 7
; GISEL-NEXT: s_nop 2
-; GISEL-NEXT: global_store_dwordx4 v0, a[0:3], s[4:5]
+; GISEL-NEXT: global_store_dwordx4 v4, v[0:3], s[4:5]
; GISEL-NEXT: s_endpgm
%result = call <4 x float> @llvm.amdgcn.mfma.scale.f32.16x16x128.f8f6f4.v8i32.v8i32(<8 x i32> %arg0, <8 x i32> %arg1, <4 x float> %arg2, i32 0, i32 0, i32 3, i32 65, i32 1, i32 -2)
store <4 x float> %result, ptr addrspace(1) %ptr, align 16
@@ -2031,40 +2025,38 @@ define amdgpu_kernel void @test_mfma_scale_f32_16x16x128_f8f6f4__vgprcd___scaleA
; SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x40
; SDAG-NEXT: s_movk_i32 s6, 0x41
; SDAG-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x50
-; SDAG-NEXT: v_mov_b32_e32 v0, 0
+; SDAG-NEXT: v_mov_b32_e32 v20, 0
; SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; SDAG-NEXT: v_mov_b32_e32 v2, s8
-; SDAG-NEXT: v_mov_b32_e32 v3, s9
-; SDAG-NEXT: v_mov_b32_e32 v4, s10
-; SDAG-NEXT: v_mov_b32_e32 v5, s11
-; SDAG-NEXT: v_mov_b32_e32 v6, s12
-; SDAG-NEXT: v_mov_b32_e32 v7, s13
-; SDAG-NEXT: v_mov_b32_e32 v8, s14
-; SDAG-NEXT: v_mov_b32_e32 v9, s15
-; SDAG-NEXT: v_accvgpr_write_b32 a0, s0
-; SDAG-NEXT: v_mov_b32_e32 v10, s16
-; SDAG-NEXT: v_mov_b32_e32 v11, s17
-; SDAG-NEXT: v_mov_b32_e32 v12, s18
-; SDAG-NEXT: v_mov_b32_e32 v13, s19
-; SDAG-NEXT: v_mov_b32_e32 v14, s20
-; SDAG-NEXT: v_mov_b32_e32 v15, s21
-; SDAG-NEXT: v_mov_b32_e32 v16, s22
-; SDAG-NEXT: v_mov_b32_e32 v17, s23
-; SDAG-NEXT: v_accvgpr_write_b32 a1, s1
-; SDAG-NEXT: v_accvgpr_write_b32 a2, s2
-; SDAG-NEXT: v_accvgpr_write_b32 a3, s3
+; SDAG-NEXT: v_mov_b32_e32 v0, s8
+; SDAG-NEXT: v_mov_b32_e32 v1, s9
+; SDAG-NEXT: v_mov_b32_e32 v2, s10
+; SDAG-NEXT: v_mov_b32_e32 v3, s11
+; SDAG-NEXT: v_mov_b32_e32 v4, s12
+; SDAG-NEXT: v_mov_b32_e32 v5, s13
+; SDAG-NEXT: v_mov_b32_e32 v6, s14
+; SDAG-NEXT: v_mov_b32_e32 v7, s15
+; SDAG-NEXT: v_mov_b64_e32 v[18:19], s[2:3]
+; SDAG-NEXT: v_mov_b32_e32 v8, s16
+; SDAG-NEXT: v_mov_b32_e32 v9, s17
+; SDAG-NEXT: v_mov_b32_e32 v10, s18
+; SDAG-NEXT: v_mov_b32_e32 v11, s19
+; SDAG-NEXT: v_mov_b32_e32 v12, s20
+; SDAG-NEXT: v_mov_b32_e32 v13, s21
+; SDAG-NEXT: v_mov_b32_e32 v14, s22
+; SDAG-NEXT: v_mov_b32_e32 v15, s23
+; SDAG-NEXT: v_mov_b64_e32 v[16:17], s[0:1]
; SDAG-NEXT: s_nop 1
-; SDAG-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[2:9], v[10:17], a[0:3], s6, 1.0 op_sel:[1,1,0] op_sel_hi:[1,0,0]
+; SDAG-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[0:7], v[8:15], v[16:19], s6, 1.0 op_sel:[1,1,0] op_sel_hi:[1,0,0]
; SDAG-NEXT: s_nop 7
; SDAG-NEXT: s_nop 3
-; SDAG-NEXT: global_store_dwordx4 v0, a[0:3], s[4:5]
+; SDAG-NEXT: global_store_dwordx4 v20, v[0:3], s[4:5]
; SDAG-NEXT: s_endpgm
;
; GISEL-LABEL: test_mfma_scale_f32_16x16x128_f8f6f4__vgprcd___scaleA_kimm__scaleB__FP_literal:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x0
; GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x40
-; GISEL-NEXT: v_mov_b32_e32 v16, 0x41
+; GISEL-NEXT: v_mov_b32_e32 v20, 0x41
; GISEL-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x50
; GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[8:9]
@@ -2072,19 +2064,17 @@ define amdgpu_kernel void @test_mfma_scale_f32_16x16x128_f8f6f4__vgprcd___scaleA
; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[12:13]
; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[14:15]
; GISEL-NEXT: v_mov_b64_e32 v[8:9], s[16:17]
-; GISEL-NEXT: v_accvgpr_write_b32 a0, s0
+; GISEL-NEXT: v_mov_b64_e32 v[18:19], s[2:3]
; GISEL-NEXT: v_mov_b64_e32 v[10:11], s[18:19]
; GISEL-NEXT: v_mov_b64_e32 v[12:13], s[20:21]
; GISEL-NEXT: v_mov_b64_e32 v[14:15], s[22:23]
-; GISEL-NEXT: v_accvgpr_write_b32 a1, s1
-; GISEL-NEXT: v_accvgpr_write_b32 a2, s2
-; GISEL-NEXT: v_accvgpr_write_b32 a3, s3
+; GISEL-NEXT: v_mov_b64_e32 v[16:17], s[0:1]
; GISEL-NEXT: s_nop 1
-; GISEL-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], v16, 1.0 op_sel:[1,1,0] op_sel_hi:[1,0,0]
-; GISEL-NEXT: v_mov_b32_e32 v0, 0
+; GISEL-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[0:7], v[8:15], v[16:19], v20, 1.0 op_sel:[1,1,0] op_sel_hi:[1,0,0]
+; GISEL-NEXT: v_mov_b32_e32 v4, 0
; GISEL-NEXT: s_nop 7
; GISEL-NEXT: s_nop 2
-; GISEL-NEXT: global_store_dwordx4 v0, a[0:3], s[4:5]
+; GISEL-NEXT: global_store_dwordx4 v4, v[0:3], s[4:5]
; GISEL-NEXT: s_endpgm
%result = call <4 x float> @llvm.amdgcn.mfma.scale.f32.16x16x128.f8f6f4.v8i32.v8i32(<8 x i32> %arg0, <8 x i32> %arg1, <4 x float> %arg2, i32 0, i32 0, i32 3, i32 65, i32 1, i32 1065353216)
store <4 x float> %result, ptr addrspace(1) %ptr, align 16
@@ -2096,34 +2086,32 @@ define amdgpu_kernel void @test_mfma_scale_f32_16x16x128_f8f6f4__vgprcd___scaleA
; SDAG: ; %bb.0:
; SDAG-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x0
; SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x40
-; SDAG-NEXT: v_mov_b32_e32 v0, 0
+; SDAG-NEXT: v_mov_b32_e32 v20, 0
; SDAG-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x50
; SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; SDAG-NEXT: v_mov_b32_e32 v2, s8
-; SDAG-NEXT: v_mov_b32_e32 v3, s9
-; SDAG-NEXT: v_mov_b32_e32 v4, s10
-; SDAG-NEXT: v_mov_b32_e32 v5, s11
-; SDAG-NEXT: v_mov_b32_e32 v6, s12
-; SDAG-NEXT: v_mov_b32_e32 v7, s13
-; SDAG-NEXT: v_mov_b32_e32 v8, s14
-; SDAG-NEXT: v_mov_b32_e32 v9, s15
-; SDAG-NEXT: v_accvgpr_write_b32 a0, s0
-; SDAG-NEXT: v_mov_b32_e32 v10, s16
-; SDAG-NEXT: v_mov_b32_e32 v11, s17
-; SDAG-NEXT: v_mov_b32_e32 v12, s18
-; SDAG-NEXT: v_mov_b32_e32 v13, s19
-; SDAG-NEXT: v_mov_b32_e32 v14, s20
-; SDAG-NEXT: v_mov_b32_e32 v15, s21
-; SDAG-NEXT: v_mov_b32_e32 v16, s22
-; SDAG-NEXT: v_mov_b32_e32 v17, s23
-; SDAG-NEXT: v_accvgpr_write_b32 a1, s1
-; SDAG-NEXT: v_accvgpr_write_b32 a2, s2
-; SDAG-NEXT: v_accvgpr_write_b32 a3, s3
+; SDAG-NEXT: v_mov_b32_e32 v0, s8
+; SDAG-NEXT: v_mov_b32_e32 v1, s9
+; SDAG-NEXT: v_mov_b32_e32 v2, s10
+; SDAG-NEXT: v_mov_b32_e32 v3, s11
+; SDAG-NEXT: v_mov_b32_e32 v4, s12
+; SDAG-NEXT: v_mov_b32_e32 v5, s13
+; SDAG-NEXT: v_mov_b32_e32 v6, s14
+; SDAG-NEXT: v_mov_b32_e32 v7, s15
+; SDAG-NEXT: v_mov_b64_e32 v[18:19], s[2:3]
+; SDAG-NEXT: v_mov_b32_e32 v8, s16
+; SDAG-NEXT: v_mov_b32_e32 v9, s17
+; SDAG-NEXT: v_mov_b32_e32 v10, s18
+; SDAG-NEXT: v_mov_b32_e32 v11, s19
+; SDAG-NEXT: v_mov_b32_e32 v12, s20
+; SDAG-NEXT: v_mov_b32_e32 v13, s21
+; SDAG-NEXT: v_mov_b32_e32 v14, s22
+; SDAG-NEXT: v_mov_b32_e32 v15, s23
+; SDAG-NEXT: v_mov_b64_e32 v[16:17], s[0:1]
; SDAG-NEXT: s_nop 1
-; SDAG-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[2:9], v[10:17], a[0:3], 1.0, -2 op_sel:[1,1,0] op_sel_hi:[1,0,0]
+; SDAG-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[0:7], v[8:15], v[16:19], 1.0, -2 op_sel:[1,1,0] op_sel_hi:[1,0,0]
; SDAG-NEXT: s_nop 7
; SDAG-NEXT: s_nop 3
-; SDAG-NEXT: global_store_dwordx4 v0, a[0:3], s[4:5]
+; SDAG-NEXT: global_store_dwordx4 v20, v[0:3], s[4:5]
; SDAG-NEXT: s_endpgm
;
; GISEL-LABEL: test_mfma_scale_f32_16x16x128_f8f6f4__vgprcd___scaleA_FP_literal__scaleB__inline_imm:
@@ -2136,21 +2124,19 @@ define amdgpu_kernel void @test_mfma_scale_f32_16x16x128_f8f6f4__vgprcd___scaleA
; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[12:13]
; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[14:15]
; GISEL-NEXT: v_mov_b64_e32 v[8:9], s[16:17]
-; GISEL-NEXT: v_accvgpr_write_b32 a0, s0
+; GISEL-NEXT: v_mov_b64_e32 v[18:19], s[2:3]
; GISEL-NEXT: v_mov_b64_e32 v[10:11], s[18:19]
; GISEL-NEXT: v_mov_b64_e32 v[12:13], s[20:21]
; GISEL-NEXT: v_mov_b64_e32 v[14:15], s[22:23]
-; GISEL-NEXT: v_accvgpr_write_b32 a1, s1
-; GISEL-NEXT: v_accvgpr_write_b32 a2, s2
-; GISEL-NEXT: v_accvgpr_write_b32 a3, s3
+; GISEL-NEXT: v_mov_b64_e32 v[16:17], s[0:1]
; GISEL-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x50
; GISEL-NEXT: s_nop 0
-; GISEL-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], 1.0, -2 op_sel:[1,1,0] op_sel_hi:[1,0,0]
-; GISEL-NEXT: v_mov_b32_e32 v0, 0
+; GISEL-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[0:7], v[8:15], v[16:19], 1.0, -2 op_sel:[1,1,0] op_sel_hi:[1,0,0]
+; GISEL-NEXT: v_mov_b32_e32 v4, 0
; GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GISEL-NEXT: s_nop 7
; GISEL-NEXT: s_nop 1
-; GISEL-NEXT: global_store_dwordx4 v0, a[0:3], s[4:5]
+; GISEL-NEXT: global_store_dwordx4 v4, v[0:3], s[4:5]
; GISEL-NEXT: s_endpgm
%result = call <4 x float> @llvm.amdgcn.mfma.scale.f32.16x16x128.f8f6f4.v8i32.v8i32(<8 x i32> %arg0, <8 x i32> %arg1, <4 x float> %arg2, i32 0, i32 0, i32 3, i32 1065353216, i32 1, i32 -2)
store <4 x float> %result, ptr addrspace(1) %ptr, align 16
@@ -2162,34 +2148,32 @@ define amdgpu_kernel void @test_mfma_scale_f32_16x16x128_f8f6f4__vgprcd___scaleA
; SDAG: ; %bb.0:
; SDAG-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x0
; SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x40
-; SDAG-NEXT: v_mov_b32_e32 v0, 0
+; SDAG-NEXT: v_mov_b32_e32 v20, 0
; SDAG-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x50
; SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; SDAG-NEXT: v_mov_b32_e32 v2, s8
-; SDAG-NEXT: v_mov_b32_e32 v3, s9
-; SDAG-NEXT: v_mov_b32_e32 v4, s10
-; SDAG-NEXT: v_mov_b32_e32 v5, s11
-; SDAG-NEXT: v_mov_b32_e32 v6, s12
-; SDAG-NEXT: v_mov_b32_e32 v7, s13
-; SDAG-NEXT: v_mov_b32_e32 v8, s14
-; SDAG-NEXT: v_mov_b32_e32 v9, s15
-; SDAG-NEXT: v_accvgpr_write_b32 a0, s0
-; SDAG-NEXT: v_mov_b32_e32 v10, s16
-; SDAG-NEXT: v_mov_b32_e32 v11, s17
-; SDAG-NEXT: v_mov_b32_e32 v12, s18
-; SDAG-NEXT: v_mov_b32_e32 v13, s19
-; SDAG-NEXT: v_mov_b32_e32 v14, s20
-; SDAG-NEXT: v_mov_b32_e32 v15, s21
-; SDAG-NEXT: v_mov_b32_e32 v16, s22
-; SDAG-NEXT: v_mov_b32_e32 v17, s23
-; SDAG-NEXT: v_accvgpr_write_b32 a1, s1
-; SDAG-NEXT: v_accvgpr_write_b32 a2, s2
-; SDAG-NEXT: v_accvgpr_write_b32 a3, s3
+; SDAG-NEXT: v_mov_b32_e32 v0, s8
+; SDAG-NEXT: v_mov_b32_e32 v1, s9
+; SDAG-NEXT: v_mov_b32_e32 v2, s10
+; SDAG-NEXT: v_mov_b32_e32 v3, s11
+; SDAG-NEXT: v_mov_b32_e32 v4, s12
+; SDAG-NEXT: v_mov_b32_e32 v5, s13
+; SDAG-NEXT: v_mov_b32_e32 v6, s14
+; SDAG-NEXT: v_mov_b32_e32 v7, s15
+; SDAG-NEXT: v_mov_b64_e32 v[18:19], s[2:3]
+; SDAG-NEXT: v_mov_b32_e32 v8, s16
+; SDAG-NEXT: v_mov_b32_e32 v9, s17
+; SDAG-NEXT: v_mov_b32_e32 v10, s18
+; SDAG-NEXT: v_mov_b32_e32 v11, s19
+; SDAG-NEXT: v_mov_b32_e32 v12, s20
+; SDAG-NEXT: v_mov_b32_e32 v13, s21
+; SDAG-NEXT: v_mov_b32_e32 v14, s22
+; SDAG-NEXT: v_mov_b32_e32 v15, s23
+; SDAG-NEXT: v_mov_b64_e32 v[16:17], s[0:1]
; SDAG-NEXT: s_nop 1
-; SDAG-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[2:9], v[10:17], a[0:3], 1.0, 0.15915494 op_sel:[1,1,0] op_sel_hi:[1,0,0]
+; SDAG-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[0:7], v[8:15], v[16:19], 1.0, 0.15915494 op_sel:[1,1,0] op_sel_hi:[1,0,0]
; SDAG-NEXT: s_nop 7
; SDAG-NEXT: s_nop 3
-; SDAG-NEXT: global_store_dwordx4 v0, a[0:3], s[4:5]
+; SDAG-NEXT: global_store_dwordx4 v20, v[0:3], s[4:5]
; SDAG-NEXT: s_endpgm
;
; GISEL-LABEL: test_mfma_scale_f32_16x16x128_f8f6f4__vgprcd___scaleA_FP_literal__scaleB__FP_literal:
@@ -2202,21 +2186,19 @@ define amdgpu_kernel void @test_mfma_scale_f32_16x16x128_f8f6f4__vgprcd___scaleA
; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[12:13]
; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[14:15]
; GISEL-NEXT: v_mov_b64_e32 v[8:9], s[16:17]
-; GISEL-NEXT: v_accvgpr_write_b32 a0, s0
+; GISEL-NEXT: v_mov_b64_e32 v[18:19], s[2:3]
; GISEL-NEXT: v_mov_b64_e32 v[10:11], s[18:19]
; GISEL-NEXT: v_mov_b64_e32 v[12:13], s[20:21]
; GISEL-NEXT: v_mov_b64_e32 v[14:15], s[22:23]
-; GISEL-NEXT: v_accvgpr_write_b32 a1, s1
-; GISEL-NEXT: v_accvgpr_write_b32 a2, s2
-; GISEL-NEXT: v_accvgpr_write_b32 a3, s3
+; GISEL-NEXT: v_mov_b64_e32 v[16:17], s[0:1]
; GISEL-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x50
; GISEL-NEXT: s_nop 0
-; GISEL-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 a[0:3], v[0:7], v[8:15], a[0:3], 1.0, 0.15915494 op_sel:[1,1,0] op_sel_hi:[1,0,0]
-; GISEL-NEXT: v_mov_b32_e32 v0, 0
+; GISEL-NEXT: v_mfma_scale_f32_16x16x128_f8f6f4 v[0:3], v[0:7], v[8:15], v[16:19], 1.0, 0.15915494 op_sel:[1,1,0] op_sel_hi:[1,0,0]
+; GISEL-NEXT: v_mov_b32_e32 v4, 0
; GISEL-NEXT: s_waitcnt lgkmcnt(0)
; GISEL-NEXT: s_nop 7
; GISEL-NEXT: s_nop 1
-; GISEL-NEXT: global_store_dwordx4 v0, a[0:3], s[4:5]
+; GISEL-NEXT: global_store_dwordx4 v4, v[0:3], s[4:5]
; GISEL-NEXT: s_endpgm
%result = call <4 x float> @llvm.amdgcn.mfma.scale.f32.16x16x128.f8f6f4.v8i32.v8i32(<8 x i32> %arg0, <8 x i32> %arg1, <4 x float> %arg2, i32 0, i32 0, i32 3, i32 1065353216, i32 1, i32 1042479491)
store <4 x float> %result, ptr addrspace(1) %ptr, align 16
@@ -2559,5 +2541,5 @@ declare <4 x float> @llvm.amdgcn.mfma.scale.f32.16x16x128.f8f6f4.v6i32.v8i32(<6
declare <4 x float> @llvm.amdgcn.mfma.scale.f32.16x16x128.f8f6f4.v8i32.v4i32(<8 x i32>, <4 x i32>, <4 x float>, i32 immarg, i32 immarg, i32 immarg, i32, i32 immarg, i32) #1
declare <4 x float> @llvm.amdgcn.mfma.scale.f32.16x16x128.f8f6f4.v8i32.v6i32(<8 x i32>, <6 x i32>, <4 x float>, i32 immarg, i32 immarg, i32 immarg, i32, i32 immarg, i32) #1
-attributes #0 = { "amdgpu-flat-work-group-size"="512,512" }
+attributes #0 = { "amdgpu-flat-work-group-size"="512,512" "amdgpu-agpr-alloc"="0,0" }
attributes #1 = { convergent nocallback nofree nosync nounwind willreturn memory(none) }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.scale.f32.32x32x64.f8f6f4.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.scale.f32.32x32x64.f8f6f4.ll
index bc50058..0b2818f 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.scale.f32.32x32x64.f8f6f4.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.scale.f32.32x32x64.f8f6f4.ll
@@ -4539,49 +4539,41 @@ define amdgpu_kernel void @test_mfma_scale_f32_32x32x64_f8f6f4__vgprcd(<8 x i32>
; SDAG-NEXT: s_load_dwordx16 s[36:51], s[4:5], 0x40
; SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x80
; SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; SDAG-NEXT: v_accvgpr_write_b32 a0, s36
-; SDAG-NEXT: v_mov_b32_e32 v2, s8
-; SDAG-NEXT: v_mov_b32_e32 v3, s9
-; SDAG-NEXT: v_mov_b32_e32 v4, s10
-; SDAG-NEXT: v_mov_b32_e32 v5, s11
-; SDAG-NEXT: v_mov_b32_e32 v6, s12
-; SDAG-NEXT: v_mov_b32_e32 v7, s13
-; SDAG-NEXT: v_mov_b32_e32 v8, s14
-; SDAG-NEXT: v_mov_b32_e32 v9, s15
-; SDAG-NEXT: v_mov_b32_e32 v10, s16
-; SDAG-NEXT: v_mov_b32_e32 v11, s17
-; SDAG-NEXT: v_mov_b32_e32 v12, s18
-; SDAG-NEXT: v_mov_b32_e32 v13, s19
-; SDAG-NEXT: v_mov_b32_e32 v14, s20
-; SDAG-NEXT: v_mov_b32_e32 v15, s21
-; SDAG-NEXT: v_mov_b32_e32 v16, s22
-; SDAG-NEXT: v_mov_b32_e32 v17, s23
-; SDAG-NEXT: v_accvgpr_write_b32 a1, s37
-; SDAG-NEXT: v_accvgpr_write_b32 a2, s38
-; SDAG-NEXT: v_accvgpr_write_b32 a3, s39
-; SDAG-NEXT: v_accvgpr_write_b32 a4, s40
-; SDAG-NEXT: v_accvgpr_write_b32 a5, s41
-; SDAG-NEXT: v_accvgpr_write_b32 a6, s42
-; SDAG-NEXT: v_accvgpr_write_b32 a7, s43
-; SDAG-NEXT: v_accvgpr_write_b32 a8, s44
-; SDAG-NEXT: v_accvgpr_write_b32 a9, s45
-; SDAG-NEXT: v_accvgpr_write_b32 a10, s46
-; SDAG-NEXT: v_accvgpr_write_b32 a11, s47
-; SDAG-NEXT: v_accvgpr_write_b32 a12, s48
-; SDAG-NEXT: v_accvgpr_write_b32 a13, s49
-; SDAG-NEXT: v_accvgpr_write_b32 a14, s50
-; SDAG-NEXT: v_accvgpr_write_b32 a15, s51
-; SDAG-NEXT: v_mov_b32_e32 v0, s1
+; SDAG-NEXT: v_mov_b64_e32 v[0:1], s[36:37]
+; SDAG-NEXT: v_mov_b32_e32 v16, s8
+; SDAG-NEXT: v_mov_b32_e32 v17, s9
+; SDAG-NEXT: v_mov_b32_e32 v18, s10
+; SDAG-NEXT: v_mov_b32_e32 v19, s11
+; SDAG-NEXT: v_mov_b32_e32 v20, s12
+; SDAG-NEXT: v_mov_b32_e32 v21, s13
+; SDAG-NEXT: v_mov_b32_e32 v22, s14
+; SDAG-NEXT: v_mov_b32_e32 v23, s15
+; SDAG-NEXT: v_mov_b32_e32 v24, s16
+; SDAG-NEXT: v_mov_b32_e32 v25, s17
+; SDAG-NEXT: v_mov_b32_e32 v26, s18
+; SDAG-NEXT: v_mov_b32_e32 v27, s19
+; SDAG-NEXT: v_mov_b32_e32 v28, s20
+; SDAG-NEXT: v_mov_b32_e32 v29, s21
+; SDAG-NEXT: v_mov_b32_e32 v30, s22
+; SDAG-NEXT: v_mov_b32_e32 v31, s23
+; SDAG-NEXT: v_mov_b64_e32 v[2:3], s[38:39]
+; SDAG-NEXT: v_mov_b64_e32 v[4:5], s[40:41]
+; SDAG-NEXT: v_mov_b64_e32 v[6:7], s[42:43]
+; SDAG-NEXT: v_mov_b64_e32 v[8:9], s[44:45]
+; SDAG-NEXT: v_mov_b64_e32 v[10:11], s[46:47]
+; SDAG-NEXT: v_mov_b64_e32 v[12:13], s[48:49]
+; SDAG-NEXT: v_mov_b64_e32 v[14:15], s[50:51]
+; SDAG-NEXT: v_mov_b32_e32 v32, s1
; SDAG-NEXT: s_nop 1
-; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[2:9], v[10:17], a[0:15], s0, v0 op_sel:[1,1,0] op_sel_hi:[1,0,0] blgp:2
-; SDAG-NEXT: v_mov_b32_e32 v0, 0
+; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 v[0:15], v[16:23], v[24:31], v[0:15], s0, v32 op_sel:[1,1,0] op_sel_hi:[1,0,0] blgp:2
+; SDAG-NEXT: v_mov_b32_e32 v16, 0
; SDAG-NEXT: s_nop 7
; SDAG-NEXT: s_nop 7
; SDAG-NEXT: s_nop 2
-; SDAG-NEXT: global_store_dwordx4 v0, a[12:15], s[2:3] offset:48
-; SDAG-NEXT: global_store_dwordx4 v0, a[8:11], s[2:3] offset:32
-; SDAG-NEXT: global_store_dwordx4 v0, a[4:7], s[2:3] offset:16
-; SDAG-NEXT: global_store_dwordx4 v0, a[0:3], s[2:3]
+; SDAG-NEXT: global_store_dwordx4 v16, v[12:15], s[2:3] offset:48
+; SDAG-NEXT: global_store_dwordx4 v16, v[8:11], s[2:3] offset:32
+; SDAG-NEXT: global_store_dwordx4 v16, v[4:7], s[2:3] offset:16
+; SDAG-NEXT: global_store_dwordx4 v16, v[0:3], s[2:3]
; SDAG-NEXT: s_endpgm
;
; GISEL-LABEL: test_mfma_scale_f32_32x32x64_f8f6f4__vgprcd:
@@ -4590,41 +4582,33 @@ define amdgpu_kernel void @test_mfma_scale_f32_32x32x64_f8f6f4__vgprcd(<8 x i32>
; GISEL-NEXT: s_load_dwordx16 s[36:51], s[4:5], 0x40
; GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x80
; GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[8:9]
-; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[10:11]
-; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[12:13]
-; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[14:15]
-; GISEL-NEXT: v_mov_b64_e32 v[8:9], s[16:17]
-; GISEL-NEXT: v_accvgpr_write_b32 a0, s36
-; GISEL-NEXT: v_mov_b64_e32 v[10:11], s[18:19]
-; GISEL-NEXT: v_mov_b64_e32 v[12:13], s[20:21]
-; GISEL-NEXT: v_mov_b64_e32 v[14:15], s[22:23]
-; GISEL-NEXT: v_accvgpr_write_b32 a1, s37
-; GISEL-NEXT: v_accvgpr_write_b32 a2, s38
-; GISEL-NEXT: v_accvgpr_write_b32 a3, s39
-; GISEL-NEXT: v_accvgpr_write_b32 a4, s40
-; GISEL-NEXT: v_accvgpr_write_b32 a5, s41
-; GISEL-NEXT: v_accvgpr_write_b32 a6, s42
-; GISEL-NEXT: v_accvgpr_write_b32 a7, s43
-; GISEL-NEXT: v_accvgpr_write_b32 a8, s44
-; GISEL-NEXT: v_accvgpr_write_b32 a9, s45
-; GISEL-NEXT: v_accvgpr_write_b32 a10, s46
-; GISEL-NEXT: v_accvgpr_write_b32 a11, s47
-; GISEL-NEXT: v_accvgpr_write_b32 a12, s48
-; GISEL-NEXT: v_accvgpr_write_b32 a13, s49
-; GISEL-NEXT: v_accvgpr_write_b32 a14, s50
-; GISEL-NEXT: v_accvgpr_write_b32 a15, s51
-; GISEL-NEXT: v_mov_b32_e32 v16, s1
+; GISEL-NEXT: v_mov_b64_e32 v[22:23], s[14:15]
+; GISEL-NEXT: v_mov_b64_e32 v[20:21], s[12:13]
+; GISEL-NEXT: v_mov_b64_e32 v[18:19], s[10:11]
+; GISEL-NEXT: v_mov_b64_e32 v[16:17], s[8:9]
+; GISEL-NEXT: v_mov_b64_e32 v[30:31], s[22:23]
+; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[36:37]
+; GISEL-NEXT: v_mov_b64_e32 v[28:29], s[20:21]
+; GISEL-NEXT: v_mov_b64_e32 v[26:27], s[18:19]
+; GISEL-NEXT: v_mov_b64_e32 v[24:25], s[16:17]
+; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[38:39]
+; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[40:41]
+; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[42:43]
+; GISEL-NEXT: v_mov_b64_e32 v[8:9], s[44:45]
+; GISEL-NEXT: v_mov_b64_e32 v[10:11], s[46:47]
+; GISEL-NEXT: v_mov_b64_e32 v[12:13], s[48:49]
+; GISEL-NEXT: v_mov_b64_e32 v[14:15], s[50:51]
+; GISEL-NEXT: v_mov_b32_e32 v32, s1
; GISEL-NEXT: s_nop 1
-; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], s0, v16 op_sel:[1,1,0] op_sel_hi:[1,0,0] blgp:2
-; GISEL-NEXT: v_mov_b32_e32 v0, 0
+; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 v[0:15], v[16:23], v[24:31], v[0:15], s0, v32 op_sel:[1,1,0] op_sel_hi:[1,0,0] blgp:2
+; GISEL-NEXT: v_mov_b32_e32 v16, 0
; GISEL-NEXT: s_nop 7
; GISEL-NEXT: s_nop 7
; GISEL-NEXT: s_nop 2
-; GISEL-NEXT: global_store_dwordx4 v0, a[0:3], s[2:3]
-; GISEL-NEXT: global_store_dwordx4 v0, a[4:7], s[2:3] offset:16
-; GISEL-NEXT: global_store_dwordx4 v0, a[8:11], s[2:3] offset:32
-; GISEL-NEXT: global_store_dwordx4 v0, a[12:15], s[2:3] offset:48
+; GISEL-NEXT: global_store_dwordx4 v16, v[0:3], s[2:3]
+; GISEL-NEXT: global_store_dwordx4 v16, v[4:7], s[2:3] offset:16
+; GISEL-NEXT: global_store_dwordx4 v16, v[8:11], s[2:3] offset:32
+; GISEL-NEXT: global_store_dwordx4 v16, v[12:15], s[2:3] offset:48
; GISEL-NEXT: s_endpgm
%result = call <16 x float> @llvm.amdgcn.mfma.scale.f32.32x32x64.f8f6f4.v8i32.v8i32(<8 x i32> %arg0, <8 x i32> %arg1, <16 x float> %arg2, i32 0, i32 2, i32 3, i32 %scale0, i32 1, i32 %scale1)
store <16 x float> %result, ptr addrspace(1) %ptr, align 64
@@ -4639,91 +4623,75 @@ define amdgpu_kernel void @test_mfma_scale_f32_32x32x64_f8f6f4__vgprcd___scaleA_
; SDAG-NEXT: s_movk_i32 s2, 0x41
; SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x80
; SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; SDAG-NEXT: v_mov_b32_e32 v0, s8
-; SDAG-NEXT: v_mov_b32_e32 v1, s9
-; SDAG-NEXT: v_mov_b32_e32 v2, s10
-; SDAG-NEXT: v_mov_b32_e32 v3, s11
-; SDAG-NEXT: v_mov_b32_e32 v4, s12
-; SDAG-NEXT: v_mov_b32_e32 v5, s13
-; SDAG-NEXT: v_mov_b32_e32 v6, s14
-; SDAG-NEXT: v_mov_b32_e32 v7, s15
-; SDAG-NEXT: v_accvgpr_write_b32 a0, s36
-; SDAG-NEXT: v_mov_b32_e32 v8, s16
-; SDAG-NEXT: v_mov_b32_e32 v9, s17
-; SDAG-NEXT: v_mov_b32_e32 v10, s18
-; SDAG-NEXT: v_mov_b32_e32 v11, s19
-; SDAG-NEXT: v_mov_b32_e32 v12, s20
-; SDAG-NEXT: v_mov_b32_e32 v13, s21
-; SDAG-NEXT: v_mov_b32_e32 v14, s22
-; SDAG-NEXT: v_mov_b32_e32 v15, s23
-; SDAG-NEXT: v_accvgpr_write_b32 a1, s37
-; SDAG-NEXT: v_accvgpr_write_b32 a2, s38
-; SDAG-NEXT: v_accvgpr_write_b32 a3, s39
-; SDAG-NEXT: v_accvgpr_write_b32 a4, s40
-; SDAG-NEXT: v_accvgpr_write_b32 a5, s41
-; SDAG-NEXT: v_accvgpr_write_b32 a6, s42
-; SDAG-NEXT: v_accvgpr_write_b32 a7, s43
-; SDAG-NEXT: v_accvgpr_write_b32 a8, s44
-; SDAG-NEXT: v_accvgpr_write_b32 a9, s45
-; SDAG-NEXT: v_accvgpr_write_b32 a10, s46
-; SDAG-NEXT: v_accvgpr_write_b32 a11, s47
-; SDAG-NEXT: v_accvgpr_write_b32 a12, s48
-; SDAG-NEXT: v_accvgpr_write_b32 a13, s49
-; SDAG-NEXT: v_accvgpr_write_b32 a14, s50
-; SDAG-NEXT: v_accvgpr_write_b32 a15, s51
+; SDAG-NEXT: v_mov_b32_e32 v16, s8
+; SDAG-NEXT: v_mov_b32_e32 v17, s9
+; SDAG-NEXT: v_mov_b32_e32 v18, s10
+; SDAG-NEXT: v_mov_b32_e32 v19, s11
+; SDAG-NEXT: v_mov_b32_e32 v20, s12
+; SDAG-NEXT: v_mov_b32_e32 v21, s13
+; SDAG-NEXT: v_mov_b32_e32 v22, s14
+; SDAG-NEXT: v_mov_b32_e32 v23, s15
+; SDAG-NEXT: v_mov_b64_e32 v[0:1], s[36:37]
+; SDAG-NEXT: v_mov_b32_e32 v24, s16
+; SDAG-NEXT: v_mov_b32_e32 v25, s17
+; SDAG-NEXT: v_mov_b32_e32 v26, s18
+; SDAG-NEXT: v_mov_b32_e32 v27, s19
+; SDAG-NEXT: v_mov_b32_e32 v28, s20
+; SDAG-NEXT: v_mov_b32_e32 v29, s21
+; SDAG-NEXT: v_mov_b32_e32 v30, s22
+; SDAG-NEXT: v_mov_b32_e32 v31, s23
+; SDAG-NEXT: v_mov_b64_e32 v[2:3], s[38:39]
+; SDAG-NEXT: v_mov_b64_e32 v[4:5], s[40:41]
+; SDAG-NEXT: v_mov_b64_e32 v[6:7], s[42:43]
+; SDAG-NEXT: v_mov_b64_e32 v[8:9], s[44:45]
+; SDAG-NEXT: v_mov_b64_e32 v[10:11], s[46:47]
+; SDAG-NEXT: v_mov_b64_e32 v[12:13], s[48:49]
+; SDAG-NEXT: v_mov_b64_e32 v[14:15], s[50:51]
; SDAG-NEXT: s_nop 1
-; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], s2, -2 op_sel:[1,1,0] op_sel_hi:[1,0,0] blgp:2
-; SDAG-NEXT: v_mov_b32_e32 v0, 0
+; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 v[0:15], v[16:23], v[24:31], v[0:15], s2, -2 op_sel:[1,1,0] op_sel_hi:[1,0,0] blgp:2
+; SDAG-NEXT: v_mov_b32_e32 v16, 0
; SDAG-NEXT: s_nop 7
; SDAG-NEXT: s_nop 7
; SDAG-NEXT: s_nop 2
-; SDAG-NEXT: global_store_dwordx4 v0, a[12:15], s[0:1] offset:48
-; SDAG-NEXT: global_store_dwordx4 v0, a[8:11], s[0:1] offset:32
-; SDAG-NEXT: global_store_dwordx4 v0, a[4:7], s[0:1] offset:16
-; SDAG-NEXT: global_store_dwordx4 v0, a[0:3], s[0:1]
+; SDAG-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] offset:48
+; SDAG-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] offset:32
+; SDAG-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16
+; SDAG-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1]
; SDAG-NEXT: s_endpgm
;
; GISEL-LABEL: test_mfma_scale_f32_32x32x64_f8f6f4__vgprcd___scaleA_kimm__scaleB__inlineimm:
; GISEL: ; %bb.0:
; GISEL-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x0
; GISEL-NEXT: s_load_dwordx16 s[36:51], s[4:5], 0x40
-; GISEL-NEXT: v_mov_b32_e32 v16, 0x41
+; GISEL-NEXT: v_mov_b32_e32 v32, 0x41
; GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x80
; GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[8:9]
-; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[10:11]
-; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[12:13]
-; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[14:15]
-; GISEL-NEXT: v_mov_b64_e32 v[8:9], s[16:17]
-; GISEL-NEXT: v_accvgpr_write_b32 a0, s36
-; GISEL-NEXT: v_mov_b64_e32 v[10:11], s[18:19]
-; GISEL-NEXT: v_mov_b64_e32 v[12:13], s[20:21]
-; GISEL-NEXT: v_mov_b64_e32 v[14:15], s[22:23]
-; GISEL-NEXT: v_accvgpr_write_b32 a1, s37
-; GISEL-NEXT: v_accvgpr_write_b32 a2, s38
-; GISEL-NEXT: v_accvgpr_write_b32 a3, s39
-; GISEL-NEXT: v_accvgpr_write_b32 a4, s40
-; GISEL-NEXT: v_accvgpr_write_b32 a5, s41
-; GISEL-NEXT: v_accvgpr_write_b32 a6, s42
-; GISEL-NEXT: v_accvgpr_write_b32 a7, s43
-; GISEL-NEXT: v_accvgpr_write_b32 a8, s44
-; GISEL-NEXT: v_accvgpr_write_b32 a9, s45
-; GISEL-NEXT: v_accvgpr_write_b32 a10, s46
-; GISEL-NEXT: v_accvgpr_write_b32 a11, s47
-; GISEL-NEXT: v_accvgpr_write_b32 a12, s48
-; GISEL-NEXT: v_accvgpr_write_b32 a13, s49
-; GISEL-NEXT: v_accvgpr_write_b32 a14, s50
-; GISEL-NEXT: v_accvgpr_write_b32 a15, s51
+; GISEL-NEXT: v_mov_b64_e32 v[22:23], s[14:15]
+; GISEL-NEXT: v_mov_b64_e32 v[20:21], s[12:13]
+; GISEL-NEXT: v_mov_b64_e32 v[18:19], s[10:11]
+; GISEL-NEXT: v_mov_b64_e32 v[16:17], s[8:9]
+; GISEL-NEXT: v_mov_b64_e32 v[30:31], s[22:23]
+; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[36:37]
+; GISEL-NEXT: v_mov_b64_e32 v[28:29], s[20:21]
+; GISEL-NEXT: v_mov_b64_e32 v[26:27], s[18:19]
+; GISEL-NEXT: v_mov_b64_e32 v[24:25], s[16:17]
+; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[38:39]
+; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[40:41]
+; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[42:43]
+; GISEL-NEXT: v_mov_b64_e32 v[8:9], s[44:45]
+; GISEL-NEXT: v_mov_b64_e32 v[10:11], s[46:47]
+; GISEL-NEXT: v_mov_b64_e32 v[12:13], s[48:49]
+; GISEL-NEXT: v_mov_b64_e32 v[14:15], s[50:51]
; GISEL-NEXT: s_nop 1
-; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], v16, -2 op_sel:[1,1,0] op_sel_hi:[1,0,0] blgp:2
-; GISEL-NEXT: v_mov_b32_e32 v0, 0
+; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 v[0:15], v[16:23], v[24:31], v[0:15], v32, -2 op_sel:[1,1,0] op_sel_hi:[1,0,0] blgp:2
+; GISEL-NEXT: v_mov_b32_e32 v16, 0
; GISEL-NEXT: s_nop 7
; GISEL-NEXT: s_nop 7
; GISEL-NEXT: s_nop 2
-; GISEL-NEXT: global_store_dwordx4 v0, a[0:3], s[0:1]
-; GISEL-NEXT: global_store_dwordx4 v0, a[4:7], s[0:1] offset:16
-; GISEL-NEXT: global_store_dwordx4 v0, a[8:11], s[0:1] offset:32
-; GISEL-NEXT: global_store_dwordx4 v0, a[12:15], s[0:1] offset:48
+; GISEL-NEXT: global_store_dwordx4 v16, v[0:3], s[0:1]
+; GISEL-NEXT: global_store_dwordx4 v16, v[4:7], s[0:1] offset:16
+; GISEL-NEXT: global_store_dwordx4 v16, v[8:11], s[0:1] offset:32
+; GISEL-NEXT: global_store_dwordx4 v16, v[12:15], s[0:1] offset:48
; GISEL-NEXT: s_endpgm
%result = call <16 x float> @llvm.amdgcn.mfma.scale.f32.32x32x64.f8f6f4.v8i32.v8i32(<8 x i32> %arg0, <8 x i32> %arg1, <16 x float> %arg2, i32 0, i32 2, i32 3, i32 65, i32 1, i32 -2)
store <16 x float> %result, ptr addrspace(1) %ptr, align 64
@@ -5031,77 +4999,72 @@ define amdgpu_kernel void @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__vgprcd_nonma
; SDAG: ; %bb.0:
; SDAG-NEXT: s_load_dwordx16 s[12:27], s[4:5], 0x0
; SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; SDAG-NEXT: v_mov_b32_e32 v0, s12
-; SDAG-NEXT: v_mov_b32_e32 v1, s13
-; SDAG-NEXT: v_mov_b32_e32 v2, s14
-; SDAG-NEXT: v_mov_b32_e32 v3, s15
-; SDAG-NEXT: v_mov_b32_e32 v4, s16
-; SDAG-NEXT: v_mov_b32_e32 v5, s17
-; SDAG-NEXT: v_mov_b32_e32 v6, s18
-; SDAG-NEXT: v_mov_b32_e32 v7, s19
-; SDAG-NEXT: v_mov_b32_e32 v8, s20
-; SDAG-NEXT: v_mov_b32_e32 v9, s21
-; SDAG-NEXT: v_mov_b32_e32 v10, s22
-; SDAG-NEXT: v_mov_b32_e32 v11, s23
+; SDAG-NEXT: v_mov_b32_e32 v32, s12
+; SDAG-NEXT: v_mov_b32_e32 v33, s13
+; SDAG-NEXT: v_mov_b32_e32 v34, s14
+; SDAG-NEXT: v_mov_b32_e32 v35, s15
+; SDAG-NEXT: v_mov_b32_e32 v36, s16
+; SDAG-NEXT: v_mov_b32_e32 v37, s17
+; SDAG-NEXT: v_mov_b32_e32 v38, s18
+; SDAG-NEXT: v_mov_b32_e32 v39, s19
+; SDAG-NEXT: v_mov_b32_e32 v40, s20
+; SDAG-NEXT: v_mov_b32_e32 v41, s21
+; SDAG-NEXT: v_mov_b32_e32 v42, s22
+; SDAG-NEXT: v_mov_b32_e32 v43, s23
; SDAG-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x40
-; SDAG-NEXT: v_mov_b32_e32 v12, s24
-; SDAG-NEXT: v_mov_b32_e32 v13, s25
-; SDAG-NEXT: v_mov_b32_e32 v14, s26
-; SDAG-NEXT: v_mov_b32_e32 v15, s27
+; SDAG-NEXT: v_mov_b32_e32 v44, s24
+; SDAG-NEXT: v_mov_b32_e32 v45, s25
+; SDAG-NEXT: v_mov_b32_e32 v46, s26
+; SDAG-NEXT: v_mov_b32_e32 v47, s27
; SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; SDAG-NEXT: v_accvgpr_write_b32 a31, s23
-; SDAG-NEXT: v_accvgpr_write_b32 a30, s22
-; SDAG-NEXT: v_accvgpr_write_b32 a29, s21
-; SDAG-NEXT: v_accvgpr_write_b32 a28, s20
-; SDAG-NEXT: v_accvgpr_write_b32 a27, s19
-; SDAG-NEXT: v_accvgpr_write_b32 a26, s18
-; SDAG-NEXT: v_accvgpr_write_b32 a25, s17
-; SDAG-NEXT: v_accvgpr_write_b32 a24, s16
-; SDAG-NEXT: v_accvgpr_write_b32 a23, s15
-; SDAG-NEXT: v_accvgpr_write_b32 a22, s14
-; SDAG-NEXT: v_accvgpr_write_b32 a21, s13
-; SDAG-NEXT: v_accvgpr_write_b32 a20, s12
-; SDAG-NEXT: v_accvgpr_write_b32 a19, s11
-; SDAG-NEXT: v_accvgpr_write_b32 a18, s10
-; SDAG-NEXT: v_accvgpr_write_b32 a17, s9
-; SDAG-NEXT: v_accvgpr_write_b32 a16, s8
+; SDAG-NEXT: v_mov_b64_e32 v[30:31], s[22:23]
+; SDAG-NEXT: v_mov_b64_e32 v[28:29], s[20:21]
+; SDAG-NEXT: v_mov_b64_e32 v[26:27], s[18:19]
+; SDAG-NEXT: v_mov_b64_e32 v[24:25], s[16:17]
+; SDAG-NEXT: v_mov_b64_e32 v[22:23], s[14:15]
+; SDAG-NEXT: v_mov_b64_e32 v[20:21], s[12:13]
+; SDAG-NEXT: v_mov_b64_e32 v[18:19], s[10:11]
+; SDAG-NEXT: v_mov_b64_e32 v[16:17], s[8:9]
; SDAG-NEXT: s_nop 1
-; SDAG-NEXT: v_mfma_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[16:31] blgp:2
-; SDAG-NEXT: v_mov_b32_e32 v2, s20
-; SDAG-NEXT: v_mov_b32_e32 v3, s21
-; SDAG-NEXT: v_mov_b32_e32 v4, s22
-; SDAG-NEXT: v_mov_b32_e32 v5, s23
-; SDAG-NEXT: v_mov_b64_e32 v[0:1], 48
-; SDAG-NEXT: global_store_dwordx4 v[0:1], v[2:5], off sc0 sc1
+; SDAG-NEXT: v_mfma_f32_32x32x64_f8f6f4 v[0:15], v[32:39], v[40:47], v[16:31] blgp:2
+; SDAG-NEXT: s_nop 7
+; SDAG-NEXT: s_nop 6
+; SDAG-NEXT: v_mov_b32_e32 v16, s20
+; SDAG-NEXT: v_mov_b32_e32 v17, s21
+; SDAG-NEXT: v_mov_b32_e32 v18, s22
+; SDAG-NEXT: v_mov_b32_e32 v19, s23
+; SDAG-NEXT: v_mov_b64_e32 v[20:21], 48
+; SDAG-NEXT: global_store_dwordx4 v[20:21], v[16:19], off sc0 sc1
; SDAG-NEXT: s_waitcnt vmcnt(0)
-; SDAG-NEXT: v_mov_b32_e32 v6, s18
-; SDAG-NEXT: v_mov_b32_e32 v7, s19
-; SDAG-NEXT: v_mov_b32_e32 v4, s16
-; SDAG-NEXT: v_mov_b32_e32 v5, s17
-; SDAG-NEXT: v_mov_b64_e32 v[2:3], 32
-; SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off sc0 sc1
+; SDAG-NEXT: v_mov_b64_e32 v[22:23], 32
+; SDAG-NEXT: v_mov_b64_e32 v[24:25], 16
+; SDAG-NEXT: v_mov_b32_e32 v16, s16
+; SDAG-NEXT: v_mov_b32_e32 v17, s17
+; SDAG-NEXT: v_mov_b32_e32 v18, s18
+; SDAG-NEXT: v_mov_b32_e32 v19, s19
+; SDAG-NEXT: global_store_dwordx4 v[22:23], v[16:19], off sc0 sc1
; SDAG-NEXT: s_waitcnt vmcnt(0)
-; SDAG-NEXT: v_mov_b32_e32 v8, s14
-; SDAG-NEXT: v_mov_b32_e32 v9, s15
-; SDAG-NEXT: v_mov_b32_e32 v6, s12
-; SDAG-NEXT: v_mov_b32_e32 v7, s13
-; SDAG-NEXT: v_mov_b64_e32 v[4:5], 16
-; SDAG-NEXT: global_store_dwordx4 v[4:5], v[6:9], off sc0 sc1
+; SDAG-NEXT: v_mov_b64_e32 v[26:27], 0
+; SDAG-NEXT: v_mov_b32_e32 v16, s12
+; SDAG-NEXT: v_mov_b32_e32 v17, s13
+; SDAG-NEXT: v_mov_b32_e32 v18, s14
+; SDAG-NEXT: v_mov_b32_e32 v19, s15
+; SDAG-NEXT: global_store_dwordx4 v[24:25], v[16:19], off sc0 sc1
; SDAG-NEXT: s_waitcnt vmcnt(0)
-; SDAG-NEXT: v_mov_b32_e32 v10, s10
-; SDAG-NEXT: v_mov_b32_e32 v11, s11
-; SDAG-NEXT: v_mov_b32_e32 v8, s8
-; SDAG-NEXT: v_mov_b32_e32 v9, s9
-; SDAG-NEXT: v_mov_b64_e32 v[6:7], 0
-; SDAG-NEXT: global_store_dwordx4 v[6:7], v[8:11], off sc0 sc1
+; SDAG-NEXT: s_nop 0
+; SDAG-NEXT: v_mov_b32_e32 v16, s8
+; SDAG-NEXT: v_mov_b32_e32 v17, s9
+; SDAG-NEXT: v_mov_b32_e32 v18, s10
+; SDAG-NEXT: v_mov_b32_e32 v19, s11
+; SDAG-NEXT: global_store_dwordx4 v[26:27], v[16:19], off sc0 sc1
; SDAG-NEXT: s_waitcnt vmcnt(0)
-; SDAG-NEXT: global_store_dwordx4 v[2:3], a[8:11], off sc0 sc1
+; SDAG-NEXT: global_store_dwordx4 v[22:23], v[8:11], off sc0 sc1
; SDAG-NEXT: s_waitcnt vmcnt(0)
-; SDAG-NEXT: global_store_dwordx4 v[0:1], a[12:15], off sc0 sc1
+; SDAG-NEXT: global_store_dwordx4 v[20:21], v[12:15], off sc0 sc1
; SDAG-NEXT: s_waitcnt vmcnt(0)
-; SDAG-NEXT: global_store_dwordx4 v[6:7], a[0:3], off sc0 sc1
+; SDAG-NEXT: global_store_dwordx4 v[26:27], v[0:3], off sc0 sc1
; SDAG-NEXT: s_waitcnt vmcnt(0)
-; SDAG-NEXT: global_store_dwordx4 v[4:5], a[4:7], off sc0 sc1
+; SDAG-NEXT: global_store_dwordx4 v[24:25], v[4:7], off sc0 sc1
; SDAG-NEXT: s_waitcnt vmcnt(0)
; SDAG-NEXT: s_endpgm
;
@@ -5109,61 +5072,45 @@ define amdgpu_kernel void @test_mfma_scale_f32_32x32x64_f8f6f4_0_0__vgprcd_nonma
; GISEL: ; %bb.0:
; GISEL-NEXT: s_load_dwordx16 s[36:51], s[4:5], 0x0
; GISEL-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x40
-; GISEL-NEXT: v_mov_b64_e32 v[16:17], 0
-; GISEL-NEXT: v_mov_b64_e32 v[18:19], 16
-; GISEL-NEXT: v_mov_b64_e32 v[20:21], 32
; GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[36:37]
-; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[38:39]
-; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[40:41]
-; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[42:43]
-; GISEL-NEXT: v_mov_b64_e32 v[8:9], s[44:45]
-; GISEL-NEXT: v_accvgpr_write_b32 a31, s23
-; GISEL-NEXT: v_mov_b64_e32 v[10:11], s[46:47]
-; GISEL-NEXT: v_mov_b64_e32 v[12:13], s[48:49]
-; GISEL-NEXT: v_mov_b64_e32 v[14:15], s[50:51]
-; GISEL-NEXT: v_accvgpr_write_b32 a30, s22
-; GISEL-NEXT: v_accvgpr_write_b32 a29, s21
-; GISEL-NEXT: v_accvgpr_write_b32 a28, s20
-; GISEL-NEXT: v_accvgpr_write_b32 a27, s19
-; GISEL-NEXT: v_accvgpr_write_b32 a26, s18
-; GISEL-NEXT: v_accvgpr_write_b32 a25, s17
-; GISEL-NEXT: v_accvgpr_write_b32 a24, s16
-; GISEL-NEXT: v_accvgpr_write_b32 a23, s15
-; GISEL-NEXT: v_accvgpr_write_b32 a22, s14
-; GISEL-NEXT: v_accvgpr_write_b32 a21, s13
-; GISEL-NEXT: v_accvgpr_write_b32 a20, s12
-; GISEL-NEXT: v_accvgpr_write_b32 a19, s11
-; GISEL-NEXT: v_accvgpr_write_b32 a18, s10
-; GISEL-NEXT: v_accvgpr_write_b32 a17, s9
-; GISEL-NEXT: v_accvgpr_write_b32 a16, s8
-; GISEL-NEXT: v_mov_b64_e32 v[22:23], 48
-; GISEL-NEXT: s_nop 0
-; GISEL-NEXT: v_mfma_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[16:31] blgp:2
-; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[8:9]
-; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[10:11]
-; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[12:13]
-; GISEL-NEXT: v_mov_b64_e32 v[8:9], s[16:17]
-; GISEL-NEXT: v_mov_b64_e32 v[12:13], s[20:21]
-; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[14:15]
-; GISEL-NEXT: v_mov_b64_e32 v[10:11], s[18:19]
-; GISEL-NEXT: v_mov_b64_e32 v[14:15], s[22:23]
-; GISEL-NEXT: global_store_dwordx4 v[16:17], v[0:3], off sc0 sc1
+; GISEL-NEXT: v_mov_b64_e32 v[32:33], s[36:37]
+; GISEL-NEXT: v_mov_b64_e32 v[34:35], s[38:39]
+; GISEL-NEXT: v_mov_b64_e32 v[36:37], s[40:41]
+; GISEL-NEXT: v_mov_b64_e32 v[38:39], s[42:43]
+; GISEL-NEXT: v_mov_b64_e32 v[40:41], s[44:45]
+; GISEL-NEXT: v_mov_b64_e32 v[30:31], s[22:23]
+; GISEL-NEXT: v_mov_b64_e32 v[42:43], s[46:47]
+; GISEL-NEXT: v_mov_b64_e32 v[44:45], s[48:49]
+; GISEL-NEXT: v_mov_b64_e32 v[46:47], s[50:51]
+; GISEL-NEXT: v_mov_b64_e32 v[28:29], s[20:21]
+; GISEL-NEXT: v_mov_b64_e32 v[26:27], s[18:19]
+; GISEL-NEXT: v_mov_b64_e32 v[24:25], s[16:17]
+; GISEL-NEXT: v_mov_b64_e32 v[22:23], s[14:15]
+; GISEL-NEXT: v_mov_b64_e32 v[20:21], s[12:13]
+; GISEL-NEXT: v_mov_b64_e32 v[18:19], s[10:11]
+; GISEL-NEXT: v_mov_b64_e32 v[16:17], s[8:9]
+; GISEL-NEXT: s_nop 1
+; GISEL-NEXT: v_mfma_f32_32x32x64_f8f6f4 v[0:15], v[32:39], v[40:47], v[16:31] blgp:2
+; GISEL-NEXT: v_mov_b64_e32 v[32:33], 0
+; GISEL-NEXT: v_mov_b64_e32 v[34:35], 16
+; GISEL-NEXT: v_mov_b64_e32 v[36:37], 32
+; GISEL-NEXT: v_mov_b64_e32 v[38:39], 48
+; GISEL-NEXT: global_store_dwordx4 v[32:33], v[16:19], off sc0 sc1
; GISEL-NEXT: s_waitcnt vmcnt(0)
-; GISEL-NEXT: global_store_dwordx4 v[18:19], v[4:7], off sc0 sc1
+; GISEL-NEXT: global_store_dwordx4 v[34:35], v[20:23], off sc0 sc1
; GISEL-NEXT: s_waitcnt vmcnt(0)
-; GISEL-NEXT: global_store_dwordx4 v[20:21], v[8:11], off sc0 sc1
+; GISEL-NEXT: global_store_dwordx4 v[36:37], v[24:27], off sc0 sc1
; GISEL-NEXT: s_waitcnt vmcnt(0)
-; GISEL-NEXT: global_store_dwordx4 v[22:23], v[12:15], off sc0 sc1
+; GISEL-NEXT: global_store_dwordx4 v[38:39], v[28:31], off sc0 sc1
; GISEL-NEXT: s_waitcnt vmcnt(0)
-; GISEL-NEXT: s_nop 3
-; GISEL-NEXT: global_store_dwordx4 v[16:17], a[0:3], off sc0 sc1
+; GISEL-NEXT: s_nop 7
+; GISEL-NEXT: global_store_dwordx4 v[32:33], v[0:3], off sc0 sc1
; GISEL-NEXT: s_waitcnt vmcnt(0)
-; GISEL-NEXT: global_store_dwordx4 v[18:19], a[4:7], off sc0 sc1
+; GISEL-NEXT: global_store_dwordx4 v[34:35], v[4:7], off sc0 sc1
; GISEL-NEXT: s_waitcnt vmcnt(0)
-; GISEL-NEXT: global_store_dwordx4 v[20:21], a[8:11], off sc0 sc1
+; GISEL-NEXT: global_store_dwordx4 v[36:37], v[8:11], off sc0 sc1
; GISEL-NEXT: s_waitcnt vmcnt(0)
-; GISEL-NEXT: global_store_dwordx4 v[22:23], a[12:15], off sc0 sc1
+; GISEL-NEXT: global_store_dwordx4 v[38:39], v[12:15], off sc0 sc1
; GISEL-NEXT: s_waitcnt vmcnt(0)
; GISEL-NEXT: s_endpgm
%result = call <16 x float> @llvm.amdgcn.mfma.scale.f32.32x32x64.f8f6f4.v8i32.v8i32(<8 x i32> %arg0, <8 x i32> %arg1, <16 x float> %arg2, i32 0, i32 2, i32 0, i32 0, i32 0, i32 0)
@@ -5177,77 +5124,70 @@ define amdgpu_kernel void @test_mfma_scale_f32_32x32x64_f8f6f4_25_42__vgprcd_non
; SDAG: ; %bb.0:
; SDAG-NEXT: s_load_dwordx16 s[12:27], s[4:5], 0x0
; SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; SDAG-NEXT: v_mov_b32_e32 v0, s12
-; SDAG-NEXT: v_mov_b32_e32 v1, s13
-; SDAG-NEXT: v_mov_b32_e32 v2, s14
-; SDAG-NEXT: v_mov_b32_e32 v3, s15
-; SDAG-NEXT: v_mov_b32_e32 v4, s16
-; SDAG-NEXT: v_mov_b32_e32 v5, s17
-; SDAG-NEXT: v_mov_b32_e32 v6, s18
-; SDAG-NEXT: v_mov_b32_e32 v7, s19
-; SDAG-NEXT: v_mov_b32_e32 v8, s20
-; SDAG-NEXT: v_mov_b32_e32 v9, s21
-; SDAG-NEXT: v_mov_b32_e32 v10, s22
-; SDAG-NEXT: v_mov_b32_e32 v11, s23
+; SDAG-NEXT: v_mov_b32_e32 v16, s12
+; SDAG-NEXT: v_mov_b32_e32 v17, s13
+; SDAG-NEXT: v_mov_b32_e32 v18, s14
+; SDAG-NEXT: v_mov_b32_e32 v19, s15
+; SDAG-NEXT: v_mov_b32_e32 v20, s16
+; SDAG-NEXT: v_mov_b32_e32 v21, s17
+; SDAG-NEXT: v_mov_b32_e32 v22, s18
+; SDAG-NEXT: v_mov_b32_e32 v23, s19
+; SDAG-NEXT: v_mov_b32_e32 v24, s20
+; SDAG-NEXT: v_mov_b32_e32 v25, s21
+; SDAG-NEXT: v_mov_b32_e32 v26, s22
+; SDAG-NEXT: v_mov_b32_e32 v27, s23
; SDAG-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x40
-; SDAG-NEXT: v_mov_b32_e32 v12, s24
-; SDAG-NEXT: v_mov_b32_e32 v13, s25
-; SDAG-NEXT: v_mov_b32_e32 v14, s26
-; SDAG-NEXT: v_mov_b32_e32 v15, s27
+; SDAG-NEXT: v_mov_b32_e32 v28, s24
+; SDAG-NEXT: v_mov_b32_e32 v29, s25
+; SDAG-NEXT: v_mov_b32_e32 v30, s26
+; SDAG-NEXT: v_mov_b32_e32 v31, s27
; SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; SDAG-NEXT: v_accvgpr_write_b32 a0, s8
-; SDAG-NEXT: v_accvgpr_write_b32 a1, s9
-; SDAG-NEXT: v_accvgpr_write_b32 a2, s10
-; SDAG-NEXT: v_accvgpr_write_b32 a3, s11
-; SDAG-NEXT: v_accvgpr_write_b32 a4, s12
-; SDAG-NEXT: v_accvgpr_write_b32 a5, s13
-; SDAG-NEXT: v_accvgpr_write_b32 a6, s14
-; SDAG-NEXT: v_accvgpr_write_b32 a7, s15
-; SDAG-NEXT: v_accvgpr_write_b32 a8, s16
-; SDAG-NEXT: v_accvgpr_write_b32 a9, s17
-; SDAG-NEXT: v_accvgpr_write_b32 a10, s18
-; SDAG-NEXT: v_accvgpr_write_b32 a11, s19
-; SDAG-NEXT: v_accvgpr_write_b32 a12, s20
-; SDAG-NEXT: v_accvgpr_write_b32 a13, s21
-; SDAG-NEXT: v_accvgpr_write_b32 a14, s22
-; SDAG-NEXT: v_accvgpr_write_b32 a15, s23
+; SDAG-NEXT: v_mov_b64_e32 v[0:1], s[8:9]
+; SDAG-NEXT: v_mov_b64_e32 v[2:3], s[10:11]
+; SDAG-NEXT: v_mov_b64_e32 v[4:5], s[12:13]
+; SDAG-NEXT: v_mov_b64_e32 v[6:7], s[14:15]
+; SDAG-NEXT: v_mov_b64_e32 v[8:9], s[16:17]
+; SDAG-NEXT: v_mov_b64_e32 v[10:11], s[18:19]
+; SDAG-NEXT: v_mov_b64_e32 v[12:13], s[20:21]
+; SDAG-NEXT: v_mov_b64_e32 v[14:15], s[22:23]
; SDAG-NEXT: s_nop 1
-; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], 25, 42 op_sel_hi:[0,0,0] blgp:2
-; SDAG-NEXT: v_mov_b32_e32 v2, s20
-; SDAG-NEXT: v_mov_b32_e32 v3, s21
-; SDAG-NEXT: v_mov_b32_e32 v4, s22
-; SDAG-NEXT: v_mov_b32_e32 v5, s23
-; SDAG-NEXT: v_mov_b64_e32 v[0:1], 48
-; SDAG-NEXT: global_store_dwordx4 v[0:1], v[2:5], off sc0 sc1
+; SDAG-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 v[0:15], v[16:23], v[24:31], v[0:15], 25, 42 op_sel_hi:[0,0,0] blgp:2
+; SDAG-NEXT: v_mov_b32_e32 v16, s20
+; SDAG-NEXT: v_mov_b32_e32 v17, s21
+; SDAG-NEXT: v_mov_b32_e32 v18, s22
+; SDAG-NEXT: v_mov_b32_e32 v19, s23
+; SDAG-NEXT: v_mov_b64_e32 v[20:21], 48
+; SDAG-NEXT: global_store_dwordx4 v[20:21], v[16:19], off sc0 sc1
; SDAG-NEXT: s_waitcnt vmcnt(0)
-; SDAG-NEXT: v_mov_b32_e32 v6, s18
-; SDAG-NEXT: v_mov_b32_e32 v7, s19
-; SDAG-NEXT: v_mov_b32_e32 v4, s16
-; SDAG-NEXT: v_mov_b32_e32 v5, s17
-; SDAG-NEXT: v_mov_b64_e32 v[2:3], 32
-; SDAG-NEXT: global_store_dwordx4 v[2:3], v[4:7], off sc0 sc1
+; SDAG-NEXT: v_mov_b64_e32 v[22:23], 32
+; SDAG-NEXT: v_mov_b64_e32 v[24:25], 16
+; SDAG-NEXT: v_mov_b32_e32 v16, s16
+; SDAG-NEXT: v_mov_b32_e32 v17, s17
+; SDAG-NEXT: v_mov_b32_e32 v18, s18
+; SDAG-NEXT: v_mov_b32_e32 v19, s19
+; SDAG-NEXT: global_store_dwordx4 v[22:23], v[16:19], off sc0 sc1
; SDAG-NEXT: s_waitcnt vmcnt(0)
-; SDAG-NEXT: v_mov_b32_e32 v8, s14
-; SDAG-NEXT: v_mov_b32_e32 v9, s15
-; SDAG-NEXT: v_mov_b32_e32 v6, s12
-; SDAG-NEXT: v_mov_b32_e32 v7, s13
-; SDAG-NEXT: v_mov_b64_e32 v[4:5], 16
-; SDAG-NEXT: global_store_dwordx4 v[4:5], v[6:9], off sc0 sc1
+; SDAG-NEXT: v_mov_b64_e32 v[26:27], 0
+; SDAG-NEXT: v_mov_b32_e32 v16, s12
+; SDAG-NEXT: v_mov_b32_e32 v17, s13
+; SDAG-NEXT: v_mov_b32_e32 v18, s14
+; SDAG-NEXT: v_mov_b32_e32 v19, s15
+; SDAG-NEXT: global_store_dwordx4 v[24:25], v[16:19], off sc0 sc1
; SDAG-NEXT: s_waitcnt vmcnt(0)
-; SDAG-NEXT: v_mov_b32_e32 v10, s10
-; SDAG-NEXT: v_mov_b32_e32 v11, s11
-; SDAG-NEXT: v_mov_b32_e32 v8, s8
-; SDAG-NEXT: v_mov_b32_e32 v9, s9
-; SDAG-NEXT: v_mov_b64_e32 v[6:7], 0
-; SDAG-NEXT: global_store_dwordx4 v[6:7], v[8:11], off sc0 sc1
+; SDAG-NEXT: s_nop 0
+; SDAG-NEXT: v_mov_b32_e32 v16, s8
+; SDAG-NEXT: v_mov_b32_e32 v17, s9
+; SDAG-NEXT: v_mov_b32_e32 v18, s10
+; SDAG-NEXT: v_mov_b32_e32 v19, s11
+; SDAG-NEXT: global_store_dwordx4 v[26:27], v[16:19], off sc0 sc1
; SDAG-NEXT: s_waitcnt vmcnt(0)
-; SDAG-NEXT: global_store_dwordx4 v[2:3], a[8:11], off sc0 sc1
+; SDAG-NEXT: global_store_dwordx4 v[22:23], v[8:11], off sc0 sc1
; SDAG-NEXT: s_waitcnt vmcnt(0)
-; SDAG-NEXT: global_store_dwordx4 v[0:1], a[12:15], off sc0 sc1
+; SDAG-NEXT: global_store_dwordx4 v[20:21], v[12:15], off sc0 sc1
; SDAG-NEXT: s_waitcnt vmcnt(0)
-; SDAG-NEXT: global_store_dwordx4 v[6:7], a[0:3], off sc0 sc1
+; SDAG-NEXT: global_store_dwordx4 v[26:27], v[0:3], off sc0 sc1
; SDAG-NEXT: s_waitcnt vmcnt(0)
-; SDAG-NEXT: global_store_dwordx4 v[4:5], a[4:7], off sc0 sc1
+; SDAG-NEXT: global_store_dwordx4 v[24:25], v[4:7], off sc0 sc1
; SDAG-NEXT: s_waitcnt vmcnt(0)
; SDAG-NEXT: s_endpgm
;
@@ -5255,61 +5195,53 @@ define amdgpu_kernel void @test_mfma_scale_f32_32x32x64_f8f6f4_25_42__vgprcd_non
; GISEL: ; %bb.0:
; GISEL-NEXT: s_load_dwordx16 s[36:51], s[4:5], 0x0
; GISEL-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x40
-; GISEL-NEXT: v_mov_b64_e32 v[16:17], 0
-; GISEL-NEXT: v_mov_b64_e32 v[18:19], 16
-; GISEL-NEXT: v_mov_b64_e32 v[20:21], 32
+; GISEL-NEXT: v_mov_b64_e32 v[32:33], 0
+; GISEL-NEXT: v_mov_b64_e32 v[34:35], 16
+; GISEL-NEXT: v_mov_b64_e32 v[36:37], 32
; GISEL-NEXT: s_waitcnt lgkmcnt(0)
-; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[36:37]
-; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[38:39]
-; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[40:41]
-; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[42:43]
-; GISEL-NEXT: v_mov_b64_e32 v[8:9], s[44:45]
-; GISEL-NEXT: v_accvgpr_write_b32 a0, s8
-; GISEL-NEXT: v_mov_b64_e32 v[10:11], s[46:47]
-; GISEL-NEXT: v_mov_b64_e32 v[12:13], s[48:49]
-; GISEL-NEXT: v_mov_b64_e32 v[14:15], s[50:51]
-; GISEL-NEXT: v_accvgpr_write_b32 a1, s9
-; GISEL-NEXT: v_accvgpr_write_b32 a2, s10
-; GISEL-NEXT: v_accvgpr_write_b32 a3, s11
-; GISEL-NEXT: v_accvgpr_write_b32 a4, s12
-; GISEL-NEXT: v_accvgpr_write_b32 a5, s13
-; GISEL-NEXT: v_accvgpr_write_b32 a6, s14
-; GISEL-NEXT: v_accvgpr_write_b32 a7, s15
-; GISEL-NEXT: v_accvgpr_write_b32 a8, s16
-; GISEL-NEXT: v_accvgpr_write_b32 a9, s17
-; GISEL-NEXT: v_accvgpr_write_b32 a10, s18
-; GISEL-NEXT: v_accvgpr_write_b32 a11, s19
-; GISEL-NEXT: v_accvgpr_write_b32 a12, s20
-; GISEL-NEXT: v_accvgpr_write_b32 a13, s21
-; GISEL-NEXT: v_accvgpr_write_b32 a14, s22
-; GISEL-NEXT: v_accvgpr_write_b32 a15, s23
-; GISEL-NEXT: v_mov_b64_e32 v[22:23], 48
-; GISEL-NEXT: s_nop 0
-; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 a[0:15], v[0:7], v[8:15], a[0:15], 25, 42 op_sel_hi:[0,0,0] blgp:2
+; GISEL-NEXT: v_mov_b64_e32 v[16:17], s[36:37]
+; GISEL-NEXT: v_mov_b64_e32 v[18:19], s[38:39]
+; GISEL-NEXT: v_mov_b64_e32 v[20:21], s[40:41]
+; GISEL-NEXT: v_mov_b64_e32 v[22:23], s[42:43]
+; GISEL-NEXT: v_mov_b64_e32 v[24:25], s[44:45]
; GISEL-NEXT: v_mov_b64_e32 v[0:1], s[8:9]
+; GISEL-NEXT: v_mov_b64_e32 v[26:27], s[46:47]
+; GISEL-NEXT: v_mov_b64_e32 v[28:29], s[48:49]
+; GISEL-NEXT: v_mov_b64_e32 v[30:31], s[50:51]
; GISEL-NEXT: v_mov_b64_e32 v[2:3], s[10:11]
; GISEL-NEXT: v_mov_b64_e32 v[4:5], s[12:13]
-; GISEL-NEXT: v_mov_b64_e32 v[8:9], s[16:17]
-; GISEL-NEXT: v_mov_b64_e32 v[12:13], s[20:21]
; GISEL-NEXT: v_mov_b64_e32 v[6:7], s[14:15]
+; GISEL-NEXT: v_mov_b64_e32 v[8:9], s[16:17]
; GISEL-NEXT: v_mov_b64_e32 v[10:11], s[18:19]
+; GISEL-NEXT: v_mov_b64_e32 v[12:13], s[20:21]
; GISEL-NEXT: v_mov_b64_e32 v[14:15], s[22:23]
-; GISEL-NEXT: global_store_dwordx4 v[16:17], v[0:3], off sc0 sc1
+; GISEL-NEXT: v_mov_b64_e32 v[38:39], 48
+; GISEL-NEXT: s_nop 0
+; GISEL-NEXT: v_mfma_scale_f32_32x32x64_f8f6f4 v[0:15], v[16:23], v[24:31], v[0:15], 25, 42 op_sel_hi:[0,0,0] blgp:2
+; GISEL-NEXT: v_mov_b64_e32 v[18:19], s[10:11]
+; GISEL-NEXT: v_mov_b64_e32 v[16:17], s[8:9]
+; GISEL-NEXT: v_mov_b64_e32 v[22:23], s[14:15]
+; GISEL-NEXT: v_mov_b64_e32 v[26:27], s[18:19]
+; GISEL-NEXT: v_mov_b64_e32 v[30:31], s[22:23]
+; GISEL-NEXT: v_mov_b64_e32 v[20:21], s[12:13]
+; GISEL-NEXT: v_mov_b64_e32 v[24:25], s[16:17]
+; GISEL-NEXT: v_mov_b64_e32 v[28:29], s[20:21]
+; GISEL-NEXT: global_store_dwordx4 v[32:33], v[16:19], off sc0 sc1
; GISEL-NEXT: s_waitcnt vmcnt(0)
-; GISEL-NEXT: global_store_dwordx4 v[18:19], v[4:7], off sc0 sc1
+; GISEL-NEXT: global_store_dwordx4 v[34:35], v[20:23], off sc0 sc1
; GISEL-NEXT: s_waitcnt vmcnt(0)
-; GISEL-NEXT: global_store_dwordx4 v[20:21], v[8:11], off sc0 sc1
+; GISEL-NEXT: global_store_dwordx4 v[36:37], v[24:27], off sc0 sc1
; GISEL-NEXT: s_waitcnt vmcnt(0)
-; GISEL-NEXT: global_store_dwordx4 v[22:23], v[12:15], off sc0 sc1
+; GISEL-NEXT: global_store_dwordx4 v[38:39], v[28:31], off sc0 sc1
; GISEL-NEXT: s_waitcnt vmcnt(0)
; GISEL-NEXT: s_nop 3
-; GISEL-NEXT: global_store_dwordx4 v[16:17], a[0:3], off sc0 sc1
+; GISEL-NEXT: global_store_dwordx4 v[32:33], v[0:3], off sc0 sc1
; GISEL-NEXT: s_waitcnt vmcnt(0)
-; GISEL-NEXT: global_store_dwordx4 v[18:19], a[4:7], off sc0 sc1
+; GISEL-NEXT: global_store_dwordx4 v[34:35], v[4:7], off sc0 sc1
; GISEL-NEXT: s_waitcnt vmcnt(0)
-; GISEL-NEXT: global_store_dwordx4 v[20:21], a[8:11], off sc0 sc1
+; GISEL-NEXT: global_store_dwordx4 v[36:37], v[8:11], off sc0 sc1
; GISEL-NEXT: s_waitcnt vmcnt(0)
-; GISEL-NEXT: global_store_dwordx4 v[22:23], a[12:15], off sc0 sc1
+; GISEL-NEXT: global_store_dwordx4 v[38:39], v[12:15], off sc0 sc1
; GISEL-NEXT: s_waitcnt vmcnt(0)
; GISEL-NEXT: s_endpgm
%result = call <16 x float> @llvm.amdgcn.mfma.scale.f32.32x32x64.f8f6f4.v8i32.v8i32(<8 x i32> %arg0, <8 x i32> %arg1, <16 x float> %arg2, i32 0, i32 2, i32 0, i32 25, i32 0, i32 42)
@@ -6298,6 +6230,6 @@ declare <16 x float> @llvm.amdgcn.mfma.scale.f32.32x32x64.f8f6f4.v6i32.v8i32(<6
declare <16 x float> @llvm.amdgcn.mfma.scale.f32.32x32x64.f8f6f4.v8i32.v4i32(<8 x i32>, <4 x i32>, <16 x float>, i32 immarg, i32 immarg, i32 immarg, i32, i32 immarg, i32) #2
declare <16 x float> @llvm.amdgcn.mfma.scale.f32.32x32x64.f8f6f4.v8i32.v6i32(<8 x i32>, <6 x i32>, <16 x float>, i32 immarg, i32 immarg, i32 immarg, i32, i32 immarg, i32) #2
-attributes #0 = { "amdgpu-flat-work-group-size"="512,512" }
+attributes #0 = { "amdgpu-flat-work-group-size"="512,512" "amdgpu-agpr-alloc"="0,0" }
attributes #1 = { "amdgpu-flat-work-group-size"="128,128" }
attributes #2 = { convergent nocallback nofree nosync nounwind willreturn memory(none) }
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.xf32.gfx942.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.xf32.gfx942.ll
index fb1e46d..31a48de 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.xf32.gfx942.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mfma.xf32.gfx942.ll
@@ -1,13 +1,54 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc -mtriple=amdgcn -mcpu=gfx942 < %s | FileCheck --check-prefixes=GCN,GFX942 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -global-isel < %s | FileCheck --check-prefixes=GCN,GISEL %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -stress-regalloc=10 < %s | FileCheck --check-prefixes=GCN,GFX942 %s
-; RUN: llc -mtriple=amdgcn -mcpu=gfx942 -stress-regalloc=10 -global-isel < %s | FileCheck --check-prefixes=GCN,GISEL %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx942 < %s | FileCheck --check-prefixes=GFX942,GFX942-SDAG %s
+; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx942 < %s | FileCheck --check-prefixes=GFX942,GFX942-GISEL %s
declare <4 x float> @llvm.amdgcn.mfma.f32.16x16x8.xf32(<2 x float>, <2 x float>, <4 x float>, i32, i32, i32)
declare <16 x float> @llvm.amdgcn.mfma.f32.32x32x4.xf32(<2 x float>, <2 x float>, <16 x float>, i32, i32, i32)
define amdgpu_kernel void @test_mfma_f32_16x16x8xf32(ptr addrspace(1) %arg) #0 {
+; GFX942-SDAG-LABEL: test_mfma_f32_16x16x8xf32:
+; GFX942-SDAG: ; %bb.0: ; %bb
+; GFX942-SDAG-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24
+; GFX942-SDAG-NEXT: v_mov_b32_e32 v4, 1.0
+; GFX942-SDAG-NEXT: v_mov_b32_e32 v5, 2.0
+; GFX942-SDAG-NEXT: v_mov_b32_e32 v0, 0x40400000
+; GFX942-SDAG-NEXT: v_mov_b32_e32 v1, 4.0
+; GFX942-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-SDAG-NEXT: s_load_dwordx4 s[0:3], s[6:7], 0x0
+; GFX942-SDAG-NEXT: v_mov_b32_e32 v2, 0
+; GFX942-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a0, s0
+; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a1, s1
+; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a2, s2
+; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a3, s3
+; GFX942-SDAG-NEXT: s_nop 1
+; GFX942-SDAG-NEXT: v_mfma_f32_16x16x8_xf32 a[0:3], v[4:5], v[0:1], a[0:3] cbsz:1 abid:2 blgp:3
+; GFX942-SDAG-NEXT: s_nop 6
+; GFX942-SDAG-NEXT: global_store_dwordx4 v2, a[0:3], s[6:7]
+; GFX942-SDAG-NEXT: s_endpgm
+;
+; GFX942-GISEL-LABEL: test_mfma_f32_16x16x8xf32:
+; GFX942-GISEL: ; %bb.0: ; %bb
+; GFX942-GISEL-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x24
+; GFX942-GISEL-NEXT: s_mov_b32 s4, 1.0
+; GFX942-GISEL-NEXT: s_mov_b32 s5, 2.0
+; GFX942-GISEL-NEXT: v_mov_b64_e32 v[0:1], s[4:5]
+; GFX942-GISEL-NEXT: s_mov_b32 s4, 0x40400000
+; GFX942-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-GISEL-NEXT: s_load_dwordx4 s[0:3], s[6:7], 0x0
+; GFX942-GISEL-NEXT: s_mov_b32 s5, 4.0
+; GFX942-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[4:5]
+; GFX942-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-GISEL-NEXT: v_accvgpr_write_b32 a0, s0
+; GFX942-GISEL-NEXT: v_accvgpr_write_b32 a1, s1
+; GFX942-GISEL-NEXT: v_accvgpr_write_b32 a2, s2
+; GFX942-GISEL-NEXT: v_accvgpr_write_b32 a3, s3
+; GFX942-GISEL-NEXT: s_nop 1
+; GFX942-GISEL-NEXT: v_mfma_f32_16x16x8_xf32 a[0:3], v[0:1], v[2:3], a[0:3] cbsz:1 abid:2 blgp:3
+; GFX942-GISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX942-GISEL-NEXT: s_nop 5
+; GFX942-GISEL-NEXT: global_store_dwordx4 v0, a[0:3], s[6:7]
+; GFX942-GISEL-NEXT: s_endpgm
bb:
%in.1 = load <4 x float>, ptr addrspace(1) %arg
%mai.1 = tail call <4 x float> @llvm.amdgcn.mfma.f32.16x16x8.xf32(<2 x float> <float 1.0, float 2.0>, <2 x float> <float 3.0, float 4.0>, <4 x float> %in.1, i32 1, i32 2, i32 3)
@@ -16,6 +57,81 @@ bb:
}
define amdgpu_kernel void @test_mfma_f32_32x32x4xf32(ptr addrspace(1) %arg) #0 {
+; GFX942-SDAG-LABEL: test_mfma_f32_32x32x4xf32:
+; GFX942-SDAG: ; %bb.0: ; %bb
+; GFX942-SDAG-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x24
+; GFX942-SDAG-NEXT: v_mov_b32_e32 v2, 1.0
+; GFX942-SDAG-NEXT: v_mov_b32_e32 v3, 2.0
+; GFX942-SDAG-NEXT: v_mov_b32_e32 v0, 0x40400000
+; GFX942-SDAG-NEXT: v_mov_b32_e32 v1, 4.0
+; GFX942-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-SDAG-NEXT: s_load_dwordx16 s[0:15], s[16:17], 0x0
+; GFX942-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a0, s0
+; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a1, s1
+; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a2, s2
+; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a3, s3
+; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a4, s4
+; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a5, s5
+; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a6, s6
+; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a7, s7
+; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a8, s8
+; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a9, s9
+; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a10, s10
+; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a11, s11
+; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a12, s12
+; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a13, s13
+; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a14, s14
+; GFX942-SDAG-NEXT: v_accvgpr_write_b32 a15, s15
+; GFX942-SDAG-NEXT: s_nop 1
+; GFX942-SDAG-NEXT: v_mfma_f32_32x32x4_xf32 a[0:15], v[2:3], v[0:1], a[0:15] cbsz:1 abid:2 blgp:3
+; GFX942-SDAG-NEXT: v_mov_b32_e32 v0, 0
+; GFX942-SDAG-NEXT: s_nop 7
+; GFX942-SDAG-NEXT: s_nop 1
+; GFX942-SDAG-NEXT: global_store_dwordx4 v0, a[12:15], s[16:17] offset:48
+; GFX942-SDAG-NEXT: global_store_dwordx4 v0, a[8:11], s[16:17] offset:32
+; GFX942-SDAG-NEXT: global_store_dwordx4 v0, a[4:7], s[16:17] offset:16
+; GFX942-SDAG-NEXT: global_store_dwordx4 v0, a[0:3], s[16:17]
+; GFX942-SDAG-NEXT: s_endpgm
+;
+; GFX942-GISEL-LABEL: test_mfma_f32_32x32x4xf32:
+; GFX942-GISEL: ; %bb.0: ; %bb
+; GFX942-GISEL-NEXT: s_load_dwordx2 s[16:17], s[4:5], 0x24
+; GFX942-GISEL-NEXT: s_mov_b32 s18, 1.0
+; GFX942-GISEL-NEXT: s_mov_b32 s19, 2.0
+; GFX942-GISEL-NEXT: v_mov_b64_e32 v[0:1], s[18:19]
+; GFX942-GISEL-NEXT: s_mov_b32 s18, 0x40400000
+; GFX942-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-GISEL-NEXT: s_load_dwordx16 s[0:15], s[16:17], 0x0
+; GFX942-GISEL-NEXT: s_mov_b32 s19, 4.0
+; GFX942-GISEL-NEXT: v_mov_b64_e32 v[2:3], s[18:19]
+; GFX942-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX942-GISEL-NEXT: v_accvgpr_write_b32 a0, s0
+; GFX942-GISEL-NEXT: v_accvgpr_write_b32 a1, s1
+; GFX942-GISEL-NEXT: v_accvgpr_write_b32 a2, s2
+; GFX942-GISEL-NEXT: v_accvgpr_write_b32 a3, s3
+; GFX942-GISEL-NEXT: v_accvgpr_write_b32 a4, s4
+; GFX942-GISEL-NEXT: v_accvgpr_write_b32 a5, s5
+; GFX942-GISEL-NEXT: v_accvgpr_write_b32 a6, s6
+; GFX942-GISEL-NEXT: v_accvgpr_write_b32 a7, s7
+; GFX942-GISEL-NEXT: v_accvgpr_write_b32 a8, s8
+; GFX942-GISEL-NEXT: v_accvgpr_write_b32 a9, s9
+; GFX942-GISEL-NEXT: v_accvgpr_write_b32 a10, s10
+; GFX942-GISEL-NEXT: v_accvgpr_write_b32 a11, s11
+; GFX942-GISEL-NEXT: v_accvgpr_write_b32 a12, s12
+; GFX942-GISEL-NEXT: v_accvgpr_write_b32 a13, s13
+; GFX942-GISEL-NEXT: v_accvgpr_write_b32 a14, s14
+; GFX942-GISEL-NEXT: v_accvgpr_write_b32 a15, s15
+; GFX942-GISEL-NEXT: s_nop 1
+; GFX942-GISEL-NEXT: v_mfma_f32_32x32x4_xf32 a[0:15], v[0:1], v[2:3], a[0:15] cbsz:1 abid:2 blgp:3
+; GFX942-GISEL-NEXT: v_mov_b32_e32 v0, 0
+; GFX942-GISEL-NEXT: s_nop 7
+; GFX942-GISEL-NEXT: s_nop 1
+; GFX942-GISEL-NEXT: global_store_dwordx4 v0, a[0:3], s[16:17]
+; GFX942-GISEL-NEXT: global_store_dwordx4 v0, a[4:7], s[16:17] offset:16
+; GFX942-GISEL-NEXT: global_store_dwordx4 v0, a[8:11], s[16:17] offset:32
+; GFX942-GISEL-NEXT: global_store_dwordx4 v0, a[12:15], s[16:17] offset:48
+; GFX942-GISEL-NEXT: s_endpgm
bb:
%in.1 = load <16 x float>, ptr addrspace(1) %arg
%mai.1 = tail call <16 x float> @llvm.amdgcn.mfma.f32.32x32x4.xf32(<2 x float> <float 1.0, float 2.0>, <2 x float> <float 3.0, float 4.0>, <16 x float> %in.1, i32 1, i32 2, i32 3)
@@ -25,6 +141,4 @@ bb:
attributes #0 = { "amdgpu-flat-work-group-size"="1,256" }
;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; GCN: {{.*}}
; GFX942: {{.*}}
-; GISEL: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.smfmac.gfx950.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.smfmac.gfx950.ll
index 8056881..b25fe83 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.smfmac.gfx950.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.smfmac.gfx950.ll
@@ -17,24 +17,24 @@ define amdgpu_kernel void @test_smfmac_f32_16x16x64_f16__vgpr(ptr addrspace(1) %
; SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x34
; SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; SDAG-NEXT: v_lshlrev_b32_e32 v0, 4, v0
-; SDAG-NEXT: v_mov_b32_e32 v12, 0
+; SDAG-NEXT: v_mov_b32_e32 v16, 0
; SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; SDAG-NEXT: global_load_dwordx4 v[14:17], v0, s[6:7]
+; SDAG-NEXT: global_load_dwordx4 v[8:11], v0, s[6:7]
; SDAG-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x44
; SDAG-NEXT: s_load_dword s16, s[4:5], 0x64
-; SDAG-NEXT: v_mov_b64_e32 v[10:11], s[2:3]
-; SDAG-NEXT: v_mov_b64_e32 v[8:9], s[0:1]
+; SDAG-NEXT: v_mov_b64_e32 v[14:15], s[2:3]
+; SDAG-NEXT: v_mov_b64_e32 v[12:13], s[0:1]
; SDAG-NEXT: s_waitcnt lgkmcnt(0)
; SDAG-NEXT: v_mov_b64_e32 v[0:1], s[8:9]
; SDAG-NEXT: v_mov_b64_e32 v[2:3], s[10:11]
; SDAG-NEXT: v_mov_b64_e32 v[4:5], s[12:13]
; SDAG-NEXT: v_mov_b64_e32 v[6:7], s[14:15]
-; SDAG-NEXT: v_mov_b32_e32 v13, s16
+; SDAG-NEXT: v_mov_b32_e32 v17, s16
; SDAG-NEXT: s_waitcnt vmcnt(0)
; SDAG-NEXT: s_nop 0
-; SDAG-NEXT: v_smfmac_f32_16x16x64_f16 v[14:17], v[8:11], v[0:7], v13 cbsz:1 abid:2
+; SDAG-NEXT: v_smfmac_f32_16x16x64_f16 v[8:11], v[12:15], v[0:7], v17 cbsz:1 abid:2
; SDAG-NEXT: s_nop 7
-; SDAG-NEXT: global_store_dwordx4 v12, v[14:17], s[6:7]
+; SDAG-NEXT: global_store_dwordx4 v16, v[8:11], s[6:7]
; SDAG-NEXT: s_endpgm
;
; GISEL-LABEL: test_smfmac_f32_16x16x64_f16__vgpr:
@@ -547,24 +547,24 @@ define amdgpu_kernel void @test_smfmac_f32_16x16x64_bf16__vgpr(ptr addrspace(1)
; GCN-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x34
; GCN-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; GCN-NEXT: v_lshlrev_b32_e32 v0, 4, v0
-; GCN-NEXT: v_mov_b32_e32 v12, 0
+; GCN-NEXT: v_mov_b32_e32 v16, 0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
-; GCN-NEXT: global_load_dwordx4 v[14:17], v0, s[6:7]
+; GCN-NEXT: global_load_dwordx4 v[8:11], v0, s[6:7]
; GCN-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x44
; GCN-NEXT: s_load_dword s16, s[4:5], 0x64
-; GCN-NEXT: v_mov_b64_e32 v[10:11], s[2:3]
-; GCN-NEXT: v_mov_b64_e32 v[8:9], s[0:1]
+; GCN-NEXT: v_mov_b64_e32 v[14:15], s[2:3]
+; GCN-NEXT: v_mov_b64_e32 v[12:13], s[0:1]
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: v_mov_b64_e32 v[0:1], s[8:9]
; GCN-NEXT: v_mov_b64_e32 v[2:3], s[10:11]
; GCN-NEXT: v_mov_b64_e32 v[4:5], s[12:13]
; GCN-NEXT: v_mov_b64_e32 v[6:7], s[14:15]
-; GCN-NEXT: v_mov_b32_e32 v13, s16
+; GCN-NEXT: v_mov_b32_e32 v17, s16
; GCN-NEXT: s_waitcnt vmcnt(0)
; GCN-NEXT: s_nop 0
-; GCN-NEXT: v_smfmac_f32_16x16x64_bf16 v[14:17], v[8:11], v[0:7], v13 cbsz:1 abid:2
+; GCN-NEXT: v_smfmac_f32_16x16x64_bf16 v[8:11], v[12:15], v[0:7], v17 cbsz:1 abid:2
; GCN-NEXT: s_nop 7
-; GCN-NEXT: global_store_dwordx4 v12, v[14:17], s[6:7]
+; GCN-NEXT: global_store_dwordx4 v16, v[8:11], s[6:7]
; GCN-NEXT: s_endpgm
bb:
%id = call i32 @llvm.amdgcn.workitem.id.x()
@@ -855,30 +855,30 @@ define amdgpu_kernel void @test_smfmac_i32_16x16x128_i8__vgpr(ptr addrspace(1) %
; SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; SDAG-NEXT: v_lshlrev_b32_e32 v0, 4, v0
; SDAG-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x34
+; SDAG-NEXT: v_mov_b32_e32 v16, 0
; SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; SDAG-NEXT: global_load_dwordx4 v[10:13], v0, s[6:7]
+; SDAG-NEXT: global_load_dwordx4 v[8:11], v0, s[6:7]
; SDAG-NEXT: s_load_dword s16, s[4:5], 0x64
; SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x54
-; SDAG-NEXT: v_mov_b32_e32 v0, 0
-; SDAG-NEXT: v_mov_b32_e32 v14, s8
-; SDAG-NEXT: v_mov_b32_e32 v15, s9
-; SDAG-NEXT: v_mov_b32_e32 v16, s10
-; SDAG-NEXT: v_mov_b32_e32 v17, s11
-; SDAG-NEXT: v_mov_b32_e32 v2, s12
-; SDAG-NEXT: v_mov_b32_e32 v3, s13
-; SDAG-NEXT: v_mov_b32_e32 v4, s14
-; SDAG-NEXT: v_mov_b32_e32 v5, s15
+; SDAG-NEXT: v_mov_b32_e32 v12, s8
+; SDAG-NEXT: v_mov_b32_e32 v13, s9
+; SDAG-NEXT: v_mov_b32_e32 v14, s10
+; SDAG-NEXT: v_mov_b32_e32 v15, s11
+; SDAG-NEXT: v_mov_b32_e32 v0, s12
+; SDAG-NEXT: v_mov_b32_e32 v1, s13
+; SDAG-NEXT: v_mov_b32_e32 v2, s14
+; SDAG-NEXT: v_mov_b32_e32 v3, s15
; SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; SDAG-NEXT: v_mov_b32_e32 v6, s0
-; SDAG-NEXT: v_mov_b32_e32 v7, s1
-; SDAG-NEXT: v_mov_b32_e32 v8, s2
-; SDAG-NEXT: v_mov_b32_e32 v9, s3
-; SDAG-NEXT: v_mov_b32_e32 v1, s16
+; SDAG-NEXT: v_mov_b32_e32 v4, s0
+; SDAG-NEXT: v_mov_b32_e32 v5, s1
+; SDAG-NEXT: v_mov_b32_e32 v6, s2
+; SDAG-NEXT: v_mov_b32_e32 v7, s3
+; SDAG-NEXT: v_mov_b32_e32 v17, s16
; SDAG-NEXT: s_waitcnt vmcnt(0)
; SDAG-NEXT: s_nop 0
-; SDAG-NEXT: v_smfmac_i32_16x16x128_i8 v[10:13], v[14:17], v[2:9], v1 cbsz:1 abid:2
+; SDAG-NEXT: v_smfmac_i32_16x16x128_i8 v[8:11], v[12:15], v[0:7], v17 cbsz:1 abid:2
; SDAG-NEXT: s_nop 7
-; SDAG-NEXT: global_store_dwordx4 v0, v[10:13], s[6:7]
+; SDAG-NEXT: global_store_dwordx4 v16, v[8:11], s[6:7]
; SDAG-NEXT: s_endpgm
;
; GISEL-LABEL: test_smfmac_i32_16x16x128_i8__vgpr:
@@ -1032,22 +1032,22 @@ define amdgpu_kernel void @test_smfmac_i32_32x32x64_i8__vgpr(ptr addrspace(1) %a
; SDAG-NEXT: s_load_dword s16, s[4:5], 0x64
; SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x54
; SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; SDAG-NEXT: v_mov_b32_e32 v26, s8
-; SDAG-NEXT: v_mov_b32_e32 v27, s9
-; SDAG-NEXT: v_mov_b32_e32 v28, s10
-; SDAG-NEXT: v_mov_b32_e32 v29, s11
-; SDAG-NEXT: v_mov_b32_e32 v18, s12
-; SDAG-NEXT: v_mov_b32_e32 v19, s13
-; SDAG-NEXT: v_mov_b32_e32 v20, s14
-; SDAG-NEXT: v_mov_b32_e32 v21, s15
-; SDAG-NEXT: v_mov_b32_e32 v22, s0
-; SDAG-NEXT: v_mov_b32_e32 v23, s1
-; SDAG-NEXT: v_mov_b32_e32 v24, s2
-; SDAG-NEXT: v_mov_b32_e32 v25, s3
-; SDAG-NEXT: v_mov_b32_e32 v16, s16
+; SDAG-NEXT: v_mov_b32_e32 v24, s8
+; SDAG-NEXT: v_mov_b32_e32 v25, s9
+; SDAG-NEXT: v_mov_b32_e32 v26, s10
+; SDAG-NEXT: v_mov_b32_e32 v27, s11
+; SDAG-NEXT: v_mov_b32_e32 v16, s12
+; SDAG-NEXT: v_mov_b32_e32 v17, s13
+; SDAG-NEXT: v_mov_b32_e32 v18, s14
+; SDAG-NEXT: v_mov_b32_e32 v19, s15
+; SDAG-NEXT: v_mov_b32_e32 v20, s0
+; SDAG-NEXT: v_mov_b32_e32 v21, s1
+; SDAG-NEXT: v_mov_b32_e32 v22, s2
+; SDAG-NEXT: v_mov_b32_e32 v23, s3
+; SDAG-NEXT: v_mov_b32_e32 v28, s16
; SDAG-NEXT: s_waitcnt vmcnt(0)
; SDAG-NEXT: s_nop 0
-; SDAG-NEXT: v_smfmac_i32_32x32x64_i8 v[0:15], v[26:29], v[18:25], v16 cbsz:1 abid:2
+; SDAG-NEXT: v_smfmac_i32_32x32x64_i8 v[0:15], v[24:27], v[16:23], v28 cbsz:1 abid:2
; SDAG-NEXT: v_mov_b32_e32 v16, 0
; SDAG-NEXT: s_nop 7
; SDAG-NEXT: s_nop 2
@@ -1397,30 +1397,30 @@ define amdgpu_kernel void @test_smfmac_f32_16x16x128_bf8_bf8__vgpr(ptr addrspace
; SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; SDAG-NEXT: v_lshlrev_b32_e32 v0, 4, v0
; SDAG-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x34
+; SDAG-NEXT: v_mov_b32_e32 v16, 0
; SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; SDAG-NEXT: global_load_dwordx4 v[10:13], v0, s[6:7]
+; SDAG-NEXT: global_load_dwordx4 v[8:11], v0, s[6:7]
; SDAG-NEXT: s_load_dword s16, s[4:5], 0x64
; SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x54
-; SDAG-NEXT: v_mov_b32_e32 v0, 0
-; SDAG-NEXT: v_mov_b32_e32 v14, s8
-; SDAG-NEXT: v_mov_b32_e32 v15, s9
-; SDAG-NEXT: v_mov_b32_e32 v16, s10
-; SDAG-NEXT: v_mov_b32_e32 v17, s11
-; SDAG-NEXT: v_mov_b32_e32 v2, s12
-; SDAG-NEXT: v_mov_b32_e32 v3, s13
-; SDAG-NEXT: v_mov_b32_e32 v4, s14
-; SDAG-NEXT: v_mov_b32_e32 v5, s15
+; SDAG-NEXT: v_mov_b32_e32 v12, s8
+; SDAG-NEXT: v_mov_b32_e32 v13, s9
+; SDAG-NEXT: v_mov_b32_e32 v14, s10
+; SDAG-NEXT: v_mov_b32_e32 v15, s11
+; SDAG-NEXT: v_mov_b32_e32 v0, s12
+; SDAG-NEXT: v_mov_b32_e32 v1, s13
+; SDAG-NEXT: v_mov_b32_e32 v2, s14
+; SDAG-NEXT: v_mov_b32_e32 v3, s15
; SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; SDAG-NEXT: v_mov_b32_e32 v6, s0
-; SDAG-NEXT: v_mov_b32_e32 v7, s1
-; SDAG-NEXT: v_mov_b32_e32 v8, s2
-; SDAG-NEXT: v_mov_b32_e32 v9, s3
-; SDAG-NEXT: v_mov_b32_e32 v1, s16
+; SDAG-NEXT: v_mov_b32_e32 v4, s0
+; SDAG-NEXT: v_mov_b32_e32 v5, s1
+; SDAG-NEXT: v_mov_b32_e32 v6, s2
+; SDAG-NEXT: v_mov_b32_e32 v7, s3
+; SDAG-NEXT: v_mov_b32_e32 v17, s16
; SDAG-NEXT: s_waitcnt vmcnt(0)
; SDAG-NEXT: s_nop 0
-; SDAG-NEXT: v_smfmac_f32_16x16x128_bf8_bf8 v[10:13], v[14:17], v[2:9], v1 cbsz:1 abid:2
+; SDAG-NEXT: v_smfmac_f32_16x16x128_bf8_bf8 v[8:11], v[12:15], v[0:7], v17 cbsz:1 abid:2
; SDAG-NEXT: s_nop 7
-; SDAG-NEXT: global_store_dwordx4 v0, v[10:13], s[6:7]
+; SDAG-NEXT: global_store_dwordx4 v16, v[8:11], s[6:7]
; SDAG-NEXT: s_endpgm
;
; GISEL-LABEL: test_smfmac_f32_16x16x128_bf8_bf8__vgpr:
@@ -1566,30 +1566,30 @@ define amdgpu_kernel void @test_smfmac_f32_16x16x128_bf8_fp8__vgpr(ptr addrspace
; SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; SDAG-NEXT: v_lshlrev_b32_e32 v0, 4, v0
; SDAG-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x34
+; SDAG-NEXT: v_mov_b32_e32 v16, 0
; SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; SDAG-NEXT: global_load_dwordx4 v[10:13], v0, s[6:7]
+; SDAG-NEXT: global_load_dwordx4 v[8:11], v0, s[6:7]
; SDAG-NEXT: s_load_dword s16, s[4:5], 0x64
; SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x54
-; SDAG-NEXT: v_mov_b32_e32 v0, 0
-; SDAG-NEXT: v_mov_b32_e32 v14, s8
-; SDAG-NEXT: v_mov_b32_e32 v15, s9
-; SDAG-NEXT: v_mov_b32_e32 v16, s10
-; SDAG-NEXT: v_mov_b32_e32 v17, s11
-; SDAG-NEXT: v_mov_b32_e32 v2, s12
-; SDAG-NEXT: v_mov_b32_e32 v3, s13
-; SDAG-NEXT: v_mov_b32_e32 v4, s14
-; SDAG-NEXT: v_mov_b32_e32 v5, s15
+; SDAG-NEXT: v_mov_b32_e32 v12, s8
+; SDAG-NEXT: v_mov_b32_e32 v13, s9
+; SDAG-NEXT: v_mov_b32_e32 v14, s10
+; SDAG-NEXT: v_mov_b32_e32 v15, s11
+; SDAG-NEXT: v_mov_b32_e32 v0, s12
+; SDAG-NEXT: v_mov_b32_e32 v1, s13
+; SDAG-NEXT: v_mov_b32_e32 v2, s14
+; SDAG-NEXT: v_mov_b32_e32 v3, s15
; SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; SDAG-NEXT: v_mov_b32_e32 v6, s0
-; SDAG-NEXT: v_mov_b32_e32 v7, s1
-; SDAG-NEXT: v_mov_b32_e32 v8, s2
-; SDAG-NEXT: v_mov_b32_e32 v9, s3
-; SDAG-NEXT: v_mov_b32_e32 v1, s16
+; SDAG-NEXT: v_mov_b32_e32 v4, s0
+; SDAG-NEXT: v_mov_b32_e32 v5, s1
+; SDAG-NEXT: v_mov_b32_e32 v6, s2
+; SDAG-NEXT: v_mov_b32_e32 v7, s3
+; SDAG-NEXT: v_mov_b32_e32 v17, s16
; SDAG-NEXT: s_waitcnt vmcnt(0)
; SDAG-NEXT: s_nop 0
-; SDAG-NEXT: v_smfmac_f32_16x16x128_bf8_fp8 v[10:13], v[14:17], v[2:9], v1 cbsz:1 abid:2
+; SDAG-NEXT: v_smfmac_f32_16x16x128_bf8_fp8 v[8:11], v[12:15], v[0:7], v17 cbsz:1 abid:2
; SDAG-NEXT: s_nop 7
-; SDAG-NEXT: global_store_dwordx4 v0, v[10:13], s[6:7]
+; SDAG-NEXT: global_store_dwordx4 v16, v[8:11], s[6:7]
; SDAG-NEXT: s_endpgm
;
; GISEL-LABEL: test_smfmac_f32_16x16x128_bf8_fp8__vgpr:
@@ -1735,30 +1735,30 @@ define amdgpu_kernel void @test_smfmac_f32_16x16x128_fp8_bf8__vgpr(ptr addrspace
; SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; SDAG-NEXT: v_lshlrev_b32_e32 v0, 4, v0
; SDAG-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x34
+; SDAG-NEXT: v_mov_b32_e32 v16, 0
; SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; SDAG-NEXT: global_load_dwordx4 v[10:13], v0, s[6:7]
+; SDAG-NEXT: global_load_dwordx4 v[8:11], v0, s[6:7]
; SDAG-NEXT: s_load_dword s16, s[4:5], 0x64
; SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x54
-; SDAG-NEXT: v_mov_b32_e32 v0, 0
-; SDAG-NEXT: v_mov_b32_e32 v14, s8
-; SDAG-NEXT: v_mov_b32_e32 v15, s9
-; SDAG-NEXT: v_mov_b32_e32 v16, s10
-; SDAG-NEXT: v_mov_b32_e32 v17, s11
-; SDAG-NEXT: v_mov_b32_e32 v2, s12
-; SDAG-NEXT: v_mov_b32_e32 v3, s13
-; SDAG-NEXT: v_mov_b32_e32 v4, s14
-; SDAG-NEXT: v_mov_b32_e32 v5, s15
+; SDAG-NEXT: v_mov_b32_e32 v12, s8
+; SDAG-NEXT: v_mov_b32_e32 v13, s9
+; SDAG-NEXT: v_mov_b32_e32 v14, s10
+; SDAG-NEXT: v_mov_b32_e32 v15, s11
+; SDAG-NEXT: v_mov_b32_e32 v0, s12
+; SDAG-NEXT: v_mov_b32_e32 v1, s13
+; SDAG-NEXT: v_mov_b32_e32 v2, s14
+; SDAG-NEXT: v_mov_b32_e32 v3, s15
; SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; SDAG-NEXT: v_mov_b32_e32 v6, s0
-; SDAG-NEXT: v_mov_b32_e32 v7, s1
-; SDAG-NEXT: v_mov_b32_e32 v8, s2
-; SDAG-NEXT: v_mov_b32_e32 v9, s3
-; SDAG-NEXT: v_mov_b32_e32 v1, s16
+; SDAG-NEXT: v_mov_b32_e32 v4, s0
+; SDAG-NEXT: v_mov_b32_e32 v5, s1
+; SDAG-NEXT: v_mov_b32_e32 v6, s2
+; SDAG-NEXT: v_mov_b32_e32 v7, s3
+; SDAG-NEXT: v_mov_b32_e32 v17, s16
; SDAG-NEXT: s_waitcnt vmcnt(0)
; SDAG-NEXT: s_nop 0
-; SDAG-NEXT: v_smfmac_f32_16x16x128_fp8_bf8 v[10:13], v[14:17], v[2:9], v1 cbsz:1 abid:2
+; SDAG-NEXT: v_smfmac_f32_16x16x128_fp8_bf8 v[8:11], v[12:15], v[0:7], v17 cbsz:1 abid:2
; SDAG-NEXT: s_nop 7
-; SDAG-NEXT: global_store_dwordx4 v0, v[10:13], s[6:7]
+; SDAG-NEXT: global_store_dwordx4 v16, v[8:11], s[6:7]
; SDAG-NEXT: s_endpgm
;
; GISEL-LABEL: test_smfmac_f32_16x16x128_fp8_bf8__vgpr:
@@ -1904,30 +1904,30 @@ define amdgpu_kernel void @test_smfmac_f32_16x16x128_fp8_fp8__vgpr(ptr addrspace
; SDAG-NEXT: v_and_b32_e32 v0, 0x3ff, v0
; SDAG-NEXT: v_lshlrev_b32_e32 v0, 4, v0
; SDAG-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x34
+; SDAG-NEXT: v_mov_b32_e32 v16, 0
; SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; SDAG-NEXT: global_load_dwordx4 v[10:13], v0, s[6:7]
+; SDAG-NEXT: global_load_dwordx4 v[8:11], v0, s[6:7]
; SDAG-NEXT: s_load_dword s16, s[4:5], 0x64
; SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x54
-; SDAG-NEXT: v_mov_b32_e32 v0, 0
-; SDAG-NEXT: v_mov_b32_e32 v14, s8
-; SDAG-NEXT: v_mov_b32_e32 v15, s9
-; SDAG-NEXT: v_mov_b32_e32 v16, s10
-; SDAG-NEXT: v_mov_b32_e32 v17, s11
-; SDAG-NEXT: v_mov_b32_e32 v2, s12
-; SDAG-NEXT: v_mov_b32_e32 v3, s13
-; SDAG-NEXT: v_mov_b32_e32 v4, s14
-; SDAG-NEXT: v_mov_b32_e32 v5, s15
+; SDAG-NEXT: v_mov_b32_e32 v12, s8
+; SDAG-NEXT: v_mov_b32_e32 v13, s9
+; SDAG-NEXT: v_mov_b32_e32 v14, s10
+; SDAG-NEXT: v_mov_b32_e32 v15, s11
+; SDAG-NEXT: v_mov_b32_e32 v0, s12
+; SDAG-NEXT: v_mov_b32_e32 v1, s13
+; SDAG-NEXT: v_mov_b32_e32 v2, s14
+; SDAG-NEXT: v_mov_b32_e32 v3, s15
; SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; SDAG-NEXT: v_mov_b32_e32 v6, s0
-; SDAG-NEXT: v_mov_b32_e32 v7, s1
-; SDAG-NEXT: v_mov_b32_e32 v8, s2
-; SDAG-NEXT: v_mov_b32_e32 v9, s3
-; SDAG-NEXT: v_mov_b32_e32 v1, s16
+; SDAG-NEXT: v_mov_b32_e32 v4, s0
+; SDAG-NEXT: v_mov_b32_e32 v5, s1
+; SDAG-NEXT: v_mov_b32_e32 v6, s2
+; SDAG-NEXT: v_mov_b32_e32 v7, s3
+; SDAG-NEXT: v_mov_b32_e32 v17, s16
; SDAG-NEXT: s_waitcnt vmcnt(0)
; SDAG-NEXT: s_nop 0
-; SDAG-NEXT: v_smfmac_f32_16x16x128_fp8_fp8 v[10:13], v[14:17], v[2:9], v1 cbsz:1 abid:2
+; SDAG-NEXT: v_smfmac_f32_16x16x128_fp8_fp8 v[8:11], v[12:15], v[0:7], v17 cbsz:1 abid:2
; SDAG-NEXT: s_nop 7
-; SDAG-NEXT: global_store_dwordx4 v0, v[10:13], s[6:7]
+; SDAG-NEXT: global_store_dwordx4 v16, v[8:11], s[6:7]
; SDAG-NEXT: s_endpgm
;
; GISEL-LABEL: test_smfmac_f32_16x16x128_fp8_fp8__vgpr:
@@ -2081,22 +2081,22 @@ define amdgpu_kernel void @test_smfmac_f32_32x32x64_bf8_bf8__vgpr(ptr addrspace(
; SDAG-NEXT: s_load_dword s16, s[4:5], 0x64
; SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x54
; SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; SDAG-NEXT: v_mov_b32_e32 v26, s8
-; SDAG-NEXT: v_mov_b32_e32 v27, s9
-; SDAG-NEXT: v_mov_b32_e32 v28, s10
-; SDAG-NEXT: v_mov_b32_e32 v29, s11
-; SDAG-NEXT: v_mov_b32_e32 v18, s12
-; SDAG-NEXT: v_mov_b32_e32 v19, s13
-; SDAG-NEXT: v_mov_b32_e32 v20, s14
-; SDAG-NEXT: v_mov_b32_e32 v21, s15
-; SDAG-NEXT: v_mov_b32_e32 v22, s0
-; SDAG-NEXT: v_mov_b32_e32 v23, s1
-; SDAG-NEXT: v_mov_b32_e32 v24, s2
-; SDAG-NEXT: v_mov_b32_e32 v25, s3
-; SDAG-NEXT: v_mov_b32_e32 v16, s16
+; SDAG-NEXT: v_mov_b32_e32 v24, s8
+; SDAG-NEXT: v_mov_b32_e32 v25, s9
+; SDAG-NEXT: v_mov_b32_e32 v26, s10
+; SDAG-NEXT: v_mov_b32_e32 v27, s11
+; SDAG-NEXT: v_mov_b32_e32 v16, s12
+; SDAG-NEXT: v_mov_b32_e32 v17, s13
+; SDAG-NEXT: v_mov_b32_e32 v18, s14
+; SDAG-NEXT: v_mov_b32_e32 v19, s15
+; SDAG-NEXT: v_mov_b32_e32 v20, s0
+; SDAG-NEXT: v_mov_b32_e32 v21, s1
+; SDAG-NEXT: v_mov_b32_e32 v22, s2
+; SDAG-NEXT: v_mov_b32_e32 v23, s3
+; SDAG-NEXT: v_mov_b32_e32 v28, s16
; SDAG-NEXT: s_waitcnt vmcnt(0)
; SDAG-NEXT: s_nop 0
-; SDAG-NEXT: v_smfmac_f32_32x32x64_bf8_bf8 v[0:15], v[26:29], v[18:25], v16 cbsz:1 abid:2
+; SDAG-NEXT: v_smfmac_f32_32x32x64_bf8_bf8 v[0:15], v[24:27], v[16:23], v28 cbsz:1 abid:2
; SDAG-NEXT: v_mov_b32_e32 v16, 0
; SDAG-NEXT: s_nop 7
; SDAG-NEXT: s_nop 2
@@ -2454,22 +2454,22 @@ define amdgpu_kernel void @test_smfmac_f32_32x32x64_bf8_fp8__vgpr(ptr addrspace(
; SDAG-NEXT: s_load_dword s16, s[4:5], 0x64
; SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x54
; SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; SDAG-NEXT: v_mov_b32_e32 v26, s8
-; SDAG-NEXT: v_mov_b32_e32 v27, s9
-; SDAG-NEXT: v_mov_b32_e32 v28, s10
-; SDAG-NEXT: v_mov_b32_e32 v29, s11
-; SDAG-NEXT: v_mov_b32_e32 v18, s12
-; SDAG-NEXT: v_mov_b32_e32 v19, s13
-; SDAG-NEXT: v_mov_b32_e32 v20, s14
-; SDAG-NEXT: v_mov_b32_e32 v21, s15
-; SDAG-NEXT: v_mov_b32_e32 v22, s0
-; SDAG-NEXT: v_mov_b32_e32 v23, s1
-; SDAG-NEXT: v_mov_b32_e32 v24, s2
-; SDAG-NEXT: v_mov_b32_e32 v25, s3
-; SDAG-NEXT: v_mov_b32_e32 v16, s16
+; SDAG-NEXT: v_mov_b32_e32 v24, s8
+; SDAG-NEXT: v_mov_b32_e32 v25, s9
+; SDAG-NEXT: v_mov_b32_e32 v26, s10
+; SDAG-NEXT: v_mov_b32_e32 v27, s11
+; SDAG-NEXT: v_mov_b32_e32 v16, s12
+; SDAG-NEXT: v_mov_b32_e32 v17, s13
+; SDAG-NEXT: v_mov_b32_e32 v18, s14
+; SDAG-NEXT: v_mov_b32_e32 v19, s15
+; SDAG-NEXT: v_mov_b32_e32 v20, s0
+; SDAG-NEXT: v_mov_b32_e32 v21, s1
+; SDAG-NEXT: v_mov_b32_e32 v22, s2
+; SDAG-NEXT: v_mov_b32_e32 v23, s3
+; SDAG-NEXT: v_mov_b32_e32 v28, s16
; SDAG-NEXT: s_waitcnt vmcnt(0)
; SDAG-NEXT: s_nop 0
-; SDAG-NEXT: v_smfmac_f32_32x32x64_bf8_fp8 v[0:15], v[26:29], v[18:25], v16 cbsz:1 abid:2
+; SDAG-NEXT: v_smfmac_f32_32x32x64_bf8_fp8 v[0:15], v[24:27], v[16:23], v28 cbsz:1 abid:2
; SDAG-NEXT: v_mov_b32_e32 v16, 0
; SDAG-NEXT: s_nop 7
; SDAG-NEXT: s_nop 2
@@ -2827,22 +2827,22 @@ define amdgpu_kernel void @test_smfmac_f32_32x32x64_fp8_bf8__vgpr(ptr addrspace(
; SDAG-NEXT: s_load_dword s16, s[4:5], 0x64
; SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x54
; SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; SDAG-NEXT: v_mov_b32_e32 v26, s8
-; SDAG-NEXT: v_mov_b32_e32 v27, s9
-; SDAG-NEXT: v_mov_b32_e32 v28, s10
-; SDAG-NEXT: v_mov_b32_e32 v29, s11
-; SDAG-NEXT: v_mov_b32_e32 v18, s12
-; SDAG-NEXT: v_mov_b32_e32 v19, s13
-; SDAG-NEXT: v_mov_b32_e32 v20, s14
-; SDAG-NEXT: v_mov_b32_e32 v21, s15
-; SDAG-NEXT: v_mov_b32_e32 v22, s0
-; SDAG-NEXT: v_mov_b32_e32 v23, s1
-; SDAG-NEXT: v_mov_b32_e32 v24, s2
-; SDAG-NEXT: v_mov_b32_e32 v25, s3
-; SDAG-NEXT: v_mov_b32_e32 v16, s16
+; SDAG-NEXT: v_mov_b32_e32 v24, s8
+; SDAG-NEXT: v_mov_b32_e32 v25, s9
+; SDAG-NEXT: v_mov_b32_e32 v26, s10
+; SDAG-NEXT: v_mov_b32_e32 v27, s11
+; SDAG-NEXT: v_mov_b32_e32 v16, s12
+; SDAG-NEXT: v_mov_b32_e32 v17, s13
+; SDAG-NEXT: v_mov_b32_e32 v18, s14
+; SDAG-NEXT: v_mov_b32_e32 v19, s15
+; SDAG-NEXT: v_mov_b32_e32 v20, s0
+; SDAG-NEXT: v_mov_b32_e32 v21, s1
+; SDAG-NEXT: v_mov_b32_e32 v22, s2
+; SDAG-NEXT: v_mov_b32_e32 v23, s3
+; SDAG-NEXT: v_mov_b32_e32 v28, s16
; SDAG-NEXT: s_waitcnt vmcnt(0)
; SDAG-NEXT: s_nop 0
-; SDAG-NEXT: v_smfmac_f32_32x32x64_fp8_bf8 v[0:15], v[26:29], v[18:25], v16 cbsz:1 abid:2
+; SDAG-NEXT: v_smfmac_f32_32x32x64_fp8_bf8 v[0:15], v[24:27], v[16:23], v28 cbsz:1 abid:2
; SDAG-NEXT: v_mov_b32_e32 v16, 0
; SDAG-NEXT: s_nop 7
; SDAG-NEXT: s_nop 2
@@ -3200,22 +3200,22 @@ define amdgpu_kernel void @test_smfmac_f32_32x32x64_fp8_fp8__vgpr(ptr addrspace(
; SDAG-NEXT: s_load_dword s16, s[4:5], 0x64
; SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x54
; SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; SDAG-NEXT: v_mov_b32_e32 v26, s8
-; SDAG-NEXT: v_mov_b32_e32 v27, s9
-; SDAG-NEXT: v_mov_b32_e32 v28, s10
-; SDAG-NEXT: v_mov_b32_e32 v29, s11
-; SDAG-NEXT: v_mov_b32_e32 v18, s12
-; SDAG-NEXT: v_mov_b32_e32 v19, s13
-; SDAG-NEXT: v_mov_b32_e32 v20, s14
-; SDAG-NEXT: v_mov_b32_e32 v21, s15
-; SDAG-NEXT: v_mov_b32_e32 v22, s0
-; SDAG-NEXT: v_mov_b32_e32 v23, s1
-; SDAG-NEXT: v_mov_b32_e32 v24, s2
-; SDAG-NEXT: v_mov_b32_e32 v25, s3
-; SDAG-NEXT: v_mov_b32_e32 v16, s16
+; SDAG-NEXT: v_mov_b32_e32 v24, s8
+; SDAG-NEXT: v_mov_b32_e32 v25, s9
+; SDAG-NEXT: v_mov_b32_e32 v26, s10
+; SDAG-NEXT: v_mov_b32_e32 v27, s11
+; SDAG-NEXT: v_mov_b32_e32 v16, s12
+; SDAG-NEXT: v_mov_b32_e32 v17, s13
+; SDAG-NEXT: v_mov_b32_e32 v18, s14
+; SDAG-NEXT: v_mov_b32_e32 v19, s15
+; SDAG-NEXT: v_mov_b32_e32 v20, s0
+; SDAG-NEXT: v_mov_b32_e32 v21, s1
+; SDAG-NEXT: v_mov_b32_e32 v22, s2
+; SDAG-NEXT: v_mov_b32_e32 v23, s3
+; SDAG-NEXT: v_mov_b32_e32 v28, s16
; SDAG-NEXT: s_waitcnt vmcnt(0)
; SDAG-NEXT: s_nop 0
-; SDAG-NEXT: v_smfmac_f32_32x32x64_fp8_fp8 v[0:15], v[26:29], v[18:25], v16 cbsz:1 abid:2
+; SDAG-NEXT: v_smfmac_f32_32x32x64_fp8_fp8 v[0:15], v[24:27], v[16:23], v28 cbsz:1 abid:2
; SDAG-NEXT: v_mov_b32_e32 v16, 0
; SDAG-NEXT: s_nop 7
; SDAG-NEXT: s_nop 2
@@ -3552,4 +3552,4 @@ define <16 x float> @test_smfmac_f32_32x32x64_fp8_fp8__sgpr(<4 x i32> inreg %arg
ret <16 x float> %result
}
-attributes #0 = { "amdgpu-flat-work-group-size"="1,256" }
+attributes #0 = { "amdgpu-flat-work-group-size"="1,256" "amdgpu-agpr-alloc"="0,0" }
diff --git a/llvm/test/CodeGen/AMDGPU/mad-mix-hi-bf16.ll b/llvm/test/CodeGen/AMDGPU/mad-mix-hi-bf16.ll
index 84123e6..393581f 100644
--- a/llvm/test/CodeGen/AMDGPU/mad-mix-hi-bf16.ll
+++ b/llvm/test/CodeGen/AMDGPU/mad-mix-hi-bf16.ll
@@ -141,7 +141,6 @@ define <2 x bfloat> @v_mad_mixhi_bf16_bf16lo_bf16lo_bf16lo_undeflo_clamp_postcvt
; GFX1250-NEXT: s_wait_kmcnt 0x0
; GFX1250-NEXT: v_fma_mixlo_bf16 v3, v0, v1, v2 op_sel_hi:[1,1,1]
; GFX1250-NEXT: v_fma_mixhi_bf16 v0, v0, v1, v2 op_sel_hi:[1,1,1] clamp
-; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: global_store_b16 v[0:1], v3, off scope:SCOPE_SYS
; GFX1250-NEXT: s_wait_storecnt 0x0
; GFX1250-NEXT: s_set_pc_i64 s[30:31]
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-volatile.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-volatile.ll
index bc25084..5e5e3bf 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-volatile.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-local-volatile.ll
@@ -415,11 +415,6 @@ define amdgpu_kernel void @local_volatile_store_0(
; GFX12-WGP-NEXT: v_mov_b32_e32 v0, s1
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s0
-; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
-; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
-; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
-; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
-; GFX12-WGP-NEXT: s_wait_storecnt 0x0
; GFX12-WGP-NEXT: ds_store_b32 v0, v1
; GFX12-WGP-NEXT: s_endpgm
;
@@ -432,11 +427,6 @@ define amdgpu_kernel void @local_volatile_store_0(
; GFX12-CU-NEXT: v_mov_b32_e32 v0, s1
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s0
-; GFX12-CU-NEXT: s_wait_loadcnt 0x0
-; GFX12-CU-NEXT: s_wait_samplecnt 0x0
-; GFX12-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX12-CU-NEXT: s_wait_kmcnt 0x0
-; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(3) %out) {
@@ -562,11 +552,6 @@ define amdgpu_kernel void @local_volatile_store_1(
; GFX12-WGP-NEXT: v_lshl_add_u32 v0, v0, s1, s2
; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
; GFX12-WGP-NEXT: v_mov_b32_e32 v1, s0
-; GFX12-WGP-NEXT: s_wait_loadcnt 0x0
-; GFX12-WGP-NEXT: s_wait_samplecnt 0x0
-; GFX12-WGP-NEXT: s_wait_bvhcnt 0x0
-; GFX12-WGP-NEXT: s_wait_kmcnt 0x0
-; GFX12-WGP-NEXT: s_wait_storecnt 0x0
; GFX12-WGP-NEXT: ds_store_b32 v0, v1
; GFX12-WGP-NEXT: s_endpgm
;
@@ -583,11 +568,6 @@ define amdgpu_kernel void @local_volatile_store_1(
; GFX12-CU-NEXT: v_lshl_add_u32 v0, v0, s1, s2
; GFX12-CU-NEXT: s_wait_kmcnt 0x0
; GFX12-CU-NEXT: v_mov_b32_e32 v1, s0
-; GFX12-CU-NEXT: s_wait_loadcnt 0x0
-; GFX12-CU-NEXT: s_wait_samplecnt 0x0
-; GFX12-CU-NEXT: s_wait_bvhcnt 0x0
-; GFX12-CU-NEXT: s_wait_kmcnt 0x0
-; GFX12-CU-NEXT: s_wait_storecnt 0x0
; GFX12-CU-NEXT: ds_store_b32 v0, v1
; GFX12-CU-NEXT: s_endpgm
ptr addrspace(1) %in, ptr addrspace(3) %out) {
diff --git a/llvm/test/CodeGen/AMDGPU/partial-regcopy-and-spill-missed-at-regalloc.ll b/llvm/test/CodeGen/AMDGPU/partial-regcopy-and-spill-missed-at-regalloc.ll
index c3164b8..f54a383 100644
--- a/llvm/test/CodeGen/AMDGPU/partial-regcopy-and-spill-missed-at-regalloc.ll
+++ b/llvm/test/CodeGen/AMDGPU/partial-regcopy-and-spill-missed-at-regalloc.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-;RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 --stop-after=greedy,1 < %s | FileCheck -check-prefix=REGALLOC-GFX908 %s
+;RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 --stop-after=greedy,2 < %s | FileCheck -check-prefix=REGALLOC-GFX908 %s
;RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 --stop-after=prologepilog < %s | FileCheck -check-prefix=PEI-GFX908 %s
-;RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a --stop-after=greedy,1 < %s | FileCheck -check-prefix=REGALLOC-GFX90A %s
+;RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a --stop-after=greedy,2 < %s | FileCheck -check-prefix=REGALLOC-GFX90A %s
;RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a --stop-after=prologepilog < %s | FileCheck -check-prefix=PEI-GFX90A %s
; Partial reg copy and spill missed during regalloc handled later at frame lowering.
@@ -12,17 +12,21 @@ define amdgpu_kernel void @partial_copy(<4 x i32> %arg) #0 {
; REGALLOC-GFX908-NEXT: liveins: $sgpr4_sgpr5
; REGALLOC-GFX908-NEXT: {{ $}}
; REGALLOC-GFX908-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2162697 /* reguse:AGPR_32 */, undef %6:agpr_32
- ; REGALLOC-GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6094858 /* regdef:VReg_128 */, def %7
- ; REGALLOC-GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 3538954 /* regdef:VReg_64 */, def %8
- ; REGALLOC-GFX908-NEXT: GLOBAL_STORE_DWORDX4 undef %15:vreg_64, %7, 0, 0, implicit $exec :: (volatile store (s128) into `ptr addrspace(1) poison`, addrspace 1)
+ ; REGALLOC-GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6094858 /* regdef:VReg_128 */, def %25
+ ; REGALLOC-GFX908-NEXT: [[COPY:%[0-9]+]]:av_128 = COPY %25
+ ; REGALLOC-GFX908-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 3538954 /* regdef:VReg_64 */, def %27
+ ; REGALLOC-GFX908-NEXT: SI_SPILL_AV64_SAVE %27, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, align 4, addrspace 5)
+ ; REGALLOC-GFX908-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY [[COPY]]
+ ; REGALLOC-GFX908-NEXT: GLOBAL_STORE_DWORDX4 undef %15:vreg_64, [[COPY1]], 0, 0, implicit $exec :: (volatile store (s128) into `ptr addrspace(1) poison`, addrspace 1)
; REGALLOC-GFX908-NEXT: renamable $sgpr0_sgpr1_sgpr2_sgpr3 = S_LOAD_DWORDX4_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (s128) from %ir.arg.kernarg.offset1, addrspace 4)
- ; REGALLOC-GFX908-NEXT: [[COPY:%[0-9]+]]:areg_128 = COPY killed renamable $sgpr0_sgpr1_sgpr2_sgpr3
+ ; REGALLOC-GFX908-NEXT: [[COPY2:%[0-9]+]]:areg_128 = COPY killed renamable $sgpr0_sgpr1_sgpr2_sgpr3
; REGALLOC-GFX908-NEXT: [[AV_MOV_:%[0-9]+]]:vgpr_32 = AV_MOV_B32_IMM_PSEUDO 1, implicit $exec
; REGALLOC-GFX908-NEXT: [[AV_MOV_1:%[0-9]+]]:vgpr_32 = AV_MOV_B32_IMM_PSEUDO 2, implicit $exec
- ; REGALLOC-GFX908-NEXT: [[V_MFMA_I32_4X4X4I8_e64_:%[0-9]+]]:areg_128 = V_MFMA_I32_4X4X4I8_e64 [[AV_MOV_]], [[AV_MOV_1]], [[COPY]], 0, 0, 0, implicit $mode, implicit $exec
- ; REGALLOC-GFX908-NEXT: GLOBAL_STORE_DWORDX2 undef %17:vreg_64, %8, 0, 0, implicit $exec :: (volatile store (s64) into `ptr addrspace(1) poison`, addrspace 1)
- ; REGALLOC-GFX908-NEXT: [[COPY1:%[0-9]+]]:vreg_128 = COPY [[V_MFMA_I32_4X4X4I8_e64_]]
- ; REGALLOC-GFX908-NEXT: GLOBAL_STORE_DWORDX4 undef %19:vreg_64, [[COPY1]], 0, 0, implicit $exec :: (volatile store (s128) into `ptr addrspace(1) poison`, addrspace 1)
+ ; REGALLOC-GFX908-NEXT: [[V_MFMA_I32_4X4X4I8_e64_:%[0-9]+]]:areg_128 = V_MFMA_I32_4X4X4I8_e64 [[AV_MOV_]], [[AV_MOV_1]], [[COPY2]], 0, 0, 0, implicit $mode, implicit $exec
+ ; REGALLOC-GFX908-NEXT: [[SI_SPILL_AV64_RESTORE:%[0-9]+]]:vreg_64 = SI_SPILL_AV64_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s64) from %stack.0, align 4, addrspace 5)
+ ; REGALLOC-GFX908-NEXT: GLOBAL_STORE_DWORDX2 undef %17:vreg_64, [[SI_SPILL_AV64_RESTORE]], 0, 0, implicit $exec :: (volatile store (s64) into `ptr addrspace(1) poison`, addrspace 1)
+ ; REGALLOC-GFX908-NEXT: [[COPY3:%[0-9]+]]:vreg_128 = COPY [[V_MFMA_I32_4X4X4I8_e64_]]
+ ; REGALLOC-GFX908-NEXT: GLOBAL_STORE_DWORDX4 undef %19:vreg_64, [[COPY3]], 0, 0, implicit $exec :: (volatile store (s128) into `ptr addrspace(1) poison`, addrspace 1)
; REGALLOC-GFX908-NEXT: S_ENDPGM 0
;
; PEI-GFX908-LABEL: name: partial_copy
@@ -57,15 +61,17 @@ define amdgpu_kernel void @partial_copy(<4 x i32> %arg) #0 {
; REGALLOC-GFX90A-NEXT: liveins: $sgpr4_sgpr5
; REGALLOC-GFX90A-NEXT: {{ $}}
; REGALLOC-GFX90A-NEXT: INLINEASM &"; use $0", 1 /* sideeffect attdialect */, 2162697 /* reguse:AGPR_32 */, undef %6:agpr_32
- ; REGALLOC-GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6422538 /* regdef:VReg_128_Align2 */, def %7
- ; REGALLOC-GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 3866634 /* regdef:VReg_64_Align2 */, def %8
- ; REGALLOC-GFX90A-NEXT: GLOBAL_STORE_DWORDX4 undef %15:vreg_64_align2, %7, 0, 0, implicit $exec :: (volatile store (s128) into `ptr addrspace(1) poison`, addrspace 1)
+ ; REGALLOC-GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 6422538 /* regdef:VReg_128_Align2 */, def %24
+ ; REGALLOC-GFX90A-NEXT: [[COPY:%[0-9]+]]:av_128_align2 = COPY %24
+ ; REGALLOC-GFX90A-NEXT: INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 3866634 /* regdef:VReg_64_Align2 */, def %22
+ ; REGALLOC-GFX90A-NEXT: [[COPY1:%[0-9]+]]:av_64_align2 = COPY %22
+ ; REGALLOC-GFX90A-NEXT: GLOBAL_STORE_DWORDX4 undef %15:vreg_64_align2, [[COPY]], 0, 0, implicit $exec :: (volatile store (s128) into `ptr addrspace(1) poison`, addrspace 1)
; REGALLOC-GFX90A-NEXT: renamable $sgpr0_sgpr1_sgpr2_sgpr3 = S_LOAD_DWORDX4_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (s128) from %ir.arg.kernarg.offset1, addrspace 4)
- ; REGALLOC-GFX90A-NEXT: [[COPY:%[0-9]+]]:areg_128_align2 = COPY killed renamable $sgpr0_sgpr1_sgpr2_sgpr3
+ ; REGALLOC-GFX90A-NEXT: [[COPY2:%[0-9]+]]:areg_128_align2 = COPY killed renamable $sgpr0_sgpr1_sgpr2_sgpr3
; REGALLOC-GFX90A-NEXT: [[AV_MOV_:%[0-9]+]]:vgpr_32 = AV_MOV_B32_IMM_PSEUDO 1, implicit $exec
; REGALLOC-GFX90A-NEXT: [[AV_MOV_1:%[0-9]+]]:vgpr_32 = AV_MOV_B32_IMM_PSEUDO 2, implicit $exec
- ; REGALLOC-GFX90A-NEXT: [[V_MFMA_I32_4X4X4I8_e64_:%[0-9]+]]:areg_128_align2 = V_MFMA_I32_4X4X4I8_e64 [[AV_MOV_]], [[AV_MOV_1]], [[COPY]], 0, 0, 0, implicit $mode, implicit $exec
- ; REGALLOC-GFX90A-NEXT: GLOBAL_STORE_DWORDX2 undef %17:vreg_64_align2, %8, 0, 0, implicit $exec :: (volatile store (s64) into `ptr addrspace(1) poison`, addrspace 1)
+ ; REGALLOC-GFX90A-NEXT: [[V_MFMA_I32_4X4X4I8_e64_:%[0-9]+]]:areg_128_align2 = V_MFMA_I32_4X4X4I8_e64 [[AV_MOV_]], [[AV_MOV_1]], [[COPY2]], 0, 0, 0, implicit $mode, implicit $exec
+ ; REGALLOC-GFX90A-NEXT: GLOBAL_STORE_DWORDX2 undef %17:vreg_64_align2, [[COPY1]], 0, 0, implicit $exec :: (volatile store (s64) into `ptr addrspace(1) poison`, addrspace 1)
; REGALLOC-GFX90A-NEXT: GLOBAL_STORE_DWORDX4 undef %19:vreg_64_align2, [[V_MFMA_I32_4X4X4I8_e64_]], 0, 0, implicit $exec :: (volatile store (s128) into `ptr addrspace(1) poison`, addrspace 1)
; REGALLOC-GFX90A-NEXT: S_ENDPGM 0
;
diff --git a/llvm/test/CodeGen/AMDGPU/ssubo.ll b/llvm/test/CodeGen/AMDGPU/ssubo.ll
index 053038d..382d892 100644
--- a/llvm/test/CodeGen/AMDGPU/ssubo.ll
+++ b/llvm/test/CodeGen/AMDGPU/ssubo.ll
@@ -1,14 +1,116 @@
-; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=tahiti < %s | FileCheck -check-prefixes=GCN,SI,FUNC %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=GCN,VI,FUNC %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,GFX9,FUNC %s
-
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=tahiti | FileCheck %s --check-prefix=SI
+; RUN: llc < %s -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=tonga | FileCheck %s --check-prefix=VI
+; RUN: llc < %s -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx900 | FileCheck %s --check-prefix=GFX9
+; RUN: llc < %s -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1010 | FileCheck %s --check-prefix=GFX10
+; RUN: llc < %s -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 | FileCheck %s --check-prefix=GFX11
declare { i32, i1 } @llvm.ssub.with.overflow.i32(i32, i32) nounwind readnone
declare { i64, i1 } @llvm.ssub.with.overflow.i64(i64, i64) nounwind readnone
declare { <2 x i32>, <2 x i1> } @llvm.ssub.with.overflow.v2i32(<2 x i32>, <2 x i32>) nounwind readnone
-; FUNC-LABEL: {{^}}ssubo_i64_zext:
define amdgpu_kernel void @ssubo_i64_zext(ptr addrspace(1) %out, i64 %a, i64 %b) nounwind {
+; SI-LABEL: ssubo_i64_zext:
+; SI: ; %bb.0:
+; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
+; SI-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0xd
+; SI-NEXT: s_mov_b32 s7, 0xf000
+; SI-NEXT: s_mov_b32 s6, -1
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: v_mov_b32_e32 v0, s2
+; SI-NEXT: s_sub_u32 s10, s2, s8
+; SI-NEXT: s_subb_u32 s11, s3, s9
+; SI-NEXT: v_mov_b32_e32 v1, s3
+; SI-NEXT: v_cmp_lt_i64_e32 vcc, s[10:11], v[0:1]
+; SI-NEXT: v_cmp_gt_i64_e64 s[2:3], s[8:9], 0
+; SI-NEXT: s_mov_b32 s4, s0
+; SI-NEXT: s_mov_b32 s5, s1
+; SI-NEXT: s_xor_b64 s[0:1], s[2:3], vcc
+; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
+; SI-NEXT: v_mov_b32_e32 v1, s11
+; SI-NEXT: v_add_i32_e32 v0, vcc, s10, v0
+; SI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
+; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
+; SI-NEXT: s_endpgm
+;
+; VI-LABEL: ssubo_i64_zext:
+; VI: ; %bb.0:
+; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; VI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x34
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: v_mov_b32_e32 v1, s2
+; VI-NEXT: s_sub_u32 s6, s2, s4
+; VI-NEXT: v_mov_b32_e32 v2, s3
+; VI-NEXT: s_subb_u32 s7, s3, s5
+; VI-NEXT: v_cmp_gt_i64_e64 s[8:9], s[4:5], 0
+; VI-NEXT: v_cmp_lt_i64_e32 vcc, s[6:7], v[1:2]
+; VI-NEXT: v_mov_b32_e32 v0, s0
+; VI-NEXT: v_mov_b32_e32 v1, s1
+; VI-NEXT: s_xor_b64 s[0:1], s[8:9], vcc
+; VI-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[0:1]
+; VI-NEXT: v_mov_b32_e32 v3, s7
+; VI-NEXT: v_add_u32_e32 v2, vcc, s6, v2
+; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
+; VI-NEXT: flat_store_dwordx2 v[0:1], v[2:3]
+; VI-NEXT: s_endpgm
+;
+; GFX9-LABEL: ssubo_i64_zext:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX9-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
+; GFX9-NEXT: v_mov_b32_e32 v2, 0
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: v_mov_b32_e32 v0, s2
+; GFX9-NEXT: s_sub_u32 s4, s2, s6
+; GFX9-NEXT: v_mov_b32_e32 v1, s3
+; GFX9-NEXT: s_subb_u32 s5, s3, s7
+; GFX9-NEXT: v_cmp_gt_i64_e64 s[8:9], s[6:7], 0
+; GFX9-NEXT: v_cmp_lt_i64_e32 vcc, s[4:5], v[0:1]
+; GFX9-NEXT: v_mov_b32_e32 v1, s5
+; GFX9-NEXT: s_xor_b64 s[2:3], s[8:9], vcc
+; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[2:3]
+; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, s4, v0
+; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
+; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
+; GFX9-NEXT: s_endpgm
+;
+; GFX10-LABEL: ssubo_i64_zext:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_clause 0x1
+; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX10-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
+; GFX10-NEXT: v_mov_b32_e32 v2, 0
+; GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-NEXT: s_sub_u32 s4, s2, s6
+; GFX10-NEXT: s_subb_u32 s5, s3, s7
+; GFX10-NEXT: v_cmp_gt_i64_e64 s6, s[6:7], 0
+; GFX10-NEXT: v_cmp_lt_i64_e64 s2, s[4:5], s[2:3]
+; GFX10-NEXT: s_xor_b32 s2, s6, s2
+; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1, s2
+; GFX10-NEXT: v_add_co_u32 v0, s2, s4, v0
+; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s2, s5, 0, s2
+; GFX10-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
+; GFX10-NEXT: s_endpgm
+;
+; GFX11-LABEL: ssubo_i64_zext:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_clause 0x1
+; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
+; GFX11-NEXT: v_mov_b32_e32 v2, 0
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: s_sub_u32 s6, s2, s4
+; GFX11-NEXT: s_subb_u32 s7, s3, s5
+; GFX11-NEXT: v_cmp_gt_i64_e64 s4, s[4:5], 0
+; GFX11-NEXT: v_cmp_lt_i64_e64 s2, s[6:7], s[2:3]
+; GFX11-NEXT: s_xor_b32 s2, s4, s2
+; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, 1, s2
+; GFX11-NEXT: v_add_co_u32 v0, s2, s6, v0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_add_co_ci_u32_e64 v1, null, s7, 0, s2
+; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_endpgm
%ssub = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 %a, i64 %b) nounwind
%val = extractvalue { i64, i1 } %ssub, 0
%carry = extractvalue { i64, i1 } %ssub, 1
@@ -18,8 +120,102 @@ define amdgpu_kernel void @ssubo_i64_zext(ptr addrspace(1) %out, i64 %a, i64 %b)
ret void
}
-; FUNC-LABEL: {{^}}s_ssubo_i32:
define amdgpu_kernel void @s_ssubo_i32(ptr addrspace(1) %out, ptr addrspace(1) %carryout, i32 %a, i32 %b) nounwind {
+; SI-LABEL: s_ssubo_i32:
+; SI: ; %bb.0:
+; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
+; SI-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0xd
+; SI-NEXT: s_mov_b32 s7, 0xf000
+; SI-NEXT: s_mov_b32 s6, -1
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: s_mov_b32 s4, s0
+; SI-NEXT: s_sub_i32 s12, s8, s9
+; SI-NEXT: s_cmp_gt_i32 s9, 0
+; SI-NEXT: s_cselect_b64 s[10:11], -1, 0
+; SI-NEXT: s_cmp_lt_i32 s12, s8
+; SI-NEXT: s_mov_b32 s5, s1
+; SI-NEXT: s_cselect_b64 s[8:9], -1, 0
+; SI-NEXT: v_mov_b32_e32 v0, s12
+; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; SI-NEXT: s_xor_b64 s[4:5], s[10:11], s[8:9]
+; SI-NEXT: s_mov_b32 s0, s2
+; SI-NEXT: s_mov_b32 s1, s3
+; SI-NEXT: s_mov_b32 s2, s6
+; SI-NEXT: s_mov_b32 s3, s7
+; SI-NEXT: s_waitcnt expcnt(0)
+; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
+; SI-NEXT: buffer_store_byte v0, off, s[0:3], 0
+; SI-NEXT: s_endpgm
+;
+; VI-LABEL: s_ssubo_i32:
+; VI: ; %bb.0:
+; VI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; VI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x34
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: v_mov_b32_e32 v0, s0
+; VI-NEXT: s_sub_i32 s6, s4, s5
+; VI-NEXT: s_cmp_gt_i32 s5, 0
+; VI-NEXT: v_mov_b32_e32 v1, s1
+; VI-NEXT: s_cselect_b64 s[0:1], -1, 0
+; VI-NEXT: s_cmp_lt_i32 s6, s4
+; VI-NEXT: v_mov_b32_e32 v2, s2
+; VI-NEXT: v_mov_b32_e32 v3, s3
+; VI-NEXT: s_cselect_b64 s[2:3], -1, 0
+; VI-NEXT: v_mov_b32_e32 v4, s6
+; VI-NEXT: s_xor_b64 s[0:1], s[0:1], s[2:3]
+; VI-NEXT: flat_store_dword v[0:1], v4
+; VI-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
+; VI-NEXT: flat_store_byte v[2:3], v0
+; VI-NEXT: s_endpgm
+;
+; GFX9-LABEL: s_ssubo_i32:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
+; GFX9-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX9-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: v_mov_b32_e32 v1, s7
+; GFX9-NEXT: s_sub_i32 s4, s6, s7
+; GFX9-NEXT: v_sub_i32 v1, s6, v1 clamp
+; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, s4, v1
+; GFX9-NEXT: v_mov_b32_e32 v2, s4
+; GFX9-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GFX9-NEXT: global_store_dword v0, v2, s[0:1]
+; GFX9-NEXT: global_store_byte v0, v1, s[2:3]
+; GFX9-NEXT: s_endpgm
+;
+; GFX10-LABEL: s_ssubo_i32:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_clause 0x1
+; GFX10-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
+; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX10-NEXT: v_mov_b32_e32 v1, 0
+; GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-NEXT: v_sub_nc_i32 v0, s6, s7 clamp
+; GFX10-NEXT: s_sub_i32 s4, s6, s7
+; GFX10-NEXT: v_mov_b32_e32 v2, s4
+; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, s4, v0
+; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX10-NEXT: global_store_dword v1, v2, s[0:1]
+; GFX10-NEXT: global_store_byte v1, v0, s[2:3]
+; GFX10-NEXT: s_endpgm
+;
+; GFX11-LABEL: s_ssubo_i32:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_clause 0x1
+; GFX11-NEXT: s_load_b64 s[6:7], s[4:5], 0x34
+; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: v_sub_nc_i32 v0, s6, s7 clamp
+; GFX11-NEXT: s_sub_i32 s4, s6, s7
+; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, s4
+; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, s4, v0
+; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX11-NEXT: s_clause 0x1
+; GFX11-NEXT: global_store_b32 v1, v2, s[0:1]
+; GFX11-NEXT: global_store_b8 v1, v0, s[2:3]
+; GFX11-NEXT: s_endpgm
%ssub = call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 %a, i32 %b) nounwind
%val = extractvalue { i32, i1 } %ssub, 0
%carry = extractvalue { i32, i1 } %ssub, 1
@@ -28,8 +224,112 @@ define amdgpu_kernel void @s_ssubo_i32(ptr addrspace(1) %out, ptr addrspace(1) %
ret void
}
-; FUNC-LABEL: {{^}}v_ssubo_i32:
define amdgpu_kernel void @v_ssubo_i32(ptr addrspace(1) %out, ptr addrspace(1) %carryout, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr) nounwind {
+; SI-LABEL: v_ssubo_i32:
+; SI: ; %bb.0:
+; SI-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9
+; SI-NEXT: s_mov_b32 s11, 0xf000
+; SI-NEXT: s_mov_b32 s10, -1
+; SI-NEXT: s_mov_b32 s14, s10
+; SI-NEXT: s_mov_b32 s15, s11
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: s_mov_b32 s12, s4
+; SI-NEXT: s_mov_b32 s13, s5
+; SI-NEXT: s_mov_b32 s4, s6
+; SI-NEXT: s_mov_b32 s5, s7
+; SI-NEXT: s_mov_b32 s6, s10
+; SI-NEXT: s_mov_b32 s7, s11
+; SI-NEXT: buffer_load_dword v0, off, s[12:15], 0
+; SI-NEXT: buffer_load_dword v1, off, s[4:7], 0
+; SI-NEXT: s_mov_b32 s8, s0
+; SI-NEXT: s_mov_b32 s9, s1
+; SI-NEXT: s_mov_b32 s4, s2
+; SI-NEXT: s_mov_b32 s5, s3
+; SI-NEXT: s_waitcnt vmcnt(0)
+; SI-NEXT: v_sub_i32_e32 v2, vcc, v0, v1
+; SI-NEXT: v_cmp_lt_i32_e32 vcc, 0, v1
+; SI-NEXT: v_cmp_lt_i32_e64 s[0:1], v2, v0
+; SI-NEXT: s_xor_b64 s[0:1], vcc, s[0:1]
+; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
+; SI-NEXT: buffer_store_dword v2, off, s[8:11], 0
+; SI-NEXT: buffer_store_byte v0, off, s[4:7], 0
+; SI-NEXT: s_endpgm
+;
+; VI-LABEL: v_ssubo_i32:
+; VI: ; %bb.0:
+; VI-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x24
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: v_mov_b32_e32 v0, s4
+; VI-NEXT: v_mov_b32_e32 v1, s5
+; VI-NEXT: v_mov_b32_e32 v2, s6
+; VI-NEXT: v_mov_b32_e32 v3, s7
+; VI-NEXT: flat_load_dword v4, v[0:1]
+; VI-NEXT: flat_load_dword v5, v[2:3]
+; VI-NEXT: v_mov_b32_e32 v0, s0
+; VI-NEXT: v_mov_b32_e32 v1, s1
+; VI-NEXT: v_mov_b32_e32 v2, s2
+; VI-NEXT: v_mov_b32_e32 v3, s3
+; VI-NEXT: s_waitcnt vmcnt(0)
+; VI-NEXT: v_sub_u32_e32 v6, vcc, v4, v5
+; VI-NEXT: v_cmp_lt_i32_e32 vcc, 0, v5
+; VI-NEXT: v_cmp_lt_i32_e64 s[0:1], v6, v4
+; VI-NEXT: s_xor_b64 s[0:1], vcc, s[0:1]
+; VI-NEXT: flat_store_dword v[0:1], v6
+; VI-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
+; VI-NEXT: flat_store_byte v[2:3], v0
+; VI-NEXT: s_endpgm
+;
+; GFX9-LABEL: v_ssubo_i32:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x24
+; GFX9-NEXT: v_mov_b32_e32 v0, 0
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: global_load_dword v1, v0, s[12:13]
+; GFX9-NEXT: global_load_dword v2, v0, s[14:15]
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: v_sub_i32 v3, v1, v2 clamp
+; GFX9-NEXT: v_sub_u32_e32 v1, v1, v2
+; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, v1, v3
+; GFX9-NEXT: global_store_dword v0, v1, s[8:9]
+; GFX9-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GFX9-NEXT: global_store_byte v0, v1, s[10:11]
+; GFX9-NEXT: s_endpgm
+;
+; GFX10-LABEL: v_ssubo_i32:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x24
+; GFX10-NEXT: v_mov_b32_e32 v0, 0
+; GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-NEXT: s_clause 0x1
+; GFX10-NEXT: global_load_dword v1, v0, s[12:13]
+; GFX10-NEXT: global_load_dword v2, v0, s[14:15]
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: v_sub_nc_i32 v3, v1, v2 clamp
+; GFX10-NEXT: v_sub_nc_u32_e32 v1, v1, v2
+; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, v1, v3
+; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo
+; GFX10-NEXT: global_store_dword v0, v1, s[8:9]
+; GFX10-NEXT: global_store_byte v0, v2, s[10:11]
+; GFX10-NEXT: s_endpgm
+;
+; GFX11-LABEL: v_ssubo_i32:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_load_b256 s[0:7], s[4:5], 0x24
+; GFX11-NEXT: v_mov_b32_e32 v0, 0
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: s_clause 0x1
+; GFX11-NEXT: global_load_b32 v1, v0, s[4:5]
+; GFX11-NEXT: global_load_b32 v2, v0, s[6:7]
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: v_sub_nc_i32 v3, v1, v2 clamp
+; GFX11-NEXT: v_sub_nc_u32_e32 v1, v1, v2
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, v1, v3
+; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo
+; GFX11-NEXT: s_clause 0x1
+; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: global_store_b8 v0, v2, s[2:3]
+; GFX11-NEXT: s_endpgm
%a = load i32, ptr addrspace(1) %aptr, align 4
%b = load i32, ptr addrspace(1) %bptr, align 4
%ssub = call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 %a, i32 %b) nounwind
@@ -40,10 +340,109 @@ define amdgpu_kernel void @v_ssubo_i32(ptr addrspace(1) %out, ptr addrspace(1) %
ret void
}
-; FUNC-LABEL: {{^}}s_ssubo_i64:
-; GCN: s_sub_u32
-; GCN: s_subb_u32
define amdgpu_kernel void @s_ssubo_i64(ptr addrspace(1) %out, ptr addrspace(1) %carryout, i64 %a, i64 %b) nounwind {
+; SI-LABEL: s_ssubo_i64:
+; SI: ; %bb.0:
+; SI-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9
+; SI-NEXT: s_mov_b32 s11, 0xf000
+; SI-NEXT: s_mov_b32 s10, -1
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: s_sub_u32 s12, s4, s6
+; SI-NEXT: v_mov_b32_e32 v0, s4
+; SI-NEXT: s_subb_u32 s13, s5, s7
+; SI-NEXT: v_mov_b32_e32 v1, s5
+; SI-NEXT: v_cmp_lt_i64_e32 vcc, s[12:13], v[0:1]
+; SI-NEXT: v_cmp_gt_i64_e64 s[4:5], s[6:7], 0
+; SI-NEXT: v_mov_b32_e32 v0, s12
+; SI-NEXT: s_mov_b32 s8, s0
+; SI-NEXT: s_mov_b32 s9, s1
+; SI-NEXT: v_mov_b32_e32 v1, s13
+; SI-NEXT: s_xor_b64 s[4:5], s[4:5], vcc
+; SI-NEXT: s_mov_b32 s0, s2
+; SI-NEXT: s_mov_b32 s1, s3
+; SI-NEXT: s_mov_b32 s2, s10
+; SI-NEXT: s_mov_b32 s3, s11
+; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[8:11], 0
+; SI-NEXT: s_waitcnt expcnt(0)
+; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5]
+; SI-NEXT: buffer_store_byte v0, off, s[0:3], 0
+; SI-NEXT: s_endpgm
+;
+; VI-LABEL: s_ssubo_i64:
+; VI: ; %bb.0:
+; VI-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x24
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: v_mov_b32_e32 v0, s0
+; VI-NEXT: s_sub_u32 s0, s4, s6
+; VI-NEXT: v_mov_b32_e32 v4, s4
+; VI-NEXT: v_mov_b32_e32 v1, s1
+; VI-NEXT: s_subb_u32 s1, s5, s7
+; VI-NEXT: v_mov_b32_e32 v5, s5
+; VI-NEXT: v_mov_b32_e32 v2, s2
+; VI-NEXT: v_mov_b32_e32 v3, s3
+; VI-NEXT: v_cmp_lt_i64_e32 vcc, s[0:1], v[4:5]
+; VI-NEXT: v_cmp_gt_i64_e64 s[2:3], s[6:7], 0
+; VI-NEXT: v_mov_b32_e32 v5, s1
+; VI-NEXT: v_mov_b32_e32 v4, s0
+; VI-NEXT: s_xor_b64 s[0:1], s[2:3], vcc
+; VI-NEXT: flat_store_dwordx2 v[0:1], v[4:5]
+; VI-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
+; VI-NEXT: flat_store_byte v[2:3], v0
+; VI-NEXT: s_endpgm
+;
+; GFX9-LABEL: s_ssubo_i64:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x24
+; GFX9-NEXT: v_mov_b32_e32 v2, 0
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: s_sub_u32 s0, s12, s14
+; GFX9-NEXT: v_mov_b32_e32 v0, s12
+; GFX9-NEXT: v_mov_b32_e32 v1, s13
+; GFX9-NEXT: s_subb_u32 s1, s13, s15
+; GFX9-NEXT: v_cmp_gt_i64_e64 s[2:3], s[14:15], 0
+; GFX9-NEXT: v_cmp_lt_i64_e32 vcc, s[0:1], v[0:1]
+; GFX9-NEXT: v_mov_b32_e32 v0, s0
+; GFX9-NEXT: v_mov_b32_e32 v1, s1
+; GFX9-NEXT: s_xor_b64 s[0:1], s[2:3], vcc
+; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9]
+; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
+; GFX9-NEXT: global_store_byte v2, v0, s[10:11]
+; GFX9-NEXT: s_endpgm
+;
+; GFX10-LABEL: s_ssubo_i64:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x24
+; GFX10-NEXT: v_mov_b32_e32 v2, 0
+; GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-NEXT: s_sub_u32 s0, s12, s14
+; GFX10-NEXT: s_subb_u32 s1, s13, s15
+; GFX10-NEXT: v_cmp_gt_i64_e64 s2, s[14:15], 0
+; GFX10-NEXT: v_cmp_lt_i64_e64 s3, s[0:1], s[12:13]
+; GFX10-NEXT: v_mov_b32_e32 v0, s0
+; GFX10-NEXT: v_mov_b32_e32 v1, s1
+; GFX10-NEXT: s_xor_b32 s0, s2, s3
+; GFX10-NEXT: v_cndmask_b32_e64 v3, 0, 1, s0
+; GFX10-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9]
+; GFX10-NEXT: global_store_byte v2, v3, s[10:11]
+; GFX10-NEXT: s_endpgm
+;
+; GFX11-LABEL: s_ssubo_i64:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_load_b256 s[0:7], s[4:5], 0x24
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: s_sub_u32 s8, s4, s6
+; GFX11-NEXT: s_subb_u32 s9, s5, s7
+; GFX11-NEXT: v_cmp_gt_i64_e64 s6, s[6:7], 0
+; GFX11-NEXT: v_cmp_lt_i64_e64 s4, s[8:9], s[4:5]
+; GFX11-NEXT: v_mov_b32_e32 v0, s8
+; GFX11-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s9
+; GFX11-NEXT: s_xor_b32 s4, s6, s4
+; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-NEXT: v_cndmask_b32_e64 v3, 0, 1, s4
+; GFX11-NEXT: s_clause 0x1
+; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: global_store_b8 v2, v3, s[2:3]
+; GFX11-NEXT: s_endpgm
%ssub = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 %a, i64 %b) nounwind
%val = extractvalue { i64, i1 } %ssub, 0
%carry = extractvalue { i64, i1 } %ssub, 1
@@ -52,16 +451,121 @@ define amdgpu_kernel void @s_ssubo_i64(ptr addrspace(1) %out, ptr addrspace(1) %
ret void
}
-; FUNC-LABEL: {{^}}v_ssubo_i64:
-; SI: v_sub_i32_e32 v{{[0-9]+}}, vcc,
-; SI: v_subb_u32_e32 v{{[0-9]+}}, vcc,
-
-; VI: v_sub_u32_e32 v{{[0-9]+}}, vcc,
-; VI: v_subb_u32_e32 v{{[0-9]+}}, vcc,
-
-; GFX9: v_sub_co_u32_e32 v{{[0-9]+}}, vcc,
-; GFX9: v_subb_co_u32_e32 v{{[0-9]+}}, vcc,
define amdgpu_kernel void @v_ssubo_i64(ptr addrspace(1) %out, ptr addrspace(1) %carryout, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr) nounwind {
+; SI-LABEL: v_ssubo_i64:
+; SI: ; %bb.0:
+; SI-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9
+; SI-NEXT: s_mov_b32 s11, 0xf000
+; SI-NEXT: s_mov_b32 s10, -1
+; SI-NEXT: s_mov_b32 s14, s10
+; SI-NEXT: s_mov_b32 s15, s11
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: s_mov_b32 s12, s4
+; SI-NEXT: s_mov_b32 s13, s5
+; SI-NEXT: s_mov_b32 s4, s6
+; SI-NEXT: s_mov_b32 s5, s7
+; SI-NEXT: s_mov_b32 s6, s10
+; SI-NEXT: s_mov_b32 s7, s11
+; SI-NEXT: buffer_load_dwordx2 v[0:1], off, s[12:15], 0
+; SI-NEXT: buffer_load_dwordx2 v[2:3], off, s[4:7], 0
+; SI-NEXT: s_mov_b32 s8, s0
+; SI-NEXT: s_mov_b32 s9, s1
+; SI-NEXT: s_mov_b32 s4, s2
+; SI-NEXT: s_mov_b32 s5, s3
+; SI-NEXT: s_waitcnt vmcnt(0)
+; SI-NEXT: v_sub_i32_e32 v4, vcc, v0, v2
+; SI-NEXT: v_subb_u32_e32 v5, vcc, v1, v3, vcc
+; SI-NEXT: v_cmp_lt_i64_e32 vcc, 0, v[2:3]
+; SI-NEXT: v_cmp_lt_i64_e64 s[0:1], v[4:5], v[0:1]
+; SI-NEXT: buffer_store_dwordx2 v[4:5], off, s[8:11], 0
+; SI-NEXT: s_xor_b64 s[0:1], vcc, s[0:1]
+; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
+; SI-NEXT: buffer_store_byte v0, off, s[4:7], 0
+; SI-NEXT: s_endpgm
+;
+; VI-LABEL: v_ssubo_i64:
+; VI: ; %bb.0:
+; VI-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x24
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: v_mov_b32_e32 v0, s4
+; VI-NEXT: v_mov_b32_e32 v1, s5
+; VI-NEXT: v_mov_b32_e32 v2, s6
+; VI-NEXT: v_mov_b32_e32 v3, s7
+; VI-NEXT: flat_load_dwordx2 v[0:1], v[0:1]
+; VI-NEXT: flat_load_dwordx2 v[2:3], v[2:3]
+; VI-NEXT: v_mov_b32_e32 v4, s0
+; VI-NEXT: v_mov_b32_e32 v5, s1
+; VI-NEXT: v_mov_b32_e32 v6, s2
+; VI-NEXT: v_mov_b32_e32 v7, s3
+; VI-NEXT: s_waitcnt vmcnt(0)
+; VI-NEXT: v_sub_u32_e32 v8, vcc, v0, v2
+; VI-NEXT: v_subb_u32_e32 v9, vcc, v1, v3, vcc
+; VI-NEXT: v_cmp_lt_i64_e32 vcc, 0, v[2:3]
+; VI-NEXT: v_cmp_lt_i64_e64 s[0:1], v[8:9], v[0:1]
+; VI-NEXT: flat_store_dwordx2 v[4:5], v[8:9]
+; VI-NEXT: s_xor_b64 s[0:1], vcc, s[0:1]
+; VI-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
+; VI-NEXT: flat_store_byte v[6:7], v0
+; VI-NEXT: s_endpgm
+;
+; GFX9-LABEL: v_ssubo_i64:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x24
+; GFX9-NEXT: v_mov_b32_e32 v6, 0
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: global_load_dwordx2 v[0:1], v6, s[12:13]
+; GFX9-NEXT: global_load_dwordx2 v[2:3], v6, s[14:15]
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: v_sub_co_u32_e32 v4, vcc, v0, v2
+; GFX9-NEXT: v_subb_co_u32_e32 v5, vcc, v1, v3, vcc
+; GFX9-NEXT: v_cmp_lt_i64_e32 vcc, 0, v[2:3]
+; GFX9-NEXT: v_cmp_lt_i64_e64 s[0:1], v[4:5], v[0:1]
+; GFX9-NEXT: global_store_dwordx2 v6, v[4:5], s[8:9]
+; GFX9-NEXT: s_xor_b64 s[0:1], vcc, s[0:1]
+; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
+; GFX9-NEXT: global_store_byte v6, v0, s[10:11]
+; GFX9-NEXT: s_endpgm
+;
+; GFX10-LABEL: v_ssubo_i64:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x24
+; GFX10-NEXT: v_mov_b32_e32 v6, 0
+; GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-NEXT: s_clause 0x1
+; GFX10-NEXT: global_load_dwordx2 v[0:1], v6, s[12:13]
+; GFX10-NEXT: global_load_dwordx2 v[2:3], v6, s[14:15]
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: v_sub_co_u32 v4, vcc_lo, v0, v2
+; GFX10-NEXT: v_sub_co_ci_u32_e32 v5, vcc_lo, v1, v3, vcc_lo
+; GFX10-NEXT: v_cmp_lt_i64_e32 vcc_lo, 0, v[2:3]
+; GFX10-NEXT: v_cmp_lt_i64_e64 s0, v[4:5], v[0:1]
+; GFX10-NEXT: s_xor_b32 s0, vcc_lo, s0
+; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX10-NEXT: global_store_dwordx2 v6, v[4:5], s[8:9]
+; GFX10-NEXT: global_store_byte v6, v0, s[10:11]
+; GFX10-NEXT: s_endpgm
+;
+; GFX11-LABEL: v_ssubo_i64:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_load_b256 s[4:11], s[4:5], 0x24
+; GFX11-NEXT: v_mov_b32_e32 v6, 0
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: s_clause 0x1
+; GFX11-NEXT: global_load_b64 v[0:1], v6, s[8:9]
+; GFX11-NEXT: global_load_b64 v[2:3], v6, s[10:11]
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: v_sub_co_u32 v4, vcc_lo, v0, v2
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_sub_co_ci_u32_e64 v5, null, v1, v3, vcc_lo
+; GFX11-NEXT: v_cmp_lt_i64_e32 vcc_lo, 0, v[2:3]
+; GFX11-NEXT: v_cmp_lt_i64_e64 s0, v[4:5], v[0:1]
+; GFX11-NEXT: s_xor_b32 s0, vcc_lo, s0
+; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, 1, s0
+; GFX11-NEXT: s_clause 0x1
+; GFX11-NEXT: global_store_b64 v6, v[4:5], s[4:5]
+; GFX11-NEXT: global_store_b8 v6, v0, s[6:7]
+; GFX11-NEXT: s_endpgm
%a = load i64, ptr addrspace(1) %aptr, align 4
%b = load i64, ptr addrspace(1) %bptr, align 4
%ssub = call { i64, i1 } @llvm.ssub.with.overflow.i64(i64 %a, i64 %b) nounwind
@@ -72,14 +576,134 @@ define amdgpu_kernel void @v_ssubo_i64(ptr addrspace(1) %out, ptr addrspace(1) %
ret void
}
-; FUNC-LABEL: {{^}}v_ssubo_v2i32:
-; SICIVI: v_cmp_lt_i32
-; SICIVI: v_cmp_lt_i32
-; SICIVI: v_sub_{{[iu]}}32
-; SICIVI: v_cmp_lt_i32
-; SICIVI: v_cmp_lt_i32
-; SICIVI: v_sub_{{[iu]}}32
define amdgpu_kernel void @v_ssubo_v2i32(ptr addrspace(1) %out, ptr addrspace(1) %carryout, ptr addrspace(1) %aptr, ptr addrspace(1) %bptr) nounwind {
+; SI-LABEL: v_ssubo_v2i32:
+; SI: ; %bb.0:
+; SI-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x9
+; SI-NEXT: s_mov_b32 s11, 0xf000
+; SI-NEXT: s_mov_b32 s10, -1
+; SI-NEXT: s_mov_b32 s14, s10
+; SI-NEXT: s_mov_b32 s15, s11
+; SI-NEXT: s_waitcnt lgkmcnt(0)
+; SI-NEXT: s_mov_b32 s12, s4
+; SI-NEXT: s_mov_b32 s13, s5
+; SI-NEXT: s_mov_b32 s4, s6
+; SI-NEXT: s_mov_b32 s5, s7
+; SI-NEXT: s_mov_b32 s6, s10
+; SI-NEXT: s_mov_b32 s7, s11
+; SI-NEXT: buffer_load_dwordx2 v[0:1], off, s[12:15], 0
+; SI-NEXT: buffer_load_dwordx2 v[2:3], off, s[4:7], 0
+; SI-NEXT: s_mov_b32 s8, s0
+; SI-NEXT: s_mov_b32 s9, s1
+; SI-NEXT: s_mov_b32 s12, s2
+; SI-NEXT: s_mov_b32 s13, s3
+; SI-NEXT: s_waitcnt vmcnt(0)
+; SI-NEXT: v_sub_i32_e32 v5, vcc, v1, v3
+; SI-NEXT: v_sub_i32_e32 v4, vcc, v0, v2
+; SI-NEXT: v_cmp_lt_i32_e64 s[0:1], 0, v3
+; SI-NEXT: v_cmp_lt_i32_e64 s[4:5], v5, v1
+; SI-NEXT: v_cmp_lt_i32_e32 vcc, 0, v2
+; SI-NEXT: v_cmp_lt_i32_e64 s[2:3], v4, v0
+; SI-NEXT: s_xor_b64 s[0:1], s[0:1], s[4:5]
+; SI-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[0:1]
+; SI-NEXT: s_xor_b64 s[0:1], vcc, s[2:3]
+; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
+; SI-NEXT: buffer_store_dwordx2 v[4:5], off, s[8:11], 0
+; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[12:15], 0
+; SI-NEXT: s_endpgm
+;
+; VI-LABEL: v_ssubo_v2i32:
+; VI: ; %bb.0:
+; VI-NEXT: s_load_dwordx8 s[0:7], s[4:5], 0x24
+; VI-NEXT: s_waitcnt lgkmcnt(0)
+; VI-NEXT: v_mov_b32_e32 v0, s4
+; VI-NEXT: v_mov_b32_e32 v1, s5
+; VI-NEXT: v_mov_b32_e32 v2, s6
+; VI-NEXT: v_mov_b32_e32 v3, s7
+; VI-NEXT: flat_load_dwordx2 v[0:1], v[0:1]
+; VI-NEXT: flat_load_dwordx2 v[2:3], v[2:3]
+; VI-NEXT: v_mov_b32_e32 v4, s0
+; VI-NEXT: v_mov_b32_e32 v5, s1
+; VI-NEXT: v_mov_b32_e32 v6, s2
+; VI-NEXT: v_mov_b32_e32 v7, s3
+; VI-NEXT: s_waitcnt vmcnt(0)
+; VI-NEXT: v_sub_u32_e32 v9, vcc, v1, v3
+; VI-NEXT: v_sub_u32_e32 v8, vcc, v0, v2
+; VI-NEXT: v_cmp_lt_i32_e64 s[0:1], 0, v3
+; VI-NEXT: v_cmp_lt_i32_e64 s[4:5], v9, v1
+; VI-NEXT: v_cmp_lt_i32_e32 vcc, 0, v2
+; VI-NEXT: v_cmp_lt_i32_e64 s[2:3], v8, v0
+; VI-NEXT: s_xor_b64 s[0:1], s[0:1], s[4:5]
+; VI-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[0:1]
+; VI-NEXT: s_xor_b64 s[0:1], vcc, s[2:3]
+; VI-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
+; VI-NEXT: flat_store_dwordx2 v[4:5], v[8:9]
+; VI-NEXT: flat_store_dwordx2 v[6:7], v[0:1]
+; VI-NEXT: s_endpgm
+;
+; GFX9-LABEL: v_ssubo_v2i32:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x24
+; GFX9-NEXT: v_mov_b32_e32 v6, 0
+; GFX9-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-NEXT: global_load_dwordx2 v[0:1], v6, s[12:13]
+; GFX9-NEXT: global_load_dwordx2 v[2:3], v6, s[14:15]
+; GFX9-NEXT: s_waitcnt vmcnt(0)
+; GFX9-NEXT: v_sub_u32_e32 v5, v1, v3
+; GFX9-NEXT: v_sub_i32 v1, v1, v3 clamp
+; GFX9-NEXT: v_sub_u32_e32 v4, v0, v2
+; GFX9-NEXT: v_sub_i32 v0, v0, v2 clamp
+; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, v5, v1
+; GFX9-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, v4, v0
+; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX9-NEXT: global_store_dwordx2 v6, v[4:5], s[8:9]
+; GFX9-NEXT: global_store_dwordx2 v6, v[0:1], s[10:11]
+; GFX9-NEXT: s_endpgm
+;
+; GFX10-LABEL: v_ssubo_v2i32:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x24
+; GFX10-NEXT: v_mov_b32_e32 v5, 0
+; GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-NEXT: s_clause 0x1
+; GFX10-NEXT: global_load_dwordx2 v[0:1], v5, s[12:13]
+; GFX10-NEXT: global_load_dwordx2 v[2:3], v5, s[14:15]
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: v_sub_nc_u32_e32 v4, v1, v3
+; GFX10-NEXT: v_sub_nc_i32 v1, v1, v3 clamp
+; GFX10-NEXT: v_sub_nc_u32_e32 v3, v0, v2
+; GFX10-NEXT: v_sub_nc_i32 v0, v0, v2 clamp
+; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, v4, v1
+; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
+; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, v3, v0
+; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX10-NEXT: global_store_dwordx2 v5, v[3:4], s[8:9]
+; GFX10-NEXT: global_store_dwordx2 v5, v[0:1], s[10:11]
+; GFX10-NEXT: s_endpgm
+;
+; GFX11-LABEL: v_ssubo_v2i32:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_load_b256 s[0:7], s[4:5], 0x24
+; GFX11-NEXT: v_mov_b32_e32 v5, 0
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: s_clause 0x1
+; GFX11-NEXT: global_load_b64 v[0:1], v5, s[4:5]
+; GFX11-NEXT: global_load_b64 v[2:3], v5, s[6:7]
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: v_sub_nc_u32_e32 v4, v1, v3
+; GFX11-NEXT: v_sub_nc_i32 v1, v1, v3 clamp
+; GFX11-NEXT: v_sub_nc_u32_e32 v3, v0, v2
+; GFX11-NEXT: v_sub_nc_i32 v0, v0, v2 clamp
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(SKIP_1) | instid1(VALU_DEP_3)
+; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, v4, v1
+; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
+; GFX11-NEXT: v_cmp_ne_u32_e32 vcc_lo, v3, v0
+; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX11-NEXT: s_clause 0x1
+; GFX11-NEXT: global_store_b64 v5, v[3:4], s[0:1]
+; GFX11-NEXT: global_store_b64 v5, v[0:1], s[2:3]
+; GFX11-NEXT: s_endpgm
%a = load <2 x i32>, ptr addrspace(1) %aptr, align 4
%b = load <2 x i32>, ptr addrspace(1) %bptr, align 4
%sadd = call { <2 x i32>, <2 x i1> } @llvm.ssub.with.overflow.v2i32(<2 x i32> %a, <2 x i32> %b) nounwind
diff --git a/llvm/test/CodeGen/AMDGPU/uaddo.ll b/llvm/test/CodeGen/AMDGPU/uaddo.ll
index d230ff5..e1574dc 100644
--- a/llvm/test/CodeGen/AMDGPU/uaddo.ll
+++ b/llvm/test/CodeGen/AMDGPU/uaddo.ll
@@ -1,7 +1,9 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn < %s | FileCheck -check-prefixes=SI %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=VI %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9 %s
+; RUN: llc < %s -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=tahiti | FileCheck %s --check-prefix=SI
+; RUN: llc < %s -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=tonga | FileCheck %s --check-prefix=VI
+; RUN: llc < %s -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx900 | FileCheck %s --check-prefix=GFX9
+; RUN: llc < %s -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1010 | FileCheck %s --check-prefix=GFX10
+; RUN: llc < %s -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 | FileCheck %s --check-prefix=GFX11
define amdgpu_kernel void @s_uaddo_i64_zext(ptr addrspace(1) %out, i64 %a, i64 %b) #0 {
; SI-LABEL: s_uaddo_i64_zext:
@@ -12,14 +14,14 @@ define amdgpu_kernel void @s_uaddo_i64_zext(ptr addrspace(1) %out, i64 %a, i64 %
; SI-NEXT: s_mov_b32 s6, -1
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_mov_b32 s4, s0
-; SI-NEXT: s_mov_b32 s5, s1
; SI-NEXT: s_add_u32 s0, s2, s8
; SI-NEXT: v_mov_b32_e32 v0, s2
-; SI-NEXT: v_mov_b32_e32 v1, s3
+; SI-NEXT: s_mov_b32 s5, s1
; SI-NEXT: s_addc_u32 s1, s3, s9
+; SI-NEXT: v_mov_b32_e32 v1, s3
; SI-NEXT: v_cmp_lt_u64_e32 vcc, s[0:1], v[0:1]
-; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; SI-NEXT: v_mov_b32_e32 v1, s1
+; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; SI-NEXT: v_add_i32_e32 v0, vcc, s0, v0
; SI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
@@ -61,6 +63,40 @@ define amdgpu_kernel void @s_uaddo_i64_zext(ptr addrspace(1) %out, i64 %a, i64 %
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
; GFX9-NEXT: s_endpgm
+;
+; GFX10-LABEL: s_uaddo_i64_zext:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_clause 0x1
+; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX10-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
+; GFX10-NEXT: v_mov_b32_e32 v2, 0
+; GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-NEXT: s_add_u32 s4, s2, s6
+; GFX10-NEXT: s_addc_u32 s5, s3, s7
+; GFX10-NEXT: v_cmp_lt_u64_e64 s2, s[4:5], s[2:3]
+; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1, s2
+; GFX10-NEXT: v_add_co_u32 v0, s2, s4, v0
+; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s2, s5, 0, s2
+; GFX10-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
+; GFX10-NEXT: s_endpgm
+;
+; GFX11-LABEL: s_uaddo_i64_zext:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_clause 0x1
+; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
+; GFX11-NEXT: v_mov_b32_e32 v2, 0
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: s_add_u32 s4, s2, s4
+; GFX11-NEXT: s_addc_u32 s5, s3, s5
+; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_cmp_lt_u64_e64 s2, s[4:5], s[2:3]
+; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, 1, s2
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_add_co_u32 v0, s2, s4, v0
+; GFX11-NEXT: v_add_co_ci_u32_e64 v1, null, s5, 0, s2
+; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_endpgm
%uadd = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %a, i64 %b)
%val = extractvalue { i64, i1 } %uadd, 0
%carry = extractvalue { i64, i1 } %uadd, 1
@@ -76,21 +112,21 @@ define amdgpu_kernel void @s_uaddo_i32(ptr addrspace(1) %out, ptr addrspace(1) %
; SI-LABEL: s_uaddo_i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
-; SI-NEXT: s_load_dwordx2 s[12:13], s[4:5], 0xd
+; SI-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0xd
; SI-NEXT: s_mov_b32 s7, 0xf000
; SI-NEXT: s_mov_b32 s6, -1
-; SI-NEXT: s_mov_b32 s10, s6
-; SI-NEXT: s_mov_b32 s11, s7
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_mov_b32 s4, s0
+; SI-NEXT: v_mov_b32_e32 v0, s9
; SI-NEXT: s_mov_b32 s5, s1
-; SI-NEXT: s_mov_b32 s8, s2
-; SI-NEXT: s_mov_b32 s9, s3
-; SI-NEXT: v_mov_b32_e32 v0, s13
-; SI-NEXT: v_add_i32_e32 v0, vcc, s12, v0
+; SI-NEXT: v_add_i32_e32 v0, vcc, s8, v0
+; SI-NEXT: s_mov_b32 s0, s2
+; SI-NEXT: s_mov_b32 s1, s3
+; SI-NEXT: s_mov_b32 s2, s6
+; SI-NEXT: s_mov_b32 s3, s7
; SI-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
-; SI-NEXT: buffer_store_byte v1, off, s[8:11], 0
+; SI-NEXT: buffer_store_byte v1, off, s[0:3], 0
; SI-NEXT: s_endpgm
;
; VI-LABEL: s_uaddo_i32:
@@ -121,6 +157,34 @@ define amdgpu_kernel void @s_uaddo_i32(ptr addrspace(1) %out, ptr addrspace(1) %
; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
; GFX9-NEXT: global_store_byte v0, v2, s[2:3]
; GFX9-NEXT: s_endpgm
+;
+; GFX10-LABEL: s_uaddo_i32:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_clause 0x1
+; GFX10-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
+; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX10-NEXT: v_mov_b32_e32 v0, 0
+; GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-NEXT: v_add_co_u32 v1, s4, s6, s7
+; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 1, s4
+; GFX10-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX10-NEXT: global_store_byte v0, v2, s[2:3]
+; GFX10-NEXT: s_endpgm
+;
+; GFX11-LABEL: s_uaddo_i32:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_clause 0x1
+; GFX11-NEXT: s_load_b64 s[6:7], s[4:5], 0x34
+; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-NEXT: v_mov_b32_e32 v0, 0
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: v_add_co_u32 v1, s4, s6, s7
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 1, s4
+; GFX11-NEXT: s_clause 0x1
+; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: global_store_b8 v0, v2, s[2:3]
+; GFX11-NEXT: s_endpgm
%uadd = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
%val = extractvalue { i32, i1 } %uadd, 0
%carry = extractvalue { i32, i1 } %uadd, 1
@@ -137,17 +201,15 @@ define amdgpu_kernel void @v_uaddo_i32(ptr addrspace(1) %out, ptr addrspace(1) %
; SI-NEXT: s_mov_b32 s10, -1
; SI-NEXT: s_mov_b32 s14, s10
; SI-NEXT: s_mov_b32 s15, s11
-; SI-NEXT: s_mov_b32 s18, s10
-; SI-NEXT: s_mov_b32 s19, s11
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_mov_b32 s12, s4
; SI-NEXT: s_mov_b32 s13, s5
-; SI-NEXT: s_mov_b32 s16, s6
-; SI-NEXT: s_mov_b32 s17, s7
-; SI-NEXT: buffer_load_dword v0, off, s[12:15], 0
-; SI-NEXT: buffer_load_dword v1, off, s[16:19], 0
+; SI-NEXT: s_mov_b32 s4, s6
+; SI-NEXT: s_mov_b32 s5, s7
; SI-NEXT: s_mov_b32 s6, s10
; SI-NEXT: s_mov_b32 s7, s11
+; SI-NEXT: buffer_load_dword v0, off, s[12:15], 0
+; SI-NEXT: buffer_load_dword v1, off, s[4:7], 0
; SI-NEXT: s_mov_b32 s8, s0
; SI-NEXT: s_mov_b32 s9, s1
; SI-NEXT: s_mov_b32 s4, s2
@@ -193,6 +255,38 @@ define amdgpu_kernel void @v_uaddo_i32(ptr addrspace(1) %out, ptr addrspace(1) %
; GFX9-NEXT: global_store_dword v0, v1, s[8:9]
; GFX9-NEXT: global_store_byte v0, v2, s[10:11]
; GFX9-NEXT: s_endpgm
+;
+; GFX10-LABEL: v_uaddo_i32:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x24
+; GFX10-NEXT: v_mov_b32_e32 v0, 0
+; GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-NEXT: s_clause 0x1
+; GFX10-NEXT: global_load_dword v1, v0, s[12:13]
+; GFX10-NEXT: global_load_dword v2, v0, s[14:15]
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: v_add_co_u32 v1, s0, v1, v2
+; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 1, s0
+; GFX10-NEXT: global_store_dword v0, v1, s[8:9]
+; GFX10-NEXT: global_store_byte v0, v2, s[10:11]
+; GFX10-NEXT: s_endpgm
+;
+; GFX11-LABEL: v_uaddo_i32:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_load_b256 s[0:7], s[4:5], 0x24
+; GFX11-NEXT: v_mov_b32_e32 v0, 0
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: s_clause 0x1
+; GFX11-NEXT: global_load_b32 v1, v0, s[4:5]
+; GFX11-NEXT: global_load_b32 v2, v0, s[6:7]
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: v_add_co_u32 v1, s4, v1, v2
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 1, s4
+; GFX11-NEXT: s_clause 0x1
+; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: global_store_b8 v0, v2, s[2:3]
+; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds i32, ptr addrspace(1) %a.ptr
@@ -215,17 +309,15 @@ define amdgpu_kernel void @v_uaddo_i32_novcc(ptr addrspace(1) %out, ptr addrspac
; SI-NEXT: s_mov_b32 s10, -1
; SI-NEXT: s_mov_b32 s14, s10
; SI-NEXT: s_mov_b32 s15, s11
-; SI-NEXT: s_mov_b32 s18, s10
-; SI-NEXT: s_mov_b32 s19, s11
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_mov_b32 s12, s4
; SI-NEXT: s_mov_b32 s13, s5
-; SI-NEXT: s_mov_b32 s16, s6
-; SI-NEXT: s_mov_b32 s17, s7
-; SI-NEXT: buffer_load_dword v0, off, s[12:15], 0
-; SI-NEXT: buffer_load_dword v1, off, s[16:19], 0
+; SI-NEXT: s_mov_b32 s4, s6
+; SI-NEXT: s_mov_b32 s5, s7
; SI-NEXT: s_mov_b32 s6, s10
; SI-NEXT: s_mov_b32 s7, s11
+; SI-NEXT: buffer_load_dword v0, off, s[12:15], 0
+; SI-NEXT: buffer_load_dword v1, off, s[4:7], 0
; SI-NEXT: s_mov_b32 s8, s0
; SI-NEXT: s_mov_b32 s9, s1
; SI-NEXT: s_mov_b32 s4, s2
@@ -283,6 +375,45 @@ define amdgpu_kernel void @v_uaddo_i32_novcc(ptr addrspace(1) %out, ptr addrspac
; GFX9-NEXT: global_store_byte v0, v2, s[10:11]
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_endpgm
+;
+; GFX10-LABEL: v_uaddo_i32_novcc:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x24
+; GFX10-NEXT: v_mov_b32_e32 v0, 0
+; GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-NEXT: s_clause 0x1
+; GFX10-NEXT: global_load_dword v1, v0, s[12:13]
+; GFX10-NEXT: global_load_dword v2, v0, s[14:15]
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: v_add_co_u32 v1, s0, v1, v2
+; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 1, s0
+; GFX10-NEXT: global_store_dword v0, v1, s[8:9]
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT: ;;#ASMSTART
+; GFX10-NEXT: ;;#ASMEND
+; GFX10-NEXT: global_store_byte v0, v2, s[10:11]
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT: s_endpgm
+;
+; GFX11-LABEL: v_uaddo_i32_novcc:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_load_b256 s[0:7], s[4:5], 0x24
+; GFX11-NEXT: v_mov_b32_e32 v0, 0
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: s_clause 0x1
+; GFX11-NEXT: global_load_b32 v1, v0, s[4:5]
+; GFX11-NEXT: global_load_b32 v2, v0, s[6:7]
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: v_add_co_u32 v1, s4, v1, v2
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 1, s4
+; GFX11-NEXT: global_store_b32 v0, v1, s[0:1] dlc
+; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT: ;;#ASMSTART
+; GFX11-NEXT: ;;#ASMEND
+; GFX11-NEXT: global_store_b8 v0, v2, s[2:3] dlc
+; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds i32, ptr addrspace(1) %a.ptr
@@ -306,21 +437,21 @@ define amdgpu_kernel void @s_uaddo_i64(ptr addrspace(1) %out, ptr addrspace(1) %
; SI-NEXT: s_mov_b32 s10, -1
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_add_u32 s6, s4, s6
-; SI-NEXT: s_addc_u32 s7, s5, s7
-; SI-NEXT: s_mov_b32 s14, s10
-; SI-NEXT: s_mov_b32 s15, s11
-; SI-NEXT: s_mov_b32 s8, s0
-; SI-NEXT: s_mov_b32 s9, s1
-; SI-NEXT: s_mov_b32 s12, s2
-; SI-NEXT: s_mov_b32 s13, s3
; SI-NEXT: v_mov_b32_e32 v0, s4
+; SI-NEXT: s_addc_u32 s7, s5, s7
; SI-NEXT: v_mov_b32_e32 v1, s5
+; SI-NEXT: v_cmp_lt_u64_e32 vcc, s[6:7], v[0:1]
; SI-NEXT: v_mov_b32_e32 v2, s6
+; SI-NEXT: s_mov_b32 s8, s0
+; SI-NEXT: s_mov_b32 s9, s1
+; SI-NEXT: s_mov_b32 s0, s2
+; SI-NEXT: s_mov_b32 s1, s3
+; SI-NEXT: s_mov_b32 s2, s10
+; SI-NEXT: s_mov_b32 s3, s11
; SI-NEXT: v_mov_b32_e32 v3, s7
-; SI-NEXT: buffer_store_dwordx2 v[2:3], off, s[8:11], 0
-; SI-NEXT: v_cmp_lt_u64_e32 vcc, s[6:7], v[0:1]
; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
-; SI-NEXT: buffer_store_byte v0, off, s[12:15], 0
+; SI-NEXT: buffer_store_dwordx2 v[2:3], off, s[8:11], 0
+; SI-NEXT: buffer_store_byte v0, off, s[0:3], 0
; SI-NEXT: s_endpgm
;
; VI-LABEL: s_uaddo_i64:
@@ -359,6 +490,37 @@ define amdgpu_kernel void @s_uaddo_i64(ptr addrspace(1) %out, ptr addrspace(1) %
; GFX9-NEXT: global_store_dwordx2 v4, v[2:3], s[8:9]
; GFX9-NEXT: global_store_byte v4, v0, s[10:11]
; GFX9-NEXT: s_endpgm
+;
+; GFX10-LABEL: s_uaddo_i64:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x24
+; GFX10-NEXT: v_mov_b32_e32 v2, 0
+; GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-NEXT: s_add_u32 s0, s12, s14
+; GFX10-NEXT: s_addc_u32 s1, s13, s15
+; GFX10-NEXT: v_mov_b32_e32 v0, s0
+; GFX10-NEXT: v_mov_b32_e32 v1, s1
+; GFX10-NEXT: v_cmp_lt_u64_e64 s0, s[0:1], s[12:13]
+; GFX10-NEXT: v_cndmask_b32_e64 v3, 0, 1, s0
+; GFX10-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9]
+; GFX10-NEXT: global_store_byte v2, v3, s[10:11]
+; GFX10-NEXT: s_endpgm
+;
+; GFX11-LABEL: s_uaddo_i64:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_load_b256 s[0:7], s[4:5], 0x24
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: s_add_u32 s6, s4, s6
+; GFX11-NEXT: s_addc_u32 s7, s5, s7
+; GFX11-NEXT: v_mov_b32_e32 v0, s6
+; GFX11-NEXT: v_cmp_lt_u64_e64 s4, s[6:7], s[4:5]
+; GFX11-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s7
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX11-NEXT: v_cndmask_b32_e64 v3, 0, 1, s4
+; GFX11-NEXT: s_clause 0x1
+; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: global_store_b8 v2, v3, s[2:3]
+; GFX11-NEXT: s_endpgm
%uadd = call { i64, i1 } @llvm.uadd.with.overflow.i64(i64 %a, i64 %b)
%val = extractvalue { i64, i1 } %uadd, 0
%carry = extractvalue { i64, i1 } %uadd, 1
@@ -375,17 +537,15 @@ define amdgpu_kernel void @v_uaddo_i64(ptr addrspace(1) %out, ptr addrspace(1) %
; SI-NEXT: s_mov_b32 s10, -1
; SI-NEXT: s_mov_b32 s14, s10
; SI-NEXT: s_mov_b32 s15, s11
-; SI-NEXT: s_mov_b32 s18, s10
-; SI-NEXT: s_mov_b32 s19, s11
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_mov_b32 s12, s4
; SI-NEXT: s_mov_b32 s13, s5
-; SI-NEXT: s_mov_b32 s16, s6
-; SI-NEXT: s_mov_b32 s17, s7
-; SI-NEXT: buffer_load_dwordx2 v[0:1], off, s[12:15], 0
-; SI-NEXT: buffer_load_dwordx2 v[2:3], off, s[16:19], 0
+; SI-NEXT: s_mov_b32 s4, s6
+; SI-NEXT: s_mov_b32 s5, s7
; SI-NEXT: s_mov_b32 s6, s10
; SI-NEXT: s_mov_b32 s7, s11
+; SI-NEXT: buffer_load_dwordx2 v[0:1], off, s[12:15], 0
+; SI-NEXT: buffer_load_dwordx2 v[2:3], off, s[4:7], 0
; SI-NEXT: s_mov_b32 s8, s0
; SI-NEXT: s_mov_b32 s9, s1
; SI-NEXT: s_mov_b32 s4, s2
@@ -393,8 +553,8 @@ define amdgpu_kernel void @v_uaddo_i64(ptr addrspace(1) %out, ptr addrspace(1) %
; SI-NEXT: s_waitcnt vmcnt(0)
; SI-NEXT: v_add_i32_e32 v2, vcc, v0, v2
; SI-NEXT: v_addc_u32_e32 v3, vcc, v1, v3, vcc
-; SI-NEXT: buffer_store_dwordx2 v[2:3], off, s[8:11], 0
; SI-NEXT: v_cmp_lt_u64_e32 vcc, v[2:3], v[0:1]
+; SI-NEXT: buffer_store_dwordx2 v[2:3], off, s[8:11], 0
; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; SI-NEXT: buffer_store_byte v0, off, s[4:7], 0
; SI-NEXT: s_endpgm
@@ -437,6 +597,42 @@ define amdgpu_kernel void @v_uaddo_i64(ptr addrspace(1) %out, ptr addrspace(1) %
; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; GFX9-NEXT: global_store_byte v4, v0, s[10:11]
; GFX9-NEXT: s_endpgm
+;
+; GFX10-LABEL: v_uaddo_i64:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x24
+; GFX10-NEXT: v_mov_b32_e32 v4, 0
+; GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-NEXT: s_clause 0x1
+; GFX10-NEXT: global_load_dwordx2 v[0:1], v4, s[12:13]
+; GFX10-NEXT: global_load_dwordx2 v[2:3], v4, s[14:15]
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, v0, v2
+; GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, v1, v3, vcc_lo
+; GFX10-NEXT: v_cmp_lt_u64_e32 vcc_lo, v[2:3], v[0:1]
+; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX10-NEXT: global_store_dwordx2 v4, v[2:3], s[8:9]
+; GFX10-NEXT: global_store_byte v4, v0, s[10:11]
+; GFX10-NEXT: s_endpgm
+;
+; GFX11-LABEL: v_uaddo_i64:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_load_b256 s[0:7], s[4:5], 0x24
+; GFX11-NEXT: v_mov_b32_e32 v4, 0
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: s_clause 0x1
+; GFX11-NEXT: global_load_b64 v[0:1], v4, s[4:5]
+; GFX11-NEXT: global_load_b64 v[2:3], v4, s[6:7]
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: v_add_co_u32 v2, vcc_lo, v0, v2
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_add_co_ci_u32_e64 v3, null, v1, v3, vcc_lo
+; GFX11-NEXT: v_cmp_lt_u64_e32 vcc_lo, v[2:3], v[0:1]
+; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX11-NEXT: s_clause 0x1
+; GFX11-NEXT: global_store_b64 v4, v[2:3], s[0:1]
+; GFX11-NEXT: global_store_b8 v4, v0, s[2:3]
+; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds i64, ptr addrspace(1) %a.ptr
@@ -459,17 +655,15 @@ define amdgpu_kernel void @v_uaddo_i16(ptr addrspace(1) %out, ptr addrspace(1) %
; SI-NEXT: s_mov_b32 s10, -1
; SI-NEXT: s_mov_b32 s14, s10
; SI-NEXT: s_mov_b32 s15, s11
-; SI-NEXT: s_mov_b32 s18, s10
-; SI-NEXT: s_mov_b32 s19, s11
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_mov_b32 s12, s4
; SI-NEXT: s_mov_b32 s13, s5
-; SI-NEXT: s_mov_b32 s16, s6
-; SI-NEXT: s_mov_b32 s17, s7
-; SI-NEXT: buffer_load_ushort v0, off, s[12:15], 0
-; SI-NEXT: buffer_load_ushort v1, off, s[16:19], 0
+; SI-NEXT: s_mov_b32 s4, s6
+; SI-NEXT: s_mov_b32 s5, s7
; SI-NEXT: s_mov_b32 s6, s10
; SI-NEXT: s_mov_b32 s7, s11
+; SI-NEXT: buffer_load_ushort v0, off, s[12:15], 0
+; SI-NEXT: buffer_load_ushort v1, off, s[4:7], 0
; SI-NEXT: s_mov_b32 s8, s0
; SI-NEXT: s_mov_b32 s9, s1
; SI-NEXT: s_mov_b32 s4, s2
@@ -477,8 +671,8 @@ define amdgpu_kernel void @v_uaddo_i16(ptr addrspace(1) %out, ptr addrspace(1) %
; SI-NEXT: s_waitcnt vmcnt(0)
; SI-NEXT: v_add_i32_e32 v0, vcc, v0, v1
; SI-NEXT: v_and_b32_e32 v1, 0xffff, v0
-; SI-NEXT: buffer_store_short v0, off, s[8:11], 0
; SI-NEXT: v_cmp_ne_u32_e32 vcc, v1, v0
+; SI-NEXT: buffer_store_short v0, off, s[8:11], 0
; SI-NEXT: s_waitcnt expcnt(0)
; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; SI-NEXT: buffer_store_byte v0, off, s[4:7], 0
@@ -522,6 +716,42 @@ define amdgpu_kernel void @v_uaddo_i16(ptr addrspace(1) %out, ptr addrspace(1) %
; GFX9-NEXT: global_store_short v0, v2, s[8:9]
; GFX9-NEXT: global_store_byte v0, v1, s[10:11]
; GFX9-NEXT: s_endpgm
+;
+; GFX10-LABEL: v_uaddo_i16:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x24
+; GFX10-NEXT: v_mov_b32_e32 v0, 0
+; GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-NEXT: s_clause 0x1
+; GFX10-NEXT: global_load_ushort v1, v0, s[12:13]
+; GFX10-NEXT: global_load_ushort v2, v0, s[14:15]
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: v_add_nc_u32_e32 v2, v1, v2
+; GFX10-NEXT: v_cmp_lt_u32_sdwa s0, v2, v1 src0_sel:WORD_0 src1_sel:WORD_0
+; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 1, s0
+; GFX10-NEXT: global_store_short v0, v2, s[8:9]
+; GFX10-NEXT: global_store_byte v0, v1, s[10:11]
+; GFX10-NEXT: s_endpgm
+;
+; GFX11-LABEL: v_uaddo_i16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_load_b256 s[0:7], s[4:5], 0x24
+; GFX11-NEXT: v_mov_b32_e32 v0, 0
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: s_clause 0x1
+; GFX11-NEXT: global_load_d16_b16 v1, v0, s[4:5]
+; GFX11-NEXT: global_load_u16 v2, v0, s[6:7]
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: v_add_nc_u32_e32 v2, v1, v2
+; GFX11-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_and_b32_e32 v3, 0xffff, v2
+; GFX11-NEXT: v_cmp_lt_u32_e32 vcc_lo, v3, v1
+; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
+; GFX11-NEXT: s_clause 0x1
+; GFX11-NEXT: global_store_b16 v0, v2, s[0:1]
+; GFX11-NEXT: global_store_b8 v0, v1, s[2:3]
+; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds i16, ptr addrspace(1) %a.ptr
@@ -544,17 +774,15 @@ define amdgpu_kernel void @v_uaddo_v2i32(ptr addrspace(1) %out, ptr addrspace(1)
; SI-NEXT: s_mov_b32 s10, -1
; SI-NEXT: s_mov_b32 s14, s10
; SI-NEXT: s_mov_b32 s15, s11
-; SI-NEXT: s_mov_b32 s18, s10
-; SI-NEXT: s_mov_b32 s19, s11
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_mov_b32 s12, s4
; SI-NEXT: s_mov_b32 s13, s5
-; SI-NEXT: s_mov_b32 s16, s6
-; SI-NEXT: s_mov_b32 s17, s7
-; SI-NEXT: buffer_load_dwordx2 v[0:1], off, s[12:15], 0
-; SI-NEXT: buffer_load_dwordx2 v[2:3], off, s[16:19], 0
+; SI-NEXT: s_mov_b32 s4, s6
+; SI-NEXT: s_mov_b32 s5, s7
; SI-NEXT: s_mov_b32 s6, s10
; SI-NEXT: s_mov_b32 s7, s11
+; SI-NEXT: buffer_load_dwordx2 v[0:1], off, s[12:15], 0
+; SI-NEXT: buffer_load_dwordx2 v[2:3], off, s[4:7], 0
; SI-NEXT: s_mov_b32 s8, s0
; SI-NEXT: s_mov_b32 s9, s1
; SI-NEXT: s_mov_b32 s4, s2
@@ -606,6 +834,42 @@ define amdgpu_kernel void @v_uaddo_v2i32(ptr addrspace(1) %out, ptr addrspace(1)
; GFX9-NEXT: global_store_dwordx2 v4, v[0:1], s[8:9]
; GFX9-NEXT: global_store_dwordx2 v4, v[2:3], s[10:11]
; GFX9-NEXT: s_endpgm
+;
+; GFX10-LABEL: v_uaddo_v2i32:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x24
+; GFX10-NEXT: v_mov_b32_e32 v4, 0
+; GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-NEXT: s_clause 0x1
+; GFX10-NEXT: global_load_dwordx2 v[0:1], v4, s[12:13]
+; GFX10-NEXT: global_load_dwordx2 v[2:3], v4, s[14:15]
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: v_add_co_u32 v1, s0, v1, v3
+; GFX10-NEXT: v_cndmask_b32_e64 v3, 0, 1, s0
+; GFX10-NEXT: v_add_co_u32 v0, s0, v0, v2
+; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 1, s0
+; GFX10-NEXT: global_store_dwordx2 v4, v[0:1], s[8:9]
+; GFX10-NEXT: global_store_dwordx2 v4, v[2:3], s[10:11]
+; GFX10-NEXT: s_endpgm
+;
+; GFX11-LABEL: v_uaddo_v2i32:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_load_b256 s[0:7], s[4:5], 0x24
+; GFX11-NEXT: v_mov_b32_e32 v4, 0
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: s_clause 0x1
+; GFX11-NEXT: global_load_b64 v[0:1], v4, s[4:5]
+; GFX11-NEXT: global_load_b64 v[2:3], v4, s[6:7]
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: v_add_co_u32 v1, s4, v1, v3
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_cndmask_b32_e64 v3, 0, 1, s4
+; GFX11-NEXT: v_add_co_u32 v0, s4, v0, v2
+; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 1, s4
+; GFX11-NEXT: s_clause 0x1
+; GFX11-NEXT: global_store_b64 v4, v[0:1], s[0:1]
+; GFX11-NEXT: global_store_b64 v4, v[2:3], s[2:3]
+; GFX11-NEXT: s_endpgm
%a = load <2 x i32>, ptr addrspace(1) %aptr, align 4
%b = load <2 x i32>, ptr addrspace(1) %bptr, align 4
%sadd = call { <2 x i32>, <2 x i1> } @llvm.uadd.with.overflow.v2i32(<2 x i32> %a, <2 x i32> %b) nounwind
@@ -623,26 +887,27 @@ define amdgpu_kernel void @s_uaddo_clamp_bit(ptr addrspace(1) %out, ptr addrspac
; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xd
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: v_mov_b32_e32 v0, s1
-; SI-NEXT: v_add_i32_e32 v0, vcc, s0, v0
; SI-NEXT: s_cmp_eq_u32 s0, s1
+; SI-NEXT: v_add_i32_e32 v0, vcc, s0, v0
; SI-NEXT: s_mov_b64 s[0:1], 0
; SI-NEXT: s_cbranch_scc1 .LBB8_2
; SI-NEXT: ; %bb.1: ; %if
; SI-NEXT: s_xor_b64 s[0:1], vcc, -1
; SI-NEXT: .LBB8_2: ; %exit
; SI-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x9
-; SI-NEXT: s_mov_b32 s3, 0xf000
-; SI-NEXT: s_mov_b32 s2, -1
-; SI-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[0:1]
-; SI-NEXT: s_mov_b32 s10, s2
-; SI-NEXT: s_mov_b32 s11, s3
+; SI-NEXT: s_mov_b32 s11, 0xf000
+; SI-NEXT: s_mov_b32 s10, -1
; SI-NEXT: s_waitcnt lgkmcnt(0)
-; SI-NEXT: s_mov_b32 s0, s4
-; SI-NEXT: s_mov_b32 s1, s5
-; SI-NEXT: s_mov_b32 s8, s6
-; SI-NEXT: s_mov_b32 s9, s7
-; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
-; SI-NEXT: buffer_store_byte v1, off, s[8:11], 0
+; SI-NEXT: s_mov_b32 s8, s4
+; SI-NEXT: s_mov_b32 s9, s5
+; SI-NEXT: s_mov_b32 s4, s6
+; SI-NEXT: s_mov_b32 s5, s7
+; SI-NEXT: s_mov_b32 s6, s10
+; SI-NEXT: s_mov_b32 s7, s11
+; SI-NEXT: buffer_store_dword v0, off, s[8:11], 0
+; SI-NEXT: s_waitcnt expcnt(0)
+; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
+; SI-NEXT: buffer_store_byte v0, off, s[4:7], 0
; SI-NEXT: s_endpgm
;
; VI-LABEL: s_uaddo_clamp_bit:
@@ -687,6 +952,45 @@ define amdgpu_kernel void @s_uaddo_clamp_bit(ptr addrspace(1) %out, ptr addrspac
; GFX9-NEXT: global_store_dword v1, v0, s[8:9]
; GFX9-NEXT: global_store_byte v1, v2, s[10:11]
; GFX9-NEXT: s_endpgm
+;
+; GFX10-LABEL: s_uaddo_clamp_bit:
+; GFX10: ; %bb.0: ; %entry
+; GFX10-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x34
+; GFX10-NEXT: s_mov_b32 s0, 0
+; GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-NEXT: v_add_co_u32 v0, s1, s2, s3
+; GFX10-NEXT: s_cmp_eq_u32 s2, s3
+; GFX10-NEXT: s_cbranch_scc1 .LBB8_2
+; GFX10-NEXT: ; %bb.1: ; %if
+; GFX10-NEXT: s_xor_b32 s0, s1, -1
+; GFX10-NEXT: .LBB8_2: ; %exit
+; GFX10-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x24
+; GFX10-NEXT: v_mov_b32_e32 v1, 0
+; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 1, s0
+; GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-NEXT: global_store_dword v1, v0, s[8:9]
+; GFX10-NEXT: global_store_byte v1, v2, s[10:11]
+; GFX10-NEXT: s_endpgm
+;
+; GFX11-LABEL: s_uaddo_clamp_bit:
+; GFX11: ; %bb.0: ; %entry
+; GFX11-NEXT: s_load_b64 s[2:3], s[4:5], 0x34
+; GFX11-NEXT: s_mov_b32 s0, 0
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: v_add_co_u32 v0, s1, s2, s3
+; GFX11-NEXT: s_cmp_eq_u32 s2, s3
+; GFX11-NEXT: s_cbranch_scc1 .LBB8_2
+; GFX11-NEXT: ; %bb.1: ; %if
+; GFX11-NEXT: s_xor_b32 s0, s1, -1
+; GFX11-NEXT: .LBB8_2: ; %exit
+; GFX11-NEXT: s_load_b128 s[4:7], s[4:5], 0x24
+; GFX11-NEXT: v_mov_b32_e32 v1, 0
+; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 1, s0
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: s_clause 0x1
+; GFX11-NEXT: global_store_b32 v1, v0, s[4:5]
+; GFX11-NEXT: global_store_b8 v1, v2, s[6:7]
+; GFX11-NEXT: s_endpgm
entry:
%uadd = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %a, i32 %b)
%val = extractvalue { i32, i1 } %uadd, 0
@@ -711,19 +1015,19 @@ define amdgpu_kernel void @v_uaddo_clamp_bit(ptr addrspace(1) %out, ptr addrspac
; SI-NEXT: s_load_dwordx8 s[4:11], s[4:5], 0x9
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_mov_b32 s2, -1
-; SI-NEXT: s_mov_b32 s14, s2
-; SI-NEXT: s_mov_b32 s15, s3
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_mov_b32 s0, s8
; SI-NEXT: s_mov_b32 s1, s9
-; SI-NEXT: s_mov_b32 s12, s10
-; SI-NEXT: s_mov_b32 s13, s11
+; SI-NEXT: s_mov_b32 s8, s10
+; SI-NEXT: s_mov_b32 s9, s11
+; SI-NEXT: s_mov_b32 s10, s2
+; SI-NEXT: s_mov_b32 s11, s3
; SI-NEXT: buffer_load_dword v1, off, s[0:3], 0
-; SI-NEXT: buffer_load_dword v2, off, s[12:15], 0
+; SI-NEXT: buffer_load_dword v2, off, s[8:11], 0
+; SI-NEXT: s_mov_b64 s[8:9], 0
; SI-NEXT: s_waitcnt vmcnt(0)
-; SI-NEXT: v_add_i32_e64 v0, s[0:1], v1, v2
; SI-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
-; SI-NEXT: s_mov_b64 s[8:9], 0
+; SI-NEXT: v_add_i32_e64 v0, s[0:1], v1, v2
; SI-NEXT: s_cbranch_vccnz .LBB9_2
; SI-NEXT: ; %bb.1: ; %if
; SI-NEXT: s_xor_b64 s[8:9], s[0:1], -1
@@ -786,6 +1090,50 @@ define amdgpu_kernel void @v_uaddo_clamp_bit(ptr addrspace(1) %out, ptr addrspac
; GFX9-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[2:3]
; GFX9-NEXT: global_store_byte v0, v1, s[10:11]
; GFX9-NEXT: s_endpgm
+;
+; GFX10-LABEL: v_uaddo_clamp_bit:
+; GFX10: ; %bb.0: ; %entry
+; GFX10-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x24
+; GFX10-NEXT: v_mov_b32_e32 v0, 0
+; GFX10-NEXT: s_mov_b32 s0, 0
+; GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-NEXT: s_clause 0x1
+; GFX10-NEXT: global_load_dword v1, v0, s[12:13]
+; GFX10-NEXT: global_load_dword v2, v0, s[14:15]
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX10-NEXT: v_add_co_u32 v1, s1, v1, v2
+; GFX10-NEXT: s_cbranch_vccnz .LBB9_2
+; GFX10-NEXT: ; %bb.1: ; %if
+; GFX10-NEXT: s_xor_b32 s0, s1, -1
+; GFX10-NEXT: .LBB9_2: ; %exit
+; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 1, s0
+; GFX10-NEXT: global_store_dword v0, v1, s[8:9]
+; GFX10-NEXT: global_store_byte v0, v2, s[10:11]
+; GFX10-NEXT: s_endpgm
+;
+; GFX11-LABEL: v_uaddo_clamp_bit:
+; GFX11: ; %bb.0: ; %entry
+; GFX11-NEXT: s_load_b256 s[0:7], s[4:5], 0x24
+; GFX11-NEXT: v_mov_b32_e32 v0, 0
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: s_clause 0x1
+; GFX11-NEXT: global_load_b32 v1, v0, s[4:5]
+; GFX11-NEXT: global_load_b32 v2, v0, s[6:7]
+; GFX11-NEXT: s_mov_b32 s4, 0
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX11-NEXT: v_add_co_u32 v1, s5, v1, v2
+; GFX11-NEXT: s_cbranch_vccnz .LBB9_2
+; GFX11-NEXT: ; %bb.1: ; %if
+; GFX11-NEXT: s_xor_b32 s4, s5, -1
+; GFX11-NEXT: .LBB9_2: ; %exit
+; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 1, s4
+; GFX11-NEXT: s_clause 0x1
+; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: global_store_b8 v0, v2, s[2:3]
+; GFX11-NEXT: s_endpgm
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
@@ -813,23 +1161,23 @@ exit:
define amdgpu_cs void @sv_uaddo_i128(ptr addrspace(1) %out, i128 inreg %a, i128 %b) {
; SI-LABEL: sv_uaddo_i128:
; SI: ; %bb.0:
-; SI-NEXT: s_mov_b32 s7, 0xf000
-; SI-NEXT: s_mov_b32 s6, 0
; SI-NEXT: v_add_i32_e32 v2, vcc, s0, v2
; SI-NEXT: v_mov_b32_e32 v6, s1
-; SI-NEXT: v_mov_b32_e32 v7, s2
-; SI-NEXT: v_mov_b32_e32 v8, s3
-; SI-NEXT: s_mov_b32 s4, s6
-; SI-NEXT: s_mov_b32 s5, s6
; SI-NEXT: v_addc_u32_e32 v3, vcc, v6, v3, vcc
-; SI-NEXT: v_addc_u32_e32 v4, vcc, v7, v4, vcc
-; SI-NEXT: v_cmp_gt_u64_e64 s[0:1], s[0:1], v[2:3]
-; SI-NEXT: v_cndmask_b32_e64 v2, 0, 1, s[0:1]
-; SI-NEXT: v_addc_u32_e32 v5, vcc, v8, v5, vcc
+; SI-NEXT: v_mov_b32_e32 v6, s2
+; SI-NEXT: v_addc_u32_e32 v4, vcc, v6, v4, vcc
+; SI-NEXT: v_mov_b32_e32 v6, s3
+; SI-NEXT: v_addc_u32_e32 v5, vcc, v6, v5, vcc
+; SI-NEXT: v_cmp_gt_u64_e32 vcc, s[0:1], v[2:3]
+; SI-NEXT: s_mov_b32 s6, 0
+; SI-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc
; SI-NEXT: v_cmp_gt_u64_e32 vcc, s[2:3], v[4:5]
+; SI-NEXT: s_mov_b32 s7, 0xf000
; SI-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc
; SI-NEXT: v_cmp_eq_u64_e32 vcc, s[2:3], v[4:5]
+; SI-NEXT: s_mov_b32 s4, s6
; SI-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc
+; SI-NEXT: s_mov_b32 s5, s6
; SI-NEXT: v_and_b32_e32 v2, 1, v2
; SI-NEXT: buffer_store_dword v2, v[0:1], s[4:7], 0 addr64
; SI-NEXT: s_endpgm
@@ -871,6 +1219,41 @@ define amdgpu_cs void @sv_uaddo_i128(ptr addrspace(1) %out, i128 inreg %a, i128
; GFX9-NEXT: v_and_b32_e32 v2, 1, v2
; GFX9-NEXT: global_store_dword v[0:1], v2, off
; GFX9-NEXT: s_endpgm
+;
+; GFX10-LABEL: sv_uaddo_i128:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, s0, v2
+; GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, s1, v3, vcc_lo
+; GFX10-NEXT: v_add_co_ci_u32_e32 v4, vcc_lo, s2, v4, vcc_lo
+; GFX10-NEXT: v_add_co_ci_u32_e32 v5, vcc_lo, s3, v5, vcc_lo
+; GFX10-NEXT: v_cmp_gt_u64_e32 vcc_lo, s[0:1], v[2:3]
+; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo
+; GFX10-NEXT: v_cmp_gt_u64_e32 vcc_lo, s[2:3], v[4:5]
+; GFX10-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc_lo
+; GFX10-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[2:3], v[4:5]
+; GFX10-NEXT: v_cndmask_b32_e32 v2, v3, v2, vcc_lo
+; GFX10-NEXT: v_and_b32_e32 v2, 1, v2
+; GFX10-NEXT: global_store_dword v[0:1], v2, off
+; GFX10-NEXT: s_endpgm
+;
+; GFX11-LABEL: sv_uaddo_i128:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: v_add_co_u32 v2, vcc_lo, s0, v2
+; GFX11-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, s1, v3, vcc_lo
+; GFX11-NEXT: v_add_co_ci_u32_e32 v4, vcc_lo, s2, v4, vcc_lo
+; GFX11-NEXT: v_add_co_ci_u32_e32 v5, vcc_lo, s3, v5, vcc_lo
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
+; GFX11-NEXT: v_cmp_gt_u64_e32 vcc_lo, s[2:3], v[4:5]
+; GFX11-NEXT: v_cndmask_b32_e64 v6, 0, 1, vcc_lo
+; GFX11-NEXT: v_cmp_gt_u64_e32 vcc_lo, s[0:1], v[2:3]
+; GFX11-NEXT: v_mov_b16_e32 v2.l, v6.l
+; GFX11-NEXT: v_cndmask_b32_e64 v3, 0, 1, vcc_lo
+; GFX11-NEXT: v_cmp_eq_u64_e32 vcc_lo, s[2:3], v[4:5]
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_cndmask_b16 v2.l, v2.l, v3.l, vcc_lo
+; GFX11-NEXT: v_and_b32_e32 v2, 1, v2
+; GFX11-NEXT: global_store_b32 v[0:1], v2, off
+; GFX11-NEXT: s_endpgm
%uadd = call { i128, i1 } @llvm.uadd.with.overflow.i128(i128 %a, i128 %b)
%carry = extractvalue { i128, i1 } %uadd, 1
%carry.ext = zext i1 %carry to i32
diff --git a/llvm/test/CodeGen/AMDGPU/usubo.ll b/llvm/test/CodeGen/AMDGPU/usubo.ll
index 7d7f1b4..0289dab 100644
--- a/llvm/test/CodeGen/AMDGPU/usubo.ll
+++ b/llvm/test/CodeGen/AMDGPU/usubo.ll
@@ -1,8 +1,9 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn < %s | FileCheck -check-prefixes=SI %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=tonga < %s | FileCheck -check-prefixes=VI %s
-; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9 %s
-
+; RUN: llc < %s -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=tahiti | FileCheck %s --check-prefix=SI
+; RUN: llc < %s -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=tonga | FileCheck %s --check-prefix=VI
+; RUN: llc < %s -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx900 | FileCheck %s --check-prefix=GFX9
+; RUN: llc < %s -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1010 | FileCheck %s --check-prefix=GFX10
+; RUN: llc < %s -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 | FileCheck %s --check-prefix=GFX11
define amdgpu_kernel void @s_usubo_i64_zext(ptr addrspace(1) %out, i64 %a, i64 %b) #0 {
; SI-LABEL: s_usubo_i64_zext:
@@ -13,14 +14,14 @@ define amdgpu_kernel void @s_usubo_i64_zext(ptr addrspace(1) %out, i64 %a, i64 %
; SI-NEXT: s_mov_b32 s6, -1
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_mov_b32 s4, s0
-; SI-NEXT: s_mov_b32 s5, s1
; SI-NEXT: s_sub_u32 s0, s2, s8
; SI-NEXT: v_mov_b32_e32 v0, s2
-; SI-NEXT: v_mov_b32_e32 v1, s3
+; SI-NEXT: s_mov_b32 s5, s1
; SI-NEXT: s_subb_u32 s1, s3, s9
+; SI-NEXT: v_mov_b32_e32 v1, s3
; SI-NEXT: v_cmp_gt_u64_e32 vcc, s[0:1], v[0:1]
-; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; SI-NEXT: v_mov_b32_e32 v1, s1
+; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; SI-NEXT: v_add_i32_e32 v0, vcc, s0, v0
; SI-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0
@@ -62,6 +63,40 @@ define amdgpu_kernel void @s_usubo_i64_zext(ptr addrspace(1) %out, i64 %a, i64 %
; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
; GFX9-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
; GFX9-NEXT: s_endpgm
+;
+; GFX10-LABEL: s_usubo_i64_zext:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_clause 0x1
+; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX10-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
+; GFX10-NEXT: v_mov_b32_e32 v2, 0
+; GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-NEXT: s_sub_u32 s4, s2, s6
+; GFX10-NEXT: s_subb_u32 s5, s3, s7
+; GFX10-NEXT: v_cmp_gt_u64_e64 s2, s[4:5], s[2:3]
+; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1, s2
+; GFX10-NEXT: v_add_co_u32 v0, s2, s4, v0
+; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s2, s5, 0, s2
+; GFX10-NEXT: global_store_dwordx2 v2, v[0:1], s[0:1]
+; GFX10-NEXT: s_endpgm
+;
+; GFX11-LABEL: s_usubo_i64_zext:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_clause 0x1
+; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-NEXT: s_load_b64 s[4:5], s[4:5], 0x34
+; GFX11-NEXT: v_mov_b32_e32 v2, 0
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: s_sub_u32 s4, s2, s4
+; GFX11-NEXT: s_subb_u32 s5, s3, s5
+; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_cmp_gt_u64_e64 s2, s[4:5], s[2:3]
+; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, 1, s2
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_add_co_u32 v0, s2, s4, v0
+; GFX11-NEXT: v_add_co_ci_u32_e64 v1, null, s5, 0, s2
+; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: s_endpgm
%usub = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %a, i64 %b) #0
%val = extractvalue { i64, i1 } %usub, 0
%carry = extractvalue { i64, i1 } %usub, 1
@@ -76,21 +111,21 @@ define amdgpu_kernel void @s_usubo_i32(ptr addrspace(1) %out, ptr addrspace(1) %
; SI-LABEL: s_usubo_i32:
; SI: ; %bb.0:
; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
-; SI-NEXT: s_load_dwordx2 s[12:13], s[4:5], 0xd
+; SI-NEXT: s_load_dwordx2 s[8:9], s[4:5], 0xd
; SI-NEXT: s_mov_b32 s7, 0xf000
; SI-NEXT: s_mov_b32 s6, -1
-; SI-NEXT: s_mov_b32 s10, s6
-; SI-NEXT: s_mov_b32 s11, s7
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_mov_b32 s4, s0
+; SI-NEXT: v_mov_b32_e32 v0, s9
; SI-NEXT: s_mov_b32 s5, s1
-; SI-NEXT: s_mov_b32 s8, s2
-; SI-NEXT: s_mov_b32 s9, s3
-; SI-NEXT: v_mov_b32_e32 v0, s13
-; SI-NEXT: v_sub_i32_e32 v0, vcc, s12, v0
+; SI-NEXT: v_sub_i32_e32 v0, vcc, s8, v0
+; SI-NEXT: s_mov_b32 s0, s2
+; SI-NEXT: s_mov_b32 s1, s3
+; SI-NEXT: s_mov_b32 s2, s6
+; SI-NEXT: s_mov_b32 s3, s7
; SI-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0
-; SI-NEXT: buffer_store_byte v1, off, s[8:11], 0
+; SI-NEXT: buffer_store_byte v1, off, s[0:3], 0
; SI-NEXT: s_endpgm
;
; VI-LABEL: s_usubo_i32:
@@ -121,6 +156,34 @@ define amdgpu_kernel void @s_usubo_i32(ptr addrspace(1) %out, ptr addrspace(1) %
; GFX9-NEXT: global_store_dword v0, v1, s[0:1]
; GFX9-NEXT: global_store_byte v0, v2, s[2:3]
; GFX9-NEXT: s_endpgm
+;
+; GFX10-LABEL: s_usubo_i32:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_clause 0x1
+; GFX10-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x34
+; GFX10-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX10-NEXT: v_mov_b32_e32 v0, 0
+; GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-NEXT: v_sub_co_u32 v1, s4, s6, s7
+; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 1, s4
+; GFX10-NEXT: global_store_dword v0, v1, s[0:1]
+; GFX10-NEXT: global_store_byte v0, v2, s[2:3]
+; GFX10-NEXT: s_endpgm
+;
+; GFX11-LABEL: s_usubo_i32:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_clause 0x1
+; GFX11-NEXT: s_load_b64 s[6:7], s[4:5], 0x34
+; GFX11-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-NEXT: v_mov_b32_e32 v0, 0
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: v_sub_co_u32 v1, s4, s6, s7
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 1, s4
+; GFX11-NEXT: s_clause 0x1
+; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: global_store_b8 v0, v2, s[2:3]
+; GFX11-NEXT: s_endpgm
%usub = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %a, i32 %b)
%val = extractvalue { i32, i1 } %usub, 0
%carry = extractvalue { i32, i1 } %usub, 1
@@ -137,17 +200,15 @@ define amdgpu_kernel void @v_usubo_i32(ptr addrspace(1) %out, ptr addrspace(1) %
; SI-NEXT: s_mov_b32 s10, -1
; SI-NEXT: s_mov_b32 s14, s10
; SI-NEXT: s_mov_b32 s15, s11
-; SI-NEXT: s_mov_b32 s18, s10
-; SI-NEXT: s_mov_b32 s19, s11
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_mov_b32 s12, s4
; SI-NEXT: s_mov_b32 s13, s5
-; SI-NEXT: s_mov_b32 s16, s6
-; SI-NEXT: s_mov_b32 s17, s7
-; SI-NEXT: buffer_load_dword v0, off, s[12:15], 0
-; SI-NEXT: buffer_load_dword v1, off, s[16:19], 0
+; SI-NEXT: s_mov_b32 s4, s6
+; SI-NEXT: s_mov_b32 s5, s7
; SI-NEXT: s_mov_b32 s6, s10
; SI-NEXT: s_mov_b32 s7, s11
+; SI-NEXT: buffer_load_dword v0, off, s[12:15], 0
+; SI-NEXT: buffer_load_dword v1, off, s[4:7], 0
; SI-NEXT: s_mov_b32 s8, s0
; SI-NEXT: s_mov_b32 s9, s1
; SI-NEXT: s_mov_b32 s4, s2
@@ -193,6 +254,38 @@ define amdgpu_kernel void @v_usubo_i32(ptr addrspace(1) %out, ptr addrspace(1) %
; GFX9-NEXT: global_store_dword v0, v1, s[8:9]
; GFX9-NEXT: global_store_byte v0, v2, s[10:11]
; GFX9-NEXT: s_endpgm
+;
+; GFX10-LABEL: v_usubo_i32:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x24
+; GFX10-NEXT: v_mov_b32_e32 v0, 0
+; GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-NEXT: s_clause 0x1
+; GFX10-NEXT: global_load_dword v1, v0, s[12:13]
+; GFX10-NEXT: global_load_dword v2, v0, s[14:15]
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: v_sub_co_u32 v1, s0, v1, v2
+; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 1, s0
+; GFX10-NEXT: global_store_dword v0, v1, s[8:9]
+; GFX10-NEXT: global_store_byte v0, v2, s[10:11]
+; GFX10-NEXT: s_endpgm
+;
+; GFX11-LABEL: v_usubo_i32:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_load_b256 s[0:7], s[4:5], 0x24
+; GFX11-NEXT: v_mov_b32_e32 v0, 0
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: s_clause 0x1
+; GFX11-NEXT: global_load_b32 v1, v0, s[4:5]
+; GFX11-NEXT: global_load_b32 v2, v0, s[6:7]
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: v_sub_co_u32 v1, s4, v1, v2
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 1, s4
+; GFX11-NEXT: s_clause 0x1
+; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: global_store_b8 v0, v2, s[2:3]
+; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds i32, ptr addrspace(1) %a.ptr
@@ -215,17 +308,15 @@ define amdgpu_kernel void @v_usubo_i32_novcc(ptr addrspace(1) %out, ptr addrspac
; SI-NEXT: s_mov_b32 s10, -1
; SI-NEXT: s_mov_b32 s14, s10
; SI-NEXT: s_mov_b32 s15, s11
-; SI-NEXT: s_mov_b32 s18, s10
-; SI-NEXT: s_mov_b32 s19, s11
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_mov_b32 s12, s4
; SI-NEXT: s_mov_b32 s13, s5
-; SI-NEXT: s_mov_b32 s16, s6
-; SI-NEXT: s_mov_b32 s17, s7
-; SI-NEXT: buffer_load_dword v0, off, s[12:15], 0
-; SI-NEXT: buffer_load_dword v1, off, s[16:19], 0
+; SI-NEXT: s_mov_b32 s4, s6
+; SI-NEXT: s_mov_b32 s5, s7
; SI-NEXT: s_mov_b32 s6, s10
; SI-NEXT: s_mov_b32 s7, s11
+; SI-NEXT: buffer_load_dword v0, off, s[12:15], 0
+; SI-NEXT: buffer_load_dword v1, off, s[4:7], 0
; SI-NEXT: s_mov_b32 s8, s0
; SI-NEXT: s_mov_b32 s9, s1
; SI-NEXT: s_mov_b32 s4, s2
@@ -283,6 +374,45 @@ define amdgpu_kernel void @v_usubo_i32_novcc(ptr addrspace(1) %out, ptr addrspac
; GFX9-NEXT: global_store_byte v0, v2, s[10:11]
; GFX9-NEXT: s_waitcnt vmcnt(0)
; GFX9-NEXT: s_endpgm
+;
+; GFX10-LABEL: v_usubo_i32_novcc:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x24
+; GFX10-NEXT: v_mov_b32_e32 v0, 0
+; GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-NEXT: s_clause 0x1
+; GFX10-NEXT: global_load_dword v1, v0, s[12:13]
+; GFX10-NEXT: global_load_dword v2, v0, s[14:15]
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: v_sub_co_u32 v1, s0, v1, v2
+; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 1, s0
+; GFX10-NEXT: global_store_dword v0, v1, s[8:9]
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT: ;;#ASMSTART
+; GFX10-NEXT: ;;#ASMEND
+; GFX10-NEXT: global_store_byte v0, v2, s[10:11]
+; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX10-NEXT: s_endpgm
+;
+; GFX11-LABEL: v_usubo_i32_novcc:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_load_b256 s[0:7], s[4:5], 0x24
+; GFX11-NEXT: v_mov_b32_e32 v0, 0
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: s_clause 0x1
+; GFX11-NEXT: global_load_b32 v1, v0, s[4:5]
+; GFX11-NEXT: global_load_b32 v2, v0, s[6:7]
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: v_sub_co_u32 v1, s4, v1, v2
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 1, s4
+; GFX11-NEXT: global_store_b32 v0, v1, s[0:1] dlc
+; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT: ;;#ASMSTART
+; GFX11-NEXT: ;;#ASMEND
+; GFX11-NEXT: global_store_b8 v0, v2, s[2:3] dlc
+; GFX11-NEXT: s_waitcnt_vscnt null, 0x0
+; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds i32, ptr addrspace(1) %a.ptr
@@ -306,21 +436,21 @@ define amdgpu_kernel void @s_usubo_i64(ptr addrspace(1) %out, ptr addrspace(1) %
; SI-NEXT: s_mov_b32 s10, -1
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_sub_u32 s6, s4, s6
-; SI-NEXT: s_subb_u32 s7, s5, s7
-; SI-NEXT: s_mov_b32 s14, s10
-; SI-NEXT: s_mov_b32 s15, s11
-; SI-NEXT: s_mov_b32 s8, s0
-; SI-NEXT: s_mov_b32 s9, s1
-; SI-NEXT: s_mov_b32 s12, s2
-; SI-NEXT: s_mov_b32 s13, s3
; SI-NEXT: v_mov_b32_e32 v0, s4
+; SI-NEXT: s_subb_u32 s7, s5, s7
; SI-NEXT: v_mov_b32_e32 v1, s5
+; SI-NEXT: v_cmp_gt_u64_e32 vcc, s[6:7], v[0:1]
; SI-NEXT: v_mov_b32_e32 v2, s6
+; SI-NEXT: s_mov_b32 s8, s0
+; SI-NEXT: s_mov_b32 s9, s1
+; SI-NEXT: s_mov_b32 s0, s2
+; SI-NEXT: s_mov_b32 s1, s3
+; SI-NEXT: s_mov_b32 s2, s10
+; SI-NEXT: s_mov_b32 s3, s11
; SI-NEXT: v_mov_b32_e32 v3, s7
-; SI-NEXT: buffer_store_dwordx2 v[2:3], off, s[8:11], 0
-; SI-NEXT: v_cmp_gt_u64_e32 vcc, s[6:7], v[0:1]
; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
-; SI-NEXT: buffer_store_byte v0, off, s[12:15], 0
+; SI-NEXT: buffer_store_dwordx2 v[2:3], off, s[8:11], 0
+; SI-NEXT: buffer_store_byte v0, off, s[0:3], 0
; SI-NEXT: s_endpgm
;
; VI-LABEL: s_usubo_i64:
@@ -359,6 +489,37 @@ define amdgpu_kernel void @s_usubo_i64(ptr addrspace(1) %out, ptr addrspace(1) %
; GFX9-NEXT: global_store_dwordx2 v4, v[2:3], s[8:9]
; GFX9-NEXT: global_store_byte v4, v0, s[10:11]
; GFX9-NEXT: s_endpgm
+;
+; GFX10-LABEL: s_usubo_i64:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x24
+; GFX10-NEXT: v_mov_b32_e32 v2, 0
+; GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-NEXT: s_sub_u32 s0, s12, s14
+; GFX10-NEXT: s_subb_u32 s1, s13, s15
+; GFX10-NEXT: v_mov_b32_e32 v0, s0
+; GFX10-NEXT: v_mov_b32_e32 v1, s1
+; GFX10-NEXT: v_cmp_gt_u64_e64 s0, s[0:1], s[12:13]
+; GFX10-NEXT: v_cndmask_b32_e64 v3, 0, 1, s0
+; GFX10-NEXT: global_store_dwordx2 v2, v[0:1], s[8:9]
+; GFX10-NEXT: global_store_byte v2, v3, s[10:11]
+; GFX10-NEXT: s_endpgm
+;
+; GFX11-LABEL: s_usubo_i64:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_load_b256 s[0:7], s[4:5], 0x24
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: s_sub_u32 s6, s4, s6
+; GFX11-NEXT: s_subb_u32 s7, s5, s7
+; GFX11-NEXT: v_mov_b32_e32 v0, s6
+; GFX11-NEXT: v_cmp_gt_u64_e64 s4, s[6:7], s[4:5]
+; GFX11-NEXT: v_dual_mov_b32 v2, 0 :: v_dual_mov_b32 v1, s7
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2)
+; GFX11-NEXT: v_cndmask_b32_e64 v3, 0, 1, s4
+; GFX11-NEXT: s_clause 0x1
+; GFX11-NEXT: global_store_b64 v2, v[0:1], s[0:1]
+; GFX11-NEXT: global_store_b8 v2, v3, s[2:3]
+; GFX11-NEXT: s_endpgm
%usub = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 %a, i64 %b)
%val = extractvalue { i64, i1 } %usub, 0
%carry = extractvalue { i64, i1 } %usub, 1
@@ -375,17 +536,15 @@ define amdgpu_kernel void @v_usubo_i64(ptr addrspace(1) %out, ptr addrspace(1) %
; SI-NEXT: s_mov_b32 s10, -1
; SI-NEXT: s_mov_b32 s14, s10
; SI-NEXT: s_mov_b32 s15, s11
-; SI-NEXT: s_mov_b32 s18, s10
-; SI-NEXT: s_mov_b32 s19, s11
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_mov_b32 s12, s4
; SI-NEXT: s_mov_b32 s13, s5
-; SI-NEXT: s_mov_b32 s16, s6
-; SI-NEXT: s_mov_b32 s17, s7
-; SI-NEXT: buffer_load_dwordx2 v[0:1], off, s[12:15], 0
-; SI-NEXT: buffer_load_dwordx2 v[2:3], off, s[16:19], 0
+; SI-NEXT: s_mov_b32 s4, s6
+; SI-NEXT: s_mov_b32 s5, s7
; SI-NEXT: s_mov_b32 s6, s10
; SI-NEXT: s_mov_b32 s7, s11
+; SI-NEXT: buffer_load_dwordx2 v[0:1], off, s[12:15], 0
+; SI-NEXT: buffer_load_dwordx2 v[2:3], off, s[4:7], 0
; SI-NEXT: s_mov_b32 s8, s0
; SI-NEXT: s_mov_b32 s9, s1
; SI-NEXT: s_mov_b32 s4, s2
@@ -393,8 +552,8 @@ define amdgpu_kernel void @v_usubo_i64(ptr addrspace(1) %out, ptr addrspace(1) %
; SI-NEXT: s_waitcnt vmcnt(0)
; SI-NEXT: v_sub_i32_e32 v2, vcc, v0, v2
; SI-NEXT: v_subb_u32_e32 v3, vcc, v1, v3, vcc
-; SI-NEXT: buffer_store_dwordx2 v[2:3], off, s[8:11], 0
; SI-NEXT: v_cmp_gt_u64_e32 vcc, v[2:3], v[0:1]
+; SI-NEXT: buffer_store_dwordx2 v[2:3], off, s[8:11], 0
; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; SI-NEXT: buffer_store_byte v0, off, s[4:7], 0
; SI-NEXT: s_endpgm
@@ -437,6 +596,42 @@ define amdgpu_kernel void @v_usubo_i64(ptr addrspace(1) %out, ptr addrspace(1) %
; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; GFX9-NEXT: global_store_byte v4, v0, s[10:11]
; GFX9-NEXT: s_endpgm
+;
+; GFX10-LABEL: v_usubo_i64:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x24
+; GFX10-NEXT: v_mov_b32_e32 v4, 0
+; GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-NEXT: s_clause 0x1
+; GFX10-NEXT: global_load_dwordx2 v[0:1], v4, s[12:13]
+; GFX10-NEXT: global_load_dwordx2 v[2:3], v4, s[14:15]
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: v_sub_co_u32 v2, vcc_lo, v0, v2
+; GFX10-NEXT: v_sub_co_ci_u32_e32 v3, vcc_lo, v1, v3, vcc_lo
+; GFX10-NEXT: v_cmp_gt_u64_e32 vcc_lo, v[2:3], v[0:1]
+; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX10-NEXT: global_store_dwordx2 v4, v[2:3], s[8:9]
+; GFX10-NEXT: global_store_byte v4, v0, s[10:11]
+; GFX10-NEXT: s_endpgm
+;
+; GFX11-LABEL: v_usubo_i64:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_load_b256 s[0:7], s[4:5], 0x24
+; GFX11-NEXT: v_mov_b32_e32 v4, 0
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: s_clause 0x1
+; GFX11-NEXT: global_load_b64 v[0:1], v4, s[4:5]
+; GFX11-NEXT: global_load_b64 v[2:3], v4, s[6:7]
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: v_sub_co_u32 v2, vcc_lo, v0, v2
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_sub_co_ci_u32_e64 v3, null, v1, v3, vcc_lo
+; GFX11-NEXT: v_cmp_gt_u64_e32 vcc_lo, v[2:3], v[0:1]
+; GFX11-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX11-NEXT: s_clause 0x1
+; GFX11-NEXT: global_store_b64 v4, v[2:3], s[0:1]
+; GFX11-NEXT: global_store_b8 v4, v0, s[2:3]
+; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds i64, ptr addrspace(1) %a.ptr
@@ -459,17 +654,15 @@ define amdgpu_kernel void @v_usubo_i16(ptr addrspace(1) %out, ptr addrspace(1) %
; SI-NEXT: s_mov_b32 s10, -1
; SI-NEXT: s_mov_b32 s14, s10
; SI-NEXT: s_mov_b32 s15, s11
-; SI-NEXT: s_mov_b32 s18, s10
-; SI-NEXT: s_mov_b32 s19, s11
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_mov_b32 s12, s4
; SI-NEXT: s_mov_b32 s13, s5
-; SI-NEXT: s_mov_b32 s16, s6
-; SI-NEXT: s_mov_b32 s17, s7
-; SI-NEXT: buffer_load_ushort v0, off, s[12:15], 0
-; SI-NEXT: buffer_load_ushort v1, off, s[16:19], 0
+; SI-NEXT: s_mov_b32 s4, s6
+; SI-NEXT: s_mov_b32 s5, s7
; SI-NEXT: s_mov_b32 s6, s10
; SI-NEXT: s_mov_b32 s7, s11
+; SI-NEXT: buffer_load_ushort v0, off, s[12:15], 0
+; SI-NEXT: buffer_load_ushort v1, off, s[4:7], 0
; SI-NEXT: s_mov_b32 s8, s0
; SI-NEXT: s_mov_b32 s9, s1
; SI-NEXT: s_mov_b32 s4, s2
@@ -477,8 +670,8 @@ define amdgpu_kernel void @v_usubo_i16(ptr addrspace(1) %out, ptr addrspace(1) %
; SI-NEXT: s_waitcnt vmcnt(0)
; SI-NEXT: v_sub_i32_e32 v0, vcc, v0, v1
; SI-NEXT: v_and_b32_e32 v1, 0xffff, v0
-; SI-NEXT: buffer_store_short v0, off, s[8:11], 0
; SI-NEXT: v_cmp_ne_u32_e32 vcc, v1, v0
+; SI-NEXT: buffer_store_short v0, off, s[8:11], 0
; SI-NEXT: s_waitcnt expcnt(0)
; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
; SI-NEXT: buffer_store_byte v0, off, s[4:7], 0
@@ -522,6 +715,42 @@ define amdgpu_kernel void @v_usubo_i16(ptr addrspace(1) %out, ptr addrspace(1) %
; GFX9-NEXT: global_store_short v0, v2, s[8:9]
; GFX9-NEXT: global_store_byte v0, v1, s[10:11]
; GFX9-NEXT: s_endpgm
+;
+; GFX10-LABEL: v_usubo_i16:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x24
+; GFX10-NEXT: v_mov_b32_e32 v0, 0
+; GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-NEXT: s_clause 0x1
+; GFX10-NEXT: global_load_ushort v1, v0, s[12:13]
+; GFX10-NEXT: global_load_ushort v2, v0, s[14:15]
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: v_sub_nc_u32_e32 v2, v1, v2
+; GFX10-NEXT: v_cmp_gt_u32_sdwa s0, v2, v1 src0_sel:WORD_0 src1_sel:WORD_0
+; GFX10-NEXT: v_cndmask_b32_e64 v1, 0, 1, s0
+; GFX10-NEXT: global_store_short v0, v2, s[8:9]
+; GFX10-NEXT: global_store_byte v0, v1, s[10:11]
+; GFX10-NEXT: s_endpgm
+;
+; GFX11-LABEL: v_usubo_i16:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_load_b256 s[0:7], s[4:5], 0x24
+; GFX11-NEXT: v_mov_b32_e32 v0, 0
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: s_clause 0x1
+; GFX11-NEXT: global_load_d16_b16 v1, v0, s[4:5]
+; GFX11-NEXT: global_load_u16 v2, v0, s[6:7]
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: v_sub_nc_u32_e32 v2, v1, v2
+; GFX11-NEXT: v_and_b32_e32 v1, 0xffff, v1
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_and_b32_e32 v3, 0xffff, v2
+; GFX11-NEXT: v_cmp_gt_u32_e32 vcc_lo, v3, v1
+; GFX11-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc_lo
+; GFX11-NEXT: s_clause 0x1
+; GFX11-NEXT: global_store_b16 v0, v2, s[0:1]
+; GFX11-NEXT: global_store_b8 v0, v1, s[2:3]
+; GFX11-NEXT: s_endpgm
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
%a.gep = getelementptr inbounds i16, ptr addrspace(1) %a.ptr
@@ -544,17 +773,15 @@ define amdgpu_kernel void @v_usubo_v2i32(ptr addrspace(1) %out, ptr addrspace(1)
; SI-NEXT: s_mov_b32 s10, -1
; SI-NEXT: s_mov_b32 s14, s10
; SI-NEXT: s_mov_b32 s15, s11
-; SI-NEXT: s_mov_b32 s18, s10
-; SI-NEXT: s_mov_b32 s19, s11
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_mov_b32 s12, s4
; SI-NEXT: s_mov_b32 s13, s5
-; SI-NEXT: s_mov_b32 s16, s6
-; SI-NEXT: s_mov_b32 s17, s7
-; SI-NEXT: buffer_load_dwordx2 v[0:1], off, s[12:15], 0
-; SI-NEXT: buffer_load_dwordx2 v[2:3], off, s[16:19], 0
+; SI-NEXT: s_mov_b32 s4, s6
+; SI-NEXT: s_mov_b32 s5, s7
; SI-NEXT: s_mov_b32 s6, s10
; SI-NEXT: s_mov_b32 s7, s11
+; SI-NEXT: buffer_load_dwordx2 v[0:1], off, s[12:15], 0
+; SI-NEXT: buffer_load_dwordx2 v[2:3], off, s[4:7], 0
; SI-NEXT: s_mov_b32 s8, s0
; SI-NEXT: s_mov_b32 s9, s1
; SI-NEXT: s_mov_b32 s4, s2
@@ -606,6 +833,42 @@ define amdgpu_kernel void @v_usubo_v2i32(ptr addrspace(1) %out, ptr addrspace(1)
; GFX9-NEXT: global_store_dwordx2 v4, v[0:1], s[8:9]
; GFX9-NEXT: global_store_dwordx2 v4, v[2:3], s[10:11]
; GFX9-NEXT: s_endpgm
+;
+; GFX10-LABEL: v_usubo_v2i32:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x24
+; GFX10-NEXT: v_mov_b32_e32 v4, 0
+; GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-NEXT: s_clause 0x1
+; GFX10-NEXT: global_load_dwordx2 v[0:1], v4, s[12:13]
+; GFX10-NEXT: global_load_dwordx2 v[2:3], v4, s[14:15]
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: v_sub_co_u32 v1, s0, v1, v3
+; GFX10-NEXT: v_cndmask_b32_e64 v3, 0, 1, s0
+; GFX10-NEXT: v_sub_co_u32 v0, s0, v0, v2
+; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 1, s0
+; GFX10-NEXT: global_store_dwordx2 v4, v[0:1], s[8:9]
+; GFX10-NEXT: global_store_dwordx2 v4, v[2:3], s[10:11]
+; GFX10-NEXT: s_endpgm
+;
+; GFX11-LABEL: v_usubo_v2i32:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_load_b256 s[0:7], s[4:5], 0x24
+; GFX11-NEXT: v_mov_b32_e32 v4, 0
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: s_clause 0x1
+; GFX11-NEXT: global_load_b64 v[0:1], v4, s[4:5]
+; GFX11-NEXT: global_load_b64 v[2:3], v4, s[6:7]
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: v_sub_co_u32 v1, s4, v1, v3
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
+; GFX11-NEXT: v_cndmask_b32_e64 v3, 0, 1, s4
+; GFX11-NEXT: v_sub_co_u32 v0, s4, v0, v2
+; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 1, s4
+; GFX11-NEXT: s_clause 0x1
+; GFX11-NEXT: global_store_b64 v4, v[0:1], s[0:1]
+; GFX11-NEXT: global_store_b64 v4, v[2:3], s[2:3]
+; GFX11-NEXT: s_endpgm
%a = load <2 x i32>, ptr addrspace(1) %aptr, align 4
%b = load <2 x i32>, ptr addrspace(1) %bptr, align 4
%sadd = call { <2 x i32>, <2 x i1> } @llvm.usub.with.overflow.v2i32(<2 x i32> %a, <2 x i32> %b) nounwind
@@ -623,26 +886,27 @@ define amdgpu_kernel void @s_usubo_clamp_bit(ptr addrspace(1) %out, ptr addrspac
; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0xd
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: v_mov_b32_e32 v0, s1
-; SI-NEXT: v_sub_i32_e32 v0, vcc, s0, v0
; SI-NEXT: s_cmp_eq_u32 s0, s1
+; SI-NEXT: v_sub_i32_e32 v0, vcc, s0, v0
; SI-NEXT: s_mov_b64 s[0:1], 0
; SI-NEXT: s_cbranch_scc1 .LBB8_2
; SI-NEXT: ; %bb.1: ; %if
; SI-NEXT: s_xor_b64 s[0:1], vcc, -1
; SI-NEXT: .LBB8_2: ; %exit
; SI-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x9
-; SI-NEXT: s_mov_b32 s3, 0xf000
-; SI-NEXT: s_mov_b32 s2, -1
-; SI-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[0:1]
-; SI-NEXT: s_mov_b32 s10, s2
-; SI-NEXT: s_mov_b32 s11, s3
+; SI-NEXT: s_mov_b32 s11, 0xf000
+; SI-NEXT: s_mov_b32 s10, -1
; SI-NEXT: s_waitcnt lgkmcnt(0)
-; SI-NEXT: s_mov_b32 s0, s4
-; SI-NEXT: s_mov_b32 s1, s5
-; SI-NEXT: s_mov_b32 s8, s6
-; SI-NEXT: s_mov_b32 s9, s7
-; SI-NEXT: buffer_store_dword v0, off, s[0:3], 0
-; SI-NEXT: buffer_store_byte v1, off, s[8:11], 0
+; SI-NEXT: s_mov_b32 s8, s4
+; SI-NEXT: s_mov_b32 s9, s5
+; SI-NEXT: s_mov_b32 s4, s6
+; SI-NEXT: s_mov_b32 s5, s7
+; SI-NEXT: s_mov_b32 s6, s10
+; SI-NEXT: s_mov_b32 s7, s11
+; SI-NEXT: buffer_store_dword v0, off, s[8:11], 0
+; SI-NEXT: s_waitcnt expcnt(0)
+; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1]
+; SI-NEXT: buffer_store_byte v0, off, s[4:7], 0
; SI-NEXT: s_endpgm
;
; VI-LABEL: s_usubo_clamp_bit:
@@ -687,6 +951,45 @@ define amdgpu_kernel void @s_usubo_clamp_bit(ptr addrspace(1) %out, ptr addrspac
; GFX9-NEXT: global_store_dword v1, v0, s[8:9]
; GFX9-NEXT: global_store_byte v1, v2, s[10:11]
; GFX9-NEXT: s_endpgm
+;
+; GFX10-LABEL: s_usubo_clamp_bit:
+; GFX10: ; %bb.0: ; %entry
+; GFX10-NEXT: s_load_dwordx2 s[2:3], s[4:5], 0x34
+; GFX10-NEXT: s_mov_b32 s0, 0
+; GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-NEXT: v_sub_co_u32 v0, s1, s2, s3
+; GFX10-NEXT: s_cmp_eq_u32 s2, s3
+; GFX10-NEXT: s_cbranch_scc1 .LBB8_2
+; GFX10-NEXT: ; %bb.1: ; %if
+; GFX10-NEXT: s_xor_b32 s0, s1, -1
+; GFX10-NEXT: .LBB8_2: ; %exit
+; GFX10-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x24
+; GFX10-NEXT: v_mov_b32_e32 v1, 0
+; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 1, s0
+; GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-NEXT: global_store_dword v1, v0, s[8:9]
+; GFX10-NEXT: global_store_byte v1, v2, s[10:11]
+; GFX10-NEXT: s_endpgm
+;
+; GFX11-LABEL: s_usubo_clamp_bit:
+; GFX11: ; %bb.0: ; %entry
+; GFX11-NEXT: s_load_b64 s[2:3], s[4:5], 0x34
+; GFX11-NEXT: s_mov_b32 s0, 0
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: v_sub_co_u32 v0, s1, s2, s3
+; GFX11-NEXT: s_cmp_eq_u32 s2, s3
+; GFX11-NEXT: s_cbranch_scc1 .LBB8_2
+; GFX11-NEXT: ; %bb.1: ; %if
+; GFX11-NEXT: s_xor_b32 s0, s1, -1
+; GFX11-NEXT: .LBB8_2: ; %exit
+; GFX11-NEXT: s_load_b128 s[4:7], s[4:5], 0x24
+; GFX11-NEXT: v_mov_b32_e32 v1, 0
+; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 1, s0
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: s_clause 0x1
+; GFX11-NEXT: global_store_b32 v1, v0, s[4:5]
+; GFX11-NEXT: global_store_b8 v1, v2, s[6:7]
+; GFX11-NEXT: s_endpgm
entry:
%usub = call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %a, i32 %b)
%val = extractvalue { i32, i1 } %usub, 0
@@ -712,19 +1015,19 @@ define amdgpu_kernel void @v_usubo_clamp_bit(ptr addrspace(1) %out, ptr addrspac
; SI-NEXT: s_load_dwordx8 s[4:11], s[4:5], 0x9
; SI-NEXT: s_mov_b32 s3, 0xf000
; SI-NEXT: s_mov_b32 s2, -1
-; SI-NEXT: s_mov_b32 s14, s2
-; SI-NEXT: s_mov_b32 s15, s3
; SI-NEXT: s_waitcnt lgkmcnt(0)
; SI-NEXT: s_mov_b32 s0, s8
; SI-NEXT: s_mov_b32 s1, s9
-; SI-NEXT: s_mov_b32 s12, s10
-; SI-NEXT: s_mov_b32 s13, s11
+; SI-NEXT: s_mov_b32 s8, s10
+; SI-NEXT: s_mov_b32 s9, s11
+; SI-NEXT: s_mov_b32 s10, s2
+; SI-NEXT: s_mov_b32 s11, s3
; SI-NEXT: buffer_load_dword v1, off, s[0:3], 0
-; SI-NEXT: buffer_load_dword v2, off, s[12:15], 0
+; SI-NEXT: buffer_load_dword v2, off, s[8:11], 0
+; SI-NEXT: s_mov_b64 s[8:9], 0
; SI-NEXT: s_waitcnt vmcnt(0)
-; SI-NEXT: v_sub_i32_e64 v0, s[0:1], v1, v2
; SI-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
-; SI-NEXT: s_mov_b64 s[8:9], 0
+; SI-NEXT: v_sub_i32_e64 v0, s[0:1], v1, v2
; SI-NEXT: s_cbranch_vccnz .LBB9_2
; SI-NEXT: ; %bb.1: ; %if
; SI-NEXT: s_xor_b64 s[8:9], s[0:1], -1
@@ -787,6 +1090,50 @@ define amdgpu_kernel void @v_usubo_clamp_bit(ptr addrspace(1) %out, ptr addrspac
; GFX9-NEXT: v_cndmask_b32_e64 v1, 0, 1, s[2:3]
; GFX9-NEXT: global_store_byte v0, v1, s[10:11]
; GFX9-NEXT: s_endpgm
+;
+; GFX10-LABEL: v_usubo_clamp_bit:
+; GFX10: ; %bb.0: ; %entry
+; GFX10-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x24
+; GFX10-NEXT: v_mov_b32_e32 v0, 0
+; GFX10-NEXT: s_mov_b32 s0, 0
+; GFX10-NEXT: s_waitcnt lgkmcnt(0)
+; GFX10-NEXT: s_clause 0x1
+; GFX10-NEXT: global_load_dword v1, v0, s[12:13]
+; GFX10-NEXT: global_load_dword v2, v0, s[14:15]
+; GFX10-NEXT: s_waitcnt vmcnt(0)
+; GFX10-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX10-NEXT: v_sub_co_u32 v1, s1, v1, v2
+; GFX10-NEXT: s_cbranch_vccnz .LBB9_2
+; GFX10-NEXT: ; %bb.1: ; %if
+; GFX10-NEXT: s_xor_b32 s0, s1, -1
+; GFX10-NEXT: .LBB9_2: ; %exit
+; GFX10-NEXT: v_cndmask_b32_e64 v2, 0, 1, s0
+; GFX10-NEXT: global_store_dword v0, v1, s[8:9]
+; GFX10-NEXT: global_store_byte v0, v2, s[10:11]
+; GFX10-NEXT: s_endpgm
+;
+; GFX11-LABEL: v_usubo_clamp_bit:
+; GFX11: ; %bb.0: ; %entry
+; GFX11-NEXT: s_load_b256 s[0:7], s[4:5], 0x24
+; GFX11-NEXT: v_mov_b32_e32 v0, 0
+; GFX11-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-NEXT: s_clause 0x1
+; GFX11-NEXT: global_load_b32 v1, v0, s[4:5]
+; GFX11-NEXT: global_load_b32 v2, v0, s[6:7]
+; GFX11-NEXT: s_mov_b32 s4, 0
+; GFX11-NEXT: s_waitcnt vmcnt(0)
+; GFX11-NEXT: v_cmp_eq_u32_e32 vcc_lo, v1, v2
+; GFX11-NEXT: v_sub_co_u32 v1, s5, v1, v2
+; GFX11-NEXT: s_cbranch_vccnz .LBB9_2
+; GFX11-NEXT: ; %bb.1: ; %if
+; GFX11-NEXT: s_xor_b32 s4, s5, -1
+; GFX11-NEXT: .LBB9_2: ; %exit
+; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-NEXT: v_cndmask_b32_e64 v2, 0, 1, s4
+; GFX11-NEXT: s_clause 0x1
+; GFX11-NEXT: global_store_b32 v0, v1, s[0:1]
+; GFX11-NEXT: global_store_b8 v0, v2, s[2:3]
+; GFX11-NEXT: s_endpgm
entry:
%tid = call i32 @llvm.amdgcn.workitem.id.x()
%tid.ext = sext i32 %tid to i64
diff --git a/llvm/test/CodeGen/AMDGPU/wwm-regalloc-error.ll b/llvm/test/CodeGen/AMDGPU/wwm-regalloc-error.ll
index 145f1e4..ff18b32 100644
--- a/llvm/test/CodeGen/AMDGPU/wwm-regalloc-error.ll
+++ b/llvm/test/CodeGen/AMDGPU/wwm-regalloc-error.ll
@@ -2,7 +2,7 @@
; A negative test to capture the expected error when the VGPRs are insufficient for wwm-regalloc.
-; CHECK: error: can't find enough VGPRs for wwm-regalloc
+; CHECK: error: cannot find enough VGPRs for wwm-regalloc
define amdgpu_kernel void @test(i32 %in) {
entry:
diff --git a/llvm/test/CodeGen/ARM/fp16.ll b/llvm/test/CodeGen/ARM/fp16.ll
index dc35fa3..9ff7010 100644
--- a/llvm/test/CodeGen/ARM/fp16.ll
+++ b/llvm/test/CodeGen/ARM/fp16.ll
@@ -86,8 +86,8 @@ define i16 @test_to_fp16(double %in) {
; CHECK-FP16-SAFE: bl __aeabi_d2h
-; CHECK-FP16-UNSAFE: vcvt.f32.f64 s0, d0
-; CHECK-FP16-UNSAFE-NEXT: vcvtb.f16.f32 s0, s0
+; CHECK-FP16-UNSAFE: vmov r0, r1, d0
+; CHECK-FP16-UNSAFE-NEXT: bl __aeabi_d2h
; CHECK-ARMV8: vcvtb.f16.f64 [[TMP:s[0-9]+]], d0
; CHECK-ARMV8: vmov r0, [[TMP]]
diff --git a/llvm/test/CodeGen/ARM/preferred-function-alignment.ll b/llvm/test/CodeGen/ARM/preferred-function-alignment.ll
index f3a227c..2fc6790 100644
--- a/llvm/test/CodeGen/ARM/preferred-function-alignment.ll
+++ b/llvm/test/CodeGen/ARM/preferred-function-alignment.ll
@@ -22,3 +22,11 @@ define void @test() {
define void @test_optsize() optsize {
ret void
}
+
+; CHECK-LABEL: test_minsize
+; ALIGN-CS-16: .p2align 1
+; ALIGN-CS-32: .p2align 2
+
+define void @test_minsize() minsize {
+ ret void
+}
diff --git a/llvm/test/CodeGen/BPF/BTF/map-def-2.ll b/llvm/test/CodeGen/BPF/BTF/map-def-2.ll
index 5f971ec..d4c836f 100644
--- a/llvm/test/CodeGen/BPF/BTF/map-def-2.ll
+++ b/llvm/test/CodeGen/BPF/BTF/map-def-2.ll
@@ -1,5 +1,6 @@
-; RUN: llc -mtriple=bpfel -filetype=asm -o - %s | FileCheck -check-prefixes=CHECK %s
-; RUN: llc -mtriple=bpfeb -filetype=asm -o - %s | FileCheck -check-prefixes=CHECK %s
+; RUN: llc -mtriple=bpfel -mcpu=v3 -filetype=obj -o %t1 %s
+; RUN: llvm-objcopy --dump-section='.BTF'=%t2 %t1
+; RUN: %python %p/print_btf.py %t2 | FileCheck -check-prefixes=CHECK-BTF %s
;
; Source code:
; struct key_type {
@@ -18,51 +19,17 @@
@hash_map = dso_local local_unnamed_addr global %struct.map_type zeroinitializer, section ".maps", align 8, !dbg !0
-; CHECK: .long 0 # BTF_KIND_PTR(id = 1)
-; CHECK-NEXT: .long 33554432 # 0x2000000
-; CHECK-NEXT: .long 2
-; CHECK-NEXT: .long 1 # BTF_KIND_STRUCT(id = 2)
-; CHECK-NEXT: .long 67108865 # 0x4000001
-; CHECK-NEXT: .long 4
-; CHECK-NEXT: .long 10
-; CHECK-NEXT: .long 3
-; CHECK-NEXT: .long 0 # 0x0
-; CHECK-NEXT: .long 13 # BTF_KIND_INT(id = 3)
-; CHECK-NEXT: .long 16777216 # 0x1000000
-; CHECK-NEXT: .long 4
-; CHECK-NEXT: .long 16777248 # 0x1000020
-; CHECK-NEXT: .long 17 # BTF_KIND_TYPEDEF(id = 4)
-; CHECK-NEXT: .long 134217728 # 0x8000000
-; CHECK-NEXT: .long 5
-; CHECK-NEXT: .long 28 # BTF_KIND_TYPEDEF(id = 5)
-; CHECK-NEXT: .long 134217728 # 0x8000000
-; CHECK-NEXT: .long 6
-; CHECK-NEXT: .long 38 # BTF_KIND_STRUCT(id = 6)
-; CHECK-NEXT: .long 67108865 # 0x4000001
-; CHECK-NEXT: .long 8
-; CHECK-NEXT: .long 47
-; CHECK-NEXT: .long 1
-; CHECK-NEXT: .long 0 # 0x0
-; CHECK-NEXT: .long 51 # BTF_KIND_VAR(id = 7)
-; CHECK-NEXT: .long 234881024 # 0xe000000
-; CHECK-NEXT: .long 4
-; CHECK-NEXT: .long 1
-; CHECK-NEXT: .long 60 # BTF_KIND_DATASEC(id = 8)
-; CHECK-NEXT: .long 251658241 # 0xf000001
-; CHECK-NEXT: .long 0
-; CHECK-NEXT: .long 7
-; CHECK-NEXT: .long hash_map
-; CHECK-NEXT: .long 8
-
-; CHECK: .ascii "key_type" # string offset=1
-; CHECK: .ascii "a1" # string offset=10
-; CHECK: .ascii "int" # string offset=13
-; CHECK: .ascii "__map_type" # string offset=17
-; CHECK: .ascii "_map_type" # string offset=28
-; CHECK: .ascii "map_type" # string offset=38
-; CHECK: .ascii "key" # string offset=47
-; CHECK: .ascii "hash_map" # string offset=51
-; CHECK: .ascii ".maps" # string offset=60
+; CHECK-BTF: [1] PTR '(anon)' type_id=2
+; CHECK-BTF-NEXT: [2] STRUCT 'key_type' size=4 vlen=1
+; CHECK-BTF-NEXT: 'a1' type_id=3 bits_offset=0
+; CHECK-BTF-NEXT: [3] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED
+; CHECK-BTF-NEXT: [4] STRUCT 'map_type' size=8 vlen=1
+; CHECK-BTF-NEXT: 'key' type_id=1 bits_offset=0
+; CHECK-BTF-NEXT: [5] TYPEDEF '_map_type' type_id=4
+; CHECK-BTF-NEXT: [6] TYPEDEF '__map_type' type_id=5
+; CHECK-BTF-NEXT: [7] VAR 'hash_map' type_id=6, linkage=global
+; CHECK-BTF-NEXT: [8] DATASEC '.maps' size=0 vlen=1
+; CHECK-BTF-NEXT: type_id=7 offset=0 size=8
!llvm.dbg.cu = !{!2}
!llvm.module.flags = !{!16, !17, !18}
diff --git a/llvm/test/CodeGen/BPF/BTF/map-def-3.ll b/llvm/test/CodeGen/BPF/BTF/map-def-3.ll
index 6aa8af9..1d95f03 100644
--- a/llvm/test/CodeGen/BPF/BTF/map-def-3.ll
+++ b/llvm/test/CodeGen/BPF/BTF/map-def-3.ll
@@ -1,5 +1,6 @@
-; RUN: llc -mtriple=bpfel -filetype=asm -o - %s | FileCheck -check-prefixes=CHECK %s
-; RUN: llc -mtriple=bpfeb -filetype=asm -o - %s | FileCheck -check-prefixes=CHECK %s
+; RUN: llc -mtriple=bpfel -mcpu=v3 -filetype=obj -o %t1 %s
+; RUN: llvm-objcopy --dump-section='.BTF'=%t2 %t1
+; RUN: %python %p/print_btf.py %t2 | FileCheck -check-prefixes=CHECK-BTF %s
;
; Source code:
; struct key_type {
@@ -13,36 +14,13 @@
@hash_map = dso_local local_unnamed_addr constant %struct.key_type zeroinitializer, section ".maps", align 4, !dbg !0
-; CHECK: .long 1 # BTF_KIND_INT(id = 1)
-; CHECK-NEXT: .long 16777216 # 0x1000000
-; CHECK-NEXT: .long 4
-; CHECK-NEXT: .long 16777248 # 0x1000020
-; CHECK-NEXT: .long 0 # BTF_KIND_CONST(id = 2)
-; CHECK-NEXT: .long 167772160 # 0xa000000
-; CHECK-NEXT: .long 3
-; CHECK-NEXT: .long 5 # BTF_KIND_STRUCT(id = 3)
-; CHECK-NEXT: .long 67108865 # 0x4000001
-; CHECK-NEXT: .long 4
-; CHECK-NEXT: .long 14
-; CHECK-NEXT: .long 1
-; CHECK-NEXT: .long 0 # 0x0
-; CHECK-NEXT: .long 17 # BTF_KIND_VAR(id = 4)
-; CHECK-NEXT: .long 234881024 # 0xe000000
-; CHECK-NEXT: .long 2
-; CHECK-NEXT: .long 1
-; CHECK-NEXT: .long 26 # BTF_KIND_DATASEC(id = 5)
-; CHECK-NEXT: .long 251658241 # 0xf000001
-; CHECK-NEXT: .long 0
-; CHECK-NEXT: .long 4
-; CHECK-NEXT: .long hash_map
-; CHECK-NEXT: .long 4
-
-; CHECK: .ascii "int" # string offset=1
-; CHECK: .ascii "key_type" # string offset=5
-; CHECK: .ascii "a1" # string offset=14
-; CHECK: .ascii "hash_map" # string offset=17
-; CHECK: .ascii ".maps" # string offset=26
-
+; CHECK-BTF: [1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED
+; CHECK-BTF-NEXT: [2] STRUCT 'key_type' size=4 vlen=1
+; CHECK-BTF-NEXT: 'a1' type_id=1 bits_offset=0
+; CHECK-BTF-NEXT: [3] CONST '(anon)' type_id=2
+; CHECK-BTF-NEXT: [4] VAR 'hash_map' type_id=3, linkage=global
+; CHECK-BTF-NEXT: [5] DATASEC '.maps' size=0 vlen=1
+; CHECK-BTF-NEXT: type_id=4 offset=0 size=4
!llvm.dbg.cu = !{!2}
!llvm.module.flags = !{!11, !12, !13}
diff --git a/llvm/test/CodeGen/BPF/BTF/map-def-nested-array.ll b/llvm/test/CodeGen/BPF/BTF/map-def-nested-array.ll
new file mode 100644
index 0000000..fc95daf
--- /dev/null
+++ b/llvm/test/CodeGen/BPF/BTF/map-def-nested-array.ll
@@ -0,0 +1,75 @@
+; RUN: llc -mtriple=bpfel -mcpu=v3 -filetype=obj -o %t1 %s
+; RUN: llvm-objcopy --dump-section='.BTF'=%t2 %t1
+; RUN: %python %p/print_btf.py %t2 | FileCheck -check-prefixes=CHECK-BTF-SHORT %s
+; RUN: %python %p/print_btf.py %t2 | FileCheck -check-prefixes=CHECK-BTF %s
+; Source:
+; struct nested_value_type {
+; int a1;
+; };
+; struct map_type {
+; struct {
+; struct nested_value_type *value;
+; } *values[];
+; };
+; Compilation flags:
+; clang -target bpf -g -O2 -S -emit-llvm prog.c
+
+; ModuleID = 'prog.c'
+source_filename = "prog.c"
+target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128"
+target triple = "bpf"
+
+%struct.map_type = type { [0 x ptr] }
+
+@array_of_maps = dso_local local_unnamed_addr global %struct.map_type zeroinitializer, section ".maps", align 8, !dbg !0
+
+; We expect no forward declarations.
+;
+; CHECK-BTF-SHORT-NOT: FWD
+
+; Assert the whole BTF.
+;
+; CHECK-BTF: [1] PTR '(anon)' type_id=2
+; CHECK-BTF-NEXT: [2] STRUCT 'nested_value_type' size=4 vlen=1
+; CHECK-BTF-NEXT: 'a1' type_id=3 bits_offset=0
+; CHECK-BTF-NEXT: [3] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED
+; CHECK-BTF-NEXT: [4] STRUCT '(anon)' size=8 vlen=1
+; CHECK-BTF-NEXT: 'value' type_id=1 bits_offset=0
+; CHECK-BTF-NEXT: [5] PTR '(anon)' type_id=4
+; CHECK-BTF-NEXT: [6] ARRAY '(anon)' type_id=5 index_type_id=7 nr_elems=0
+; CHECK-BTF-NEXT: [7] INT '__ARRAY_SIZE_TYPE__' size=4 bits_offset=0 nr_bits=32 encoding=(none)
+; CHECK-BTF-NEXT: [8] STRUCT 'map_type' size=0 vlen=1
+; CHECK-BTF-NEXT: 'values' type_id=6 bits_offset=0
+; CHECK-BTF-NEXT: [9] VAR 'array_of_maps' type_id=8, linkage=global
+; CHECK-BTF-NEXT: [10] DATASEC '.maps' size=0 vlen=1
+; CHECK-BTF-NEXT: type_id=9 offset=0 size=0
+
+!llvm.dbg.cu = !{!2}
+!llvm.module.flags = !{!20, !21, !22, !23}
+!llvm.ident = !{!24}
+
+!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression())
+!1 = distinct !DIGlobalVariable(name: "array_of_maps", scope: !2, file: !3, line: 9, type: !5, isLocal: false, isDefinition: true)
+!2 = distinct !DICompileUnit(language: DW_LANG_C11, file: !3, producer: "clang version 22.0.0git (git@github.com:llvm/llvm-project.git ed93eaa421b714028b85cc887d80c45991d7207f)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, globals: !4, splitDebugInlining: false, nameTableKind: None)
+!3 = !DIFile(filename: "prog.c", directory: "/home/mtardy/llvm-bug-repro", checksumkind: CSK_MD5, checksum: "9381d9e83e9c0b235a14704224815e96")
+!4 = !{!0}
+!5 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "map_type", file: !3, line: 4, elements: !6)
+!6 = !{!7}
+!7 = !DIDerivedType(tag: DW_TAG_member, name: "values", scope: !5, file: !3, line: 7, baseType: !8)
+!8 = !DICompositeType(tag: DW_TAG_array_type, baseType: !9, elements: !18)
+!9 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !10, size: 64)
+!10 = distinct !DICompositeType(tag: DW_TAG_structure_type, scope: !5, file: !3, line: 5, size: 64, elements: !11)
+!11 = !{!12}
+!12 = !DIDerivedType(tag: DW_TAG_member, name: "value", scope: !10, file: !3, line: 6, baseType: !13, size: 64)
+!13 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !14, size: 64)
+!14 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "nested_value_type", file: !3, line: 1, size: 32, elements: !15)
+!15 = !{!16}
+!16 = !DIDerivedType(tag: DW_TAG_member, name: "a1", scope: !14, file: !3, line: 2, baseType: !17, size: 32)
+!17 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!18 = !{!19}
+!19 = !DISubrange(count: -1)
+!20 = !{i32 7, !"Dwarf Version", i32 5}
+!21 = !{i32 2, !"Debug Info Version", i32 3}
+!22 = !{i32 1, !"wchar_size", i32 4}
+!23 = !{i32 7, !"frame-pointer", i32 2}
+!24 = !{!"clang version 22.0.0git (git@github.com:llvm/llvm-project.git ed93eaa421b714028b85cc887d80c45991d7207f)"}
diff --git a/llvm/test/CodeGen/Hexagon/hvx-reuse-fi-base.ll b/llvm/test/CodeGen/Hexagon/hvx-reuse-fi-base.ll
index 16cc1f3..e5a6aa4 100644
--- a/llvm/test/CodeGen/Hexagon/hvx-reuse-fi-base.ll
+++ b/llvm/test/CodeGen/Hexagon/hvx-reuse-fi-base.ll
@@ -183,7 +183,7 @@ b0:
%v11 = call <64 x i32> @llvm.hexagon.V6.vaddubh.128B(<32 x i32> %v10, <32 x i32> undef)
%v12 = call <64 x i32> @llvm.hexagon.V6.vrmpyubi.128B(<64 x i32> %v11, i32 2147483647, i32 1)
store <64 x i32> %v12, ptr @g0, align 128
- call void (ptr, ...) @f1(ptr @g3) #2
+ call void (ptr, ...) @f1(ptr @g3) #3
%v13 = call <32 x i32> @llvm.hexagon.V6.lvsplatw.128B(i32 2)
%v14 = call <64 x i32> @llvm.hexagon.V6.vaddubh.128B(<32 x i32> undef, <32 x i32> %v13)
%v15 = call <64 x i32> @llvm.hexagon.V6.vrmpyubi.128B(<64 x i32> %v14, i32 -2147483648, i32 1)
@@ -193,7 +193,7 @@ b0:
%v17 = call <64 x i32> @llvm.hexagon.V6.vaddubh.128B(<32 x i32> undef, <32 x i32> %v16)
%v18 = call <64 x i32> @llvm.hexagon.V6.vrmpyubi.128B(<64 x i32> %v17, i32 0, i32 1)
store <64 x i32> %v18, ptr @g0, align 128
- call void @f0() #2
+ call void @f0() #3
%v19 = call <32 x i32> @llvm.hexagon.V6.lvsplatw.128B(i32 1)
%v20 = call <32 x i32> @llvm.hexagon.V6.lvsplatw.128B(i32 2)
%v21 = call <64 x i32> @llvm.hexagon.V6.vaddubh.128B(<32 x i32> %v19, <32 x i32> %v20)
@@ -205,3 +205,4 @@ b0:
attributes #0 = { nounwind "use-soft-float"="false" "target-cpu"="hexagonv66" "target-features"="+hvxv66,+hvx-length128b" }
attributes #1 = { nounwind readnone }
attributes #2 = { nounwind optsize }
+attributes #3 = { nounwind minsize }
diff --git a/llvm/test/CodeGen/LoongArch/lasx/build-vector.ll b/llvm/test/CodeGen/LoongArch/lasx/build-vector.ll
index 5130865..c18c637 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/build-vector.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/build-vector.ll
@@ -436,49 +436,47 @@ entry:
define void @buildvector_v32i8_with_constant(ptr %dst, i8 %a0, i8 %a1, i8 %a2, i8 %a5, i8 %a8, i8 %a9, i8 %a15, i8 %a17, i8 %a18, i8 %a20, i8 %a22, i8 %a23, i8 %a27, i8 %a28, i8 %a31) nounwind {
; CHECK-LABEL: buildvector_v32i8_with_constant:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: addi.d $sp, $sp, -96
-; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
-; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill
-; CHECK-NEXT: addi.d $fp, $sp, 96
-; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
-; CHECK-NEXT: ld.b $t0, $fp, 0
-; CHECK-NEXT: ld.b $t1, $fp, 8
-; CHECK-NEXT: ld.b $t2, $fp, 16
-; CHECK-NEXT: ld.b $t3, $fp, 24
-; CHECK-NEXT: ld.b $t4, $fp, 56
-; CHECK-NEXT: ld.b $t5, $fp, 32
-; CHECK-NEXT: ld.b $t6, $fp, 48
-; CHECK-NEXT: ld.b $t7, $fp, 40
-; CHECK-NEXT: st.b $t4, $sp, 63
-; CHECK-NEXT: st.b $zero, $sp, 61
-; CHECK-NEXT: st.b $t6, $sp, 60
-; CHECK-NEXT: st.b $t7, $sp, 59
-; CHECK-NEXT: st.b $zero, $sp, 56
-; CHECK-NEXT: st.b $t5, $sp, 55
-; CHECK-NEXT: st.b $t3, $sp, 54
-; CHECK-NEXT: st.b $zero, $sp, 53
-; CHECK-NEXT: st.b $t2, $sp, 52
-; CHECK-NEXT: st.b $zero, $sp, 51
-; CHECK-NEXT: st.b $t1, $sp, 50
-; CHECK-NEXT: st.b $t0, $sp, 49
-; CHECK-NEXT: st.b $zero, $sp, 48
-; CHECK-NEXT: st.b $a7, $sp, 47
-; CHECK-NEXT: st.h $zero, $sp, 44
-; CHECK-NEXT: st.b $zero, $sp, 42
-; CHECK-NEXT: st.b $a6, $sp, 41
-; CHECK-NEXT: st.b $a5, $sp, 40
-; CHECK-NEXT: st.b $zero, $sp, 39
-; CHECK-NEXT: st.b $a4, $sp, 37
-; CHECK-NEXT: st.h $zero, $sp, 35
-; CHECK-NEXT: st.b $a3, $sp, 34
-; CHECK-NEXT: st.b $a2, $sp, 33
-; CHECK-NEXT: st.b $a1, $sp, 32
-; CHECK-NEXT: xvld $xr0, $sp, 32
-; CHECK-NEXT: xvst $xr0, $a0, 0
-; CHECK-NEXT: addi.d $sp, $fp, -96
-; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload
-; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
-; CHECK-NEXT: addi.d $sp, $sp, 96
+; CHECK-NEXT: ld.b $t0, $sp, 56
+; CHECK-NEXT: ld.b $t1, $sp, 48
+; CHECK-NEXT: ld.b $t2, $sp, 40
+; CHECK-NEXT: ld.b $t3, $sp, 32
+; CHECK-NEXT: ld.b $t4, $sp, 24
+; CHECK-NEXT: ld.b $t5, $sp, 16
+; CHECK-NEXT: ld.b $t6, $sp, 8
+; CHECK-NEXT: ld.b $t7, $sp, 0
+; CHECK-NEXT: xvrepli.b $xr0, 0
+; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 0
+; CHECK-NEXT: vinsgr2vr.b $vr0, $a2, 1
+; CHECK-NEXT: vinsgr2vr.b $vr0, $a3, 2
+; CHECK-NEXT: vinsgr2vr.b $vr0, $a4, 5
+; CHECK-NEXT: vinsgr2vr.b $vr0, $a5, 8
+; CHECK-NEXT: vinsgr2vr.b $vr0, $a6, 9
+; CHECK-NEXT: vinsgr2vr.b $vr0, $a7, 15
+; CHECK-NEXT: xvpermi.d $xr1, $xr0, 14
+; CHECK-NEXT: vinsgr2vr.b $vr1, $t7, 1
+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
+; CHECK-NEXT: xvpermi.d $xr1, $xr0, 14
+; CHECK-NEXT: vinsgr2vr.b $vr1, $t6, 2
+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
+; CHECK-NEXT: xvpermi.d $xr1, $xr0, 14
+; CHECK-NEXT: vinsgr2vr.b $vr1, $t5, 4
+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
+; CHECK-NEXT: xvpermi.d $xr1, $xr0, 14
+; CHECK-NEXT: vinsgr2vr.b $vr1, $t4, 6
+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
+; CHECK-NEXT: xvpermi.d $xr1, $xr0, 14
+; CHECK-NEXT: vinsgr2vr.b $vr1, $t3, 7
+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
+; CHECK-NEXT: xvpermi.d $xr1, $xr0, 14
+; CHECK-NEXT: vinsgr2vr.b $vr1, $t2, 11
+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
+; CHECK-NEXT: xvpermi.d $xr1, $xr0, 14
+; CHECK-NEXT: vinsgr2vr.b $vr1, $t1, 12
+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
+; CHECK-NEXT: xvpermi.d $xr1, $xr0, 14
+; CHECK-NEXT: vinsgr2vr.b $vr1, $t0, 15
+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
+; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
entry:
%ins0 = insertelement <32 x i8> undef, i8 %a0, i32 0
@@ -624,32 +622,19 @@ entry:
define void @buildvector_v16i16_with_constant(ptr %dst, i16 %a2, i16 %a3, i16 %a5, i16 %a6, i16 %a7, i16 %a12, i16 %a13) nounwind {
; CHECK-LABEL: buildvector_v16i16_with_constant:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: addi.d $sp, $sp, -96
-; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
-; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill
-; CHECK-NEXT: addi.d $fp, $sp, 96
-; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
-; CHECK-NEXT: st.h $a7, $sp, 58
-; CHECK-NEXT: st.h $a6, $sp, 56
-; CHECK-NEXT: st.h $a5, $sp, 46
-; CHECK-NEXT: st.h $a4, $sp, 44
-; CHECK-NEXT: st.h $a3, $sp, 42
-; CHECK-NEXT: ori $a3, $zero, 2
-; CHECK-NEXT: st.h $a3, $sp, 40
-; CHECK-NEXT: st.h $a2, $sp, 38
-; CHECK-NEXT: st.h $a1, $sp, 36
-; CHECK-NEXT: lu12i.w $a1, 32
-; CHECK-NEXT: ori $a1, $a1, 2
-; CHECK-NEXT: st.w $a1, $sp, 60
-; CHECK-NEXT: st.w $a1, $sp, 32
-; CHECK-NEXT: lu32i.d $a1, 131074
-; CHECK-NEXT: st.d $a1, $sp, 48
-; CHECK-NEXT: xvld $xr0, $sp, 32
-; CHECK-NEXT: xvst $xr0, $a0, 0
-; CHECK-NEXT: addi.d $sp, $fp, -96
-; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload
-; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
-; CHECK-NEXT: addi.d $sp, $sp, 96
+; CHECK-NEXT: xvrepli.h $xr0, 2
+; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 2
+; CHECK-NEXT: vinsgr2vr.h $vr0, $a2, 3
+; CHECK-NEXT: vinsgr2vr.h $vr0, $a3, 5
+; CHECK-NEXT: vinsgr2vr.h $vr0, $a4, 6
+; CHECK-NEXT: vinsgr2vr.h $vr0, $a5, 7
+; CHECK-NEXT: xvpermi.d $xr1, $xr0, 14
+; CHECK-NEXT: vinsgr2vr.h $vr1, $a6, 4
+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
+; CHECK-NEXT: xvpermi.d $xr1, $xr0, 14
+; CHECK-NEXT: vinsgr2vr.h $vr1, $a7, 5
+; CHECK-NEXT: xvpermi.q $xr0, $xr1, 2
+; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
entry:
%ins0 = insertelement <16 x i16> undef, i16 2, i32 0
@@ -724,24 +709,12 @@ entry:
define void @buildvector_v8i32_with_constant(ptr %dst, i32 %a2, i32 %a4, i32 %a5, i32 %a6) nounwind {
; CHECK-LABEL: buildvector_v8i32_with_constant:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: addi.d $sp, $sp, -96
-; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
-; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill
-; CHECK-NEXT: addi.d $fp, $sp, 96
-; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
-; CHECK-NEXT: st.w $zero, $sp, 60
-; CHECK-NEXT: st.w $a4, $sp, 56
-; CHECK-NEXT: st.w $a3, $sp, 52
-; CHECK-NEXT: st.w $a2, $sp, 48
-; CHECK-NEXT: st.w $zero, $sp, 44
-; CHECK-NEXT: st.w $a1, $sp, 40
-; CHECK-NEXT: st.d $zero, $sp, 32
-; CHECK-NEXT: xvld $xr0, $sp, 32
-; CHECK-NEXT: xvst $xr0, $a0, 0
-; CHECK-NEXT: addi.d $sp, $fp, -96
-; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload
-; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
-; CHECK-NEXT: addi.d $sp, $sp, 96
+; CHECK-NEXT: xvrepli.b $xr0, 0
+; CHECK-NEXT: xvinsgr2vr.w $xr0, $a1, 2
+; CHECK-NEXT: xvinsgr2vr.w $xr0, $a2, 4
+; CHECK-NEXT: xvinsgr2vr.w $xr0, $a3, 5
+; CHECK-NEXT: xvinsgr2vr.w $xr0, $a4, 6
+; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
entry:
%ins0 = insertelement <8 x i32> undef, i32 0, i32 0
@@ -793,21 +766,10 @@ entry:
define void @buildvector_v4i64_with_constant(ptr %dst, i64 %a0, i64 %a2) nounwind {
; CHECK-LABEL: buildvector_v4i64_with_constant:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: addi.d $sp, $sp, -96
-; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
-; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill
-; CHECK-NEXT: addi.d $fp, $sp, 96
-; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
-; CHECK-NEXT: st.d $zero, $sp, 56
-; CHECK-NEXT: st.d $a2, $sp, 48
-; CHECK-NEXT: st.d $zero, $sp, 40
-; CHECK-NEXT: st.d $a1, $sp, 32
-; CHECK-NEXT: xvld $xr0, $sp, 32
+; CHECK-NEXT: xvrepli.b $xr0, 0
+; CHECK-NEXT: xvinsgr2vr.d $xr0, $a1, 0
+; CHECK-NEXT: xvinsgr2vr.d $xr0, $a2, 2
; CHECK-NEXT: xvst $xr0, $a0, 0
-; CHECK-NEXT: addi.d $sp, $fp, -96
-; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload
-; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
-; CHECK-NEXT: addi.d $sp, $sp, 96
; CHECK-NEXT: ret
entry:
%ins0 = insertelement <4 x i64> undef, i64 %a0, i32 0
@@ -880,27 +842,17 @@ entry:
define void @buildvector_v8f32_with_constant(ptr %dst, float %a1, float %a2, float %a5, float %a7) nounwind {
; CHECK-LABEL: buildvector_v8f32_with_constant:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: addi.d $sp, $sp, -96
-; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
-; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill
-; CHECK-NEXT: addi.d $fp, $sp, 96
-; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
-; CHECK-NEXT: fst.s $fa3, $sp, 60
-; CHECK-NEXT: fst.s $fa2, $sp, 52
-; CHECK-NEXT: fst.s $fa1, $sp, 40
-; CHECK-NEXT: fst.s $fa0, $sp, 36
-; CHECK-NEXT: vldi $vr0, -1280
-; CHECK-NEXT: fst.s $fa0, $sp, 56
+; CHECK-NEXT: # kill: def $f3 killed $f3 def $xr3
+; CHECK-NEXT: # kill: def $f2 killed $f2 def $xr2
+; CHECK-NEXT: # kill: def $f1 killed $f1 def $xr1
+; CHECK-NEXT: # kill: def $f0 killed $f0 def $xr0
; CHECK-NEXT: lu12i.w $a1, 262144
-; CHECK-NEXT: lu52i.d $a1, $a1, 1024
-; CHECK-NEXT: st.d $a1, $sp, 44
-; CHECK-NEXT: fst.s $fa0, $sp, 32
-; CHECK-NEXT: xvld $xr0, $sp, 32
-; CHECK-NEXT: xvst $xr0, $a0, 0
-; CHECK-NEXT: addi.d $sp, $fp, -96
-; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload
-; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
-; CHECK-NEXT: addi.d $sp, $sp, 96
+; CHECK-NEXT: xvreplgr2vr.w $xr4, $a1
+; CHECK-NEXT: xvinsve0.w $xr4, $xr0, 1
+; CHECK-NEXT: xvinsve0.w $xr4, $xr1, 2
+; CHECK-NEXT: xvinsve0.w $xr4, $xr2, 5
+; CHECK-NEXT: xvinsve0.w $xr4, $xr3, 7
+; CHECK-NEXT: xvst $xr4, $a0, 0
; CHECK-NEXT: ret
entry:
%ins0 = insertelement <8 x float> undef, float 2.0, i32 0
@@ -956,21 +908,12 @@ entry:
define void @buildvector_v4f64_with_constant(ptr %dst, double %a0, double %a3) nounwind {
; CHECK-LABEL: buildvector_v4f64_with_constant:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: addi.d $sp, $sp, -96
-; CHECK-NEXT: st.d $ra, $sp, 88 # 8-byte Folded Spill
-; CHECK-NEXT: st.d $fp, $sp, 80 # 8-byte Folded Spill
-; CHECK-NEXT: addi.d $fp, $sp, 96
-; CHECK-NEXT: bstrins.d $sp, $zero, 4, 0
-; CHECK-NEXT: fst.d $fa1, $sp, 56
-; CHECK-NEXT: vrepli.b $vr1, 0
-; CHECK-NEXT: vst $vr1, $sp, 40
-; CHECK-NEXT: fst.d $fa0, $sp, 32
-; CHECK-NEXT: xvld $xr0, $sp, 32
-; CHECK-NEXT: xvst $xr0, $a0, 0
-; CHECK-NEXT: addi.d $sp, $fp, -96
-; CHECK-NEXT: ld.d $fp, $sp, 80 # 8-byte Folded Reload
-; CHECK-NEXT: ld.d $ra, $sp, 88 # 8-byte Folded Reload
-; CHECK-NEXT: addi.d $sp, $sp, 96
+; CHECK-NEXT: # kill: def $f1_64 killed $f1_64 def $xr1
+; CHECK-NEXT: # kill: def $f0_64 killed $f0_64 def $xr0
+; CHECK-NEXT: xvrepli.b $xr2, 0
+; CHECK-NEXT: xvinsve0.d $xr2, $xr0, 0
+; CHECK-NEXT: xvinsve0.d $xr2, $xr1, 3
+; CHECK-NEXT: xvst $xr2, $a0, 0
; CHECK-NEXT: ret
entry:
%ins0 = insertelement <4 x double> undef, double %a0, i32 0
diff --git a/llvm/test/CodeGen/LoongArch/lsx/build-vector.ll b/llvm/test/CodeGen/LoongArch/lsx/build-vector.ll
index 78588c5..9517558 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/build-vector.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/build-vector.ll
@@ -307,23 +307,15 @@ entry:
define void @buildvector_v16i8_with_constant(ptr %dst, i8 %a0, i8 %a4, i8 %a6, i8 %a8, i8 %a11, i8 %a12, i8 %a15) nounwind {
; CHECK-LABEL: buildvector_v16i8_with_constant:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: addi.d $sp, $sp, -16
-; CHECK-NEXT: st.b $a7, $sp, 15
-; CHECK-NEXT: st.h $zero, $sp, 13
-; CHECK-NEXT: st.b $a6, $sp, 12
-; CHECK-NEXT: st.b $a5, $sp, 11
-; CHECK-NEXT: st.h $zero, $sp, 9
-; CHECK-NEXT: st.b $a4, $sp, 8
-; CHECK-NEXT: st.b $zero, $sp, 7
-; CHECK-NEXT: st.b $a3, $sp, 6
-; CHECK-NEXT: st.b $zero, $sp, 5
-; CHECK-NEXT: st.b $a2, $sp, 4
-; CHECK-NEXT: st.b $zero, $sp, 3
-; CHECK-NEXT: st.h $zero, $sp, 1
-; CHECK-NEXT: st.b $a1, $sp, 0
-; CHECK-NEXT: vld $vr0, $sp, 0
-; CHECK-NEXT: vst $vr0, $a0, 0
-; CHECK-NEXT: addi.d $sp, $sp, 16
+; CHECK-NEXT: vrepli.b $vr0, 0
+; CHECK-NEXT: vinsgr2vr.b $vr0, $a1, 0
+; CHECK-NEXT: vinsgr2vr.b $vr0, $a2, 4
+; CHECK-NEXT: vinsgr2vr.b $vr0, $a3, 6
+; CHECK-NEXT: vinsgr2vr.b $vr0, $a4, 8
+; CHECK-NEXT: vinsgr2vr.b $vr0, $a5, 11
+; CHECK-NEXT: vinsgr2vr.b $vr0, $a6, 12
+; CHECK-NEXT: vinsgr2vr.b $vr0, $a7, 15
+; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
%ins0 = insertelement <16 x i8> undef, i8 %a0, i32 0
@@ -398,16 +390,12 @@ entry:
define void @buildvector_v8i16_with_constant(ptr %dst, i16 %a0, i16 %a3, i16 %a4, i16 %a5) nounwind {
; CHECK-LABEL: buildvector_v8i16_with_constant:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: addi.d $sp, $sp, -16
-; CHECK-NEXT: st.h $zero, $sp, 12
-; CHECK-NEXT: st.h $a4, $sp, 10
-; CHECK-NEXT: st.h $a3, $sp, 8
-; CHECK-NEXT: st.h $a2, $sp, 6
-; CHECK-NEXT: st.h $zero, $sp, 2
-; CHECK-NEXT: st.h $a1, $sp, 0
-; CHECK-NEXT: vld $vr0, $sp, 0
+; CHECK-NEXT: vrepli.b $vr0, 0
+; CHECK-NEXT: vinsgr2vr.h $vr0, $a1, 0
+; CHECK-NEXT: vinsgr2vr.h $vr0, $a2, 3
+; CHECK-NEXT: vinsgr2vr.h $vr0, $a3, 4
+; CHECK-NEXT: vinsgr2vr.h $vr0, $a4, 5
; CHECK-NEXT: vst $vr0, $a0, 0
-; CHECK-NEXT: addi.d $sp, $sp, 16
; CHECK-NEXT: ret
entry:
%ins0 = insertelement <8 x i16> undef, i16 %a0, i32 0
@@ -459,15 +447,11 @@ entry:
define void @buildvector_v4i32_with_constant(ptr %dst, i32 %a0, i32 %a2, i32 %a3) nounwind {
; CHECK-LABEL: buildvector_v4i32_with_constant:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: addi.d $sp, $sp, -16
-; CHECK-NEXT: st.w $a3, $sp, 12
-; CHECK-NEXT: st.w $a2, $sp, 8
-; CHECK-NEXT: ori $a2, $zero, 2
-; CHECK-NEXT: st.w $a2, $sp, 4
-; CHECK-NEXT: st.w $a1, $sp, 0
-; CHECK-NEXT: vld $vr0, $sp, 0
+; CHECK-NEXT: vrepli.w $vr0, 2
+; CHECK-NEXT: vinsgr2vr.w $vr0, $a1, 0
+; CHECK-NEXT: vinsgr2vr.w $vr0, $a2, 2
+; CHECK-NEXT: vinsgr2vr.w $vr0, $a3, 3
; CHECK-NEXT: vst $vr0, $a0, 0
-; CHECK-NEXT: addi.d $sp, $sp, 16
; CHECK-NEXT: ret
entry:
%ins0 = insertelement <4 x i32> undef, i32 %a0, i32 0
@@ -508,9 +492,8 @@ entry:
define void @buildvector_v2i64_with_constant(ptr %dst, i64 %a1) nounwind {
; CHECK-LABEL: buildvector_v2i64_with_constant:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vinsgr2vr.d $vr0, $zero, 0
-; CHECK-NEXT: vinsgr2vr.d $vr1, $a1, 0
-; CHECK-NEXT: vpackev.d $vr0, $vr1, $vr0
+; CHECK-NEXT: vrepli.b $vr0, 0
+; CHECK-NEXT: vinsgr2vr.d $vr0, $a1, 1
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
entry:
@@ -561,15 +544,14 @@ entry:
define void @buildvector_v4f32_with_constant(ptr %dst, float %a1, float %a2, float %a3) nounwind {
; CHECK-LABEL: buildvector_v4f32_with_constant:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: addi.d $sp, $sp, -16
-; CHECK-NEXT: fst.s $fa2, $sp, 12
-; CHECK-NEXT: fst.s $fa1, $sp, 8
-; CHECK-NEXT: fst.s $fa0, $sp, 4
-; CHECK-NEXT: movgr2fr.w $fa0, $zero
-; CHECK-NEXT: fst.s $fa0, $sp, 0
-; CHECK-NEXT: vld $vr0, $sp, 0
-; CHECK-NEXT: vst $vr0, $a0, 0
-; CHECK-NEXT: addi.d $sp, $sp, 16
+; CHECK-NEXT: # kill: def $f2 killed $f2 def $vr2
+; CHECK-NEXT: # kill: def $f1 killed $f1 def $vr1
+; CHECK-NEXT: # kill: def $f0 killed $f0 def $vr0
+; CHECK-NEXT: vrepli.b $vr3, 0
+; CHECK-NEXT: vextrins.w $vr3, $vr0, 16
+; CHECK-NEXT: vextrins.w $vr3, $vr1, 32
+; CHECK-NEXT: vextrins.w $vr3, $vr2, 48
+; CHECK-NEXT: vst $vr3, $a0, 0
; CHECK-NEXT: ret
entry:
%ins0 = insertelement <4 x float> undef, float 0.0, i32 0
diff --git a/llvm/test/CodeGen/MIR/AMDGPU/noalias-addrspace-expect-id.mir b/llvm/test/CodeGen/MIR/AMDGPU/noalias-addrspace-expect-id.mir
new file mode 100644
index 0000000..4179ff2
--- /dev/null
+++ b/llvm/test/CodeGen/MIR/AMDGPU/noalias-addrspace-expect-id.mir
@@ -0,0 +1,29 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: not llc -mtriple=amdgcn -mcpu=gfx1200 -run-pass none -o /dev/null %s 2>&1 | FileCheck %s
+
+--- |
+ define void @expect_id(ptr %ptr, float %data) #0 {
+ %1 = atomicrmw fadd ptr %ptr, float %data syncscope("agent") seq_cst, align 4, !noalias.addrspace !0
+ ret void
+ }
+
+ attributes #0 = { "target-cpu"="gfx1200" }
+
+ !0 = !{i32 5, i32 6}
+...
+
+---
+name: expect_id
+
+body: |
+ bb.1 (%ir-block.0):
+ liveins: $vgpr0, $vgpr1, $vgpr2
+
+ ; CHECK: expected metadata id after '!'
+ %2:vgpr_32 = COPY $vgpr0
+ %3:vgpr_32 = COPY $vgpr1
+ %0:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
+ %1:vgpr_32 = COPY $vgpr2
+ FLAT_ATOMIC_ADD_F32 %0, %1, 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr, !noalias.addrspace !!)
+ S_ENDPGM 0
+...
diff --git a/llvm/test/CodeGen/MIR/AMDGPU/noalias-addrspace-parse.mir b/llvm/test/CodeGen/MIR/AMDGPU/noalias-addrspace-parse.mir
new file mode 100644
index 0000000..7fe6aa9
--- /dev/null
+++ b/llvm/test/CodeGen/MIR/AMDGPU/noalias-addrspace-parse.mir
@@ -0,0 +1,36 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -run-pass=none -o - %s | FileCheck %s
+
+
+--- |
+ define void @test_parsing_printing(ptr %ptr, float %data) {
+ %1 = atomicrmw fadd ptr %ptr, float %data syncscope("agent") seq_cst, align 4, !noalias.addrspace !0
+ ret void
+ }
+
+ !0 = !{i32 5, i32 6}
+...
+
+---
+name: test_parsing_printing
+
+body: |
+ bb.1 (%ir-block.0):
+ liveins: $vgpr0, $vgpr1, $vgpr2
+
+ ; CHECK-LABEL: name: test_parsing_printing
+ ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
+ ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
+ ; CHECK-NEXT: FLAT_ATOMIC_ADD_F32 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr, !noalias.addrspace !0)
+ ; CHECK-NEXT: S_ENDPGM 0
+ %2:vgpr_32 = COPY $vgpr0
+ %3:vgpr_32 = COPY $vgpr1
+ %0:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
+ %1:vgpr_32 = COPY $vgpr2
+ FLAT_ATOMIC_ADD_F32 %0, %1, 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr, !noalias.addrspace !0)
+ S_ENDPGM 0
+...
diff --git a/llvm/test/CodeGen/MIR/AMDGPU/noalias-addrspace-undefine-matadata.mir b/llvm/test/CodeGen/MIR/AMDGPU/noalias-addrspace-undefine-matadata.mir
new file mode 100644
index 0000000..505b514
--- /dev/null
+++ b/llvm/test/CodeGen/MIR/AMDGPU/noalias-addrspace-undefine-matadata.mir
@@ -0,0 +1,28 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: not llc -mtriple=amdgcn -mcpu=gfx1200 -run-pass none -o /dev/null %s 2>&1 | FileCheck %s
+
+
+--- |
+ define void @undefined_metadata(ptr %ptr, float %data) {
+ %1 = atomicrmw fadd ptr %ptr, float %data syncscope("agent") seq_cst, align 4, !noalias.addrspace !0
+ ret void
+ }
+
+ !0 = !{i32 5, i32 6}
+...
+
+---
+name: undefined_metadata
+
+body: |
+ bb.1 (%ir-block.0):
+ liveins: $vgpr0, $vgpr1, $vgpr2
+
+ ; CHECK: use of undefined metadata '!3'
+ %2:vgpr_32 = COPY $vgpr0
+ %3:vgpr_32 = COPY $vgpr1
+ %0:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1
+ %1:vgpr_32 = COPY $vgpr2
+ FLAT_ATOMIC_ADD_F32 %0, %1, 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr, !noalias.addrspace !3)
+ S_ENDPGM 0
+...
diff --git a/llvm/test/CodeGen/MIR/X86/call-site-info-ambiguous-indirect-call-typeid.mir b/llvm/test/CodeGen/MIR/X86/call-site-info-ambiguous-indirect-call-typeid.mir
new file mode 100644
index 0000000..cb78898
--- /dev/null
+++ b/llvm/test/CodeGen/MIR/X86/call-site-info-ambiguous-indirect-call-typeid.mir
@@ -0,0 +1,31 @@
+# Test MIR printer and parser to check if a call instruction with multiple
+# callee types are handled correctly.
+
+# RUN: llc -mtriple=x86_64 --call-graph-section %s -run-pass=none -o - | FileCheck --match-full-lines %s
+# CHECK: name: ambiguous_caller
+# CHECK: callSites:
+# CHECK-NEXT: - { bb: {{.*}}, offset: {{.*}}, fwdArgRegs: {{.*}}, calleeTypeIds:
+# CHECK-NEXT: [ 1234, 5678 ] }
+
+--- |
+ define ptr @ambiguous_caller() {
+ entry:
+ %fn = alloca ptr, align 8
+ %call1 = call ptr %fn(i64 4), !callee_type !0
+ ret ptr %call1
+ }
+
+ !0 = !{!1, !2}
+ !1 = !{i64 0, !"callee_type0.generalized"}
+ !2 = !{i64 0, !"callee_type2.generalized"}
+...
+---
+name: ambiguous_caller
+callSites:
+ - { bb: 0, offset: 1, fwdArgRegs: [], calleeTypeIds: [ 1234, 5678 ] }
+body: |
+ bb.0.entry:
+ %0:gr64 = MOV32ri64 4
+ CALL64r killed %0, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def $rax
+ RET 0, $rax
+...
diff --git a/llvm/test/CodeGen/MIR/X86/call-site-info-direct-calls-typeid.mir b/llvm/test/CodeGen/MIR/X86/call-site-info-direct-calls-typeid.mir
new file mode 100644
index 0000000..faa021c
--- /dev/null
+++ b/llvm/test/CodeGen/MIR/X86/call-site-info-direct-calls-typeid.mir
@@ -0,0 +1,54 @@
+# Test MIR printer and parser to NOT have `CalleeTypeIds` field in callSites.
+# `CalleeTypeId` is used for propagating call site type identifiers for
+# indirect targets only. This test does not contain any indirect targets.
+
+# RUN: llc -mtriple=x86_64 --call-graph-section %s -run-pass=none -o - | FileCheck --match-full-lines %s
+# CHECK-NOT: calleeTypeIds
+# CHECK: name: bar
+# CHECK: callSites:
+# CHECK-NEXT: - { bb: {{.*}}, offset: {{.*}}, fwdArgRegs: [] }
+# CHECK-NEXT: - { bb: {{.*}}, offset: {{.*}}, fwdArgRegs: [] }
+# CHECK: name: foo
+# CHECK: callSites:
+# CHECK-NEXT: - { bb: {{.*}}, offset: {{.*}}, fwdArgRegs: [] }
+
+--- |
+ declare i32 @fizz(i32, i32)
+
+ declare i32 @buzz(i32, i32)
+
+ define i32 @bar(i32 %x, i32 %y) !type !0 {
+ entry:
+ %call = call i32 @buzz(i32 %x, i32 %x)
+ %call1 = call i32 @fizz(i32 %x, i32 %x)
+ ret i32 0
+ }
+
+ define i32 @foo(i32 %x, i32 %y) !type !0 {
+ entry:
+ %call1 = call i32 @bar(i32 %x, i32 %x)
+ ret i32 0
+ }
+
+ !0 = !{i64 0, !"_ZTSFiiiE.generalized"}
+...
+---
+name: bar
+callSites:
+ - { bb: 0, offset: 0, fwdArgRegs: [] }
+ - { bb: 0, offset: 1, fwdArgRegs: [] }
+body: |
+ bb.0.entry:
+ CALL64pcrel32 target-flags(x86-plt) @buzz, csr_64, implicit $rsp, implicit $ssp, implicit $edi, implicit $esi, implicit-def $rsp, implicit-def $ssp, implicit-def $eax
+ CALL64pcrel32 target-flags(x86-plt) @fizz, csr_64, implicit $rsp, implicit $ssp, implicit $edi, implicit $esi, implicit-def $rsp, implicit-def $ssp, implicit-def $eax
+
+...
+---
+name: foo
+callSites:
+ - { bb: 0, offset: 0, fwdArgRegs: [] }
+body: |
+ bb.0.entry:
+ CALL64pcrel32 target-flags(x86-plt) @bar, csr_64, implicit $rsp, implicit $ssp, implicit $edi, implicit $esi, implicit-def $rsp, implicit-def $ssp, implicit-def $eax
+
+...
diff --git a/llvm/test/CodeGen/MIR/X86/call-site-info-typeid.mir b/llvm/test/CodeGen/MIR/X86/call-site-info-typeid.mir
new file mode 100644
index 0000000..303b8fa
--- /dev/null
+++ b/llvm/test/CodeGen/MIR/X86/call-site-info-typeid.mir
@@ -0,0 +1,28 @@
+# Test MIR printer and parser for type id field in callSites. It is used
+# for propagating call site type identifiers to emit in the call graph section.
+
+# RUN: llc -mtriple=x86_64 --call-graph-section %s -run-pass=none -o - | FileCheck --match-full-lines %s
+# CHECK: name: call_foo
+# CHECK: callSites:
+# CHECK-NEXT: - { bb: {{.*}}, offset: {{.*}}, fwdArgRegs: [], calleeTypeIds:
+# CHECK-NEXT: [ 123456789 ] }
+
+--- |
+ define i32 @call_foo() {
+ entry:
+ %0 = load ptr, ptr null, align 8
+ call void %0(i8 0), !callee_type !0
+ ret i32 0
+ }
+
+ !0 = !{!1}
+ !1 = !{i64 0, !"_ZTSFvcE.generalized"}
+...
+---
+name: call_foo
+callSites:
+ - { bb: 0, offset: 0, fwdArgRegs: [], calleeTypeIds: [ 123456789 ] }
+body: |
+ bb.0.entry:
+ CALL64m $noreg, 1, $noreg, 0, $noreg, csr_64, implicit $rsp, implicit $ssp, implicit $edi, implicit-def $rsp, implicit-def $ssp :: (load (s64) from `ptr null`)
+...
diff --git a/llvm/test/CodeGen/Mips/abiflags-soft-float.ll b/llvm/test/CodeGen/Mips/abiflags-soft-float.ll
new file mode 100644
index 0000000..01821f2
--- /dev/null
+++ b/llvm/test/CodeGen/Mips/abiflags-soft-float.ll
@@ -0,0 +1,12 @@
+; RUN: llc -filetype=obj -mtriple mipsel-unknown-linux -mcpu=mips32 %s -o tmp.o
+; RUN: llvm-readobj -A tmp.o | FileCheck %s -check-prefix=OBJ
+; RUN: llc -filetype=asm -mtriple mipsel-unknown-linux -mcpu=mips32 %s -o - | \
+; RUN: FileCheck %s -check-prefix=ASM
+
+; OBJ: FP ABI: Soft float
+; ASM: .module softfloat
+
+define dso_local void @asm_is_null() "use-soft-float"="true" {
+ call void asm sideeffect "", ""()
+ ret void
+}
diff --git a/llvm/test/CodeGen/Mips/nan_lowering.ll b/llvm/test/CodeGen/Mips/nan_lowering.ll
new file mode 100644
index 0000000..2a11278
--- /dev/null
+++ b/llvm/test/CodeGen/Mips/nan_lowering.ll
@@ -0,0 +1,25 @@
+; RUN: llc -mtriple=mips-linux-gnu -mattr=-nan2008 < %s | FileCheck %s
+; RUN: llc -mtriple=mips-linux-gnu -mattr=+nan2008 < %s | FileCheck %s
+
+; Make sure that lowering does not corrupt the value of NaN values,
+; regardless of what the NaN mode is.
+
+define float @test1() {
+; CHECK: .4byte 0x7fc00000
+ ret float bitcast (i32 u0x7fc00000 to float)
+}
+
+define float @test2() {
+; CHECK: .4byte 0x7fc00001
+ ret float bitcast (i32 u0x7fc00001 to float)
+}
+
+define float @test3() {
+; CHECK: .4byte 0x7f800000
+ ret float bitcast (i32 u0x7f800000 to float)
+}
+
+define float @test4() {
+; CHECK: .4byte 0x7f800001
+ ret float bitcast (i32 u0x7f800001 to float)
+}
diff --git a/llvm/test/CodeGen/Mips/qnan.ll b/llvm/test/CodeGen/Mips/qnan.ll
deleted file mode 100644
index e5b4aa1..0000000
--- a/llvm/test/CodeGen/Mips/qnan.ll
+++ /dev/null
@@ -1,14 +0,0 @@
-; RUN: llc -O3 -mcpu=mips32r2 -mtriple=mips-linux-gnu < %s -o - | FileCheck %s -check-prefixes=MIPS_Legacy
-; RUN: llc -O3 -mcpu=mips32r2 -mtriple=mips-linux-gnu -mattr=+nan2008 < %s -o - | FileCheck %s -check-prefixes=MIPS_NaN2008
-
-define dso_local float @nan(float noundef %a, float noundef %b) local_unnamed_addr #0 {
-; MIPS_Legacy: $CPI0_0:
-; MIPS_Legacy-NEXT: .4byte 0x7fa00000 # float NaN
-
-; MIPS_NaN2008: $CPI0_0:
-; MIPS_NaN2008-NEXT: .4byte 0x7fc00000 # float NaN
-
-entry:
- %0 = tail call float @llvm.minimum.f32(float %a, float %b)
- ret float %0
-}
diff --git a/llvm/test/CodeGen/NVPTX/aggregate-return.ll b/llvm/test/CodeGen/NVPTX/aggregate-return.ll
index 7f52e52..abc873e 100644
--- a/llvm/test/CodeGen/NVPTX/aggregate-return.ll
+++ b/llvm/test/CodeGen/NVPTX/aggregate-return.ll
@@ -16,8 +16,8 @@ define void @test_v2f32(<2 x float> %input, ptr %output) {
; CHECK-NEXT: ld.param.b64 %rd1, [test_v2f32_param_0];
; CHECK-NEXT: { // callseq 0, 0
; CHECK-NEXT: .param .align 8 .b8 param0[8];
-; CHECK-NEXT: st.param.b64 [param0], %rd1;
; CHECK-NEXT: .param .align 8 .b8 retval0[8];
+; CHECK-NEXT: st.param.b64 [param0], %rd1;
; CHECK-NEXT: call.uni (retval0), barv, (param0);
; CHECK-NEXT: ld.param.b64 %rd2, [retval0];
; CHECK-NEXT: } // callseq 0
@@ -32,24 +32,24 @@ define void @test_v2f32(<2 x float> %input, ptr %output) {
define void @test_v3f32(<3 x float> %input, ptr %output) {
; CHECK-LABEL: test_v3f32(
; CHECK: {
-; CHECK-NEXT: .reg .b32 %r<10>;
-; CHECK-NEXT: .reg .b64 %rd<2>;
+; CHECK-NEXT: .reg .b32 %r<4>;
+; CHECK-NEXT: .reg .b64 %rd<5>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_v3f32_param_0];
-; CHECK-NEXT: ld.param.b32 %r3, [test_v3f32_param_0+8];
+; CHECK-NEXT: ld.param.b64 %rd1, [test_v3f32_param_0];
+; CHECK-NEXT: ld.param.b32 %r1, [test_v3f32_param_0+8];
; CHECK-NEXT: { // callseq 1, 0
; CHECK-NEXT: .param .align 16 .b8 param0[16];
-; CHECK-NEXT: st.param.v2.b32 [param0], {%r1, %r2};
-; CHECK-NEXT: st.param.b32 [param0+8], %r3;
; CHECK-NEXT: .param .align 16 .b8 retval0[16];
+; CHECK-NEXT: st.param.b32 [param0+8], %r1;
+; CHECK-NEXT: st.param.b64 [param0], %rd1;
; CHECK-NEXT: call.uni (retval0), barv3, (param0);
-; CHECK-NEXT: ld.param.v2.b32 {%r4, %r5}, [retval0];
-; CHECK-NEXT: ld.param.b32 %r6, [retval0+8];
+; CHECK-NEXT: ld.param.b32 %r2, [retval0+8];
+; CHECK-NEXT: ld.param.b64 %rd2, [retval0];
; CHECK-NEXT: } // callseq 1
-; CHECK-NEXT: ld.param.b64 %rd1, [test_v3f32_param_1];
-; CHECK-NEXT: st.v2.b32 [%rd1], {%r4, %r5};
-; CHECK-NEXT: st.b32 [%rd1+8], %r6;
+; CHECK-NEXT: ld.param.b64 %rd4, [test_v3f32_param_1];
+; CHECK-NEXT: st.b32 [%rd4+8], %r2;
+; CHECK-NEXT: st.b64 [%rd4], %rd2;
; CHECK-NEXT: ret;
%call = tail call <3 x float> @barv3(<3 x float> %input)
; Make sure we don't load more values than than we need to.
@@ -68,16 +68,16 @@ define void @test_a2f32([2 x float] %input, ptr %output) {
; CHECK-NEXT: ld.param.b32 %r2, [test_a2f32_param_0+4];
; CHECK-NEXT: { // callseq 2, 0
; CHECK-NEXT: .param .align 4 .b8 param0[8];
-; CHECK-NEXT: st.param.b32 [param0], %r1;
-; CHECK-NEXT: st.param.b32 [param0+4], %r2;
; CHECK-NEXT: .param .align 4 .b8 retval0[8];
+; CHECK-NEXT: st.param.b32 [param0+4], %r2;
+; CHECK-NEXT: st.param.b32 [param0], %r1;
; CHECK-NEXT: call.uni (retval0), bara, (param0);
-; CHECK-NEXT: ld.param.b32 %r3, [retval0];
-; CHECK-NEXT: ld.param.b32 %r4, [retval0+4];
+; CHECK-NEXT: ld.param.b32 %r3, [retval0+4];
+; CHECK-NEXT: ld.param.b32 %r4, [retval0];
; CHECK-NEXT: } // callseq 2
; CHECK-NEXT: ld.param.b64 %rd1, [test_a2f32_param_1];
-; CHECK-NEXT: st.b32 [%rd1+4], %r4;
-; CHECK-NEXT: st.b32 [%rd1], %r3;
+; CHECK-NEXT: st.b32 [%rd1+4], %r3;
+; CHECK-NEXT: st.b32 [%rd1], %r4;
; CHECK-NEXT: ret;
%call = tail call [2 x float] @bara([2 x float] %input)
store [2 x float] %call, ptr %output, align 4
@@ -95,16 +95,16 @@ define void @test_s2f32({float, float} %input, ptr %output) {
; CHECK-NEXT: ld.param.b32 %r2, [test_s2f32_param_0+4];
; CHECK-NEXT: { // callseq 3, 0
; CHECK-NEXT: .param .align 4 .b8 param0[8];
-; CHECK-NEXT: st.param.b32 [param0], %r1;
-; CHECK-NEXT: st.param.b32 [param0+4], %r2;
; CHECK-NEXT: .param .align 4 .b8 retval0[8];
+; CHECK-NEXT: st.param.b32 [param0+4], %r2;
+; CHECK-NEXT: st.param.b32 [param0], %r1;
; CHECK-NEXT: call.uni (retval0), bars, (param0);
-; CHECK-NEXT: ld.param.b32 %r3, [retval0];
-; CHECK-NEXT: ld.param.b32 %r4, [retval0+4];
+; CHECK-NEXT: ld.param.b32 %r3, [retval0+4];
+; CHECK-NEXT: ld.param.b32 %r4, [retval0];
; CHECK-NEXT: } // callseq 3
; CHECK-NEXT: ld.param.b64 %rd1, [test_s2f32_param_1];
-; CHECK-NEXT: st.b32 [%rd1+4], %r4;
-; CHECK-NEXT: st.b32 [%rd1], %r3;
+; CHECK-NEXT: st.b32 [%rd1+4], %r3;
+; CHECK-NEXT: st.b32 [%rd1], %r4;
; CHECK-NEXT: ret;
%call = tail call {float, float} @bars({float, float} %input)
store {float, float} %call, ptr %output, align 4
diff --git a/llvm/test/CodeGen/NVPTX/bf16x2-instructions.ll b/llvm/test/CodeGen/NVPTX/bf16x2-instructions.ll
index ba5813c..b4641d0 100644
--- a/llvm/test/CodeGen/NVPTX/bf16x2-instructions.ll
+++ b/llvm/test/CodeGen/NVPTX/bf16x2-instructions.ll
@@ -208,13 +208,13 @@ define <2 x bfloat> @test_call(<2 x bfloat> %a, <2 x bfloat> %b) #0 {
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [test_call_param_0];
-; CHECK-NEXT: ld.param.b32 %r2, [test_call_param_1];
; CHECK-NEXT: { // callseq 0, 0
; CHECK-NEXT: .param .align 4 .b8 param0[4];
-; CHECK-NEXT: st.param.b32 [param0], %r1;
; CHECK-NEXT: .param .align 4 .b8 param1[4];
-; CHECK-NEXT: st.param.b32 [param1], %r2;
; CHECK-NEXT: .param .align 4 .b8 retval0[4];
+; CHECK-NEXT: ld.param.b32 %r2, [test_call_param_1];
+; CHECK-NEXT: st.param.b32 [param1], %r2;
+; CHECK-NEXT: st.param.b32 [param0], %r1;
; CHECK-NEXT: call.uni (retval0), test_callee, (param0, param1);
; CHECK-NEXT: ld.param.b32 %r3, [retval0];
; CHECK-NEXT: } // callseq 0
diff --git a/llvm/test/CodeGen/NVPTX/byval-const-global.ll b/llvm/test/CodeGen/NVPTX/byval-const-global.ll
index ad9e4b0..b4934e1a 100644
--- a/llvm/test/CodeGen/NVPTX/byval-const-global.ll
+++ b/llvm/test/CodeGen/NVPTX/byval-const-global.ll
@@ -13,12 +13,12 @@ define void @foo() {
; CHECK-NEXT: .reg .b64 %rd<3>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: ld.global.b64 %rd1, [G];
-; CHECK-NEXT: ld.global.b64 %rd2, [G+8];
; CHECK-NEXT: { // callseq 0, 0
; CHECK-NEXT: .param .align 8 .b8 param0[16];
-; CHECK-NEXT: st.param.b64 [param0], %rd1;
-; CHECK-NEXT: st.param.b64 [param0+8], %rd2;
+; CHECK-NEXT: ld.global.b64 %rd1, [G+8];
+; CHECK-NEXT: st.param.b64 [param0+8], %rd1;
+; CHECK-NEXT: ld.global.b64 %rd2, [G];
+; CHECK-NEXT: st.param.b64 [param0], %rd2;
; CHECK-NEXT: call.uni bar, (param0);
; CHECK-NEXT: } // callseq 0
; CHECK-NEXT: ret;
diff --git a/llvm/test/CodeGen/NVPTX/call-with-alloca-buffer.ll b/llvm/test/CodeGen/NVPTX/call-with-alloca-buffer.ll
index 0cd7058..0eb7f64 100644
--- a/llvm/test/CodeGen/NVPTX/call-with-alloca-buffer.ll
+++ b/llvm/test/CodeGen/NVPTX/call-with-alloca-buffer.ll
@@ -44,11 +44,11 @@ entry:
%arrayidx7 = getelementptr inbounds [16 x i8], ptr %buf, i64 0, i64 3
store float %3, ptr %arrayidx7, align 4
-; CHECK: .param .b64 param0;
-; CHECK-NEXT: st.param.b64 [param0], %rd[[A_REG]]
-; CHECK-NEXT: .param .b64 param1;
-; CHECK-NEXT: st.param.b64 [param1], %rd[[SP_REG]]
-; CHECK-NEXT: call.uni callee,
+; CHECK-DAG: .param .b64 param0;
+; CHECK-DAG: .param .b64 param1;
+; CHECK-DAG: st.param.b64 [param0], %rd[[A_REG]]
+; CHECK-DAG: st.param.b64 [param1], %rd[[SP_REG]]
+; CHECK: call.uni callee,
call void @callee(ptr %a, ptr %buf) #2
ret void
diff --git a/llvm/test/CodeGen/NVPTX/call_bitcast_byval.ll b/llvm/test/CodeGen/NVPTX/call_bitcast_byval.ll
index f67145d..483d48a 100644
--- a/llvm/test/CodeGen/NVPTX/call_bitcast_byval.ll
+++ b/llvm/test/CodeGen/NVPTX/call_bitcast_byval.ll
@@ -14,11 +14,11 @@ target triple = "nvptx64-nvidia-cuda"
%complex_half = type { half, half }
; CHECK: .param .align 2 .b8 param2[4];
-; CHECK: st.param.b16 [param2], %rs1;
-; CHECK: st.param.b16 [param2+2], %rs2;
; CHECK: .param .align 2 .b8 retval0[4];
-; CHECK-NEXT: prototype_0 : .callprototype (.param .align 2 .b8 _[4]) _ (.param .b32 _, .param .b32 _, .param .align 2 .b8 _[4]);
-; CHECK-NEXT: call (retval0),
+; CHECK-DAG: st.param.b16 [param2], %rs{{[0-9]+}};
+; CHECK-DAG: st.param.b16 [param2+2], %rs{{[0-9]+}};
+; CHECK: prototype_0 : .callprototype (.param .align 2 .b8 _[4]) _ (.param .b32 _, .param .b32 _, .param .align 2 .b8 _[4]);
+; CHECK: call (retval0),
define weak_odr void @foo() {
entry:
%call.i.i.i = tail call %"class.complex" @_Z20__spirv_GroupCMulKHRjjN5__spv12complex_halfE(i32 0, i32 0, ptr byval(%"class.complex") null)
@@ -36,10 +36,10 @@ define internal void @callee(ptr byval(%"class.complex") %byval_arg) {
}
define void @boom() {
%fp = call ptr @usefp(ptr @callee)
- ; CHECK: .param .align 2 .b8 param0[4];
- ; CHECK: st.param.b16 [param0], %rs1;
- ; CHECK: st.param.b16 [param0+2], %rs2;
- ; CHECK: .callprototype ()_ (.param .align 2 .b8 _[4]);
+ ; CHECK-DAG: .param .align 2 .b8 param0[4];
+ ; CHECK-DAG: st.param.b16 [param0], %rs{{[0-9]+}};
+ ; CHECK-DAG: st.param.b16 [param0+2], %rs{{[0-9]+}};
+ ; CHECK-DAG: .callprototype ()_ (.param .align 2 .b8 _[4]);
call void %fp(ptr byval(%"class.complex") null)
ret void
}
diff --git a/llvm/test/CodeGen/NVPTX/combine-mad.ll b/llvm/test/CodeGen/NVPTX/combine-mad.ll
index 2232810..da303b7 100644
--- a/llvm/test/CodeGen/NVPTX/combine-mad.ll
+++ b/llvm/test/CodeGen/NVPTX/combine-mad.ll
@@ -199,10 +199,10 @@ define i32 @test_mad_multi_use(i32 %a, i32 %b, i32 %c) {
; CHECK-NEXT: add.s32 %r5, %r3, %r4;
; CHECK-NEXT: { // callseq 0, 0
; CHECK-NEXT: .param .b32 param0;
-; CHECK-NEXT: st.param.b32 [param0], %r3;
; CHECK-NEXT: .param .b32 param1;
-; CHECK-NEXT: st.param.b32 [param1], %r5;
; CHECK-NEXT: .param .b32 retval0;
+; CHECK-NEXT: st.param.b32 [param0], %r3;
+; CHECK-NEXT: st.param.b32 [param1], %r5;
; CHECK-NEXT: call.uni (retval0), use, (param0, param1);
; CHECK-NEXT: ld.param.b32 %r6, [retval0];
; CHECK-NEXT: } // callseq 0
diff --git a/llvm/test/CodeGen/NVPTX/compare-int.ll b/llvm/test/CodeGen/NVPTX/compare-int.ll
index b44ae47..9338172d 100644
--- a/llvm/test/CodeGen/NVPTX/compare-int.ll
+++ b/llvm/test/CodeGen/NVPTX/compare-int.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -mtriple=nvptx -mcpu=sm_20 | FileCheck %s
; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 | FileCheck %s
; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 | %ptxas-verify %}
@@ -11,90 +12,180 @@
;;; i64
define i64 @icmp_eq_i64(i64 %a, i64 %b) {
-; CHECK: setp.eq.b64 %p[[P0:[0-9]+]], %rd{{[0-9]+}}, %rd{{[0-9]+}}
-; CHECK: selp.b64 %rd{{[0-9]+}}, 1, 0, %p[[P0]]
-; CHECK: ret
+; CHECK-LABEL: icmp_eq_i64(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<2>;
+; CHECK-NEXT: .reg .b64 %rd<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b64 %rd1, [icmp_eq_i64_param_0];
+; CHECK-NEXT: ld.param.b64 %rd2, [icmp_eq_i64_param_1];
+; CHECK-NEXT: setp.eq.b64 %p1, %rd1, %rd2;
+; CHECK-NEXT: selp.b64 %rd3, 1, 0, %p1;
+; CHECK-NEXT: st.param.b64 [func_retval0], %rd3;
+; CHECK-NEXT: ret;
%cmp = icmp eq i64 %a, %b
%ret = zext i1 %cmp to i64
ret i64 %ret
}
define i64 @icmp_ne_i64(i64 %a, i64 %b) {
-; CHECK: setp.ne.b64 %p[[P0:[0-9]+]], %rd{{[0-9]+}}, %rd{{[0-9]+}}
-; CHECK: selp.b64 %rd{{[0-9]+}}, 1, 0, %p[[P0]]
-; CHECK: ret
+; CHECK-LABEL: icmp_ne_i64(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<2>;
+; CHECK-NEXT: .reg .b64 %rd<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b64 %rd1, [icmp_ne_i64_param_0];
+; CHECK-NEXT: ld.param.b64 %rd2, [icmp_ne_i64_param_1];
+; CHECK-NEXT: setp.ne.b64 %p1, %rd1, %rd2;
+; CHECK-NEXT: selp.b64 %rd3, 1, 0, %p1;
+; CHECK-NEXT: st.param.b64 [func_retval0], %rd3;
+; CHECK-NEXT: ret;
%cmp = icmp ne i64 %a, %b
%ret = zext i1 %cmp to i64
ret i64 %ret
}
define i64 @icmp_ugt_i64(i64 %a, i64 %b) {
-; CHECK: setp.gt.u64 %p[[P0:[0-9]+]], %rd{{[0-9]+}}, %rd{{[0-9]+}}
-; CHECK: selp.b64 %rd{{[0-9]+}}, 1, 0, %p[[P0]]
-; CHECK: ret
+; CHECK-LABEL: icmp_ugt_i64(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<2>;
+; CHECK-NEXT: .reg .b64 %rd<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b64 %rd1, [icmp_ugt_i64_param_0];
+; CHECK-NEXT: ld.param.b64 %rd2, [icmp_ugt_i64_param_1];
+; CHECK-NEXT: setp.gt.u64 %p1, %rd1, %rd2;
+; CHECK-NEXT: selp.b64 %rd3, 1, 0, %p1;
+; CHECK-NEXT: st.param.b64 [func_retval0], %rd3;
+; CHECK-NEXT: ret;
%cmp = icmp ugt i64 %a, %b
%ret = zext i1 %cmp to i64
ret i64 %ret
}
define i64 @icmp_uge_i64(i64 %a, i64 %b) {
-; CHECK: setp.ge.u64 %p[[P0:[0-9]+]], %rd{{[0-9]+}}, %rd{{[0-9]+}}
-; CHECK: selp.b64 %rd{{[0-9]+}}, 1, 0, %p[[P0]]
-; CHECK: ret
+; CHECK-LABEL: icmp_uge_i64(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<2>;
+; CHECK-NEXT: .reg .b64 %rd<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b64 %rd1, [icmp_uge_i64_param_0];
+; CHECK-NEXT: ld.param.b64 %rd2, [icmp_uge_i64_param_1];
+; CHECK-NEXT: setp.ge.u64 %p1, %rd1, %rd2;
+; CHECK-NEXT: selp.b64 %rd3, 1, 0, %p1;
+; CHECK-NEXT: st.param.b64 [func_retval0], %rd3;
+; CHECK-NEXT: ret;
%cmp = icmp uge i64 %a, %b
%ret = zext i1 %cmp to i64
ret i64 %ret
}
define i64 @icmp_ult_i64(i64 %a, i64 %b) {
-; CHECK: setp.lt.u64 %p[[P0:[0-9]+]], %rd{{[0-9]+}}, %rd{{[0-9]+}}
-; CHECK: selp.b64 %rd{{[0-9]+}}, 1, 0, %p[[P0]]
-; CHECK: ret
+; CHECK-LABEL: icmp_ult_i64(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<2>;
+; CHECK-NEXT: .reg .b64 %rd<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b64 %rd1, [icmp_ult_i64_param_0];
+; CHECK-NEXT: ld.param.b64 %rd2, [icmp_ult_i64_param_1];
+; CHECK-NEXT: setp.lt.u64 %p1, %rd1, %rd2;
+; CHECK-NEXT: selp.b64 %rd3, 1, 0, %p1;
+; CHECK-NEXT: st.param.b64 [func_retval0], %rd3;
+; CHECK-NEXT: ret;
%cmp = icmp ult i64 %a, %b
%ret = zext i1 %cmp to i64
ret i64 %ret
}
define i64 @icmp_ule_i64(i64 %a, i64 %b) {
-; CHECK: setp.le.u64 %p[[P0:[0-9]+]], %rd{{[0-9]+}}, %rd{{[0-9]+}}
-; CHECK: selp.b64 %rd{{[0-9]+}}, 1, 0, %p[[P0]]
-; CHECK: ret
+; CHECK-LABEL: icmp_ule_i64(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<2>;
+; CHECK-NEXT: .reg .b64 %rd<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b64 %rd1, [icmp_ule_i64_param_0];
+; CHECK-NEXT: ld.param.b64 %rd2, [icmp_ule_i64_param_1];
+; CHECK-NEXT: setp.le.u64 %p1, %rd1, %rd2;
+; CHECK-NEXT: selp.b64 %rd3, 1, 0, %p1;
+; CHECK-NEXT: st.param.b64 [func_retval0], %rd3;
+; CHECK-NEXT: ret;
%cmp = icmp ule i64 %a, %b
%ret = zext i1 %cmp to i64
ret i64 %ret
}
define i64 @icmp_sgt_i64(i64 %a, i64 %b) {
-; CHECK: setp.gt.s64 %p[[P0:[0-9]+]], %rd{{[0-9]+}}, %rd{{[0-9]+}}
-; CHECK: selp.b64 %rd{{[0-9]+}}, 1, 0, %p[[P0]]
-; CHECK: ret
+; CHECK-LABEL: icmp_sgt_i64(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<2>;
+; CHECK-NEXT: .reg .b64 %rd<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b64 %rd1, [icmp_sgt_i64_param_0];
+; CHECK-NEXT: ld.param.b64 %rd2, [icmp_sgt_i64_param_1];
+; CHECK-NEXT: setp.gt.s64 %p1, %rd1, %rd2;
+; CHECK-NEXT: selp.b64 %rd3, 1, 0, %p1;
+; CHECK-NEXT: st.param.b64 [func_retval0], %rd3;
+; CHECK-NEXT: ret;
%cmp = icmp sgt i64 %a, %b
%ret = zext i1 %cmp to i64
ret i64 %ret
}
define i64 @icmp_sge_i64(i64 %a, i64 %b) {
-; CHECK: setp.ge.s64 %p[[P0:[0-9]+]], %rd{{[0-9]+}}, %rd{{[0-9]+}}
-; CHECK: selp.b64 %rd{{[0-9]+}}, 1, 0, %p[[P0]]
-; CHECK: ret
+; CHECK-LABEL: icmp_sge_i64(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<2>;
+; CHECK-NEXT: .reg .b64 %rd<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b64 %rd1, [icmp_sge_i64_param_0];
+; CHECK-NEXT: ld.param.b64 %rd2, [icmp_sge_i64_param_1];
+; CHECK-NEXT: setp.ge.s64 %p1, %rd1, %rd2;
+; CHECK-NEXT: selp.b64 %rd3, 1, 0, %p1;
+; CHECK-NEXT: st.param.b64 [func_retval0], %rd3;
+; CHECK-NEXT: ret;
%cmp = icmp sge i64 %a, %b
%ret = zext i1 %cmp to i64
ret i64 %ret
}
define i64 @icmp_slt_i64(i64 %a, i64 %b) {
-; CHECK: setp.lt.s64 %p[[P0:[0-9]+]], %rd{{[0-9]+}}, %rd{{[0-9]+}}
-; CHECK: selp.b64 %rd{{[0-9]+}}, 1, 0, %p[[P0]]
-; CHECK: ret
+; CHECK-LABEL: icmp_slt_i64(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<2>;
+; CHECK-NEXT: .reg .b64 %rd<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b64 %rd1, [icmp_slt_i64_param_0];
+; CHECK-NEXT: ld.param.b64 %rd2, [icmp_slt_i64_param_1];
+; CHECK-NEXT: setp.lt.s64 %p1, %rd1, %rd2;
+; CHECK-NEXT: selp.b64 %rd3, 1, 0, %p1;
+; CHECK-NEXT: st.param.b64 [func_retval0], %rd3;
+; CHECK-NEXT: ret;
%cmp = icmp slt i64 %a, %b
%ret = zext i1 %cmp to i64
ret i64 %ret
}
define i64 @icmp_sle_i64(i64 %a, i64 %b) {
-; CHECK: setp.le.s64 %p[[P0:[0-9]+]], %rd{{[0-9]+}}, %rd{{[0-9]+}}
-; CHECK: selp.b64 %rd{{[0-9]+}}, 1, 0, %p[[P0]]
-; CHECK: ret
+; CHECK-LABEL: icmp_sle_i64(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<2>;
+; CHECK-NEXT: .reg .b64 %rd<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b64 %rd1, [icmp_sle_i64_param_0];
+; CHECK-NEXT: ld.param.b64 %rd2, [icmp_sle_i64_param_1];
+; CHECK-NEXT: setp.le.s64 %p1, %rd1, %rd2;
+; CHECK-NEXT: selp.b64 %rd3, 1, 0, %p1;
+; CHECK-NEXT: st.param.b64 [func_retval0], %rd3;
+; CHECK-NEXT: ret;
%cmp = icmp sle i64 %a, %b
%ret = zext i1 %cmp to i64
ret i64 %ret
@@ -103,90 +194,180 @@ define i64 @icmp_sle_i64(i64 %a, i64 %b) {
;;; i32
define i32 @icmp_eq_i32(i32 %a, i32 %b) {
-; CHECK: setp.eq.b32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}}
-; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
-; CHECK: ret
+; CHECK-LABEL: icmp_eq_i32(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<2>;
+; CHECK-NEXT: .reg .b32 %r<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b32 %r1, [icmp_eq_i32_param_0];
+; CHECK-NEXT: ld.param.b32 %r2, [icmp_eq_i32_param_1];
+; CHECK-NEXT: setp.eq.b32 %p1, %r1, %r2;
+; CHECK-NEXT: selp.b32 %r3, 1, 0, %p1;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
+; CHECK-NEXT: ret;
%cmp = icmp eq i32 %a, %b
%ret = zext i1 %cmp to i32
ret i32 %ret
}
define i32 @icmp_ne_i32(i32 %a, i32 %b) {
-; CHECK: setp.ne.b32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}}
-; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
-; CHECK: ret
+; CHECK-LABEL: icmp_ne_i32(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<2>;
+; CHECK-NEXT: .reg .b32 %r<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b32 %r1, [icmp_ne_i32_param_0];
+; CHECK-NEXT: ld.param.b32 %r2, [icmp_ne_i32_param_1];
+; CHECK-NEXT: setp.ne.b32 %p1, %r1, %r2;
+; CHECK-NEXT: selp.b32 %r3, 1, 0, %p1;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
+; CHECK-NEXT: ret;
%cmp = icmp ne i32 %a, %b
%ret = zext i1 %cmp to i32
ret i32 %ret
}
define i32 @icmp_ugt_i32(i32 %a, i32 %b) {
-; CHECK: setp.gt.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}}
-; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
-; CHECK: ret
+; CHECK-LABEL: icmp_ugt_i32(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<2>;
+; CHECK-NEXT: .reg .b32 %r<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b32 %r1, [icmp_ugt_i32_param_0];
+; CHECK-NEXT: ld.param.b32 %r2, [icmp_ugt_i32_param_1];
+; CHECK-NEXT: setp.gt.u32 %p1, %r1, %r2;
+; CHECK-NEXT: selp.b32 %r3, 1, 0, %p1;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
+; CHECK-NEXT: ret;
%cmp = icmp ugt i32 %a, %b
%ret = zext i1 %cmp to i32
ret i32 %ret
}
define i32 @icmp_uge_i32(i32 %a, i32 %b) {
-; CHECK: setp.ge.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}}
-; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
-; CHECK: ret
+; CHECK-LABEL: icmp_uge_i32(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<2>;
+; CHECK-NEXT: .reg .b32 %r<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b32 %r1, [icmp_uge_i32_param_0];
+; CHECK-NEXT: ld.param.b32 %r2, [icmp_uge_i32_param_1];
+; CHECK-NEXT: setp.ge.u32 %p1, %r1, %r2;
+; CHECK-NEXT: selp.b32 %r3, 1, 0, %p1;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
+; CHECK-NEXT: ret;
%cmp = icmp uge i32 %a, %b
%ret = zext i1 %cmp to i32
ret i32 %ret
}
define i32 @icmp_ult_i32(i32 %a, i32 %b) {
-; CHECK: setp.lt.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}}
-; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
-; CHECK: ret
+; CHECK-LABEL: icmp_ult_i32(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<2>;
+; CHECK-NEXT: .reg .b32 %r<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b32 %r1, [icmp_ult_i32_param_0];
+; CHECK-NEXT: ld.param.b32 %r2, [icmp_ult_i32_param_1];
+; CHECK-NEXT: setp.lt.u32 %p1, %r1, %r2;
+; CHECK-NEXT: selp.b32 %r3, 1, 0, %p1;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
+; CHECK-NEXT: ret;
%cmp = icmp ult i32 %a, %b
%ret = zext i1 %cmp to i32
ret i32 %ret
}
define i32 @icmp_ule_i32(i32 %a, i32 %b) {
-; CHECK: setp.le.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}}
-; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
-; CHECK: ret
+; CHECK-LABEL: icmp_ule_i32(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<2>;
+; CHECK-NEXT: .reg .b32 %r<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b32 %r1, [icmp_ule_i32_param_0];
+; CHECK-NEXT: ld.param.b32 %r2, [icmp_ule_i32_param_1];
+; CHECK-NEXT: setp.le.u32 %p1, %r1, %r2;
+; CHECK-NEXT: selp.b32 %r3, 1, 0, %p1;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
+; CHECK-NEXT: ret;
%cmp = icmp ule i32 %a, %b
%ret = zext i1 %cmp to i32
ret i32 %ret
}
define i32 @icmp_sgt_i32(i32 %a, i32 %b) {
-; CHECK: setp.gt.s32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}}
-; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
-; CHECK: ret
+; CHECK-LABEL: icmp_sgt_i32(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<2>;
+; CHECK-NEXT: .reg .b32 %r<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b32 %r1, [icmp_sgt_i32_param_0];
+; CHECK-NEXT: ld.param.b32 %r2, [icmp_sgt_i32_param_1];
+; CHECK-NEXT: setp.gt.s32 %p1, %r1, %r2;
+; CHECK-NEXT: selp.b32 %r3, 1, 0, %p1;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
+; CHECK-NEXT: ret;
%cmp = icmp sgt i32 %a, %b
%ret = zext i1 %cmp to i32
ret i32 %ret
}
define i32 @icmp_sge_i32(i32 %a, i32 %b) {
-; CHECK: setp.ge.s32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}}
-; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
-; CHECK: ret
+; CHECK-LABEL: icmp_sge_i32(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<2>;
+; CHECK-NEXT: .reg .b32 %r<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b32 %r1, [icmp_sge_i32_param_0];
+; CHECK-NEXT: ld.param.b32 %r2, [icmp_sge_i32_param_1];
+; CHECK-NEXT: setp.ge.s32 %p1, %r1, %r2;
+; CHECK-NEXT: selp.b32 %r3, 1, 0, %p1;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
+; CHECK-NEXT: ret;
%cmp = icmp sge i32 %a, %b
%ret = zext i1 %cmp to i32
ret i32 %ret
}
define i32 @icmp_slt_i32(i32 %a, i32 %b) {
-; CHECK: setp.lt.s32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}}
-; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
-; CHECK: ret
+; CHECK-LABEL: icmp_slt_i32(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<2>;
+; CHECK-NEXT: .reg .b32 %r<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b32 %r1, [icmp_slt_i32_param_0];
+; CHECK-NEXT: ld.param.b32 %r2, [icmp_slt_i32_param_1];
+; CHECK-NEXT: setp.lt.s32 %p1, %r1, %r2;
+; CHECK-NEXT: selp.b32 %r3, 1, 0, %p1;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
+; CHECK-NEXT: ret;
%cmp = icmp slt i32 %a, %b
%ret = zext i1 %cmp to i32
ret i32 %ret
}
define i32 @icmp_sle_i32(i32 %a, i32 %b) {
-; CHECK: setp.le.s32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}}
-; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
-; CHECK: ret
+; CHECK-LABEL: icmp_sle_i32(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<2>;
+; CHECK-NEXT: .reg .b32 %r<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b32 %r1, [icmp_sle_i32_param_0];
+; CHECK-NEXT: ld.param.b32 %r2, [icmp_sle_i32_param_1];
+; CHECK-NEXT: setp.le.s32 %p1, %r1, %r2;
+; CHECK-NEXT: selp.b32 %r3, 1, 0, %p1;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r3;
+; CHECK-NEXT: ret;
%cmp = icmp sle i32 %a, %b
%ret = zext i1 %cmp to i32
ret i32 %ret
@@ -196,90 +377,190 @@ define i32 @icmp_sle_i32(i32 %a, i32 %b) {
;;; i16
define i16 @icmp_eq_i16(i16 %a, i16 %b) {
-; CHECK: setp.eq.b16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
-; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
-; CHECK: ret
+; CHECK-LABEL: icmp_eq_i16(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<2>;
+; CHECK-NEXT: .reg .b16 %rs<3>;
+; CHECK-NEXT: .reg .b32 %r<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b16 %rs1, [icmp_eq_i16_param_0];
+; CHECK-NEXT: ld.param.b16 %rs2, [icmp_eq_i16_param_1];
+; CHECK-NEXT: setp.eq.b16 %p1, %rs1, %rs2;
+; CHECK-NEXT: selp.b32 %r1, 1, 0, %p1;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
+; CHECK-NEXT: ret;
%cmp = icmp eq i16 %a, %b
%ret = zext i1 %cmp to i16
ret i16 %ret
}
define i16 @icmp_ne_i16(i16 %a, i16 %b) {
-; CHECK: setp.ne.b16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
-; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
-; CHECK: ret
+; CHECK-LABEL: icmp_ne_i16(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<2>;
+; CHECK-NEXT: .reg .b16 %rs<3>;
+; CHECK-NEXT: .reg .b32 %r<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b16 %rs1, [icmp_ne_i16_param_0];
+; CHECK-NEXT: ld.param.b16 %rs2, [icmp_ne_i16_param_1];
+; CHECK-NEXT: setp.ne.b16 %p1, %rs1, %rs2;
+; CHECK-NEXT: selp.b32 %r1, 1, 0, %p1;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
+; CHECK-NEXT: ret;
%cmp = icmp ne i16 %a, %b
%ret = zext i1 %cmp to i16
ret i16 %ret
}
define i16 @icmp_ugt_i16(i16 %a, i16 %b) {
-; CHECK: setp.gt.u16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
-; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
-; CHECK: ret
+; CHECK-LABEL: icmp_ugt_i16(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<2>;
+; CHECK-NEXT: .reg .b16 %rs<3>;
+; CHECK-NEXT: .reg .b32 %r<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b16 %rs1, [icmp_ugt_i16_param_0];
+; CHECK-NEXT: ld.param.b16 %rs2, [icmp_ugt_i16_param_1];
+; CHECK-NEXT: setp.gt.u16 %p1, %rs1, %rs2;
+; CHECK-NEXT: selp.b32 %r1, 1, 0, %p1;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
+; CHECK-NEXT: ret;
%cmp = icmp ugt i16 %a, %b
%ret = zext i1 %cmp to i16
ret i16 %ret
}
define i16 @icmp_uge_i16(i16 %a, i16 %b) {
-; CHECK: setp.ge.u16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
-; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
-; CHECK: ret
+; CHECK-LABEL: icmp_uge_i16(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<2>;
+; CHECK-NEXT: .reg .b16 %rs<3>;
+; CHECK-NEXT: .reg .b32 %r<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b16 %rs1, [icmp_uge_i16_param_0];
+; CHECK-NEXT: ld.param.b16 %rs2, [icmp_uge_i16_param_1];
+; CHECK-NEXT: setp.ge.u16 %p1, %rs1, %rs2;
+; CHECK-NEXT: selp.b32 %r1, 1, 0, %p1;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
+; CHECK-NEXT: ret;
%cmp = icmp uge i16 %a, %b
%ret = zext i1 %cmp to i16
ret i16 %ret
}
define i16 @icmp_ult_i16(i16 %a, i16 %b) {
-; CHECK: setp.lt.u16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
-; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
-; CHECK: ret
+; CHECK-LABEL: icmp_ult_i16(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<2>;
+; CHECK-NEXT: .reg .b16 %rs<3>;
+; CHECK-NEXT: .reg .b32 %r<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b16 %rs1, [icmp_ult_i16_param_0];
+; CHECK-NEXT: ld.param.b16 %rs2, [icmp_ult_i16_param_1];
+; CHECK-NEXT: setp.lt.u16 %p1, %rs1, %rs2;
+; CHECK-NEXT: selp.b32 %r1, 1, 0, %p1;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
+; CHECK-NEXT: ret;
%cmp = icmp ult i16 %a, %b
%ret = zext i1 %cmp to i16
ret i16 %ret
}
define i16 @icmp_ule_i16(i16 %a, i16 %b) {
-; CHECK: setp.le.u16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
-; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
-; CHECK: ret
+; CHECK-LABEL: icmp_ule_i16(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<2>;
+; CHECK-NEXT: .reg .b16 %rs<3>;
+; CHECK-NEXT: .reg .b32 %r<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b16 %rs1, [icmp_ule_i16_param_0];
+; CHECK-NEXT: ld.param.b16 %rs2, [icmp_ule_i16_param_1];
+; CHECK-NEXT: setp.le.u16 %p1, %rs1, %rs2;
+; CHECK-NEXT: selp.b32 %r1, 1, 0, %p1;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
+; CHECK-NEXT: ret;
%cmp = icmp ule i16 %a, %b
%ret = zext i1 %cmp to i16
ret i16 %ret
}
define i16 @icmp_sgt_i16(i16 %a, i16 %b) {
-; CHECK: setp.gt.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
-; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
-; CHECK: ret
+; CHECK-LABEL: icmp_sgt_i16(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<2>;
+; CHECK-NEXT: .reg .b16 %rs<3>;
+; CHECK-NEXT: .reg .b32 %r<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b16 %rs1, [icmp_sgt_i16_param_0];
+; CHECK-NEXT: ld.param.b16 %rs2, [icmp_sgt_i16_param_1];
+; CHECK-NEXT: setp.gt.s16 %p1, %rs1, %rs2;
+; CHECK-NEXT: selp.b32 %r1, 1, 0, %p1;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
+; CHECK-NEXT: ret;
%cmp = icmp sgt i16 %a, %b
%ret = zext i1 %cmp to i16
ret i16 %ret
}
define i16 @icmp_sge_i16(i16 %a, i16 %b) {
-; CHECK: setp.ge.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
-; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
-; CHECK: ret
+; CHECK-LABEL: icmp_sge_i16(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<2>;
+; CHECK-NEXT: .reg .b16 %rs<3>;
+; CHECK-NEXT: .reg .b32 %r<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b16 %rs1, [icmp_sge_i16_param_0];
+; CHECK-NEXT: ld.param.b16 %rs2, [icmp_sge_i16_param_1];
+; CHECK-NEXT: setp.ge.s16 %p1, %rs1, %rs2;
+; CHECK-NEXT: selp.b32 %r1, 1, 0, %p1;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
+; CHECK-NEXT: ret;
%cmp = icmp sge i16 %a, %b
%ret = zext i1 %cmp to i16
ret i16 %ret
}
define i16 @icmp_slt_i16(i16 %a, i16 %b) {
-; CHECK: setp.lt.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
-; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
-; CHECK: ret
+; CHECK-LABEL: icmp_slt_i16(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<2>;
+; CHECK-NEXT: .reg .b16 %rs<3>;
+; CHECK-NEXT: .reg .b32 %r<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b16 %rs1, [icmp_slt_i16_param_0];
+; CHECK-NEXT: ld.param.b16 %rs2, [icmp_slt_i16_param_1];
+; CHECK-NEXT: setp.lt.s16 %p1, %rs1, %rs2;
+; CHECK-NEXT: selp.b32 %r1, 1, 0, %p1;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
+; CHECK-NEXT: ret;
%cmp = icmp slt i16 %a, %b
%ret = zext i1 %cmp to i16
ret i16 %ret
}
define i16 @icmp_sle_i16(i16 %a, i16 %b) {
-; CHECK: setp.le.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
-; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
-; CHECK: ret
+; CHECK-LABEL: icmp_sle_i16(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<2>;
+; CHECK-NEXT: .reg .b16 %rs<3>;
+; CHECK-NEXT: .reg .b32 %r<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b16 %rs1, [icmp_sle_i16_param_0];
+; CHECK-NEXT: ld.param.b16 %rs2, [icmp_sle_i16_param_1];
+; CHECK-NEXT: setp.le.s16 %p1, %rs1, %rs2;
+; CHECK-NEXT: selp.b32 %r1, 1, 0, %p1;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
+; CHECK-NEXT: ret;
%cmp = icmp sle i16 %a, %b
%ret = zext i1 %cmp to i16
ret i16 %ret
@@ -290,9 +571,19 @@ define i16 @icmp_sle_i16(i16 %a, i16 %b) {
define i8 @icmp_eq_i8(i8 %a, i8 %b) {
; Comparison happens in 16-bit
-; CHECK: setp.eq.b16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
-; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
-; CHECK: ret
+; CHECK-LABEL: icmp_eq_i8(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<2>;
+; CHECK-NEXT: .reg .b16 %rs<3>;
+; CHECK-NEXT: .reg .b32 %r<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b8 %rs1, [icmp_eq_i8_param_0];
+; CHECK-NEXT: ld.param.b8 %rs2, [icmp_eq_i8_param_1];
+; CHECK-NEXT: setp.eq.b16 %p1, %rs1, %rs2;
+; CHECK-NEXT: selp.b32 %r1, 1, 0, %p1;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
+; CHECK-NEXT: ret;
%cmp = icmp eq i8 %a, %b
%ret = zext i1 %cmp to i8
ret i8 %ret
@@ -300,9 +591,19 @@ define i8 @icmp_eq_i8(i8 %a, i8 %b) {
define i8 @icmp_ne_i8(i8 %a, i8 %b) {
; Comparison happens in 16-bit
-; CHECK: setp.ne.b16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
-; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
-; CHECK: ret
+; CHECK-LABEL: icmp_ne_i8(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<2>;
+; CHECK-NEXT: .reg .b16 %rs<3>;
+; CHECK-NEXT: .reg .b32 %r<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b8 %rs1, [icmp_ne_i8_param_0];
+; CHECK-NEXT: ld.param.b8 %rs2, [icmp_ne_i8_param_1];
+; CHECK-NEXT: setp.ne.b16 %p1, %rs1, %rs2;
+; CHECK-NEXT: selp.b32 %r1, 1, 0, %p1;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
+; CHECK-NEXT: ret;
%cmp = icmp ne i8 %a, %b
%ret = zext i1 %cmp to i8
ret i8 %ret
@@ -310,9 +611,19 @@ define i8 @icmp_ne_i8(i8 %a, i8 %b) {
define i8 @icmp_ugt_i8(i8 %a, i8 %b) {
; Comparison happens in 16-bit
-; CHECK: setp.gt.u16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
-; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
-; CHECK: ret
+; CHECK-LABEL: icmp_ugt_i8(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<2>;
+; CHECK-NEXT: .reg .b16 %rs<3>;
+; CHECK-NEXT: .reg .b32 %r<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b8 %rs1, [icmp_ugt_i8_param_0];
+; CHECK-NEXT: ld.param.b8 %rs2, [icmp_ugt_i8_param_1];
+; CHECK-NEXT: setp.gt.u16 %p1, %rs1, %rs2;
+; CHECK-NEXT: selp.b32 %r1, 1, 0, %p1;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
+; CHECK-NEXT: ret;
%cmp = icmp ugt i8 %a, %b
%ret = zext i1 %cmp to i8
ret i8 %ret
@@ -320,9 +631,19 @@ define i8 @icmp_ugt_i8(i8 %a, i8 %b) {
define i8 @icmp_uge_i8(i8 %a, i8 %b) {
; Comparison happens in 16-bit
-; CHECK: setp.ge.u16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
-; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
-; CHECK: ret
+; CHECK-LABEL: icmp_uge_i8(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<2>;
+; CHECK-NEXT: .reg .b16 %rs<3>;
+; CHECK-NEXT: .reg .b32 %r<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b8 %rs1, [icmp_uge_i8_param_0];
+; CHECK-NEXT: ld.param.b8 %rs2, [icmp_uge_i8_param_1];
+; CHECK-NEXT: setp.ge.u16 %p1, %rs1, %rs2;
+; CHECK-NEXT: selp.b32 %r1, 1, 0, %p1;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
+; CHECK-NEXT: ret;
%cmp = icmp uge i8 %a, %b
%ret = zext i1 %cmp to i8
ret i8 %ret
@@ -330,9 +651,19 @@ define i8 @icmp_uge_i8(i8 %a, i8 %b) {
define i8 @icmp_ult_i8(i8 %a, i8 %b) {
; Comparison happens in 16-bit
-; CHECK: setp.lt.u16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
-; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
-; CHECK: ret
+; CHECK-LABEL: icmp_ult_i8(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<2>;
+; CHECK-NEXT: .reg .b16 %rs<3>;
+; CHECK-NEXT: .reg .b32 %r<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b8 %rs1, [icmp_ult_i8_param_0];
+; CHECK-NEXT: ld.param.b8 %rs2, [icmp_ult_i8_param_1];
+; CHECK-NEXT: setp.lt.u16 %p1, %rs1, %rs2;
+; CHECK-NEXT: selp.b32 %r1, 1, 0, %p1;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
+; CHECK-NEXT: ret;
%cmp = icmp ult i8 %a, %b
%ret = zext i1 %cmp to i8
ret i8 %ret
@@ -340,9 +671,19 @@ define i8 @icmp_ult_i8(i8 %a, i8 %b) {
define i8 @icmp_ule_i8(i8 %a, i8 %b) {
; Comparison happens in 16-bit
-; CHECK: setp.le.u16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
-; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
-; CHECK: ret
+; CHECK-LABEL: icmp_ule_i8(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<2>;
+; CHECK-NEXT: .reg .b16 %rs<3>;
+; CHECK-NEXT: .reg .b32 %r<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b8 %rs1, [icmp_ule_i8_param_0];
+; CHECK-NEXT: ld.param.b8 %rs2, [icmp_ule_i8_param_1];
+; CHECK-NEXT: setp.le.u16 %p1, %rs1, %rs2;
+; CHECK-NEXT: selp.b32 %r1, 1, 0, %p1;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
+; CHECK-NEXT: ret;
%cmp = icmp ule i8 %a, %b
%ret = zext i1 %cmp to i8
ret i8 %ret
@@ -350,9 +691,19 @@ define i8 @icmp_ule_i8(i8 %a, i8 %b) {
define i8 @icmp_sgt_i8(i8 %a, i8 %b) {
; Comparison happens in 16-bit
-; CHECK: setp.gt.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
-; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
-; CHECK: ret
+; CHECK-LABEL: icmp_sgt_i8(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<2>;
+; CHECK-NEXT: .reg .b16 %rs<3>;
+; CHECK-NEXT: .reg .b32 %r<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.s8 %rs1, [icmp_sgt_i8_param_0];
+; CHECK-NEXT: ld.param.s8 %rs2, [icmp_sgt_i8_param_1];
+; CHECK-NEXT: setp.gt.s16 %p1, %rs1, %rs2;
+; CHECK-NEXT: selp.b32 %r1, 1, 0, %p1;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
+; CHECK-NEXT: ret;
%cmp = icmp sgt i8 %a, %b
%ret = zext i1 %cmp to i8
ret i8 %ret
@@ -360,9 +711,19 @@ define i8 @icmp_sgt_i8(i8 %a, i8 %b) {
define i8 @icmp_sge_i8(i8 %a, i8 %b) {
; Comparison happens in 16-bit
-; CHECK: setp.ge.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
-; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
-; CHECK: ret
+; CHECK-LABEL: icmp_sge_i8(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<2>;
+; CHECK-NEXT: .reg .b16 %rs<3>;
+; CHECK-NEXT: .reg .b32 %r<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.s8 %rs1, [icmp_sge_i8_param_0];
+; CHECK-NEXT: ld.param.s8 %rs2, [icmp_sge_i8_param_1];
+; CHECK-NEXT: setp.ge.s16 %p1, %rs1, %rs2;
+; CHECK-NEXT: selp.b32 %r1, 1, 0, %p1;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
+; CHECK-NEXT: ret;
%cmp = icmp sge i8 %a, %b
%ret = zext i1 %cmp to i8
ret i8 %ret
@@ -370,9 +731,19 @@ define i8 @icmp_sge_i8(i8 %a, i8 %b) {
define i8 @icmp_slt_i8(i8 %a, i8 %b) {
; Comparison happens in 16-bit
-; CHECK: setp.lt.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
-; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
-; CHECK: ret
+; CHECK-LABEL: icmp_slt_i8(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<2>;
+; CHECK-NEXT: .reg .b16 %rs<3>;
+; CHECK-NEXT: .reg .b32 %r<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.s8 %rs1, [icmp_slt_i8_param_0];
+; CHECK-NEXT: ld.param.s8 %rs2, [icmp_slt_i8_param_1];
+; CHECK-NEXT: setp.lt.s16 %p1, %rs1, %rs2;
+; CHECK-NEXT: selp.b32 %r1, 1, 0, %p1;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
+; CHECK-NEXT: ret;
%cmp = icmp slt i8 %a, %b
%ret = zext i1 %cmp to i8
ret i8 %ret
@@ -380,9 +751,19 @@ define i8 @icmp_slt_i8(i8 %a, i8 %b) {
define i8 @icmp_sle_i8(i8 %a, i8 %b) {
; Comparison happens in 16-bit
-; CHECK: setp.le.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}}
-; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]]
-; CHECK: ret
+; CHECK-LABEL: icmp_sle_i8(
+; CHECK: {
+; CHECK-NEXT: .reg .pred %p<2>;
+; CHECK-NEXT: .reg .b16 %rs<3>;
+; CHECK-NEXT: .reg .b32 %r<2>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.s8 %rs1, [icmp_sle_i8_param_0];
+; CHECK-NEXT: ld.param.s8 %rs2, [icmp_sle_i8_param_1];
+; CHECK-NEXT: setp.le.s16 %p1, %rs1, %rs2;
+; CHECK-NEXT: selp.b32 %r1, 1, 0, %p1;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r1;
+; CHECK-NEXT: ret;
%cmp = icmp sle i8 %a, %b
%ret = zext i1 %cmp to i8
ret i8 %ret
diff --git a/llvm/test/CodeGen/NVPTX/convert-call-to-indirect.ll b/llvm/test/CodeGen/NVPTX/convert-call-to-indirect.ll
index d1b478d..48209a8 100644
--- a/llvm/test/CodeGen/NVPTX/convert-call-to-indirect.ll
+++ b/llvm/test/CodeGen/NVPTX/convert-call-to-indirect.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc < %s -march=nvptx64 -mcpu=sm_90 | FileCheck %s
; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_90 | %ptxas-verify -arch=sm_90 %}
@@ -7,52 +8,203 @@ declare i64 @callee_variadic(ptr %p, ...);
define %struct.64 @test_return_type_mismatch(ptr %p) {
; CHECK-LABEL: test_return_type_mismatch(
-; CHECK: .param .align 1 .b8 retval0[8];
+; CHECK: {
+; CHECK-NEXT: .reg .b64 %rd<40>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b64 %rd2, [test_return_type_mismatch_param_0];
+; CHECK-NEXT: { // callseq 0, 0
+; CHECK-NEXT: .param .b64 param0;
+; CHECK-NEXT: .param .align 1 .b8 retval0[8];
+; CHECK-NEXT: st.param.b64 [param0], %rd2;
; CHECK-NEXT: prototype_0 : .callprototype (.param .align 1 .b8 _[8]) _ (.param .b64 _);
-; CHECK-NEXT: call (retval0), %rd{{[0-9]+}}, (param0), prototype_0;
+; CHECK-NEXT: mov.b64 %rd1, callee;
+; CHECK-NEXT: call (retval0), %rd1, (param0), prototype_0;
+; CHECK-NEXT: ld.param.b8 %rd3, [retval0+7];
+; CHECK-NEXT: ld.param.b8 %rd4, [retval0+6];
+; CHECK-NEXT: ld.param.b8 %rd5, [retval0+5];
+; CHECK-NEXT: ld.param.b8 %rd6, [retval0+4];
+; CHECK-NEXT: ld.param.b8 %rd7, [retval0+3];
+; CHECK-NEXT: ld.param.b8 %rd8, [retval0+2];
+; CHECK-NEXT: ld.param.b8 %rd9, [retval0+1];
+; CHECK-NEXT: ld.param.b8 %rd10, [retval0];
+; CHECK-NEXT: } // callseq 0
+; CHECK-NEXT: shl.b64 %rd13, %rd9, 8;
+; CHECK-NEXT: or.b64 %rd14, %rd13, %rd10;
+; CHECK-NEXT: shl.b64 %rd16, %rd8, 16;
+; CHECK-NEXT: shl.b64 %rd18, %rd7, 24;
+; CHECK-NEXT: or.b64 %rd19, %rd18, %rd16;
+; CHECK-NEXT: or.b64 %rd20, %rd19, %rd14;
+; CHECK-NEXT: shl.b64 %rd23, %rd5, 8;
+; CHECK-NEXT: or.b64 %rd24, %rd23, %rd6;
+; CHECK-NEXT: shl.b64 %rd26, %rd4, 16;
+; CHECK-NEXT: shl.b64 %rd28, %rd3, 24;
+; CHECK-NEXT: or.b64 %rd29, %rd28, %rd26;
+; CHECK-NEXT: or.b64 %rd30, %rd29, %rd24;
+; CHECK-NEXT: shl.b64 %rd31, %rd30, 32;
+; CHECK-NEXT: or.b64 %rd32, %rd31, %rd20;
+; CHECK-NEXT: st.param.b8 [func_retval0], %rd10;
+; CHECK-NEXT: shr.u64 %rd33, %rd32, 56;
+; CHECK-NEXT: st.param.b8 [func_retval0+7], %rd33;
+; CHECK-NEXT: shr.u64 %rd34, %rd32, 48;
+; CHECK-NEXT: st.param.b8 [func_retval0+6], %rd34;
+; CHECK-NEXT: shr.u64 %rd35, %rd32, 40;
+; CHECK-NEXT: st.param.b8 [func_retval0+5], %rd35;
+; CHECK-NEXT: shr.u64 %rd36, %rd32, 32;
+; CHECK-NEXT: st.param.b8 [func_retval0+4], %rd36;
+; CHECK-NEXT: shr.u64 %rd37, %rd32, 24;
+; CHECK-NEXT: st.param.b8 [func_retval0+3], %rd37;
+; CHECK-NEXT: shr.u64 %rd38, %rd32, 16;
+; CHECK-NEXT: st.param.b8 [func_retval0+2], %rd38;
+; CHECK-NEXT: shr.u64 %rd39, %rd32, 8;
+; CHECK-NEXT: st.param.b8 [func_retval0+1], %rd39;
+; CHECK-NEXT: ret;
%ret = call %struct.64 @callee(ptr %p)
ret %struct.64 %ret
}
define i64 @test_param_type_mismatch(ptr %p) {
; CHECK-LABEL: test_param_type_mismatch(
-; CHECK: .param .b64 retval0;
+; CHECK: {
+; CHECK-NEXT: .reg .b64 %rd<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: { // callseq 1, 0
+; CHECK-NEXT: .param .b64 param0;
+; CHECK-NEXT: .param .b64 retval0;
; CHECK-NEXT: prototype_1 : .callprototype (.param .b64 _) _ (.param .b64 _);
-; CHECK-NEXT: call (retval0), %rd{{[0-9]+}}, (param0), prototype_1;
+; CHECK-NEXT: st.param.b64 [param0], 7;
+; CHECK-NEXT: mov.b64 %rd1, callee;
+; CHECK-NEXT: call (retval0), %rd1, (param0), prototype_1;
+; CHECK-NEXT: ld.param.b64 %rd2, [retval0];
+; CHECK-NEXT: } // callseq 1
+; CHECK-NEXT: st.param.b64 [func_retval0], %rd2;
+; CHECK-NEXT: ret;
%ret = call i64 @callee(i64 7)
ret i64 %ret
}
define i64 @test_param_count_mismatch(ptr %p) {
; CHECK-LABEL: test_param_count_mismatch(
-; CHECK: .param .b64 retval0;
+; CHECK: {
+; CHECK-NEXT: .reg .b64 %rd<5>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b64 %rd2, [test_param_count_mismatch_param_0];
+; CHECK-NEXT: { // callseq 2, 0
+; CHECK-NEXT: .param .b64 param0;
+; CHECK-NEXT: .param .b64 param1;
+; CHECK-NEXT: .param .b64 retval0;
+; CHECK-NEXT: st.param.b64 [param0], %rd2;
; CHECK-NEXT: prototype_2 : .callprototype (.param .b64 _) _ (.param .b64 _, .param .b64 _);
-; CHECK-NEXT: call (retval0), %rd{{[0-9]+}}, (param0, param1), prototype_2;
+; CHECK-NEXT: st.param.b64 [param1], 7;
+; CHECK-NEXT: mov.b64 %rd1, callee;
+; CHECK-NEXT: call (retval0), %rd1, (param0, param1), prototype_2;
+; CHECK-NEXT: ld.param.b64 %rd3, [retval0];
+; CHECK-NEXT: } // callseq 2
+; CHECK-NEXT: st.param.b64 [func_retval0], %rd3;
+; CHECK-NEXT: ret;
%ret = call i64 @callee(ptr %p, i64 7)
ret i64 %ret
}
define %struct.64 @test_return_type_mismatch_variadic(ptr %p) {
; CHECK-LABEL: test_return_type_mismatch_variadic(
-; CHECK: .param .align 1 .b8 retval0[8];
+; CHECK: {
+; CHECK-NEXT: .reg .b64 %rd<40>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b64 %rd2, [test_return_type_mismatch_variadic_param_0];
+; CHECK-NEXT: { // callseq 3, 0
+; CHECK-NEXT: .param .b64 param0;
+; CHECK-NEXT: .param .align 1 .b8 retval0[8];
+; CHECK-NEXT: st.param.b64 [param0], %rd2;
; CHECK-NEXT: prototype_3 : .callprototype (.param .align 1 .b8 _[8]) _ (.param .b64 _);
-; CHECK-NEXT: call (retval0), %rd{{[0-9]+}}, (param0), prototype_3;
+; CHECK-NEXT: mov.b64 %rd1, callee_variadic;
+; CHECK-NEXT: call (retval0), %rd1, (param0), prototype_3;
+; CHECK-NEXT: ld.param.b8 %rd3, [retval0+7];
+; CHECK-NEXT: ld.param.b8 %rd4, [retval0+6];
+; CHECK-NEXT: ld.param.b8 %rd5, [retval0+5];
+; CHECK-NEXT: ld.param.b8 %rd6, [retval0+4];
+; CHECK-NEXT: ld.param.b8 %rd7, [retval0+3];
+; CHECK-NEXT: ld.param.b8 %rd8, [retval0+2];
+; CHECK-NEXT: ld.param.b8 %rd9, [retval0+1];
+; CHECK-NEXT: ld.param.b8 %rd10, [retval0];
+; CHECK-NEXT: } // callseq 3
+; CHECK-NEXT: shl.b64 %rd13, %rd9, 8;
+; CHECK-NEXT: or.b64 %rd14, %rd13, %rd10;
+; CHECK-NEXT: shl.b64 %rd16, %rd8, 16;
+; CHECK-NEXT: shl.b64 %rd18, %rd7, 24;
+; CHECK-NEXT: or.b64 %rd19, %rd18, %rd16;
+; CHECK-NEXT: or.b64 %rd20, %rd19, %rd14;
+; CHECK-NEXT: shl.b64 %rd23, %rd5, 8;
+; CHECK-NEXT: or.b64 %rd24, %rd23, %rd6;
+; CHECK-NEXT: shl.b64 %rd26, %rd4, 16;
+; CHECK-NEXT: shl.b64 %rd28, %rd3, 24;
+; CHECK-NEXT: or.b64 %rd29, %rd28, %rd26;
+; CHECK-NEXT: or.b64 %rd30, %rd29, %rd24;
+; CHECK-NEXT: shl.b64 %rd31, %rd30, 32;
+; CHECK-NEXT: or.b64 %rd32, %rd31, %rd20;
+; CHECK-NEXT: st.param.b8 [func_retval0], %rd10;
+; CHECK-NEXT: shr.u64 %rd33, %rd32, 56;
+; CHECK-NEXT: st.param.b8 [func_retval0+7], %rd33;
+; CHECK-NEXT: shr.u64 %rd34, %rd32, 48;
+; CHECK-NEXT: st.param.b8 [func_retval0+6], %rd34;
+; CHECK-NEXT: shr.u64 %rd35, %rd32, 40;
+; CHECK-NEXT: st.param.b8 [func_retval0+5], %rd35;
+; CHECK-NEXT: shr.u64 %rd36, %rd32, 32;
+; CHECK-NEXT: st.param.b8 [func_retval0+4], %rd36;
+; CHECK-NEXT: shr.u64 %rd37, %rd32, 24;
+; CHECK-NEXT: st.param.b8 [func_retval0+3], %rd37;
+; CHECK-NEXT: shr.u64 %rd38, %rd32, 16;
+; CHECK-NEXT: st.param.b8 [func_retval0+2], %rd38;
+; CHECK-NEXT: shr.u64 %rd39, %rd32, 8;
+; CHECK-NEXT: st.param.b8 [func_retval0+1], %rd39;
+; CHECK-NEXT: ret;
%ret = call %struct.64 (ptr, ...) @callee_variadic(ptr %p)
ret %struct.64 %ret
}
define i64 @test_param_type_mismatch_variadic(ptr %p) {
; CHECK-LABEL: test_param_type_mismatch_variadic(
-; CHECK: .param .b64 retval0;
+; CHECK: {
+; CHECK-NEXT: .reg .b64 %rd<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b64 %rd1, [test_param_type_mismatch_variadic_param_0];
+; CHECK-NEXT: { // callseq 4, 0
+; CHECK-NEXT: .param .align 8 .b8 param1[8];
+; CHECK-NEXT: .param .b64 param0;
+; CHECK-NEXT: .param .b64 retval0;
+; CHECK-NEXT: st.param.b64 [param0], %rd1;
+; CHECK-NEXT: st.param.b64 [param1], 7;
; CHECK-NEXT: call.uni (retval0), callee_variadic, (param0, param1);
+; CHECK-NEXT: ld.param.b64 %rd2, [retval0];
+; CHECK-NEXT: } // callseq 4
+; CHECK-NEXT: st.param.b64 [func_retval0], %rd2;
+; CHECK-NEXT: ret;
%ret = call i64 (ptr, ...) @callee_variadic(ptr %p, i64 7)
ret i64 %ret
}
define i64 @test_param_count_mismatch_variadic(ptr %p) {
; CHECK-LABEL: test_param_count_mismatch_variadic(
-; CHECK: .param .b64 retval0;
+; CHECK: {
+; CHECK-NEXT: .reg .b64 %rd<4>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b64 %rd1, [test_param_count_mismatch_variadic_param_0];
+; CHECK-NEXT: { // callseq 5, 0
+; CHECK-NEXT: .param .align 8 .b8 param1[8];
+; CHECK-NEXT: .param .b64 param0;
+; CHECK-NEXT: .param .b64 retval0;
+; CHECK-NEXT: st.param.b64 [param0], %rd1;
+; CHECK-NEXT: st.param.b64 [param1], 7;
; CHECK-NEXT: call.uni (retval0), callee_variadic, (param0, param1);
+; CHECK-NEXT: ld.param.b64 %rd2, [retval0];
+; CHECK-NEXT: } // callseq 5
+; CHECK-NEXT: st.param.b64 [func_retval0], %rd2;
+; CHECK-NEXT: ret;
%ret = call i64 (ptr, ...) @callee_variadic(ptr %p, i64 7)
ret i64 %ret
}
diff --git a/llvm/test/CodeGen/NVPTX/dynamic_stackalloc.ll b/llvm/test/CodeGen/NVPTX/dynamic_stackalloc.ll
index 4d2ba7d..06fb8d2 100644
--- a/llvm/test/CodeGen/NVPTX/dynamic_stackalloc.ll
+++ b/llvm/test/CodeGen/NVPTX/dynamic_stackalloc.ll
@@ -22,8 +22,8 @@ define i32 @test_dynamic_stackalloc(i64 %n) {
; CHECK-32-NEXT: cvta.local.u32 %r5, %r4;
; CHECK-32-NEXT: { // callseq 0, 0
; CHECK-32-NEXT: .param .b32 param0;
-; CHECK-32-NEXT: st.param.b32 [param0], %r5;
; CHECK-32-NEXT: .param .b32 retval0;
+; CHECK-32-NEXT: st.param.b32 [param0], %r5;
; CHECK-32-NEXT: call.uni (retval0), bar, (param0);
; CHECK-32-NEXT: ld.param.b32 %r6, [retval0];
; CHECK-32-NEXT: } // callseq 0
@@ -43,8 +43,8 @@ define i32 @test_dynamic_stackalloc(i64 %n) {
; CHECK-64-NEXT: cvta.local.u64 %rd5, %rd4;
; CHECK-64-NEXT: { // callseq 0, 0
; CHECK-64-NEXT: .param .b64 param0;
-; CHECK-64-NEXT: st.param.b64 [param0], %rd5;
; CHECK-64-NEXT: .param .b32 retval0;
+; CHECK-64-NEXT: st.param.b64 [param0], %rd5;
; CHECK-64-NEXT: call.uni (retval0), bar, (param0);
; CHECK-64-NEXT: ld.param.b32 %r1, [retval0];
; CHECK-64-NEXT: } // callseq 0
diff --git a/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll b/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll
index 8918fbd..d4fcea3 100644
--- a/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll
+++ b/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll
@@ -462,10 +462,10 @@ define <2 x half> @test_call(<2 x half> %a, <2 x half> %b) #0 {
; CHECK-NEXT: ld.param.b32 %r1, [test_call_param_0];
; CHECK-NEXT: { // callseq 0, 0
; CHECK-NEXT: .param .align 4 .b8 param0[4];
-; CHECK-NEXT: st.param.b32 [param0], %r1;
; CHECK-NEXT: .param .align 4 .b8 param1[4];
-; CHECK-NEXT: st.param.b32 [param1], %r2;
; CHECK-NEXT: .param .align 4 .b8 retval0[4];
+; CHECK-NEXT: st.param.b32 [param1], %r2;
+; CHECK-NEXT: st.param.b32 [param0], %r1;
; CHECK-NEXT: call.uni (retval0), test_callee, (param0, param1);
; CHECK-NEXT: ld.param.b32 %r3, [retval0];
; CHECK-NEXT: } // callseq 0
@@ -485,10 +485,10 @@ define <2 x half> @test_call_flipped(<2 x half> %a, <2 x half> %b) #0 {
; CHECK-NEXT: ld.param.b32 %r1, [test_call_flipped_param_0];
; CHECK-NEXT: { // callseq 1, 0
; CHECK-NEXT: .param .align 4 .b8 param0[4];
-; CHECK-NEXT: st.param.b32 [param0], %r2;
; CHECK-NEXT: .param .align 4 .b8 param1[4];
-; CHECK-NEXT: st.param.b32 [param1], %r1;
; CHECK-NEXT: .param .align 4 .b8 retval0[4];
+; CHECK-NEXT: st.param.b32 [param1], %r1;
+; CHECK-NEXT: st.param.b32 [param0], %r2;
; CHECK-NEXT: call.uni (retval0), test_callee, (param0, param1);
; CHECK-NEXT: ld.param.b32 %r3, [retval0];
; CHECK-NEXT: } // callseq 1
@@ -508,10 +508,10 @@ define <2 x half> @test_tailcall_flipped(<2 x half> %a, <2 x half> %b) #0 {
; CHECK-NEXT: ld.param.b32 %r1, [test_tailcall_flipped_param_0];
; CHECK-NEXT: { // callseq 2, 0
; CHECK-NEXT: .param .align 4 .b8 param0[4];
-; CHECK-NEXT: st.param.b32 [param0], %r2;
; CHECK-NEXT: .param .align 4 .b8 param1[4];
-; CHECK-NEXT: st.param.b32 [param1], %r1;
; CHECK-NEXT: .param .align 4 .b8 retval0[4];
+; CHECK-NEXT: st.param.b32 [param1], %r1;
+; CHECK-NEXT: st.param.b32 [param0], %r2;
; CHECK-NEXT: call.uni (retval0), test_callee, (param0, param1);
; CHECK-NEXT: ld.param.b32 %r3, [retval0];
; CHECK-NEXT: } // callseq 2
diff --git a/llvm/test/CodeGen/NVPTX/f32x2-instructions.ll b/llvm/test/CodeGen/NVPTX/f32x2-instructions.ll
index 30afd69..b84a0ec 100644
--- a/llvm/test/CodeGen/NVPTX/f32x2-instructions.ll
+++ b/llvm/test/CodeGen/NVPTX/f32x2-instructions.ll
@@ -859,10 +859,10 @@ define <2 x float> @test_call(<2 x float> %a, <2 x float> %b) #0 {
; CHECK-NEXT: ld.param.b64 %rd1, [test_call_param_0];
; CHECK-NEXT: { // callseq 0, 0
; CHECK-NEXT: .param .align 8 .b8 param0[8];
-; CHECK-NEXT: st.param.b64 [param0], %rd1;
; CHECK-NEXT: .param .align 8 .b8 param1[8];
-; CHECK-NEXT: st.param.b64 [param1], %rd2;
; CHECK-NEXT: .param .align 8 .b8 retval0[8];
+; CHECK-NEXT: st.param.b64 [param1], %rd2;
+; CHECK-NEXT: st.param.b64 [param0], %rd1;
; CHECK-NEXT: call.uni (retval0), test_callee, (param0, param1);
; CHECK-NEXT: ld.param.b64 %rd3, [retval0];
; CHECK-NEXT: } // callseq 0
@@ -882,10 +882,10 @@ define <2 x float> @test_call_flipped(<2 x float> %a, <2 x float> %b) #0 {
; CHECK-NEXT: ld.param.b64 %rd1, [test_call_flipped_param_0];
; CHECK-NEXT: { // callseq 1, 0
; CHECK-NEXT: .param .align 8 .b8 param0[8];
-; CHECK-NEXT: st.param.b64 [param0], %rd2;
; CHECK-NEXT: .param .align 8 .b8 param1[8];
-; CHECK-NEXT: st.param.b64 [param1], %rd1;
; CHECK-NEXT: .param .align 8 .b8 retval0[8];
+; CHECK-NEXT: st.param.b64 [param1], %rd1;
+; CHECK-NEXT: st.param.b64 [param0], %rd2;
; CHECK-NEXT: call.uni (retval0), test_callee, (param0, param1);
; CHECK-NEXT: ld.param.b64 %rd3, [retval0];
; CHECK-NEXT: } // callseq 1
@@ -905,10 +905,10 @@ define <2 x float> @test_tailcall_flipped(<2 x float> %a, <2 x float> %b) #0 {
; CHECK-NEXT: ld.param.b64 %rd1, [test_tailcall_flipped_param_0];
; CHECK-NEXT: { // callseq 2, 0
; CHECK-NEXT: .param .align 8 .b8 param0[8];
-; CHECK-NEXT: st.param.b64 [param0], %rd2;
; CHECK-NEXT: .param .align 8 .b8 param1[8];
-; CHECK-NEXT: st.param.b64 [param1], %rd1;
; CHECK-NEXT: .param .align 8 .b8 retval0[8];
+; CHECK-NEXT: st.param.b64 [param1], %rd1;
+; CHECK-NEXT: st.param.b64 [param0], %rd2;
; CHECK-NEXT: call.uni (retval0), test_callee, (param0, param1);
; CHECK-NEXT: ld.param.b64 %rd3, [retval0];
; CHECK-NEXT: } // callseq 2
diff --git a/llvm/test/CodeGen/NVPTX/fma.ll b/llvm/test/CodeGen/NVPTX/fma.ll
index 5aa12b0..87274aa 100644
--- a/llvm/test/CodeGen/NVPTX/fma.ll
+++ b/llvm/test/CodeGen/NVPTX/fma.ll
@@ -36,10 +36,10 @@ define ptx_device float @t2_f32(float %x, float %y, float %z, float %w) {
; CHECK-NEXT: fma.rn.f32 %r6, %r1, %r2, %r5;
; CHECK-NEXT: { // callseq 0, 0
; CHECK-NEXT: .param .b32 param0;
-; CHECK-NEXT: st.param.b32 [param0], %r4;
; CHECK-NEXT: .param .b32 param1;
-; CHECK-NEXT: st.param.b32 [param1], %r6;
; CHECK-NEXT: .param .b32 retval0;
+; CHECK-NEXT: st.param.b32 [param1], %r6;
+; CHECK-NEXT: st.param.b32 [param0], %r4;
; CHECK-NEXT: call.uni (retval0), dummy_f32, (param0, param1);
; CHECK-NEXT: ld.param.b32 %r7, [retval0];
; CHECK-NEXT: } // callseq 0
@@ -83,10 +83,10 @@ define ptx_device double @t2_f64(double %x, double %y, double %z, double %w) {
; CHECK-NEXT: fma.rn.f64 %rd6, %rd1, %rd2, %rd5;
; CHECK-NEXT: { // callseq 1, 0
; CHECK-NEXT: .param .b64 param0;
-; CHECK-NEXT: st.param.b64 [param0], %rd4;
; CHECK-NEXT: .param .b64 param1;
-; CHECK-NEXT: st.param.b64 [param1], %rd6;
; CHECK-NEXT: .param .b64 retval0;
+; CHECK-NEXT: st.param.b64 [param1], %rd6;
+; CHECK-NEXT: st.param.b64 [param0], %rd4;
; CHECK-NEXT: call.uni (retval0), dummy_f64, (param0, param1);
; CHECK-NEXT: ld.param.b64 %rd7, [retval0];
; CHECK-NEXT: } // callseq 1
diff --git a/llvm/test/CodeGen/NVPTX/forward-ld-param.ll b/llvm/test/CodeGen/NVPTX/forward-ld-param.ll
index ed8f6b4..636e12b 100644
--- a/llvm/test/CodeGen/NVPTX/forward-ld-param.ll
+++ b/llvm/test/CodeGen/NVPTX/forward-ld-param.ll
@@ -64,9 +64,9 @@ define void @test_ld_param_byval(ptr byval(i32) %a) {
; CHECK-NEXT: .reg .b64 %rd<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: ld.param.b32 %r1, [test_ld_param_byval_param_0];
; CHECK-NEXT: { // callseq 1, 0
; CHECK-NEXT: .param .align 4 .b8 param0[4];
+; CHECK-NEXT: ld.param.b32 %r1, [test_ld_param_byval_param_0];
; CHECK-NEXT: st.param.b32 [param0], %r1;
; CHECK-NEXT: call.uni byval_user, (param0);
; CHECK-NEXT: } // callseq 1
diff --git a/llvm/test/CodeGen/NVPTX/i128-param.ll b/llvm/test/CodeGen/NVPTX/i128-param.ll
index 4f4c2fe..79abca0 100644
--- a/llvm/test/CodeGen/NVPTX/i128-param.ll
+++ b/llvm/test/CodeGen/NVPTX/i128-param.ll
@@ -29,11 +29,11 @@ start:
; CHECK-DAG: ld.param.v2.b64 {%[[REG2:rd[0-9]+]], %[[REG3:rd[0-9]+]]}, [caller_kernel_param_1];
; CHECK: { // callseq [[CALLSEQ_ID:[0-9]]], 0
- ; CHECK: .param .align 16 .b8 param0[16];
- ; CHECK-NEXT: st.param.v2.b64 [param0], {%[[REG0]], %[[REG1]]}
- ; CHECK: .param .align 16 .b8 param1[16];
- ; CHECK-NEXT: st.param.v2.b64 [param1], {%[[REG2]], %[[REG3]]}
- ; CHECK: } // callseq [[CALLSEQ_ID]]
+ ; CHECK-DAG: .param .align 16 .b8 param0[16];
+ ; CHECK-DAG: .param .align 16 .b8 param1[16];
+ ; CHECK-DAG: st.param.v2.b64 [param0], {%[[REG0]], %[[REG1]]}
+ ; CHECK-DAG: st.param.v2.b64 [param1], {%[[REG2]], %[[REG3]]}
+ ; CHECK: } // callseq [[CALLSEQ_ID]]
call void @callee(i128 %0, i128 %1, ptr %2)
ret void
@@ -48,11 +48,11 @@ start:
; CHECK-DAG: ld.param.v2.b64 {%[[REG2:rd[0-9]+]], %[[REG3:rd[0-9]+]]}, [caller_func_param_1]
; CHECK: { // callseq [[CALLSEQ_ID:[0-9]]], 0
- ; CHECK: .param .align 16 .b8 param0[16];
- ; CHECK: st.param.v2.b64 [param0], {%[[REG0]], %[[REG1]]}
- ; CHECK: .param .align 16 .b8 param1[16];
- ; CHECK: st.param.v2.b64 [param1], {%[[REG2]], %[[REG3]]}
- ; CHECK: } // callseq [[CALLSEQ_ID]]
+ ; CHECK-DAG: .param .align 16 .b8 param0[16];
+ ; CHECK-DAG: .param .align 16 .b8 param1[16];
+ ; CHECK-DAG: st.param.v2.b64 [param0], {%[[REG0]], %[[REG1]]}
+ ; CHECK-DAG: st.param.v2.b64 [param1], {%[[REG2]], %[[REG3]]}
+ ; CHECK: } // callseq [[CALLSEQ_ID]]
call void @callee(i128 %0, i128 %1, ptr %2)
ret void
diff --git a/llvm/test/CodeGen/NVPTX/i16x2-instructions.ll b/llvm/test/CodeGen/NVPTX/i16x2-instructions.ll
index 2b7a06c..74136bb 100644
--- a/llvm/test/CodeGen/NVPTX/i16x2-instructions.ll
+++ b/llvm/test/CodeGen/NVPTX/i16x2-instructions.ll
@@ -642,10 +642,10 @@ define <2 x i16> @test_call(<2 x i16> %a, <2 x i16> %b) #0 {
; COMMON-NEXT: ld.param.b32 %r1, [test_call_param_0];
; COMMON-NEXT: { // callseq 0, 0
; COMMON-NEXT: .param .align 4 .b8 param0[4];
-; COMMON-NEXT: st.param.b32 [param0], %r1;
; COMMON-NEXT: .param .align 4 .b8 param1[4];
-; COMMON-NEXT: st.param.b32 [param1], %r2;
; COMMON-NEXT: .param .align 4 .b8 retval0[4];
+; COMMON-NEXT: st.param.b32 [param1], %r2;
+; COMMON-NEXT: st.param.b32 [param0], %r1;
; COMMON-NEXT: call.uni (retval0), test_callee, (param0, param1);
; COMMON-NEXT: ld.param.b32 %r3, [retval0];
; COMMON-NEXT: } // callseq 0
@@ -665,10 +665,10 @@ define <2 x i16> @test_call_flipped(<2 x i16> %a, <2 x i16> %b) #0 {
; COMMON-NEXT: ld.param.b32 %r1, [test_call_flipped_param_0];
; COMMON-NEXT: { // callseq 1, 0
; COMMON-NEXT: .param .align 4 .b8 param0[4];
-; COMMON-NEXT: st.param.b32 [param0], %r2;
; COMMON-NEXT: .param .align 4 .b8 param1[4];
-; COMMON-NEXT: st.param.b32 [param1], %r1;
; COMMON-NEXT: .param .align 4 .b8 retval0[4];
+; COMMON-NEXT: st.param.b32 [param1], %r1;
+; COMMON-NEXT: st.param.b32 [param0], %r2;
; COMMON-NEXT: call.uni (retval0), test_callee, (param0, param1);
; COMMON-NEXT: ld.param.b32 %r3, [retval0];
; COMMON-NEXT: } // callseq 1
@@ -688,10 +688,10 @@ define <2 x i16> @test_tailcall_flipped(<2 x i16> %a, <2 x i16> %b) #0 {
; COMMON-NEXT: ld.param.b32 %r1, [test_tailcall_flipped_param_0];
; COMMON-NEXT: { // callseq 2, 0
; COMMON-NEXT: .param .align 4 .b8 param0[4];
-; COMMON-NEXT: st.param.b32 [param0], %r2;
; COMMON-NEXT: .param .align 4 .b8 param1[4];
-; COMMON-NEXT: st.param.b32 [param1], %r1;
; COMMON-NEXT: .param .align 4 .b8 retval0[4];
+; COMMON-NEXT: st.param.b32 [param1], %r1;
+; COMMON-NEXT: st.param.b32 [param0], %r2;
; COMMON-NEXT: call.uni (retval0), test_callee, (param0, param1);
; COMMON-NEXT: ld.param.b32 %r3, [retval0];
; COMMON-NEXT: } // callseq 2
diff --git a/llvm/test/CodeGen/NVPTX/i8x2-instructions.ll b/llvm/test/CodeGen/NVPTX/i8x2-instructions.ll
index 3edd4e4..98f94bb 100644
--- a/llvm/test/CodeGen/NVPTX/i8x2-instructions.ll
+++ b/llvm/test/CodeGen/NVPTX/i8x2-instructions.ll
@@ -1,42 +1,107 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_90 -mattr=+ptx80 \
-; RUN: -O0 -disable-post-ra -frame-pointer=all -verify-machineinstrs \
-; RUN: | FileCheck %s
-; RUN: %if ptxas %{ \
-; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_90 -asm-verbose=false \
-; RUN: -O0 -disable-post-ra -frame-pointer=all -verify-machineinstrs \
-; RUN: | %ptxas-verify -arch=sm_90 \
+; RUN: llc < %s -mcpu=sm_90 -mattr=+ptx80 -disable-post-ra -frame-pointer=all \
+; RUN: -verify-machineinstrs -O0 | FileCheck %s --check-prefixes=O0,COMMON
+; RUN: llc < %s -mcpu=sm_90 -mattr=+ptx80 -disable-post-ra -frame-pointer=all \
+; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=O3,COMMON
+; RUN: %if ptxas %{ \
+; RUN: llc < %s -mcpu=sm_90 -mattr=+ptx80 -disable-post-ra -frame-pointer=all \
+; RUN: -verify-machineinstrs -O0 \
+; RUN: | %ptxas-verify -arch=sm_90 \
+; RUN: %}
+; RUN: %if ptxas %{ \
+; RUN: llc < %s -mcpu=sm_90 -mattr=+ptx80 -disable-post-ra -frame-pointer=all \
+; RUN: -verify-machineinstrs \
+; RUN: | %ptxas-verify -arch=sm_90 \
; RUN: %}
+target triple = "nvptx64-nvidia-cuda"
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
define i16 @test_bitcast_2xi8_i16(<2 x i8> %a) {
-; CHECK-LABEL: test_bitcast_2xi8_i16(
-; CHECK: {
-; CHECK-NEXT: .reg .b16 %rs<5>;
-; CHECK-NEXT: .reg .b32 %r<3>;
-; CHECK-EMPTY:
-; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: ld.param.v2.b8 {%rs1, %rs2}, [test_bitcast_2xi8_i16_param_0];
-; CHECK-NEXT: mov.b32 %r1, {%rs1, %rs2};
-; CHECK-NEXT: shl.b16 %rs3, %rs2, 8;
-; CHECK-NEXT: or.b16 %rs4, %rs1, %rs3;
-; CHECK-NEXT: cvt.u32.u16 %r2, %rs4;
-; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
-; CHECK-NEXT: ret;
+; O0-LABEL: test_bitcast_2xi8_i16(
+; O0: {
+; O0-NEXT: .reg .b16 %rs<5>;
+; O0-NEXT: .reg .b32 %r<3>;
+; O0-EMPTY:
+; O0-NEXT: // %bb.0:
+; O0-NEXT: ld.param.v2.b8 {%rs1, %rs2}, [test_bitcast_2xi8_i16_param_0];
+; O0-NEXT: mov.b32 %r1, {%rs1, %rs2};
+; O0-NEXT: shl.b16 %rs3, %rs2, 8;
+; O0-NEXT: or.b16 %rs4, %rs1, %rs3;
+; O0-NEXT: cvt.u32.u16 %r2, %rs4;
+; O0-NEXT: st.param.b32 [func_retval0], %r2;
+; O0-NEXT: ret;
+;
+; O3-LABEL: test_bitcast_2xi8_i16(
+; O3: {
+; O3-NEXT: .reg .b32 %r<2>;
+; O3-EMPTY:
+; O3-NEXT: // %bb.0:
+; O3-NEXT: ld.param.b16 %r1, [test_bitcast_2xi8_i16_param_0];
+; O3-NEXT: st.param.b32 [func_retval0], %r1;
+; O3-NEXT: ret;
%res = bitcast <2 x i8> %a to i16
ret i16 %res
}
define <2 x i8> @test_bitcast_i16_2xi8(i16 %a) {
-; CHECK-LABEL: test_bitcast_i16_2xi8(
-; CHECK: {
-; CHECK-NEXT: .reg .b16 %rs<2>;
-; CHECK-EMPTY:
-; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: ld.param.b16 %rs1, [test_bitcast_i16_2xi8_param_0];
-; CHECK-NEXT: st.param.b16 [func_retval0], %rs1;
-; CHECK-NEXT: ret;
+; O0-LABEL: test_bitcast_i16_2xi8(
+; O0: {
+; O0-NEXT: .reg .b16 %rs<2>;
+; O0-EMPTY:
+; O0-NEXT: // %bb.0:
+; O0-NEXT: ld.param.b16 %rs1, [test_bitcast_i16_2xi8_param_0];
+; O0-NEXT: st.param.b16 [func_retval0], %rs1;
+; O0-NEXT: ret;
+;
+; O3-LABEL: test_bitcast_i16_2xi8(
+; O3: {
+; O3-NEXT: .reg .b16 %rs<2>;
+; O3-EMPTY:
+; O3-NEXT: // %bb.0:
+; O3-NEXT: ld.param.b16 %rs1, [test_bitcast_i16_2xi8_param_0];
+; O3-NEXT: st.param.b16 [func_retval0], %rs1;
+; O3-NEXT: ret;
%res = bitcast i16 %a to <2 x i8>
ret <2 x i8> %res
}
+
+define <2 x i8> @test_call_2xi8(<2 x i8> %a) {
+; O0-LABEL: test_call_2xi8(
+; O0: {
+; O0-NEXT: .reg .b16 %rs<7>;
+; O0-NEXT: .reg .b32 %r<2>;
+; O0-EMPTY:
+; O0-NEXT: // %bb.0:
+; O0-NEXT: ld.param.v2.b8 {%rs1, %rs2}, [test_call_2xi8_param_0];
+; O0-NEXT: mov.b32 %r1, {%rs1, %rs2};
+; O0-NEXT: { // callseq 0, 0
+; O0-NEXT: .param .align 2 .b8 param0[2];
+; O0-NEXT: .param .align 2 .b8 retval0[2];
+; O0-NEXT: st.param.v2.b8 [param0], {%rs1, %rs2};
+; O0-NEXT: call.uni (retval0), test_call_2xi8, (param0);
+; O0-NEXT: ld.param.v2.b8 {%rs3, %rs4}, [retval0];
+; O0-NEXT: } // callseq 0
+; O0-NEXT: st.param.v2.b8 [func_retval0], {%rs3, %rs4};
+; O0-NEXT: ret;
+;
+; O3-LABEL: test_call_2xi8(
+; O3: {
+; O3-NEXT: .reg .b16 %rs<7>;
+; O3-EMPTY:
+; O3-NEXT: // %bb.0:
+; O3-NEXT: ld.param.v2.b8 {%rs1, %rs2}, [test_call_2xi8_param_0];
+; O3-NEXT: { // callseq 0, 0
+; O3-NEXT: .param .align 2 .b8 param0[2];
+; O3-NEXT: .param .align 2 .b8 retval0[2];
+; O3-NEXT: st.param.v2.b8 [param0], {%rs1, %rs2};
+; O3-NEXT: call.uni (retval0), test_call_2xi8, (param0);
+; O3-NEXT: ld.param.v2.b8 {%rs3, %rs4}, [retval0];
+; O3-NEXT: } // callseq 0
+; O3-NEXT: st.param.v2.b8 [func_retval0], {%rs3, %rs4};
+; O3-NEXT: ret;
+ %res = call <2 x i8> @test_call_2xi8(<2 x i8> %a)
+ ret <2 x i8> %res
+}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; COMMON: {{.*}}
diff --git a/llvm/test/CodeGen/NVPTX/i8x4-instructions.ll b/llvm/test/CodeGen/NVPTX/i8x4-instructions.ll
index da99cec..06c2cc8 100644
--- a/llvm/test/CodeGen/NVPTX/i8x4-instructions.ll
+++ b/llvm/test/CodeGen/NVPTX/i8x4-instructions.ll
@@ -1273,10 +1273,10 @@ define <4 x i8> @test_call(<4 x i8> %a, <4 x i8> %b) #0 {
; O0-NEXT: ld.param.b32 %r1, [test_call_param_0];
; O0-NEXT: { // callseq 0, 0
; O0-NEXT: .param .align 4 .b8 param0[4];
-; O0-NEXT: st.param.b32 [param0], %r1;
; O0-NEXT: .param .align 4 .b8 param1[4];
-; O0-NEXT: st.param.b32 [param1], %r2;
; O0-NEXT: .param .align 4 .b8 retval0[4];
+; O0-NEXT: st.param.b32 [param1], %r2;
+; O0-NEXT: st.param.b32 [param0], %r1;
; O0-NEXT: call.uni (retval0), test_callee, (param0, param1);
; O0-NEXT: ld.param.b32 %r3, [retval0];
; O0-NEXT: } // callseq 0
@@ -1289,13 +1289,13 @@ define <4 x i8> @test_call(<4 x i8> %a, <4 x i8> %b) #0 {
; O3-EMPTY:
; O3-NEXT: // %bb.0:
; O3-NEXT: ld.param.b32 %r1, [test_call_param_0];
-; O3-NEXT: ld.param.b32 %r2, [test_call_param_1];
; O3-NEXT: { // callseq 0, 0
; O3-NEXT: .param .align 4 .b8 param0[4];
-; O3-NEXT: st.param.b32 [param0], %r1;
; O3-NEXT: .param .align 4 .b8 param1[4];
-; O3-NEXT: st.param.b32 [param1], %r2;
; O3-NEXT: .param .align 4 .b8 retval0[4];
+; O3-NEXT: ld.param.b32 %r2, [test_call_param_1];
+; O3-NEXT: st.param.b32 [param1], %r2;
+; O3-NEXT: st.param.b32 [param0], %r1;
; O3-NEXT: call.uni (retval0), test_callee, (param0, param1);
; O3-NEXT: ld.param.b32 %r3, [retval0];
; O3-NEXT: } // callseq 0
@@ -1315,10 +1315,10 @@ define <4 x i8> @test_call_flipped(<4 x i8> %a, <4 x i8> %b) #0 {
; O0-NEXT: ld.param.b32 %r1, [test_call_flipped_param_0];
; O0-NEXT: { // callseq 1, 0
; O0-NEXT: .param .align 4 .b8 param0[4];
-; O0-NEXT: st.param.b32 [param0], %r2;
; O0-NEXT: .param .align 4 .b8 param1[4];
-; O0-NEXT: st.param.b32 [param1], %r1;
; O0-NEXT: .param .align 4 .b8 retval0[4];
+; O0-NEXT: st.param.b32 [param1], %r1;
+; O0-NEXT: st.param.b32 [param0], %r2;
; O0-NEXT: call.uni (retval0), test_callee, (param0, param1);
; O0-NEXT: ld.param.b32 %r3, [retval0];
; O0-NEXT: } // callseq 1
@@ -1331,13 +1331,13 @@ define <4 x i8> @test_call_flipped(<4 x i8> %a, <4 x i8> %b) #0 {
; O3-EMPTY:
; O3-NEXT: // %bb.0:
; O3-NEXT: ld.param.b32 %r1, [test_call_flipped_param_0];
-; O3-NEXT: ld.param.b32 %r2, [test_call_flipped_param_1];
; O3-NEXT: { // callseq 1, 0
; O3-NEXT: .param .align 4 .b8 param0[4];
-; O3-NEXT: st.param.b32 [param0], %r2;
; O3-NEXT: .param .align 4 .b8 param1[4];
-; O3-NEXT: st.param.b32 [param1], %r1;
; O3-NEXT: .param .align 4 .b8 retval0[4];
+; O3-NEXT: st.param.b32 [param1], %r1;
+; O3-NEXT: ld.param.b32 %r2, [test_call_flipped_param_1];
+; O3-NEXT: st.param.b32 [param0], %r2;
; O3-NEXT: call.uni (retval0), test_callee, (param0, param1);
; O3-NEXT: ld.param.b32 %r3, [retval0];
; O3-NEXT: } // callseq 1
@@ -1357,10 +1357,10 @@ define <4 x i8> @test_tailcall_flipped(<4 x i8> %a, <4 x i8> %b) #0 {
; O0-NEXT: ld.param.b32 %r1, [test_tailcall_flipped_param_0];
; O0-NEXT: { // callseq 2, 0
; O0-NEXT: .param .align 4 .b8 param0[4];
-; O0-NEXT: st.param.b32 [param0], %r2;
; O0-NEXT: .param .align 4 .b8 param1[4];
-; O0-NEXT: st.param.b32 [param1], %r1;
; O0-NEXT: .param .align 4 .b8 retval0[4];
+; O0-NEXT: st.param.b32 [param1], %r1;
+; O0-NEXT: st.param.b32 [param0], %r2;
; O0-NEXT: call.uni (retval0), test_callee, (param0, param1);
; O0-NEXT: ld.param.b32 %r3, [retval0];
; O0-NEXT: } // callseq 2
@@ -1373,13 +1373,13 @@ define <4 x i8> @test_tailcall_flipped(<4 x i8> %a, <4 x i8> %b) #0 {
; O3-EMPTY:
; O3-NEXT: // %bb.0:
; O3-NEXT: ld.param.b32 %r1, [test_tailcall_flipped_param_0];
-; O3-NEXT: ld.param.b32 %r2, [test_tailcall_flipped_param_1];
; O3-NEXT: { // callseq 2, 0
; O3-NEXT: .param .align 4 .b8 param0[4];
-; O3-NEXT: st.param.b32 [param0], %r2;
; O3-NEXT: .param .align 4 .b8 param1[4];
-; O3-NEXT: st.param.b32 [param1], %r1;
; O3-NEXT: .param .align 4 .b8 retval0[4];
+; O3-NEXT: st.param.b32 [param1], %r1;
+; O3-NEXT: ld.param.b32 %r2, [test_tailcall_flipped_param_1];
+; O3-NEXT: st.param.b32 [param0], %r2;
; O3-NEXT: call.uni (retval0), test_callee, (param0, param1);
; O3-NEXT: ld.param.b32 %r3, [retval0];
; O3-NEXT: } // callseq 2
diff --git a/llvm/test/CodeGen/NVPTX/idioms.ll b/llvm/test/CodeGen/NVPTX/idioms.ll
index be84f9b..a3bf892 100644
--- a/llvm/test/CodeGen/NVPTX/idioms.ll
+++ b/llvm/test/CodeGen/NVPTX/idioms.ll
@@ -173,8 +173,8 @@ define %struct.S16 @i32_to_2xi16_shr(i32 noundef %i){
; CHECK-NEXT: } // callseq 0
; CHECK-NEXT: shr.s32 %r2, %r1, 16;
; CHECK-NEXT: shr.u32 %r3, %r2, 16;
-; CHECK-NEXT: st.param.b16 [func_retval0], %r2;
; CHECK-NEXT: st.param.b16 [func_retval0+2], %r3;
+; CHECK-NEXT: st.param.b16 [func_retval0], %r2;
; CHECK-NEXT: ret;
call void @escape_int(i32 %i); // Force %i to be loaded completely.
%i1 = ashr i32 %i, 16
diff --git a/llvm/test/CodeGen/NVPTX/indirect_byval.ll b/llvm/test/CodeGen/NVPTX/indirect_byval.ll
index eae0321..782e672 100644
--- a/llvm/test/CodeGen/NVPTX/indirect_byval.ll
+++ b/llvm/test/CodeGen/NVPTX/indirect_byval.ll
@@ -23,15 +23,15 @@ define internal i32 @foo() {
; CHECK-NEXT: mov.b64 %SPL, __local_depot0;
; CHECK-NEXT: cvta.local.u64 %SP, %SPL;
; CHECK-NEXT: ld.global.b64 %rd1, [ptr];
-; CHECK-NEXT: add.u64 %rd3, %SPL, 1;
-; CHECK-NEXT: ld.local.b8 %rs1, [%rd3];
-; CHECK-NEXT: add.u64 %rd4, %SP, 0;
; CHECK-NEXT: { // callseq 0, 0
; CHECK-NEXT: .param .align 1 .b8 param0[1];
-; CHECK-NEXT: st.param.b8 [param0], %rs1;
; CHECK-NEXT: .param .b64 param1;
-; CHECK-NEXT: st.param.b64 [param1], %rd4;
; CHECK-NEXT: .param .b32 retval0;
+; CHECK-NEXT: add.u64 %rd2, %SP, 0;
+; CHECK-NEXT: st.param.b64 [param1], %rd2;
+; CHECK-NEXT: add.u64 %rd4, %SPL, 1;
+; CHECK-NEXT: ld.local.b8 %rs1, [%rd4];
+; CHECK-NEXT: st.param.b8 [param0], %rs1;
; CHECK-NEXT: prototype_0 : .callprototype (.param .b32 _) _ (.param .align 1 .b8 _[1], .param .b64 _);
; CHECK-NEXT: call (retval0), %rd1, (param0, param1), prototype_0;
; CHECK-NEXT: ld.param.b32 %r1, [retval0];
@@ -60,15 +60,15 @@ define internal i32 @bar() {
; CHECK-NEXT: mov.b64 %SPL, __local_depot1;
; CHECK-NEXT: cvta.local.u64 %SP, %SPL;
; CHECK-NEXT: ld.global.b64 %rd1, [ptr];
-; CHECK-NEXT: add.u64 %rd3, %SPL, 8;
-; CHECK-NEXT: ld.local.b64 %rd4, [%rd3];
-; CHECK-NEXT: add.u64 %rd5, %SP, 0;
; CHECK-NEXT: { // callseq 1, 0
; CHECK-NEXT: .param .align 8 .b8 param0[8];
-; CHECK-NEXT: st.param.b64 [param0], %rd4;
; CHECK-NEXT: .param .b64 param1;
-; CHECK-NEXT: st.param.b64 [param1], %rd5;
; CHECK-NEXT: .param .b32 retval0;
+; CHECK-NEXT: add.u64 %rd2, %SP, 0;
+; CHECK-NEXT: st.param.b64 [param1], %rd2;
+; CHECK-NEXT: add.u64 %rd4, %SPL, 8;
+; CHECK-NEXT: ld.local.b64 %rd5, [%rd4];
+; CHECK-NEXT: st.param.b64 [param0], %rd5;
; CHECK-NEXT: prototype_1 : .callprototype (.param .b32 _) _ (.param .align 8 .b8 _[8], .param .b64 _);
; CHECK-NEXT: call (retval0), %rd1, (param0, param1), prototype_1;
; CHECK-NEXT: ld.param.b32 %r1, [retval0];
diff --git a/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll b/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll
index 321a624..38185c7b 100644
--- a/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll
+++ b/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll
@@ -121,20 +121,18 @@ define ptx_kernel void @grid_const_struct(ptr byval(%struct.s) align 4 %input, p
define ptx_kernel void @grid_const_escape(ptr byval(%struct.s) align 4 %input) {
; PTX-LABEL: grid_const_escape(
; PTX: {
-; PTX-NEXT: .reg .b32 %r<2>;
; PTX-NEXT: .reg .b64 %rd<4>;
; PTX-EMPTY:
; PTX-NEXT: // %bb.0:
; PTX-NEXT: mov.b64 %rd2, grid_const_escape_param_0;
; PTX-NEXT: cvta.param.u64 %rd3, %rd2;
-; PTX-NEXT: mov.b64 %rd1, escape;
; PTX-NEXT: { // callseq 0, 0
; PTX-NEXT: .param .b64 param0;
-; PTX-NEXT: st.param.b64 [param0], %rd3;
; PTX-NEXT: .param .b32 retval0;
+; PTX-NEXT: st.param.b64 [param0], %rd3;
; PTX-NEXT: prototype_0 : .callprototype (.param .b32 _) _ (.param .b64 _);
+; PTX-NEXT: mov.b64 %rd1, escape;
; PTX-NEXT: call (retval0), %rd1, (param0), prototype_0;
-; PTX-NEXT: ld.param.b32 %r1, [retval0];
; PTX-NEXT: } // callseq 0
; PTX-NEXT: ret;
; OPT-LABEL: define ptx_kernel void @grid_const_escape(
@@ -153,7 +151,7 @@ define ptx_kernel void @multiple_grid_const_escape(ptr byval(%struct.s) align 4
; PTX-NEXT: .local .align 4 .b8 __local_depot4[4];
; PTX-NEXT: .reg .b64 %SP;
; PTX-NEXT: .reg .b64 %SPL;
-; PTX-NEXT: .reg .b32 %r<3>;
+; PTX-NEXT: .reg .b32 %r<2>;
; PTX-NEXT: .reg .b64 %rd<8>;
; PTX-EMPTY:
; PTX-NEXT: // %bb.0:
@@ -167,18 +165,17 @@ define ptx_kernel void @multiple_grid_const_escape(ptr byval(%struct.s) align 4
; PTX-NEXT: add.u64 %rd6, %SP, 0;
; PTX-NEXT: add.u64 %rd7, %SPL, 0;
; PTX-NEXT: st.local.b32 [%rd7], %r1;
-; PTX-NEXT: mov.b64 %rd1, escape3;
; PTX-NEXT: { // callseq 1, 0
; PTX-NEXT: .param .b64 param0;
-; PTX-NEXT: st.param.b64 [param0], %rd5;
; PTX-NEXT: .param .b64 param1;
-; PTX-NEXT: st.param.b64 [param1], %rd6;
; PTX-NEXT: .param .b64 param2;
-; PTX-NEXT: st.param.b64 [param2], %rd4;
; PTX-NEXT: .param .b32 retval0;
+; PTX-NEXT: st.param.b64 [param2], %rd4;
+; PTX-NEXT: st.param.b64 [param1], %rd6;
+; PTX-NEXT: st.param.b64 [param0], %rd5;
; PTX-NEXT: prototype_1 : .callprototype (.param .b32 _) _ (.param .b64 _, .param .b64 _, .param .b64 _);
+; PTX-NEXT: mov.b64 %rd1, escape3;
; PTX-NEXT: call (retval0), %rd1, (param0, param1, param2), prototype_1;
-; PTX-NEXT: ld.param.b32 %r2, [retval0];
; PTX-NEXT: } // callseq 1
; PTX-NEXT: ret;
; OPT-LABEL: define ptx_kernel void @multiple_grid_const_escape(
@@ -255,7 +252,7 @@ define ptx_kernel void @grid_const_inlineasm_escape(ptr byval(%struct.s) align 4
define ptx_kernel void @grid_const_partial_escape(ptr byval(i32) %input, ptr %output) {
; PTX-LABEL: grid_const_partial_escape(
; PTX: {
-; PTX-NEXT: .reg .b32 %r<4>;
+; PTX-NEXT: .reg .b32 %r<3>;
; PTX-NEXT: .reg .b64 %rd<6>;
; PTX-EMPTY:
; PTX-NEXT: // %bb.0:
@@ -266,14 +263,13 @@ define ptx_kernel void @grid_const_partial_escape(ptr byval(i32) %input, ptr %ou
; PTX-NEXT: ld.param.b32 %r1, [grid_const_partial_escape_param_0];
; PTX-NEXT: add.s32 %r2, %r1, %r1;
; PTX-NEXT: st.global.b32 [%rd4], %r2;
-; PTX-NEXT: mov.b64 %rd1, escape;
; PTX-NEXT: { // callseq 2, 0
; PTX-NEXT: .param .b64 param0;
-; PTX-NEXT: st.param.b64 [param0], %rd5;
; PTX-NEXT: .param .b32 retval0;
+; PTX-NEXT: st.param.b64 [param0], %rd5;
; PTX-NEXT: prototype_2 : .callprototype (.param .b32 _) _ (.param .b64 _);
+; PTX-NEXT: mov.b64 %rd1, escape;
; PTX-NEXT: call (retval0), %rd1, (param0), prototype_2;
-; PTX-NEXT: ld.param.b32 %r3, [retval0];
; PTX-NEXT: } // callseq 2
; PTX-NEXT: ret;
; OPT-LABEL: define ptx_kernel void @grid_const_partial_escape(
@@ -295,7 +291,7 @@ define ptx_kernel void @grid_const_partial_escape(ptr byval(i32) %input, ptr %ou
define ptx_kernel i32 @grid_const_partial_escapemem(ptr byval(%struct.s) %input, ptr %output) {
; PTX-LABEL: grid_const_partial_escapemem(
; PTX: {
-; PTX-NEXT: .reg .b32 %r<5>;
+; PTX-NEXT: .reg .b32 %r<4>;
; PTX-NEXT: .reg .b64 %rd<6>;
; PTX-EMPTY:
; PTX-NEXT: // %bb.0:
@@ -307,14 +303,13 @@ define ptx_kernel i32 @grid_const_partial_escapemem(ptr byval(%struct.s) %input,
; PTX-NEXT: ld.param.b32 %r2, [grid_const_partial_escapemem_param_0+4];
; PTX-NEXT: st.global.b64 [%rd4], %rd5;
; PTX-NEXT: add.s32 %r3, %r1, %r2;
-; PTX-NEXT: mov.b64 %rd1, escape;
; PTX-NEXT: { // callseq 3, 0
; PTX-NEXT: .param .b64 param0;
-; PTX-NEXT: st.param.b64 [param0], %rd5;
; PTX-NEXT: .param .b32 retval0;
+; PTX-NEXT: st.param.b64 [param0], %rd5;
; PTX-NEXT: prototype_3 : .callprototype (.param .b32 _) _ (.param .b64 _);
+; PTX-NEXT: mov.b64 %rd1, escape;
; PTX-NEXT: call (retval0), %rd1, (param0), prototype_3;
-; PTX-NEXT: ld.param.b32 %r4, [retval0];
; PTX-NEXT: } // callseq 3
; PTX-NEXT: st.param.b32 [func_retval0], %r3;
; PTX-NEXT: ret;
@@ -535,9 +530,9 @@ define ptx_kernel void @test_forward_byval_arg(ptr byval(i32) align 4 %input) {
; PTX-NEXT: .reg .b32 %r<2>;
; PTX-EMPTY:
; PTX-NEXT: // %bb.0:
-; PTX-NEXT: ld.param.b32 %r1, [test_forward_byval_arg_param_0];
; PTX-NEXT: { // callseq 4, 0
; PTX-NEXT: .param .align 4 .b8 param0[4];
+; PTX-NEXT: ld.param.b32 %r1, [test_forward_byval_arg_param_0];
; PTX-NEXT: st.param.b32 [param0], %r1;
; PTX-NEXT: call.uni device_func, (param0);
; PTX-NEXT: } // callseq 4
diff --git a/llvm/test/CodeGen/NVPTX/lower-args.ll b/llvm/test/CodeGen/NVPTX/lower-args.ll
index c165de7..7c029ab 100644
--- a/llvm/test/CodeGen/NVPTX/lower-args.ll
+++ b/llvm/test/CodeGen/NVPTX/lower-args.ll
@@ -31,7 +31,7 @@ define void @load_alignment(ptr nocapture readonly byval(%class.outer) align 8 %
; PTX-LABEL: load_alignment(
; PTX: {
; PTX-NEXT: .reg .b32 %r<4>;
-; PTX-NEXT: .reg .b64 %rd<7>;
+; PTX-NEXT: .reg .b64 %rd<6>;
; PTX-EMPTY:
; PTX-NEXT: // %bb.0: // %entry
; PTX-NEXT: mov.b64 %rd1, load_alignment_param_0;
@@ -45,10 +45,9 @@ define void @load_alignment(ptr nocapture readonly byval(%class.outer) align 8 %
; PTX-NEXT: st.b32 [%rd3], %r3;
; PTX-NEXT: { // callseq 0, 0
; PTX-NEXT: .param .b64 param0;
-; PTX-NEXT: st.param.b64 [param0], %rd5;
; PTX-NEXT: .param .b64 retval0;
+; PTX-NEXT: st.param.b64 [param0], %rd5;
; PTX-NEXT: call.uni (retval0), escape, (param0);
-; PTX-NEXT: ld.param.b64 %rd6, [retval0];
; PTX-NEXT: } // callseq 0
; PTX-NEXT: ret;
entry:
@@ -76,17 +75,16 @@ define void @load_padding(ptr nocapture readonly byval(%class.padded) %arg) {
;
; PTX-LABEL: load_padding(
; PTX: {
-; PTX-NEXT: .reg .b64 %rd<4>;
+; PTX-NEXT: .reg .b64 %rd<3>;
; PTX-EMPTY:
; PTX-NEXT: // %bb.0:
; PTX-NEXT: mov.b64 %rd1, load_padding_param_0;
; PTX-NEXT: cvta.local.u64 %rd2, %rd1;
; PTX-NEXT: { // callseq 1, 0
; PTX-NEXT: .param .b64 param0;
-; PTX-NEXT: st.param.b64 [param0], %rd2;
; PTX-NEXT: .param .b64 retval0;
+; PTX-NEXT: st.param.b64 [param0], %rd2;
; PTX-NEXT: call.uni (retval0), escape, (param0);
-; PTX-NEXT: ld.param.b64 %rd3, [retval0];
; PTX-NEXT: } // callseq 1
; PTX-NEXT: ret;
%tmp = call ptr @escape(ptr nonnull align 16 %arg)
diff --git a/llvm/test/CodeGen/NVPTX/lower-byval-args.ll b/llvm/test/CodeGen/NVPTX/lower-byval-args.ll
index 4784d70..20a3519 100644
--- a/llvm/test/CodeGen/NVPTX/lower-byval-args.ll
+++ b/llvm/test/CodeGen/NVPTX/lower-byval-args.ll
@@ -911,9 +911,9 @@ define void @device_func(ptr byval(i32) align 4 %input) {
; PTX-NEXT: .reg .b64 %rd<2>;
; PTX-EMPTY:
; PTX-NEXT: // %bb.0:
-; PTX-NEXT: ld.param.b32 %r1, [device_func_param_0];
; PTX-NEXT: { // callseq 3, 0
; PTX-NEXT: .param .align 4 .b8 param0[4];
+; PTX-NEXT: ld.param.b32 %r1, [device_func_param_0];
; PTX-NEXT: st.param.b32 [param0], %r1;
; PTX-NEXT: call.uni device_func, (param0);
; PTX-NEXT: } // callseq 3
diff --git a/llvm/test/CodeGen/NVPTX/misched_func_call.ll b/llvm/test/CodeGen/NVPTX/misched_func_call.ll
index 8401f45..b2994c0 100644
--- a/llvm/test/CodeGen/NVPTX/misched_func_call.ll
+++ b/llvm/test/CodeGen/NVPTX/misched_func_call.ll
@@ -8,7 +8,7 @@ define ptx_kernel void @wombat(i32 %arg, i32 %arg1, i32 %arg2) {
; CHECK-LABEL: wombat(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<11>;
-; CHECK-NEXT: .reg .b64 %rd<6>;
+; CHECK-NEXT: .reg .b64 %rd<5>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0: // %bb
; CHECK-NEXT: ld.param.b32 %r4, [wombat_param_2];
@@ -19,19 +19,18 @@ define ptx_kernel void @wombat(i32 %arg, i32 %arg1, i32 %arg2) {
; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
; CHECK-NEXT: { // callseq 0, 0
; CHECK-NEXT: .param .b64 param0;
-; CHECK-NEXT: st.param.b64 [param0], 0d0000000000000000;
; CHECK-NEXT: .param .b64 retval0;
+; CHECK-NEXT: st.param.b64 [param0], 0;
; CHECK-NEXT: call.uni (retval0), quux, (param0);
-; CHECK-NEXT: ld.param.b64 %rd1, [retval0];
; CHECK-NEXT: } // callseq 0
; CHECK-NEXT: mul.lo.s32 %r7, %r10, %r3;
; CHECK-NEXT: or.b32 %r8, %r4, %r7;
; CHECK-NEXT: mul.lo.s32 %r9, %r2, %r8;
-; CHECK-NEXT: cvt.rn.f64.s32 %rd2, %r9;
-; CHECK-NEXT: cvt.rn.f64.u32 %rd3, %r10;
-; CHECK-NEXT: add.rn.f64 %rd4, %rd3, %rd2;
-; CHECK-NEXT: mov.b64 %rd5, 0;
-; CHECK-NEXT: st.global.b64 [%rd5], %rd4;
+; CHECK-NEXT: cvt.rn.f64.s32 %rd1, %r9;
+; CHECK-NEXT: cvt.rn.f64.u32 %rd2, %r10;
+; CHECK-NEXT: add.rn.f64 %rd3, %rd2, %rd1;
+; CHECK-NEXT: mov.b64 %rd4, 0;
+; CHECK-NEXT: st.global.b64 [%rd4], %rd3;
; CHECK-NEXT: mov.b32 %r10, 1;
; CHECK-NEXT: bra.uni $L__BB0_1;
bb:
diff --git a/llvm/test/CodeGen/NVPTX/param-add.ll b/llvm/test/CodeGen/NVPTX/param-add.ll
index 4fa1235..c5ea9f8 100644
--- a/llvm/test/CodeGen/NVPTX/param-add.ll
+++ b/llvm/test/CodeGen/NVPTX/param-add.ll
@@ -18,16 +18,16 @@ define i32 @test(%struct.1float alignstack(32) %data) {
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b32 %r1, [test_param_0];
-; CHECK-NEXT: shr.u32 %r2, %r1, 8;
-; CHECK-NEXT: shr.u32 %r3, %r1, 16;
-; CHECK-NEXT: shr.u32 %r4, %r1, 24;
; CHECK-NEXT: { // callseq 0, 0
; CHECK-NEXT: .param .align 1 .b8 param0[4];
+; CHECK-NEXT: .param .b32 retval0;
; CHECK-NEXT: st.param.b8 [param0], %r1;
+; CHECK-NEXT: shr.u32 %r2, %r1, 8;
; CHECK-NEXT: st.param.b8 [param0+1], %r2;
+; CHECK-NEXT: shr.u32 %r3, %r1, 16;
; CHECK-NEXT: st.param.b8 [param0+2], %r3;
+; CHECK-NEXT: shr.u32 %r4, %r3, 8;
; CHECK-NEXT: st.param.b8 [param0+3], %r4;
-; CHECK-NEXT: .param .b32 retval0;
; CHECK-NEXT: call.uni (retval0), callee, (param0);
; CHECK-NEXT: ld.param.b32 %r5, [retval0];
; CHECK-NEXT: } // callseq 0
diff --git a/llvm/test/CodeGen/NVPTX/param-load-store.ll b/llvm/test/CodeGen/NVPTX/param-load-store.ll
index 6c52bfd..db3fbbc 100644
--- a/llvm/test/CodeGen/NVPTX/param-load-store.ll
+++ b/llvm/test/CodeGen/NVPTX/param-load-store.ll
@@ -27,10 +27,10 @@
; CHECK: ld.param.b8 [[A8:%rs[0-9]+]], [test_i1_param_0];
; CHECK: and.b16 [[A:%rs[0-9]+]], [[A8]], 1;
; CHECK: setp.ne.b16 %p1, [[A]], 0
+; CHECK-DAG: .param .b32 param0;
+; CHECK-DAG: .param .b32 retval0;
; CHECK: cvt.u32.u16 [[B:%r[0-9]+]], [[A8]]
-; CHECK: .param .b32 param0;
-; CHECK: st.param.b32 [param0], [[B]]
-; CHECK: .param .b32 retval0;
+; CHECK-DAG: st.param.b32 [param0], [[B]]
; CHECK: call.uni (retval0), test_i1,
; CHECK: ld.param.b32 [[R8:%r[0-9]+]], [retval0];
; CHECK: st.param.b32 [func_retval0], [[R8]];
@@ -47,11 +47,11 @@ define i1 @test_i1(i1 %a) {
; CHECK-NEXT: .param .b32 test_i1s_param_0
; CHECK: ld.param.b8 [[A8:%rs[0-9]+]], [test_i1s_param_0];
; CHECK: cvt.u32.u16 [[A32:%r[0-9]+]], [[A8]];
+; CHECK: .param .b32 param0;
+; CHECK: .param .b32 retval0;
; CHECK: and.b32 [[A1:%r[0-9]+]], [[A32]], 1;
; CHECK: neg.s32 [[A:%r[0-9]+]], [[A1]];
-; CHECK: .param .b32 param0;
; CHECK: st.param.b32 [param0], [[A]];
-; CHECK: .param .b32 retval0;
; CHECK: call.uni
; CHECK: ld.param.b32 [[R8:%r[0-9]+]], [retval0];
; CHECK: and.b32 [[R1:%r[0-9]+]], [[R8]], 1;
@@ -70,9 +70,9 @@ define signext i1 @test_i1s(i1 signext %a) {
; CHECK-DAG: ld.param.b8 [[E2:%rs[0-9]+]], [test_v3i1_param_0+2];
; CHECK-DAG: ld.param.b8 [[E0:%rs[0-9]+]], [test_v3i1_param_0]
; CHECK: .param .align 1 .b8 param0[1];
+; CHECK: .param .align 1 .b8 retval0[1];
; CHECK-DAG: st.param.b8 [param0], [[E0]];
; CHECK-DAG: st.param.b8 [param0+2], [[E2]];
-; CHECK: .param .align 1 .b8 retval0[1];
; CHECK: call.uni (retval0), test_v3i1,
; CHECK-DAG: ld.param.b8 [[RE0:%rs[0-9]+]], [retval0];
; CHECK-DAG: ld.param.b8 [[RE2:%rs[0-9]+]], [retval0+2];
@@ -89,8 +89,8 @@ define <3 x i1> @test_v3i1(<3 x i1> %a) {
; CHECK-NEXT: .param .align 1 .b8 test_v4i1_param_0[1]
; CHECK: ld.param.b8 [[E0:%rs[0-9]+]], [test_v4i1_param_0]
; CHECK: .param .align 1 .b8 param0[1];
-; CHECK: st.param.b8 [param0], [[E0]];
; CHECK: .param .align 1 .b8 retval0[1];
+; CHECK: st.param.b8 [param0], [[E0]];
; CHECK: call.uni (retval0), test_v4i1,
; CHECK: ld.param.b8 [[RE0:%rs[0-9]+]], [retval0];
; CHECK: ld.param.b8 [[RE1:%rs[0-9]+]], [retval0+1];
@@ -112,9 +112,9 @@ define <4 x i1> @test_v4i1(<4 x i1> %a) {
; CHECK-DAG: ld.param.b8 [[E4:%rs[0-9]+]], [test_v5i1_param_0+4];
; CHECK-DAG: ld.param.b8 [[E0:%rs[0-9]+]], [test_v5i1_param_0]
; CHECK: .param .align 1 .b8 param0[1];
+; CHECK: .param .align 1 .b8 retval0[1];
; CHECK-DAG: st.param.b8 [param0], [[E0]];
; CHECK-DAG: st.param.b8 [param0+4], [[E4]];
-; CHECK: .param .align 1 .b8 retval0[1];
; CHECK: call.uni (retval0), test_v5i1,
; CHECK-DAG: ld.param.b8 [[RE0:%rs[0-9]+]], [retval0];
; CHECK-DAG: ld.param.b8 [[RE4:%rs[0-9]+]], [retval0+4];
@@ -131,8 +131,8 @@ define <5 x i1> @test_v5i1(<5 x i1> %a) {
; CHECK-NEXT: .param .b32 test_i2_param_0
; CHECK: ld.param.b8 {{%rs[0-9]+}}, [test_i2_param_0];
; CHECK: .param .b32 param0;
-; CHECK: st.param.b32 [param0], {{%r[0-9]+}};
; CHECK: .param .b32 retval0;
+; CHECK: st.param.b32 [param0], {{%r[0-9]+}};
; CHECK: call.uni (retval0), test_i2,
; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0];
; CHECK: st.param.b32 [func_retval0], {{%r[0-9]+}};
@@ -147,8 +147,8 @@ define i2 @test_i2(i2 %a) {
; CHECK-NEXT: .param .b32 test_i3_param_0
; CHECK: ld.param.b8 {{%rs[0-9]+}}, [test_i3_param_0];
; CHECK: .param .b32 param0;
-; CHECK: st.param.b32 [param0], {{%r[0-9]+}};
; CHECK: .param .b32 retval0;
+; CHECK: st.param.b32 [param0], {{%r[0-9]+}};
; CHECK: call.uni (retval0), test_i3,
; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0];
; CHECK: st.param.b32 [func_retval0], {{%r[0-9]+}};
@@ -163,10 +163,10 @@ define i3 @test_i3(i3 %a) {
; CHECK-LABEL: test_i8(
; CHECK-NEXT: .param .b32 test_i8_param_0
; CHECK: ld.param.b8 [[A8:%rs[0-9]+]], [test_i8_param_0];
-; CHECK: cvt.u32.u16 [[A32:%r[0-9]+]], [[A8]];
; CHECK: .param .b32 param0;
-; CHECK: st.param.b32 [param0], [[A32]];
; CHECK: .param .b32 retval0;
+; CHECK: cvt.u32.u16 [[A32:%r[0-9]+]], [[A8]];
+; CHECK: st.param.b32 [param0], [[A32]];
; CHECK: call.uni (retval0), test_i8,
; CHECK: ld.param.b32 [[R32:%r[0-9]+]], [retval0];
; CHECK: st.param.b32 [func_retval0], [[R32]];
@@ -181,10 +181,10 @@ define i8 @test_i8(i8 %a) {
; CHECK-LABEL: test_i8s(
; CHECK-NEXT: .param .b32 test_i8s_param_0
; CHECK: ld.param.s8 [[A8:%rs[0-9]+]], [test_i8s_param_0];
-; CHECK: cvt.s32.s16 [[A:%r[0-9]+]], [[A8]];
; CHECK: .param .b32 param0;
-; CHECK: st.param.b32 [param0], [[A]];
; CHECK: .param .b32 retval0;
+; CHECK: cvt.s32.s16 [[A:%r[0-9]+]], [[A8]];
+; CHECK: st.param.b32 [param0], [[A]];
; CHECK: call.uni (retval0), test_i8s,
; CHECK: ld.param.b32 [[R32:%r[0-9]+]], [retval0];
; -- This is suspicious (though correct) -- why not cvt.u8.u32, cvt.s8.s32 ?
@@ -202,8 +202,8 @@ define signext i8 @test_i8s(i8 signext %a) {
; CHECK-NEXT: .param .align 4 .b8 test_v3i8_param_0[4]
; CHECK: ld.param.b32 [[R:%r[0-9]+]], [test_v3i8_param_0];
; CHECK: .param .align 4 .b8 param0[4];
-; CHECK: st.param.b32 [param0], [[R]]
; CHECK: .param .align 4 .b8 retval0[4];
+; CHECK: st.param.b32 [param0], [[R]]
; CHECK: call.uni (retval0), test_v3i8,
; CHECK: ld.param.b32 [[RE:%r[0-9]+]], [retval0];
; v4i8/i32->{v3i8 elements}->v4i8/i32 conversion is messy and not very
@@ -220,8 +220,8 @@ define <3 x i8> @test_v3i8(<3 x i8> %a) {
; CHECK-NEXT: .param .align 4 .b8 test_v4i8_param_0[4]
; CHECK: ld.param.b32 [[R:%r[0-9]+]], [test_v4i8_param_0]
; CHECK: .param .align 4 .b8 param0[4];
-; CHECK: st.param.b32 [param0], [[R]];
; CHECK: .param .align 4 .b8 retval0[4];
+; CHECK: st.param.b32 [param0], [[R]];
; CHECK: call.uni (retval0), test_v4i8,
; CHECK: ld.param.b32 [[RET:%r[0-9]+]], [retval0];
; CHECK: st.param.b32 [func_retval0], [[RET]];
@@ -237,20 +237,13 @@ define <4 x i8> @test_v4i8(<4 x i8> %a) {
; CHECK-DAG: ld.param.b32 [[E0:%r[0-9]+]], [test_v5i8_param_0]
; CHECK-DAG: ld.param.b8 [[E4:%rs[0-9]+]], [test_v5i8_param_0+4];
; CHECK: .param .align 8 .b8 param0[8];
-; CHECK-DAG: st.param.v4.b8 [param0],
-; CHECK-DAG: st.param.b8 [param0+4], [[E4]];
; CHECK: .param .align 8 .b8 retval0[8];
+; CHECK-DAG: st.param.b32 [param0], [[E0]];
+; CHECK-DAG: st.param.b8 [param0+4], [[E4]];
; CHECK: call.uni (retval0), test_v5i8,
-; CHECK-DAG: ld.param.v4.b8 {[[RE0:%rs[0-9]+]], [[RE1:%rs[0-9]+]], [[RE2:%rs[0-9]+]], [[RE3:%rs[0-9]+]]}, [retval0];
+; CHECK-DAG: ld.param.b32 [[RE0:%r[0-9]+]], [retval0];
; CHECK-DAG: ld.param.b8 [[RE4:%rs[0-9]+]], [retval0+4];
-; CHECK-DAG: cvt.u32.u16 [[R3:%r[0-9]+]], [[RE3]];
-; CHECK-DAG: cvt.u32.u16 [[R2:%r[0-9]+]], [[RE2]];
-; CHECK-DAG: prmt.b32 [[P0:%r[0-9]+]], [[R2]], [[R3]], 0x3340U;
-; CHECK-DAG: cvt.u32.u16 [[R1:%r[0-9]+]], [[RE1]];
-; CHECK-DAG: cvt.u32.u16 [[R0:%r[0-9]+]], [[RE0]];
-; CHECK-DAG: prmt.b32 [[P1:%r[0-9]+]], [[R0]], [[R1]], 0x3340U;
-; CHECK-DAG: prmt.b32 [[P2:%r[0-9]+]], [[P1]], [[P0]], 0x5410U;
-; CHECK-DAG: st.param.b32 [func_retval0], [[P2]];
+; CHECK-DAG: st.param.b32 [func_retval0], [[RE0]];
; CHECK-DAG: st.param.b8 [func_retval0+4], [[RE4]];
; CHECK-NEXT: ret;
define <5 x i8> @test_v5i8(<5 x i8> %a) {
@@ -262,8 +255,8 @@ define <5 x i8> @test_v5i8(<5 x i8> %a) {
; CHECK-LABEL: test_i11(
; CHECK-NEXT: .param .b32 test_i11_param_0
; CHECK: ld.param.b16 {{%rs[0-9]+}}, [test_i11_param_0];
-; CHECK: st.param.b32 [param0], {{%r[0-9]+}};
; CHECK: .param .b32 retval0;
+; CHECK: st.param.b32 [param0], {{%r[0-9]+}};
; CHECK: call.uni (retval0), test_i11,
; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0];
; CHECK: st.param.b32 [func_retval0], {{%r[0-9]+}};
@@ -277,10 +270,10 @@ define i11 @test_i11(i11 %a) {
; CHECK-LABEL: test_i16(
; CHECK-NEXT: .param .b32 test_i16_param_0
; CHECK: ld.param.b16 [[E16:%rs[0-9]+]], [test_i16_param_0];
-; CHECK: cvt.u32.u16 [[E32:%r[0-9]+]], [[E16]];
; CHECK: .param .b32 param0;
-; CHECK: st.param.b32 [param0], [[E32]];
; CHECK: .param .b32 retval0;
+; CHECK: cvt.u32.u16 [[E32:%r[0-9]+]], [[E16]];
+; CHECK: st.param.b32 [param0], [[E32]];
; CHECK: call.uni (retval0), test_i16,
; CHECK: ld.param.b32 [[RE32:%r[0-9]+]], [retval0];
; CHECK: st.param.b32 [func_retval0], [[RE32]];
@@ -294,10 +287,10 @@ define i16 @test_i16(i16 %a) {
; CHECK-LABEL: test_i16s(
; CHECK-NEXT: .param .b32 test_i16s_param_0
; CHECK: ld.param.b16 [[E16:%rs[0-9]+]], [test_i16s_param_0];
-; CHECK: cvt.s32.s16 [[E32:%r[0-9]+]], [[E16]];
; CHECK: .param .b32 param0;
-; CHECK: st.param.b32 [param0], [[E32]];
; CHECK: .param .b32 retval0;
+; CHECK: cvt.s32.s16 [[E32:%r[0-9]+]], [[E16]];
+; CHECK: st.param.b32 [param0], [[E32]];
; CHECK: call.uni (retval0), test_i16s,
; CHECK: ld.param.b32 [[RE32:%r[0-9]+]], [retval0];
; CHECK: cvt.s32.s16 [[R:%r[0-9]+]], [[RE32]];
@@ -312,14 +305,15 @@ define signext i16 @test_i16s(i16 signext %a) {
; CHECK-LABEL: test_v3i16(
; CHECK-NEXT: .param .align 8 .b8 test_v3i16_param_0[8]
; CHECK-DAG: ld.param.b16 [[E2:%rs[0-9]+]], [test_v3i16_param_0+4];
-; CHECK-DAG: ld.param.v2.b16 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]]}, [test_v3i16_param_0];
+; CHECK-DAG: ld.param.b32 [[E0:%r[0-9]+]], [test_v3i16_param_0];
; CHECK: .param .align 8 .b8 param0[8];
-; CHECK: st.param.v2.b16 [param0], {[[E0]], [[E1]]};
-; CHECK: st.param.b16 [param0+4], [[E2]];
; CHECK: .param .align 8 .b8 retval0[8];
+; CHECK-DAG: st.param.b32 [param0], [[E0]];
+; CHECK-DAG: st.param.b16 [param0+4], [[E2]];
; CHECK: call.uni (retval0), test_v3i16,
-; CHECK: ld.param.v2.b16 {[[RE0:%rs[0-9]+]], [[RE1:%rs[0-9]+]]}, [retval0];
+; CHECK: ld.param.b32 [[RE:%r[0-9]+]], [retval0];
; CHECK: ld.param.b16 [[RE2:%rs[0-9]+]], [retval0+4];
+; CHECK-DAG: mov.b32 {[[RE0:%rs[0-9]+]], [[RE1:%rs[0-9]+]]}, [[RE]];
; CHECK-DAG: st.param.v2.b16 [func_retval0], {[[RE0]], [[RE1]]};
; CHECK-DAG: st.param.b16 [func_retval0+4], [[RE2]];
; CHECK-NEXT: ret;
@@ -333,8 +327,8 @@ define <3 x i16> @test_v3i16(<3 x i16> %a) {
; CHECK-NEXT: .param .align 8 .b8 test_v4i16_param_0[8]
; CHECK: ld.param.v2.b32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]]}, [test_v4i16_param_0]
; CHECK: .param .align 8 .b8 param0[8];
-; CHECK: st.param.v2.b32 [param0], {[[E0]], [[E1]]};
; CHECK: .param .align 8 .b8 retval0[8];
+; CHECK: st.param.v2.b32 [param0], {[[E0]], [[E1]]};
; CHECK: call.uni (retval0), test_v4i16,
; CHECK: ld.param.v2.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]]}, [retval0];
; CHECK: st.param.v2.b32 [func_retval0], {[[RE0]], [[RE1]]}
@@ -348,15 +342,15 @@ define <4 x i16> @test_v4i16(<4 x i16> %a) {
; CHECK-LABEL: test_v5i16(
; CHECK-NEXT: .param .align 16 .b8 test_v5i16_param_0[16]
; CHECK-DAG: ld.param.b16 [[E4:%rs[0-9]+]], [test_v5i16_param_0+8];
-; CHECK-DAG: ld.param.v4.b16 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]], [[E2:%rs[0-9]+]], [[E3:%rs[0-9]+]]}, [test_v5i16_param_0]
+; CHECK-DAG: ld.param.v2.b32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]]}, [test_v5i16_param_0]
; CHECK: .param .align 16 .b8 param0[16];
-; CHECK-DAG: st.param.v4.b16 [param0], {[[E0]], [[E1]], [[E2]], [[E3]]};
-; CHECK-DAG: st.param.b16 [param0+8], [[E4]];
; CHECK: .param .align 16 .b8 retval0[16];
+; CHECK-DAG: st.param.v2.b32 [param0], {[[E0]], [[E1]]};
+; CHECK-DAG: st.param.b16 [param0+8], [[E4]];
; CHECK: call.uni (retval0), test_v5i16,
-; CHECK-DAG: ld.param.v4.b16 {[[RE0:%rs[0-9]+]], [[RE1:%rs[0-9]+]], [[RE2:%rs[0-9]+]], [[RE3:%rs[0-9]+]]}, [retval0];
+; CHECK-DAG: ld.param.v2.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]]}, [retval0];
; CHECK-DAG: ld.param.b16 [[RE4:%rs[0-9]+]], [retval0+8];
-; CHECK-DAG: st.param.v4.b16 [func_retval0], {[[RE0]], [[RE1]], [[RE2]], [[RE3]]}
+; CHECK-DAG: st.param.v2.b32 [func_retval0], {[[RE0]], [[RE1]]}
; CHECK-DAG: st.param.b16 [func_retval0+8], [[RE4]];
; CHECK-NEXT: ret;
define <5 x i16> @test_v5i16(<5 x i16> %a) {
@@ -369,8 +363,8 @@ define <5 x i16> @test_v5i16(<5 x i16> %a) {
; CHECK-NEXT: .param .align 2 .b8 test_f16_param_0[2]
; CHECK: ld.param.b16 [[E:%rs[0-9]+]], [test_f16_param_0];
; CHECK: .param .align 2 .b8 param0[2];
-; CHECK: st.param.b16 [param0], [[E]];
; CHECK: .param .align 2 .b8 retval0[2];
+; CHECK: st.param.b16 [param0], [[E]];
; CHECK: call.uni (retval0), test_f16,
; CHECK: ld.param.b16 [[R:%rs[0-9]+]], [retval0];
; CHECK: st.param.b16 [func_retval0], [[R]]
@@ -385,8 +379,8 @@ define half @test_f16(half %a) {
; CHECK-NEXT: .param .align 4 .b8 test_v2f16_param_0[4]
; CHECK: ld.param.b32 [[E:%r[0-9]+]], [test_v2f16_param_0];
; CHECK: .param .align 4 .b8 param0[4];
-; CHECK: st.param.b32 [param0], [[E]];
; CHECK: .param .align 4 .b8 retval0[4];
+; CHECK: st.param.b32 [param0], [[E]];
; CHECK: call.uni (retval0), test_v2f16,
; CHECK: ld.param.b32 [[R:%r[0-9]+]], [retval0];
; CHECK: st.param.b32 [func_retval0], [[R]]
@@ -401,8 +395,8 @@ define <2 x half> @test_v2f16(<2 x half> %a) {
; CHECK-NEXT: .param .align 2 .b8 test_bf16_param_0[2]
; CHECK: ld.param.b16 [[E:%rs[0-9]+]], [test_bf16_param_0];
; CHECK: .param .align 2 .b8 param0[2];
-; CHECK: st.param.b16 [param0], [[E]];
; CHECK: .param .align 2 .b8 retval0[2];
+; CHECK: st.param.b16 [param0], [[E]];
; CHECK: call.uni (retval0), test_bf16,
; CHECK: ld.param.b16 [[R:%rs[0-9]+]], [retval0];
; CHECK: st.param.b16 [func_retval0], [[R]]
@@ -417,8 +411,8 @@ define bfloat @test_bf16(bfloat %a) {
; CHECK-NEXT: .param .align 4 .b8 test_v2bf16_param_0[4]
; CHECK: ld.param.b32 [[E:%r[0-9]+]], [test_v2bf16_param_0];
; CHECK: .param .align 4 .b8 param0[4];
-; CHECK: st.param.b32 [param0], [[E]];
; CHECK: .param .align 4 .b8 retval0[4];
+; CHECK: st.param.b32 [param0], [[E]];
; CHECK: call.uni (retval0), test_v2bf16,
; CHECK: ld.param.b32 [[R:%r[0-9]+]], [retval0];
; CHECK: st.param.b32 [func_retval0], [[R]]
@@ -432,15 +426,16 @@ define <2 x bfloat> @test_v2bf16(<2 x bfloat> %a) {
; CHECK:.func (.param .align 8 .b8 func_retval0[8])
; CHECK-LABEL: test_v3f16(
; CHECK: .param .align 8 .b8 test_v3f16_param_0[8]
-; CHECK-DAG: ld.param.v2.b16 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]]}, [test_v3f16_param_0];
+; CHECK-DAG: ld.param.b32 [[E0:%r[0-9]+]], [test_v3f16_param_0];
; CHECK-DAG: ld.param.b16 [[E2:%rs[0-9]+]], [test_v3f16_param_0+4];
; CHECK: .param .align 8 .b8 param0[8];
-; CHECK-DAG: st.param.v2.b16 [param0], {[[E0]], [[E1]]};
-; CHECK-DAG: st.param.b16 [param0+4], [[E2]];
; CHECK: .param .align 8 .b8 retval0[8];
+; CHECK-DAG: st.param.b32 [param0], [[E0]];
+; CHECK-DAG: st.param.b16 [param0+4], [[E2]];
; CHECK: call.uni (retval0), test_v3f16,
-; CHECK-DAG: ld.param.v2.b16 {[[R0:%rs[0-9]+]], [[R1:%rs[0-9]+]]}, [retval0];
+; CHECK-DAG: ld.param.b32 [[R:%r[0-9]+]], [retval0];
; CHECK-DAG: ld.param.b16 [[R2:%rs[0-9]+]], [retval0+4];
+; CHECK-DAG: mov.b32 {[[R0:%rs[0-9]+]], [[R1:%rs[0-9]+]]}, [[R]];
; CHECK-DAG: st.param.v2.b16 [func_retval0], {[[R0]], [[R1]]};
; CHECK-DAG: st.param.b16 [func_retval0+4], [[R2]];
; CHECK: ret;
@@ -454,8 +449,8 @@ define <3 x half> @test_v3f16(<3 x half> %a) {
; CHECK: .param .align 8 .b8 test_v4f16_param_0[8]
; CHECK: ld.param.v2.b32 {[[R01:%r[0-9]+]], [[R23:%r[0-9]+]]}, [test_v4f16_param_0];
; CHECK: .param .align 8 .b8 param0[8];
-; CHECK: st.param.v2.b32 [param0], {[[R01]], [[R23]]};
; CHECK: .param .align 8 .b8 retval0[8];
+; CHECK: st.param.v2.b32 [param0], {[[R01]], [[R23]]};
; CHECK: call.uni (retval0), test_v4f16,
; CHECK: ld.param.v2.b32 {[[RH01:%r[0-9]+]], [[RH23:%r[0-9]+]]}, [retval0];
; CHECK: st.param.v2.b32 [func_retval0], {[[RH01]], [[RH23]]};
@@ -468,16 +463,16 @@ define <4 x half> @test_v4f16(<4 x half> %a) {
; CHECK:.func (.param .align 16 .b8 func_retval0[16])
; CHECK-LABEL: test_v5f16(
; CHECK: .param .align 16 .b8 test_v5f16_param_0[16]
-; CHECK-DAG: ld.param.v4.b16 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]], [[E2:%rs[0-9]+]], [[E3:%rs[0-9]+]]}, [test_v5f16_param_0];
+; CHECK-DAG: ld.param.v2.b32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]]}, [test_v5f16_param_0];
; CHECK-DAG: ld.param.b16 [[E4:%rs[0-9]+]], [test_v5f16_param_0+8];
; CHECK: .param .align 16 .b8 param0[16];
-; CHECK-DAG: st.param.v4.b16 [param0],
-; CHECK-DAG: st.param.b16 [param0+8], [[E4]];
; CHECK: .param .align 16 .b8 retval0[16];
+; CHECK-DAG: st.param.v2.b32 [param0], {[[E0]], [[E1]]};
+; CHECK-DAG: st.param.b16 [param0+8], [[E4]];
; CHECK: call.uni (retval0), test_v5f16,
-; CHECK-DAG: ld.param.v4.b16 {[[R0:%rs[0-9]+]], [[R1:%rs[0-9]+]], [[R2:%rs[0-9]+]], [[R3:%rs[0-9]+]]}, [retval0];
+; CHECK-DAG: ld.param.v2.b32 {[[R0:%r[0-9]+]], [[R1:%r[0-9]+]]}, [retval0];
; CHECK-DAG: ld.param.b16 [[R4:%rs[0-9]+]], [retval0+8];
-; CHECK-DAG: st.param.v4.b16 [func_retval0], {[[R0]], [[R1]], [[R2]], [[R3]]};
+; CHECK-DAG: st.param.v2.b32 [func_retval0], {[[R0]], [[R1]]};
; CHECK-DAG: st.param.b16 [func_retval0+8], [[R4]];
; CHECK: ret;
define <5 x half> @test_v5f16(<5 x half> %a) {
@@ -490,8 +485,8 @@ define <5 x half> @test_v5f16(<5 x half> %a) {
; CHECK: .param .align 16 .b8 test_v8f16_param_0[16]
; CHECK: ld.param.v4.b32 {[[R01:%r[0-9]+]], [[R23:%r[0-9]+]], [[R45:%r[0-9]+]], [[R67:%r[0-9]+]]}, [test_v8f16_param_0];
; CHECK: .param .align 16 .b8 param0[16];
-; CHECK: st.param.v4.b32 [param0], {[[R01]], [[R23]], [[R45]], [[R67]]};
; CHECK: .param .align 16 .b8 retval0[16];
+; CHECK: st.param.v4.b32 [param0], {[[R01]], [[R23]], [[R45]], [[R67]]};
; CHECK: call.uni (retval0), test_v8f16,
; CHECK: ld.param.v4.b32 {[[RH01:%r[0-9]+]], [[RH23:%r[0-9]+]], [[RH45:%r[0-9]+]], [[RH67:%r[0-9]+]]}, [retval0];
; CHECK: st.param.v4.b32 [func_retval0], {[[RH01]], [[RH23]], [[RH45]], [[RH67]]};
@@ -504,20 +499,20 @@ define <8 x half> @test_v8f16(<8 x half> %a) {
; CHECK:.func (.param .align 32 .b8 func_retval0[32])
; CHECK-LABEL: test_v9f16(
; CHECK: .param .align 32 .b8 test_v9f16_param_0[32]
-; CHECK-DAG: ld.param.v4.b16 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]], [[E2:%rs[0-9]+]], [[E3:%rs[0-9]+]]}, [test_v9f16_param_0];
-; CHECK-DAG: ld.param.v4.b16 {[[E4:%rs[0-9]+]], [[E5:%rs[0-9]+]], [[E6:%rs[0-9]+]], [[E7:%rs[0-9]+]]}, [test_v9f16_param_0+8];
+; CHECK-DAG: ld.param.v2.b32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]]}, [test_v9f16_param_0];
+; CHECK-DAG: ld.param.v2.b32 {[[E2:%r[0-9]+]], [[E3:%r[0-9]+]]}, [test_v9f16_param_0+8];
; CHECK-DAG: ld.param.b16 [[E8:%rs[0-9]+]], [test_v9f16_param_0+16];
; CHECK: .param .align 32 .b8 param0[32];
-; CHECK-DAG: st.param.v4.b16 [param0],
-; CHECK-DAG: st.param.v4.b16 [param0+8],
-; CHECK-DAG: st.param.b16 [param0+16], [[E8]];
; CHECK: .param .align 32 .b8 retval0[32];
+; CHECK-DAG: st.param.v2.b32 [param0], {[[E0]], [[E1]]};
+; CHECK-DAG: st.param.v2.b32 [param0+8], {[[E2]], [[E3]]};
+; CHECK-DAG: st.param.b16 [param0+16], [[E8]];
; CHECK: call.uni (retval0), test_v9f16,
-; CHECK-DAG: ld.param.v4.b16 {[[R0:%rs[0-9]+]], [[R1:%rs[0-9]+]], [[R2:%rs[0-9]+]], [[R3:%rs[0-9]+]]}, [retval0];
-; CHECK-DAG: ld.param.v4.b16 {[[R4:%rs[0-9]+]], [[R5:%rs[0-9]+]], [[R6:%rs[0-9]+]], [[R7:%rs[0-9]+]]}, [retval0+8];
+; CHECK-DAG: ld.param.v2.b32 {[[R0:%r[0-9]+]], [[R1:%r[0-9]+]]}, [retval0];
+; CHECK-DAG: ld.param.v2.b32 {[[R2:%r[0-9]+]], [[R3:%r[0-9]+]]}, [retval0+8];
; CHECK-DAG: ld.param.b16 [[R8:%rs[0-9]+]], [retval0+16];
-; CHECK-DAG: st.param.v4.b16 [func_retval0], {[[R0]], [[R1]], [[R2]], [[R3]]};
-; CHECK-DAG: st.param.v4.b16 [func_retval0+8], {[[R4]], [[R5]], [[R6]], [[R7]]};
+; CHECK-DAG: st.param.v2.b32 [func_retval0], {[[R0]], [[R1]]};
+; CHECK-DAG: st.param.v2.b32 [func_retval0+8], {[[R2]], [[R3]]};
; CHECK-DAG: st.param.b16 [func_retval0+16], [[R8]];
; CHECK: ret;
define <9 x half> @test_v9f16(<9 x half> %a) {
@@ -531,8 +526,8 @@ define <9 x half> @test_v9f16(<9 x half> %a) {
; CHECK-DAG: ld.param.b16 {{%r[0-9]+}}, [test_i19_param_0];
; CHECK-DAG: ld.param.b8 {{%r[0-9]+}}, [test_i19_param_0+2];
; CHECK: .param .b32 param0;
-; CHECK: st.param.b32 [param0], {{%r[0-9]+}};
; CHECK: .param .b32 retval0;
+; CHECK: st.param.b32 [param0], {{%r[0-9]+}};
; CHECK: call.uni (retval0), test_i19,
; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0];
; CHECK: st.param.b32 [func_retval0], {{%r[0-9]+}};
@@ -548,8 +543,8 @@ define i19 @test_i19(i19 %a) {
; CHECK-DAG: ld.param.b16 {{%r[0-9]+}}, [test_i23_param_0];
; CHECK-DAG: ld.param.b8 {{%r[0-9]+}}, [test_i23_param_0+2];
; CHECK: .param .b32 param0;
-; CHECK: st.param.b32 [param0], {{%r[0-9]+}};
; CHECK: .param .b32 retval0;
+; CHECK: st.param.b32 [param0], {{%r[0-9]+}};
; CHECK: call.uni (retval0), test_i23,
; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0];
; CHECK: st.param.b32 [func_retval0], {{%r[0-9]+}};
@@ -565,8 +560,8 @@ define i23 @test_i23(i23 %a) {
; CHECK-DAG: ld.param.b8 {{%r[0-9]+}}, [test_i24_param_0+2];
; CHECK-DAG: ld.param.b16 {{%r[0-9]+}}, [test_i24_param_0];
; CHECK: .param .b32 param0;
-; CHECK: st.param.b32 [param0], {{%r[0-9]+}};
; CHECK: .param .b32 retval0;
+; CHECK: st.param.b32 [param0], {{%r[0-9]+}};
; CHECK: call.uni (retval0), test_i24,
; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0];
; CHECK: st.param.b32 [func_retval0], {{%r[0-9]+}};
@@ -581,8 +576,8 @@ define i24 @test_i24(i24 %a) {
; CHECK-NEXT: .param .b32 test_i29_param_0
; CHECK: ld.param.b32 {{%r[0-9]+}}, [test_i29_param_0];
; CHECK: .param .b32 param0;
-; CHECK: st.param.b32 [param0], {{%r[0-9]+}};
; CHECK: .param .b32 retval0;
+; CHECK: st.param.b32 [param0], {{%r[0-9]+}};
; CHECK: call.uni (retval0), test_i29,
; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0];
; CHECK: st.param.b32 [func_retval0], {{%r[0-9]+}};
@@ -597,8 +592,8 @@ define i29 @test_i29(i29 %a) {
; CHECK-NEXT: .param .b32 test_i32_param_0
; CHECK: ld.param.b32 [[E:%r[0-9]+]], [test_i32_param_0];
; CHECK: .param .b32 param0;
-; CHECK: st.param.b32 [param0], [[E]];
; CHECK: .param .b32 retval0;
+; CHECK: st.param.b32 [param0], [[E]];
; CHECK: call.uni (retval0), test_i32,
; CHECK: ld.param.b32 [[R:%r[0-9]+]], [retval0];
; CHECK: st.param.b32 [func_retval0], [[R]];
@@ -613,10 +608,10 @@ define i32 @test_i32(i32 %a) {
; CHECK-NEXT: .param .align 16 .b8 test_v3i32_param_0[16]
; CHECK-DAG: ld.param.b32 [[E2:%r[0-9]+]], [test_v3i32_param_0+8];
; CHECK-DAG: ld.param.v2.b32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]]}, [test_v3i32_param_0];
-; CHECK: .param .align 16 .b8 param0[16];
-; CHECK: st.param.v2.b32 [param0], {[[E0]], [[E1]]};
-; CHECK: st.param.b32 [param0+8], [[E2]];
-; CHECK: .param .align 16 .b8 retval0[16];
+; CHECK-DAG: .param .align 16 .b8 param0[16];
+; CHECK-DAG: .param .align 16 .b8 retval0[16];
+; CHECK-DAG: st.param.v2.b32 [param0], {[[E0]], [[E1]]};
+; CHECK-DAG: st.param.b32 [param0+8], [[E2]];
; CHECK: call.uni (retval0), test_v3i32,
; CHECK: ld.param.v2.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]]}, [retval0];
; CHECK: ld.param.b32 [[RE2:%r[0-9]+]], [retval0+8];
@@ -632,9 +627,9 @@ define <3 x i32> @test_v3i32(<3 x i32> %a) {
; CHECK-LABEL: test_v4i32(
; CHECK-NEXT: .param .align 16 .b8 test_v4i32_param_0[16]
; CHECK: ld.param.v4.b32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]], [[E2:%r[0-9]+]], [[E3:%r[0-9]+]]}, [test_v4i32_param_0]
-; CHECK: .param .align 16 .b8 param0[16];
-; CHECK: st.param.v4.b32 [param0], {[[E0]], [[E1]], [[E2]], [[E3]]};
-; CHECK: .param .align 16 .b8 retval0[16];
+; CHECK-DAG: .param .align 16 .b8 param0[16];
+; CHECK-DAG: .param .align 16 .b8 retval0[16];
+; CHECK-DAG: st.param.v4.b32 [param0], {[[E0]], [[E1]], [[E2]], [[E3]]};
; CHECK: call.uni (retval0), test_v4i32,
; CHECK: ld.param.v4.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]], [[RE2:%r[0-9]+]], [[RE3:%r[0-9]+]]}, [retval0];
; CHECK: st.param.v4.b32 [func_retval0], {[[RE0]], [[RE1]], [[RE2]], [[RE3]]}
@@ -650,9 +645,9 @@ define <4 x i32> @test_v4i32(<4 x i32> %a) {
; CHECK-DAG: ld.param.b32 [[E4:%r[0-9]+]], [test_v5i32_param_0+16];
; CHECK-DAG: ld.param.v4.b32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]], [[E2:%r[0-9]+]], [[E3:%r[0-9]+]]}, [test_v5i32_param_0]
; CHECK: .param .align 32 .b8 param0[32];
+; CHECK: .param .align 32 .b8 retval0[32];
; CHECK-DAG: st.param.v4.b32 [param0], {[[E0]], [[E1]], [[E2]], [[E3]]};
; CHECK-DAG: st.param.b32 [param0+16], [[E4]];
-; CHECK: .param .align 32 .b8 retval0[32];
; CHECK: call.uni (retval0), test_v5i32,
; CHECK-DAG: ld.param.v4.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]], [[RE2:%r[0-9]+]], [[RE3:%r[0-9]+]]}, [retval0];
; CHECK-DAG: ld.param.b32 [[RE4:%r[0-9]+]], [retval0+16];
@@ -669,8 +664,8 @@ define <5 x i32> @test_v5i32(<5 x i32> %a) {
; CHECK-NEXT: .param .b32 test_f32_param_0
; CHECK: ld.param.b32 [[E:%r[0-9]+]], [test_f32_param_0];
; CHECK: .param .b32 param0;
-; CHECK: st.param.b32 [param0], [[E]];
; CHECK: .param .b32 retval0;
+; CHECK: st.param.b32 [param0], [[E]];
; CHECK: call.uni (retval0), test_f32,
; CHECK: ld.param.b32 [[R:%r[0-9]+]], [retval0];
; CHECK: st.param.b32 [func_retval0], [[R]];
@@ -686,8 +681,8 @@ define float @test_f32(float %a) {
; CHECK-DAG: ld.param.b8 {{%rd[0-9]+}}, [test_i40_param_0+4];
; CHECK-DAG: ld.param.b32 {{%rd[0-9]+}}, [test_i40_param_0];
; CHECK: .param .b64 param0;
-; CHECK: st.param.b64 [param0], {{%rd[0-9]+}};
; CHECK: .param .b64 retval0;
+; CHECK: st.param.b64 [param0], {{%rd[0-9]+}};
; CHECK: call.uni (retval0), test_i40,
; CHECK: ld.param.b64 {{%rd[0-9]+}}, [retval0];
; CHECK: st.param.b64 [func_retval0], {{%rd[0-9]+}};
@@ -703,8 +698,8 @@ define i40 @test_i40(i40 %a) {
; CHECK-DAG: ld.param.b16 {{%rd[0-9]+}}, [test_i47_param_0+4];
; CHECK-DAG: ld.param.b32 {{%rd[0-9]+}}, [test_i47_param_0];
; CHECK: .param .b64 param0;
-; CHECK: st.param.b64 [param0], {{%rd[0-9]+}};
; CHECK: .param .b64 retval0;
+; CHECK: st.param.b64 [param0], {{%rd[0-9]+}};
; CHECK: call.uni (retval0), test_i47,
; CHECK: ld.param.b64 {{%rd[0-9]+}}, [retval0];
; CHECK: st.param.b64 [func_retval0], {{%rd[0-9]+}};
@@ -720,8 +715,8 @@ define i47 @test_i47(i47 %a) {
; CHECK-DAG: ld.param.b16 {{%rd[0-9]+}}, [test_i48_param_0+4];
; CHECK-DAG: ld.param.b32 {{%rd[0-9]+}}, [test_i48_param_0];
; CHECK: .param .b64 param0;
-; CHECK: st.param.b64 [param0], {{%rd[0-9]+}};
; CHECK: .param .b64 retval0;
+; CHECK: st.param.b64 [param0], {{%rd[0-9]+}};
; CHECK: call.uni (retval0), test_i48,
; CHECK: ld.param.b64 {{%rd[0-9]+}}, [retval0];
; CHECK: st.param.b64 [func_retval0], {{%rd[0-9]+}};
@@ -738,8 +733,8 @@ define i48 @test_i48(i48 %a) {
; CHECK-DAG: ld.param.b16 {{%rd[0-9]+}}, [test_i51_param_0+4];
; CHECK-DAG: ld.param.b32 {{%rd[0-9]+}}, [test_i51_param_0];
; CHECK: .param .b64 param0;
-; CHECK: st.param.b64 [param0], {{%rd[0-9]+}};
; CHECK: .param .b64 retval0;
+; CHECK: st.param.b64 [param0], {{%rd[0-9]+}};
; CHECK: call.uni (retval0), test_i51,
; CHECK: ld.param.b64 {{%rd[0-9]+}}, [retval0];
; CHECK: st.param.b64 [func_retval0], {{%rd[0-9]+}};
@@ -756,8 +751,8 @@ define i51 @test_i51(i51 %a) {
; CHECK-DAG: ld.param.b16 {{%rd[0-9]+}}, [test_i56_param_0+4];
; CHECK-DAG: ld.param.b32 {{%rd[0-9]+}}, [test_i56_param_0];
; CHECK: .param .b64 param0;
-; CHECK: st.param.b64 [param0], {{%rd[0-9]+}};
; CHECK: .param .b64 retval0;
+; CHECK: st.param.b64 [param0], {{%rd[0-9]+}};
; CHECK: call.uni (retval0), test_i56,
; CHECK: ld.param.b64 {{%rd[0-9]+}}, [retval0];
; CHECK: st.param.b64 [func_retval0], {{%rd[0-9]+}};
@@ -772,8 +767,8 @@ define i56 @test_i56(i56 %a) {
; CHECK-NEXT: .param .b64 test_i57_param_0
; CHECK: ld.param.b64 {{%rd[0-9]+}}, [test_i57_param_0];
; CHECK: .param .b64 param0;
-; CHECK: st.param.b64 [param0], {{%rd[0-9]+}};
; CHECK: .param .b64 retval0;
+; CHECK: st.param.b64 [param0], {{%rd[0-9]+}};
; CHECK: call.uni (retval0), test_i57,
; CHECK: ld.param.b64 {{%rd[0-9]+}}, [retval0];
; CHECK: st.param.b64 [func_retval0], {{%rd[0-9]+}};
@@ -788,8 +783,8 @@ define i57 @test_i57(i57 %a) {
; CHECK-NEXT: .param .b64 test_i64_param_0
; CHECK: ld.param.b64 [[E:%rd[0-9]+]], [test_i64_param_0];
; CHECK: .param .b64 param0;
-; CHECK: st.param.b64 [param0], [[E]];
; CHECK: .param .b64 retval0;
+; CHECK: st.param.b64 [param0], [[E]];
; CHECK: call.uni (retval0), test_i64,
; CHECK: ld.param.b64 [[R:%rd[0-9]+]], [retval0];
; CHECK: st.param.b64 [func_retval0], [[R]];
@@ -805,9 +800,9 @@ define i64 @test_i64(i64 %a) {
; CHECK-DAG: ld.param.b64 [[E2:%rd[0-9]+]], [test_v3i64_param_0+16];
; CHECK-DAG: ld.param.v2.b64 {[[E0:%rd[0-9]+]], [[E1:%rd[0-9]+]]}, [test_v3i64_param_0];
; CHECK: .param .align 32 .b8 param0[32];
-; CHECK: st.param.v2.b64 [param0], {[[E0]], [[E1]]};
-; CHECK: st.param.b64 [param0+16], [[E2]];
; CHECK: .param .align 32 .b8 retval0[32];
+; CHECK-DAG: st.param.v2.b64 [param0], {[[E0]], [[E1]]};
+; CHECK-DAG: st.param.b64 [param0+16], [[E2]];
; CHECK: call.uni (retval0), test_v3i64,
; CHECK: ld.param.v2.b64 {[[RE0:%rd[0-9]+]], [[RE1:%rd[0-9]+]]}, [retval0];
; CHECK: ld.param.b64 [[RE2:%rd[0-9]+]], [retval0+16];
@@ -828,9 +823,9 @@ define <3 x i64> @test_v3i64(<3 x i64> %a) {
; CHECK-DAG: ld.param.v2.b64 {[[E2:%rd[0-9]+]], [[E3:%rd[0-9]+]]}, [test_v4i64_param_0+16];
; CHECK-DAG: ld.param.v2.b64 {[[E0:%rd[0-9]+]], [[E1:%rd[0-9]+]]}, [test_v4i64_param_0];
; CHECK: .param .align 32 .b8 param0[32];
-; CHECK: st.param.v2.b64 [param0], {[[E0]], [[E1]]};
-; CHECK: st.param.v2.b64 [param0+16], {[[E2]], [[E3]]};
; CHECK: .param .align 32 .b8 retval0[32];
+; CHECK-DAG: st.param.v2.b64 [param0], {[[E0]], [[E1]]};
+; CHECK-DAG: st.param.v2.b64 [param0+16], {[[E2]], [[E3]]};
; CHECK: call.uni (retval0), test_v4i64,
; CHECK: ld.param.v2.b64 {[[RE0:%rd[0-9]+]], [[RE1:%rd[0-9]+]]}, [retval0];
; CHECK: ld.param.v2.b64 {[[RE2:%rd[0-9]+]], [[RE3:%rd[0-9]+]]}, [retval0+16];
@@ -849,8 +844,8 @@ define <4 x i64> @test_v4i64(<4 x i64> %a) {
; CHECK-NEXT: .align 1 .b8 test_s_i1_param_0[1]
; CHECK: ld.param.b8 [[A:%rs[0-9]+]], [test_s_i1_param_0];
; CHECK: .param .align 1 .b8 param0[1];
-; CHECK: st.param.b8 [param0], [[A]]
; CHECK: .param .align 1 .b8 retval0[1];
+; CHECK: st.param.b8 [param0], [[A]]
; CHECK: call.uni (retval0), test_s_i1,
; CHECK: ld.param.b8 [[R:%rs[0-9]+]], [retval0];
; CHECK: st.param.b8 [func_retval0], [[R]];
@@ -865,8 +860,8 @@ define %s_i1 @test_s_i1(%s_i1 %a) {
; CHECK-NEXT: .param .align 1 .b8 test_s_i8_param_0[1]
; CHECK: ld.param.b8 [[A:%rs[0-9]+]], [test_s_i8_param_0];
; CHECK: .param .align 1 .b8 param0[1];
-; CHECK: st.param.b8 [param0], [[A]]
; CHECK: .param .align 1 .b8 retval0[1];
+; CHECK: st.param.b8 [param0], [[A]]
; CHECK: call.uni (retval0), test_s_i8,
; CHECK: ld.param.b8 [[R:%rs[0-9]+]], [retval0];
; CHECK: st.param.b8 [func_retval0], [[R]];
@@ -881,8 +876,8 @@ define %s_i8 @test_s_i8(%s_i8 %a) {
; CHECK-NEXT: .param .align 2 .b8 test_s_i16_param_0[2]
; CHECK: ld.param.b16 [[A:%rs[0-9]+]], [test_s_i16_param_0];
; CHECK: .param .align 2 .b8 param0[2];
-; CHECK: st.param.b16 [param0], [[A]]
; CHECK: .param .align 2 .b8 retval0[2];
+; CHECK: st.param.b16 [param0], [[A]]
; CHECK: call.uni (retval0), test_s_i16,
; CHECK: ld.param.b16 [[R:%rs[0-9]+]], [retval0];
; CHECK: st.param.b16 [func_retval0], [[R]];
@@ -897,8 +892,8 @@ define %s_i16 @test_s_i16(%s_i16 %a) {
; CHECK-NEXT: .param .align 2 .b8 test_s_f16_param_0[2]
; CHECK: ld.param.b16 [[A:%rs[0-9]+]], [test_s_f16_param_0];
; CHECK: .param .align 2 .b8 param0[2];
-; CHECK: st.param.b16 [param0], [[A]]
; CHECK: .param .align 2 .b8 retval0[2];
+; CHECK: st.param.b16 [param0], [[A]]
; CHECK: call.uni (retval0), test_s_f16,
; CHECK: ld.param.b16 [[R:%rs[0-9]+]], [retval0];
; CHECK: st.param.b16 [func_retval0], [[R]];
@@ -913,8 +908,8 @@ define %s_f16 @test_s_f16(%s_f16 %a) {
; CHECK-NEXT: .param .align 4 .b8 test_s_i32_param_0[4]
; CHECK: ld.param.b32 [[E:%r[0-9]+]], [test_s_i32_param_0];
; CHECK: .param .align 4 .b8 param0[4]
-; CHECK: st.param.b32 [param0], [[E]];
; CHECK: .param .align 4 .b8 retval0[4];
+; CHECK: st.param.b32 [param0], [[E]];
; CHECK: call.uni (retval0), test_s_i32,
; CHECK: ld.param.b32 [[R:%r[0-9]+]], [retval0];
; CHECK: st.param.b32 [func_retval0], [[R]];
@@ -929,8 +924,8 @@ define %s_i32 @test_s_i32(%s_i32 %a) {
; CHECK-NEXT: .param .align 4 .b8 test_s_f32_param_0[4]
; CHECK: ld.param.b32 [[E:%r[0-9]+]], [test_s_f32_param_0];
; CHECK: .param .align 4 .b8 param0[4]
-; CHECK: st.param.b32 [param0], [[E]];
; CHECK: .param .align 4 .b8 retval0[4];
+; CHECK: st.param.b32 [param0], [[E]];
; CHECK: call.uni (retval0), test_s_f32,
; CHECK: ld.param.b32 [[R:%r[0-9]+]], [retval0];
; CHECK: st.param.b32 [func_retval0], [[R]];
@@ -945,8 +940,8 @@ define %s_f32 @test_s_f32(%s_f32 %a) {
; CHECK-NEXT: .param .align 8 .b8 test_s_i64_param_0[8]
; CHECK: ld.param.b64 [[E:%rd[0-9]+]], [test_s_i64_param_0];
; CHECK: .param .align 8 .b8 param0[8];
-; CHECK: st.param.b64 [param0], [[E]];
; CHECK: .param .align 8 .b8 retval0[8];
+; CHECK: st.param.b64 [param0], [[E]];
; CHECK: call.uni (retval0), test_s_i64,
; CHECK: ld.param.b64 [[R:%rd[0-9]+]], [retval0];
; CHECK: st.param.b64 [func_retval0], [[R]];
@@ -966,12 +961,12 @@ define %s_i64 @test_s_i64(%s_i64 %a) {
; CHECK-DAG: ld.param.b32 [[E1:%r[0-9]+]], [test_s_i32f32_param_0+4];
; CHECK-DAG: ld.param.b32 [[E0:%r[0-9]+]], [test_s_i32f32_param_0];
; CHECK: .param .align 8 .b8 param0[24];
+; CHECK: .param .align 8 .b8 retval0[24];
; CHECK-DAG: st.param.b32 [param0], [[E0]];
; CHECK-DAG: st.param.b32 [param0+4], [[E1]];
; CHECK-DAG: st.param.b32 [param0+8], [[E2]];
; CHECK-DAG: st.param.b32 [param0+12], [[E3]];
; CHECK-DAG: st.param.b64 [param0+16], [[E4]];
-; CHECK: .param .align 8 .b8 retval0[24];
; CHECK: call.uni (retval0), test_s_i32f32,
; CHECK-DAG: ld.param.b32 [[RE0:%r[0-9]+]], [retval0];
; CHECK-DAG: ld.param.b32 [[RE1:%r[0-9]+]], [retval0+4];
@@ -997,10 +992,10 @@ define %s_i32f32 @test_s_i32f32(%s_i32f32 %a) {
; CHECK-DAG: ld.param.v2.b32 {[[E2:%r[0-9]+]], [[E3:%r[0-9]+]]}, [test_s_i32x4_param_0+8];
; CHECK-DAG: ld.param.v2.b32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]]}, [test_s_i32x4_param_0];
; CHECK: .param .align 8 .b8 param0[24];
-; CHECK: st.param.v2.b32 [param0], {[[E0]], [[E1]]};
-; CHECK: st.param.v2.b32 [param0+8], {[[E2]], [[E3]]};
-; CHECK: st.param.b64 [param0+16], [[E4]];
; CHECK: .param .align 8 .b8 retval0[24];
+; CHECK-DAG: st.param.v2.b32 [param0], {[[E0]], [[E1]]};
+; CHECK-DAG: st.param.v2.b32 [param0+8], {[[E2]], [[E3]]};
+; CHECK-DAG: st.param.b64 [param0+16], [[E4]];
; CHECK: call.uni (retval0), test_s_i32x4,
; CHECK: ld.param.v2.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]]}, [retval0];
; CHECK: ld.param.v2.b32 {[[RE2:%r[0-9]+]], [[RE3:%r[0-9]+]]}, [retval0+8];
@@ -1024,16 +1019,13 @@ define %s_i32x4 @test_s_i32x4(%s_i32x4 %a) {
; CHECK: ld.param.b8 [[E2:%rs[0-9]+]], [test_s_i1i32x4_param_0+8];
; CHECK: ld.param.v2.b32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]]}, [test_s_i1i32x4_param_0];
; CHECK: .param .align 8 .b8 param0[32];
-; CHECK: st.param.v2.b32 [param0], {[[E0]], [[E1]]};
-; CHECK: st.param.b8 [param0+8], [[E2]];
-; CHECK: st.param.b32 [param0+12], [[E3]];
-; CHECK: st.param.b32 [param0+16], [[E4]];
-; CHECK: st.param.b64 [param0+24], [[E5]];
; CHECK: .param .align 8 .b8 retval0[32];
-; CHECK: call.uni (retval0), test_s_i1i32x4,
-; CHECK: (
-; CHECK: param0
-; CHECK: );
+; CHECK-DAG: st.param.v2.b32 [param0], {[[E0]], [[E1]]};
+; CHECK-DAG: st.param.b8 [param0+8], [[E2]];
+; CHECK-DAG: st.param.b32 [param0+12], [[E3]];
+; CHECK-DAG: st.param.b32 [param0+16], [[E4]];
+; CHECK-DAG: st.param.b64 [param0+24], [[E5]];
+; CHECK: call.uni (retval0), test_s_i1i32x4, (param0);
; CHECK: ld.param.v2.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]]}, [retval0];
; CHECK: ld.param.b8 [[RE2:%rs[0-9]+]], [retval0+8];
; CHECK: ld.param.b32 [[RE3:%r[0-9]+]], [retval0+12];
@@ -1082,6 +1074,7 @@ define %s_i8i32x4 @test_s_i1i32x4(%s_i8i32x4 %a) {
; CHECK-DAG: ld.param.b8 %r{{.*}}, [test_s_i1i32x4p_param_0+1];
; CHECK-DAG: ld.param.b8 %r{{.*}}, [test_s_i1i32x4p_param_0];
; CHECK: .param .align 1 .b8 param0[25];
+; CHECK: .param .align 1 .b8 retval0[25];
; CHECK-DAG: st.param.b8 [param0],
; CHECK-DAG: st.param.b8 [param0+1],
; CHECK-DAG: st.param.b8 [param0+2],
@@ -1107,33 +1100,32 @@ define %s_i8i32x4 @test_s_i1i32x4(%s_i8i32x4 %a) {
; CHECK-DAG: st.param.b8 [param0+22],
; CHECK-DAG: st.param.b8 [param0+23],
; CHECK-DAG: st.param.b8 [param0+24],
-; CHECK: .param .align 1 .b8 retval0[25];
-; CHECK: call.uni (retval0), test_s_i1i32x4p,
-; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0];
-; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+1];
-; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+2];
-; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+3];
-; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+4];
-; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+5];
-; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+6];
-; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+7];
-; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+8];
-; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+9];
-; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+10];
-; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+11];
-; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+12];
-; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+13];
-; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+14];
-; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+15];
-; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+16];
-; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+17];
-; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+18];
-; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+19];
-; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+20];
-; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+21];
-; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+22];
-; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+23];
-; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+24];
+; CHECK: call.uni (retval0), test_s_i1i32x4p, (param0);
+; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+8];
+; CHECK-DAG: ld.param.b8 %r{{[0-9]+}}, [retval0+3];
+; CHECK-DAG: ld.param.b8 %r{{[0-9]+}}, [retval0+2];
+; CHECK-DAG: ld.param.b8 %r{{[0-9]+}}, [retval0+1];
+; CHECK-DAG: ld.param.b8 %r{{[0-9]+}}, [retval0];
+; CHECK-DAG: ld.param.b8 %r{{[0-9]+}}, [retval0+7];
+; CHECK-DAG: ld.param.b8 %r{{[0-9]+}}, [retval0+6];
+; CHECK-DAG: ld.param.b8 %r{{[0-9]+}}, [retval0+5];
+; CHECK-DAG: ld.param.b8 %r{{[0-9]+}}, [retval0+4];
+; CHECK-DAG: ld.param.b8 %r{{[0-9]+}}, [retval0+12];
+; CHECK-DAG: ld.param.b8 %r{{[0-9]+}}, [retval0+11];
+; CHECK-DAG: ld.param.b8 %r{{[0-9]+}}, [retval0+10];
+; CHECK-DAG: ld.param.b8 %r{{[0-9]+}}, [retval0+9];
+; CHECK-DAG: ld.param.b8 %r{{[0-9]+}}, [retval0+16];
+; CHECK-DAG: ld.param.b8 %r{{[0-9]+}}, [retval0+15];
+; CHECK-DAG: ld.param.b8 %r{{[0-9]+}}, [retval0+14];
+; CHECK-DAG: ld.param.b8 %r{{[0-9]+}}, [retval0+13];
+; CHECK-DAG: ld.param.b8 %rd{{[0-9]+}}, [retval0+24];
+; CHECK-DAG: ld.param.b8 %rd{{[0-9]+}}, [retval0+23];
+; CHECK-DAG: ld.param.b8 %rd{{[0-9]+}}, [retval0+22];
+; CHECK-DAG: ld.param.b8 %rd{{[0-9]+}}, [retval0+21];
+; CHECK-DAG: ld.param.b8 %rd{{[0-9]+}}, [retval0+20];
+; CHECK-DAG: ld.param.b8 %rd{{[0-9]+}}, [retval0+19];
+; CHECK-DAG: ld.param.b8 %rd{{[0-9]+}}, [retval0+18];
+; CHECK-DAG: ld.param.b8 %rd{{[0-9]+}}, [retval0+17];
; CHECK: } // callseq
; CHECK-DAG: st.param.b8 [func_retval0],
; CHECK-DAG: st.param.b8 [func_retval0+1],
@@ -1177,13 +1169,13 @@ define %s_i8i32x4p @test_s_i1i32x4p(%s_i8i32x4p %a) {
; CHECK: ld.param.b32 [[E2:%r[0-9]+]], [test_s_crossfield_param_0+8];
; CHECK: ld.param.v2.b32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]]}, [test_s_crossfield_param_0];
; CHECK: .param .align 16 .b8 param0[80];
-; CHECK: st.param.v2.b32 [param0], {[[E0]], [[E1]]};
-; CHECK: st.param.b32 [param0+8], [[E2]];
-; CHECK: st.param.v4.b32 [param0+16], {[[E3]], [[E4]], [[E5]], [[E6]]};
-; CHECK: st.param.v4.b32 [param0+32], {[[E7]], [[E8]], [[E9]], [[E10]]};
-; CHECK: st.param.v4.b32 [param0+48], {[[E11]], [[E12]], [[E13]], [[E14]]};
-; CHECK: st.param.b32 [param0+64], [[E15]];
; CHECK: .param .align 16 .b8 retval0[80];
+; CHECK-DAG: st.param.v2.b32 [param0], {[[E0]], [[E1]]};
+; CHECK-DAG: st.param.b32 [param0+8], [[E2]];
+; CHECK-DAG: st.param.v4.b32 [param0+16], {[[E3]], [[E4]], [[E5]], [[E6]]};
+; CHECK-DAG: st.param.v4.b32 [param0+32], {[[E7]], [[E8]], [[E9]], [[E10]]};
+; CHECK-DAG: st.param.v4.b32 [param0+48], {[[E11]], [[E12]], [[E13]], [[E14]]};
+; CHECK-DAG: st.param.b32 [param0+64], [[E15]];
; CHECK: call.uni (retval0), test_s_crossfield,
; CHECK: ld.param.v2.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]]}, [retval0];
; CHECK: ld.param.b32 [[RE2:%r[0-9]+]], [retval0+8];
diff --git a/llvm/test/CodeGen/NVPTX/param-overalign.ll b/llvm/test/CodeGen/NVPTX/param-overalign.ll
index 88ad0b0..2155fb4 100644
--- a/llvm/test/CodeGen/NVPTX/param-overalign.ll
+++ b/llvm/test/CodeGen/NVPTX/param-overalign.ll
@@ -28,8 +28,8 @@ define float @caller_md(float %a, float %b) {
; CHECK-NEXT: ld.param.b32 %r2, [caller_md_param_1];
; CHECK-NEXT: { // callseq 0, 0
; CHECK-NEXT: .param .align 8 .b8 param0[8];
-; CHECK-NEXT: st.param.v2.b32 [param0], {%r1, %r2};
; CHECK-NEXT: .param .b32 retval0;
+; CHECK-NEXT: st.param.v2.b32 [param0], {%r1, %r2};
; CHECK-NEXT: call.uni (retval0), callee_md, (param0);
; CHECK-NEXT: ld.param.b32 %r3, [retval0];
; CHECK-NEXT: } // callseq 0
@@ -69,8 +69,8 @@ define float @caller(float %a, float %b) {
; CHECK-NEXT: ld.param.b32 %r2, [caller_param_1];
; CHECK-NEXT: { // callseq 1, 0
; CHECK-NEXT: .param .align 8 .b8 param0[8];
-; CHECK-NEXT: st.param.v2.b32 [param0], {%r1, %r2};
; CHECK-NEXT: .param .b32 retval0;
+; CHECK-NEXT: st.param.v2.b32 [param0], {%r1, %r2};
; CHECK-NEXT: call.uni (retval0), callee, (param0);
; CHECK-NEXT: ld.param.b32 %r3, [retval0];
; CHECK-NEXT: } // callseq 1
diff --git a/llvm/test/CodeGen/NVPTX/param-vectorize-device.ll b/llvm/test/CodeGen/NVPTX/param-vectorize-device.ll
index a480984a..a592b82 100644
--- a/llvm/test/CodeGen/NVPTX/param-vectorize-device.ll
+++ b/llvm/test/CodeGen/NVPTX/param-vectorize-device.ll
@@ -84,8 +84,8 @@ define dso_local void @caller_St4x1(ptr nocapture noundef readonly byval(%struct
; CHECK: .param .b64 caller_St4x1_param_1
; CHECK: )
; CHECK: .param .b32 param0;
- ; CHECK: st.param.b32 [param0], {{%r[0-9]+}};
; CHECK: .param .align 16 .b8 retval0[4];
+ ; CHECK: st.param.b32 [param0], {{%r[0-9]+}};
; CHECK: call.uni (retval0), callee_St4x1, (param0);
; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0];
%1 = load i32, ptr %in, align 4
@@ -112,8 +112,8 @@ define dso_local void @caller_St4x2(ptr nocapture noundef readonly byval(%struct
; CHECK: .param .b64 caller_St4x2_param_1
; CHECK: )
; CHECK: .param .align 16 .b8 param0[8];
- ; CHECK: st.param.v2.b32 [param0], {{{%r[0-9]+}}, {{%r[0-9]+}}};
; CHECK: .param .align 16 .b8 retval0[8];
+ ; CHECK: st.param.v2.b32 [param0], {{{%r[0-9]+}}, {{%r[0-9]+}}};
; CHECK: call.uni (retval0), callee_St4x2, (param0);
; CHECK: ld.param.v2.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0];
%agg.tmp = alloca %struct.St4x2, align 8
@@ -149,9 +149,9 @@ define dso_local void @caller_St4x3(ptr nocapture noundef readonly byval(%struct
; CHECK: .param .b64 caller_St4x3_param_1
; CHECK: )
; CHECK: .param .align 16 .b8 param0[12];
+ ; CHECK: .param .align 16 .b8 retval0[12];
; CHECK: st.param.v2.b32 [param0], {{{%r[0-9]+}}, {{%r[0-9]+}}};
; CHECK: st.param.b32 [param0+8], {{%r[0-9]+}};
- ; CHECK: .param .align 16 .b8 retval0[12];
; CHECK: call.uni (retval0), callee_St4x3, (param0);
; CHECK: ld.param.v2.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0];
; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0+8];
@@ -193,8 +193,8 @@ define dso_local void @caller_St4x4(ptr nocapture noundef readonly byval(%struct
; CHECK: .param .b64 caller_St4x4_param_1
; CHECK: )
; CHECK: .param .align 16 .b8 param0[16];
- ; CHECK: st.param.v4.b32 [param0], {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}};
; CHECK: .param .align 16 .b8 retval0[16];
+ ; CHECK: st.param.v4.b32 [param0], {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}};
; CHECK: call.uni (retval0), callee_St4x4, (param0);
; CHECK: ld.param.v4.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0];
%call = tail call fastcc [4 x i32] @callee_St4x4(ptr noundef nonnull byval(%struct.St4x4) align 4 %in) #2
@@ -239,9 +239,9 @@ define dso_local void @caller_St4x5(ptr nocapture noundef readonly byval(%struct
; CHECK: .param .b64 caller_St4x5_param_1
; CHECK: )
; CHECK: .param .align 16 .b8 param0[20];
+ ; CHECK: .param .align 16 .b8 retval0[20];
; CHECK: st.param.v4.b32 [param0], {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}};
; CHECK: st.param.b32 [param0+16], {{%r[0-9]+}};
- ; CHECK: .param .align 16 .b8 retval0[20];
; CHECK: call.uni (retval0), callee_St4x5, (param0);
; CHECK: ld.param.v4.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0];
; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0+16];
@@ -295,9 +295,9 @@ define dso_local void @caller_St4x6(ptr nocapture noundef readonly byval(%struct
; CHECK: .param .b64 caller_St4x6_param_1
; CHECK: )
; CHECK: .param .align 16 .b8 param0[24];
+ ; CHECK: .param .align 16 .b8 retval0[24];
; CHECK: st.param.v4.b32 [param0], {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}};
; CHECK: st.param.v2.b32 [param0+16], {{{%r[0-9]+}}, {{%r[0-9]+}}};
- ; CHECK: .param .align 16 .b8 retval0[24];
; CHECK: call.uni (retval0), callee_St4x6, (param0);
; CHECK: ld.param.v4.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0];
; CHECK: ld.param.v2.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0+16];
@@ -357,10 +357,10 @@ define dso_local void @caller_St4x7(ptr nocapture noundef readonly byval(%struct
; CHECK: .param .b64 caller_St4x7_param_1
; CHECK: )
; CHECK: .param .align 16 .b8 param0[28];
+ ; CHECK: .param .align 16 .b8 retval0[28];
; CHECK: st.param.v4.b32 [param0], {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}};
; CHECK: st.param.v2.b32 [param0+16], {{{%r[0-9]+}}, {{%r[0-9]+}}};
; CHECK: st.param.b32 [param0+24], {{%r[0-9]+}};
- ; CHECK: .param .align 16 .b8 retval0[28];
; CHECK: call.uni (retval0), callee_St4x7, (param0);
; CHECK: ld.param.v4.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0];
; CHECK: ld.param.v2.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0+16];
@@ -429,9 +429,9 @@ define dso_local void @caller_St4x8(ptr nocapture noundef readonly byval(%struct
; CHECK: .param .b64 caller_St4x8_param_1
; CHECK: )
; CHECK: .param .align 16 .b8 param0[32];
- ; CHECK: st.param.v4.b32 [param0], {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}};
- ; CHECK: st.param.v4.b32 [param0+16], {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}};
; CHECK: .param .align 16 .b8 retval0[32];
+ ; CHECK-DAG: st.param.v4.b32 [param0], {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}};
+ ; CHECK-DAG: st.param.v4.b32 [param0+16], {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}};
; CHECK: call.uni (retval0), callee_St4x8, (param0);
; CHECK: ld.param.v4.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0];
; CHECK: ld.param.v4.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0+16];
@@ -503,8 +503,8 @@ define dso_local void @caller_St8x1(ptr nocapture noundef readonly byval(%struct
; CHECK: .param .b64 caller_St8x1_param_1
; CHECK: )
; CHECK: .param .b64 param0;
- ; CHECK: st.param.b64 [param0], {{%rd[0-9]+}};
; CHECK: .param .align 16 .b8 retval0[8];
+ ; CHECK: st.param.b64 [param0], {{%rd[0-9]+}};
; CHECK: call.uni (retval0), callee_St8x1, (param0);
; CHECK: ld.param.b64 {{%rd[0-9]+}}, [retval0];
%1 = load i64, ptr %in, align 8
@@ -531,8 +531,8 @@ define dso_local void @caller_St8x2(ptr nocapture noundef readonly byval(%struct
; CHECK: .param .b64 caller_St8x2_param_1
; CHECK: )
; CHECK: .param .align 16 .b8 param0[16];
- ; CHECK: st.param.v2.b64 [param0], {{{%rd[0-9]+}}, {{%rd[0-9]+}}};
; CHECK: .param .align 16 .b8 retval0[16];
+ ; CHECK: st.param.v2.b64 [param0], {{{%rd[0-9]+}}, {{%rd[0-9]+}}};
; CHECK: call.uni (retval0), callee_St8x2, (param0);
; CHECK: ld.param.v2.b64 {{{%rd[0-9]+}}, {{%rd[0-9]+}}}, [retval0];
%call = tail call fastcc [2 x i64] @callee_St8x2(ptr noundef nonnull byval(%struct.St8x2) align 8 %in) #2
@@ -565,9 +565,9 @@ define dso_local void @caller_St8x3(ptr nocapture noundef readonly byval(%struct
; CHECK: .param .b64 caller_St8x3_param_1
; CHECK: )
; CHECK: .param .align 16 .b8 param0[24];
+ ; CHECK: .param .align 16 .b8 retval0[24];
; CHECK: st.param.v2.b64 [param0], {{{%rd[0-9]+}}, {{%rd[0-9]+}}};
; CHECK: st.param.b64 [param0+16], {{%rd[0-9]+}};
- ; CHECK: .param .align 16 .b8 retval0[24];
; CHECK: call.uni (retval0), callee_St8x3, (param0);
; CHECK: ld.param.v2.b64 {{{%rd[0-9]+}}, {{%rd[0-9]+}}}, [retval0];
; CHECK: ld.param.b64 {{%rd[0-9]+}}, [retval0+16];
@@ -609,9 +609,9 @@ define dso_local void @caller_St8x4(ptr nocapture noundef readonly byval(%struct
; CHECK: .param .b64 caller_St8x4_param_1
; CHECK: )
; CHECK: .param .align 16 .b8 param0[32];
- ; CHECK: st.param.v2.b64 [param0], {{{%rd[0-9]+}}, {{%rd[0-9]+}}};
- ; CHECK: st.param.v2.b64 [param0+16], {{{%rd[0-9]+}}, {{%rd[0-9]+}}};
; CHECK: .param .align 16 .b8 retval0[32];
+ ; CHECK-DAG: st.param.v2.b64 [param0], {{{%rd[0-9]+}}, {{%rd[0-9]+}}};
+ ; CHECK-DAG: st.param.v2.b64 [param0+16], {{{%rd[0-9]+}}, {{%rd[0-9]+}}};
; CHECK: call.uni (retval0), callee_St8x4, (param0);
; CHECK: ld.param.v2.b64 {{{%rd[0-9]+}}, {{%rd[0-9]+}}}, [retval0];
; CHECK: ld.param.v2.b64 {{{%rd[0-9]+}}, {{%rd[0-9]+}}}, [retval0+16];
diff --git a/llvm/test/CodeGen/NVPTX/proxy-reg-erasure.mir b/llvm/test/CodeGen/NVPTX/proxy-reg-erasure.mir
index 5d0d6f6..4a53152 100644
--- a/llvm/test/CodeGen/NVPTX/proxy-reg-erasure.mir
+++ b/llvm/test/CodeGen/NVPTX/proxy-reg-erasure.mir
@@ -77,7 +77,7 @@ constants: []
machineFunctionInfo: {}
body: |
bb.0:
- %0:b32, %1:b32, %2:b32, %3:b32 = LoadParamMemV4I32 0
+ %0:b32, %1:b32, %2:b32, %3:b32 = LDV_i32_v4 0, 0, 101, 3, 32, &retval0, 0 :: (load (s128), addrspace 101)
; CHECK-NOT: ProxyReg
%4:b32 = ProxyRegB32 killed %0
%5:b32 = ProxyRegB32 killed %1
@@ -86,7 +86,7 @@ body: |
; CHECK: STV_i32_v4 killed %0, killed %1, killed %2, killed %3
STV_i32_v4 killed %4, killed %5, killed %6, killed %7, 0, 0, 101, 32, &func_retval0, 0 :: (store (s128), addrspace 101)
- %8:b32 = LoadParamMemI32 0
+ %8:b32 = LD_i32 0, 0, 101, 3, 32, &retval0, 0 :: (load (s32), addrspace 101)
; CHECK-NOT: ProxyReg
%9:b32 = ProxyRegB32 killed %8
%10:b32 = ProxyRegB32 killed %9
diff --git a/llvm/test/CodeGen/NVPTX/st-param-imm.ll b/llvm/test/CodeGen/NVPTX/st-param-imm.ll
index 6aa1119..f90435a 100644
--- a/llvm/test/CodeGen/NVPTX/st-param-imm.ll
+++ b/llvm/test/CodeGen/NVPTX/st-param-imm.ll
@@ -26,8 +26,8 @@ define void @st_param_i8_i16() {
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: { // callseq 0, 0
; CHECK-NEXT: .param .align 2 .b8 param0[4];
-; CHECK-NEXT: st.param.b8 [param0], 1;
; CHECK-NEXT: st.param.b16 [param0+2], 2;
+; CHECK-NEXT: st.param.b8 [param0], 1;
; CHECK-NEXT: call.uni call_i8_i16, (param0);
; CHECK-NEXT: } // callseq 0
; CHECK-NEXT: ret;
@@ -75,7 +75,7 @@ define void @st_param_f32() {
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: { // callseq 3, 0
; CHECK-NEXT: .param .b32 param0;
-; CHECK-NEXT: st.param.b32 [param0], 0f40A00000;
+; CHECK-NEXT: st.param.b32 [param0], 1084227584;
; CHECK-NEXT: call.uni call_f32, (param0);
; CHECK-NEXT: } // callseq 3
; CHECK-NEXT: ret;
@@ -91,7 +91,7 @@ define void @st_param_f64() {
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: { // callseq 4, 0
; CHECK-NEXT: .param .b64 param0;
-; CHECK-NEXT: st.param.b64 [param0], 0d4018000000000000;
+; CHECK-NEXT: st.param.b64 [param0], 4618441417868443648;
; CHECK-NEXT: call.uni call_f64, (param0);
; CHECK-NEXT: } // callseq 4
; CHECK-NEXT: ret;
@@ -165,7 +165,7 @@ define void @st_param_v2_i16_ii() {
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: { // callseq 8, 0
; CHECK-NEXT: .param .align 4 .b8 param0[4];
-; CHECK-NEXT: st.param.v2.b16 [param0], {1, 2};
+; CHECK-NEXT: st.param.b32 [param0], 131073;
; CHECK-NEXT: call.uni call_v2_i16, (param0);
; CHECK-NEXT: } // callseq 8
; CHECK-NEXT: ret;
@@ -432,7 +432,7 @@ define void @st_param_v4_i8_iiii() {
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: { // callseq 23, 0
; CHECK-NEXT: .param .align 4 .b8 param0[4];
-; CHECK-NEXT: st.param.v4.b8 [param0], {1, 2, 3, 4};
+; CHECK-NEXT: st.param.b32 [param0], 67305985;
; CHECK-NEXT: call.uni call_v4_i8, (param0);
; CHECK-NEXT: } // callseq 23
; CHECK-NEXT: ret;
@@ -442,15 +442,18 @@ define void @st_param_v4_i8_iiii() {
define void @st_param_v4_i8_irrr(i8 %b, i8 %c, i8 %d) {
; CHECK-LABEL: st_param_v4_i8_irrr(
; CHECK: {
-; CHECK-NEXT: .reg .b16 %rs<4>;
+; CHECK-NEXT: .reg .b32 %r<7>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: ld.param.b8 %rs1, [st_param_v4_i8_irrr_param_2];
-; CHECK-NEXT: ld.param.b8 %rs2, [st_param_v4_i8_irrr_param_1];
-; CHECK-NEXT: ld.param.b8 %rs3, [st_param_v4_i8_irrr_param_0];
+; CHECK-NEXT: ld.param.b8 %r1, [st_param_v4_i8_irrr_param_2];
+; CHECK-NEXT: ld.param.b8 %r2, [st_param_v4_i8_irrr_param_1];
+; CHECK-NEXT: prmt.b32 %r3, %r2, %r1, 0x3340U;
+; CHECK-NEXT: ld.param.b8 %r4, [st_param_v4_i8_irrr_param_0];
+; CHECK-NEXT: prmt.b32 %r5, 1, %r4, 0x3340U;
+; CHECK-NEXT: prmt.b32 %r6, %r5, %r3, 0x5410U;
; CHECK-NEXT: { // callseq 24, 0
; CHECK-NEXT: .param .align 4 .b8 param0[4];
-; CHECK-NEXT: st.param.v4.b8 [param0], {1, %rs3, %rs2, %rs1};
+; CHECK-NEXT: st.param.b32 [param0], %r6;
; CHECK-NEXT: call.uni call_v4_i8, (param0);
; CHECK-NEXT: } // callseq 24
; CHECK-NEXT: ret;
@@ -464,15 +467,18 @@ define void @st_param_v4_i8_irrr(i8 %b, i8 %c, i8 %d) {
define void @st_param_v4_i8_rirr(i8 %a, i8 %c, i8 %d) {
; CHECK-LABEL: st_param_v4_i8_rirr(
; CHECK: {
-; CHECK-NEXT: .reg .b16 %rs<4>;
+; CHECK-NEXT: .reg .b32 %r<7>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: ld.param.b8 %rs1, [st_param_v4_i8_rirr_param_2];
-; CHECK-NEXT: ld.param.b8 %rs2, [st_param_v4_i8_rirr_param_1];
-; CHECK-NEXT: ld.param.b8 %rs3, [st_param_v4_i8_rirr_param_0];
+; CHECK-NEXT: ld.param.b8 %r1, [st_param_v4_i8_rirr_param_2];
+; CHECK-NEXT: ld.param.b8 %r2, [st_param_v4_i8_rirr_param_1];
+; CHECK-NEXT: prmt.b32 %r3, %r2, %r1, 0x3340U;
+; CHECK-NEXT: ld.param.b8 %r4, [st_param_v4_i8_rirr_param_0];
+; CHECK-NEXT: prmt.b32 %r5, %r4, 2, 0x3340U;
+; CHECK-NEXT: prmt.b32 %r6, %r5, %r3, 0x5410U;
; CHECK-NEXT: { // callseq 25, 0
; CHECK-NEXT: .param .align 4 .b8 param0[4];
-; CHECK-NEXT: st.param.v4.b8 [param0], {%rs3, 2, %rs2, %rs1};
+; CHECK-NEXT: st.param.b32 [param0], %r6;
; CHECK-NEXT: call.uni call_v4_i8, (param0);
; CHECK-NEXT: } // callseq 25
; CHECK-NEXT: ret;
@@ -486,15 +492,18 @@ define void @st_param_v4_i8_rirr(i8 %a, i8 %c, i8 %d) {
define void @st_param_v4_i8_rrir(i8 %a, i8 %b, i8 %d) {
; CHECK-LABEL: st_param_v4_i8_rrir(
; CHECK: {
-; CHECK-NEXT: .reg .b16 %rs<4>;
+; CHECK-NEXT: .reg .b32 %r<7>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: ld.param.b8 %rs1, [st_param_v4_i8_rrir_param_2];
-; CHECK-NEXT: ld.param.b8 %rs2, [st_param_v4_i8_rrir_param_1];
-; CHECK-NEXT: ld.param.b8 %rs3, [st_param_v4_i8_rrir_param_0];
+; CHECK-NEXT: ld.param.b8 %r1, [st_param_v4_i8_rrir_param_1];
+; CHECK-NEXT: ld.param.b8 %r2, [st_param_v4_i8_rrir_param_0];
+; CHECK-NEXT: prmt.b32 %r3, %r2, %r1, 0x3340U;
+; CHECK-NEXT: ld.param.b8 %r4, [st_param_v4_i8_rrir_param_2];
+; CHECK-NEXT: prmt.b32 %r5, 3, %r4, 0x3340U;
+; CHECK-NEXT: prmt.b32 %r6, %r3, %r5, 0x5410U;
; CHECK-NEXT: { // callseq 26, 0
; CHECK-NEXT: .param .align 4 .b8 param0[4];
-; CHECK-NEXT: st.param.v4.b8 [param0], {%rs3, %rs2, 3, %rs1};
+; CHECK-NEXT: st.param.b32 [param0], %r6;
; CHECK-NEXT: call.uni call_v4_i8, (param0);
; CHECK-NEXT: } // callseq 26
; CHECK-NEXT: ret;
@@ -508,15 +517,18 @@ define void @st_param_v4_i8_rrir(i8 %a, i8 %b, i8 %d) {
define void @st_param_v4_i8_rrri(i8 %a, i8 %b, i8 %c) {
; CHECK-LABEL: st_param_v4_i8_rrri(
; CHECK: {
-; CHECK-NEXT: .reg .b16 %rs<4>;
+; CHECK-NEXT: .reg .b32 %r<7>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: ld.param.b8 %rs1, [st_param_v4_i8_rrri_param_2];
-; CHECK-NEXT: ld.param.b8 %rs2, [st_param_v4_i8_rrri_param_1];
-; CHECK-NEXT: ld.param.b8 %rs3, [st_param_v4_i8_rrri_param_0];
+; CHECK-NEXT: ld.param.b8 %r1, [st_param_v4_i8_rrri_param_1];
+; CHECK-NEXT: ld.param.b8 %r2, [st_param_v4_i8_rrri_param_0];
+; CHECK-NEXT: prmt.b32 %r3, %r2, %r1, 0x3340U;
+; CHECK-NEXT: ld.param.b8 %r4, [st_param_v4_i8_rrri_param_2];
+; CHECK-NEXT: prmt.b32 %r5, %r4, 4, 0x3340U;
+; CHECK-NEXT: prmt.b32 %r6, %r3, %r5, 0x5410U;
; CHECK-NEXT: { // callseq 27, 0
; CHECK-NEXT: .param .align 4 .b8 param0[4];
-; CHECK-NEXT: st.param.v4.b8 [param0], {%rs3, %rs2, %rs1, 4};
+; CHECK-NEXT: st.param.b32 [param0], %r6;
; CHECK-NEXT: call.uni call_v4_i8, (param0);
; CHECK-NEXT: } // callseq 27
; CHECK-NEXT: ret;
@@ -530,14 +542,16 @@ define void @st_param_v4_i8_rrri(i8 %a, i8 %b, i8 %c) {
define void @st_param_v4_i8_iirr(i8 %c, i8 %d) {
; CHECK-LABEL: st_param_v4_i8_iirr(
; CHECK: {
-; CHECK-NEXT: .reg .b16 %rs<3>;
+; CHECK-NEXT: .reg .b32 %r<5>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: ld.param.b8 %rs1, [st_param_v4_i8_iirr_param_1];
-; CHECK-NEXT: ld.param.b8 %rs2, [st_param_v4_i8_iirr_param_0];
+; CHECK-NEXT: ld.param.b8 %r1, [st_param_v4_i8_iirr_param_1];
+; CHECK-NEXT: ld.param.b8 %r2, [st_param_v4_i8_iirr_param_0];
+; CHECK-NEXT: prmt.b32 %r3, %r2, %r1, 0x3340U;
+; CHECK-NEXT: prmt.b32 %r4, 513, %r3, 0x5410U;
; CHECK-NEXT: { // callseq 28, 0
; CHECK-NEXT: .param .align 4 .b8 param0[4];
-; CHECK-NEXT: st.param.v4.b8 [param0], {1, 2, %rs2, %rs1};
+; CHECK-NEXT: st.param.b32 [param0], %r4;
; CHECK-NEXT: call.uni call_v4_i8, (param0);
; CHECK-NEXT: } // callseq 28
; CHECK-NEXT: ret;
@@ -551,14 +565,17 @@ define void @st_param_v4_i8_iirr(i8 %c, i8 %d) {
define void @st_param_v4_i8_irir(i8 %b, i8 %d) {
; CHECK-LABEL: st_param_v4_i8_irir(
; CHECK: {
-; CHECK-NEXT: .reg .b16 %rs<3>;
+; CHECK-NEXT: .reg .b32 %r<6>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: ld.param.b8 %rs1, [st_param_v4_i8_irir_param_1];
-; CHECK-NEXT: ld.param.b8 %rs2, [st_param_v4_i8_irir_param_0];
+; CHECK-NEXT: ld.param.b8 %r1, [st_param_v4_i8_irir_param_1];
+; CHECK-NEXT: prmt.b32 %r2, 3, %r1, 0x3340U;
+; CHECK-NEXT: ld.param.b8 %r3, [st_param_v4_i8_irir_param_0];
+; CHECK-NEXT: prmt.b32 %r4, 1, %r3, 0x3340U;
+; CHECK-NEXT: prmt.b32 %r5, %r4, %r2, 0x5410U;
; CHECK-NEXT: { // callseq 29, 0
; CHECK-NEXT: .param .align 4 .b8 param0[4];
-; CHECK-NEXT: st.param.v4.b8 [param0], {1, %rs2, 3, %rs1};
+; CHECK-NEXT: st.param.b32 [param0], %r5;
; CHECK-NEXT: call.uni call_v4_i8, (param0);
; CHECK-NEXT: } // callseq 29
; CHECK-NEXT: ret;
@@ -572,14 +589,17 @@ define void @st_param_v4_i8_irir(i8 %b, i8 %d) {
define void @st_param_v4_i8_irri(i8 %b, i8 %c) {
; CHECK-LABEL: st_param_v4_i8_irri(
; CHECK: {
-; CHECK-NEXT: .reg .b16 %rs<3>;
+; CHECK-NEXT: .reg .b32 %r<6>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: ld.param.b8 %rs1, [st_param_v4_i8_irri_param_1];
-; CHECK-NEXT: ld.param.b8 %rs2, [st_param_v4_i8_irri_param_0];
+; CHECK-NEXT: ld.param.b8 %r1, [st_param_v4_i8_irri_param_1];
+; CHECK-NEXT: prmt.b32 %r2, %r1, 4, 0x3340U;
+; CHECK-NEXT: ld.param.b8 %r3, [st_param_v4_i8_irri_param_0];
+; CHECK-NEXT: prmt.b32 %r4, 1, %r3, 0x3340U;
+; CHECK-NEXT: prmt.b32 %r5, %r4, %r2, 0x5410U;
; CHECK-NEXT: { // callseq 30, 0
; CHECK-NEXT: .param .align 4 .b8 param0[4];
-; CHECK-NEXT: st.param.v4.b8 [param0], {1, %rs2, %rs1, 4};
+; CHECK-NEXT: st.param.b32 [param0], %r5;
; CHECK-NEXT: call.uni call_v4_i8, (param0);
; CHECK-NEXT: } // callseq 30
; CHECK-NEXT: ret;
@@ -593,14 +613,17 @@ define void @st_param_v4_i8_irri(i8 %b, i8 %c) {
define void @st_param_v4_i8_riir(i8 %a, i8 %d) {
; CHECK-LABEL: st_param_v4_i8_riir(
; CHECK: {
-; CHECK-NEXT: .reg .b16 %rs<3>;
+; CHECK-NEXT: .reg .b32 %r<6>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: ld.param.b8 %rs1, [st_param_v4_i8_riir_param_1];
-; CHECK-NEXT: ld.param.b8 %rs2, [st_param_v4_i8_riir_param_0];
+; CHECK-NEXT: ld.param.b8 %r1, [st_param_v4_i8_riir_param_1];
+; CHECK-NEXT: prmt.b32 %r2, 3, %r1, 0x3340U;
+; CHECK-NEXT: ld.param.b8 %r3, [st_param_v4_i8_riir_param_0];
+; CHECK-NEXT: prmt.b32 %r4, %r3, 2, 0x3340U;
+; CHECK-NEXT: prmt.b32 %r5, %r4, %r2, 0x5410U;
; CHECK-NEXT: { // callseq 31, 0
; CHECK-NEXT: .param .align 4 .b8 param0[4];
-; CHECK-NEXT: st.param.v4.b8 [param0], {%rs2, 2, 3, %rs1};
+; CHECK-NEXT: st.param.b32 [param0], %r5;
; CHECK-NEXT: call.uni call_v4_i8, (param0);
; CHECK-NEXT: } // callseq 31
; CHECK-NEXT: ret;
@@ -614,14 +637,17 @@ define void @st_param_v4_i8_riir(i8 %a, i8 %d) {
define void @st_param_v4_i8_riri(i8 %a, i8 %c) {
; CHECK-LABEL: st_param_v4_i8_riri(
; CHECK: {
-; CHECK-NEXT: .reg .b16 %rs<3>;
+; CHECK-NEXT: .reg .b32 %r<6>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: ld.param.b8 %rs1, [st_param_v4_i8_riri_param_1];
-; CHECK-NEXT: ld.param.b8 %rs2, [st_param_v4_i8_riri_param_0];
+; CHECK-NEXT: ld.param.b8 %r1, [st_param_v4_i8_riri_param_1];
+; CHECK-NEXT: prmt.b32 %r2, %r1, 4, 0x3340U;
+; CHECK-NEXT: ld.param.b8 %r3, [st_param_v4_i8_riri_param_0];
+; CHECK-NEXT: prmt.b32 %r4, %r3, 2, 0x3340U;
+; CHECK-NEXT: prmt.b32 %r5, %r4, %r2, 0x5410U;
; CHECK-NEXT: { // callseq 32, 0
; CHECK-NEXT: .param .align 4 .b8 param0[4];
-; CHECK-NEXT: st.param.v4.b8 [param0], {%rs2, 2, %rs1, 4};
+; CHECK-NEXT: st.param.b32 [param0], %r5;
; CHECK-NEXT: call.uni call_v4_i8, (param0);
; CHECK-NEXT: } // callseq 32
; CHECK-NEXT: ret;
@@ -635,14 +661,16 @@ define void @st_param_v4_i8_riri(i8 %a, i8 %c) {
define void @st_param_v4_i8_rrii(i8 %a, i8 %b) {
; CHECK-LABEL: st_param_v4_i8_rrii(
; CHECK: {
-; CHECK-NEXT: .reg .b16 %rs<3>;
+; CHECK-NEXT: .reg .b32 %r<5>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: ld.param.b8 %rs1, [st_param_v4_i8_rrii_param_1];
-; CHECK-NEXT: ld.param.b8 %rs2, [st_param_v4_i8_rrii_param_0];
+; CHECK-NEXT: ld.param.b8 %r1, [st_param_v4_i8_rrii_param_1];
+; CHECK-NEXT: ld.param.b8 %r2, [st_param_v4_i8_rrii_param_0];
+; CHECK-NEXT: prmt.b32 %r3, %r2, %r1, 0x3340U;
+; CHECK-NEXT: prmt.b32 %r4, %r3, 1027, 0x5410U;
; CHECK-NEXT: { // callseq 33, 0
; CHECK-NEXT: .param .align 4 .b8 param0[4];
-; CHECK-NEXT: st.param.v4.b8 [param0], {%rs2, %rs1, 3, 4};
+; CHECK-NEXT: st.param.b32 [param0], %r4;
; CHECK-NEXT: call.uni call_v4_i8, (param0);
; CHECK-NEXT: } // callseq 33
; CHECK-NEXT: ret;
@@ -656,13 +684,15 @@ define void @st_param_v4_i8_rrii(i8 %a, i8 %b) {
define void @st_param_v4_i8_iiir(i8 %d) {
; CHECK-LABEL: st_param_v4_i8_iiir(
; CHECK: {
-; CHECK-NEXT: .reg .b16 %rs<2>;
+; CHECK-NEXT: .reg .b32 %r<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: ld.param.b8 %rs1, [st_param_v4_i8_iiir_param_0];
; CHECK-NEXT: { // callseq 34, 0
; CHECK-NEXT: .param .align 4 .b8 param0[4];
-; CHECK-NEXT: st.param.v4.b8 [param0], {1, 2, 3, %rs1};
+; CHECK-NEXT: ld.param.b8 %r1, [st_param_v4_i8_iiir_param_0];
+; CHECK-NEXT: prmt.b32 %r2, 3, %r1, 0x3340U;
+; CHECK-NEXT: prmt.b32 %r3, 513, %r2, 0x5410U;
+; CHECK-NEXT: st.param.b32 [param0], %r3;
; CHECK-NEXT: call.uni call_v4_i8, (param0);
; CHECK-NEXT: } // callseq 34
; CHECK-NEXT: ret;
@@ -676,13 +706,15 @@ define void @st_param_v4_i8_iiir(i8 %d) {
define void @st_param_v4_i8_iiri(i8 %c) {
; CHECK-LABEL: st_param_v4_i8_iiri(
; CHECK: {
-; CHECK-NEXT: .reg .b16 %rs<2>;
+; CHECK-NEXT: .reg .b32 %r<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: ld.param.b8 %rs1, [st_param_v4_i8_iiri_param_0];
; CHECK-NEXT: { // callseq 35, 0
; CHECK-NEXT: .param .align 4 .b8 param0[4];
-; CHECK-NEXT: st.param.v4.b8 [param0], {1, 2, %rs1, 4};
+; CHECK-NEXT: ld.param.b8 %r1, [st_param_v4_i8_iiri_param_0];
+; CHECK-NEXT: prmt.b32 %r2, %r1, 4, 0x3340U;
+; CHECK-NEXT: prmt.b32 %r3, 513, %r2, 0x5410U;
+; CHECK-NEXT: st.param.b32 [param0], %r3;
; CHECK-NEXT: call.uni call_v4_i8, (param0);
; CHECK-NEXT: } // callseq 35
; CHECK-NEXT: ret;
@@ -696,13 +728,15 @@ define void @st_param_v4_i8_iiri(i8 %c) {
define void @st_param_v4_i8_irii(i8 %b) {
; CHECK-LABEL: st_param_v4_i8_irii(
; CHECK: {
-; CHECK-NEXT: .reg .b16 %rs<2>;
+; CHECK-NEXT: .reg .b32 %r<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: ld.param.b8 %rs1, [st_param_v4_i8_irii_param_0];
; CHECK-NEXT: { // callseq 36, 0
; CHECK-NEXT: .param .align 4 .b8 param0[4];
-; CHECK-NEXT: st.param.v4.b8 [param0], {1, %rs1, 3, 4};
+; CHECK-NEXT: ld.param.b8 %r1, [st_param_v4_i8_irii_param_0];
+; CHECK-NEXT: prmt.b32 %r2, 1, %r1, 0x3340U;
+; CHECK-NEXT: prmt.b32 %r3, %r2, 1027, 0x5410U;
+; CHECK-NEXT: st.param.b32 [param0], %r3;
; CHECK-NEXT: call.uni call_v4_i8, (param0);
; CHECK-NEXT: } // callseq 36
; CHECK-NEXT: ret;
@@ -716,13 +750,15 @@ define void @st_param_v4_i8_irii(i8 %b) {
define void @st_param_v4_i8_riii(i8 %a) {
; CHECK-LABEL: st_param_v4_i8_riii(
; CHECK: {
-; CHECK-NEXT: .reg .b16 %rs<2>;
+; CHECK-NEXT: .reg .b32 %r<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: ld.param.b8 %rs1, [st_param_v4_i8_riii_param_0];
; CHECK-NEXT: { // callseq 37, 0
; CHECK-NEXT: .param .align 4 .b8 param0[4];
-; CHECK-NEXT: st.param.v4.b8 [param0], {%rs1, 2, 3, 4};
+; CHECK-NEXT: ld.param.b8 %r1, [st_param_v4_i8_riii_param_0];
+; CHECK-NEXT: prmt.b32 %r2, %r1, 2, 0x3340U;
+; CHECK-NEXT: prmt.b32 %r3, %r2, 1027, 0x5410U;
+; CHECK-NEXT: st.param.b32 [param0], %r3;
; CHECK-NEXT: call.uni call_v4_i8, (param0);
; CHECK-NEXT: } // callseq 37
; CHECK-NEXT: ret;
@@ -742,7 +778,7 @@ define void @st_param_v4_i16_iiii() {
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: { // callseq 38, 0
; CHECK-NEXT: .param .align 8 .b8 param0[8];
-; CHECK-NEXT: st.param.v4.b16 [param0], {1, 2, 3, 4};
+; CHECK-NEXT: st.param.v2.b32 [param0], {131073, 262147};
; CHECK-NEXT: call.uni call_v4_i16, (param0);
; CHECK-NEXT: } // callseq 38
; CHECK-NEXT: ret;
@@ -841,13 +877,15 @@ define void @st_param_v4_i16_iirr(i16 %c, i16 %d) {
; CHECK-LABEL: st_param_v4_i16_iirr(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<3>;
+; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b16 %rs1, [st_param_v4_i16_iirr_param_0];
; CHECK-NEXT: ld.param.b16 %rs2, [st_param_v4_i16_iirr_param_1];
+; CHECK-NEXT: mov.b32 %r1, {%rs1, %rs2};
; CHECK-NEXT: { // callseq 43, 0
; CHECK-NEXT: .param .align 8 .b8 param0[8];
-; CHECK-NEXT: st.param.v4.b16 [param0], {1, 2, %rs1, %rs2};
+; CHECK-NEXT: st.param.v2.b32 [param0], {131073, %r1};
; CHECK-NEXT: call.uni call_v4_i16, (param0);
; CHECK-NEXT: } // callseq 43
; CHECK-NEXT: ret;
@@ -946,13 +984,15 @@ define void @st_param_v4_i16_rrii(i16 %a, i16 %b) {
; CHECK-LABEL: st_param_v4_i16_rrii(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<3>;
+; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b16 %rs1, [st_param_v4_i16_rrii_param_0];
; CHECK-NEXT: ld.param.b16 %rs2, [st_param_v4_i16_rrii_param_1];
+; CHECK-NEXT: mov.b32 %r1, {%rs1, %rs2};
; CHECK-NEXT: { // callseq 48, 0
; CHECK-NEXT: .param .align 8 .b8 param0[8];
-; CHECK-NEXT: st.param.v4.b16 [param0], {%rs1, %rs2, 3, 4};
+; CHECK-NEXT: st.param.v2.b32 [param0], {%r1, 262147};
; CHECK-NEXT: call.uni call_v4_i16, (param0);
; CHECK-NEXT: } // callseq 48
; CHECK-NEXT: ret;
@@ -966,13 +1006,16 @@ define void @st_param_v4_i16_rrii(i16 %a, i16 %b) {
define void @st_param_v4_i16_iiir(i16 %d) {
; CHECK-LABEL: st_param_v4_i16_iiir(
; CHECK: {
-; CHECK-NEXT: .reg .b16 %rs<2>;
+; CHECK-NEXT: .reg .b16 %rs<3>;
+; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b16 %rs1, [st_param_v4_i16_iiir_param_0];
+; CHECK-NEXT: mov.b16 %rs2, 3;
+; CHECK-NEXT: mov.b32 %r1, {%rs2, %rs1};
; CHECK-NEXT: { // callseq 49, 0
; CHECK-NEXT: .param .align 8 .b8 param0[8];
-; CHECK-NEXT: st.param.v4.b16 [param0], {1, 2, 3, %rs1};
+; CHECK-NEXT: st.param.v2.b32 [param0], {131073, %r1};
; CHECK-NEXT: call.uni call_v4_i16, (param0);
; CHECK-NEXT: } // callseq 49
; CHECK-NEXT: ret;
@@ -986,13 +1029,16 @@ define void @st_param_v4_i16_iiir(i16 %d) {
define void @st_param_v4_i16_iiri(i16 %c) {
; CHECK-LABEL: st_param_v4_i16_iiri(
; CHECK: {
-; CHECK-NEXT: .reg .b16 %rs<2>;
+; CHECK-NEXT: .reg .b16 %rs<3>;
+; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b16 %rs1, [st_param_v4_i16_iiri_param_0];
+; CHECK-NEXT: mov.b16 %rs2, 4;
+; CHECK-NEXT: mov.b32 %r1, {%rs1, %rs2};
; CHECK-NEXT: { // callseq 50, 0
; CHECK-NEXT: .param .align 8 .b8 param0[8];
-; CHECK-NEXT: st.param.v4.b16 [param0], {1, 2, %rs1, 4};
+; CHECK-NEXT: st.param.v2.b32 [param0], {131073, %r1};
; CHECK-NEXT: call.uni call_v4_i16, (param0);
; CHECK-NEXT: } // callseq 50
; CHECK-NEXT: ret;
@@ -1006,13 +1052,16 @@ define void @st_param_v4_i16_iiri(i16 %c) {
define void @st_param_v4_i16_irii(i16 %b) {
; CHECK-LABEL: st_param_v4_i16_irii(
; CHECK: {
-; CHECK-NEXT: .reg .b16 %rs<2>;
+; CHECK-NEXT: .reg .b16 %rs<3>;
+; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b16 %rs1, [st_param_v4_i16_irii_param_0];
+; CHECK-NEXT: mov.b16 %rs2, 1;
+; CHECK-NEXT: mov.b32 %r1, {%rs2, %rs1};
; CHECK-NEXT: { // callseq 51, 0
; CHECK-NEXT: .param .align 8 .b8 param0[8];
-; CHECK-NEXT: st.param.v4.b16 [param0], {1, %rs1, 3, 4};
+; CHECK-NEXT: st.param.v2.b32 [param0], {%r1, 262147};
; CHECK-NEXT: call.uni call_v4_i16, (param0);
; CHECK-NEXT: } // callseq 51
; CHECK-NEXT: ret;
@@ -1026,13 +1075,16 @@ define void @st_param_v4_i16_irii(i16 %b) {
define void @st_param_v4_i16_riii(i16 %a) {
; CHECK-LABEL: st_param_v4_i16_riii(
; CHECK: {
-; CHECK-NEXT: .reg .b16 %rs<2>;
+; CHECK-NEXT: .reg .b16 %rs<3>;
+; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b16 %rs1, [st_param_v4_i16_riii_param_0];
+; CHECK-NEXT: mov.b16 %rs2, 2;
+; CHECK-NEXT: mov.b32 %r1, {%rs1, %rs2};
; CHECK-NEXT: { // callseq 52, 0
; CHECK-NEXT: .param .align 8 .b8 param0[8];
-; CHECK-NEXT: st.param.v4.b16 [param0], {%rs1, 2, 3, 4};
+; CHECK-NEXT: st.param.v2.b32 [param0], {%r1, 262147};
; CHECK-NEXT: call.uni call_v4_i16, (param0);
; CHECK-NEXT: } // callseq 52
; CHECK-NEXT: ret;
@@ -1672,13 +1724,12 @@ declare void @call_v4_f32(%struct.float4 alignstack(16))
define void @st_param_bfloat() {
; CHECK-LABEL: st_param_bfloat(
; CHECK: {
-; CHECK-NEXT: .reg .b16 %rs<2>;
+; CHECK-EMPTY:
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: mov.b16 %rs1, 0x4100;
; CHECK-NEXT: { // callseq 83, 0
; CHECK-NEXT: .param .align 2 .b8 param0[2];
-; CHECK-NEXT: st.param.b16 [param0], %rs1;
+; CHECK-NEXT: st.param.b16 [param0], 0x4100;
; CHECK-NEXT: call.uni call_bfloat, (param0);
; CHECK-NEXT: } // callseq 83
; CHECK-NEXT: ret;
diff --git a/llvm/test/CodeGen/NVPTX/store-undef.ll b/llvm/test/CodeGen/NVPTX/store-undef.ll
index 5b31b5e..c8ca6b6 100644
--- a/llvm/test/CodeGen/NVPTX/store-undef.ll
+++ b/llvm/test/CodeGen/NVPTX/store-undef.ll
@@ -34,9 +34,9 @@ define void @test_store_param_def(i64 %param0, i32 %param1) {
; CHECK-NEXT: ld.param.b32 %r1, [test_store_param_def_param_1];
; CHECK-NEXT: { // callseq 1, 0
; CHECK-NEXT: .param .align 16 .b8 param0[32];
+; CHECK-NEXT: st.param.v4.b32 [param0+16], {%r2, %r1, %r3, %r4};
+; CHECK-NEXT: st.param.v2.b32 [param0+8], {%r5, %r1};
; CHECK-NEXT: st.param.b64 [param0], %rd1;
-; CHECK-NEXT: st.param.v2.b32 [param0+8], {%r2, %r1};
-; CHECK-NEXT: st.param.v4.b32 [param0+16], {%r3, %r1, %r4, %r5};
; CHECK-NEXT: call.uni test_call, (param0);
; CHECK-NEXT: } // callseq 1
; CHECK-NEXT: ret;
diff --git a/llvm/test/CodeGen/NVPTX/tex-read-cuda.ll b/llvm/test/CodeGen/NVPTX/tex-read-cuda.ll
index d6961a9..3138d7c 100644
--- a/llvm/test/CodeGen/NVPTX/tex-read-cuda.ll
+++ b/llvm/test/CodeGen/NVPTX/tex-read-cuda.ll
@@ -69,8 +69,8 @@ define ptx_kernel void @baz(ptr %red, i32 %idx) {
; CHECK-NEXT: tex.1d.v4.f32.s32 {%r2, %r3, %r4, %r5}, [tex0, {%r1}];
; CHECK-NEXT: { // callseq 0, 0
; CHECK-NEXT: .param .b64 param0;
-; CHECK-NEXT: st.param.b64 [param0], %rd3;
; CHECK-NEXT: .param .b32 retval0;
+; CHECK-NEXT: st.param.b64 [param0], %rd3;
; CHECK-NEXT: call.uni (retval0), texfunc, (param0);
; CHECK-NEXT: ld.param.b32 %r6, [retval0];
; CHECK-NEXT: } // callseq 0
diff --git a/llvm/test/CodeGen/NVPTX/unaligned-param-load-store.ll b/llvm/test/CodeGen/NVPTX/unaligned-param-load-store.ll
index 87e46b1..697eb90 100644
--- a/llvm/test/CodeGen/NVPTX/unaligned-param-load-store.ll
+++ b/llvm/test/CodeGen/NVPTX/unaligned-param-load-store.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; Verifies correctness of load/store of parameters and return values.
-; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_35 -O0 -verify-machineinstrs | FileCheck -allow-deprecated-dag-overlap %s
-; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_35 -O0 -verify-machineinstrs | %ptxas-verify %}
+; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_35 -verify-machineinstrs | FileCheck -allow-deprecated-dag-overlap %s
+; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_35 -verify-machineinstrs | %ptxas-verify %}
%s_i8i16p = type { <{ i16, i8, i16 }>, i64 }
%s_i8i32p = type { <{ i32, i8, i32 }>, i64 }
@@ -24,37 +24,35 @@
define %s_i8i16p @test_s_i8i16p(%s_i8i16p %a) {
; CHECK-LABEL: test_s_i8i16p(
; CHECK: {
-; CHECK-NEXT: .reg .b16 %rs<15>;
+; CHECK-NEXT: .reg .b16 %rs<13>;
+; CHECK-NEXT: .reg .b32 %r<2>;
; CHECK-NEXT: .reg .b64 %rd<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: ld.param.b8 %rs4, [test_s_i8i16p_param_0+4];
-; CHECK-NEXT: shl.b16 %rs5, %rs4, 8;
-; CHECK-NEXT: ld.param.b8 %rs6, [test_s_i8i16p_param_0+3];
-; CHECK-NEXT: or.b16 %rs3, %rs5, %rs6;
+; CHECK-NEXT: ld.param.b32 %r1, [test_s_i8i16p_param_0];
; CHECK-NEXT: ld.param.b64 %rd1, [test_s_i8i16p_param_0+8];
-; CHECK-NEXT: ld.param.b8 %rs2, [test_s_i8i16p_param_0+2];
-; CHECK-NEXT: ld.param.b16 %rs1, [test_s_i8i16p_param_0];
+; CHECK-NEXT: ld.param.b8 %rs1, [test_s_i8i16p_param_0+4];
; CHECK-NEXT: { // callseq 0, 0
; CHECK-NEXT: .param .align 8 .b8 param0[16];
-; CHECK-NEXT: st.param.b16 [param0], %rs1;
-; CHECK-NEXT: st.param.b8 [param0+2], %rs2;
-; CHECK-NEXT: st.param.b8 [param0+3], %rs3;
-; CHECK-NEXT: st.param.b8 [param0+4], %rs4;
-; CHECK-NEXT: st.param.b64 [param0+8], %rd1;
; CHECK-NEXT: .param .align 8 .b8 retval0[16];
+; CHECK-NEXT: st.param.b8 [param0+4], %rs1;
+; CHECK-NEXT: st.param.b64 [param0+8], %rd1;
+; CHECK-NEXT: st.param.b32 [param0], %r1;
; CHECK-NEXT: call.uni (retval0), test_s_i8i16p, (param0);
-; CHECK-NEXT: ld.param.b16 %rs7, [retval0];
-; CHECK-NEXT: ld.param.b8 %rs8, [retval0+2];
-; CHECK-NEXT: ld.param.b8 %rs9, [retval0+3];
-; CHECK-NEXT: ld.param.b8 %rs10, [retval0+4];
; CHECK-NEXT: ld.param.b64 %rd2, [retval0+8];
+; CHECK-NEXT: ld.param.b8 %rs2, [retval0+2];
+; CHECK-NEXT: ld.param.b16 %rs3, [retval0];
+; CHECK-NEXT: ld.param.b8 %rs4, [retval0+4];
+; CHECK-NEXT: ld.param.b8 %rs5, [retval0+3];
; CHECK-NEXT: } // callseq 0
-; CHECK-NEXT: st.param.b16 [func_retval0], %rs7;
-; CHECK-NEXT: st.param.b8 [func_retval0+2], %rs8;
-; CHECK-NEXT: st.param.b8 [func_retval0+4], %rs10;
-; CHECK-NEXT: st.param.b8 [func_retval0+3], %rs9;
+; CHECK-NEXT: shl.b16 %rs8, %rs4, 8;
+; CHECK-NEXT: or.b16 %rs9, %rs8, %rs5;
+; CHECK-NEXT: st.param.b8 [func_retval0+3], %rs5;
; CHECK-NEXT: st.param.b64 [func_retval0+8], %rd2;
+; CHECK-NEXT: st.param.b8 [func_retval0+2], %rs2;
+; CHECK-NEXT: st.param.b16 [func_retval0], %rs3;
+; CHECK-NEXT: shr.u16 %rs12, %rs9, 8;
+; CHECK-NEXT: st.param.b8 [func_retval0+4], %rs12;
; CHECK-NEXT: ret;
%r = tail call %s_i8i16p @test_s_i8i16p(%s_i8i16p %a)
ret %s_i8i16p %r
@@ -64,56 +62,51 @@ define %s_i8i16p @test_s_i8i16p(%s_i8i16p %a) {
define %s_i8i32p @test_s_i8i32p(%s_i8i32p %a) {
; CHECK-LABEL: test_s_i8i32p(
; CHECK: {
-; CHECK-NEXT: .reg .b16 %rs<12>;
-; CHECK-NEXT: .reg .b32 %r<20>;
+; CHECK-NEXT: .reg .b16 %rs<4>;
+; CHECK-NEXT: .reg .b32 %r<24>;
; CHECK-NEXT: .reg .b64 %rd<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: ld.param.b8 %r3, [test_s_i8i32p_param_0+6];
-; CHECK-NEXT: shl.b32 %r4, %r3, 8;
-; CHECK-NEXT: ld.param.b8 %r5, [test_s_i8i32p_param_0+5];
-; CHECK-NEXT: or.b32 %r6, %r4, %r5;
-; CHECK-NEXT: ld.param.b8 %r7, [test_s_i8i32p_param_0+7];
-; CHECK-NEXT: shl.b32 %r8, %r7, 16;
-; CHECK-NEXT: ld.param.b8 %r9, [test_s_i8i32p_param_0+8];
-; CHECK-NEXT: shl.b32 %r10, %r9, 24;
-; CHECK-NEXT: or.b32 %r11, %r10, %r8;
-; CHECK-NEXT: or.b32 %r2, %r11, %r6;
-; CHECK-NEXT: ld.param.b64 %rd1, [test_s_i8i32p_param_0+16];
-; CHECK-NEXT: ld.param.b8 %rs1, [test_s_i8i32p_param_0+4];
; CHECK-NEXT: ld.param.b32 %r1, [test_s_i8i32p_param_0];
-; CHECK-NEXT: shr.u32 %r12, %r2, 8;
-; CHECK-NEXT: shr.u32 %r13, %r11, 16;
+; CHECK-NEXT: ld.param.b16 %rs1, [test_s_i8i32p_param_0+4];
+; CHECK-NEXT: ld.param.b64 %rd1, [test_s_i8i32p_param_0+16];
+; CHECK-NEXT: ld.param.b8 %r2, [test_s_i8i32p_param_0+6];
+; CHECK-NEXT: ld.param.b8 %r3, [test_s_i8i32p_param_0+7];
+; CHECK-NEXT: ld.param.b8 %r4, [test_s_i8i32p_param_0+8];
; CHECK-NEXT: { // callseq 1, 0
; CHECK-NEXT: .param .align 8 .b8 param0[24];
-; CHECK-NEXT: st.param.b32 [param0], %r1;
-; CHECK-NEXT: st.param.b8 [param0+4], %rs1;
-; CHECK-NEXT: st.param.b8 [param0+5], %r2;
-; CHECK-NEXT: st.param.b8 [param0+6], %r12;
-; CHECK-NEXT: st.param.b8 [param0+7], %r13;
-; CHECK-NEXT: st.param.b8 [param0+8], %r9;
-; CHECK-NEXT: st.param.b64 [param0+16], %rd1;
; CHECK-NEXT: .param .align 8 .b8 retval0[24];
+; CHECK-NEXT: st.param.b8 [param0+8], %r4;
+; CHECK-NEXT: st.param.b8 [param0+7], %r3;
+; CHECK-NEXT: st.param.b8 [param0+6], %r2;
+; CHECK-NEXT: st.param.b64 [param0+16], %rd1;
+; CHECK-NEXT: st.param.b16 [param0+4], %rs1;
+; CHECK-NEXT: st.param.b32 [param0], %r1;
; CHECK-NEXT: call.uni (retval0), test_s_i8i32p, (param0);
-; CHECK-NEXT: ld.param.b32 %r14, [retval0];
-; CHECK-NEXT: ld.param.b8 %rs2, [retval0+4];
-; CHECK-NEXT: ld.param.b8 %rs3, [retval0+5];
-; CHECK-NEXT: ld.param.b8 %rs4, [retval0+6];
-; CHECK-NEXT: ld.param.b8 %rs5, [retval0+7];
-; CHECK-NEXT: ld.param.b8 %rs6, [retval0+8];
; CHECK-NEXT: ld.param.b64 %rd2, [retval0+16];
+; CHECK-NEXT: ld.param.b8 %rs2, [retval0+4];
+; CHECK-NEXT: ld.param.b32 %r5, [retval0];
+; CHECK-NEXT: ld.param.b8 %r6, [retval0+8];
+; CHECK-NEXT: ld.param.b8 %r7, [retval0+7];
+; CHECK-NEXT: ld.param.b8 %r8, [retval0+6];
+; CHECK-NEXT: ld.param.b8 %r9, [retval0+5];
; CHECK-NEXT: } // callseq 1
-; CHECK-NEXT: cvt.u32.u16 %r15, %rs3;
-; CHECK-NEXT: cvt.u32.u16 %r16, %rs4;
-; CHECK-NEXT: cvt.u32.u16 %r17, %rs5;
-; CHECK-NEXT: cvt.u32.u16 %r18, %rs6;
-; CHECK-NEXT: st.param.b32 [func_retval0], %r14;
-; CHECK-NEXT: st.param.b8 [func_retval0+4], %rs2;
-; CHECK-NEXT: st.param.b8 [func_retval0+8], %r18;
-; CHECK-NEXT: st.param.b8 [func_retval0+7], %r17;
-; CHECK-NEXT: st.param.b8 [func_retval0+6], %r16;
-; CHECK-NEXT: st.param.b8 [func_retval0+5], %r15;
+; CHECK-NEXT: shl.b32 %r12, %r8, 8;
+; CHECK-NEXT: or.b32 %r13, %r12, %r9;
+; CHECK-NEXT: shl.b32 %r15, %r7, 16;
+; CHECK-NEXT: shl.b32 %r17, %r6, 24;
+; CHECK-NEXT: or.b32 %r18, %r17, %r15;
+; CHECK-NEXT: or.b32 %r19, %r18, %r13;
+; CHECK-NEXT: st.param.b8 [func_retval0+5], %r9;
; CHECK-NEXT: st.param.b64 [func_retval0+16], %rd2;
+; CHECK-NEXT: st.param.b8 [func_retval0+4], %rs2;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r5;
+; CHECK-NEXT: shr.u32 %r21, %r19, 24;
+; CHECK-NEXT: st.param.b8 [func_retval0+8], %r21;
+; CHECK-NEXT: shr.u32 %r22, %r19, 16;
+; CHECK-NEXT: st.param.b8 [func_retval0+7], %r22;
+; CHECK-NEXT: shr.u32 %r23, %r19, 8;
+; CHECK-NEXT: st.param.b8 [func_retval0+6], %r23;
; CHECK-NEXT: ret;
%r = tail call %s_i8i32p @test_s_i8i32p(%s_i8i32p %a)
ret %s_i8i32p %r
@@ -123,112 +116,66 @@ define %s_i8i32p @test_s_i8i32p(%s_i8i32p %a) {
define %s_i8i64p @test_s_i8i64p(%s_i8i64p %a) {
; CHECK-LABEL: test_s_i8i64p(
; CHECK: {
-; CHECK-NEXT: .reg .b16 %rs<20>;
-; CHECK-NEXT: .reg .b64 %rd<68>;
+; CHECK-NEXT: .reg .b16 %rs<3>;
+; CHECK-NEXT: .reg .b64 %rd<46>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: ld.param.b8 %rd4, [test_s_i8i64p_param_0+10];
-; CHECK-NEXT: shl.b64 %rd5, %rd4, 8;
-; CHECK-NEXT: ld.param.b8 %rd6, [test_s_i8i64p_param_0+9];
-; CHECK-NEXT: or.b64 %rd7, %rd5, %rd6;
-; CHECK-NEXT: ld.param.b8 %rd8, [test_s_i8i64p_param_0+11];
-; CHECK-NEXT: shl.b64 %rd9, %rd8, 16;
-; CHECK-NEXT: ld.param.b8 %rd10, [test_s_i8i64p_param_0+12];
-; CHECK-NEXT: shl.b64 %rd11, %rd10, 24;
-; CHECK-NEXT: or.b64 %rd12, %rd11, %rd9;
-; CHECK-NEXT: or.b64 %rd13, %rd12, %rd7;
-; CHECK-NEXT: ld.param.b8 %rd14, [test_s_i8i64p_param_0+14];
-; CHECK-NEXT: shl.b64 %rd15, %rd14, 8;
-; CHECK-NEXT: ld.param.b8 %rd16, [test_s_i8i64p_param_0+13];
-; CHECK-NEXT: or.b64 %rd17, %rd15, %rd16;
-; CHECK-NEXT: ld.param.b8 %rd18, [test_s_i8i64p_param_0+15];
-; CHECK-NEXT: shl.b64 %rd19, %rd18, 16;
-; CHECK-NEXT: ld.param.b8 %rd20, [test_s_i8i64p_param_0+16];
-; CHECK-NEXT: shl.b64 %rd21, %rd20, 24;
-; CHECK-NEXT: or.b64 %rd22, %rd21, %rd19;
-; CHECK-NEXT: or.b64 %rd23, %rd22, %rd17;
-; CHECK-NEXT: shl.b64 %rd24, %rd23, 32;
-; CHECK-NEXT: or.b64 %rd2, %rd24, %rd13;
-; CHECK-NEXT: ld.param.b64 %rd3, [test_s_i8i64p_param_0+24];
-; CHECK-NEXT: ld.param.b8 %rs1, [test_s_i8i64p_param_0+8];
; CHECK-NEXT: ld.param.b64 %rd1, [test_s_i8i64p_param_0];
-; CHECK-NEXT: shr.u64 %rd25, %rd2, 8;
-; CHECK-NEXT: shr.u64 %rd26, %rd2, 16;
-; CHECK-NEXT: shr.u64 %rd27, %rd2, 24;
-; CHECK-NEXT: bfe.u64 %rd28, %rd23, 8, 24;
-; CHECK-NEXT: bfe.u64 %rd29, %rd23, 16, 16;
-; CHECK-NEXT: bfe.u64 %rd30, %rd23, 24, 8;
+; CHECK-NEXT: ld.param.b64 %rd2, [test_s_i8i64p_param_0+8];
+; CHECK-NEXT: ld.param.b64 %rd3, [test_s_i8i64p_param_0+24];
+; CHECK-NEXT: ld.param.b8 %rd4, [test_s_i8i64p_param_0+16];
; CHECK-NEXT: { // callseq 2, 0
; CHECK-NEXT: .param .align 8 .b8 param0[32];
-; CHECK-NEXT: st.param.b64 [param0], %rd1;
-; CHECK-NEXT: st.param.b8 [param0+8], %rs1;
-; CHECK-NEXT: st.param.b8 [param0+9], %rd2;
-; CHECK-NEXT: st.param.b8 [param0+10], %rd25;
-; CHECK-NEXT: st.param.b8 [param0+11], %rd26;
-; CHECK-NEXT: st.param.b8 [param0+12], %rd27;
-; CHECK-NEXT: st.param.b8 [param0+13], %rd23;
-; CHECK-NEXT: st.param.b8 [param0+14], %rd28;
-; CHECK-NEXT: st.param.b8 [param0+15], %rd29;
-; CHECK-NEXT: st.param.b8 [param0+16], %rd30;
-; CHECK-NEXT: st.param.b64 [param0+24], %rd3;
; CHECK-NEXT: .param .align 8 .b8 retval0[32];
+; CHECK-NEXT: st.param.b8 [param0+16], %rd4;
+; CHECK-NEXT: st.param.b64 [param0+24], %rd3;
+; CHECK-NEXT: st.param.b64 [param0+8], %rd2;
+; CHECK-NEXT: st.param.b64 [param0], %rd1;
; CHECK-NEXT: call.uni (retval0), test_s_i8i64p, (param0);
-; CHECK-NEXT: ld.param.b64 %rd31, [retval0];
-; CHECK-NEXT: ld.param.b8 %rs2, [retval0+8];
-; CHECK-NEXT: ld.param.b8 %rs3, [retval0+9];
-; CHECK-NEXT: ld.param.b8 %rs4, [retval0+10];
-; CHECK-NEXT: ld.param.b8 %rs5, [retval0+11];
-; CHECK-NEXT: ld.param.b8 %rs6, [retval0+12];
-; CHECK-NEXT: ld.param.b8 %rs7, [retval0+13];
-; CHECK-NEXT: ld.param.b8 %rs8, [retval0+14];
-; CHECK-NEXT: ld.param.b8 %rs9, [retval0+15];
-; CHECK-NEXT: ld.param.b8 %rs10, [retval0+16];
-; CHECK-NEXT: ld.param.b64 %rd32, [retval0+24];
+; CHECK-NEXT: ld.param.b64 %rd5, [retval0+24];
+; CHECK-NEXT: ld.param.b8 %rs1, [retval0+8];
+; CHECK-NEXT: ld.param.b64 %rd6, [retval0];
+; CHECK-NEXT: ld.param.b8 %rd7, [retval0+16];
+; CHECK-NEXT: ld.param.b8 %rd8, [retval0+15];
+; CHECK-NEXT: ld.param.b8 %rd9, [retval0+14];
+; CHECK-NEXT: ld.param.b8 %rd10, [retval0+13];
+; CHECK-NEXT: ld.param.b8 %rd11, [retval0+12];
+; CHECK-NEXT: ld.param.b8 %rd12, [retval0+11];
+; CHECK-NEXT: ld.param.b8 %rd13, [retval0+10];
+; CHECK-NEXT: ld.param.b8 %rd14, [retval0+9];
; CHECK-NEXT: } // callseq 2
-; CHECK-NEXT: cvt.u64.u16 %rd33, %rs3;
-; CHECK-NEXT: and.b64 %rd34, %rd33, 255;
-; CHECK-NEXT: cvt.u64.u16 %rd35, %rs4;
-; CHECK-NEXT: and.b64 %rd36, %rd35, 255;
-; CHECK-NEXT: shl.b64 %rd37, %rd36, 8;
-; CHECK-NEXT: or.b64 %rd38, %rd34, %rd37;
-; CHECK-NEXT: cvt.u64.u16 %rd39, %rs5;
-; CHECK-NEXT: and.b64 %rd40, %rd39, 255;
-; CHECK-NEXT: shl.b64 %rd41, %rd40, 16;
-; CHECK-NEXT: or.b64 %rd42, %rd38, %rd41;
-; CHECK-NEXT: cvt.u64.u16 %rd43, %rs6;
-; CHECK-NEXT: and.b64 %rd44, %rd43, 255;
-; CHECK-NEXT: shl.b64 %rd45, %rd44, 24;
-; CHECK-NEXT: or.b64 %rd46, %rd42, %rd45;
-; CHECK-NEXT: cvt.u64.u16 %rd47, %rs7;
-; CHECK-NEXT: and.b64 %rd48, %rd47, 255;
-; CHECK-NEXT: shl.b64 %rd49, %rd48, 32;
-; CHECK-NEXT: or.b64 %rd50, %rd46, %rd49;
-; CHECK-NEXT: cvt.u64.u16 %rd51, %rs8;
-; CHECK-NEXT: and.b64 %rd52, %rd51, 255;
-; CHECK-NEXT: shl.b64 %rd53, %rd52, 40;
-; CHECK-NEXT: or.b64 %rd54, %rd50, %rd53;
-; CHECK-NEXT: cvt.u64.u16 %rd55, %rs9;
-; CHECK-NEXT: and.b64 %rd56, %rd55, 255;
-; CHECK-NEXT: shl.b64 %rd57, %rd56, 48;
-; CHECK-NEXT: or.b64 %rd58, %rd54, %rd57;
-; CHECK-NEXT: cvt.u64.u16 %rd59, %rs10;
-; CHECK-NEXT: shl.b64 %rd60, %rd59, 56;
-; CHECK-NEXT: or.b64 %rd61, %rd58, %rd60;
-; CHECK-NEXT: st.param.b64 [func_retval0], %rd31;
-; CHECK-NEXT: st.param.b8 [func_retval0+8], %rs2;
+; CHECK-NEXT: shl.b64 %rd17, %rd13, 8;
+; CHECK-NEXT: or.b64 %rd18, %rd17, %rd14;
+; CHECK-NEXT: shl.b64 %rd20, %rd12, 16;
+; CHECK-NEXT: shl.b64 %rd22, %rd11, 24;
+; CHECK-NEXT: or.b64 %rd23, %rd22, %rd20;
+; CHECK-NEXT: or.b64 %rd24, %rd23, %rd18;
+; CHECK-NEXT: shl.b64 %rd27, %rd9, 8;
+; CHECK-NEXT: or.b64 %rd28, %rd27, %rd10;
+; CHECK-NEXT: shl.b64 %rd30, %rd8, 16;
+; CHECK-NEXT: shl.b64 %rd32, %rd7, 24;
+; CHECK-NEXT: or.b64 %rd33, %rd32, %rd30;
+; CHECK-NEXT: or.b64 %rd34, %rd33, %rd28;
+; CHECK-NEXT: shl.b64 %rd35, %rd34, 32;
+; CHECK-NEXT: or.b64 %rd36, %rd35, %rd24;
+; CHECK-NEXT: st.param.b8 [func_retval0+9], %rd14;
+; CHECK-NEXT: st.param.b64 [func_retval0+24], %rd5;
+; CHECK-NEXT: st.param.b8 [func_retval0+8], %rs1;
+; CHECK-NEXT: st.param.b64 [func_retval0], %rd6;
+; CHECK-NEXT: shr.u64 %rd39, %rd36, 56;
+; CHECK-NEXT: st.param.b8 [func_retval0+16], %rd39;
+; CHECK-NEXT: shr.u64 %rd40, %rd36, 48;
+; CHECK-NEXT: st.param.b8 [func_retval0+15], %rd40;
+; CHECK-NEXT: shr.u64 %rd41, %rd36, 40;
+; CHECK-NEXT: st.param.b8 [func_retval0+14], %rd41;
+; CHECK-NEXT: shr.u64 %rd42, %rd36, 32;
+; CHECK-NEXT: st.param.b8 [func_retval0+13], %rd42;
+; CHECK-NEXT: shr.u64 %rd43, %rd36, 24;
; CHECK-NEXT: st.param.b8 [func_retval0+12], %rd43;
-; CHECK-NEXT: st.param.b8 [func_retval0+11], %rd39;
-; CHECK-NEXT: st.param.b8 [func_retval0+10], %rd35;
-; CHECK-NEXT: st.param.b8 [func_retval0+9], %rd33;
-; CHECK-NEXT: shr.u64 %rd64, %rd50, 32;
-; CHECK-NEXT: st.param.b8 [func_retval0+13], %rd64;
-; CHECK-NEXT: shr.u64 %rd65, %rd54, 40;
-; CHECK-NEXT: st.param.b8 [func_retval0+14], %rd65;
-; CHECK-NEXT: shr.u64 %rd66, %rd58, 48;
-; CHECK-NEXT: st.param.b8 [func_retval0+15], %rd66;
-; CHECK-NEXT: shr.u64 %rd67, %rd61, 56;
-; CHECK-NEXT: st.param.b8 [func_retval0+16], %rd67;
-; CHECK-NEXT: st.param.b64 [func_retval0+24], %rd32;
+; CHECK-NEXT: shr.u64 %rd44, %rd36, 16;
+; CHECK-NEXT: st.param.b8 [func_retval0+11], %rd44;
+; CHECK-NEXT: shr.u64 %rd45, %rd36, 8;
+; CHECK-NEXT: st.param.b8 [func_retval0+10], %rd45;
; CHECK-NEXT: ret;
%r = tail call %s_i8i64p @test_s_i8i64p(%s_i8i64p %a)
ret %s_i8i64p %r
@@ -242,33 +189,32 @@ define %s_i8f16p @test_s_i8f16p(%s_i8f16p %a) {
; CHECK-NEXT: .reg .b64 %rd<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: ld.param.b8 %rs4, [test_s_i8f16p_param_0+4];
-; CHECK-NEXT: shl.b16 %rs5, %rs4, 8;
-; CHECK-NEXT: ld.param.b8 %rs6, [test_s_i8f16p_param_0+3];
-; CHECK-NEXT: or.b16 %rs3, %rs5, %rs6;
-; CHECK-NEXT: ld.param.b64 %rd1, [test_s_i8f16p_param_0+8];
-; CHECK-NEXT: ld.param.b8 %rs2, [test_s_i8f16p_param_0+2];
; CHECK-NEXT: ld.param.b16 %rs1, [test_s_i8f16p_param_0];
+; CHECK-NEXT: ld.param.b16 %rs2, [test_s_i8f16p_param_0+2];
+; CHECK-NEXT: ld.param.b64 %rd1, [test_s_i8f16p_param_0+8];
+; CHECK-NEXT: ld.param.b8 %rs3, [test_s_i8f16p_param_0+4];
; CHECK-NEXT: { // callseq 3, 0
; CHECK-NEXT: .param .align 8 .b8 param0[16];
-; CHECK-NEXT: st.param.b16 [param0], %rs1;
-; CHECK-NEXT: st.param.b8 [param0+2], %rs2;
-; CHECK-NEXT: st.param.b8 [param0+3], %rs3;
-; CHECK-NEXT: st.param.b8 [param0+4], %rs4;
-; CHECK-NEXT: st.param.b64 [param0+8], %rd1;
; CHECK-NEXT: .param .align 8 .b8 retval0[16];
+; CHECK-NEXT: st.param.b8 [param0+4], %rs3;
+; CHECK-NEXT: st.param.b64 [param0+8], %rd1;
+; CHECK-NEXT: st.param.b16 [param0+2], %rs2;
+; CHECK-NEXT: st.param.b16 [param0], %rs1;
; CHECK-NEXT: call.uni (retval0), test_s_i8f16p, (param0);
-; CHECK-NEXT: ld.param.b16 %rs7, [retval0];
-; CHECK-NEXT: ld.param.b8 %rs8, [retval0+2];
-; CHECK-NEXT: ld.param.b8 %rs9, [retval0+3];
-; CHECK-NEXT: ld.param.b8 %rs10, [retval0+4];
; CHECK-NEXT: ld.param.b64 %rd2, [retval0+8];
+; CHECK-NEXT: ld.param.b8 %rs4, [retval0+2];
+; CHECK-NEXT: ld.param.b16 %rs5, [retval0];
+; CHECK-NEXT: ld.param.b8 %rs6, [retval0+4];
+; CHECK-NEXT: ld.param.b8 %rs7, [retval0+3];
; CHECK-NEXT: } // callseq 3
-; CHECK-NEXT: st.param.b16 [func_retval0], %rs7;
-; CHECK-NEXT: st.param.b8 [func_retval0+2], %rs8;
-; CHECK-NEXT: st.param.b8 [func_retval0+4], %rs10;
-; CHECK-NEXT: st.param.b8 [func_retval0+3], %rs9;
+; CHECK-NEXT: shl.b16 %rs10, %rs6, 8;
+; CHECK-NEXT: or.b16 %rs11, %rs10, %rs7;
+; CHECK-NEXT: st.param.b8 [func_retval0+3], %rs7;
; CHECK-NEXT: st.param.b64 [func_retval0+8], %rd2;
+; CHECK-NEXT: st.param.b8 [func_retval0+2], %rs4;
+; CHECK-NEXT: st.param.b16 [func_retval0], %rs5;
+; CHECK-NEXT: shr.u16 %rs14, %rs11, 8;
+; CHECK-NEXT: st.param.b8 [func_retval0+4], %rs14;
; CHECK-NEXT: ret;
%r = tail call %s_i8f16p @test_s_i8f16p(%s_i8f16p %a)
ret %s_i8f16p %r
@@ -278,56 +224,51 @@ define %s_i8f16p @test_s_i8f16p(%s_i8f16p %a) {
define %s_i8f16x2p @test_s_i8f16x2p(%s_i8f16x2p %a) {
; CHECK-LABEL: test_s_i8f16x2p(
; CHECK: {
-; CHECK-NEXT: .reg .b16 %rs<12>;
-; CHECK-NEXT: .reg .b32 %r<20>;
+; CHECK-NEXT: .reg .b16 %rs<4>;
+; CHECK-NEXT: .reg .b32 %r<24>;
; CHECK-NEXT: .reg .b64 %rd<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: ld.param.b8 %r3, [test_s_i8f16x2p_param_0+6];
-; CHECK-NEXT: shl.b32 %r4, %r3, 8;
-; CHECK-NEXT: ld.param.b8 %r5, [test_s_i8f16x2p_param_0+5];
-; CHECK-NEXT: or.b32 %r6, %r4, %r5;
-; CHECK-NEXT: ld.param.b8 %r7, [test_s_i8f16x2p_param_0+7];
-; CHECK-NEXT: shl.b32 %r8, %r7, 16;
-; CHECK-NEXT: ld.param.b8 %r9, [test_s_i8f16x2p_param_0+8];
-; CHECK-NEXT: shl.b32 %r10, %r9, 24;
-; CHECK-NEXT: or.b32 %r11, %r10, %r8;
-; CHECK-NEXT: or.b32 %r2, %r11, %r6;
-; CHECK-NEXT: ld.param.b64 %rd1, [test_s_i8f16x2p_param_0+16];
-; CHECK-NEXT: ld.param.b8 %rs1, [test_s_i8f16x2p_param_0+4];
; CHECK-NEXT: ld.param.b32 %r1, [test_s_i8f16x2p_param_0];
-; CHECK-NEXT: shr.u32 %r12, %r2, 8;
-; CHECK-NEXT: shr.u32 %r13, %r11, 16;
+; CHECK-NEXT: ld.param.b16 %rs1, [test_s_i8f16x2p_param_0+4];
+; CHECK-NEXT: ld.param.b64 %rd1, [test_s_i8f16x2p_param_0+16];
+; CHECK-NEXT: ld.param.b8 %r2, [test_s_i8f16x2p_param_0+6];
+; CHECK-NEXT: ld.param.b8 %r3, [test_s_i8f16x2p_param_0+7];
+; CHECK-NEXT: ld.param.b8 %r4, [test_s_i8f16x2p_param_0+8];
; CHECK-NEXT: { // callseq 4, 0
; CHECK-NEXT: .param .align 8 .b8 param0[24];
-; CHECK-NEXT: st.param.b32 [param0], %r1;
-; CHECK-NEXT: st.param.b8 [param0+4], %rs1;
-; CHECK-NEXT: st.param.b8 [param0+5], %r2;
-; CHECK-NEXT: st.param.b8 [param0+6], %r12;
-; CHECK-NEXT: st.param.b8 [param0+7], %r13;
-; CHECK-NEXT: st.param.b8 [param0+8], %r9;
-; CHECK-NEXT: st.param.b64 [param0+16], %rd1;
; CHECK-NEXT: .param .align 8 .b8 retval0[24];
+; CHECK-NEXT: st.param.b8 [param0+8], %r4;
+; CHECK-NEXT: st.param.b8 [param0+7], %r3;
+; CHECK-NEXT: st.param.b8 [param0+6], %r2;
+; CHECK-NEXT: st.param.b64 [param0+16], %rd1;
+; CHECK-NEXT: st.param.b16 [param0+4], %rs1;
+; CHECK-NEXT: st.param.b32 [param0], %r1;
; CHECK-NEXT: call.uni (retval0), test_s_i8f16x2p, (param0);
-; CHECK-NEXT: ld.param.b32 %r14, [retval0];
-; CHECK-NEXT: ld.param.b8 %rs2, [retval0+4];
-; CHECK-NEXT: ld.param.b8 %rs3, [retval0+5];
-; CHECK-NEXT: ld.param.b8 %rs4, [retval0+6];
-; CHECK-NEXT: ld.param.b8 %rs5, [retval0+7];
-; CHECK-NEXT: ld.param.b8 %rs6, [retval0+8];
; CHECK-NEXT: ld.param.b64 %rd2, [retval0+16];
+; CHECK-NEXT: ld.param.b8 %rs2, [retval0+4];
+; CHECK-NEXT: ld.param.b32 %r5, [retval0];
+; CHECK-NEXT: ld.param.b8 %r6, [retval0+8];
+; CHECK-NEXT: ld.param.b8 %r7, [retval0+7];
+; CHECK-NEXT: ld.param.b8 %r8, [retval0+6];
+; CHECK-NEXT: ld.param.b8 %r9, [retval0+5];
; CHECK-NEXT: } // callseq 4
-; CHECK-NEXT: cvt.u32.u16 %r15, %rs3;
-; CHECK-NEXT: cvt.u32.u16 %r16, %rs4;
-; CHECK-NEXT: cvt.u32.u16 %r17, %rs5;
-; CHECK-NEXT: cvt.u32.u16 %r18, %rs6;
-; CHECK-NEXT: st.param.b32 [func_retval0], %r14;
-; CHECK-NEXT: st.param.b8 [func_retval0+4], %rs2;
-; CHECK-NEXT: st.param.b8 [func_retval0+8], %r18;
-; CHECK-NEXT: st.param.b8 [func_retval0+7], %r17;
-; CHECK-NEXT: st.param.b8 [func_retval0+6], %r16;
-; CHECK-NEXT: st.param.b8 [func_retval0+5], %r15;
+; CHECK-NEXT: shl.b32 %r12, %r8, 8;
+; CHECK-NEXT: or.b32 %r13, %r12, %r9;
+; CHECK-NEXT: shl.b32 %r15, %r7, 16;
+; CHECK-NEXT: shl.b32 %r17, %r6, 24;
+; CHECK-NEXT: or.b32 %r18, %r17, %r15;
+; CHECK-NEXT: or.b32 %r19, %r18, %r13;
+; CHECK-NEXT: st.param.b8 [func_retval0+5], %r9;
; CHECK-NEXT: st.param.b64 [func_retval0+16], %rd2;
+; CHECK-NEXT: st.param.b8 [func_retval0+4], %rs2;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r5;
+; CHECK-NEXT: shr.u32 %r21, %r19, 24;
+; CHECK-NEXT: st.param.b8 [func_retval0+8], %r21;
+; CHECK-NEXT: shr.u32 %r22, %r19, 16;
+; CHECK-NEXT: st.param.b8 [func_retval0+7], %r22;
+; CHECK-NEXT: shr.u32 %r23, %r19, 8;
+; CHECK-NEXT: st.param.b8 [func_retval0+6], %r23;
; CHECK-NEXT: ret;
%r = tail call %s_i8f16x2p @test_s_i8f16x2p(%s_i8f16x2p %a)
ret %s_i8f16x2p %r
@@ -337,56 +278,51 @@ define %s_i8f16x2p @test_s_i8f16x2p(%s_i8f16x2p %a) {
define %s_i8f32p @test_s_i8f32p(%s_i8f32p %a) {
; CHECK-LABEL: test_s_i8f32p(
; CHECK: {
-; CHECK-NEXT: .reg .b16 %rs<12>;
-; CHECK-NEXT: .reg .b32 %r<20>;
+; CHECK-NEXT: .reg .b16 %rs<4>;
+; CHECK-NEXT: .reg .b32 %r<24>;
; CHECK-NEXT: .reg .b64 %rd<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: ld.param.b8 %r3, [test_s_i8f32p_param_0+6];
-; CHECK-NEXT: shl.b32 %r4, %r3, 8;
-; CHECK-NEXT: ld.param.b8 %r5, [test_s_i8f32p_param_0+5];
-; CHECK-NEXT: or.b32 %r6, %r4, %r5;
-; CHECK-NEXT: ld.param.b8 %r7, [test_s_i8f32p_param_0+7];
-; CHECK-NEXT: shl.b32 %r8, %r7, 16;
-; CHECK-NEXT: ld.param.b8 %r9, [test_s_i8f32p_param_0+8];
-; CHECK-NEXT: shl.b32 %r10, %r9, 24;
-; CHECK-NEXT: or.b32 %r11, %r10, %r8;
-; CHECK-NEXT: or.b32 %r2, %r11, %r6;
-; CHECK-NEXT: ld.param.b64 %rd1, [test_s_i8f32p_param_0+16];
-; CHECK-NEXT: ld.param.b8 %rs1, [test_s_i8f32p_param_0+4];
; CHECK-NEXT: ld.param.b32 %r1, [test_s_i8f32p_param_0];
-; CHECK-NEXT: shr.u32 %r12, %r2, 8;
-; CHECK-NEXT: shr.u32 %r13, %r11, 16;
+; CHECK-NEXT: ld.param.b16 %rs1, [test_s_i8f32p_param_0+4];
+; CHECK-NEXT: ld.param.b64 %rd1, [test_s_i8f32p_param_0+16];
+; CHECK-NEXT: ld.param.b8 %r2, [test_s_i8f32p_param_0+6];
+; CHECK-NEXT: ld.param.b8 %r3, [test_s_i8f32p_param_0+7];
+; CHECK-NEXT: ld.param.b8 %r4, [test_s_i8f32p_param_0+8];
; CHECK-NEXT: { // callseq 5, 0
; CHECK-NEXT: .param .align 8 .b8 param0[24];
-; CHECK-NEXT: st.param.b32 [param0], %r1;
-; CHECK-NEXT: st.param.b8 [param0+4], %rs1;
-; CHECK-NEXT: st.param.b8 [param0+5], %r2;
-; CHECK-NEXT: st.param.b8 [param0+6], %r12;
-; CHECK-NEXT: st.param.b8 [param0+7], %r13;
-; CHECK-NEXT: st.param.b8 [param0+8], %r9;
-; CHECK-NEXT: st.param.b64 [param0+16], %rd1;
; CHECK-NEXT: .param .align 8 .b8 retval0[24];
+; CHECK-NEXT: st.param.b8 [param0+8], %r4;
+; CHECK-NEXT: st.param.b8 [param0+7], %r3;
+; CHECK-NEXT: st.param.b8 [param0+6], %r2;
+; CHECK-NEXT: st.param.b64 [param0+16], %rd1;
+; CHECK-NEXT: st.param.b16 [param0+4], %rs1;
+; CHECK-NEXT: st.param.b32 [param0], %r1;
; CHECK-NEXT: call.uni (retval0), test_s_i8f32p, (param0);
-; CHECK-NEXT: ld.param.b32 %r14, [retval0];
-; CHECK-NEXT: ld.param.b8 %rs2, [retval0+4];
-; CHECK-NEXT: ld.param.b8 %rs3, [retval0+5];
-; CHECK-NEXT: ld.param.b8 %rs4, [retval0+6];
-; CHECK-NEXT: ld.param.b8 %rs5, [retval0+7];
-; CHECK-NEXT: ld.param.b8 %rs6, [retval0+8];
; CHECK-NEXT: ld.param.b64 %rd2, [retval0+16];
+; CHECK-NEXT: ld.param.b8 %rs2, [retval0+4];
+; CHECK-NEXT: ld.param.b32 %r5, [retval0];
+; CHECK-NEXT: ld.param.b8 %r6, [retval0+8];
+; CHECK-NEXT: ld.param.b8 %r7, [retval0+7];
+; CHECK-NEXT: ld.param.b8 %r8, [retval0+6];
+; CHECK-NEXT: ld.param.b8 %r9, [retval0+5];
; CHECK-NEXT: } // callseq 5
-; CHECK-NEXT: cvt.u32.u16 %r15, %rs3;
-; CHECK-NEXT: cvt.u32.u16 %r16, %rs4;
-; CHECK-NEXT: cvt.u32.u16 %r17, %rs5;
-; CHECK-NEXT: cvt.u32.u16 %r18, %rs6;
-; CHECK-NEXT: st.param.b32 [func_retval0], %r14;
-; CHECK-NEXT: st.param.b8 [func_retval0+4], %rs2;
-; CHECK-NEXT: st.param.b8 [func_retval0+8], %r18;
-; CHECK-NEXT: st.param.b8 [func_retval0+7], %r17;
-; CHECK-NEXT: st.param.b8 [func_retval0+6], %r16;
-; CHECK-NEXT: st.param.b8 [func_retval0+5], %r15;
+; CHECK-NEXT: shl.b32 %r12, %r8, 8;
+; CHECK-NEXT: or.b32 %r13, %r12, %r9;
+; CHECK-NEXT: shl.b32 %r15, %r7, 16;
+; CHECK-NEXT: shl.b32 %r17, %r6, 24;
+; CHECK-NEXT: or.b32 %r18, %r17, %r15;
+; CHECK-NEXT: or.b32 %r19, %r18, %r13;
+; CHECK-NEXT: st.param.b8 [func_retval0+5], %r9;
; CHECK-NEXT: st.param.b64 [func_retval0+16], %rd2;
+; CHECK-NEXT: st.param.b8 [func_retval0+4], %rs2;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r5;
+; CHECK-NEXT: shr.u32 %r21, %r19, 24;
+; CHECK-NEXT: st.param.b8 [func_retval0+8], %r21;
+; CHECK-NEXT: shr.u32 %r22, %r19, 16;
+; CHECK-NEXT: st.param.b8 [func_retval0+7], %r22;
+; CHECK-NEXT: shr.u32 %r23, %r19, 8;
+; CHECK-NEXT: st.param.b8 [func_retval0+6], %r23;
; CHECK-NEXT: ret;
%r = tail call %s_i8f32p @test_s_i8f32p(%s_i8f32p %a)
ret %s_i8f32p %r
@@ -396,112 +332,66 @@ define %s_i8f32p @test_s_i8f32p(%s_i8f32p %a) {
define %s_i8f64p @test_s_i8f64p(%s_i8f64p %a) {
; CHECK-LABEL: test_s_i8f64p(
; CHECK: {
-; CHECK-NEXT: .reg .b16 %rs<20>;
-; CHECK-NEXT: .reg .b64 %rd<68>;
+; CHECK-NEXT: .reg .b16 %rs<3>;
+; CHECK-NEXT: .reg .b64 %rd<46>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: ld.param.b8 %rd4, [test_s_i8f64p_param_0+10];
-; CHECK-NEXT: shl.b64 %rd5, %rd4, 8;
-; CHECK-NEXT: ld.param.b8 %rd6, [test_s_i8f64p_param_0+9];
-; CHECK-NEXT: or.b64 %rd7, %rd5, %rd6;
-; CHECK-NEXT: ld.param.b8 %rd8, [test_s_i8f64p_param_0+11];
-; CHECK-NEXT: shl.b64 %rd9, %rd8, 16;
-; CHECK-NEXT: ld.param.b8 %rd10, [test_s_i8f64p_param_0+12];
-; CHECK-NEXT: shl.b64 %rd11, %rd10, 24;
-; CHECK-NEXT: or.b64 %rd12, %rd11, %rd9;
-; CHECK-NEXT: or.b64 %rd13, %rd12, %rd7;
-; CHECK-NEXT: ld.param.b8 %rd14, [test_s_i8f64p_param_0+14];
-; CHECK-NEXT: shl.b64 %rd15, %rd14, 8;
-; CHECK-NEXT: ld.param.b8 %rd16, [test_s_i8f64p_param_0+13];
-; CHECK-NEXT: or.b64 %rd17, %rd15, %rd16;
-; CHECK-NEXT: ld.param.b8 %rd18, [test_s_i8f64p_param_0+15];
-; CHECK-NEXT: shl.b64 %rd19, %rd18, 16;
-; CHECK-NEXT: ld.param.b8 %rd20, [test_s_i8f64p_param_0+16];
-; CHECK-NEXT: shl.b64 %rd21, %rd20, 24;
-; CHECK-NEXT: or.b64 %rd22, %rd21, %rd19;
-; CHECK-NEXT: or.b64 %rd23, %rd22, %rd17;
-; CHECK-NEXT: shl.b64 %rd24, %rd23, 32;
-; CHECK-NEXT: or.b64 %rd2, %rd24, %rd13;
-; CHECK-NEXT: ld.param.b64 %rd3, [test_s_i8f64p_param_0+24];
-; CHECK-NEXT: ld.param.b8 %rs1, [test_s_i8f64p_param_0+8];
; CHECK-NEXT: ld.param.b64 %rd1, [test_s_i8f64p_param_0];
-; CHECK-NEXT: shr.u64 %rd25, %rd2, 8;
-; CHECK-NEXT: shr.u64 %rd26, %rd2, 16;
-; CHECK-NEXT: shr.u64 %rd27, %rd2, 24;
-; CHECK-NEXT: bfe.u64 %rd28, %rd23, 8, 24;
-; CHECK-NEXT: bfe.u64 %rd29, %rd23, 16, 16;
-; CHECK-NEXT: bfe.u64 %rd30, %rd23, 24, 8;
+; CHECK-NEXT: ld.param.b64 %rd2, [test_s_i8f64p_param_0+8];
+; CHECK-NEXT: ld.param.b64 %rd3, [test_s_i8f64p_param_0+24];
+; CHECK-NEXT: ld.param.b8 %rd4, [test_s_i8f64p_param_0+16];
; CHECK-NEXT: { // callseq 6, 0
; CHECK-NEXT: .param .align 8 .b8 param0[32];
-; CHECK-NEXT: st.param.b64 [param0], %rd1;
-; CHECK-NEXT: st.param.b8 [param0+8], %rs1;
-; CHECK-NEXT: st.param.b8 [param0+9], %rd2;
-; CHECK-NEXT: st.param.b8 [param0+10], %rd25;
-; CHECK-NEXT: st.param.b8 [param0+11], %rd26;
-; CHECK-NEXT: st.param.b8 [param0+12], %rd27;
-; CHECK-NEXT: st.param.b8 [param0+13], %rd23;
-; CHECK-NEXT: st.param.b8 [param0+14], %rd28;
-; CHECK-NEXT: st.param.b8 [param0+15], %rd29;
-; CHECK-NEXT: st.param.b8 [param0+16], %rd30;
-; CHECK-NEXT: st.param.b64 [param0+24], %rd3;
; CHECK-NEXT: .param .align 8 .b8 retval0[32];
+; CHECK-NEXT: st.param.b8 [param0+16], %rd4;
+; CHECK-NEXT: st.param.b64 [param0+24], %rd3;
+; CHECK-NEXT: st.param.b64 [param0+8], %rd2;
+; CHECK-NEXT: st.param.b64 [param0], %rd1;
; CHECK-NEXT: call.uni (retval0), test_s_i8f64p, (param0);
-; CHECK-NEXT: ld.param.b64 %rd31, [retval0];
-; CHECK-NEXT: ld.param.b8 %rs2, [retval0+8];
-; CHECK-NEXT: ld.param.b8 %rs3, [retval0+9];
-; CHECK-NEXT: ld.param.b8 %rs4, [retval0+10];
-; CHECK-NEXT: ld.param.b8 %rs5, [retval0+11];
-; CHECK-NEXT: ld.param.b8 %rs6, [retval0+12];
-; CHECK-NEXT: ld.param.b8 %rs7, [retval0+13];
-; CHECK-NEXT: ld.param.b8 %rs8, [retval0+14];
-; CHECK-NEXT: ld.param.b8 %rs9, [retval0+15];
-; CHECK-NEXT: ld.param.b8 %rs10, [retval0+16];
-; CHECK-NEXT: ld.param.b64 %rd32, [retval0+24];
+; CHECK-NEXT: ld.param.b64 %rd5, [retval0+24];
+; CHECK-NEXT: ld.param.b8 %rs1, [retval0+8];
+; CHECK-NEXT: ld.param.b64 %rd6, [retval0];
+; CHECK-NEXT: ld.param.b8 %rd7, [retval0+16];
+; CHECK-NEXT: ld.param.b8 %rd8, [retval0+15];
+; CHECK-NEXT: ld.param.b8 %rd9, [retval0+14];
+; CHECK-NEXT: ld.param.b8 %rd10, [retval0+13];
+; CHECK-NEXT: ld.param.b8 %rd11, [retval0+12];
+; CHECK-NEXT: ld.param.b8 %rd12, [retval0+11];
+; CHECK-NEXT: ld.param.b8 %rd13, [retval0+10];
+; CHECK-NEXT: ld.param.b8 %rd14, [retval0+9];
; CHECK-NEXT: } // callseq 6
-; CHECK-NEXT: cvt.u64.u16 %rd33, %rs3;
-; CHECK-NEXT: and.b64 %rd34, %rd33, 255;
-; CHECK-NEXT: cvt.u64.u16 %rd35, %rs4;
-; CHECK-NEXT: and.b64 %rd36, %rd35, 255;
-; CHECK-NEXT: shl.b64 %rd37, %rd36, 8;
-; CHECK-NEXT: or.b64 %rd38, %rd34, %rd37;
-; CHECK-NEXT: cvt.u64.u16 %rd39, %rs5;
-; CHECK-NEXT: and.b64 %rd40, %rd39, 255;
-; CHECK-NEXT: shl.b64 %rd41, %rd40, 16;
-; CHECK-NEXT: or.b64 %rd42, %rd38, %rd41;
-; CHECK-NEXT: cvt.u64.u16 %rd43, %rs6;
-; CHECK-NEXT: and.b64 %rd44, %rd43, 255;
-; CHECK-NEXT: shl.b64 %rd45, %rd44, 24;
-; CHECK-NEXT: or.b64 %rd46, %rd42, %rd45;
-; CHECK-NEXT: cvt.u64.u16 %rd47, %rs7;
-; CHECK-NEXT: and.b64 %rd48, %rd47, 255;
-; CHECK-NEXT: shl.b64 %rd49, %rd48, 32;
-; CHECK-NEXT: or.b64 %rd50, %rd46, %rd49;
-; CHECK-NEXT: cvt.u64.u16 %rd51, %rs8;
-; CHECK-NEXT: and.b64 %rd52, %rd51, 255;
-; CHECK-NEXT: shl.b64 %rd53, %rd52, 40;
-; CHECK-NEXT: or.b64 %rd54, %rd50, %rd53;
-; CHECK-NEXT: cvt.u64.u16 %rd55, %rs9;
-; CHECK-NEXT: and.b64 %rd56, %rd55, 255;
-; CHECK-NEXT: shl.b64 %rd57, %rd56, 48;
-; CHECK-NEXT: or.b64 %rd58, %rd54, %rd57;
-; CHECK-NEXT: cvt.u64.u16 %rd59, %rs10;
-; CHECK-NEXT: shl.b64 %rd60, %rd59, 56;
-; CHECK-NEXT: or.b64 %rd61, %rd58, %rd60;
-; CHECK-NEXT: st.param.b64 [func_retval0], %rd31;
-; CHECK-NEXT: st.param.b8 [func_retval0+8], %rs2;
+; CHECK-NEXT: shl.b64 %rd17, %rd13, 8;
+; CHECK-NEXT: or.b64 %rd18, %rd17, %rd14;
+; CHECK-NEXT: shl.b64 %rd20, %rd12, 16;
+; CHECK-NEXT: shl.b64 %rd22, %rd11, 24;
+; CHECK-NEXT: or.b64 %rd23, %rd22, %rd20;
+; CHECK-NEXT: or.b64 %rd24, %rd23, %rd18;
+; CHECK-NEXT: shl.b64 %rd27, %rd9, 8;
+; CHECK-NEXT: or.b64 %rd28, %rd27, %rd10;
+; CHECK-NEXT: shl.b64 %rd30, %rd8, 16;
+; CHECK-NEXT: shl.b64 %rd32, %rd7, 24;
+; CHECK-NEXT: or.b64 %rd33, %rd32, %rd30;
+; CHECK-NEXT: or.b64 %rd34, %rd33, %rd28;
+; CHECK-NEXT: shl.b64 %rd35, %rd34, 32;
+; CHECK-NEXT: or.b64 %rd36, %rd35, %rd24;
+; CHECK-NEXT: st.param.b8 [func_retval0+9], %rd14;
+; CHECK-NEXT: st.param.b64 [func_retval0+24], %rd5;
+; CHECK-NEXT: st.param.b8 [func_retval0+8], %rs1;
+; CHECK-NEXT: st.param.b64 [func_retval0], %rd6;
+; CHECK-NEXT: shr.u64 %rd39, %rd36, 56;
+; CHECK-NEXT: st.param.b8 [func_retval0+16], %rd39;
+; CHECK-NEXT: shr.u64 %rd40, %rd36, 48;
+; CHECK-NEXT: st.param.b8 [func_retval0+15], %rd40;
+; CHECK-NEXT: shr.u64 %rd41, %rd36, 40;
+; CHECK-NEXT: st.param.b8 [func_retval0+14], %rd41;
+; CHECK-NEXT: shr.u64 %rd42, %rd36, 32;
+; CHECK-NEXT: st.param.b8 [func_retval0+13], %rd42;
+; CHECK-NEXT: shr.u64 %rd43, %rd36, 24;
; CHECK-NEXT: st.param.b8 [func_retval0+12], %rd43;
-; CHECK-NEXT: st.param.b8 [func_retval0+11], %rd39;
-; CHECK-NEXT: st.param.b8 [func_retval0+10], %rd35;
-; CHECK-NEXT: st.param.b8 [func_retval0+9], %rd33;
-; CHECK-NEXT: shr.u64 %rd64, %rd50, 32;
-; CHECK-NEXT: st.param.b8 [func_retval0+13], %rd64;
-; CHECK-NEXT: shr.u64 %rd65, %rd54, 40;
-; CHECK-NEXT: st.param.b8 [func_retval0+14], %rd65;
-; CHECK-NEXT: shr.u64 %rd66, %rd58, 48;
-; CHECK-NEXT: st.param.b8 [func_retval0+15], %rd66;
-; CHECK-NEXT: shr.u64 %rd67, %rd61, 56;
-; CHECK-NEXT: st.param.b8 [func_retval0+16], %rd67;
-; CHECK-NEXT: st.param.b64 [func_retval0+24], %rd32;
+; CHECK-NEXT: shr.u64 %rd44, %rd36, 16;
+; CHECK-NEXT: st.param.b8 [func_retval0+11], %rd44;
+; CHECK-NEXT: shr.u64 %rd45, %rd36, 8;
+; CHECK-NEXT: st.param.b8 [func_retval0+10], %rd45;
; CHECK-NEXT: ret;
%r = tail call %s_i8f64p @test_s_i8f64p(%s_i8f64p %a)
ret %s_i8f64p %r
diff --git a/llvm/test/CodeGen/NVPTX/vaargs.ll b/llvm/test/CodeGen/NVPTX/vaargs.ll
index 3ca729f..9e312a2 100644
--- a/llvm/test/CodeGen/NVPTX/vaargs.ll
+++ b/llvm/test/CodeGen/NVPTX/vaargs.ll
@@ -89,14 +89,14 @@ define i32 @test_foo(i32 %i, i64 %l, double %d, ptr %p) {
; CHECK-NEXT: ld.param.b32 [[ARG_I32:%r[0-9]+]], [test_foo_param_0];
; Store arguments to an array
-; CHECK32: .param .align 8 .b8 param1[28];
-; CHECK64: .param .align 8 .b8 param1[32];
-; CHECK-NEXT: st.param.b32 [param1], [[ARG_I32]];
-; CHECK-NEXT: st.param.b64 [param1+8], [[ARG_I64]];
-; CHECK-NEXT: st.param.b64 [param1+16], [[ARG_DOUBLE]];
-; CHECK-NEXT: st.param.b[[BITS]] [param1+24], [[ARG_VOID_PTR]];
-; CHECK-NEXT: .param .b32 retval0;
-; CHECK-NEXT: prototype_1 : .callprototype (.param .b32 _) _ (.param .b32 _, .param .align 8 .b8 _[]
+; CHECK32: .param .align 8 .b8 param1[28];
+; CHECK64: .param .align 8 .b8 param1[32];
+; CHECK-DAG: .param .b32 retval0;
+; CHECK-DAG: st.param.b32 [param1], [[ARG_I32]];
+; CHECK-DAG: st.param.b64 [param1+8], [[ARG_I64]];
+; CHECK-DAG: st.param.b64 [param1+16], [[ARG_DOUBLE]];
+; CHECK-DAG: st.param.b[[BITS]] [param1+24], [[ARG_VOID_PTR]];
+; CHECK-DAG: prototype_1 : .callprototype (.param .b32 _) _ (.param .b32 _, .param .align 8 .b8 _[]
entry:
%ptr = load ptr, ptr addrspacecast (ptr addrspace(1) @foo_ptr to ptr), align 8
diff --git a/llvm/test/CodeGen/NVPTX/variadics-backend.ll b/llvm/test/CodeGen/NVPTX/variadics-backend.ll
index ad2e704..a9b3675 100644
--- a/llvm/test/CodeGen/NVPTX/variadics-backend.ll
+++ b/llvm/test/CodeGen/NVPTX/variadics-backend.ll
@@ -115,13 +115,13 @@ define dso_local i32 @foo() {
; CHECK-PTX-NEXT: st.b64 [%SP+16], 1;
; CHECK-PTX-NEXT: st.b64 [%SP+24], 4607182418800017408;
; CHECK-PTX-NEXT: st.b64 [%SP+32], 4607182418800017408;
-; CHECK-PTX-NEXT: add.u64 %rd1, %SP, 0;
; CHECK-PTX-NEXT: { // callseq 0, 0
; CHECK-PTX-NEXT: .param .b32 param0;
-; CHECK-PTX-NEXT: st.param.b32 [param0], 1;
; CHECK-PTX-NEXT: .param .b64 param1;
-; CHECK-PTX-NEXT: st.param.b64 [param1], %rd1;
; CHECK-PTX-NEXT: .param .b32 retval0;
+; CHECK-PTX-NEXT: add.u64 %rd1, %SP, 0;
+; CHECK-PTX-NEXT: st.param.b64 [param1], %rd1;
+; CHECK-PTX-NEXT: st.param.b32 [param0], 1;
; CHECK-PTX-NEXT: call.uni (retval0), variadics1, (param0, param1);
; CHECK-PTX-NEXT: ld.param.b32 %r1, [retval0];
; CHECK-PTX-NEXT: } // callseq 0
@@ -218,13 +218,13 @@ define dso_local i32 @bar() {
; CHECK-PTX-NEXT: st.b32 [%SP+8], 1;
; CHECK-PTX-NEXT: st.b8 [%SP+12], 1;
; CHECK-PTX-NEXT: st.b64 [%SP+16], 1;
-; CHECK-PTX-NEXT: add.u64 %rd3, %SP, 8;
; CHECK-PTX-NEXT: { // callseq 1, 0
; CHECK-PTX-NEXT: .param .b32 param0;
-; CHECK-PTX-NEXT: st.param.b32 [param0], 1;
; CHECK-PTX-NEXT: .param .b64 param1;
-; CHECK-PTX-NEXT: st.param.b64 [param1], %rd3;
; CHECK-PTX-NEXT: .param .b32 retval0;
+; CHECK-PTX-NEXT: add.u64 %rd3, %SP, 8;
+; CHECK-PTX-NEXT: st.param.b64 [param1], %rd3;
+; CHECK-PTX-NEXT: st.param.b32 [param0], 1;
; CHECK-PTX-NEXT: call.uni (retval0), variadics2, (param0, param1);
; CHECK-PTX-NEXT: ld.param.b32 %r1, [retval0];
; CHECK-PTX-NEXT: } // callseq 1
@@ -289,13 +289,13 @@ define dso_local i32 @baz() {
; CHECK-PTX-NEXT: mov.b64 %SPL, __local_depot5;
; CHECK-PTX-NEXT: cvta.local.u64 %SP, %SPL;
; CHECK-PTX-NEXT: st.v4.b32 [%SP], {1, 1, 1, 1};
-; CHECK-PTX-NEXT: add.u64 %rd1, %SP, 0;
; CHECK-PTX-NEXT: { // callseq 2, 0
; CHECK-PTX-NEXT: .param .b32 param0;
-; CHECK-PTX-NEXT: st.param.b32 [param0], 1;
; CHECK-PTX-NEXT: .param .b64 param1;
-; CHECK-PTX-NEXT: st.param.b64 [param1], %rd1;
; CHECK-PTX-NEXT: .param .b32 retval0;
+; CHECK-PTX-NEXT: add.u64 %rd1, %SP, 0;
+; CHECK-PTX-NEXT: st.param.b64 [param1], %rd1;
+; CHECK-PTX-NEXT: st.param.b32 [param0], 1;
; CHECK-PTX-NEXT: call.uni (retval0), variadics3, (param0, param1);
; CHECK-PTX-NEXT: ld.param.b32 %r1, [retval0];
; CHECK-PTX-NEXT: } // callseq 2
@@ -348,7 +348,6 @@ define dso_local void @qux() {
; CHECK-PTX-NEXT: .local .align 8 .b8 __local_depot7[24];
; CHECK-PTX-NEXT: .reg .b64 %SP;
; CHECK-PTX-NEXT: .reg .b64 %SPL;
-; CHECK-PTX-NEXT: .reg .b32 %r<2>;
; CHECK-PTX-NEXT: .reg .b64 %rd<8>;
; CHECK-PTX-EMPTY:
; CHECK-PTX-NEXT: // %bb.0: // %entry
@@ -360,18 +359,17 @@ define dso_local void @qux() {
; CHECK-PTX-NEXT: ld.global.nc.b64 %rd4, [__const_$_qux_$_s];
; CHECK-PTX-NEXT: st.local.b64 [%rd2], %rd4;
; CHECK-PTX-NEXT: st.b64 [%SP+16], 1;
-; CHECK-PTX-NEXT: ld.local.b64 %rd5, [%rd2];
-; CHECK-PTX-NEXT: ld.local.b64 %rd6, [%rd2+8];
-; CHECK-PTX-NEXT: add.u64 %rd7, %SP, 16;
; CHECK-PTX-NEXT: { // callseq 3, 0
; CHECK-PTX-NEXT: .param .align 8 .b8 param0[16];
-; CHECK-PTX-NEXT: st.param.b64 [param0], %rd5;
-; CHECK-PTX-NEXT: st.param.b64 [param0+8], %rd6;
; CHECK-PTX-NEXT: .param .b64 param1;
-; CHECK-PTX-NEXT: st.param.b64 [param1], %rd7;
; CHECK-PTX-NEXT: .param .b32 retval0;
+; CHECK-PTX-NEXT: add.u64 %rd5, %SP, 16;
+; CHECK-PTX-NEXT: st.param.b64 [param1], %rd5;
+; CHECK-PTX-NEXT: ld.local.b64 %rd6, [%rd2+8];
+; CHECK-PTX-NEXT: st.param.b64 [param0+8], %rd6;
+; CHECK-PTX-NEXT: ld.local.b64 %rd7, [%rd2];
+; CHECK-PTX-NEXT: st.param.b64 [param0], %rd7;
; CHECK-PTX-NEXT: call.uni (retval0), variadics4, (param0, param1);
-; CHECK-PTX-NEXT: ld.param.b32 %r1, [retval0];
; CHECK-PTX-NEXT: } // callseq 3
; CHECK-PTX-NEXT: ret;
entry:
diff --git a/llvm/test/CodeGen/PowerPC/froundeven-legalization.ll b/llvm/test/CodeGen/PowerPC/froundeven-legalization.ll
new file mode 100644
index 0000000..238e200
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/froundeven-legalization.ll
@@ -0,0 +1,111 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=powerpc64le < %s | FileCheck %s
+
+define void @test(ptr %p1, ptr %p2) nounwind {
+; CHECK-LABEL: test:
+; CHECK: # %bb.0:
+; CHECK-NEXT: mflr 0
+; CHECK-NEXT: stdu 1, -224(1)
+; CHECK-NEXT: li 5, 48
+; CHECK-NEXT: std 0, 240(1)
+; CHECK-NEXT: std 27, 184(1) # 8-byte Folded Spill
+; CHECK-NEXT: li 27, 16
+; CHECK-NEXT: std 28, 192(1) # 8-byte Folded Spill
+; CHECK-NEXT: std 29, 200(1) # 8-byte Folded Spill
+; CHECK-NEXT: li 29, 32
+; CHECK-NEXT: li 28, 48
+; CHECK-NEXT: stxvd2x 56, 1, 5 # 16-byte Folded Spill
+; CHECK-NEXT: li 5, 64
+; CHECK-NEXT: std 30, 208(1) # 8-byte Folded Spill
+; CHECK-NEXT: mr 30, 4
+; CHECK-NEXT: stxvd2x 57, 1, 5 # 16-byte Folded Spill
+; CHECK-NEXT: li 5, 80
+; CHECK-NEXT: stxvd2x 58, 1, 5 # 16-byte Folded Spill
+; CHECK-NEXT: li 5, 96
+; CHECK-NEXT: lxvd2x 58, 0, 3
+; CHECK-NEXT: stxvd2x 59, 1, 5 # 16-byte Folded Spill
+; CHECK-NEXT: li 5, 112
+; CHECK-NEXT: lxvd2x 59, 3, 27
+; CHECK-NEXT: stxvd2x 60, 1, 5 # 16-byte Folded Spill
+; CHECK-NEXT: li 5, 128
+; CHECK-NEXT: stxvd2x 61, 1, 5 # 16-byte Folded Spill
+; CHECK-NEXT: li 5, 144
+; CHECK-NEXT: stxvd2x 62, 1, 5 # 16-byte Folded Spill
+; CHECK-NEXT: li 5, 160
+; CHECK-NEXT: lxvd2x 62, 3, 28
+; CHECK-NEXT: stxvd2x 63, 1, 5 # 16-byte Folded Spill
+; CHECK-NEXT: lxvd2x 63, 3, 29
+; CHECK-NEXT: xxswapd 57, 58
+; CHECK-NEXT: xxswapd 1, 59
+; CHECK-NEXT: xxswapd 60, 62
+; CHECK-NEXT: xxswapd 61, 63
+; CHECK-NEXT: bl roundeven
+; CHECK-NEXT: nop
+; CHECK-NEXT: xxswapd 56, 1
+; CHECK-NEXT: xxlor 1, 59, 59
+; CHECK-NEXT: bl roundeven
+; CHECK-NEXT: nop
+; CHECK-NEXT: xxswapd 0, 1
+; CHECK-NEXT: xxlor 1, 60, 60
+; CHECK-NEXT: xxmrgld 59, 0, 56
+; CHECK-NEXT: bl roundeven
+; CHECK-NEXT: nop
+; CHECK-NEXT: xxswapd 60, 1
+; CHECK-NEXT: xxlor 1, 62, 62
+; CHECK-NEXT: bl roundeven
+; CHECK-NEXT: nop
+; CHECK-NEXT: xxswapd 0, 1
+; CHECK-NEXT: xxlor 1, 61, 61
+; CHECK-NEXT: xxmrgld 62, 0, 60
+; CHECK-NEXT: bl roundeven
+; CHECK-NEXT: nop
+; CHECK-NEXT: xxswapd 61, 1
+; CHECK-NEXT: xxlor 1, 63, 63
+; CHECK-NEXT: bl roundeven
+; CHECK-NEXT: nop
+; CHECK-NEXT: xxswapd 0, 1
+; CHECK-NEXT: xxlor 1, 57, 57
+; CHECK-NEXT: xxmrgld 63, 0, 61
+; CHECK-NEXT: bl roundeven
+; CHECK-NEXT: nop
+; CHECK-NEXT: xxswapd 61, 1
+; CHECK-NEXT: xxlor 1, 58, 58
+; CHECK-NEXT: bl roundeven
+; CHECK-NEXT: nop
+; CHECK-NEXT: li 3, 160
+; CHECK-NEXT: stxvd2x 63, 30, 29
+; CHECK-NEXT: xxswapd 0, 1
+; CHECK-NEXT: stxvd2x 62, 30, 28
+; CHECK-NEXT: stxvd2x 59, 30, 27
+; CHECK-NEXT: ld 29, 200(1) # 8-byte Folded Reload
+; CHECK-NEXT: ld 28, 192(1) # 8-byte Folded Reload
+; CHECK-NEXT: ld 27, 184(1) # 8-byte Folded Reload
+; CHECK-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload
+; CHECK-NEXT: li 3, 144
+; CHECK-NEXT: xxmrgld 0, 0, 61
+; CHECK-NEXT: lxvd2x 62, 1, 3 # 16-byte Folded Reload
+; CHECK-NEXT: li 3, 128
+; CHECK-NEXT: stxvd2x 0, 0, 30
+; CHECK-NEXT: ld 30, 208(1) # 8-byte Folded Reload
+; CHECK-NEXT: lxvd2x 61, 1, 3 # 16-byte Folded Reload
+; CHECK-NEXT: li 3, 112
+; CHECK-NEXT: lxvd2x 60, 1, 3 # 16-byte Folded Reload
+; CHECK-NEXT: li 3, 96
+; CHECK-NEXT: lxvd2x 59, 1, 3 # 16-byte Folded Reload
+; CHECK-NEXT: li 3, 80
+; CHECK-NEXT: lxvd2x 58, 1, 3 # 16-byte Folded Reload
+; CHECK-NEXT: li 3, 64
+; CHECK-NEXT: lxvd2x 57, 1, 3 # 16-byte Folded Reload
+; CHECK-NEXT: li 3, 48
+; CHECK-NEXT: lxvd2x 56, 1, 3 # 16-byte Folded Reload
+; CHECK-NEXT: addi 1, 1, 224
+; CHECK-NEXT: ld 0, 16(1)
+; CHECK-NEXT: mtlr 0
+; CHECK-NEXT: blr
+ %v = load <8 x double>, ptr %p1, align 64
+ %res = call <8 x double> @llvm.roundeven.v8f64(<8 x double> %v)
+ store <8 x double> %res, ptr %p2, align 64
+ ret void
+}
+
+declare <8 x double> @llvm.roundeven.v8f64(<8 x double>)
diff --git a/llvm/test/CodeGen/RISCV/rvv/vlseg-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vlseg-rv32.ll
index e6a98c9..eb3422d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vlseg-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vlseg-rv32.ll
@@ -2,4246 +2,3303 @@
; RUN: llc -mtriple=riscv32 -mattr=+zve64d,+f,+d,+zvfh,+zvfbfmin \
; RUN: -verify-machineinstrs < %s | FileCheck %s
-declare target("riscv.vector.tuple", <vscale x 1 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv1i8_2t(target("riscv.vector.tuple", <vscale x 1 x i8>, 2), ptr, i32, i32)
-declare target("riscv.vector.tuple", <vscale x 1 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv1i8_2t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 2), ptr, <vscale x 1 x i1>, i32, i32, i32)
-
-define <vscale x 1 x i8> @test_vlseg2_nxv1i8_triscv.vector.tuple_nxv1i8_2t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 1 x i8>, 2) @test_vlseg2_nxv1i8_triscv.vector.tuple_nxv1i8_2t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg2_nxv1i8_triscv.vector.tuple_nxv1i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
-; CHECK-NEXT: vlseg2e8.v v7, (a0)
+; CHECK-NEXT: vlseg2e8.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv1i8_2t(target("riscv.vector.tuple", <vscale x 1 x i8>, 2) undef, ptr %base, i32 %vl, i32 3)
- %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_2t(target("riscv.vector.tuple", <vscale x 1 x i8>, 2) %0, i32 1)
- ret <vscale x 1 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 1 x i8>, 2) %0
}
-
-define <vscale x 1 x i8> @test_vlseg2_mask_nxv1i8_triscv.vector.tuple_nxv1i8_2t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 1 x i8>, 2) @test_vlseg2_mask_nxv1i8_triscv.vector.tuple_nxv1i8_2t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg2_mask_nxv1i8_triscv.vector.tuple_nxv1i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
-; CHECK-NEXT: vlseg2e8.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg2e8.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv1i8_2t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 2) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 3)
- %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_2t(target("riscv.vector.tuple", <vscale x 1 x i8>, 2) %0, i32 1)
- ret <vscale x 1 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 1 x i8>, 2) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 2 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv2i8_2t(target("riscv.vector.tuple", <vscale x 2 x i8>, 2), ptr, i32, i32)
-declare target("riscv.vector.tuple", <vscale x 2 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv2i8_2t.nxv2i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 2), ptr, <vscale x 2 x i1>, i32, i32, i32)
-
-define <vscale x 2 x i8> @test_vlseg2_nxv2i8_triscv.vector.tuple_nxv2i8_2t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 2) @test_vlseg2_nxv2i8_triscv.vector.tuple_nxv2i8_2t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg2_nxv2i8_triscv.vector.tuple_nxv2i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
-; CHECK-NEXT: vlseg2e8.v v7, (a0)
+; CHECK-NEXT: vlseg2e8.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv2i8_2t(target("riscv.vector.tuple", <vscale x 2 x i8>, 2) undef, ptr %base, i32 %vl, i32 3)
- %1 = call <vscale x 2 x i8> @llvm.riscv.tuple.extract.nxv2i8.triscv.vector.tuple_nxv2i8_2t(target("riscv.vector.tuple", <vscale x 2 x i8>, 2) %0, i32 1)
- ret <vscale x 2 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 2) %0
}
-
-define <vscale x 2 x i8> @test_vlseg2_mask_nxv2i8_triscv.vector.tuple_nxv2i8_2t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 2) @test_vlseg2_mask_nxv2i8_triscv.vector.tuple_nxv2i8_2t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg2_mask_nxv2i8_triscv.vector.tuple_nxv2i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
-; CHECK-NEXT: vlseg2e8.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg2e8.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv2i8_2t.nxv2i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 2) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 3)
- %1 = call <vscale x 2 x i8> @llvm.riscv.tuple.extract.nxv2i8.triscv.vector.tuple_nxv2i8_2t(target("riscv.vector.tuple", <vscale x 2 x i8>, 2) %0, i32 1)
- ret <vscale x 2 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 2) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2), ptr, i32, i32)
-declare target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv4i8_2t.nxv4i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 2), ptr, <vscale x 4 x i1>, i32, i32, i32)
-
-define <vscale x 4 x i8> @test_vlseg2_nxv4i8_triscv.vector.tuple_nxv4i8_2t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @test_vlseg2_nxv4i8_triscv.vector.tuple_nxv4i8_2t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg2_nxv4i8_triscv.vector.tuple_nxv4i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
-; CHECK-NEXT: vlseg2e8.v v7, (a0)
+; CHECK-NEXT: vlseg2e8.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) undef, ptr %base, i32 %vl, i32 3)
- %1 = call <vscale x 4 x i8> @llvm.riscv.tuple.extract.nxv4i8.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0, i32 1)
- ret <vscale x 4 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0
}
-
-define <vscale x 4 x i8> @test_vlseg2_mask_nxv4i8_triscv.vector.tuple_nxv4i8_2t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @test_vlseg2_mask_nxv4i8_triscv.vector.tuple_nxv4i8_2t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: test_vlseg2_mask_nxv4i8_triscv.vector.tuple_nxv4i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
-; CHECK-NEXT: vlseg2e8.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg2e8.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv4i8_2t.nxv4i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) undef, ptr %base, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 3)
- %1 = call <vscale x 4 x i8> @llvm.riscv.tuple.extract.nxv4i8.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0, i32 1)
- ret <vscale x 4 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2), ptr, i32, i32)
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv8i8_2t.nxv8i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 2), ptr, <vscale x 8 x i1>, i32, i32, i32)
-
-define <vscale x 8 x i8> @test_vlseg2_nxv8i8_triscv.vector.tuple_nxv8i8_2t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @test_vlseg2_nxv8i8_triscv.vector.tuple_nxv8i8_2t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg2_nxv8i8_triscv.vector.tuple_nxv8i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
-; CHECK-NEXT: vlseg2e8.v v7, (a0)
+; CHECK-NEXT: vlseg2e8.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) undef, ptr %base, i32 %vl, i32 3)
- %1 = call <vscale x 8 x i8> @llvm.riscv.tuple.extract.nxv8i8.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0, i32 1)
- ret <vscale x 8 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0
}
-
-define <vscale x 8 x i8> @test_vlseg2_mask_nxv8i8_triscv.vector.tuple_nxv8i8_2t(ptr %base, i32 %vl, <vscale x 8 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @test_vlseg2_mask_nxv8i8_triscv.vector.tuple_nxv8i8_2t(ptr %base, i32 %vl, <vscale x 8 x i1> %mask) {
; CHECK-LABEL: test_vlseg2_mask_nxv8i8_triscv.vector.tuple_nxv8i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
-; CHECK-NEXT: vlseg2e8.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg2e8.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv8i8_2t.nxv8i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) undef, ptr %base, <vscale x 8 x i1> %mask, i32 %vl, i32 1, i32 3)
- %1 = call <vscale x 8 x i8> @llvm.riscv.tuple.extract.nxv8i8.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0, i32 1)
- ret <vscale x 8 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2), ptr, i32, i32)
-declare target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv16i8_2t.nxv16i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 2), ptr, <vscale x 16 x i1>, i32, i32, i32)
-
-define <vscale x 16 x i8> @test_vlseg2_nxv16i8_triscv.vector.tuple_nxv16i8_2t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @test_vlseg2_nxv16i8_triscv.vector.tuple_nxv16i8_2t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg2_nxv16i8_triscv.vector.tuple_nxv16i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
-; CHECK-NEXT: vlseg2e8.v v6, (a0)
+; CHECK-NEXT: vlseg2e8.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, i32 %vl, i32 3)
- %1 = call <vscale x 16 x i8> @llvm.riscv.tuple.extract.nxv16i8.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0, i32 1)
- ret <vscale x 16 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0
}
-
-define <vscale x 16 x i8> @test_vlseg2_mask_nxv16i8_triscv.vector.tuple_nxv16i8_2t(ptr %base, i32 %vl, <vscale x 16 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @test_vlseg2_mask_nxv16i8_triscv.vector.tuple_nxv16i8_2t(ptr %base, i32 %vl, <vscale x 16 x i1> %mask) {
; CHECK-LABEL: test_vlseg2_mask_nxv16i8_triscv.vector.tuple_nxv16i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
-; CHECK-NEXT: vlseg2e8.v v6, (a0), v0.t
+; CHECK-NEXT: vlseg2e8.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv16i8_2t.nxv16i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 16 x i1> %mask, i32 %vl, i32 1, i32 3)
- %1 = call <vscale x 16 x i8> @llvm.riscv.tuple.extract.nxv16i8.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0, i32 1)
- ret <vscale x 16 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2), ptr, i32, i32)
-declare target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv32i8_2t.nxv32i1(target("riscv.vector.tuple", <vscale x 32 x i8>, 2), ptr, <vscale x 32 x i1>, i32, i32, i32)
-
-define <vscale x 32 x i8> @test_vlseg2_nxv32i8_triscv.vector.tuple_nxv32i8_2t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @test_vlseg2_nxv32i8_triscv.vector.tuple_nxv32i8_2t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg2_nxv32i8_triscv.vector.tuple_nxv32i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma
-; CHECK-NEXT: vlseg2e8.v v4, (a0)
+; CHECK-NEXT: vlseg2e8.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, i32 %vl, i32 3)
- %1 = call <vscale x 32 x i8> @llvm.riscv.tuple.extract.nxv32i8.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0, i32 1)
- ret <vscale x 32 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0
}
-
-define <vscale x 32 x i8> @test_vlseg2_mask_nxv32i8_triscv.vector.tuple_nxv32i8_2t(ptr %base, i32 %vl, <vscale x 32 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @test_vlseg2_mask_nxv32i8_triscv.vector.tuple_nxv32i8_2t(ptr %base, i32 %vl, <vscale x 32 x i1> %mask) {
; CHECK-LABEL: test_vlseg2_mask_nxv32i8_triscv.vector.tuple_nxv32i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma
-; CHECK-NEXT: vlseg2e8.v v4, (a0), v0.t
+; CHECK-NEXT: vlseg2e8.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv32i8_2t.nxv32i1(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 32 x i1> %mask, i32 %vl, i32 1, i32 3)
- %1 = call <vscale x 32 x i8> @llvm.riscv.tuple.extract.nxv32i8.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0, i32 1)
- ret <vscale x 32 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 1 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv1i8_3t(target("riscv.vector.tuple", <vscale x 1 x i8>, 3), ptr, i32, i32)
-declare target("riscv.vector.tuple", <vscale x 1 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv1i8_3t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 3), ptr, <vscale x 1 x i1>, i32, i32, i32)
-
-define <vscale x 1 x i8> @test_vlseg3_nxv1i8_triscv.vector.tuple_nxv1i8_3t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 1 x i8>, 3) @test_vlseg3_nxv1i8_triscv.vector.tuple_nxv1i8_3t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg3_nxv1i8_triscv.vector.tuple_nxv1i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
-; CHECK-NEXT: vlseg3e8.v v7, (a0)
+; CHECK-NEXT: vlseg3e8.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv1i8_3t(target("riscv.vector.tuple", <vscale x 1 x i8>, 3) undef, ptr %base, i32 %vl, i32 3)
- %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_3t(target("riscv.vector.tuple", <vscale x 1 x i8>, 3) %0, i32 1)
- ret <vscale x 1 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 1 x i8>, 3) %0
}
-
-define <vscale x 1 x i8> @test_vlseg3_mask_nxv1i8_triscv.vector.tuple_nxv1i8_3t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 1 x i8>, 3) @test_vlseg3_mask_nxv1i8_triscv.vector.tuple_nxv1i8_3t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg3_mask_nxv1i8_triscv.vector.tuple_nxv1i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
-; CHECK-NEXT: vlseg3e8.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg3e8.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv1i8_3t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 3) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 3)
- %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_3t(target("riscv.vector.tuple", <vscale x 1 x i8>, 3) %0, i32 1)
- ret <vscale x 1 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 1 x i8>, 3) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 2 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv2i8_3t(target("riscv.vector.tuple", <vscale x 2 x i8>, 3), ptr, i32, i32)
-declare target("riscv.vector.tuple", <vscale x 2 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv2i8_3t.nxv2i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 3), ptr, <vscale x 2 x i1>, i32, i32, i32)
-
-define <vscale x 2 x i8> @test_vlseg3_nxv2i8_triscv.vector.tuple_nxv2i8_3t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 3) @test_vlseg3_nxv2i8_triscv.vector.tuple_nxv2i8_3t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg3_nxv2i8_triscv.vector.tuple_nxv2i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
-; CHECK-NEXT: vlseg3e8.v v7, (a0)
+; CHECK-NEXT: vlseg3e8.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv2i8_3t(target("riscv.vector.tuple", <vscale x 2 x i8>, 3) undef, ptr %base, i32 %vl, i32 3)
- %1 = call <vscale x 2 x i8> @llvm.riscv.tuple.extract.nxv2i8.triscv.vector.tuple_nxv2i8_3t(target("riscv.vector.tuple", <vscale x 2 x i8>, 3) %0, i32 1)
- ret <vscale x 2 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 3) %0
}
-
-define <vscale x 2 x i8> @test_vlseg3_mask_nxv2i8_triscv.vector.tuple_nxv2i8_3t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 3) @test_vlseg3_mask_nxv2i8_triscv.vector.tuple_nxv2i8_3t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg3_mask_nxv2i8_triscv.vector.tuple_nxv2i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
-; CHECK-NEXT: vlseg3e8.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg3e8.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv2i8_3t.nxv2i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 3) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 3)
- %1 = call <vscale x 2 x i8> @llvm.riscv.tuple.extract.nxv2i8.triscv.vector.tuple_nxv2i8_3t(target("riscv.vector.tuple", <vscale x 2 x i8>, 3) %0, i32 1)
- ret <vscale x 2 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 3) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3), ptr, i32, i32)
-declare target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv4i8_3t.nxv4i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 3), ptr, <vscale x 4 x i1>, i32, i32, i32)
-
-define <vscale x 4 x i8> @test_vlseg3_nxv4i8_triscv.vector.tuple_nxv4i8_3t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @test_vlseg3_nxv4i8_triscv.vector.tuple_nxv4i8_3t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg3_nxv4i8_triscv.vector.tuple_nxv4i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
-; CHECK-NEXT: vlseg3e8.v v7, (a0)
+; CHECK-NEXT: vlseg3e8.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) undef, ptr %base, i32 %vl, i32 3)
- %1 = call <vscale x 4 x i8> @llvm.riscv.tuple.extract.nxv4i8.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0, i32 1)
- ret <vscale x 4 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0
}
-
-define <vscale x 4 x i8> @test_vlseg3_mask_nxv4i8_triscv.vector.tuple_nxv4i8_3t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @test_vlseg3_mask_nxv4i8_triscv.vector.tuple_nxv4i8_3t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: test_vlseg3_mask_nxv4i8_triscv.vector.tuple_nxv4i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
-; CHECK-NEXT: vlseg3e8.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg3e8.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv4i8_3t.nxv4i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) undef, ptr %base, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 3)
- %1 = call <vscale x 4 x i8> @llvm.riscv.tuple.extract.nxv4i8.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0, i32 1)
- ret <vscale x 4 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3), ptr, i32, i32)
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv8i8_3t.nxv8i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 3), ptr, <vscale x 8 x i1>, i32, i32, i32)
-
-define <vscale x 8 x i8> @test_vlseg3_nxv8i8_triscv.vector.tuple_nxv8i8_3t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @test_vlseg3_nxv8i8_triscv.vector.tuple_nxv8i8_3t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg3_nxv8i8_triscv.vector.tuple_nxv8i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
-; CHECK-NEXT: vlseg3e8.v v7, (a0)
+; CHECK-NEXT: vlseg3e8.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) undef, ptr %base, i32 %vl, i32 3)
- %1 = call <vscale x 8 x i8> @llvm.riscv.tuple.extract.nxv8i8.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0, i32 1)
- ret <vscale x 8 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0
}
-
-define <vscale x 8 x i8> @test_vlseg3_mask_nxv8i8_triscv.vector.tuple_nxv8i8_3t(ptr %base, i32 %vl, <vscale x 8 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @test_vlseg3_mask_nxv8i8_triscv.vector.tuple_nxv8i8_3t(ptr %base, i32 %vl, <vscale x 8 x i1> %mask) {
; CHECK-LABEL: test_vlseg3_mask_nxv8i8_triscv.vector.tuple_nxv8i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
-; CHECK-NEXT: vlseg3e8.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg3e8.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv8i8_3t.nxv8i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) undef, ptr %base, <vscale x 8 x i1> %mask, i32 %vl, i32 1, i32 3)
- %1 = call <vscale x 8 x i8> @llvm.riscv.tuple.extract.nxv8i8.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0, i32 1)
- ret <vscale x 8 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3), ptr, i32, i32)
-declare target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv16i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 3), ptr, <vscale x 16 x i1>, i32, i32, i32)
-
-define <vscale x 16 x i8> @test_vlseg3_nxv16i8_triscv.vector.tuple_nxv16i8_3t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @test_vlseg3_nxv16i8_triscv.vector.tuple_nxv16i8_3t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg3_nxv16i8_triscv.vector.tuple_nxv16i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
-; CHECK-NEXT: vlseg3e8.v v6, (a0)
+; CHECK-NEXT: vlseg3e8.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, i32 %vl, i32 3)
- %1 = call <vscale x 16 x i8> @llvm.riscv.tuple.extract.nxv16i8.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0, i32 1)
- ret <vscale x 16 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0
}
-
-define <vscale x 16 x i8> @test_vlseg3_mask_nxv16i8_triscv.vector.tuple_nxv16i8_3t(ptr %base, i32 %vl, <vscale x 16 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @test_vlseg3_mask_nxv16i8_triscv.vector.tuple_nxv16i8_3t(ptr %base, i32 %vl, <vscale x 16 x i1> %mask) {
; CHECK-LABEL: test_vlseg3_mask_nxv16i8_triscv.vector.tuple_nxv16i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
-; CHECK-NEXT: vlseg3e8.v v6, (a0), v0.t
+; CHECK-NEXT: vlseg3e8.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv16i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 16 x i1> %mask, i32 %vl, i32 1, i32 3)
- %1 = call <vscale x 16 x i8> @llvm.riscv.tuple.extract.nxv16i8.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0, i32 1)
- ret <vscale x 16 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 1 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv1i8_4t(target("riscv.vector.tuple", <vscale x 1 x i8>, 4), ptr, i32, i32)
-declare target("riscv.vector.tuple", <vscale x 1 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv1i8_4t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 4), ptr, <vscale x 1 x i1>, i32, i32, i32)
-
-define <vscale x 1 x i8> @test_vlseg4_nxv1i8_triscv.vector.tuple_nxv1i8_4t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 1 x i8>, 4) @test_vlseg4_nxv1i8_triscv.vector.tuple_nxv1i8_4t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg4_nxv1i8_triscv.vector.tuple_nxv1i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
-; CHECK-NEXT: vlseg4e8.v v7, (a0)
+; CHECK-NEXT: vlseg4e8.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv1i8_4t(target("riscv.vector.tuple", <vscale x 1 x i8>, 4) undef, ptr %base, i32 %vl, i32 3)
- %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_4t(target("riscv.vector.tuple", <vscale x 1 x i8>, 4) %0, i32 1)
- ret <vscale x 1 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 1 x i8>, 4) %0
}
-
-define <vscale x 1 x i8> @test_vlseg4_mask_nxv1i8_triscv.vector.tuple_nxv1i8_4t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 1 x i8>, 4) @test_vlseg4_mask_nxv1i8_triscv.vector.tuple_nxv1i8_4t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg4_mask_nxv1i8_triscv.vector.tuple_nxv1i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
-; CHECK-NEXT: vlseg4e8.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg4e8.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv1i8_4t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 4) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 3)
- %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_4t(target("riscv.vector.tuple", <vscale x 1 x i8>, 4) %0, i32 1)
- ret <vscale x 1 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 1 x i8>, 4) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 2 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv2i8_4t(target("riscv.vector.tuple", <vscale x 2 x i8>, 4), ptr, i32, i32)
-declare target("riscv.vector.tuple", <vscale x 2 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv2i8_4t.nxv2i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 4), ptr, <vscale x 2 x i1>, i32, i32, i32)
-
-define <vscale x 2 x i8> @test_vlseg4_nxv2i8_triscv.vector.tuple_nxv2i8_4t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 4) @test_vlseg4_nxv2i8_triscv.vector.tuple_nxv2i8_4t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg4_nxv2i8_triscv.vector.tuple_nxv2i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
-; CHECK-NEXT: vlseg4e8.v v7, (a0)
+; CHECK-NEXT: vlseg4e8.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv2i8_4t(target("riscv.vector.tuple", <vscale x 2 x i8>, 4) undef, ptr %base, i32 %vl, i32 3)
- %1 = call <vscale x 2 x i8> @llvm.riscv.tuple.extract.nxv2i8.triscv.vector.tuple_nxv2i8_4t(target("riscv.vector.tuple", <vscale x 2 x i8>, 4) %0, i32 1)
- ret <vscale x 2 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 4) %0
}
-
-define <vscale x 2 x i8> @test_vlseg4_mask_nxv2i8_triscv.vector.tuple_nxv2i8_4t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 4) @test_vlseg4_mask_nxv2i8_triscv.vector.tuple_nxv2i8_4t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg4_mask_nxv2i8_triscv.vector.tuple_nxv2i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
-; CHECK-NEXT: vlseg4e8.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg4e8.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv2i8_4t.nxv2i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 4) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 3)
- %1 = call <vscale x 2 x i8> @llvm.riscv.tuple.extract.nxv2i8.triscv.vector.tuple_nxv2i8_4t(target("riscv.vector.tuple", <vscale x 2 x i8>, 4) %0, i32 1)
- ret <vscale x 2 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 4) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4), ptr, i32, i32)
-declare target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv4i8_4t.nxv4i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 4), ptr, <vscale x 4 x i1>, i32, i32, i32)
-
-define <vscale x 4 x i8> @test_vlseg4_nxv4i8_triscv.vector.tuple_nxv4i8_4t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @test_vlseg4_nxv4i8_triscv.vector.tuple_nxv4i8_4t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg4_nxv4i8_triscv.vector.tuple_nxv4i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
-; CHECK-NEXT: vlseg4e8.v v7, (a0)
+; CHECK-NEXT: vlseg4e8.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) undef, ptr %base, i32 %vl, i32 3)
- %1 = call <vscale x 4 x i8> @llvm.riscv.tuple.extract.nxv4i8.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0, i32 1)
- ret <vscale x 4 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0
}
-
-define <vscale x 4 x i8> @test_vlseg4_mask_nxv4i8_triscv.vector.tuple_nxv4i8_4t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @test_vlseg4_mask_nxv4i8_triscv.vector.tuple_nxv4i8_4t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: test_vlseg4_mask_nxv4i8_triscv.vector.tuple_nxv4i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
-; CHECK-NEXT: vlseg4e8.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg4e8.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv4i8_4t.nxv4i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) undef, ptr %base, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 3)
- %1 = call <vscale x 4 x i8> @llvm.riscv.tuple.extract.nxv4i8.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0, i32 1)
- ret <vscale x 4 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4), ptr, i32, i32)
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv8i8_4t.nxv8i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 4), ptr, <vscale x 8 x i1>, i32, i32, i32)
-
-define <vscale x 8 x i8> @test_vlseg4_nxv8i8_triscv.vector.tuple_nxv8i8_4t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @test_vlseg4_nxv8i8_triscv.vector.tuple_nxv8i8_4t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg4_nxv8i8_triscv.vector.tuple_nxv8i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
-; CHECK-NEXT: vlseg4e8.v v7, (a0)
+; CHECK-NEXT: vlseg4e8.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) undef, ptr %base, i32 %vl, i32 3)
- %1 = call <vscale x 8 x i8> @llvm.riscv.tuple.extract.nxv8i8.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0, i32 1)
- ret <vscale x 8 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0
}
-
-define <vscale x 8 x i8> @test_vlseg4_mask_nxv8i8_triscv.vector.tuple_nxv8i8_4t(ptr %base, i32 %vl, <vscale x 8 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @test_vlseg4_mask_nxv8i8_triscv.vector.tuple_nxv8i8_4t(ptr %base, i32 %vl, <vscale x 8 x i1> %mask) {
; CHECK-LABEL: test_vlseg4_mask_nxv8i8_triscv.vector.tuple_nxv8i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
-; CHECK-NEXT: vlseg4e8.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg4e8.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv8i8_4t.nxv8i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) undef, ptr %base, <vscale x 8 x i1> %mask, i32 %vl, i32 1, i32 3)
- %1 = call <vscale x 8 x i8> @llvm.riscv.tuple.extract.nxv8i8.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0, i32 1)
- ret <vscale x 8 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4), ptr, i32, i32)
-declare target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv16i8_4t.nxv16i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 4), ptr, <vscale x 16 x i1>, i32, i32, i32)
-
-define <vscale x 16 x i8> @test_vlseg4_nxv16i8_triscv.vector.tuple_nxv16i8_4t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @test_vlseg4_nxv16i8_triscv.vector.tuple_nxv16i8_4t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg4_nxv16i8_triscv.vector.tuple_nxv16i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
-; CHECK-NEXT: vlseg4e8.v v6, (a0)
+; CHECK-NEXT: vlseg4e8.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, i32 %vl, i32 3)
- %1 = call <vscale x 16 x i8> @llvm.riscv.tuple.extract.nxv16i8.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0, i32 1)
- ret <vscale x 16 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0
}
-
-define <vscale x 16 x i8> @test_vlseg4_mask_nxv16i8_triscv.vector.tuple_nxv16i8_4t(ptr %base, i32 %vl, <vscale x 16 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @test_vlseg4_mask_nxv16i8_triscv.vector.tuple_nxv16i8_4t(ptr %base, i32 %vl, <vscale x 16 x i1> %mask) {
; CHECK-LABEL: test_vlseg4_mask_nxv16i8_triscv.vector.tuple_nxv16i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
-; CHECK-NEXT: vlseg4e8.v v6, (a0), v0.t
+; CHECK-NEXT: vlseg4e8.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv16i8_4t.nxv16i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 16 x i1> %mask, i32 %vl, i32 1, i32 3)
- %1 = call <vscale x 16 x i8> @llvm.riscv.tuple.extract.nxv16i8.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0, i32 1)
- ret <vscale x 16 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 1 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv1i8_5t(target("riscv.vector.tuple", <vscale x 1 x i8>, 5), ptr, i32, i32)
-declare target("riscv.vector.tuple", <vscale x 1 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv1i8_5t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 5), ptr, <vscale x 1 x i1>, i32, i32, i32)
-
-define <vscale x 1 x i8> @test_vlseg5_nxv1i8_triscv.vector.tuple_nxv1i8_5t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 1 x i8>, 5) @test_vlseg5_nxv1i8_triscv.vector.tuple_nxv1i8_5t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg5_nxv1i8_triscv.vector.tuple_nxv1i8_5t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
-; CHECK-NEXT: vlseg5e8.v v7, (a0)
+; CHECK-NEXT: vlseg5e8.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv1i8_5t(target("riscv.vector.tuple", <vscale x 1 x i8>, 5) undef, ptr %base, i32 %vl, i32 3)
- %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_5t(target("riscv.vector.tuple", <vscale x 1 x i8>, 5) %0, i32 1)
- ret <vscale x 1 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 1 x i8>, 5) %0
}
-
-define <vscale x 1 x i8> @test_vlseg5_mask_nxv1i8_triscv.vector.tuple_nxv1i8_5t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 1 x i8>, 5) @test_vlseg5_mask_nxv1i8_triscv.vector.tuple_nxv1i8_5t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg5_mask_nxv1i8_triscv.vector.tuple_nxv1i8_5t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
-; CHECK-NEXT: vlseg5e8.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg5e8.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv1i8_5t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 5) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 3)
- %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_5t(target("riscv.vector.tuple", <vscale x 1 x i8>, 5) %0, i32 1)
- ret <vscale x 1 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 1 x i8>, 5) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 2 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv2i8_5t(target("riscv.vector.tuple", <vscale x 2 x i8>, 5), ptr, i32, i32)
-declare target("riscv.vector.tuple", <vscale x 2 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv2i8_5t.nxv2i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 5), ptr, <vscale x 2 x i1>, i32, i32, i32)
-
-define <vscale x 2 x i8> @test_vlseg5_nxv2i8_triscv.vector.tuple_nxv2i8_5t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 5) @test_vlseg5_nxv2i8_triscv.vector.tuple_nxv2i8_5t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg5_nxv2i8_triscv.vector.tuple_nxv2i8_5t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
-; CHECK-NEXT: vlseg5e8.v v7, (a0)
+; CHECK-NEXT: vlseg5e8.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv2i8_5t(target("riscv.vector.tuple", <vscale x 2 x i8>, 5) undef, ptr %base, i32 %vl, i32 3)
- %1 = call <vscale x 2 x i8> @llvm.riscv.tuple.extract.nxv2i8.triscv.vector.tuple_nxv2i8_5t(target("riscv.vector.tuple", <vscale x 2 x i8>, 5) %0, i32 1)
- ret <vscale x 2 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 5) %0
}
-
-define <vscale x 2 x i8> @test_vlseg5_mask_nxv2i8_triscv.vector.tuple_nxv2i8_5t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 5) @test_vlseg5_mask_nxv2i8_triscv.vector.tuple_nxv2i8_5t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg5_mask_nxv2i8_triscv.vector.tuple_nxv2i8_5t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
-; CHECK-NEXT: vlseg5e8.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg5e8.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv2i8_5t.nxv2i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 5) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 3)
- %1 = call <vscale x 2 x i8> @llvm.riscv.tuple.extract.nxv2i8.triscv.vector.tuple_nxv2i8_5t(target("riscv.vector.tuple", <vscale x 2 x i8>, 5) %0, i32 1)
- ret <vscale x 2 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 5) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5), ptr, i32, i32)
-declare target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv4i8_5t.nxv4i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 5), ptr, <vscale x 4 x i1>, i32, i32, i32)
-
-define <vscale x 4 x i8> @test_vlseg5_nxv4i8_triscv.vector.tuple_nxv4i8_5t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @test_vlseg5_nxv4i8_triscv.vector.tuple_nxv4i8_5t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg5_nxv4i8_triscv.vector.tuple_nxv4i8_5t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
-; CHECK-NEXT: vlseg5e8.v v7, (a0)
+; CHECK-NEXT: vlseg5e8.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) undef, ptr %base, i32 %vl, i32 3)
- %1 = call <vscale x 4 x i8> @llvm.riscv.tuple.extract.nxv4i8.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0, i32 1)
- ret <vscale x 4 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0
}
-
-define <vscale x 4 x i8> @test_vlseg5_mask_nxv4i8_triscv.vector.tuple_nxv4i8_5t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @test_vlseg5_mask_nxv4i8_triscv.vector.tuple_nxv4i8_5t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: test_vlseg5_mask_nxv4i8_triscv.vector.tuple_nxv4i8_5t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
-; CHECK-NEXT: vlseg5e8.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg5e8.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv4i8_5t.nxv4i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) undef, ptr %base, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 3)
- %1 = call <vscale x 4 x i8> @llvm.riscv.tuple.extract.nxv4i8.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0, i32 1)
- ret <vscale x 4 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5), ptr, i32, i32)
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv8i8_5t.nxv8i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 5), ptr, <vscale x 8 x i1>, i32, i32, i32)
-
-define <vscale x 8 x i8> @test_vlseg5_nxv8i8_triscv.vector.tuple_nxv8i8_5t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @test_vlseg5_nxv8i8_triscv.vector.tuple_nxv8i8_5t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg5_nxv8i8_triscv.vector.tuple_nxv8i8_5t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
-; CHECK-NEXT: vlseg5e8.v v7, (a0)
+; CHECK-NEXT: vlseg5e8.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) undef, ptr %base, i32 %vl, i32 3)
- %1 = call <vscale x 8 x i8> @llvm.riscv.tuple.extract.nxv8i8.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0, i32 1)
- ret <vscale x 8 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0
}
-
-define <vscale x 8 x i8> @test_vlseg5_mask_nxv8i8_triscv.vector.tuple_nxv8i8_5t(ptr %base, i32 %vl, <vscale x 8 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @test_vlseg5_mask_nxv8i8_triscv.vector.tuple_nxv8i8_5t(ptr %base, i32 %vl, <vscale x 8 x i1> %mask) {
; CHECK-LABEL: test_vlseg5_mask_nxv8i8_triscv.vector.tuple_nxv8i8_5t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
-; CHECK-NEXT: vlseg5e8.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg5e8.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv8i8_5t.nxv8i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) undef, ptr %base, <vscale x 8 x i1> %mask, i32 %vl, i32 1, i32 3)
- %1 = call <vscale x 8 x i8> @llvm.riscv.tuple.extract.nxv8i8.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0, i32 1)
- ret <vscale x 8 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 1 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv1i8_6t(target("riscv.vector.tuple", <vscale x 1 x i8>, 6), ptr, i32, i32)
-declare target("riscv.vector.tuple", <vscale x 1 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv1i8_6t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 6), ptr, <vscale x 1 x i1>, i32, i32, i32)
-
-define <vscale x 1 x i8> @test_vlseg6_nxv1i8_triscv.vector.tuple_nxv1i8_6t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 1 x i8>, 6) @test_vlseg6_nxv1i8_triscv.vector.tuple_nxv1i8_6t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg6_nxv1i8_triscv.vector.tuple_nxv1i8_6t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
-; CHECK-NEXT: vlseg6e8.v v7, (a0)
+; CHECK-NEXT: vlseg6e8.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv1i8_6t(target("riscv.vector.tuple", <vscale x 1 x i8>, 6) undef, ptr %base, i32 %vl, i32 3)
- %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_6t(target("riscv.vector.tuple", <vscale x 1 x i8>, 6) %0, i32 1)
- ret <vscale x 1 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 1 x i8>, 6) %0
}
-
-define <vscale x 1 x i8> @test_vlseg6_mask_nxv1i8_triscv.vector.tuple_nxv1i8_6t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 1 x i8>, 6) @test_vlseg6_mask_nxv1i8_triscv.vector.tuple_nxv1i8_6t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg6_mask_nxv1i8_triscv.vector.tuple_nxv1i8_6t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
-; CHECK-NEXT: vlseg6e8.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg6e8.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv1i8_6t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 6) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 3)
- %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_6t(target("riscv.vector.tuple", <vscale x 1 x i8>, 6) %0, i32 1)
- ret <vscale x 1 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 1 x i8>, 6) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 2 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv2i8_6t(target("riscv.vector.tuple", <vscale x 2 x i8>, 6), ptr, i32, i32)
-declare target("riscv.vector.tuple", <vscale x 2 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv2i8_6t.nxv2i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 6), ptr, <vscale x 2 x i1>, i32, i32, i32)
-
-define <vscale x 2 x i8> @test_vlseg6_nxv2i8_triscv.vector.tuple_nxv2i8_6t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 6) @test_vlseg6_nxv2i8_triscv.vector.tuple_nxv2i8_6t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg6_nxv2i8_triscv.vector.tuple_nxv2i8_6t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
-; CHECK-NEXT: vlseg6e8.v v7, (a0)
+; CHECK-NEXT: vlseg6e8.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv2i8_6t(target("riscv.vector.tuple", <vscale x 2 x i8>, 6) undef, ptr %base, i32 %vl, i32 3)
- %1 = call <vscale x 2 x i8> @llvm.riscv.tuple.extract.nxv2i8.triscv.vector.tuple_nxv2i8_6t(target("riscv.vector.tuple", <vscale x 2 x i8>, 6) %0, i32 1)
- ret <vscale x 2 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 6) %0
}
-
-define <vscale x 2 x i8> @test_vlseg6_mask_nxv2i8_triscv.vector.tuple_nxv2i8_6t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 6) @test_vlseg6_mask_nxv2i8_triscv.vector.tuple_nxv2i8_6t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg6_mask_nxv2i8_triscv.vector.tuple_nxv2i8_6t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
-; CHECK-NEXT: vlseg6e8.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg6e8.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv2i8_6t.nxv2i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 6) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 3)
- %1 = call <vscale x 2 x i8> @llvm.riscv.tuple.extract.nxv2i8.triscv.vector.tuple_nxv2i8_6t(target("riscv.vector.tuple", <vscale x 2 x i8>, 6) %0, i32 1)
- ret <vscale x 2 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 6) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6), ptr, i32, i32)
-declare target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv4i8_6t.nxv4i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 6), ptr, <vscale x 4 x i1>, i32, i32, i32)
-
-define <vscale x 4 x i8> @test_vlseg6_nxv4i8_triscv.vector.tuple_nxv4i8_6t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @test_vlseg6_nxv4i8_triscv.vector.tuple_nxv4i8_6t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg6_nxv4i8_triscv.vector.tuple_nxv4i8_6t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
-; CHECK-NEXT: vlseg6e8.v v7, (a0)
+; CHECK-NEXT: vlseg6e8.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) undef, ptr %base, i32 %vl, i32 3)
- %1 = call <vscale x 4 x i8> @llvm.riscv.tuple.extract.nxv4i8.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0, i32 1)
- ret <vscale x 4 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0
}
-
-define <vscale x 4 x i8> @test_vlseg6_mask_nxv4i8_triscv.vector.tuple_nxv4i8_6t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @test_vlseg6_mask_nxv4i8_triscv.vector.tuple_nxv4i8_6t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: test_vlseg6_mask_nxv4i8_triscv.vector.tuple_nxv4i8_6t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
-; CHECK-NEXT: vlseg6e8.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg6e8.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv4i8_6t.nxv4i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) undef, ptr %base, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 3)
- %1 = call <vscale x 4 x i8> @llvm.riscv.tuple.extract.nxv4i8.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0, i32 1)
- ret <vscale x 4 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6), ptr, i32, i32)
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv8i8_6t.nxv8i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 6), ptr, <vscale x 8 x i1>, i32, i32, i32)
-
-define <vscale x 8 x i8> @test_vlseg6_nxv8i8_triscv.vector.tuple_nxv8i8_6t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @test_vlseg6_nxv8i8_triscv.vector.tuple_nxv8i8_6t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg6_nxv8i8_triscv.vector.tuple_nxv8i8_6t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
-; CHECK-NEXT: vlseg6e8.v v7, (a0)
+; CHECK-NEXT: vlseg6e8.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) undef, ptr %base, i32 %vl, i32 3)
- %1 = call <vscale x 8 x i8> @llvm.riscv.tuple.extract.nxv8i8.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0, i32 1)
- ret <vscale x 8 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0
}
-
-define <vscale x 8 x i8> @test_vlseg6_mask_nxv8i8_triscv.vector.tuple_nxv8i8_6t(ptr %base, i32 %vl, <vscale x 8 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @test_vlseg6_mask_nxv8i8_triscv.vector.tuple_nxv8i8_6t(ptr %base, i32 %vl, <vscale x 8 x i1> %mask) {
; CHECK-LABEL: test_vlseg6_mask_nxv8i8_triscv.vector.tuple_nxv8i8_6t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
-; CHECK-NEXT: vlseg6e8.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg6e8.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv8i8_6t.nxv8i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) undef, ptr %base, <vscale x 8 x i1> %mask, i32 %vl, i32 1, i32 3)
- %1 = call <vscale x 8 x i8> @llvm.riscv.tuple.extract.nxv8i8.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0, i32 1)
- ret <vscale x 8 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 1 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv1i8_7t(target("riscv.vector.tuple", <vscale x 1 x i8>, 7), ptr, i32, i32)
-declare target("riscv.vector.tuple", <vscale x 1 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv1i8_7t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 7), ptr, <vscale x 1 x i1>, i32, i32, i32)
-
-define <vscale x 1 x i8> @test_vlseg7_nxv1i8_triscv.vector.tuple_nxv1i8_7t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 1 x i8>, 7) @test_vlseg7_nxv1i8_triscv.vector.tuple_nxv1i8_7t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg7_nxv1i8_triscv.vector.tuple_nxv1i8_7t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
-; CHECK-NEXT: vlseg7e8.v v7, (a0)
+; CHECK-NEXT: vlseg7e8.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv1i8_7t(target("riscv.vector.tuple", <vscale x 1 x i8>, 7) undef, ptr %base, i32 %vl, i32 3)
- %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_7t(target("riscv.vector.tuple", <vscale x 1 x i8>, 7) %0, i32 1)
- ret <vscale x 1 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 1 x i8>, 7) %0
}
-
-define <vscale x 1 x i8> @test_vlseg7_mask_nxv1i8_triscv.vector.tuple_nxv1i8_7t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 1 x i8>, 7) @test_vlseg7_mask_nxv1i8_triscv.vector.tuple_nxv1i8_7t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg7_mask_nxv1i8_triscv.vector.tuple_nxv1i8_7t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
-; CHECK-NEXT: vlseg7e8.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg7e8.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv1i8_7t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 7) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 3)
- %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_7t(target("riscv.vector.tuple", <vscale x 1 x i8>, 7) %0, i32 1)
- ret <vscale x 1 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 1 x i8>, 7) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 2 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv2i8_7t(target("riscv.vector.tuple", <vscale x 2 x i8>, 7), ptr, i32, i32)
-declare target("riscv.vector.tuple", <vscale x 2 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv2i8_7t.nxv2i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 7), ptr, <vscale x 2 x i1>, i32, i32, i32)
-
-define <vscale x 2 x i8> @test_vlseg7_nxv2i8_triscv.vector.tuple_nxv2i8_7t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 7) @test_vlseg7_nxv2i8_triscv.vector.tuple_nxv2i8_7t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg7_nxv2i8_triscv.vector.tuple_nxv2i8_7t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
-; CHECK-NEXT: vlseg7e8.v v7, (a0)
+; CHECK-NEXT: vlseg7e8.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv2i8_7t(target("riscv.vector.tuple", <vscale x 2 x i8>, 7) undef, ptr %base, i32 %vl, i32 3)
- %1 = call <vscale x 2 x i8> @llvm.riscv.tuple.extract.nxv2i8.triscv.vector.tuple_nxv2i8_7t(target("riscv.vector.tuple", <vscale x 2 x i8>, 7) %0, i32 1)
- ret <vscale x 2 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 7) %0
}
-
-define <vscale x 2 x i8> @test_vlseg7_mask_nxv2i8_triscv.vector.tuple_nxv2i8_7t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 7) @test_vlseg7_mask_nxv2i8_triscv.vector.tuple_nxv2i8_7t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg7_mask_nxv2i8_triscv.vector.tuple_nxv2i8_7t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
-; CHECK-NEXT: vlseg7e8.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg7e8.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv2i8_7t.nxv2i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 7) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 3)
- %1 = call <vscale x 2 x i8> @llvm.riscv.tuple.extract.nxv2i8.triscv.vector.tuple_nxv2i8_7t(target("riscv.vector.tuple", <vscale x 2 x i8>, 7) %0, i32 1)
- ret <vscale x 2 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 7) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7), ptr, i32, i32)
-declare target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv4i8_7t.nxv4i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 7), ptr, <vscale x 4 x i1>, i32, i32, i32)
-
-define <vscale x 4 x i8> @test_vlseg7_nxv4i8_triscv.vector.tuple_nxv4i8_7t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @test_vlseg7_nxv4i8_triscv.vector.tuple_nxv4i8_7t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg7_nxv4i8_triscv.vector.tuple_nxv4i8_7t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
-; CHECK-NEXT: vlseg7e8.v v7, (a0)
+; CHECK-NEXT: vlseg7e8.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) undef, ptr %base, i32 %vl, i32 3)
- %1 = call <vscale x 4 x i8> @llvm.riscv.tuple.extract.nxv4i8.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0, i32 1)
- ret <vscale x 4 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0
}
-
-define <vscale x 4 x i8> @test_vlseg7_mask_nxv4i8_triscv.vector.tuple_nxv4i8_7t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @test_vlseg7_mask_nxv4i8_triscv.vector.tuple_nxv4i8_7t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: test_vlseg7_mask_nxv4i8_triscv.vector.tuple_nxv4i8_7t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
-; CHECK-NEXT: vlseg7e8.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg7e8.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv4i8_7t.nxv4i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) undef, ptr %base, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 3)
- %1 = call <vscale x 4 x i8> @llvm.riscv.tuple.extract.nxv4i8.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0, i32 1)
- ret <vscale x 4 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7), ptr, i32, i32)
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv8i8_7t.nxv8i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 7), ptr, <vscale x 8 x i1>, i32, i32, i32)
-
-define <vscale x 8 x i8> @test_vlseg7_nxv8i8_triscv.vector.tuple_nxv8i8_7t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @test_vlseg7_nxv8i8_triscv.vector.tuple_nxv8i8_7t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg7_nxv8i8_triscv.vector.tuple_nxv8i8_7t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
-; CHECK-NEXT: vlseg7e8.v v7, (a0)
+; CHECK-NEXT: vlseg7e8.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) undef, ptr %base, i32 %vl, i32 3)
- %1 = call <vscale x 8 x i8> @llvm.riscv.tuple.extract.nxv8i8.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0, i32 1)
- ret <vscale x 8 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0
}
-
-define <vscale x 8 x i8> @test_vlseg7_mask_nxv8i8_triscv.vector.tuple_nxv8i8_7t(ptr %base, i32 %vl, <vscale x 8 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @test_vlseg7_mask_nxv8i8_triscv.vector.tuple_nxv8i8_7t(ptr %base, i32 %vl, <vscale x 8 x i1> %mask) {
; CHECK-LABEL: test_vlseg7_mask_nxv8i8_triscv.vector.tuple_nxv8i8_7t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
-; CHECK-NEXT: vlseg7e8.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg7e8.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv8i8_7t.nxv8i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) undef, ptr %base, <vscale x 8 x i1> %mask, i32 %vl, i32 1, i32 3)
- %1 = call <vscale x 8 x i8> @llvm.riscv.tuple.extract.nxv8i8.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0, i32 1)
- ret <vscale x 8 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 1 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv1i8_8t(target("riscv.vector.tuple", <vscale x 1 x i8>, 8), ptr, i32, i32)
-declare target("riscv.vector.tuple", <vscale x 1 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv1i8_8t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 8), ptr, <vscale x 1 x i1>, i32, i32, i32)
-
-define <vscale x 1 x i8> @test_vlseg8_nxv1i8_triscv.vector.tuple_nxv1i8_8t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 1 x i8>, 8) @test_vlseg8_nxv1i8_triscv.vector.tuple_nxv1i8_8t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg8_nxv1i8_triscv.vector.tuple_nxv1i8_8t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
-; CHECK-NEXT: vlseg8e8.v v7, (a0)
+; CHECK-NEXT: vlseg8e8.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv1i8_8t(target("riscv.vector.tuple", <vscale x 1 x i8>, 8) undef, ptr %base, i32 %vl, i32 3)
- %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_8t(target("riscv.vector.tuple", <vscale x 1 x i8>, 8) %0, i32 1)
- ret <vscale x 1 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 1 x i8>, 8) %0
}
-
-define <vscale x 1 x i8> @test_vlseg8_mask_nxv1i8_triscv.vector.tuple_nxv1i8_8t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 1 x i8>, 8) @test_vlseg8_mask_nxv1i8_triscv.vector.tuple_nxv1i8_8t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg8_mask_nxv1i8_triscv.vector.tuple_nxv1i8_8t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
-; CHECK-NEXT: vlseg8e8.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg8e8.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv1i8_8t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 8) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 3)
- %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_8t(target("riscv.vector.tuple", <vscale x 1 x i8>, 8) %0, i32 1)
- ret <vscale x 1 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 1 x i8>, 8) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 2 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv2i8_8t(target("riscv.vector.tuple", <vscale x 2 x i8>, 8), ptr, i32, i32)
-declare target("riscv.vector.tuple", <vscale x 2 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv2i8_8t.nxv2i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 8), ptr, <vscale x 2 x i1>, i32, i32, i32)
-
-define <vscale x 2 x i8> @test_vlseg8_nxv2i8_triscv.vector.tuple_nxv2i8_8t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 8) @test_vlseg8_nxv2i8_triscv.vector.tuple_nxv2i8_8t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg8_nxv2i8_triscv.vector.tuple_nxv2i8_8t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
-; CHECK-NEXT: vlseg8e8.v v7, (a0)
+; CHECK-NEXT: vlseg8e8.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv2i8_8t(target("riscv.vector.tuple", <vscale x 2 x i8>, 8) undef, ptr %base, i32 %vl, i32 3)
- %1 = call <vscale x 2 x i8> @llvm.riscv.tuple.extract.nxv2i8.triscv.vector.tuple_nxv2i8_8t(target("riscv.vector.tuple", <vscale x 2 x i8>, 8) %0, i32 1)
- ret <vscale x 2 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 8) %0
}
-
-define <vscale x 2 x i8> @test_vlseg8_mask_nxv2i8_triscv.vector.tuple_nxv2i8_8t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 8) @test_vlseg8_mask_nxv2i8_triscv.vector.tuple_nxv2i8_8t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg8_mask_nxv2i8_triscv.vector.tuple_nxv2i8_8t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
-; CHECK-NEXT: vlseg8e8.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg8e8.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv2i8_8t.nxv2i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 8) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 3)
- %1 = call <vscale x 2 x i8> @llvm.riscv.tuple.extract.nxv2i8.triscv.vector.tuple_nxv2i8_8t(target("riscv.vector.tuple", <vscale x 2 x i8>, 8) %0, i32 1)
- ret <vscale x 2 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 8) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8), ptr, i32, i32)
-declare target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv4i8_8t.nxv4i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 8), ptr, <vscale x 4 x i1>, i32, i32, i32)
-
-define <vscale x 4 x i8> @test_vlseg8_nxv4i8_triscv.vector.tuple_nxv4i8_8t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @test_vlseg8_nxv4i8_triscv.vector.tuple_nxv4i8_8t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg8_nxv4i8_triscv.vector.tuple_nxv4i8_8t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
-; CHECK-NEXT: vlseg8e8.v v7, (a0)
+; CHECK-NEXT: vlseg8e8.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) undef, ptr %base, i32 %vl, i32 3)
- %1 = call <vscale x 4 x i8> @llvm.riscv.tuple.extract.nxv4i8.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0, i32 1)
- ret <vscale x 4 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0
}
-
-define <vscale x 4 x i8> @test_vlseg8_mask_nxv4i8_triscv.vector.tuple_nxv4i8_8t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @test_vlseg8_mask_nxv4i8_triscv.vector.tuple_nxv4i8_8t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: test_vlseg8_mask_nxv4i8_triscv.vector.tuple_nxv4i8_8t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
-; CHECK-NEXT: vlseg8e8.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg8e8.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv4i8_8t.nxv4i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) undef, ptr %base, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 3)
- %1 = call <vscale x 4 x i8> @llvm.riscv.tuple.extract.nxv4i8.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0, i32 1)
- ret <vscale x 4 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8), ptr, i32, i32)
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv8i8_8t.nxv8i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 8), ptr, <vscale x 8 x i1>, i32, i32, i32)
-
-define <vscale x 8 x i8> @test_vlseg8_nxv8i8_triscv.vector.tuple_nxv8i8_8t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @test_vlseg8_nxv8i8_triscv.vector.tuple_nxv8i8_8t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg8_nxv8i8_triscv.vector.tuple_nxv8i8_8t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
-; CHECK-NEXT: vlseg8e8.v v7, (a0)
+; CHECK-NEXT: vlseg8e8.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) undef, ptr %base, i32 %vl, i32 3)
- %1 = call <vscale x 8 x i8> @llvm.riscv.tuple.extract.nxv8i8.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0, i32 1)
- ret <vscale x 8 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0
}
-
-define <vscale x 8 x i8> @test_vlseg8_mask_nxv8i8_triscv.vector.tuple_nxv8i8_8t(ptr %base, i32 %vl, <vscale x 8 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @test_vlseg8_mask_nxv8i8_triscv.vector.tuple_nxv8i8_8t(ptr %base, i32 %vl, <vscale x 8 x i1> %mask) {
; CHECK-LABEL: test_vlseg8_mask_nxv8i8_triscv.vector.tuple_nxv8i8_8t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
-; CHECK-NEXT: vlseg8e8.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg8e8.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv8i8_8t.nxv8i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) undef, ptr %base, <vscale x 8 x i1> %mask, i32 %vl, i32 1, i32 3)
- %1 = call <vscale x 8 x i8> @llvm.riscv.tuple.extract.nxv8i8.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0, i32 1)
- ret <vscale x 8 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 2 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv2i8_2t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 2), ptr, <vscale x 1 x i1>, i32, i32, i32)
-
-define <vscale x 1 x i16> @test_vlseg2_nxv1i16_triscv.vector.tuple_nxv2i8_2t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 2) @test_vlseg2_nxv1i16_triscv.vector.tuple_nxv2i8_2t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg2_nxv1i16_triscv.vector.tuple_nxv2i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT: vlseg2e16.v v7, (a0)
+; CHECK-NEXT: vlseg2e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv2i8_2t(target("riscv.vector.tuple", <vscale x 2 x i8>, 2) undef, ptr %base, i32 %vl, i32 4)
- %1 = call <vscale x 1 x i16> @llvm.riscv.tuple.extract.nxv1i16.triscv.vector.tuple_nxv2i8_2t(target("riscv.vector.tuple", <vscale x 2 x i8>, 2) %0, i32 1)
- ret <vscale x 1 x i16> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 2) %0
}
-
-define <vscale x 1 x i16> @test_vlseg2_mask_nxv1i16_triscv.vector.tuple_nxv2i8_2t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 2) @test_vlseg2_mask_nxv1i16_triscv.vector.tuple_nxv2i8_2t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg2_mask_nxv1i16_triscv.vector.tuple_nxv2i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT: vlseg2e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg2e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv2i8_2t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 2) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 4)
- %1 = call <vscale x 1 x i16> @llvm.riscv.tuple.extract.nxv1i16.triscv.vector.tuple_nxv2i8_2t(target("riscv.vector.tuple", <vscale x 2 x i8>, 2) %0, i32 1)
- ret <vscale x 1 x i16> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 2) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv4i8_2t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 2), ptr, <vscale x 2 x i1>, i32, i32, i32)
-
-define <vscale x 2 x i16> @test_vlseg2_nxv2i16_triscv.vector.tuple_nxv4i8_2t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @test_vlseg2_nxv2i16_triscv.vector.tuple_nxv4i8_2t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg2_nxv2i16_triscv.vector.tuple_nxv4i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT: vlseg2e16.v v7, (a0)
+; CHECK-NEXT: vlseg2e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) undef, ptr %base, i32 %vl, i32 4)
- %1 = call <vscale x 2 x i16> @llvm.riscv.tuple.extract.nxv2i16.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0, i32 1)
- ret <vscale x 2 x i16> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0
}
-
-define <vscale x 2 x i16> @test_vlseg2_mask_nxv2i16_triscv.vector.tuple_nxv4i8_2t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @test_vlseg2_mask_nxv2i16_triscv.vector.tuple_nxv4i8_2t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg2_mask_nxv2i16_triscv.vector.tuple_nxv4i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT: vlseg2e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg2e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv4i8_2t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 4)
- %1 = call <vscale x 2 x i16> @llvm.riscv.tuple.extract.nxv2i16.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0, i32 1)
- ret <vscale x 2 x i16> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv8i8_2t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 2), ptr, <vscale x 4 x i1>, i32, i32, i32)
-
-define <vscale x 4 x i16> @test_vlseg2_nxv4i16_triscv.vector.tuple_nxv8i8_2t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @test_vlseg2_nxv4i16_triscv.vector.tuple_nxv8i8_2t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg2_nxv4i16_triscv.vector.tuple_nxv8i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vlseg2e16.v v7, (a0)
+; CHECK-NEXT: vlseg2e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) undef, ptr %base, i32 %vl, i32 4)
- %1 = call <vscale x 4 x i16> @llvm.riscv.tuple.extract.nxv4i16.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0, i32 1)
- ret <vscale x 4 x i16> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0
}
-
-define <vscale x 4 x i16> @test_vlseg2_mask_nxv4i16_triscv.vector.tuple_nxv8i8_2t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @test_vlseg2_mask_nxv4i16_triscv.vector.tuple_nxv8i8_2t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: test_vlseg2_mask_nxv4i16_triscv.vector.tuple_nxv8i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vlseg2e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg2e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv8i8_2t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) undef, ptr %base, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 4)
- %1 = call <vscale x 4 x i16> @llvm.riscv.tuple.extract.nxv4i16.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0, i32 1)
- ret <vscale x 4 x i16> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv16i8_2t.nxv8i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 2), ptr, <vscale x 8 x i1>, i32, i32, i32)
-
-define <vscale x 8 x i16> @test_vlseg2_nxv8i16_triscv.vector.tuple_nxv16i8_2t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @test_vlseg2_nxv8i16_triscv.vector.tuple_nxv16i8_2t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg2_nxv8i16_triscv.vector.tuple_nxv16i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vlseg2e16.v v6, (a0)
+; CHECK-NEXT: vlseg2e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, i32 %vl, i32 4)
- %1 = call <vscale x 8 x i16> @llvm.riscv.tuple.extract.nxv8i16.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0, i32 1)
- ret <vscale x 8 x i16> %1
+ ret target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0
}
-
-define <vscale x 8 x i16> @test_vlseg2_mask_nxv8i16_triscv.vector.tuple_nxv16i8_2t(ptr %base, i32 %vl, <vscale x 8 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @test_vlseg2_mask_nxv8i16_triscv.vector.tuple_nxv16i8_2t(ptr %base, i32 %vl, <vscale x 8 x i1> %mask) {
; CHECK-LABEL: test_vlseg2_mask_nxv8i16_triscv.vector.tuple_nxv16i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vlseg2e16.v v6, (a0), v0.t
+; CHECK-NEXT: vlseg2e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv16i8_2t.nxv8i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 8 x i1> %mask, i32 %vl, i32 1, i32 4)
- %1 = call <vscale x 8 x i16> @llvm.riscv.tuple.extract.nxv8i16.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0, i32 1)
- ret <vscale x 8 x i16> %1
+ ret target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv32i8_2t.nxv16i1(target("riscv.vector.tuple", <vscale x 32 x i8>, 2), ptr, <vscale x 16 x i1>, i32, i32, i32)
-
-define <vscale x 16 x i16> @test_vlseg2_nxv16i16_triscv.vector.tuple_nxv32i8_2t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @test_vlseg2_nxv16i16_triscv.vector.tuple_nxv32i8_2t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg2_nxv16i16_triscv.vector.tuple_nxv32i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
-; CHECK-NEXT: vlseg2e16.v v4, (a0)
+; CHECK-NEXT: vlseg2e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, i32 %vl, i32 4)
- %1 = call <vscale x 16 x i16> @llvm.riscv.tuple.extract.nxv16i16.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0, i32 1)
- ret <vscale x 16 x i16> %1
+ ret target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0
}
-
-define <vscale x 16 x i16> @test_vlseg2_mask_nxv16i16_triscv.vector.tuple_nxv32i8_2t(ptr %base, i32 %vl, <vscale x 16 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @test_vlseg2_mask_nxv16i16_triscv.vector.tuple_nxv32i8_2t(ptr %base, i32 %vl, <vscale x 16 x i1> %mask) {
; CHECK-LABEL: test_vlseg2_mask_nxv16i16_triscv.vector.tuple_nxv32i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
-; CHECK-NEXT: vlseg2e16.v v4, (a0), v0.t
+; CHECK-NEXT: vlseg2e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv32i8_2t.nxv16i1(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 16 x i1> %mask, i32 %vl, i32 1, i32 4)
- %1 = call <vscale x 16 x i16> @llvm.riscv.tuple.extract.nxv16i16.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0, i32 1)
- ret <vscale x 16 x i16> %1
+ ret target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 2 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv2i8_3t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 3), ptr, <vscale x 1 x i1>, i32, i32, i32)
-
-define <vscale x 1 x i16> @test_vlseg3_nxv1i16_triscv.vector.tuple_nxv2i8_3t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 3) @test_vlseg3_nxv1i16_triscv.vector.tuple_nxv2i8_3t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg3_nxv1i16_triscv.vector.tuple_nxv2i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT: vlseg3e16.v v7, (a0)
+; CHECK-NEXT: vlseg3e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv2i8_3t(target("riscv.vector.tuple", <vscale x 2 x i8>, 3) undef, ptr %base, i32 %vl, i32 4)
- %1 = call <vscale x 1 x i16> @llvm.riscv.tuple.extract.nxv1i16.triscv.vector.tuple_nxv2i8_3t(target("riscv.vector.tuple", <vscale x 2 x i8>, 3) %0, i32 1)
- ret <vscale x 1 x i16> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 3) %0
}
-
-define <vscale x 1 x i16> @test_vlseg3_mask_nxv1i16_triscv.vector.tuple_nxv2i8_3t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 3) @test_vlseg3_mask_nxv1i16_triscv.vector.tuple_nxv2i8_3t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg3_mask_nxv1i16_triscv.vector.tuple_nxv2i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT: vlseg3e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg3e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv2i8_3t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 3) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 4)
- %1 = call <vscale x 1 x i16> @llvm.riscv.tuple.extract.nxv1i16.triscv.vector.tuple_nxv2i8_3t(target("riscv.vector.tuple", <vscale x 2 x i8>, 3) %0, i32 1)
- ret <vscale x 1 x i16> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 3) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv4i8_3t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 3), ptr, <vscale x 2 x i1>, i32, i32, i32)
-
-define <vscale x 2 x i16> @test_vlseg3_nxv2i16_triscv.vector.tuple_nxv4i8_3t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @test_vlseg3_nxv2i16_triscv.vector.tuple_nxv4i8_3t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg3_nxv2i16_triscv.vector.tuple_nxv4i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT: vlseg3e16.v v7, (a0)
+; CHECK-NEXT: vlseg3e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) undef, ptr %base, i32 %vl, i32 4)
- %1 = call <vscale x 2 x i16> @llvm.riscv.tuple.extract.nxv2i16.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0, i32 1)
- ret <vscale x 2 x i16> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0
}
-
-define <vscale x 2 x i16> @test_vlseg3_mask_nxv2i16_triscv.vector.tuple_nxv4i8_3t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @test_vlseg3_mask_nxv2i16_triscv.vector.tuple_nxv4i8_3t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg3_mask_nxv2i16_triscv.vector.tuple_nxv4i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT: vlseg3e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg3e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv4i8_3t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 4)
- %1 = call <vscale x 2 x i16> @llvm.riscv.tuple.extract.nxv2i16.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0, i32 1)
- ret <vscale x 2 x i16> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv8i8_3t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 3), ptr, <vscale x 4 x i1>, i32, i32, i32)
-
-define <vscale x 4 x i16> @test_vlseg3_nxv4i16_triscv.vector.tuple_nxv8i8_3t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @test_vlseg3_nxv4i16_triscv.vector.tuple_nxv8i8_3t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg3_nxv4i16_triscv.vector.tuple_nxv8i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vlseg3e16.v v7, (a0)
+; CHECK-NEXT: vlseg3e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) undef, ptr %base, i32 %vl, i32 4)
- %1 = call <vscale x 4 x i16> @llvm.riscv.tuple.extract.nxv4i16.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0, i32 1)
- ret <vscale x 4 x i16> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0
}
-
-define <vscale x 4 x i16> @test_vlseg3_mask_nxv4i16_triscv.vector.tuple_nxv8i8_3t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @test_vlseg3_mask_nxv4i16_triscv.vector.tuple_nxv8i8_3t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: test_vlseg3_mask_nxv4i16_triscv.vector.tuple_nxv8i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vlseg3e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg3e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv8i8_3t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) undef, ptr %base, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 4)
- %1 = call <vscale x 4 x i16> @llvm.riscv.tuple.extract.nxv4i16.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0, i32 1)
- ret <vscale x 4 x i16> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv8i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 3), ptr, <vscale x 8 x i1>, i32, i32, i32)
-
-define <vscale x 8 x i16> @test_vlseg3_nxv8i16_triscv.vector.tuple_nxv16i8_3t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @test_vlseg3_nxv8i16_triscv.vector.tuple_nxv16i8_3t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg3_nxv8i16_triscv.vector.tuple_nxv16i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vlseg3e16.v v6, (a0)
+; CHECK-NEXT: vlseg3e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, i32 %vl, i32 4)
- %1 = call <vscale x 8 x i16> @llvm.riscv.tuple.extract.nxv8i16.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0, i32 1)
- ret <vscale x 8 x i16> %1
+ ret target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0
}
-
-define <vscale x 8 x i16> @test_vlseg3_mask_nxv8i16_triscv.vector.tuple_nxv16i8_3t(ptr %base, i32 %vl, <vscale x 8 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @test_vlseg3_mask_nxv8i16_triscv.vector.tuple_nxv16i8_3t(ptr %base, i32 %vl, <vscale x 8 x i1> %mask) {
; CHECK-LABEL: test_vlseg3_mask_nxv8i16_triscv.vector.tuple_nxv16i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vlseg3e16.v v6, (a0), v0.t
+; CHECK-NEXT: vlseg3e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv8i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 8 x i1> %mask, i32 %vl, i32 1, i32 4)
- %1 = call <vscale x 8 x i16> @llvm.riscv.tuple.extract.nxv8i16.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0, i32 1)
- ret <vscale x 8 x i16> %1
+ ret target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 2 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv2i8_4t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 4), ptr, <vscale x 1 x i1>, i32, i32, i32)
-
-define <vscale x 1 x i16> @test_vlseg4_nxv1i16_triscv.vector.tuple_nxv2i8_4t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 4) @test_vlseg4_nxv1i16_triscv.vector.tuple_nxv2i8_4t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg4_nxv1i16_triscv.vector.tuple_nxv2i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT: vlseg4e16.v v7, (a0)
+; CHECK-NEXT: vlseg4e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv2i8_4t(target("riscv.vector.tuple", <vscale x 2 x i8>, 4) undef, ptr %base, i32 %vl, i32 4)
- %1 = call <vscale x 1 x i16> @llvm.riscv.tuple.extract.nxv1i16.triscv.vector.tuple_nxv2i8_4t(target("riscv.vector.tuple", <vscale x 2 x i8>, 4) %0, i32 1)
- ret <vscale x 1 x i16> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 4) %0
}
-
-define <vscale x 1 x i16> @test_vlseg4_mask_nxv1i16_triscv.vector.tuple_nxv2i8_4t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 4) @test_vlseg4_mask_nxv1i16_triscv.vector.tuple_nxv2i8_4t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg4_mask_nxv1i16_triscv.vector.tuple_nxv2i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT: vlseg4e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg4e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv2i8_4t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 4) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 4)
- %1 = call <vscale x 1 x i16> @llvm.riscv.tuple.extract.nxv1i16.triscv.vector.tuple_nxv2i8_4t(target("riscv.vector.tuple", <vscale x 2 x i8>, 4) %0, i32 1)
- ret <vscale x 1 x i16> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 4) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv4i8_4t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 4), ptr, <vscale x 2 x i1>, i32, i32, i32)
-
-define <vscale x 2 x i16> @test_vlseg4_nxv2i16_triscv.vector.tuple_nxv4i8_4t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @test_vlseg4_nxv2i16_triscv.vector.tuple_nxv4i8_4t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg4_nxv2i16_triscv.vector.tuple_nxv4i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT: vlseg4e16.v v7, (a0)
+; CHECK-NEXT: vlseg4e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) undef, ptr %base, i32 %vl, i32 4)
- %1 = call <vscale x 2 x i16> @llvm.riscv.tuple.extract.nxv2i16.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0, i32 1)
- ret <vscale x 2 x i16> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0
}
-
-define <vscale x 2 x i16> @test_vlseg4_mask_nxv2i16_triscv.vector.tuple_nxv4i8_4t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @test_vlseg4_mask_nxv2i16_triscv.vector.tuple_nxv4i8_4t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg4_mask_nxv2i16_triscv.vector.tuple_nxv4i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT: vlseg4e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg4e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv4i8_4t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 4)
- %1 = call <vscale x 2 x i16> @llvm.riscv.tuple.extract.nxv2i16.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0, i32 1)
- ret <vscale x 2 x i16> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv8i8_4t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 4), ptr, <vscale x 4 x i1>, i32, i32, i32)
-
-define <vscale x 4 x i16> @test_vlseg4_nxv4i16_triscv.vector.tuple_nxv8i8_4t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @test_vlseg4_nxv4i16_triscv.vector.tuple_nxv8i8_4t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg4_nxv4i16_triscv.vector.tuple_nxv8i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vlseg4e16.v v7, (a0)
+; CHECK-NEXT: vlseg4e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) undef, ptr %base, i32 %vl, i32 4)
- %1 = call <vscale x 4 x i16> @llvm.riscv.tuple.extract.nxv4i16.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0, i32 1)
- ret <vscale x 4 x i16> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0
}
-
-define <vscale x 4 x i16> @test_vlseg4_mask_nxv4i16_triscv.vector.tuple_nxv8i8_4t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @test_vlseg4_mask_nxv4i16_triscv.vector.tuple_nxv8i8_4t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: test_vlseg4_mask_nxv4i16_triscv.vector.tuple_nxv8i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vlseg4e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg4e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv8i8_4t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) undef, ptr %base, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 4)
- %1 = call <vscale x 4 x i16> @llvm.riscv.tuple.extract.nxv4i16.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0, i32 1)
- ret <vscale x 4 x i16> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv16i8_4t.nxv8i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 4), ptr, <vscale x 8 x i1>, i32, i32, i32)
-
-define <vscale x 8 x i16> @test_vlseg4_nxv8i16_triscv.vector.tuple_nxv16i8_4t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @test_vlseg4_nxv8i16_triscv.vector.tuple_nxv16i8_4t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg4_nxv8i16_triscv.vector.tuple_nxv16i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vlseg4e16.v v6, (a0)
+; CHECK-NEXT: vlseg4e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, i32 %vl, i32 4)
- %1 = call <vscale x 8 x i16> @llvm.riscv.tuple.extract.nxv8i16.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0, i32 1)
- ret <vscale x 8 x i16> %1
+ ret target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0
}
-
-define <vscale x 8 x i16> @test_vlseg4_mask_nxv8i16_triscv.vector.tuple_nxv16i8_4t(ptr %base, i32 %vl, <vscale x 8 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @test_vlseg4_mask_nxv8i16_triscv.vector.tuple_nxv16i8_4t(ptr %base, i32 %vl, <vscale x 8 x i1> %mask) {
; CHECK-LABEL: test_vlseg4_mask_nxv8i16_triscv.vector.tuple_nxv16i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vlseg4e16.v v6, (a0), v0.t
+; CHECK-NEXT: vlseg4e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv16i8_4t.nxv8i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 8 x i1> %mask, i32 %vl, i32 1, i32 4)
- %1 = call <vscale x 8 x i16> @llvm.riscv.tuple.extract.nxv8i16.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0, i32 1)
- ret <vscale x 8 x i16> %1
+ ret target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 2 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv2i8_5t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 5), ptr, <vscale x 1 x i1>, i32, i32, i32)
-
-define <vscale x 1 x i16> @test_vlseg5_nxv1i16_triscv.vector.tuple_nxv2i8_5t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 5) @test_vlseg5_nxv1i16_triscv.vector.tuple_nxv2i8_5t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg5_nxv1i16_triscv.vector.tuple_nxv2i8_5t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT: vlseg5e16.v v7, (a0)
+; CHECK-NEXT: vlseg5e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv2i8_5t(target("riscv.vector.tuple", <vscale x 2 x i8>, 5) undef, ptr %base, i32 %vl, i32 4)
- %1 = call <vscale x 1 x i16> @llvm.riscv.tuple.extract.nxv1i16.triscv.vector.tuple_nxv2i8_5t(target("riscv.vector.tuple", <vscale x 2 x i8>, 5) %0, i32 1)
- ret <vscale x 1 x i16> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 5) %0
}
-
-define <vscale x 1 x i16> @test_vlseg5_mask_nxv1i16_triscv.vector.tuple_nxv2i8_5t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 5) @test_vlseg5_mask_nxv1i16_triscv.vector.tuple_nxv2i8_5t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg5_mask_nxv1i16_triscv.vector.tuple_nxv2i8_5t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT: vlseg5e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg5e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv2i8_5t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 5) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 4)
- %1 = call <vscale x 1 x i16> @llvm.riscv.tuple.extract.nxv1i16.triscv.vector.tuple_nxv2i8_5t(target("riscv.vector.tuple", <vscale x 2 x i8>, 5) %0, i32 1)
- ret <vscale x 1 x i16> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 5) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv4i8_5t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 5), ptr, <vscale x 2 x i1>, i32, i32, i32)
-
-define <vscale x 2 x i16> @test_vlseg5_nxv2i16_triscv.vector.tuple_nxv4i8_5t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @test_vlseg5_nxv2i16_triscv.vector.tuple_nxv4i8_5t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg5_nxv2i16_triscv.vector.tuple_nxv4i8_5t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT: vlseg5e16.v v7, (a0)
+; CHECK-NEXT: vlseg5e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) undef, ptr %base, i32 %vl, i32 4)
- %1 = call <vscale x 2 x i16> @llvm.riscv.tuple.extract.nxv2i16.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0, i32 1)
- ret <vscale x 2 x i16> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0
}
-
-define <vscale x 2 x i16> @test_vlseg5_mask_nxv2i16_triscv.vector.tuple_nxv4i8_5t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @test_vlseg5_mask_nxv2i16_triscv.vector.tuple_nxv4i8_5t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg5_mask_nxv2i16_triscv.vector.tuple_nxv4i8_5t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT: vlseg5e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg5e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv4i8_5t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 4)
- %1 = call <vscale x 2 x i16> @llvm.riscv.tuple.extract.nxv2i16.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0, i32 1)
- ret <vscale x 2 x i16> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv8i8_5t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 5), ptr, <vscale x 4 x i1>, i32, i32, i32)
-
-define <vscale x 4 x i16> @test_vlseg5_nxv4i16_triscv.vector.tuple_nxv8i8_5t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @test_vlseg5_nxv4i16_triscv.vector.tuple_nxv8i8_5t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg5_nxv4i16_triscv.vector.tuple_nxv8i8_5t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vlseg5e16.v v7, (a0)
+; CHECK-NEXT: vlseg5e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) undef, ptr %base, i32 %vl, i32 4)
- %1 = call <vscale x 4 x i16> @llvm.riscv.tuple.extract.nxv4i16.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0, i32 1)
- ret <vscale x 4 x i16> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0
}
-
-define <vscale x 4 x i16> @test_vlseg5_mask_nxv4i16_triscv.vector.tuple_nxv8i8_5t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @test_vlseg5_mask_nxv4i16_triscv.vector.tuple_nxv8i8_5t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: test_vlseg5_mask_nxv4i16_triscv.vector.tuple_nxv8i8_5t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vlseg5e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg5e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv8i8_5t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) undef, ptr %base, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 4)
- %1 = call <vscale x 4 x i16> @llvm.riscv.tuple.extract.nxv4i16.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0, i32 1)
- ret <vscale x 4 x i16> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 2 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv2i8_6t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 6), ptr, <vscale x 1 x i1>, i32, i32, i32)
-
-define <vscale x 1 x i16> @test_vlseg6_nxv1i16_triscv.vector.tuple_nxv2i8_6t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 6) @test_vlseg6_nxv1i16_triscv.vector.tuple_nxv2i8_6t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg6_nxv1i16_triscv.vector.tuple_nxv2i8_6t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT: vlseg6e16.v v7, (a0)
+; CHECK-NEXT: vlseg6e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv2i8_6t(target("riscv.vector.tuple", <vscale x 2 x i8>, 6) undef, ptr %base, i32 %vl, i32 4)
- %1 = call <vscale x 1 x i16> @llvm.riscv.tuple.extract.nxv1i16.triscv.vector.tuple_nxv2i8_6t(target("riscv.vector.tuple", <vscale x 2 x i8>, 6) %0, i32 1)
- ret <vscale x 1 x i16> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 6) %0
}
-
-define <vscale x 1 x i16> @test_vlseg6_mask_nxv1i16_triscv.vector.tuple_nxv2i8_6t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 6) @test_vlseg6_mask_nxv1i16_triscv.vector.tuple_nxv2i8_6t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg6_mask_nxv1i16_triscv.vector.tuple_nxv2i8_6t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT: vlseg6e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg6e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv2i8_6t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 6) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 4)
- %1 = call <vscale x 1 x i16> @llvm.riscv.tuple.extract.nxv1i16.triscv.vector.tuple_nxv2i8_6t(target("riscv.vector.tuple", <vscale x 2 x i8>, 6) %0, i32 1)
- ret <vscale x 1 x i16> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 6) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv4i8_6t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 6), ptr, <vscale x 2 x i1>, i32, i32, i32)
-
-define <vscale x 2 x i16> @test_vlseg6_nxv2i16_triscv.vector.tuple_nxv4i8_6t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @test_vlseg6_nxv2i16_triscv.vector.tuple_nxv4i8_6t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg6_nxv2i16_triscv.vector.tuple_nxv4i8_6t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT: vlseg6e16.v v7, (a0)
+; CHECK-NEXT: vlseg6e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) undef, ptr %base, i32 %vl, i32 4)
- %1 = call <vscale x 2 x i16> @llvm.riscv.tuple.extract.nxv2i16.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0, i32 1)
- ret <vscale x 2 x i16> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0
}
-
-define <vscale x 2 x i16> @test_vlseg6_mask_nxv2i16_triscv.vector.tuple_nxv4i8_6t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @test_vlseg6_mask_nxv2i16_triscv.vector.tuple_nxv4i8_6t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg6_mask_nxv2i16_triscv.vector.tuple_nxv4i8_6t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT: vlseg6e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg6e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv4i8_6t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 4)
- %1 = call <vscale x 2 x i16> @llvm.riscv.tuple.extract.nxv2i16.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0, i32 1)
- ret <vscale x 2 x i16> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv8i8_6t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 6), ptr, <vscale x 4 x i1>, i32, i32, i32)
-
-define <vscale x 4 x i16> @test_vlseg6_nxv4i16_triscv.vector.tuple_nxv8i8_6t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @test_vlseg6_nxv4i16_triscv.vector.tuple_nxv8i8_6t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg6_nxv4i16_triscv.vector.tuple_nxv8i8_6t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vlseg6e16.v v7, (a0)
+; CHECK-NEXT: vlseg6e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) undef, ptr %base, i32 %vl, i32 4)
- %1 = call <vscale x 4 x i16> @llvm.riscv.tuple.extract.nxv4i16.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0, i32 1)
- ret <vscale x 4 x i16> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0
}
-
-define <vscale x 4 x i16> @test_vlseg6_mask_nxv4i16_triscv.vector.tuple_nxv8i8_6t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @test_vlseg6_mask_nxv4i16_triscv.vector.tuple_nxv8i8_6t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: test_vlseg6_mask_nxv4i16_triscv.vector.tuple_nxv8i8_6t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vlseg6e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg6e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv8i8_6t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) undef, ptr %base, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 4)
- %1 = call <vscale x 4 x i16> @llvm.riscv.tuple.extract.nxv4i16.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0, i32 1)
- ret <vscale x 4 x i16> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 2 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv2i8_7t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 7), ptr, <vscale x 1 x i1>, i32, i32, i32)
-
-define <vscale x 1 x i16> @test_vlseg7_nxv1i16_triscv.vector.tuple_nxv2i8_7t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 7) @test_vlseg7_nxv1i16_triscv.vector.tuple_nxv2i8_7t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg7_nxv1i16_triscv.vector.tuple_nxv2i8_7t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT: vlseg7e16.v v7, (a0)
+; CHECK-NEXT: vlseg7e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv2i8_7t(target("riscv.vector.tuple", <vscale x 2 x i8>, 7) undef, ptr %base, i32 %vl, i32 4)
- %1 = call <vscale x 1 x i16> @llvm.riscv.tuple.extract.nxv1i16.triscv.vector.tuple_nxv2i8_7t(target("riscv.vector.tuple", <vscale x 2 x i8>, 7) %0, i32 1)
- ret <vscale x 1 x i16> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 7) %0
}
-
-define <vscale x 1 x i16> @test_vlseg7_mask_nxv1i16_triscv.vector.tuple_nxv2i8_7t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 7) @test_vlseg7_mask_nxv1i16_triscv.vector.tuple_nxv2i8_7t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg7_mask_nxv1i16_triscv.vector.tuple_nxv2i8_7t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT: vlseg7e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg7e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv2i8_7t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 7) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 4)
- %1 = call <vscale x 1 x i16> @llvm.riscv.tuple.extract.nxv1i16.triscv.vector.tuple_nxv2i8_7t(target("riscv.vector.tuple", <vscale x 2 x i8>, 7) %0, i32 1)
- ret <vscale x 1 x i16> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 7) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv4i8_7t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 7), ptr, <vscale x 2 x i1>, i32, i32, i32)
-
-define <vscale x 2 x i16> @test_vlseg7_nxv2i16_triscv.vector.tuple_nxv4i8_7t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @test_vlseg7_nxv2i16_triscv.vector.tuple_nxv4i8_7t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg7_nxv2i16_triscv.vector.tuple_nxv4i8_7t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT: vlseg7e16.v v7, (a0)
+; CHECK-NEXT: vlseg7e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) undef, ptr %base, i32 %vl, i32 4)
- %1 = call <vscale x 2 x i16> @llvm.riscv.tuple.extract.nxv2i16.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0, i32 1)
- ret <vscale x 2 x i16> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0
}
-
-define <vscale x 2 x i16> @test_vlseg7_mask_nxv2i16_triscv.vector.tuple_nxv4i8_7t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @test_vlseg7_mask_nxv2i16_triscv.vector.tuple_nxv4i8_7t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg7_mask_nxv2i16_triscv.vector.tuple_nxv4i8_7t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT: vlseg7e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg7e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv4i8_7t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 4)
- %1 = call <vscale x 2 x i16> @llvm.riscv.tuple.extract.nxv2i16.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0, i32 1)
- ret <vscale x 2 x i16> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv8i8_7t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 7), ptr, <vscale x 4 x i1>, i32, i32, i32)
-
-define <vscale x 4 x i16> @test_vlseg7_nxv4i16_triscv.vector.tuple_nxv8i8_7t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @test_vlseg7_nxv4i16_triscv.vector.tuple_nxv8i8_7t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg7_nxv4i16_triscv.vector.tuple_nxv8i8_7t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vlseg7e16.v v7, (a0)
+; CHECK-NEXT: vlseg7e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) undef, ptr %base, i32 %vl, i32 4)
- %1 = call <vscale x 4 x i16> @llvm.riscv.tuple.extract.nxv4i16.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0, i32 1)
- ret <vscale x 4 x i16> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0
}
-
-define <vscale x 4 x i16> @test_vlseg7_mask_nxv4i16_triscv.vector.tuple_nxv8i8_7t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @test_vlseg7_mask_nxv4i16_triscv.vector.tuple_nxv8i8_7t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: test_vlseg7_mask_nxv4i16_triscv.vector.tuple_nxv8i8_7t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vlseg7e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg7e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv8i8_7t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) undef, ptr %base, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 4)
- %1 = call <vscale x 4 x i16> @llvm.riscv.tuple.extract.nxv4i16.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0, i32 1)
- ret <vscale x 4 x i16> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 2 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv2i8_8t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 8), ptr, <vscale x 1 x i1>, i32, i32, i32)
-
-define <vscale x 1 x i16> @test_vlseg8_nxv1i16_triscv.vector.tuple_nxv2i8_8t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 8) @test_vlseg8_nxv1i16_triscv.vector.tuple_nxv2i8_8t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg8_nxv1i16_triscv.vector.tuple_nxv2i8_8t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT: vlseg8e16.v v7, (a0)
+; CHECK-NEXT: vlseg8e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv2i8_8t(target("riscv.vector.tuple", <vscale x 2 x i8>, 8) undef, ptr %base, i32 %vl, i32 4)
- %1 = call <vscale x 1 x i16> @llvm.riscv.tuple.extract.nxv1i16.triscv.vector.tuple_nxv2i8_8t(target("riscv.vector.tuple", <vscale x 2 x i8>, 8) %0, i32 1)
- ret <vscale x 1 x i16> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 8) %0
}
-
-define <vscale x 1 x i16> @test_vlseg8_mask_nxv1i16_triscv.vector.tuple_nxv2i8_8t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 8) @test_vlseg8_mask_nxv1i16_triscv.vector.tuple_nxv2i8_8t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg8_mask_nxv1i16_triscv.vector.tuple_nxv2i8_8t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT: vlseg8e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg8e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv2i8_8t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 8) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 4)
- %1 = call <vscale x 1 x i16> @llvm.riscv.tuple.extract.nxv1i16.triscv.vector.tuple_nxv2i8_8t(target("riscv.vector.tuple", <vscale x 2 x i8>, 8) %0, i32 1)
- ret <vscale x 1 x i16> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 8) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv4i8_8t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 8), ptr, <vscale x 2 x i1>, i32, i32, i32)
-
-define <vscale x 2 x i16> @test_vlseg8_nxv2i16_triscv.vector.tuple_nxv4i8_8t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @test_vlseg8_nxv2i16_triscv.vector.tuple_nxv4i8_8t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg8_nxv2i16_triscv.vector.tuple_nxv4i8_8t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT: vlseg8e16.v v7, (a0)
+; CHECK-NEXT: vlseg8e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) undef, ptr %base, i32 %vl, i32 4)
- %1 = call <vscale x 2 x i16> @llvm.riscv.tuple.extract.nxv2i16.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0, i32 1)
- ret <vscale x 2 x i16> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0
}
-
-define <vscale x 2 x i16> @test_vlseg8_mask_nxv2i16_triscv.vector.tuple_nxv4i8_8t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @test_vlseg8_mask_nxv2i16_triscv.vector.tuple_nxv4i8_8t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg8_mask_nxv2i16_triscv.vector.tuple_nxv4i8_8t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT: vlseg8e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg8e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv4i8_8t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 4)
- %1 = call <vscale x 2 x i16> @llvm.riscv.tuple.extract.nxv2i16.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0, i32 1)
- ret <vscale x 2 x i16> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv8i8_8t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 8), ptr, <vscale x 4 x i1>, i32, i32, i32)
-
-define <vscale x 4 x i16> @test_vlseg8_nxv4i16_triscv.vector.tuple_nxv8i8_8t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @test_vlseg8_nxv4i16_triscv.vector.tuple_nxv8i8_8t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg8_nxv4i16_triscv.vector.tuple_nxv8i8_8t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vlseg8e16.v v7, (a0)
+; CHECK-NEXT: vlseg8e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) undef, ptr %base, i32 %vl, i32 4)
- %1 = call <vscale x 4 x i16> @llvm.riscv.tuple.extract.nxv4i16.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0, i32 1)
- ret <vscale x 4 x i16> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0
}
-
-define <vscale x 4 x i16> @test_vlseg8_mask_nxv4i16_triscv.vector.tuple_nxv8i8_8t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @test_vlseg8_mask_nxv4i16_triscv.vector.tuple_nxv8i8_8t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: test_vlseg8_mask_nxv4i16_triscv.vector.tuple_nxv8i8_8t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vlseg8e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg8e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv8i8_8t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) undef, ptr %base, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 4)
- %1 = call <vscale x 4 x i16> @llvm.riscv.tuple.extract.nxv4i16.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0, i32 1)
- ret <vscale x 4 x i16> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv4i8_2t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 2), ptr, <vscale x 1 x i1>, i32, i32, i32)
-
-define <vscale x 1 x i32> @test_vlseg2_nxv1i32_triscv.vector.tuple_nxv4i8_2t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @test_vlseg2_nxv1i32_triscv.vector.tuple_nxv4i8_2t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg2_nxv1i32_triscv.vector.tuple_nxv4i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT: vlseg2e32.v v7, (a0)
+; CHECK-NEXT: vlseg2e32.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) undef, ptr %base, i32 %vl, i32 5)
- %1 = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0, i32 1)
- ret <vscale x 1 x i32> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0
}
-
-define <vscale x 1 x i32> @test_vlseg2_mask_nxv1i32_triscv.vector.tuple_nxv4i8_2t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @test_vlseg2_mask_nxv1i32_triscv.vector.tuple_nxv4i8_2t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg2_mask_nxv1i32_triscv.vector.tuple_nxv4i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT: vlseg2e32.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg2e32.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv4i8_2t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 5)
- %1 = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0, i32 1)
- ret <vscale x 1 x i32> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv8i8_2t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 2), ptr, <vscale x 2 x i1>, i32, i32, i32)
-
-define <vscale x 2 x i32> @test_vlseg2_nxv2i32_triscv.vector.tuple_nxv8i8_2t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @test_vlseg2_nxv2i32_triscv.vector.tuple_nxv8i8_2t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg2_nxv2i32_triscv.vector.tuple_nxv8i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT: vlseg2e32.v v7, (a0)
+; CHECK-NEXT: vlseg2e32.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) undef, ptr %base, i32 %vl, i32 5)
- %1 = call <vscale x 2 x i32> @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0, i32 1)
- ret <vscale x 2 x i32> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0
}
-
-define <vscale x 2 x i32> @test_vlseg2_mask_nxv2i32_triscv.vector.tuple_nxv8i8_2t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @test_vlseg2_mask_nxv2i32_triscv.vector.tuple_nxv8i8_2t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg2_mask_nxv2i32_triscv.vector.tuple_nxv8i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT: vlseg2e32.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg2e32.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv8i8_2t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 5)
- %1 = call <vscale x 2 x i32> @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0, i32 1)
- ret <vscale x 2 x i32> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv16i8_2t.nxv4i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 2), ptr, <vscale x 4 x i1>, i32, i32, i32)
-
-define <vscale x 4 x i32> @test_vlseg2_nxv4i32_triscv.vector.tuple_nxv16i8_2t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @test_vlseg2_nxv4i32_triscv.vector.tuple_nxv16i8_2t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg2_nxv4i32_triscv.vector.tuple_nxv16i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vlseg2e32.v v6, (a0)
+; CHECK-NEXT: vlseg2e32.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, i32 %vl, i32 5)
- %1 = call <vscale x 4 x i32> @llvm.riscv.tuple.extract.nxv4i32.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0, i32 1)
- ret <vscale x 4 x i32> %1
+ ret target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0
}
-
-define <vscale x 4 x i32> @test_vlseg2_mask_nxv4i32_triscv.vector.tuple_nxv16i8_2t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @test_vlseg2_mask_nxv4i32_triscv.vector.tuple_nxv16i8_2t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: test_vlseg2_mask_nxv4i32_triscv.vector.tuple_nxv16i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vlseg2e32.v v6, (a0), v0.t
+; CHECK-NEXT: vlseg2e32.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv16i8_2t.nxv4i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 5)
- %1 = call <vscale x 4 x i32> @llvm.riscv.tuple.extract.nxv4i32.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0, i32 1)
- ret <vscale x 4 x i32> %1
+ ret target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv32i8_2t.nxv8i1(target("riscv.vector.tuple", <vscale x 32 x i8>, 2), ptr, <vscale x 8 x i1>, i32, i32, i32)
-
-define <vscale x 8 x i32> @test_vlseg2_nxv8i32_triscv.vector.tuple_nxv32i8_2t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @test_vlseg2_nxv8i32_triscv.vector.tuple_nxv32i8_2t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg2_nxv8i32_triscv.vector.tuple_nxv32i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT: vlseg2e32.v v4, (a0)
+; CHECK-NEXT: vlseg2e32.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, i32 %vl, i32 5)
- %1 = call <vscale x 8 x i32> @llvm.riscv.tuple.extract.nxv8i32.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0, i32 1)
- ret <vscale x 8 x i32> %1
+ ret target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0
}
-
-define <vscale x 8 x i32> @test_vlseg2_mask_nxv8i32_triscv.vector.tuple_nxv32i8_2t(ptr %base, i32 %vl, <vscale x 8 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @test_vlseg2_mask_nxv8i32_triscv.vector.tuple_nxv32i8_2t(ptr %base, i32 %vl, <vscale x 8 x i1> %mask) {
; CHECK-LABEL: test_vlseg2_mask_nxv8i32_triscv.vector.tuple_nxv32i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT: vlseg2e32.v v4, (a0), v0.t
+; CHECK-NEXT: vlseg2e32.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv32i8_2t.nxv8i1(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 8 x i1> %mask, i32 %vl, i32 1, i32 5)
- %1 = call <vscale x 8 x i32> @llvm.riscv.tuple.extract.nxv8i32.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0, i32 1)
- ret <vscale x 8 x i32> %1
+ ret target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv4i8_3t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 3), ptr, <vscale x 1 x i1>, i32, i32, i32)
-
-define <vscale x 1 x i32> @test_vlseg3_nxv1i32_triscv.vector.tuple_nxv4i8_3t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @test_vlseg3_nxv1i32_triscv.vector.tuple_nxv4i8_3t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg3_nxv1i32_triscv.vector.tuple_nxv4i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT: vlseg3e32.v v7, (a0)
+; CHECK-NEXT: vlseg3e32.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) undef, ptr %base, i32 %vl, i32 5)
- %1 = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0, i32 1)
- ret <vscale x 1 x i32> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0
}
-
-define <vscale x 1 x i32> @test_vlseg3_mask_nxv1i32_triscv.vector.tuple_nxv4i8_3t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @test_vlseg3_mask_nxv1i32_triscv.vector.tuple_nxv4i8_3t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg3_mask_nxv1i32_triscv.vector.tuple_nxv4i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT: vlseg3e32.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg3e32.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv4i8_3t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 5)
- %1 = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0, i32 1)
- ret <vscale x 1 x i32> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv8i8_3t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 3), ptr, <vscale x 2 x i1>, i32, i32, i32)
-
-define <vscale x 2 x i32> @test_vlseg3_nxv2i32_triscv.vector.tuple_nxv8i8_3t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @test_vlseg3_nxv2i32_triscv.vector.tuple_nxv8i8_3t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg3_nxv2i32_triscv.vector.tuple_nxv8i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT: vlseg3e32.v v7, (a0)
+; CHECK-NEXT: vlseg3e32.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) undef, ptr %base, i32 %vl, i32 5)
- %1 = call <vscale x 2 x i32> @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0, i32 1)
- ret <vscale x 2 x i32> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0
}
-
-define <vscale x 2 x i32> @test_vlseg3_mask_nxv2i32_triscv.vector.tuple_nxv8i8_3t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @test_vlseg3_mask_nxv2i32_triscv.vector.tuple_nxv8i8_3t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg3_mask_nxv2i32_triscv.vector.tuple_nxv8i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT: vlseg3e32.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg3e32.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv8i8_3t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 5)
- %1 = call <vscale x 2 x i32> @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0, i32 1)
- ret <vscale x 2 x i32> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv4i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 3), ptr, <vscale x 4 x i1>, i32, i32, i32)
-
-define <vscale x 4 x i32> @test_vlseg3_nxv4i32_triscv.vector.tuple_nxv16i8_3t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @test_vlseg3_nxv4i32_triscv.vector.tuple_nxv16i8_3t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg3_nxv4i32_triscv.vector.tuple_nxv16i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vlseg3e32.v v6, (a0)
+; CHECK-NEXT: vlseg3e32.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, i32 %vl, i32 5)
- %1 = call <vscale x 4 x i32> @llvm.riscv.tuple.extract.nxv4i32.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0, i32 1)
- ret <vscale x 4 x i32> %1
+ ret target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0
}
-
-define <vscale x 4 x i32> @test_vlseg3_mask_nxv4i32_triscv.vector.tuple_nxv16i8_3t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @test_vlseg3_mask_nxv4i32_triscv.vector.tuple_nxv16i8_3t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: test_vlseg3_mask_nxv4i32_triscv.vector.tuple_nxv16i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vlseg3e32.v v6, (a0), v0.t
+; CHECK-NEXT: vlseg3e32.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv4i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 5)
- %1 = call <vscale x 4 x i32> @llvm.riscv.tuple.extract.nxv4i32.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0, i32 1)
- ret <vscale x 4 x i32> %1
+ ret target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv4i8_4t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 4), ptr, <vscale x 1 x i1>, i32, i32, i32)
-
-define <vscale x 1 x i32> @test_vlseg4_nxv1i32_triscv.vector.tuple_nxv4i8_4t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @test_vlseg4_nxv1i32_triscv.vector.tuple_nxv4i8_4t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg4_nxv1i32_triscv.vector.tuple_nxv4i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT: vlseg4e32.v v7, (a0)
+; CHECK-NEXT: vlseg4e32.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) undef, ptr %base, i32 %vl, i32 5)
- %1 = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0, i32 1)
- ret <vscale x 1 x i32> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0
}
-
-define <vscale x 1 x i32> @test_vlseg4_mask_nxv1i32_triscv.vector.tuple_nxv4i8_4t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @test_vlseg4_mask_nxv1i32_triscv.vector.tuple_nxv4i8_4t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg4_mask_nxv1i32_triscv.vector.tuple_nxv4i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT: vlseg4e32.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg4e32.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv4i8_4t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 5)
- %1 = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0, i32 1)
- ret <vscale x 1 x i32> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv8i8_4t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 4), ptr, <vscale x 2 x i1>, i32, i32, i32)
-
-define <vscale x 2 x i32> @test_vlseg4_nxv2i32_triscv.vector.tuple_nxv8i8_4t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @test_vlseg4_nxv2i32_triscv.vector.tuple_nxv8i8_4t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg4_nxv2i32_triscv.vector.tuple_nxv8i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT: vlseg4e32.v v7, (a0)
+; CHECK-NEXT: vlseg4e32.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) undef, ptr %base, i32 %vl, i32 5)
- %1 = call <vscale x 2 x i32> @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0, i32 1)
- ret <vscale x 2 x i32> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0
}
-
-define <vscale x 2 x i32> @test_vlseg4_mask_nxv2i32_triscv.vector.tuple_nxv8i8_4t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @test_vlseg4_mask_nxv2i32_triscv.vector.tuple_nxv8i8_4t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg4_mask_nxv2i32_triscv.vector.tuple_nxv8i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT: vlseg4e32.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg4e32.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv8i8_4t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 5)
- %1 = call <vscale x 2 x i32> @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0, i32 1)
- ret <vscale x 2 x i32> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv16i8_4t.nxv4i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 4), ptr, <vscale x 4 x i1>, i32, i32, i32)
-
-define <vscale x 4 x i32> @test_vlseg4_nxv4i32_triscv.vector.tuple_nxv16i8_4t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @test_vlseg4_nxv4i32_triscv.vector.tuple_nxv16i8_4t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg4_nxv4i32_triscv.vector.tuple_nxv16i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vlseg4e32.v v6, (a0)
+; CHECK-NEXT: vlseg4e32.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, i32 %vl, i32 5)
- %1 = call <vscale x 4 x i32> @llvm.riscv.tuple.extract.nxv4i32.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0, i32 1)
- ret <vscale x 4 x i32> %1
+ ret target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0
}
-
-define <vscale x 4 x i32> @test_vlseg4_mask_nxv4i32_triscv.vector.tuple_nxv16i8_4t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @test_vlseg4_mask_nxv4i32_triscv.vector.tuple_nxv16i8_4t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: test_vlseg4_mask_nxv4i32_triscv.vector.tuple_nxv16i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vlseg4e32.v v6, (a0), v0.t
+; CHECK-NEXT: vlseg4e32.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv16i8_4t.nxv4i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 5)
- %1 = call <vscale x 4 x i32> @llvm.riscv.tuple.extract.nxv4i32.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0, i32 1)
- ret <vscale x 4 x i32> %1
+ ret target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv4i8_5t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 5), ptr, <vscale x 1 x i1>, i32, i32, i32)
-
-define <vscale x 1 x i32> @test_vlseg5_nxv1i32_triscv.vector.tuple_nxv4i8_5t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @test_vlseg5_nxv1i32_triscv.vector.tuple_nxv4i8_5t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg5_nxv1i32_triscv.vector.tuple_nxv4i8_5t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT: vlseg5e32.v v7, (a0)
+; CHECK-NEXT: vlseg5e32.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) undef, ptr %base, i32 %vl, i32 5)
- %1 = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0, i32 1)
- ret <vscale x 1 x i32> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0
}
-
-define <vscale x 1 x i32> @test_vlseg5_mask_nxv1i32_triscv.vector.tuple_nxv4i8_5t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @test_vlseg5_mask_nxv1i32_triscv.vector.tuple_nxv4i8_5t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg5_mask_nxv1i32_triscv.vector.tuple_nxv4i8_5t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT: vlseg5e32.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg5e32.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv4i8_5t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 5)
- %1 = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0, i32 1)
- ret <vscale x 1 x i32> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv8i8_5t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 5), ptr, <vscale x 2 x i1>, i32, i32, i32)
-
-define <vscale x 2 x i32> @test_vlseg5_nxv2i32_triscv.vector.tuple_nxv8i8_5t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @test_vlseg5_nxv2i32_triscv.vector.tuple_nxv8i8_5t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg5_nxv2i32_triscv.vector.tuple_nxv8i8_5t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT: vlseg5e32.v v7, (a0)
+; CHECK-NEXT: vlseg5e32.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) undef, ptr %base, i32 %vl, i32 5)
- %1 = call <vscale x 2 x i32> @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0, i32 1)
- ret <vscale x 2 x i32> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0
}
-
-define <vscale x 2 x i32> @test_vlseg5_mask_nxv2i32_triscv.vector.tuple_nxv8i8_5t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @test_vlseg5_mask_nxv2i32_triscv.vector.tuple_nxv8i8_5t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg5_mask_nxv2i32_triscv.vector.tuple_nxv8i8_5t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT: vlseg5e32.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg5e32.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv8i8_5t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 5)
- %1 = call <vscale x 2 x i32> @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0, i32 1)
- ret <vscale x 2 x i32> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv4i8_6t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 6), ptr, <vscale x 1 x i1>, i32, i32, i32)
-
-define <vscale x 1 x i32> @test_vlseg6_nxv1i32_triscv.vector.tuple_nxv4i8_6t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @test_vlseg6_nxv1i32_triscv.vector.tuple_nxv4i8_6t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg6_nxv1i32_triscv.vector.tuple_nxv4i8_6t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT: vlseg6e32.v v7, (a0)
+; CHECK-NEXT: vlseg6e32.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) undef, ptr %base, i32 %vl, i32 5)
- %1 = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0, i32 1)
- ret <vscale x 1 x i32> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0
}
-
-define <vscale x 1 x i32> @test_vlseg6_mask_nxv1i32_triscv.vector.tuple_nxv4i8_6t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @test_vlseg6_mask_nxv1i32_triscv.vector.tuple_nxv4i8_6t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg6_mask_nxv1i32_triscv.vector.tuple_nxv4i8_6t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT: vlseg6e32.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg6e32.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv4i8_6t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 5)
- %1 = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0, i32 1)
- ret <vscale x 1 x i32> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv8i8_6t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 6), ptr, <vscale x 2 x i1>, i32, i32, i32)
-
-define <vscale x 2 x i32> @test_vlseg6_nxv2i32_triscv.vector.tuple_nxv8i8_6t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @test_vlseg6_nxv2i32_triscv.vector.tuple_nxv8i8_6t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg6_nxv2i32_triscv.vector.tuple_nxv8i8_6t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT: vlseg6e32.v v7, (a0)
+; CHECK-NEXT: vlseg6e32.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) undef, ptr %base, i32 %vl, i32 5)
- %1 = call <vscale x 2 x i32> @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0, i32 1)
- ret <vscale x 2 x i32> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0
}
-
-define <vscale x 2 x i32> @test_vlseg6_mask_nxv2i32_triscv.vector.tuple_nxv8i8_6t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @test_vlseg6_mask_nxv2i32_triscv.vector.tuple_nxv8i8_6t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg6_mask_nxv2i32_triscv.vector.tuple_nxv8i8_6t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT: vlseg6e32.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg6e32.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv8i8_6t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 5)
- %1 = call <vscale x 2 x i32> @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0, i32 1)
- ret <vscale x 2 x i32> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv4i8_7t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 7), ptr, <vscale x 1 x i1>, i32, i32, i32)
-
-define <vscale x 1 x i32> @test_vlseg7_nxv1i32_triscv.vector.tuple_nxv4i8_7t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @test_vlseg7_nxv1i32_triscv.vector.tuple_nxv4i8_7t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg7_nxv1i32_triscv.vector.tuple_nxv4i8_7t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT: vlseg7e32.v v7, (a0)
+; CHECK-NEXT: vlseg7e32.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) undef, ptr %base, i32 %vl, i32 5)
- %1 = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0, i32 1)
- ret <vscale x 1 x i32> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0
}
-
-define <vscale x 1 x i32> @test_vlseg7_mask_nxv1i32_triscv.vector.tuple_nxv4i8_7t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @test_vlseg7_mask_nxv1i32_triscv.vector.tuple_nxv4i8_7t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg7_mask_nxv1i32_triscv.vector.tuple_nxv4i8_7t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT: vlseg7e32.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg7e32.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv4i8_7t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 5)
- %1 = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0, i32 1)
- ret <vscale x 1 x i32> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv8i8_7t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 7), ptr, <vscale x 2 x i1>, i32, i32, i32)
-
-define <vscale x 2 x i32> @test_vlseg7_nxv2i32_triscv.vector.tuple_nxv8i8_7t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @test_vlseg7_nxv2i32_triscv.vector.tuple_nxv8i8_7t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg7_nxv2i32_triscv.vector.tuple_nxv8i8_7t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT: vlseg7e32.v v7, (a0)
+; CHECK-NEXT: vlseg7e32.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) undef, ptr %base, i32 %vl, i32 5)
- %1 = call <vscale x 2 x i32> @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0, i32 1)
- ret <vscale x 2 x i32> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0
}
-
-define <vscale x 2 x i32> @test_vlseg7_mask_nxv2i32_triscv.vector.tuple_nxv8i8_7t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @test_vlseg7_mask_nxv2i32_triscv.vector.tuple_nxv8i8_7t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg7_mask_nxv2i32_triscv.vector.tuple_nxv8i8_7t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT: vlseg7e32.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg7e32.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv8i8_7t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 5)
- %1 = call <vscale x 2 x i32> @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0, i32 1)
- ret <vscale x 2 x i32> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv4i8_8t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 8), ptr, <vscale x 1 x i1>, i32, i32, i32)
-
-define <vscale x 1 x i32> @test_vlseg8_nxv1i32_triscv.vector.tuple_nxv4i8_8t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @test_vlseg8_nxv1i32_triscv.vector.tuple_nxv4i8_8t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg8_nxv1i32_triscv.vector.tuple_nxv4i8_8t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT: vlseg8e32.v v7, (a0)
+; CHECK-NEXT: vlseg8e32.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) undef, ptr %base, i32 %vl, i32 5)
- %1 = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0, i32 1)
- ret <vscale x 1 x i32> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0
}
-
-define <vscale x 1 x i32> @test_vlseg8_mask_nxv1i32_triscv.vector.tuple_nxv4i8_8t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @test_vlseg8_mask_nxv1i32_triscv.vector.tuple_nxv4i8_8t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg8_mask_nxv1i32_triscv.vector.tuple_nxv4i8_8t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT: vlseg8e32.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg8e32.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv4i8_8t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 5)
- %1 = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0, i32 1)
- ret <vscale x 1 x i32> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv8i8_8t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 8), ptr, <vscale x 2 x i1>, i32, i32, i32)
-
-define <vscale x 2 x i32> @test_vlseg8_nxv2i32_triscv.vector.tuple_nxv8i8_8t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @test_vlseg8_nxv2i32_triscv.vector.tuple_nxv8i8_8t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg8_nxv2i32_triscv.vector.tuple_nxv8i8_8t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT: vlseg8e32.v v7, (a0)
+; CHECK-NEXT: vlseg8e32.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) undef, ptr %base, i32 %vl, i32 5)
- %1 = call <vscale x 2 x i32> @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0, i32 1)
- ret <vscale x 2 x i32> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0
}
-
-define <vscale x 2 x i32> @test_vlseg8_mask_nxv2i32_triscv.vector.tuple_nxv8i8_8t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @test_vlseg8_mask_nxv2i32_triscv.vector.tuple_nxv8i8_8t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg8_mask_nxv2i32_triscv.vector.tuple_nxv8i8_8t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT: vlseg8e32.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg8e32.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv8i8_8t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 5)
- %1 = call <vscale x 2 x i32> @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0, i32 1)
- ret <vscale x 2 x i32> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv8i8_2t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 2), ptr, <vscale x 1 x i1>, i32, i32, i32)
-
-define <vscale x 1 x i64> @test_vlseg2_nxv1i64_triscv.vector.tuple_nxv8i8_2t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @test_vlseg2_nxv1i64_triscv.vector.tuple_nxv8i8_2t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg2_nxv1i64_triscv.vector.tuple_nxv8i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
-; CHECK-NEXT: vlseg2e64.v v7, (a0)
+; CHECK-NEXT: vlseg2e64.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) undef, ptr %base, i32 %vl, i32 6)
- %1 = call <vscale x 1 x i64> @llvm.riscv.tuple.extract.nxv1i64.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0, i32 1)
- ret <vscale x 1 x i64> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0
}
-
-define <vscale x 1 x i64> @test_vlseg2_mask_nxv1i64_triscv.vector.tuple_nxv8i8_2t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @test_vlseg2_mask_nxv1i64_triscv.vector.tuple_nxv8i8_2t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg2_mask_nxv1i64_triscv.vector.tuple_nxv8i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
-; CHECK-NEXT: vlseg2e64.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg2e64.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv8i8_2t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 6)
- %1 = call <vscale x 1 x i64> @llvm.riscv.tuple.extract.nxv1i64.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0, i32 1)
- ret <vscale x 1 x i64> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv16i8_2t.nxv2i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 2), ptr, <vscale x 2 x i1>, i32, i32, i32)
-
-define <vscale x 2 x i64> @test_vlseg2_nxv2i64_triscv.vector.tuple_nxv16i8_2t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @test_vlseg2_nxv2i64_triscv.vector.tuple_nxv16i8_2t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg2_nxv2i64_triscv.vector.tuple_nxv16i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vlseg2e64.v v6, (a0)
+; CHECK-NEXT: vlseg2e64.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, i32 %vl, i32 6)
- %1 = call <vscale x 2 x i64> @llvm.riscv.tuple.extract.nxv2i64.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0, i32 1)
- ret <vscale x 2 x i64> %1
+ ret target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0
}
-
-define <vscale x 2 x i64> @test_vlseg2_mask_nxv2i64_triscv.vector.tuple_nxv16i8_2t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @test_vlseg2_mask_nxv2i64_triscv.vector.tuple_nxv16i8_2t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg2_mask_nxv2i64_triscv.vector.tuple_nxv16i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vlseg2e64.v v6, (a0), v0.t
+; CHECK-NEXT: vlseg2e64.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv16i8_2t.nxv2i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 6)
- %1 = call <vscale x 2 x i64> @llvm.riscv.tuple.extract.nxv2i64.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0, i32 1)
- ret <vscale x 2 x i64> %1
+ ret target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv32i8_2t.nxv4i1(target("riscv.vector.tuple", <vscale x 32 x i8>, 2), ptr, <vscale x 4 x i1>, i32, i32, i32)
-
-define <vscale x 4 x i64> @test_vlseg2_nxv4i64_triscv.vector.tuple_nxv32i8_2t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @test_vlseg2_nxv4i64_triscv.vector.tuple_nxv32i8_2t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg2_nxv4i64_triscv.vector.tuple_nxv32i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma
-; CHECK-NEXT: vlseg2e64.v v4, (a0)
+; CHECK-NEXT: vlseg2e64.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, i32 %vl, i32 6)
- %1 = call <vscale x 4 x i64> @llvm.riscv.tuple.extract.nxv4i64.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0, i32 1)
- ret <vscale x 4 x i64> %1
+ ret target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0
}
-
-define <vscale x 4 x i64> @test_vlseg2_mask_nxv4i64_triscv.vector.tuple_nxv32i8_2t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @test_vlseg2_mask_nxv4i64_triscv.vector.tuple_nxv32i8_2t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: test_vlseg2_mask_nxv4i64_triscv.vector.tuple_nxv32i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma
-; CHECK-NEXT: vlseg2e64.v v4, (a0), v0.t
+; CHECK-NEXT: vlseg2e64.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv32i8_2t.nxv4i1(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 6)
- %1 = call <vscale x 4 x i64> @llvm.riscv.tuple.extract.nxv4i64.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0, i32 1)
- ret <vscale x 4 x i64> %1
+ ret target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv8i8_3t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 3), ptr, <vscale x 1 x i1>, i32, i32, i32)
-
-define <vscale x 1 x i64> @test_vlseg3_nxv1i64_triscv.vector.tuple_nxv8i8_3t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @test_vlseg3_nxv1i64_triscv.vector.tuple_nxv8i8_3t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg3_nxv1i64_triscv.vector.tuple_nxv8i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
-; CHECK-NEXT: vlseg3e64.v v7, (a0)
+; CHECK-NEXT: vlseg3e64.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) undef, ptr %base, i32 %vl, i32 6)
- %1 = call <vscale x 1 x i64> @llvm.riscv.tuple.extract.nxv1i64.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0, i32 1)
- ret <vscale x 1 x i64> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0
}
-
-define <vscale x 1 x i64> @test_vlseg3_mask_nxv1i64_triscv.vector.tuple_nxv8i8_3t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @test_vlseg3_mask_nxv1i64_triscv.vector.tuple_nxv8i8_3t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg3_mask_nxv1i64_triscv.vector.tuple_nxv8i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
-; CHECK-NEXT: vlseg3e64.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg3e64.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv8i8_3t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 6)
- %1 = call <vscale x 1 x i64> @llvm.riscv.tuple.extract.nxv1i64.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0, i32 1)
- ret <vscale x 1 x i64> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv2i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 3), ptr, <vscale x 2 x i1>, i32, i32, i32)
-
-define <vscale x 2 x i64> @test_vlseg3_nxv2i64_triscv.vector.tuple_nxv16i8_3t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @test_vlseg3_nxv2i64_triscv.vector.tuple_nxv16i8_3t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg3_nxv2i64_triscv.vector.tuple_nxv16i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vlseg3e64.v v6, (a0)
+; CHECK-NEXT: vlseg3e64.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, i32 %vl, i32 6)
- %1 = call <vscale x 2 x i64> @llvm.riscv.tuple.extract.nxv2i64.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0, i32 1)
- ret <vscale x 2 x i64> %1
+ ret target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0
}
-
-define <vscale x 2 x i64> @test_vlseg3_mask_nxv2i64_triscv.vector.tuple_nxv16i8_3t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @test_vlseg3_mask_nxv2i64_triscv.vector.tuple_nxv16i8_3t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg3_mask_nxv2i64_triscv.vector.tuple_nxv16i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vlseg3e64.v v6, (a0), v0.t
+; CHECK-NEXT: vlseg3e64.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv2i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 6)
- %1 = call <vscale x 2 x i64> @llvm.riscv.tuple.extract.nxv2i64.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0, i32 1)
- ret <vscale x 2 x i64> %1
+ ret target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv8i8_4t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 4), ptr, <vscale x 1 x i1>, i32, i32, i32)
-
-define <vscale x 1 x i64> @test_vlseg4_nxv1i64_triscv.vector.tuple_nxv8i8_4t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @test_vlseg4_nxv1i64_triscv.vector.tuple_nxv8i8_4t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg4_nxv1i64_triscv.vector.tuple_nxv8i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
-; CHECK-NEXT: vlseg4e64.v v7, (a0)
+; CHECK-NEXT: vlseg4e64.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) undef, ptr %base, i32 %vl, i32 6)
- %1 = call <vscale x 1 x i64> @llvm.riscv.tuple.extract.nxv1i64.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0, i32 1)
- ret <vscale x 1 x i64> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0
}
-
-define <vscale x 1 x i64> @test_vlseg4_mask_nxv1i64_triscv.vector.tuple_nxv8i8_4t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @test_vlseg4_mask_nxv1i64_triscv.vector.tuple_nxv8i8_4t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg4_mask_nxv1i64_triscv.vector.tuple_nxv8i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
-; CHECK-NEXT: vlseg4e64.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg4e64.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv8i8_4t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 6)
- %1 = call <vscale x 1 x i64> @llvm.riscv.tuple.extract.nxv1i64.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0, i32 1)
- ret <vscale x 1 x i64> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv16i8_4t.nxv2i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 4), ptr, <vscale x 2 x i1>, i32, i32, i32)
-
-define <vscale x 2 x i64> @test_vlseg4_nxv2i64_triscv.vector.tuple_nxv16i8_4t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @test_vlseg4_nxv2i64_triscv.vector.tuple_nxv16i8_4t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg4_nxv2i64_triscv.vector.tuple_nxv16i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vlseg4e64.v v6, (a0)
+; CHECK-NEXT: vlseg4e64.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, i32 %vl, i32 6)
- %1 = call <vscale x 2 x i64> @llvm.riscv.tuple.extract.nxv2i64.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0, i32 1)
- ret <vscale x 2 x i64> %1
+ ret target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0
}
-
-define <vscale x 2 x i64> @test_vlseg4_mask_nxv2i64_triscv.vector.tuple_nxv16i8_4t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @test_vlseg4_mask_nxv2i64_triscv.vector.tuple_nxv16i8_4t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg4_mask_nxv2i64_triscv.vector.tuple_nxv16i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vlseg4e64.v v6, (a0), v0.t
+; CHECK-NEXT: vlseg4e64.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv16i8_4t.nxv2i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 6)
- %1 = call <vscale x 2 x i64> @llvm.riscv.tuple.extract.nxv2i64.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0, i32 1)
- ret <vscale x 2 x i64> %1
+ ret target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv8i8_5t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 5), ptr, <vscale x 1 x i1>, i32, i32, i32)
-
-define <vscale x 1 x i64> @test_vlseg5_nxv1i64_triscv.vector.tuple_nxv8i8_5t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @test_vlseg5_nxv1i64_triscv.vector.tuple_nxv8i8_5t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg5_nxv1i64_triscv.vector.tuple_nxv8i8_5t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
-; CHECK-NEXT: vlseg5e64.v v7, (a0)
+; CHECK-NEXT: vlseg5e64.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) undef, ptr %base, i32 %vl, i32 6)
- %1 = call <vscale x 1 x i64> @llvm.riscv.tuple.extract.nxv1i64.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0, i32 1)
- ret <vscale x 1 x i64> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0
}
-
-define <vscale x 1 x i64> @test_vlseg5_mask_nxv1i64_triscv.vector.tuple_nxv8i8_5t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @test_vlseg5_mask_nxv1i64_triscv.vector.tuple_nxv8i8_5t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg5_mask_nxv1i64_triscv.vector.tuple_nxv8i8_5t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
-; CHECK-NEXT: vlseg5e64.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg5e64.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv8i8_5t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 6)
- %1 = call <vscale x 1 x i64> @llvm.riscv.tuple.extract.nxv1i64.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0, i32 1)
- ret <vscale x 1 x i64> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv8i8_6t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 6), ptr, <vscale x 1 x i1>, i32, i32, i32)
-
-define <vscale x 1 x i64> @test_vlseg6_nxv1i64_triscv.vector.tuple_nxv8i8_6t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @test_vlseg6_nxv1i64_triscv.vector.tuple_nxv8i8_6t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg6_nxv1i64_triscv.vector.tuple_nxv8i8_6t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
-; CHECK-NEXT: vlseg6e64.v v7, (a0)
+; CHECK-NEXT: vlseg6e64.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) undef, ptr %base, i32 %vl, i32 6)
- %1 = call <vscale x 1 x i64> @llvm.riscv.tuple.extract.nxv1i64.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0, i32 1)
- ret <vscale x 1 x i64> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0
}
-
-define <vscale x 1 x i64> @test_vlseg6_mask_nxv1i64_triscv.vector.tuple_nxv8i8_6t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @test_vlseg6_mask_nxv1i64_triscv.vector.tuple_nxv8i8_6t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg6_mask_nxv1i64_triscv.vector.tuple_nxv8i8_6t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
-; CHECK-NEXT: vlseg6e64.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg6e64.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv8i8_6t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 6)
- %1 = call <vscale x 1 x i64> @llvm.riscv.tuple.extract.nxv1i64.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0, i32 1)
- ret <vscale x 1 x i64> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv8i8_7t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 7), ptr, <vscale x 1 x i1>, i32, i32, i32)
-
-define <vscale x 1 x i64> @test_vlseg7_nxv1i64_triscv.vector.tuple_nxv8i8_7t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @test_vlseg7_nxv1i64_triscv.vector.tuple_nxv8i8_7t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg7_nxv1i64_triscv.vector.tuple_nxv8i8_7t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
-; CHECK-NEXT: vlseg7e64.v v7, (a0)
+; CHECK-NEXT: vlseg7e64.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) undef, ptr %base, i32 %vl, i32 6)
- %1 = call <vscale x 1 x i64> @llvm.riscv.tuple.extract.nxv1i64.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0, i32 1)
- ret <vscale x 1 x i64> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0
}
-
-define <vscale x 1 x i64> @test_vlseg7_mask_nxv1i64_triscv.vector.tuple_nxv8i8_7t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @test_vlseg7_mask_nxv1i64_triscv.vector.tuple_nxv8i8_7t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg7_mask_nxv1i64_triscv.vector.tuple_nxv8i8_7t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
-; CHECK-NEXT: vlseg7e64.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg7e64.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv8i8_7t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 6)
- %1 = call <vscale x 1 x i64> @llvm.riscv.tuple.extract.nxv1i64.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0, i32 1)
- ret <vscale x 1 x i64> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv8i8_8t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 8), ptr, <vscale x 1 x i1>, i32, i32, i32)
-
-define <vscale x 1 x i64> @test_vlseg8_nxv1i64_triscv.vector.tuple_nxv8i8_8t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @test_vlseg8_nxv1i64_triscv.vector.tuple_nxv8i8_8t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg8_nxv1i64_triscv.vector.tuple_nxv8i8_8t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
-; CHECK-NEXT: vlseg8e64.v v7, (a0)
+; CHECK-NEXT: vlseg8e64.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) undef, ptr %base, i32 %vl, i32 6)
- %1 = call <vscale x 1 x i64> @llvm.riscv.tuple.extract.nxv1i64.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0, i32 1)
- ret <vscale x 1 x i64> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0
}
-
-define <vscale x 1 x i64> @test_vlseg8_mask_nxv1i64_triscv.vector.tuple_nxv8i8_8t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @test_vlseg8_mask_nxv1i64_triscv.vector.tuple_nxv8i8_8t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg8_mask_nxv1i64_triscv.vector.tuple_nxv8i8_8t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
-; CHECK-NEXT: vlseg8e64.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg8e64.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv8i8_8t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 6)
- %1 = call <vscale x 1 x i64> @llvm.riscv.tuple.extract.nxv1i64.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0, i32 1)
- ret <vscale x 1 x i64> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0
}
-
-
-define <vscale x 1 x half> @test_vlseg2_nxv1f16_triscv.vector.tuple_nxv2i8_2t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 2) @test_vlseg2_nxv1f16_triscv.vector.tuple_nxv2i8_2t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg2_nxv1f16_triscv.vector.tuple_nxv2i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT: vlseg2e16.v v7, (a0)
+; CHECK-NEXT: vlseg2e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv2i8_2t(target("riscv.vector.tuple", <vscale x 2 x i8>, 2) undef, ptr %base, i32 %vl, i32 4)
- %1 = call <vscale x 1 x half> @llvm.riscv.tuple.extract.nxv1f16.triscv.vector.tuple_nxv2i8_2t(target("riscv.vector.tuple", <vscale x 2 x i8>, 2) %0, i32 1)
- ret <vscale x 1 x half> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 2) %0
}
-
-define <vscale x 1 x half> @test_vlseg2_mask_nxv1f16_triscv.vector.tuple_nxv2i8_2t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 2) @test_vlseg2_mask_nxv1f16_triscv.vector.tuple_nxv2i8_2t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg2_mask_nxv1f16_triscv.vector.tuple_nxv2i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT: vlseg2e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg2e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv2i8_2t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 2) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 4)
- %1 = call <vscale x 1 x half> @llvm.riscv.tuple.extract.nxv1f16.triscv.vector.tuple_nxv2i8_2t(target("riscv.vector.tuple", <vscale x 2 x i8>, 2) %0, i32 1)
- ret <vscale x 1 x half> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 2) %0
}
-
-
-define <vscale x 2 x half> @test_vlseg2_nxv2f16_triscv.vector.tuple_nxv4i8_2t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @test_vlseg2_nxv2f16_triscv.vector.tuple_nxv4i8_2t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg2_nxv2f16_triscv.vector.tuple_nxv4i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT: vlseg2e16.v v7, (a0)
+; CHECK-NEXT: vlseg2e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) undef, ptr %base, i32 %vl, i32 4)
- %1 = call <vscale x 2 x half> @llvm.riscv.tuple.extract.nxv2f16.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0, i32 1)
- ret <vscale x 2 x half> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0
}
-
-define <vscale x 2 x half> @test_vlseg2_mask_nxv2f16_triscv.vector.tuple_nxv4i8_2t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @test_vlseg2_mask_nxv2f16_triscv.vector.tuple_nxv4i8_2t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg2_mask_nxv2f16_triscv.vector.tuple_nxv4i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT: vlseg2e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg2e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv4i8_2t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 4)
- %1 = call <vscale x 2 x half> @llvm.riscv.tuple.extract.nxv2f16.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0, i32 1)
- ret <vscale x 2 x half> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0
}
-
-
-define <vscale x 4 x half> @test_vlseg2_nxv4f16_triscv.vector.tuple_nxv8i8_2t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @test_vlseg2_nxv4f16_triscv.vector.tuple_nxv8i8_2t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg2_nxv4f16_triscv.vector.tuple_nxv8i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vlseg2e16.v v7, (a0)
+; CHECK-NEXT: vlseg2e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) undef, ptr %base, i32 %vl, i32 4)
- %1 = call <vscale x 4 x half> @llvm.riscv.tuple.extract.nxv4f16.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0, i32 1)
- ret <vscale x 4 x half> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0
}
-
-define <vscale x 4 x half> @test_vlseg2_mask_nxv4f16_triscv.vector.tuple_nxv8i8_2t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @test_vlseg2_mask_nxv4f16_triscv.vector.tuple_nxv8i8_2t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: test_vlseg2_mask_nxv4f16_triscv.vector.tuple_nxv8i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vlseg2e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg2e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv8i8_2t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) undef, ptr %base, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 4)
- %1 = call <vscale x 4 x half> @llvm.riscv.tuple.extract.nxv4f16.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0, i32 1)
- ret <vscale x 4 x half> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0
}
-
-
-define <vscale x 8 x half> @test_vlseg2_nxv8f16_triscv.vector.tuple_nxv16i8_2t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @test_vlseg2_nxv8f16_triscv.vector.tuple_nxv16i8_2t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg2_nxv8f16_triscv.vector.tuple_nxv16i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vlseg2e16.v v6, (a0)
+; CHECK-NEXT: vlseg2e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, i32 %vl, i32 4)
- %1 = call <vscale x 8 x half> @llvm.riscv.tuple.extract.nxv8f16.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0, i32 1)
- ret <vscale x 8 x half> %1
+ ret target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0
}
-
-define <vscale x 8 x half> @test_vlseg2_mask_nxv8f16_triscv.vector.tuple_nxv16i8_2t(ptr %base, i32 %vl, <vscale x 8 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @test_vlseg2_mask_nxv8f16_triscv.vector.tuple_nxv16i8_2t(ptr %base, i32 %vl, <vscale x 8 x i1> %mask) {
; CHECK-LABEL: test_vlseg2_mask_nxv8f16_triscv.vector.tuple_nxv16i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vlseg2e16.v v6, (a0), v0.t
+; CHECK-NEXT: vlseg2e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv16i8_2t.nxv8i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 8 x i1> %mask, i32 %vl, i32 1, i32 4)
- %1 = call <vscale x 8 x half> @llvm.riscv.tuple.extract.nxv8f16.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0, i32 1)
- ret <vscale x 8 x half> %1
+ ret target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0
}
-
-
-define <vscale x 16 x half> @test_vlseg2_nxv16f16_triscv.vector.tuple_nxv32i8_2t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @test_vlseg2_nxv16f16_triscv.vector.tuple_nxv32i8_2t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg2_nxv16f16_triscv.vector.tuple_nxv32i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
-; CHECK-NEXT: vlseg2e16.v v4, (a0)
+; CHECK-NEXT: vlseg2e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, i32 %vl, i32 4)
- %1 = call <vscale x 16 x half> @llvm.riscv.tuple.extract.nxv16f16.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0, i32 1)
- ret <vscale x 16 x half> %1
+ ret target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0
}
-
-define <vscale x 16 x half> @test_vlseg2_mask_nxv16f16_triscv.vector.tuple_nxv32i8_2t(ptr %base, i32 %vl, <vscale x 16 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @test_vlseg2_mask_nxv16f16_triscv.vector.tuple_nxv32i8_2t(ptr %base, i32 %vl, <vscale x 16 x i1> %mask) {
; CHECK-LABEL: test_vlseg2_mask_nxv16f16_triscv.vector.tuple_nxv32i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
-; CHECK-NEXT: vlseg2e16.v v4, (a0), v0.t
+; CHECK-NEXT: vlseg2e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv32i8_2t.nxv16i1(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 16 x i1> %mask, i32 %vl, i32 1, i32 4)
- %1 = call <vscale x 16 x half> @llvm.riscv.tuple.extract.nxv16f16.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0, i32 1)
- ret <vscale x 16 x half> %1
+ ret target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0
}
-
-
-define <vscale x 1 x half> @test_vlseg3_nxv1f16_triscv.vector.tuple_nxv2i8_3t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 3) @test_vlseg3_nxv1f16_triscv.vector.tuple_nxv2i8_3t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg3_nxv1f16_triscv.vector.tuple_nxv2i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT: vlseg3e16.v v7, (a0)
+; CHECK-NEXT: vlseg3e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv2i8_3t(target("riscv.vector.tuple", <vscale x 2 x i8>, 3) undef, ptr %base, i32 %vl, i32 4)
- %1 = call <vscale x 1 x half> @llvm.riscv.tuple.extract.nxv1f16.triscv.vector.tuple_nxv2i8_3t(target("riscv.vector.tuple", <vscale x 2 x i8>, 3) %0, i32 1)
- ret <vscale x 1 x half> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 3) %0
}
-
-define <vscale x 1 x half> @test_vlseg3_mask_nxv1f16_triscv.vector.tuple_nxv2i8_3t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 3) @test_vlseg3_mask_nxv1f16_triscv.vector.tuple_nxv2i8_3t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg3_mask_nxv1f16_triscv.vector.tuple_nxv2i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT: vlseg3e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg3e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv2i8_3t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 3) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 4)
- %1 = call <vscale x 1 x half> @llvm.riscv.tuple.extract.nxv1f16.triscv.vector.tuple_nxv2i8_3t(target("riscv.vector.tuple", <vscale x 2 x i8>, 3) %0, i32 1)
- ret <vscale x 1 x half> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 3) %0
}
-
-
-define <vscale x 2 x half> @test_vlseg3_nxv2f16_triscv.vector.tuple_nxv4i8_3t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @test_vlseg3_nxv2f16_triscv.vector.tuple_nxv4i8_3t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg3_nxv2f16_triscv.vector.tuple_nxv4i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT: vlseg3e16.v v7, (a0)
+; CHECK-NEXT: vlseg3e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) undef, ptr %base, i32 %vl, i32 4)
- %1 = call <vscale x 2 x half> @llvm.riscv.tuple.extract.nxv2f16.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0, i32 1)
- ret <vscale x 2 x half> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0
}
-
-define <vscale x 2 x half> @test_vlseg3_mask_nxv2f16_triscv.vector.tuple_nxv4i8_3t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @test_vlseg3_mask_nxv2f16_triscv.vector.tuple_nxv4i8_3t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg3_mask_nxv2f16_triscv.vector.tuple_nxv4i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT: vlseg3e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg3e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv4i8_3t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 4)
- %1 = call <vscale x 2 x half> @llvm.riscv.tuple.extract.nxv2f16.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0, i32 1)
- ret <vscale x 2 x half> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0
}
-
-
-define <vscale x 4 x half> @test_vlseg3_nxv4f16_triscv.vector.tuple_nxv8i8_3t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @test_vlseg3_nxv4f16_triscv.vector.tuple_nxv8i8_3t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg3_nxv4f16_triscv.vector.tuple_nxv8i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vlseg3e16.v v7, (a0)
+; CHECK-NEXT: vlseg3e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) undef, ptr %base, i32 %vl, i32 4)
- %1 = call <vscale x 4 x half> @llvm.riscv.tuple.extract.nxv4f16.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0, i32 1)
- ret <vscale x 4 x half> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0
}
-
-define <vscale x 4 x half> @test_vlseg3_mask_nxv4f16_triscv.vector.tuple_nxv8i8_3t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @test_vlseg3_mask_nxv4f16_triscv.vector.tuple_nxv8i8_3t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: test_vlseg3_mask_nxv4f16_triscv.vector.tuple_nxv8i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vlseg3e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg3e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv8i8_3t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) undef, ptr %base, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 4)
- %1 = call <vscale x 4 x half> @llvm.riscv.tuple.extract.nxv4f16.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0, i32 1)
- ret <vscale x 4 x half> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0
}
-
-
-define <vscale x 8 x half> @test_vlseg3_nxv8f16_triscv.vector.tuple_nxv16i8_3t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @test_vlseg3_nxv8f16_triscv.vector.tuple_nxv16i8_3t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg3_nxv8f16_triscv.vector.tuple_nxv16i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vlseg3e16.v v6, (a0)
+; CHECK-NEXT: vlseg3e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, i32 %vl, i32 4)
- %1 = call <vscale x 8 x half> @llvm.riscv.tuple.extract.nxv8f16.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0, i32 1)
- ret <vscale x 8 x half> %1
+ ret target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0
}
-
-define <vscale x 8 x half> @test_vlseg3_mask_nxv8f16_triscv.vector.tuple_nxv16i8_3t(ptr %base, i32 %vl, <vscale x 8 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @test_vlseg3_mask_nxv8f16_triscv.vector.tuple_nxv16i8_3t(ptr %base, i32 %vl, <vscale x 8 x i1> %mask) {
; CHECK-LABEL: test_vlseg3_mask_nxv8f16_triscv.vector.tuple_nxv16i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vlseg3e16.v v6, (a0), v0.t
+; CHECK-NEXT: vlseg3e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv8i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 8 x i1> %mask, i32 %vl, i32 1, i32 4)
- %1 = call <vscale x 8 x half> @llvm.riscv.tuple.extract.nxv8f16.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0, i32 1)
- ret <vscale x 8 x half> %1
+ ret target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0
}
-
-
-define <vscale x 1 x half> @test_vlseg4_nxv1f16_triscv.vector.tuple_nxv2i8_4t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 4) @test_vlseg4_nxv1f16_triscv.vector.tuple_nxv2i8_4t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg4_nxv1f16_triscv.vector.tuple_nxv2i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT: vlseg4e16.v v7, (a0)
+; CHECK-NEXT: vlseg4e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv2i8_4t(target("riscv.vector.tuple", <vscale x 2 x i8>, 4) undef, ptr %base, i32 %vl, i32 4)
- %1 = call <vscale x 1 x half> @llvm.riscv.tuple.extract.nxv1f16.triscv.vector.tuple_nxv2i8_4t(target("riscv.vector.tuple", <vscale x 2 x i8>, 4) %0, i32 1)
- ret <vscale x 1 x half> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 4) %0
}
-
-define <vscale x 1 x half> @test_vlseg4_mask_nxv1f16_triscv.vector.tuple_nxv2i8_4t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 4) @test_vlseg4_mask_nxv1f16_triscv.vector.tuple_nxv2i8_4t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg4_mask_nxv1f16_triscv.vector.tuple_nxv2i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT: vlseg4e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg4e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv2i8_4t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 4) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 4)
- %1 = call <vscale x 1 x half> @llvm.riscv.tuple.extract.nxv1f16.triscv.vector.tuple_nxv2i8_4t(target("riscv.vector.tuple", <vscale x 2 x i8>, 4) %0, i32 1)
- ret <vscale x 1 x half> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 4) %0
}
-
-
-define <vscale x 2 x half> @test_vlseg4_nxv2f16_triscv.vector.tuple_nxv4i8_4t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @test_vlseg4_nxv2f16_triscv.vector.tuple_nxv4i8_4t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg4_nxv2f16_triscv.vector.tuple_nxv4i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT: vlseg4e16.v v7, (a0)
+; CHECK-NEXT: vlseg4e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) undef, ptr %base, i32 %vl, i32 4)
- %1 = call <vscale x 2 x half> @llvm.riscv.tuple.extract.nxv2f16.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0, i32 1)
- ret <vscale x 2 x half> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0
}
-
-define <vscale x 2 x half> @test_vlseg4_mask_nxv2f16_triscv.vector.tuple_nxv4i8_4t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @test_vlseg4_mask_nxv2f16_triscv.vector.tuple_nxv4i8_4t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg4_mask_nxv2f16_triscv.vector.tuple_nxv4i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT: vlseg4e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg4e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv4i8_4t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 4)
- %1 = call <vscale x 2 x half> @llvm.riscv.tuple.extract.nxv2f16.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0, i32 1)
- ret <vscale x 2 x half> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0
}
-
-
-define <vscale x 4 x half> @test_vlseg4_nxv4f16_triscv.vector.tuple_nxv8i8_4t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @test_vlseg4_nxv4f16_triscv.vector.tuple_nxv8i8_4t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg4_nxv4f16_triscv.vector.tuple_nxv8i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vlseg4e16.v v7, (a0)
+; CHECK-NEXT: vlseg4e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) undef, ptr %base, i32 %vl, i32 4)
- %1 = call <vscale x 4 x half> @llvm.riscv.tuple.extract.nxv4f16.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0, i32 1)
- ret <vscale x 4 x half> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0
}
-
-define <vscale x 4 x half> @test_vlseg4_mask_nxv4f16_triscv.vector.tuple_nxv8i8_4t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @test_vlseg4_mask_nxv4f16_triscv.vector.tuple_nxv8i8_4t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: test_vlseg4_mask_nxv4f16_triscv.vector.tuple_nxv8i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vlseg4e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg4e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv8i8_4t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) undef, ptr %base, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 4)
- %1 = call <vscale x 4 x half> @llvm.riscv.tuple.extract.nxv4f16.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0, i32 1)
- ret <vscale x 4 x half> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0
}
-
-
-define <vscale x 8 x half> @test_vlseg4_nxv8f16_triscv.vector.tuple_nxv16i8_4t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @test_vlseg4_nxv8f16_triscv.vector.tuple_nxv16i8_4t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg4_nxv8f16_triscv.vector.tuple_nxv16i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vlseg4e16.v v6, (a0)
+; CHECK-NEXT: vlseg4e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, i32 %vl, i32 4)
- %1 = call <vscale x 8 x half> @llvm.riscv.tuple.extract.nxv8f16.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0, i32 1)
- ret <vscale x 8 x half> %1
+ ret target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0
}
-
-define <vscale x 8 x half> @test_vlseg4_mask_nxv8f16_triscv.vector.tuple_nxv16i8_4t(ptr %base, i32 %vl, <vscale x 8 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @test_vlseg4_mask_nxv8f16_triscv.vector.tuple_nxv16i8_4t(ptr %base, i32 %vl, <vscale x 8 x i1> %mask) {
; CHECK-LABEL: test_vlseg4_mask_nxv8f16_triscv.vector.tuple_nxv16i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vlseg4e16.v v6, (a0), v0.t
+; CHECK-NEXT: vlseg4e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv16i8_4t.nxv8i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 8 x i1> %mask, i32 %vl, i32 1, i32 4)
- %1 = call <vscale x 8 x half> @llvm.riscv.tuple.extract.nxv8f16.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0, i32 1)
- ret <vscale x 8 x half> %1
+ ret target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0
}
-
-
-define <vscale x 1 x half> @test_vlseg5_nxv1f16_triscv.vector.tuple_nxv2i8_5t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 5) @test_vlseg5_nxv1f16_triscv.vector.tuple_nxv2i8_5t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg5_nxv1f16_triscv.vector.tuple_nxv2i8_5t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT: vlseg5e16.v v7, (a0)
+; CHECK-NEXT: vlseg5e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv2i8_5t(target("riscv.vector.tuple", <vscale x 2 x i8>, 5) undef, ptr %base, i32 %vl, i32 4)
- %1 = call <vscale x 1 x half> @llvm.riscv.tuple.extract.nxv1f16.triscv.vector.tuple_nxv2i8_5t(target("riscv.vector.tuple", <vscale x 2 x i8>, 5) %0, i32 1)
- ret <vscale x 1 x half> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 5) %0
}
-
-define <vscale x 1 x half> @test_vlseg5_mask_nxv1f16_triscv.vector.tuple_nxv2i8_5t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 5) @test_vlseg5_mask_nxv1f16_triscv.vector.tuple_nxv2i8_5t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg5_mask_nxv1f16_triscv.vector.tuple_nxv2i8_5t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT: vlseg5e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg5e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv2i8_5t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 5) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 4)
- %1 = call <vscale x 1 x half> @llvm.riscv.tuple.extract.nxv1f16.triscv.vector.tuple_nxv2i8_5t(target("riscv.vector.tuple", <vscale x 2 x i8>, 5) %0, i32 1)
- ret <vscale x 1 x half> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 5) %0
}
-
-
-define <vscale x 2 x half> @test_vlseg5_nxv2f16_triscv.vector.tuple_nxv4i8_5t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @test_vlseg5_nxv2f16_triscv.vector.tuple_nxv4i8_5t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg5_nxv2f16_triscv.vector.tuple_nxv4i8_5t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT: vlseg5e16.v v7, (a0)
+; CHECK-NEXT: vlseg5e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) undef, ptr %base, i32 %vl, i32 4)
- %1 = call <vscale x 2 x half> @llvm.riscv.tuple.extract.nxv2f16.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0, i32 1)
- ret <vscale x 2 x half> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0
}
-
-define <vscale x 2 x half> @test_vlseg5_mask_nxv2f16_triscv.vector.tuple_nxv4i8_5t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @test_vlseg5_mask_nxv2f16_triscv.vector.tuple_nxv4i8_5t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg5_mask_nxv2f16_triscv.vector.tuple_nxv4i8_5t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT: vlseg5e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg5e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv4i8_5t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 4)
- %1 = call <vscale x 2 x half> @llvm.riscv.tuple.extract.nxv2f16.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0, i32 1)
- ret <vscale x 2 x half> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0
}
-
-
-define <vscale x 4 x half> @test_vlseg5_nxv4f16_triscv.vector.tuple_nxv8i8_5t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @test_vlseg5_nxv4f16_triscv.vector.tuple_nxv8i8_5t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg5_nxv4f16_triscv.vector.tuple_nxv8i8_5t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vlseg5e16.v v7, (a0)
+; CHECK-NEXT: vlseg5e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) undef, ptr %base, i32 %vl, i32 4)
- %1 = call <vscale x 4 x half> @llvm.riscv.tuple.extract.nxv4f16.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0, i32 1)
- ret <vscale x 4 x half> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0
}
-
-define <vscale x 4 x half> @test_vlseg5_mask_nxv4f16_triscv.vector.tuple_nxv8i8_5t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @test_vlseg5_mask_nxv4f16_triscv.vector.tuple_nxv8i8_5t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: test_vlseg5_mask_nxv4f16_triscv.vector.tuple_nxv8i8_5t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vlseg5e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg5e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv8i8_5t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) undef, ptr %base, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 4)
- %1 = call <vscale x 4 x half> @llvm.riscv.tuple.extract.nxv4f16.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0, i32 1)
- ret <vscale x 4 x half> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0
}
-
-
-define <vscale x 1 x half> @test_vlseg6_nxv1f16_triscv.vector.tuple_nxv2i8_6t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 6) @test_vlseg6_nxv1f16_triscv.vector.tuple_nxv2i8_6t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg6_nxv1f16_triscv.vector.tuple_nxv2i8_6t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT: vlseg6e16.v v7, (a0)
+; CHECK-NEXT: vlseg6e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv2i8_6t(target("riscv.vector.tuple", <vscale x 2 x i8>, 6) undef, ptr %base, i32 %vl, i32 4)
- %1 = call <vscale x 1 x half> @llvm.riscv.tuple.extract.nxv1f16.triscv.vector.tuple_nxv2i8_6t(target("riscv.vector.tuple", <vscale x 2 x i8>, 6) %0, i32 1)
- ret <vscale x 1 x half> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 6) %0
}
-
-define <vscale x 1 x half> @test_vlseg6_mask_nxv1f16_triscv.vector.tuple_nxv2i8_6t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 6) @test_vlseg6_mask_nxv1f16_triscv.vector.tuple_nxv2i8_6t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg6_mask_nxv1f16_triscv.vector.tuple_nxv2i8_6t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT: vlseg6e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg6e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv2i8_6t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 6) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 4)
- %1 = call <vscale x 1 x half> @llvm.riscv.tuple.extract.nxv1f16.triscv.vector.tuple_nxv2i8_6t(target("riscv.vector.tuple", <vscale x 2 x i8>, 6) %0, i32 1)
- ret <vscale x 1 x half> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 6) %0
}
-
-
-define <vscale x 2 x half> @test_vlseg6_nxv2f16_triscv.vector.tuple_nxv4i8_6t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @test_vlseg6_nxv2f16_triscv.vector.tuple_nxv4i8_6t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg6_nxv2f16_triscv.vector.tuple_nxv4i8_6t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT: vlseg6e16.v v7, (a0)
+; CHECK-NEXT: vlseg6e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) undef, ptr %base, i32 %vl, i32 4)
- %1 = call <vscale x 2 x half> @llvm.riscv.tuple.extract.nxv2f16.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0, i32 1)
- ret <vscale x 2 x half> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0
}
-
-define <vscale x 2 x half> @test_vlseg6_mask_nxv2f16_triscv.vector.tuple_nxv4i8_6t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @test_vlseg6_mask_nxv2f16_triscv.vector.tuple_nxv4i8_6t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg6_mask_nxv2f16_triscv.vector.tuple_nxv4i8_6t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT: vlseg6e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg6e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv4i8_6t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 4)
- %1 = call <vscale x 2 x half> @llvm.riscv.tuple.extract.nxv2f16.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0, i32 1)
- ret <vscale x 2 x half> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0
}
-
-
-define <vscale x 4 x half> @test_vlseg6_nxv4f16_triscv.vector.tuple_nxv8i8_6t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @test_vlseg6_nxv4f16_triscv.vector.tuple_nxv8i8_6t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg6_nxv4f16_triscv.vector.tuple_nxv8i8_6t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vlseg6e16.v v7, (a0)
+; CHECK-NEXT: vlseg6e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) undef, ptr %base, i32 %vl, i32 4)
- %1 = call <vscale x 4 x half> @llvm.riscv.tuple.extract.nxv4f16.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0, i32 1)
- ret <vscale x 4 x half> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0
}
-
-define <vscale x 4 x half> @test_vlseg6_mask_nxv4f16_triscv.vector.tuple_nxv8i8_6t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @test_vlseg6_mask_nxv4f16_triscv.vector.tuple_nxv8i8_6t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: test_vlseg6_mask_nxv4f16_triscv.vector.tuple_nxv8i8_6t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vlseg6e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg6e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv8i8_6t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) undef, ptr %base, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 4)
- %1 = call <vscale x 4 x half> @llvm.riscv.tuple.extract.nxv4f16.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0, i32 1)
- ret <vscale x 4 x half> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0
}
-
-
-define <vscale x 1 x half> @test_vlseg7_nxv1f16_triscv.vector.tuple_nxv2i8_7t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 7) @test_vlseg7_nxv1f16_triscv.vector.tuple_nxv2i8_7t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg7_nxv1f16_triscv.vector.tuple_nxv2i8_7t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT: vlseg7e16.v v7, (a0)
+; CHECK-NEXT: vlseg7e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv2i8_7t(target("riscv.vector.tuple", <vscale x 2 x i8>, 7) undef, ptr %base, i32 %vl, i32 4)
- %1 = call <vscale x 1 x half> @llvm.riscv.tuple.extract.nxv1f16.triscv.vector.tuple_nxv2i8_7t(target("riscv.vector.tuple", <vscale x 2 x i8>, 7) %0, i32 1)
- ret <vscale x 1 x half> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 7) %0
}
-
-define <vscale x 1 x half> @test_vlseg7_mask_nxv1f16_triscv.vector.tuple_nxv2i8_7t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 7) @test_vlseg7_mask_nxv1f16_triscv.vector.tuple_nxv2i8_7t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg7_mask_nxv1f16_triscv.vector.tuple_nxv2i8_7t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT: vlseg7e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg7e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv2i8_7t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 7) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 4)
- %1 = call <vscale x 1 x half> @llvm.riscv.tuple.extract.nxv1f16.triscv.vector.tuple_nxv2i8_7t(target("riscv.vector.tuple", <vscale x 2 x i8>, 7) %0, i32 1)
- ret <vscale x 1 x half> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 7) %0
}
-
-
-define <vscale x 2 x half> @test_vlseg7_nxv2f16_triscv.vector.tuple_nxv4i8_7t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @test_vlseg7_nxv2f16_triscv.vector.tuple_nxv4i8_7t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg7_nxv2f16_triscv.vector.tuple_nxv4i8_7t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT: vlseg7e16.v v7, (a0)
+; CHECK-NEXT: vlseg7e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) undef, ptr %base, i32 %vl, i32 4)
- %1 = call <vscale x 2 x half> @llvm.riscv.tuple.extract.nxv2f16.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0, i32 1)
- ret <vscale x 2 x half> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0
}
-
-define <vscale x 2 x half> @test_vlseg7_mask_nxv2f16_triscv.vector.tuple_nxv4i8_7t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @test_vlseg7_mask_nxv2f16_triscv.vector.tuple_nxv4i8_7t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg7_mask_nxv2f16_triscv.vector.tuple_nxv4i8_7t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT: vlseg7e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg7e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv4i8_7t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 4)
- %1 = call <vscale x 2 x half> @llvm.riscv.tuple.extract.nxv2f16.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0, i32 1)
- ret <vscale x 2 x half> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0
}
-
-
-define <vscale x 4 x half> @test_vlseg7_nxv4f16_triscv.vector.tuple_nxv8i8_7t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @test_vlseg7_nxv4f16_triscv.vector.tuple_nxv8i8_7t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg7_nxv4f16_triscv.vector.tuple_nxv8i8_7t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vlseg7e16.v v7, (a0)
+; CHECK-NEXT: vlseg7e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) undef, ptr %base, i32 %vl, i32 4)
- %1 = call <vscale x 4 x half> @llvm.riscv.tuple.extract.nxv4f16.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0, i32 1)
- ret <vscale x 4 x half> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0
}
-
-define <vscale x 4 x half> @test_vlseg7_mask_nxv4f16_triscv.vector.tuple_nxv8i8_7t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @test_vlseg7_mask_nxv4f16_triscv.vector.tuple_nxv8i8_7t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: test_vlseg7_mask_nxv4f16_triscv.vector.tuple_nxv8i8_7t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vlseg7e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg7e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv8i8_7t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) undef, ptr %base, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 4)
- %1 = call <vscale x 4 x half> @llvm.riscv.tuple.extract.nxv4f16.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0, i32 1)
- ret <vscale x 4 x half> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0
}
-
-
-define <vscale x 1 x half> @test_vlseg8_nxv1f16_triscv.vector.tuple_nxv2i8_8t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 8) @test_vlseg8_nxv1f16_triscv.vector.tuple_nxv2i8_8t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg8_nxv1f16_triscv.vector.tuple_nxv2i8_8t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT: vlseg8e16.v v7, (a0)
+; CHECK-NEXT: vlseg8e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv2i8_8t(target("riscv.vector.tuple", <vscale x 2 x i8>, 8) undef, ptr %base, i32 %vl, i32 4)
- %1 = call <vscale x 1 x half> @llvm.riscv.tuple.extract.nxv1f16.triscv.vector.tuple_nxv2i8_8t(target("riscv.vector.tuple", <vscale x 2 x i8>, 8) %0, i32 1)
- ret <vscale x 1 x half> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 8) %0
}
-
-define <vscale x 1 x half> @test_vlseg8_mask_nxv1f16_triscv.vector.tuple_nxv2i8_8t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 8) @test_vlseg8_mask_nxv1f16_triscv.vector.tuple_nxv2i8_8t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg8_mask_nxv1f16_triscv.vector.tuple_nxv2i8_8t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT: vlseg8e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg8e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv2i8_8t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 8) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 4)
- %1 = call <vscale x 1 x half> @llvm.riscv.tuple.extract.nxv1f16.triscv.vector.tuple_nxv2i8_8t(target("riscv.vector.tuple", <vscale x 2 x i8>, 8) %0, i32 1)
- ret <vscale x 1 x half> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 8) %0
}
-
-
-define <vscale x 2 x half> @test_vlseg8_nxv2f16_triscv.vector.tuple_nxv4i8_8t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @test_vlseg8_nxv2f16_triscv.vector.tuple_nxv4i8_8t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg8_nxv2f16_triscv.vector.tuple_nxv4i8_8t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT: vlseg8e16.v v7, (a0)
+; CHECK-NEXT: vlseg8e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) undef, ptr %base, i32 %vl, i32 4)
- %1 = call <vscale x 2 x half> @llvm.riscv.tuple.extract.nxv2f16.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0, i32 1)
- ret <vscale x 2 x half> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0
}
-
-define <vscale x 2 x half> @test_vlseg8_mask_nxv2f16_triscv.vector.tuple_nxv4i8_8t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @test_vlseg8_mask_nxv2f16_triscv.vector.tuple_nxv4i8_8t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg8_mask_nxv2f16_triscv.vector.tuple_nxv4i8_8t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT: vlseg8e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg8e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv4i8_8t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 4)
- %1 = call <vscale x 2 x half> @llvm.riscv.tuple.extract.nxv2f16.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0, i32 1)
- ret <vscale x 2 x half> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0
}
-
-
-define <vscale x 4 x half> @test_vlseg8_nxv4f16_triscv.vector.tuple_nxv8i8_8t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @test_vlseg8_nxv4f16_triscv.vector.tuple_nxv8i8_8t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg8_nxv4f16_triscv.vector.tuple_nxv8i8_8t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vlseg8e16.v v7, (a0)
+; CHECK-NEXT: vlseg8e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) undef, ptr %base, i32 %vl, i32 4)
- %1 = call <vscale x 4 x half> @llvm.riscv.tuple.extract.nxv4f16.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0, i32 1)
- ret <vscale x 4 x half> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0
}
-
-define <vscale x 4 x half> @test_vlseg8_mask_nxv4f16_triscv.vector.tuple_nxv8i8_8t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @test_vlseg8_mask_nxv4f16_triscv.vector.tuple_nxv8i8_8t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: test_vlseg8_mask_nxv4f16_triscv.vector.tuple_nxv8i8_8t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vlseg8e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg8e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv8i8_8t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) undef, ptr %base, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 4)
- %1 = call <vscale x 4 x half> @llvm.riscv.tuple.extract.nxv4f16.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0, i32 1)
- ret <vscale x 4 x half> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0
}
-
-
-define <vscale x 1 x float> @test_vlseg2_nxv1f32_triscv.vector.tuple_nxv4i8_2t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @test_vlseg2_nxv1f32_triscv.vector.tuple_nxv4i8_2t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg2_nxv1f32_triscv.vector.tuple_nxv4i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT: vlseg2e32.v v7, (a0)
+; CHECK-NEXT: vlseg2e32.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) undef, ptr %base, i32 %vl, i32 5)
- %1 = call <vscale x 1 x float> @llvm.riscv.tuple.extract.nxv1f32.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0, i32 1)
- ret <vscale x 1 x float> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0
}
-
-define <vscale x 1 x float> @test_vlseg2_mask_nxv1f32_triscv.vector.tuple_nxv4i8_2t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @test_vlseg2_mask_nxv1f32_triscv.vector.tuple_nxv4i8_2t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg2_mask_nxv1f32_triscv.vector.tuple_nxv4i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT: vlseg2e32.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg2e32.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv4i8_2t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 5)
- %1 = call <vscale x 1 x float> @llvm.riscv.tuple.extract.nxv1f32.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0, i32 1)
- ret <vscale x 1 x float> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0
}
-
-
-define <vscale x 2 x float> @test_vlseg2_nxv2f32_triscv.vector.tuple_nxv8i8_2t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @test_vlseg2_nxv2f32_triscv.vector.tuple_nxv8i8_2t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg2_nxv2f32_triscv.vector.tuple_nxv8i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT: vlseg2e32.v v7, (a0)
+; CHECK-NEXT: vlseg2e32.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) undef, ptr %base, i32 %vl, i32 5)
- %1 = call <vscale x 2 x float> @llvm.riscv.tuple.extract.nxv2f32.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0, i32 1)
- ret <vscale x 2 x float> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0
}
-
-define <vscale x 2 x float> @test_vlseg2_mask_nxv2f32_triscv.vector.tuple_nxv8i8_2t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @test_vlseg2_mask_nxv2f32_triscv.vector.tuple_nxv8i8_2t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg2_mask_nxv2f32_triscv.vector.tuple_nxv8i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT: vlseg2e32.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg2e32.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv8i8_2t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 5)
- %1 = call <vscale x 2 x float> @llvm.riscv.tuple.extract.nxv2f32.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0, i32 1)
- ret <vscale x 2 x float> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0
}
-
-
-define <vscale x 4 x float> @test_vlseg2_nxv4f32_triscv.vector.tuple_nxv16i8_2t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @test_vlseg2_nxv4f32_triscv.vector.tuple_nxv16i8_2t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg2_nxv4f32_triscv.vector.tuple_nxv16i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vlseg2e32.v v6, (a0)
+; CHECK-NEXT: vlseg2e32.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, i32 %vl, i32 5)
- %1 = call <vscale x 4 x float> @llvm.riscv.tuple.extract.nxv4f32.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0, i32 1)
- ret <vscale x 4 x float> %1
+ ret target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0
}
-
-define <vscale x 4 x float> @test_vlseg2_mask_nxv4f32_triscv.vector.tuple_nxv16i8_2t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @test_vlseg2_mask_nxv4f32_triscv.vector.tuple_nxv16i8_2t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: test_vlseg2_mask_nxv4f32_triscv.vector.tuple_nxv16i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vlseg2e32.v v6, (a0), v0.t
+; CHECK-NEXT: vlseg2e32.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv16i8_2t.nxv4i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 5)
- %1 = call <vscale x 4 x float> @llvm.riscv.tuple.extract.nxv4f32.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0, i32 1)
- ret <vscale x 4 x float> %1
+ ret target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0
}
-
-
-define <vscale x 8 x float> @test_vlseg2_nxv8f32_triscv.vector.tuple_nxv32i8_2t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @test_vlseg2_nxv8f32_triscv.vector.tuple_nxv32i8_2t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg2_nxv8f32_triscv.vector.tuple_nxv32i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT: vlseg2e32.v v4, (a0)
+; CHECK-NEXT: vlseg2e32.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, i32 %vl, i32 5)
- %1 = call <vscale x 8 x float> @llvm.riscv.tuple.extract.nxv8f32.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0, i32 1)
- ret <vscale x 8 x float> %1
+ ret target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0
}
-
-define <vscale x 8 x float> @test_vlseg2_mask_nxv8f32_triscv.vector.tuple_nxv32i8_2t(ptr %base, i32 %vl, <vscale x 8 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @test_vlseg2_mask_nxv8f32_triscv.vector.tuple_nxv32i8_2t(ptr %base, i32 %vl, <vscale x 8 x i1> %mask) {
; CHECK-LABEL: test_vlseg2_mask_nxv8f32_triscv.vector.tuple_nxv32i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT: vlseg2e32.v v4, (a0), v0.t
+; CHECK-NEXT: vlseg2e32.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv32i8_2t.nxv8i1(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 8 x i1> %mask, i32 %vl, i32 1, i32 5)
- %1 = call <vscale x 8 x float> @llvm.riscv.tuple.extract.nxv8f32.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0, i32 1)
- ret <vscale x 8 x float> %1
+ ret target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0
}
-
-
-define <vscale x 1 x float> @test_vlseg3_nxv1f32_triscv.vector.tuple_nxv4i8_3t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @test_vlseg3_nxv1f32_triscv.vector.tuple_nxv4i8_3t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg3_nxv1f32_triscv.vector.tuple_nxv4i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT: vlseg3e32.v v7, (a0)
+; CHECK-NEXT: vlseg3e32.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) undef, ptr %base, i32 %vl, i32 5)
- %1 = call <vscale x 1 x float> @llvm.riscv.tuple.extract.nxv1f32.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0, i32 1)
- ret <vscale x 1 x float> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0
}
-
-define <vscale x 1 x float> @test_vlseg3_mask_nxv1f32_triscv.vector.tuple_nxv4i8_3t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @test_vlseg3_mask_nxv1f32_triscv.vector.tuple_nxv4i8_3t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg3_mask_nxv1f32_triscv.vector.tuple_nxv4i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT: vlseg3e32.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg3e32.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv4i8_3t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 5)
- %1 = call <vscale x 1 x float> @llvm.riscv.tuple.extract.nxv1f32.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0, i32 1)
- ret <vscale x 1 x float> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0
}
-
-
-define <vscale x 2 x float> @test_vlseg3_nxv2f32_triscv.vector.tuple_nxv8i8_3t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @test_vlseg3_nxv2f32_triscv.vector.tuple_nxv8i8_3t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg3_nxv2f32_triscv.vector.tuple_nxv8i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT: vlseg3e32.v v7, (a0)
+; CHECK-NEXT: vlseg3e32.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) undef, ptr %base, i32 %vl, i32 5)
- %1 = call <vscale x 2 x float> @llvm.riscv.tuple.extract.nxv2f32.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0, i32 1)
- ret <vscale x 2 x float> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0
}
-
-define <vscale x 2 x float> @test_vlseg3_mask_nxv2f32_triscv.vector.tuple_nxv8i8_3t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @test_vlseg3_mask_nxv2f32_triscv.vector.tuple_nxv8i8_3t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg3_mask_nxv2f32_triscv.vector.tuple_nxv8i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT: vlseg3e32.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg3e32.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv8i8_3t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 5)
- %1 = call <vscale x 2 x float> @llvm.riscv.tuple.extract.nxv2f32.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0, i32 1)
- ret <vscale x 2 x float> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0
}
-
-
-define <vscale x 4 x float> @test_vlseg3_nxv4f32_triscv.vector.tuple_nxv16i8_3t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @test_vlseg3_nxv4f32_triscv.vector.tuple_nxv16i8_3t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg3_nxv4f32_triscv.vector.tuple_nxv16i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vlseg3e32.v v6, (a0)
+; CHECK-NEXT: vlseg3e32.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, i32 %vl, i32 5)
- %1 = call <vscale x 4 x float> @llvm.riscv.tuple.extract.nxv4f32.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0, i32 1)
- ret <vscale x 4 x float> %1
+ ret target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0
}
-
-define <vscale x 4 x float> @test_vlseg3_mask_nxv4f32_triscv.vector.tuple_nxv16i8_3t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @test_vlseg3_mask_nxv4f32_triscv.vector.tuple_nxv16i8_3t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: test_vlseg3_mask_nxv4f32_triscv.vector.tuple_nxv16i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vlseg3e32.v v6, (a0), v0.t
+; CHECK-NEXT: vlseg3e32.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv4i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 5)
- %1 = call <vscale x 4 x float> @llvm.riscv.tuple.extract.nxv4f32.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0, i32 1)
- ret <vscale x 4 x float> %1
+ ret target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0
}
-
-
-define <vscale x 1 x float> @test_vlseg4_nxv1f32_triscv.vector.tuple_nxv4i8_4t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @test_vlseg4_nxv1f32_triscv.vector.tuple_nxv4i8_4t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg4_nxv1f32_triscv.vector.tuple_nxv4i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT: vlseg4e32.v v7, (a0)
+; CHECK-NEXT: vlseg4e32.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) undef, ptr %base, i32 %vl, i32 5)
- %1 = call <vscale x 1 x float> @llvm.riscv.tuple.extract.nxv1f32.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0, i32 1)
- ret <vscale x 1 x float> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0
}
-
-define <vscale x 1 x float> @test_vlseg4_mask_nxv1f32_triscv.vector.tuple_nxv4i8_4t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @test_vlseg4_mask_nxv1f32_triscv.vector.tuple_nxv4i8_4t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg4_mask_nxv1f32_triscv.vector.tuple_nxv4i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT: vlseg4e32.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg4e32.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv4i8_4t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 5)
- %1 = call <vscale x 1 x float> @llvm.riscv.tuple.extract.nxv1f32.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0, i32 1)
- ret <vscale x 1 x float> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0
}
-
-
-define <vscale x 2 x float> @test_vlseg4_nxv2f32_triscv.vector.tuple_nxv8i8_4t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @test_vlseg4_nxv2f32_triscv.vector.tuple_nxv8i8_4t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg4_nxv2f32_triscv.vector.tuple_nxv8i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT: vlseg4e32.v v7, (a0)
+; CHECK-NEXT: vlseg4e32.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) undef, ptr %base, i32 %vl, i32 5)
- %1 = call <vscale x 2 x float> @llvm.riscv.tuple.extract.nxv2f32.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0, i32 1)
- ret <vscale x 2 x float> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0
}
-
-define <vscale x 2 x float> @test_vlseg4_mask_nxv2f32_triscv.vector.tuple_nxv8i8_4t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @test_vlseg4_mask_nxv2f32_triscv.vector.tuple_nxv8i8_4t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg4_mask_nxv2f32_triscv.vector.tuple_nxv8i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT: vlseg4e32.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg4e32.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv8i8_4t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 5)
- %1 = call <vscale x 2 x float> @llvm.riscv.tuple.extract.nxv2f32.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0, i32 1)
- ret <vscale x 2 x float> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0
}
-
-
-define <vscale x 4 x float> @test_vlseg4_nxv4f32_triscv.vector.tuple_nxv16i8_4t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @test_vlseg4_nxv4f32_triscv.vector.tuple_nxv16i8_4t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg4_nxv4f32_triscv.vector.tuple_nxv16i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vlseg4e32.v v6, (a0)
+; CHECK-NEXT: vlseg4e32.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, i32 %vl, i32 5)
- %1 = call <vscale x 4 x float> @llvm.riscv.tuple.extract.nxv4f32.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0, i32 1)
- ret <vscale x 4 x float> %1
+ ret target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0
}
-
-define <vscale x 4 x float> @test_vlseg4_mask_nxv4f32_triscv.vector.tuple_nxv16i8_4t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @test_vlseg4_mask_nxv4f32_triscv.vector.tuple_nxv16i8_4t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: test_vlseg4_mask_nxv4f32_triscv.vector.tuple_nxv16i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vlseg4e32.v v6, (a0), v0.t
+; CHECK-NEXT: vlseg4e32.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv16i8_4t.nxv4i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 5)
- %1 = call <vscale x 4 x float> @llvm.riscv.tuple.extract.nxv4f32.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0, i32 1)
- ret <vscale x 4 x float> %1
+ ret target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0
}
-
-
-define <vscale x 1 x float> @test_vlseg5_nxv1f32_triscv.vector.tuple_nxv4i8_5t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @test_vlseg5_nxv1f32_triscv.vector.tuple_nxv4i8_5t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg5_nxv1f32_triscv.vector.tuple_nxv4i8_5t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT: vlseg5e32.v v7, (a0)
+; CHECK-NEXT: vlseg5e32.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) undef, ptr %base, i32 %vl, i32 5)
- %1 = call <vscale x 1 x float> @llvm.riscv.tuple.extract.nxv1f32.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0, i32 1)
- ret <vscale x 1 x float> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0
}
-
-define <vscale x 1 x float> @test_vlseg5_mask_nxv1f32_triscv.vector.tuple_nxv4i8_5t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @test_vlseg5_mask_nxv1f32_triscv.vector.tuple_nxv4i8_5t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg5_mask_nxv1f32_triscv.vector.tuple_nxv4i8_5t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT: vlseg5e32.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg5e32.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv4i8_5t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 5)
- %1 = call <vscale x 1 x float> @llvm.riscv.tuple.extract.nxv1f32.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0, i32 1)
- ret <vscale x 1 x float> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0
}
-
-
-define <vscale x 2 x float> @test_vlseg5_nxv2f32_triscv.vector.tuple_nxv8i8_5t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @test_vlseg5_nxv2f32_triscv.vector.tuple_nxv8i8_5t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg5_nxv2f32_triscv.vector.tuple_nxv8i8_5t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT: vlseg5e32.v v7, (a0)
+; CHECK-NEXT: vlseg5e32.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) undef, ptr %base, i32 %vl, i32 5)
- %1 = call <vscale x 2 x float> @llvm.riscv.tuple.extract.nxv2f32.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0, i32 1)
- ret <vscale x 2 x float> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0
}
-
-define <vscale x 2 x float> @test_vlseg5_mask_nxv2f32_triscv.vector.tuple_nxv8i8_5t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @test_vlseg5_mask_nxv2f32_triscv.vector.tuple_nxv8i8_5t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg5_mask_nxv2f32_triscv.vector.tuple_nxv8i8_5t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT: vlseg5e32.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg5e32.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv8i8_5t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 5)
- %1 = call <vscale x 2 x float> @llvm.riscv.tuple.extract.nxv2f32.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0, i32 1)
- ret <vscale x 2 x float> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0
}
-
-
-define <vscale x 1 x float> @test_vlseg6_nxv1f32_triscv.vector.tuple_nxv4i8_6t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @test_vlseg6_nxv1f32_triscv.vector.tuple_nxv4i8_6t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg6_nxv1f32_triscv.vector.tuple_nxv4i8_6t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT: vlseg6e32.v v7, (a0)
+; CHECK-NEXT: vlseg6e32.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) undef, ptr %base, i32 %vl, i32 5)
- %1 = call <vscale x 1 x float> @llvm.riscv.tuple.extract.nxv1f32.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0, i32 1)
- ret <vscale x 1 x float> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0
}
-
-define <vscale x 1 x float> @test_vlseg6_mask_nxv1f32_triscv.vector.tuple_nxv4i8_6t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @test_vlseg6_mask_nxv1f32_triscv.vector.tuple_nxv4i8_6t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg6_mask_nxv1f32_triscv.vector.tuple_nxv4i8_6t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT: vlseg6e32.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg6e32.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv4i8_6t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 5)
- %1 = call <vscale x 1 x float> @llvm.riscv.tuple.extract.nxv1f32.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0, i32 1)
- ret <vscale x 1 x float> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0
}
-
-
-define <vscale x 2 x float> @test_vlseg6_nxv2f32_triscv.vector.tuple_nxv8i8_6t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @test_vlseg6_nxv2f32_triscv.vector.tuple_nxv8i8_6t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg6_nxv2f32_triscv.vector.tuple_nxv8i8_6t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT: vlseg6e32.v v7, (a0)
+; CHECK-NEXT: vlseg6e32.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) undef, ptr %base, i32 %vl, i32 5)
- %1 = call <vscale x 2 x float> @llvm.riscv.tuple.extract.nxv2f32.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0, i32 1)
- ret <vscale x 2 x float> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0
}
-
-define <vscale x 2 x float> @test_vlseg6_mask_nxv2f32_triscv.vector.tuple_nxv8i8_6t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @test_vlseg6_mask_nxv2f32_triscv.vector.tuple_nxv8i8_6t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg6_mask_nxv2f32_triscv.vector.tuple_nxv8i8_6t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT: vlseg6e32.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg6e32.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv8i8_6t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 5)
- %1 = call <vscale x 2 x float> @llvm.riscv.tuple.extract.nxv2f32.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0, i32 1)
- ret <vscale x 2 x float> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0
}
-
-
-define <vscale x 1 x float> @test_vlseg7_nxv1f32_triscv.vector.tuple_nxv4i8_7t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @test_vlseg7_nxv1f32_triscv.vector.tuple_nxv4i8_7t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg7_nxv1f32_triscv.vector.tuple_nxv4i8_7t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT: vlseg7e32.v v7, (a0)
+; CHECK-NEXT: vlseg7e32.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) undef, ptr %base, i32 %vl, i32 5)
- %1 = call <vscale x 1 x float> @llvm.riscv.tuple.extract.nxv1f32.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0, i32 1)
- ret <vscale x 1 x float> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0
}
-
-define <vscale x 1 x float> @test_vlseg7_mask_nxv1f32_triscv.vector.tuple_nxv4i8_7t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @test_vlseg7_mask_nxv1f32_triscv.vector.tuple_nxv4i8_7t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg7_mask_nxv1f32_triscv.vector.tuple_nxv4i8_7t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT: vlseg7e32.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg7e32.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv4i8_7t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 5)
- %1 = call <vscale x 1 x float> @llvm.riscv.tuple.extract.nxv1f32.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0, i32 1)
- ret <vscale x 1 x float> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0
}
-
-
-define <vscale x 2 x float> @test_vlseg7_nxv2f32_triscv.vector.tuple_nxv8i8_7t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @test_vlseg7_nxv2f32_triscv.vector.tuple_nxv8i8_7t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg7_nxv2f32_triscv.vector.tuple_nxv8i8_7t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT: vlseg7e32.v v7, (a0)
+; CHECK-NEXT: vlseg7e32.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) undef, ptr %base, i32 %vl, i32 5)
- %1 = call <vscale x 2 x float> @llvm.riscv.tuple.extract.nxv2f32.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0, i32 1)
- ret <vscale x 2 x float> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0
}
-
-define <vscale x 2 x float> @test_vlseg7_mask_nxv2f32_triscv.vector.tuple_nxv8i8_7t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @test_vlseg7_mask_nxv2f32_triscv.vector.tuple_nxv8i8_7t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg7_mask_nxv2f32_triscv.vector.tuple_nxv8i8_7t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT: vlseg7e32.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg7e32.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv8i8_7t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 5)
- %1 = call <vscale x 2 x float> @llvm.riscv.tuple.extract.nxv2f32.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0, i32 1)
- ret <vscale x 2 x float> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0
}
-
-
-define <vscale x 1 x float> @test_vlseg8_nxv1f32_triscv.vector.tuple_nxv4i8_8t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @test_vlseg8_nxv1f32_triscv.vector.tuple_nxv4i8_8t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg8_nxv1f32_triscv.vector.tuple_nxv4i8_8t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT: vlseg8e32.v v7, (a0)
+; CHECK-NEXT: vlseg8e32.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) undef, ptr %base, i32 %vl, i32 5)
- %1 = call <vscale x 1 x float> @llvm.riscv.tuple.extract.nxv1f32.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0, i32 1)
- ret <vscale x 1 x float> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0
}
-
-define <vscale x 1 x float> @test_vlseg8_mask_nxv1f32_triscv.vector.tuple_nxv4i8_8t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @test_vlseg8_mask_nxv1f32_triscv.vector.tuple_nxv4i8_8t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg8_mask_nxv1f32_triscv.vector.tuple_nxv4i8_8t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT: vlseg8e32.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg8e32.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv4i8_8t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 5)
- %1 = call <vscale x 1 x float> @llvm.riscv.tuple.extract.nxv1f32.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0, i32 1)
- ret <vscale x 1 x float> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0
}
-
-
-define <vscale x 2 x float> @test_vlseg8_nxv2f32_triscv.vector.tuple_nxv8i8_8t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @test_vlseg8_nxv2f32_triscv.vector.tuple_nxv8i8_8t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg8_nxv2f32_triscv.vector.tuple_nxv8i8_8t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT: vlseg8e32.v v7, (a0)
+; CHECK-NEXT: vlseg8e32.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) undef, ptr %base, i32 %vl, i32 5)
- %1 = call <vscale x 2 x float> @llvm.riscv.tuple.extract.nxv2f32.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0, i32 1)
- ret <vscale x 2 x float> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0
}
-
-define <vscale x 2 x float> @test_vlseg8_mask_nxv2f32_triscv.vector.tuple_nxv8i8_8t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @test_vlseg8_mask_nxv2f32_triscv.vector.tuple_nxv8i8_8t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg8_mask_nxv2f32_triscv.vector.tuple_nxv8i8_8t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT: vlseg8e32.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg8e32.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv8i8_8t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 5)
- %1 = call <vscale x 2 x float> @llvm.riscv.tuple.extract.nxv2f32.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0, i32 1)
- ret <vscale x 2 x float> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0
}
-
-
-define <vscale x 1 x double> @test_vlseg2_nxv1f64_triscv.vector.tuple_nxv8i8_2t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @test_vlseg2_nxv1f64_triscv.vector.tuple_nxv8i8_2t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg2_nxv1f64_triscv.vector.tuple_nxv8i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
-; CHECK-NEXT: vlseg2e64.v v7, (a0)
+; CHECK-NEXT: vlseg2e64.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) undef, ptr %base, i32 %vl, i32 6)
- %1 = call <vscale x 1 x double> @llvm.riscv.tuple.extract.nxv1f64.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0, i32 1)
- ret <vscale x 1 x double> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0
}
-
-define <vscale x 1 x double> @test_vlseg2_mask_nxv1f64_triscv.vector.tuple_nxv8i8_2t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @test_vlseg2_mask_nxv1f64_triscv.vector.tuple_nxv8i8_2t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg2_mask_nxv1f64_triscv.vector.tuple_nxv8i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
-; CHECK-NEXT: vlseg2e64.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg2e64.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv8i8_2t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 6)
- %1 = call <vscale x 1 x double> @llvm.riscv.tuple.extract.nxv1f64.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0, i32 1)
- ret <vscale x 1 x double> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0
}
-
-
-define <vscale x 2 x double> @test_vlseg2_nxv2f64_triscv.vector.tuple_nxv16i8_2t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @test_vlseg2_nxv2f64_triscv.vector.tuple_nxv16i8_2t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg2_nxv2f64_triscv.vector.tuple_nxv16i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vlseg2e64.v v6, (a0)
+; CHECK-NEXT: vlseg2e64.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, i32 %vl, i32 6)
- %1 = call <vscale x 2 x double> @llvm.riscv.tuple.extract.nxv2f64.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0, i32 1)
- ret <vscale x 2 x double> %1
+ ret target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0
}
-
-define <vscale x 2 x double> @test_vlseg2_mask_nxv2f64_triscv.vector.tuple_nxv16i8_2t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @test_vlseg2_mask_nxv2f64_triscv.vector.tuple_nxv16i8_2t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg2_mask_nxv2f64_triscv.vector.tuple_nxv16i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vlseg2e64.v v6, (a0), v0.t
+; CHECK-NEXT: vlseg2e64.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv16i8_2t.nxv2i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 6)
- %1 = call <vscale x 2 x double> @llvm.riscv.tuple.extract.nxv2f64.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0, i32 1)
- ret <vscale x 2 x double> %1
+ ret target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0
}
-
-
-define <vscale x 4 x double> @test_vlseg2_nxv4f64_triscv.vector.tuple_nxv32i8_2t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @test_vlseg2_nxv4f64_triscv.vector.tuple_nxv32i8_2t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg2_nxv4f64_triscv.vector.tuple_nxv32i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma
-; CHECK-NEXT: vlseg2e64.v v4, (a0)
+; CHECK-NEXT: vlseg2e64.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, i32 %vl, i32 6)
- %1 = call <vscale x 4 x double> @llvm.riscv.tuple.extract.nxv4f64.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0, i32 1)
- ret <vscale x 4 x double> %1
+ ret target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0
}
-
-define <vscale x 4 x double> @test_vlseg2_mask_nxv4f64_triscv.vector.tuple_nxv32i8_2t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @test_vlseg2_mask_nxv4f64_triscv.vector.tuple_nxv32i8_2t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: test_vlseg2_mask_nxv4f64_triscv.vector.tuple_nxv32i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma
-; CHECK-NEXT: vlseg2e64.v v4, (a0), v0.t
+; CHECK-NEXT: vlseg2e64.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv32i8_2t.nxv4i1(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 6)
- %1 = call <vscale x 4 x double> @llvm.riscv.tuple.extract.nxv4f64.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0, i32 1)
- ret <vscale x 4 x double> %1
+ ret target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0
}
-
-
-define <vscale x 1 x double> @test_vlseg3_nxv1f64_triscv.vector.tuple_nxv8i8_3t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @test_vlseg3_nxv1f64_triscv.vector.tuple_nxv8i8_3t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg3_nxv1f64_triscv.vector.tuple_nxv8i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
-; CHECK-NEXT: vlseg3e64.v v7, (a0)
+; CHECK-NEXT: vlseg3e64.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) undef, ptr %base, i32 %vl, i32 6)
- %1 = call <vscale x 1 x double> @llvm.riscv.tuple.extract.nxv1f64.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0, i32 1)
- ret <vscale x 1 x double> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0
}
-
-define <vscale x 1 x double> @test_vlseg3_mask_nxv1f64_triscv.vector.tuple_nxv8i8_3t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @test_vlseg3_mask_nxv1f64_triscv.vector.tuple_nxv8i8_3t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg3_mask_nxv1f64_triscv.vector.tuple_nxv8i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
-; CHECK-NEXT: vlseg3e64.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg3e64.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv8i8_3t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 6)
- %1 = call <vscale x 1 x double> @llvm.riscv.tuple.extract.nxv1f64.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0, i32 1)
- ret <vscale x 1 x double> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0
}
-
-
-define <vscale x 2 x double> @test_vlseg3_nxv2f64_triscv.vector.tuple_nxv16i8_3t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @test_vlseg3_nxv2f64_triscv.vector.tuple_nxv16i8_3t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg3_nxv2f64_triscv.vector.tuple_nxv16i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vlseg3e64.v v6, (a0)
+; CHECK-NEXT: vlseg3e64.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, i32 %vl, i32 6)
- %1 = call <vscale x 2 x double> @llvm.riscv.tuple.extract.nxv2f64.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0, i32 1)
- ret <vscale x 2 x double> %1
+ ret target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0
}
-
-define <vscale x 2 x double> @test_vlseg3_mask_nxv2f64_triscv.vector.tuple_nxv16i8_3t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @test_vlseg3_mask_nxv2f64_triscv.vector.tuple_nxv16i8_3t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg3_mask_nxv2f64_triscv.vector.tuple_nxv16i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vlseg3e64.v v6, (a0), v0.t
+; CHECK-NEXT: vlseg3e64.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv2i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 6)
- %1 = call <vscale x 2 x double> @llvm.riscv.tuple.extract.nxv2f64.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0, i32 1)
- ret <vscale x 2 x double> %1
+ ret target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0
}
-
-
-define <vscale x 1 x double> @test_vlseg4_nxv1f64_triscv.vector.tuple_nxv8i8_4t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @test_vlseg4_nxv1f64_triscv.vector.tuple_nxv8i8_4t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg4_nxv1f64_triscv.vector.tuple_nxv8i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
-; CHECK-NEXT: vlseg4e64.v v7, (a0)
+; CHECK-NEXT: vlseg4e64.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) undef, ptr %base, i32 %vl, i32 6)
- %1 = call <vscale x 1 x double> @llvm.riscv.tuple.extract.nxv1f64.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0, i32 1)
- ret <vscale x 1 x double> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0
}
-
-define <vscale x 1 x double> @test_vlseg4_mask_nxv1f64_triscv.vector.tuple_nxv8i8_4t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @test_vlseg4_mask_nxv1f64_triscv.vector.tuple_nxv8i8_4t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg4_mask_nxv1f64_triscv.vector.tuple_nxv8i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
-; CHECK-NEXT: vlseg4e64.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg4e64.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv8i8_4t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 6)
- %1 = call <vscale x 1 x double> @llvm.riscv.tuple.extract.nxv1f64.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0, i32 1)
- ret <vscale x 1 x double> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0
}
-
-
-define <vscale x 2 x double> @test_vlseg4_nxv2f64_triscv.vector.tuple_nxv16i8_4t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @test_vlseg4_nxv2f64_triscv.vector.tuple_nxv16i8_4t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg4_nxv2f64_triscv.vector.tuple_nxv16i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vlseg4e64.v v6, (a0)
+; CHECK-NEXT: vlseg4e64.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, i32 %vl, i32 6)
- %1 = call <vscale x 2 x double> @llvm.riscv.tuple.extract.nxv2f64.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0, i32 1)
- ret <vscale x 2 x double> %1
+ ret target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0
}
-
-define <vscale x 2 x double> @test_vlseg4_mask_nxv2f64_triscv.vector.tuple_nxv16i8_4t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @test_vlseg4_mask_nxv2f64_triscv.vector.tuple_nxv16i8_4t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg4_mask_nxv2f64_triscv.vector.tuple_nxv16i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vlseg4e64.v v6, (a0), v0.t
+; CHECK-NEXT: vlseg4e64.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv16i8_4t.nxv2i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 6)
- %1 = call <vscale x 2 x double> @llvm.riscv.tuple.extract.nxv2f64.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0, i32 1)
- ret <vscale x 2 x double> %1
+ ret target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0
}
-
-
-define <vscale x 1 x double> @test_vlseg5_nxv1f64_triscv.vector.tuple_nxv8i8_5t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @test_vlseg5_nxv1f64_triscv.vector.tuple_nxv8i8_5t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg5_nxv1f64_triscv.vector.tuple_nxv8i8_5t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
-; CHECK-NEXT: vlseg5e64.v v7, (a0)
+; CHECK-NEXT: vlseg5e64.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) undef, ptr %base, i32 %vl, i32 6)
- %1 = call <vscale x 1 x double> @llvm.riscv.tuple.extract.nxv1f64.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0, i32 1)
- ret <vscale x 1 x double> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0
}
-
-define <vscale x 1 x double> @test_vlseg5_mask_nxv1f64_triscv.vector.tuple_nxv8i8_5t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @test_vlseg5_mask_nxv1f64_triscv.vector.tuple_nxv8i8_5t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg5_mask_nxv1f64_triscv.vector.tuple_nxv8i8_5t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
-; CHECK-NEXT: vlseg5e64.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg5e64.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv8i8_5t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 6)
- %1 = call <vscale x 1 x double> @llvm.riscv.tuple.extract.nxv1f64.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0, i32 1)
- ret <vscale x 1 x double> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0
}
-
-
-define <vscale x 1 x double> @test_vlseg6_nxv1f64_triscv.vector.tuple_nxv8i8_6t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @test_vlseg6_nxv1f64_triscv.vector.tuple_nxv8i8_6t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg6_nxv1f64_triscv.vector.tuple_nxv8i8_6t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
-; CHECK-NEXT: vlseg6e64.v v7, (a0)
+; CHECK-NEXT: vlseg6e64.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) undef, ptr %base, i32 %vl, i32 6)
- %1 = call <vscale x 1 x double> @llvm.riscv.tuple.extract.nxv1f64.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0, i32 1)
- ret <vscale x 1 x double> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0
}
-
-define <vscale x 1 x double> @test_vlseg6_mask_nxv1f64_triscv.vector.tuple_nxv8i8_6t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @test_vlseg6_mask_nxv1f64_triscv.vector.tuple_nxv8i8_6t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg6_mask_nxv1f64_triscv.vector.tuple_nxv8i8_6t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
-; CHECK-NEXT: vlseg6e64.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg6e64.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv8i8_6t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 6)
- %1 = call <vscale x 1 x double> @llvm.riscv.tuple.extract.nxv1f64.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0, i32 1)
- ret <vscale x 1 x double> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0
}
-
-
-define <vscale x 1 x double> @test_vlseg7_nxv1f64_triscv.vector.tuple_nxv8i8_7t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @test_vlseg7_nxv1f64_triscv.vector.tuple_nxv8i8_7t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg7_nxv1f64_triscv.vector.tuple_nxv8i8_7t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
-; CHECK-NEXT: vlseg7e64.v v7, (a0)
+; CHECK-NEXT: vlseg7e64.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) undef, ptr %base, i32 %vl, i32 6)
- %1 = call <vscale x 1 x double> @llvm.riscv.tuple.extract.nxv1f64.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0, i32 1)
- ret <vscale x 1 x double> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0
}
-
-define <vscale x 1 x double> @test_vlseg7_mask_nxv1f64_triscv.vector.tuple_nxv8i8_7t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @test_vlseg7_mask_nxv1f64_triscv.vector.tuple_nxv8i8_7t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg7_mask_nxv1f64_triscv.vector.tuple_nxv8i8_7t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
-; CHECK-NEXT: vlseg7e64.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg7e64.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv8i8_7t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 6)
- %1 = call <vscale x 1 x double> @llvm.riscv.tuple.extract.nxv1f64.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0, i32 1)
- ret <vscale x 1 x double> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0
}
-
-
-define <vscale x 1 x double> @test_vlseg8_nxv1f64_triscv.vector.tuple_nxv8i8_8t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @test_vlseg8_nxv1f64_triscv.vector.tuple_nxv8i8_8t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg8_nxv1f64_triscv.vector.tuple_nxv8i8_8t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
-; CHECK-NEXT: vlseg8e64.v v7, (a0)
+; CHECK-NEXT: vlseg8e64.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) undef, ptr %base, i32 %vl, i32 6)
- %1 = call <vscale x 1 x double> @llvm.riscv.tuple.extract.nxv1f64.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0, i32 1)
- ret <vscale x 1 x double> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0
}
-
-define <vscale x 1 x double> @test_vlseg8_mask_nxv1f64_triscv.vector.tuple_nxv8i8_8t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @test_vlseg8_mask_nxv1f64_triscv.vector.tuple_nxv8i8_8t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg8_mask_nxv1f64_triscv.vector.tuple_nxv8i8_8t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
-; CHECK-NEXT: vlseg8e64.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg8e64.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv8i8_8t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 6)
- %1 = call <vscale x 1 x double> @llvm.riscv.tuple.extract.nxv1f64.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0, i32 1)
- ret <vscale x 1 x double> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0
}
-
-
-define <vscale x 1 x bfloat> @test_vlseg2_nxv1bf16_triscv.vector.tuple_nxv2i8_2t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 2) @test_vlseg2_nxv1bf16_triscv.vector.tuple_nxv2i8_2t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg2_nxv1bf16_triscv.vector.tuple_nxv2i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT: vlseg2e16.v v7, (a0)
+; CHECK-NEXT: vlseg2e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv2i8_2t(target("riscv.vector.tuple", <vscale x 2 x i8>, 2) undef, ptr %base, i32 %vl, i32 4)
- %1 = call <vscale x 1 x bfloat> @llvm.riscv.tuple.extract.nxv1bf16.triscv.vector.tuple_nxv2i8_2t(target("riscv.vector.tuple", <vscale x 2 x i8>, 2) %0, i32 1)
- ret <vscale x 1 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 2) %0
}
-
-define <vscale x 1 x bfloat> @test_vlseg2_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_2t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 2) @test_vlseg2_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_2t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg2_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT: vlseg2e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg2e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv2i8_2t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 2) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 4)
- %1 = call <vscale x 1 x bfloat> @llvm.riscv.tuple.extract.nxv1bf16.triscv.vector.tuple_nxv2i8_2t(target("riscv.vector.tuple", <vscale x 2 x i8>, 2) %0, i32 1)
- ret <vscale x 1 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 2) %0
}
-
-
-define <vscale x 2 x bfloat> @test_vlseg2_nxv2bf16_triscv.vector.tuple_nxv4i8_2t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @test_vlseg2_nxv2bf16_triscv.vector.tuple_nxv4i8_2t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg2_nxv2bf16_triscv.vector.tuple_nxv4i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT: vlseg2e16.v v7, (a0)
+; CHECK-NEXT: vlseg2e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) undef, ptr %base, i32 %vl, i32 4)
- %1 = call <vscale x 2 x bfloat> @llvm.riscv.tuple.extract.nxv2bf16.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0, i32 1)
- ret <vscale x 2 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0
}
-
-define <vscale x 2 x bfloat> @test_vlseg2_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_2t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @test_vlseg2_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_2t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg2_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT: vlseg2e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg2e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv4i8_2t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 4)
- %1 = call <vscale x 2 x bfloat> @llvm.riscv.tuple.extract.nxv2bf16.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0, i32 1)
- ret <vscale x 2 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0
}
-
-
-define <vscale x 4 x bfloat> @test_vlseg2_nxv4bf16_triscv.vector.tuple_nxv8i8_2t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @test_vlseg2_nxv4bf16_triscv.vector.tuple_nxv8i8_2t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg2_nxv4bf16_triscv.vector.tuple_nxv8i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vlseg2e16.v v7, (a0)
+; CHECK-NEXT: vlseg2e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) undef, ptr %base, i32 %vl, i32 4)
- %1 = call <vscale x 4 x bfloat> @llvm.riscv.tuple.extract.nxv4bf16.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0, i32 1)
- ret <vscale x 4 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0
}
-
-define <vscale x 4 x bfloat> @test_vlseg2_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_2t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @test_vlseg2_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_2t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: test_vlseg2_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vlseg2e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg2e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv8i8_2t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) undef, ptr %base, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 4)
- %1 = call <vscale x 4 x bfloat> @llvm.riscv.tuple.extract.nxv4bf16.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0, i32 1)
- ret <vscale x 4 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0
}
-
-
-define <vscale x 8 x bfloat> @test_vlseg2_nxv8bf16_triscv.vector.tuple_nxv16i8_2t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @test_vlseg2_nxv8bf16_triscv.vector.tuple_nxv16i8_2t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg2_nxv8bf16_triscv.vector.tuple_nxv16i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vlseg2e16.v v6, (a0)
+; CHECK-NEXT: vlseg2e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, i32 %vl, i32 4)
- %1 = call <vscale x 8 x bfloat> @llvm.riscv.tuple.extract.nxv8bf16.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0, i32 1)
- ret <vscale x 8 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0
}
-
-define <vscale x 8 x bfloat> @test_vlseg2_mask_nxv8bf16_triscv.vector.tuple_nxv16i8_2t(ptr %base, i32 %vl, <vscale x 8 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @test_vlseg2_mask_nxv8bf16_triscv.vector.tuple_nxv16i8_2t(ptr %base, i32 %vl, <vscale x 8 x i1> %mask) {
; CHECK-LABEL: test_vlseg2_mask_nxv8bf16_triscv.vector.tuple_nxv16i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vlseg2e16.v v6, (a0), v0.t
+; CHECK-NEXT: vlseg2e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv16i8_2t.nxv8i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 8 x i1> %mask, i32 %vl, i32 1, i32 4)
- %1 = call <vscale x 8 x bfloat> @llvm.riscv.tuple.extract.nxv8bf16.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0, i32 1)
- ret <vscale x 8 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0
}
-
-
-define <vscale x 16 x bfloat> @test_vlseg2_nxv16bf16_triscv.vector.tuple_nxv32i8_2t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @test_vlseg2_nxv16bf16_triscv.vector.tuple_nxv32i8_2t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg2_nxv16bf16_triscv.vector.tuple_nxv32i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
-; CHECK-NEXT: vlseg2e16.v v4, (a0)
+; CHECK-NEXT: vlseg2e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, i32 %vl, i32 4)
- %1 = call <vscale x 16 x bfloat> @llvm.riscv.tuple.extract.nxv16bf16.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0, i32 1)
- ret <vscale x 16 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0
}
-
-define <vscale x 16 x bfloat> @test_vlseg2_mask_nxv16bf16_triscv.vector.tuple_nxv32i8_2t(ptr %base, i32 %vl, <vscale x 16 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @test_vlseg2_mask_nxv16bf16_triscv.vector.tuple_nxv32i8_2t(ptr %base, i32 %vl, <vscale x 16 x i1> %mask) {
; CHECK-LABEL: test_vlseg2_mask_nxv16bf16_triscv.vector.tuple_nxv32i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
-; CHECK-NEXT: vlseg2e16.v v4, (a0), v0.t
+; CHECK-NEXT: vlseg2e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv32i8_2t.nxv16i1(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 16 x i1> %mask, i32 %vl, i32 1, i32 4)
- %1 = call <vscale x 16 x bfloat> @llvm.riscv.tuple.extract.nxv16bf16.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0, i32 1)
- ret <vscale x 16 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0
}
-
-
-define <vscale x 1 x bfloat> @test_vlseg3_nxv1bf16_triscv.vector.tuple_nxv2i8_3t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 3) @test_vlseg3_nxv1bf16_triscv.vector.tuple_nxv2i8_3t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg3_nxv1bf16_triscv.vector.tuple_nxv2i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT: vlseg3e16.v v7, (a0)
+; CHECK-NEXT: vlseg3e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv2i8_3t(target("riscv.vector.tuple", <vscale x 2 x i8>, 3) undef, ptr %base, i32 %vl, i32 4)
- %1 = call <vscale x 1 x bfloat> @llvm.riscv.tuple.extract.nxv1bf16.triscv.vector.tuple_nxv2i8_3t(target("riscv.vector.tuple", <vscale x 2 x i8>, 3) %0, i32 1)
- ret <vscale x 1 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 3) %0
}
-
-define <vscale x 1 x bfloat> @test_vlseg3_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_3t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 3) @test_vlseg3_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_3t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg3_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT: vlseg3e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg3e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv2i8_3t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 3) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 4)
- %1 = call <vscale x 1 x bfloat> @llvm.riscv.tuple.extract.nxv1bf16.triscv.vector.tuple_nxv2i8_3t(target("riscv.vector.tuple", <vscale x 2 x i8>, 3) %0, i32 1)
- ret <vscale x 1 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 3) %0
}
-
-
-define <vscale x 2 x bfloat> @test_vlseg3_nxv2bf16_triscv.vector.tuple_nxv4i8_3t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @test_vlseg3_nxv2bf16_triscv.vector.tuple_nxv4i8_3t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg3_nxv2bf16_triscv.vector.tuple_nxv4i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT: vlseg3e16.v v7, (a0)
+; CHECK-NEXT: vlseg3e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) undef, ptr %base, i32 %vl, i32 4)
- %1 = call <vscale x 2 x bfloat> @llvm.riscv.tuple.extract.nxv2bf16.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0, i32 1)
- ret <vscale x 2 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0
}
-
-define <vscale x 2 x bfloat> @test_vlseg3_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_3t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @test_vlseg3_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_3t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg3_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT: vlseg3e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg3e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv4i8_3t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 4)
- %1 = call <vscale x 2 x bfloat> @llvm.riscv.tuple.extract.nxv2bf16.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0, i32 1)
- ret <vscale x 2 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0
}
-
-
-define <vscale x 4 x bfloat> @test_vlseg3_nxv4bf16_triscv.vector.tuple_nxv8i8_3t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @test_vlseg3_nxv4bf16_triscv.vector.tuple_nxv8i8_3t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg3_nxv4bf16_triscv.vector.tuple_nxv8i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vlseg3e16.v v7, (a0)
+; CHECK-NEXT: vlseg3e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) undef, ptr %base, i32 %vl, i32 4)
- %1 = call <vscale x 4 x bfloat> @llvm.riscv.tuple.extract.nxv4bf16.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0, i32 1)
- ret <vscale x 4 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0
}
-
-define <vscale x 4 x bfloat> @test_vlseg3_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_3t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @test_vlseg3_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_3t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: test_vlseg3_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vlseg3e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg3e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv8i8_3t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) undef, ptr %base, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 4)
- %1 = call <vscale x 4 x bfloat> @llvm.riscv.tuple.extract.nxv4bf16.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0, i32 1)
- ret <vscale x 4 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0
}
-
-
-define <vscale x 8 x bfloat> @test_vlseg3_nxv8bf16_triscv.vector.tuple_nxv16i8_3t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @test_vlseg3_nxv8bf16_triscv.vector.tuple_nxv16i8_3t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg3_nxv8bf16_triscv.vector.tuple_nxv16i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vlseg3e16.v v6, (a0)
+; CHECK-NEXT: vlseg3e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, i32 %vl, i32 4)
- %1 = call <vscale x 8 x bfloat> @llvm.riscv.tuple.extract.nxv8bf16.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0, i32 1)
- ret <vscale x 8 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0
}
-
-define <vscale x 8 x bfloat> @test_vlseg3_mask_nxv8bf16_triscv.vector.tuple_nxv16i8_3t(ptr %base, i32 %vl, <vscale x 8 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @test_vlseg3_mask_nxv8bf16_triscv.vector.tuple_nxv16i8_3t(ptr %base, i32 %vl, <vscale x 8 x i1> %mask) {
; CHECK-LABEL: test_vlseg3_mask_nxv8bf16_triscv.vector.tuple_nxv16i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vlseg3e16.v v6, (a0), v0.t
+; CHECK-NEXT: vlseg3e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv8i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 8 x i1> %mask, i32 %vl, i32 1, i32 4)
- %1 = call <vscale x 8 x bfloat> @llvm.riscv.tuple.extract.nxv8bf16.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0, i32 1)
- ret <vscale x 8 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0
}
-
-
-define <vscale x 1 x bfloat> @test_vlseg4_nxv1bf16_triscv.vector.tuple_nxv2i8_4t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 4) @test_vlseg4_nxv1bf16_triscv.vector.tuple_nxv2i8_4t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg4_nxv1bf16_triscv.vector.tuple_nxv2i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT: vlseg4e16.v v7, (a0)
+; CHECK-NEXT: vlseg4e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv2i8_4t(target("riscv.vector.tuple", <vscale x 2 x i8>, 4) undef, ptr %base, i32 %vl, i32 4)
- %1 = call <vscale x 1 x bfloat> @llvm.riscv.tuple.extract.nxv1bf16.triscv.vector.tuple_nxv2i8_4t(target("riscv.vector.tuple", <vscale x 2 x i8>, 4) %0, i32 1)
- ret <vscale x 1 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 4) %0
}
-
-define <vscale x 1 x bfloat> @test_vlseg4_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_4t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 4) @test_vlseg4_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_4t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg4_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT: vlseg4e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg4e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv2i8_4t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 4) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 4)
- %1 = call <vscale x 1 x bfloat> @llvm.riscv.tuple.extract.nxv1bf16.triscv.vector.tuple_nxv2i8_4t(target("riscv.vector.tuple", <vscale x 2 x i8>, 4) %0, i32 1)
- ret <vscale x 1 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 4) %0
}
-
-
-define <vscale x 2 x bfloat> @test_vlseg4_nxv2bf16_triscv.vector.tuple_nxv4i8_4t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @test_vlseg4_nxv2bf16_triscv.vector.tuple_nxv4i8_4t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg4_nxv2bf16_triscv.vector.tuple_nxv4i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT: vlseg4e16.v v7, (a0)
+; CHECK-NEXT: vlseg4e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) undef, ptr %base, i32 %vl, i32 4)
- %1 = call <vscale x 2 x bfloat> @llvm.riscv.tuple.extract.nxv2bf16.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0, i32 1)
- ret <vscale x 2 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0
}
-
-define <vscale x 2 x bfloat> @test_vlseg4_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_4t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @test_vlseg4_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_4t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg4_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT: vlseg4e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg4e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv4i8_4t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 4)
- %1 = call <vscale x 2 x bfloat> @llvm.riscv.tuple.extract.nxv2bf16.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0, i32 1)
- ret <vscale x 2 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0
}
-
-
-define <vscale x 4 x bfloat> @test_vlseg4_nxv4bf16_triscv.vector.tuple_nxv8i8_4t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @test_vlseg4_nxv4bf16_triscv.vector.tuple_nxv8i8_4t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg4_nxv4bf16_triscv.vector.tuple_nxv8i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vlseg4e16.v v7, (a0)
+; CHECK-NEXT: vlseg4e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) undef, ptr %base, i32 %vl, i32 4)
- %1 = call <vscale x 4 x bfloat> @llvm.riscv.tuple.extract.nxv4bf16.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0, i32 1)
- ret <vscale x 4 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0
}
-
-define <vscale x 4 x bfloat> @test_vlseg4_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_4t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @test_vlseg4_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_4t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: test_vlseg4_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vlseg4e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg4e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv8i8_4t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) undef, ptr %base, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 4)
- %1 = call <vscale x 4 x bfloat> @llvm.riscv.tuple.extract.nxv4bf16.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0, i32 1)
- ret <vscale x 4 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0
}
-
-
-define <vscale x 8 x bfloat> @test_vlseg4_nxv8bf16_triscv.vector.tuple_nxv16i8_4t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @test_vlseg4_nxv8bf16_triscv.vector.tuple_nxv16i8_4t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg4_nxv8bf16_triscv.vector.tuple_nxv16i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vlseg4e16.v v6, (a0)
+; CHECK-NEXT: vlseg4e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, i32 %vl, i32 4)
- %1 = call <vscale x 8 x bfloat> @llvm.riscv.tuple.extract.nxv8bf16.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0, i32 1)
- ret <vscale x 8 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0
}
-
-define <vscale x 8 x bfloat> @test_vlseg4_mask_nxv8bf16_triscv.vector.tuple_nxv16i8_4t(ptr %base, i32 %vl, <vscale x 8 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @test_vlseg4_mask_nxv8bf16_triscv.vector.tuple_nxv16i8_4t(ptr %base, i32 %vl, <vscale x 8 x i1> %mask) {
; CHECK-LABEL: test_vlseg4_mask_nxv8bf16_triscv.vector.tuple_nxv16i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vlseg4e16.v v6, (a0), v0.t
+; CHECK-NEXT: vlseg4e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv16i8_4t.nxv8i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 8 x i1> %mask, i32 %vl, i32 1, i32 4)
- %1 = call <vscale x 8 x bfloat> @llvm.riscv.tuple.extract.nxv8bf16.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0, i32 1)
- ret <vscale x 8 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0
}
-
-
-define <vscale x 1 x bfloat> @test_vlseg5_nxv1bf16_triscv.vector.tuple_nxv2i8_5t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 5) @test_vlseg5_nxv1bf16_triscv.vector.tuple_nxv2i8_5t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg5_nxv1bf16_triscv.vector.tuple_nxv2i8_5t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT: vlseg5e16.v v7, (a0)
+; CHECK-NEXT: vlseg5e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv2i8_5t(target("riscv.vector.tuple", <vscale x 2 x i8>, 5) undef, ptr %base, i32 %vl, i32 4)
- %1 = call <vscale x 1 x bfloat> @llvm.riscv.tuple.extract.nxv1bf16.triscv.vector.tuple_nxv2i8_5t(target("riscv.vector.tuple", <vscale x 2 x i8>, 5) %0, i32 1)
- ret <vscale x 1 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 5) %0
}
-
-define <vscale x 1 x bfloat> @test_vlseg5_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_5t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 5) @test_vlseg5_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_5t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg5_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_5t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT: vlseg5e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg5e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv2i8_5t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 5) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 4)
- %1 = call <vscale x 1 x bfloat> @llvm.riscv.tuple.extract.nxv1bf16.triscv.vector.tuple_nxv2i8_5t(target("riscv.vector.tuple", <vscale x 2 x i8>, 5) %0, i32 1)
- ret <vscale x 1 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 5) %0
}
-
-
-define <vscale x 2 x bfloat> @test_vlseg5_nxv2bf16_triscv.vector.tuple_nxv4i8_5t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @test_vlseg5_nxv2bf16_triscv.vector.tuple_nxv4i8_5t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg5_nxv2bf16_triscv.vector.tuple_nxv4i8_5t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT: vlseg5e16.v v7, (a0)
+; CHECK-NEXT: vlseg5e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) undef, ptr %base, i32 %vl, i32 4)
- %1 = call <vscale x 2 x bfloat> @llvm.riscv.tuple.extract.nxv2bf16.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0, i32 1)
- ret <vscale x 2 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0
}
-
-define <vscale x 2 x bfloat> @test_vlseg5_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_5t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @test_vlseg5_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_5t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg5_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_5t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT: vlseg5e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg5e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv4i8_5t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 4)
- %1 = call <vscale x 2 x bfloat> @llvm.riscv.tuple.extract.nxv2bf16.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0, i32 1)
- ret <vscale x 2 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0
}
-
-
-define <vscale x 4 x bfloat> @test_vlseg5_nxv4bf16_triscv.vector.tuple_nxv8i8_5t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @test_vlseg5_nxv4bf16_triscv.vector.tuple_nxv8i8_5t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg5_nxv4bf16_triscv.vector.tuple_nxv8i8_5t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vlseg5e16.v v7, (a0)
+; CHECK-NEXT: vlseg5e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) undef, ptr %base, i32 %vl, i32 4)
- %1 = call <vscale x 4 x bfloat> @llvm.riscv.tuple.extract.nxv4bf16.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0, i32 1)
- ret <vscale x 4 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0
}
-
-define <vscale x 4 x bfloat> @test_vlseg5_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_5t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @test_vlseg5_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_5t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: test_vlseg5_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_5t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vlseg5e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg5e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv8i8_5t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) undef, ptr %base, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 4)
- %1 = call <vscale x 4 x bfloat> @llvm.riscv.tuple.extract.nxv4bf16.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0, i32 1)
- ret <vscale x 4 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0
}
-
-
-define <vscale x 1 x bfloat> @test_vlseg6_nxv1bf16_triscv.vector.tuple_nxv2i8_6t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 6) @test_vlseg6_nxv1bf16_triscv.vector.tuple_nxv2i8_6t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg6_nxv1bf16_triscv.vector.tuple_nxv2i8_6t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT: vlseg6e16.v v7, (a0)
+; CHECK-NEXT: vlseg6e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv2i8_6t(target("riscv.vector.tuple", <vscale x 2 x i8>, 6) undef, ptr %base, i32 %vl, i32 4)
- %1 = call <vscale x 1 x bfloat> @llvm.riscv.tuple.extract.nxv1bf16.triscv.vector.tuple_nxv2i8_6t(target("riscv.vector.tuple", <vscale x 2 x i8>, 6) %0, i32 1)
- ret <vscale x 1 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 6) %0
}
-
-define <vscale x 1 x bfloat> @test_vlseg6_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_6t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 6) @test_vlseg6_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_6t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg6_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_6t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT: vlseg6e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg6e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv2i8_6t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 6) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 4)
- %1 = call <vscale x 1 x bfloat> @llvm.riscv.tuple.extract.nxv1bf16.triscv.vector.tuple_nxv2i8_6t(target("riscv.vector.tuple", <vscale x 2 x i8>, 6) %0, i32 1)
- ret <vscale x 1 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 6) %0
}
-
-
-define <vscale x 2 x bfloat> @test_vlseg6_nxv2bf16_triscv.vector.tuple_nxv4i8_6t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @test_vlseg6_nxv2bf16_triscv.vector.tuple_nxv4i8_6t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg6_nxv2bf16_triscv.vector.tuple_nxv4i8_6t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT: vlseg6e16.v v7, (a0)
+; CHECK-NEXT: vlseg6e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) undef, ptr %base, i32 %vl, i32 4)
- %1 = call <vscale x 2 x bfloat> @llvm.riscv.tuple.extract.nxv2bf16.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0, i32 1)
- ret <vscale x 2 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0
}
-
-define <vscale x 2 x bfloat> @test_vlseg6_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_6t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @test_vlseg6_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_6t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg6_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_6t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT: vlseg6e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg6e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv4i8_6t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 4)
- %1 = call <vscale x 2 x bfloat> @llvm.riscv.tuple.extract.nxv2bf16.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0, i32 1)
- ret <vscale x 2 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0
}
-
-
-define <vscale x 4 x bfloat> @test_vlseg6_nxv4bf16_triscv.vector.tuple_nxv8i8_6t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @test_vlseg6_nxv4bf16_triscv.vector.tuple_nxv8i8_6t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg6_nxv4bf16_triscv.vector.tuple_nxv8i8_6t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vlseg6e16.v v7, (a0)
+; CHECK-NEXT: vlseg6e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) undef, ptr %base, i32 %vl, i32 4)
- %1 = call <vscale x 4 x bfloat> @llvm.riscv.tuple.extract.nxv4bf16.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0, i32 1)
- ret <vscale x 4 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0
}
-
-define <vscale x 4 x bfloat> @test_vlseg6_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_6t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @test_vlseg6_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_6t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: test_vlseg6_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_6t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vlseg6e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg6e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv8i8_6t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) undef, ptr %base, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 4)
- %1 = call <vscale x 4 x bfloat> @llvm.riscv.tuple.extract.nxv4bf16.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0, i32 1)
- ret <vscale x 4 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0
}
-
-
-define <vscale x 1 x bfloat> @test_vlseg7_nxv1bf16_triscv.vector.tuple_nxv2i8_7t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 7) @test_vlseg7_nxv1bf16_triscv.vector.tuple_nxv2i8_7t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg7_nxv1bf16_triscv.vector.tuple_nxv2i8_7t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT: vlseg7e16.v v7, (a0)
+; CHECK-NEXT: vlseg7e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv2i8_7t(target("riscv.vector.tuple", <vscale x 2 x i8>, 7) undef, ptr %base, i32 %vl, i32 4)
- %1 = call <vscale x 1 x bfloat> @llvm.riscv.tuple.extract.nxv1bf16.triscv.vector.tuple_nxv2i8_7t(target("riscv.vector.tuple", <vscale x 2 x i8>, 7) %0, i32 1)
- ret <vscale x 1 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 7) %0
}
-
-define <vscale x 1 x bfloat> @test_vlseg7_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_7t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 7) @test_vlseg7_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_7t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg7_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_7t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT: vlseg7e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg7e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv2i8_7t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 7) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 4)
- %1 = call <vscale x 1 x bfloat> @llvm.riscv.tuple.extract.nxv1bf16.triscv.vector.tuple_nxv2i8_7t(target("riscv.vector.tuple", <vscale x 2 x i8>, 7) %0, i32 1)
- ret <vscale x 1 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 7) %0
}
-
-
-define <vscale x 2 x bfloat> @test_vlseg7_nxv2bf16_triscv.vector.tuple_nxv4i8_7t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @test_vlseg7_nxv2bf16_triscv.vector.tuple_nxv4i8_7t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg7_nxv2bf16_triscv.vector.tuple_nxv4i8_7t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT: vlseg7e16.v v7, (a0)
+; CHECK-NEXT: vlseg7e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) undef, ptr %base, i32 %vl, i32 4)
- %1 = call <vscale x 2 x bfloat> @llvm.riscv.tuple.extract.nxv2bf16.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0, i32 1)
- ret <vscale x 2 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0
}
-
-define <vscale x 2 x bfloat> @test_vlseg7_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_7t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @test_vlseg7_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_7t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg7_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_7t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT: vlseg7e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg7e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv4i8_7t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 4)
- %1 = call <vscale x 2 x bfloat> @llvm.riscv.tuple.extract.nxv2bf16.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0, i32 1)
- ret <vscale x 2 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0
}
-
-
-define <vscale x 4 x bfloat> @test_vlseg7_nxv4bf16_triscv.vector.tuple_nxv8i8_7t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @test_vlseg7_nxv4bf16_triscv.vector.tuple_nxv8i8_7t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg7_nxv4bf16_triscv.vector.tuple_nxv8i8_7t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vlseg7e16.v v7, (a0)
+; CHECK-NEXT: vlseg7e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) undef, ptr %base, i32 %vl, i32 4)
- %1 = call <vscale x 4 x bfloat> @llvm.riscv.tuple.extract.nxv4bf16.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0, i32 1)
- ret <vscale x 4 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0
}
-
-define <vscale x 4 x bfloat> @test_vlseg7_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_7t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @test_vlseg7_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_7t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: test_vlseg7_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_7t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vlseg7e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg7e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv8i8_7t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) undef, ptr %base, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 4)
- %1 = call <vscale x 4 x bfloat> @llvm.riscv.tuple.extract.nxv4bf16.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0, i32 1)
- ret <vscale x 4 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0
}
-
-
-define <vscale x 1 x bfloat> @test_vlseg8_nxv1bf16_triscv.vector.tuple_nxv2i8_8t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 8) @test_vlseg8_nxv1bf16_triscv.vector.tuple_nxv2i8_8t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg8_nxv1bf16_triscv.vector.tuple_nxv2i8_8t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT: vlseg8e16.v v7, (a0)
+; CHECK-NEXT: vlseg8e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv2i8_8t(target("riscv.vector.tuple", <vscale x 2 x i8>, 8) undef, ptr %base, i32 %vl, i32 4)
- %1 = call <vscale x 1 x bfloat> @llvm.riscv.tuple.extract.nxv1bf16.triscv.vector.tuple_nxv2i8_8t(target("riscv.vector.tuple", <vscale x 2 x i8>, 8) %0, i32 1)
- ret <vscale x 1 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 8) %0
}
-
-define <vscale x 1 x bfloat> @test_vlseg8_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_8t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 8) @test_vlseg8_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_8t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg8_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_8t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT: vlseg8e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg8e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv2i8_8t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 8) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 4)
- %1 = call <vscale x 1 x bfloat> @llvm.riscv.tuple.extract.nxv1bf16.triscv.vector.tuple_nxv2i8_8t(target("riscv.vector.tuple", <vscale x 2 x i8>, 8) %0, i32 1)
- ret <vscale x 1 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 8) %0
}
-
-
-define <vscale x 2 x bfloat> @test_vlseg8_nxv2bf16_triscv.vector.tuple_nxv4i8_8t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @test_vlseg8_nxv2bf16_triscv.vector.tuple_nxv4i8_8t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg8_nxv2bf16_triscv.vector.tuple_nxv4i8_8t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT: vlseg8e16.v v7, (a0)
+; CHECK-NEXT: vlseg8e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) undef, ptr %base, i32 %vl, i32 4)
- %1 = call <vscale x 2 x bfloat> @llvm.riscv.tuple.extract.nxv2bf16.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0, i32 1)
- ret <vscale x 2 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0
}
-
-define <vscale x 2 x bfloat> @test_vlseg8_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_8t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @test_vlseg8_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_8t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg8_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_8t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT: vlseg8e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg8e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv4i8_8t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 4)
- %1 = call <vscale x 2 x bfloat> @llvm.riscv.tuple.extract.nxv2bf16.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0, i32 1)
- ret <vscale x 2 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0
}
-
-
-define <vscale x 4 x bfloat> @test_vlseg8_nxv4bf16_triscv.vector.tuple_nxv8i8_8t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @test_vlseg8_nxv4bf16_triscv.vector.tuple_nxv8i8_8t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg8_nxv4bf16_triscv.vector.tuple_nxv8i8_8t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vlseg8e16.v v7, (a0)
+; CHECK-NEXT: vlseg8e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) undef, ptr %base, i32 %vl, i32 4)
- %1 = call <vscale x 4 x bfloat> @llvm.riscv.tuple.extract.nxv4bf16.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0, i32 1)
- ret <vscale x 4 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0
}
-
-define <vscale x 4 x bfloat> @test_vlseg8_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_8t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @test_vlseg8_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_8t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: test_vlseg8_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_8t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vlseg8e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg8e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv8i8_8t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) undef, ptr %base, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 4)
- %1 = call <vscale x 4 x bfloat> @llvm.riscv.tuple.extract.nxv4bf16.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0, i32 1)
- ret <vscale x 4 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0
}
-
diff --git a/llvm/test/CodeGen/RISCV/rvv/vlseg-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vlseg-rv64.ll
index 16e5e7b9..faeabaf 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vlseg-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vlseg-rv64.ll
@@ -2,4330 +2,3373 @@
; RUN: llc -mtriple=riscv64 -mattr=+zve64d,+f,+d,+zvfh,+zvfbfmin \
; RUN: -verify-machineinstrs < %s | FileCheck %s
-declare target("riscv.vector.tuple", <vscale x 1 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv1i8_2t(target("riscv.vector.tuple", <vscale x 1 x i8>, 2), ptr, i64, i64)
-declare target("riscv.vector.tuple", <vscale x 1 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv1i8_2t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 2), ptr, <vscale x 1 x i1>, i64, i64, i64)
-
-define <vscale x 1 x i8> @test_vlseg2_nxv1i8_triscv.vector.tuple_nxv1i8_2t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 1 x i8>, 2) @test_vlseg2_nxv1i8_triscv.vector.tuple_nxv1i8_2t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg2_nxv1i8_triscv.vector.tuple_nxv1i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
-; CHECK-NEXT: vlseg2e8.v v7, (a0)
+; CHECK-NEXT: vlseg2e8.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv1i8_2t(target("riscv.vector.tuple", <vscale x 1 x i8>, 2) undef, ptr %base, i64 %vl, i64 3)
- %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_2t(target("riscv.vector.tuple", <vscale x 1 x i8>, 2) %0, i32 1)
- ret <vscale x 1 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 1 x i8>, 2) %0
}
-
-define <vscale x 1 x i8> @test_vlseg2_mask_nxv1i8_triscv.vector.tuple_nxv1i8_2t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 1 x i8>, 2) @test_vlseg2_mask_nxv1i8_triscv.vector.tuple_nxv1i8_2t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg2_mask_nxv1i8_triscv.vector.tuple_nxv1i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
-; CHECK-NEXT: vlseg2e8.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg2e8.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv1i8_2t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 2) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 3)
- %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_2t(target("riscv.vector.tuple", <vscale x 1 x i8>, 2) %0, i32 1)
- ret <vscale x 1 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 1 x i8>, 2) %0
}
-
-define <vscale x 1 x i8> @test_vlseg2_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_2t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 1 x i8>, 2) @test_vlseg2_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_2t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg2_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
-; CHECK-NEXT: vlseg2e8.v v7, (a0)
+; CHECK-NEXT: vlseg2e8.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv1i8_2t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 2) undef, ptr %base, <vscale x 1 x i1> splat (i1 true), i64 %vl, i64 1, i64 3)
- %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_2t(target("riscv.vector.tuple", <vscale x 1 x i8>, 2) %0, i32 1)
- ret <vscale x 1 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 1 x i8>, 2) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 2 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv2i8_2t(target("riscv.vector.tuple", <vscale x 2 x i8>, 2), ptr, i64, i64)
-declare target("riscv.vector.tuple", <vscale x 2 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv2i8_2t.nxv2i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 2), ptr, <vscale x 2 x i1>, i64, i64, i64)
-
-define <vscale x 2 x i8> @test_vlseg2_nxv2i8_triscv.vector.tuple_nxv2i8_2t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 2) @test_vlseg2_nxv2i8_triscv.vector.tuple_nxv2i8_2t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg2_nxv2i8_triscv.vector.tuple_nxv2i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
-; CHECK-NEXT: vlseg2e8.v v7, (a0)
+; CHECK-NEXT: vlseg2e8.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv2i8_2t(target("riscv.vector.tuple", <vscale x 2 x i8>, 2) undef, ptr %base, i64 %vl, i64 3)
- %1 = call <vscale x 2 x i8> @llvm.riscv.tuple.extract.nxv2i8.triscv.vector.tuple_nxv2i8_2t(target("riscv.vector.tuple", <vscale x 2 x i8>, 2) %0, i32 1)
- ret <vscale x 2 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 2) %0
}
-
-define <vscale x 2 x i8> @test_vlseg2_mask_nxv2i8_triscv.vector.tuple_nxv2i8_2t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 2) @test_vlseg2_mask_nxv2i8_triscv.vector.tuple_nxv2i8_2t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg2_mask_nxv2i8_triscv.vector.tuple_nxv2i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
-; CHECK-NEXT: vlseg2e8.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg2e8.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv2i8_2t.nxv2i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 2) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 3)
- %1 = call <vscale x 2 x i8> @llvm.riscv.tuple.extract.nxv2i8.triscv.vector.tuple_nxv2i8_2t(target("riscv.vector.tuple", <vscale x 2 x i8>, 2) %0, i32 1)
- ret <vscale x 2 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 2) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2), ptr, i64, i64)
-declare target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv4i8_2t.nxv4i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 2), ptr, <vscale x 4 x i1>, i64, i64, i64)
-
-define <vscale x 4 x i8> @test_vlseg2_nxv4i8_triscv.vector.tuple_nxv4i8_2t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @test_vlseg2_nxv4i8_triscv.vector.tuple_nxv4i8_2t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg2_nxv4i8_triscv.vector.tuple_nxv4i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
-; CHECK-NEXT: vlseg2e8.v v7, (a0)
+; CHECK-NEXT: vlseg2e8.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) undef, ptr %base, i64 %vl, i64 3)
- %1 = call <vscale x 4 x i8> @llvm.riscv.tuple.extract.nxv4i8.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0, i32 1)
- ret <vscale x 4 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0
}
-
-define <vscale x 4 x i8> @test_vlseg2_mask_nxv4i8_triscv.vector.tuple_nxv4i8_2t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @test_vlseg2_mask_nxv4i8_triscv.vector.tuple_nxv4i8_2t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: test_vlseg2_mask_nxv4i8_triscv.vector.tuple_nxv4i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
-; CHECK-NEXT: vlseg2e8.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg2e8.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv4i8_2t.nxv4i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) undef, ptr %base, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 3)
- %1 = call <vscale x 4 x i8> @llvm.riscv.tuple.extract.nxv4i8.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0, i32 1)
- ret <vscale x 4 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2), ptr, i64, i64)
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv8i8_2t.nxv8i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 2), ptr, <vscale x 8 x i1>, i64, i64, i64)
-
-define <vscale x 8 x i8> @test_vlseg2_nxv8i8_triscv.vector.tuple_nxv8i8_2t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @test_vlseg2_nxv8i8_triscv.vector.tuple_nxv8i8_2t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg2_nxv8i8_triscv.vector.tuple_nxv8i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
-; CHECK-NEXT: vlseg2e8.v v7, (a0)
+; CHECK-NEXT: vlseg2e8.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) undef, ptr %base, i64 %vl, i64 3)
- %1 = call <vscale x 8 x i8> @llvm.riscv.tuple.extract.nxv8i8.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0, i32 1)
- ret <vscale x 8 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0
}
-
-define <vscale x 8 x i8> @test_vlseg2_mask_nxv8i8_triscv.vector.tuple_nxv8i8_2t(ptr %base, i64 %vl, <vscale x 8 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @test_vlseg2_mask_nxv8i8_triscv.vector.tuple_nxv8i8_2t(ptr %base, i64 %vl, <vscale x 8 x i1> %mask) {
; CHECK-LABEL: test_vlseg2_mask_nxv8i8_triscv.vector.tuple_nxv8i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
-; CHECK-NEXT: vlseg2e8.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg2e8.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv8i8_2t.nxv8i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) undef, ptr %base, <vscale x 8 x i1> %mask, i64 %vl, i64 1, i64 3)
- %1 = call <vscale x 8 x i8> @llvm.riscv.tuple.extract.nxv8i8.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0, i32 1)
- ret <vscale x 8 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2), ptr, i64, i64)
-declare target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv16i8_2t.nxv16i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 2), ptr, <vscale x 16 x i1>, i64, i64, i64)
-
-define <vscale x 16 x i8> @test_vlseg2_nxv16i8_triscv.vector.tuple_nxv16i8_2t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @test_vlseg2_nxv16i8_triscv.vector.tuple_nxv16i8_2t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg2_nxv16i8_triscv.vector.tuple_nxv16i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
-; CHECK-NEXT: vlseg2e8.v v6, (a0)
+; CHECK-NEXT: vlseg2e8.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, i64 %vl, i64 3)
- %1 = call <vscale x 16 x i8> @llvm.riscv.tuple.extract.nxv16i8.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0, i32 1)
- ret <vscale x 16 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0
}
-
-define <vscale x 16 x i8> @test_vlseg2_mask_nxv16i8_triscv.vector.tuple_nxv16i8_2t(ptr %base, i64 %vl, <vscale x 16 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @test_vlseg2_mask_nxv16i8_triscv.vector.tuple_nxv16i8_2t(ptr %base, i64 %vl, <vscale x 16 x i1> %mask) {
; CHECK-LABEL: test_vlseg2_mask_nxv16i8_triscv.vector.tuple_nxv16i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
-; CHECK-NEXT: vlseg2e8.v v6, (a0), v0.t
+; CHECK-NEXT: vlseg2e8.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv16i8_2t.nxv16i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 16 x i1> %mask, i64 %vl, i64 1, i64 3)
- %1 = call <vscale x 16 x i8> @llvm.riscv.tuple.extract.nxv16i8.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0, i32 1)
- ret <vscale x 16 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2), ptr, i64, i64)
-declare target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv32i8_2t.nxv32i1(target("riscv.vector.tuple", <vscale x 32 x i8>, 2), ptr, <vscale x 32 x i1>, i64, i64, i64)
-
-define <vscale x 32 x i8> @test_vlseg2_nxv32i8_triscv.vector.tuple_nxv32i8_2t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @test_vlseg2_nxv32i8_triscv.vector.tuple_nxv32i8_2t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg2_nxv32i8_triscv.vector.tuple_nxv32i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma
-; CHECK-NEXT: vlseg2e8.v v4, (a0)
+; CHECK-NEXT: vlseg2e8.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, i64 %vl, i64 3)
- %1 = call <vscale x 32 x i8> @llvm.riscv.tuple.extract.nxv32i8.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0, i32 1)
- ret <vscale x 32 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0
}
-
-define <vscale x 32 x i8> @test_vlseg2_mask_nxv32i8_triscv.vector.tuple_nxv32i8_2t(ptr %base, i64 %vl, <vscale x 32 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @test_vlseg2_mask_nxv32i8_triscv.vector.tuple_nxv32i8_2t(ptr %base, i64 %vl, <vscale x 32 x i1> %mask) {
; CHECK-LABEL: test_vlseg2_mask_nxv32i8_triscv.vector.tuple_nxv32i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma
-; CHECK-NEXT: vlseg2e8.v v4, (a0), v0.t
+; CHECK-NEXT: vlseg2e8.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv32i8_2t.nxv32i1(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 32 x i1> %mask, i64 %vl, i64 1, i64 3)
- %1 = call <vscale x 32 x i8> @llvm.riscv.tuple.extract.nxv32i8.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0, i32 1)
- ret <vscale x 32 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 1 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv1i8_3t(target("riscv.vector.tuple", <vscale x 1 x i8>, 3), ptr, i64, i64)
-declare target("riscv.vector.tuple", <vscale x 1 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv1i8_3t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 3), ptr, <vscale x 1 x i1>, i64, i64, i64)
-
-define <vscale x 1 x i8> @test_vlseg3_nxv1i8_triscv.vector.tuple_nxv1i8_3t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 1 x i8>, 3) @test_vlseg3_nxv1i8_triscv.vector.tuple_nxv1i8_3t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg3_nxv1i8_triscv.vector.tuple_nxv1i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
-; CHECK-NEXT: vlseg3e8.v v7, (a0)
+; CHECK-NEXT: vlseg3e8.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv1i8_3t(target("riscv.vector.tuple", <vscale x 1 x i8>, 3) undef, ptr %base, i64 %vl, i64 3)
- %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_3t(target("riscv.vector.tuple", <vscale x 1 x i8>, 3) %0, i32 1)
- ret <vscale x 1 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 1 x i8>, 3) %0
}
-
-define <vscale x 1 x i8> @test_vlseg3_mask_nxv1i8_triscv.vector.tuple_nxv1i8_3t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 1 x i8>, 3) @test_vlseg3_mask_nxv1i8_triscv.vector.tuple_nxv1i8_3t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg3_mask_nxv1i8_triscv.vector.tuple_nxv1i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
-; CHECK-NEXT: vlseg3e8.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg3e8.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv1i8_3t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 3) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 3)
- %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_3t(target("riscv.vector.tuple", <vscale x 1 x i8>, 3) %0, i32 1)
- ret <vscale x 1 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 1 x i8>, 3) %0
}
-
-define <vscale x 1 x i8> @test_vlseg3_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_3t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 1 x i8>, 3) @test_vlseg3_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_3t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg3_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
-; CHECK-NEXT: vlseg3e8.v v7, (a0)
+; CHECK-NEXT: vlseg3e8.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv1i8_3t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 3) undef, ptr %base, <vscale x 1 x i1> splat (i1 true), i64 %vl, i64 1, i64 3)
- %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_3t(target("riscv.vector.tuple", <vscale x 1 x i8>, 3) %0, i32 1)
- ret <vscale x 1 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 1 x i8>, 3) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 2 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv2i8_3t(target("riscv.vector.tuple", <vscale x 2 x i8>, 3), ptr, i64, i64)
-declare target("riscv.vector.tuple", <vscale x 2 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv2i8_3t.nxv2i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 3), ptr, <vscale x 2 x i1>, i64, i64, i64)
-
-define <vscale x 2 x i8> @test_vlseg3_nxv2i8_triscv.vector.tuple_nxv2i8_3t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 3) @test_vlseg3_nxv2i8_triscv.vector.tuple_nxv2i8_3t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg3_nxv2i8_triscv.vector.tuple_nxv2i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
-; CHECK-NEXT: vlseg3e8.v v7, (a0)
+; CHECK-NEXT: vlseg3e8.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv2i8_3t(target("riscv.vector.tuple", <vscale x 2 x i8>, 3) undef, ptr %base, i64 %vl, i64 3)
- %1 = call <vscale x 2 x i8> @llvm.riscv.tuple.extract.nxv2i8.triscv.vector.tuple_nxv2i8_3t(target("riscv.vector.tuple", <vscale x 2 x i8>, 3) %0, i32 1)
- ret <vscale x 2 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 3) %0
}
-
-define <vscale x 2 x i8> @test_vlseg3_mask_nxv2i8_triscv.vector.tuple_nxv2i8_3t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 3) @test_vlseg3_mask_nxv2i8_triscv.vector.tuple_nxv2i8_3t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg3_mask_nxv2i8_triscv.vector.tuple_nxv2i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
-; CHECK-NEXT: vlseg3e8.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg3e8.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv2i8_3t.nxv2i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 3) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 3)
- %1 = call <vscale x 2 x i8> @llvm.riscv.tuple.extract.nxv2i8.triscv.vector.tuple_nxv2i8_3t(target("riscv.vector.tuple", <vscale x 2 x i8>, 3) %0, i32 1)
- ret <vscale x 2 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 3) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3), ptr, i64, i64)
-declare target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv4i8_3t.nxv4i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 3), ptr, <vscale x 4 x i1>, i64, i64, i64)
-
-define <vscale x 4 x i8> @test_vlseg3_nxv4i8_triscv.vector.tuple_nxv4i8_3t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @test_vlseg3_nxv4i8_triscv.vector.tuple_nxv4i8_3t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg3_nxv4i8_triscv.vector.tuple_nxv4i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
-; CHECK-NEXT: vlseg3e8.v v7, (a0)
+; CHECK-NEXT: vlseg3e8.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) undef, ptr %base, i64 %vl, i64 3)
- %1 = call <vscale x 4 x i8> @llvm.riscv.tuple.extract.nxv4i8.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0, i32 1)
- ret <vscale x 4 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0
}
-
-define <vscale x 4 x i8> @test_vlseg3_mask_nxv4i8_triscv.vector.tuple_nxv4i8_3t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @test_vlseg3_mask_nxv4i8_triscv.vector.tuple_nxv4i8_3t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: test_vlseg3_mask_nxv4i8_triscv.vector.tuple_nxv4i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
-; CHECK-NEXT: vlseg3e8.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg3e8.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv4i8_3t.nxv4i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) undef, ptr %base, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 3)
- %1 = call <vscale x 4 x i8> @llvm.riscv.tuple.extract.nxv4i8.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0, i32 1)
- ret <vscale x 4 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3), ptr, i64, i64)
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv8i8_3t.nxv8i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 3), ptr, <vscale x 8 x i1>, i64, i64, i64)
-
-define <vscale x 8 x i8> @test_vlseg3_nxv8i8_triscv.vector.tuple_nxv8i8_3t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @test_vlseg3_nxv8i8_triscv.vector.tuple_nxv8i8_3t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg3_nxv8i8_triscv.vector.tuple_nxv8i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
-; CHECK-NEXT: vlseg3e8.v v7, (a0)
+; CHECK-NEXT: vlseg3e8.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) undef, ptr %base, i64 %vl, i64 3)
- %1 = call <vscale x 8 x i8> @llvm.riscv.tuple.extract.nxv8i8.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0, i32 1)
- ret <vscale x 8 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0
}
-
-define <vscale x 8 x i8> @test_vlseg3_mask_nxv8i8_triscv.vector.tuple_nxv8i8_3t(ptr %base, i64 %vl, <vscale x 8 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @test_vlseg3_mask_nxv8i8_triscv.vector.tuple_nxv8i8_3t(ptr %base, i64 %vl, <vscale x 8 x i1> %mask) {
; CHECK-LABEL: test_vlseg3_mask_nxv8i8_triscv.vector.tuple_nxv8i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
-; CHECK-NEXT: vlseg3e8.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg3e8.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv8i8_3t.nxv8i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) undef, ptr %base, <vscale x 8 x i1> %mask, i64 %vl, i64 1, i64 3)
- %1 = call <vscale x 8 x i8> @llvm.riscv.tuple.extract.nxv8i8.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0, i32 1)
- ret <vscale x 8 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3), ptr, i64, i64)
-declare target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv16i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 3), ptr, <vscale x 16 x i1>, i64, i64, i64)
-
-define <vscale x 16 x i8> @test_vlseg3_nxv16i8_triscv.vector.tuple_nxv16i8_3t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @test_vlseg3_nxv16i8_triscv.vector.tuple_nxv16i8_3t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg3_nxv16i8_triscv.vector.tuple_nxv16i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
-; CHECK-NEXT: vlseg3e8.v v6, (a0)
+; CHECK-NEXT: vlseg3e8.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, i64 %vl, i64 3)
- %1 = call <vscale x 16 x i8> @llvm.riscv.tuple.extract.nxv16i8.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0, i32 1)
- ret <vscale x 16 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0
}
-
-define <vscale x 16 x i8> @test_vlseg3_mask_nxv16i8_triscv.vector.tuple_nxv16i8_3t(ptr %base, i64 %vl, <vscale x 16 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @test_vlseg3_mask_nxv16i8_triscv.vector.tuple_nxv16i8_3t(ptr %base, i64 %vl, <vscale x 16 x i1> %mask) {
; CHECK-LABEL: test_vlseg3_mask_nxv16i8_triscv.vector.tuple_nxv16i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
-; CHECK-NEXT: vlseg3e8.v v6, (a0), v0.t
+; CHECK-NEXT: vlseg3e8.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv16i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 16 x i1> %mask, i64 %vl, i64 1, i64 3)
- %1 = call <vscale x 16 x i8> @llvm.riscv.tuple.extract.nxv16i8.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0, i32 1)
- ret <vscale x 16 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 1 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv1i8_4t(target("riscv.vector.tuple", <vscale x 1 x i8>, 4), ptr, i64, i64)
-declare target("riscv.vector.tuple", <vscale x 1 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv1i8_4t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 4), ptr, <vscale x 1 x i1>, i64, i64, i64)
-
-define <vscale x 1 x i8> @test_vlseg4_nxv1i8_triscv.vector.tuple_nxv1i8_4t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 1 x i8>, 4) @test_vlseg4_nxv1i8_triscv.vector.tuple_nxv1i8_4t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg4_nxv1i8_triscv.vector.tuple_nxv1i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
-; CHECK-NEXT: vlseg4e8.v v7, (a0)
+; CHECK-NEXT: vlseg4e8.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv1i8_4t(target("riscv.vector.tuple", <vscale x 1 x i8>, 4) undef, ptr %base, i64 %vl, i64 3)
- %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_4t(target("riscv.vector.tuple", <vscale x 1 x i8>, 4) %0, i32 1)
- ret <vscale x 1 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 1 x i8>, 4) %0
}
-
-define <vscale x 1 x i8> @test_vlseg4_mask_nxv1i8_triscv.vector.tuple_nxv1i8_4t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 1 x i8>, 4) @test_vlseg4_mask_nxv1i8_triscv.vector.tuple_nxv1i8_4t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg4_mask_nxv1i8_triscv.vector.tuple_nxv1i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
-; CHECK-NEXT: vlseg4e8.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg4e8.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv1i8_4t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 4) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 3)
- %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_4t(target("riscv.vector.tuple", <vscale x 1 x i8>, 4) %0, i32 1)
- ret <vscale x 1 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 1 x i8>, 4) %0
}
-
-define <vscale x 1 x i8> @test_vlseg4_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_4t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 1 x i8>, 4) @test_vlseg4_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_4t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg4_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
-; CHECK-NEXT: vlseg4e8.v v7, (a0)
+; CHECK-NEXT: vlseg4e8.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv1i8_4t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 4) undef, ptr %base, <vscale x 1 x i1> splat (i1 true), i64 %vl, i64 1, i64 3)
- %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_4t(target("riscv.vector.tuple", <vscale x 1 x i8>, 4) %0, i32 1)
- ret <vscale x 1 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 1 x i8>, 4) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 2 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv2i8_4t(target("riscv.vector.tuple", <vscale x 2 x i8>, 4), ptr, i64, i64)
-declare target("riscv.vector.tuple", <vscale x 2 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv2i8_4t.nxv2i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 4), ptr, <vscale x 2 x i1>, i64, i64, i64)
-
-define <vscale x 2 x i8> @test_vlseg4_nxv2i8_triscv.vector.tuple_nxv2i8_4t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 4) @test_vlseg4_nxv2i8_triscv.vector.tuple_nxv2i8_4t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg4_nxv2i8_triscv.vector.tuple_nxv2i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
-; CHECK-NEXT: vlseg4e8.v v7, (a0)
+; CHECK-NEXT: vlseg4e8.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv2i8_4t(target("riscv.vector.tuple", <vscale x 2 x i8>, 4) undef, ptr %base, i64 %vl, i64 3)
- %1 = call <vscale x 2 x i8> @llvm.riscv.tuple.extract.nxv2i8.triscv.vector.tuple_nxv2i8_4t(target("riscv.vector.tuple", <vscale x 2 x i8>, 4) %0, i32 1)
- ret <vscale x 2 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 4) %0
}
-
-define <vscale x 2 x i8> @test_vlseg4_mask_nxv2i8_triscv.vector.tuple_nxv2i8_4t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 4) @test_vlseg4_mask_nxv2i8_triscv.vector.tuple_nxv2i8_4t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg4_mask_nxv2i8_triscv.vector.tuple_nxv2i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
-; CHECK-NEXT: vlseg4e8.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg4e8.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv2i8_4t.nxv2i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 4) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 3)
- %1 = call <vscale x 2 x i8> @llvm.riscv.tuple.extract.nxv2i8.triscv.vector.tuple_nxv2i8_4t(target("riscv.vector.tuple", <vscale x 2 x i8>, 4) %0, i32 1)
- ret <vscale x 2 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 4) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4), ptr, i64, i64)
-declare target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv4i8_4t.nxv4i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 4), ptr, <vscale x 4 x i1>, i64, i64, i64)
-
-define <vscale x 4 x i8> @test_vlseg4_nxv4i8_triscv.vector.tuple_nxv4i8_4t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @test_vlseg4_nxv4i8_triscv.vector.tuple_nxv4i8_4t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg4_nxv4i8_triscv.vector.tuple_nxv4i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
-; CHECK-NEXT: vlseg4e8.v v7, (a0)
+; CHECK-NEXT: vlseg4e8.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) undef, ptr %base, i64 %vl, i64 3)
- %1 = call <vscale x 4 x i8> @llvm.riscv.tuple.extract.nxv4i8.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0, i32 1)
- ret <vscale x 4 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0
}
-
-define <vscale x 4 x i8> @test_vlseg4_mask_nxv4i8_triscv.vector.tuple_nxv4i8_4t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @test_vlseg4_mask_nxv4i8_triscv.vector.tuple_nxv4i8_4t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: test_vlseg4_mask_nxv4i8_triscv.vector.tuple_nxv4i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
-; CHECK-NEXT: vlseg4e8.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg4e8.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv4i8_4t.nxv4i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) undef, ptr %base, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 3)
- %1 = call <vscale x 4 x i8> @llvm.riscv.tuple.extract.nxv4i8.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0, i32 1)
- ret <vscale x 4 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4), ptr, i64, i64)
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv8i8_4t.nxv8i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 4), ptr, <vscale x 8 x i1>, i64, i64, i64)
-
-define <vscale x 8 x i8> @test_vlseg4_nxv8i8_triscv.vector.tuple_nxv8i8_4t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @test_vlseg4_nxv8i8_triscv.vector.tuple_nxv8i8_4t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg4_nxv8i8_triscv.vector.tuple_nxv8i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
-; CHECK-NEXT: vlseg4e8.v v7, (a0)
+; CHECK-NEXT: vlseg4e8.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) undef, ptr %base, i64 %vl, i64 3)
- %1 = call <vscale x 8 x i8> @llvm.riscv.tuple.extract.nxv8i8.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0, i32 1)
- ret <vscale x 8 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0
}
-
-define <vscale x 8 x i8> @test_vlseg4_mask_nxv8i8_triscv.vector.tuple_nxv8i8_4t(ptr %base, i64 %vl, <vscale x 8 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @test_vlseg4_mask_nxv8i8_triscv.vector.tuple_nxv8i8_4t(ptr %base, i64 %vl, <vscale x 8 x i1> %mask) {
; CHECK-LABEL: test_vlseg4_mask_nxv8i8_triscv.vector.tuple_nxv8i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
-; CHECK-NEXT: vlseg4e8.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg4e8.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv8i8_4t.nxv8i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) undef, ptr %base, <vscale x 8 x i1> %mask, i64 %vl, i64 1, i64 3)
- %1 = call <vscale x 8 x i8> @llvm.riscv.tuple.extract.nxv8i8.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0, i32 1)
- ret <vscale x 8 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4), ptr, i64, i64)
-declare target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv16i8_4t.nxv16i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 4), ptr, <vscale x 16 x i1>, i64, i64, i64)
-
-define <vscale x 16 x i8> @test_vlseg4_nxv16i8_triscv.vector.tuple_nxv16i8_4t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @test_vlseg4_nxv16i8_triscv.vector.tuple_nxv16i8_4t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg4_nxv16i8_triscv.vector.tuple_nxv16i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
-; CHECK-NEXT: vlseg4e8.v v6, (a0)
+; CHECK-NEXT: vlseg4e8.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, i64 %vl, i64 3)
- %1 = call <vscale x 16 x i8> @llvm.riscv.tuple.extract.nxv16i8.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0, i32 1)
- ret <vscale x 16 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0
}
-
-define <vscale x 16 x i8> @test_vlseg4_mask_nxv16i8_triscv.vector.tuple_nxv16i8_4t(ptr %base, i64 %vl, <vscale x 16 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @test_vlseg4_mask_nxv16i8_triscv.vector.tuple_nxv16i8_4t(ptr %base, i64 %vl, <vscale x 16 x i1> %mask) {
; CHECK-LABEL: test_vlseg4_mask_nxv16i8_triscv.vector.tuple_nxv16i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
-; CHECK-NEXT: vlseg4e8.v v6, (a0), v0.t
+; CHECK-NEXT: vlseg4e8.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv16i8_4t.nxv16i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 16 x i1> %mask, i64 %vl, i64 1, i64 3)
- %1 = call <vscale x 16 x i8> @llvm.riscv.tuple.extract.nxv16i8.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0, i32 1)
- ret <vscale x 16 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 1 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv1i8_5t(target("riscv.vector.tuple", <vscale x 1 x i8>, 5), ptr, i64, i64)
-declare target("riscv.vector.tuple", <vscale x 1 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv1i8_5t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 5), ptr, <vscale x 1 x i1>, i64, i64, i64)
-
-define <vscale x 1 x i8> @test_vlseg5_nxv1i8_triscv.vector.tuple_nxv1i8_5t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 1 x i8>, 5) @test_vlseg5_nxv1i8_triscv.vector.tuple_nxv1i8_5t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg5_nxv1i8_triscv.vector.tuple_nxv1i8_5t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
-; CHECK-NEXT: vlseg5e8.v v7, (a0)
+; CHECK-NEXT: vlseg5e8.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv1i8_5t(target("riscv.vector.tuple", <vscale x 1 x i8>, 5) undef, ptr %base, i64 %vl, i64 3)
- %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_5t(target("riscv.vector.tuple", <vscale x 1 x i8>, 5) %0, i32 1)
- ret <vscale x 1 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 1 x i8>, 5) %0
}
-
-define <vscale x 1 x i8> @test_vlseg5_mask_nxv1i8_triscv.vector.tuple_nxv1i8_5t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 1 x i8>, 5) @test_vlseg5_mask_nxv1i8_triscv.vector.tuple_nxv1i8_5t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg5_mask_nxv1i8_triscv.vector.tuple_nxv1i8_5t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
-; CHECK-NEXT: vlseg5e8.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg5e8.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv1i8_5t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 5) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 3)
- %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_5t(target("riscv.vector.tuple", <vscale x 1 x i8>, 5) %0, i32 1)
- ret <vscale x 1 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 1 x i8>, 5) %0
}
-
-define <vscale x 1 x i8> @test_vlseg5_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_5t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 1 x i8>, 5) @test_vlseg5_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_5t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg5_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_5t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
-; CHECK-NEXT: vlseg5e8.v v7, (a0)
+; CHECK-NEXT: vlseg5e8.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv1i8_5t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 5) undef, ptr %base, <vscale x 1 x i1> splat (i1 true), i64 %vl, i64 1, i64 3)
- %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_5t(target("riscv.vector.tuple", <vscale x 1 x i8>, 5) %0, i32 1)
- ret <vscale x 1 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 1 x i8>, 5) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 2 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv2i8_5t(target("riscv.vector.tuple", <vscale x 2 x i8>, 5), ptr, i64, i64)
-declare target("riscv.vector.tuple", <vscale x 2 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv2i8_5t.nxv2i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 5), ptr, <vscale x 2 x i1>, i64, i64, i64)
-
-define <vscale x 2 x i8> @test_vlseg5_nxv2i8_triscv.vector.tuple_nxv2i8_5t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 5) @test_vlseg5_nxv2i8_triscv.vector.tuple_nxv2i8_5t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg5_nxv2i8_triscv.vector.tuple_nxv2i8_5t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
-; CHECK-NEXT: vlseg5e8.v v7, (a0)
+; CHECK-NEXT: vlseg5e8.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv2i8_5t(target("riscv.vector.tuple", <vscale x 2 x i8>, 5) undef, ptr %base, i64 %vl, i64 3)
- %1 = call <vscale x 2 x i8> @llvm.riscv.tuple.extract.nxv2i8.triscv.vector.tuple_nxv2i8_5t(target("riscv.vector.tuple", <vscale x 2 x i8>, 5) %0, i32 1)
- ret <vscale x 2 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 5) %0
}
-
-define <vscale x 2 x i8> @test_vlseg5_mask_nxv2i8_triscv.vector.tuple_nxv2i8_5t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 5) @test_vlseg5_mask_nxv2i8_triscv.vector.tuple_nxv2i8_5t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg5_mask_nxv2i8_triscv.vector.tuple_nxv2i8_5t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
-; CHECK-NEXT: vlseg5e8.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg5e8.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv2i8_5t.nxv2i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 5) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 3)
- %1 = call <vscale x 2 x i8> @llvm.riscv.tuple.extract.nxv2i8.triscv.vector.tuple_nxv2i8_5t(target("riscv.vector.tuple", <vscale x 2 x i8>, 5) %0, i32 1)
- ret <vscale x 2 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 5) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5), ptr, i64, i64)
-declare target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv4i8_5t.nxv4i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 5), ptr, <vscale x 4 x i1>, i64, i64, i64)
-
-define <vscale x 4 x i8> @test_vlseg5_nxv4i8_triscv.vector.tuple_nxv4i8_5t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @test_vlseg5_nxv4i8_triscv.vector.tuple_nxv4i8_5t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg5_nxv4i8_triscv.vector.tuple_nxv4i8_5t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
-; CHECK-NEXT: vlseg5e8.v v7, (a0)
+; CHECK-NEXT: vlseg5e8.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) undef, ptr %base, i64 %vl, i64 3)
- %1 = call <vscale x 4 x i8> @llvm.riscv.tuple.extract.nxv4i8.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0, i32 1)
- ret <vscale x 4 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0
}
-
-define <vscale x 4 x i8> @test_vlseg5_mask_nxv4i8_triscv.vector.tuple_nxv4i8_5t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @test_vlseg5_mask_nxv4i8_triscv.vector.tuple_nxv4i8_5t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: test_vlseg5_mask_nxv4i8_triscv.vector.tuple_nxv4i8_5t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
-; CHECK-NEXT: vlseg5e8.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg5e8.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv4i8_5t.nxv4i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) undef, ptr %base, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 3)
- %1 = call <vscale x 4 x i8> @llvm.riscv.tuple.extract.nxv4i8.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0, i32 1)
- ret <vscale x 4 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5), ptr, i64, i64)
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv8i8_5t.nxv8i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 5), ptr, <vscale x 8 x i1>, i64, i64, i64)
-
-define <vscale x 8 x i8> @test_vlseg5_nxv8i8_triscv.vector.tuple_nxv8i8_5t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @test_vlseg5_nxv8i8_triscv.vector.tuple_nxv8i8_5t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg5_nxv8i8_triscv.vector.tuple_nxv8i8_5t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
-; CHECK-NEXT: vlseg5e8.v v7, (a0)
+; CHECK-NEXT: vlseg5e8.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) undef, ptr %base, i64 %vl, i64 3)
- %1 = call <vscale x 8 x i8> @llvm.riscv.tuple.extract.nxv8i8.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0, i32 1)
- ret <vscale x 8 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0
}
-
-define <vscale x 8 x i8> @test_vlseg5_mask_nxv8i8_triscv.vector.tuple_nxv8i8_5t(ptr %base, i64 %vl, <vscale x 8 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @test_vlseg5_mask_nxv8i8_triscv.vector.tuple_nxv8i8_5t(ptr %base, i64 %vl, <vscale x 8 x i1> %mask) {
; CHECK-LABEL: test_vlseg5_mask_nxv8i8_triscv.vector.tuple_nxv8i8_5t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
-; CHECK-NEXT: vlseg5e8.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg5e8.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv8i8_5t.nxv8i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) undef, ptr %base, <vscale x 8 x i1> %mask, i64 %vl, i64 1, i64 3)
- %1 = call <vscale x 8 x i8> @llvm.riscv.tuple.extract.nxv8i8.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0, i32 1)
- ret <vscale x 8 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 1 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv1i8_6t(target("riscv.vector.tuple", <vscale x 1 x i8>, 6), ptr, i64, i64)
-declare target("riscv.vector.tuple", <vscale x 1 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv1i8_6t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 6), ptr, <vscale x 1 x i1>, i64, i64, i64)
-
-define <vscale x 1 x i8> @test_vlseg6_nxv1i8_triscv.vector.tuple_nxv1i8_6t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 1 x i8>, 6) @test_vlseg6_nxv1i8_triscv.vector.tuple_nxv1i8_6t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg6_nxv1i8_triscv.vector.tuple_nxv1i8_6t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
-; CHECK-NEXT: vlseg6e8.v v7, (a0)
+; CHECK-NEXT: vlseg6e8.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv1i8_6t(target("riscv.vector.tuple", <vscale x 1 x i8>, 6) undef, ptr %base, i64 %vl, i64 3)
- %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_6t(target("riscv.vector.tuple", <vscale x 1 x i8>, 6) %0, i32 1)
- ret <vscale x 1 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 1 x i8>, 6) %0
}
-
-define <vscale x 1 x i8> @test_vlseg6_mask_nxv1i8_triscv.vector.tuple_nxv1i8_6t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 1 x i8>, 6) @test_vlseg6_mask_nxv1i8_triscv.vector.tuple_nxv1i8_6t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg6_mask_nxv1i8_triscv.vector.tuple_nxv1i8_6t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
-; CHECK-NEXT: vlseg6e8.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg6e8.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv1i8_6t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 6) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 3)
- %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_6t(target("riscv.vector.tuple", <vscale x 1 x i8>, 6) %0, i32 1)
- ret <vscale x 1 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 1 x i8>, 6) %0
}
-
-define <vscale x 1 x i8> @test_vlseg6_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_6t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 1 x i8>, 6) @test_vlseg6_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_6t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg6_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_6t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
-; CHECK-NEXT: vlseg6e8.v v7, (a0)
+; CHECK-NEXT: vlseg6e8.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv1i8_6t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 6) undef, ptr %base, <vscale x 1 x i1> splat (i1 true), i64 %vl, i64 1, i64 3)
- %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_6t(target("riscv.vector.tuple", <vscale x 1 x i8>, 6) %0, i32 1)
- ret <vscale x 1 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 1 x i8>, 6) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 2 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv2i8_6t(target("riscv.vector.tuple", <vscale x 2 x i8>, 6), ptr, i64, i64)
-declare target("riscv.vector.tuple", <vscale x 2 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv2i8_6t.nxv2i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 6), ptr, <vscale x 2 x i1>, i64, i64, i64)
-
-define <vscale x 2 x i8> @test_vlseg6_nxv2i8_triscv.vector.tuple_nxv2i8_6t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 6) @test_vlseg6_nxv2i8_triscv.vector.tuple_nxv2i8_6t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg6_nxv2i8_triscv.vector.tuple_nxv2i8_6t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
-; CHECK-NEXT: vlseg6e8.v v7, (a0)
+; CHECK-NEXT: vlseg6e8.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv2i8_6t(target("riscv.vector.tuple", <vscale x 2 x i8>, 6) undef, ptr %base, i64 %vl, i64 3)
- %1 = call <vscale x 2 x i8> @llvm.riscv.tuple.extract.nxv2i8.triscv.vector.tuple_nxv2i8_6t(target("riscv.vector.tuple", <vscale x 2 x i8>, 6) %0, i32 1)
- ret <vscale x 2 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 6) %0
}
-
-define <vscale x 2 x i8> @test_vlseg6_mask_nxv2i8_triscv.vector.tuple_nxv2i8_6t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 6) @test_vlseg6_mask_nxv2i8_triscv.vector.tuple_nxv2i8_6t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg6_mask_nxv2i8_triscv.vector.tuple_nxv2i8_6t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
-; CHECK-NEXT: vlseg6e8.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg6e8.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv2i8_6t.nxv2i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 6) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 3)
- %1 = call <vscale x 2 x i8> @llvm.riscv.tuple.extract.nxv2i8.triscv.vector.tuple_nxv2i8_6t(target("riscv.vector.tuple", <vscale x 2 x i8>, 6) %0, i32 1)
- ret <vscale x 2 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 6) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6), ptr, i64, i64)
-declare target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv4i8_6t.nxv4i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 6), ptr, <vscale x 4 x i1>, i64, i64, i64)
-
-define <vscale x 4 x i8> @test_vlseg6_nxv4i8_triscv.vector.tuple_nxv4i8_6t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @test_vlseg6_nxv4i8_triscv.vector.tuple_nxv4i8_6t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg6_nxv4i8_triscv.vector.tuple_nxv4i8_6t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
-; CHECK-NEXT: vlseg6e8.v v7, (a0)
+; CHECK-NEXT: vlseg6e8.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) undef, ptr %base, i64 %vl, i64 3)
- %1 = call <vscale x 4 x i8> @llvm.riscv.tuple.extract.nxv4i8.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0, i32 1)
- ret <vscale x 4 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0
}
-
-define <vscale x 4 x i8> @test_vlseg6_mask_nxv4i8_triscv.vector.tuple_nxv4i8_6t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @test_vlseg6_mask_nxv4i8_triscv.vector.tuple_nxv4i8_6t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: test_vlseg6_mask_nxv4i8_triscv.vector.tuple_nxv4i8_6t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
-; CHECK-NEXT: vlseg6e8.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg6e8.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv4i8_6t.nxv4i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) undef, ptr %base, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 3)
- %1 = call <vscale x 4 x i8> @llvm.riscv.tuple.extract.nxv4i8.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0, i32 1)
- ret <vscale x 4 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6), ptr, i64, i64)
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv8i8_6t.nxv8i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 6), ptr, <vscale x 8 x i1>, i64, i64, i64)
-
-define <vscale x 8 x i8> @test_vlseg6_nxv8i8_triscv.vector.tuple_nxv8i8_6t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @test_vlseg6_nxv8i8_triscv.vector.tuple_nxv8i8_6t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg6_nxv8i8_triscv.vector.tuple_nxv8i8_6t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
-; CHECK-NEXT: vlseg6e8.v v7, (a0)
+; CHECK-NEXT: vlseg6e8.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) undef, ptr %base, i64 %vl, i64 3)
- %1 = call <vscale x 8 x i8> @llvm.riscv.tuple.extract.nxv8i8.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0, i32 1)
- ret <vscale x 8 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0
}
-
-define <vscale x 8 x i8> @test_vlseg6_mask_nxv8i8_triscv.vector.tuple_nxv8i8_6t(ptr %base, i64 %vl, <vscale x 8 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @test_vlseg6_mask_nxv8i8_triscv.vector.tuple_nxv8i8_6t(ptr %base, i64 %vl, <vscale x 8 x i1> %mask) {
; CHECK-LABEL: test_vlseg6_mask_nxv8i8_triscv.vector.tuple_nxv8i8_6t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
-; CHECK-NEXT: vlseg6e8.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg6e8.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv8i8_6t.nxv8i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) undef, ptr %base, <vscale x 8 x i1> %mask, i64 %vl, i64 1, i64 3)
- %1 = call <vscale x 8 x i8> @llvm.riscv.tuple.extract.nxv8i8.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0, i32 1)
- ret <vscale x 8 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 1 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv1i8_7t(target("riscv.vector.tuple", <vscale x 1 x i8>, 7), ptr, i64, i64)
-declare target("riscv.vector.tuple", <vscale x 1 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv1i8_7t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 7), ptr, <vscale x 1 x i1>, i64, i64, i64)
-
-define <vscale x 1 x i8> @test_vlseg7_nxv1i8_triscv.vector.tuple_nxv1i8_7t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 1 x i8>, 7) @test_vlseg7_nxv1i8_triscv.vector.tuple_nxv1i8_7t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg7_nxv1i8_triscv.vector.tuple_nxv1i8_7t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
-; CHECK-NEXT: vlseg7e8.v v7, (a0)
+; CHECK-NEXT: vlseg7e8.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv1i8_7t(target("riscv.vector.tuple", <vscale x 1 x i8>, 7) undef, ptr %base, i64 %vl, i64 3)
- %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_7t(target("riscv.vector.tuple", <vscale x 1 x i8>, 7) %0, i32 1)
- ret <vscale x 1 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 1 x i8>, 7) %0
}
-
-define <vscale x 1 x i8> @test_vlseg7_mask_nxv1i8_triscv.vector.tuple_nxv1i8_7t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 1 x i8>, 7) @test_vlseg7_mask_nxv1i8_triscv.vector.tuple_nxv1i8_7t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg7_mask_nxv1i8_triscv.vector.tuple_nxv1i8_7t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
-; CHECK-NEXT: vlseg7e8.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg7e8.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv1i8_7t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 7) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 3)
- %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_7t(target("riscv.vector.tuple", <vscale x 1 x i8>, 7) %0, i32 1)
- ret <vscale x 1 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 1 x i8>, 7) %0
}
-
-define <vscale x 1 x i8> @test_vlseg7_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_7t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 1 x i8>, 7) @test_vlseg7_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_7t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg7_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_7t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
-; CHECK-NEXT: vlseg7e8.v v7, (a0)
+; CHECK-NEXT: vlseg7e8.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv1i8_7t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 7) undef, ptr %base, <vscale x 1 x i1> splat (i1 true), i64 %vl, i64 1, i64 3)
- %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_7t(target("riscv.vector.tuple", <vscale x 1 x i8>, 7) %0, i32 1)
- ret <vscale x 1 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 1 x i8>, 7) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 2 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv2i8_7t(target("riscv.vector.tuple", <vscale x 2 x i8>, 7), ptr, i64, i64)
-declare target("riscv.vector.tuple", <vscale x 2 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv2i8_7t.nxv2i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 7), ptr, <vscale x 2 x i1>, i64, i64, i64)
-
-define <vscale x 2 x i8> @test_vlseg7_nxv2i8_triscv.vector.tuple_nxv2i8_7t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 7) @test_vlseg7_nxv2i8_triscv.vector.tuple_nxv2i8_7t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg7_nxv2i8_triscv.vector.tuple_nxv2i8_7t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
-; CHECK-NEXT: vlseg7e8.v v7, (a0)
+; CHECK-NEXT: vlseg7e8.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv2i8_7t(target("riscv.vector.tuple", <vscale x 2 x i8>, 7) undef, ptr %base, i64 %vl, i64 3)
- %1 = call <vscale x 2 x i8> @llvm.riscv.tuple.extract.nxv2i8.triscv.vector.tuple_nxv2i8_7t(target("riscv.vector.tuple", <vscale x 2 x i8>, 7) %0, i32 1)
- ret <vscale x 2 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 7) %0
}
-
-define <vscale x 2 x i8> @test_vlseg7_mask_nxv2i8_triscv.vector.tuple_nxv2i8_7t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 7) @test_vlseg7_mask_nxv2i8_triscv.vector.tuple_nxv2i8_7t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg7_mask_nxv2i8_triscv.vector.tuple_nxv2i8_7t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
-; CHECK-NEXT: vlseg7e8.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg7e8.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv2i8_7t.nxv2i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 7) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 3)
- %1 = call <vscale x 2 x i8> @llvm.riscv.tuple.extract.nxv2i8.triscv.vector.tuple_nxv2i8_7t(target("riscv.vector.tuple", <vscale x 2 x i8>, 7) %0, i32 1)
- ret <vscale x 2 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 7) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7), ptr, i64, i64)
-declare target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv4i8_7t.nxv4i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 7), ptr, <vscale x 4 x i1>, i64, i64, i64)
-
-define <vscale x 4 x i8> @test_vlseg7_nxv4i8_triscv.vector.tuple_nxv4i8_7t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @test_vlseg7_nxv4i8_triscv.vector.tuple_nxv4i8_7t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg7_nxv4i8_triscv.vector.tuple_nxv4i8_7t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
-; CHECK-NEXT: vlseg7e8.v v7, (a0)
+; CHECK-NEXT: vlseg7e8.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) undef, ptr %base, i64 %vl, i64 3)
- %1 = call <vscale x 4 x i8> @llvm.riscv.tuple.extract.nxv4i8.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0, i32 1)
- ret <vscale x 4 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0
}
-
-define <vscale x 4 x i8> @test_vlseg7_mask_nxv4i8_triscv.vector.tuple_nxv4i8_7t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @test_vlseg7_mask_nxv4i8_triscv.vector.tuple_nxv4i8_7t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: test_vlseg7_mask_nxv4i8_triscv.vector.tuple_nxv4i8_7t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
-; CHECK-NEXT: vlseg7e8.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg7e8.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv4i8_7t.nxv4i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) undef, ptr %base, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 3)
- %1 = call <vscale x 4 x i8> @llvm.riscv.tuple.extract.nxv4i8.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0, i32 1)
- ret <vscale x 4 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7), ptr, i64, i64)
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv8i8_7t.nxv8i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 7), ptr, <vscale x 8 x i1>, i64, i64, i64)
-
-define <vscale x 8 x i8> @test_vlseg7_nxv8i8_triscv.vector.tuple_nxv8i8_7t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @test_vlseg7_nxv8i8_triscv.vector.tuple_nxv8i8_7t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg7_nxv8i8_triscv.vector.tuple_nxv8i8_7t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
-; CHECK-NEXT: vlseg7e8.v v7, (a0)
+; CHECK-NEXT: vlseg7e8.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) undef, ptr %base, i64 %vl, i64 3)
- %1 = call <vscale x 8 x i8> @llvm.riscv.tuple.extract.nxv8i8.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0, i32 1)
- ret <vscale x 8 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0
}
-
-define <vscale x 8 x i8> @test_vlseg7_mask_nxv8i8_triscv.vector.tuple_nxv8i8_7t(ptr %base, i64 %vl, <vscale x 8 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @test_vlseg7_mask_nxv8i8_triscv.vector.tuple_nxv8i8_7t(ptr %base, i64 %vl, <vscale x 8 x i1> %mask) {
; CHECK-LABEL: test_vlseg7_mask_nxv8i8_triscv.vector.tuple_nxv8i8_7t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
-; CHECK-NEXT: vlseg7e8.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg7e8.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv8i8_7t.nxv8i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) undef, ptr %base, <vscale x 8 x i1> %mask, i64 %vl, i64 1, i64 3)
- %1 = call <vscale x 8 x i8> @llvm.riscv.tuple.extract.nxv8i8.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0, i32 1)
- ret <vscale x 8 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 1 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv1i8_8t(target("riscv.vector.tuple", <vscale x 1 x i8>, 8), ptr, i64, i64)
-declare target("riscv.vector.tuple", <vscale x 1 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv1i8_8t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 8), ptr, <vscale x 1 x i1>, i64, i64, i64)
-
-define <vscale x 1 x i8> @test_vlseg8_nxv1i8_triscv.vector.tuple_nxv1i8_8t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 1 x i8>, 8) @test_vlseg8_nxv1i8_triscv.vector.tuple_nxv1i8_8t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg8_nxv1i8_triscv.vector.tuple_nxv1i8_8t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
-; CHECK-NEXT: vlseg8e8.v v7, (a0)
+; CHECK-NEXT: vlseg8e8.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv1i8_8t(target("riscv.vector.tuple", <vscale x 1 x i8>, 8) undef, ptr %base, i64 %vl, i64 3)
- %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_8t(target("riscv.vector.tuple", <vscale x 1 x i8>, 8) %0, i32 1)
- ret <vscale x 1 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 1 x i8>, 8) %0
}
-
-define <vscale x 1 x i8> @test_vlseg8_mask_nxv1i8_triscv.vector.tuple_nxv1i8_8t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 1 x i8>, 8) @test_vlseg8_mask_nxv1i8_triscv.vector.tuple_nxv1i8_8t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg8_mask_nxv1i8_triscv.vector.tuple_nxv1i8_8t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
-; CHECK-NEXT: vlseg8e8.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg8e8.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv1i8_8t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 8) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 3)
- %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_8t(target("riscv.vector.tuple", <vscale x 1 x i8>, 8) %0, i32 1)
- ret <vscale x 1 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 1 x i8>, 8) %0
}
-
-define <vscale x 1 x i8> @test_vlseg8_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_8t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 1 x i8>, 8) @test_vlseg8_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_8t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg8_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_8t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
-; CHECK-NEXT: vlseg8e8.v v7, (a0)
+; CHECK-NEXT: vlseg8e8.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv1i8_8t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 8) undef, ptr %base, <vscale x 1 x i1> splat (i1 true), i64 %vl, i64 1, i64 3)
- %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_8t(target("riscv.vector.tuple", <vscale x 1 x i8>, 8) %0, i32 1)
- ret <vscale x 1 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 1 x i8>, 8) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 2 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv2i8_8t(target("riscv.vector.tuple", <vscale x 2 x i8>, 8), ptr, i64, i64)
-declare target("riscv.vector.tuple", <vscale x 2 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv2i8_8t.nxv2i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 8), ptr, <vscale x 2 x i1>, i64, i64, i64)
-
-define <vscale x 2 x i8> @test_vlseg8_nxv2i8_triscv.vector.tuple_nxv2i8_8t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 8) @test_vlseg8_nxv2i8_triscv.vector.tuple_nxv2i8_8t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg8_nxv2i8_triscv.vector.tuple_nxv2i8_8t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
-; CHECK-NEXT: vlseg8e8.v v7, (a0)
+; CHECK-NEXT: vlseg8e8.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv2i8_8t(target("riscv.vector.tuple", <vscale x 2 x i8>, 8) undef, ptr %base, i64 %vl, i64 3)
- %1 = call <vscale x 2 x i8> @llvm.riscv.tuple.extract.nxv2i8.triscv.vector.tuple_nxv2i8_8t(target("riscv.vector.tuple", <vscale x 2 x i8>, 8) %0, i32 1)
- ret <vscale x 2 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 8) %0
}
-
-define <vscale x 2 x i8> @test_vlseg8_mask_nxv2i8_triscv.vector.tuple_nxv2i8_8t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 8) @test_vlseg8_mask_nxv2i8_triscv.vector.tuple_nxv2i8_8t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg8_mask_nxv2i8_triscv.vector.tuple_nxv2i8_8t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
-; CHECK-NEXT: vlseg8e8.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg8e8.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv2i8_8t.nxv2i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 8) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 3)
- %1 = call <vscale x 2 x i8> @llvm.riscv.tuple.extract.nxv2i8.triscv.vector.tuple_nxv2i8_8t(target("riscv.vector.tuple", <vscale x 2 x i8>, 8) %0, i32 1)
- ret <vscale x 2 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 8) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8), ptr, i64, i64)
-declare target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv4i8_8t.nxv4i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 8), ptr, <vscale x 4 x i1>, i64, i64, i64)
-
-define <vscale x 4 x i8> @test_vlseg8_nxv4i8_triscv.vector.tuple_nxv4i8_8t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @test_vlseg8_nxv4i8_triscv.vector.tuple_nxv4i8_8t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg8_nxv4i8_triscv.vector.tuple_nxv4i8_8t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
-; CHECK-NEXT: vlseg8e8.v v7, (a0)
+; CHECK-NEXT: vlseg8e8.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) undef, ptr %base, i64 %vl, i64 3)
- %1 = call <vscale x 4 x i8> @llvm.riscv.tuple.extract.nxv4i8.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0, i32 1)
- ret <vscale x 4 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0
}
-
-define <vscale x 4 x i8> @test_vlseg8_mask_nxv4i8_triscv.vector.tuple_nxv4i8_8t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @test_vlseg8_mask_nxv4i8_triscv.vector.tuple_nxv4i8_8t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: test_vlseg8_mask_nxv4i8_triscv.vector.tuple_nxv4i8_8t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
-; CHECK-NEXT: vlseg8e8.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg8e8.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv4i8_8t.nxv4i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) undef, ptr %base, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 3)
- %1 = call <vscale x 4 x i8> @llvm.riscv.tuple.extract.nxv4i8.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0, i32 1)
- ret <vscale x 4 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8), ptr, i64, i64)
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv8i8_8t.nxv8i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 8), ptr, <vscale x 8 x i1>, i64, i64, i64)
-
-define <vscale x 8 x i8> @test_vlseg8_nxv8i8_triscv.vector.tuple_nxv8i8_8t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @test_vlseg8_nxv8i8_triscv.vector.tuple_nxv8i8_8t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg8_nxv8i8_triscv.vector.tuple_nxv8i8_8t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
-; CHECK-NEXT: vlseg8e8.v v7, (a0)
+; CHECK-NEXT: vlseg8e8.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) undef, ptr %base, i64 %vl, i64 3)
- %1 = call <vscale x 8 x i8> @llvm.riscv.tuple.extract.nxv8i8.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0, i32 1)
- ret <vscale x 8 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0
}
-
-define <vscale x 8 x i8> @test_vlseg8_mask_nxv8i8_triscv.vector.tuple_nxv8i8_8t(ptr %base, i64 %vl, <vscale x 8 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @test_vlseg8_mask_nxv8i8_triscv.vector.tuple_nxv8i8_8t(ptr %base, i64 %vl, <vscale x 8 x i1> %mask) {
; CHECK-LABEL: test_vlseg8_mask_nxv8i8_triscv.vector.tuple_nxv8i8_8t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
-; CHECK-NEXT: vlseg8e8.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg8e8.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv8i8_8t.nxv8i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) undef, ptr %base, <vscale x 8 x i1> %mask, i64 %vl, i64 1, i64 3)
- %1 = call <vscale x 8 x i8> @llvm.riscv.tuple.extract.nxv8i8.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0, i32 1)
- ret <vscale x 8 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 2 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv2i8_2t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 2), ptr, <vscale x 1 x i1>, i64, i64, i64)
-
-define <vscale x 1 x i16> @test_vlseg2_nxv1i16_triscv.vector.tuple_nxv2i8_2t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 2) @test_vlseg2_nxv1i16_triscv.vector.tuple_nxv2i8_2t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg2_nxv1i16_triscv.vector.tuple_nxv2i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT: vlseg2e16.v v7, (a0)
+; CHECK-NEXT: vlseg2e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv2i8_2t(target("riscv.vector.tuple", <vscale x 2 x i8>, 2) undef, ptr %base, i64 %vl, i64 4)
- %1 = call <vscale x 1 x i16> @llvm.riscv.tuple.extract.nxv1i16.triscv.vector.tuple_nxv2i8_2t(target("riscv.vector.tuple", <vscale x 2 x i8>, 2) %0, i32 1)
- ret <vscale x 1 x i16> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 2) %0
}
-
-define <vscale x 1 x i16> @test_vlseg2_mask_nxv1i16_triscv.vector.tuple_nxv2i8_2t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 2) @test_vlseg2_mask_nxv1i16_triscv.vector.tuple_nxv2i8_2t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg2_mask_nxv1i16_triscv.vector.tuple_nxv2i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT: vlseg2e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg2e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv2i8_2t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 2) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 4)
- %1 = call <vscale x 1 x i16> @llvm.riscv.tuple.extract.nxv1i16.triscv.vector.tuple_nxv2i8_2t(target("riscv.vector.tuple", <vscale x 2 x i8>, 2) %0, i32 1)
- ret <vscale x 1 x i16> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 2) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv4i8_2t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 2), ptr, <vscale x 2 x i1>, i64, i64, i64)
-
-define <vscale x 2 x i16> @test_vlseg2_nxv2i16_triscv.vector.tuple_nxv4i8_2t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @test_vlseg2_nxv2i16_triscv.vector.tuple_nxv4i8_2t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg2_nxv2i16_triscv.vector.tuple_nxv4i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT: vlseg2e16.v v7, (a0)
+; CHECK-NEXT: vlseg2e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) undef, ptr %base, i64 %vl, i64 4)
- %1 = call <vscale x 2 x i16> @llvm.riscv.tuple.extract.nxv2i16.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0, i32 1)
- ret <vscale x 2 x i16> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0
}
-
-define <vscale x 2 x i16> @test_vlseg2_mask_nxv2i16_triscv.vector.tuple_nxv4i8_2t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @test_vlseg2_mask_nxv2i16_triscv.vector.tuple_nxv4i8_2t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg2_mask_nxv2i16_triscv.vector.tuple_nxv4i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT: vlseg2e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg2e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv4i8_2t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 4)
- %1 = call <vscale x 2 x i16> @llvm.riscv.tuple.extract.nxv2i16.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0, i32 1)
- ret <vscale x 2 x i16> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv8i8_2t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 2), ptr, <vscale x 4 x i1>, i64, i64, i64)
-
-define <vscale x 4 x i16> @test_vlseg2_nxv4i16_triscv.vector.tuple_nxv8i8_2t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @test_vlseg2_nxv4i16_triscv.vector.tuple_nxv8i8_2t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg2_nxv4i16_triscv.vector.tuple_nxv8i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vlseg2e16.v v7, (a0)
+; CHECK-NEXT: vlseg2e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) undef, ptr %base, i64 %vl, i64 4)
- %1 = call <vscale x 4 x i16> @llvm.riscv.tuple.extract.nxv4i16.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0, i32 1)
- ret <vscale x 4 x i16> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0
}
-
-define <vscale x 4 x i16> @test_vlseg2_mask_nxv4i16_triscv.vector.tuple_nxv8i8_2t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @test_vlseg2_mask_nxv4i16_triscv.vector.tuple_nxv8i8_2t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: test_vlseg2_mask_nxv4i16_triscv.vector.tuple_nxv8i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vlseg2e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg2e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv8i8_2t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) undef, ptr %base, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 4)
- %1 = call <vscale x 4 x i16> @llvm.riscv.tuple.extract.nxv4i16.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0, i32 1)
- ret <vscale x 4 x i16> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv16i8_2t.nxv8i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 2), ptr, <vscale x 8 x i1>, i64, i64, i64)
-
-define <vscale x 8 x i16> @test_vlseg2_nxv8i16_triscv.vector.tuple_nxv16i8_2t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @test_vlseg2_nxv8i16_triscv.vector.tuple_nxv16i8_2t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg2_nxv8i16_triscv.vector.tuple_nxv16i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vlseg2e16.v v6, (a0)
+; CHECK-NEXT: vlseg2e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, i64 %vl, i64 4)
- %1 = call <vscale x 8 x i16> @llvm.riscv.tuple.extract.nxv8i16.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0, i32 1)
- ret <vscale x 8 x i16> %1
+ ret target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0
}
-
-define <vscale x 8 x i16> @test_vlseg2_mask_nxv8i16_triscv.vector.tuple_nxv16i8_2t(ptr %base, i64 %vl, <vscale x 8 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @test_vlseg2_mask_nxv8i16_triscv.vector.tuple_nxv16i8_2t(ptr %base, i64 %vl, <vscale x 8 x i1> %mask) {
; CHECK-LABEL: test_vlseg2_mask_nxv8i16_triscv.vector.tuple_nxv16i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vlseg2e16.v v6, (a0), v0.t
+; CHECK-NEXT: vlseg2e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv16i8_2t.nxv8i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 8 x i1> %mask, i64 %vl, i64 1, i64 4)
- %1 = call <vscale x 8 x i16> @llvm.riscv.tuple.extract.nxv8i16.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0, i32 1)
- ret <vscale x 8 x i16> %1
+ ret target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv32i8_2t.nxv16i1(target("riscv.vector.tuple", <vscale x 32 x i8>, 2), ptr, <vscale x 16 x i1>, i64, i64, i64)
-
-define <vscale x 16 x i16> @test_vlseg2_nxv16i16_triscv.vector.tuple_nxv32i8_2t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @test_vlseg2_nxv16i16_triscv.vector.tuple_nxv32i8_2t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg2_nxv16i16_triscv.vector.tuple_nxv32i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
-; CHECK-NEXT: vlseg2e16.v v4, (a0)
+; CHECK-NEXT: vlseg2e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, i64 %vl, i64 4)
- %1 = call <vscale x 16 x i16> @llvm.riscv.tuple.extract.nxv16i16.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0, i32 1)
- ret <vscale x 16 x i16> %1
+ ret target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0
}
-
-define <vscale x 16 x i16> @test_vlseg2_mask_nxv16i16_triscv.vector.tuple_nxv32i8_2t(ptr %base, i64 %vl, <vscale x 16 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @test_vlseg2_mask_nxv16i16_triscv.vector.tuple_nxv32i8_2t(ptr %base, i64 %vl, <vscale x 16 x i1> %mask) {
; CHECK-LABEL: test_vlseg2_mask_nxv16i16_triscv.vector.tuple_nxv32i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
-; CHECK-NEXT: vlseg2e16.v v4, (a0), v0.t
+; CHECK-NEXT: vlseg2e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv32i8_2t.nxv16i1(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 16 x i1> %mask, i64 %vl, i64 1, i64 4)
- %1 = call <vscale x 16 x i16> @llvm.riscv.tuple.extract.nxv16i16.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0, i32 1)
- ret <vscale x 16 x i16> %1
+ ret target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 2 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv2i8_3t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 3), ptr, <vscale x 1 x i1>, i64, i64, i64)
-
-define <vscale x 1 x i16> @test_vlseg3_nxv1i16_triscv.vector.tuple_nxv2i8_3t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 3) @test_vlseg3_nxv1i16_triscv.vector.tuple_nxv2i8_3t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg3_nxv1i16_triscv.vector.tuple_nxv2i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT: vlseg3e16.v v7, (a0)
+; CHECK-NEXT: vlseg3e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv2i8_3t(target("riscv.vector.tuple", <vscale x 2 x i8>, 3) undef, ptr %base, i64 %vl, i64 4)
- %1 = call <vscale x 1 x i16> @llvm.riscv.tuple.extract.nxv1i16.triscv.vector.tuple_nxv2i8_3t(target("riscv.vector.tuple", <vscale x 2 x i8>, 3) %0, i32 1)
- ret <vscale x 1 x i16> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 3) %0
}
-
-define <vscale x 1 x i16> @test_vlseg3_mask_nxv1i16_triscv.vector.tuple_nxv2i8_3t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 3) @test_vlseg3_mask_nxv1i16_triscv.vector.tuple_nxv2i8_3t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg3_mask_nxv1i16_triscv.vector.tuple_nxv2i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT: vlseg3e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg3e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv2i8_3t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 3) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 4)
- %1 = call <vscale x 1 x i16> @llvm.riscv.tuple.extract.nxv1i16.triscv.vector.tuple_nxv2i8_3t(target("riscv.vector.tuple", <vscale x 2 x i8>, 3) %0, i32 1)
- ret <vscale x 1 x i16> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 3) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv4i8_3t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 3), ptr, <vscale x 2 x i1>, i64, i64, i64)
-
-define <vscale x 2 x i16> @test_vlseg3_nxv2i16_triscv.vector.tuple_nxv4i8_3t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @test_vlseg3_nxv2i16_triscv.vector.tuple_nxv4i8_3t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg3_nxv2i16_triscv.vector.tuple_nxv4i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT: vlseg3e16.v v7, (a0)
+; CHECK-NEXT: vlseg3e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) undef, ptr %base, i64 %vl, i64 4)
- %1 = call <vscale x 2 x i16> @llvm.riscv.tuple.extract.nxv2i16.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0, i32 1)
- ret <vscale x 2 x i16> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0
}
-
-define <vscale x 2 x i16> @test_vlseg3_mask_nxv2i16_triscv.vector.tuple_nxv4i8_3t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @test_vlseg3_mask_nxv2i16_triscv.vector.tuple_nxv4i8_3t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg3_mask_nxv2i16_triscv.vector.tuple_nxv4i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT: vlseg3e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg3e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv4i8_3t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 4)
- %1 = call <vscale x 2 x i16> @llvm.riscv.tuple.extract.nxv2i16.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0, i32 1)
- ret <vscale x 2 x i16> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv8i8_3t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 3), ptr, <vscale x 4 x i1>, i64, i64, i64)
-
-define <vscale x 4 x i16> @test_vlseg3_nxv4i16_triscv.vector.tuple_nxv8i8_3t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @test_vlseg3_nxv4i16_triscv.vector.tuple_nxv8i8_3t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg3_nxv4i16_triscv.vector.tuple_nxv8i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vlseg3e16.v v7, (a0)
+; CHECK-NEXT: vlseg3e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) undef, ptr %base, i64 %vl, i64 4)
- %1 = call <vscale x 4 x i16> @llvm.riscv.tuple.extract.nxv4i16.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0, i32 1)
- ret <vscale x 4 x i16> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0
}
-
-define <vscale x 4 x i16> @test_vlseg3_mask_nxv4i16_triscv.vector.tuple_nxv8i8_3t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @test_vlseg3_mask_nxv4i16_triscv.vector.tuple_nxv8i8_3t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: test_vlseg3_mask_nxv4i16_triscv.vector.tuple_nxv8i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vlseg3e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg3e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv8i8_3t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) undef, ptr %base, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 4)
- %1 = call <vscale x 4 x i16> @llvm.riscv.tuple.extract.nxv4i16.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0, i32 1)
- ret <vscale x 4 x i16> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv8i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 3), ptr, <vscale x 8 x i1>, i64, i64, i64)
-
-define <vscale x 8 x i16> @test_vlseg3_nxv8i16_triscv.vector.tuple_nxv16i8_3t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @test_vlseg3_nxv8i16_triscv.vector.tuple_nxv16i8_3t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg3_nxv8i16_triscv.vector.tuple_nxv16i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vlseg3e16.v v6, (a0)
+; CHECK-NEXT: vlseg3e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, i64 %vl, i64 4)
- %1 = call <vscale x 8 x i16> @llvm.riscv.tuple.extract.nxv8i16.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0, i32 1)
- ret <vscale x 8 x i16> %1
+ ret target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0
}
-
-define <vscale x 8 x i16> @test_vlseg3_mask_nxv8i16_triscv.vector.tuple_nxv16i8_3t(ptr %base, i64 %vl, <vscale x 8 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @test_vlseg3_mask_nxv8i16_triscv.vector.tuple_nxv16i8_3t(ptr %base, i64 %vl, <vscale x 8 x i1> %mask) {
; CHECK-LABEL: test_vlseg3_mask_nxv8i16_triscv.vector.tuple_nxv16i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vlseg3e16.v v6, (a0), v0.t
+; CHECK-NEXT: vlseg3e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv8i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 8 x i1> %mask, i64 %vl, i64 1, i64 4)
- %1 = call <vscale x 8 x i16> @llvm.riscv.tuple.extract.nxv8i16.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0, i32 1)
- ret <vscale x 8 x i16> %1
+ ret target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 2 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv2i8_4t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 4), ptr, <vscale x 1 x i1>, i64, i64, i64)
-
-define <vscale x 1 x i16> @test_vlseg4_nxv1i16_triscv.vector.tuple_nxv2i8_4t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 4) @test_vlseg4_nxv1i16_triscv.vector.tuple_nxv2i8_4t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg4_nxv1i16_triscv.vector.tuple_nxv2i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT: vlseg4e16.v v7, (a0)
+; CHECK-NEXT: vlseg4e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv2i8_4t(target("riscv.vector.tuple", <vscale x 2 x i8>, 4) undef, ptr %base, i64 %vl, i64 4)
- %1 = call <vscale x 1 x i16> @llvm.riscv.tuple.extract.nxv1i16.triscv.vector.tuple_nxv2i8_4t(target("riscv.vector.tuple", <vscale x 2 x i8>, 4) %0, i32 1)
- ret <vscale x 1 x i16> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 4) %0
}
-
-define <vscale x 1 x i16> @test_vlseg4_mask_nxv1i16_triscv.vector.tuple_nxv2i8_4t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 4) @test_vlseg4_mask_nxv1i16_triscv.vector.tuple_nxv2i8_4t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg4_mask_nxv1i16_triscv.vector.tuple_nxv2i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT: vlseg4e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg4e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv2i8_4t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 4) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 4)
- %1 = call <vscale x 1 x i16> @llvm.riscv.tuple.extract.nxv1i16.triscv.vector.tuple_nxv2i8_4t(target("riscv.vector.tuple", <vscale x 2 x i8>, 4) %0, i32 1)
- ret <vscale x 1 x i16> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 4) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv4i8_4t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 4), ptr, <vscale x 2 x i1>, i64, i64, i64)
-
-define <vscale x 2 x i16> @test_vlseg4_nxv2i16_triscv.vector.tuple_nxv4i8_4t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @test_vlseg4_nxv2i16_triscv.vector.tuple_nxv4i8_4t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg4_nxv2i16_triscv.vector.tuple_nxv4i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT: vlseg4e16.v v7, (a0)
+; CHECK-NEXT: vlseg4e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) undef, ptr %base, i64 %vl, i64 4)
- %1 = call <vscale x 2 x i16> @llvm.riscv.tuple.extract.nxv2i16.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0, i32 1)
- ret <vscale x 2 x i16> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0
}
-
-define <vscale x 2 x i16> @test_vlseg4_mask_nxv2i16_triscv.vector.tuple_nxv4i8_4t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @test_vlseg4_mask_nxv2i16_triscv.vector.tuple_nxv4i8_4t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg4_mask_nxv2i16_triscv.vector.tuple_nxv4i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT: vlseg4e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg4e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv4i8_4t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 4)
- %1 = call <vscale x 2 x i16> @llvm.riscv.tuple.extract.nxv2i16.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0, i32 1)
- ret <vscale x 2 x i16> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv8i8_4t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 4), ptr, <vscale x 4 x i1>, i64, i64, i64)
-
-define <vscale x 4 x i16> @test_vlseg4_nxv4i16_triscv.vector.tuple_nxv8i8_4t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @test_vlseg4_nxv4i16_triscv.vector.tuple_nxv8i8_4t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg4_nxv4i16_triscv.vector.tuple_nxv8i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vlseg4e16.v v7, (a0)
+; CHECK-NEXT: vlseg4e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) undef, ptr %base, i64 %vl, i64 4)
- %1 = call <vscale x 4 x i16> @llvm.riscv.tuple.extract.nxv4i16.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0, i32 1)
- ret <vscale x 4 x i16> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0
}
-
-define <vscale x 4 x i16> @test_vlseg4_mask_nxv4i16_triscv.vector.tuple_nxv8i8_4t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @test_vlseg4_mask_nxv4i16_triscv.vector.tuple_nxv8i8_4t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: test_vlseg4_mask_nxv4i16_triscv.vector.tuple_nxv8i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vlseg4e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg4e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv8i8_4t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) undef, ptr %base, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 4)
- %1 = call <vscale x 4 x i16> @llvm.riscv.tuple.extract.nxv4i16.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0, i32 1)
- ret <vscale x 4 x i16> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv16i8_4t.nxv8i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 4), ptr, <vscale x 8 x i1>, i64, i64, i64)
-
-define <vscale x 8 x i16> @test_vlseg4_nxv8i16_triscv.vector.tuple_nxv16i8_4t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @test_vlseg4_nxv8i16_triscv.vector.tuple_nxv16i8_4t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg4_nxv8i16_triscv.vector.tuple_nxv16i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vlseg4e16.v v6, (a0)
+; CHECK-NEXT: vlseg4e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, i64 %vl, i64 4)
- %1 = call <vscale x 8 x i16> @llvm.riscv.tuple.extract.nxv8i16.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0, i32 1)
- ret <vscale x 8 x i16> %1
+ ret target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0
}
-
-define <vscale x 8 x i16> @test_vlseg4_mask_nxv8i16_triscv.vector.tuple_nxv16i8_4t(ptr %base, i64 %vl, <vscale x 8 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @test_vlseg4_mask_nxv8i16_triscv.vector.tuple_nxv16i8_4t(ptr %base, i64 %vl, <vscale x 8 x i1> %mask) {
; CHECK-LABEL: test_vlseg4_mask_nxv8i16_triscv.vector.tuple_nxv16i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vlseg4e16.v v6, (a0), v0.t
+; CHECK-NEXT: vlseg4e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv16i8_4t.nxv8i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 8 x i1> %mask, i64 %vl, i64 1, i64 4)
- %1 = call <vscale x 8 x i16> @llvm.riscv.tuple.extract.nxv8i16.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0, i32 1)
- ret <vscale x 8 x i16> %1
+ ret target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 2 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv2i8_5t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 5), ptr, <vscale x 1 x i1>, i64, i64, i64)
-
-define <vscale x 1 x i16> @test_vlseg5_nxv1i16_triscv.vector.tuple_nxv2i8_5t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 5) @test_vlseg5_nxv1i16_triscv.vector.tuple_nxv2i8_5t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg5_nxv1i16_triscv.vector.tuple_nxv2i8_5t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT: vlseg5e16.v v7, (a0)
+; CHECK-NEXT: vlseg5e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv2i8_5t(target("riscv.vector.tuple", <vscale x 2 x i8>, 5) undef, ptr %base, i64 %vl, i64 4)
- %1 = call <vscale x 1 x i16> @llvm.riscv.tuple.extract.nxv1i16.triscv.vector.tuple_nxv2i8_5t(target("riscv.vector.tuple", <vscale x 2 x i8>, 5) %0, i32 1)
- ret <vscale x 1 x i16> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 5) %0
}
-
-define <vscale x 1 x i16> @test_vlseg5_mask_nxv1i16_triscv.vector.tuple_nxv2i8_5t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 5) @test_vlseg5_mask_nxv1i16_triscv.vector.tuple_nxv2i8_5t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg5_mask_nxv1i16_triscv.vector.tuple_nxv2i8_5t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT: vlseg5e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg5e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv2i8_5t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 5) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 4)
- %1 = call <vscale x 1 x i16> @llvm.riscv.tuple.extract.nxv1i16.triscv.vector.tuple_nxv2i8_5t(target("riscv.vector.tuple", <vscale x 2 x i8>, 5) %0, i32 1)
- ret <vscale x 1 x i16> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 5) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv4i8_5t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 5), ptr, <vscale x 2 x i1>, i64, i64, i64)
-
-define <vscale x 2 x i16> @test_vlseg5_nxv2i16_triscv.vector.tuple_nxv4i8_5t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @test_vlseg5_nxv2i16_triscv.vector.tuple_nxv4i8_5t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg5_nxv2i16_triscv.vector.tuple_nxv4i8_5t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT: vlseg5e16.v v7, (a0)
+; CHECK-NEXT: vlseg5e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) undef, ptr %base, i64 %vl, i64 4)
- %1 = call <vscale x 2 x i16> @llvm.riscv.tuple.extract.nxv2i16.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0, i32 1)
- ret <vscale x 2 x i16> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0
}
-
-define <vscale x 2 x i16> @test_vlseg5_mask_nxv2i16_triscv.vector.tuple_nxv4i8_5t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @test_vlseg5_mask_nxv2i16_triscv.vector.tuple_nxv4i8_5t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg5_mask_nxv2i16_triscv.vector.tuple_nxv4i8_5t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT: vlseg5e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg5e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv4i8_5t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 4)
- %1 = call <vscale x 2 x i16> @llvm.riscv.tuple.extract.nxv2i16.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0, i32 1)
- ret <vscale x 2 x i16> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv8i8_5t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 5), ptr, <vscale x 4 x i1>, i64, i64, i64)
-
-define <vscale x 4 x i16> @test_vlseg5_nxv4i16_triscv.vector.tuple_nxv8i8_5t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @test_vlseg5_nxv4i16_triscv.vector.tuple_nxv8i8_5t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg5_nxv4i16_triscv.vector.tuple_nxv8i8_5t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vlseg5e16.v v7, (a0)
+; CHECK-NEXT: vlseg5e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) undef, ptr %base, i64 %vl, i64 4)
- %1 = call <vscale x 4 x i16> @llvm.riscv.tuple.extract.nxv4i16.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0, i32 1)
- ret <vscale x 4 x i16> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0
}
-
-define <vscale x 4 x i16> @test_vlseg5_mask_nxv4i16_triscv.vector.tuple_nxv8i8_5t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @test_vlseg5_mask_nxv4i16_triscv.vector.tuple_nxv8i8_5t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: test_vlseg5_mask_nxv4i16_triscv.vector.tuple_nxv8i8_5t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vlseg5e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg5e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv8i8_5t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) undef, ptr %base, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 4)
- %1 = call <vscale x 4 x i16> @llvm.riscv.tuple.extract.nxv4i16.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0, i32 1)
- ret <vscale x 4 x i16> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 2 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv2i8_6t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 6), ptr, <vscale x 1 x i1>, i64, i64, i64)
-
-define <vscale x 1 x i16> @test_vlseg6_nxv1i16_triscv.vector.tuple_nxv2i8_6t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 6) @test_vlseg6_nxv1i16_triscv.vector.tuple_nxv2i8_6t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg6_nxv1i16_triscv.vector.tuple_nxv2i8_6t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT: vlseg6e16.v v7, (a0)
+; CHECK-NEXT: vlseg6e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv2i8_6t(target("riscv.vector.tuple", <vscale x 2 x i8>, 6) undef, ptr %base, i64 %vl, i64 4)
- %1 = call <vscale x 1 x i16> @llvm.riscv.tuple.extract.nxv1i16.triscv.vector.tuple_nxv2i8_6t(target("riscv.vector.tuple", <vscale x 2 x i8>, 6) %0, i32 1)
- ret <vscale x 1 x i16> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 6) %0
}
-
-define <vscale x 1 x i16> @test_vlseg6_mask_nxv1i16_triscv.vector.tuple_nxv2i8_6t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 6) @test_vlseg6_mask_nxv1i16_triscv.vector.tuple_nxv2i8_6t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg6_mask_nxv1i16_triscv.vector.tuple_nxv2i8_6t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT: vlseg6e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg6e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv2i8_6t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 6) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 4)
- %1 = call <vscale x 1 x i16> @llvm.riscv.tuple.extract.nxv1i16.triscv.vector.tuple_nxv2i8_6t(target("riscv.vector.tuple", <vscale x 2 x i8>, 6) %0, i32 1)
- ret <vscale x 1 x i16> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 6) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv4i8_6t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 6), ptr, <vscale x 2 x i1>, i64, i64, i64)
-
-define <vscale x 2 x i16> @test_vlseg6_nxv2i16_triscv.vector.tuple_nxv4i8_6t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @test_vlseg6_nxv2i16_triscv.vector.tuple_nxv4i8_6t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg6_nxv2i16_triscv.vector.tuple_nxv4i8_6t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT: vlseg6e16.v v7, (a0)
+; CHECK-NEXT: vlseg6e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) undef, ptr %base, i64 %vl, i64 4)
- %1 = call <vscale x 2 x i16> @llvm.riscv.tuple.extract.nxv2i16.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0, i32 1)
- ret <vscale x 2 x i16> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0
}
-
-define <vscale x 2 x i16> @test_vlseg6_mask_nxv2i16_triscv.vector.tuple_nxv4i8_6t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @test_vlseg6_mask_nxv2i16_triscv.vector.tuple_nxv4i8_6t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg6_mask_nxv2i16_triscv.vector.tuple_nxv4i8_6t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT: vlseg6e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg6e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv4i8_6t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 4)
- %1 = call <vscale x 2 x i16> @llvm.riscv.tuple.extract.nxv2i16.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0, i32 1)
- ret <vscale x 2 x i16> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv8i8_6t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 6), ptr, <vscale x 4 x i1>, i64, i64, i64)
-
-define <vscale x 4 x i16> @test_vlseg6_nxv4i16_triscv.vector.tuple_nxv8i8_6t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @test_vlseg6_nxv4i16_triscv.vector.tuple_nxv8i8_6t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg6_nxv4i16_triscv.vector.tuple_nxv8i8_6t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vlseg6e16.v v7, (a0)
+; CHECK-NEXT: vlseg6e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) undef, ptr %base, i64 %vl, i64 4)
- %1 = call <vscale x 4 x i16> @llvm.riscv.tuple.extract.nxv4i16.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0, i32 1)
- ret <vscale x 4 x i16> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0
}
-
-define <vscale x 4 x i16> @test_vlseg6_mask_nxv4i16_triscv.vector.tuple_nxv8i8_6t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @test_vlseg6_mask_nxv4i16_triscv.vector.tuple_nxv8i8_6t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: test_vlseg6_mask_nxv4i16_triscv.vector.tuple_nxv8i8_6t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vlseg6e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg6e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv8i8_6t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) undef, ptr %base, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 4)
- %1 = call <vscale x 4 x i16> @llvm.riscv.tuple.extract.nxv4i16.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0, i32 1)
- ret <vscale x 4 x i16> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 2 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv2i8_7t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 7), ptr, <vscale x 1 x i1>, i64, i64, i64)
-
-define <vscale x 1 x i16> @test_vlseg7_nxv1i16_triscv.vector.tuple_nxv2i8_7t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 7) @test_vlseg7_nxv1i16_triscv.vector.tuple_nxv2i8_7t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg7_nxv1i16_triscv.vector.tuple_nxv2i8_7t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT: vlseg7e16.v v7, (a0)
+; CHECK-NEXT: vlseg7e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv2i8_7t(target("riscv.vector.tuple", <vscale x 2 x i8>, 7) undef, ptr %base, i64 %vl, i64 4)
- %1 = call <vscale x 1 x i16> @llvm.riscv.tuple.extract.nxv1i16.triscv.vector.tuple_nxv2i8_7t(target("riscv.vector.tuple", <vscale x 2 x i8>, 7) %0, i32 1)
- ret <vscale x 1 x i16> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 7) %0
}
-
-define <vscale x 1 x i16> @test_vlseg7_mask_nxv1i16_triscv.vector.tuple_nxv2i8_7t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 7) @test_vlseg7_mask_nxv1i16_triscv.vector.tuple_nxv2i8_7t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg7_mask_nxv1i16_triscv.vector.tuple_nxv2i8_7t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT: vlseg7e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg7e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv2i8_7t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 7) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 4)
- %1 = call <vscale x 1 x i16> @llvm.riscv.tuple.extract.nxv1i16.triscv.vector.tuple_nxv2i8_7t(target("riscv.vector.tuple", <vscale x 2 x i8>, 7) %0, i32 1)
- ret <vscale x 1 x i16> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 7) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv4i8_7t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 7), ptr, <vscale x 2 x i1>, i64, i64, i64)
-
-define <vscale x 2 x i16> @test_vlseg7_nxv2i16_triscv.vector.tuple_nxv4i8_7t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @test_vlseg7_nxv2i16_triscv.vector.tuple_nxv4i8_7t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg7_nxv2i16_triscv.vector.tuple_nxv4i8_7t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT: vlseg7e16.v v7, (a0)
+; CHECK-NEXT: vlseg7e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) undef, ptr %base, i64 %vl, i64 4)
- %1 = call <vscale x 2 x i16> @llvm.riscv.tuple.extract.nxv2i16.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0, i32 1)
- ret <vscale x 2 x i16> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0
}
-
-define <vscale x 2 x i16> @test_vlseg7_mask_nxv2i16_triscv.vector.tuple_nxv4i8_7t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @test_vlseg7_mask_nxv2i16_triscv.vector.tuple_nxv4i8_7t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg7_mask_nxv2i16_triscv.vector.tuple_nxv4i8_7t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT: vlseg7e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg7e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv4i8_7t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 4)
- %1 = call <vscale x 2 x i16> @llvm.riscv.tuple.extract.nxv2i16.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0, i32 1)
- ret <vscale x 2 x i16> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv8i8_7t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 7), ptr, <vscale x 4 x i1>, i64, i64, i64)
-
-define <vscale x 4 x i16> @test_vlseg7_nxv4i16_triscv.vector.tuple_nxv8i8_7t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @test_vlseg7_nxv4i16_triscv.vector.tuple_nxv8i8_7t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg7_nxv4i16_triscv.vector.tuple_nxv8i8_7t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vlseg7e16.v v7, (a0)
+; CHECK-NEXT: vlseg7e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) undef, ptr %base, i64 %vl, i64 4)
- %1 = call <vscale x 4 x i16> @llvm.riscv.tuple.extract.nxv4i16.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0, i32 1)
- ret <vscale x 4 x i16> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0
}
-
-define <vscale x 4 x i16> @test_vlseg7_mask_nxv4i16_triscv.vector.tuple_nxv8i8_7t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @test_vlseg7_mask_nxv4i16_triscv.vector.tuple_nxv8i8_7t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: test_vlseg7_mask_nxv4i16_triscv.vector.tuple_nxv8i8_7t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vlseg7e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg7e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv8i8_7t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) undef, ptr %base, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 4)
- %1 = call <vscale x 4 x i16> @llvm.riscv.tuple.extract.nxv4i16.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0, i32 1)
- ret <vscale x 4 x i16> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 2 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv2i8_8t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 8), ptr, <vscale x 1 x i1>, i64, i64, i64)
-
-define <vscale x 1 x i16> @test_vlseg8_nxv1i16_triscv.vector.tuple_nxv2i8_8t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 8) @test_vlseg8_nxv1i16_triscv.vector.tuple_nxv2i8_8t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg8_nxv1i16_triscv.vector.tuple_nxv2i8_8t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT: vlseg8e16.v v7, (a0)
+; CHECK-NEXT: vlseg8e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv2i8_8t(target("riscv.vector.tuple", <vscale x 2 x i8>, 8) undef, ptr %base, i64 %vl, i64 4)
- %1 = call <vscale x 1 x i16> @llvm.riscv.tuple.extract.nxv1i16.triscv.vector.tuple_nxv2i8_8t(target("riscv.vector.tuple", <vscale x 2 x i8>, 8) %0, i32 1)
- ret <vscale x 1 x i16> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 8) %0
}
-
-define <vscale x 1 x i16> @test_vlseg8_mask_nxv1i16_triscv.vector.tuple_nxv2i8_8t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 8) @test_vlseg8_mask_nxv1i16_triscv.vector.tuple_nxv2i8_8t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg8_mask_nxv1i16_triscv.vector.tuple_nxv2i8_8t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT: vlseg8e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg8e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv2i8_8t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 8) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 4)
- %1 = call <vscale x 1 x i16> @llvm.riscv.tuple.extract.nxv1i16.triscv.vector.tuple_nxv2i8_8t(target("riscv.vector.tuple", <vscale x 2 x i8>, 8) %0, i32 1)
- ret <vscale x 1 x i16> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 8) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv4i8_8t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 8), ptr, <vscale x 2 x i1>, i64, i64, i64)
-
-define <vscale x 2 x i16> @test_vlseg8_nxv2i16_triscv.vector.tuple_nxv4i8_8t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @test_vlseg8_nxv2i16_triscv.vector.tuple_nxv4i8_8t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg8_nxv2i16_triscv.vector.tuple_nxv4i8_8t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT: vlseg8e16.v v7, (a0)
+; CHECK-NEXT: vlseg8e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) undef, ptr %base, i64 %vl, i64 4)
- %1 = call <vscale x 2 x i16> @llvm.riscv.tuple.extract.nxv2i16.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0, i32 1)
- ret <vscale x 2 x i16> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0
}
-
-define <vscale x 2 x i16> @test_vlseg8_mask_nxv2i16_triscv.vector.tuple_nxv4i8_8t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @test_vlseg8_mask_nxv2i16_triscv.vector.tuple_nxv4i8_8t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg8_mask_nxv2i16_triscv.vector.tuple_nxv4i8_8t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT: vlseg8e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg8e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv4i8_8t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 4)
- %1 = call <vscale x 2 x i16> @llvm.riscv.tuple.extract.nxv2i16.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0, i32 1)
- ret <vscale x 2 x i16> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv8i8_8t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 8), ptr, <vscale x 4 x i1>, i64, i64, i64)
-
-define <vscale x 4 x i16> @test_vlseg8_nxv4i16_triscv.vector.tuple_nxv8i8_8t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @test_vlseg8_nxv4i16_triscv.vector.tuple_nxv8i8_8t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg8_nxv4i16_triscv.vector.tuple_nxv8i8_8t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vlseg8e16.v v7, (a0)
+; CHECK-NEXT: vlseg8e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) undef, ptr %base, i64 %vl, i64 4)
- %1 = call <vscale x 4 x i16> @llvm.riscv.tuple.extract.nxv4i16.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0, i32 1)
- ret <vscale x 4 x i16> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0
}
-
-define <vscale x 4 x i16> @test_vlseg8_mask_nxv4i16_triscv.vector.tuple_nxv8i8_8t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @test_vlseg8_mask_nxv4i16_triscv.vector.tuple_nxv8i8_8t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: test_vlseg8_mask_nxv4i16_triscv.vector.tuple_nxv8i8_8t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vlseg8e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg8e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv8i8_8t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) undef, ptr %base, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 4)
- %1 = call <vscale x 4 x i16> @llvm.riscv.tuple.extract.nxv4i16.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0, i32 1)
- ret <vscale x 4 x i16> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv4i8_2t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 2), ptr, <vscale x 1 x i1>, i64, i64, i64)
-
-define <vscale x 1 x i32> @test_vlseg2_nxv1i32_triscv.vector.tuple_nxv4i8_2t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @test_vlseg2_nxv1i32_triscv.vector.tuple_nxv4i8_2t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg2_nxv1i32_triscv.vector.tuple_nxv4i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT: vlseg2e32.v v7, (a0)
+; CHECK-NEXT: vlseg2e32.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) undef, ptr %base, i64 %vl, i64 5)
- %1 = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0, i32 1)
- ret <vscale x 1 x i32> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0
}
-
-define <vscale x 1 x i32> @test_vlseg2_mask_nxv1i32_triscv.vector.tuple_nxv4i8_2t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @test_vlseg2_mask_nxv1i32_triscv.vector.tuple_nxv4i8_2t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg2_mask_nxv1i32_triscv.vector.tuple_nxv4i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT: vlseg2e32.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg2e32.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv4i8_2t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 5)
- %1 = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0, i32 1)
- ret <vscale x 1 x i32> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv8i8_2t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 2), ptr, <vscale x 2 x i1>, i64, i64, i64)
-
-define <vscale x 2 x i32> @test_vlseg2_nxv2i32_triscv.vector.tuple_nxv8i8_2t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @test_vlseg2_nxv2i32_triscv.vector.tuple_nxv8i8_2t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg2_nxv2i32_triscv.vector.tuple_nxv8i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT: vlseg2e32.v v7, (a0)
+; CHECK-NEXT: vlseg2e32.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) undef, ptr %base, i64 %vl, i64 5)
- %1 = call <vscale x 2 x i32> @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0, i32 1)
- ret <vscale x 2 x i32> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0
}
-
-define <vscale x 2 x i32> @test_vlseg2_mask_nxv2i32_triscv.vector.tuple_nxv8i8_2t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @test_vlseg2_mask_nxv2i32_triscv.vector.tuple_nxv8i8_2t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg2_mask_nxv2i32_triscv.vector.tuple_nxv8i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT: vlseg2e32.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg2e32.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv8i8_2t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 5)
- %1 = call <vscale x 2 x i32> @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0, i32 1)
- ret <vscale x 2 x i32> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv16i8_2t.nxv4i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 2), ptr, <vscale x 4 x i1>, i64, i64, i64)
-
-define <vscale x 4 x i32> @test_vlseg2_nxv4i32_triscv.vector.tuple_nxv16i8_2t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @test_vlseg2_nxv4i32_triscv.vector.tuple_nxv16i8_2t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg2_nxv4i32_triscv.vector.tuple_nxv16i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vlseg2e32.v v6, (a0)
+; CHECK-NEXT: vlseg2e32.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, i64 %vl, i64 5)
- %1 = call <vscale x 4 x i32> @llvm.riscv.tuple.extract.nxv4i32.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0, i32 1)
- ret <vscale x 4 x i32> %1
+ ret target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0
}
-
-define <vscale x 4 x i32> @test_vlseg2_mask_nxv4i32_triscv.vector.tuple_nxv16i8_2t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @test_vlseg2_mask_nxv4i32_triscv.vector.tuple_nxv16i8_2t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: test_vlseg2_mask_nxv4i32_triscv.vector.tuple_nxv16i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vlseg2e32.v v6, (a0), v0.t
+; CHECK-NEXT: vlseg2e32.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv16i8_2t.nxv4i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 5)
- %1 = call <vscale x 4 x i32> @llvm.riscv.tuple.extract.nxv4i32.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0, i32 1)
- ret <vscale x 4 x i32> %1
+ ret target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv32i8_2t.nxv8i1(target("riscv.vector.tuple", <vscale x 32 x i8>, 2), ptr, <vscale x 8 x i1>, i64, i64, i64)
-
-define <vscale x 8 x i32> @test_vlseg2_nxv8i32_triscv.vector.tuple_nxv32i8_2t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @test_vlseg2_nxv8i32_triscv.vector.tuple_nxv32i8_2t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg2_nxv8i32_triscv.vector.tuple_nxv32i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT: vlseg2e32.v v4, (a0)
+; CHECK-NEXT: vlseg2e32.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, i64 %vl, i64 5)
- %1 = call <vscale x 8 x i32> @llvm.riscv.tuple.extract.nxv8i32.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0, i32 1)
- ret <vscale x 8 x i32> %1
+ ret target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0
}
-
-define <vscale x 8 x i32> @test_vlseg2_mask_nxv8i32_triscv.vector.tuple_nxv32i8_2t(ptr %base, i64 %vl, <vscale x 8 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @test_vlseg2_mask_nxv8i32_triscv.vector.tuple_nxv32i8_2t(ptr %base, i64 %vl, <vscale x 8 x i1> %mask) {
; CHECK-LABEL: test_vlseg2_mask_nxv8i32_triscv.vector.tuple_nxv32i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT: vlseg2e32.v v4, (a0), v0.t
+; CHECK-NEXT: vlseg2e32.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv32i8_2t.nxv8i1(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 8 x i1> %mask, i64 %vl, i64 1, i64 5)
- %1 = call <vscale x 8 x i32> @llvm.riscv.tuple.extract.nxv8i32.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0, i32 1)
- ret <vscale x 8 x i32> %1
+ ret target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv4i8_3t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 3), ptr, <vscale x 1 x i1>, i64, i64, i64)
-
-define <vscale x 1 x i32> @test_vlseg3_nxv1i32_triscv.vector.tuple_nxv4i8_3t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @test_vlseg3_nxv1i32_triscv.vector.tuple_nxv4i8_3t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg3_nxv1i32_triscv.vector.tuple_nxv4i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT: vlseg3e32.v v7, (a0)
+; CHECK-NEXT: vlseg3e32.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) undef, ptr %base, i64 %vl, i64 5)
- %1 = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0, i32 1)
- ret <vscale x 1 x i32> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0
}
-
-define <vscale x 1 x i32> @test_vlseg3_mask_nxv1i32_triscv.vector.tuple_nxv4i8_3t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @test_vlseg3_mask_nxv1i32_triscv.vector.tuple_nxv4i8_3t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg3_mask_nxv1i32_triscv.vector.tuple_nxv4i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT: vlseg3e32.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg3e32.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv4i8_3t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 5)
- %1 = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0, i32 1)
- ret <vscale x 1 x i32> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv8i8_3t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 3), ptr, <vscale x 2 x i1>, i64, i64, i64)
-
-define <vscale x 2 x i32> @test_vlseg3_nxv2i32_triscv.vector.tuple_nxv8i8_3t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @test_vlseg3_nxv2i32_triscv.vector.tuple_nxv8i8_3t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg3_nxv2i32_triscv.vector.tuple_nxv8i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT: vlseg3e32.v v7, (a0)
+; CHECK-NEXT: vlseg3e32.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) undef, ptr %base, i64 %vl, i64 5)
- %1 = call <vscale x 2 x i32> @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0, i32 1)
- ret <vscale x 2 x i32> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0
}
-
-define <vscale x 2 x i32> @test_vlseg3_mask_nxv2i32_triscv.vector.tuple_nxv8i8_3t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @test_vlseg3_mask_nxv2i32_triscv.vector.tuple_nxv8i8_3t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg3_mask_nxv2i32_triscv.vector.tuple_nxv8i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT: vlseg3e32.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg3e32.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv8i8_3t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 5)
- %1 = call <vscale x 2 x i32> @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0, i32 1)
- ret <vscale x 2 x i32> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv4i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 3), ptr, <vscale x 4 x i1>, i64, i64, i64)
-
-define <vscale x 4 x i32> @test_vlseg3_nxv4i32_triscv.vector.tuple_nxv16i8_3t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @test_vlseg3_nxv4i32_triscv.vector.tuple_nxv16i8_3t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg3_nxv4i32_triscv.vector.tuple_nxv16i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vlseg3e32.v v6, (a0)
+; CHECK-NEXT: vlseg3e32.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, i64 %vl, i64 5)
- %1 = call <vscale x 4 x i32> @llvm.riscv.tuple.extract.nxv4i32.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0, i32 1)
- ret <vscale x 4 x i32> %1
+ ret target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0
}
-
-define <vscale x 4 x i32> @test_vlseg3_mask_nxv4i32_triscv.vector.tuple_nxv16i8_3t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @test_vlseg3_mask_nxv4i32_triscv.vector.tuple_nxv16i8_3t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: test_vlseg3_mask_nxv4i32_triscv.vector.tuple_nxv16i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vlseg3e32.v v6, (a0), v0.t
+; CHECK-NEXT: vlseg3e32.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv4i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 5)
- %1 = call <vscale x 4 x i32> @llvm.riscv.tuple.extract.nxv4i32.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0, i32 1)
- ret <vscale x 4 x i32> %1
+ ret target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv4i8_4t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 4), ptr, <vscale x 1 x i1>, i64, i64, i64)
-
-define <vscale x 1 x i32> @test_vlseg4_nxv1i32_triscv.vector.tuple_nxv4i8_4t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @test_vlseg4_nxv1i32_triscv.vector.tuple_nxv4i8_4t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg4_nxv1i32_triscv.vector.tuple_nxv4i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT: vlseg4e32.v v7, (a0)
+; CHECK-NEXT: vlseg4e32.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) undef, ptr %base, i64 %vl, i64 5)
- %1 = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0, i32 1)
- ret <vscale x 1 x i32> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0
}
-
-define <vscale x 1 x i32> @test_vlseg4_mask_nxv1i32_triscv.vector.tuple_nxv4i8_4t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @test_vlseg4_mask_nxv1i32_triscv.vector.tuple_nxv4i8_4t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg4_mask_nxv1i32_triscv.vector.tuple_nxv4i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT: vlseg4e32.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg4e32.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv4i8_4t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 5)
- %1 = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0, i32 1)
- ret <vscale x 1 x i32> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv8i8_4t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 4), ptr, <vscale x 2 x i1>, i64, i64, i64)
-
-define <vscale x 2 x i32> @test_vlseg4_nxv2i32_triscv.vector.tuple_nxv8i8_4t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @test_vlseg4_nxv2i32_triscv.vector.tuple_nxv8i8_4t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg4_nxv2i32_triscv.vector.tuple_nxv8i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT: vlseg4e32.v v7, (a0)
+; CHECK-NEXT: vlseg4e32.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) undef, ptr %base, i64 %vl, i64 5)
- %1 = call <vscale x 2 x i32> @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0, i32 1)
- ret <vscale x 2 x i32> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0
}
-
-define <vscale x 2 x i32> @test_vlseg4_mask_nxv2i32_triscv.vector.tuple_nxv8i8_4t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @test_vlseg4_mask_nxv2i32_triscv.vector.tuple_nxv8i8_4t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg4_mask_nxv2i32_triscv.vector.tuple_nxv8i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT: vlseg4e32.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg4e32.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv8i8_4t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 5)
- %1 = call <vscale x 2 x i32> @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0, i32 1)
- ret <vscale x 2 x i32> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv16i8_4t.nxv4i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 4), ptr, <vscale x 4 x i1>, i64, i64, i64)
-
-define <vscale x 4 x i32> @test_vlseg4_nxv4i32_triscv.vector.tuple_nxv16i8_4t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @test_vlseg4_nxv4i32_triscv.vector.tuple_nxv16i8_4t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg4_nxv4i32_triscv.vector.tuple_nxv16i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vlseg4e32.v v6, (a0)
+; CHECK-NEXT: vlseg4e32.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, i64 %vl, i64 5)
- %1 = call <vscale x 4 x i32> @llvm.riscv.tuple.extract.nxv4i32.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0, i32 1)
- ret <vscale x 4 x i32> %1
+ ret target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0
}
-
-define <vscale x 4 x i32> @test_vlseg4_mask_nxv4i32_triscv.vector.tuple_nxv16i8_4t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @test_vlseg4_mask_nxv4i32_triscv.vector.tuple_nxv16i8_4t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: test_vlseg4_mask_nxv4i32_triscv.vector.tuple_nxv16i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vlseg4e32.v v6, (a0), v0.t
+; CHECK-NEXT: vlseg4e32.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv16i8_4t.nxv4i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 5)
- %1 = call <vscale x 4 x i32> @llvm.riscv.tuple.extract.nxv4i32.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0, i32 1)
- ret <vscale x 4 x i32> %1
+ ret target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv4i8_5t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 5), ptr, <vscale x 1 x i1>, i64, i64, i64)
-
-define <vscale x 1 x i32> @test_vlseg5_nxv1i32_triscv.vector.tuple_nxv4i8_5t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @test_vlseg5_nxv1i32_triscv.vector.tuple_nxv4i8_5t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg5_nxv1i32_triscv.vector.tuple_nxv4i8_5t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT: vlseg5e32.v v7, (a0)
+; CHECK-NEXT: vlseg5e32.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) undef, ptr %base, i64 %vl, i64 5)
- %1 = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0, i32 1)
- ret <vscale x 1 x i32> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0
}
-
-define <vscale x 1 x i32> @test_vlseg5_mask_nxv1i32_triscv.vector.tuple_nxv4i8_5t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @test_vlseg5_mask_nxv1i32_triscv.vector.tuple_nxv4i8_5t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg5_mask_nxv1i32_triscv.vector.tuple_nxv4i8_5t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT: vlseg5e32.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg5e32.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv4i8_5t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 5)
- %1 = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0, i32 1)
- ret <vscale x 1 x i32> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv8i8_5t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 5), ptr, <vscale x 2 x i1>, i64, i64, i64)
-
-define <vscale x 2 x i32> @test_vlseg5_nxv2i32_triscv.vector.tuple_nxv8i8_5t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @test_vlseg5_nxv2i32_triscv.vector.tuple_nxv8i8_5t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg5_nxv2i32_triscv.vector.tuple_nxv8i8_5t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT: vlseg5e32.v v7, (a0)
+; CHECK-NEXT: vlseg5e32.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) undef, ptr %base, i64 %vl, i64 5)
- %1 = call <vscale x 2 x i32> @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0, i32 1)
- ret <vscale x 2 x i32> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0
}
-
-define <vscale x 2 x i32> @test_vlseg5_mask_nxv2i32_triscv.vector.tuple_nxv8i8_5t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @test_vlseg5_mask_nxv2i32_triscv.vector.tuple_nxv8i8_5t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg5_mask_nxv2i32_triscv.vector.tuple_nxv8i8_5t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT: vlseg5e32.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg5e32.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv8i8_5t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 5)
- %1 = call <vscale x 2 x i32> @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0, i32 1)
- ret <vscale x 2 x i32> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv4i8_6t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 6), ptr, <vscale x 1 x i1>, i64, i64, i64)
-
-define <vscale x 1 x i32> @test_vlseg6_nxv1i32_triscv.vector.tuple_nxv4i8_6t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @test_vlseg6_nxv1i32_triscv.vector.tuple_nxv4i8_6t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg6_nxv1i32_triscv.vector.tuple_nxv4i8_6t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT: vlseg6e32.v v7, (a0)
+; CHECK-NEXT: vlseg6e32.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) undef, ptr %base, i64 %vl, i64 5)
- %1 = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0, i32 1)
- ret <vscale x 1 x i32> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0
}
-
-define <vscale x 1 x i32> @test_vlseg6_mask_nxv1i32_triscv.vector.tuple_nxv4i8_6t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @test_vlseg6_mask_nxv1i32_triscv.vector.tuple_nxv4i8_6t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg6_mask_nxv1i32_triscv.vector.tuple_nxv4i8_6t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT: vlseg6e32.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg6e32.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv4i8_6t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 5)
- %1 = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0, i32 1)
- ret <vscale x 1 x i32> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv8i8_6t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 6), ptr, <vscale x 2 x i1>, i64, i64, i64)
-
-define <vscale x 2 x i32> @test_vlseg6_nxv2i32_triscv.vector.tuple_nxv8i8_6t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @test_vlseg6_nxv2i32_triscv.vector.tuple_nxv8i8_6t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg6_nxv2i32_triscv.vector.tuple_nxv8i8_6t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT: vlseg6e32.v v7, (a0)
+; CHECK-NEXT: vlseg6e32.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) undef, ptr %base, i64 %vl, i64 5)
- %1 = call <vscale x 2 x i32> @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0, i32 1)
- ret <vscale x 2 x i32> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0
}
-
-define <vscale x 2 x i32> @test_vlseg6_mask_nxv2i32_triscv.vector.tuple_nxv8i8_6t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @test_vlseg6_mask_nxv2i32_triscv.vector.tuple_nxv8i8_6t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg6_mask_nxv2i32_triscv.vector.tuple_nxv8i8_6t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT: vlseg6e32.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg6e32.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv8i8_6t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 5)
- %1 = call <vscale x 2 x i32> @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0, i32 1)
- ret <vscale x 2 x i32> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv4i8_7t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 7), ptr, <vscale x 1 x i1>, i64, i64, i64)
-
-define <vscale x 1 x i32> @test_vlseg7_nxv1i32_triscv.vector.tuple_nxv4i8_7t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @test_vlseg7_nxv1i32_triscv.vector.tuple_nxv4i8_7t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg7_nxv1i32_triscv.vector.tuple_nxv4i8_7t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT: vlseg7e32.v v7, (a0)
+; CHECK-NEXT: vlseg7e32.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) undef, ptr %base, i64 %vl, i64 5)
- %1 = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0, i32 1)
- ret <vscale x 1 x i32> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0
}
-
-define <vscale x 1 x i32> @test_vlseg7_mask_nxv1i32_triscv.vector.tuple_nxv4i8_7t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @test_vlseg7_mask_nxv1i32_triscv.vector.tuple_nxv4i8_7t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg7_mask_nxv1i32_triscv.vector.tuple_nxv4i8_7t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT: vlseg7e32.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg7e32.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv4i8_7t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 5)
- %1 = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0, i32 1)
- ret <vscale x 1 x i32> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv8i8_7t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 7), ptr, <vscale x 2 x i1>, i64, i64, i64)
-
-define <vscale x 2 x i32> @test_vlseg7_nxv2i32_triscv.vector.tuple_nxv8i8_7t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @test_vlseg7_nxv2i32_triscv.vector.tuple_nxv8i8_7t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg7_nxv2i32_triscv.vector.tuple_nxv8i8_7t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT: vlseg7e32.v v7, (a0)
+; CHECK-NEXT: vlseg7e32.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) undef, ptr %base, i64 %vl, i64 5)
- %1 = call <vscale x 2 x i32> @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0, i32 1)
- ret <vscale x 2 x i32> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0
}
-
-define <vscale x 2 x i32> @test_vlseg7_mask_nxv2i32_triscv.vector.tuple_nxv8i8_7t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @test_vlseg7_mask_nxv2i32_triscv.vector.tuple_nxv8i8_7t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg7_mask_nxv2i32_triscv.vector.tuple_nxv8i8_7t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT: vlseg7e32.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg7e32.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv8i8_7t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 5)
- %1 = call <vscale x 2 x i32> @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0, i32 1)
- ret <vscale x 2 x i32> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv4i8_8t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 8), ptr, <vscale x 1 x i1>, i64, i64, i64)
-
-define <vscale x 1 x i32> @test_vlseg8_nxv1i32_triscv.vector.tuple_nxv4i8_8t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @test_vlseg8_nxv1i32_triscv.vector.tuple_nxv4i8_8t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg8_nxv1i32_triscv.vector.tuple_nxv4i8_8t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT: vlseg8e32.v v7, (a0)
+; CHECK-NEXT: vlseg8e32.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) undef, ptr %base, i64 %vl, i64 5)
- %1 = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0, i32 1)
- ret <vscale x 1 x i32> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0
}
-
-define <vscale x 1 x i32> @test_vlseg8_mask_nxv1i32_triscv.vector.tuple_nxv4i8_8t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @test_vlseg8_mask_nxv1i32_triscv.vector.tuple_nxv4i8_8t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg8_mask_nxv1i32_triscv.vector.tuple_nxv4i8_8t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT: vlseg8e32.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg8e32.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv4i8_8t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 5)
- %1 = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0, i32 1)
- ret <vscale x 1 x i32> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv8i8_8t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 8), ptr, <vscale x 2 x i1>, i64, i64, i64)
-
-define <vscale x 2 x i32> @test_vlseg8_nxv2i32_triscv.vector.tuple_nxv8i8_8t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @test_vlseg8_nxv2i32_triscv.vector.tuple_nxv8i8_8t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg8_nxv2i32_triscv.vector.tuple_nxv8i8_8t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT: vlseg8e32.v v7, (a0)
+; CHECK-NEXT: vlseg8e32.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) undef, ptr %base, i64 %vl, i64 5)
- %1 = call <vscale x 2 x i32> @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0, i32 1)
- ret <vscale x 2 x i32> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0
}
-
-define <vscale x 2 x i32> @test_vlseg8_mask_nxv2i32_triscv.vector.tuple_nxv8i8_8t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @test_vlseg8_mask_nxv2i32_triscv.vector.tuple_nxv8i8_8t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg8_mask_nxv2i32_triscv.vector.tuple_nxv8i8_8t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT: vlseg8e32.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg8e32.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv8i8_8t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 5)
- %1 = call <vscale x 2 x i32> @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0, i32 1)
- ret <vscale x 2 x i32> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv8i8_2t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 2), ptr, <vscale x 1 x i1>, i64, i64, i64)
-
-define <vscale x 1 x i64> @test_vlseg2_nxv1i64_triscv.vector.tuple_nxv8i8_2t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @test_vlseg2_nxv1i64_triscv.vector.tuple_nxv8i8_2t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg2_nxv1i64_triscv.vector.tuple_nxv8i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
-; CHECK-NEXT: vlseg2e64.v v7, (a0)
+; CHECK-NEXT: vlseg2e64.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) undef, ptr %base, i64 %vl, i64 6)
- %1 = call <vscale x 1 x i64> @llvm.riscv.tuple.extract.nxv1i64.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0, i32 1)
- ret <vscale x 1 x i64> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0
}
-
-define <vscale x 1 x i64> @test_vlseg2_mask_nxv1i64_triscv.vector.tuple_nxv8i8_2t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @test_vlseg2_mask_nxv1i64_triscv.vector.tuple_nxv8i8_2t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg2_mask_nxv1i64_triscv.vector.tuple_nxv8i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
-; CHECK-NEXT: vlseg2e64.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg2e64.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv8i8_2t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 6)
- %1 = call <vscale x 1 x i64> @llvm.riscv.tuple.extract.nxv1i64.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0, i32 1)
- ret <vscale x 1 x i64> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv16i8_2t.nxv2i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 2), ptr, <vscale x 2 x i1>, i64, i64, i64)
-
-define <vscale x 2 x i64> @test_vlseg2_nxv2i64_triscv.vector.tuple_nxv16i8_2t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @test_vlseg2_nxv2i64_triscv.vector.tuple_nxv16i8_2t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg2_nxv2i64_triscv.vector.tuple_nxv16i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vlseg2e64.v v6, (a0)
+; CHECK-NEXT: vlseg2e64.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, i64 %vl, i64 6)
- %1 = call <vscale x 2 x i64> @llvm.riscv.tuple.extract.nxv2i64.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0, i32 1)
- ret <vscale x 2 x i64> %1
+ ret target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0
}
-
-define <vscale x 2 x i64> @test_vlseg2_mask_nxv2i64_triscv.vector.tuple_nxv16i8_2t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @test_vlseg2_mask_nxv2i64_triscv.vector.tuple_nxv16i8_2t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg2_mask_nxv2i64_triscv.vector.tuple_nxv16i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vlseg2e64.v v6, (a0), v0.t
+; CHECK-NEXT: vlseg2e64.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv16i8_2t.nxv2i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 6)
- %1 = call <vscale x 2 x i64> @llvm.riscv.tuple.extract.nxv2i64.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0, i32 1)
- ret <vscale x 2 x i64> %1
+ ret target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv32i8_2t.nxv4i1(target("riscv.vector.tuple", <vscale x 32 x i8>, 2), ptr, <vscale x 4 x i1>, i64, i64, i64)
-
-define <vscale x 4 x i64> @test_vlseg2_nxv4i64_triscv.vector.tuple_nxv32i8_2t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @test_vlseg2_nxv4i64_triscv.vector.tuple_nxv32i8_2t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg2_nxv4i64_triscv.vector.tuple_nxv32i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma
-; CHECK-NEXT: vlseg2e64.v v4, (a0)
+; CHECK-NEXT: vlseg2e64.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, i64 %vl, i64 6)
- %1 = call <vscale x 4 x i64> @llvm.riscv.tuple.extract.nxv4i64.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0, i32 1)
- ret <vscale x 4 x i64> %1
+ ret target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0
}
-
-define <vscale x 4 x i64> @test_vlseg2_mask_nxv4i64_triscv.vector.tuple_nxv32i8_2t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @test_vlseg2_mask_nxv4i64_triscv.vector.tuple_nxv32i8_2t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: test_vlseg2_mask_nxv4i64_triscv.vector.tuple_nxv32i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma
-; CHECK-NEXT: vlseg2e64.v v4, (a0), v0.t
+; CHECK-NEXT: vlseg2e64.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv32i8_2t.nxv4i1(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 6)
- %1 = call <vscale x 4 x i64> @llvm.riscv.tuple.extract.nxv4i64.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0, i32 1)
- ret <vscale x 4 x i64> %1
+ ret target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv8i8_3t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 3), ptr, <vscale x 1 x i1>, i64, i64, i64)
-
-define <vscale x 1 x i64> @test_vlseg3_nxv1i64_triscv.vector.tuple_nxv8i8_3t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @test_vlseg3_nxv1i64_triscv.vector.tuple_nxv8i8_3t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg3_nxv1i64_triscv.vector.tuple_nxv8i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
-; CHECK-NEXT: vlseg3e64.v v7, (a0)
+; CHECK-NEXT: vlseg3e64.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) undef, ptr %base, i64 %vl, i64 6)
- %1 = call <vscale x 1 x i64> @llvm.riscv.tuple.extract.nxv1i64.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0, i32 1)
- ret <vscale x 1 x i64> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0
}
-
-define <vscale x 1 x i64> @test_vlseg3_mask_nxv1i64_triscv.vector.tuple_nxv8i8_3t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @test_vlseg3_mask_nxv1i64_triscv.vector.tuple_nxv8i8_3t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg3_mask_nxv1i64_triscv.vector.tuple_nxv8i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
-; CHECK-NEXT: vlseg3e64.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg3e64.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv8i8_3t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 6)
- %1 = call <vscale x 1 x i64> @llvm.riscv.tuple.extract.nxv1i64.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0, i32 1)
- ret <vscale x 1 x i64> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv2i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 3), ptr, <vscale x 2 x i1>, i64, i64, i64)
-
-define <vscale x 2 x i64> @test_vlseg3_nxv2i64_triscv.vector.tuple_nxv16i8_3t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @test_vlseg3_nxv2i64_triscv.vector.tuple_nxv16i8_3t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg3_nxv2i64_triscv.vector.tuple_nxv16i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vlseg3e64.v v6, (a0)
+; CHECK-NEXT: vlseg3e64.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, i64 %vl, i64 6)
- %1 = call <vscale x 2 x i64> @llvm.riscv.tuple.extract.nxv2i64.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0, i32 1)
- ret <vscale x 2 x i64> %1
+ ret target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0
}
-
-define <vscale x 2 x i64> @test_vlseg3_mask_nxv2i64_triscv.vector.tuple_nxv16i8_3t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @test_vlseg3_mask_nxv2i64_triscv.vector.tuple_nxv16i8_3t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg3_mask_nxv2i64_triscv.vector.tuple_nxv16i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vlseg3e64.v v6, (a0), v0.t
+; CHECK-NEXT: vlseg3e64.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv2i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 6)
- %1 = call <vscale x 2 x i64> @llvm.riscv.tuple.extract.nxv2i64.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0, i32 1)
- ret <vscale x 2 x i64> %1
+ ret target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv8i8_4t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 4), ptr, <vscale x 1 x i1>, i64, i64, i64)
-
-define <vscale x 1 x i64> @test_vlseg4_nxv1i64_triscv.vector.tuple_nxv8i8_4t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @test_vlseg4_nxv1i64_triscv.vector.tuple_nxv8i8_4t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg4_nxv1i64_triscv.vector.tuple_nxv8i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
-; CHECK-NEXT: vlseg4e64.v v7, (a0)
+; CHECK-NEXT: vlseg4e64.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) undef, ptr %base, i64 %vl, i64 6)
- %1 = call <vscale x 1 x i64> @llvm.riscv.tuple.extract.nxv1i64.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0, i32 1)
- ret <vscale x 1 x i64> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0
}
-
-define <vscale x 1 x i64> @test_vlseg4_mask_nxv1i64_triscv.vector.tuple_nxv8i8_4t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @test_vlseg4_mask_nxv1i64_triscv.vector.tuple_nxv8i8_4t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg4_mask_nxv1i64_triscv.vector.tuple_nxv8i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
-; CHECK-NEXT: vlseg4e64.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg4e64.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv8i8_4t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 6)
- %1 = call <vscale x 1 x i64> @llvm.riscv.tuple.extract.nxv1i64.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0, i32 1)
- ret <vscale x 1 x i64> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv16i8_4t.nxv2i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 4), ptr, <vscale x 2 x i1>, i64, i64, i64)
-
-define <vscale x 2 x i64> @test_vlseg4_nxv2i64_triscv.vector.tuple_nxv16i8_4t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @test_vlseg4_nxv2i64_triscv.vector.tuple_nxv16i8_4t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg4_nxv2i64_triscv.vector.tuple_nxv16i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vlseg4e64.v v6, (a0)
+; CHECK-NEXT: vlseg4e64.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, i64 %vl, i64 6)
- %1 = call <vscale x 2 x i64> @llvm.riscv.tuple.extract.nxv2i64.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0, i32 1)
- ret <vscale x 2 x i64> %1
+ ret target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0
}
-
-define <vscale x 2 x i64> @test_vlseg4_mask_nxv2i64_triscv.vector.tuple_nxv16i8_4t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @test_vlseg4_mask_nxv2i64_triscv.vector.tuple_nxv16i8_4t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg4_mask_nxv2i64_triscv.vector.tuple_nxv16i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vlseg4e64.v v6, (a0), v0.t
+; CHECK-NEXT: vlseg4e64.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv16i8_4t.nxv2i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 6)
- %1 = call <vscale x 2 x i64> @llvm.riscv.tuple.extract.nxv2i64.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0, i32 1)
- ret <vscale x 2 x i64> %1
+ ret target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv8i8_5t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 5), ptr, <vscale x 1 x i1>, i64, i64, i64)
-
-define <vscale x 1 x i64> @test_vlseg5_nxv1i64_triscv.vector.tuple_nxv8i8_5t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @test_vlseg5_nxv1i64_triscv.vector.tuple_nxv8i8_5t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg5_nxv1i64_triscv.vector.tuple_nxv8i8_5t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
-; CHECK-NEXT: vlseg5e64.v v7, (a0)
+; CHECK-NEXT: vlseg5e64.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) undef, ptr %base, i64 %vl, i64 6)
- %1 = call <vscale x 1 x i64> @llvm.riscv.tuple.extract.nxv1i64.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0, i32 1)
- ret <vscale x 1 x i64> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0
}
-
-define <vscale x 1 x i64> @test_vlseg5_mask_nxv1i64_triscv.vector.tuple_nxv8i8_5t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @test_vlseg5_mask_nxv1i64_triscv.vector.tuple_nxv8i8_5t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg5_mask_nxv1i64_triscv.vector.tuple_nxv8i8_5t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
-; CHECK-NEXT: vlseg5e64.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg5e64.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv8i8_5t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 6)
- %1 = call <vscale x 1 x i64> @llvm.riscv.tuple.extract.nxv1i64.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0, i32 1)
- ret <vscale x 1 x i64> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv8i8_6t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 6), ptr, <vscale x 1 x i1>, i64, i64, i64)
-
-define <vscale x 1 x i64> @test_vlseg6_nxv1i64_triscv.vector.tuple_nxv8i8_6t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @test_vlseg6_nxv1i64_triscv.vector.tuple_nxv8i8_6t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg6_nxv1i64_triscv.vector.tuple_nxv8i8_6t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
-; CHECK-NEXT: vlseg6e64.v v7, (a0)
+; CHECK-NEXT: vlseg6e64.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) undef, ptr %base, i64 %vl, i64 6)
- %1 = call <vscale x 1 x i64> @llvm.riscv.tuple.extract.nxv1i64.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0, i32 1)
- ret <vscale x 1 x i64> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0
}
-
-define <vscale x 1 x i64> @test_vlseg6_mask_nxv1i64_triscv.vector.tuple_nxv8i8_6t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @test_vlseg6_mask_nxv1i64_triscv.vector.tuple_nxv8i8_6t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg6_mask_nxv1i64_triscv.vector.tuple_nxv8i8_6t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
-; CHECK-NEXT: vlseg6e64.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg6e64.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv8i8_6t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 6)
- %1 = call <vscale x 1 x i64> @llvm.riscv.tuple.extract.nxv1i64.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0, i32 1)
- ret <vscale x 1 x i64> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv8i8_7t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 7), ptr, <vscale x 1 x i1>, i64, i64, i64)
-
-define <vscale x 1 x i64> @test_vlseg7_nxv1i64_triscv.vector.tuple_nxv8i8_7t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @test_vlseg7_nxv1i64_triscv.vector.tuple_nxv8i8_7t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg7_nxv1i64_triscv.vector.tuple_nxv8i8_7t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
-; CHECK-NEXT: vlseg7e64.v v7, (a0)
+; CHECK-NEXT: vlseg7e64.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) undef, ptr %base, i64 %vl, i64 6)
- %1 = call <vscale x 1 x i64> @llvm.riscv.tuple.extract.nxv1i64.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0, i32 1)
- ret <vscale x 1 x i64> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0
}
-
-define <vscale x 1 x i64> @test_vlseg7_mask_nxv1i64_triscv.vector.tuple_nxv8i8_7t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @test_vlseg7_mask_nxv1i64_triscv.vector.tuple_nxv8i8_7t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg7_mask_nxv1i64_triscv.vector.tuple_nxv8i8_7t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
-; CHECK-NEXT: vlseg7e64.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg7e64.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv8i8_7t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 6)
- %1 = call <vscale x 1 x i64> @llvm.riscv.tuple.extract.nxv1i64.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0, i32 1)
- ret <vscale x 1 x i64> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv8i8_8t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 8), ptr, <vscale x 1 x i1>, i64, i64, i64)
-
-define <vscale x 1 x i64> @test_vlseg8_nxv1i64_triscv.vector.tuple_nxv8i8_8t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @test_vlseg8_nxv1i64_triscv.vector.tuple_nxv8i8_8t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg8_nxv1i64_triscv.vector.tuple_nxv8i8_8t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
-; CHECK-NEXT: vlseg8e64.v v7, (a0)
+; CHECK-NEXT: vlseg8e64.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) undef, ptr %base, i64 %vl, i64 6)
- %1 = call <vscale x 1 x i64> @llvm.riscv.tuple.extract.nxv1i64.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0, i32 1)
- ret <vscale x 1 x i64> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0
}
-
-define <vscale x 1 x i64> @test_vlseg8_mask_nxv1i64_triscv.vector.tuple_nxv8i8_8t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @test_vlseg8_mask_nxv1i64_triscv.vector.tuple_nxv8i8_8t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg8_mask_nxv1i64_triscv.vector.tuple_nxv8i8_8t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
-; CHECK-NEXT: vlseg8e64.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg8e64.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv8i8_8t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 6)
- %1 = call <vscale x 1 x i64> @llvm.riscv.tuple.extract.nxv1i64.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0, i32 1)
- ret <vscale x 1 x i64> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0
}
-
-
-define <vscale x 1 x half> @test_vlseg2_nxv1f16_triscv.vector.tuple_nxv2i8_2t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 2) @test_vlseg2_nxv1f16_triscv.vector.tuple_nxv2i8_2t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg2_nxv1f16_triscv.vector.tuple_nxv2i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT: vlseg2e16.v v7, (a0)
+; CHECK-NEXT: vlseg2e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv2i8_2t(target("riscv.vector.tuple", <vscale x 2 x i8>, 2) undef, ptr %base, i64 %vl, i64 4)
- %1 = call <vscale x 1 x half> @llvm.riscv.tuple.extract.nxv1f16.triscv.vector.tuple_nxv2i8_2t(target("riscv.vector.tuple", <vscale x 2 x i8>, 2) %0, i32 1)
- ret <vscale x 1 x half> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 2) %0
}
-
-define <vscale x 1 x half> @test_vlseg2_mask_nxv1f16_triscv.vector.tuple_nxv2i8_2t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 2) @test_vlseg2_mask_nxv1f16_triscv.vector.tuple_nxv2i8_2t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg2_mask_nxv1f16_triscv.vector.tuple_nxv2i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT: vlseg2e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg2e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv2i8_2t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 2) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 4)
- %1 = call <vscale x 1 x half> @llvm.riscv.tuple.extract.nxv1f16.triscv.vector.tuple_nxv2i8_2t(target("riscv.vector.tuple", <vscale x 2 x i8>, 2) %0, i32 1)
- ret <vscale x 1 x half> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 2) %0
}
-
-
-define <vscale x 2 x half> @test_vlseg2_nxv2f16_triscv.vector.tuple_nxv4i8_2t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @test_vlseg2_nxv2f16_triscv.vector.tuple_nxv4i8_2t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg2_nxv2f16_triscv.vector.tuple_nxv4i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT: vlseg2e16.v v7, (a0)
+; CHECK-NEXT: vlseg2e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) undef, ptr %base, i64 %vl, i64 4)
- %1 = call <vscale x 2 x half> @llvm.riscv.tuple.extract.nxv2f16.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0, i32 1)
- ret <vscale x 2 x half> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0
}
-
-define <vscale x 2 x half> @test_vlseg2_mask_nxv2f16_triscv.vector.tuple_nxv4i8_2t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @test_vlseg2_mask_nxv2f16_triscv.vector.tuple_nxv4i8_2t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg2_mask_nxv2f16_triscv.vector.tuple_nxv4i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT: vlseg2e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg2e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv4i8_2t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 4)
- %1 = call <vscale x 2 x half> @llvm.riscv.tuple.extract.nxv2f16.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0, i32 1)
- ret <vscale x 2 x half> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0
}
-
-
-define <vscale x 4 x half> @test_vlseg2_nxv4f16_triscv.vector.tuple_nxv8i8_2t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @test_vlseg2_nxv4f16_triscv.vector.tuple_nxv8i8_2t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg2_nxv4f16_triscv.vector.tuple_nxv8i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vlseg2e16.v v7, (a0)
+; CHECK-NEXT: vlseg2e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) undef, ptr %base, i64 %vl, i64 4)
- %1 = call <vscale x 4 x half> @llvm.riscv.tuple.extract.nxv4f16.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0, i32 1)
- ret <vscale x 4 x half> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0
}
-
-define <vscale x 4 x half> @test_vlseg2_mask_nxv4f16_triscv.vector.tuple_nxv8i8_2t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @test_vlseg2_mask_nxv4f16_triscv.vector.tuple_nxv8i8_2t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: test_vlseg2_mask_nxv4f16_triscv.vector.tuple_nxv8i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vlseg2e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg2e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv8i8_2t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) undef, ptr %base, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 4)
- %1 = call <vscale x 4 x half> @llvm.riscv.tuple.extract.nxv4f16.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0, i32 1)
- ret <vscale x 4 x half> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0
}
-
-
-define <vscale x 8 x half> @test_vlseg2_nxv8f16_triscv.vector.tuple_nxv16i8_2t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @test_vlseg2_nxv8f16_triscv.vector.tuple_nxv16i8_2t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg2_nxv8f16_triscv.vector.tuple_nxv16i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vlseg2e16.v v6, (a0)
+; CHECK-NEXT: vlseg2e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, i64 %vl, i64 4)
- %1 = call <vscale x 8 x half> @llvm.riscv.tuple.extract.nxv8f16.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0, i32 1)
- ret <vscale x 8 x half> %1
+ ret target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0
}
-
-define <vscale x 8 x half> @test_vlseg2_mask_nxv8f16_triscv.vector.tuple_nxv16i8_2t(ptr %base, i64 %vl, <vscale x 8 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @test_vlseg2_mask_nxv8f16_triscv.vector.tuple_nxv16i8_2t(ptr %base, i64 %vl, <vscale x 8 x i1> %mask) {
; CHECK-LABEL: test_vlseg2_mask_nxv8f16_triscv.vector.tuple_nxv16i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vlseg2e16.v v6, (a0), v0.t
+; CHECK-NEXT: vlseg2e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv16i8_2t.nxv8i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 8 x i1> %mask, i64 %vl, i64 1, i64 4)
- %1 = call <vscale x 8 x half> @llvm.riscv.tuple.extract.nxv8f16.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0, i32 1)
- ret <vscale x 8 x half> %1
+ ret target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0
}
-
-
-define <vscale x 16 x half> @test_vlseg2_nxv16f16_triscv.vector.tuple_nxv32i8_2t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @test_vlseg2_nxv16f16_triscv.vector.tuple_nxv32i8_2t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg2_nxv16f16_triscv.vector.tuple_nxv32i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
-; CHECK-NEXT: vlseg2e16.v v4, (a0)
+; CHECK-NEXT: vlseg2e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, i64 %vl, i64 4)
- %1 = call <vscale x 16 x half> @llvm.riscv.tuple.extract.nxv16f16.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0, i32 1)
- ret <vscale x 16 x half> %1
+ ret target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0
}
-
-define <vscale x 16 x half> @test_vlseg2_mask_nxv16f16_triscv.vector.tuple_nxv32i8_2t(ptr %base, i64 %vl, <vscale x 16 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @test_vlseg2_mask_nxv16f16_triscv.vector.tuple_nxv32i8_2t(ptr %base, i64 %vl, <vscale x 16 x i1> %mask) {
; CHECK-LABEL: test_vlseg2_mask_nxv16f16_triscv.vector.tuple_nxv32i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
-; CHECK-NEXT: vlseg2e16.v v4, (a0), v0.t
+; CHECK-NEXT: vlseg2e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv32i8_2t.nxv16i1(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 16 x i1> %mask, i64 %vl, i64 1, i64 4)
- %1 = call <vscale x 16 x half> @llvm.riscv.tuple.extract.nxv16f16.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0, i32 1)
- ret <vscale x 16 x half> %1
+ ret target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0
}
-
-
-define <vscale x 1 x half> @test_vlseg3_nxv1f16_triscv.vector.tuple_nxv2i8_3t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 3) @test_vlseg3_nxv1f16_triscv.vector.tuple_nxv2i8_3t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg3_nxv1f16_triscv.vector.tuple_nxv2i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT: vlseg3e16.v v7, (a0)
+; CHECK-NEXT: vlseg3e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv2i8_3t(target("riscv.vector.tuple", <vscale x 2 x i8>, 3) undef, ptr %base, i64 %vl, i64 4)
- %1 = call <vscale x 1 x half> @llvm.riscv.tuple.extract.nxv1f16.triscv.vector.tuple_nxv2i8_3t(target("riscv.vector.tuple", <vscale x 2 x i8>, 3) %0, i32 1)
- ret <vscale x 1 x half> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 3) %0
}
-
-define <vscale x 1 x half> @test_vlseg3_mask_nxv1f16_triscv.vector.tuple_nxv2i8_3t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 3) @test_vlseg3_mask_nxv1f16_triscv.vector.tuple_nxv2i8_3t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg3_mask_nxv1f16_triscv.vector.tuple_nxv2i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT: vlseg3e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg3e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv2i8_3t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 3) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 4)
- %1 = call <vscale x 1 x half> @llvm.riscv.tuple.extract.nxv1f16.triscv.vector.tuple_nxv2i8_3t(target("riscv.vector.tuple", <vscale x 2 x i8>, 3) %0, i32 1)
- ret <vscale x 1 x half> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 3) %0
}
-
-
-define <vscale x 2 x half> @test_vlseg3_nxv2f16_triscv.vector.tuple_nxv4i8_3t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @test_vlseg3_nxv2f16_triscv.vector.tuple_nxv4i8_3t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg3_nxv2f16_triscv.vector.tuple_nxv4i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT: vlseg3e16.v v7, (a0)
+; CHECK-NEXT: vlseg3e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) undef, ptr %base, i64 %vl, i64 4)
- %1 = call <vscale x 2 x half> @llvm.riscv.tuple.extract.nxv2f16.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0, i32 1)
- ret <vscale x 2 x half> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0
}
-
-define <vscale x 2 x half> @test_vlseg3_mask_nxv2f16_triscv.vector.tuple_nxv4i8_3t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @test_vlseg3_mask_nxv2f16_triscv.vector.tuple_nxv4i8_3t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg3_mask_nxv2f16_triscv.vector.tuple_nxv4i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT: vlseg3e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg3e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv4i8_3t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 4)
- %1 = call <vscale x 2 x half> @llvm.riscv.tuple.extract.nxv2f16.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0, i32 1)
- ret <vscale x 2 x half> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0
}
-
-
-define <vscale x 4 x half> @test_vlseg3_nxv4f16_triscv.vector.tuple_nxv8i8_3t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @test_vlseg3_nxv4f16_triscv.vector.tuple_nxv8i8_3t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg3_nxv4f16_triscv.vector.tuple_nxv8i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vlseg3e16.v v7, (a0)
+; CHECK-NEXT: vlseg3e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) undef, ptr %base, i64 %vl, i64 4)
- %1 = call <vscale x 4 x half> @llvm.riscv.tuple.extract.nxv4f16.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0, i32 1)
- ret <vscale x 4 x half> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0
}
-
-define <vscale x 4 x half> @test_vlseg3_mask_nxv4f16_triscv.vector.tuple_nxv8i8_3t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @test_vlseg3_mask_nxv4f16_triscv.vector.tuple_nxv8i8_3t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: test_vlseg3_mask_nxv4f16_triscv.vector.tuple_nxv8i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vlseg3e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg3e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv8i8_3t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) undef, ptr %base, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 4)
- %1 = call <vscale x 4 x half> @llvm.riscv.tuple.extract.nxv4f16.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0, i32 1)
- ret <vscale x 4 x half> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0
}
-
-
-define <vscale x 8 x half> @test_vlseg3_nxv8f16_triscv.vector.tuple_nxv16i8_3t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @test_vlseg3_nxv8f16_triscv.vector.tuple_nxv16i8_3t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg3_nxv8f16_triscv.vector.tuple_nxv16i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vlseg3e16.v v6, (a0)
+; CHECK-NEXT: vlseg3e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, i64 %vl, i64 4)
- %1 = call <vscale x 8 x half> @llvm.riscv.tuple.extract.nxv8f16.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0, i32 1)
- ret <vscale x 8 x half> %1
+ ret target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0
}
-
-define <vscale x 8 x half> @test_vlseg3_mask_nxv8f16_triscv.vector.tuple_nxv16i8_3t(ptr %base, i64 %vl, <vscale x 8 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @test_vlseg3_mask_nxv8f16_triscv.vector.tuple_nxv16i8_3t(ptr %base, i64 %vl, <vscale x 8 x i1> %mask) {
; CHECK-LABEL: test_vlseg3_mask_nxv8f16_triscv.vector.tuple_nxv16i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vlseg3e16.v v6, (a0), v0.t
+; CHECK-NEXT: vlseg3e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv8i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 8 x i1> %mask, i64 %vl, i64 1, i64 4)
- %1 = call <vscale x 8 x half> @llvm.riscv.tuple.extract.nxv8f16.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0, i32 1)
- ret <vscale x 8 x half> %1
+ ret target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0
}
-
-
-define <vscale x 1 x half> @test_vlseg4_nxv1f16_triscv.vector.tuple_nxv2i8_4t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 4) @test_vlseg4_nxv1f16_triscv.vector.tuple_nxv2i8_4t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg4_nxv1f16_triscv.vector.tuple_nxv2i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT: vlseg4e16.v v7, (a0)
+; CHECK-NEXT: vlseg4e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv2i8_4t(target("riscv.vector.tuple", <vscale x 2 x i8>, 4) undef, ptr %base, i64 %vl, i64 4)
- %1 = call <vscale x 1 x half> @llvm.riscv.tuple.extract.nxv1f16.triscv.vector.tuple_nxv2i8_4t(target("riscv.vector.tuple", <vscale x 2 x i8>, 4) %0, i32 1)
- ret <vscale x 1 x half> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 4) %0
}
-
-define <vscale x 1 x half> @test_vlseg4_mask_nxv1f16_triscv.vector.tuple_nxv2i8_4t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 4) @test_vlseg4_mask_nxv1f16_triscv.vector.tuple_nxv2i8_4t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg4_mask_nxv1f16_triscv.vector.tuple_nxv2i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT: vlseg4e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg4e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv2i8_4t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 4) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 4)
- %1 = call <vscale x 1 x half> @llvm.riscv.tuple.extract.nxv1f16.triscv.vector.tuple_nxv2i8_4t(target("riscv.vector.tuple", <vscale x 2 x i8>, 4) %0, i32 1)
- ret <vscale x 1 x half> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 4) %0
}
-
-
-define <vscale x 2 x half> @test_vlseg4_nxv2f16_triscv.vector.tuple_nxv4i8_4t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @test_vlseg4_nxv2f16_triscv.vector.tuple_nxv4i8_4t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg4_nxv2f16_triscv.vector.tuple_nxv4i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT: vlseg4e16.v v7, (a0)
+; CHECK-NEXT: vlseg4e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) undef, ptr %base, i64 %vl, i64 4)
- %1 = call <vscale x 2 x half> @llvm.riscv.tuple.extract.nxv2f16.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0, i32 1)
- ret <vscale x 2 x half> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0
}
-
-define <vscale x 2 x half> @test_vlseg4_mask_nxv2f16_triscv.vector.tuple_nxv4i8_4t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @test_vlseg4_mask_nxv2f16_triscv.vector.tuple_nxv4i8_4t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg4_mask_nxv2f16_triscv.vector.tuple_nxv4i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT: vlseg4e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg4e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv4i8_4t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 4)
- %1 = call <vscale x 2 x half> @llvm.riscv.tuple.extract.nxv2f16.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0, i32 1)
- ret <vscale x 2 x half> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0
}
-
-
-define <vscale x 4 x half> @test_vlseg4_nxv4f16_triscv.vector.tuple_nxv8i8_4t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @test_vlseg4_nxv4f16_triscv.vector.tuple_nxv8i8_4t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg4_nxv4f16_triscv.vector.tuple_nxv8i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vlseg4e16.v v7, (a0)
+; CHECK-NEXT: vlseg4e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) undef, ptr %base, i64 %vl, i64 4)
- %1 = call <vscale x 4 x half> @llvm.riscv.tuple.extract.nxv4f16.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0, i32 1)
- ret <vscale x 4 x half> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0
}
-
-define <vscale x 4 x half> @test_vlseg4_mask_nxv4f16_triscv.vector.tuple_nxv8i8_4t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @test_vlseg4_mask_nxv4f16_triscv.vector.tuple_nxv8i8_4t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: test_vlseg4_mask_nxv4f16_triscv.vector.tuple_nxv8i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vlseg4e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg4e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv8i8_4t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) undef, ptr %base, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 4)
- %1 = call <vscale x 4 x half> @llvm.riscv.tuple.extract.nxv4f16.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0, i32 1)
- ret <vscale x 4 x half> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0
}
-
-
-define <vscale x 8 x half> @test_vlseg4_nxv8f16_triscv.vector.tuple_nxv16i8_4t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @test_vlseg4_nxv8f16_triscv.vector.tuple_nxv16i8_4t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg4_nxv8f16_triscv.vector.tuple_nxv16i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vlseg4e16.v v6, (a0)
+; CHECK-NEXT: vlseg4e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, i64 %vl, i64 4)
- %1 = call <vscale x 8 x half> @llvm.riscv.tuple.extract.nxv8f16.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0, i32 1)
- ret <vscale x 8 x half> %1
+ ret target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0
}
-
-define <vscale x 8 x half> @test_vlseg4_mask_nxv8f16_triscv.vector.tuple_nxv16i8_4t(ptr %base, i64 %vl, <vscale x 8 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @test_vlseg4_mask_nxv8f16_triscv.vector.tuple_nxv16i8_4t(ptr %base, i64 %vl, <vscale x 8 x i1> %mask) {
; CHECK-LABEL: test_vlseg4_mask_nxv8f16_triscv.vector.tuple_nxv16i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vlseg4e16.v v6, (a0), v0.t
+; CHECK-NEXT: vlseg4e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv16i8_4t.nxv8i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 8 x i1> %mask, i64 %vl, i64 1, i64 4)
- %1 = call <vscale x 8 x half> @llvm.riscv.tuple.extract.nxv8f16.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0, i32 1)
- ret <vscale x 8 x half> %1
+ ret target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0
}
-
-
-define <vscale x 1 x half> @test_vlseg5_nxv1f16_triscv.vector.tuple_nxv2i8_5t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 5) @test_vlseg5_nxv1f16_triscv.vector.tuple_nxv2i8_5t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg5_nxv1f16_triscv.vector.tuple_nxv2i8_5t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT: vlseg5e16.v v7, (a0)
+; CHECK-NEXT: vlseg5e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv2i8_5t(target("riscv.vector.tuple", <vscale x 2 x i8>, 5) undef, ptr %base, i64 %vl, i64 4)
- %1 = call <vscale x 1 x half> @llvm.riscv.tuple.extract.nxv1f16.triscv.vector.tuple_nxv2i8_5t(target("riscv.vector.tuple", <vscale x 2 x i8>, 5) %0, i32 1)
- ret <vscale x 1 x half> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 5) %0
}
-
-define <vscale x 1 x half> @test_vlseg5_mask_nxv1f16_triscv.vector.tuple_nxv2i8_5t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 5) @test_vlseg5_mask_nxv1f16_triscv.vector.tuple_nxv2i8_5t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg5_mask_nxv1f16_triscv.vector.tuple_nxv2i8_5t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT: vlseg5e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg5e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv2i8_5t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 5) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 4)
- %1 = call <vscale x 1 x half> @llvm.riscv.tuple.extract.nxv1f16.triscv.vector.tuple_nxv2i8_5t(target("riscv.vector.tuple", <vscale x 2 x i8>, 5) %0, i32 1)
- ret <vscale x 1 x half> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 5) %0
}
-
-
-define <vscale x 2 x half> @test_vlseg5_nxv2f16_triscv.vector.tuple_nxv4i8_5t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @test_vlseg5_nxv2f16_triscv.vector.tuple_nxv4i8_5t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg5_nxv2f16_triscv.vector.tuple_nxv4i8_5t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT: vlseg5e16.v v7, (a0)
+; CHECK-NEXT: vlseg5e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) undef, ptr %base, i64 %vl, i64 4)
- %1 = call <vscale x 2 x half> @llvm.riscv.tuple.extract.nxv2f16.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0, i32 1)
- ret <vscale x 2 x half> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0
}
-
-define <vscale x 2 x half> @test_vlseg5_mask_nxv2f16_triscv.vector.tuple_nxv4i8_5t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @test_vlseg5_mask_nxv2f16_triscv.vector.tuple_nxv4i8_5t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg5_mask_nxv2f16_triscv.vector.tuple_nxv4i8_5t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT: vlseg5e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg5e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv4i8_5t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 4)
- %1 = call <vscale x 2 x half> @llvm.riscv.tuple.extract.nxv2f16.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0, i32 1)
- ret <vscale x 2 x half> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0
}
-
-
-define <vscale x 4 x half> @test_vlseg5_nxv4f16_triscv.vector.tuple_nxv8i8_5t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @test_vlseg5_nxv4f16_triscv.vector.tuple_nxv8i8_5t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg5_nxv4f16_triscv.vector.tuple_nxv8i8_5t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vlseg5e16.v v7, (a0)
+; CHECK-NEXT: vlseg5e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) undef, ptr %base, i64 %vl, i64 4)
- %1 = call <vscale x 4 x half> @llvm.riscv.tuple.extract.nxv4f16.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0, i32 1)
- ret <vscale x 4 x half> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0
}
-
-define <vscale x 4 x half> @test_vlseg5_mask_nxv4f16_triscv.vector.tuple_nxv8i8_5t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @test_vlseg5_mask_nxv4f16_triscv.vector.tuple_nxv8i8_5t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: test_vlseg5_mask_nxv4f16_triscv.vector.tuple_nxv8i8_5t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vlseg5e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg5e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv8i8_5t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) undef, ptr %base, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 4)
- %1 = call <vscale x 4 x half> @llvm.riscv.tuple.extract.nxv4f16.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0, i32 1)
- ret <vscale x 4 x half> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0
}
-
-
-define <vscale x 1 x half> @test_vlseg6_nxv1f16_triscv.vector.tuple_nxv2i8_6t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 6) @test_vlseg6_nxv1f16_triscv.vector.tuple_nxv2i8_6t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg6_nxv1f16_triscv.vector.tuple_nxv2i8_6t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT: vlseg6e16.v v7, (a0)
+; CHECK-NEXT: vlseg6e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv2i8_6t(target("riscv.vector.tuple", <vscale x 2 x i8>, 6) undef, ptr %base, i64 %vl, i64 4)
- %1 = call <vscale x 1 x half> @llvm.riscv.tuple.extract.nxv1f16.triscv.vector.tuple_nxv2i8_6t(target("riscv.vector.tuple", <vscale x 2 x i8>, 6) %0, i32 1)
- ret <vscale x 1 x half> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 6) %0
}
-
-define <vscale x 1 x half> @test_vlseg6_mask_nxv1f16_triscv.vector.tuple_nxv2i8_6t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 6) @test_vlseg6_mask_nxv1f16_triscv.vector.tuple_nxv2i8_6t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg6_mask_nxv1f16_triscv.vector.tuple_nxv2i8_6t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT: vlseg6e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg6e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv2i8_6t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 6) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 4)
- %1 = call <vscale x 1 x half> @llvm.riscv.tuple.extract.nxv1f16.triscv.vector.tuple_nxv2i8_6t(target("riscv.vector.tuple", <vscale x 2 x i8>, 6) %0, i32 1)
- ret <vscale x 1 x half> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 6) %0
}
-
-
-define <vscale x 2 x half> @test_vlseg6_nxv2f16_triscv.vector.tuple_nxv4i8_6t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @test_vlseg6_nxv2f16_triscv.vector.tuple_nxv4i8_6t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg6_nxv2f16_triscv.vector.tuple_nxv4i8_6t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT: vlseg6e16.v v7, (a0)
+; CHECK-NEXT: vlseg6e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) undef, ptr %base, i64 %vl, i64 4)
- %1 = call <vscale x 2 x half> @llvm.riscv.tuple.extract.nxv2f16.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0, i32 1)
- ret <vscale x 2 x half> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0
}
-
-define <vscale x 2 x half> @test_vlseg6_mask_nxv2f16_triscv.vector.tuple_nxv4i8_6t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @test_vlseg6_mask_nxv2f16_triscv.vector.tuple_nxv4i8_6t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg6_mask_nxv2f16_triscv.vector.tuple_nxv4i8_6t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT: vlseg6e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg6e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv4i8_6t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 4)
- %1 = call <vscale x 2 x half> @llvm.riscv.tuple.extract.nxv2f16.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0, i32 1)
- ret <vscale x 2 x half> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0
}
-
-
-define <vscale x 4 x half> @test_vlseg6_nxv4f16_triscv.vector.tuple_nxv8i8_6t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @test_vlseg6_nxv4f16_triscv.vector.tuple_nxv8i8_6t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg6_nxv4f16_triscv.vector.tuple_nxv8i8_6t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vlseg6e16.v v7, (a0)
+; CHECK-NEXT: vlseg6e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) undef, ptr %base, i64 %vl, i64 4)
- %1 = call <vscale x 4 x half> @llvm.riscv.tuple.extract.nxv4f16.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0, i32 1)
- ret <vscale x 4 x half> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0
}
-
-define <vscale x 4 x half> @test_vlseg6_mask_nxv4f16_triscv.vector.tuple_nxv8i8_6t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @test_vlseg6_mask_nxv4f16_triscv.vector.tuple_nxv8i8_6t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: test_vlseg6_mask_nxv4f16_triscv.vector.tuple_nxv8i8_6t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vlseg6e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg6e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv8i8_6t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) undef, ptr %base, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 4)
- %1 = call <vscale x 4 x half> @llvm.riscv.tuple.extract.nxv4f16.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0, i32 1)
- ret <vscale x 4 x half> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0
}
-
-
-define <vscale x 1 x half> @test_vlseg7_nxv1f16_triscv.vector.tuple_nxv2i8_7t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 7) @test_vlseg7_nxv1f16_triscv.vector.tuple_nxv2i8_7t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg7_nxv1f16_triscv.vector.tuple_nxv2i8_7t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT: vlseg7e16.v v7, (a0)
+; CHECK-NEXT: vlseg7e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv2i8_7t(target("riscv.vector.tuple", <vscale x 2 x i8>, 7) undef, ptr %base, i64 %vl, i64 4)
- %1 = call <vscale x 1 x half> @llvm.riscv.tuple.extract.nxv1f16.triscv.vector.tuple_nxv2i8_7t(target("riscv.vector.tuple", <vscale x 2 x i8>, 7) %0, i32 1)
- ret <vscale x 1 x half> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 7) %0
}
-
-define <vscale x 1 x half> @test_vlseg7_mask_nxv1f16_triscv.vector.tuple_nxv2i8_7t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 7) @test_vlseg7_mask_nxv1f16_triscv.vector.tuple_nxv2i8_7t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg7_mask_nxv1f16_triscv.vector.tuple_nxv2i8_7t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT: vlseg7e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg7e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv2i8_7t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 7) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 4)
- %1 = call <vscale x 1 x half> @llvm.riscv.tuple.extract.nxv1f16.triscv.vector.tuple_nxv2i8_7t(target("riscv.vector.tuple", <vscale x 2 x i8>, 7) %0, i32 1)
- ret <vscale x 1 x half> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 7) %0
}
-
-
-define <vscale x 2 x half> @test_vlseg7_nxv2f16_triscv.vector.tuple_nxv4i8_7t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @test_vlseg7_nxv2f16_triscv.vector.tuple_nxv4i8_7t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg7_nxv2f16_triscv.vector.tuple_nxv4i8_7t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT: vlseg7e16.v v7, (a0)
+; CHECK-NEXT: vlseg7e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) undef, ptr %base, i64 %vl, i64 4)
- %1 = call <vscale x 2 x half> @llvm.riscv.tuple.extract.nxv2f16.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0, i32 1)
- ret <vscale x 2 x half> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0
}
-
-define <vscale x 2 x half> @test_vlseg7_mask_nxv2f16_triscv.vector.tuple_nxv4i8_7t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @test_vlseg7_mask_nxv2f16_triscv.vector.tuple_nxv4i8_7t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg7_mask_nxv2f16_triscv.vector.tuple_nxv4i8_7t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT: vlseg7e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg7e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv4i8_7t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 4)
- %1 = call <vscale x 2 x half> @llvm.riscv.tuple.extract.nxv2f16.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0, i32 1)
- ret <vscale x 2 x half> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0
}
-
-
-define <vscale x 4 x half> @test_vlseg7_nxv4f16_triscv.vector.tuple_nxv8i8_7t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @test_vlseg7_nxv4f16_triscv.vector.tuple_nxv8i8_7t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg7_nxv4f16_triscv.vector.tuple_nxv8i8_7t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vlseg7e16.v v7, (a0)
+; CHECK-NEXT: vlseg7e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) undef, ptr %base, i64 %vl, i64 4)
- %1 = call <vscale x 4 x half> @llvm.riscv.tuple.extract.nxv4f16.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0, i32 1)
- ret <vscale x 4 x half> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0
}
-
-define <vscale x 4 x half> @test_vlseg7_mask_nxv4f16_triscv.vector.tuple_nxv8i8_7t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @test_vlseg7_mask_nxv4f16_triscv.vector.tuple_nxv8i8_7t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: test_vlseg7_mask_nxv4f16_triscv.vector.tuple_nxv8i8_7t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vlseg7e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg7e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv8i8_7t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) undef, ptr %base, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 4)
- %1 = call <vscale x 4 x half> @llvm.riscv.tuple.extract.nxv4f16.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0, i32 1)
- ret <vscale x 4 x half> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0
}
-
-
-define <vscale x 1 x half> @test_vlseg8_nxv1f16_triscv.vector.tuple_nxv2i8_8t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 8) @test_vlseg8_nxv1f16_triscv.vector.tuple_nxv2i8_8t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg8_nxv1f16_triscv.vector.tuple_nxv2i8_8t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT: vlseg8e16.v v7, (a0)
+; CHECK-NEXT: vlseg8e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv2i8_8t(target("riscv.vector.tuple", <vscale x 2 x i8>, 8) undef, ptr %base, i64 %vl, i64 4)
- %1 = call <vscale x 1 x half> @llvm.riscv.tuple.extract.nxv1f16.triscv.vector.tuple_nxv2i8_8t(target("riscv.vector.tuple", <vscale x 2 x i8>, 8) %0, i32 1)
- ret <vscale x 1 x half> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 8) %0
}
-
-define <vscale x 1 x half> @test_vlseg8_mask_nxv1f16_triscv.vector.tuple_nxv2i8_8t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 8) @test_vlseg8_mask_nxv1f16_triscv.vector.tuple_nxv2i8_8t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg8_mask_nxv1f16_triscv.vector.tuple_nxv2i8_8t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT: vlseg8e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg8e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv2i8_8t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 8) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 4)
- %1 = call <vscale x 1 x half> @llvm.riscv.tuple.extract.nxv1f16.triscv.vector.tuple_nxv2i8_8t(target("riscv.vector.tuple", <vscale x 2 x i8>, 8) %0, i32 1)
- ret <vscale x 1 x half> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 8) %0
}
-
-
-define <vscale x 2 x half> @test_vlseg8_nxv2f16_triscv.vector.tuple_nxv4i8_8t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @test_vlseg8_nxv2f16_triscv.vector.tuple_nxv4i8_8t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg8_nxv2f16_triscv.vector.tuple_nxv4i8_8t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT: vlseg8e16.v v7, (a0)
+; CHECK-NEXT: vlseg8e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) undef, ptr %base, i64 %vl, i64 4)
- %1 = call <vscale x 2 x half> @llvm.riscv.tuple.extract.nxv2f16.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0, i32 1)
- ret <vscale x 2 x half> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0
}
-
-define <vscale x 2 x half> @test_vlseg8_mask_nxv2f16_triscv.vector.tuple_nxv4i8_8t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @test_vlseg8_mask_nxv2f16_triscv.vector.tuple_nxv4i8_8t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg8_mask_nxv2f16_triscv.vector.tuple_nxv4i8_8t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT: vlseg8e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg8e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv4i8_8t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 4)
- %1 = call <vscale x 2 x half> @llvm.riscv.tuple.extract.nxv2f16.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0, i32 1)
- ret <vscale x 2 x half> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0
}
-
-
-define <vscale x 4 x half> @test_vlseg8_nxv4f16_triscv.vector.tuple_nxv8i8_8t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @test_vlseg8_nxv4f16_triscv.vector.tuple_nxv8i8_8t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg8_nxv4f16_triscv.vector.tuple_nxv8i8_8t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vlseg8e16.v v7, (a0)
+; CHECK-NEXT: vlseg8e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) undef, ptr %base, i64 %vl, i64 4)
- %1 = call <vscale x 4 x half> @llvm.riscv.tuple.extract.nxv4f16.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0, i32 1)
- ret <vscale x 4 x half> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0
}
-
-define <vscale x 4 x half> @test_vlseg8_mask_nxv4f16_triscv.vector.tuple_nxv8i8_8t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @test_vlseg8_mask_nxv4f16_triscv.vector.tuple_nxv8i8_8t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: test_vlseg8_mask_nxv4f16_triscv.vector.tuple_nxv8i8_8t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vlseg8e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg8e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv8i8_8t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) undef, ptr %base, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 4)
- %1 = call <vscale x 4 x half> @llvm.riscv.tuple.extract.nxv4f16.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0, i32 1)
- ret <vscale x 4 x half> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0
}
-
-
-define <vscale x 1 x float> @test_vlseg2_nxv1f32_triscv.vector.tuple_nxv4i8_2t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @test_vlseg2_nxv1f32_triscv.vector.tuple_nxv4i8_2t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg2_nxv1f32_triscv.vector.tuple_nxv4i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT: vlseg2e32.v v7, (a0)
+; CHECK-NEXT: vlseg2e32.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) undef, ptr %base, i64 %vl, i64 5)
- %1 = call <vscale x 1 x float> @llvm.riscv.tuple.extract.nxv1f32.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0, i32 1)
- ret <vscale x 1 x float> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0
}
-
-define <vscale x 1 x float> @test_vlseg2_mask_nxv1f32_triscv.vector.tuple_nxv4i8_2t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @test_vlseg2_mask_nxv1f32_triscv.vector.tuple_nxv4i8_2t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg2_mask_nxv1f32_triscv.vector.tuple_nxv4i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT: vlseg2e32.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg2e32.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv4i8_2t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 5)
- %1 = call <vscale x 1 x float> @llvm.riscv.tuple.extract.nxv1f32.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0, i32 1)
- ret <vscale x 1 x float> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0
}
-
-
-define <vscale x 2 x float> @test_vlseg2_nxv2f32_triscv.vector.tuple_nxv8i8_2t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @test_vlseg2_nxv2f32_triscv.vector.tuple_nxv8i8_2t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg2_nxv2f32_triscv.vector.tuple_nxv8i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT: vlseg2e32.v v7, (a0)
+; CHECK-NEXT: vlseg2e32.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) undef, ptr %base, i64 %vl, i64 5)
- %1 = call <vscale x 2 x float> @llvm.riscv.tuple.extract.nxv2f32.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0, i32 1)
- ret <vscale x 2 x float> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0
}
-
-define <vscale x 2 x float> @test_vlseg2_mask_nxv2f32_triscv.vector.tuple_nxv8i8_2t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @test_vlseg2_mask_nxv2f32_triscv.vector.tuple_nxv8i8_2t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg2_mask_nxv2f32_triscv.vector.tuple_nxv8i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT: vlseg2e32.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg2e32.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv8i8_2t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 5)
- %1 = call <vscale x 2 x float> @llvm.riscv.tuple.extract.nxv2f32.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0, i32 1)
- ret <vscale x 2 x float> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0
}
-
-
-define <vscale x 4 x float> @test_vlseg2_nxv4f32_triscv.vector.tuple_nxv16i8_2t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @test_vlseg2_nxv4f32_triscv.vector.tuple_nxv16i8_2t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg2_nxv4f32_triscv.vector.tuple_nxv16i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vlseg2e32.v v6, (a0)
+; CHECK-NEXT: vlseg2e32.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, i64 %vl, i64 5)
- %1 = call <vscale x 4 x float> @llvm.riscv.tuple.extract.nxv4f32.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0, i32 1)
- ret <vscale x 4 x float> %1
+ ret target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0
}
-
-define <vscale x 4 x float> @test_vlseg2_mask_nxv4f32_triscv.vector.tuple_nxv16i8_2t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @test_vlseg2_mask_nxv4f32_triscv.vector.tuple_nxv16i8_2t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: test_vlseg2_mask_nxv4f32_triscv.vector.tuple_nxv16i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vlseg2e32.v v6, (a0), v0.t
+; CHECK-NEXT: vlseg2e32.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv16i8_2t.nxv4i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 5)
- %1 = call <vscale x 4 x float> @llvm.riscv.tuple.extract.nxv4f32.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0, i32 1)
- ret <vscale x 4 x float> %1
+ ret target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0
}
-
-
-define <vscale x 8 x float> @test_vlseg2_nxv8f32_triscv.vector.tuple_nxv32i8_2t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @test_vlseg2_nxv8f32_triscv.vector.tuple_nxv32i8_2t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg2_nxv8f32_triscv.vector.tuple_nxv32i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT: vlseg2e32.v v4, (a0)
+; CHECK-NEXT: vlseg2e32.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, i64 %vl, i64 5)
- %1 = call <vscale x 8 x float> @llvm.riscv.tuple.extract.nxv8f32.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0, i32 1)
- ret <vscale x 8 x float> %1
+ ret target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0
}
-
-define <vscale x 8 x float> @test_vlseg2_mask_nxv8f32_triscv.vector.tuple_nxv32i8_2t(ptr %base, i64 %vl, <vscale x 8 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @test_vlseg2_mask_nxv8f32_triscv.vector.tuple_nxv32i8_2t(ptr %base, i64 %vl, <vscale x 8 x i1> %mask) {
; CHECK-LABEL: test_vlseg2_mask_nxv8f32_triscv.vector.tuple_nxv32i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT: vlseg2e32.v v4, (a0), v0.t
+; CHECK-NEXT: vlseg2e32.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv32i8_2t.nxv8i1(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 8 x i1> %mask, i64 %vl, i64 1, i64 5)
- %1 = call <vscale x 8 x float> @llvm.riscv.tuple.extract.nxv8f32.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0, i32 1)
- ret <vscale x 8 x float> %1
+ ret target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0
}
-
-
-define <vscale x 1 x float> @test_vlseg3_nxv1f32_triscv.vector.tuple_nxv4i8_3t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @test_vlseg3_nxv1f32_triscv.vector.tuple_nxv4i8_3t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg3_nxv1f32_triscv.vector.tuple_nxv4i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT: vlseg3e32.v v7, (a0)
+; CHECK-NEXT: vlseg3e32.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) undef, ptr %base, i64 %vl, i64 5)
- %1 = call <vscale x 1 x float> @llvm.riscv.tuple.extract.nxv1f32.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0, i32 1)
- ret <vscale x 1 x float> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0
}
-
-define <vscale x 1 x float> @test_vlseg3_mask_nxv1f32_triscv.vector.tuple_nxv4i8_3t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @test_vlseg3_mask_nxv1f32_triscv.vector.tuple_nxv4i8_3t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg3_mask_nxv1f32_triscv.vector.tuple_nxv4i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT: vlseg3e32.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg3e32.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv4i8_3t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 5)
- %1 = call <vscale x 1 x float> @llvm.riscv.tuple.extract.nxv1f32.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0, i32 1)
- ret <vscale x 1 x float> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0
}
-
-
-define <vscale x 2 x float> @test_vlseg3_nxv2f32_triscv.vector.tuple_nxv8i8_3t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @test_vlseg3_nxv2f32_triscv.vector.tuple_nxv8i8_3t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg3_nxv2f32_triscv.vector.tuple_nxv8i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT: vlseg3e32.v v7, (a0)
+; CHECK-NEXT: vlseg3e32.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) undef, ptr %base, i64 %vl, i64 5)
- %1 = call <vscale x 2 x float> @llvm.riscv.tuple.extract.nxv2f32.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0, i32 1)
- ret <vscale x 2 x float> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0
}
-
-define <vscale x 2 x float> @test_vlseg3_mask_nxv2f32_triscv.vector.tuple_nxv8i8_3t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @test_vlseg3_mask_nxv2f32_triscv.vector.tuple_nxv8i8_3t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg3_mask_nxv2f32_triscv.vector.tuple_nxv8i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT: vlseg3e32.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg3e32.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv8i8_3t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 5)
- %1 = call <vscale x 2 x float> @llvm.riscv.tuple.extract.nxv2f32.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0, i32 1)
- ret <vscale x 2 x float> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0
}
-
-
-define <vscale x 4 x float> @test_vlseg3_nxv4f32_triscv.vector.tuple_nxv16i8_3t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @test_vlseg3_nxv4f32_triscv.vector.tuple_nxv16i8_3t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg3_nxv4f32_triscv.vector.tuple_nxv16i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vlseg3e32.v v6, (a0)
+; CHECK-NEXT: vlseg3e32.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, i64 %vl, i64 5)
- %1 = call <vscale x 4 x float> @llvm.riscv.tuple.extract.nxv4f32.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0, i32 1)
- ret <vscale x 4 x float> %1
+ ret target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0
}
-
-define <vscale x 4 x float> @test_vlseg3_mask_nxv4f32_triscv.vector.tuple_nxv16i8_3t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @test_vlseg3_mask_nxv4f32_triscv.vector.tuple_nxv16i8_3t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: test_vlseg3_mask_nxv4f32_triscv.vector.tuple_nxv16i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vlseg3e32.v v6, (a0), v0.t
+; CHECK-NEXT: vlseg3e32.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv4i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 5)
- %1 = call <vscale x 4 x float> @llvm.riscv.tuple.extract.nxv4f32.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0, i32 1)
- ret <vscale x 4 x float> %1
+ ret target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0
}
-
-
-define <vscale x 1 x float> @test_vlseg4_nxv1f32_triscv.vector.tuple_nxv4i8_4t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @test_vlseg4_nxv1f32_triscv.vector.tuple_nxv4i8_4t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg4_nxv1f32_triscv.vector.tuple_nxv4i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT: vlseg4e32.v v7, (a0)
+; CHECK-NEXT: vlseg4e32.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) undef, ptr %base, i64 %vl, i64 5)
- %1 = call <vscale x 1 x float> @llvm.riscv.tuple.extract.nxv1f32.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0, i32 1)
- ret <vscale x 1 x float> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0
}
-
-define <vscale x 1 x float> @test_vlseg4_mask_nxv1f32_triscv.vector.tuple_nxv4i8_4t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @test_vlseg4_mask_nxv1f32_triscv.vector.tuple_nxv4i8_4t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg4_mask_nxv1f32_triscv.vector.tuple_nxv4i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT: vlseg4e32.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg4e32.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv4i8_4t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 5)
- %1 = call <vscale x 1 x float> @llvm.riscv.tuple.extract.nxv1f32.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0, i32 1)
- ret <vscale x 1 x float> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0
}
-
-
-define <vscale x 2 x float> @test_vlseg4_nxv2f32_triscv.vector.tuple_nxv8i8_4t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @test_vlseg4_nxv2f32_triscv.vector.tuple_nxv8i8_4t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg4_nxv2f32_triscv.vector.tuple_nxv8i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT: vlseg4e32.v v7, (a0)
+; CHECK-NEXT: vlseg4e32.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) undef, ptr %base, i64 %vl, i64 5)
- %1 = call <vscale x 2 x float> @llvm.riscv.tuple.extract.nxv2f32.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0, i32 1)
- ret <vscale x 2 x float> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0
}
-
-define <vscale x 2 x float> @test_vlseg4_mask_nxv2f32_triscv.vector.tuple_nxv8i8_4t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @test_vlseg4_mask_nxv2f32_triscv.vector.tuple_nxv8i8_4t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg4_mask_nxv2f32_triscv.vector.tuple_nxv8i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT: vlseg4e32.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg4e32.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv8i8_4t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 5)
- %1 = call <vscale x 2 x float> @llvm.riscv.tuple.extract.nxv2f32.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0, i32 1)
- ret <vscale x 2 x float> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0
}
-
-
-define <vscale x 4 x float> @test_vlseg4_nxv4f32_triscv.vector.tuple_nxv16i8_4t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @test_vlseg4_nxv4f32_triscv.vector.tuple_nxv16i8_4t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg4_nxv4f32_triscv.vector.tuple_nxv16i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vlseg4e32.v v6, (a0)
+; CHECK-NEXT: vlseg4e32.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, i64 %vl, i64 5)
- %1 = call <vscale x 4 x float> @llvm.riscv.tuple.extract.nxv4f32.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0, i32 1)
- ret <vscale x 4 x float> %1
+ ret target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0
}
-
-define <vscale x 4 x float> @test_vlseg4_mask_nxv4f32_triscv.vector.tuple_nxv16i8_4t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @test_vlseg4_mask_nxv4f32_triscv.vector.tuple_nxv16i8_4t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: test_vlseg4_mask_nxv4f32_triscv.vector.tuple_nxv16i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vlseg4e32.v v6, (a0), v0.t
+; CHECK-NEXT: vlseg4e32.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv16i8_4t.nxv4i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 5)
- %1 = call <vscale x 4 x float> @llvm.riscv.tuple.extract.nxv4f32.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0, i32 1)
- ret <vscale x 4 x float> %1
+ ret target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0
}
-
-
-define <vscale x 1 x float> @test_vlseg5_nxv1f32_triscv.vector.tuple_nxv4i8_5t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @test_vlseg5_nxv1f32_triscv.vector.tuple_nxv4i8_5t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg5_nxv1f32_triscv.vector.tuple_nxv4i8_5t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT: vlseg5e32.v v7, (a0)
+; CHECK-NEXT: vlseg5e32.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) undef, ptr %base, i64 %vl, i64 5)
- %1 = call <vscale x 1 x float> @llvm.riscv.tuple.extract.nxv1f32.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0, i32 1)
- ret <vscale x 1 x float> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0
}
-
-define <vscale x 1 x float> @test_vlseg5_mask_nxv1f32_triscv.vector.tuple_nxv4i8_5t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @test_vlseg5_mask_nxv1f32_triscv.vector.tuple_nxv4i8_5t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg5_mask_nxv1f32_triscv.vector.tuple_nxv4i8_5t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT: vlseg5e32.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg5e32.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv4i8_5t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 5)
- %1 = call <vscale x 1 x float> @llvm.riscv.tuple.extract.nxv1f32.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0, i32 1)
- ret <vscale x 1 x float> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0
}
-
-
-define <vscale x 2 x float> @test_vlseg5_nxv2f32_triscv.vector.tuple_nxv8i8_5t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @test_vlseg5_nxv2f32_triscv.vector.tuple_nxv8i8_5t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg5_nxv2f32_triscv.vector.tuple_nxv8i8_5t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT: vlseg5e32.v v7, (a0)
+; CHECK-NEXT: vlseg5e32.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) undef, ptr %base, i64 %vl, i64 5)
- %1 = call <vscale x 2 x float> @llvm.riscv.tuple.extract.nxv2f32.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0, i32 1)
- ret <vscale x 2 x float> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0
}
-
-define <vscale x 2 x float> @test_vlseg5_mask_nxv2f32_triscv.vector.tuple_nxv8i8_5t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @test_vlseg5_mask_nxv2f32_triscv.vector.tuple_nxv8i8_5t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg5_mask_nxv2f32_triscv.vector.tuple_nxv8i8_5t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT: vlseg5e32.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg5e32.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv8i8_5t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 5)
- %1 = call <vscale x 2 x float> @llvm.riscv.tuple.extract.nxv2f32.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0, i32 1)
- ret <vscale x 2 x float> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0
}
-
-
-define <vscale x 1 x float> @test_vlseg6_nxv1f32_triscv.vector.tuple_nxv4i8_6t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @test_vlseg6_nxv1f32_triscv.vector.tuple_nxv4i8_6t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg6_nxv1f32_triscv.vector.tuple_nxv4i8_6t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT: vlseg6e32.v v7, (a0)
+; CHECK-NEXT: vlseg6e32.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) undef, ptr %base, i64 %vl, i64 5)
- %1 = call <vscale x 1 x float> @llvm.riscv.tuple.extract.nxv1f32.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0, i32 1)
- ret <vscale x 1 x float> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0
}
-
-define <vscale x 1 x float> @test_vlseg6_mask_nxv1f32_triscv.vector.tuple_nxv4i8_6t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @test_vlseg6_mask_nxv1f32_triscv.vector.tuple_nxv4i8_6t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg6_mask_nxv1f32_triscv.vector.tuple_nxv4i8_6t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT: vlseg6e32.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg6e32.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv4i8_6t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 5)
- %1 = call <vscale x 1 x float> @llvm.riscv.tuple.extract.nxv1f32.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0, i32 1)
- ret <vscale x 1 x float> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0
}
-
-
-define <vscale x 2 x float> @test_vlseg6_nxv2f32_triscv.vector.tuple_nxv8i8_6t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @test_vlseg6_nxv2f32_triscv.vector.tuple_nxv8i8_6t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg6_nxv2f32_triscv.vector.tuple_nxv8i8_6t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT: vlseg6e32.v v7, (a0)
+; CHECK-NEXT: vlseg6e32.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) undef, ptr %base, i64 %vl, i64 5)
- %1 = call <vscale x 2 x float> @llvm.riscv.tuple.extract.nxv2f32.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0, i32 1)
- ret <vscale x 2 x float> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0
}
-
-define <vscale x 2 x float> @test_vlseg6_mask_nxv2f32_triscv.vector.tuple_nxv8i8_6t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @test_vlseg6_mask_nxv2f32_triscv.vector.tuple_nxv8i8_6t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg6_mask_nxv2f32_triscv.vector.tuple_nxv8i8_6t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT: vlseg6e32.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg6e32.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv8i8_6t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 5)
- %1 = call <vscale x 2 x float> @llvm.riscv.tuple.extract.nxv2f32.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0, i32 1)
- ret <vscale x 2 x float> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0
}
-
-
-define <vscale x 1 x float> @test_vlseg7_nxv1f32_triscv.vector.tuple_nxv4i8_7t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @test_vlseg7_nxv1f32_triscv.vector.tuple_nxv4i8_7t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg7_nxv1f32_triscv.vector.tuple_nxv4i8_7t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT: vlseg7e32.v v7, (a0)
+; CHECK-NEXT: vlseg7e32.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) undef, ptr %base, i64 %vl, i64 5)
- %1 = call <vscale x 1 x float> @llvm.riscv.tuple.extract.nxv1f32.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0, i32 1)
- ret <vscale x 1 x float> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0
}
-
-define <vscale x 1 x float> @test_vlseg7_mask_nxv1f32_triscv.vector.tuple_nxv4i8_7t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @test_vlseg7_mask_nxv1f32_triscv.vector.tuple_nxv4i8_7t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg7_mask_nxv1f32_triscv.vector.tuple_nxv4i8_7t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT: vlseg7e32.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg7e32.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv4i8_7t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 5)
- %1 = call <vscale x 1 x float> @llvm.riscv.tuple.extract.nxv1f32.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0, i32 1)
- ret <vscale x 1 x float> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0
}
-
-
-define <vscale x 2 x float> @test_vlseg7_nxv2f32_triscv.vector.tuple_nxv8i8_7t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @test_vlseg7_nxv2f32_triscv.vector.tuple_nxv8i8_7t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg7_nxv2f32_triscv.vector.tuple_nxv8i8_7t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT: vlseg7e32.v v7, (a0)
+; CHECK-NEXT: vlseg7e32.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) undef, ptr %base, i64 %vl, i64 5)
- %1 = call <vscale x 2 x float> @llvm.riscv.tuple.extract.nxv2f32.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0, i32 1)
- ret <vscale x 2 x float> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0
}
-
-define <vscale x 2 x float> @test_vlseg7_mask_nxv2f32_triscv.vector.tuple_nxv8i8_7t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @test_vlseg7_mask_nxv2f32_triscv.vector.tuple_nxv8i8_7t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg7_mask_nxv2f32_triscv.vector.tuple_nxv8i8_7t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT: vlseg7e32.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg7e32.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv8i8_7t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 5)
- %1 = call <vscale x 2 x float> @llvm.riscv.tuple.extract.nxv2f32.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0, i32 1)
- ret <vscale x 2 x float> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0
}
-
-
-define <vscale x 1 x float> @test_vlseg8_nxv1f32_triscv.vector.tuple_nxv4i8_8t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @test_vlseg8_nxv1f32_triscv.vector.tuple_nxv4i8_8t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg8_nxv1f32_triscv.vector.tuple_nxv4i8_8t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT: vlseg8e32.v v7, (a0)
+; CHECK-NEXT: vlseg8e32.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) undef, ptr %base, i64 %vl, i64 5)
- %1 = call <vscale x 1 x float> @llvm.riscv.tuple.extract.nxv1f32.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0, i32 1)
- ret <vscale x 1 x float> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0
}
-
-define <vscale x 1 x float> @test_vlseg8_mask_nxv1f32_triscv.vector.tuple_nxv4i8_8t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @test_vlseg8_mask_nxv1f32_triscv.vector.tuple_nxv4i8_8t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg8_mask_nxv1f32_triscv.vector.tuple_nxv4i8_8t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT: vlseg8e32.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg8e32.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv4i8_8t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 5)
- %1 = call <vscale x 1 x float> @llvm.riscv.tuple.extract.nxv1f32.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0, i32 1)
- ret <vscale x 1 x float> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0
}
-
-
-define <vscale x 2 x float> @test_vlseg8_nxv2f32_triscv.vector.tuple_nxv8i8_8t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @test_vlseg8_nxv2f32_triscv.vector.tuple_nxv8i8_8t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg8_nxv2f32_triscv.vector.tuple_nxv8i8_8t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT: vlseg8e32.v v7, (a0)
+; CHECK-NEXT: vlseg8e32.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) undef, ptr %base, i64 %vl, i64 5)
- %1 = call <vscale x 2 x float> @llvm.riscv.tuple.extract.nxv2f32.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0, i32 1)
- ret <vscale x 2 x float> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0
}
-
-define <vscale x 2 x float> @test_vlseg8_mask_nxv2f32_triscv.vector.tuple_nxv8i8_8t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @test_vlseg8_mask_nxv2f32_triscv.vector.tuple_nxv8i8_8t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg8_mask_nxv2f32_triscv.vector.tuple_nxv8i8_8t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT: vlseg8e32.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg8e32.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv8i8_8t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 5)
- %1 = call <vscale x 2 x float> @llvm.riscv.tuple.extract.nxv2f32.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0, i32 1)
- ret <vscale x 2 x float> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0
}
-
-
-define <vscale x 1 x double> @test_vlseg2_nxv1f64_triscv.vector.tuple_nxv8i8_2t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @test_vlseg2_nxv1f64_triscv.vector.tuple_nxv8i8_2t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg2_nxv1f64_triscv.vector.tuple_nxv8i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
-; CHECK-NEXT: vlseg2e64.v v7, (a0)
+; CHECK-NEXT: vlseg2e64.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) undef, ptr %base, i64 %vl, i64 6)
- %1 = call <vscale x 1 x double> @llvm.riscv.tuple.extract.nxv1f64.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0, i32 1)
- ret <vscale x 1 x double> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0
}
-
-define <vscale x 1 x double> @test_vlseg2_mask_nxv1f64_triscv.vector.tuple_nxv8i8_2t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @test_vlseg2_mask_nxv1f64_triscv.vector.tuple_nxv8i8_2t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg2_mask_nxv1f64_triscv.vector.tuple_nxv8i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
-; CHECK-NEXT: vlseg2e64.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg2e64.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv8i8_2t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 6)
- %1 = call <vscale x 1 x double> @llvm.riscv.tuple.extract.nxv1f64.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0, i32 1)
- ret <vscale x 1 x double> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0
}
-
-
-define <vscale x 2 x double> @test_vlseg2_nxv2f64_triscv.vector.tuple_nxv16i8_2t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @test_vlseg2_nxv2f64_triscv.vector.tuple_nxv16i8_2t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg2_nxv2f64_triscv.vector.tuple_nxv16i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vlseg2e64.v v6, (a0)
+; CHECK-NEXT: vlseg2e64.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, i64 %vl, i64 6)
- %1 = call <vscale x 2 x double> @llvm.riscv.tuple.extract.nxv2f64.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0, i32 1)
- ret <vscale x 2 x double> %1
+ ret target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0
}
-
-define <vscale x 2 x double> @test_vlseg2_mask_nxv2f64_triscv.vector.tuple_nxv16i8_2t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @test_vlseg2_mask_nxv2f64_triscv.vector.tuple_nxv16i8_2t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg2_mask_nxv2f64_triscv.vector.tuple_nxv16i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vlseg2e64.v v6, (a0), v0.t
+; CHECK-NEXT: vlseg2e64.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv16i8_2t.nxv2i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 6)
- %1 = call <vscale x 2 x double> @llvm.riscv.tuple.extract.nxv2f64.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0, i32 1)
- ret <vscale x 2 x double> %1
+ ret target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0
}
-
-
-define <vscale x 4 x double> @test_vlseg2_nxv4f64_triscv.vector.tuple_nxv32i8_2t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @test_vlseg2_nxv4f64_triscv.vector.tuple_nxv32i8_2t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg2_nxv4f64_triscv.vector.tuple_nxv32i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma
-; CHECK-NEXT: vlseg2e64.v v4, (a0)
+; CHECK-NEXT: vlseg2e64.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, i64 %vl, i64 6)
- %1 = call <vscale x 4 x double> @llvm.riscv.tuple.extract.nxv4f64.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0, i32 1)
- ret <vscale x 4 x double> %1
+ ret target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0
}
-
-define <vscale x 4 x double> @test_vlseg2_mask_nxv4f64_triscv.vector.tuple_nxv32i8_2t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @test_vlseg2_mask_nxv4f64_triscv.vector.tuple_nxv32i8_2t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: test_vlseg2_mask_nxv4f64_triscv.vector.tuple_nxv32i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma
-; CHECK-NEXT: vlseg2e64.v v4, (a0), v0.t
+; CHECK-NEXT: vlseg2e64.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv32i8_2t.nxv4i1(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 6)
- %1 = call <vscale x 4 x double> @llvm.riscv.tuple.extract.nxv4f64.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0, i32 1)
- ret <vscale x 4 x double> %1
+ ret target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0
}
-
-
-define <vscale x 1 x double> @test_vlseg3_nxv1f64_triscv.vector.tuple_nxv8i8_3t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @test_vlseg3_nxv1f64_triscv.vector.tuple_nxv8i8_3t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg3_nxv1f64_triscv.vector.tuple_nxv8i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
-; CHECK-NEXT: vlseg3e64.v v7, (a0)
+; CHECK-NEXT: vlseg3e64.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) undef, ptr %base, i64 %vl, i64 6)
- %1 = call <vscale x 1 x double> @llvm.riscv.tuple.extract.nxv1f64.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0, i32 1)
- ret <vscale x 1 x double> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0
}
-
-define <vscale x 1 x double> @test_vlseg3_mask_nxv1f64_triscv.vector.tuple_nxv8i8_3t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @test_vlseg3_mask_nxv1f64_triscv.vector.tuple_nxv8i8_3t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg3_mask_nxv1f64_triscv.vector.tuple_nxv8i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
-; CHECK-NEXT: vlseg3e64.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg3e64.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv8i8_3t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 6)
- %1 = call <vscale x 1 x double> @llvm.riscv.tuple.extract.nxv1f64.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0, i32 1)
- ret <vscale x 1 x double> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0
}
-
-
-define <vscale x 2 x double> @test_vlseg3_nxv2f64_triscv.vector.tuple_nxv16i8_3t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @test_vlseg3_nxv2f64_triscv.vector.tuple_nxv16i8_3t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg3_nxv2f64_triscv.vector.tuple_nxv16i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vlseg3e64.v v6, (a0)
+; CHECK-NEXT: vlseg3e64.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, i64 %vl, i64 6)
- %1 = call <vscale x 2 x double> @llvm.riscv.tuple.extract.nxv2f64.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0, i32 1)
- ret <vscale x 2 x double> %1
+ ret target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0
}
-
-define <vscale x 2 x double> @test_vlseg3_mask_nxv2f64_triscv.vector.tuple_nxv16i8_3t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @test_vlseg3_mask_nxv2f64_triscv.vector.tuple_nxv16i8_3t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg3_mask_nxv2f64_triscv.vector.tuple_nxv16i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vlseg3e64.v v6, (a0), v0.t
+; CHECK-NEXT: vlseg3e64.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv2i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 6)
- %1 = call <vscale x 2 x double> @llvm.riscv.tuple.extract.nxv2f64.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0, i32 1)
- ret <vscale x 2 x double> %1
+ ret target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0
}
-
-
-define <vscale x 1 x double> @test_vlseg4_nxv1f64_triscv.vector.tuple_nxv8i8_4t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @test_vlseg4_nxv1f64_triscv.vector.tuple_nxv8i8_4t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg4_nxv1f64_triscv.vector.tuple_nxv8i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
-; CHECK-NEXT: vlseg4e64.v v7, (a0)
+; CHECK-NEXT: vlseg4e64.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) undef, ptr %base, i64 %vl, i64 6)
- %1 = call <vscale x 1 x double> @llvm.riscv.tuple.extract.nxv1f64.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0, i32 1)
- ret <vscale x 1 x double> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0
}
-
-define <vscale x 1 x double> @test_vlseg4_mask_nxv1f64_triscv.vector.tuple_nxv8i8_4t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @test_vlseg4_mask_nxv1f64_triscv.vector.tuple_nxv8i8_4t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg4_mask_nxv1f64_triscv.vector.tuple_nxv8i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
-; CHECK-NEXT: vlseg4e64.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg4e64.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv8i8_4t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 6)
- %1 = call <vscale x 1 x double> @llvm.riscv.tuple.extract.nxv1f64.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0, i32 1)
- ret <vscale x 1 x double> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0
}
-
-
-define <vscale x 2 x double> @test_vlseg4_nxv2f64_triscv.vector.tuple_nxv16i8_4t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @test_vlseg4_nxv2f64_triscv.vector.tuple_nxv16i8_4t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg4_nxv2f64_triscv.vector.tuple_nxv16i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vlseg4e64.v v6, (a0)
+; CHECK-NEXT: vlseg4e64.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, i64 %vl, i64 6)
- %1 = call <vscale x 2 x double> @llvm.riscv.tuple.extract.nxv2f64.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0, i32 1)
- ret <vscale x 2 x double> %1
+ ret target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0
}
-
-define <vscale x 2 x double> @test_vlseg4_mask_nxv2f64_triscv.vector.tuple_nxv16i8_4t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @test_vlseg4_mask_nxv2f64_triscv.vector.tuple_nxv16i8_4t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg4_mask_nxv2f64_triscv.vector.tuple_nxv16i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vlseg4e64.v v6, (a0), v0.t
+; CHECK-NEXT: vlseg4e64.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv16i8_4t.nxv2i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 6)
- %1 = call <vscale x 2 x double> @llvm.riscv.tuple.extract.nxv2f64.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0, i32 1)
- ret <vscale x 2 x double> %1
+ ret target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0
}
-
-
-define <vscale x 1 x double> @test_vlseg5_nxv1f64_triscv.vector.tuple_nxv8i8_5t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @test_vlseg5_nxv1f64_triscv.vector.tuple_nxv8i8_5t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg5_nxv1f64_triscv.vector.tuple_nxv8i8_5t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
-; CHECK-NEXT: vlseg5e64.v v7, (a0)
+; CHECK-NEXT: vlseg5e64.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) undef, ptr %base, i64 %vl, i64 6)
- %1 = call <vscale x 1 x double> @llvm.riscv.tuple.extract.nxv1f64.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0, i32 1)
- ret <vscale x 1 x double> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0
}
-
-define <vscale x 1 x double> @test_vlseg5_mask_nxv1f64_triscv.vector.tuple_nxv8i8_5t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @test_vlseg5_mask_nxv1f64_triscv.vector.tuple_nxv8i8_5t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg5_mask_nxv1f64_triscv.vector.tuple_nxv8i8_5t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
-; CHECK-NEXT: vlseg5e64.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg5e64.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv8i8_5t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 6)
- %1 = call <vscale x 1 x double> @llvm.riscv.tuple.extract.nxv1f64.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0, i32 1)
- ret <vscale x 1 x double> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0
}
-
-
-define <vscale x 1 x double> @test_vlseg6_nxv1f64_triscv.vector.tuple_nxv8i8_6t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @test_vlseg6_nxv1f64_triscv.vector.tuple_nxv8i8_6t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg6_nxv1f64_triscv.vector.tuple_nxv8i8_6t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
-; CHECK-NEXT: vlseg6e64.v v7, (a0)
+; CHECK-NEXT: vlseg6e64.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) undef, ptr %base, i64 %vl, i64 6)
- %1 = call <vscale x 1 x double> @llvm.riscv.tuple.extract.nxv1f64.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0, i32 1)
- ret <vscale x 1 x double> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0
}
-
-define <vscale x 1 x double> @test_vlseg6_mask_nxv1f64_triscv.vector.tuple_nxv8i8_6t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @test_vlseg6_mask_nxv1f64_triscv.vector.tuple_nxv8i8_6t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg6_mask_nxv1f64_triscv.vector.tuple_nxv8i8_6t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
-; CHECK-NEXT: vlseg6e64.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg6e64.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv8i8_6t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 6)
- %1 = call <vscale x 1 x double> @llvm.riscv.tuple.extract.nxv1f64.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0, i32 1)
- ret <vscale x 1 x double> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0
}
-
-
-define <vscale x 1 x double> @test_vlseg7_nxv1f64_triscv.vector.tuple_nxv8i8_7t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @test_vlseg7_nxv1f64_triscv.vector.tuple_nxv8i8_7t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg7_nxv1f64_triscv.vector.tuple_nxv8i8_7t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
-; CHECK-NEXT: vlseg7e64.v v7, (a0)
+; CHECK-NEXT: vlseg7e64.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) undef, ptr %base, i64 %vl, i64 6)
- %1 = call <vscale x 1 x double> @llvm.riscv.tuple.extract.nxv1f64.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0, i32 1)
- ret <vscale x 1 x double> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0
}
-
-define <vscale x 1 x double> @test_vlseg7_mask_nxv1f64_triscv.vector.tuple_nxv8i8_7t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @test_vlseg7_mask_nxv1f64_triscv.vector.tuple_nxv8i8_7t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg7_mask_nxv1f64_triscv.vector.tuple_nxv8i8_7t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
-; CHECK-NEXT: vlseg7e64.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg7e64.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv8i8_7t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 6)
- %1 = call <vscale x 1 x double> @llvm.riscv.tuple.extract.nxv1f64.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0, i32 1)
- ret <vscale x 1 x double> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0
}
-
-
-define <vscale x 1 x double> @test_vlseg8_nxv1f64_triscv.vector.tuple_nxv8i8_8t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @test_vlseg8_nxv1f64_triscv.vector.tuple_nxv8i8_8t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg8_nxv1f64_triscv.vector.tuple_nxv8i8_8t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
-; CHECK-NEXT: vlseg8e64.v v7, (a0)
+; CHECK-NEXT: vlseg8e64.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) undef, ptr %base, i64 %vl, i64 6)
- %1 = call <vscale x 1 x double> @llvm.riscv.tuple.extract.nxv1f64.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0, i32 1)
- ret <vscale x 1 x double> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0
}
-
-define <vscale x 1 x double> @test_vlseg8_mask_nxv1f64_triscv.vector.tuple_nxv8i8_8t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @test_vlseg8_mask_nxv1f64_triscv.vector.tuple_nxv8i8_8t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg8_mask_nxv1f64_triscv.vector.tuple_nxv8i8_8t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
-; CHECK-NEXT: vlseg8e64.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg8e64.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv8i8_8t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 6)
- %1 = call <vscale x 1 x double> @llvm.riscv.tuple.extract.nxv1f64.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0, i32 1)
- ret <vscale x 1 x double> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0
}
-
-
-define <vscale x 1 x bfloat> @test_vlseg2_nxv1bf16_triscv.vector.tuple_nxv2i8_2t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 2) @test_vlseg2_nxv1bf16_triscv.vector.tuple_nxv2i8_2t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg2_nxv1bf16_triscv.vector.tuple_nxv2i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT: vlseg2e16.v v7, (a0)
+; CHECK-NEXT: vlseg2e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv2i8_2t(target("riscv.vector.tuple", <vscale x 2 x i8>, 2) undef, ptr %base, i64 %vl, i64 4)
- %1 = call <vscale x 1 x bfloat> @llvm.riscv.tuple.extract.nxv1bf16.triscv.vector.tuple_nxv2i8_2t(target("riscv.vector.tuple", <vscale x 2 x i8>, 2) %0, i32 1)
- ret <vscale x 1 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 2) %0
}
-
-define <vscale x 1 x bfloat> @test_vlseg2_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_2t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 2) @test_vlseg2_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_2t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg2_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT: vlseg2e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg2e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv2i8_2t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 2) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 4)
- %1 = call <vscale x 1 x bfloat> @llvm.riscv.tuple.extract.nxv1bf16.triscv.vector.tuple_nxv2i8_2t(target("riscv.vector.tuple", <vscale x 2 x i8>, 2) %0, i32 1)
- ret <vscale x 1 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 2) %0
}
-
-
-define <vscale x 2 x bfloat> @test_vlseg2_nxv2bf16_triscv.vector.tuple_nxv4i8_2t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @test_vlseg2_nxv2bf16_triscv.vector.tuple_nxv4i8_2t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg2_nxv2bf16_triscv.vector.tuple_nxv4i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT: vlseg2e16.v v7, (a0)
+; CHECK-NEXT: vlseg2e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) undef, ptr %base, i64 %vl, i64 4)
- %1 = call <vscale x 2 x bfloat> @llvm.riscv.tuple.extract.nxv2bf16.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0, i32 1)
- ret <vscale x 2 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0
}
-
-define <vscale x 2 x bfloat> @test_vlseg2_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_2t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @test_vlseg2_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_2t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg2_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT: vlseg2e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg2e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv4i8_2t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 4)
- %1 = call <vscale x 2 x bfloat> @llvm.riscv.tuple.extract.nxv2bf16.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0, i32 1)
- ret <vscale x 2 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0
}
-
-
-define <vscale x 4 x bfloat> @test_vlseg2_nxv4bf16_triscv.vector.tuple_nxv8i8_2t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @test_vlseg2_nxv4bf16_triscv.vector.tuple_nxv8i8_2t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg2_nxv4bf16_triscv.vector.tuple_nxv8i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vlseg2e16.v v7, (a0)
+; CHECK-NEXT: vlseg2e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) undef, ptr %base, i64 %vl, i64 4)
- %1 = call <vscale x 4 x bfloat> @llvm.riscv.tuple.extract.nxv4bf16.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0, i32 1)
- ret <vscale x 4 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0
}
-
-define <vscale x 4 x bfloat> @test_vlseg2_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_2t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @test_vlseg2_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_2t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: test_vlseg2_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vlseg2e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg2e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv8i8_2t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) undef, ptr %base, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 4)
- %1 = call <vscale x 4 x bfloat> @llvm.riscv.tuple.extract.nxv4bf16.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0, i32 1)
- ret <vscale x 4 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0
}
-
-
-define <vscale x 8 x bfloat> @test_vlseg2_nxv8bf16_triscv.vector.tuple_nxv16i8_2t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @test_vlseg2_nxv8bf16_triscv.vector.tuple_nxv16i8_2t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg2_nxv8bf16_triscv.vector.tuple_nxv16i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vlseg2e16.v v6, (a0)
+; CHECK-NEXT: vlseg2e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, i64 %vl, i64 4)
- %1 = call <vscale x 8 x bfloat> @llvm.riscv.tuple.extract.nxv8bf16.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0, i32 1)
- ret <vscale x 8 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0
}
-
-define <vscale x 8 x bfloat> @test_vlseg2_mask_nxv8bf16_triscv.vector.tuple_nxv16i8_2t(ptr %base, i64 %vl, <vscale x 8 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @test_vlseg2_mask_nxv8bf16_triscv.vector.tuple_nxv16i8_2t(ptr %base, i64 %vl, <vscale x 8 x i1> %mask) {
; CHECK-LABEL: test_vlseg2_mask_nxv8bf16_triscv.vector.tuple_nxv16i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vlseg2e16.v v6, (a0), v0.t
+; CHECK-NEXT: vlseg2e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv16i8_2t.nxv8i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 8 x i1> %mask, i64 %vl, i64 1, i64 4)
- %1 = call <vscale x 8 x bfloat> @llvm.riscv.tuple.extract.nxv8bf16.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0, i32 1)
- ret <vscale x 8 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0
}
-
-
-define <vscale x 16 x bfloat> @test_vlseg2_nxv16bf16_triscv.vector.tuple_nxv32i8_2t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @test_vlseg2_nxv16bf16_triscv.vector.tuple_nxv32i8_2t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg2_nxv16bf16_triscv.vector.tuple_nxv32i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
-; CHECK-NEXT: vlseg2e16.v v4, (a0)
+; CHECK-NEXT: vlseg2e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, i64 %vl, i64 4)
- %1 = call <vscale x 16 x bfloat> @llvm.riscv.tuple.extract.nxv16bf16.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0, i32 1)
- ret <vscale x 16 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0
}
-
-define <vscale x 16 x bfloat> @test_vlseg2_mask_nxv16bf16_triscv.vector.tuple_nxv32i8_2t(ptr %base, i64 %vl, <vscale x 16 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @test_vlseg2_mask_nxv16bf16_triscv.vector.tuple_nxv32i8_2t(ptr %base, i64 %vl, <vscale x 16 x i1> %mask) {
; CHECK-LABEL: test_vlseg2_mask_nxv16bf16_triscv.vector.tuple_nxv32i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma
-; CHECK-NEXT: vlseg2e16.v v4, (a0), v0.t
+; CHECK-NEXT: vlseg2e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv32i8_2t.nxv16i1(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 16 x i1> %mask, i64 %vl, i64 1, i64 4)
- %1 = call <vscale x 16 x bfloat> @llvm.riscv.tuple.extract.nxv16bf16.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0, i32 1)
- ret <vscale x 16 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0
}
-
-
-define <vscale x 1 x bfloat> @test_vlseg3_nxv1bf16_triscv.vector.tuple_nxv2i8_3t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 3) @test_vlseg3_nxv1bf16_triscv.vector.tuple_nxv2i8_3t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg3_nxv1bf16_triscv.vector.tuple_nxv2i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT: vlseg3e16.v v7, (a0)
+; CHECK-NEXT: vlseg3e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv2i8_3t(target("riscv.vector.tuple", <vscale x 2 x i8>, 3) undef, ptr %base, i64 %vl, i64 4)
- %1 = call <vscale x 1 x bfloat> @llvm.riscv.tuple.extract.nxv1bf16.triscv.vector.tuple_nxv2i8_3t(target("riscv.vector.tuple", <vscale x 2 x i8>, 3) %0, i32 1)
- ret <vscale x 1 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 3) %0
}
-
-define <vscale x 1 x bfloat> @test_vlseg3_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_3t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 3) @test_vlseg3_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_3t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg3_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT: vlseg3e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg3e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv2i8_3t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 3) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 4)
- %1 = call <vscale x 1 x bfloat> @llvm.riscv.tuple.extract.nxv1bf16.triscv.vector.tuple_nxv2i8_3t(target("riscv.vector.tuple", <vscale x 2 x i8>, 3) %0, i32 1)
- ret <vscale x 1 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 3) %0
}
-
-
-define <vscale x 2 x bfloat> @test_vlseg3_nxv2bf16_triscv.vector.tuple_nxv4i8_3t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @test_vlseg3_nxv2bf16_triscv.vector.tuple_nxv4i8_3t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg3_nxv2bf16_triscv.vector.tuple_nxv4i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT: vlseg3e16.v v7, (a0)
+; CHECK-NEXT: vlseg3e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) undef, ptr %base, i64 %vl, i64 4)
- %1 = call <vscale x 2 x bfloat> @llvm.riscv.tuple.extract.nxv2bf16.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0, i32 1)
- ret <vscale x 2 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0
}
-
-define <vscale x 2 x bfloat> @test_vlseg3_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_3t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @test_vlseg3_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_3t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg3_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT: vlseg3e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg3e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv4i8_3t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 4)
- %1 = call <vscale x 2 x bfloat> @llvm.riscv.tuple.extract.nxv2bf16.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0, i32 1)
- ret <vscale x 2 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0
}
-
-
-define <vscale x 4 x bfloat> @test_vlseg3_nxv4bf16_triscv.vector.tuple_nxv8i8_3t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @test_vlseg3_nxv4bf16_triscv.vector.tuple_nxv8i8_3t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg3_nxv4bf16_triscv.vector.tuple_nxv8i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vlseg3e16.v v7, (a0)
+; CHECK-NEXT: vlseg3e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) undef, ptr %base, i64 %vl, i64 4)
- %1 = call <vscale x 4 x bfloat> @llvm.riscv.tuple.extract.nxv4bf16.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0, i32 1)
- ret <vscale x 4 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0
}
-
-define <vscale x 4 x bfloat> @test_vlseg3_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_3t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @test_vlseg3_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_3t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: test_vlseg3_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vlseg3e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg3e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv8i8_3t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) undef, ptr %base, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 4)
- %1 = call <vscale x 4 x bfloat> @llvm.riscv.tuple.extract.nxv4bf16.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0, i32 1)
- ret <vscale x 4 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0
}
-
-
-define <vscale x 8 x bfloat> @test_vlseg3_nxv8bf16_triscv.vector.tuple_nxv16i8_3t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @test_vlseg3_nxv8bf16_triscv.vector.tuple_nxv16i8_3t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg3_nxv8bf16_triscv.vector.tuple_nxv16i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vlseg3e16.v v6, (a0)
+; CHECK-NEXT: vlseg3e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, i64 %vl, i64 4)
- %1 = call <vscale x 8 x bfloat> @llvm.riscv.tuple.extract.nxv8bf16.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0, i32 1)
- ret <vscale x 8 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0
}
-
-define <vscale x 8 x bfloat> @test_vlseg3_mask_nxv8bf16_triscv.vector.tuple_nxv16i8_3t(ptr %base, i64 %vl, <vscale x 8 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @test_vlseg3_mask_nxv8bf16_triscv.vector.tuple_nxv16i8_3t(ptr %base, i64 %vl, <vscale x 8 x i1> %mask) {
; CHECK-LABEL: test_vlseg3_mask_nxv8bf16_triscv.vector.tuple_nxv16i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vlseg3e16.v v6, (a0), v0.t
+; CHECK-NEXT: vlseg3e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv8i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 8 x i1> %mask, i64 %vl, i64 1, i64 4)
- %1 = call <vscale x 8 x bfloat> @llvm.riscv.tuple.extract.nxv8bf16.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0, i32 1)
- ret <vscale x 8 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0
}
-
-
-define <vscale x 1 x bfloat> @test_vlseg4_nxv1bf16_triscv.vector.tuple_nxv2i8_4t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 4) @test_vlseg4_nxv1bf16_triscv.vector.tuple_nxv2i8_4t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg4_nxv1bf16_triscv.vector.tuple_nxv2i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT: vlseg4e16.v v7, (a0)
+; CHECK-NEXT: vlseg4e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv2i8_4t(target("riscv.vector.tuple", <vscale x 2 x i8>, 4) undef, ptr %base, i64 %vl, i64 4)
- %1 = call <vscale x 1 x bfloat> @llvm.riscv.tuple.extract.nxv1bf16.triscv.vector.tuple_nxv2i8_4t(target("riscv.vector.tuple", <vscale x 2 x i8>, 4) %0, i32 1)
- ret <vscale x 1 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 4) %0
}
-
-define <vscale x 1 x bfloat> @test_vlseg4_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_4t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 4) @test_vlseg4_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_4t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg4_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT: vlseg4e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg4e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv2i8_4t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 4) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 4)
- %1 = call <vscale x 1 x bfloat> @llvm.riscv.tuple.extract.nxv1bf16.triscv.vector.tuple_nxv2i8_4t(target("riscv.vector.tuple", <vscale x 2 x i8>, 4) %0, i32 1)
- ret <vscale x 1 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 4) %0
}
-
-
-define <vscale x 2 x bfloat> @test_vlseg4_nxv2bf16_triscv.vector.tuple_nxv4i8_4t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @test_vlseg4_nxv2bf16_triscv.vector.tuple_nxv4i8_4t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg4_nxv2bf16_triscv.vector.tuple_nxv4i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT: vlseg4e16.v v7, (a0)
+; CHECK-NEXT: vlseg4e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) undef, ptr %base, i64 %vl, i64 4)
- %1 = call <vscale x 2 x bfloat> @llvm.riscv.tuple.extract.nxv2bf16.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0, i32 1)
- ret <vscale x 2 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0
}
-
-define <vscale x 2 x bfloat> @test_vlseg4_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_4t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @test_vlseg4_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_4t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg4_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT: vlseg4e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg4e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv4i8_4t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 4)
- %1 = call <vscale x 2 x bfloat> @llvm.riscv.tuple.extract.nxv2bf16.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0, i32 1)
- ret <vscale x 2 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0
}
-
-
-define <vscale x 4 x bfloat> @test_vlseg4_nxv4bf16_triscv.vector.tuple_nxv8i8_4t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @test_vlseg4_nxv4bf16_triscv.vector.tuple_nxv8i8_4t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg4_nxv4bf16_triscv.vector.tuple_nxv8i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vlseg4e16.v v7, (a0)
+; CHECK-NEXT: vlseg4e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) undef, ptr %base, i64 %vl, i64 4)
- %1 = call <vscale x 4 x bfloat> @llvm.riscv.tuple.extract.nxv4bf16.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0, i32 1)
- ret <vscale x 4 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0
}
-
-define <vscale x 4 x bfloat> @test_vlseg4_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_4t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @test_vlseg4_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_4t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: test_vlseg4_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vlseg4e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg4e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv8i8_4t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) undef, ptr %base, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 4)
- %1 = call <vscale x 4 x bfloat> @llvm.riscv.tuple.extract.nxv4bf16.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0, i32 1)
- ret <vscale x 4 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0
}
-
-
-define <vscale x 8 x bfloat> @test_vlseg4_nxv8bf16_triscv.vector.tuple_nxv16i8_4t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @test_vlseg4_nxv8bf16_triscv.vector.tuple_nxv16i8_4t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg4_nxv8bf16_triscv.vector.tuple_nxv16i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vlseg4e16.v v6, (a0)
+; CHECK-NEXT: vlseg4e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, i64 %vl, i64 4)
- %1 = call <vscale x 8 x bfloat> @llvm.riscv.tuple.extract.nxv8bf16.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0, i32 1)
- ret <vscale x 8 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0
}
-
-define <vscale x 8 x bfloat> @test_vlseg4_mask_nxv8bf16_triscv.vector.tuple_nxv16i8_4t(ptr %base, i64 %vl, <vscale x 8 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @test_vlseg4_mask_nxv8bf16_triscv.vector.tuple_nxv16i8_4t(ptr %base, i64 %vl, <vscale x 8 x i1> %mask) {
; CHECK-LABEL: test_vlseg4_mask_nxv8bf16_triscv.vector.tuple_nxv16i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vlseg4e16.v v6, (a0), v0.t
+; CHECK-NEXT: vlseg4e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv16i8_4t.nxv8i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 8 x i1> %mask, i64 %vl, i64 1, i64 4)
- %1 = call <vscale x 8 x bfloat> @llvm.riscv.tuple.extract.nxv8bf16.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0, i32 1)
- ret <vscale x 8 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0
}
-
-
-define <vscale x 1 x bfloat> @test_vlseg5_nxv1bf16_triscv.vector.tuple_nxv2i8_5t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 5) @test_vlseg5_nxv1bf16_triscv.vector.tuple_nxv2i8_5t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg5_nxv1bf16_triscv.vector.tuple_nxv2i8_5t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT: vlseg5e16.v v7, (a0)
+; CHECK-NEXT: vlseg5e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv2i8_5t(target("riscv.vector.tuple", <vscale x 2 x i8>, 5) undef, ptr %base, i64 %vl, i64 4)
- %1 = call <vscale x 1 x bfloat> @llvm.riscv.tuple.extract.nxv1bf16.triscv.vector.tuple_nxv2i8_5t(target("riscv.vector.tuple", <vscale x 2 x i8>, 5) %0, i32 1)
- ret <vscale x 1 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 5) %0
}
-
-define <vscale x 1 x bfloat> @test_vlseg5_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_5t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 5) @test_vlseg5_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_5t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg5_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_5t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT: vlseg5e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg5e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv2i8_5t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 5) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 4)
- %1 = call <vscale x 1 x bfloat> @llvm.riscv.tuple.extract.nxv1bf16.triscv.vector.tuple_nxv2i8_5t(target("riscv.vector.tuple", <vscale x 2 x i8>, 5) %0, i32 1)
- ret <vscale x 1 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 5) %0
}
-
-
-define <vscale x 2 x bfloat> @test_vlseg5_nxv2bf16_triscv.vector.tuple_nxv4i8_5t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @test_vlseg5_nxv2bf16_triscv.vector.tuple_nxv4i8_5t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg5_nxv2bf16_triscv.vector.tuple_nxv4i8_5t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT: vlseg5e16.v v7, (a0)
+; CHECK-NEXT: vlseg5e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) undef, ptr %base, i64 %vl, i64 4)
- %1 = call <vscale x 2 x bfloat> @llvm.riscv.tuple.extract.nxv2bf16.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0, i32 1)
- ret <vscale x 2 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0
}
-
-define <vscale x 2 x bfloat> @test_vlseg5_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_5t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @test_vlseg5_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_5t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg5_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_5t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT: vlseg5e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg5e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv4i8_5t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 4)
- %1 = call <vscale x 2 x bfloat> @llvm.riscv.tuple.extract.nxv2bf16.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0, i32 1)
- ret <vscale x 2 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0
}
-
-
-define <vscale x 4 x bfloat> @test_vlseg5_nxv4bf16_triscv.vector.tuple_nxv8i8_5t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @test_vlseg5_nxv4bf16_triscv.vector.tuple_nxv8i8_5t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg5_nxv4bf16_triscv.vector.tuple_nxv8i8_5t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vlseg5e16.v v7, (a0)
+; CHECK-NEXT: vlseg5e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) undef, ptr %base, i64 %vl, i64 4)
- %1 = call <vscale x 4 x bfloat> @llvm.riscv.tuple.extract.nxv4bf16.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0, i32 1)
- ret <vscale x 4 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0
}
-
-define <vscale x 4 x bfloat> @test_vlseg5_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_5t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @test_vlseg5_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_5t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: test_vlseg5_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_5t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vlseg5e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg5e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv8i8_5t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) undef, ptr %base, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 4)
- %1 = call <vscale x 4 x bfloat> @llvm.riscv.tuple.extract.nxv4bf16.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0, i32 1)
- ret <vscale x 4 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0
}
-
-
-define <vscale x 1 x bfloat> @test_vlseg6_nxv1bf16_triscv.vector.tuple_nxv2i8_6t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 6) @test_vlseg6_nxv1bf16_triscv.vector.tuple_nxv2i8_6t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg6_nxv1bf16_triscv.vector.tuple_nxv2i8_6t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT: vlseg6e16.v v7, (a0)
+; CHECK-NEXT: vlseg6e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv2i8_6t(target("riscv.vector.tuple", <vscale x 2 x i8>, 6) undef, ptr %base, i64 %vl, i64 4)
- %1 = call <vscale x 1 x bfloat> @llvm.riscv.tuple.extract.nxv1bf16.triscv.vector.tuple_nxv2i8_6t(target("riscv.vector.tuple", <vscale x 2 x i8>, 6) %0, i32 1)
- ret <vscale x 1 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 6) %0
}
-
-define <vscale x 1 x bfloat> @test_vlseg6_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_6t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 6) @test_vlseg6_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_6t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg6_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_6t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT: vlseg6e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg6e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv2i8_6t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 6) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 4)
- %1 = call <vscale x 1 x bfloat> @llvm.riscv.tuple.extract.nxv1bf16.triscv.vector.tuple_nxv2i8_6t(target("riscv.vector.tuple", <vscale x 2 x i8>, 6) %0, i32 1)
- ret <vscale x 1 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 6) %0
}
-
-
-define <vscale x 2 x bfloat> @test_vlseg6_nxv2bf16_triscv.vector.tuple_nxv4i8_6t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @test_vlseg6_nxv2bf16_triscv.vector.tuple_nxv4i8_6t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg6_nxv2bf16_triscv.vector.tuple_nxv4i8_6t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT: vlseg6e16.v v7, (a0)
+; CHECK-NEXT: vlseg6e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) undef, ptr %base, i64 %vl, i64 4)
- %1 = call <vscale x 2 x bfloat> @llvm.riscv.tuple.extract.nxv2bf16.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0, i32 1)
- ret <vscale x 2 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0
}
-
-define <vscale x 2 x bfloat> @test_vlseg6_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_6t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @test_vlseg6_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_6t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg6_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_6t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT: vlseg6e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg6e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv4i8_6t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 4)
- %1 = call <vscale x 2 x bfloat> @llvm.riscv.tuple.extract.nxv2bf16.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0, i32 1)
- ret <vscale x 2 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0
}
-
-
-define <vscale x 4 x bfloat> @test_vlseg6_nxv4bf16_triscv.vector.tuple_nxv8i8_6t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @test_vlseg6_nxv4bf16_triscv.vector.tuple_nxv8i8_6t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg6_nxv4bf16_triscv.vector.tuple_nxv8i8_6t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vlseg6e16.v v7, (a0)
+; CHECK-NEXT: vlseg6e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) undef, ptr %base, i64 %vl, i64 4)
- %1 = call <vscale x 4 x bfloat> @llvm.riscv.tuple.extract.nxv4bf16.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0, i32 1)
- ret <vscale x 4 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0
}
-
-define <vscale x 4 x bfloat> @test_vlseg6_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_6t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @test_vlseg6_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_6t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: test_vlseg6_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_6t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vlseg6e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg6e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv8i8_6t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) undef, ptr %base, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 4)
- %1 = call <vscale x 4 x bfloat> @llvm.riscv.tuple.extract.nxv4bf16.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0, i32 1)
- ret <vscale x 4 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0
}
-
-
-define <vscale x 1 x bfloat> @test_vlseg7_nxv1bf16_triscv.vector.tuple_nxv2i8_7t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 7) @test_vlseg7_nxv1bf16_triscv.vector.tuple_nxv2i8_7t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg7_nxv1bf16_triscv.vector.tuple_nxv2i8_7t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT: vlseg7e16.v v7, (a0)
+; CHECK-NEXT: vlseg7e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv2i8_7t(target("riscv.vector.tuple", <vscale x 2 x i8>, 7) undef, ptr %base, i64 %vl, i64 4)
- %1 = call <vscale x 1 x bfloat> @llvm.riscv.tuple.extract.nxv1bf16.triscv.vector.tuple_nxv2i8_7t(target("riscv.vector.tuple", <vscale x 2 x i8>, 7) %0, i32 1)
- ret <vscale x 1 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 7) %0
}
-
-define <vscale x 1 x bfloat> @test_vlseg7_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_7t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 7) @test_vlseg7_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_7t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg7_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_7t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT: vlseg7e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg7e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv2i8_7t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 7) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 4)
- %1 = call <vscale x 1 x bfloat> @llvm.riscv.tuple.extract.nxv1bf16.triscv.vector.tuple_nxv2i8_7t(target("riscv.vector.tuple", <vscale x 2 x i8>, 7) %0, i32 1)
- ret <vscale x 1 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 7) %0
}
-
-
-define <vscale x 2 x bfloat> @test_vlseg7_nxv2bf16_triscv.vector.tuple_nxv4i8_7t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @test_vlseg7_nxv2bf16_triscv.vector.tuple_nxv4i8_7t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg7_nxv2bf16_triscv.vector.tuple_nxv4i8_7t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT: vlseg7e16.v v7, (a0)
+; CHECK-NEXT: vlseg7e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) undef, ptr %base, i64 %vl, i64 4)
- %1 = call <vscale x 2 x bfloat> @llvm.riscv.tuple.extract.nxv2bf16.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0, i32 1)
- ret <vscale x 2 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0
}
-
-define <vscale x 2 x bfloat> @test_vlseg7_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_7t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @test_vlseg7_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_7t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg7_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_7t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT: vlseg7e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg7e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv4i8_7t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 4)
- %1 = call <vscale x 2 x bfloat> @llvm.riscv.tuple.extract.nxv2bf16.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0, i32 1)
- ret <vscale x 2 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0
}
-
-
-define <vscale x 4 x bfloat> @test_vlseg7_nxv4bf16_triscv.vector.tuple_nxv8i8_7t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @test_vlseg7_nxv4bf16_triscv.vector.tuple_nxv8i8_7t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg7_nxv4bf16_triscv.vector.tuple_nxv8i8_7t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vlseg7e16.v v7, (a0)
+; CHECK-NEXT: vlseg7e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) undef, ptr %base, i64 %vl, i64 4)
- %1 = call <vscale x 4 x bfloat> @llvm.riscv.tuple.extract.nxv4bf16.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0, i32 1)
- ret <vscale x 4 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0
}
-
-define <vscale x 4 x bfloat> @test_vlseg7_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_7t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @test_vlseg7_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_7t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: test_vlseg7_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_7t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vlseg7e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg7e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv8i8_7t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) undef, ptr %base, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 4)
- %1 = call <vscale x 4 x bfloat> @llvm.riscv.tuple.extract.nxv4bf16.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0, i32 1)
- ret <vscale x 4 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0
}
-
-
-define <vscale x 1 x bfloat> @test_vlseg8_nxv1bf16_triscv.vector.tuple_nxv2i8_8t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 8) @test_vlseg8_nxv1bf16_triscv.vector.tuple_nxv2i8_8t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg8_nxv1bf16_triscv.vector.tuple_nxv2i8_8t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT: vlseg8e16.v v7, (a0)
+; CHECK-NEXT: vlseg8e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv2i8_8t(target("riscv.vector.tuple", <vscale x 2 x i8>, 8) undef, ptr %base, i64 %vl, i64 4)
- %1 = call <vscale x 1 x bfloat> @llvm.riscv.tuple.extract.nxv1bf16.triscv.vector.tuple_nxv2i8_8t(target("riscv.vector.tuple", <vscale x 2 x i8>, 8) %0, i32 1)
- ret <vscale x 1 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 8) %0
}
-
-define <vscale x 1 x bfloat> @test_vlseg8_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_8t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 8) @test_vlseg8_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_8t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg8_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_8t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT: vlseg8e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg8e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv2i8_8t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 8) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 4)
- %1 = call <vscale x 1 x bfloat> @llvm.riscv.tuple.extract.nxv1bf16.triscv.vector.tuple_nxv2i8_8t(target("riscv.vector.tuple", <vscale x 2 x i8>, 8) %0, i32 1)
- ret <vscale x 1 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 8) %0
}
-
-
-define <vscale x 2 x bfloat> @test_vlseg8_nxv2bf16_triscv.vector.tuple_nxv4i8_8t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @test_vlseg8_nxv2bf16_triscv.vector.tuple_nxv4i8_8t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg8_nxv2bf16_triscv.vector.tuple_nxv4i8_8t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT: vlseg8e16.v v7, (a0)
+; CHECK-NEXT: vlseg8e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) undef, ptr %base, i64 %vl, i64 4)
- %1 = call <vscale x 2 x bfloat> @llvm.riscv.tuple.extract.nxv2bf16.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0, i32 1)
- ret <vscale x 2 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0
}
-
-define <vscale x 2 x bfloat> @test_vlseg8_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_8t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @test_vlseg8_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_8t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg8_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_8t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT: vlseg8e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg8e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv4i8_8t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 4)
- %1 = call <vscale x 2 x bfloat> @llvm.riscv.tuple.extract.nxv2bf16.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0, i32 1)
- ret <vscale x 2 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0
}
-
-
-define <vscale x 4 x bfloat> @test_vlseg8_nxv4bf16_triscv.vector.tuple_nxv8i8_8t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @test_vlseg8_nxv4bf16_triscv.vector.tuple_nxv8i8_8t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg8_nxv4bf16_triscv.vector.tuple_nxv8i8_8t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vlseg8e16.v v7, (a0)
+; CHECK-NEXT: vlseg8e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) undef, ptr %base, i64 %vl, i64 4)
- %1 = call <vscale x 4 x bfloat> @llvm.riscv.tuple.extract.nxv4bf16.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0, i32 1)
- ret <vscale x 4 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0
}
-
-define <vscale x 4 x bfloat> @test_vlseg8_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_8t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @test_vlseg8_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_8t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: test_vlseg8_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_8t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vlseg8e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg8e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv8i8_8t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) undef, ptr %base, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 4)
- %1 = call <vscale x 4 x bfloat> @llvm.riscv.tuple.extract.nxv4bf16.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0, i32 1)
- ret <vscale x 4 x bfloat> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0
}
-
diff --git a/llvm/test/CodeGen/WebAssembly/lower-em-sjlj-alloca.ll b/llvm/test/CodeGen/WebAssembly/lower-em-sjlj-alloca.ll
new file mode 100644
index 0000000..0f968de
--- /dev/null
+++ b/llvm/test/CodeGen/WebAssembly/lower-em-sjlj-alloca.ll
@@ -0,0 +1,129 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -S -wasm-lower-em-ehsjlj -wasm-enable-sjlj -mtriple=wasm32-unknown-emscripten < %s | FileCheck %s
+
+@buf = external global i8
+declare i32 @setjmp(ptr) returns_twice
+declare void @dummy()
+
+define void @test_static() {
+; CHECK-LABEL: define void @test_static() personality ptr @__gxx_wasm_personality_v0 {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[X:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[FUNCTIONINVOCATIONID:%.*]] = alloca i32, align 4
+; CHECK-NEXT: br label %[[SETJMP_DISPATCH:.*]]
+; CHECK: [[SETJMP_DISPATCH]]:
+; CHECK-NEXT: [[VAL1:%.*]] = phi i32 [ [[VAL:%.*]], %[[IF_END:.*]] ], [ undef, %[[ENTRY]] ]
+; CHECK-NEXT: [[LABEL_PHI:%.*]] = phi i32 [ [[LABEL:%.*]], %[[IF_END]] ], [ -1, %[[ENTRY]] ]
+; CHECK-NEXT: switch i32 [[LABEL_PHI]], label %[[ENTRY_SPLIT:.*]] [
+; CHECK-NEXT: i32 1, label %[[ENTRY_SPLIT_SPLIT:.*]]
+; CHECK-NEXT: ]
+; CHECK: [[ENTRY_SPLIT]]:
+; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[X]])
+; CHECK-NEXT: call void @__wasm_setjmp(ptr @buf, i32 1, ptr [[FUNCTIONINVOCATIONID]])
+; CHECK-NEXT: br label %[[ENTRY_SPLIT_SPLIT]]
+; CHECK: [[ENTRY_SPLIT_SPLIT]]:
+; CHECK-NEXT: [[SETJMP_RET:%.*]] = phi i32 [ 0, %[[ENTRY_SPLIT]] ], [ [[VAL1]], %[[SETJMP_DISPATCH]] ]
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[SETJMP_RET]], 0
+; CHECK-NEXT: br i1 [[CMP]], label %[[IF:.*]], label %[[ELSE:.*]]
+; CHECK: [[IF]]:
+; CHECK-NEXT: invoke void @dummy()
+; CHECK-NEXT: to [[DOTNOEXC:label %.*]] unwind label %[[CATCH_DISPATCH_LONGJMP:.*]]
+; CHECK: [[_NOEXC:.*:]]
+; CHECK-NEXT: ret void
+; CHECK: [[ELSE]]:
+; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[X]])
+; CHECK-NEXT: ret void
+; CHECK: [[CATCH_DISPATCH_LONGJMP]]:
+; CHECK-NEXT: [[TMP0:%.*]] = catchswitch within none [label %catch.longjmp] unwind to caller
+; CHECK: [[CATCH_LONGJMP:.*:]]
+; CHECK-NEXT: [[TMP1:%.*]] = catchpad within [[TMP0]] []
+; CHECK-NEXT: [[THROWN:%.*]] = call ptr @llvm.wasm.catch(i32 1)
+; CHECK-NEXT: [[ENV_GEP:%.*]] = getelementptr { ptr, i32 }, ptr [[THROWN]], i32 0, i32 0
+; CHECK-NEXT: [[VAL_GEP:%.*]] = getelementptr { ptr, i32 }, ptr [[THROWN]], i32 0, i32 1
+; CHECK-NEXT: [[ENV:%.*]] = load ptr, ptr [[ENV_GEP]], align 4
+; CHECK-NEXT: [[VAL]] = load i32, ptr [[VAL_GEP]], align 4
+; CHECK-NEXT: [[LABEL]] = call i32 @__wasm_setjmp_test(ptr [[ENV]], ptr [[FUNCTIONINVOCATIONID]]) [ "funclet"(token [[TMP1]]) ]
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[LABEL]], 0
+; CHECK-NEXT: br i1 [[TMP2]], label %[[IF_THEN:.*]], label %[[IF_END]]
+; CHECK: [[IF_THEN]]:
+; CHECK-NEXT: call void @__wasm_longjmp(ptr [[ENV]], i32 [[VAL]]) [ "funclet"(token [[TMP1]]) ]
+; CHECK-NEXT: unreachable
+; CHECK: [[IF_END]]:
+; CHECK-NEXT: catchret from [[TMP1]] to label %[[SETJMP_DISPATCH]]
+;
+entry:
+ %x = alloca i32, align 4
+ call void @llvm.lifetime.start.p0(i64 4, ptr %x)
+ %call = call i32 @setjmp(ptr @buf) returns_twice
+ %cmp = icmp eq i32 %call, 0
+ br i1 %cmp, label %if, label %else
+
+if:
+ call void @dummy()
+ ret void
+
+else:
+ call void @llvm.lifetime.end.p0(i64 4, ptr %x)
+ ret void
+}
+
+define void @test_dynamic(i32 %size) {
+; CHECK-LABEL: define void @test_dynamic(
+; CHECK-SAME: i32 [[SIZE:%.*]]) personality ptr @__gxx_wasm_personality_v0 {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: [[FUNCTIONINVOCATIONID:%.*]] = alloca i32, align 4
+; CHECK-NEXT: br label %[[SETJMP_DISPATCH:.*]]
+; CHECK: [[SETJMP_DISPATCH]]:
+; CHECK-NEXT: [[VAL1:%.*]] = phi i32 [ [[VAL:%.*]], %[[IF_END:.*]] ], [ undef, %[[ENTRY]] ]
+; CHECK-NEXT: [[LABEL_PHI:%.*]] = phi i32 [ [[LABEL:%.*]], %[[IF_END]] ], [ -1, %[[ENTRY]] ]
+; CHECK-NEXT: switch i32 [[LABEL_PHI]], label %[[ENTRY_SPLIT:.*]] [
+; CHECK-NEXT: i32 1, label %[[ENTRY_SPLIT_SPLIT:.*]]
+; CHECK-NEXT: ]
+; CHECK: [[ENTRY_SPLIT]]:
+; CHECK-NEXT: [[X:%.*]] = alloca i32, i32 [[SIZE]], align 4
+; CHECK-NEXT: call void @__wasm_setjmp(ptr @buf, i32 1, ptr [[FUNCTIONINVOCATIONID]])
+; CHECK-NEXT: br label %[[ENTRY_SPLIT_SPLIT]]
+; CHECK: [[ENTRY_SPLIT_SPLIT]]:
+; CHECK-NEXT: [[SETJMP_RET:%.*]] = phi i32 [ 0, %[[ENTRY_SPLIT]] ], [ [[VAL1]], %[[SETJMP_DISPATCH]] ]
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[SETJMP_RET]], 0
+; CHECK-NEXT: br i1 [[CMP]], label %[[IF:.*]], label %[[ELSE:.*]]
+; CHECK: [[IF]]:
+; CHECK-NEXT: invoke void @dummy()
+; CHECK-NEXT: to [[DOTNOEXC:label %.*]] unwind label %[[CATCH_DISPATCH_LONGJMP:.*]]
+; CHECK: [[_NOEXC:.*:]]
+; CHECK-NEXT: ret void
+; CHECK: [[ELSE]]:
+; CHECK-NEXT: ret void
+; CHECK: [[CATCH_DISPATCH_LONGJMP]]:
+; CHECK-NEXT: [[TMP0:%.*]] = catchswitch within none [label %catch.longjmp] unwind to caller
+; CHECK: [[CATCH_LONGJMP:.*:]]
+; CHECK-NEXT: [[TMP1:%.*]] = catchpad within [[TMP0]] []
+; CHECK-NEXT: [[THROWN:%.*]] = call ptr @llvm.wasm.catch(i32 1)
+; CHECK-NEXT: [[ENV_GEP:%.*]] = getelementptr { ptr, i32 }, ptr [[THROWN]], i32 0, i32 0
+; CHECK-NEXT: [[VAL_GEP:%.*]] = getelementptr { ptr, i32 }, ptr [[THROWN]], i32 0, i32 1
+; CHECK-NEXT: [[ENV:%.*]] = load ptr, ptr [[ENV_GEP]], align 4
+; CHECK-NEXT: [[VAL]] = load i32, ptr [[VAL_GEP]], align 4
+; CHECK-NEXT: [[LABEL]] = call i32 @__wasm_setjmp_test(ptr [[ENV]], ptr [[FUNCTIONINVOCATIONID]]) [ "funclet"(token [[TMP1]]) ]
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[LABEL]], 0
+; CHECK-NEXT: br i1 [[TMP2]], label %[[IF_THEN:.*]], label %[[IF_END]]
+; CHECK: [[IF_THEN]]:
+; CHECK-NEXT: call void @__wasm_longjmp(ptr [[ENV]], i32 [[VAL]]) [ "funclet"(token [[TMP1]]) ]
+; CHECK-NEXT: unreachable
+; CHECK: [[IF_END]]:
+; CHECK-NEXT: catchret from [[TMP1]] to label %[[SETJMP_DISPATCH]]
+;
+entry:
+ %x = alloca i32, i32 %size, align 4
+ call void @llvm.lifetime.start.p0(i64 -1, ptr %x)
+ %call = call i32 @setjmp(ptr @buf) returns_twice
+ %cmp = icmp eq i32 %call, 0
+ br i1 %cmp, label %if, label %else
+
+if:
+ call void @dummy()
+ ret void
+
+else:
+ call void @llvm.lifetime.end.p0(i64 -1, ptr %x)
+ ret void
+}
diff --git a/llvm/test/CodeGen/WebAssembly/lower-em-sjlj-debuginfo.ll b/llvm/test/CodeGen/WebAssembly/lower-em-sjlj-debuginfo.ll
index fec9836..bab8403 100644
--- a/llvm/test/CodeGen/WebAssembly/lower-em-sjlj-debuginfo.ll
+++ b/llvm/test/CodeGen/WebAssembly/lower-em-sjlj-debuginfo.ll
@@ -16,10 +16,10 @@ entry:
call void @foo(), !dbg !7
ret void, !dbg !8
; CHECK: entry:
- ; CHECK-NEXT: %functionInvocationId = alloca i32, align 4, !dbg ![[DL0:.*]]
+ ; CHECK-NEXT: %buf = alloca [1 x %struct.__jmp_buf_tag], align 16, !dbg ![[DL0:.*]]
+ ; CHECK-NEXT: %functionInvocationId = alloca i32, align 4, !dbg ![[DL0]]
; CHECK: entry.split:
- ; CHECK: alloca {{.*}}, !dbg ![[DL0]]
; CHECK: call void @__wasm_setjmp{{.*}}, !dbg ![[DL1:.*]]
; CHECK-NEXT: br {{.*}}, !dbg ![[DL2:.*]]
diff --git a/llvm/test/CodeGen/WebAssembly/lower-em-sjlj.ll b/llvm/test/CodeGen/WebAssembly/lower-em-sjlj.ll
index b584342..51dcf2f 100644
--- a/llvm/test/CodeGen/WebAssembly/lower-em-sjlj.ll
+++ b/llvm/test/CodeGen/WebAssembly/lower-em-sjlj.ll
@@ -22,17 +22,17 @@ entry:
call void @longjmp(ptr %buf, i32 1) #1
unreachable
; CHECK: entry:
+; CHECK-NEXT: %buf = alloca [1 x %struct.__jmp_buf_tag], align 16
; CHECK-NEXT: %functionInvocationId = alloca i32, align 4
; CHECK-NEXT: br label %entry.split
; CHECK: entry.split
-; CHECK-NEXT: %[[BUF:.*]] = alloca [1 x %struct.__jmp_buf_tag]
-; CHECK-NEXT: call void @__wasm_setjmp(ptr %[[BUF]], i32 1, ptr %functionInvocationId)
+; CHECK-NEXT: call void @__wasm_setjmp(ptr %buf, i32 1, ptr %functionInvocationId)
; CHECK-NEXT: br label %entry.split.split
; CHECK: entry.split.split:
; CHECK-NEXT: phi i32 [ 0, %entry.split ], [ %[[LONGJMP_RESULT:.*]], %if.end ]
-; CHECK-NEXT: %[[JMPBUF:.*]] = ptrtoint ptr %[[BUF]] to [[PTR]]
+; CHECK-NEXT: %[[JMPBUF:.*]] = ptrtoint ptr %buf to [[PTR]]
; CHECK-NEXT: store [[PTR]] 0, ptr @__THREW__
; CHECK-NEXT: call cc{{.*}} void @__invoke_void_[[PTR]]_i32(ptr @emscripten_longjmp, [[PTR]] %[[JMPBUF]], i32 1)
; CHECK-NEXT: %[[__THREW__VAL:.*]] = load [[PTR]], ptr @__THREW__
diff --git a/llvm/test/CodeGen/WebAssembly/lower-wasm-ehsjlj.ll b/llvm/test/CodeGen/WebAssembly/lower-wasm-ehsjlj.ll
index b4c93c4..9de6652 100644
--- a/llvm/test/CodeGen/WebAssembly/lower-wasm-ehsjlj.ll
+++ b/llvm/test/CodeGen/WebAssembly/lower-wasm-ehsjlj.ll
@@ -108,7 +108,7 @@ catch: ; preds = %catch.start
call void @__cxa_end_catch() [ "funclet"(token %2) ]
catchret from %2 to label %catchret.dest
; CHECK: catch: ; preds = %catch.start
-; CHECK-NEXT: %exn = load ptr, ptr %exn.slot6, align 4
+; CHECK-NEXT: %exn = load ptr, ptr %exn.slot, align 4
; CHECK-NEXT: %5 = call ptr @__cxa_begin_catch(ptr %exn) #3 [ "funclet"(token %2) ]
; CHECK-NEXT: invoke void @__cxa_end_catch() [ "funclet"(token %2) ]
; CHECK-NEXT: to label %.noexc unwind label %catch.dispatch.longjmp
diff --git a/llvm/test/CodeGen/WebAssembly/lower-wasm-sjlj.ll b/llvm/test/CodeGen/WebAssembly/lower-wasm-sjlj.ll
index 82c04e2..e1cb859 100644
--- a/llvm/test/CodeGen/WebAssembly/lower-wasm-sjlj.ll
+++ b/llvm/test/CodeGen/WebAssembly/lower-wasm-sjlj.ll
@@ -25,26 +25,24 @@ entry:
unreachable
; CHECK: entry:
+; CHECK-NEXT: %buf = alloca [1 x %struct.__jmp_buf_tag], align 16
; CHECK-NEXT: %functionInvocationId = alloca i32, align 4
; CHECK-NEXT: br label %setjmp.dispatch
; CHECK: setjmp.dispatch:
; CHECK-NEXT: %[[VAL2:.*]] = phi i32 [ %val, %if.end ], [ undef, %entry ]
-; CHECK-NEXT: %[[BUF:.*]] = phi ptr [ %[[BUF2:.*]], %if.end ], [ undef, %entry ]
; CHECK-NEXT: %label.phi = phi i32 [ %label, %if.end ], [ -1, %entry ]
; CHECK-NEXT: switch i32 %label.phi, label %entry.split [
; CHECK-NEXT: i32 1, label %entry.split.split
; CHECK-NEXT: ]
; CHECK: entry.split:
-; CHECK-NEXT: %buf = alloca [1 x %struct.__jmp_buf_tag], align 16
; CHECK-NEXT: call void @__wasm_setjmp(ptr %buf, i32 1, ptr %functionInvocationId)
; CHECK-NEXT: br label %entry.split.split
; CHECK: entry.split.split:
-; CHECK-NEXT: %[[BUF2]] = phi ptr [ %[[BUF]], %setjmp.dispatch ], [ %buf, %entry.split ]
; CHECK-NEXT: %setjmp.ret = phi i32 [ 0, %entry.split ], [ %[[VAL2]], %setjmp.dispatch ]
-; CHECK-NEXT: invoke void @__wasm_longjmp(ptr %[[BUF2]], i32 1)
+; CHECK-NEXT: invoke void @__wasm_longjmp(ptr %buf, i32 1)
; CHECK-NEXT: to label %.noexc unwind label %catch.dispatch.longjmp
; CHECK: .noexc:
diff --git a/llvm/test/CodeGen/WebAssembly/simd-arith.ll b/llvm/test/CodeGen/WebAssembly/simd-arith.ll
index e3607e1..36637e1 100644
--- a/llvm/test/CodeGen/WebAssembly/simd-arith.ll
+++ b/llvm/test/CodeGen/WebAssembly/simd-arith.ll
@@ -199,139 +199,17 @@ define <16 x i8> @mul_v16i8(<16 x i8> %x, <16 x i8> %y) {
; SIMD128-LABEL: mul_v16i8:
; SIMD128: .functype mul_v16i8 (v128, v128) -> (v128)
; SIMD128-NEXT: # %bb.0:
-; SIMD128-NEXT: i8x16.extract_lane_u $push4=, $0, 0
-; SIMD128-NEXT: i8x16.extract_lane_u $push3=, $1, 0
-; SIMD128-NEXT: i32.mul $push5=, $pop4, $pop3
-; SIMD128-NEXT: i8x16.splat $push6=, $pop5
-; SIMD128-NEXT: i8x16.extract_lane_u $push1=, $0, 1
-; SIMD128-NEXT: i8x16.extract_lane_u $push0=, $1, 1
-; SIMD128-NEXT: i32.mul $push2=, $pop1, $pop0
-; SIMD128-NEXT: i8x16.replace_lane $push7=, $pop6, 1, $pop2
-; SIMD128-NEXT: i8x16.extract_lane_u $push9=, $0, 2
-; SIMD128-NEXT: i8x16.extract_lane_u $push8=, $1, 2
-; SIMD128-NEXT: i32.mul $push10=, $pop9, $pop8
-; SIMD128-NEXT: i8x16.replace_lane $push11=, $pop7, 2, $pop10
-; SIMD128-NEXT: i8x16.extract_lane_u $push13=, $0, 3
-; SIMD128-NEXT: i8x16.extract_lane_u $push12=, $1, 3
-; SIMD128-NEXT: i32.mul $push14=, $pop13, $pop12
-; SIMD128-NEXT: i8x16.replace_lane $push15=, $pop11, 3, $pop14
-; SIMD128-NEXT: i8x16.extract_lane_u $push17=, $0, 4
-; SIMD128-NEXT: i8x16.extract_lane_u $push16=, $1, 4
-; SIMD128-NEXT: i32.mul $push18=, $pop17, $pop16
-; SIMD128-NEXT: i8x16.replace_lane $push19=, $pop15, 4, $pop18
-; SIMD128-NEXT: i8x16.extract_lane_u $push21=, $0, 5
-; SIMD128-NEXT: i8x16.extract_lane_u $push20=, $1, 5
-; SIMD128-NEXT: i32.mul $push22=, $pop21, $pop20
-; SIMD128-NEXT: i8x16.replace_lane $push23=, $pop19, 5, $pop22
-; SIMD128-NEXT: i8x16.extract_lane_u $push25=, $0, 6
-; SIMD128-NEXT: i8x16.extract_lane_u $push24=, $1, 6
-; SIMD128-NEXT: i32.mul $push26=, $pop25, $pop24
-; SIMD128-NEXT: i8x16.replace_lane $push27=, $pop23, 6, $pop26
-; SIMD128-NEXT: i8x16.extract_lane_u $push29=, $0, 7
-; SIMD128-NEXT: i8x16.extract_lane_u $push28=, $1, 7
-; SIMD128-NEXT: i32.mul $push30=, $pop29, $pop28
-; SIMD128-NEXT: i8x16.replace_lane $push31=, $pop27, 7, $pop30
-; SIMD128-NEXT: i8x16.extract_lane_u $push33=, $0, 8
-; SIMD128-NEXT: i8x16.extract_lane_u $push32=, $1, 8
-; SIMD128-NEXT: i32.mul $push34=, $pop33, $pop32
-; SIMD128-NEXT: i8x16.replace_lane $push35=, $pop31, 8, $pop34
-; SIMD128-NEXT: i8x16.extract_lane_u $push37=, $0, 9
-; SIMD128-NEXT: i8x16.extract_lane_u $push36=, $1, 9
-; SIMD128-NEXT: i32.mul $push38=, $pop37, $pop36
-; SIMD128-NEXT: i8x16.replace_lane $push39=, $pop35, 9, $pop38
-; SIMD128-NEXT: i8x16.extract_lane_u $push41=, $0, 10
-; SIMD128-NEXT: i8x16.extract_lane_u $push40=, $1, 10
-; SIMD128-NEXT: i32.mul $push42=, $pop41, $pop40
-; SIMD128-NEXT: i8x16.replace_lane $push43=, $pop39, 10, $pop42
-; SIMD128-NEXT: i8x16.extract_lane_u $push45=, $0, 11
-; SIMD128-NEXT: i8x16.extract_lane_u $push44=, $1, 11
-; SIMD128-NEXT: i32.mul $push46=, $pop45, $pop44
-; SIMD128-NEXT: i8x16.replace_lane $push47=, $pop43, 11, $pop46
-; SIMD128-NEXT: i8x16.extract_lane_u $push49=, $0, 12
-; SIMD128-NEXT: i8x16.extract_lane_u $push48=, $1, 12
-; SIMD128-NEXT: i32.mul $push50=, $pop49, $pop48
-; SIMD128-NEXT: i8x16.replace_lane $push51=, $pop47, 12, $pop50
-; SIMD128-NEXT: i8x16.extract_lane_u $push53=, $0, 13
-; SIMD128-NEXT: i8x16.extract_lane_u $push52=, $1, 13
-; SIMD128-NEXT: i32.mul $push54=, $pop53, $pop52
-; SIMD128-NEXT: i8x16.replace_lane $push55=, $pop51, 13, $pop54
-; SIMD128-NEXT: i8x16.extract_lane_u $push57=, $0, 14
-; SIMD128-NEXT: i8x16.extract_lane_u $push56=, $1, 14
-; SIMD128-NEXT: i32.mul $push58=, $pop57, $pop56
-; SIMD128-NEXT: i8x16.replace_lane $push59=, $pop55, 14, $pop58
-; SIMD128-NEXT: i8x16.extract_lane_u $push61=, $0, 15
-; SIMD128-NEXT: i8x16.extract_lane_u $push60=, $1, 15
-; SIMD128-NEXT: i32.mul $push62=, $pop61, $pop60
-; SIMD128-NEXT: i8x16.replace_lane $push63=, $pop59, 15, $pop62
-; SIMD128-NEXT: return $pop63
+; SIMD128-NEXT: i16x8.extmul_low_i8x16_u $push1=, $0, $1
+; SIMD128-NEXT: i16x8.extmul_high_i8x16_u $push0=, $0, $1
+; SIMD128-NEXT: i8x16.shuffle $push2=, $pop1, $pop0, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30
+; SIMD128-NEXT: return $pop2
;
; SIMD128-FAST-LABEL: mul_v16i8:
; SIMD128-FAST: .functype mul_v16i8 (v128, v128) -> (v128)
; SIMD128-FAST-NEXT: # %bb.0:
-; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push5=, $0, 0
-; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push4=, $1, 0
-; SIMD128-FAST-NEXT: i32.mul $push6=, $pop5, $pop4
-; SIMD128-FAST-NEXT: i8x16.splat $push7=, $pop6
-; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push2=, $0, 1
-; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push1=, $1, 1
-; SIMD128-FAST-NEXT: i32.mul $push3=, $pop2, $pop1
-; SIMD128-FAST-NEXT: i8x16.replace_lane $push8=, $pop7, 1, $pop3
-; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push10=, $0, 2
-; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push9=, $1, 2
-; SIMD128-FAST-NEXT: i32.mul $push11=, $pop10, $pop9
-; SIMD128-FAST-NEXT: i8x16.replace_lane $push12=, $pop8, 2, $pop11
-; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push14=, $0, 3
-; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push13=, $1, 3
-; SIMD128-FAST-NEXT: i32.mul $push15=, $pop14, $pop13
-; SIMD128-FAST-NEXT: i8x16.replace_lane $push16=, $pop12, 3, $pop15
-; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push18=, $0, 4
-; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push17=, $1, 4
-; SIMD128-FAST-NEXT: i32.mul $push19=, $pop18, $pop17
-; SIMD128-FAST-NEXT: i8x16.replace_lane $push20=, $pop16, 4, $pop19
-; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push22=, $0, 5
-; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push21=, $1, 5
-; SIMD128-FAST-NEXT: i32.mul $push23=, $pop22, $pop21
-; SIMD128-FAST-NEXT: i8x16.replace_lane $push24=, $pop20, 5, $pop23
-; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push26=, $0, 6
-; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push25=, $1, 6
-; SIMD128-FAST-NEXT: i32.mul $push27=, $pop26, $pop25
-; SIMD128-FAST-NEXT: i8x16.replace_lane $push28=, $pop24, 6, $pop27
-; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push30=, $0, 7
-; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push29=, $1, 7
-; SIMD128-FAST-NEXT: i32.mul $push31=, $pop30, $pop29
-; SIMD128-FAST-NEXT: i8x16.replace_lane $push32=, $pop28, 7, $pop31
-; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push34=, $0, 8
-; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push33=, $1, 8
-; SIMD128-FAST-NEXT: i32.mul $push35=, $pop34, $pop33
-; SIMD128-FAST-NEXT: i8x16.replace_lane $push36=, $pop32, 8, $pop35
-; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push38=, $0, 9
-; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push37=, $1, 9
-; SIMD128-FAST-NEXT: i32.mul $push39=, $pop38, $pop37
-; SIMD128-FAST-NEXT: i8x16.replace_lane $push40=, $pop36, 9, $pop39
-; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push42=, $0, 10
-; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push41=, $1, 10
-; SIMD128-FAST-NEXT: i32.mul $push43=, $pop42, $pop41
-; SIMD128-FAST-NEXT: i8x16.replace_lane $push44=, $pop40, 10, $pop43
-; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push46=, $0, 11
-; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push45=, $1, 11
-; SIMD128-FAST-NEXT: i32.mul $push47=, $pop46, $pop45
-; SIMD128-FAST-NEXT: i8x16.replace_lane $push48=, $pop44, 11, $pop47
-; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push50=, $0, 12
-; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push49=, $1, 12
-; SIMD128-FAST-NEXT: i32.mul $push51=, $pop50, $pop49
-; SIMD128-FAST-NEXT: i8x16.replace_lane $push52=, $pop48, 12, $pop51
-; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push54=, $0, 13
-; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push53=, $1, 13
-; SIMD128-FAST-NEXT: i32.mul $push55=, $pop54, $pop53
-; SIMD128-FAST-NEXT: i8x16.replace_lane $push56=, $pop52, 13, $pop55
-; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push58=, $0, 14
-; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push57=, $1, 14
-; SIMD128-FAST-NEXT: i32.mul $push59=, $pop58, $pop57
-; SIMD128-FAST-NEXT: i8x16.replace_lane $push60=, $pop56, 14, $pop59
-; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push62=, $0, 15
-; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push61=, $1, 15
-; SIMD128-FAST-NEXT: i32.mul $push63=, $pop62, $pop61
-; SIMD128-FAST-NEXT: i8x16.replace_lane $push0=, $pop60, 15, $pop63
+; SIMD128-FAST-NEXT: i16x8.extmul_low_i8x16_u $push2=, $0, $1
+; SIMD128-FAST-NEXT: i16x8.extmul_high_i8x16_u $push1=, $0, $1
+; SIMD128-FAST-NEXT: i8x16.shuffle $push0=, $pop2, $pop1, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30
; SIMD128-FAST-NEXT: return $pop0
;
; NO-SIMD128-LABEL: mul_v16i8:
diff --git a/llvm/test/CodeGen/WebAssembly/simd-relaxed-fnma.ll b/llvm/test/CodeGen/WebAssembly/simd-relaxed-fnma.ll
new file mode 100644
index 0000000..6e2d860
--- /dev/null
+++ b/llvm/test/CodeGen/WebAssembly/simd-relaxed-fnma.ll
@@ -0,0 +1,145 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+fp16,+simd128,+relaxed-simd | FileCheck %s --check-prefix=RELAXED
+; RUN: llc < %s -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+fp16,+simd128, | FileCheck %s --check-prefix=STRICT
+
+target triple = "wasm32"
+
+define double @fsub_fmul_contract_f64(double %a, double %b, double %c) {
+; RELAXED-LABEL: fsub_fmul_contract_f64:
+; RELAXED: .functype fsub_fmul_contract_f64 (f64, f64, f64) -> (f64)
+; RELAXED-NEXT: # %bb.0:
+; RELAXED-NEXT: f64.mul $push0=, $1, $0
+; RELAXED-NEXT: f64.sub $push1=, $2, $pop0
+; RELAXED-NEXT: return $pop1
+;
+; STRICT-LABEL: fsub_fmul_contract_f64:
+; STRICT: .functype fsub_fmul_contract_f64 (f64, f64, f64) -> (f64)
+; STRICT-NEXT: # %bb.0:
+; STRICT-NEXT: f64.mul $push0=, $1, $0
+; STRICT-NEXT: f64.sub $push1=, $2, $pop0
+; STRICT-NEXT: return $pop1
+ %mul = fmul contract double %b, %a
+ %sub = fsub contract double %c, %mul
+ ret double %sub
+}
+
+define <4 x float> @fsub_fmul_contract_4xf32(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
+; RELAXED-LABEL: fsub_fmul_contract_4xf32:
+; RELAXED: .functype fsub_fmul_contract_4xf32 (v128, v128, v128) -> (v128)
+; RELAXED-NEXT: # %bb.0:
+; RELAXED-NEXT: f32x4.relaxed_nmadd $push0=, $2, $1, $0
+; RELAXED-NEXT: return $pop0
+;
+; STRICT-LABEL: fsub_fmul_contract_4xf32:
+; STRICT: .functype fsub_fmul_contract_4xf32 (v128, v128, v128) -> (v128)
+; STRICT-NEXT: # %bb.0:
+; STRICT-NEXT: f32x4.mul $push0=, $1, $0
+; STRICT-NEXT: f32x4.sub $push1=, $2, $pop0
+; STRICT-NEXT: return $pop1
+ %mul = fmul contract <4 x float> %b, %a
+ %sub = fsub contract <4 x float> %c, %mul
+ ret <4 x float> %sub
+}
+
+
+define <8 x half> @fsub_fmul_contract_8xf16(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
+; RELAXED-LABEL: fsub_fmul_contract_8xf16:
+; RELAXED: .functype fsub_fmul_contract_8xf16 (v128, v128, v128) -> (v128)
+; RELAXED-NEXT: # %bb.0:
+; RELAXED-NEXT: f16x8.relaxed_nmadd $push0=, $2, $1, $0
+; RELAXED-NEXT: return $pop0
+;
+; STRICT-LABEL: fsub_fmul_contract_8xf16:
+; STRICT: .functype fsub_fmul_contract_8xf16 (v128, v128, v128) -> (v128)
+; STRICT-NEXT: # %bb.0:
+; STRICT-NEXT: f16x8.mul $push0=, $1, $0
+; STRICT-NEXT: f16x8.sub $push1=, $2, $pop0
+; STRICT-NEXT: return $pop1
+ %mul = fmul contract <8 x half> %b, %a
+ %sub = fsub contract <8 x half> %c, %mul
+ ret <8 x half> %sub
+}
+
+
+define <4 x float> @fsub_fmul_4xf32(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
+; RELAXED-LABEL: fsub_fmul_4xf32:
+; RELAXED: .functype fsub_fmul_4xf32 (v128, v128, v128) -> (v128)
+; RELAXED-NEXT: # %bb.0:
+; RELAXED-NEXT: f32x4.mul $push0=, $1, $0
+; RELAXED-NEXT: f32x4.sub $push1=, $2, $pop0
+; RELAXED-NEXT: return $pop1
+;
+; STRICT-LABEL: fsub_fmul_4xf32:
+; STRICT: .functype fsub_fmul_4xf32 (v128, v128, v128) -> (v128)
+; STRICT-NEXT: # %bb.0:
+; STRICT-NEXT: f32x4.mul $push0=, $1, $0
+; STRICT-NEXT: f32x4.sub $push1=, $2, $pop0
+; STRICT-NEXT: return $pop1
+ %mul = fmul <4 x float> %b, %a
+ %sub = fsub contract <4 x float> %c, %mul
+ ret <4 x float> %sub
+}
+
+define <8 x float> @fsub_fmul_contract_8xf32(<8 x float> %a, <8 x float> %b, <8 x float> %c) {
+; RELAXED-LABEL: fsub_fmul_contract_8xf32:
+; RELAXED: .functype fsub_fmul_contract_8xf32 (i32, v128, v128, v128, v128, v128, v128) -> ()
+; RELAXED-NEXT: # %bb.0:
+; RELAXED-NEXT: f32x4.relaxed_nmadd $push0=, $6, $4, $2
+; RELAXED-NEXT: v128.store 16($0), $pop0
+; RELAXED-NEXT: f32x4.relaxed_nmadd $push1=, $5, $3, $1
+; RELAXED-NEXT: v128.store 0($0), $pop1
+; RELAXED-NEXT: return
+;
+; STRICT-LABEL: fsub_fmul_contract_8xf32:
+; STRICT: .functype fsub_fmul_contract_8xf32 (i32, v128, v128, v128, v128, v128, v128) -> ()
+; STRICT-NEXT: # %bb.0:
+; STRICT-NEXT: f32x4.mul $push0=, $4, $2
+; STRICT-NEXT: f32x4.sub $push1=, $6, $pop0
+; STRICT-NEXT: v128.store 16($0), $pop1
+; STRICT-NEXT: f32x4.mul $push2=, $3, $1
+; STRICT-NEXT: f32x4.sub $push3=, $5, $pop2
+; STRICT-NEXT: v128.store 0($0), $pop3
+; STRICT-NEXT: return
+ %mul = fmul contract <8 x float> %b, %a
+ %sub = fsub contract <8 x float> %c, %mul
+ ret <8 x float> %sub
+}
+
+
+define <2 x double> @fsub_fmul_contract_2xf64(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
+; RELAXED-LABEL: fsub_fmul_contract_2xf64:
+; RELAXED: .functype fsub_fmul_contract_2xf64 (v128, v128, v128) -> (v128)
+; RELAXED-NEXT: # %bb.0:
+; RELAXED-NEXT: f64x2.relaxed_nmadd $push0=, $2, $1, $0
+; RELAXED-NEXT: return $pop0
+;
+; STRICT-LABEL: fsub_fmul_contract_2xf64:
+; STRICT: .functype fsub_fmul_contract_2xf64 (v128, v128, v128) -> (v128)
+; STRICT-NEXT: # %bb.0:
+; STRICT-NEXT: f64x2.mul $push0=, $1, $0
+; STRICT-NEXT: f64x2.sub $push1=, $2, $pop0
+; STRICT-NEXT: return $pop1
+ %mul = fmul contract <2 x double> %b, %a
+ %sub = fsub contract <2 x double> %c, %mul
+ ret <2 x double> %sub
+}
+
+define float @fsub_fmul_contract_f32(float %a, float %b, float %c) {
+; RELAXED-LABEL: fsub_fmul_contract_f32:
+; RELAXED: .functype fsub_fmul_contract_f32 (f32, f32, f32) -> (f32)
+; RELAXED-NEXT: # %bb.0:
+; RELAXED-NEXT: f32.mul $push0=, $1, $0
+; RELAXED-NEXT: f32.sub $push1=, $2, $pop0
+; RELAXED-NEXT: return $pop1
+;
+; STRICT-LABEL: fsub_fmul_contract_f32:
+; STRICT: .functype fsub_fmul_contract_f32 (f32, f32, f32) -> (f32)
+; STRICT-NEXT: # %bb.0:
+; STRICT-NEXT: f32.mul $push0=, $1, $0
+; STRICT-NEXT: f32.sub $push1=, $2, $pop0
+; STRICT-NEXT: return $pop1
+ %mul = fmul contract float %b, %a
+ %sub = fsub contract float %c, %mul
+ ret float %sub
+}
+
diff --git a/llvm/test/CodeGen/WebAssembly/vector-reduce.ll b/llvm/test/CodeGen/WebAssembly/vector-reduce.ll
index 1d194b6..4c30a3a 100644
--- a/llvm/test/CodeGen/WebAssembly/vector-reduce.ll
+++ b/llvm/test/CodeGen/WebAssembly/vector-reduce.ll
@@ -116,40 +116,28 @@ define i8 @pairwise_mul_v16i8(<16 x i8> %arg) {
; SIMD128-LABEL: pairwise_mul_v16i8:
; SIMD128: .functype pairwise_mul_v16i8 (v128) -> (i32)
; SIMD128-NEXT: # %bb.0:
-; SIMD128-NEXT: i8x16.extract_lane_u $push26=, $0, 0
-; SIMD128-NEXT: i8x16.shuffle $push32=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0
-; SIMD128-NEXT: local.tee $push31=, $1=, $pop32
-; SIMD128-NEXT: i8x16.extract_lane_u $push25=, $pop31, 0
-; SIMD128-NEXT: i32.mul $push27=, $pop26, $pop25
-; SIMD128-NEXT: i8x16.extract_lane_u $push23=, $0, 4
-; SIMD128-NEXT: i8x16.extract_lane_u $push22=, $1, 4
-; SIMD128-NEXT: i32.mul $push24=, $pop23, $pop22
-; SIMD128-NEXT: i32.mul $push28=, $pop27, $pop24
-; SIMD128-NEXT: i8x16.extract_lane_u $push19=, $0, 2
-; SIMD128-NEXT: i8x16.extract_lane_u $push18=, $1, 2
-; SIMD128-NEXT: i32.mul $push20=, $pop19, $pop18
-; SIMD128-NEXT: i8x16.extract_lane_u $push16=, $0, 6
-; SIMD128-NEXT: i8x16.extract_lane_u $push15=, $1, 6
-; SIMD128-NEXT: i32.mul $push17=, $pop16, $pop15
-; SIMD128-NEXT: i32.mul $push21=, $pop20, $pop17
-; SIMD128-NEXT: i32.mul $push29=, $pop28, $pop21
-; SIMD128-NEXT: i8x16.extract_lane_u $push11=, $0, 1
-; SIMD128-NEXT: i8x16.extract_lane_u $push10=, $1, 1
-; SIMD128-NEXT: i32.mul $push12=, $pop11, $pop10
-; SIMD128-NEXT: i8x16.extract_lane_u $push8=, $0, 5
-; SIMD128-NEXT: i8x16.extract_lane_u $push7=, $1, 5
-; SIMD128-NEXT: i32.mul $push9=, $pop8, $pop7
-; SIMD128-NEXT: i32.mul $push13=, $pop12, $pop9
-; SIMD128-NEXT: i8x16.extract_lane_u $push4=, $0, 3
-; SIMD128-NEXT: i8x16.extract_lane_u $push3=, $1, 3
-; SIMD128-NEXT: i32.mul $push5=, $pop4, $pop3
-; SIMD128-NEXT: i8x16.extract_lane_u $push1=, $0, 7
-; SIMD128-NEXT: i8x16.extract_lane_u $push0=, $1, 7
-; SIMD128-NEXT: i32.mul $push2=, $pop1, $pop0
-; SIMD128-NEXT: i32.mul $push6=, $pop5, $pop2
-; SIMD128-NEXT: i32.mul $push14=, $pop13, $pop6
-; SIMD128-NEXT: i32.mul $push30=, $pop29, $pop14
-; SIMD128-NEXT: return $pop30
+; SIMD128-NEXT: i8x16.shuffle $push20=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0
+; SIMD128-NEXT: local.tee $push19=, $1=, $pop20
+; SIMD128-NEXT: i16x8.extmul_low_i8x16_u $push1=, $0, $pop19
+; SIMD128-NEXT: i16x8.extmul_high_i8x16_u $push0=, $0, $1
+; SIMD128-NEXT: i8x16.shuffle $push18=, $pop1, $pop0, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30
+; SIMD128-NEXT: local.tee $push17=, $0=, $pop18
+; SIMD128-NEXT: i8x16.shuffle $push16=, $0, $0, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+; SIMD128-NEXT: local.tee $push15=, $1=, $pop16
+; SIMD128-NEXT: i16x8.extmul_low_i8x16_u $push3=, $pop17, $pop15
+; SIMD128-NEXT: i16x8.extmul_high_i8x16_u $push2=, $0, $1
+; SIMD128-NEXT: i8x16.shuffle $push14=, $pop3, $pop2, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30
+; SIMD128-NEXT: local.tee $push13=, $0=, $pop14
+; SIMD128-NEXT: i8x16.shuffle $push12=, $0, $0, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+; SIMD128-NEXT: local.tee $push11=, $1=, $pop12
+; SIMD128-NEXT: i16x8.extmul_low_i8x16_u $push5=, $pop13, $pop11
+; SIMD128-NEXT: i16x8.extmul_high_i8x16_u $push4=, $0, $1
+; SIMD128-NEXT: i8x16.shuffle $push10=, $pop5, $pop4, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30
+; SIMD128-NEXT: local.tee $push9=, $0=, $pop10
+; SIMD128-NEXT: i8x16.shuffle $push6=, $0, $0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+; SIMD128-NEXT: i16x8.extmul_low_i8x16_u $push7=, $pop9, $pop6
+; SIMD128-NEXT: i8x16.extract_lane_u $push8=, $pop7, 0
+; SIMD128-NEXT: return $pop8
%res = tail call i8 @llvm.vector.reduce.mul.v16i8(<16 x i8> %arg)
ret i8 %res
}
diff --git a/llvm/test/CodeGen/X86/embed-bitcode.ll b/llvm/test/CodeGen/X86/embed-bitcode.ll
index 0d66ba8..d4af954 100644
--- a/llvm/test/CodeGen/X86/embed-bitcode.ll
+++ b/llvm/test/CodeGen/X86/embed-bitcode.ll
@@ -1,10 +1,23 @@
; RUN: llc -filetype=obj -mtriple=x86_64 %s -o %t
; RUN: llvm-readelf -S %t | FileCheck %s
+; RUN: llc -filetype=obj -mtriple=x86_64-pc-windows-msvc %s -o %t
+; RUN: llvm-readobj -S %t | FileCheck %s --check-prefix=COFF
; CHECK: .text PROGBITS 0000000000000000 [[#%x,OFF:]] 000000 00 AX 0
; CHECK-NEXT: .llvmbc PROGBITS 0000000000000000 [[#%x,OFF:]] 000004 00 0
; CHECK-NEXT: .llvmcmd PROGBITS 0000000000000000 [[#%x,OFF:]] 000005 00 0
+; COFF: Name: .llvmbc (2E 6C 6C 76 6D 62 63 00)
+; COFF: Characteristics [
+; COFF-NEXT: IMAGE_SCN_ALIGN_1BYTES
+; COFF-NEXT: IMAGE_SCN_MEM_DISCARDABLE
+; COFF-NEXT: ]
+; COFF: Name: .llvmcmd (2E 6C 6C 76 6D 63 6D 64)
+; COFF: Characteristics [
+; COFF-NEXT: IMAGE_SCN_ALIGN_1BYTES
+; COFF-NEXT: IMAGE_SCN_MEM_DISCARDABLE
+; COFF-NEXT: ]
+
@llvm.embedded.module = private constant [4 x i8] c"BC\C0\DE", section ".llvmbc", align 1
@llvm.cmdline = private constant [5 x i8] c"-cc1\00", section ".llvmcmd", align 1
@llvm.compiler.used = appending global [2 x ptr] [ptr @llvm.embedded.module, ptr @llvm.cmdline], section "llvm.metadata"
diff --git a/llvm/test/CodeGen/X86/isel-fpclass.ll b/llvm/test/CodeGen/X86/isel-fpclass.ll
new file mode 100644
index 0000000..960bbf5
--- /dev/null
+++ b/llvm/test/CodeGen/X86/isel-fpclass.ll
@@ -0,0 +1,526 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=i686-linux | FileCheck %s -check-prefixes=X86-SDAGISEL
+; RUN: llc < %s -mtriple=x86_64-linux | FileCheck %s -check-prefixes=X64,X64-SDAGISEL
+; RUN: llc < %s -mtriple=i686-linux -fast-isel -fast-isel-abort=1 | FileCheck %s -check-prefixes=X86-FASTISEL
+; RUN: llc < %s -mtriple=x86_64-linux -fast-isel -fast-isel-abort=1 | FileCheck %s -check-prefixes=X64,X64-FASTISEL
+
+; FIXME: We can reuse/delete llvm/test/CodeGen/X86/is_fpclass.ll when all patches are included.
+
+define i1 @isnone_f(float %x) {
+; X86-SDAGISEL-LABEL: isnone_f:
+; X86-SDAGISEL: # %bb.0: # %entry
+; X86-SDAGISEL-NEXT: xorl %eax, %eax
+; X86-SDAGISEL-NEXT: retl
+;
+; X64-LABEL: isnone_f:
+; X64: # %bb.0: # %entry
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: retq
+;
+; X86-FASTISEL-LABEL: isnone_f:
+; X86-FASTISEL: # %bb.0: # %entry
+; X86-FASTISEL-NEXT: flds {{[0-9]+}}(%esp)
+; X86-FASTISEL-NEXT: fstp %st(0)
+; X86-FASTISEL-NEXT: xorl %eax, %eax
+; X86-FASTISEL-NEXT: retl
+entry:
+ %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 0)
+ ret i1 %0
+}
+
+define i1 @isany_f(float %x) {
+; X86-SDAGISEL-LABEL: isany_f:
+; X86-SDAGISEL: # %bb.0: # %entry
+; X86-SDAGISEL-NEXT: movb $1, %al
+; X86-SDAGISEL-NEXT: retl
+;
+; X64-LABEL: isany_f:
+; X64: # %bb.0: # %entry
+; X64-NEXT: movb $1, %al
+; X64-NEXT: retq
+;
+; X86-FASTISEL-LABEL: isany_f:
+; X86-FASTISEL: # %bb.0: # %entry
+; X86-FASTISEL-NEXT: flds {{[0-9]+}}(%esp)
+; X86-FASTISEL-NEXT: fstp %st(0)
+; X86-FASTISEL-NEXT: movb $1, %al
+; X86-FASTISEL-NEXT: retl
+entry:
+ %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 1023)
+ ret i1 %0
+}
+
+define i1 @issignaling_f(float %x) {
+; X86-SDAGISEL-LABEL: issignaling_f:
+; X86-SDAGISEL: # %bb.0:
+; X86-SDAGISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
+; X86-SDAGISEL-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-SDAGISEL-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000
+; X86-SDAGISEL-NEXT: setl %cl
+; X86-SDAGISEL-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001
+; X86-SDAGISEL-NEXT: setge %al
+; X86-SDAGISEL-NEXT: andb %cl, %al
+; X86-SDAGISEL-NEXT: retl
+;
+; X64-LABEL: issignaling_f:
+; X64: # %bb.0:
+; X64-NEXT: movd %xmm0, %eax
+; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
+; X64-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000
+; X64-NEXT: setl %cl
+; X64-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001
+; X64-NEXT: setge %al
+; X64-NEXT: andb %cl, %al
+; X64-NEXT: retq
+;
+; X86-FASTISEL-LABEL: issignaling_f:
+; X86-FASTISEL: # %bb.0:
+; X86-FASTISEL-NEXT: pushl %eax
+; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 8
+; X86-FASTISEL-NEXT: flds {{[0-9]+}}(%esp)
+; X86-FASTISEL-NEXT: fstps (%esp)
+; X86-FASTISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
+; X86-FASTISEL-NEXT: andl (%esp), %eax
+; X86-FASTISEL-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000
+; X86-FASTISEL-NEXT: setl %cl
+; X86-FASTISEL-NEXT: cmpl $2139095041, %eax # imm = 0x7F800001
+; X86-FASTISEL-NEXT: setge %al
+; X86-FASTISEL-NEXT: andb %cl, %al
+; X86-FASTISEL-NEXT: popl %ecx
+; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 4
+; X86-FASTISEL-NEXT: retl
+ %a0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 1) ; "snan"
+ ret i1 %a0
+}
+
+ define i1 @isquiet_f(float %x) {
+; X86-SDAGISEL-LABEL: isquiet_f:
+; X86-SDAGISEL: # %bb.0: # %entry
+; X86-SDAGISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
+; X86-SDAGISEL-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-SDAGISEL-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000
+; X86-SDAGISEL-NEXT: setge %al
+; X86-SDAGISEL-NEXT: retl
+;
+; X64-LABEL: isquiet_f:
+; X64: # %bb.0: # %entry
+; X64-NEXT: movd %xmm0, %eax
+; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
+; X64-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000
+; X64-NEXT: setge %al
+; X64-NEXT: retq
+;
+; X86-FASTISEL-LABEL: isquiet_f:
+; X86-FASTISEL: # %bb.0: # %entry
+; X86-FASTISEL-NEXT: pushl %eax
+; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 8
+; X86-FASTISEL-NEXT: flds {{[0-9]+}}(%esp)
+; X86-FASTISEL-NEXT: fstps (%esp)
+; X86-FASTISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
+; X86-FASTISEL-NEXT: andl (%esp), %eax
+; X86-FASTISEL-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000
+; X86-FASTISEL-NEXT: setge %al
+; X86-FASTISEL-NEXT: popl %ecx
+; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 4
+; X86-FASTISEL-NEXT: retl
+ entry:
+ %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 2) ; "qnan"
+ ret i1 %0
+}
+
+define i1 @not_isquiet_f(float %x) {
+; X86-SDAGISEL-LABEL: not_isquiet_f:
+; X86-SDAGISEL: # %bb.0: # %entry
+; X86-SDAGISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
+; X86-SDAGISEL-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-SDAGISEL-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000
+; X86-SDAGISEL-NEXT: setl %al
+; X86-SDAGISEL-NEXT: retl
+;
+; X64-LABEL: not_isquiet_f:
+; X64: # %bb.0: # %entry
+; X64-NEXT: movd %xmm0, %eax
+; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
+; X64-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000
+; X64-NEXT: setl %al
+; X64-NEXT: retq
+;
+; X86-FASTISEL-LABEL: not_isquiet_f:
+; X86-FASTISEL: # %bb.0: # %entry
+; X86-FASTISEL-NEXT: pushl %eax
+; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 8
+; X86-FASTISEL-NEXT: flds {{[0-9]+}}(%esp)
+; X86-FASTISEL-NEXT: fstps (%esp)
+; X86-FASTISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
+; X86-FASTISEL-NEXT: andl (%esp), %eax
+; X86-FASTISEL-NEXT: cmpl $2143289344, %eax # imm = 0x7FC00000
+; X86-FASTISEL-NEXT: setl %al
+; X86-FASTISEL-NEXT: popl %ecx
+; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 4
+; X86-FASTISEL-NEXT: retl
+entry:
+ %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 1021) ; ~"qnan"
+ ret i1 %0
+}
+
+define i1 @isinf_f(float %x) {
+; X86-SDAGISEL-LABEL: isinf_f:
+; X86-SDAGISEL: # %bb.0: # %entry
+; X86-SDAGISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
+; X86-SDAGISEL-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-SDAGISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
+; X86-SDAGISEL-NEXT: sete %al
+; X86-SDAGISEL-NEXT: retl
+;
+; X64-LABEL: isinf_f:
+; X64: # %bb.0: # %entry
+; X64-NEXT: movd %xmm0, %eax
+; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
+; X64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
+; X64-NEXT: sete %al
+; X64-NEXT: retq
+;
+; X86-FASTISEL-LABEL: isinf_f:
+; X86-FASTISEL: # %bb.0: # %entry
+; X86-FASTISEL-NEXT: pushl %eax
+; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 8
+; X86-FASTISEL-NEXT: flds {{[0-9]+}}(%esp)
+; X86-FASTISEL-NEXT: fstps (%esp)
+; X86-FASTISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
+; X86-FASTISEL-NEXT: andl (%esp), %eax
+; X86-FASTISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
+; X86-FASTISEL-NEXT: sete %al
+; X86-FASTISEL-NEXT: popl %ecx
+; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 4
+; X86-FASTISEL-NEXT: retl
+entry:
+ %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 516) ; 0x204 = "inf"
+ ret i1 %0
+}
+
+define i1 @not_isinf_f(float %x) {
+; X86-SDAGISEL-LABEL: not_isinf_f:
+; X86-SDAGISEL: # %bb.0: # %entry
+; X86-SDAGISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
+; X86-SDAGISEL-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-SDAGISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
+; X86-SDAGISEL-NEXT: setne %al
+; X86-SDAGISEL-NEXT: retl
+;
+; X64-LABEL: not_isinf_f:
+; X64: # %bb.0: # %entry
+; X64-NEXT: movd %xmm0, %eax
+; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
+; X64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
+; X64-NEXT: setne %al
+; X64-NEXT: retq
+;
+; X86-FASTISEL-LABEL: not_isinf_f:
+; X86-FASTISEL: # %bb.0: # %entry
+; X86-FASTISEL-NEXT: pushl %eax
+; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 8
+; X86-FASTISEL-NEXT: flds {{[0-9]+}}(%esp)
+; X86-FASTISEL-NEXT: fstps (%esp)
+; X86-FASTISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
+; X86-FASTISEL-NEXT: andl (%esp), %eax
+; X86-FASTISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
+; X86-FASTISEL-NEXT: setne %al
+; X86-FASTISEL-NEXT: popl %ecx
+; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 4
+; X86-FASTISEL-NEXT: retl
+entry:
+ %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 507) ; ~0x204 = "~inf"
+ ret i1 %0
+}
+
+define i1 @is_plus_inf_f(float %x) {
+; X86-SDAGISEL-LABEL: is_plus_inf_f:
+; X86-SDAGISEL: # %bb.0: # %entry
+; X86-SDAGISEL-NEXT: cmpl $2139095040, {{[0-9]+}}(%esp) # imm = 0x7F800000
+; X86-SDAGISEL-NEXT: sete %al
+; X86-SDAGISEL-NEXT: retl
+;
+; X64-LABEL: is_plus_inf_f:
+; X64: # %bb.0: # %entry
+; X64-NEXT: movd %xmm0, %eax
+; X64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
+; X64-NEXT: sete %al
+; X64-NEXT: retq
+;
+; X86-FASTISEL-LABEL: is_plus_inf_f:
+; X86-FASTISEL: # %bb.0: # %entry
+; X86-FASTISEL-NEXT: pushl %eax
+; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 8
+; X86-FASTISEL-NEXT: flds {{[0-9]+}}(%esp)
+; X86-FASTISEL-NEXT: fstps (%esp)
+; X86-FASTISEL-NEXT: cmpl $2139095040, (%esp) # imm = 0x7F800000
+; X86-FASTISEL-NEXT: sete %al
+; X86-FASTISEL-NEXT: popl %ecx
+; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 4
+; X86-FASTISEL-NEXT: retl
+entry:
+ %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 512) ; 0x200 = "+inf"
+ ret i1 %0
+}
+
+define i1 @is_minus_inf_f(float %x) {
+; X86-SDAGISEL-LABEL: is_minus_inf_f:
+; X86-SDAGISEL: # %bb.0: # %entry
+; X86-SDAGISEL-NEXT: cmpl $-8388608, {{[0-9]+}}(%esp) # imm = 0xFF800000
+; X86-SDAGISEL-NEXT: sete %al
+; X86-SDAGISEL-NEXT: retl
+;
+; X64-LABEL: is_minus_inf_f:
+; X64: # %bb.0: # %entry
+; X64-NEXT: movd %xmm0, %eax
+; X64-NEXT: cmpl $-8388608, %eax # imm = 0xFF800000
+; X64-NEXT: sete %al
+; X64-NEXT: retq
+;
+; X86-FASTISEL-LABEL: is_minus_inf_f:
+; X86-FASTISEL: # %bb.0: # %entry
+; X86-FASTISEL-NEXT: pushl %eax
+; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 8
+; X86-FASTISEL-NEXT: flds {{[0-9]+}}(%esp)
+; X86-FASTISEL-NEXT: fstps (%esp)
+; X86-FASTISEL-NEXT: cmpl $-8388608, (%esp) # imm = 0xFF800000
+; X86-FASTISEL-NEXT: sete %al
+; X86-FASTISEL-NEXT: popl %ecx
+; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 4
+; X86-FASTISEL-NEXT: retl
+entry:
+ %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 4) ; "-inf"
+ ret i1 %0
+}
+
+define i1 @not_is_minus_inf_f(float %x) {
+; X86-SDAGISEL-LABEL: not_is_minus_inf_f:
+; X86-SDAGISEL: # %bb.0: # %entry
+; X86-SDAGISEL-NEXT: cmpl $-8388608, {{[0-9]+}}(%esp) # imm = 0xFF800000
+; X86-SDAGISEL-NEXT: setne %al
+; X86-SDAGISEL-NEXT: retl
+;
+; X64-LABEL: not_is_minus_inf_f:
+; X64: # %bb.0: # %entry
+; X64-NEXT: movd %xmm0, %eax
+; X64-NEXT: cmpl $-8388608, %eax # imm = 0xFF800000
+; X64-NEXT: setne %al
+; X64-NEXT: retq
+;
+; X86-FASTISEL-LABEL: not_is_minus_inf_f:
+; X86-FASTISEL: # %bb.0: # %entry
+; X86-FASTISEL-NEXT: pushl %eax
+; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 8
+; X86-FASTISEL-NEXT: flds {{[0-9]+}}(%esp)
+; X86-FASTISEL-NEXT: fstps (%esp)
+; X86-FASTISEL-NEXT: cmpl $-8388608, (%esp) # imm = 0xFF800000
+; X86-FASTISEL-NEXT: setne %al
+; X86-FASTISEL-NEXT: popl %ecx
+; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 4
+; X86-FASTISEL-NEXT: retl
+entry:
+ %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 1019) ; ~"-inf"
+ ret i1 %0
+}
+
+define i1 @isfinite_f(float %x) {
+; X86-SDAGISEL-LABEL: isfinite_f:
+; X86-SDAGISEL: # %bb.0: # %entry
+; X86-SDAGISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
+; X86-SDAGISEL-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-SDAGISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
+; X86-SDAGISEL-NEXT: setl %al
+; X86-SDAGISEL-NEXT: retl
+;
+; X64-LABEL: isfinite_f:
+; X64: # %bb.0: # %entry
+; X64-NEXT: movd %xmm0, %eax
+; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
+; X64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
+; X64-NEXT: setl %al
+; X64-NEXT: retq
+;
+; X86-FASTISEL-LABEL: isfinite_f:
+; X86-FASTISEL: # %bb.0: # %entry
+; X86-FASTISEL-NEXT: pushl %eax
+; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 8
+; X86-FASTISEL-NEXT: flds {{[0-9]+}}(%esp)
+; X86-FASTISEL-NEXT: fstps (%esp)
+; X86-FASTISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
+; X86-FASTISEL-NEXT: andl (%esp), %eax
+; X86-FASTISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
+; X86-FASTISEL-NEXT: setl %al
+; X86-FASTISEL-NEXT: popl %ecx
+; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 4
+; X86-FASTISEL-NEXT: retl
+entry:
+ %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 504) ; 0x1f8 = "finite"
+ ret i1 %0
+}
+
+define i1 @not_isfinite_f(float %x) {
+; X86-SDAGISEL-LABEL: not_isfinite_f:
+; X86-SDAGISEL: # %bb.0: # %entry
+; X86-SDAGISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
+; X86-SDAGISEL-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-SDAGISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
+; X86-SDAGISEL-NEXT: setge %al
+; X86-SDAGISEL-NEXT: retl
+;
+; X64-LABEL: not_isfinite_f:
+; X64: # %bb.0: # %entry
+; X64-NEXT: movd %xmm0, %eax
+; X64-NEXT: andl $2147483647, %eax # imm = 0x7FFFFFFF
+; X64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
+; X64-NEXT: setge %al
+; X64-NEXT: retq
+;
+; X86-FASTISEL-LABEL: not_isfinite_f:
+; X86-FASTISEL: # %bb.0: # %entry
+; X86-FASTISEL-NEXT: pushl %eax
+; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 8
+; X86-FASTISEL-NEXT: flds {{[0-9]+}}(%esp)
+; X86-FASTISEL-NEXT: fstps (%esp)
+; X86-FASTISEL-NEXT: movl $2147483647, %eax # imm = 0x7FFFFFFF
+; X86-FASTISEL-NEXT: andl (%esp), %eax
+; X86-FASTISEL-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
+; X86-FASTISEL-NEXT: setge %al
+; X86-FASTISEL-NEXT: popl %ecx
+; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 4
+; X86-FASTISEL-NEXT: retl
+entry:
+ %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 519) ; ~0x1f8 = "~finite"
+ ret i1 %0
+}
+
+define i1 @is_plus_finite_f(float %x) {
+; X86-SDAGISEL-LABEL: is_plus_finite_f:
+; X86-SDAGISEL: # %bb.0: # %entry
+; X86-SDAGISEL-NEXT: cmpl $2139095040, {{[0-9]+}}(%esp) # imm = 0x7F800000
+; X86-SDAGISEL-NEXT: setb %al
+; X86-SDAGISEL-NEXT: retl
+;
+; X64-LABEL: is_plus_finite_f:
+; X64: # %bb.0: # %entry
+; X64-NEXT: movd %xmm0, %eax
+; X64-NEXT: cmpl $2139095040, %eax # imm = 0x7F800000
+; X64-NEXT: setb %al
+; X64-NEXT: retq
+;
+; X86-FASTISEL-LABEL: is_plus_finite_f:
+; X86-FASTISEL: # %bb.0: # %entry
+; X86-FASTISEL-NEXT: pushl %eax
+; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 8
+; X86-FASTISEL-NEXT: flds {{[0-9]+}}(%esp)
+; X86-FASTISEL-NEXT: fstps (%esp)
+; X86-FASTISEL-NEXT: cmpl $2139095040, (%esp) # imm = 0x7F800000
+; X86-FASTISEL-NEXT: setb %al
+; X86-FASTISEL-NEXT: popl %ecx
+; X86-FASTISEL-NEXT: .cfi_def_cfa_offset 4
+; X86-FASTISEL-NEXT: retl
+entry:
+ %0 = tail call i1 @llvm.is.fpclass.f32(float %x, i32 448) ; 0x1c0 = "+finite"
+ ret i1 %0
+}
+
+define i1 @isnone_d(double %x) nounwind {
+; X86-SDAGISEL-LABEL: isnone_d:
+; X86-SDAGISEL: # %bb.0: # %entry
+; X86-SDAGISEL-NEXT: xorl %eax, %eax
+; X86-SDAGISEL-NEXT: retl
+;
+; X64-LABEL: isnone_d:
+; X64: # %bb.0: # %entry
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: retq
+;
+; X86-FASTISEL-LABEL: isnone_d:
+; X86-FASTISEL: # %bb.0: # %entry
+; X86-FASTISEL-NEXT: fldl {{[0-9]+}}(%esp)
+; X86-FASTISEL-NEXT: fstp %st(0)
+; X86-FASTISEL-NEXT: xorl %eax, %eax
+; X86-FASTISEL-NEXT: retl
+entry:
+ %0 = tail call i1 @llvm.is.fpclass.f64(double %x, i32 0)
+ ret i1 %0
+}
+
+define i1 @isany_d(double %x) nounwind {
+; X86-SDAGISEL-LABEL: isany_d:
+; X86-SDAGISEL: # %bb.0: # %entry
+; X86-SDAGISEL-NEXT: movb $1, %al
+; X86-SDAGISEL-NEXT: retl
+;
+; X64-LABEL: isany_d:
+; X64: # %bb.0: # %entry
+; X64-NEXT: movb $1, %al
+; X64-NEXT: retq
+;
+; X86-FASTISEL-LABEL: isany_d:
+; X86-FASTISEL: # %bb.0: # %entry
+; X86-FASTISEL-NEXT: fldl {{[0-9]+}}(%esp)
+; X86-FASTISEL-NEXT: fstp %st(0)
+; X86-FASTISEL-NEXT: movb $1, %al
+; X86-FASTISEL-NEXT: retl
+entry:
+ %0 = tail call i1 @llvm.is.fpclass.f64(double %x, i32 1023)
+ ret i1 %0
+}
+
+define i1 @isnone_f80(x86_fp80 %x) nounwind {
+; X86-SDAGISEL-LABEL: isnone_f80:
+; X86-SDAGISEL: # %bb.0: # %entry
+; X86-SDAGISEL-NEXT: xorl %eax, %eax
+; X86-SDAGISEL-NEXT: retl
+;
+; X64-SDAGISEL-LABEL: isnone_f80:
+; X64-SDAGISEL: # %bb.0: # %entry
+; X64-SDAGISEL-NEXT: xorl %eax, %eax
+; X64-SDAGISEL-NEXT: retq
+;
+; X86-FASTISEL-LABEL: isnone_f80:
+; X86-FASTISEL: # %bb.0: # %entry
+; X86-FASTISEL-NEXT: fldt {{[0-9]+}}(%esp)
+; X86-FASTISEL-NEXT: fstp %st(0)
+; X86-FASTISEL-NEXT: xorl %eax, %eax
+; X86-FASTISEL-NEXT: retl
+;
+; X64-FASTISEL-LABEL: isnone_f80:
+; X64-FASTISEL: # %bb.0: # %entry
+; X64-FASTISEL-NEXT: fldt {{[0-9]+}}(%rsp)
+; X64-FASTISEL-NEXT: fstp %st(0)
+; X64-FASTISEL-NEXT: xorl %eax, %eax
+; X64-FASTISEL-NEXT: retq
+entry:
+%0 = tail call i1 @llvm.is.fpclass.f80(x86_fp80 %x, i32 0)
+ret i1 %0
+}
+
+define i1 @isany_f80(x86_fp80 %x) nounwind {
+; X86-SDAGISEL-LABEL: isany_f80:
+; X86-SDAGISEL: # %bb.0: # %entry
+; X86-SDAGISEL-NEXT: movb $1, %al
+; X86-SDAGISEL-NEXT: retl
+;
+; X64-SDAGISEL-LABEL: isany_f80:
+; X64-SDAGISEL: # %bb.0: # %entry
+; X64-SDAGISEL-NEXT: movb $1, %al
+; X64-SDAGISEL-NEXT: retq
+;
+; X86-FASTISEL-LABEL: isany_f80:
+; X86-FASTISEL: # %bb.0: # %entry
+; X86-FASTISEL-NEXT: fldt {{[0-9]+}}(%esp)
+; X86-FASTISEL-NEXT: fstp %st(0)
+; X86-FASTISEL-NEXT: movb $1, %al
+; X86-FASTISEL-NEXT: retl
+;
+; X64-FASTISEL-LABEL: isany_f80:
+; X64-FASTISEL: # %bb.0: # %entry
+; X64-FASTISEL-NEXT: fldt {{[0-9]+}}(%rsp)
+; X64-FASTISEL-NEXT: fstp %st(0)
+; X64-FASTISEL-NEXT: movb $1, %al
+; X64-FASTISEL-NEXT: retq
+entry:
+ %0 = tail call i1 @llvm.is.fpclass.f80(x86_fp80 %x, i32 1023)
+ ret i1 %0
+}
diff --git a/llvm/test/CodeGen/X86/late-tail-dup-computed-goto.mir b/llvm/test/CodeGen/X86/late-tail-dup-computed-goto.mir
new file mode 100644
index 0000000..e272e7e
--- /dev/null
+++ b/llvm/test/CodeGen/X86/late-tail-dup-computed-goto.mir
@@ -0,0 +1,128 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5
+# RUN: llc -mtriple=x86_64-unknown-linux-gnu -run-pass=tailduplication -tail-dup-pred-size=1 -tail-dup-succ-size=1 %s -o - | FileCheck %s
+#
+# Check that only the computed gotos are duplicated aggressively.
+#
+--- |
+ @computed_goto.dispatch = constant [5 x ptr] [ptr null, ptr blockaddress(@computed_goto, %bb1), ptr blockaddress(@computed_goto, %bb2), ptr blockaddress(@computed_goto, %bb3), ptr blockaddress(@computed_goto, %bb4)]
+ declare i64 @f0()
+ declare i64 @f1()
+ declare i64 @f2()
+ declare i64 @f3()
+ declare i64 @f4()
+ declare i64 @f5()
+ define void @computed_goto() {
+ start:
+ ret void
+ bb1:
+ ret void
+ bb2:
+ ret void
+ bb3:
+ ret void
+ bb4:
+ ret void
+ }
+ define void @jump_table() { ret void }
+ define void @jump_table_pic() { ret void }
+...
+---
+name: computed_goto
+alignment: 1
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+failedISel: false
+tracksRegLiveness: true
+body: |
+ ; CHECK-LABEL: name: computed_goto
+ ; CHECK: bb.0:
+ ; CHECK-NEXT: successors: %bb.1(0x20000000), %bb.2(0x20000000), %bb.3(0x20000000), %bb.4(0x20000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: CALL64pcrel32 target-flags(x86-plt) @f0, csr_64, implicit $rsp, implicit $ssp, implicit-def $rsp, implicit-def $ssp, implicit-def $rax
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:gr64 = COPY $rax
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr64_nosp = COPY [[COPY]]
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gr64_nosp = COPY [[COPY1]]
+ ; CHECK-NEXT: JMP64m $noreg, 8, [[COPY2]], @computed_goto.dispatch, $noreg
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1.bb1 (ir-block-address-taken %ir-block.bb1):
+ ; CHECK-NEXT: successors: %bb.1(0x20000000), %bb.2(0x20000000), %bb.3(0x20000000), %bb.4(0x20000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: CALL64pcrel32 target-flags(x86-plt) @f1, csr_64, implicit $rsp, implicit $ssp, implicit-def $rsp, implicit-def $ssp, implicit-def $rax
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gr64 = COPY $rax
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr64_nosp = COPY [[COPY3]]
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gr64_nosp = COPY [[COPY1]]
+ ; CHECK-NEXT: JMP64m $noreg, 8, [[COPY2]], @computed_goto.dispatch, $noreg
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.2.bb2 (ir-block-address-taken %ir-block.bb2):
+ ; CHECK-NEXT: successors: %bb.1(0x20000000), %bb.2(0x20000000), %bb.3(0x20000000), %bb.4(0x20000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: CALL64pcrel32 target-flags(x86-plt) @f2, csr_64, implicit $rsp, implicit $ssp, implicit-def $rsp, implicit-def $ssp, implicit-def $rax
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gr64 = COPY $rax
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr64_nosp = COPY [[COPY4]]
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gr64_nosp = COPY [[COPY1]]
+ ; CHECK-NEXT: JMP64m $noreg, 8, [[COPY2]], @computed_goto.dispatch, $noreg
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3.bb3 (ir-block-address-taken %ir-block.bb3):
+ ; CHECK-NEXT: successors: %bb.1(0x20000000), %bb.2(0x20000000), %bb.3(0x20000000), %bb.4(0x20000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: CALL64pcrel32 target-flags(x86-plt) @f3, csr_64, implicit $rsp, implicit $ssp, implicit-def $rsp, implicit-def $ssp, implicit-def $rax
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:gr64 = COPY $rax
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr64_nosp = COPY [[COPY5]]
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gr64_nosp = COPY [[COPY1]]
+ ; CHECK-NEXT: JMP64m $noreg, 8, [[COPY2]], @computed_goto.dispatch, $noreg
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.4.bb4 (ir-block-address-taken %ir-block.bb4):
+ ; CHECK-NEXT: successors: %bb.1(0x20000000), %bb.2(0x20000000), %bb.3(0x20000000), %bb.4(0x20000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: CALL64pcrel32 target-flags(x86-plt) @f4, csr_64, implicit $rsp, implicit $ssp, implicit-def $rsp, implicit-def $ssp, implicit-def $rax
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:gr64 = COPY $rax
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gr64_nosp = COPY [[COPY6]]
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gr64_nosp = COPY [[COPY1]]
+ ; CHECK-NEXT: JMP64m $noreg, 8, [[COPY2]], @computed_goto.dispatch, $noreg
+ bb.0:
+ successors: %bb.5(0x80000000)
+
+ CALL64pcrel32 target-flags(x86-plt) @f0, csr_64, implicit $rsp, implicit $ssp, implicit-def $rsp, implicit-def $ssp, implicit-def $rax
+ %0:gr64 = COPY $rax
+ %6:gr64_nosp = COPY %0
+ JMP_1 %bb.5
+
+ bb.1.bb1 (ir-block-address-taken %ir-block.bb1):
+ successors: %bb.5(0x80000000)
+
+ CALL64pcrel32 target-flags(x86-plt) @f1, csr_64, implicit $rsp, implicit $ssp, implicit-def $rsp, implicit-def $ssp, implicit-def $rax
+ %1:gr64 = COPY $rax
+ %6:gr64_nosp = COPY %1
+ JMP_1 %bb.5
+
+ bb.2.bb2 (ir-block-address-taken %ir-block.bb2):
+ successors: %bb.5(0x80000000)
+
+ CALL64pcrel32 target-flags(x86-plt) @f2, csr_64, implicit $rsp, implicit $ssp, implicit-def $rsp, implicit-def $ssp, implicit-def $rax
+ %2:gr64 = COPY $rax
+ %6:gr64_nosp = COPY %2
+ JMP_1 %bb.5
+
+ bb.3.bb3 (ir-block-address-taken %ir-block.bb3):
+ successors: %bb.5(0x80000000)
+
+ CALL64pcrel32 target-flags(x86-plt) @f3, csr_64, implicit $rsp, implicit $ssp, implicit-def $rsp, implicit-def $ssp, implicit-def $rax
+ %3:gr64 = COPY $rax
+ %6:gr64_nosp = COPY %3
+ JMP_1 %bb.5
+
+ bb.4.bb4 (ir-block-address-taken %ir-block.bb4):
+ successors: %bb.5(0x80000000)
+
+ CALL64pcrel32 target-flags(x86-plt) @f4, csr_64, implicit $rsp, implicit $ssp, implicit-def $rsp, implicit-def $ssp, implicit-def $rax
+ %4:gr64 = COPY $rax
+ %6:gr64_nosp = COPY %4
+
+ bb.5:
+ successors: %bb.1(0x20000000), %bb.2(0x20000000), %bb.3(0x20000000), %bb.4(0x20000000)
+
+ %5:gr64_nosp = COPY %6
+ JMP64m $noreg, 8, %5, @computed_goto.dispatch, $noreg
+...
diff --git a/llvm/test/CodeGen/X86/swap.ll b/llvm/test/CodeGen/X86/swap.ll
index 1dc454dd..3330403 100644
--- a/llvm/test/CodeGen/X86/swap.ll
+++ b/llvm/test/CodeGen/X86/swap.ll
@@ -113,21 +113,17 @@ define dso_local void @onealloc_readback_1(ptr nocapture %a, ptr nocapture %b) l
;
; AA-LABEL: onealloc_readback_1:
; AA: # %bb.0: # %entry
-; AA-NEXT: vmovups (%rdi), %xmm0
-; AA-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; AA-NEXT: vmovups (%rsi), %xmm0
; AA-NEXT: vmovups %xmm0, (%rdi)
; AA-NEXT: retq
entry:
%alloc = alloca [16 x i8], i8 2, align 1
%part1 = getelementptr inbounds [16 x i8], ptr %alloc, i64 1, i64 0
- call void @llvm.lifetime.start.p0(i64 16, ptr nonnull %part1)
- call void @llvm.lifetime.start.p0(i64 16, ptr nonnull %alloc)
+ call void @llvm.lifetime.start.p0(i64 32, ptr nonnull %alloc)
call void @llvm.memcpy.p0.p0.i64(ptr nonnull align 1 %part1, ptr align 1 %a, i64 16, i1 false)
call void @llvm.memcpy.p0.p0.i64(ptr nonnull align 1 %alloc, ptr align 1 %b, i64 16, i1 false)
- call void @llvm.lifetime.end.p0(i64 16, ptr nonnull %part1)
tail call void @llvm.memcpy.p0.p0.i64(ptr nonnull align 1 %a, ptr align 1 %alloc, i64 16, i1 false)
- call void @llvm.lifetime.end.p0(i64 16, ptr nonnull %alloc)
+ call void @llvm.lifetime.end.p0(i64 32, ptr nonnull %alloc)
ret void
}
@@ -144,19 +140,16 @@ define dso_local void @onealloc_readback_2(ptr nocapture %a, ptr nocapture %b) l
; AA-LABEL: onealloc_readback_2:
; AA: # %bb.0: # %entry
; AA-NEXT: vmovups (%rsi), %xmm0
-; AA-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; AA-NEXT: vmovups %xmm0, (%rdi)
; AA-NEXT: retq
entry:
%alloc = alloca [16 x i8], i8 2, align 1
%part2 = getelementptr inbounds [16 x i8], ptr %alloc, i64 1, i64 0
- call void @llvm.lifetime.start.p0(i64 16, ptr nonnull %alloc)
- call void @llvm.lifetime.start.p0(i64 16, ptr nonnull %part2)
+ call void @llvm.lifetime.start.p0(i64 32, ptr nonnull %alloc)
call void @llvm.memcpy.p0.p0.i64(ptr nonnull align 1 %alloc, ptr align 1 %a, i64 16, i1 false)
call void @llvm.memcpy.p0.p0.i64(ptr nonnull align 1 %part2, ptr align 1 %b, i64 16, i1 false)
- call void @llvm.lifetime.end.p0(i64 16, ptr nonnull %alloc)
tail call void @llvm.memcpy.p0.p0.i64(ptr nonnull align 1 %a, ptr align 1 %part2, i64 16, i1 false)
- call void @llvm.lifetime.end.p0(i64 16, ptr nonnull %part2)
+ call void @llvm.lifetime.end.p0(i64 32, ptr nonnull %alloc)
ret void
}