Diffstat (limited to 'llvm/test')
95 files changed, 14665 insertions, 8432 deletions
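The changes below migrate AArch64 tests off the global `-enable-unsafe-fp-math` llc option and onto per-instruction fast-math flags: `contract` where fused multiply-add formation is exercised, and `reassoc nsz` where the machine combiner's reassociation is exercised. The following is a minimal sketch of the two flag patterns the updated tests rely on, distilled from the tests in this diff; the function names here are illustrative, not part of the patch:

; With 'contract' on both operations, the backend is allowed to fuse
; the multiply and subtract into a single FMLS/FNMSUB-style instruction.
define float @fuse_mul_sub(float %a, float %b, float %c) {
  %m = fmul contract float %a, %b
  %s = fsub contract float %m, %c
  ret float %s
}

; Without flags, the add chain must keep its left-to-right evaluation
; order; with 'reassoc nsz' the machine combiner may rebalance it into
; two independent adds feeding a final add.
define float @reassociable_adds(float %x0, float %x1, float %x2, float %x3) {
  %t0 = fadd reassoc nsz float %x0, %x1
  %t1 = fadd reassoc nsz float %t0, %x2
  %t2 = fadd reassoc nsz float %t1, %x3
  ret float %t2
}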
diff --git a/llvm/test/CodeGen/AArch64/aarch64-combine-fmul-fsub.mir b/llvm/test/CodeGen/AArch64/aarch64-combine-fmul-fsub.mir index cf4f321..491d693 100644 --- a/llvm/test/CodeGen/AArch64/aarch64-combine-fmul-fsub.mir +++ b/llvm/test/CodeGen/AArch64/aarch64-combine-fmul-fsub.mir @@ -1,8 +1,8 @@ -# RUN: llc -run-pass=machine-combiner -o - -mtriple=aarch64-unknown-linux -mcpu=cortex-a57 -enable-unsafe-fp-math -machine-combiner-verify-pattern-order=true %s | FileCheck --check-prefixes=UNPROFITABLE,ALL %s -# RUN: llc -run-pass=machine-combiner -o - -mtriple=aarch64-unknown-linux -mcpu=falkor -enable-unsafe-fp-math %s -machine-combiner-verify-pattern-order=true | FileCheck --check-prefixes=PROFITABLE,ALL %s -# RUN: llc -run-pass=machine-combiner -o - -mtriple=aarch64-unknown-linux -mcpu=exynos-m3 -enable-unsafe-fp-math -machine-combiner-verify-pattern-order=true %s | FileCheck --check-prefixes=PROFITABLE,ALL %s -# RUN: llc -run-pass=machine-combiner -o - -mtriple=aarch64-unknown-linux -mcpu=thunderx2t99 -enable-unsafe-fp-math -machine-combiner-verify-pattern-order=true %s | FileCheck --check-prefixes=PROFITABLE,ALL %s -# RUN: llc -run-pass=machine-combiner -o - -mtriple=aarch64-unknown-linux -mcpu=thunderx3t110 -enable-unsafe-fp-math -machine-combiner-verify-pattern-order=true %s | FileCheck --check-prefixes=PROFITABLE,ALL %s +# RUN: llc -run-pass=machine-combiner -o - -mtriple=aarch64-unknown-linux -mcpu=cortex-a57 -machine-combiner-verify-pattern-order=true %s | FileCheck --check-prefixes=UNPROFITABLE,ALL %s +# RUN: llc -run-pass=machine-combiner -o - -mtriple=aarch64-unknown-linux -mcpu=falkor %s -machine-combiner-verify-pattern-order=true | FileCheck --check-prefixes=PROFITABLE,ALL %s +# RUN: llc -run-pass=machine-combiner -o - -mtriple=aarch64-unknown-linux -mcpu=exynos-m3 -machine-combiner-verify-pattern-order=true %s | FileCheck --check-prefixes=PROFITABLE,ALL %s +# RUN: llc -run-pass=machine-combiner -o - -mtriple=aarch64-unknown-linux -mcpu=thunderx2t99 -machine-combiner-verify-pattern-order=true %s | FileCheck --check-prefixes=PROFITABLE,ALL %s +# RUN: llc -run-pass=machine-combiner -o - -mtriple=aarch64-unknown-linux -mcpu=thunderx3t110 -machine-combiner-verify-pattern-order=true %s | FileCheck --check-prefixes=PROFITABLE,ALL %s # name: f1_2s registers: @@ -16,18 +16,18 @@ body: | %2:fpr64 = COPY $d2 %1:fpr64 = COPY $d1 %0:fpr64 = COPY $d0 - %3:fpr64 = FMULv2f32 %0, %1, implicit $fpcr - %4:fpr64 = FSUBv2f32 killed %3, %2, implicit $fpcr + %3:fpr64 = contract FMULv2f32 %0, %1, implicit $fpcr + %4:fpr64 = contract FSUBv2f32 killed %3, %2, implicit $fpcr $d0 = COPY %4 RET_ReallyLR implicit $d0 ... # UNPROFITABLE-LABEL: name: f1_2s -# UNPROFITABLE: [[R1:%[0-9]+]]:fpr64 = FNEGv2f32 %2 +# UNPROFITABLE: [[R1:%[0-9]+]]:fpr64 = contract FNEGv2f32 %2 # UNPROFITABLE-NEXT: FMLAv2f32 killed [[R1]], %0, %1, implicit $fpcr # # PROFITABLE-LABEL: name: f1_2s -# PROFITABLE: [[R1:%[0-9]+]]:fpr64 = FNEGv2f32 %2 +# PROFITABLE: [[R1:%[0-9]+]]:fpr64 = contract FNEGv2f32 %2 # PROFITABLE-NEXT: FMLAv2f32 killed [[R1]], %0, %1, implicit $fpcr --- name: f1_4s @@ -42,18 +42,18 @@ body: | %2:fpr128 = COPY $q2 %1:fpr128 = COPY $q1 %0:fpr128 = COPY $q0 - %3:fpr128 = FMULv4f32 %0, %1, implicit $fpcr - %4:fpr128 = FSUBv4f32 killed %3, %2, implicit $fpcr + %3:fpr128 = contract FMULv4f32 %0, %1, implicit $fpcr + %4:fpr128 = contract FSUBv4f32 killed %3, %2, implicit $fpcr $q0 = COPY %4 RET_ReallyLR implicit $q0 ... 
# UNPROFITABLE-LABEL: name: f1_4s -# UNPROFITABLE: [[R1:%[0-9]+]]:fpr128 = FMULv4f32 %0, %1, implicit $fpcr +# UNPROFITABLE: [[R1:%[0-9]+]]:fpr128 = contract FMULv4f32 %0, %1, implicit $fpcr # UNPROFITABLE-NEXT: FSUBv4f32 killed [[R1]], %2, implicit $fpcr # # PROFITABLE-LABEL: name: f1_4s -# PROFITABLE: [[R1:%[0-9]+]]:fpr128 = FNEGv4f32 %2 +# PROFITABLE: [[R1:%[0-9]+]]:fpr128 = contract FNEGv4f32 %2 # PROFITABLE-NEXT: FMLAv4f32 killed [[R1]], %0, %1, implicit $fpcr --- name: f1_2d @@ -68,18 +68,18 @@ body: | %2:fpr128 = COPY $q2 %1:fpr128 = COPY $q1 %0:fpr128 = COPY $q0 - %3:fpr128 = FMULv2f64 %0, %1, implicit $fpcr - %4:fpr128 = FSUBv2f64 killed %3, %2, implicit $fpcr + %3:fpr128 = contract FMULv2f64 %0, %1, implicit $fpcr + %4:fpr128 = contract FSUBv2f64 killed %3, %2, implicit $fpcr $q0 = COPY %4 RET_ReallyLR implicit $q0 ... # UNPROFITABLE-LABEL: name: f1_2d -# UNPROFITABLE: %3:fpr128 = FMULv2f64 %0, %1, implicit $fpcr +# UNPROFITABLE: %3:fpr128 = contract FMULv2f64 %0, %1, implicit $fpcr # UNPROFITABLE-NEXT: FSUBv2f64 killed %3, %2, implicit $fpcr # # PROFITABLE-LABEL: name: f1_2d -# PROFITABLE: [[R1:%[0-9]+]]:fpr128 = FNEGv2f64 %2 +# PROFITABLE: [[R1:%[0-9]+]]:fpr128 = contract FNEGv2f64 %2 # PROFITABLE-NEXT: FMLAv2f64 killed [[R1]], %0, %1, implicit $fpcr --- name: f1_both_fmul_2s @@ -97,15 +97,15 @@ body: | %2:fpr64 = COPY $q2 %1:fpr64 = COPY $q1 %0:fpr64 = COPY $q0 - %4:fpr64 = FMULv2f32 %0, %1, implicit $fpcr - %5:fpr64 = FMULv2f32 %2, %3, implicit $fpcr - %6:fpr64 = FSUBv2f32 killed %4, %5, implicit $fpcr + %4:fpr64 = contract FMULv2f32 %0, %1, implicit $fpcr + %5:fpr64 = contract FMULv2f32 %2, %3, implicit $fpcr + %6:fpr64 = contract FSUBv2f32 killed %4, %5, implicit $fpcr $q0 = COPY %6 RET_ReallyLR implicit $q0 ... # ALL-LABEL: name: f1_both_fmul_2s -# ALL: %4:fpr64 = FMULv2f32 %0, %1, implicit $fpcr +# ALL: %4:fpr64 = contract FMULv2f32 %0, %1, implicit $fpcr # ALL-NEXT: FMLSv2f32 killed %4, %2, %3, implicit $fpcr --- name: f1_both_fmul_4s @@ -123,15 +123,15 @@ body: | %2:fpr128 = COPY $q2 %1:fpr128 = COPY $q1 %0:fpr128 = COPY $q0 - %4:fpr128 = FMULv4f32 %0, %1, implicit $fpcr - %5:fpr128 = FMULv4f32 %2, %3, implicit $fpcr - %6:fpr128 = FSUBv4f32 killed %4, %5, implicit $fpcr + %4:fpr128 = contract FMULv4f32 %0, %1, implicit $fpcr + %5:fpr128 = contract FMULv4f32 %2, %3, implicit $fpcr + %6:fpr128 = contract FSUBv4f32 killed %4, %5, implicit $fpcr $q0 = COPY %6 RET_ReallyLR implicit $q0 ... # ALL-LABEL: name: f1_both_fmul_4s -# ALL: %4:fpr128 = FMULv4f32 %0, %1, implicit $fpcr +# ALL: %4:fpr128 = contract FMULv4f32 %0, %1, implicit $fpcr # ALL-NEXT: FMLSv4f32 killed %4, %2, %3, implicit $fpcr --- name: f1_both_fmul_2d @@ -149,14 +149,14 @@ body: | %2:fpr128 = COPY $q2 %1:fpr128 = COPY $q1 %0:fpr128 = COPY $q0 - %4:fpr128 = FMULv2f64 %0, %1, implicit $fpcr - %5:fpr128 = FMULv2f64 %2, %3, implicit $fpcr - %6:fpr128 = FSUBv2f64 killed %4, %5, implicit $fpcr + %4:fpr128 = contract FMULv2f64 %0, %1, implicit $fpcr + %5:fpr128 = contract FMULv2f64 %2, %3, implicit $fpcr + %6:fpr128 = contract FSUBv2f64 killed %4, %5, implicit $fpcr $q0 = COPY %6 RET_ReallyLR implicit $q0 ... 
# ALL-LABEL: name: f1_both_fmul_2d -# ALL: %4:fpr128 = FMULv2f64 %0, %1, implicit $fpcr +# ALL: %4:fpr128 = contract FMULv2f64 %0, %1, implicit $fpcr # ALL-NEXT: FMLSv2f64 killed %4, %2, %3, implicit $fpcr diff --git a/llvm/test/CodeGen/AArch64/arm64-fml-combines.ll b/llvm/test/CodeGen/AArch64/arm64-fml-combines.ll index ce35810..60c48bf 100644 --- a/llvm/test/CodeGen/AArch64/arm64-fml-combines.ll +++ b/llvm/test/CodeGen/AArch64/arm64-fml-combines.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -O3 -mtriple=arm64-apple-ios -enable-unsafe-fp-math -mattr=+fullfp16 | FileCheck %s +; RUN: llc < %s -O3 -mtriple=arm64-apple-ios -mattr=+fullfp16 | FileCheck %s ; RUN: llc < %s -O3 -mtriple=arm64-apple-ios -fp-contract=fast -mattr=+fullfp16 | FileCheck %s define void @foo_2d(ptr %src) { @@ -130,9 +130,9 @@ for.end: ; preds = %for.body ; CHECK: fnmadd h0, h0, h1, h2 define half @test0(half %a, half %b, half %c) { entry: - %0 = fmul half %a, %b - %mul = fsub half -0.000000e+00, %0 - %sub1 = fsub half %mul, %c + %0 = fmul contract half %a, %b + %mul = fsub contract half -0.000000e+00, %0 + %sub1 = fsub contract half %mul, %c ret half %sub1 } @@ -140,9 +140,9 @@ entry: ; CHECK: fnmadd s0, s0, s1, s2 define float @test1(float %a, float %b, float %c) { entry: - %0 = fmul float %a, %b - %mul = fsub float -0.000000e+00, %0 - %sub1 = fsub float %mul, %c + %0 = fmul contract float %a, %b + %mul = fsub contract float -0.000000e+00, %0 + %sub1 = fsub contract float %mul, %c ret float %sub1 } @@ -150,9 +150,9 @@ entry: ; CHECK: fnmadd d0, d0, d1, d2 define double @test2(double %a, double %b, double %c) { entry: - %0 = fmul double %a, %b - %mul = fsub double -0.000000e+00, %0 - %sub1 = fsub double %mul, %c + %0 = fmul contract double %a, %b + %mul = fsub contract double -0.000000e+00, %0 + %sub1 = fsub contract double %mul, %c ret double %sub1 } diff --git a/llvm/test/CodeGen/AArch64/fcsel-zero.ll b/llvm/test/CodeGen/AArch64/fcsel-zero.ll index 3fbcd10..3db588b 100644 --- a/llvm/test/CodeGen/AArch64/fcsel-zero.ll +++ b/llvm/test/CodeGen/AArch64/fcsel-zero.ll @@ -2,8 +2,8 @@ ; RUN: llc -mtriple=aarch64-linux-gnu -o - < %s | FileCheck %s -define float @foeq(float %a, float %b) #0 { - %t = fcmp oeq float %a, 0.0 +define float @foeq(float %a, float %b) { + %t = fcmp nsz oeq float %a, 0.0 %v = select i1 %t, float 0.0, float %b ret float %v ; CHECK-LABEL: foeq @@ -11,8 +11,8 @@ define float @foeq(float %a, float %b) #0 { ; CHECK-NEXT: fcsel {{s[0-9]+}}, [[R]], {{s[0-9]+}}, eq } -define float @fueq(float %a, float %b) #0 { - %t = fcmp ueq float %a, 0.0 +define float @fueq(float %a, float %b) { + %t = fcmp nsz ueq float %a, 0.0 %v = select i1 %t, float 0.0, float %b ret float %v ; CHECK-LABEL: fueq @@ -21,8 +21,8 @@ define float @fueq(float %a, float %b) #0 { ; CHECK-NEXT: fcsel {{s[0-9]+}}, [[R]], {{s[0-9]+}}, vs } -define float @fone(float %a, float %b) #0 { - %t = fcmp one float %a, 0.0 +define float @fone(float %a, float %b) { + %t = fcmp nsz one float %a, 0.0 %v = select i1 %t, float %b, float 0.0 ret float %v ; CHECK-LABEL: fone @@ -31,8 +31,8 @@ define float @fone(float %a, float %b) #0 { ; CHECK-NEXT: fcsel {{s[0-9]+}}, {{s[0-9]+}}, [[R]], gt } -define float @fune(float %a, float %b) #0 { - %t = fcmp une float %a, 0.0 +define float @fune(float %a, float %b) { + %t = fcmp nsz une float %a, 0.0 %v = select i1 %t, float %b, float 0.0 ret float %v ; CHECK-LABEL: fune @@ -40,8 +40,8 @@ define float @fune(float %a, float %b) #0 { ; CHECK-NEXT: fcsel {{s[0-9]+}}, {{s[0-9]+}}, [[R]], ne } -define double @doeq(double %a, double %b) 
#0 { - %t = fcmp oeq double %a, 0.0 +define double @doeq(double %a, double %b) { + %t = fcmp nsz oeq double %a, 0.0 %v = select i1 %t, double 0.0, double %b ret double %v ; CHECK-LABEL: doeq @@ -49,8 +49,8 @@ define double @doeq(double %a, double %b) #0 { ; CHECK-NEXT: fcsel {{d[0-9]+}}, [[R]], {{d[0-9]+}}, eq } -define double @dueq(double %a, double %b) #0 { - %t = fcmp ueq double %a, 0.0 +define double @dueq(double %a, double %b) { + %t = fcmp nsz ueq double %a, 0.0 %v = select i1 %t, double 0.0, double %b ret double %v ; CHECK-LABEL: dueq @@ -59,8 +59,8 @@ define double @dueq(double %a, double %b) #0 { ; CHECK-NEXT: fcsel {{d[0-9]+}}, [[R]], {{d[0-9]+}}, vs } -define double @done(double %a, double %b) #0 { - %t = fcmp one double %a, 0.0 +define double @done(double %a, double %b) { + %t = fcmp nsz one double %a, 0.0 %v = select i1 %t, double %b, double 0.0 ret double %v ; CHECK-LABEL: done @@ -69,14 +69,11 @@ define double @done(double %a, double %b) #0 { ; CHECK-NEXT: fcsel {{d[0-9]+}}, {{d[0-9]+}}, [[R]], gt } -define double @dune(double %a, double %b) #0 { - %t = fcmp une double %a, 0.0 +define double @dune(double %a, double %b) { + %t = fcmp nsz une double %a, 0.0 %v = select i1 %t, double %b, double 0.0 ret double %v ; CHECK-LABEL: dune ; CHECK: fcmp [[R:d[0-9]+]], #0.0 ; CHECK-NEXT: fcsel {{d[0-9]+}}, {{d[0-9]+}}, [[R]], ne } - -attributes #0 = { nounwind "unsafe-fp-math"="true" } - diff --git a/llvm/test/CodeGen/AArch64/machine-combiner-reassociate.mir b/llvm/test/CodeGen/AArch64/machine-combiner-reassociate.mir index 525f6dd..184c9ef 100644 --- a/llvm/test/CodeGen/AArch64/machine-combiner-reassociate.mir +++ b/llvm/test/CodeGen/AArch64/machine-combiner-reassociate.mir @@ -1,14 +1,11 @@ -# RUN: llc -run-pass=machine-combiner -mtriple=aarch64-unknown-linux-gnu %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SAFE -# RUN: llc -run-pass=machine-combiner -mtriple=aarch64-unknown-linux-gnu -enable-unsafe-fp-math %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-UNSAFE +# RUN: llc -run-pass=machine-combiner -mtriple=aarch64-unknown-linux-gnu %s -o - | FileCheck %s # fadd without the reassoc flags can be reassociate only when unsafe fp math is # enabled. # CHECK-LABEL: name: fadd_no_reassoc # CHECK: [[ADD1:%[0-9]+]]:fpr32 = FADDSrr %0, %1, implicit $fpcr -# CHECK-SAFE-NEXT: [[ADD2:%[0-9]+]]:fpr32 = FADDSrr killed [[ADD1]], %2, implicit $fpcr -# CHECK-SAFE-NEXT: [[ADD3:%[0-9]+]]:fpr32 = FADDSrr killed [[ADD2]], %3, implicit $fpcr -# CHECK-UNSAFE-NEXT: [[ADD2:%[0-9]+]]:fpr32 = FADDSrr %2, %3, implicit $fpcr -# CHECK-UNSAFE-NEXT: [[ADD3:%[0-9]+]]:fpr32 = FADDSrr killed [[ADD1]], killed [[ADD2]], implicit $fpcr +# CHECK: [[ADD2:%[0-9]+]]:fpr32 = FADDSrr killed [[ADD1]], %2, implicit $fpcr +# CHECK: [[ADD3:%[0-9]+]]:fpr32 = FADDSrr killed [[ADD2]], %3, implicit $fpcr --- name: fadd_no_reassoc alignment: 4 @@ -49,10 +46,9 @@ body: | # the reassoc flag is ignored. 
# CHECK-LABEL: name: fadd_reassoc # CHECK: [[ADD1:%[0-9]+]]:fpr32 = reassoc FADDSrr %0, %1, implicit $fpcr -# CHECK-SAFE-NEXT: [[ADD2:%[0-9]+]]:fpr32 = reassoc FADDSrr killed [[ADD1]], %2, implicit $fpcr -# CHECK-SAFE-NEXT: [[ADD3:%[0-9]+]]:fpr32 = reassoc FADDSrr killed [[ADD2]], %3, implicit $fpcr -# CHECK-UNSAFE-NEXT: [[ADD2:%[0-9]+]]:fpr32 = reassoc FADDSrr %2, %3, implicit $fpcr -# CHECK-UNSAFE-NEXT: [[ADD3:%[0-9]+]]:fpr32 = reassoc FADDSrr killed [[ADD1]], killed [[ADD2]], implicit $fpcr +# CHECK: [[ADD2:%[0-9]+]]:fpr32 = reassoc FADDSrr killed [[ADD1]], %2, implicit $fpcr +# CHECK: [[ADD3:%[0-9]+]]:fpr32 = reassoc FADDSrr killed [[ADD2]], %3, implicit $fpcr + --- name: fadd_reassoc alignment: 4 @@ -92,10 +88,8 @@ body: | # Check that flags on the instructions are preserved after reassociation. # CHECK-LABEL: name: fadd_flags # CHECK: [[ADD1:%[0-9]+]]:fpr32 = nnan ninf nsz FADDSrr %0, %1, implicit $fpcr -# CHECK-SAFE-NEXT: [[ADD2:%[0-9]+]]:fpr32 = nnan nsz FADDSrr killed [[ADD1]], %2, implicit $fpcr -# CHECK-SAFE-NEXT: [[ADD3:%[0-9]+]]:fpr32 = ninf nsz FADDSrr killed [[ADD2]], %3, implicit $fpcr -# CHECK-UNSAFE-NEXT: [[ADD2:%[0-9]+]]:fpr32 = nsz FADDSrr %2, %3, implicit $fpcr -# CHECK-UNSAFE-NEXT: [[ADD3:%[0-9]+]]:fpr32 = nsz FADDSrr killed [[ADD1]], killed [[ADD2]], implicit $fpcr +# CHECK: [[ADD2:%[0-9]+]]:fpr32 = nnan nsz FADDSrr killed [[ADD1]], %2, implicit $fpcr +# CHECK: [[ADD3:%[0-9]+]]:fpr32 = ninf nsz FADDSrr killed [[ADD2]], %3, implicit $fpcr --- name: fadd_flags alignment: 4 diff --git a/llvm/test/CodeGen/AArch64/machine-combiner.ll b/llvm/test/CodeGen/AArch64/machine-combiner.ll index ec61fee..65afd92 100644 --- a/llvm/test/CodeGen/AArch64/machine-combiner.ll +++ b/llvm/test/CodeGen/AArch64/machine-combiner.ll @@ -1,29 +1,21 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -mtriple=aarch64-gnu-linux -mcpu=neoverse-n2 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-STD -; RUN: llc -mtriple=aarch64-gnu-linux -mcpu=neoverse-n2 -enable-unsafe-fp-math < %s | FileCheck %s --check-prefixes=CHECK,CHECK-UNSAFE +; RUN: llc -mtriple=aarch64-gnu-linux -mcpu=neoverse-n2 < %s | FileCheck %s ; Incremental updates of the instruction depths should be enough for this test ; case. -; RUN: llc -mtriple=aarch64-gnu-linux -mcpu=neoverse-n2 -enable-unsafe-fp-math \ -; RUN: -machine-combiner-inc-threshold=0 -machine-combiner-verify-pattern-order=true < %s | FileCheck %s --check-prefixes=CHECK,CHECK-UNSAFE +; RUN: llc -mtriple=aarch64-gnu-linux -mcpu=neoverse-n2 \ +; RUN: -machine-combiner-inc-threshold=0 -machine-combiner-verify-pattern-order=true < %s | FileCheck %s ; Verify that the first two adds are independent regardless of how the inputs are ; commuted. The destination registers are used as source registers for the third add. 
define float @reassociate_adds1(float %x0, float %x1, float %x2, float %x3) { -; CHECK-STD-LABEL: reassociate_adds1: -; CHECK-STD: // %bb.0: -; CHECK-STD-NEXT: fadd s0, s0, s1 -; CHECK-STD-NEXT: fadd s0, s0, s2 -; CHECK-STD-NEXT: fadd s0, s0, s3 -; CHECK-STD-NEXT: ret -; -; CHECK-UNSAFE-LABEL: reassociate_adds1: -; CHECK-UNSAFE: // %bb.0: -; CHECK-UNSAFE-NEXT: fadd s0, s0, s1 -; CHECK-UNSAFE-NEXT: fadd s1, s2, s3 -; CHECK-UNSAFE-NEXT: fadd s0, s0, s1 -; CHECK-UNSAFE-NEXT: ret +; CHECK-LABEL: reassociate_adds1: +; CHECK: // %bb.0: +; CHECK-NEXT: fadd s0, s0, s1 +; CHECK-NEXT: fadd s0, s0, s2 +; CHECK-NEXT: fadd s0, s0, s3 +; CHECK-NEXT: ret %t0 = fadd float %x0, %x1 %t1 = fadd float %t0, %x2 %t2 = fadd float %t1, %x3 @@ -44,110 +36,110 @@ define float @reassociate_adds1_fast(float %x0, float %x1, float %x2, float %x3) } define float @reassociate_adds1_reassoc(float %x0, float %x1, float %x2, float %x3) { -; CHECK-STD-LABEL: reassociate_adds1_reassoc: -; CHECK-STD: // %bb.0: -; CHECK-STD-NEXT: fadd s0, s0, s1 -; CHECK-STD-NEXT: fadd s0, s0, s2 -; CHECK-STD-NEXT: fadd s0, s0, s3 -; CHECK-STD-NEXT: ret -; -; CHECK-UNSAFE-LABEL: reassociate_adds1_reassoc: -; CHECK-UNSAFE: // %bb.0: -; CHECK-UNSAFE-NEXT: fadd s0, s0, s1 -; CHECK-UNSAFE-NEXT: fadd s1, s2, s3 -; CHECK-UNSAFE-NEXT: fadd s0, s0, s1 -; CHECK-UNSAFE-NEXT: ret - %t0 = fadd reassoc float %x0, %x1 - %t1 = fadd reassoc float %t0, %x2 - %t2 = fadd reassoc float %t1, %x3 +; CHECK-LABEL: reassociate_adds1_reassoc: +; CHECK: // %bb.0: +; CHECK-NEXT: fadd s0, s0, s1 +; CHECK-NEXT: fadd s1, s2, s3 +; CHECK-NEXT: fadd s0, s0, s1 +; CHECK-NEXT: ret + %t0 = fadd reassoc nsz float %x0, %x1 + %t1 = fadd reassoc nsz float %t0, %x2 + %t2 = fadd reassoc nsz float %t1, %x3 ret float %t2 } define float @reassociate_adds2(float %x0, float %x1, float %x2, float %x3) { -; CHECK-STD-LABEL: reassociate_adds2: -; CHECK-STD: // %bb.0: -; CHECK-STD-NEXT: fadd s0, s0, s1 -; CHECK-STD-NEXT: fadd s0, s2, s0 -; CHECK-STD-NEXT: fadd s0, s0, s3 -; CHECK-STD-NEXT: ret -; -; CHECK-UNSAFE-LABEL: reassociate_adds2: -; CHECK-UNSAFE: // %bb.0: -; CHECK-UNSAFE-NEXT: fadd s0, s0, s1 -; CHECK-UNSAFE-NEXT: fadd s1, s2, s3 -; CHECK-UNSAFE-NEXT: fadd s0, s1, s0 -; CHECK-UNSAFE-NEXT: ret +; CHECK-LABEL: reassociate_adds2: +; CHECK: // %bb.0: +; CHECK-NEXT: fadd s0, s0, s1 +; CHECK-NEXT: fadd s0, s2, s0 +; CHECK-NEXT: fadd s0, s0, s3 +; CHECK-NEXT: ret %t0 = fadd float %x0, %x1 %t1 = fadd float %x2, %t0 %t2 = fadd float %t1, %x3 ret float %t2 } +define float @reassociate_adds2_reassoc(float %x0, float %x1, float %x2, float %x3) { +; CHECK-LABEL: reassociate_adds2_reassoc: +; CHECK: // %bb.0: +; CHECK-NEXT: fadd s0, s0, s1 +; CHECK-NEXT: fadd s1, s2, s3 +; CHECK-NEXT: fadd s0, s1, s0 +; CHECK-NEXT: ret + %t0 = fadd reassoc nsz float %x0, %x1 + %t1 = fadd reassoc nsz float %x2, %t0 + %t2 = fadd reassoc nsz float %t1, %x3 + ret float %t2 +} + define float @reassociate_adds3(float %x0, float %x1, float %x2, float %x3) { -; CHECK-STD-LABEL: reassociate_adds3: -; CHECK-STD: // %bb.0: -; CHECK-STD-NEXT: fadd s0, s0, s1 -; CHECK-STD-NEXT: fadd s0, s0, s2 -; CHECK-STD-NEXT: fadd s0, s3, s0 -; CHECK-STD-NEXT: ret -; -; CHECK-UNSAFE-LABEL: reassociate_adds3: -; CHECK-UNSAFE: // %bb.0: -; CHECK-UNSAFE-NEXT: fadd s0, s0, s1 -; CHECK-UNSAFE-NEXT: fadd s1, s3, s2 -; CHECK-UNSAFE-NEXT: fadd s0, s1, s0 -; CHECK-UNSAFE-NEXT: ret +; CHECK-LABEL: reassociate_adds3: +; CHECK: // %bb.0: +; CHECK-NEXT: fadd s0, s0, s1 +; CHECK-NEXT: fadd s0, s0, s2 +; CHECK-NEXT: fadd s0, s3, s0 +; CHECK-NEXT: ret %t0 = 
fadd float %x0, %x1 %t1 = fadd float %t0, %x2 %t2 = fadd float %x3, %t1 ret float %t2 } +define float @reassociate_adds3_reassoc(float %x0, float %x1, float %x2, float %x3) { +; CHECK-LABEL: reassociate_adds3_reassoc: +; CHECK: // %bb.0: +; CHECK-NEXT: fadd s0, s0, s1 +; CHECK-NEXT: fadd s1, s3, s2 +; CHECK-NEXT: fadd s0, s1, s0 +; CHECK-NEXT: ret + %t0 = fadd reassoc nsz float %x0, %x1 + %t1 = fadd reassoc nsz float %t0, %x2 + %t2 = fadd reassoc nsz float %x3, %t1 + ret float %t2 +} + define float @reassociate_adds4(float %x0, float %x1, float %x2, float %x3) { -; CHECK-STD-LABEL: reassociate_adds4: -; CHECK-STD: // %bb.0: -; CHECK-STD-NEXT: fadd s0, s0, s1 -; CHECK-STD-NEXT: fadd s0, s2, s0 -; CHECK-STD-NEXT: fadd s0, s3, s0 -; CHECK-STD-NEXT: ret -; -; CHECK-UNSAFE-LABEL: reassociate_adds4: -; CHECK-UNSAFE: // %bb.0: -; CHECK-UNSAFE-NEXT: fadd s0, s0, s1 -; CHECK-UNSAFE-NEXT: fadd s1, s3, s2 -; CHECK-UNSAFE-NEXT: fadd s0, s1, s0 -; CHECK-UNSAFE-NEXT: ret +; CHECK-LABEL: reassociate_adds4: +; CHECK: // %bb.0: +; CHECK-NEXT: fadd s0, s0, s1 +; CHECK-NEXT: fadd s0, s2, s0 +; CHECK-NEXT: fadd s0, s3, s0 +; CHECK-NEXT: ret %t0 = fadd float %x0, %x1 %t1 = fadd float %x2, %t0 %t2 = fadd float %x3, %t1 ret float %t2 } +define float @reassociate_adds4_reassoc(float %x0, float %x1, float %x2, float %x3) { +; CHECK-LABEL: reassociate_adds4_reassoc: +; CHECK: // %bb.0: +; CHECK-NEXT: fadd s0, s0, s1 +; CHECK-NEXT: fadd s1, s3, s2 +; CHECK-NEXT: fadd s0, s1, s0 +; CHECK-NEXT: ret + %t0 = fadd reassoc nsz float %x0, %x1 + %t1 = fadd reassoc nsz float %x2, %t0 + %t2 = fadd reassoc nsz float %x3, %t1 + ret float %t2 +} + ; Verify that we reassociate some of these ops. The optimal balanced tree of adds is not ; produced because that would cost more compile time. 
define float @reassociate_adds5(float %x0, float %x1, float %x2, float %x3, float %x4, float %x5, float %x6, float %x7) { -; CHECK-STD-LABEL: reassociate_adds5: -; CHECK-STD: // %bb.0: -; CHECK-STD-NEXT: fadd s0, s0, s1 -; CHECK-STD-NEXT: fadd s0, s0, s2 -; CHECK-STD-NEXT: fadd s0, s0, s3 -; CHECK-STD-NEXT: fadd s0, s0, s4 -; CHECK-STD-NEXT: fadd s0, s0, s5 -; CHECK-STD-NEXT: fadd s0, s0, s6 -; CHECK-STD-NEXT: fadd s0, s0, s7 -; CHECK-STD-NEXT: ret -; -; CHECK-UNSAFE-LABEL: reassociate_adds5: -; CHECK-UNSAFE: // %bb.0: -; CHECK-UNSAFE-NEXT: fadd s0, s0, s1 -; CHECK-UNSAFE-NEXT: fadd s1, s2, s3 -; CHECK-UNSAFE-NEXT: fadd s0, s0, s1 -; CHECK-UNSAFE-NEXT: fadd s1, s4, s5 -; CHECK-UNSAFE-NEXT: fadd s1, s1, s6 -; CHECK-UNSAFE-NEXT: fadd s0, s0, s1 -; CHECK-UNSAFE-NEXT: fadd s0, s0, s7 -; CHECK-UNSAFE-NEXT: ret +; CHECK-LABEL: reassociate_adds5: +; CHECK: // %bb.0: +; CHECK-NEXT: fadd s0, s0, s1 +; CHECK-NEXT: fadd s0, s0, s2 +; CHECK-NEXT: fadd s0, s0, s3 +; CHECK-NEXT: fadd s0, s0, s4 +; CHECK-NEXT: fadd s0, s0, s5 +; CHECK-NEXT: fadd s0, s0, s6 +; CHECK-NEXT: fadd s0, s0, s7 +; CHECK-NEXT: ret %t0 = fadd float %x0, %x1 %t1 = fadd float %t0, %x2 %t2 = fadd float %t1, %x3 @@ -158,141 +150,198 @@ define float @reassociate_adds5(float %x0, float %x1, float %x2, float %x3, floa ret float %t6 } +define float @reassociate_adds5_reassoc(float %x0, float %x1, float %x2, float %x3, float %x4, float %x5, float %x6, float %x7) { +; CHECK-LABEL: reassociate_adds5_reassoc: +; CHECK: // %bb.0: +; CHECK-NEXT: fadd s0, s0, s1 +; CHECK-NEXT: fadd s1, s2, s3 +; CHECK-NEXT: fadd s0, s0, s1 +; CHECK-NEXT: fadd s1, s4, s5 +; CHECK-NEXT: fadd s1, s1, s6 +; CHECK-NEXT: fadd s0, s0, s1 +; CHECK-NEXT: fadd s0, s0, s7 +; CHECK-NEXT: ret + %t0 = fadd reassoc nsz float %x0, %x1 + %t1 = fadd reassoc nsz float %t0, %x2 + %t2 = fadd reassoc nsz float %t1, %x3 + %t3 = fadd reassoc nsz float %t2, %x4 + %t4 = fadd reassoc nsz float %t3, %x5 + %t5 = fadd reassoc nsz float %t4, %x6 + %t6 = fadd reassoc nsz float %t5, %x7 + ret float %t6 +} + ; Verify that we only need two associative operations to reassociate the operands. ; Also, we should reassociate such that the result of the high latency division ; is used by the final 'add' rather than reassociating the %x3 operand with the ; division. The latter reassociation would not improve anything. 
define float @reassociate_adds6(float %x0, float %x1, float %x2, float %x3) { -; CHECK-STD-LABEL: reassociate_adds6: -; CHECK-STD: // %bb.0: -; CHECK-STD-NEXT: fdiv s0, s0, s1 -; CHECK-STD-NEXT: fadd s0, s2, s0 -; CHECK-STD-NEXT: fadd s0, s3, s0 -; CHECK-STD-NEXT: ret -; -; CHECK-UNSAFE-LABEL: reassociate_adds6: -; CHECK-UNSAFE: // %bb.0: -; CHECK-UNSAFE-NEXT: fdiv s0, s0, s1 -; CHECK-UNSAFE-NEXT: fadd s1, s3, s2 -; CHECK-UNSAFE-NEXT: fadd s0, s1, s0 -; CHECK-UNSAFE-NEXT: ret +; CHECK-LABEL: reassociate_adds6: +; CHECK: // %bb.0: +; CHECK-NEXT: fdiv s0, s0, s1 +; CHECK-NEXT: fadd s0, s2, s0 +; CHECK-NEXT: fadd s0, s3, s0 +; CHECK-NEXT: ret %t0 = fdiv float %x0, %x1 %t1 = fadd float %x2, %t0 %t2 = fadd float %x3, %t1 ret float %t2 } +define float @reassociate_adds6_reassoc(float %x0, float %x1, float %x2, float %x3) { +; CHECK-LABEL: reassociate_adds6_reassoc: +; CHECK: // %bb.0: +; CHECK-NEXT: fdiv s0, s0, s1 +; CHECK-NEXT: fadd s1, s3, s2 +; CHECK-NEXT: fadd s0, s1, s0 +; CHECK-NEXT: ret + %t0 = fdiv reassoc nsz float %x0, %x1 + %t1 = fadd reassoc nsz float %x2, %t0 + %t2 = fadd reassoc nsz float %x3, %t1 + ret float %t2 +} + ; Verify that scalar single-precision multiplies are reassociated. define float @reassociate_muls1(float %x0, float %x1, float %x2, float %x3) { -; CHECK-STD-LABEL: reassociate_muls1: -; CHECK-STD: // %bb.0: -; CHECK-STD-NEXT: fdiv s0, s0, s1 -; CHECK-STD-NEXT: fmul s0, s2, s0 -; CHECK-STD-NEXT: fmul s0, s3, s0 -; CHECK-STD-NEXT: ret -; -; CHECK-UNSAFE-LABEL: reassociate_muls1: -; CHECK-UNSAFE: // %bb.0: -; CHECK-UNSAFE-NEXT: fdiv s0, s0, s1 -; CHECK-UNSAFE-NEXT: fmul s1, s3, s2 -; CHECK-UNSAFE-NEXT: fmul s0, s1, s0 -; CHECK-UNSAFE-NEXT: ret +; CHECK-LABEL: reassociate_muls1: +; CHECK: // %bb.0: +; CHECK-NEXT: fdiv s0, s0, s1 +; CHECK-NEXT: fmul s0, s2, s0 +; CHECK-NEXT: fmul s0, s3, s0 +; CHECK-NEXT: ret %t0 = fdiv float %x0, %x1 %t1 = fmul float %x2, %t0 %t2 = fmul float %x3, %t1 ret float %t2 } +define float @reassociate_muls1_reassoc(float %x0, float %x1, float %x2, float %x3) { +; CHECK-LABEL: reassociate_muls1_reassoc: +; CHECK: // %bb.0: +; CHECK-NEXT: fdiv s0, s0, s1 +; CHECK-NEXT: fmul s1, s3, s2 +; CHECK-NEXT: fmul s0, s1, s0 +; CHECK-NEXT: ret + %t0 = fdiv reassoc nsz float %x0, %x1 + %t1 = fmul reassoc nsz float %x2, %t0 + %t2 = fmul reassoc nsz float %x3, %t1 + ret float %t2 +} + ; Verify that scalar double-precision adds are reassociated. 
define double @reassociate_adds_double(double %x0, double %x1, double %x2, double %x3) { -; CHECK-STD-LABEL: reassociate_adds_double: -; CHECK-STD: // %bb.0: -; CHECK-STD-NEXT: fdiv d0, d0, d1 -; CHECK-STD-NEXT: fadd d0, d2, d0 -; CHECK-STD-NEXT: fadd d0, d3, d0 -; CHECK-STD-NEXT: ret -; -; CHECK-UNSAFE-LABEL: reassociate_adds_double: -; CHECK-UNSAFE: // %bb.0: -; CHECK-UNSAFE-NEXT: fdiv d0, d0, d1 -; CHECK-UNSAFE-NEXT: fadd d1, d3, d2 -; CHECK-UNSAFE-NEXT: fadd d0, d1, d0 -; CHECK-UNSAFE-NEXT: ret +; CHECK-LABEL: reassociate_adds_double: +; CHECK: // %bb.0: +; CHECK-NEXT: fdiv d0, d0, d1 +; CHECK-NEXT: fadd d0, d2, d0 +; CHECK-NEXT: fadd d0, d3, d0 +; CHECK-NEXT: ret %t0 = fdiv double %x0, %x1 %t1 = fadd double %x2, %t0 %t2 = fadd double %x3, %t1 ret double %t2 } +define double @reassociate_adds_double_reassoc(double %x0, double %x1, double %x2, double %x3) { +; CHECK-LABEL: reassociate_adds_double_reassoc: +; CHECK: // %bb.0: +; CHECK-NEXT: fdiv d0, d0, d1 +; CHECK-NEXT: fadd d1, d3, d2 +; CHECK-NEXT: fadd d0, d1, d0 +; CHECK-NEXT: ret + %t0 = fdiv reassoc nsz double %x0, %x1 + %t1 = fadd reassoc nsz double %x2, %t0 + %t2 = fadd reassoc nsz double %x3, %t1 + ret double %t2 +} + ; Verify that scalar double-precision multiplies are reassociated. define double @reassociate_muls_double(double %x0, double %x1, double %x2, double %x3) { -; CHECK-STD-LABEL: reassociate_muls_double: -; CHECK-STD: // %bb.0: -; CHECK-STD-NEXT: fdiv d0, d0, d1 -; CHECK-STD-NEXT: fmul d0, d2, d0 -; CHECK-STD-NEXT: fmul d0, d3, d0 -; CHECK-STD-NEXT: ret -; -; CHECK-UNSAFE-LABEL: reassociate_muls_double: -; CHECK-UNSAFE: // %bb.0: -; CHECK-UNSAFE-NEXT: fdiv d0, d0, d1 -; CHECK-UNSAFE-NEXT: fmul d1, d3, d2 -; CHECK-UNSAFE-NEXT: fmul d0, d1, d0 -; CHECK-UNSAFE-NEXT: ret +; CHECK-LABEL: reassociate_muls_double: +; CHECK: // %bb.0: +; CHECK-NEXT: fdiv d0, d0, d1 +; CHECK-NEXT: fmul d0, d2, d0 +; CHECK-NEXT: fmul d0, d3, d0 +; CHECK-NEXT: ret %t0 = fdiv double %x0, %x1 %t1 = fmul double %x2, %t0 %t2 = fmul double %x3, %t1 ret double %t2 } +define double @reassociate_muls_double_reassoc(double %x0, double %x1, double %x2, double %x3) { +; CHECK-LABEL: reassociate_muls_double_reassoc: +; CHECK: // %bb.0: +; CHECK-NEXT: fdiv d0, d0, d1 +; CHECK-NEXT: fmul d1, d3, d2 +; CHECK-NEXT: fmul d0, d1, d0 +; CHECK-NEXT: ret + %t0 = fdiv reassoc nsz double %x0, %x1 + %t1 = fmul reassoc nsz double %x2, %t0 + %t2 = fmul reassoc nsz double %x3, %t1 + ret double %t2 +} + ; Verify that scalar half-precision adds are reassociated. 
define half @reassociate_adds_half(half %x0, half %x1, half %x2, half %x3) { -; CHECK-STD-LABEL: reassociate_adds_half: -; CHECK-STD: // %bb.0: -; CHECK-STD-NEXT: fdiv h0, h0, h1 -; CHECK-STD-NEXT: fadd h0, h2, h0 -; CHECK-STD-NEXT: fadd h0, h3, h0 -; CHECK-STD-NEXT: ret -; -; CHECK-UNSAFE-LABEL: reassociate_adds_half: -; CHECK-UNSAFE: // %bb.0: -; CHECK-UNSAFE-NEXT: fdiv h0, h0, h1 -; CHECK-UNSAFE-NEXT: fadd h1, h3, h2 -; CHECK-UNSAFE-NEXT: fadd h0, h1, h0 -; CHECK-UNSAFE-NEXT: ret +; CHECK-LABEL: reassociate_adds_half: +; CHECK: // %bb.0: +; CHECK-NEXT: fdiv h0, h0, h1 +; CHECK-NEXT: fadd h0, h2, h0 +; CHECK-NEXT: fadd h0, h3, h0 +; CHECK-NEXT: ret %t0 = fdiv half %x0, %x1 %t1 = fadd half %x2, %t0 %t2 = fadd half %x3, %t1 ret half %t2 } +define half @reassociate_adds_half_reassoc(half %x0, half %x1, half %x2, half %x3) { +; CHECK-LABEL: reassociate_adds_half_reassoc: +; CHECK: // %bb.0: +; CHECK-NEXT: fdiv h0, h0, h1 +; CHECK-NEXT: fadd h1, h3, h2 +; CHECK-NEXT: fadd h0, h1, h0 +; CHECK-NEXT: ret + %t0 = fdiv reassoc nsz half %x0, %x1 + %t1 = fadd reassoc nsz half %x2, %t0 + %t2 = fadd reassoc nsz half %x3, %t1 + ret half %t2 +} + ; Verify that scalar half-precision multiplies are reassociated. define half @reassociate_muls_half(half %x0, half %x1, half %x2, half %x3) { -; CHECK-STD-LABEL: reassociate_muls_half: -; CHECK-STD: // %bb.0: -; CHECK-STD-NEXT: fdiv h0, h0, h1 -; CHECK-STD-NEXT: fmul h0, h2, h0 -; CHECK-STD-NEXT: fmul h0, h3, h0 -; CHECK-STD-NEXT: ret -; -; CHECK-UNSAFE-LABEL: reassociate_muls_half: -; CHECK-UNSAFE: // %bb.0: -; CHECK-UNSAFE-NEXT: fdiv h0, h0, h1 -; CHECK-UNSAFE-NEXT: fmul h1, h3, h2 -; CHECK-UNSAFE-NEXT: fmul h0, h1, h0 -; CHECK-UNSAFE-NEXT: ret +; CHECK-LABEL: reassociate_muls_half: +; CHECK: // %bb.0: +; CHECK-NEXT: fdiv h0, h0, h1 +; CHECK-NEXT: fmul h0, h2, h0 +; CHECK-NEXT: fmul h0, h3, h0 +; CHECK-NEXT: ret %t0 = fdiv half %x0, %x1 %t1 = fmul half %x2, %t0 %t2 = fmul half %x3, %t1 ret half %t2 } +define half @reassociate_muls_half_reassoc(half %x0, half %x1, half %x2, half %x3) { +; CHECK-LABEL: reassociate_muls_half_reassoc: +; CHECK: // %bb.0: +; CHECK-NEXT: fdiv h0, h0, h1 +; CHECK-NEXT: fmul h1, h3, h2 +; CHECK-NEXT: fmul h0, h1, h0 +; CHECK-NEXT: ret + %t0 = fdiv reassoc nsz half %x0, %x1 + %t1 = fmul reassoc nsz half %x2, %t0 + %t2 = fmul reassoc nsz half %x3, %t1 + ret half %t2 +} + ; Verify that scalar integer adds are reassociated. define i32 @reassociate_adds_i32(i32 %x0, i32 %x1, i32 %x2, i32 %x3) { @@ -365,173 +414,222 @@ define i32 @reassociate_xors_i32(i32 %x0, i32 %x1, i32 %x2, i32 %x3) { ; Verify that we reassociate vector instructions too. 
define <4 x float> @vector_reassociate_adds1(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) { -; CHECK-STD-LABEL: vector_reassociate_adds1: -; CHECK-STD: // %bb.0: -; CHECK-STD-NEXT: fadd v0.4s, v0.4s, v1.4s -; CHECK-STD-NEXT: fadd v0.4s, v0.4s, v2.4s -; CHECK-STD-NEXT: fadd v0.4s, v0.4s, v3.4s -; CHECK-STD-NEXT: ret -; -; CHECK-UNSAFE-LABEL: vector_reassociate_adds1: -; CHECK-UNSAFE: // %bb.0: -; CHECK-UNSAFE-NEXT: fadd v0.4s, v0.4s, v1.4s -; CHECK-UNSAFE-NEXT: fadd v1.4s, v2.4s, v3.4s -; CHECK-UNSAFE-NEXT: fadd v0.4s, v0.4s, v1.4s -; CHECK-UNSAFE-NEXT: ret +; CHECK-LABEL: vector_reassociate_adds1: +; CHECK: // %bb.0: +; CHECK-NEXT: fadd v0.4s, v0.4s, v1.4s +; CHECK-NEXT: fadd v0.4s, v0.4s, v2.4s +; CHECK-NEXT: fadd v0.4s, v0.4s, v3.4s +; CHECK-NEXT: ret %t0 = fadd <4 x float> %x0, %x1 %t1 = fadd <4 x float> %t0, %x2 %t2 = fadd <4 x float> %t1, %x3 ret <4 x float> %t2 } +define <4 x float> @vector_reassociate_adds1_reassoc(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) { +; CHECK-LABEL: vector_reassociate_adds1_reassoc: +; CHECK: // %bb.0: +; CHECK-NEXT: fadd v0.4s, v0.4s, v1.4s +; CHECK-NEXT: fadd v1.4s, v2.4s, v3.4s +; CHECK-NEXT: fadd v0.4s, v0.4s, v1.4s +; CHECK-NEXT: ret + %t0 = fadd reassoc nsz <4 x float> %x0, %x1 + %t1 = fadd reassoc nsz <4 x float> %t0, %x2 + %t2 = fadd reassoc nsz <4 x float> %t1, %x3 + ret <4 x float> %t2 +} + define <4 x float> @vector_reassociate_adds2(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) { -; CHECK-STD-LABEL: vector_reassociate_adds2: -; CHECK-STD: // %bb.0: -; CHECK-STD-NEXT: fadd v0.4s, v0.4s, v1.4s -; CHECK-STD-NEXT: fadd v0.4s, v2.4s, v0.4s -; CHECK-STD-NEXT: fadd v0.4s, v0.4s, v3.4s -; CHECK-STD-NEXT: ret -; -; CHECK-UNSAFE-LABEL: vector_reassociate_adds2: -; CHECK-UNSAFE: // %bb.0: -; CHECK-UNSAFE-NEXT: fadd v0.4s, v0.4s, v1.4s -; CHECK-UNSAFE-NEXT: fadd v1.4s, v2.4s, v3.4s -; CHECK-UNSAFE-NEXT: fadd v0.4s, v1.4s, v0.4s -; CHECK-UNSAFE-NEXT: ret +; CHECK-LABEL: vector_reassociate_adds2: +; CHECK: // %bb.0: +; CHECK-NEXT: fadd v0.4s, v0.4s, v1.4s +; CHECK-NEXT: fadd v0.4s, v2.4s, v0.4s +; CHECK-NEXT: fadd v0.4s, v0.4s, v3.4s +; CHECK-NEXT: ret %t0 = fadd <4 x float> %x0, %x1 %t1 = fadd <4 x float> %x2, %t0 %t2 = fadd <4 x float> %t1, %x3 ret <4 x float> %t2 } +define <4 x float> @vector_reassociate_adds2_reassoc(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) { +; CHECK-LABEL: vector_reassociate_adds2_reassoc: +; CHECK: // %bb.0: +; CHECK-NEXT: fadd v0.4s, v0.4s, v1.4s +; CHECK-NEXT: fadd v1.4s, v2.4s, v3.4s +; CHECK-NEXT: fadd v0.4s, v1.4s, v0.4s +; CHECK-NEXT: ret + %t0 = fadd reassoc nsz <4 x float> %x0, %x1 + %t1 = fadd reassoc nsz <4 x float> %x2, %t0 + %t2 = fadd reassoc nsz <4 x float> %t1, %x3 + ret <4 x float> %t2 +} + define <4 x float> @vector_reassociate_adds3(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) { -; CHECK-STD-LABEL: vector_reassociate_adds3: -; CHECK-STD: // %bb.0: -; CHECK-STD-NEXT: fadd v0.4s, v0.4s, v1.4s -; CHECK-STD-NEXT: fadd v0.4s, v0.4s, v2.4s -; CHECK-STD-NEXT: fadd v0.4s, v3.4s, v0.4s -; CHECK-STD-NEXT: ret -; -; CHECK-UNSAFE-LABEL: vector_reassociate_adds3: -; CHECK-UNSAFE: // %bb.0: -; CHECK-UNSAFE-NEXT: fadd v0.4s, v0.4s, v1.4s -; CHECK-UNSAFE-NEXT: fadd v1.4s, v3.4s, v2.4s -; CHECK-UNSAFE-NEXT: fadd v0.4s, v1.4s, v0.4s -; CHECK-UNSAFE-NEXT: ret +; CHECK-LABEL: vector_reassociate_adds3: +; CHECK: // %bb.0: +; CHECK-NEXT: fadd v0.4s, v0.4s, v1.4s +; CHECK-NEXT: fadd v0.4s, v0.4s, v2.4s +; CHECK-NEXT: fadd 
v0.4s, v3.4s, v0.4s +; CHECK-NEXT: ret %t0 = fadd <4 x float> %x0, %x1 %t1 = fadd <4 x float> %t0, %x2 %t2 = fadd <4 x float> %x3, %t1 ret <4 x float> %t2 } +define <4 x float> @vector_reassociate_adds3_reassoc(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) { +; CHECK-LABEL: vector_reassociate_adds3_reassoc: +; CHECK: // %bb.0: +; CHECK-NEXT: fadd v0.4s, v0.4s, v1.4s +; CHECK-NEXT: fadd v1.4s, v3.4s, v2.4s +; CHECK-NEXT: fadd v0.4s, v1.4s, v0.4s +; CHECK-NEXT: ret + %t0 = fadd reassoc nsz <4 x float> %x0, %x1 + %t1 = fadd reassoc nsz <4 x float> %t0, %x2 + %t2 = fadd reassoc nsz <4 x float> %x3, %t1 + ret <4 x float> %t2 +} + define <4 x float> @vector_reassociate_adds4(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) { -; CHECK-STD-LABEL: vector_reassociate_adds4: -; CHECK-STD: // %bb.0: -; CHECK-STD-NEXT: fadd v0.4s, v0.4s, v1.4s -; CHECK-STD-NEXT: fadd v0.4s, v2.4s, v0.4s -; CHECK-STD-NEXT: fadd v0.4s, v3.4s, v0.4s -; CHECK-STD-NEXT: ret -; -; CHECK-UNSAFE-LABEL: vector_reassociate_adds4: -; CHECK-UNSAFE: // %bb.0: -; CHECK-UNSAFE-NEXT: fadd v0.4s, v0.4s, v1.4s -; CHECK-UNSAFE-NEXT: fadd v1.4s, v3.4s, v2.4s -; CHECK-UNSAFE-NEXT: fadd v0.4s, v1.4s, v0.4s -; CHECK-UNSAFE-NEXT: ret +; CHECK-LABEL: vector_reassociate_adds4: +; CHECK: // %bb.0: +; CHECK-NEXT: fadd v0.4s, v0.4s, v1.4s +; CHECK-NEXT: fadd v0.4s, v2.4s, v0.4s +; CHECK-NEXT: fadd v0.4s, v3.4s, v0.4s +; CHECK-NEXT: ret %t0 = fadd <4 x float> %x0, %x1 %t1 = fadd <4 x float> %x2, %t0 %t2 = fadd <4 x float> %x3, %t1 ret <4 x float> %t2 } +define <4 x float> @vector_reassociate_adds4_reassoc(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) { +; CHECK-LABEL: vector_reassociate_adds4_reassoc: +; CHECK: // %bb.0: +; CHECK-NEXT: fadd v0.4s, v0.4s, v1.4s +; CHECK-NEXT: fadd v1.4s, v3.4s, v2.4s +; CHECK-NEXT: fadd v0.4s, v1.4s, v0.4s +; CHECK-NEXT: ret + %t0 = fadd reassoc nsz <4 x float> %x0, %x1 + %t1 = fadd reassoc nsz <4 x float> %x2, %t0 + %t2 = fadd reassoc nsz <4 x float> %x3, %t1 + ret <4 x float> %t2 +} + ; Verify that 64-bit vector half-precision adds are reassociated. 
define <4 x half> @reassociate_adds_v4f16(<4 x half> %x0, <4 x half> %x1, <4 x half> %x2, <4 x half> %x3) { -; CHECK-STD-LABEL: reassociate_adds_v4f16: -; CHECK-STD: // %bb.0: -; CHECK-STD-NEXT: fadd v0.4h, v0.4h, v1.4h -; CHECK-STD-NEXT: fadd v0.4h, v2.4h, v0.4h -; CHECK-STD-NEXT: fadd v0.4h, v3.4h, v0.4h -; CHECK-STD-NEXT: ret -; -; CHECK-UNSAFE-LABEL: reassociate_adds_v4f16: -; CHECK-UNSAFE: // %bb.0: -; CHECK-UNSAFE-NEXT: fadd v0.4h, v0.4h, v1.4h -; CHECK-UNSAFE-NEXT: fadd v1.4h, v3.4h, v2.4h -; CHECK-UNSAFE-NEXT: fadd v0.4h, v1.4h, v0.4h -; CHECK-UNSAFE-NEXT: ret +; CHECK-LABEL: reassociate_adds_v4f16: +; CHECK: // %bb.0: +; CHECK-NEXT: fadd v0.4h, v0.4h, v1.4h +; CHECK-NEXT: fadd v0.4h, v2.4h, v0.4h +; CHECK-NEXT: fadd v0.4h, v3.4h, v0.4h +; CHECK-NEXT: ret %t0 = fadd <4 x half> %x0, %x1 %t1 = fadd <4 x half> %x2, %t0 %t2 = fadd <4 x half> %x3, %t1 ret <4 x half> %t2 } +define <4 x half> @reassociate_adds_v4f16_reassoc(<4 x half> %x0, <4 x half> %x1, <4 x half> %x2, <4 x half> %x3) { +; CHECK-LABEL: reassociate_adds_v4f16_reassoc: +; CHECK: // %bb.0: +; CHECK-NEXT: fadd v0.4h, v0.4h, v1.4h +; CHECK-NEXT: fadd v1.4h, v3.4h, v2.4h +; CHECK-NEXT: fadd v0.4h, v1.4h, v0.4h +; CHECK-NEXT: ret + %t0 = fadd reassoc nsz <4 x half> %x0, %x1 + %t1 = fadd reassoc nsz <4 x half> %x2, %t0 + %t2 = fadd reassoc nsz <4 x half> %x3, %t1 + ret <4 x half> %t2 +} + ; Verify that 128-bit vector half-precision multiplies are reassociated. define <8 x half> @reassociate_muls_v8f16(<8 x half> %x0, <8 x half> %x1, <8 x half> %x2, <8 x half> %x3) { -; CHECK-STD-LABEL: reassociate_muls_v8f16: -; CHECK-STD: // %bb.0: -; CHECK-STD-NEXT: fadd v0.8h, v0.8h, v1.8h -; CHECK-STD-NEXT: fmul v0.8h, v2.8h, v0.8h -; CHECK-STD-NEXT: fmul v0.8h, v3.8h, v0.8h -; CHECK-STD-NEXT: ret -; -; CHECK-UNSAFE-LABEL: reassociate_muls_v8f16: -; CHECK-UNSAFE: // %bb.0: -; CHECK-UNSAFE-NEXT: fadd v0.8h, v0.8h, v1.8h -; CHECK-UNSAFE-NEXT: fmul v1.8h, v3.8h, v2.8h -; CHECK-UNSAFE-NEXT: fmul v0.8h, v1.8h, v0.8h -; CHECK-UNSAFE-NEXT: ret +; CHECK-LABEL: reassociate_muls_v8f16: +; CHECK: // %bb.0: +; CHECK-NEXT: fadd v0.8h, v0.8h, v1.8h +; CHECK-NEXT: fmul v0.8h, v2.8h, v0.8h +; CHECK-NEXT: fmul v0.8h, v3.8h, v0.8h +; CHECK-NEXT: ret %t0 = fadd <8 x half> %x0, %x1 %t1 = fmul <8 x half> %x2, %t0 %t2 = fmul <8 x half> %x3, %t1 ret <8 x half> %t2 } +define <8 x half> @reassociate_muls_v8f16_reassoc(<8 x half> %x0, <8 x half> %x1, <8 x half> %x2, <8 x half> %x3) { +; CHECK-LABEL: reassociate_muls_v8f16_reassoc: +; CHECK: // %bb.0: +; CHECK-NEXT: fadd v0.8h, v0.8h, v1.8h +; CHECK-NEXT: fmul v1.8h, v3.8h, v2.8h +; CHECK-NEXT: fmul v0.8h, v1.8h, v0.8h +; CHECK-NEXT: ret + %t0 = fadd reassoc nsz <8 x half> %x0, %x1 + %t1 = fmul reassoc nsz <8 x half> %x2, %t0 + %t2 = fmul reassoc nsz <8 x half> %x3, %t1 + ret <8 x half> %t2 +} + ; Verify that 128-bit vector single-precision multiplies are reassociated. 
define <4 x float> @reassociate_muls_v4f32(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) { -; CHECK-STD-LABEL: reassociate_muls_v4f32: -; CHECK-STD: // %bb.0: -; CHECK-STD-NEXT: fadd v0.4s, v0.4s, v1.4s -; CHECK-STD-NEXT: fmul v0.4s, v2.4s, v0.4s -; CHECK-STD-NEXT: fmul v0.4s, v3.4s, v0.4s -; CHECK-STD-NEXT: ret -; -; CHECK-UNSAFE-LABEL: reassociate_muls_v4f32: -; CHECK-UNSAFE: // %bb.0: -; CHECK-UNSAFE-NEXT: fadd v0.4s, v0.4s, v1.4s -; CHECK-UNSAFE-NEXT: fmul v1.4s, v3.4s, v2.4s -; CHECK-UNSAFE-NEXT: fmul v0.4s, v1.4s, v0.4s -; CHECK-UNSAFE-NEXT: ret +; CHECK-LABEL: reassociate_muls_v4f32: +; CHECK: // %bb.0: +; CHECK-NEXT: fadd v0.4s, v0.4s, v1.4s +; CHECK-NEXT: fmul v0.4s, v2.4s, v0.4s +; CHECK-NEXT: fmul v0.4s, v3.4s, v0.4s +; CHECK-NEXT: ret %t0 = fadd <4 x float> %x0, %x1 %t1 = fmul <4 x float> %x2, %t0 %t2 = fmul <4 x float> %x3, %t1 ret <4 x float> %t2 } +define <4 x float> @reassociate_muls_v4f32_reassoc(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) { +; CHECK-LABEL: reassociate_muls_v4f32_reassoc: +; CHECK: // %bb.0: +; CHECK-NEXT: fadd v0.4s, v0.4s, v1.4s +; CHECK-NEXT: fmul v1.4s, v3.4s, v2.4s +; CHECK-NEXT: fmul v0.4s, v1.4s, v0.4s +; CHECK-NEXT: ret + %t0 = fadd reassoc nsz <4 x float> %x0, %x1 + %t1 = fmul reassoc nsz <4 x float> %x2, %t0 + %t2 = fmul reassoc nsz <4 x float> %x3, %t1 + ret <4 x float> %t2 +} + ; Verify that 128-bit vector double-precision multiplies are reassociated. define <2 x double> @reassociate_muls_v2f64(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, <2 x double> %x3) { -; CHECK-STD-LABEL: reassociate_muls_v2f64: -; CHECK-STD: // %bb.0: -; CHECK-STD-NEXT: fadd v0.2d, v0.2d, v1.2d -; CHECK-STD-NEXT: fmul v0.2d, v2.2d, v0.2d -; CHECK-STD-NEXT: fmul v0.2d, v3.2d, v0.2d -; CHECK-STD-NEXT: ret -; -; CHECK-UNSAFE-LABEL: reassociate_muls_v2f64: -; CHECK-UNSAFE: // %bb.0: -; CHECK-UNSAFE-NEXT: fadd v0.2d, v0.2d, v1.2d -; CHECK-UNSAFE-NEXT: fmul v1.2d, v3.2d, v2.2d -; CHECK-UNSAFE-NEXT: fmul v0.2d, v1.2d, v0.2d -; CHECK-UNSAFE-NEXT: ret +; CHECK-LABEL: reassociate_muls_v2f64: +; CHECK: // %bb.0: +; CHECK-NEXT: fadd v0.2d, v0.2d, v1.2d +; CHECK-NEXT: fmul v0.2d, v2.2d, v0.2d +; CHECK-NEXT: fmul v0.2d, v3.2d, v0.2d +; CHECK-NEXT: ret %t0 = fadd <2 x double> %x0, %x1 %t1 = fmul <2 x double> %x2, %t0 %t2 = fmul <2 x double> %x3, %t1 ret <2 x double> %t2 } +define <2 x double> @reassociate_muls_v2f64_reassoc(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, <2 x double> %x3) { +; CHECK-LABEL: reassociate_muls_v2f64_reassoc: +; CHECK: // %bb.0: +; CHECK-NEXT: fadd v0.2d, v0.2d, v1.2d +; CHECK-NEXT: fmul v1.2d, v3.2d, v2.2d +; CHECK-NEXT: fmul v0.2d, v1.2d, v0.2d +; CHECK-NEXT: ret + %t0 = fadd reassoc nsz <2 x double> %x0, %x1 + %t1 = fmul reassoc nsz <2 x double> %x2, %t0 + %t2 = fmul reassoc nsz <2 x double> %x3, %t1 + ret <2 x double> %t2 +} + + ; Verify that vector integer arithmetic operations are reassociated. define <2 x i32> @reassociate_muls_v2i32(<2 x i32> %x0, <2 x i32> %x1, <2 x i32> %x2, <2 x i32> %x3) { @@ -606,65 +704,83 @@ define <4 x i32> @reassociate_xors_v4i32(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> ; Verify that scalable vector FP arithmetic operations are reassociated. 
define <vscale x 8 x half> @reassociate_adds_nxv4f16(<vscale x 8 x half> %x0, <vscale x 8 x half> %x1, <vscale x 8 x half> %x2, <vscale x 8 x half> %x3) { -; CHECK-STD-LABEL: reassociate_adds_nxv4f16: -; CHECK-STD: // %bb.0: -; CHECK-STD-NEXT: fadd z0.h, z0.h, z1.h -; CHECK-STD-NEXT: fadd z0.h, z2.h, z0.h -; CHECK-STD-NEXT: fadd z0.h, z3.h, z0.h -; CHECK-STD-NEXT: ret -; -; CHECK-UNSAFE-LABEL: reassociate_adds_nxv4f16: -; CHECK-UNSAFE: // %bb.0: -; CHECK-UNSAFE-NEXT: fadd z0.h, z0.h, z1.h -; CHECK-UNSAFE-NEXT: fadd z1.h, z3.h, z2.h -; CHECK-UNSAFE-NEXT: fadd z0.h, z1.h, z0.h -; CHECK-UNSAFE-NEXT: ret +; CHECK-LABEL: reassociate_adds_nxv4f16: +; CHECK: // %bb.0: +; CHECK-NEXT: fadd z0.h, z0.h, z1.h +; CHECK-NEXT: fadd z0.h, z2.h, z0.h +; CHECK-NEXT: fadd z0.h, z3.h, z0.h +; CHECK-NEXT: ret %t0 = fadd reassoc <vscale x 8 x half> %x0, %x1 %t1 = fadd reassoc <vscale x 8 x half> %x2, %t0 %t2 = fadd reassoc <vscale x 8 x half> %x3, %t1 ret <vscale x 8 x half> %t2 } +define <vscale x 8 x half> @reassociate_adds_nxv4f16_nsz(<vscale x 8 x half> %x0, <vscale x 8 x half> %x1, <vscale x 8 x half> %x2, <vscale x 8 x half> %x3) { +; CHECK-LABEL: reassociate_adds_nxv4f16_nsz: +; CHECK: // %bb.0: +; CHECK-NEXT: fadd z0.h, z0.h, z1.h +; CHECK-NEXT: fadd z1.h, z3.h, z2.h +; CHECK-NEXT: fadd z0.h, z1.h, z0.h +; CHECK-NEXT: ret + %t0 = fadd reassoc nsz <vscale x 8 x half> %x0, %x1 + %t1 = fadd reassoc nsz <vscale x 8 x half> %x2, %t0 + %t2 = fadd reassoc nsz <vscale x 8 x half> %x3, %t1 + ret <vscale x 8 x half> %t2 +} + define <vscale x 4 x float> @reassociate_adds_nxv4f32(<vscale x 4 x float> %x0, <vscale x 4 x float> %x1, <vscale x 4 x float> %x2, <vscale x 4 x float> %x3) { -; CHECK-STD-LABEL: reassociate_adds_nxv4f32: -; CHECK-STD: // %bb.0: -; CHECK-STD-NEXT: fadd z0.s, z0.s, z1.s -; CHECK-STD-NEXT: fadd z0.s, z2.s, z0.s -; CHECK-STD-NEXT: fadd z0.s, z3.s, z0.s -; CHECK-STD-NEXT: ret -; -; CHECK-UNSAFE-LABEL: reassociate_adds_nxv4f32: -; CHECK-UNSAFE: // %bb.0: -; CHECK-UNSAFE-NEXT: fadd z0.s, z0.s, z1.s -; CHECK-UNSAFE-NEXT: fadd z1.s, z3.s, z2.s -; CHECK-UNSAFE-NEXT: fadd z0.s, z1.s, z0.s -; CHECK-UNSAFE-NEXT: ret +; CHECK-LABEL: reassociate_adds_nxv4f32: +; CHECK: // %bb.0: +; CHECK-NEXT: fadd z0.s, z0.s, z1.s +; CHECK-NEXT: fadd z0.s, z2.s, z0.s +; CHECK-NEXT: fadd z0.s, z3.s, z0.s +; CHECK-NEXT: ret %t0 = fadd reassoc <vscale x 4 x float> %x0, %x1 %t1 = fadd reassoc <vscale x 4 x float> %x2, %t0 %t2 = fadd reassoc <vscale x 4 x float> %x3, %t1 ret <vscale x 4 x float> %t2 } +define <vscale x 4 x float> @reassociate_adds_nxv4f32_nsz(<vscale x 4 x float> %x0, <vscale x 4 x float> %x1, <vscale x 4 x float> %x2, <vscale x 4 x float> %x3) { +; CHECK-LABEL: reassociate_adds_nxv4f32_nsz: +; CHECK: // %bb.0: +; CHECK-NEXT: fadd z0.s, z0.s, z1.s +; CHECK-NEXT: fadd z1.s, z3.s, z2.s +; CHECK-NEXT: fadd z0.s, z1.s, z0.s +; CHECK-NEXT: ret + %t0 = fadd reassoc nsz <vscale x 4 x float> %x0, %x1 + %t1 = fadd reassoc nsz <vscale x 4 x float> %x2, %t0 + %t2 = fadd reassoc nsz <vscale x 4 x float> %x3, %t1 + ret <vscale x 4 x float> %t2 +} + define <vscale x 2 x double> @reassociate_muls_nxv2f64(<vscale x 2 x double> %x0, <vscale x 2 x double> %x1, <vscale x 2 x double> %x2, <vscale x 2 x double> %x3) { -; CHECK-STD-LABEL: reassociate_muls_nxv2f64: -; CHECK-STD: // %bb.0: -; CHECK-STD-NEXT: fmul z0.d, z0.d, z1.d -; CHECK-STD-NEXT: fmul z0.d, z2.d, z0.d -; CHECK-STD-NEXT: fmul z0.d, z3.d, z0.d -; CHECK-STD-NEXT: ret -; -; CHECK-UNSAFE-LABEL: reassociate_muls_nxv2f64: -; CHECK-UNSAFE: // %bb.0: -; CHECK-UNSAFE-NEXT: 
fmul z0.d, z0.d, z1.d -; CHECK-UNSAFE-NEXT: fmul z1.d, z3.d, z2.d -; CHECK-UNSAFE-NEXT: fmul z0.d, z1.d, z0.d -; CHECK-UNSAFE-NEXT: ret +; CHECK-LABEL: reassociate_muls_nxv2f64: +; CHECK: // %bb.0: +; CHECK-NEXT: fmul z0.d, z0.d, z1.d +; CHECK-NEXT: fmul z0.d, z2.d, z0.d +; CHECK-NEXT: fmul z0.d, z3.d, z0.d +; CHECK-NEXT: ret %t0 = fmul reassoc <vscale x 2 x double> %x0, %x1 %t1 = fmul reassoc <vscale x 2 x double> %x2, %t0 %t2 = fmul reassoc <vscale x 2 x double> %x3, %t1 ret <vscale x 2 x double> %t2 } +define <vscale x 2 x double> @reassociate_muls_nxv2f64_nsz(<vscale x 2 x double> %x0, <vscale x 2 x double> %x1, <vscale x 2 x double> %x2, <vscale x 2 x double> %x3) { +; CHECK-LABEL: reassociate_muls_nxv2f64_nsz: +; CHECK: // %bb.0: +; CHECK-NEXT: fmul z0.d, z0.d, z1.d +; CHECK-NEXT: fmul z1.d, z3.d, z2.d +; CHECK-NEXT: fmul z0.d, z1.d, z0.d +; CHECK-NEXT: ret + %t0 = fmul reassoc nsz <vscale x 2 x double> %x0, %x1 + %t1 = fmul reassoc nsz <vscale x 2 x double> %x2, %t0 + %t2 = fmul reassoc nsz <vscale x 2 x double> %x3, %t1 + ret <vscale x 2 x double> %t2 +} + ; Verify that scalable vector integer arithmetic operations are reassociated. define <vscale x 16 x i8> @reassociate_muls_nxv16i8(<vscale x 16 x i8> %x0, <vscale x 16 x i8> %x1, <vscale x 16 x i8> %x2, <vscale x 16 x i8> %x3) { @@ -753,55 +869,30 @@ define <vscale x 8 x i16> @reassociate_ors_nxv8i16(<vscale x 8 x i16> %x0, <vsca declare double @bar() define double @reassociate_adds_from_calls() { -; CHECK-STD-LABEL: reassociate_adds_from_calls: -; CHECK-STD: // %bb.0: -; CHECK-STD-NEXT: str d10, [sp, #-32]! // 8-byte Folded Spill -; CHECK-STD-NEXT: stp d9, d8, [sp, #8] // 16-byte Folded Spill -; CHECK-STD-NEXT: str x30, [sp, #24] // 8-byte Folded Spill -; CHECK-STD-NEXT: .cfi_def_cfa_offset 32 -; CHECK-STD-NEXT: .cfi_offset w30, -8 -; CHECK-STD-NEXT: .cfi_offset b8, -16 -; CHECK-STD-NEXT: .cfi_offset b9, -24 -; CHECK-STD-NEXT: .cfi_offset b10, -32 -; CHECK-STD-NEXT: bl bar -; CHECK-STD-NEXT: fmov d8, d0 -; CHECK-STD-NEXT: bl bar -; CHECK-STD-NEXT: fmov d9, d0 -; CHECK-STD-NEXT: bl bar -; CHECK-STD-NEXT: fmov d10, d0 -; CHECK-STD-NEXT: bl bar -; CHECK-STD-NEXT: fadd d1, d8, d9 -; CHECK-STD-NEXT: ldp d9, d8, [sp, #8] // 16-byte Folded Reload -; CHECK-STD-NEXT: ldr x30, [sp, #24] // 8-byte Folded Reload -; CHECK-STD-NEXT: fadd d1, d1, d10 -; CHECK-STD-NEXT: fadd d0, d1, d0 -; CHECK-STD-NEXT: ldr d10, [sp], #32 // 8-byte Folded Reload -; CHECK-STD-NEXT: ret -; -; CHECK-UNSAFE-LABEL: reassociate_adds_from_calls: -; CHECK-UNSAFE: // %bb.0: -; CHECK-UNSAFE-NEXT: str d10, [sp, #-32]! 
// 8-byte Folded Spill -; CHECK-UNSAFE-NEXT: stp d9, d8, [sp, #8] // 16-byte Folded Spill -; CHECK-UNSAFE-NEXT: str x30, [sp, #24] // 8-byte Folded Spill -; CHECK-UNSAFE-NEXT: .cfi_def_cfa_offset 32 -; CHECK-UNSAFE-NEXT: .cfi_offset w30, -8 -; CHECK-UNSAFE-NEXT: .cfi_offset b8, -16 -; CHECK-UNSAFE-NEXT: .cfi_offset b9, -24 -; CHECK-UNSAFE-NEXT: .cfi_offset b10, -32 -; CHECK-UNSAFE-NEXT: bl bar -; CHECK-UNSAFE-NEXT: fmov d8, d0 -; CHECK-UNSAFE-NEXT: bl bar -; CHECK-UNSAFE-NEXT: fmov d9, d0 -; CHECK-UNSAFE-NEXT: bl bar -; CHECK-UNSAFE-NEXT: fmov d10, d0 -; CHECK-UNSAFE-NEXT: bl bar -; CHECK-UNSAFE-NEXT: fadd d1, d8, d9 -; CHECK-UNSAFE-NEXT: ldp d9, d8, [sp, #8] // 16-byte Folded Reload -; CHECK-UNSAFE-NEXT: ldr x30, [sp, #24] // 8-byte Folded Reload -; CHECK-UNSAFE-NEXT: fadd d0, d10, d0 -; CHECK-UNSAFE-NEXT: fadd d0, d1, d0 -; CHECK-UNSAFE-NEXT: ldr d10, [sp], #32 // 8-byte Folded Reload -; CHECK-UNSAFE-NEXT: ret +; CHECK-LABEL: reassociate_adds_from_calls: +; CHECK: // %bb.0: +; CHECK-NEXT: str d10, [sp, #-32]! // 8-byte Folded Spill +; CHECK-NEXT: stp d9, d8, [sp, #8] // 16-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #24] // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: .cfi_offset w30, -8 +; CHECK-NEXT: .cfi_offset b8, -16 +; CHECK-NEXT: .cfi_offset b9, -24 +; CHECK-NEXT: .cfi_offset b10, -32 +; CHECK-NEXT: bl bar +; CHECK-NEXT: fmov d8, d0 +; CHECK-NEXT: bl bar +; CHECK-NEXT: fmov d9, d0 +; CHECK-NEXT: bl bar +; CHECK-NEXT: fmov d10, d0 +; CHECK-NEXT: bl bar +; CHECK-NEXT: fadd d1, d8, d9 +; CHECK-NEXT: ldp d9, d8, [sp, #8] // 16-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #24] // 8-byte Folded Reload +; CHECK-NEXT: fadd d1, d1, d10 +; CHECK-NEXT: fadd d0, d1, d0 +; CHECK-NEXT: ldr d10, [sp], #32 // 8-byte Folded Reload +; CHECK-NEXT: ret %x0 = call double @bar() %x1 = call double @bar() %x2 = call double @bar() @@ -812,6 +903,41 @@ define double @reassociate_adds_from_calls() { ret double %t2 } +define double @reassociate_adds_from_calls_reassoc() { +; CHECK-LABEL: reassociate_adds_from_calls_reassoc: +; CHECK: // %bb.0: +; CHECK-NEXT: str d10, [sp, #-32]! 
// 8-byte Folded Spill +; CHECK-NEXT: stp d9, d8, [sp, #8] // 16-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #24] // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: .cfi_offset w30, -8 +; CHECK-NEXT: .cfi_offset b8, -16 +; CHECK-NEXT: .cfi_offset b9, -24 +; CHECK-NEXT: .cfi_offset b10, -32 +; CHECK-NEXT: bl bar +; CHECK-NEXT: fmov d8, d0 +; CHECK-NEXT: bl bar +; CHECK-NEXT: fmov d9, d0 +; CHECK-NEXT: bl bar +; CHECK-NEXT: fmov d10, d0 +; CHECK-NEXT: bl bar +; CHECK-NEXT: fadd d1, d8, d9 +; CHECK-NEXT: ldp d9, d8, [sp, #8] // 16-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #24] // 8-byte Folded Reload +; CHECK-NEXT: fadd d0, d10, d0 +; CHECK-NEXT: fadd d0, d1, d0 +; CHECK-NEXT: ldr d10, [sp], #32 // 8-byte Folded Reload +; CHECK-NEXT: ret + %x0 = call reassoc nsz double @bar() + %x1 = call reassoc nsz double @bar() + %x2 = call reassoc nsz double @bar() + %x3 = call reassoc nsz double @bar() + %t0 = fadd reassoc nsz double %x0, %x1 + %t1 = fadd reassoc nsz double %t0, %x2 + %t2 = fadd reassoc nsz double %t1, %x3 + ret double %t2 +} + define double @already_reassociated() { ; CHECK-LABEL: already_reassociated: ; CHECK: // %bb.0: @@ -846,3 +972,38 @@ define double @already_reassociated() { %t2 = fadd double %t0, %t1 ret double %t2 } + +define double @already_reassociated_reassoc() { +; CHECK-LABEL: already_reassociated_reassoc: +; CHECK: // %bb.0: +; CHECK-NEXT: str d10, [sp, #-32]! // 8-byte Folded Spill +; CHECK-NEXT: stp d9, d8, [sp, #8] // 16-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #24] // 8-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 32 +; CHECK-NEXT: .cfi_offset w30, -8 +; CHECK-NEXT: .cfi_offset b8, -16 +; CHECK-NEXT: .cfi_offset b9, -24 +; CHECK-NEXT: .cfi_offset b10, -32 +; CHECK-NEXT: bl bar +; CHECK-NEXT: fmov d8, d0 +; CHECK-NEXT: bl bar +; CHECK-NEXT: fmov d9, d0 +; CHECK-NEXT: bl bar +; CHECK-NEXT: fmov d10, d0 +; CHECK-NEXT: bl bar +; CHECK-NEXT: fadd d1, d8, d9 +; CHECK-NEXT: ldp d9, d8, [sp, #8] // 16-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp, #24] // 8-byte Folded Reload +; CHECK-NEXT: fadd d0, d10, d0 +; CHECK-NEXT: fadd d0, d1, d0 +; CHECK-NEXT: ldr d10, [sp], #32 // 8-byte Folded Reload +; CHECK-NEXT: ret + %x0 = call reassoc nsz double @bar() + %x1 = call reassoc nsz double @bar() + %x2 = call reassoc nsz double @bar() + %x3 = call reassoc nsz double @bar() + %t0 = fadd reassoc nsz double %x0, %x1 + %t1 = fadd reassoc nsz double %x2, %x3 + %t2 = fadd reassoc nsz double %t0, %t1 + ret double %t2 +} diff --git a/llvm/test/CodeGen/AArch64/machine-combiner.mir b/llvm/test/CodeGen/AArch64/machine-combiner.mir index b967aaa..a0e1280 100644 --- a/llvm/test/CodeGen/AArch64/machine-combiner.mir +++ b/llvm/test/CodeGen/AArch64/machine-combiner.mir @@ -1,4 +1,4 @@ -# RUN: llc -mtriple=aarch64-none-linux-gnu -mcpu=cortex-a57 -enable-unsafe-fp-math \ +# RUN: llc -mtriple=aarch64-none-linux-gnu -mcpu=cortex-a57 \ # RUN: -run-pass machine-combiner -machine-combiner-inc-threshold=0 \ # RUN: -machine-combiner-verify-pattern-order=true -verify-machineinstrs -o - %s | FileCheck %s --- @@ -36,8 +36,8 @@ body: | %6 = ADDWrr %3, killed %5 %7 = SCVTFUWDri killed %6, implicit $fpcr ; CHECK: FMADDDrrr %7, %7, %0, implicit $fpcr - %8 = FMULDrr %7, %7, implicit $fpcr - %9 = FADDDrr %0, killed %8, implicit $fpcr + %8 = contract FMULDrr %7, %7, implicit $fpcr + %9 = contract FADDDrr %0, killed %8, implicit $fpcr $d0 = COPY %9 RET_ReallyLR implicit $d0 diff --git a/llvm/test/CodeGen/AArch64/sched-past-vector-ldst.ll 
b/llvm/test/CodeGen/AArch64/sched-past-vector-ldst.ll index cd53833..fc5012c 100644 --- a/llvm/test/CodeGen/AArch64/sched-past-vector-ldst.ll +++ b/llvm/test/CodeGen/AArch64/sched-past-vector-ldst.ll @@ -23,21 +23,21 @@ entry: %scevgep = getelementptr %Struct, ptr %this, i64 0, i32 2, i64 8, i32 0 %vec1 = tail call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0(ptr %scevgep) %ev1 = extractvalue { <4 x float>, <4 x float> } %vec1, 1 - %fm1 = fmul <4 x float> %f, %ev1 - %av1 = fadd <4 x float> %f, %fm1 + %fm1 = fmul contract <4 x float> %f, %ev1 + %av1 = fadd contract <4 x float> %f, %fm1 %ev2 = extractvalue { <4 x float>, <4 x float> } %vec1, 0 - %fm2 = fmul <4 x float> %f, %ev2 - %av2 = fadd <4 x float> %f, %fm2 + %fm2 = fmul contract <4 x float> %f, %ev2 + %av2 = fadd contract <4 x float> %f, %fm2 %scevgep2 = getelementptr %Struct, ptr %this, i64 0, i32 3, i64 8, i32 0 tail call void @llvm.aarch64.neon.st2.v4f32.p0(<4 x float> %av2, <4 x float> %av1, ptr %scevgep2) %scevgep3 = getelementptr %Struct, ptr %this, i64 0, i32 2, i64 12, i32 0 %vec2 = tail call { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0(ptr %scevgep3) %ev3 = extractvalue { <4 x float>, <4 x float> } %vec2, 1 - %fm3 = fmul <4 x float> %f, %ev3 - %av3 = fadd <4 x float> %f, %fm3 + %fm3 = fmul contract <4 x float> %f, %ev3 + %av3 = fadd contract <4 x float> %f, %fm3 %ev4 = extractvalue { <4 x float>, <4 x float> } %vec2, 0 - %fm4 = fmul <4 x float> %f, %ev4 - %av4 = fadd <4 x float> %f, %fm4 + %fm4 = fmul contract <4 x float> %f, %ev4 + %av4 = fadd contract <4 x float> %f, %fm4 %scevgep4 = getelementptr %Struct, ptr %this, i64 0, i32 3, i64 12, i32 0 tail call void @llvm.aarch64.neon.st2.v4f32.p0(<4 x float> %av4, <4 x float> %av3, ptr %scevgep4) ret void @@ -49,6 +49,6 @@ declare { <4 x float>, <4 x float> } @llvm.aarch64.neon.ld2.v4f32.p0(ptr) #2 ; Function Attrs: nounwind declare void @llvm.aarch64.neon.st2.v4f32.p0(<4 x float>, <4 x float>, ptr nocapture) #1 -attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="true" "use-soft-float"="false" } +attributes #0 = { nounwind "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "stack-protector-buffer-size"="8" "use-soft-float"="false" } attributes #1 = { nounwind } attributes #2 = { nounwind readonly } diff --git a/llvm/test/CodeGen/AArch64/sqrt-fastmath.ll b/llvm/test/CodeGen/AArch64/sqrt-fastmath.ll index f73b4bd..e29993d 100644 --- a/llvm/test/CodeGen/AArch64/sqrt-fastmath.ll +++ b/llvm/test/CodeGen/AArch64/sqrt-fastmath.ll @@ -2,15 +2,15 @@ ; RUN: llc < %s -mtriple=aarch64-unknown-linux-gnu -mattr=+neon,-use-reciprocal-square-root | FileCheck %s --check-prefix=FAULT ; RUN: llc < %s -mtriple=aarch64-unknown-linux-gnu -mattr=+neon,+use-reciprocal-square-root | FileCheck %s -declare float @llvm.sqrt.f32(float) #0 -declare <2 x float> @llvm.sqrt.v2f32(<2 x float>) #0 -declare <4 x float> @llvm.sqrt.v4f32(<4 x float>) #0 -declare <8 x float> @llvm.sqrt.v8f32(<8 x float>) #0 -declare double @llvm.sqrt.f64(double) #0 -declare <2 x double> @llvm.sqrt.v2f64(<2 x double>) #0 -declare <4 x double> @llvm.sqrt.v4f64(<4 x double>) #0 +declare float @llvm.sqrt.f32(float) +declare <2 x float> @llvm.sqrt.v2f32(<2 x float>) +declare <4 x float> @llvm.sqrt.v4f32(<4 x float>) +declare <8 x float> @llvm.sqrt.v8f32(<8 x float>) +declare double @llvm.sqrt.f64(double) +declare <2 x double> 
@llvm.sqrt.v2f64(<2 x double>) +declare <4 x double> @llvm.sqrt.v4f64(<4 x double>) -define float @fsqrt(float %a) #0 { +define float @fsqrt(float %a) { ; FAULT-LABEL: fsqrt: ; FAULT: // %bb.0: ; FAULT-NEXT: fsqrt s0, s0 @@ -33,7 +33,7 @@ define float @fsqrt(float %a) #0 { ret float %1 } -define float @fsqrt_ieee_denorms(float %a) #1 { +define float @fsqrt_ieee_denorms(float %a) #0 { ; FAULT-LABEL: fsqrt_ieee_denorms: ; FAULT: // %bb.0: ; FAULT-NEXT: fsqrt s0, s0 @@ -56,7 +56,7 @@ define float @fsqrt_ieee_denorms(float %a) #1 { ret float %1 } -define <2 x float> @f2sqrt(<2 x float> %a) #0 { +define <2 x float> @f2sqrt(<2 x float> %a) { ; FAULT-LABEL: f2sqrt: ; FAULT: // %bb.0: ; FAULT-NEXT: fsqrt v0.2s, v0.2s @@ -79,7 +79,7 @@ define <2 x float> @f2sqrt(<2 x float> %a) #0 { ret <2 x float> %1 } -define <4 x float> @f4sqrt(<4 x float> %a) #0 { +define <4 x float> @f4sqrt(<4 x float> %a) { ; FAULT-LABEL: f4sqrt: ; FAULT: // %bb.0: ; FAULT-NEXT: fsqrt v0.4s, v0.4s @@ -102,7 +102,7 @@ define <4 x float> @f4sqrt(<4 x float> %a) #0 { ret <4 x float> %1 } -define <8 x float> @f8sqrt(<8 x float> %a) #0 { +define <8 x float> @f8sqrt(<8 x float> %a) { ; FAULT-LABEL: f8sqrt: ; FAULT: // %bb.0: ; FAULT-NEXT: fsqrt v0.4s, v0.4s @@ -136,7 +136,7 @@ define <8 x float> @f8sqrt(<8 x float> %a) #0 { ret <8 x float> %1 } -define double @dsqrt(double %a) #0 { +define double @dsqrt(double %a) { ; FAULT-LABEL: dsqrt: ; FAULT: // %bb.0: ; FAULT-NEXT: fsqrt d0, d0 @@ -162,7 +162,7 @@ define double @dsqrt(double %a) #0 { ret double %1 } -define double @dsqrt_ieee_denorms(double %a) #1 { +define double @dsqrt_ieee_denorms(double %a) #0 { ; FAULT-LABEL: dsqrt_ieee_denorms: ; FAULT: // %bb.0: ; FAULT-NEXT: fsqrt d0, d0 @@ -188,7 +188,7 @@ define double @dsqrt_ieee_denorms(double %a) #1 { ret double %1 } -define <2 x double> @d2sqrt(<2 x double> %a) #0 { +define <2 x double> @d2sqrt(<2 x double> %a) { ; FAULT-LABEL: d2sqrt: ; FAULT: // %bb.0: ; FAULT-NEXT: fsqrt v0.2d, v0.2d @@ -214,7 +214,7 @@ define <2 x double> @d2sqrt(<2 x double> %a) #0 { ret <2 x double> %1 } -define <4 x double> @d4sqrt(<4 x double> %a) #0 { +define <4 x double> @d4sqrt(<4 x double> %a) { ; FAULT-LABEL: d4sqrt: ; FAULT: // %bb.0: ; FAULT-NEXT: fsqrt v0.2d, v0.2d @@ -254,7 +254,7 @@ define <4 x double> @d4sqrt(<4 x double> %a) #0 { ret <4 x double> %1 } -define float @frsqrt(float %a) #0 { +define float @frsqrt(float %a) { ; FAULT-LABEL: frsqrt: ; FAULT: // %bb.0: ; FAULT-NEXT: fsqrt s0, s0 @@ -277,7 +277,7 @@ define float @frsqrt(float %a) #0 { ret float %2 } -define <2 x float> @f2rsqrt(<2 x float> %a) #0 { +define <2 x float> @f2rsqrt(<2 x float> %a) { ; FAULT-LABEL: f2rsqrt: ; FAULT: // %bb.0: ; FAULT-NEXT: fsqrt v0.2s, v0.2s @@ -300,7 +300,7 @@ define <2 x float> @f2rsqrt(<2 x float> %a) #0 { ret <2 x float> %2 } -define <4 x float> @f4rsqrt(<4 x float> %a) #0 { +define <4 x float> @f4rsqrt(<4 x float> %a) { ; FAULT-LABEL: f4rsqrt: ; FAULT: // %bb.0: ; FAULT-NEXT: fsqrt v0.4s, v0.4s @@ -323,7 +323,7 @@ define <4 x float> @f4rsqrt(<4 x float> %a) #0 { ret <4 x float> %2 } -define <8 x float> @f8rsqrt(<8 x float> %a) #0 { +define <8 x float> @f8rsqrt(<8 x float> %a) { ; FAULT-LABEL: f8rsqrt: ; FAULT: // %bb.0: ; FAULT-NEXT: fsqrt v0.4s, v0.4s @@ -355,7 +355,7 @@ define <8 x float> @f8rsqrt(<8 x float> %a) #0 { ret <8 x float> %2 } -define double @drsqrt(double %a) #0 { +define double @drsqrt(double %a) { ; FAULT-LABEL: drsqrt: ; FAULT: // %bb.0: ; FAULT-NEXT: fsqrt d0, d0 @@ -381,7 +381,7 @@ define double @drsqrt(double %a) #0 { ret double %2 
} -define <2 x double> @d2rsqrt(<2 x double> %a) #0 { +define <2 x double> @d2rsqrt(<2 x double> %a) { ; FAULT-LABEL: d2rsqrt: ; FAULT: // %bb.0: ; FAULT-NEXT: fsqrt v0.2d, v0.2d @@ -462,8 +462,8 @@ define double @sqrt_fdiv_common_operand(double %x) nounwind { ; CHECK-NEXT: fmul d1, d1, d2 ; CHECK-NEXT: fmul d2, d1, d1 ; CHECK-NEXT: frsqrts d2, d0, d2 -; CHECK-NEXT: fmul d1, d1, d2 ; CHECK-NEXT: fmul d0, d0, d1 +; CHECK-NEXT: fmul d0, d0, d2 ; CHECK-NEXT: ret %sqrt = call fast double @llvm.sqrt.f64(double %x) %r = fdiv fast double %x, %sqrt @@ -487,8 +487,8 @@ define <2 x double> @sqrt_fdiv_common_operand_vec(<2 x double> %x) nounwind { ; CHECK-NEXT: fmul v1.2d, v1.2d, v2.2d ; CHECK-NEXT: fmul v2.2d, v1.2d, v1.2d ; CHECK-NEXT: frsqrts v2.2d, v0.2d, v2.2d -; CHECK-NEXT: fmul v1.2d, v1.2d, v2.2d ; CHECK-NEXT: fmul v0.2d, v0.2d, v1.2d +; CHECK-NEXT: fmul v0.2d, v0.2d, v2.2d ; CHECK-NEXT: ret %sqrt = call <2 x double> @llvm.sqrt.v2f64(<2 x double> %x) %r = fdiv arcp nsz reassoc <2 x double> %x, %sqrt @@ -513,9 +513,9 @@ define double @sqrt_fdiv_common_operand_extra_use(double %x, ptr %p) nounwind { ; CHECK-NEXT: frsqrts d2, d0, d2 ; CHECK-NEXT: fmul d1, d1, d2 ; CHECK-NEXT: fmul d2, d1, d1 +; CHECK-NEXT: fmul d1, d0, d1 ; CHECK-NEXT: frsqrts d2, d0, d2 ; CHECK-NEXT: fmul d1, d1, d2 -; CHECK-NEXT: fmul d1, d0, d1 ; CHECK-NEXT: fcsel d2, d0, d1, eq ; CHECK-NEXT: fmov d0, d1 ; CHECK-NEXT: str d2, [x0] @@ -671,5 +671,4 @@ define double @sqrt_simplify_before_recip_4_uses(double %x, ptr %p1, ptr %p2, pt ret double %sqrt_fast } -attributes #0 = { "unsafe-fp-math"="true" } -attributes #1 = { "unsafe-fp-math"="true" "denormal-fp-math"="ieee" } +attributes #0 = { "denormal-fp-math"="ieee" } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul-post-legalize.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul-post-legalize.mir index 789385d..b770d43 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul-post-legalize.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul-post-legalize.mir @@ -1,12 +1,8 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 -run-pass=amdgpu-postlegalizer-combiner %s -o - | FileCheck -check-prefix=GFX9 %s -# RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 -run-pass=amdgpu-postlegalizer-combiner -fp-contract=fast %s -o - | FileCheck -check-prefix=GFX9-CONTRACT %s # RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 -run-pass=amdgpu-postlegalizer-combiner --denormal-fp-math=preserve-sign %s -o - | FileCheck -check-prefix=GFX9-DENORM %s -# RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 -run-pass=amdgpu-postlegalizer-combiner -enable-unsafe-fp-math %s -o - | FileCheck -check-prefix=GFX9-UNSAFE %s # RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 -run-pass=amdgpu-postlegalizer-combiner %s -o - | FileCheck -check-prefix=GFX10 %s -# RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 -run-pass=amdgpu-postlegalizer-combiner -fp-contract=fast %s -o - | FileCheck -check-prefix=GFX10-CONTRACT %s # RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 -run-pass=amdgpu-postlegalizer-combiner --denormal-fp-math=preserve-sign %s -o - | FileCheck -check-prefix=GFX10-DENORM %s -# RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 -run-pass=amdgpu-postlegalizer-combiner -enable-unsafe-fp-math %s -o - | FileCheck -check-prefix=GFX10-UNSAFE %s --- name: test_f32_add_mul @@ -24,15 +20,7 @@ body: | ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], 
[[COPY2]] ; GFX9-NEXT: $vgpr0 = COPY [[FADD]](s32) ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 - ; GFX9-CONTRACT-LABEL: name: test_f32_add_mul - ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2 - ; GFX9-CONTRACT-NEXT: {{ $}} - ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] - ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[FMA]](s32) - ; GFX9-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; ; GFX9-DENORM-LABEL: name: test_f32_add_mul ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-DENORM-NEXT: {{ $}} @@ -43,15 +31,7 @@ body: | ; GFX9-DENORM-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[COPY2]] ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[FADD]](s32) ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 - ; GFX9-UNSAFE-LABEL: name: test_f32_add_mul - ; GFX9-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2 - ; GFX9-UNSAFE-NEXT: {{ $}} - ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] - ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[FMA]](s32) - ; GFX9-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; ; GFX10-LABEL: name: test_f32_add_mul ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} @@ -62,15 +42,7 @@ body: | ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[COPY2]] ; GFX10-NEXT: $vgpr0 = COPY [[FADD]](s32) ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 - ; GFX10-CONTRACT-LABEL: name: test_f32_add_mul - ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2 - ; GFX10-CONTRACT-NEXT: {{ $}} - ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] - ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[FMA]](s32) - ; GFX10-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; ; GFX10-DENORM-LABEL: name: test_f32_add_mul ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX10-DENORM-NEXT: {{ $}} @@ -81,15 +53,6 @@ body: | ; GFX10-DENORM-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[COPY2]] ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[FADD]](s32) ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 - ; GFX10-UNSAFE-LABEL: name: test_f32_add_mul - ; GFX10-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2 - ; GFX10-UNSAFE-NEXT: {{ $}} - ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] - ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[FMA]](s32) - ; GFX10-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = COPY $vgpr2 @@ -100,6 +63,60 @@ body: | ... 
--- +name: test_f32_add_mul_contract +body: | + bb.1.entry: + liveins: $vgpr0, $vgpr1, $vgpr2 + + ; GFX9-LABEL: name: test_f32_add_mul_contract + ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] + ; GFX9-NEXT: $vgpr0 = COPY [[FMA]](s32) + ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; + ; GFX9-DENORM-LABEL: name: test_f32_add_mul_contract + ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2 + ; GFX9-DENORM-NEXT: {{ $}} + ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-DENORM-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] + ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[FMA]](s32) + ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; + ; GFX10-LABEL: name: test_f32_add_mul_contract + ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] + ; GFX10-NEXT: $vgpr0 = COPY [[FMA]](s32) + ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; + ; GFX10-DENORM-LABEL: name: test_f32_add_mul_contract + ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2 + ; GFX10-DENORM-NEXT: {{ $}} + ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-DENORM-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] + ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[FMA]](s32) + ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = COPY $vgpr2 + %4:_(s32) = contract G_FMUL %0, %1 + %5:_(s32) = contract G_FADD %4, %2 + $vgpr0 = COPY %5(s32) + S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 +... 
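The added test above is the MIR counterpart of carrying the `contract` fast-math flag in IR rather than relying on a global unsafe-fp-math mode. A minimal IR-level sketch of the same pattern (the function and value names are illustrative, not taken from this patch):

define float @mul_add_contract(float %x, float %y, float %z) {
  ; With contract on both instructions the backend is allowed to fuse the
  ; pair into a single fma; without the flag it must keep them separate.
  %m = fmul contract float %x, %y
  %a = fadd contract float %m, %z
  ret float %a
}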
+ +--- name: test_f32_add_mul_rhs body: | bb.1.entry: @@ -115,15 +132,7 @@ body: | ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[COPY2]], [[FMUL]] ; GFX9-NEXT: $vgpr0 = COPY [[FADD]](s32) ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 - ; GFX9-CONTRACT-LABEL: name: test_f32_add_mul_rhs - ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2 - ; GFX9-CONTRACT-NEXT: {{ $}} - ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] - ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[FMA]](s32) - ; GFX9-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; ; GFX9-DENORM-LABEL: name: test_f32_add_mul_rhs ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-DENORM-NEXT: {{ $}} @@ -134,15 +143,7 @@ body: | ; GFX9-DENORM-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[COPY2]], [[FMUL]] ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[FADD]](s32) ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 - ; GFX9-UNSAFE-LABEL: name: test_f32_add_mul_rhs - ; GFX9-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2 - ; GFX9-UNSAFE-NEXT: {{ $}} - ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] - ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[FMA]](s32) - ; GFX9-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; ; GFX10-LABEL: name: test_f32_add_mul_rhs ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} @@ -153,15 +154,7 @@ body: | ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[COPY2]], [[FMUL]] ; GFX10-NEXT: $vgpr0 = COPY [[FADD]](s32) ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 - ; GFX10-CONTRACT-LABEL: name: test_f32_add_mul_rhs - ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2 - ; GFX10-CONTRACT-NEXT: {{ $}} - ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] - ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[FMA]](s32) - ; GFX10-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; ; GFX10-DENORM-LABEL: name: test_f32_add_mul_rhs ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX10-DENORM-NEXT: {{ $}} @@ -172,15 +165,6 @@ body: | ; GFX10-DENORM-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[COPY2]], [[FMUL]] ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[FADD]](s32) ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 - ; GFX10-UNSAFE-LABEL: name: test_f32_add_mul_rhs - ; GFX10-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2 - ; GFX10-UNSAFE-NEXT: {{ $}} - ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] - ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[FMA]](s32) - ; GFX10-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = COPY $vgpr2 @@ -191,6 +175,60 @@ body: | ... 
--- +name: test_f32_add_mul_rhs_contract +body: | + bb.1.entry: + liveins: $vgpr0, $vgpr1, $vgpr2 + + ; GFX9-LABEL: name: test_f32_add_mul_rhs_contract + ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] + ; GFX9-NEXT: $vgpr0 = COPY [[FMA]](s32) + ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; + ; GFX9-DENORM-LABEL: name: test_f32_add_mul_rhs_contract + ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2 + ; GFX9-DENORM-NEXT: {{ $}} + ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-DENORM-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] + ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[FMA]](s32) + ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; + ; GFX10-LABEL: name: test_f32_add_mul_rhs_contract + ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] + ; GFX10-NEXT: $vgpr0 = COPY [[FMA]](s32) + ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; + ; GFX10-DENORM-LABEL: name: test_f32_add_mul_rhs_contract + ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2 + ; GFX10-DENORM-NEXT: {{ $}} + ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-DENORM-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] + ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[FMA]](s32) + ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = COPY $vgpr2 + %4:_(s32) = contract G_FMUL %0, %1 + %5:_(s32) = contract G_FADD %2, %4 + $vgpr0 = COPY %5(s32) + S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 +... 
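For contrast, the plain GFX9/GFX10 check lines in the surrounding tests correspond to IR that carries no flag at all; a sketch of the unfused form (names again illustrative):

define float @mul_add_strict(float %x, float %y, float %z) {
  ; No contract flag: the post-legalizer combiner keeps G_FMUL + G_FADD,
  ; so the result is the separately rounded multiply followed by the add.
  %m = fmul float %x, %y
  %a = fadd float %z, %m
  ret float %a
}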
+ +--- name: test_add_mul_multiple_defs_z body: | bb.1.entry: @@ -209,18 +247,7 @@ body: | ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UV1]] ; GFX9-NEXT: $vgpr0 = COPY [[FADD]](s32) - ; GFX9-CONTRACT-LABEL: name: test_add_mul_multiple_defs_z - ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX9-CONTRACT-NEXT: {{ $}} - ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-CONTRACT-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX9-CONTRACT-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[MV]](p1) :: (load (<2 x s32>), addrspace 1) - ; GFX9-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) - ; GFX9-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[UV1]] - ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[FMA]](s32) + ; ; GFX9-DENORM-LABEL: name: test_add_mul_multiple_defs_z ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX9-DENORM-NEXT: {{ $}} @@ -234,18 +261,7 @@ body: | ; GFX9-DENORM-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) ; GFX9-DENORM-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UV1]] ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[FADD]](s32) - ; GFX9-UNSAFE-LABEL: name: test_add_mul_multiple_defs_z - ; GFX9-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX9-UNSAFE-NEXT: {{ $}} - ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-UNSAFE-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX9-UNSAFE-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[MV]](p1) :: (load (<2 x s32>), addrspace 1) - ; GFX9-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) - ; GFX9-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[UV1]] - ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[FMA]](s32) + ; ; GFX10-LABEL: name: test_add_mul_multiple_defs_z ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX10-NEXT: {{ $}} @@ -259,18 +275,7 @@ body: | ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UV1]] ; GFX10-NEXT: $vgpr0 = COPY [[FADD]](s32) - ; GFX10-CONTRACT-LABEL: name: test_add_mul_multiple_defs_z - ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX10-CONTRACT-NEXT: {{ $}} - ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-CONTRACT-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX10-CONTRACT-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[MV]](p1) :: (load (<2 x s32>), addrspace 1) - ; GFX10-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) - ; GFX10-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[UV1]] - ; GFX10-CONTRACT-NEXT: 
$vgpr0 = COPY [[FMA]](s32) + ; ; GFX10-DENORM-LABEL: name: test_add_mul_multiple_defs_z ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX10-DENORM-NEXT: {{ $}} @@ -284,18 +289,6 @@ body: | ; GFX10-DENORM-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) ; GFX10-DENORM-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[FMUL]], [[UV1]] ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[FADD]](s32) - ; GFX10-UNSAFE-LABEL: name: test_add_mul_multiple_defs_z - ; GFX10-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX10-UNSAFE-NEXT: {{ $}} - ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-UNSAFE-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX10-UNSAFE-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[MV]](p1) :: (load (<2 x s32>), addrspace 1) - ; GFX10-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) - ; GFX10-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[UV1]] - ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[FMA]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %4:_(s32) = COPY $vgpr2 @@ -310,6 +303,76 @@ body: | ... --- +name: test_add_mul_multiple_defs_z_contract +body: | + bb.1.entry: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + + ; GFX9-LABEL: name: test_add_mul_multiple_defs_z_contract + ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[MV]](p1) :: (load (<2 x s32>), addrspace 1) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) + ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[UV1]] + ; GFX9-NEXT: $vgpr0 = COPY [[FMA]](s32) + ; + ; GFX9-DENORM-LABEL: name: test_add_mul_multiple_defs_z_contract + ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX9-DENORM-NEXT: {{ $}} + ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX9-DENORM-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX9-DENORM-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[MV]](p1) :: (load (<2 x s32>), addrspace 1) + ; GFX9-DENORM-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) + ; GFX9-DENORM-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[UV1]] + ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[FMA]](s32) + ; + ; GFX10-LABEL: name: test_add_mul_multiple_defs_z_contract + ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 x 
s32>) = G_LOAD [[MV]](p1) :: (load (<2 x s32>), addrspace 1) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) + ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[UV1]] + ; GFX10-NEXT: $vgpr0 = COPY [[FMA]](s32) + ; + ; GFX10-DENORM-LABEL: name: test_add_mul_multiple_defs_z_contract + ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX10-DENORM-NEXT: {{ $}} + ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX10-DENORM-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX10-DENORM-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[MV]](p1) :: (load (<2 x s32>), addrspace 1) + ; GFX10-DENORM-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) + ; GFX10-DENORM-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[UV1]] + ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[FMA]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %4:_(s32) = COPY $vgpr2 + %5:_(s32) = COPY $vgpr3 + %2:_(p1) = G_MERGE_VALUES %4(s32), %5(s32) + %6:_(s32) = contract G_FMUL %0, %1 + %7:_(<2 x s32>) = G_LOAD %2(p1) :: (load (<2 x s32>), addrspace 1) + %12:_(s32), %13:_(s32) = G_UNMERGE_VALUES %7(<2 x s32>) + %8:_(s32) = COPY %13(s32) + %10:_(s32) = contract G_FADD %6, %8 + $vgpr0 = COPY %10(s32) +... + +--- name: test_add_mul_rhs_multiple_defs_z body: | bb.1.entry: @@ -328,18 +391,7 @@ body: | ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[UV1]], [[FMUL]] ; GFX9-NEXT: $vgpr0 = COPY [[FADD]](s32) - ; GFX9-CONTRACT-LABEL: name: test_add_mul_rhs_multiple_defs_z - ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX9-CONTRACT-NEXT: {{ $}} - ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-CONTRACT-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX9-CONTRACT-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[MV]](p1) :: (load (<2 x s32>), addrspace 1) - ; GFX9-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) - ; GFX9-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[UV1]] - ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[FMA]](s32) + ; ; GFX9-DENORM-LABEL: name: test_add_mul_rhs_multiple_defs_z ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX9-DENORM-NEXT: {{ $}} @@ -353,18 +405,7 @@ body: | ; GFX9-DENORM-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) ; GFX9-DENORM-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[UV1]], [[FMUL]] ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[FADD]](s32) - ; GFX9-UNSAFE-LABEL: name: test_add_mul_rhs_multiple_defs_z - ; GFX9-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX9-UNSAFE-NEXT: {{ $}} - ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-UNSAFE-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), 
[[COPY3]](s32) - ; GFX9-UNSAFE-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[MV]](p1) :: (load (<2 x s32>), addrspace 1) - ; GFX9-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) - ; GFX9-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[UV1]] - ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[FMA]](s32) + ; ; GFX10-LABEL: name: test_add_mul_rhs_multiple_defs_z ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX10-NEXT: {{ $}} @@ -378,18 +419,7 @@ body: | ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[UV1]], [[FMUL]] ; GFX10-NEXT: $vgpr0 = COPY [[FADD]](s32) - ; GFX10-CONTRACT-LABEL: name: test_add_mul_rhs_multiple_defs_z - ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX10-CONTRACT-NEXT: {{ $}} - ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-CONTRACT-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX10-CONTRACT-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[MV]](p1) :: (load (<2 x s32>), addrspace 1) - ; GFX10-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) - ; GFX10-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[UV1]] - ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[FMA]](s32) + ; ; GFX10-DENORM-LABEL: name: test_add_mul_rhs_multiple_defs_z ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 ; GFX10-DENORM-NEXT: {{ $}} @@ -403,18 +433,6 @@ body: | ; GFX10-DENORM-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) ; GFX10-DENORM-NEXT: [[FADD:%[0-9]+]]:_(s32) = G_FADD [[UV1]], [[FMUL]] ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[FADD]](s32) - ; GFX10-UNSAFE-LABEL: name: test_add_mul_rhs_multiple_defs_z - ; GFX10-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 - ; GFX10-UNSAFE-NEXT: {{ $}} - ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-UNSAFE-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX10-UNSAFE-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[MV]](p1) :: (load (<2 x s32>), addrspace 1) - ; GFX10-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) - ; GFX10-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[UV1]] - ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[FMA]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %4:_(s32) = COPY $vgpr2 @@ -429,6 +447,76 @@ body: | ... 
--- +name: test_add_mul_rhs_multiple_defs_z_contract +body: | + bb.1.entry: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + + ; GFX9-LABEL: name: test_add_mul_rhs_multiple_defs_z_contract + ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX9-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[MV]](p1) :: (load (<2 x s32>), addrspace 1) + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) + ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[UV1]] + ; GFX9-NEXT: $vgpr0 = COPY [[FMA]](s32) + ; + ; GFX9-DENORM-LABEL: name: test_add_mul_rhs_multiple_defs_z_contract + ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX9-DENORM-NEXT: {{ $}} + ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX9-DENORM-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX9-DENORM-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[MV]](p1) :: (load (<2 x s32>), addrspace 1) + ; GFX9-DENORM-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) + ; GFX9-DENORM-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[UV1]] + ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[FMA]](s32) + ; + ; GFX10-LABEL: name: test_add_mul_rhs_multiple_defs_z_contract + ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX10-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[MV]](p1) :: (load (<2 x s32>), addrspace 1) + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) + ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[UV1]] + ; GFX10-NEXT: $vgpr0 = COPY [[FMA]](s32) + ; + ; GFX10-DENORM-LABEL: name: test_add_mul_rhs_multiple_defs_z_contract + ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3 + ; GFX10-DENORM-NEXT: {{ $}} + ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX10-DENORM-NEXT: [[MV:%[0-9]+]]:_(p1) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX10-DENORM-NEXT: [[LOAD:%[0-9]+]]:_(<2 x s32>) = G_LOAD [[MV]](p1) :: (load (<2 x s32>), addrspace 1) + ; GFX10-DENORM-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[LOAD]](<2 x s32>) + ; GFX10-DENORM-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[UV1]] + ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[FMA]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %4:_(s32) = COPY $vgpr2 + %5:_(s32) = COPY $vgpr3 + %2:_(p1) = G_MERGE_VALUES %4(s32), %5(s32) + %6:_(s32) = contract G_FMUL %0, %1 + %7:_(<2 x s32>) = G_LOAD %2(p1) :: 
(load (<2 x s32>), addrspace 1) + %12:_(s32), %13:_(s32) = G_UNMERGE_VALUES %7(<2 x s32>) + %8:_(s32) = COPY %13(s32) + %10:_(s32) = contract G_FADD %8, %6 + $vgpr0 = COPY %10(s32) +... + +--- name: test_half_add_mul body: | bb.1.entry: @@ -448,19 +536,7 @@ body: | ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16) ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 - ; GFX9-CONTRACT-LABEL: name: test_half_add_mul - ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2 - ; GFX9-CONTRACT-NEXT: {{ $}} - ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-CONTRACT-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-CONTRACT-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-CONTRACT-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX9-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] - ; GFX9-CONTRACT-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) - ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX9-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; ; GFX9-DENORM-LABEL: name: test_half_add_mul ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-DENORM-NEXT: {{ $}} @@ -475,19 +551,7 @@ body: | ; GFX9-DENORM-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16) ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 - ; GFX9-UNSAFE-LABEL: name: test_half_add_mul - ; GFX9-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2 - ; GFX9-UNSAFE-NEXT: {{ $}} - ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-UNSAFE-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-UNSAFE-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-UNSAFE-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX9-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] - ; GFX9-UNSAFE-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) - ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX9-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; ; GFX10-LABEL: name: test_half_add_mul ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} @@ -502,19 +566,7 @@ body: | ; GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16) ; GFX10-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 - ; GFX10-CONTRACT-LABEL: name: test_half_add_mul - ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2 - ; GFX10-CONTRACT-NEXT: {{ $}} - ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-CONTRACT-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-CONTRACT-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-CONTRACT-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX10-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] - ; GFX10-CONTRACT-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) - ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX10-CONTRACT-NEXT: S_SETPC_B64_return 
$sgpr30_sgpr31, implicit $vgpr0 + ; ; GFX10-DENORM-LABEL: name: test_half_add_mul ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX10-DENORM-NEXT: {{ $}} @@ -529,19 +581,6 @@ body: | ; GFX10-DENORM-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16) ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 - ; GFX10-UNSAFE-LABEL: name: test_half_add_mul - ; GFX10-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2 - ; GFX10-UNSAFE-NEXT: {{ $}} - ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-UNSAFE-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-UNSAFE-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-UNSAFE-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX10-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] - ; GFX10-UNSAFE-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) - ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX10-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 %4:_(s32) = COPY $vgpr0 %0:_(s16) = G_TRUNC %4(s32) %5:_(s32) = COPY $vgpr1 @@ -556,6 +595,80 @@ body: | ... --- +name: test_half_add_mul_contract +body: | + bb.1.entry: + liveins: $vgpr0, $vgpr1, $vgpr2 + + ; GFX9-LABEL: name: test_half_add_mul_contract + ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; + ; GFX9-DENORM-LABEL: name: test_half_add_mul_contract + ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2 + ; GFX9-DENORM-NEXT: {{ $}} + ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-DENORM-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-DENORM-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-DENORM-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; GFX9-DENORM-NEXT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] + ; GFX9-DENORM-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) + ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; + ; GFX10-LABEL: name: test_half_add_mul_contract + ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] + ; GFX10-NEXT: 
[[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) + ; GFX10-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; + ; GFX10-DENORM-LABEL: name: test_half_add_mul_contract + ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2 + ; GFX10-DENORM-NEXT: {{ $}} + ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-DENORM-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-DENORM-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-DENORM-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; GFX10-DENORM-NEXT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] + ; GFX10-DENORM-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) + ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + %4:_(s32) = COPY $vgpr0 + %0:_(s16) = G_TRUNC %4(s32) + %5:_(s32) = COPY $vgpr1 + %1:_(s16) = G_TRUNC %5(s32) + %6:_(s32) = COPY $vgpr2 + %2:_(s16) = G_TRUNC %6(s32) + %7:_(s16) = contract G_FMUL %0, %1 + %8:_(s16) = contract G_FADD %7, %2 + %10:_(s32) = G_ANYEXT %8(s16) + $vgpr0 = COPY %10(s32) + S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 +... + +--- name: test_half_add_mul_rhs body: | bb.1.entry: @@ -575,19 +688,7 @@ body: | ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16) ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 - ; GFX9-CONTRACT-LABEL: name: test_half_add_mul_rhs - ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2 - ; GFX9-CONTRACT-NEXT: {{ $}} - ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-CONTRACT-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-CONTRACT-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-CONTRACT-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX9-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] - ; GFX9-CONTRACT-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) - ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX9-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; ; GFX9-DENORM-LABEL: name: test_half_add_mul_rhs ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-DENORM-NEXT: {{ $}} @@ -602,19 +703,7 @@ body: | ; GFX9-DENORM-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16) ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 - ; GFX9-UNSAFE-LABEL: name: test_half_add_mul_rhs - ; GFX9-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2 - ; GFX9-UNSAFE-NEXT: {{ $}} - ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-UNSAFE-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-UNSAFE-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-UNSAFE-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX9-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] - ; GFX9-UNSAFE-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) - ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - ; 
GFX9-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; ; GFX10-LABEL: name: test_half_add_mul_rhs ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} @@ -629,19 +718,7 @@ body: | ; GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16) ; GFX10-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 - ; GFX10-CONTRACT-LABEL: name: test_half_add_mul_rhs - ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2 - ; GFX10-CONTRACT-NEXT: {{ $}} - ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-CONTRACT-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-CONTRACT-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-CONTRACT-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX10-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] - ; GFX10-CONTRACT-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) - ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX10-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; ; GFX10-DENORM-LABEL: name: test_half_add_mul_rhs ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX10-DENORM-NEXT: {{ $}} @@ -656,19 +733,6 @@ body: | ; GFX10-DENORM-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16) ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 - ; GFX10-UNSAFE-LABEL: name: test_half_add_mul_rhs - ; GFX10-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2 - ; GFX10-UNSAFE-NEXT: {{ $}} - ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-UNSAFE-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-UNSAFE-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-UNSAFE-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX10-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] - ; GFX10-UNSAFE-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) - ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX10-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 %4:_(s32) = COPY $vgpr0 %0:_(s16) = G_TRUNC %4(s32) %5:_(s32) = COPY $vgpr1 @@ -683,6 +747,80 @@ body: | ... 
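A front end that wants contraction for one specific operation, without attaching fast-math flags, can also express it through the llvm.fmuladd intrinsic; a small f16 sketch (hypothetical function name) of a request that lowers to the same G_FMA these half tests check for:

declare half @llvm.fmuladd.f16(half, half, half)

define half @fmuladd_half(half %x, half %y, half %z) {
  ; llvm.fmuladd states the fusion request directly in the IR,
  ; independent of per-instruction fast-math flags.
  %r = call half @llvm.fmuladd.f16(half %x, half %y, half %z)
  ret half %r
}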
--- +name: test_half_add_mul_rhs_contract +body: | + bb.1.entry: + liveins: $vgpr0, $vgpr1, $vgpr2 + + ; GFX9-LABEL: name: test_half_add_mul_rhs_contract + ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; + ; GFX9-DENORM-LABEL: name: test_half_add_mul_rhs_contract + ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2 + ; GFX9-DENORM-NEXT: {{ $}} + ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-DENORM-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-DENORM-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-DENORM-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; GFX9-DENORM-NEXT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] + ; GFX9-DENORM-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) + ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; + ; GFX10-LABEL: name: test_half_add_mul_rhs_contract + ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] + ; GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) + ; GFX10-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; + ; GFX10-DENORM-LABEL: name: test_half_add_mul_rhs_contract + ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2 + ; GFX10-DENORM-NEXT: {{ $}} + ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-DENORM-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-DENORM-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-DENORM-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; GFX10-DENORM-NEXT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] + ; GFX10-DENORM-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) + ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + %4:_(s32) = COPY $vgpr0 + %0:_(s16) = G_TRUNC %4(s32) + %5:_(s32) = COPY $vgpr1 + %1:_(s16) = G_TRUNC %5(s32) + %6:_(s32) = COPY $vgpr2 + %2:_(s16) = G_TRUNC %6(s32) + %7:_(s16) = contract G_FMUL %0, %1 + %8:_(s16) = contract G_FADD %2, %7 + %10:_(s32) = G_ANYEXT %8(s16) + $vgpr0 = COPY %10(s32) + 
S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 +... + +--- name: test_double_add_mul body: | bb.1.entry: @@ -706,23 +844,7 @@ body: | ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 - ; GFX9-CONTRACT-LABEL: name: test_double_add_mul - ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 - ; GFX9-CONTRACT-NEXT: {{ $}} - ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-CONTRACT-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-CONTRACT-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX9-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-CONTRACT-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX9-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] - ; GFX9-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) - ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; ; GFX9-DENORM-LABEL: name: test_double_add_mul ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX9-DENORM-NEXT: {{ $}} @@ -741,23 +863,7 @@ body: | ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[UV]](s32) ; GFX9-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32) ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 - ; GFX9-UNSAFE-LABEL: name: test_double_add_mul - ; GFX9-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 - ; GFX9-UNSAFE-NEXT: {{ $}} - ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-UNSAFE-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-UNSAFE-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX9-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-UNSAFE-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX9-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] - ; GFX9-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) - ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; ; GFX10-LABEL: name: test_double_add_mul ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX10-NEXT: {{ $}} @@ -776,23 +882,7 @@ body: | ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 - ; GFX10-CONTRACT-LABEL: name: test_double_add_mul - ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 - ; GFX10-CONTRACT-NEXT: {{ $}} - ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - 
; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-CONTRACT-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-CONTRACT-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX10-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-CONTRACT-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX10-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] - ; GFX10-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) - ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; ; GFX10-DENORM-LABEL: name: test_double_add_mul ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX10-DENORM-NEXT: {{ $}} @@ -811,23 +901,6 @@ body: | ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[UV]](s32) ; GFX10-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32) ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 - ; GFX10-UNSAFE-LABEL: name: test_double_add_mul - ; GFX10-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 - ; GFX10-UNSAFE-NEXT: {{ $}} - ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-UNSAFE-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-UNSAFE-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX10-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-UNSAFE-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX10-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] - ; GFX10-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) - ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 %4:_(s32) = COPY $vgpr0 %5:_(s32) = COPY $vgpr1 %0:_(s64) = G_MERGE_VALUES %4(s32), %5(s32) @@ -846,6 +919,101 @@ body: | ... 
--- +name: test_double_add_mul_contract +body: | + bb.1.entry: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + + ; GFX9-LABEL: name: test_double_add_mul_contract + ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) + ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; + ; GFX9-DENORM-LABEL: name: test_double_add_mul_contract + ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + ; GFX9-DENORM-NEXT: {{ $}} + ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-DENORM-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX9-DENORM-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX9-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX9-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX9-DENORM-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX9-DENORM-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] + ; GFX9-DENORM-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) + ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX9-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; + ; GFX10-LABEL: name: test_double_add_mul_contract + ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) + ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; + ; GFX10-DENORM-LABEL: name: test_double_add_mul_contract + ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + ; 
GFX10-DENORM-NEXT: {{ $}} + ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-DENORM-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX10-DENORM-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX10-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX10-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX10-DENORM-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX10-DENORM-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] + ; GFX10-DENORM-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) + ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX10-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + %4:_(s32) = COPY $vgpr0 + %5:_(s32) = COPY $vgpr1 + %0:_(s64) = G_MERGE_VALUES %4(s32), %5(s32) + %6:_(s32) = COPY $vgpr2 + %7:_(s32) = COPY $vgpr3 + %1:_(s64) = G_MERGE_VALUES %6(s32), %7(s32) + %8:_(s32) = COPY $vgpr4 + %9:_(s32) = COPY $vgpr5 + %2:_(s64) = G_MERGE_VALUES %8(s32), %9(s32) + %10:_(s64) = contract G_FMUL %0, %1 + %11:_(s64) = contract G_FADD %10, %2 + %13:_(s32), %14:_(s32) = G_UNMERGE_VALUES %11(s64) + $vgpr0 = COPY %13(s32) + $vgpr1 = COPY %14(s32) + S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 +... + + +--- name: test_double_add_mul_rhs body: | bb.1.entry: @@ -869,23 +1037,7 @@ body: | ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 - ; GFX9-CONTRACT-LABEL: name: test_double_add_mul_rhs - ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 - ; GFX9-CONTRACT-NEXT: {{ $}} - ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-CONTRACT-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-CONTRACT-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX9-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-CONTRACT-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX9-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] - ; GFX9-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) - ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; ; GFX9-DENORM-LABEL: name: test_double_add_mul_rhs ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX9-DENORM-NEXT: {{ $}} @@ -904,23 +1056,7 @@ body: | ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[UV]](s32) ; GFX9-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32) ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 - ; GFX9-UNSAFE-LABEL: name: test_double_add_mul_rhs - ; GFX9-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 - ; GFX9-UNSAFE-NEXT: 
{{ $}} - ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-UNSAFE-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-UNSAFE-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX9-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-UNSAFE-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX9-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] - ; GFX9-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) - ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; ; GFX10-LABEL: name: test_double_add_mul_rhs ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX10-NEXT: {{ $}} @@ -939,23 +1075,7 @@ body: | ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 - ; GFX10-CONTRACT-LABEL: name: test_double_add_mul_rhs - ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 - ; GFX10-CONTRACT-NEXT: {{ $}} - ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-CONTRACT-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-CONTRACT-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX10-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-CONTRACT-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX10-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] - ; GFX10-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) - ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; ; GFX10-DENORM-LABEL: name: test_double_add_mul_rhs ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX10-DENORM-NEXT: {{ $}} @@ -974,23 +1094,6 @@ body: | ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[UV]](s32) ; GFX10-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32) ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 - ; GFX10-UNSAFE-LABEL: name: test_double_add_mul_rhs - ; GFX10-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 - ; GFX10-UNSAFE-NEXT: {{ $}} - ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-UNSAFE-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-UNSAFE-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX10-UNSAFE-NEXT: 
[[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-UNSAFE-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX10-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] - ; GFX10-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) - ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 %4:_(s32) = COPY $vgpr0 %5:_(s32) = COPY $vgpr1 %0:_(s64) = G_MERGE_VALUES %4(s32), %5(s32) @@ -1009,6 +1112,100 @@ body: | ... --- +name: test_double_add_mul_rhs_contract +body: | + bb.1.entry: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + + ; GFX9-LABEL: name: test_double_add_mul_rhs_contract + ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) + ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; + ; GFX9-DENORM-LABEL: name: test_double_add_mul_rhs_contract + ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + ; GFX9-DENORM-NEXT: {{ $}} + ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-DENORM-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX9-DENORM-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX9-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX9-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX9-DENORM-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX9-DENORM-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] + ; GFX9-DENORM-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) + ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX9-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; + ; GFX10-LABEL: name: test_double_add_mul_rhs_contract + ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = 
G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) + ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; + ; GFX10-DENORM-LABEL: name: test_double_add_mul_rhs_contract + ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + ; GFX10-DENORM-NEXT: {{ $}} + ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-DENORM-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX10-DENORM-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX10-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX10-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX10-DENORM-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX10-DENORM-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] + ; GFX10-DENORM-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) + ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX10-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + %4:_(s32) = COPY $vgpr0 + %5:_(s32) = COPY $vgpr1 + %0:_(s64) = G_MERGE_VALUES %4(s32), %5(s32) + %6:_(s32) = COPY $vgpr2 + %7:_(s32) = COPY $vgpr3 + %1:_(s64) = G_MERGE_VALUES %6(s32), %7(s32) + %8:_(s32) = COPY $vgpr4 + %9:_(s32) = COPY $vgpr5 + %2:_(s64) = G_MERGE_VALUES %8(s32), %9(s32) + %10:_(s64) = contract G_FMUL %0, %1 + %11:_(s64) = contract G_FADD %2, %10 + %13:_(s32), %14:_(s32) = G_UNMERGE_VALUES %11(s64) + $vgpr0 = COPY %13(s32) + $vgpr1 = COPY %14(s32) + S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 +... 
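Note: the _rhs variants feed the multiply into the right-hand operand of the add. A minimal sketch of what the combine is expected to do with either operand order (illustrative, not literal check lines):

  %10:_(s64) = contract G_FMUL %0, %1
  %11:_(s64) = contract G_FADD %2, %10    ; mul on the RHS of the add
  ; ...combines to the same fused form as the LHS case:
  %11:_(s64) = G_FMA %0, %1, %2

FADD is commutative, so both operand orders must produce the identical G_FMA seen in the check lines above.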
+ +--- name: test_4xfloat_add_mul body: | bb.1.entry: @@ -1040,32 +1237,7 @@ body: | ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 - ; GFX9-CONTRACT-LABEL: name: test_4xfloat_add_mul - ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10 - ; GFX9-CONTRACT-NEXT: {{ $}} - ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-CONTRACT-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX9-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-CONTRACT-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX9-CONTRACT-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX9-CONTRACT-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-CONTRACT-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX9-CONTRACT-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX9-CONTRACT-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GFX9-CONTRACT-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GFX9-CONTRACT-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-CONTRACT-NEXT: [[FMUL:%[0-9]+]]:_(<4 x s32>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] - ; GFX9-CONTRACT-NEXT: [[FADD:%[0-9]+]]:_(<4 x s32>) = G_FADD [[FMUL]], [[BUILD_VECTOR2]] - ; GFX9-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s32>) - ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-CONTRACT-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-CONTRACT-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; GFX9-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; ; GFX9-DENORM-LABEL: name: test_4xfloat_add_mul ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10 ; GFX9-DENORM-NEXT: {{ $}} @@ -1092,32 +1264,7 @@ body: | ; GFX9-DENORM-NEXT: $vgpr2 = COPY [[UV2]](s32) ; GFX9-DENORM-NEXT: $vgpr3 = COPY [[UV3]](s32) ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 - ; GFX9-UNSAFE-LABEL: name: test_4xfloat_add_mul - ; GFX9-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10 - ; GFX9-UNSAFE-NEXT: {{ $}} - ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX9-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-UNSAFE-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; 
GFX9-UNSAFE-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-UNSAFE-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX9-UNSAFE-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX9-UNSAFE-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GFX9-UNSAFE-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-UNSAFE-NEXT: [[FMUL:%[0-9]+]]:_(<4 x s32>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] - ; GFX9-UNSAFE-NEXT: [[FADD:%[0-9]+]]:_(<4 x s32>) = G_FADD [[FMUL]], [[BUILD_VECTOR2]] - ; GFX9-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s32>) - ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-UNSAFE-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-UNSAFE-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; GFX9-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; ; GFX10-LABEL: name: test_4xfloat_add_mul ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10 ; GFX10-NEXT: {{ $}} @@ -1144,32 +1291,7 @@ body: | ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 - ; GFX10-CONTRACT-LABEL: name: test_4xfloat_add_mul - ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10 - ; GFX10-CONTRACT-NEXT: {{ $}} - ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-CONTRACT-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX10-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-CONTRACT-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-CONTRACT-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX10-CONTRACT-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-CONTRACT-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX10-CONTRACT-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX10-CONTRACT-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GFX10-CONTRACT-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GFX10-CONTRACT-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-CONTRACT-NEXT: [[FMUL:%[0-9]+]]:_(<4 x s32>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] - ; GFX10-CONTRACT-NEXT: [[FADD:%[0-9]+]]:_(<4 x s32>) = G_FADD [[FMUL]], [[BUILD_VECTOR2]] - ; GFX10-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s32>) - ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-CONTRACT-NEXT: $vgpr2 = COPY 
[[UV2]](s32) - ; GFX10-CONTRACT-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; GFX10-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; ; GFX10-DENORM-LABEL: name: test_4xfloat_add_mul ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10 ; GFX10-DENORM-NEXT: {{ $}} @@ -1196,32 +1318,6 @@ body: | ; GFX10-DENORM-NEXT: $vgpr2 = COPY [[UV2]](s32) ; GFX10-DENORM-NEXT: $vgpr3 = COPY [[UV3]](s32) ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 - ; GFX10-UNSAFE-LABEL: name: test_4xfloat_add_mul - ; GFX10-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10 - ; GFX10-UNSAFE-NEXT: {{ $}} - ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-UNSAFE-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX10-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-UNSAFE-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-UNSAFE-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX10-UNSAFE-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-UNSAFE-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX10-UNSAFE-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX10-UNSAFE-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GFX10-UNSAFE-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GFX10-UNSAFE-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-UNSAFE-NEXT: [[FMUL:%[0-9]+]]:_(<4 x s32>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] - ; GFX10-UNSAFE-NEXT: [[FADD:%[0-9]+]]:_(<4 x s32>) = G_FADD [[FMUL]], [[BUILD_VECTOR2]] - ; GFX10-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s32>) - ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-UNSAFE-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-UNSAFE-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; GFX10-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %4:_(s32) = COPY $vgpr0 %5:_(s32) = COPY $vgpr1 %6:_(s32) = COPY $vgpr2 @@ -1248,6 +1344,144 @@ body: | ... 
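Note: unlike the scalar double cases, the <4 x s32> contract test below expects the mul/add pair to survive this combine with its flags intact, e.g.:

  %16:_(<4 x s32>) = contract G_FMUL %0, %1
  %17:_(<4 x s32>) = contract G_FADD %16, %2

Keeping `contract` on both instructions preserves the option of forming FMAs later in the pipeline (presumably at instruction selection); the check lines for every prefix verify the flags are not dropped.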
--- +name: test_4xfloat_add_mul_contract +body: | + bb.1.entry: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10 + + ; GFX9-LABEL: name: test_4xfloat_add_mul_contract + ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(<4 x s32>) = contract G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] + ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(<4 x s32>) = contract G_FADD [[FMUL]], [[BUILD_VECTOR2]] + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s32>) + ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; + ; GFX9-DENORM-LABEL: name: test_4xfloat_add_mul_contract + ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10 + ; GFX9-DENORM-NEXT: {{ $}} + ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX9-DENORM-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GFX9-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX9-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX9-DENORM-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX9-DENORM-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX9-DENORM-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX9-DENORM-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX9-DENORM-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; GFX9-DENORM-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; GFX9-DENORM-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; GFX9-DENORM-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX9-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(<4 x s32>) = contract G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] + ; GFX9-DENORM-NEXT: [[FADD:%[0-9]+]]:_(<4 x s32>) = contract 
G_FADD [[FMUL]], [[BUILD_VECTOR2]] + ; GFX9-DENORM-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s32>) + ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX9-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX9-DENORM-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GFX9-DENORM-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; + ; GFX10-LABEL: name: test_4xfloat_add_mul_contract + ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(<4 x s32>) = contract G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] + ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(<4 x s32>) = contract G_FADD [[FMUL]], [[BUILD_VECTOR2]] + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s32>) + ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; + ; GFX10-DENORM-LABEL: name: test_4xfloat_add_mul_contract + ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10 + ; GFX10-DENORM-NEXT: {{ $}} + ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX10-DENORM-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GFX10-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX10-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX10-DENORM-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX10-DENORM-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX10-DENORM-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX10-DENORM-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX10-DENORM-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; GFX10-DENORM-NEXT: [[COPY10:%[0-9]+]]:_(s32) 
= COPY $vgpr10 + ; GFX10-DENORM-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; GFX10-DENORM-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX10-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(<4 x s32>) = contract G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] + ; GFX10-DENORM-NEXT: [[FADD:%[0-9]+]]:_(<4 x s32>) = contract G_FADD [[FMUL]], [[BUILD_VECTOR2]] + ; GFX10-DENORM-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s32>) + ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX10-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX10-DENORM-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GFX10-DENORM-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + %4:_(s32) = COPY $vgpr0 + %5:_(s32) = COPY $vgpr1 + %6:_(s32) = COPY $vgpr2 + %7:_(s32) = COPY $vgpr3 + %0:_(<4 x s32>) = G_BUILD_VECTOR %4(s32), %5(s32), %6(s32), %7(s32) + %8:_(s32) = COPY $vgpr4 + %9:_(s32) = COPY $vgpr5 + %10:_(s32) = COPY $vgpr6 + %11:_(s32) = COPY $vgpr7 + %1:_(<4 x s32>) = G_BUILD_VECTOR %8(s32), %9(s32), %10(s32), %11(s32) + %12:_(s32) = COPY $vgpr8 + %13:_(s32) = COPY $vgpr9 + %14:_(s32) = COPY $vgpr10 + %15:_(s32) = COPY $vgpr11 + %2:_(<4 x s32>) = G_BUILD_VECTOR %12(s32), %13(s32), %14(s32), %15(s32) + %16:_(<4 x s32>) = contract G_FMUL %0, %1 + %17:_(<4 x s32>) = contract G_FADD %16, %2 + %19:_(s32), %20:_(s32), %21:_(s32), %22:_(s32) = G_UNMERGE_VALUES %17(<4 x s32>) + $vgpr0 = COPY %19(s32) + $vgpr1 = COPY %20(s32) + $vgpr2 = COPY %21(s32) + $vgpr3 = COPY %22(s32) + S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 +... 
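Note: the next pair of tests switches to <3 x s32> operands, so odd-width vectors exercise the same mul/add pattern independently of any power-of-two assumptions. Each operand is assembled from scalar VGPR copies, as in the test bodies below:

  %0:_(<3 x s32>) = G_BUILD_VECTOR %4(s32), %5(s32), %6(s32)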
+ +--- name: test_3xfloat_add_mul_rhs body: | bb.1.entry: @@ -1275,28 +1509,7 @@ body: | ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 - ; GFX9-CONTRACT-LABEL: name: test_3xfloat_add_mul_rhs - ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 - ; GFX9-CONTRACT-NEXT: {{ $}} - ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-CONTRACT-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) - ; GFX9-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-CONTRACT-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32) - ; GFX9-CONTRACT-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX9-CONTRACT-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX9-CONTRACT-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX9-CONTRACT-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) - ; GFX9-CONTRACT-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s32>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] - ; GFX9-CONTRACT-NEXT: [[FADD:%[0-9]+]]:_(<3 x s32>) = G_FADD [[BUILD_VECTOR2]], [[FMUL]] - ; GFX9-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s32>) - ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-CONTRACT-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + ; ; GFX9-DENORM-LABEL: name: test_3xfloat_add_mul_rhs ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 ; GFX9-DENORM-NEXT: {{ $}} @@ -1319,28 +1532,7 @@ body: | ; GFX9-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32) ; GFX9-DENORM-NEXT: $vgpr2 = COPY [[UV2]](s32) ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 - ; GFX9-UNSAFE-LABEL: name: test_3xfloat_add_mul_rhs - ; GFX9-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 - ; GFX9-UNSAFE-NEXT: {{ $}} - ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) - ; GFX9-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32) - ; GFX9-UNSAFE-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX9-UNSAFE-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX9-UNSAFE-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) - ; GFX9-UNSAFE-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s32>) = G_FMUL 
[[BUILD_VECTOR]], [[BUILD_VECTOR1]] - ; GFX9-UNSAFE-NEXT: [[FADD:%[0-9]+]]:_(<3 x s32>) = G_FADD [[BUILD_VECTOR2]], [[FMUL]] - ; GFX9-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s32>) - ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-UNSAFE-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + ; ; GFX10-LABEL: name: test_3xfloat_add_mul_rhs ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 ; GFX10-NEXT: {{ $}} @@ -1363,28 +1555,7 @@ body: | ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 - ; GFX10-CONTRACT-LABEL: name: test_3xfloat_add_mul_rhs - ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 - ; GFX10-CONTRACT-NEXT: {{ $}} - ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-CONTRACT-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) - ; GFX10-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-CONTRACT-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32) - ; GFX10-CONTRACT-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-CONTRACT-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX10-CONTRACT-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX10-CONTRACT-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) - ; GFX10-CONTRACT-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s32>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] - ; GFX10-CONTRACT-NEXT: [[FADD:%[0-9]+]]:_(<3 x s32>) = G_FADD [[BUILD_VECTOR2]], [[FMUL]] - ; GFX10-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s32>) - ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-CONTRACT-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + ; ; GFX10-DENORM-LABEL: name: test_3xfloat_add_mul_rhs ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 ; GFX10-DENORM-NEXT: {{ $}} @@ -1407,28 +1578,6 @@ body: | ; GFX10-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32) ; GFX10-DENORM-NEXT: $vgpr2 = COPY [[UV2]](s32) ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 - ; GFX10-UNSAFE-LABEL: name: test_3xfloat_add_mul_rhs - ; GFX10-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 - ; GFX10-UNSAFE-NEXT: {{ $}} - ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-UNSAFE-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) - ; GFX10-UNSAFE-NEXT: 
[[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-UNSAFE-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32) - ; GFX10-UNSAFE-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-UNSAFE-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX10-UNSAFE-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX10-UNSAFE-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) - ; GFX10-UNSAFE-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s32>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] - ; GFX10-UNSAFE-NEXT: [[FADD:%[0-9]+]]:_(<3 x s32>) = G_FADD [[BUILD_VECTOR2]], [[FMUL]] - ; GFX10-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s32>) - ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-UNSAFE-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %4:_(s32) = COPY $vgpr0 %5:_(s32) = COPY $vgpr1 %6:_(s32) = COPY $vgpr2 @@ -1451,6 +1600,124 @@ body: | ... --- +name: test_3xfloat_add_mul_rhs_contract +body: | + bb.1.entry: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 + + ; GFX9-LABEL: name: test_3xfloat_add_mul_rhs_contract + ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32) + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s32>) = contract G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] + ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(<3 x s32>) = contract G_FADD [[BUILD_VECTOR2]], [[FMUL]] + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s32>) + ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + ; + ; GFX9-DENORM-LABEL: name: test_3xfloat_add_mul_rhs_contract + ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 + ; GFX9-DENORM-NEXT: {{ $}} + ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-DENORM-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) + ; GFX9-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; 
GFX9-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX9-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX9-DENORM-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32) + ; GFX9-DENORM-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX9-DENORM-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX9-DENORM-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX9-DENORM-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) + ; GFX9-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s32>) = contract G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] + ; GFX9-DENORM-NEXT: [[FADD:%[0-9]+]]:_(<3 x s32>) = contract G_FADD [[BUILD_VECTOR2]], [[FMUL]] + ; GFX9-DENORM-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s32>) + ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX9-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX9-DENORM-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + ; + ; GFX10-LABEL: name: test_3xfloat_add_mul_rhs_contract + ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32) + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s32>) = contract G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] + ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(<3 x s32>) = contract G_FADD [[BUILD_VECTOR2]], [[FMUL]] + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s32>) + ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + ; + ; GFX10-DENORM-LABEL: name: test_3xfloat_add_mul_rhs_contract + ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 + ; GFX10-DENORM-NEXT: {{ $}} + ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-DENORM-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) + ; GFX10-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX10-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX10-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX10-DENORM-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32) + ; GFX10-DENORM-NEXT: 
[[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX10-DENORM-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX10-DENORM-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX10-DENORM-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) + ; GFX10-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s32>) = contract G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] + ; GFX10-DENORM-NEXT: [[FADD:%[0-9]+]]:_(<3 x s32>) = contract G_FADD [[BUILD_VECTOR2]], [[FMUL]] + ; GFX10-DENORM-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s32>) + ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX10-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX10-DENORM-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + %4:_(s32) = COPY $vgpr0 + %5:_(s32) = COPY $vgpr1 + %6:_(s32) = COPY $vgpr2 + %0:_(<3 x s32>) = G_BUILD_VECTOR %4(s32), %5(s32), %6(s32) + %7:_(s32) = COPY $vgpr3 + %8:_(s32) = COPY $vgpr4 + %9:_(s32) = COPY $vgpr5 + %1:_(<3 x s32>) = G_BUILD_VECTOR %7(s32), %8(s32), %9(s32) + %10:_(s32) = COPY $vgpr6 + %11:_(s32) = COPY $vgpr7 + %12:_(s32) = COPY $vgpr8 + %2:_(<3 x s32>) = G_BUILD_VECTOR %10(s32), %11(s32), %12(s32) + %13:_(<3 x s32>) = contract G_FMUL %0, %1 + %14:_(<3 x s32>) = contract G_FADD %2, %13 + %16:_(s32), %17:_(s32), %18:_(s32) = G_UNMERGE_VALUES %14(<3 x s32>) + $vgpr0 = COPY %16(s32) + $vgpr1 = COPY %17(s32) + $vgpr2 = COPY %18(s32) + S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 +... + +--- name: test_4xhalf_add_mul body: | bb.1.entry: @@ -1474,24 +1741,7 @@ body: | ; GFX9-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 - ; GFX9-CONTRACT-LABEL: name: test_4xhalf_add_mul - ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 - ; GFX9-CONTRACT-NEXT: {{ $}} - ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-CONTRACT-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>) - ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX9-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; GFX9-CONTRACT-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>) - ; GFX9-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX9-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; GFX9-CONTRACT-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>) - ; GFX9-CONTRACT-NEXT: [[FMUL:%[0-9]+]]:_(<4 x s16>) = G_FMUL [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]] - ; GFX9-CONTRACT-NEXT: [[FADD:%[0-9]+]]:_(<4 x s16>) = G_FADD [[FMUL]], [[CONCAT_VECTORS2]] - ; GFX9-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FADD]](<4 x s16>) - ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) - ; GFX9-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; GFX9-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; ; GFX9-DENORM-LABEL: name: test_4xhalf_add_mul ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX9-DENORM-NEXT: {{ $}} @@ -1510,24 +1760,7 @@ body: | ; 
GFX9-DENORM-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) ; GFX9-DENORM-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 - ; GFX9-UNSAFE-LABEL: name: test_4xhalf_add_mul - ; GFX9-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 - ; GFX9-UNSAFE-NEXT: {{ $}} - ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-UNSAFE-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>) - ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX9-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; GFX9-UNSAFE-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>) - ; GFX9-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX9-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; GFX9-UNSAFE-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>) - ; GFX9-UNSAFE-NEXT: [[FMUL:%[0-9]+]]:_(<4 x s16>) = G_FMUL [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]] - ; GFX9-UNSAFE-NEXT: [[FADD:%[0-9]+]]:_(<4 x s16>) = G_FADD [[FMUL]], [[CONCAT_VECTORS2]] - ; GFX9-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FADD]](<4 x s16>) - ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) - ; GFX9-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; GFX9-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; ; GFX10-LABEL: name: test_4xhalf_add_mul ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX10-NEXT: {{ $}} @@ -1546,24 +1779,7 @@ body: | ; GFX10-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 - ; GFX10-CONTRACT-LABEL: name: test_4xhalf_add_mul - ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 - ; GFX10-CONTRACT-NEXT: {{ $}} - ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX10-CONTRACT-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>) - ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX10-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; GFX10-CONTRACT-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>) - ; GFX10-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX10-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; GFX10-CONTRACT-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>) - ; GFX10-CONTRACT-NEXT: [[FMUL:%[0-9]+]]:_(<4 x s16>) = G_FMUL [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]] - ; GFX10-CONTRACT-NEXT: [[FADD:%[0-9]+]]:_(<4 x s16>) = G_FADD [[FMUL]], [[CONCAT_VECTORS2]] - ; GFX10-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FADD]](<4 x s16>) - ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) - ; GFX10-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; GFX10-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; ; GFX10-DENORM-LABEL: name: test_4xhalf_add_mul ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, 
$vgpr3, $vgpr4, $vgpr5 ; GFX10-DENORM-NEXT: {{ $}} @@ -1582,24 +1798,6 @@ body: | ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) ; GFX10-DENORM-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 - ; GFX10-UNSAFE-LABEL: name: test_4xhalf_add_mul - ; GFX10-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 - ; GFX10-UNSAFE-NEXT: {{ $}} - ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX10-UNSAFE-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>) - ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX10-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; GFX10-UNSAFE-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>) - ; GFX10-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX10-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; GFX10-UNSAFE-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>) - ; GFX10-UNSAFE-NEXT: [[FMUL:%[0-9]+]]:_(<4 x s16>) = G_FMUL [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]] - ; GFX10-UNSAFE-NEXT: [[FADD:%[0-9]+]]:_(<4 x s16>) = G_FADD [[FMUL]], [[CONCAT_VECTORS2]] - ; GFX10-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FADD]](<4 x s16>) - ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) - ; GFX10-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; GFX10-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 %4:_(<2 x s16>) = COPY $vgpr0 %5:_(<2 x s16>) = COPY $vgpr1 %0:_(<4 x s16>) = G_CONCAT_VECTORS %4(<2 x s16>), %5(<2 x s16>) @@ -1618,6 +1816,105 @@ body: | ... 
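Note: for the f16 tests the operands arrive as pairs of packed <2 x s16> registers (one 32-bit VGPR each) and are concatenated into the working <4 x s16> value, as in the body below:

  %4:_(<2 x s16>) = COPY $vgpr0
  %5:_(<2 x s16>) = COPY $vgpr1
  %0:_(<4 x s16>) = G_CONCAT_VECTORS %4(<2 x s16>), %5(<2 x s16>)

The contract variant that follows applies the same mul/add-with-flags pattern to this packed-half layout.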
---
+name: test_4xhalf_add_mul_contract
+body: |
+ bb.1.entry:
+ liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
+
+ ; GFX9-LABEL: name: test_4xhalf_add_mul_contract
+ ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+ ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
+ ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>)
+ ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
+ ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3
+ ; GFX9-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>)
+ ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
+ ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
+ ; GFX9-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>)
+ ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(<4 x s16>) = contract G_FMUL [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]]
+ ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(<4 x s16>) = contract G_FADD [[FMUL]], [[CONCAT_VECTORS2]]
+ ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FADD]](<4 x s16>)
+ ; GFX9-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>)
+ ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>)
+ ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
+ ;
+ ; GFX9-DENORM-LABEL: name: test_4xhalf_add_mul_contract
+ ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
+ ; GFX9-DENORM-NEXT: {{ $}}
+ ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+ ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
+ ; GFX9-DENORM-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>)
+ ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
+ ; GFX9-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3
+ ; GFX9-DENORM-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>)
+ ; GFX9-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
+ ; GFX9-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
+ ; GFX9-DENORM-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>)
+ ; GFX9-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(<4 x s16>) = contract G_FMUL [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]]
+ ; GFX9-DENORM-NEXT: [[FADD:%[0-9]+]]:_(<4 x s16>) = contract G_FADD [[FMUL]], [[CONCAT_VECTORS2]]
+ ; GFX9-DENORM-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FADD]](<4 x s16>)
+ ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>)
+ ; GFX9-DENORM-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>)
+ ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
+ ;
+ ; GFX10-LABEL: name: test_4xhalf_add_mul_contract
+ ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+ ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
+ ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>)
+ ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
+ ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3
+ ; GFX10-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>)
+ ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
+ ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
+ ; GFX10-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>)
+ ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(<4 x s16>) = contract G_FMUL [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]]
+ ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(<4 x s16>) = contract G_FADD [[FMUL]], [[CONCAT_VECTORS2]]
+ ; GFX10-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FADD]](<4 x s16>)
+ ; GFX10-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>)
+ ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>)
+ ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
+ ;
+ ; GFX10-DENORM-LABEL: name: test_4xhalf_add_mul_contract
+ ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
+ ; GFX10-DENORM-NEXT: {{ $}}
+ ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+ ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
+ ; GFX10-DENORM-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>)
+ ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
+ ; GFX10-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3
+ ; GFX10-DENORM-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>)
+ ; GFX10-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
+ ; GFX10-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
+ ; GFX10-DENORM-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>)
+ ; GFX10-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(<4 x s16>) = contract G_FMUL [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]]
+ ; GFX10-DENORM-NEXT: [[FADD:%[0-9]+]]:_(<4 x s16>) = contract G_FADD [[FMUL]], [[CONCAT_VECTORS2]]
+ ; GFX10-DENORM-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FADD]](<4 x s16>)
+ ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>)
+ ; GFX10-DENORM-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>)
+ ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
+ %4:_(<2 x s16>) = COPY $vgpr0
+ %5:_(<2 x s16>) = COPY $vgpr1
+ %0:_(<4 x s16>) = G_CONCAT_VECTORS %4(<2 x s16>), %5(<2 x s16>)
+ %6:_(<2 x s16>) = COPY $vgpr2
+ %7:_(<2 x s16>) = COPY $vgpr3
+ %1:_(<4 x s16>) = G_CONCAT_VECTORS %6(<2 x s16>), %7(<2 x s16>)
+ %8:_(<2 x s16>) = COPY $vgpr4
+ %9:_(<2 x s16>) = COPY $vgpr5
+ %2:_(<4 x s16>) = G_CONCAT_VECTORS %8(<2 x s16>), %9(<2 x s16>)
+ %10:_(<4 x s16>) = contract G_FMUL %0, %1
+ %11:_(<4 x s16>) = contract G_FADD %10, %2
+ %13:_(<2 x s16>), %14:_(<2 x s16>) = G_UNMERGE_VALUES %11(<4 x s16>)
+ $vgpr0 = COPY %13(<2 x s16>)
+ $vgpr1 = COPY %14(<2 x s16>)
+ S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
+...
+
+
+---
name: test_3xhalf_add_mul_rhs
body: |
bb.1.entry:
@@ -1648,31 +1945,6 @@ body: |
; GFX9-NEXT: $vgpr1 = COPY [[UV7]](<2 x s16>)
; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
;
- ; GFX9-CONTRACT-LABEL: name: test_3xhalf_add_mul_rhs
- ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
- ; GFX9-CONTRACT-NEXT: {{ $}}
- ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
- ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
- ; GFX9-CONTRACT-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
- ; GFX9-CONTRACT-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>)
- ; GFX9-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
- ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
- ; GFX9-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3
- ; GFX9-CONTRACT-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[DEF]](<2 x s16>)
- ; GFX9-CONTRACT-NEXT: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x s16>)
- ; GFX9-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
- ; GFX9-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
- ; GFX9-CONTRACT-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[DEF]](<2 x s16>)
- ; GFX9-CONTRACT-NEXT: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x s16>)
- ; GFX9-CONTRACT-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s16>) = G_FMUL [[UV]], [[UV2]]
- ; GFX9-CONTRACT-NEXT: [[FADD:%[0-9]+]]:_(<3 x s16>) = G_FADD [[UV4]], [[FMUL]]
- ; GFX9-CONTRACT-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF
- ; GFX9-CONTRACT-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FADD]](<3 x s16>), [[DEF1]](<3 x s16>)
- ; GFX9-CONTRACT-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>)
- ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[UV6]](<2 x s16>)
- ; GFX9-CONTRACT-NEXT: $vgpr1 = COPY [[UV7]](<2 x s16>)
- ; GFX9-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
- ;
; GFX9-DENORM-LABEL: name: test_3xhalf_add_mul_rhs
; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
; GFX9-DENORM-NEXT: {{ $}}
@@ -1698,31 +1970,6 @@ body: |
; GFX9-DENORM-NEXT: $vgpr1 = COPY [[UV7]](<2 x s16>)
; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
;
- ; GFX9-UNSAFE-LABEL: name: test_3xhalf_add_mul_rhs
- ; GFX9-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
- ; GFX9-UNSAFE-NEXT: {{ $}}
- ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
- ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
- ; GFX9-UNSAFE-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
- ; GFX9-UNSAFE-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>)
- ; GFX9-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
- ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
- ; GFX9-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3
- ; GFX9-UNSAFE-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[DEF]](<2 x s16>)
- ; GFX9-UNSAFE-NEXT: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x s16>)
- ; GFX9-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
- ; GFX9-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
- ; GFX9-UNSAFE-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[DEF]](<2 x s16>)
- ; GFX9-UNSAFE-NEXT: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x s16>)
- ; GFX9-UNSAFE-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s16>) = G_FMUL [[UV]], [[UV2]]
- ; GFX9-UNSAFE-NEXT: [[FADD:%[0-9]+]]:_(<3 x s16>) = G_FADD [[UV4]], [[FMUL]]
- ; GFX9-UNSAFE-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF
- ; GFX9-UNSAFE-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FADD]](<3 x s16>), [[DEF1]](<3 x s16>)
- ; GFX9-UNSAFE-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>)
- ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[UV6]](<2 x s16>)
- ; GFX9-UNSAFE-NEXT: $vgpr1 = COPY [[UV7]](<2 x s16>)
- ; GFX9-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
- ;
; GFX10-LABEL: name: test_3xhalf_add_mul_rhs
; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
; GFX10-NEXT: {{ $}}
@@ -1748,31 +1995,6 @@ body: |
; GFX10-NEXT: $vgpr1 = COPY [[UV7]](<2 x s16>)
; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
;
- ; GFX10-CONTRACT-LABEL: name: test_3xhalf_add_mul_rhs
- ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
- ; GFX10-CONTRACT-NEXT: {{ $}}
- ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
- ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
- ; GFX10-CONTRACT-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
- ; GFX10-CONTRACT-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>)
- ; GFX10-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
- ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
- ; GFX10-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3
- ; GFX10-CONTRACT-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[DEF]](<2 x s16>)
- ; GFX10-CONTRACT-NEXT: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x s16>)
- ; GFX10-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
- ; GFX10-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
- ; GFX10-CONTRACT-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[DEF]](<2 x s16>)
- ; GFX10-CONTRACT-NEXT: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x s16>)
- ; GFX10-CONTRACT-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s16>) = G_FMUL [[UV]], [[UV2]]
- ; GFX10-CONTRACT-NEXT: [[FADD:%[0-9]+]]:_(<3 x s16>) = G_FADD [[UV4]], [[FMUL]]
- ; GFX10-CONTRACT-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF
- ; GFX10-CONTRACT-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FADD]](<3 x s16>), [[DEF1]](<3 x s16>)
- ; GFX10-CONTRACT-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>)
- ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[UV6]](<2 x s16>)
- ; GFX10-CONTRACT-NEXT: $vgpr1 = COPY [[UV7]](<2 x s16>)
- ; GFX10-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
- ;
; GFX10-DENORM-LABEL: name: test_3xhalf_add_mul_rhs
; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
; GFX10-DENORM-NEXT: {{ $}}
@@ -1797,31 +2019,6 @@ body: |
; GFX10-DENORM-NEXT: $vgpr0 = COPY [[UV6]](<2 x s16>)
; GFX10-DENORM-NEXT: $vgpr1 = COPY [[UV7]](<2 x s16>)
; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
- ;
- ; GFX10-UNSAFE-LABEL: name: test_3xhalf_add_mul_rhs
- ; GFX10-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
- ; GFX10-UNSAFE-NEXT: {{ $}}
- ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
- ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
- ; GFX10-UNSAFE-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
- ; GFX10-UNSAFE-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>)
- ; GFX10-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
- ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
- ; GFX10-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3
- ; GFX10-UNSAFE-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[DEF]](<2 x s16>)
- ; GFX10-UNSAFE-NEXT: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x s16>)
- ; GFX10-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
- ; GFX10-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
- ; GFX10-UNSAFE-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[DEF]](<2 x s16>)
- ; GFX10-UNSAFE-NEXT: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x s16>)
- ; GFX10-UNSAFE-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s16>) = G_FMUL [[UV]], [[UV2]]
- ; GFX10-UNSAFE-NEXT: [[FADD:%[0-9]+]]:_(<3 x s16>) = G_FADD [[UV4]], [[FMUL]]
- ; GFX10-UNSAFE-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF
- ; GFX10-UNSAFE-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FADD]](<3 x s16>), [[DEF1]](<3 x s16>)
- ; GFX10-UNSAFE-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>)
- ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[UV6]](<2 x s16>)
- ; GFX10-UNSAFE-NEXT: $vgpr1 = COPY [[UV7]](<2 x s16>)
- ; GFX10-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
%4:_(<2 x s16>) = COPY $vgpr0
%5:_(<2 x s16>) = COPY $vgpr1
%10:_(<2 x s16>) = G_IMPLICIT_DEF
@@ -1846,6 +2043,134 @@ body: |
...
---
+name: test_3xhalf_add_mul_rhs_contract
+body: |
+ bb.1.entry:
+ liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
+
+ ; GFX9-LABEL: name: test_3xhalf_add_mul_rhs_contract
+ ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+ ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
+ ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+ ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>)
+ ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
+ ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
+ ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3
+ ; GFX9-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[DEF]](<2 x s16>)
+ ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x s16>)
+ ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
+ ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
+ ; GFX9-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[DEF]](<2 x s16>)
+ ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x s16>)
+ ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s16>) = contract G_FMUL [[UV]], [[UV2]]
+ ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(<3 x s16>) = contract G_FADD [[UV4]], [[FMUL]]
+ ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF
+ ; GFX9-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FADD]](<3 x s16>), [[DEF1]](<3 x s16>)
+ ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>)
+ ; GFX9-NEXT: $vgpr0 = COPY [[UV6]](<2 x s16>)
+ ; GFX9-NEXT: $vgpr1 = COPY [[UV7]](<2 x s16>)
+ ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
+ ;
+ ; GFX9-DENORM-LABEL: name: test_3xhalf_add_mul_rhs_contract
+ ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
+ ; GFX9-DENORM-NEXT: {{ $}}
+ ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+ ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
+ ; GFX9-DENORM-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+ ; GFX9-DENORM-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>)
+ ; GFX9-DENORM-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
+ ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
+ ; GFX9-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3
+ ; GFX9-DENORM-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[DEF]](<2 x s16>)
+ ; GFX9-DENORM-NEXT: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x s16>)
+ ; GFX9-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
+ ; GFX9-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
+ ; GFX9-DENORM-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[DEF]](<2 x s16>)
+ ; GFX9-DENORM-NEXT: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x s16>)
+ ; GFX9-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s16>) = contract G_FMUL [[UV]], [[UV2]]
+ ; GFX9-DENORM-NEXT: [[FADD:%[0-9]+]]:_(<3 x s16>) = contract G_FADD [[UV4]], [[FMUL]]
+ ; GFX9-DENORM-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF
+ ; GFX9-DENORM-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FADD]](<3 x s16>), [[DEF1]](<3 x s16>)
+ ; GFX9-DENORM-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>)
+ ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[UV6]](<2 x s16>)
+ ; GFX9-DENORM-NEXT: $vgpr1 = COPY [[UV7]](<2 x s16>)
+ ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
+ ;
+ ; GFX10-LABEL: name: test_3xhalf_add_mul_rhs_contract
+ ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+ ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
+ ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+ ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>)
+ ; GFX10-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
+ ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
+ ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3
+ ; GFX10-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[DEF]](<2 x s16>)
+ ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x s16>)
+ ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
+ ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
+ ; GFX10-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[DEF]](<2 x s16>)
+ ; GFX10-NEXT: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x s16>)
+ ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s16>) = contract G_FMUL [[UV]], [[UV2]]
+ ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(<3 x s16>) = contract G_FADD [[UV4]], [[FMUL]]
+ ; GFX10-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF
+ ; GFX10-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FADD]](<3 x s16>), [[DEF1]](<3 x s16>)
+ ; GFX10-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>)
+ ; GFX10-NEXT: $vgpr0 = COPY [[UV6]](<2 x s16>)
+ ; GFX10-NEXT: $vgpr1 = COPY [[UV7]](<2 x s16>)
+ ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
+ ;
+ ; GFX10-DENORM-LABEL: name: test_3xhalf_add_mul_rhs_contract
+ ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
+ ; GFX10-DENORM-NEXT: {{ $}}
+ ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+ ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
+ ; GFX10-DENORM-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+ ; GFX10-DENORM-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>)
+ ; GFX10-DENORM-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
+ ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
+ ; GFX10-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3
+ ; GFX10-DENORM-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[DEF]](<2 x s16>)
+ ; GFX10-DENORM-NEXT: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x s16>)
+ ; GFX10-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
+ ; GFX10-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
+ ; GFX10-DENORM-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[DEF]](<2 x s16>)
+ ; GFX10-DENORM-NEXT: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x s16>)
+ ; GFX10-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s16>) = contract G_FMUL [[UV]], [[UV2]]
+ ; GFX10-DENORM-NEXT: [[FADD:%[0-9]+]]:_(<3 x s16>) = contract G_FADD [[UV4]], [[FMUL]]
+ ; GFX10-DENORM-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF
+ ; GFX10-DENORM-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FADD]](<3 x s16>), [[DEF1]](<3 x s16>)
+ ; GFX10-DENORM-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>)
+ ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[UV6]](<2 x s16>)
+ ; GFX10-DENORM-NEXT: $vgpr1 = COPY [[UV7]](<2 x s16>)
+ ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
+ %4:_(<2 x s16>) = COPY $vgpr0
+ %5:_(<2 x s16>) = COPY $vgpr1
+ %10:_(<2 x s16>) = G_IMPLICIT_DEF
+ %11:_(<6 x s16>) = G_CONCAT_VECTORS %4(<2 x s16>), %5(<2 x s16>), %10(<2 x s16>)
+ %0:_(<3 x s16>), %12:_(<3 x s16>) = G_UNMERGE_VALUES %11(<6 x s16>)
+ %6:_(<2 x s16>) = COPY $vgpr2
+ %7:_(<2 x s16>) = COPY $vgpr3
+ %13:_(<6 x s16>) = G_CONCAT_VECTORS %6(<2 x s16>), %7(<2 x s16>), %10(<2 x s16>)
+ %1:_(<3 x s16>), %14:_(<3 x s16>) = G_UNMERGE_VALUES %13(<6 x s16>)
+ %8:_(<2 x s16>) = COPY $vgpr4
+ %9:_(<2 x s16>) = COPY $vgpr5
+ %15:_(<6 x s16>) = G_CONCAT_VECTORS %8(<2 x s16>), %9(<2 x s16>), %10(<2 x s16>)
+ %2:_(<3 x s16>), %16:_(<3 x s16>) = G_UNMERGE_VALUES %15(<6 x s16>)
+ %17:_(<3 x s16>) = contract G_FMUL %0, %1
+ %18:_(<3 x s16>) = contract G_FADD %2, %17
+ %22:_(<3 x s16>) = G_IMPLICIT_DEF
+ %23:_(<6 x s16>) = G_CONCAT_VECTORS %18(<3 x s16>), %22(<3 x s16>)
+ %20:_(<2 x s16>), %21:_(<2 x s16>), %24:_(<2 x s16>) = G_UNMERGE_VALUES %23(<6 x s16>)
+ $vgpr0 = COPY %20(<2 x s16>)
+ $vgpr1 = COPY %21(<2 x s16>)
+ S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
+...
+ +--- name: test_4xdouble_add_mul body: | bb.1.entry: @@ -1905,60 +2230,7 @@ body: | ; GFX9-NEXT: $vgpr6 = COPY [[UV6]](s32) ; GFX9-NEXT: $vgpr7 = COPY [[UV7]](s32) ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 - ; GFX9-CONTRACT-LABEL: name: test_4xdouble_add_mul - ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23 - ; GFX9-CONTRACT-NEXT: {{ $}} - ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-CONTRACT-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX9-CONTRACT-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX9-CONTRACT-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX9-CONTRACT-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX9-CONTRACT-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX9-CONTRACT-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-CONTRACT-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) - ; GFX9-CONTRACT-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX9-CONTRACT-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX9-CONTRACT-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GFX9-CONTRACT-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GFX9-CONTRACT-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; GFX9-CONTRACT-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; GFX9-CONTRACT-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; GFX9-CONTRACT-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; GFX9-CONTRACT-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) - ; GFX9-CONTRACT-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-CONTRACT-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) - ; GFX9-CONTRACT-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) - ; GFX9-CONTRACT-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64) - ; GFX9-CONTRACT-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; GFX9-CONTRACT-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; GFX9-CONTRACT-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; GFX9-CONTRACT-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; GFX9-CONTRACT-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; GFX9-CONTRACT-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; GFX9-CONTRACT-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; GFX9-CONTRACT-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; GFX9-CONTRACT-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) - ; GFX9-CONTRACT-NEXT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY18]](s32), [[COPY19]](s32) - ; GFX9-CONTRACT-NEXT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32) - ; GFX9-CONTRACT-NEXT: 
[[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32) - ; GFX9-CONTRACT-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64) - ; GFX9-CONTRACT-NEXT: [[FMUL:%[0-9]+]]:_(<4 x s64>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] - ; GFX9-CONTRACT-NEXT: [[FADD:%[0-9]+]]:_(<4 x s64>) = G_FADD [[FMUL]], [[BUILD_VECTOR2]] - ; GFX9-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s64>) - ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-CONTRACT-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-CONTRACT-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; GFX9-CONTRACT-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; GFX9-CONTRACT-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; GFX9-CONTRACT-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; GFX9-CONTRACT-NEXT: $vgpr7 = COPY [[UV7]](s32) - ; GFX9-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; ; GFX9-DENORM-LABEL: name: test_4xdouble_add_mul ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23 ; GFX9-DENORM-NEXT: {{ $}} @@ -2013,60 +2285,7 @@ body: | ; GFX9-DENORM-NEXT: $vgpr6 = COPY [[UV6]](s32) ; GFX9-DENORM-NEXT: $vgpr7 = COPY [[UV7]](s32) ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 - ; GFX9-UNSAFE-LABEL: name: test_4xdouble_add_mul - ; GFX9-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23 - ; GFX9-UNSAFE-NEXT: {{ $}} - ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-UNSAFE-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX9-UNSAFE-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX9-UNSAFE-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX9-UNSAFE-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX9-UNSAFE-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX9-UNSAFE-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) - ; GFX9-UNSAFE-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX9-UNSAFE-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX9-UNSAFE-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GFX9-UNSAFE-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GFX9-UNSAFE-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; GFX9-UNSAFE-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - 
; GFX9-UNSAFE-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; GFX9-UNSAFE-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; GFX9-UNSAFE-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) - ; GFX9-UNSAFE-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-UNSAFE-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) - ; GFX9-UNSAFE-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) - ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64) - ; GFX9-UNSAFE-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; GFX9-UNSAFE-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; GFX9-UNSAFE-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; GFX9-UNSAFE-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; GFX9-UNSAFE-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; GFX9-UNSAFE-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; GFX9-UNSAFE-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; GFX9-UNSAFE-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; GFX9-UNSAFE-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) - ; GFX9-UNSAFE-NEXT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY18]](s32), [[COPY19]](s32) - ; GFX9-UNSAFE-NEXT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32) - ; GFX9-UNSAFE-NEXT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32) - ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64) - ; GFX9-UNSAFE-NEXT: [[FMUL:%[0-9]+]]:_(<4 x s64>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] - ; GFX9-UNSAFE-NEXT: [[FADD:%[0-9]+]]:_(<4 x s64>) = G_FADD [[FMUL]], [[BUILD_VECTOR2]] - ; GFX9-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s64>) - ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-UNSAFE-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-UNSAFE-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; GFX9-UNSAFE-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; GFX9-UNSAFE-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; GFX9-UNSAFE-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; GFX9-UNSAFE-NEXT: $vgpr7 = COPY [[UV7]](s32) - ; GFX9-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; ; GFX10-LABEL: name: test_4xdouble_add_mul ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23 ; GFX10-NEXT: {{ $}} @@ -2121,60 +2340,7 @@ body: | ; GFX10-NEXT: $vgpr6 = COPY [[UV6]](s32) ; GFX10-NEXT: $vgpr7 = COPY [[UV7]](s32) ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 - ; GFX10-CONTRACT-LABEL: name: test_4xdouble_add_mul - ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23 - 
; GFX10-CONTRACT-NEXT: {{ $}} - ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-CONTRACT-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-CONTRACT-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX10-CONTRACT-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX10-CONTRACT-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX10-CONTRACT-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX10-CONTRACT-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-CONTRACT-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) - ; GFX10-CONTRACT-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX10-CONTRACT-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX10-CONTRACT-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GFX10-CONTRACT-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GFX10-CONTRACT-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; GFX10-CONTRACT-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; GFX10-CONTRACT-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; GFX10-CONTRACT-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; GFX10-CONTRACT-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) - ; GFX10-CONTRACT-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-CONTRACT-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) - ; GFX10-CONTRACT-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) - ; GFX10-CONTRACT-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64) - ; GFX10-CONTRACT-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; GFX10-CONTRACT-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; GFX10-CONTRACT-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; GFX10-CONTRACT-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; GFX10-CONTRACT-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; GFX10-CONTRACT-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; GFX10-CONTRACT-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; GFX10-CONTRACT-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; GFX10-CONTRACT-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) - ; GFX10-CONTRACT-NEXT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY18]](s32), [[COPY19]](s32) - ; GFX10-CONTRACT-NEXT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32) - ; GFX10-CONTRACT-NEXT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32) - ; GFX10-CONTRACT-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64) - ; GFX10-CONTRACT-NEXT: [[FMUL:%[0-9]+]]:_(<4 x s64>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] - ; GFX10-CONTRACT-NEXT: [[FADD:%[0-9]+]]:_(<4 x s64>) = G_FADD [[FMUL]], [[BUILD_VECTOR2]] - ; GFX10-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), 
[[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s64>) - ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-CONTRACT-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-CONTRACT-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; GFX10-CONTRACT-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; GFX10-CONTRACT-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; GFX10-CONTRACT-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; GFX10-CONTRACT-NEXT: $vgpr7 = COPY [[UV7]](s32) - ; GFX10-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; ; GFX10-DENORM-LABEL: name: test_4xdouble_add_mul ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23 ; GFX10-DENORM-NEXT: {{ $}} @@ -2229,60 +2395,6 @@ body: | ; GFX10-DENORM-NEXT: $vgpr6 = COPY [[UV6]](s32) ; GFX10-DENORM-NEXT: $vgpr7 = COPY [[UV7]](s32) ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 - ; GFX10-UNSAFE-LABEL: name: test_4xdouble_add_mul - ; GFX10-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23 - ; GFX10-UNSAFE-NEXT: {{ $}} - ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-UNSAFE-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-UNSAFE-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX10-UNSAFE-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX10-UNSAFE-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX10-UNSAFE-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX10-UNSAFE-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-UNSAFE-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) - ; GFX10-UNSAFE-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX10-UNSAFE-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX10-UNSAFE-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GFX10-UNSAFE-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GFX10-UNSAFE-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; GFX10-UNSAFE-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; GFX10-UNSAFE-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; GFX10-UNSAFE-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; GFX10-UNSAFE-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) - ; GFX10-UNSAFE-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-UNSAFE-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) - ; GFX10-UNSAFE-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) - ; GFX10-UNSAFE-NEXT: 
[[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64) - ; GFX10-UNSAFE-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; GFX10-UNSAFE-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; GFX10-UNSAFE-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; GFX10-UNSAFE-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; GFX10-UNSAFE-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; GFX10-UNSAFE-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; GFX10-UNSAFE-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; GFX10-UNSAFE-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; GFX10-UNSAFE-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) - ; GFX10-UNSAFE-NEXT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY18]](s32), [[COPY19]](s32) - ; GFX10-UNSAFE-NEXT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32) - ; GFX10-UNSAFE-NEXT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32) - ; GFX10-UNSAFE-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64) - ; GFX10-UNSAFE-NEXT: [[FMUL:%[0-9]+]]:_(<4 x s64>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] - ; GFX10-UNSAFE-NEXT: [[FADD:%[0-9]+]]:_(<4 x s64>) = G_FADD [[FMUL]], [[BUILD_VECTOR2]] - ; GFX10-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s64>) - ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-UNSAFE-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-UNSAFE-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; GFX10-UNSAFE-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; GFX10-UNSAFE-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; GFX10-UNSAFE-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; GFX10-UNSAFE-NEXT: $vgpr7 = COPY [[UV7]](s32) - ; GFX10-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %4:_(s32) = COPY $vgpr0 %5:_(s32) = COPY $vgpr1 %6:_(s32) = COPY $vgpr2 @@ -2337,6 +2449,284 @@ body: | ... 
---
+name: test_4xdouble_add_mul_contract
+body: |
+ bb.1.entry:
+ liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23
+
+ ; GFX9-LABEL: name: test_4xdouble_add_mul_contract
+ ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23
+ ; GFX9-NEXT: {{ $}}
+ ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+ ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+ ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+ ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+ ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
+ ; GFX9-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64)
+ ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
+ ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
+ ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
+ ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
+ ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
+ ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
+ ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
+ ; GFX9-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
+ ; GFX9-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32)
+ ; GFX9-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX9-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32)
+ ; GFX9-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64)
+ ; GFX9-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16
+ ; GFX9-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17
+ ; GFX9-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18
+ ; GFX9-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19
+ ; GFX9-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20
+ ; GFX9-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21
+ ; GFX9-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22
+ ; GFX9-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23
+ ; GFX9-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32)
+ ; GFX9-NEXT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY18]](s32), [[COPY19]](s32)
+ ; GFX9-NEXT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32)
+ ; GFX9-NEXT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32)
+ ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64)
+ ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(<4 x s64>) = contract G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
+ ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(<4 x s64>) = contract G_FADD [[FMUL]], [[BUILD_VECTOR2]]
+ ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s64>)
+ ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32)
+ ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX9-NEXT: $vgpr4 = COPY [[UV4]](s32)
+ ; GFX9-NEXT: $vgpr5 = COPY [[UV5]](s32)
+ ; GFX9-NEXT: $vgpr6 = COPY [[UV6]](s32)
+ ; GFX9-NEXT: $vgpr7 = COPY [[UV7]](s32)
+ ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
+ ;
+ ; GFX9-DENORM-LABEL: name: test_4xdouble_add_mul_contract
+ ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23
+ ; GFX9-DENORM-NEXT: {{ $}}
+ ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX9-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX9-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; GFX9-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; GFX9-DENORM-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+ ; GFX9-DENORM-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+ ; GFX9-DENORM-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+ ; GFX9-DENORM-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+ ; GFX9-DENORM-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
+ ; GFX9-DENORM-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX9-DENORM-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64)
+ ; GFX9-DENORM-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
+ ; GFX9-DENORM-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
+ ; GFX9-DENORM-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
+ ; GFX9-DENORM-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
+ ; GFX9-DENORM-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
+ ; GFX9-DENORM-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
+ ; GFX9-DENORM-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
+ ; GFX9-DENORM-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
+ ; GFX9-DENORM-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32)
+ ; GFX9-DENORM-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX9-DENORM-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32)
+ ; GFX9-DENORM-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32)
+ ; GFX9-DENORM-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64)
+ ; GFX9-DENORM-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16
+ ; GFX9-DENORM-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17
+ ; GFX9-DENORM-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18
+ ; GFX9-DENORM-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19
+ ; GFX9-DENORM-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20
+ ; GFX9-DENORM-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21
+ ; GFX9-DENORM-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22
+ ; GFX9-DENORM-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23
+ ; GFX9-DENORM-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32)
+ ; GFX9-DENORM-NEXT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY18]](s32), [[COPY19]](s32)
+ ; GFX9-DENORM-NEXT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32)
+ ; GFX9-DENORM-NEXT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32)
+ ; GFX9-DENORM-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64)
+ ; GFX9-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(<4 x s64>) = contract G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
+ ; GFX9-DENORM-NEXT: [[FADD:%[0-9]+]]:_(<4 x s64>) = contract G_FADD [[FMUL]], [[BUILD_VECTOR2]]
+ ; GFX9-DENORM-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s64>)
+ ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[UV]](s32)
+ ; GFX9-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX9-DENORM-NEXT: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX9-DENORM-NEXT: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX9-DENORM-NEXT: $vgpr4 = COPY [[UV4]](s32)
+ ; GFX9-DENORM-NEXT: $vgpr5 = COPY [[UV5]](s32)
+ ; GFX9-DENORM-NEXT: $vgpr6 = COPY [[UV6]](s32)
+ ; GFX9-DENORM-NEXT: $vgpr7 = COPY [[UV7]](s32)
+ ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
+ ;
+ ; GFX10-LABEL: name: test_4xdouble_add_mul_contract
+ ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23
+ ; GFX10-NEXT: {{ $}}
+ ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+ ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+ ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+ ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+ ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
+ ; GFX10-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64)
+ ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
+ ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9
+ ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10
+ ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11
+ ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12
+ ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13
+ ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14
+ ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15
+ ; GFX10-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32)
+ ; GFX10-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32)
+ ; GFX10-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32)
+ ; GFX10-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32)
+ ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64)
+ ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16
+ ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17
+ ; GFX10-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18
+ ; GFX10-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19
+ ; GFX10-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20
+ ; GFX10-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21
+ ; GFX10-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22
+ ; GFX10-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23
+ ; GFX10-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32)
+ ; GFX10-NEXT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY18]](s32), [[COPY19]](s32)
+ ; GFX10-NEXT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32)
+ ; GFX10-NEXT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32)
+ ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64)
+ ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(<4 x s64>) = contract G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]]
+ ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(<4 x s64>) = contract G_FADD [[FMUL]], [[BUILD_VECTOR2]]
+ ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s64>)
+ ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32)
+ ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32)
+ ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32)
+ ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32)
+ ; GFX10-NEXT: $vgpr4 = COPY [[UV4]](s32)
+ ; GFX10-NEXT: $vgpr5 = COPY [[UV5]](s32)
+ ; GFX10-NEXT: $vgpr6 = COPY [[UV6]](s32)
+ ; GFX10-NEXT: $vgpr7 = COPY [[UV7]](s32)
+ ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7
+ ;
+ ; GFX10-DENORM-LABEL: name: test_4xdouble_add_mul_contract
+ ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23
+ ; GFX10-DENORM-NEXT: {{ $}}
+ ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+ ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1
+ ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2
+ ; GFX10-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3
+ ; GFX10-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4
+ ; GFX10-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5
+ ; GFX10-DENORM-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6
+ ; GFX10-DENORM-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7
+ ; GFX10-DENORM-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
+ ; GFX10-DENORM-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
+ ; GFX10-DENORM-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
+ ; GFX10-DENORM-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32)
+ ; GFX10-DENORM-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64)
+ ; GFX10-DENORM-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8
+ ; GFX10-DENORM-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY
$vgpr9 + ; GFX10-DENORM-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; GFX10-DENORM-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; GFX10-DENORM-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; GFX10-DENORM-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; GFX10-DENORM-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; GFX10-DENORM-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; GFX10-DENORM-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) + ; GFX10-DENORM-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) + ; GFX10-DENORM-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) + ; GFX10-DENORM-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) + ; GFX10-DENORM-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64) + ; GFX10-DENORM-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 + ; GFX10-DENORM-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 + ; GFX10-DENORM-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 + ; GFX10-DENORM-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 + ; GFX10-DENORM-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 + ; GFX10-DENORM-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 + ; GFX10-DENORM-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 + ; GFX10-DENORM-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 + ; GFX10-DENORM-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) + ; GFX10-DENORM-NEXT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY18]](s32), [[COPY19]](s32) + ; GFX10-DENORM-NEXT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32) + ; GFX10-DENORM-NEXT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32) + ; GFX10-DENORM-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64) + ; GFX10-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(<4 x s64>) = contract G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] + ; GFX10-DENORM-NEXT: [[FADD:%[0-9]+]]:_(<4 x s64>) = contract G_FADD [[FMUL]], [[BUILD_VECTOR2]] + ; GFX10-DENORM-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<4 x s64>) + ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX10-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX10-DENORM-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GFX10-DENORM-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-DENORM-NEXT: $vgpr4 = COPY [[UV4]](s32) + ; GFX10-DENORM-NEXT: $vgpr5 = COPY [[UV5]](s32) + ; GFX10-DENORM-NEXT: $vgpr6 = COPY [[UV6]](s32) + ; GFX10-DENORM-NEXT: $vgpr7 = COPY [[UV7]](s32) + ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + %4:_(s32) = COPY $vgpr0 + %5:_(s32) = COPY $vgpr1 + %6:_(s32) = COPY $vgpr2 + %7:_(s32) = COPY $vgpr3 + %8:_(s32) = COPY $vgpr4 + %9:_(s32) = COPY $vgpr5 + %10:_(s32) = COPY $vgpr6 + %11:_(s32) = COPY $vgpr7 + %28:_(s64) = G_MERGE_VALUES %4(s32), %5(s32) + %29:_(s64) = G_MERGE_VALUES %6(s32), %7(s32) + %30:_(s64) = G_MERGE_VALUES %8(s32), %9(s32) + %31:_(s64) = G_MERGE_VALUES %10(s32), %11(s32) + %0:_(<4 x s64>) = G_BUILD_VECTOR %28(s64), %29(s64), %30(s64), %31(s64) + %12:_(s32) = COPY $vgpr8 + %13:_(s32) = COPY $vgpr9 + %14:_(s32) = COPY $vgpr10 + 
%15:_(s32) = COPY $vgpr11 + %16:_(s32) = COPY $vgpr12 + %17:_(s32) = COPY $vgpr13 + %18:_(s32) = COPY $vgpr14 + %19:_(s32) = COPY $vgpr15 + %32:_(s64) = G_MERGE_VALUES %12(s32), %13(s32) + %33:_(s64) = G_MERGE_VALUES %14(s32), %15(s32) + %34:_(s64) = G_MERGE_VALUES %16(s32), %17(s32) + %35:_(s64) = G_MERGE_VALUES %18(s32), %19(s32) + %1:_(<4 x s64>) = G_BUILD_VECTOR %32(s64), %33(s64), %34(s64), %35(s64) + %20:_(s32) = COPY $vgpr16 + %21:_(s32) = COPY $vgpr17 + %22:_(s32) = COPY $vgpr18 + %23:_(s32) = COPY $vgpr19 + %24:_(s32) = COPY $vgpr20 + %25:_(s32) = COPY $vgpr21 + %26:_(s32) = COPY $vgpr22 + %27:_(s32) = COPY $vgpr23 + %36:_(s64) = G_MERGE_VALUES %20(s32), %21(s32) + %37:_(s64) = G_MERGE_VALUES %22(s32), %23(s32) + %38:_(s64) = G_MERGE_VALUES %24(s32), %25(s32) + %39:_(s64) = G_MERGE_VALUES %26(s32), %27(s32) + %2:_(<4 x s64>) = G_BUILD_VECTOR %36(s64), %37(s64), %38(s64), %39(s64) + %40:_(<4 x s64>) = contract G_FMUL %0, %1 + %41:_(<4 x s64>) = contract G_FADD %40, %2 + %43:_(s32), %44:_(s32), %45:_(s32), %46:_(s32), %47:_(s32), %48:_(s32), %49:_(s32), %50:_(s32) = G_UNMERGE_VALUES %41(<4 x s64>) + $vgpr0 = COPY %43(s32) + $vgpr1 = COPY %44(s32) + $vgpr2 = COPY %45(s32) + $vgpr3 = COPY %46(s32) + $vgpr4 = COPY %47(s32) + $vgpr5 = COPY %48(s32) + $vgpr6 = COPY %49(s32) + $vgpr7 = COPY %50(s32) + S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 +... + +--- name: test_3xdouble_add_mul_rhs body: | bb.1.entry: @@ -2385,49 +2775,7 @@ body: | ; GFX9-NEXT: $vgpr4 = COPY [[UV4]](s32) ; GFX9-NEXT: $vgpr5 = COPY [[UV5]](s32) ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 - ; GFX9-CONTRACT-LABEL: name: test_3xdouble_add_mul_rhs - ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17 - ; GFX9-CONTRACT-NEXT: {{ $}} - ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-CONTRACT-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX9-CONTRACT-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX9-CONTRACT-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX9-CONTRACT-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) - ; GFX9-CONTRACT-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX9-CONTRACT-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX9-CONTRACT-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX9-CONTRACT-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX9-CONTRACT-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GFX9-CONTRACT-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GFX9-CONTRACT-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-CONTRACT-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) - ; GFX9-CONTRACT-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) - ; 
GFX9-CONTRACT-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV3]](s64), [[MV4]](s64), [[MV5]](s64) - ; GFX9-CONTRACT-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; GFX9-CONTRACT-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; GFX9-CONTRACT-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; GFX9-CONTRACT-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; GFX9-CONTRACT-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; GFX9-CONTRACT-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; GFX9-CONTRACT-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) - ; GFX9-CONTRACT-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) - ; GFX9-CONTRACT-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) - ; GFX9-CONTRACT-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV6]](s64), [[MV7]](s64), [[MV8]](s64) - ; GFX9-CONTRACT-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s64>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] - ; GFX9-CONTRACT-NEXT: [[FADD:%[0-9]+]]:_(<3 x s64>) = G_FADD [[BUILD_VECTOR2]], [[FMUL]] - ; GFX9-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s64>) - ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-CONTRACT-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-CONTRACT-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; GFX9-CONTRACT-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; GFX9-CONTRACT-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; GFX9-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 + ; ; GFX9-DENORM-LABEL: name: test_3xdouble_add_mul_rhs ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17 ; GFX9-DENORM-NEXT: {{ $}} @@ -2471,49 +2819,7 @@ body: | ; GFX9-DENORM-NEXT: $vgpr4 = COPY [[UV4]](s32) ; GFX9-DENORM-NEXT: $vgpr5 = COPY [[UV5]](s32) ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 - ; GFX9-UNSAFE-LABEL: name: test_3xdouble_add_mul_rhs - ; GFX9-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17 - ; GFX9-UNSAFE-NEXT: {{ $}} - ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-UNSAFE-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX9-UNSAFE-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX9-UNSAFE-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) - ; GFX9-UNSAFE-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX9-UNSAFE-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX9-UNSAFE-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX9-UNSAFE-NEXT: 
[[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX9-UNSAFE-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GFX9-UNSAFE-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GFX9-UNSAFE-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-UNSAFE-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) - ; GFX9-UNSAFE-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV3]](s64), [[MV4]](s64), [[MV5]](s64) - ; GFX9-UNSAFE-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; GFX9-UNSAFE-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; GFX9-UNSAFE-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; GFX9-UNSAFE-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; GFX9-UNSAFE-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; GFX9-UNSAFE-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; GFX9-UNSAFE-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) - ; GFX9-UNSAFE-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) - ; GFX9-UNSAFE-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) - ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV6]](s64), [[MV7]](s64), [[MV8]](s64) - ; GFX9-UNSAFE-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s64>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] - ; GFX9-UNSAFE-NEXT: [[FADD:%[0-9]+]]:_(<3 x s64>) = G_FADD [[BUILD_VECTOR2]], [[FMUL]] - ; GFX9-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s64>) - ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-UNSAFE-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-UNSAFE-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; GFX9-UNSAFE-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; GFX9-UNSAFE-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; GFX9-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 + ; ; GFX10-LABEL: name: test_3xdouble_add_mul_rhs ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17 ; GFX10-NEXT: {{ $}} @@ -2557,49 +2863,7 @@ body: | ; GFX10-NEXT: $vgpr4 = COPY [[UV4]](s32) ; GFX10-NEXT: $vgpr5 = COPY [[UV5]](s32) ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 - ; GFX10-CONTRACT-LABEL: name: test_3xdouble_add_mul_rhs - ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17 - ; GFX10-CONTRACT-NEXT: {{ $}} - ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-CONTRACT-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX10-CONTRACT-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; 
GFX10-CONTRACT-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX10-CONTRACT-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) - ; GFX10-CONTRACT-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-CONTRACT-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX10-CONTRACT-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX10-CONTRACT-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX10-CONTRACT-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GFX10-CONTRACT-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GFX10-CONTRACT-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-CONTRACT-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) - ; GFX10-CONTRACT-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-CONTRACT-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV3]](s64), [[MV4]](s64), [[MV5]](s64) - ; GFX10-CONTRACT-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; GFX10-CONTRACT-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; GFX10-CONTRACT-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; GFX10-CONTRACT-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; GFX10-CONTRACT-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; GFX10-CONTRACT-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; GFX10-CONTRACT-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) - ; GFX10-CONTRACT-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) - ; GFX10-CONTRACT-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) - ; GFX10-CONTRACT-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV6]](s64), [[MV7]](s64), [[MV8]](s64) - ; GFX10-CONTRACT-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s64>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] - ; GFX10-CONTRACT-NEXT: [[FADD:%[0-9]+]]:_(<3 x s64>) = G_FADD [[BUILD_VECTOR2]], [[FMUL]] - ; GFX10-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s64>) - ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-CONTRACT-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-CONTRACT-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; GFX10-CONTRACT-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; GFX10-CONTRACT-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; GFX10-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 + ; ; GFX10-DENORM-LABEL: name: test_3xdouble_add_mul_rhs ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17 ; GFX10-DENORM-NEXT: {{ $}} @@ -2643,49 +2907,6 @@ body: | ; GFX10-DENORM-NEXT: $vgpr4 = COPY [[UV4]](s32) ; GFX10-DENORM-NEXT: $vgpr5 = COPY [[UV5]](s32) ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 - ; GFX10-UNSAFE-LABEL: name: test_3xdouble_add_mul_rhs - ; GFX10-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17 - ; GFX10-UNSAFE-NEXT: {{ $}} - ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = 
COPY $vgpr0 - ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-UNSAFE-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX10-UNSAFE-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX10-UNSAFE-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX10-UNSAFE-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) - ; GFX10-UNSAFE-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-UNSAFE-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX10-UNSAFE-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX10-UNSAFE-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX10-UNSAFE-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GFX10-UNSAFE-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GFX10-UNSAFE-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-UNSAFE-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) - ; GFX10-UNSAFE-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-UNSAFE-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV3]](s64), [[MV4]](s64), [[MV5]](s64) - ; GFX10-UNSAFE-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; GFX10-UNSAFE-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; GFX10-UNSAFE-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; GFX10-UNSAFE-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; GFX10-UNSAFE-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; GFX10-UNSAFE-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; GFX10-UNSAFE-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) - ; GFX10-UNSAFE-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) - ; GFX10-UNSAFE-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) - ; GFX10-UNSAFE-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV6]](s64), [[MV7]](s64), [[MV8]](s64) - ; GFX10-UNSAFE-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s64>) = G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] - ; GFX10-UNSAFE-NEXT: [[FADD:%[0-9]+]]:_(<3 x s64>) = G_FADD [[BUILD_VECTOR2]], [[FMUL]] - ; GFX10-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s64>) - ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-UNSAFE-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-UNSAFE-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; GFX10-UNSAFE-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; GFX10-UNSAFE-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; GFX10-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 %4:_(s32) = COPY $vgpr0 %5:_(s32) = COPY $vgpr1 %6:_(s32) = COPY $vgpr2 @@ -2727,3 +2948,226 @@ body: | $vgpr5 = COPY %39(s32) S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 ... 
+ +--- +name: test_3xdouble_add_mul_rhs_contract +body: | + bb.1.entry: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17 + + ; GFX9-LABEL: name: test_3xdouble_add_mul_rhs_contract + ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; GFX9-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) + ; GFX9-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) + ; GFX9-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV3]](s64), [[MV4]](s64), [[MV5]](s64) + ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; GFX9-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; GFX9-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 + ; GFX9-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 + ; GFX9-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) + ; GFX9-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) + ; GFX9-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV6]](s64), [[MV7]](s64), [[MV8]](s64) + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s64>) = contract G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] + ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(<3 x s64>) = contract G_FADD [[BUILD_VECTOR2]], [[FMUL]] + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s64>) + ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr4 = COPY [[UV4]](s32) + ; GFX9-NEXT: $vgpr5 = COPY [[UV5]](s32) + ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 + ; + ; GFX9-DENORM-LABEL: name: test_3xdouble_add_mul_rhs_contract + ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, 
$vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17 + ; GFX9-DENORM-NEXT: {{ $}} + ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX9-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX9-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX9-DENORM-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; GFX9-DENORM-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX9-DENORM-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX9-DENORM-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) + ; GFX9-DENORM-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX9-DENORM-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX9-DENORM-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX9-DENORM-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; GFX9-DENORM-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; GFX9-DENORM-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; GFX9-DENORM-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) + ; GFX9-DENORM-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) + ; GFX9-DENORM-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) + ; GFX9-DENORM-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV3]](s64), [[MV4]](s64), [[MV5]](s64) + ; GFX9-DENORM-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; GFX9-DENORM-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; GFX9-DENORM-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; GFX9-DENORM-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; GFX9-DENORM-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 + ; GFX9-DENORM-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 + ; GFX9-DENORM-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) + ; GFX9-DENORM-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) + ; GFX9-DENORM-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) + ; GFX9-DENORM-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV6]](s64), [[MV7]](s64), [[MV8]](s64) + ; GFX9-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s64>) = contract G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] + ; GFX9-DENORM-NEXT: [[FADD:%[0-9]+]]:_(<3 x s64>) = contract G_FADD [[BUILD_VECTOR2]], [[FMUL]] + ; GFX9-DENORM-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s64>) + ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX9-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX9-DENORM-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GFX9-DENORM-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-DENORM-NEXT: $vgpr4 = COPY [[UV4]](s32) + ; GFX9-DENORM-NEXT: $vgpr5 = COPY [[UV5]](s32) + ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 + ; + ; GFX10-LABEL: name: test_3xdouble_add_mul_rhs_contract + ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: 
[[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; GFX10-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) + ; GFX10-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) + ; GFX10-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV3]](s64), [[MV4]](s64), [[MV5]](s64) + ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 + ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 + ; GFX10-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) + ; GFX10-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) + ; GFX10-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) + ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV6]](s64), [[MV7]](s64), [[MV8]](s64) + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s64>) = contract G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] + ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(<3 x s64>) = contract G_FADD [[BUILD_VECTOR2]], [[FMUL]] + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s64>) + ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr4 = COPY [[UV4]](s32) + ; GFX10-NEXT: $vgpr5 = COPY [[UV5]](s32) + ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 + ; + ; GFX10-DENORM-LABEL: name: test_3xdouble_add_mul_rhs_contract + ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17 + ; GFX10-DENORM-NEXT: {{ $}} + ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX10-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX10-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY 
$vgpr5 + ; GFX10-DENORM-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; GFX10-DENORM-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX10-DENORM-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX10-DENORM-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) + ; GFX10-DENORM-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX10-DENORM-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX10-DENORM-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX10-DENORM-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; GFX10-DENORM-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; GFX10-DENORM-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; GFX10-DENORM-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) + ; GFX10-DENORM-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) + ; GFX10-DENORM-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) + ; GFX10-DENORM-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV3]](s64), [[MV4]](s64), [[MV5]](s64) + ; GFX10-DENORM-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; GFX10-DENORM-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; GFX10-DENORM-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; GFX10-DENORM-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; GFX10-DENORM-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 + ; GFX10-DENORM-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 + ; GFX10-DENORM-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) + ; GFX10-DENORM-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) + ; GFX10-DENORM-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) + ; GFX10-DENORM-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV6]](s64), [[MV7]](s64), [[MV8]](s64) + ; GFX10-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s64>) = contract G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] + ; GFX10-DENORM-NEXT: [[FADD:%[0-9]+]]:_(<3 x s64>) = contract G_FADD [[BUILD_VECTOR2]], [[FMUL]] + ; GFX10-DENORM-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s64>) + ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX10-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX10-DENORM-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GFX10-DENORM-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-DENORM-NEXT: $vgpr4 = COPY [[UV4]](s32) + ; GFX10-DENORM-NEXT: $vgpr5 = COPY [[UV5]](s32) + ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 + %4:_(s32) = COPY $vgpr0 + %5:_(s32) = COPY $vgpr1 + %6:_(s32) = COPY $vgpr2 + %7:_(s32) = COPY $vgpr3 + %8:_(s32) = COPY $vgpr4 + %9:_(s32) = COPY $vgpr5 + %22:_(s64) = G_MERGE_VALUES %4(s32), %5(s32) + %23:_(s64) = G_MERGE_VALUES %6(s32), %7(s32) + %24:_(s64) = G_MERGE_VALUES %8(s32), %9(s32) + %0:_(<3 x s64>) = G_BUILD_VECTOR %22(s64), %23(s64), %24(s64) + %10:_(s32) = COPY $vgpr6 + %11:_(s32) = COPY $vgpr7 + %12:_(s32) = COPY $vgpr8 + %13:_(s32) = COPY $vgpr9 + %14:_(s32) = COPY $vgpr10 + %15:_(s32) = COPY $vgpr11 + %25:_(s64) = G_MERGE_VALUES %10(s32), %11(s32) + %26:_(s64) = G_MERGE_VALUES %12(s32), %13(s32) + %27:_(s64) = G_MERGE_VALUES %14(s32), %15(s32) + %1:_(<3 x s64>) = G_BUILD_VECTOR 
%25(s64), %26(s64), %27(s64) + %16:_(s32) = COPY $vgpr12 + %17:_(s32) = COPY $vgpr13 + %18:_(s32) = COPY $vgpr14 + %19:_(s32) = COPY $vgpr15 + %20:_(s32) = COPY $vgpr16 + %21:_(s32) = COPY $vgpr17 + %28:_(s64) = G_MERGE_VALUES %16(s32), %17(s32) + %29:_(s64) = G_MERGE_VALUES %18(s32), %19(s32) + %30:_(s64) = G_MERGE_VALUES %20(s32), %21(s32) + %2:_(<3 x s64>) = G_BUILD_VECTOR %28(s64), %29(s64), %30(s64) + %31:_(<3 x s64>) = contract G_FMUL %0, %1 + %32:_(<3 x s64>) = contract G_FADD %2, %31 + %34:_(s32), %35:_(s32), %36:_(s32), %37:_(s32), %38:_(s32), %39:_(s32) = G_UNMERGE_VALUES %32(<3 x s64>) + $vgpr0 = COPY %34(s32) + $vgpr1 = COPY %35(s32) + $vgpr2 = COPY %36(s32) + $vgpr3 = COPY %37(s32) + $vgpr4 = COPY %38(s32) + $vgpr5 = COPY %39(s32) + S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 +... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul-pre-legalize.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul-pre-legalize.mir index 42e53be..8f9fc67 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul-pre-legalize.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul-pre-legalize.mir @@ -1,12 +1,8 @@ # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py # RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 -run-pass=amdgpu-prelegalizer-combiner %s -o - | FileCheck -check-prefix=GFX9 %s -# RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 -run-pass=amdgpu-prelegalizer-combiner -fp-contract=fast %s -o - | FileCheck -check-prefix=GFX9-CONTRACT %s # RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 -run-pass=amdgpu-prelegalizer-combiner --denormal-fp-math=preserve-sign %s -o - | FileCheck -check-prefix=GFX9-DENORM %s -# RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 -run-pass=amdgpu-prelegalizer-combiner -enable-unsafe-fp-math %s -o - | FileCheck -check-prefix=GFX9-UNSAFE %s # RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 -run-pass=amdgpu-prelegalizer-combiner %s -o - | FileCheck -check-prefix=GFX10 %s -# RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 -run-pass=amdgpu-prelegalizer-combiner -fp-contract=fast %s -o - | FileCheck -check-prefix=GFX10-CONTRACT %s # RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 -run-pass=amdgpu-prelegalizer-combiner --denormal-fp-math=preserve-sign %s -o - | FileCheck -check-prefix=GFX10-DENORM %s -# RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 -run-pass=amdgpu-prelegalizer-combiner -enable-unsafe-fp-math %s -o - | FileCheck -check-prefix=GFX10-UNSAFE %s --- name: test_f32_add_mul @@ -25,16 +21,6 @@ body: | ; GFX9-NEXT: $vgpr0 = COPY [[FADD]](s32) ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; - ; GFX9-CONTRACT-LABEL: name: test_f32_add_mul - ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2 - ; GFX9-CONTRACT-NEXT: {{ $}} - ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] - ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[FMA]](s32) - ; GFX9-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 - ; ; GFX9-DENORM-LABEL: name: test_f32_add_mul ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-DENORM-NEXT: {{ $}} @@ -46,16 +32,6 @@ body: | ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[FADD]](s32) ; GFX9-DENORM-NEXT: S_SETPC_B64_return 
$sgpr30_sgpr31, implicit $vgpr0 ; - ; GFX9-UNSAFE-LABEL: name: test_f32_add_mul - ; GFX9-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2 - ; GFX9-UNSAFE-NEXT: {{ $}} - ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] - ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[FMA]](s32) - ; GFX9-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 - ; ; GFX10-LABEL: name: test_f32_add_mul ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} @@ -67,16 +43,6 @@ body: | ; GFX10-NEXT: $vgpr0 = COPY [[FADD]](s32) ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; - ; GFX10-CONTRACT-LABEL: name: test_f32_add_mul - ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2 - ; GFX10-CONTRACT-NEXT: {{ $}} - ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] - ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[FMA]](s32) - ; GFX10-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 - ; ; GFX10-DENORM-LABEL: name: test_f32_add_mul ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX10-DENORM-NEXT: {{ $}} @@ -87,16 +53,6 @@ body: | ; GFX10-DENORM-NEXT: [[FADD:%[0-9]+]]:_(s32) = reassoc G_FADD [[FMUL]], [[COPY2]] ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[FADD]](s32) ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 - ; - ; GFX10-UNSAFE-LABEL: name: test_f32_add_mul - ; GFX10-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2 - ; GFX10-UNSAFE-NEXT: {{ $}} - ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] - ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[FMA]](s32) - ; GFX10-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = COPY $vgpr2 @@ -107,6 +63,60 @@ body: | ... 
--- +name: test_f32_add_mul_contract +body: | + bb.1.entry: + liveins: $vgpr0, $vgpr1, $vgpr2 + + ; GFX9-LABEL: name: test_f32_add_mul_contract + ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] + ; GFX9-NEXT: $vgpr0 = COPY [[FMA]](s32) + ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; + ; GFX9-DENORM-LABEL: name: test_f32_add_mul_contract + ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2 + ; GFX9-DENORM-NEXT: {{ $}} + ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-DENORM-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] + ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[FMA]](s32) + ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; + ; GFX10-LABEL: name: test_f32_add_mul_contract + ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] + ; GFX10-NEXT: $vgpr0 = COPY [[FMA]](s32) + ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; + ; GFX10-DENORM-LABEL: name: test_f32_add_mul_contract + ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2 + ; GFX10-DENORM-NEXT: {{ $}} + ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-DENORM-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] + ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[FMA]](s32) + ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = COPY $vgpr2 + %4:_(s32) = reassoc contract G_FMUL %0, %1 + %5:_(s32) = reassoc contract G_FADD %4, %2 + $vgpr0 = COPY %5(s32) + S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 +... 
+ +--- name: test_f32_add_mul_rhs body: | bb.1.entry: @@ -123,16 +133,6 @@ body: | ; GFX9-NEXT: $vgpr0 = COPY [[FADD]](s32) ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; - ; GFX9-CONTRACT-LABEL: name: test_f32_add_mul_rhs - ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2 - ; GFX9-CONTRACT-NEXT: {{ $}} - ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] - ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[FMA]](s32) - ; GFX9-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 - ; ; GFX9-DENORM-LABEL: name: test_f32_add_mul_rhs ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-DENORM-NEXT: {{ $}} @@ -144,16 +144,6 @@ body: | ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[FADD]](s32) ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; - ; GFX9-UNSAFE-LABEL: name: test_f32_add_mul_rhs - ; GFX9-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2 - ; GFX9-UNSAFE-NEXT: {{ $}} - ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] - ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[FMA]](s32) - ; GFX9-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 - ; ; GFX10-LABEL: name: test_f32_add_mul_rhs ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} @@ -165,16 +155,6 @@ body: | ; GFX10-NEXT: $vgpr0 = COPY [[FADD]](s32) ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; - ; GFX10-CONTRACT-LABEL: name: test_f32_add_mul_rhs - ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2 - ; GFX10-CONTRACT-NEXT: {{ $}} - ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] - ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[FMA]](s32) - ; GFX10-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 - ; ; GFX10-DENORM-LABEL: name: test_f32_add_mul_rhs ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX10-DENORM-NEXT: {{ $}} @@ -185,16 +165,6 @@ body: | ; GFX10-DENORM-NEXT: [[FADD:%[0-9]+]]:_(s32) = reassoc G_FADD [[COPY2]], [[FMUL]] ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[FADD]](s32) ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 - ; - ; GFX10-UNSAFE-LABEL: name: test_f32_add_mul_rhs - ; GFX10-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2 - ; GFX10-UNSAFE-NEXT: {{ $}} - ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] - ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[FMA]](s32) - ; GFX10-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s32) = COPY $vgpr2 @@ -205,6 +175,60 @@ body: | ... 
--- +name: test_f32_add_mul_rhs_contract +body: | + bb.1.entry: + liveins: $vgpr0, $vgpr1, $vgpr2 + + ; GFX9-LABEL: name: test_f32_add_mul_rhs_contract + ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] + ; GFX9-NEXT: $vgpr0 = COPY [[FMA]](s32) + ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; + ; GFX9-DENORM-LABEL: name: test_f32_add_mul_rhs_contract + ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2 + ; GFX9-DENORM-NEXT: {{ $}} + ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-DENORM-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] + ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[FMA]](s32) + ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; + ; GFX10-LABEL: name: test_f32_add_mul_rhs_contract + ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] + ; GFX10-NEXT: $vgpr0 = COPY [[FMA]](s32) + ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; + ; GFX10-DENORM-LABEL: name: test_f32_add_mul_rhs_contract + ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2 + ; GFX10-DENORM-NEXT: {{ $}} + ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-DENORM-NEXT: [[FMA:%[0-9]+]]:_(s32) = G_FMA [[COPY]], [[COPY1]], [[COPY2]] + ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[FMA]](s32) + ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = COPY $vgpr2 + %4:_(s32) = reassoc contract G_FMUL %0, %1 + %5:_(s32) = reassoc contract G_FADD %2, %4 + $vgpr0 = COPY %5(s32) + S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 +... 
+ +--- name: test_half_add_mul body: | bb.1.entry: @@ -225,20 +249,6 @@ body: | ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; - ; GFX9-CONTRACT-LABEL: name: test_half_add_mul - ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2 - ; GFX9-CONTRACT-NEXT: {{ $}} - ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-CONTRACT-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-CONTRACT-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-CONTRACT-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX9-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] - ; GFX9-CONTRACT-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) - ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX9-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 - ; ; GFX9-DENORM-LABEL: name: test_half_add_mul ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-DENORM-NEXT: {{ $}} @@ -254,20 +264,6 @@ body: | ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; - ; GFX9-UNSAFE-LABEL: name: test_half_add_mul - ; GFX9-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2 - ; GFX9-UNSAFE-NEXT: {{ $}} - ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-UNSAFE-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-UNSAFE-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-UNSAFE-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX9-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] - ; GFX9-UNSAFE-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) - ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX9-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 - ; ; GFX10-LABEL: name: test_half_add_mul ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} @@ -283,20 +279,6 @@ body: | ; GFX10-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; - ; GFX10-CONTRACT-LABEL: name: test_half_add_mul - ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2 - ; GFX10-CONTRACT-NEXT: {{ $}} - ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-CONTRACT-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-CONTRACT-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-CONTRACT-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX10-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] - ; GFX10-CONTRACT-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) - ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX10-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 - ; ; GFX10-DENORM-LABEL: name: test_half_add_mul ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX10-DENORM-NEXT: {{ $}} @@ -311,20 +293,6 @@ body: | ; GFX10-DENORM-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16) ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 - ; - 
; GFX10-UNSAFE-LABEL: name: test_half_add_mul - ; GFX10-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2 - ; GFX10-UNSAFE-NEXT: {{ $}} - ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-UNSAFE-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-UNSAFE-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-UNSAFE-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX10-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] - ; GFX10-UNSAFE-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) - ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX10-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 %4:_(s32) = COPY $vgpr0 %0:_(s16) = G_TRUNC %4(s32) %5:_(s32) = COPY $vgpr1 @@ -339,6 +307,81 @@ body: | ... --- +name: test_half_add_mul_contract +body: | + bb.1.entry: + liveins: $vgpr0, $vgpr1, $vgpr2 + + ; GFX9-LABEL: name: test_half_add_mul_contract + ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; + ; GFX9-DENORM-LABEL: name: test_half_add_mul_contract + ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2 + ; GFX9-DENORM-NEXT: {{ $}} + ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-DENORM-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-DENORM-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-DENORM-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; GFX9-DENORM-NEXT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] + ; GFX9-DENORM-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) + ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; + ; GFX10-LABEL: name: test_half_add_mul_contract + ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] + ; GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) + ; GFX10-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; + ; GFX10-DENORM-LABEL: name: test_half_add_mul_contract + ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2 + ; GFX10-DENORM-NEXT: {{ $}} + ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-DENORM-NEXT: 
[[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-DENORM-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-DENORM-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; GFX10-DENORM-NEXT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] + ; GFX10-DENORM-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) + ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + %4:_(s32) = COPY $vgpr0 + %0:_(s16) = G_TRUNC %4(s32) + %5:_(s32) = COPY $vgpr1 + %1:_(s16) = G_TRUNC %5(s32) + %6:_(s32) = COPY $vgpr2 + %2:_(s16) = G_TRUNC %6(s32) + %7:_(s16) = reassoc contract G_FMUL %0, %1 + %8:_(s16) = reassoc contract G_FADD %7, %2 + %10:_(s32) = G_ANYEXT %8(s16) + $vgpr0 = COPY %10(s32) + S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 +... + + +--- name: test_half_add_mul_rhs body: | bb.1.entry: @@ -359,20 +402,6 @@ body: | ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; - ; GFX9-CONTRACT-LABEL: name: test_half_add_mul_rhs - ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2 - ; GFX9-CONTRACT-NEXT: {{ $}} - ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-CONTRACT-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-CONTRACT-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-CONTRACT-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX9-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] - ; GFX9-CONTRACT-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) - ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX9-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 - ; ; GFX9-DENORM-LABEL: name: test_half_add_mul_rhs ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX9-DENORM-NEXT: {{ $}} @@ -388,20 +417,6 @@ body: | ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; - ; GFX9-UNSAFE-LABEL: name: test_half_add_mul_rhs - ; GFX9-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2 - ; GFX9-UNSAFE-NEXT: {{ $}} - ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-UNSAFE-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-UNSAFE-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-UNSAFE-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX9-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] - ; GFX9-UNSAFE-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) - ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX9-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 - ; ; GFX10-LABEL: name: test_half_add_mul_rhs ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX10-NEXT: {{ $}} @@ -417,20 +432,6 @@ body: | ; GFX10-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; - ; GFX10-CONTRACT-LABEL: name: test_half_add_mul_rhs - ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2 - ; GFX10-CONTRACT-NEXT: {{ $}} - ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY 
$vgpr0 - ; GFX10-CONTRACT-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-CONTRACT-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-CONTRACT-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX10-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] - ; GFX10-CONTRACT-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) - ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX10-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 - ; ; GFX10-DENORM-LABEL: name: test_half_add_mul_rhs ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2 ; GFX10-DENORM-NEXT: {{ $}} @@ -445,20 +446,84 @@ body: | ; GFX10-DENORM-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16) ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + %4:_(s32) = COPY $vgpr0 + %0:_(s16) = G_TRUNC %4(s32) + %5:_(s32) = COPY $vgpr1 + %1:_(s16) = G_TRUNC %5(s32) + %6:_(s32) = COPY $vgpr2 + %2:_(s16) = G_TRUNC %6(s32) + %7:_(s16) = reassoc G_FMUL %0, %1 + %8:_(s16) = reassoc G_FADD %2, %7 + %10:_(s32) = G_ANYEXT %8(s16) + $vgpr0 = COPY %10(s32) + S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 +... + +--- +name: test_half_add_mul_rhs_contract +body: | + bb.1.entry: + liveins: $vgpr0, $vgpr1, $vgpr2 + + ; GFX9-LABEL: name: test_half_add_mul_rhs_contract + ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(s16) = reassoc G_FMUL [[TRUNC]], [[TRUNC1]] + ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(s16) = reassoc G_FADD [[TRUNC2]], [[FMUL]] + ; GFX9-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16) + ; GFX9-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; + ; GFX9-DENORM-LABEL: name: test_half_add_mul_rhs_contract + ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2 + ; GFX9-DENORM-NEXT: {{ $}} + ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-DENORM-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-DENORM-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-DENORM-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; GFX9-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(s16) = reassoc G_FMUL [[TRUNC]], [[TRUNC1]] + ; GFX9-DENORM-NEXT: [[FADD:%[0-9]+]]:_(s16) = reassoc G_FADD [[TRUNC2]], [[FMUL]] + ; GFX9-DENORM-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16) + ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 ; - ; GFX10-UNSAFE-LABEL: name: test_half_add_mul_rhs - ; GFX10-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2 - ; GFX10-UNSAFE-NEXT: {{ $}} - ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-UNSAFE-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) - ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-UNSAFE-NEXT: 
[[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) - ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-UNSAFE-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) - ; GFX10-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s16) = G_FMA [[TRUNC]], [[TRUNC1]], [[TRUNC2]] - ; GFX10-UNSAFE-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMA]](s16) - ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX10-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; GFX10-LABEL: name: test_half_add_mul_rhs_contract + ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(s16) = reassoc G_FMUL [[TRUNC]], [[TRUNC1]] + ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(s16) = reassoc G_FADD [[TRUNC2]], [[FMUL]] + ; GFX10-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16) + ; GFX10-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 + ; + ; GFX10-DENORM-LABEL: name: test_half_add_mul_rhs_contract + ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2 + ; GFX10-DENORM-NEXT: {{ $}} + ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-DENORM-NEXT: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-DENORM-NEXT: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-DENORM-NEXT: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[COPY2]](s32) + ; GFX10-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(s16) = reassoc G_FMUL [[TRUNC]], [[TRUNC1]] + ; GFX10-DENORM-NEXT: [[FADD:%[0-9]+]]:_(s16) = reassoc G_FADD [[TRUNC2]], [[FMUL]] + ; GFX10-DENORM-NEXT: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FADD]](s16) + ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0 %4:_(s32) = COPY $vgpr0 %0:_(s16) = G_TRUNC %4(s32) %5:_(s32) = COPY $vgpr1 @@ -497,24 +562,6 @@ body: | ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; - ; GFX9-CONTRACT-LABEL: name: test_double_add_mul - ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 - ; GFX9-CONTRACT-NEXT: {{ $}} - ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-CONTRACT-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-CONTRACT-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX9-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-CONTRACT-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX9-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] - ; GFX9-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) - ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; 
GFX9-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 - ; ; GFX9-DENORM-LABEL: name: test_double_add_mul ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX9-DENORM-NEXT: {{ $}} @@ -534,24 +581,6 @@ body: | ; GFX9-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32) ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; - ; GFX9-UNSAFE-LABEL: name: test_double_add_mul - ; GFX9-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 - ; GFX9-UNSAFE-NEXT: {{ $}} - ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-UNSAFE-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-UNSAFE-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX9-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-UNSAFE-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX9-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] - ; GFX9-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) - ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 - ; ; GFX10-LABEL: name: test_double_add_mul ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX10-NEXT: {{ $}} @@ -571,24 +600,6 @@ body: | ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; - ; GFX10-CONTRACT-LABEL: name: test_double_add_mul - ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 - ; GFX10-CONTRACT-NEXT: {{ $}} - ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-CONTRACT-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-CONTRACT-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX10-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-CONTRACT-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX10-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] - ; GFX10-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) - ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 - ; ; GFX10-DENORM-LABEL: name: test_double_add_mul ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX10-DENORM-NEXT: {{ $}} @@ -607,24 +618,6 @@ body: | ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[UV]](s32) ; GFX10-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32) ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 - ; - ; GFX10-UNSAFE-LABEL: name: test_double_add_mul - ; GFX10-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 - ; 
GFX10-UNSAFE-NEXT: {{ $}} - ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-UNSAFE-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-UNSAFE-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX10-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-UNSAFE-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX10-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] - ; GFX10-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) - ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 %4:_(s32) = COPY $vgpr0 %5:_(s32) = COPY $vgpr1 %0:_(s64) = G_MERGE_VALUES %4(s32), %5(s32) @@ -643,6 +636,100 @@ body: | ... --- +name: test_double_add_mul_contract +body: | + bb.1.entry: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + + ; GFX9-LABEL: name: test_double_add_mul_contract + ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) + ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; + ; GFX9-DENORM-LABEL: name: test_double_add_mul_contract + ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + ; GFX9-DENORM-NEXT: {{ $}} + ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-DENORM-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX9-DENORM-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX9-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX9-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX9-DENORM-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX9-DENORM-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] + ; GFX9-DENORM-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) + ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX9-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; + ; GFX10-LABEL: name: 
test_double_add_mul_contract + ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) + ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; + ; GFX10-DENORM-LABEL: name: test_double_add_mul_contract + ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + ; GFX10-DENORM-NEXT: {{ $}} + ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-DENORM-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX10-DENORM-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX10-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX10-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX10-DENORM-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX10-DENORM-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] + ; GFX10-DENORM-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) + ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX10-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + %4:_(s32) = COPY $vgpr0 + %5:_(s32) = COPY $vgpr1 + %0:_(s64) = G_MERGE_VALUES %4(s32), %5(s32) + %6:_(s32) = COPY $vgpr2 + %7:_(s32) = COPY $vgpr3 + %1:_(s64) = G_MERGE_VALUES %6(s32), %7(s32) + %8:_(s32) = COPY $vgpr4 + %9:_(s32) = COPY $vgpr5 + %2:_(s64) = G_MERGE_VALUES %8(s32), %9(s32) + %10:_(s64) = reassoc contract G_FMUL %0, %1 + %11:_(s64) = reassoc contract G_FADD %10, %2 + %13:_(s32), %14:_(s32) = G_UNMERGE_VALUES %11(s64) + $vgpr0 = COPY %13(s32) + $vgpr1 = COPY %14(s32) + S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 +... 
+ +--- name: test_double_add_mul_rhs body: | bb.1.entry: @@ -667,24 +754,6 @@ body: | ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; - ; GFX9-CONTRACT-LABEL: name: test_double_add_mul_rhs - ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 - ; GFX9-CONTRACT-NEXT: {{ $}} - ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-CONTRACT-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-CONTRACT-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX9-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-CONTRACT-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX9-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] - ; GFX9-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) - ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 - ; ; GFX9-DENORM-LABEL: name: test_double_add_mul_rhs ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX9-DENORM-NEXT: {{ $}} @@ -704,24 +773,6 @@ body: | ; GFX9-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32) ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; - ; GFX9-UNSAFE-LABEL: name: test_double_add_mul_rhs - ; GFX9-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 - ; GFX9-UNSAFE-NEXT: {{ $}} - ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-UNSAFE-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-UNSAFE-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX9-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-UNSAFE-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX9-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] - ; GFX9-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) - ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 - ; ; GFX10-LABEL: name: test_double_add_mul_rhs ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX10-NEXT: {{ $}} @@ -741,24 +792,6 @@ body: | ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; - ; GFX10-CONTRACT-LABEL: name: test_double_add_mul_rhs - ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 - ; GFX10-CONTRACT-NEXT: {{ $}} - ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-CONTRACT-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), 
[[COPY1]](s32) - ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-CONTRACT-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX10-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-CONTRACT-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX10-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] - ; GFX10-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) - ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 - ; ; GFX10-DENORM-LABEL: name: test_double_add_mul_rhs ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX10-DENORM-NEXT: {{ $}} @@ -777,24 +810,6 @@ body: | ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[UV]](s32) ; GFX10-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32) ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 - ; - ; GFX10-UNSAFE-LABEL: name: test_double_add_mul_rhs - ; GFX10-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 - ; GFX10-UNSAFE-NEXT: {{ $}} - ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-UNSAFE-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-UNSAFE-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX10-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-UNSAFE-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX10-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] - ; GFX10-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) - ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 %4:_(s32) = COPY $vgpr0 %5:_(s32) = COPY $vgpr1 %0:_(s64) = G_MERGE_VALUES %4(s32), %5(s32) @@ -813,6 +828,100 @@ body: | ... 
--- +name: test_double_add_mul_rhs_contract +body: | + bb.1.entry: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + + ; GFX9-LABEL: name: test_double_add_mul_rhs_contract + ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) + ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; + ; GFX9-DENORM-LABEL: name: test_double_add_mul_rhs_contract + ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + ; GFX9-DENORM-NEXT: {{ $}} + ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-DENORM-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX9-DENORM-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX9-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX9-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX9-DENORM-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX9-DENORM-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] + ; GFX9-DENORM-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) + ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX9-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; + ; GFX10-LABEL: name: test_double_add_mul_rhs_contract + ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) + ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; + ; GFX10-DENORM-LABEL: name: test_double_add_mul_rhs_contract + ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, 
$vgpr4, $vgpr5 + ; GFX10-DENORM-NEXT: {{ $}} + ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-DENORM-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX10-DENORM-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX10-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX10-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX10-DENORM-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX10-DENORM-NEXT: [[FMA:%[0-9]+]]:_(s64) = G_FMA [[MV]], [[MV1]], [[MV2]] + ; GFX10-DENORM-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](s64) + ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX10-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + %4:_(s32) = COPY $vgpr0 + %5:_(s32) = COPY $vgpr1 + %0:_(s64) = G_MERGE_VALUES %4(s32), %5(s32) + %6:_(s32) = COPY $vgpr2 + %7:_(s32) = COPY $vgpr3 + %1:_(s64) = G_MERGE_VALUES %6(s32), %7(s32) + %8:_(s32) = COPY $vgpr4 + %9:_(s32) = COPY $vgpr5 + %2:_(s64) = G_MERGE_VALUES %8(s32), %9(s32) + %10:_(s64) = reassoc contract G_FMUL %0, %1 + %11:_(s64) = reassoc contract G_FADD %2, %10 + %13:_(s32), %14:_(s32) = G_UNMERGE_VALUES %11(s64) + $vgpr0 = COPY %13(s32) + $vgpr1 = COPY %14(s32) + S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 +... + +--- name: test_4xfloat_add_mul body: | bb.1.entry: @@ -845,32 +954,6 @@ body: | ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; - ; GFX9-CONTRACT-LABEL: name: test_4xfloat_add_mul - ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11 - ; GFX9-CONTRACT-NEXT: {{ $}} - ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-CONTRACT-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX9-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-CONTRACT-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX9-CONTRACT-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX9-CONTRACT-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-CONTRACT-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX9-CONTRACT-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX9-CONTRACT-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GFX9-CONTRACT-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GFX9-CONTRACT-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(<4 x s32>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] - ; GFX9-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<4 x s32>) - ; 
GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-CONTRACT-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-CONTRACT-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; GFX9-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 - ; ; GFX9-DENORM-LABEL: name: test_4xfloat_add_mul ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11 ; GFX9-DENORM-NEXT: {{ $}} @@ -898,32 +981,6 @@ body: | ; GFX9-DENORM-NEXT: $vgpr3 = COPY [[UV3]](s32) ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; - ; GFX9-UNSAFE-LABEL: name: test_4xfloat_add_mul - ; GFX9-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11 - ; GFX9-UNSAFE-NEXT: {{ $}} - ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX9-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-UNSAFE-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX9-UNSAFE-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-UNSAFE-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX9-UNSAFE-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX9-UNSAFE-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GFX9-UNSAFE-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(<4 x s32>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] - ; GFX9-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<4 x s32>) - ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-UNSAFE-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-UNSAFE-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; GFX9-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 - ; ; GFX10-LABEL: name: test_4xfloat_add_mul ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11 ; GFX10-NEXT: {{ $}} @@ -951,32 +1008,6 @@ body: | ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 ; - ; GFX10-CONTRACT-LABEL: name: test_4xfloat_add_mul - ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11 - ; GFX10-CONTRACT-NEXT: {{ $}} - ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-CONTRACT-NEXT: 
[[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX10-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-CONTRACT-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-CONTRACT-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX10-CONTRACT-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-CONTRACT-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX10-CONTRACT-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX10-CONTRACT-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GFX10-CONTRACT-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GFX10-CONTRACT-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(<4 x s32>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] - ; GFX10-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<4 x s32>) - ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-CONTRACT-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-CONTRACT-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; GFX10-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 - ; ; GFX10-DENORM-LABEL: name: test_4xfloat_add_mul ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11 ; GFX10-DENORM-NEXT: {{ $}} @@ -1003,32 +1034,6 @@ body: | ; GFX10-DENORM-NEXT: $vgpr2 = COPY [[UV2]](s32) ; GFX10-DENORM-NEXT: $vgpr3 = COPY [[UV3]](s32) ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 - ; - ; GFX10-UNSAFE-LABEL: name: test_4xfloat_add_mul - ; GFX10-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11 - ; GFX10-UNSAFE-NEXT: {{ $}} - ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-UNSAFE-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) - ; GFX10-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-UNSAFE-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-UNSAFE-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX10-UNSAFE-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-UNSAFE-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX10-UNSAFE-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX10-UNSAFE-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GFX10-UNSAFE-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GFX10-UNSAFE-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(<4 x s32>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] - ; GFX10-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), 
[[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<4 x s32>) - ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-UNSAFE-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-UNSAFE-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; GFX10-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 %4:_(s32) = COPY $vgpr0 %5:_(s32) = COPY $vgpr1 %6:_(s32) = COPY $vgpr2 @@ -1055,6 +1060,140 @@ body: | ... --- +name: test_4xfloat_add_mul_contract +body: | + bb.1.entry: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11 + + ; GFX9-LABEL: name: test_4xfloat_add_mul_contract + ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(<4 x s32>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<4 x s32>) + ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; + ; GFX9-DENORM-LABEL: name: test_4xfloat_add_mul_contract + ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11 + ; GFX9-DENORM-NEXT: {{ $}} + ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX9-DENORM-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GFX9-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX9-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX9-DENORM-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX9-DENORM-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX9-DENORM-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX9-DENORM-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX9-DENORM-NEXT: 
[[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; GFX9-DENORM-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; GFX9-DENORM-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; GFX9-DENORM-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX9-DENORM-NEXT: [[FMA:%[0-9]+]]:_(<4 x s32>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] + ; GFX9-DENORM-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<4 x s32>) + ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX9-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX9-DENORM-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GFX9-DENORM-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; + ; GFX10-LABEL: name: test_4xfloat_add_mul_contract + ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(<4 x s32>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<4 x s32>) + ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + ; + ; GFX10-DENORM-LABEL: name: test_4xfloat_add_mul_contract + ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11 + ; GFX10-DENORM-NEXT: {{ $}} + ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX10-DENORM-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32) + ; GFX10-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX10-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX10-DENORM-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX10-DENORM-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY 
$vgpr7 + ; GFX10-DENORM-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32) + ; GFX10-DENORM-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX10-DENORM-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; GFX10-DENORM-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; GFX10-DENORM-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; GFX10-DENORM-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s32>) = G_BUILD_VECTOR [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32) + ; GFX10-DENORM-NEXT: [[FMA:%[0-9]+]]:_(<4 x s32>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] + ; GFX10-DENORM-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<4 x s32>) + ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX10-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX10-DENORM-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GFX10-DENORM-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 + %4:_(s32) = COPY $vgpr0 + %5:_(s32) = COPY $vgpr1 + %6:_(s32) = COPY $vgpr2 + %7:_(s32) = COPY $vgpr3 + %0:_(<4 x s32>) = G_BUILD_VECTOR %4(s32), %5(s32), %6(s32), %7(s32) + %8:_(s32) = COPY $vgpr4 + %9:_(s32) = COPY $vgpr5 + %10:_(s32) = COPY $vgpr6 + %11:_(s32) = COPY $vgpr7 + %1:_(<4 x s32>) = G_BUILD_VECTOR %8(s32), %9(s32), %10(s32), %11(s32) + %12:_(s32) = COPY $vgpr8 + %13:_(s32) = COPY $vgpr9 + %14:_(s32) = COPY $vgpr10 + %15:_(s32) = COPY $vgpr11 + %2:_(<4 x s32>) = G_BUILD_VECTOR %12(s32), %13(s32), %14(s32), %15(s32) + %16:_(<4 x s32>) = reassoc contract G_FMUL %0, %1 + %17:_(<4 x s32>) = reassoc contract G_FADD %16, %2 + %19:_(s32), %20:_(s32), %21:_(s32), %22:_(s32) = G_UNMERGE_VALUES %17(<4 x s32>) + $vgpr0 = COPY %19(s32) + $vgpr1 = COPY %20(s32) + $vgpr2 = COPY %21(s32) + $vgpr3 = COPY %22(s32) + S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3 +... 
+ +--- name: test_3xfloat_add_mul_rhs body: | bb.1.entry: @@ -1083,28 +1222,6 @@ body: | ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 ; - ; GFX9-CONTRACT-LABEL: name: test_3xfloat_add_mul_rhs - ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 - ; GFX9-CONTRACT-NEXT: {{ $}} - ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-CONTRACT-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) - ; GFX9-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-CONTRACT-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32) - ; GFX9-CONTRACT-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX9-CONTRACT-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX9-CONTRACT-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX9-CONTRACT-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) - ; GFX9-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(<3 x s32>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] - ; GFX9-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<3 x s32>) - ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-CONTRACT-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 - ; ; GFX9-DENORM-LABEL: name: test_3xfloat_add_mul_rhs ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 ; GFX9-DENORM-NEXT: {{ $}} @@ -1128,28 +1245,6 @@ body: | ; GFX9-DENORM-NEXT: $vgpr2 = COPY [[UV2]](s32) ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 ; - ; GFX9-UNSAFE-LABEL: name: test_3xfloat_add_mul_rhs - ; GFX9-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 - ; GFX9-UNSAFE-NEXT: {{ $}} - ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) - ; GFX9-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32) - ; GFX9-UNSAFE-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX9-UNSAFE-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX9-UNSAFE-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) - ; GFX9-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(<3 x s32>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] - ; GFX9-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = 
G_UNMERGE_VALUES [[FMA]](<3 x s32>) - ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-UNSAFE-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 - ; ; GFX10-LABEL: name: test_3xfloat_add_mul_rhs ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 ; GFX10-NEXT: {{ $}} @@ -1173,28 +1268,6 @@ body: | ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 ; - ; GFX10-CONTRACT-LABEL: name: test_3xfloat_add_mul_rhs - ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 - ; GFX10-CONTRACT-NEXT: {{ $}} - ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-CONTRACT-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) - ; GFX10-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-CONTRACT-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32) - ; GFX10-CONTRACT-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-CONTRACT-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX10-CONTRACT-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX10-CONTRACT-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) - ; GFX10-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(<3 x s32>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] - ; GFX10-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<3 x s32>) - ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-CONTRACT-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 - ; ; GFX10-DENORM-LABEL: name: test_3xfloat_add_mul_rhs ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 ; GFX10-DENORM-NEXT: {{ $}} @@ -1217,28 +1290,124 @@ body: | ; GFX10-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32) ; GFX10-DENORM-NEXT: $vgpr2 = COPY [[UV2]](s32) ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + %4:_(s32) = COPY $vgpr0 + %5:_(s32) = COPY $vgpr1 + %6:_(s32) = COPY $vgpr2 + %0:_(<3 x s32>) = G_BUILD_VECTOR %4(s32), %5(s32), %6(s32) + %7:_(s32) = COPY $vgpr3 + %8:_(s32) = COPY $vgpr4 + %9:_(s32) = COPY $vgpr5 + %1:_(<3 x s32>) = G_BUILD_VECTOR %7(s32), %8(s32), %9(s32) + %10:_(s32) = COPY $vgpr6 + %11:_(s32) = COPY $vgpr7 + %12:_(s32) = COPY $vgpr8 + %2:_(<3 x s32>) = G_BUILD_VECTOR %10(s32), %11(s32), %12(s32) + %13:_(<3 x s32>) = reassoc G_FMUL %0, %1 + %14:_(<3 x s32>) = reassoc G_FADD %2, %13 + %16:_(s32), %17:_(s32), %18:_(s32) = G_UNMERGE_VALUES %14(<3 x s32>) + $vgpr0 = COPY %16(s32) + $vgpr1 = COPY %17(s32) + $vgpr2 = COPY %18(s32) + S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 +... 
+ +--- +name: test_3xfloat_add_mul_rhs_contract +body: | + bb.1.entry: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 + + ; GFX9-LABEL: name: test_3xfloat_add_mul_rhs_contract + ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32) + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) + ; GFX9-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s32>) = reassoc G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] + ; GFX9-NEXT: [[FADD:%[0-9]+]]:_(<3 x s32>) = reassoc G_FADD [[BUILD_VECTOR2]], [[FMUL]] + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s32>) + ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + ; + ; GFX9-DENORM-LABEL: name: test_3xfloat_add_mul_rhs_contract + ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 + ; GFX9-DENORM-NEXT: {{ $}} + ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-DENORM-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) + ; GFX9-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX9-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX9-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX9-DENORM-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32) + ; GFX9-DENORM-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX9-DENORM-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX9-DENORM-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX9-DENORM-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) + ; GFX9-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s32>) = reassoc G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] + ; GFX9-DENORM-NEXT: [[FADD:%[0-9]+]]:_(<3 x s32>) = reassoc G_FADD [[BUILD_VECTOR2]], [[FMUL]] + ; GFX9-DENORM-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s32>) + ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX9-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX9-DENORM-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + ; + ; GFX10-LABEL: name: test_3xfloat_add_mul_rhs_contract + ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 + ; 
GFX10-NEXT: {{ $}} + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32) + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) + ; GFX10-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s32>) = reassoc G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] + ; GFX10-NEXT: [[FADD:%[0-9]+]]:_(<3 x s32>) = reassoc G_FADD [[BUILD_VECTOR2]], [[FMUL]] + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s32>) + ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 ; - ; GFX10-UNSAFE-LABEL: name: test_3xfloat_add_mul_rhs - ; GFX10-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 - ; GFX10-UNSAFE-NEXT: {{ $}} - ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-UNSAFE-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) - ; GFX10-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-UNSAFE-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32) - ; GFX10-UNSAFE-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-UNSAFE-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX10-UNSAFE-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX10-UNSAFE-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) - ; GFX10-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(<3 x s32>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] - ; GFX10-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<3 x s32>) - ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-UNSAFE-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 + ; GFX10-DENORM-LABEL: name: test_3xfloat_add_mul_rhs_contract + ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8 + ; GFX10-DENORM-NEXT: {{ $}} + ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-DENORM-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32) + ; 
GFX10-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX10-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX10-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX10-DENORM-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32) + ; GFX10-DENORM-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX10-DENORM-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX10-DENORM-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX10-DENORM-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s32>) = G_BUILD_VECTOR [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32) + ; GFX10-DENORM-NEXT: [[FMUL:%[0-9]+]]:_(<3 x s32>) = reassoc G_FMUL [[BUILD_VECTOR]], [[BUILD_VECTOR1]] + ; GFX10-DENORM-NEXT: [[FADD:%[0-9]+]]:_(<3 x s32>) = reassoc G_FADD [[BUILD_VECTOR2]], [[FMUL]] + ; GFX10-DENORM-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FADD]](<3 x s32>) + ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX10-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX10-DENORM-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2 %4:_(s32) = COPY $vgpr0 %5:_(s32) = COPY $vgpr1 %6:_(s32) = COPY $vgpr2 @@ -1285,24 +1454,6 @@ body: | ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; - ; GFX9-CONTRACT-LABEL: name: test_4xhalf_add_mul - ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 - ; GFX9-CONTRACT-NEXT: {{ $}} - ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-CONTRACT-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>) - ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX9-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; GFX9-CONTRACT-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>) - ; GFX9-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX9-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; GFX9-CONTRACT-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>) - ; GFX9-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(<4 x s16>) = G_FMA [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]], [[CONCAT_VECTORS2]] - ; GFX9-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FMA]](<4 x s16>) - ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) - ; GFX9-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; GFX9-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 - ; ; GFX9-DENORM-LABEL: name: test_4xhalf_add_mul ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX9-DENORM-NEXT: {{ $}} @@ -1322,24 +1473,6 @@ body: | ; GFX9-DENORM-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; - ; GFX9-UNSAFE-LABEL: name: test_4xhalf_add_mul - ; GFX9-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 - ; GFX9-UNSAFE-NEXT: {{ $}} - ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-UNSAFE-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), 
[[COPY1]](<2 x s16>) - ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX9-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; GFX9-UNSAFE-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>) - ; GFX9-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX9-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; GFX9-UNSAFE-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>) - ; GFX9-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(<4 x s16>) = G_FMA [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]], [[CONCAT_VECTORS2]] - ; GFX9-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FMA]](<4 x s16>) - ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) - ; GFX9-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; GFX9-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 - ; ; GFX10-LABEL: name: test_4xhalf_add_mul ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX10-NEXT: {{ $}} @@ -1359,24 +1492,6 @@ body: | ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; - ; GFX10-CONTRACT-LABEL: name: test_4xhalf_add_mul - ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 - ; GFX10-CONTRACT-NEXT: {{ $}} - ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX10-CONTRACT-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>) - ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX10-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; GFX10-CONTRACT-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>) - ; GFX10-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX10-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; GFX10-CONTRACT-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>) - ; GFX10-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(<4 x s16>) = G_FMA [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]], [[CONCAT_VECTORS2]] - ; GFX10-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FMA]](<4 x s16>) - ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) - ; GFX10-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; GFX10-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 - ; ; GFX10-DENORM-LABEL: name: test_4xhalf_add_mul ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX10-DENORM-NEXT: {{ $}} @@ -1395,24 +1510,6 @@ body: | ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) ; GFX10-DENORM-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 - ; - ; GFX10-UNSAFE-LABEL: name: test_4xhalf_add_mul - ; GFX10-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 - ; GFX10-UNSAFE-NEXT: {{ $}} - ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX10-UNSAFE-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>) - ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; 
GFX10-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; GFX10-UNSAFE-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>) - ; GFX10-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX10-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; GFX10-UNSAFE-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>) - ; GFX10-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(<4 x s16>) = G_FMA [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]], [[CONCAT_VECTORS2]] - ; GFX10-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FMA]](<4 x s16>) - ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) - ; GFX10-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) - ; GFX10-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 %4:_(<2 x s16>) = COPY $vgpr0 %5:_(<2 x s16>) = COPY $vgpr1 %0:_(<4 x s16>) = G_CONCAT_VECTORS %4(<2 x s16>), %5(<2 x s16>) @@ -1431,6 +1528,100 @@ body: | ... --- +name: test_4xhalf_add_mul_contract +body: | + bb.1.entry: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + + ; GFX9-LABEL: name: test_4xhalf_add_mul_contract + ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>) + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 + ; GFX9-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>) + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 + ; GFX9-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>) + ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(<4 x s16>) = G_FMA [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]], [[CONCAT_VECTORS2]] + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FMA]](<4 x s16>) + ; GFX9-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) + ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; + ; GFX9-DENORM-LABEL: name: test_4xhalf_add_mul_contract + ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + ; GFX9-DENORM-NEXT: {{ $}} + ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX9-DENORM-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>) + ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; GFX9-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 + ; GFX9-DENORM-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>) + ; GFX9-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 + ; GFX9-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 + ; GFX9-DENORM-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>) + ; GFX9-DENORM-NEXT: [[FMA:%[0-9]+]]:_(<4 x s16>) = G_FMA [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]], [[CONCAT_VECTORS2]] + ; GFX9-DENORM-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), 
[[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FMA]](<4 x s16>) + ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) + ; GFX9-DENORM-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; + ; GFX10-LABEL: name: test_4xhalf_add_mul_contract + ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>) + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 + ; GFX10-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>) + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 + ; GFX10-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>) + ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(<4 x s16>) = G_FMA [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]], [[CONCAT_VECTORS2]] + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FMA]](<4 x s16>) + ; GFX10-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) + ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + ; + ; GFX10-DENORM-LABEL: name: test_4xhalf_add_mul_contract + ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 + ; GFX10-DENORM-NEXT: {{ $}} + ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX10-DENORM-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>) + ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 + ; GFX10-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 + ; GFX10-DENORM-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>) + ; GFX10-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 + ; GFX10-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 + ; GFX10-DENORM-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>) + ; GFX10-DENORM-NEXT: [[FMA:%[0-9]+]]:_(<4 x s16>) = G_FMA [[CONCAT_VECTORS]], [[CONCAT_VECTORS1]], [[CONCAT_VECTORS2]] + ; GFX10-DENORM-NEXT: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[FMA]](<4 x s16>) + ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[UV]](<2 x s16>) + ; GFX10-DENORM-NEXT: $vgpr1 = COPY [[UV1]](<2 x s16>) + ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 + %4:_(<2 x s16>) = COPY $vgpr0 + %5:_(<2 x s16>) = COPY $vgpr1 + %0:_(<4 x s16>) = G_CONCAT_VECTORS %4(<2 x s16>), %5(<2 x s16>) + %6:_(<2 x s16>) = COPY $vgpr2 + %7:_(<2 x s16>) = COPY $vgpr3 + %1:_(<4 x s16>) = G_CONCAT_VECTORS %6(<2 x s16>), %7(<2 x s16>) + %8:_(<2 x s16>) = COPY $vgpr4 + %9:_(<2 x s16>) = COPY $vgpr5 + %2:_(<4 x s16>) = G_CONCAT_VECTORS %8(<2 x s16>), %9(<2 x s16>) + %10:_(<4 x s16>) = reassoc contract G_FMUL %0, %1 + %11:_(<4 x s16>) = reassoc contract G_FADD %10, %2 + %13:_(<2 x s16>), %14:_(<2 x s16>) = G_UNMERGE_VALUES %11(<4 x s16>) + $vgpr0 = COPY %13(<2 x s16>) + $vgpr1 = COPY %14(<2 x 
s16>) + S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 +... + +--- name: test_3xhalf_add_mul_rhs body: | bb.1.entry: @@ -1461,30 +1652,6 @@ body: | ; GFX9-NEXT: $vgpr1 = COPY [[UV7]](<2 x s16>) ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; - ; GFX9-CONTRACT-LABEL: name: test_3xhalf_add_mul_rhs - ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 - ; GFX9-CONTRACT-NEXT: {{ $}} - ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-CONTRACT-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; GFX9-CONTRACT-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX9-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) - ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX9-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; GFX9-CONTRACT-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX9-CONTRACT-NEXT: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x s16>) - ; GFX9-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX9-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; GFX9-CONTRACT-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX9-CONTRACT-NEXT: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x s16>) - ; GFX9-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(<3 x s16>) = G_FMA [[UV]], [[UV2]], [[UV4]] - ; GFX9-CONTRACT-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF - ; GFX9-CONTRACT-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FMA]](<3 x s16>), [[DEF1]](<3 x s16>) - ; GFX9-CONTRACT-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>) - ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[UV6]](<2 x s16>) - ; GFX9-CONTRACT-NEXT: $vgpr1 = COPY [[UV7]](<2 x s16>) - ; GFX9-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 - ; ; GFX9-DENORM-LABEL: name: test_3xhalf_add_mul_rhs ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX9-DENORM-NEXT: {{ $}} @@ -1510,30 +1677,6 @@ body: | ; GFX9-DENORM-NEXT: $vgpr1 = COPY [[UV7]](<2 x s16>) ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; - ; GFX9-UNSAFE-LABEL: name: test_3xhalf_add_mul_rhs - ; GFX9-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 - ; GFX9-UNSAFE-NEXT: {{ $}} - ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9-UNSAFE-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; GFX9-UNSAFE-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX9-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) - ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX9-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; GFX9-UNSAFE-NEXT: 
[[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX9-UNSAFE-NEXT: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x s16>) - ; GFX9-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX9-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; GFX9-UNSAFE-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX9-UNSAFE-NEXT: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x s16>) - ; GFX9-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(<3 x s16>) = G_FMA [[UV]], [[UV2]], [[UV4]] - ; GFX9-UNSAFE-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF - ; GFX9-UNSAFE-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FMA]](<3 x s16>), [[DEF1]](<3 x s16>) - ; GFX9-UNSAFE-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>) - ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[UV6]](<2 x s16>) - ; GFX9-UNSAFE-NEXT: $vgpr1 = COPY [[UV7]](<2 x s16>) - ; GFX9-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 - ; ; GFX10-LABEL: name: test_3xhalf_add_mul_rhs ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX10-NEXT: {{ $}} @@ -1559,30 +1702,6 @@ body: | ; GFX10-NEXT: $vgpr1 = COPY [[UV7]](<2 x s16>) ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 ; - ; GFX10-CONTRACT-LABEL: name: test_3xhalf_add_mul_rhs - ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 - ; GFX10-CONTRACT-NEXT: {{ $}} - ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX10-CONTRACT-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; GFX10-CONTRACT-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX10-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) - ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX10-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; GFX10-CONTRACT-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX10-CONTRACT-NEXT: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x s16>) - ; GFX10-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX10-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; GFX10-CONTRACT-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX10-CONTRACT-NEXT: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x s16>) - ; GFX10-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(<3 x s16>) = G_FMA [[UV]], [[UV2]], [[UV4]] - ; GFX10-CONTRACT-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF - ; GFX10-CONTRACT-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FMA]](<3 x s16>), [[DEF1]](<3 x s16>) - ; GFX10-CONTRACT-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>) - ; 
GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[UV6]](<2 x s16>) - ; GFX10-CONTRACT-NEXT: $vgpr1 = COPY [[UV7]](<2 x s16>) - ; GFX10-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 - ; ; GFX10-DENORM-LABEL: name: test_3xhalf_add_mul_rhs ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 ; GFX10-DENORM-NEXT: {{ $}} @@ -1607,30 +1726,6 @@ body: | ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[UV6]](<2 x s16>) ; GFX10-DENORM-NEXT: $vgpr1 = COPY [[UV7]](<2 x s16>) ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 - ; - ; GFX10-UNSAFE-LABEL: name: test_3xhalf_add_mul_rhs - ; GFX10-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5 - ; GFX10-UNSAFE-NEXT: {{ $}} - ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 - ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX10-UNSAFE-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF - ; GFX10-UNSAFE-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX10-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>) - ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2 - ; GFX10-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3 - ; GFX10-UNSAFE-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX10-UNSAFE-NEXT: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x s16>) - ; GFX10-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4 - ; GFX10-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5 - ; GFX10-UNSAFE-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[DEF]](<2 x s16>) - ; GFX10-UNSAFE-NEXT: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x s16>) - ; GFX10-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(<3 x s16>) = G_FMA [[UV]], [[UV2]], [[UV4]] - ; GFX10-UNSAFE-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF - ; GFX10-UNSAFE-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FMA]](<3 x s16>), [[DEF1]](<3 x s16>) - ; GFX10-UNSAFE-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>) - ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[UV6]](<2 x s16>) - ; GFX10-UNSAFE-NEXT: $vgpr1 = COPY [[UV7]](<2 x s16>) - ; GFX10-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1 %4:_(<2 x s16>) = COPY $vgpr0 %5:_(<2 x s16>) = COPY $vgpr1 %10:_(<2 x s16>) = G_IMPLICIT_DEF @@ -1655,6 +1750,130 @@ body: | ... 
 ---
+name: test_3xhalf_add_mul_rhs_contract
+body: |
+  bb.1.entry:
+    liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
+
+    ; GFX9-LABEL: name: test_3xhalf_add_mul_rhs_contract
+    ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
+    ; GFX9-NEXT: {{ $}}
+    ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+    ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
+    ; GFX9-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>)
+    ; GFX9-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
+    ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
+    ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3
+    ; GFX9-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[DEF]](<2 x s16>)
+    ; GFX9-NEXT: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x s16>)
+    ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
+    ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
+    ; GFX9-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[DEF]](<2 x s16>)
+    ; GFX9-NEXT: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x s16>)
+    ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(<3 x s16>) = G_FMA [[UV]], [[UV2]], [[UV4]]
+    ; GFX9-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FMA]](<3 x s16>), [[DEF1]](<3 x s16>)
+    ; GFX9-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>)
+    ; GFX9-NEXT: $vgpr0 = COPY [[UV6]](<2 x s16>)
+    ; GFX9-NEXT: $vgpr1 = COPY [[UV7]](<2 x s16>)
+    ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
+    ;
+    ; GFX9-DENORM-LABEL: name: test_3xhalf_add_mul_rhs_contract
+    ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
+    ; GFX9-DENORM-NEXT: {{ $}}
+    ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+    ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
+    ; GFX9-DENORM-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-DENORM-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>)
+    ; GFX9-DENORM-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
+    ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
+    ; GFX9-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3
+    ; GFX9-DENORM-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[DEF]](<2 x s16>)
+    ; GFX9-DENORM-NEXT: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x s16>)
+    ; GFX9-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
+    ; GFX9-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
+    ; GFX9-DENORM-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[DEF]](<2 x s16>)
+    ; GFX9-DENORM-NEXT: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x s16>)
+    ; GFX9-DENORM-NEXT: [[FMA:%[0-9]+]]:_(<3 x s16>) = G_FMA [[UV]], [[UV2]], [[UV4]]
+    ; GFX9-DENORM-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF
+    ; GFX9-DENORM-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FMA]](<3 x s16>), [[DEF1]](<3 x s16>)
+    ; GFX9-DENORM-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>)
+    ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[UV6]](<2 x s16>)
+    ; GFX9-DENORM-NEXT: $vgpr1 = COPY [[UV7]](<2 x s16>)
+    ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
+    ;
+    ; GFX10-LABEL: name: test_3xhalf_add_mul_rhs_contract
+    ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
+    ; GFX10-NEXT: {{ $}}
+    ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+    ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
+    ; GFX10-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; GFX10-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>)
+    ; GFX10-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
+    ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
+    ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3
+    ; GFX10-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[DEF]](<2 x s16>)
+    ; GFX10-NEXT: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x s16>)
+    ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
+    ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
+    ; GFX10-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[DEF]](<2 x s16>)
+    ; GFX10-NEXT: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x s16>)
+    ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(<3 x s16>) = G_FMA [[UV]], [[UV2]], [[UV4]]
+    ; GFX10-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF
+    ; GFX10-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FMA]](<3 x s16>), [[DEF1]](<3 x s16>)
+    ; GFX10-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>)
+    ; GFX10-NEXT: $vgpr0 = COPY [[UV6]](<2 x s16>)
+    ; GFX10-NEXT: $vgpr1 = COPY [[UV7]](<2 x s16>)
+    ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
+    ;
+    ; GFX10-DENORM-LABEL: name: test_3xhalf_add_mul_rhs_contract
+    ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5
+    ; GFX10-DENORM-NEXT: {{ $}}
+    ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0
+    ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1
+    ; GFX10-DENORM-NEXT: [[DEF:%[0-9]+]]:_(<2 x s16>) = G_IMPLICIT_DEF
+    ; GFX10-DENORM-NEXT: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY]](<2 x s16>), [[COPY1]](<2 x s16>), [[DEF]](<2 x s16>)
+    ; GFX10-DENORM-NEXT: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS]](<6 x s16>)
+    ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr2
+    ; GFX10-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr3
+    ; GFX10-DENORM-NEXT: [[CONCAT_VECTORS1:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY2]](<2 x s16>), [[COPY3]](<2 x s16>), [[DEF]](<2 x s16>)
+    ; GFX10-DENORM-NEXT: [[UV2:%[0-9]+]]:_(<3 x s16>), [[UV3:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS1]](<6 x s16>)
+    ; GFX10-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr4
+    ; GFX10-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr5
+    ; GFX10-DENORM-NEXT: [[CONCAT_VECTORS2:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[COPY4]](<2 x s16>), [[COPY5]](<2 x s16>), [[DEF]](<2 x s16>)
+    ; GFX10-DENORM-NEXT: [[UV4:%[0-9]+]]:_(<3 x s16>), [[UV5:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS2]](<6 x s16>)
+    ; GFX10-DENORM-NEXT: [[FMA:%[0-9]+]]:_(<3 x s16>) = G_FMA [[UV]], [[UV2]], [[UV4]]
+    ; GFX10-DENORM-NEXT: [[DEF1:%[0-9]+]]:_(<3 x s16>) = G_IMPLICIT_DEF
+    ; GFX10-DENORM-NEXT: [[CONCAT_VECTORS3:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[FMA]](<3 x s16>), [[DEF1]](<3 x s16>)
+    ; GFX10-DENORM-NEXT: [[UV6:%[0-9]+]]:_(<2 x s16>), [[UV7:%[0-9]+]]:_(<2 x s16>), [[UV8:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[CONCAT_VECTORS3]](<6 x s16>)
+    ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[UV6]](<2 x s16>)
+    ; GFX10-DENORM-NEXT: $vgpr1 = COPY [[UV7]](<2 x s16>)
+    ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
+    %4:_(<2 x s16>) = COPY $vgpr0
+    %5:_(<2 x s16>) = COPY $vgpr1
+    %10:_(<2 x s16>) = G_IMPLICIT_DEF
+    %11:_(<6 x s16>) = G_CONCAT_VECTORS %4(<2 x s16>), %5(<2 x s16>), %10(<2 x s16>)
+    %0:_(<3 x s16>), %12:_(<3 x s16>) = G_UNMERGE_VALUES %11(<6 x s16>)
+    %6:_(<2 x s16>) = COPY $vgpr2
+    %7:_(<2 x s16>) = COPY $vgpr3
+    %13:_(<6 x s16>) = G_CONCAT_VECTORS %6(<2 x s16>), %7(<2 x s16>), %10(<2 x s16>)
+    %1:_(<3 x s16>), %14:_(<3 x s16>) = G_UNMERGE_VALUES %13(<6 x s16>)
+    %8:_(<2 x s16>) = COPY $vgpr4
+    %9:_(<2 x s16>) = COPY $vgpr5
+    %15:_(<6 x s16>) = G_CONCAT_VECTORS %8(<2 x s16>), %9(<2 x s16>), %10(<2 x s16>)
+    %2:_(<3 x s16>), %16:_(<3 x s16>) = G_UNMERGE_VALUES %15(<6 x s16>)
+    %17:_(<3 x s16>) = reassoc contract G_FMUL %0, %1
+    %18:_(<3 x s16>) = reassoc contract G_FADD %2, %17
+    %22:_(<3 x s16>) = G_IMPLICIT_DEF
+    %23:_(<6 x s16>) = G_CONCAT_VECTORS %18(<3 x s16>), %22(<3 x s16>)
+    %20:_(<2 x s16>), %21:_(<2 x s16>), %24:_(<2 x s16>) = G_UNMERGE_VALUES %23(<6 x s16>)
+    $vgpr0 = COPY %20(<2 x s16>)
+    $vgpr1 = COPY %21(<2 x s16>)
+    S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1
+...
+ +--- name: test_4xdouble_add_mul body: | bb.1.entry: @@ -1715,60 +1934,6 @@ body: | ; GFX9-NEXT: $vgpr7 = COPY [[UV7]](s32) ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; - ; GFX9-CONTRACT-LABEL: name: test_4xdouble_add_mul - ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23 - ; GFX9-CONTRACT-NEXT: {{ $}} - ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-CONTRACT-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX9-CONTRACT-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX9-CONTRACT-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX9-CONTRACT-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX9-CONTRACT-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX9-CONTRACT-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-CONTRACT-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) - ; GFX9-CONTRACT-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX9-CONTRACT-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX9-CONTRACT-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GFX9-CONTRACT-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GFX9-CONTRACT-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; GFX9-CONTRACT-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; GFX9-CONTRACT-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; GFX9-CONTRACT-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; GFX9-CONTRACT-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) - ; GFX9-CONTRACT-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-CONTRACT-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) - ; GFX9-CONTRACT-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) - ; GFX9-CONTRACT-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64) - ; GFX9-CONTRACT-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; GFX9-CONTRACT-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; GFX9-CONTRACT-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; GFX9-CONTRACT-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; GFX9-CONTRACT-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; GFX9-CONTRACT-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; GFX9-CONTRACT-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; GFX9-CONTRACT-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; GFX9-CONTRACT-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) - ; GFX9-CONTRACT-NEXT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY18]](s32), [[COPY19]](s32) - ; GFX9-CONTRACT-NEXT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32) - ; GFX9-CONTRACT-NEXT: [[MV11:%[0-9]+]]:_(s64) = 
G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32) - ; GFX9-CONTRACT-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64) - ; GFX9-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(<4 x s64>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] - ; GFX9-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<4 x s64>) - ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-CONTRACT-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-CONTRACT-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; GFX9-CONTRACT-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; GFX9-CONTRACT-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; GFX9-CONTRACT-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; GFX9-CONTRACT-NEXT: $vgpr7 = COPY [[UV7]](s32) - ; GFX9-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 - ; ; GFX9-DENORM-LABEL: name: test_4xdouble_add_mul ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23 ; GFX9-DENORM-NEXT: {{ $}} @@ -1824,60 +1989,6 @@ body: | ; GFX9-DENORM-NEXT: $vgpr7 = COPY [[UV7]](s32) ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; - ; GFX9-UNSAFE-LABEL: name: test_4xdouble_add_mul - ; GFX9-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23 - ; GFX9-UNSAFE-NEXT: {{ $}} - ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-UNSAFE-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX9-UNSAFE-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX9-UNSAFE-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX9-UNSAFE-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX9-UNSAFE-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX9-UNSAFE-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) - ; GFX9-UNSAFE-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX9-UNSAFE-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX9-UNSAFE-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GFX9-UNSAFE-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GFX9-UNSAFE-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; GFX9-UNSAFE-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; GFX9-UNSAFE-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; GFX9-UNSAFE-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; GFX9-UNSAFE-NEXT: 
[[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) - ; GFX9-UNSAFE-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-UNSAFE-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) - ; GFX9-UNSAFE-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) - ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64) - ; GFX9-UNSAFE-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; GFX9-UNSAFE-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; GFX9-UNSAFE-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; GFX9-UNSAFE-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; GFX9-UNSAFE-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; GFX9-UNSAFE-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; GFX9-UNSAFE-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; GFX9-UNSAFE-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; GFX9-UNSAFE-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) - ; GFX9-UNSAFE-NEXT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY18]](s32), [[COPY19]](s32) - ; GFX9-UNSAFE-NEXT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32) - ; GFX9-UNSAFE-NEXT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32) - ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64) - ; GFX9-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(<4 x s64>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] - ; GFX9-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<4 x s64>) - ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-UNSAFE-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-UNSAFE-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; GFX9-UNSAFE-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; GFX9-UNSAFE-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; GFX9-UNSAFE-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; GFX9-UNSAFE-NEXT: $vgpr7 = COPY [[UV7]](s32) - ; GFX9-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 - ; ; GFX10-LABEL: name: test_4xdouble_add_mul ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23 ; GFX10-NEXT: {{ $}} @@ -1933,60 +2044,6 @@ body: | ; GFX10-NEXT: $vgpr7 = COPY [[UV7]](s32) ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 ; - ; GFX10-CONTRACT-LABEL: name: test_4xdouble_add_mul - ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23 - ; GFX10-CONTRACT-NEXT: {{ $}} - ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-CONTRACT-NEXT: 
[[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-CONTRACT-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-CONTRACT-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX10-CONTRACT-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX10-CONTRACT-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX10-CONTRACT-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX10-CONTRACT-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-CONTRACT-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) - ; GFX10-CONTRACT-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX10-CONTRACT-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX10-CONTRACT-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GFX10-CONTRACT-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GFX10-CONTRACT-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; GFX10-CONTRACT-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; GFX10-CONTRACT-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; GFX10-CONTRACT-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; GFX10-CONTRACT-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) - ; GFX10-CONTRACT-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-CONTRACT-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) - ; GFX10-CONTRACT-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) - ; GFX10-CONTRACT-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64) - ; GFX10-CONTRACT-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; GFX10-CONTRACT-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; GFX10-CONTRACT-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; GFX10-CONTRACT-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; GFX10-CONTRACT-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; GFX10-CONTRACT-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; GFX10-CONTRACT-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; GFX10-CONTRACT-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; GFX10-CONTRACT-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) - ; GFX10-CONTRACT-NEXT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY18]](s32), [[COPY19]](s32) - ; GFX10-CONTRACT-NEXT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32) - ; GFX10-CONTRACT-NEXT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32) - ; GFX10-CONTRACT-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64) - ; GFX10-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(<4 x s64>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] - ; GFX10-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<4 x s64>) - ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-CONTRACT-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-CONTRACT-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; GFX10-CONTRACT-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; 
GFX10-CONTRACT-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; GFX10-CONTRACT-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; GFX10-CONTRACT-NEXT: $vgpr7 = COPY [[UV7]](s32) - ; GFX10-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 - ; ; GFX10-DENORM-LABEL: name: test_4xdouble_add_mul ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23 ; GFX10-DENORM-NEXT: {{ $}} @@ -2041,60 +2098,6 @@ body: | ; GFX10-DENORM-NEXT: $vgpr6 = COPY [[UV6]](s32) ; GFX10-DENORM-NEXT: $vgpr7 = COPY [[UV7]](s32) ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 - ; - ; GFX10-UNSAFE-LABEL: name: test_4xdouble_add_mul - ; GFX10-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23 - ; GFX10-UNSAFE-NEXT: {{ $}} - ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-UNSAFE-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-UNSAFE-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX10-UNSAFE-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX10-UNSAFE-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX10-UNSAFE-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX10-UNSAFE-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-UNSAFE-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) - ; GFX10-UNSAFE-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX10-UNSAFE-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX10-UNSAFE-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GFX10-UNSAFE-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GFX10-UNSAFE-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; GFX10-UNSAFE-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; GFX10-UNSAFE-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; GFX10-UNSAFE-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; GFX10-UNSAFE-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) - ; GFX10-UNSAFE-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-UNSAFE-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) - ; GFX10-UNSAFE-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) - ; GFX10-UNSAFE-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64) - ; GFX10-UNSAFE-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; GFX10-UNSAFE-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; GFX10-UNSAFE-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 - ; GFX10-UNSAFE-NEXT: 
[[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 - ; GFX10-UNSAFE-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 - ; GFX10-UNSAFE-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 - ; GFX10-UNSAFE-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 - ; GFX10-UNSAFE-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 - ; GFX10-UNSAFE-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) - ; GFX10-UNSAFE-NEXT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY18]](s32), [[COPY19]](s32) - ; GFX10-UNSAFE-NEXT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32) - ; GFX10-UNSAFE-NEXT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32) - ; GFX10-UNSAFE-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64) - ; GFX10-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(<4 x s64>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] - ; GFX10-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<4 x s64>) - ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-UNSAFE-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-UNSAFE-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; GFX10-UNSAFE-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; GFX10-UNSAFE-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; GFX10-UNSAFE-NEXT: $vgpr6 = COPY [[UV6]](s32) - ; GFX10-UNSAFE-NEXT: $vgpr7 = COPY [[UV7]](s32) - ; GFX10-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 %4:_(s32) = COPY $vgpr0 %5:_(s32) = COPY $vgpr1 %6:_(s32) = COPY $vgpr2 @@ -2149,6 +2152,280 @@ body: | ... 
--- +name: test_4xdouble_add_mul_contract +body: | + bb.1.entry: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23 + + ; GFX9-LABEL: name: test_4xdouble_add_mul_contract + ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX9-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; GFX9-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; GFX9-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) + ; GFX9-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) + ; GFX9-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) + ; GFX9-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64) + ; GFX9-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 + ; GFX9-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 + ; GFX9-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 + ; GFX9-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 + ; GFX9-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 + ; GFX9-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 + ; GFX9-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 + ; GFX9-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 + ; GFX9-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) + ; GFX9-NEXT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY18]](s32), [[COPY19]](s32) + ; GFX9-NEXT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32) + ; GFX9-NEXT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64) + ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(<4 x s64>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), 
[[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<4 x s64>) + ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr4 = COPY [[UV4]](s32) + ; GFX9-NEXT: $vgpr5 = COPY [[UV5]](s32) + ; GFX9-NEXT: $vgpr6 = COPY [[UV6]](s32) + ; GFX9-NEXT: $vgpr7 = COPY [[UV7]](s32) + ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; + ; GFX9-DENORM-LABEL: name: test_4xdouble_add_mul_contract + ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23 + ; GFX9-DENORM-NEXT: {{ $}} + ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX9-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX9-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX9-DENORM-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX9-DENORM-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX9-DENORM-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; GFX9-DENORM-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX9-DENORM-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX9-DENORM-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) + ; GFX9-DENORM-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) + ; GFX9-DENORM-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX9-DENORM-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; GFX9-DENORM-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; GFX9-DENORM-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; GFX9-DENORM-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; GFX9-DENORM-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; GFX9-DENORM-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; GFX9-DENORM-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; GFX9-DENORM-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) + ; GFX9-DENORM-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) + ; GFX9-DENORM-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) + ; GFX9-DENORM-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) + ; GFX9-DENORM-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64) + ; GFX9-DENORM-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 + ; GFX9-DENORM-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 + ; GFX9-DENORM-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 + ; GFX9-DENORM-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 + ; GFX9-DENORM-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 + ; GFX9-DENORM-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 + ; GFX9-DENORM-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 + ; GFX9-DENORM-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 + ; GFX9-DENORM-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES 
[[COPY16]](s32), [[COPY17]](s32) + ; GFX9-DENORM-NEXT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY18]](s32), [[COPY19]](s32) + ; GFX9-DENORM-NEXT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32) + ; GFX9-DENORM-NEXT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32) + ; GFX9-DENORM-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64) + ; GFX9-DENORM-NEXT: [[FMA:%[0-9]+]]:_(<4 x s64>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] + ; GFX9-DENORM-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<4 x s64>) + ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX9-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX9-DENORM-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GFX9-DENORM-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-DENORM-NEXT: $vgpr4 = COPY [[UV4]](s32) + ; GFX9-DENORM-NEXT: $vgpr5 = COPY [[UV5]](s32) + ; GFX9-DENORM-NEXT: $vgpr6 = COPY [[UV6]](s32) + ; GFX9-DENORM-NEXT: $vgpr7 = COPY [[UV7]](s32) + ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; + ; GFX10-LABEL: name: test_4xdouble_add_mul_contract + ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX10-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; GFX10-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) + ; GFX10-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) + ; GFX10-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) + ; GFX10-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64) + ; 
GFX10-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 + ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 + ; GFX10-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 + ; GFX10-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 + ; GFX10-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 + ; GFX10-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 + ; GFX10-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 + ; GFX10-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 + ; GFX10-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) + ; GFX10-NEXT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY18]](s32), [[COPY19]](s32) + ; GFX10-NEXT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32) + ; GFX10-NEXT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32) + ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64) + ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(<4 x s64>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<4 x s64>) + ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr4 = COPY [[UV4]](s32) + ; GFX10-NEXT: $vgpr5 = COPY [[UV5]](s32) + ; GFX10-NEXT: $vgpr6 = COPY [[UV6]](s32) + ; GFX10-NEXT: $vgpr7 = COPY [[UV7]](s32) + ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + ; + ; GFX10-DENORM-LABEL: name: test_4xdouble_add_mul_contract + ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23 + ; GFX10-DENORM-NEXT: {{ $}} + ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX10-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX10-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX10-DENORM-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX10-DENORM-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX10-DENORM-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; GFX10-DENORM-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX10-DENORM-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX10-DENORM-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) + ; GFX10-DENORM-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64), [[MV3]](s64) + ; GFX10-DENORM-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX10-DENORM-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; GFX10-DENORM-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; GFX10-DENORM-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; GFX10-DENORM-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; GFX10-DENORM-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; GFX10-DENORM-NEXT: 
[[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; GFX10-DENORM-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; GFX10-DENORM-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) + ; GFX10-DENORM-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) + ; GFX10-DENORM-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) + ; GFX10-DENORM-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) + ; GFX10-DENORM-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV4]](s64), [[MV5]](s64), [[MV6]](s64), [[MV7]](s64) + ; GFX10-DENORM-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 + ; GFX10-DENORM-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 + ; GFX10-DENORM-NEXT: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 + ; GFX10-DENORM-NEXT: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 + ; GFX10-DENORM-NEXT: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 + ; GFX10-DENORM-NEXT: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 + ; GFX10-DENORM-NEXT: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 + ; GFX10-DENORM-NEXT: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 + ; GFX10-DENORM-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) + ; GFX10-DENORM-NEXT: [[MV9:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY18]](s32), [[COPY19]](s32) + ; GFX10-DENORM-NEXT: [[MV10:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY20]](s32), [[COPY21]](s32) + ; GFX10-DENORM-NEXT: [[MV11:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY22]](s32), [[COPY23]](s32) + ; GFX10-DENORM-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<4 x s64>) = G_BUILD_VECTOR [[MV8]](s64), [[MV9]](s64), [[MV10]](s64), [[MV11]](s64) + ; GFX10-DENORM-NEXT: [[FMA:%[0-9]+]]:_(<4 x s64>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] + ; GFX10-DENORM-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<4 x s64>) + ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX10-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX10-DENORM-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GFX10-DENORM-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-DENORM-NEXT: $vgpr4 = COPY [[UV4]](s32) + ; GFX10-DENORM-NEXT: $vgpr5 = COPY [[UV5]](s32) + ; GFX10-DENORM-NEXT: $vgpr6 = COPY [[UV6]](s32) + ; GFX10-DENORM-NEXT: $vgpr7 = COPY [[UV7]](s32) + ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 + %4:_(s32) = COPY $vgpr0 + %5:_(s32) = COPY $vgpr1 + %6:_(s32) = COPY $vgpr2 + %7:_(s32) = COPY $vgpr3 + %8:_(s32) = COPY $vgpr4 + %9:_(s32) = COPY $vgpr5 + %10:_(s32) = COPY $vgpr6 + %11:_(s32) = COPY $vgpr7 + %28:_(s64) = G_MERGE_VALUES %4(s32), %5(s32) + %29:_(s64) = G_MERGE_VALUES %6(s32), %7(s32) + %30:_(s64) = G_MERGE_VALUES %8(s32), %9(s32) + %31:_(s64) = G_MERGE_VALUES %10(s32), %11(s32) + %0:_(<4 x s64>) = G_BUILD_VECTOR %28(s64), %29(s64), %30(s64), %31(s64) + %12:_(s32) = COPY $vgpr8 + %13:_(s32) = COPY $vgpr9 + %14:_(s32) = COPY $vgpr10 + %15:_(s32) = COPY $vgpr11 + %16:_(s32) = COPY $vgpr12 + %17:_(s32) = COPY $vgpr13 + %18:_(s32) = COPY $vgpr14 + %19:_(s32) = COPY $vgpr15 + %32:_(s64) = G_MERGE_VALUES %12(s32), %13(s32) + %33:_(s64) = G_MERGE_VALUES %14(s32), %15(s32) + %34:_(s64) = G_MERGE_VALUES %16(s32), %17(s32) + %35:_(s64) = G_MERGE_VALUES %18(s32), %19(s32) + %1:_(<4 x s64>) = G_BUILD_VECTOR %32(s64), 
%33(s64), %34(s64), %35(s64) + %20:_(s32) = COPY $vgpr16 + %21:_(s32) = COPY $vgpr17 + %22:_(s32) = COPY $vgpr18 + %23:_(s32) = COPY $vgpr19 + %24:_(s32) = COPY $vgpr20 + %25:_(s32) = COPY $vgpr21 + %26:_(s32) = COPY $vgpr22 + %27:_(s32) = COPY $vgpr23 + %36:_(s64) = G_MERGE_VALUES %20(s32), %21(s32) + %37:_(s64) = G_MERGE_VALUES %22(s32), %23(s32) + %38:_(s64) = G_MERGE_VALUES %24(s32), %25(s32) + %39:_(s64) = G_MERGE_VALUES %26(s32), %27(s32) + %2:_(<4 x s64>) = G_BUILD_VECTOR %36(s64), %37(s64), %38(s64), %39(s64) + %40:_(<4 x s64>) = reassoc contract G_FMUL %0, %1 + %41:_(<4 x s64>) = reassoc contract G_FADD %40, %2 + %43:_(s32), %44:_(s32), %45:_(s32), %46:_(s32), %47:_(s32), %48:_(s32), %49:_(s32), %50:_(s32) = G_UNMERGE_VALUES %41(<4 x s64>) + $vgpr0 = COPY %43(s32) + $vgpr1 = COPY %44(s32) + $vgpr2 = COPY %45(s32) + $vgpr3 = COPY %46(s32) + $vgpr4 = COPY %47(s32) + $vgpr5 = COPY %48(s32) + $vgpr6 = COPY %49(s32) + $vgpr7 = COPY %50(s32) + S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5, implicit $vgpr6, implicit $vgpr7 +... + +--- name: test_3xdouble_add_mul_rhs body: | bb.1.entry: @@ -2198,49 +2475,6 @@ body: | ; GFX9-NEXT: $vgpr5 = COPY [[UV5]](s32) ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 ; - ; GFX9-CONTRACT-LABEL: name: test_3xdouble_add_mul_rhs - ; GFX9-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17 - ; GFX9-CONTRACT-NEXT: {{ $}} - ; GFX9-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-CONTRACT-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX9-CONTRACT-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX9-CONTRACT-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX9-CONTRACT-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) - ; GFX9-CONTRACT-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX9-CONTRACT-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX9-CONTRACT-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX9-CONTRACT-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX9-CONTRACT-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GFX9-CONTRACT-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GFX9-CONTRACT-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-CONTRACT-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) - ; GFX9-CONTRACT-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-CONTRACT-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV3]](s64), [[MV4]](s64), [[MV5]](s64) - ; GFX9-CONTRACT-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; GFX9-CONTRACT-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; GFX9-CONTRACT-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; GFX9-CONTRACT-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; GFX9-CONTRACT-NEXT: 
[[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; GFX9-CONTRACT-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; GFX9-CONTRACT-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) - ; GFX9-CONTRACT-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) - ; GFX9-CONTRACT-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) - ; GFX9-CONTRACT-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV6]](s64), [[MV7]](s64), [[MV8]](s64) - ; GFX9-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(<3 x s64>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] - ; GFX9-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<3 x s64>) - ; GFX9-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-CONTRACT-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-CONTRACT-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; GFX9-CONTRACT-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; GFX9-CONTRACT-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; GFX9-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 - ; ; GFX9-DENORM-LABEL: name: test_3xdouble_add_mul_rhs ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17 ; GFX9-DENORM-NEXT: {{ $}} @@ -2285,49 +2519,6 @@ body: | ; GFX9-DENORM-NEXT: $vgpr5 = COPY [[UV5]](s32) ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 ; - ; GFX9-UNSAFE-LABEL: name: test_3xdouble_add_mul_rhs - ; GFX9-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17 - ; GFX9-UNSAFE-NEXT: {{ $}} - ; GFX9-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX9-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX9-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX9-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX9-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX9-UNSAFE-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX9-UNSAFE-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX9-UNSAFE-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) - ; GFX9-UNSAFE-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX9-UNSAFE-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX9-UNSAFE-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX9-UNSAFE-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX9-UNSAFE-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GFX9-UNSAFE-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GFX9-UNSAFE-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) - ; GFX9-UNSAFE-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) - ; GFX9-UNSAFE-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) - ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR 
[[MV3]](s64), [[MV4]](s64), [[MV5]](s64) - ; GFX9-UNSAFE-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; GFX9-UNSAFE-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; GFX9-UNSAFE-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; GFX9-UNSAFE-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; GFX9-UNSAFE-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; GFX9-UNSAFE-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; GFX9-UNSAFE-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) - ; GFX9-UNSAFE-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) - ; GFX9-UNSAFE-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) - ; GFX9-UNSAFE-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV6]](s64), [[MV7]](s64), [[MV8]](s64) - ; GFX9-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(<3 x s64>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] - ; GFX9-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<3 x s64>) - ; GFX9-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX9-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX9-UNSAFE-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX9-UNSAFE-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; GFX9-UNSAFE-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; GFX9-UNSAFE-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; GFX9-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 - ; ; GFX10-LABEL: name: test_3xdouble_add_mul_rhs ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17 ; GFX10-NEXT: {{ $}} @@ -2372,49 +2563,6 @@ body: | ; GFX10-NEXT: $vgpr5 = COPY [[UV5]](s32) ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 ; - ; GFX10-CONTRACT-LABEL: name: test_3xdouble_add_mul_rhs - ; GFX10-CONTRACT: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17 - ; GFX10-CONTRACT-NEXT: {{ $}} - ; GFX10-CONTRACT-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-CONTRACT-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-CONTRACT-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-CONTRACT-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-CONTRACT-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-CONTRACT-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-CONTRACT-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX10-CONTRACT-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX10-CONTRACT-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX10-CONTRACT-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) - ; GFX10-CONTRACT-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-CONTRACT-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX10-CONTRACT-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX10-CONTRACT-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX10-CONTRACT-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GFX10-CONTRACT-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GFX10-CONTRACT-NEXT: [[MV3:%[0-9]+]]:_(s64) = 
G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-CONTRACT-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) - ; GFX10-CONTRACT-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-CONTRACT-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV3]](s64), [[MV4]](s64), [[MV5]](s64) - ; GFX10-CONTRACT-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; GFX10-CONTRACT-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; GFX10-CONTRACT-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; GFX10-CONTRACT-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; GFX10-CONTRACT-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; GFX10-CONTRACT-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; GFX10-CONTRACT-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) - ; GFX10-CONTRACT-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) - ; GFX10-CONTRACT-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) - ; GFX10-CONTRACT-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV6]](s64), [[MV7]](s64), [[MV8]](s64) - ; GFX10-CONTRACT-NEXT: [[FMA:%[0-9]+]]:_(<3 x s64>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] - ; GFX10-CONTRACT-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<3 x s64>) - ; GFX10-CONTRACT-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-CONTRACT-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-CONTRACT-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-CONTRACT-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; GFX10-CONTRACT-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; GFX10-CONTRACT-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; GFX10-CONTRACT-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 - ; ; GFX10-DENORM-LABEL: name: test_3xdouble_add_mul_rhs ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17 ; GFX10-DENORM-NEXT: {{ $}} @@ -2458,49 +2606,6 @@ body: | ; GFX10-DENORM-NEXT: $vgpr4 = COPY [[UV4]](s32) ; GFX10-DENORM-NEXT: $vgpr5 = COPY [[UV5]](s32) ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 - ; - ; GFX10-UNSAFE-LABEL: name: test_3xdouble_add_mul_rhs - ; GFX10-UNSAFE: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17 - ; GFX10-UNSAFE-NEXT: {{ $}} - ; GFX10-UNSAFE-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 - ; GFX10-UNSAFE-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX10-UNSAFE-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 - ; GFX10-UNSAFE-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 - ; GFX10-UNSAFE-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 - ; GFX10-UNSAFE-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 - ; GFX10-UNSAFE-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) - ; GFX10-UNSAFE-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) - ; GFX10-UNSAFE-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) - ; GFX10-UNSAFE-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) - ; 
GFX10-UNSAFE-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 - ; GFX10-UNSAFE-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 - ; GFX10-UNSAFE-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 - ; GFX10-UNSAFE-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 - ; GFX10-UNSAFE-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 - ; GFX10-UNSAFE-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 - ; GFX10-UNSAFE-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) - ; GFX10-UNSAFE-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) - ; GFX10-UNSAFE-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) - ; GFX10-UNSAFE-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV3]](s64), [[MV4]](s64), [[MV5]](s64) - ; GFX10-UNSAFE-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 - ; GFX10-UNSAFE-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 - ; GFX10-UNSAFE-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 - ; GFX10-UNSAFE-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 - ; GFX10-UNSAFE-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 - ; GFX10-UNSAFE-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 - ; GFX10-UNSAFE-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) - ; GFX10-UNSAFE-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) - ; GFX10-UNSAFE-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) - ; GFX10-UNSAFE-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV6]](s64), [[MV7]](s64), [[MV8]](s64) - ; GFX10-UNSAFE-NEXT: [[FMA:%[0-9]+]]:_(<3 x s64>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] - ; GFX10-UNSAFE-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<3 x s64>) - ; GFX10-UNSAFE-NEXT: $vgpr0 = COPY [[UV]](s32) - ; GFX10-UNSAFE-NEXT: $vgpr1 = COPY [[UV1]](s32) - ; GFX10-UNSAFE-NEXT: $vgpr2 = COPY [[UV2]](s32) - ; GFX10-UNSAFE-NEXT: $vgpr3 = COPY [[UV3]](s32) - ; GFX10-UNSAFE-NEXT: $vgpr4 = COPY [[UV4]](s32) - ; GFX10-UNSAFE-NEXT: $vgpr5 = COPY [[UV5]](s32) - ; GFX10-UNSAFE-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 %4:_(s32) = COPY $vgpr0 %5:_(s32) = COPY $vgpr1 %6:_(s32) = COPY $vgpr2 @@ -2542,3 +2647,222 @@ body: | $vgpr5 = COPY %39(s32) S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 ... 
+ +--- +name: test_3xdouble_add_mul_rhs_contract +body: | + bb.1.entry: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17 + + ; GFX9-LABEL: name: test_3xdouble_add_mul_rhs_contract + ; GFX9: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17 + ; GFX9-NEXT: {{ $}} + ; GFX9-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX9-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX9-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX9-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; GFX9-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX9-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) + ; GFX9-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX9-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX9-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX9-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; GFX9-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; GFX9-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; GFX9-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) + ; GFX9-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) + ; GFX9-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV3]](s64), [[MV4]](s64), [[MV5]](s64) + ; GFX9-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; GFX9-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; GFX9-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; GFX9-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; GFX9-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 + ; GFX9-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 + ; GFX9-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) + ; GFX9-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) + ; GFX9-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) + ; GFX9-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV6]](s64), [[MV7]](s64), [[MV8]](s64) + ; GFX9-NEXT: [[FMA:%[0-9]+]]:_(<3 x s64>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] + ; GFX9-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<3 x s64>) + ; GFX9-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX9-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX9-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GFX9-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-NEXT: $vgpr4 = COPY [[UV4]](s32) + ; GFX9-NEXT: $vgpr5 = COPY [[UV5]](s32) + ; GFX9-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 + ; + ; GFX9-DENORM-LABEL: name: test_3xdouble_add_mul_rhs_contract + ; GFX9-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17 + ; 
GFX9-DENORM-NEXT: {{ $}} + ; GFX9-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX9-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX9-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX9-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX9-DENORM-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; GFX9-DENORM-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX9-DENORM-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX9-DENORM-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) + ; GFX9-DENORM-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX9-DENORM-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX9-DENORM-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX9-DENORM-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; GFX9-DENORM-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; GFX9-DENORM-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; GFX9-DENORM-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) + ; GFX9-DENORM-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) + ; GFX9-DENORM-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) + ; GFX9-DENORM-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV3]](s64), [[MV4]](s64), [[MV5]](s64) + ; GFX9-DENORM-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; GFX9-DENORM-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; GFX9-DENORM-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; GFX9-DENORM-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; GFX9-DENORM-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 + ; GFX9-DENORM-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 + ; GFX9-DENORM-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) + ; GFX9-DENORM-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) + ; GFX9-DENORM-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) + ; GFX9-DENORM-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV6]](s64), [[MV7]](s64), [[MV8]](s64) + ; GFX9-DENORM-NEXT: [[FMA:%[0-9]+]]:_(<3 x s64>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] + ; GFX9-DENORM-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<3 x s64>) + ; GFX9-DENORM-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX9-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX9-DENORM-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GFX9-DENORM-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX9-DENORM-NEXT: $vgpr4 = COPY [[UV4]](s32) + ; GFX9-DENORM-NEXT: $vgpr5 = COPY [[UV5]](s32) + ; GFX9-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 + ; + ; GFX10-LABEL: name: test_3xdouble_add_mul_rhs_contract + ; GFX10: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17 + ; GFX10-NEXT: {{ $}} + ; GFX10-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-NEXT: 
[[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX10-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX10-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX10-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; GFX10-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX10-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX10-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) + ; GFX10-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX10-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX10-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX10-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; GFX10-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; GFX10-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; GFX10-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) + ; GFX10-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) + ; GFX10-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) + ; GFX10-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV3]](s64), [[MV4]](s64), [[MV5]](s64) + ; GFX10-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; GFX10-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; GFX10-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; GFX10-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; GFX10-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 + ; GFX10-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 + ; GFX10-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) + ; GFX10-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) + ; GFX10-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) + ; GFX10-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV6]](s64), [[MV7]](s64), [[MV8]](s64) + ; GFX10-NEXT: [[FMA:%[0-9]+]]:_(<3 x s64>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] + ; GFX10-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<3 x s64>) + ; GFX10-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX10-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX10-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GFX10-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-NEXT: $vgpr4 = COPY [[UV4]](s32) + ; GFX10-NEXT: $vgpr5 = COPY [[UV5]](s32) + ; GFX10-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 + ; + ; GFX10-DENORM-LABEL: name: test_3xdouble_add_mul_rhs_contract + ; GFX10-DENORM: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17 + ; GFX10-DENORM-NEXT: {{ $}} + ; GFX10-DENORM-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX10-DENORM-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX10-DENORM-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; GFX10-DENORM-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; GFX10-DENORM-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; GFX10-DENORM-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; GFX10-DENORM-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) + ; GFX10-DENORM-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) + ; GFX10-DENORM-NEXT: [[MV2:%[0-9]+]]:_(s64) = 
G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) + ; GFX10-DENORM-NEXT: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV]](s64), [[MV1]](s64), [[MV2]](s64) + ; GFX10-DENORM-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; GFX10-DENORM-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; GFX10-DENORM-NEXT: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; GFX10-DENORM-NEXT: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; GFX10-DENORM-NEXT: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; GFX10-DENORM-NEXT: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; GFX10-DENORM-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) + ; GFX10-DENORM-NEXT: [[MV4:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY8]](s32), [[COPY9]](s32) + ; GFX10-DENORM-NEXT: [[MV5:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32) + ; GFX10-DENORM-NEXT: [[BUILD_VECTOR1:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV3]](s64), [[MV4]](s64), [[MV5]](s64) + ; GFX10-DENORM-NEXT: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; GFX10-DENORM-NEXT: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; GFX10-DENORM-NEXT: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; GFX10-DENORM-NEXT: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; GFX10-DENORM-NEXT: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 + ; GFX10-DENORM-NEXT: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 + ; GFX10-DENORM-NEXT: [[MV6:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY12]](s32), [[COPY13]](s32) + ; GFX10-DENORM-NEXT: [[MV7:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY14]](s32), [[COPY15]](s32) + ; GFX10-DENORM-NEXT: [[MV8:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY16]](s32), [[COPY17]](s32) + ; GFX10-DENORM-NEXT: [[BUILD_VECTOR2:%[0-9]+]]:_(<3 x s64>) = G_BUILD_VECTOR [[MV6]](s64), [[MV7]](s64), [[MV8]](s64) + ; GFX10-DENORM-NEXT: [[FMA:%[0-9]+]]:_(<3 x s64>) = G_FMA [[BUILD_VECTOR]], [[BUILD_VECTOR1]], [[BUILD_VECTOR2]] + ; GFX10-DENORM-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[FMA]](<3 x s64>) + ; GFX10-DENORM-NEXT: $vgpr0 = COPY [[UV]](s32) + ; GFX10-DENORM-NEXT: $vgpr1 = COPY [[UV1]](s32) + ; GFX10-DENORM-NEXT: $vgpr2 = COPY [[UV2]](s32) + ; GFX10-DENORM-NEXT: $vgpr3 = COPY [[UV3]](s32) + ; GFX10-DENORM-NEXT: $vgpr4 = COPY [[UV4]](s32) + ; GFX10-DENORM-NEXT: $vgpr5 = COPY [[UV5]](s32) + ; GFX10-DENORM-NEXT: S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 + %4:_(s32) = COPY $vgpr0 + %5:_(s32) = COPY $vgpr1 + %6:_(s32) = COPY $vgpr2 + %7:_(s32) = COPY $vgpr3 + %8:_(s32) = COPY $vgpr4 + %9:_(s32) = COPY $vgpr5 + %22:_(s64) = G_MERGE_VALUES %4(s32), %5(s32) + %23:_(s64) = G_MERGE_VALUES %6(s32), %7(s32) + %24:_(s64) = G_MERGE_VALUES %8(s32), %9(s32) + %0:_(<3 x s64>) = G_BUILD_VECTOR %22(s64), %23(s64), %24(s64) + %10:_(s32) = COPY $vgpr6 + %11:_(s32) = COPY $vgpr7 + %12:_(s32) = COPY $vgpr8 + %13:_(s32) = COPY $vgpr9 + %14:_(s32) = COPY $vgpr10 + %15:_(s32) = COPY $vgpr11 + %25:_(s64) = G_MERGE_VALUES %10(s32), %11(s32) + %26:_(s64) = G_MERGE_VALUES %12(s32), %13(s32) + %27:_(s64) = G_MERGE_VALUES %14(s32), %15(s32) + %1:_(<3 x s64>) = G_BUILD_VECTOR %25(s64), %26(s64), %27(s64) + %16:_(s32) = COPY $vgpr12 + %17:_(s32) = COPY $vgpr13 + %18:_(s32) = COPY $vgpr14 + %19:_(s32) = COPY $vgpr15 + %20:_(s32) = COPY $vgpr16 + %21:_(s32) = COPY $vgpr17 + %28:_(s64) = G_MERGE_VALUES %16(s32), %17(s32) + %29:_(s64) = G_MERGE_VALUES %18(s32), %19(s32) + %30:_(s64) = G_MERGE_VALUES %20(s32), 
%21(s32) + %2:_(<3 x s64>) = G_BUILD_VECTOR %28(s64), %29(s64), %30(s64) + %31:_(<3 x s64>) = reassoc contract G_FMUL %0, %1 + %32:_(<3 x s64>) = reassoc contract G_FADD %2, %31 + %34:_(s32), %35:_(s32), %36:_(s32), %37:_(s32), %38:_(s32), %39:_(s32) = G_UNMERGE_VALUES %32(<3 x s64>) + $vgpr0 = COPY %34(s32) + $vgpr1 = COPY %35(s32) + $vgpr2 = COPY %36(s32) + $vgpr3 = COPY %37(s32) + $vgpr4 = COPY %38(s32) + $vgpr5 = COPY %39(s32) + S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0, implicit $vgpr1, implicit $vgpr2, implicit $vgpr3, implicit $vgpr4, implicit $vgpr5 +... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul.ll index 24dd535..3f6e3d8 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-add-mul.ll @@ -2,11 +2,9 @@ ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefix=GFX9 %s ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 -fp-contract=fast < %s | FileCheck -check-prefix=GFX9-CONTRACT %s ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 --denormal-fp-math=preserve-sign < %s | FileCheck -check-prefix=GFX9-DENORM %s -; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx900 -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GFX9-UNSAFE %s ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 < %s | FileCheck -check-prefix=GFX10 %s ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 -fp-contract=fast < %s | FileCheck -check-prefix=GFX10-CONTRACT %s ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 --denormal-fp-math=preserve-sign < %s | FileCheck -check-prefix=GFX10-DENORM %s -; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1010 -enable-unsafe-fp-math < %s | FileCheck -check-prefix=GFX10-UNSAFE %s define float @test_f32_add_mul(float %x, float %y, float %z) { ; GFX9-LABEL: test_f32_add_mul: @@ -28,12 +26,6 @@ define float @test_f32_add_mul(float %x, float %y, float %z) { ; GFX9-DENORM-NEXT: v_mad_f32 v0, v0, v1, v2 ; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-UNSAFE-LABEL: test_f32_add_mul: -; GFX9-UNSAFE: ; %bb.0: ; %.entry -; GFX9-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-UNSAFE-NEXT: v_fma_f32 v0, v0, v1, v2 -; GFX9-UNSAFE-NEXT: s_setpc_b64 s[30:31] -; ; GFX10-LABEL: test_f32_add_mul: ; GFX10: ; %bb.0: ; %.entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -52,7 +44,6 @@ define float @test_f32_add_mul(float %x, float %y, float %z) { ; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-DENORM-NEXT: v_mad_f32 v0, v0, v1, v2 ; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31] -; ; GFX10-UNSAFE-LABEL: test_f32_add_mul: ; GFX10-UNSAFE: ; %bb.0: ; %.entry ; GFX10-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -64,6 +55,58 @@ define float @test_f32_add_mul(float %x, float %y, float %z) { ret float %b } +define float @test_f32_add_mul_contract(float %x, float %y, float %z) { +; GFX9-LABEL: test_f32_add_mul_contract: +; GFX9: ; %bb.0: ; %.entry +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_fma_f32 v0, v0, v1, v2 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-CONTRACT-LABEL: test_f32_add_mul_contract: +; GFX9-CONTRACT: ; %bb.0: ; %.entry +; GFX9-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-CONTRACT-NEXT: v_fma_f32 v0, v0, v1, v2 +; GFX9-CONTRACT-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-DENORM-LABEL: test_f32_add_mul_contract: +; GFX9-DENORM: ; %bb.0: ; %.entry +; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) +; GFX9-DENORM-NEXT: v_mad_f32 v0, v0, v1, v2 +; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: test_f32_add_mul_contract: +; GFX10: ; %bb.0: ; %.entry +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: v_fma_f32 v0, v0, v1, v2 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-CONTRACT-LABEL: test_f32_add_mul_contract: +; GFX10-CONTRACT: ; %bb.0: ; %.entry +; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-CONTRACT-NEXT: v_fma_f32 v0, v0, v1, v2 +; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-DENORM-LABEL: test_f32_add_mul_contract: +; GFX10-DENORM: ; %bb.0: ; %.entry +; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-DENORM-NEXT: v_fma_f32 v0, v0, v1, v2 +; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31] +; GFX9-UNSAFE-LABEL: test_f32_add_mul_contract: +; GFX9-UNSAFE: ; %bb.0: ; %.entry +; GFX9-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-UNSAFE-NEXT: v_fma_f32 v0, v0, v1, v2 +; GFX9-UNSAFE-NEXT: s_setpc_b64 s[30:31] +; GFX10-UNSAFE-LABEL: test_f32_add_mul_contract: +; GFX10-UNSAFE: ; %bb.0: ; %.entry +; GFX10-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-UNSAFE-NEXT: v_fma_f32 v0, v0, v1, v2 +; GFX10-UNSAFE-NEXT: s_setpc_b64 s[30:31] +.entry: + %a = fmul contract float %x, %y + %b = fadd contract float %a, %z + ret float %b +} + define float @test_f32_add_mul_rhs(float %x, float %y, float %z) { ; GFX9-LABEL: test_f32_add_mul_rhs: ; GFX9: ; %bb.0: ; %.entry @@ -84,12 +127,6 @@ define float @test_f32_add_mul_rhs(float %x, float %y, float %z) { ; GFX9-DENORM-NEXT: v_mad_f32 v0, v0, v1, v2 ; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-UNSAFE-LABEL: test_f32_add_mul_rhs: -; GFX9-UNSAFE: ; %bb.0: ; %.entry -; GFX9-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-UNSAFE-NEXT: v_fma_f32 v0, v0, v1, v2 -; GFX9-UNSAFE-NEXT: s_setpc_b64 s[30:31] -; ; GFX10-LABEL: test_f32_add_mul_rhs: ; GFX10: ; %bb.0: ; %.entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -108,7 +145,6 @@ define float @test_f32_add_mul_rhs(float %x, float %y, float %z) { ; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-DENORM-NEXT: v_mad_f32 v0, v0, v1, v2 ; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31] -; ; GFX10-UNSAFE-LABEL: test_f32_add_mul_rhs: ; GFX10-UNSAFE: ; %bb.0: ; %.entry ; GFX10-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -120,6 +156,58 @@ define float @test_f32_add_mul_rhs(float %x, float %y, float %z) { ret float %b } +define float @test_f32_add_mul_rhs_contract(float %x, float %y, float %z) { +; GFX9-LABEL: test_f32_add_mul_rhs_contract: +; GFX9: ; %bb.0: ; %.entry +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_fma_f32 v0, v0, v1, v2 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-CONTRACT-LABEL: test_f32_add_mul_rhs_contract: +; GFX9-CONTRACT: ; %bb.0: ; %.entry +; GFX9-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-CONTRACT-NEXT: v_fma_f32 v0, v0, v1, v2 +; GFX9-CONTRACT-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-DENORM-LABEL: test_f32_add_mul_rhs_contract: +; GFX9-DENORM: ; %bb.0: ; %.entry +; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-DENORM-NEXT: v_mad_f32 v0, v0, v1, v2 +; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: test_f32_add_mul_rhs_contract: +; GFX10: ; %bb.0: ; %.entry +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: v_fma_f32 v0, v0, v1, v2 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-CONTRACT-LABEL: test_f32_add_mul_rhs_contract: +; 
GFX10-CONTRACT: ; %bb.0: ; %.entry +; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-CONTRACT-NEXT: v_fma_f32 v0, v0, v1, v2 +; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-DENORM-LABEL: test_f32_add_mul_rhs_contract: +; GFX10-DENORM: ; %bb.0: ; %.entry +; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-DENORM-NEXT: v_fma_f32 v0, v0, v1, v2 +; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31] +; GFX9-UNSAFE-LABEL: test_f32_add_mul_rhs_contract: +; GFX9-UNSAFE: ; %bb.0: ; %.entry +; GFX9-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-UNSAFE-NEXT: v_fma_f32 v0, v0, v1, v2 +; GFX9-UNSAFE-NEXT: s_setpc_b64 s[30:31] +; GFX10-UNSAFE-LABEL: test_f32_add_mul_rhs_contract: +; GFX10-UNSAFE: ; %bb.0: ; %.entry +; GFX10-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-UNSAFE-NEXT: v_fma_f32 v0, v0, v1, v2 +; GFX10-UNSAFE-NEXT: s_setpc_b64 s[30:31] +.entry: + %a = fmul contract float %x, %y + %b = fadd contract float %z, %a + ret float %b +} + define float @test_add_mul_multiple_defs_z(float %x, float %y, ptr addrspace(1) %vec_ptr) { ; GFX9-LABEL: test_add_mul_multiple_defs_z: ; GFX9: ; %bb.0: ; %.entry @@ -147,14 +235,6 @@ define float @test_add_mul_multiple_defs_z(float %x, float %y, ptr addrspace(1) ; GFX9-DENORM-NEXT: v_mov_b32_e32 v0, v2 ; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-UNSAFE-LABEL: test_add_mul_multiple_defs_z: -; GFX9-UNSAFE: ; %bb.0: ; %.entry -; GFX9-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-UNSAFE-NEXT: global_load_dword v2, v[2:3], off offset:4 -; GFX9-UNSAFE-NEXT: s_waitcnt vmcnt(0) -; GFX9-UNSAFE-NEXT: v_fma_f32 v0, v0, v1, v2 -; GFX9-UNSAFE-NEXT: s_setpc_b64 s[30:31] -; ; GFX10-LABEL: test_add_mul_multiple_defs_z: ; GFX10: ; %bb.0: ; %.entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -181,7 +261,6 @@ define float @test_add_mul_multiple_defs_z(float %x, float %y, ptr addrspace(1) ; GFX10-DENORM-NEXT: v_mac_f32_e32 v2, v0, v1 ; GFX10-DENORM-NEXT: v_mov_b32_e32 v0, v2 ; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31] -; ; GFX10-UNSAFE-LABEL: test_add_mul_multiple_defs_z: ; GFX10-UNSAFE: ; %bb.0: ; %.entry ; GFX10-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -198,17 +277,16 @@ define float @test_add_mul_multiple_defs_z(float %x, float %y, ptr addrspace(1) ret float %b } -define float @test_add_mul_rhs_multiple_defs_z(float %x, float %y, ptr addrspace(1) %vec_ptr) { -; GFX9-LABEL: test_add_mul_rhs_multiple_defs_z: +define float @test_add_mul_multiple_defs_z_contract(float %x, float %y, ptr addrspace(1) %vec_ptr) { +; GFX9-LABEL: test_add_mul_multiple_defs_z_contract: ; GFX9: ; %bb.0: ; %.entry ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-NEXT: global_load_dword v2, v[2:3], off offset:4 -; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1 ; GFX9-NEXT: s_waitcnt vmcnt(0) -; GFX9-NEXT: v_add_f32_e32 v0, v2, v0 +; GFX9-NEXT: v_fma_f32 v0, v0, v1, v2 ; GFX9-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-CONTRACT-LABEL: test_add_mul_rhs_multiple_defs_z: +; GFX9-CONTRACT-LABEL: test_add_mul_multiple_defs_z_contract: ; GFX9-CONTRACT: ; %bb.0: ; %.entry ; GFX9-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-CONTRACT-NEXT: global_load_dword v2, v[2:3], off offset:4 @@ -216,7 +294,7 @@ define float @test_add_mul_rhs_multiple_defs_z(float %x, float %y, ptr addrspace ; GFX9-CONTRACT-NEXT: v_fma_f32 v0, v0, v1, v2 ; GFX9-CONTRACT-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-DENORM-LABEL: test_add_mul_rhs_multiple_defs_z: +; GFX9-DENORM-LABEL: test_add_mul_multiple_defs_z_contract: ; 
GFX9-DENORM: ; %bb.0: ; %.entry ; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-DENORM-NEXT: global_load_dword v2, v[2:3], off offset:4 @@ -225,13 +303,81 @@ define float @test_add_mul_rhs_multiple_defs_z(float %x, float %y, ptr addrspace ; GFX9-DENORM-NEXT: v_mov_b32_e32 v0, v2 ; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-UNSAFE-LABEL: test_add_mul_rhs_multiple_defs_z: +; GFX10-LABEL: test_add_mul_multiple_defs_z_contract: +; GFX10: ; %bb.0: ; %.entry +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: global_load_dword v2, v[2:3], off offset:4 +; GFX10-NEXT: s_waitcnt vmcnt(0) +; GFX10-NEXT: v_fmac_f32_e32 v2, v0, v1 +; GFX10-NEXT: v_mov_b32_e32 v0, v2 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-CONTRACT-LABEL: test_add_mul_multiple_defs_z_contract: +; GFX10-CONTRACT: ; %bb.0: ; %.entry +; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-CONTRACT-NEXT: global_load_dword v2, v[2:3], off offset:4 +; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) +; GFX10-CONTRACT-NEXT: v_fmac_f32_e32 v2, v0, v1 +; GFX10-CONTRACT-NEXT: v_mov_b32_e32 v0, v2 +; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-DENORM-LABEL: test_add_mul_multiple_defs_z_contract: +; GFX10-DENORM: ; %bb.0: ; %.entry +; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-DENORM-NEXT: global_load_dword v2, v[2:3], off offset:4 +; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) +; GFX10-DENORM-NEXT: v_fmac_f32_e32 v2, v0, v1 +; GFX10-DENORM-NEXT: v_mov_b32_e32 v0, v2 +; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31] +; GFX9-UNSAFE-LABEL: test_add_mul_multiple_defs_z_contract: ; GFX9-UNSAFE: ; %bb.0: ; %.entry ; GFX9-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX9-UNSAFE-NEXT: global_load_dword v2, v[2:3], off offset:4 ; GFX9-UNSAFE-NEXT: s_waitcnt vmcnt(0) ; GFX9-UNSAFE-NEXT: v_fma_f32 v0, v0, v1, v2 ; GFX9-UNSAFE-NEXT: s_setpc_b64 s[30:31] +; GFX10-UNSAFE-LABEL: test_add_mul_multiple_defs_z_contract: +; GFX10-UNSAFE: ; %bb.0: ; %.entry +; GFX10-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-UNSAFE-NEXT: global_load_dword v2, v[2:3], off offset:4 +; GFX10-UNSAFE-NEXT: s_waitcnt vmcnt(0) +; GFX10-UNSAFE-NEXT: v_fmac_f32_e32 v2, v0, v1 +; GFX10-UNSAFE-NEXT: v_mov_b32_e32 v0, v2 +; GFX10-UNSAFE-NEXT: s_setpc_b64 s[30:31] +.entry: + %a = fmul contract float %x, %y + %vec = load <2 x float>, ptr addrspace(1) %vec_ptr + %z = extractelement <2 x float> %vec, i64 1 + %b = fadd contract float %a, %z + ret float %b +} + +define float @test_add_mul_rhs_multiple_defs_z(float %x, float %y, ptr addrspace(1) %vec_ptr) { +; GFX9-LABEL: test_add_mul_rhs_multiple_defs_z: +; GFX9: ; %bb.0: ; %.entry +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: global_load_dword v2, v[2:3], off offset:4 +; GFX9-NEXT: v_mul_f32_e32 v0, v0, v1 +; GFX9-NEXT: s_waitcnt vmcnt(0) +; GFX9-NEXT: v_add_f32_e32 v0, v2, v0 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-CONTRACT-LABEL: test_add_mul_rhs_multiple_defs_z: +; GFX9-CONTRACT: ; %bb.0: ; %.entry +; GFX9-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-CONTRACT-NEXT: global_load_dword v2, v[2:3], off offset:4 +; GFX9-CONTRACT-NEXT: s_waitcnt vmcnt(0) +; GFX9-CONTRACT-NEXT: v_fma_f32 v0, v0, v1, v2 +; GFX9-CONTRACT-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-DENORM-LABEL: test_add_mul_rhs_multiple_defs_z: +; GFX9-DENORM: ; %bb.0: ; %.entry +; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-DENORM-NEXT: global_load_dword v2, v[2:3], off offset:4 +; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0) 
+; GFX9-DENORM-NEXT: v_mac_f32_e32 v2, v0, v1 +; GFX9-DENORM-NEXT: v_mov_b32_e32 v0, v2 +; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31] ; ; GFX10-LABEL: test_add_mul_rhs_multiple_defs_z: ; GFX10: ; %bb.0: ; %.entry @@ -259,7 +405,6 @@ define float @test_add_mul_rhs_multiple_defs_z(float %x, float %y, ptr addrspace ; GFX10-DENORM-NEXT: v_mac_f32_e32 v2, v0, v1 ; GFX10-DENORM-NEXT: v_mov_b32_e32 v0, v2 ; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31] -; ; GFX10-UNSAFE-LABEL: test_add_mul_rhs_multiple_defs_z: ; GFX10-UNSAFE: ; %bb.0: ; %.entry ; GFX10-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -296,12 +441,6 @@ define half @test_half_add_mul(half %x, half %y, half %z) { ; GFX9-DENORM-NEXT: v_mad_legacy_f16 v0, v0, v1, v2 ; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-UNSAFE-LABEL: test_half_add_mul: -; GFX9-UNSAFE: ; %bb.0: ; %.entry -; GFX9-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-UNSAFE-NEXT: v_fma_f16 v0, v0, v1, v2 -; GFX9-UNSAFE-NEXT: s_setpc_b64 s[30:31] -; ; GFX10-LABEL: test_half_add_mul: ; GFX10: ; %bb.0: ; %.entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -321,7 +460,6 @@ define half @test_half_add_mul(half %x, half %y, half %z) { ; GFX10-DENORM-NEXT: v_mul_f16_e32 v0, v0, v1 ; GFX10-DENORM-NEXT: v_add_f16_e32 v0, v0, v2 ; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31] -; ; GFX10-UNSAFE-LABEL: test_half_add_mul: ; GFX10-UNSAFE: ; %bb.0: ; %.entry ; GFX10-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -333,6 +471,59 @@ define half @test_half_add_mul(half %x, half %y, half %z) { ret half %b } +define half @test_half_add_mul_contract(half %x, half %y, half %z) { +; GFX9-LABEL: test_half_add_mul_contract: +; GFX9: ; %bb.0: ; %.entry +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_fma_f16 v0, v0, v1, v2 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-CONTRACT-LABEL: test_half_add_mul_contract: +; GFX9-CONTRACT: ; %bb.0: ; %.entry +; GFX9-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-CONTRACT-NEXT: v_fma_f16 v0, v0, v1, v2 +; GFX9-CONTRACT-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-DENORM-LABEL: test_half_add_mul_contract: +; GFX9-DENORM: ; %bb.0: ; %.entry +; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-DENORM-NEXT: v_mad_legacy_f16 v0, v0, v1, v2 +; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: test_half_add_mul_contract: +; GFX10: ; %bb.0: ; %.entry +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: v_fma_f16 v0, v0, v1, v2 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-CONTRACT-LABEL: test_half_add_mul_contract: +; GFX10-CONTRACT: ; %bb.0: ; %.entry +; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-CONTRACT-NEXT: v_fma_f16 v0, v0, v1, v2 +; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-DENORM-LABEL: test_half_add_mul_contract: +; GFX10-DENORM: ; %bb.0: ; %.entry +; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-DENORM-NEXT: v_mul_f16_e32 v0, v0, v1 +; GFX10-DENORM-NEXT: v_add_f16_e32 v0, v0, v2 +; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31] +; GFX9-UNSAFE-LABEL: test_half_add_mul_contract: +; GFX9-UNSAFE: ; %bb.0: ; %.entry +; GFX9-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-UNSAFE-NEXT: v_fma_f16 v0, v0, v1, v2 +; GFX9-UNSAFE-NEXT: s_setpc_b64 s[30:31] +; GFX10-UNSAFE-LABEL: test_half_add_mul_contract: +; GFX10-UNSAFE: ; %bb.0: ; %.entry +; GFX10-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-UNSAFE-NEXT: v_fma_f16 v0, v0, v1, v2 +; GFX10-UNSAFE-NEXT: s_setpc_b64 s[30:31] +.entry: 
+ %a = fmul contract half %x, %y + %b = fadd contract half %a, %z + ret half %b +} + define half @test_half_add_mul_rhs(half %x, half %y, half %z) { ; GFX9-LABEL: test_half_add_mul_rhs: ; GFX9: ; %bb.0: ; %.entry @@ -353,12 +544,6 @@ define half @test_half_add_mul_rhs(half %x, half %y, half %z) { ; GFX9-DENORM-NEXT: v_mad_legacy_f16 v0, v0, v1, v2 ; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-UNSAFE-LABEL: test_half_add_mul_rhs: -; GFX9-UNSAFE: ; %bb.0: ; %.entry -; GFX9-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-UNSAFE-NEXT: v_fma_f16 v0, v0, v1, v2 -; GFX9-UNSAFE-NEXT: s_setpc_b64 s[30:31] -; ; GFX10-LABEL: test_half_add_mul_rhs: ; GFX10: ; %bb.0: ; %.entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -378,7 +563,6 @@ define half @test_half_add_mul_rhs(half %x, half %y, half %z) { ; GFX10-DENORM-NEXT: v_mul_f16_e32 v0, v0, v1 ; GFX10-DENORM-NEXT: v_add_f16_e32 v0, v2, v0 ; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31] -; ; GFX10-UNSAFE-LABEL: test_half_add_mul_rhs: ; GFX10-UNSAFE: ; %bb.0: ; %.entry ; GFX10-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -390,6 +574,59 @@ define half @test_half_add_mul_rhs(half %x, half %y, half %z) { ret half %b } +define half @test_half_add_mul_rhs_contract(half %x, half %y, half %z) { +; GFX9-LABEL: test_half_add_mul_rhs_contract: +; GFX9: ; %bb.0: ; %.entry +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_fma_f16 v0, v0, v1, v2 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-CONTRACT-LABEL: test_half_add_mul_rhs_contract: +; GFX9-CONTRACT: ; %bb.0: ; %.entry +; GFX9-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-CONTRACT-NEXT: v_fma_f16 v0, v0, v1, v2 +; GFX9-CONTRACT-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-DENORM-LABEL: test_half_add_mul_rhs_contract: +; GFX9-DENORM: ; %bb.0: ; %.entry +; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-DENORM-NEXT: v_mad_legacy_f16 v0, v0, v1, v2 +; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: test_half_add_mul_rhs_contract: +; GFX10: ; %bb.0: ; %.entry +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: v_fma_f16 v0, v0, v1, v2 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-CONTRACT-LABEL: test_half_add_mul_rhs_contract: +; GFX10-CONTRACT: ; %bb.0: ; %.entry +; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-CONTRACT-NEXT: v_fma_f16 v0, v0, v1, v2 +; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-DENORM-LABEL: test_half_add_mul_rhs_contract: +; GFX10-DENORM: ; %bb.0: ; %.entry +; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-DENORM-NEXT: v_mul_f16_e32 v0, v0, v1 +; GFX10-DENORM-NEXT: v_add_f16_e32 v0, v2, v0 +; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31] +; GFX9-UNSAFE-LABEL: test_half_add_mul_rhs_contract: +; GFX9-UNSAFE: ; %bb.0: ; %.entry +; GFX9-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-UNSAFE-NEXT: v_fma_f16 v0, v0, v1, v2 +; GFX9-UNSAFE-NEXT: s_setpc_b64 s[30:31] +; GFX10-UNSAFE-LABEL: test_half_add_mul_rhs_contract: +; GFX10-UNSAFE: ; %bb.0: ; %.entry +; GFX10-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-UNSAFE-NEXT: v_fma_f16 v0, v0, v1, v2 +; GFX10-UNSAFE-NEXT: s_setpc_b64 s[30:31] +.entry: + %a = fmul contract half %x, %y + %b = fadd contract half %z, %a + ret half %b +} + define double @test_double_add_mul(double %x, double %y, double %z) { ; GFX9-LABEL: test_double_add_mul: ; GFX9: ; %bb.0: ; %.entry @@ -411,12 +648,6 @@ define double @test_double_add_mul(double %x, double %y, double %z) { ; 
GFX9-DENORM-NEXT: v_add_f64 v[0:1], v[0:1], v[4:5] ; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-UNSAFE-LABEL: test_double_add_mul: -; GFX9-UNSAFE: ; %bb.0: ; %.entry -; GFX9-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-UNSAFE-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5] -; GFX9-UNSAFE-NEXT: s_setpc_b64 s[30:31] -; ; GFX10-LABEL: test_double_add_mul: ; GFX10: ; %bb.0: ; %.entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -436,15 +667,61 @@ define double @test_double_add_mul(double %x, double %y, double %z) { ; GFX10-DENORM-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] ; GFX10-DENORM-NEXT: v_add_f64 v[0:1], v[0:1], v[4:5] ; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31] +.entry: + %a = fmul double %x, %y + %b = fadd double %a, %z + ret double %b +} + +define double @test_double_add_mul_contract(double %x, double %y, double %z) { +; GFX9-LABEL: test_double_add_mul_contract: +; GFX9: ; %bb.0: ; %.entry +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5] +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-CONTRACT-LABEL: test_double_add_mul_contract: +; GFX9-CONTRACT: ; %bb.0: ; %.entry +; GFX9-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-CONTRACT-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5] +; GFX9-CONTRACT-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-DENORM-LABEL: test_double_add_mul_contract: +; GFX9-DENORM: ; %bb.0: ; %.entry +; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-DENORM-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5] +; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31] ; +; GFX10-LABEL: test_double_add_mul_contract: +; GFX10: ; %bb.0: ; %.entry +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5] +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-CONTRACT-LABEL: test_double_add_mul_contract: +; GFX10-CONTRACT: ; %bb.0: ; %.entry +; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-CONTRACT-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5] +; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-DENORM-LABEL: test_double_add_mul_contract: +; GFX10-DENORM: ; %bb.0: ; %.entry +; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-DENORM-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5] +; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31] +; GFX9-UNSAFE-LABEL: test_double_add_mul_contract: +; GFX9-UNSAFE: ; %bb.0: ; %.entry +; GFX9-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-UNSAFE-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5] +; GFX9-UNSAFE-NEXT: s_setpc_b64 s[30:31] ; GFX10-UNSAFE-LABEL: test_double_add_mul: ; GFX10-UNSAFE: ; %bb.0: ; %.entry ; GFX10-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-UNSAFE-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5] ; GFX10-UNSAFE-NEXT: s_setpc_b64 s[30:31] .entry: - %a = fmul double %x, %y - %b = fadd double %a, %z + %a = fmul contract double %x, %y + %b = fadd contract double %a, %z ret double %b } @@ -469,12 +746,6 @@ define double @test_double_add_mul_rhs(double %x, double %y, double %z) { ; GFX9-DENORM-NEXT: v_add_f64 v[0:1], v[4:5], v[0:1] ; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-UNSAFE-LABEL: test_double_add_mul_rhs: -; GFX9-UNSAFE: ; %bb.0: ; %.entry -; GFX9-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-UNSAFE-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5] -; GFX9-UNSAFE-NEXT: s_setpc_b64 s[30:31] -; ; GFX10-LABEL: test_double_add_mul_rhs: ; GFX10: ; %bb.0: ; %.entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) 
lgkmcnt(0) @@ -494,15 +765,61 @@ define double @test_double_add_mul_rhs(double %x, double %y, double %z) { ; GFX10-DENORM-NEXT: v_mul_f64 v[0:1], v[0:1], v[2:3] ; GFX10-DENORM-NEXT: v_add_f64 v[0:1], v[4:5], v[0:1] ; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31] +.entry: + %a = fmul double %x, %y + %b = fadd double %z, %a + ret double %b +} + +define double @test_double_add_mul_rhs_contract(double %x, double %y, double %z) { +; GFX9-LABEL: test_double_add_mul_rhs_contract: +; GFX9: ; %bb.0: ; %.entry +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5] +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-CONTRACT-LABEL: test_double_add_mul_rhs_contract: +; GFX9-CONTRACT: ; %bb.0: ; %.entry +; GFX9-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-CONTRACT-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5] +; GFX9-CONTRACT-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-DENORM-LABEL: test_double_add_mul_rhs_contract: +; GFX9-DENORM: ; %bb.0: ; %.entry +; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-DENORM-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5] +; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31] ; -; GFX10-UNSAFE-LABEL: test_double_add_mul_rhs: +; GFX10-LABEL: test_double_add_mul_rhs_contract: +; GFX10: ; %bb.0: ; %.entry +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5] +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-CONTRACT-LABEL: test_double_add_mul_rhs_contract: +; GFX10-CONTRACT: ; %bb.0: ; %.entry +; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-CONTRACT-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5] +; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-DENORM-LABEL: test_double_add_mul_rhs_contract: +; GFX10-DENORM: ; %bb.0: ; %.entry +; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-DENORM-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5] +; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31] +; GFX9-UNSAFE-LABEL: test_double_add_mul_rhs_contract: +; GFX9-UNSAFE: ; %bb.0: ; %.entry +; GFX9-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-UNSAFE-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5] +; GFX9-UNSAFE-NEXT: s_setpc_b64 s[30:31] +; GFX10-UNSAFE-LABEL: test_double_add_mul_rhs_contract: ; GFX10-UNSAFE: ; %bb.0: ; %.entry ; GFX10-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-UNSAFE-NEXT: v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5] ; GFX10-UNSAFE-NEXT: s_setpc_b64 s[30:31] .entry: - %a = fmul double %x, %y - %b = fadd double %z, %a + %a = fmul contract double %x, %y + %b = fadd contract double %z, %a ret double %b } @@ -538,15 +855,6 @@ define <4 x float> @test_4xfloat_add_mul(<4 x float> %x, <4 x float> %y, <4 x fl ; GFX9-DENORM-NEXT: v_mad_f32 v3, v3, v7, v11 ; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-UNSAFE-LABEL: test_4xfloat_add_mul: -; GFX9-UNSAFE: ; %bb.0: ; %.entry -; GFX9-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-UNSAFE-NEXT: v_fma_f32 v0, v0, v4, v8 -; GFX9-UNSAFE-NEXT: v_fma_f32 v1, v1, v5, v9 -; GFX9-UNSAFE-NEXT: v_fma_f32 v2, v2, v6, v10 -; GFX9-UNSAFE-NEXT: v_fma_f32 v3, v3, v7, v11 -; GFX9-UNSAFE-NEXT: s_setpc_b64 s[30:31] -; ; GFX10-LABEL: test_4xfloat_add_mul: ; GFX10: ; %bb.0: ; %.entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -577,8 +885,75 @@ define <4 x float> @test_4xfloat_add_mul(<4 x float> %x, <4 x float> %y, <4 x fl ; GFX10-DENORM-NEXT: v_mad_f32 v2, v2, v6, v10 ; GFX10-DENORM-NEXT: v_mad_f32 v3, v3, v7, v11 ; GFX10-DENORM-NEXT: s_setpc_b64 
s[30:31] +.entry: + %a = fmul <4 x float> %x, %y + %b = fadd <4 x float> %a, %z + ret <4 x float> %b +} + +define <4 x float> @test_4xfloat_add_mul_contract(<4 x float> %x, <4 x float> %y, <4 x float> %z) { +; GFX9-LABEL: test_4xfloat_add_mul_contract: +; GFX9: ; %bb.0: ; %.entry +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_fma_f32 v0, v0, v4, v8 +; GFX9-NEXT: v_fma_f32 v1, v1, v5, v9 +; GFX9-NEXT: v_fma_f32 v2, v2, v6, v10 +; GFX9-NEXT: v_fma_f32 v3, v3, v7, v11 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-CONTRACT-LABEL: test_4xfloat_add_mul_contract: +; GFX9-CONTRACT: ; %bb.0: ; %.entry +; GFX9-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-CONTRACT-NEXT: v_fma_f32 v0, v0, v4, v8 +; GFX9-CONTRACT-NEXT: v_fma_f32 v1, v1, v5, v9 +; GFX9-CONTRACT-NEXT: v_fma_f32 v2, v2, v6, v10 +; GFX9-CONTRACT-NEXT: v_fma_f32 v3, v3, v7, v11 +; GFX9-CONTRACT-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-DENORM-LABEL: test_4xfloat_add_mul_contract: +; GFX9-DENORM: ; %bb.0: ; %.entry +; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-DENORM-NEXT: v_mad_f32 v0, v0, v4, v8 +; GFX9-DENORM-NEXT: v_mad_f32 v1, v1, v5, v9 +; GFX9-DENORM-NEXT: v_mad_f32 v2, v2, v6, v10 +; GFX9-DENORM-NEXT: v_mad_f32 v3, v3, v7, v11 +; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31] ; -; GFX10-UNSAFE-LABEL: test_4xfloat_add_mul: +; GFX10-LABEL: test_4xfloat_add_mul_contract: +; GFX10: ; %bb.0: ; %.entry +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: v_fma_f32 v0, v0, v4, v8 +; GFX10-NEXT: v_fma_f32 v1, v1, v5, v9 +; GFX10-NEXT: v_fma_f32 v2, v2, v6, v10 +; GFX10-NEXT: v_fma_f32 v3, v3, v7, v11 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-CONTRACT-LABEL: test_4xfloat_add_mul_contract: +; GFX10-CONTRACT: ; %bb.0: ; %.entry +; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-CONTRACT-NEXT: v_fma_f32 v0, v0, v4, v8 +; GFX10-CONTRACT-NEXT: v_fma_f32 v1, v1, v5, v9 +; GFX10-CONTRACT-NEXT: v_fma_f32 v2, v2, v6, v10 +; GFX10-CONTRACT-NEXT: v_fma_f32 v3, v3, v7, v11 +; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-DENORM-LABEL: test_4xfloat_add_mul_contract: +; GFX10-DENORM: ; %bb.0: ; %.entry +; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-DENORM-NEXT: v_fma_f32 v0, v0, v4, v8 +; GFX10-DENORM-NEXT: v_fma_f32 v1, v1, v5, v9 +; GFX10-DENORM-NEXT: v_fma_f32 v2, v2, v6, v10 +; GFX10-DENORM-NEXT: v_fma_f32 v3, v3, v7, v11 +; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31] +; GFX9-UNSAFE-LABEL: test_4xfloat_add_mul_contract: +; GFX9-UNSAFE: ; %bb.0: ; %.entry +; GFX9-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-UNSAFE-NEXT: v_fma_f32 v0, v0, v4, v8 +; GFX9-UNSAFE-NEXT: v_fma_f32 v1, v1, v5, v9 +; GFX9-UNSAFE-NEXT: v_fma_f32 v2, v2, v6, v10 +; GFX9-UNSAFE-NEXT: v_fma_f32 v3, v3, v7, v11 +; GFX9-UNSAFE-NEXT: s_setpc_b64 s[30:31] +; GFX10-UNSAFE-LABEL: test_4xfloat_add_mul_contract: ; GFX10-UNSAFE: ; %bb.0: ; %.entry ; GFX10-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-UNSAFE-NEXT: v_fma_f32 v0, v0, v4, v8 @@ -587,8 +962,8 @@ define <4 x float> @test_4xfloat_add_mul(<4 x float> %x, <4 x float> %y, <4 x fl ; GFX10-UNSAFE-NEXT: v_fma_f32 v3, v3, v7, v11 ; GFX10-UNSAFE-NEXT: s_setpc_b64 s[30:31] .entry: - %a = fmul <4 x float> %x, %y - %b = fadd <4 x float> %a, %z + %a = fmul contract <4 x float> %x, %y + %b = fadd contract <4 x float> %a, %z ret <4 x float> %b } @@ -620,14 +995,6 @@ define <3 x float> @test_3xfloat_add_mul_rhs(<3 x float> %x, <3 x float> %y, <3 ; GFX9-DENORM-NEXT: v_mad_f32 v2, v2, v5, v8 ; 
GFX9-DENORM-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-UNSAFE-LABEL: test_3xfloat_add_mul_rhs: -; GFX9-UNSAFE: ; %bb.0: ; %.entry -; GFX9-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-UNSAFE-NEXT: v_fma_f32 v0, v0, v3, v6 -; GFX9-UNSAFE-NEXT: v_fma_f32 v1, v1, v4, v7 -; GFX9-UNSAFE-NEXT: v_fma_f32 v2, v2, v5, v8 -; GFX9-UNSAFE-NEXT: s_setpc_b64 s[30:31] -; ; GFX10-LABEL: test_3xfloat_add_mul_rhs: ; GFX10: ; %bb.0: ; %.entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -654,8 +1021,68 @@ define <3 x float> @test_3xfloat_add_mul_rhs(<3 x float> %x, <3 x float> %y, <3 ; GFX10-DENORM-NEXT: v_mad_f32 v1, v1, v4, v7 ; GFX10-DENORM-NEXT: v_mad_f32 v2, v2, v5, v8 ; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31] +.entry: + %a = fmul <3 x float> %x, %y + %b = fadd <3 x float> %z, %a + ret <3 x float> %b +} + +define <3 x float> @test_3xfloat_add_mul_rhs_contract(<3 x float> %x, <3 x float> %y, <3 x float> %z) { +; GFX9-LABEL: test_3xfloat_add_mul_rhs_contract: +; GFX9: ; %bb.0: ; %.entry +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_fma_f32 v0, v0, v3, v6 +; GFX9-NEXT: v_fma_f32 v1, v1, v4, v7 +; GFX9-NEXT: v_fma_f32 v2, v2, v5, v8 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-CONTRACT-LABEL: test_3xfloat_add_mul_rhs_contract: +; GFX9-CONTRACT: ; %bb.0: ; %.entry +; GFX9-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-CONTRACT-NEXT: v_fma_f32 v0, v0, v3, v6 +; GFX9-CONTRACT-NEXT: v_fma_f32 v1, v1, v4, v7 +; GFX9-CONTRACT-NEXT: v_fma_f32 v2, v2, v5, v8 +; GFX9-CONTRACT-NEXT: s_setpc_b64 s[30:31] ; -; GFX10-UNSAFE-LABEL: test_3xfloat_add_mul_rhs: +; GFX9-DENORM-LABEL: test_3xfloat_add_mul_rhs_contract: +; GFX9-DENORM: ; %bb.0: ; %.entry +; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-DENORM-NEXT: v_mad_f32 v0, v0, v3, v6 +; GFX9-DENORM-NEXT: v_mad_f32 v1, v1, v4, v7 +; GFX9-DENORM-NEXT: v_mad_f32 v2, v2, v5, v8 +; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: test_3xfloat_add_mul_rhs_contract: +; GFX10: ; %bb.0: ; %.entry +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: v_fma_f32 v0, v0, v3, v6 +; GFX10-NEXT: v_fma_f32 v1, v1, v4, v7 +; GFX10-NEXT: v_fma_f32 v2, v2, v5, v8 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-CONTRACT-LABEL: test_3xfloat_add_mul_rhs_contract: +; GFX10-CONTRACT: ; %bb.0: ; %.entry +; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-CONTRACT-NEXT: v_fma_f32 v0, v0, v3, v6 +; GFX10-CONTRACT-NEXT: v_fma_f32 v1, v1, v4, v7 +; GFX10-CONTRACT-NEXT: v_fma_f32 v2, v2, v5, v8 +; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-DENORM-LABEL: test_3xfloat_add_mul_rhs_contract: +; GFX10-DENORM: ; %bb.0: ; %.entry +; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-DENORM-NEXT: v_fma_f32 v0, v0, v3, v6 +; GFX10-DENORM-NEXT: v_fma_f32 v1, v1, v4, v7 +; GFX10-DENORM-NEXT: v_fma_f32 v2, v2, v5, v8 +; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31] +; GFX9-UNSAFE-LABEL: test_3xfloat_add_mul_rhs_contract: +; GFX9-UNSAFE: ; %bb.0: ; %.entry +; GFX9-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-UNSAFE-NEXT: v_fma_f32 v0, v0, v3, v6 +; GFX9-UNSAFE-NEXT: v_fma_f32 v1, v1, v4, v7 +; GFX9-UNSAFE-NEXT: v_fma_f32 v2, v2, v5, v8 +; GFX9-UNSAFE-NEXT: s_setpc_b64 s[30:31] +; GFX10-UNSAFE-LABEL: test_3xfloat_add_mul_rhs_contract: ; GFX10-UNSAFE: ; %bb.0: ; %.entry ; GFX10-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-UNSAFE-NEXT: v_fma_f32 v0, v0, v3, v6 @@ -663,8 +1090,8 @@ define <3 x float> @test_3xfloat_add_mul_rhs(<3 x float> 
%x, <3 x float> %y, <3 ; GFX10-UNSAFE-NEXT: v_fma_f32 v2, v2, v5, v8 ; GFX10-UNSAFE-NEXT: s_setpc_b64 s[30:31] .entry: - %a = fmul <3 x float> %x, %y - %b = fadd <3 x float> %z, %a + %a = fmul contract <3 x float> %x, %y + %b = fadd contract <3 x float> %z, %a ret <3 x float> %b } @@ -694,13 +1121,6 @@ define <4 x half> @test_4xhalf_add_mul(<4 x half> %x, <4 x half> %y, <4 x half> ; GFX9-DENORM-NEXT: v_pk_add_f16 v1, v1, v5 ; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-UNSAFE-LABEL: test_4xhalf_add_mul: -; GFX9-UNSAFE: ; %bb.0: ; %.entry -; GFX9-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-UNSAFE-NEXT: v_pk_fma_f16 v0, v0, v2, v4 -; GFX9-UNSAFE-NEXT: v_pk_fma_f16 v1, v1, v3, v5 -; GFX9-UNSAFE-NEXT: s_setpc_b64 s[30:31] -; ; GFX10-LABEL: test_4xhalf_add_mul: ; GFX10: ; %bb.0: ; %.entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -725,7 +1145,6 @@ define <4 x half> @test_4xhalf_add_mul(<4 x half> %x, <4 x half> %y, <4 x half> ; GFX10-DENORM-NEXT: v_pk_add_f16 v0, v0, v4 ; GFX10-DENORM-NEXT: v_pk_add_f16 v1, v1, v5 ; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31] -; ; GFX10-UNSAFE-LABEL: test_4xhalf_add_mul: ; GFX10-UNSAFE: ; %bb.0: ; %.entry ; GFX10-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -738,6 +1157,70 @@ define <4 x half> @test_4xhalf_add_mul(<4 x half> %x, <4 x half> %y, <4 x half> ret <4 x half> %b } +define <4 x half> @test_4xhalf_add_mul_contract(<4 x half> %x, <4 x half> %y, <4 x half> %z) { +; GFX9-LABEL: test_4xhalf_add_mul_contract: +; GFX9: ; %bb.0: ; %.entry +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_pk_fma_f16 v0, v0, v2, v4 +; GFX9-NEXT: v_pk_fma_f16 v1, v1, v3, v5 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-CONTRACT-LABEL: test_4xhalf_add_mul_contract: +; GFX9-CONTRACT: ; %bb.0: ; %.entry +; GFX9-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-CONTRACT-NEXT: v_pk_fma_f16 v0, v0, v2, v4 +; GFX9-CONTRACT-NEXT: v_pk_fma_f16 v1, v1, v3, v5 +; GFX9-CONTRACT-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-DENORM-LABEL: test_4xhalf_add_mul_contract: +; GFX9-DENORM: ; %bb.0: ; %.entry +; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-DENORM-NEXT: v_pk_mul_f16 v0, v0, v2 +; GFX9-DENORM-NEXT: v_pk_mul_f16 v1, v1, v3 +; GFX9-DENORM-NEXT: v_pk_add_f16 v0, v0, v4 +; GFX9-DENORM-NEXT: v_pk_add_f16 v1, v1, v5 +; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: test_4xhalf_add_mul_contract: +; GFX10: ; %bb.0: ; %.entry +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: v_pk_fma_f16 v0, v0, v2, v4 +; GFX10-NEXT: v_pk_fma_f16 v1, v1, v3, v5 +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-CONTRACT-LABEL: test_4xhalf_add_mul_contract: +; GFX10-CONTRACT: ; %bb.0: ; %.entry +; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-CONTRACT-NEXT: v_pk_fma_f16 v0, v0, v2, v4 +; GFX10-CONTRACT-NEXT: v_pk_fma_f16 v1, v1, v3, v5 +; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-DENORM-LABEL: test_4xhalf_add_mul_contract: +; GFX10-DENORM: ; %bb.0: ; %.entry +; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-DENORM-NEXT: v_pk_mul_f16 v0, v0, v2 +; GFX10-DENORM-NEXT: v_pk_mul_f16 v1, v1, v3 +; GFX10-DENORM-NEXT: v_pk_add_f16 v0, v0, v4 +; GFX10-DENORM-NEXT: v_pk_add_f16 v1, v1, v5 +; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31] +; GFX9-UNSAFE-LABEL: test_4xhalf_add_mul_contract: +; GFX9-UNSAFE: ; %bb.0: ; %.entry +; GFX9-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-UNSAFE-NEXT: v_pk_fma_f16 v0, v0, v2, v4 +; GFX9-UNSAFE-NEXT: 
v_pk_fma_f16 v1, v1, v3, v5 +; GFX9-UNSAFE-NEXT: s_setpc_b64 s[30:31] +; GFX10-UNSAFE-LABEL: test_4xhalf_add_mul_contract: +; GFX10-UNSAFE: ; %bb.0: ; %.entry +; GFX10-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-UNSAFE-NEXT: v_pk_fma_f16 v0, v0, v2, v4 +; GFX10-UNSAFE-NEXT: v_pk_fma_f16 v1, v1, v3, v5 +; GFX10-UNSAFE-NEXT: s_setpc_b64 s[30:31] +.entry: + %a = fmul contract <4 x half> %x, %y + %b = fadd contract <4 x half> %a, %z + ret <4 x half> %b +} + define <3 x half> @test_3xhalf_add_mul_rhs(<3 x half> %x, <3 x half> %y, <3 x half> %z) { ; GFX9-LABEL: test_3xhalf_add_mul_rhs: ; GFX9: ; %bb.0: ; %.entry @@ -764,13 +1247,6 @@ define <3 x half> @test_3xhalf_add_mul_rhs(<3 x half> %x, <3 x half> %y, <3 x ha ; GFX9-DENORM-NEXT: v_pk_add_f16 v1, v5, v1 ; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-UNSAFE-LABEL: test_3xhalf_add_mul_rhs: -; GFX9-UNSAFE: ; %bb.0: ; %.entry -; GFX9-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-UNSAFE-NEXT: v_pk_fma_f16 v0, v0, v2, v4 -; GFX9-UNSAFE-NEXT: v_pk_fma_f16 v1, v1, v3, v5 -; GFX9-UNSAFE-NEXT: s_setpc_b64 s[30:31] -; ; GFX10-LABEL: test_3xhalf_add_mul_rhs: ; GFX10: ; %bb.0: ; %.entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -795,16 +1271,73 @@ define <3 x half> @test_3xhalf_add_mul_rhs(<3 x half> %x, <3 x half> %y, <3 x ha ; GFX10-DENORM-NEXT: v_pk_add_f16 v0, v4, v0 ; GFX10-DENORM-NEXT: v_pk_add_f16 v1, v5, v1 ; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31] +.entry: + %a = fmul <3 x half> %x, %y + %b = fadd <3 x half> %z, %a + ret <3 x half> %b +} + +define <3 x half> @test_3xhalf_add_mul_rhs_contract(<3 x half> %x, <3 x half> %y, <3 x half> %z) { +; GFX9-LABEL: test_3xhalf_add_mul_rhs_contract: +; GFX9: ; %bb.0: ; %.entry +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_pk_fma_f16 v0, v0, v2, v4 +; GFX9-NEXT: v_pk_fma_f16 v1, v1, v3, v5 +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-CONTRACT-LABEL: test_3xhalf_add_mul_rhs_contract: +; GFX9-CONTRACT: ; %bb.0: ; %.entry +; GFX9-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-CONTRACT-NEXT: v_pk_fma_f16 v0, v0, v2, v4 +; GFX9-CONTRACT-NEXT: v_pk_fma_f16 v1, v1, v3, v5 +; GFX9-CONTRACT-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-DENORM-LABEL: test_3xhalf_add_mul_rhs_contract: +; GFX9-DENORM: ; %bb.0: ; %.entry +; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-DENORM-NEXT: v_pk_mul_f16 v0, v0, v2 +; GFX9-DENORM-NEXT: v_pk_mul_f16 v1, v1, v3 +; GFX9-DENORM-NEXT: v_pk_add_f16 v0, v4, v0 +; GFX9-DENORM-NEXT: v_pk_add_f16 v1, v5, v1 +; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: test_3xhalf_add_mul_rhs_contract: +; GFX10: ; %bb.0: ; %.entry +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: v_pk_fma_f16 v0, v0, v2, v4 +; GFX10-NEXT: v_pk_fma_f16 v1, v1, v3, v5 +; GFX10-NEXT: s_setpc_b64 s[30:31] ; -; GFX10-UNSAFE-LABEL: test_3xhalf_add_mul_rhs: +; GFX10-CONTRACT-LABEL: test_3xhalf_add_mul_rhs_contract: +; GFX10-CONTRACT: ; %bb.0: ; %.entry +; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-CONTRACT-NEXT: v_pk_fma_f16 v0, v0, v2, v4 +; GFX10-CONTRACT-NEXT: v_pk_fma_f16 v1, v1, v3, v5 +; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-DENORM-LABEL: test_3xhalf_add_mul_rhs_contract: +; GFX10-DENORM: ; %bb.0: ; %.entry +; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-DENORM-NEXT: v_pk_mul_f16 v0, v0, v2 +; GFX10-DENORM-NEXT: v_pk_mul_f16 v1, v1, v3 +; GFX10-DENORM-NEXT: v_pk_add_f16 v0, v4, v0 +; GFX10-DENORM-NEXT: v_pk_add_f16 v1, v5, v1 
+; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31] +; GFX9-UNSAFE-LABEL: test_3xhalf_add_mul_rhs_contract: +; GFX9-UNSAFE: ; %bb.0: ; %.entry +; GFX9-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-UNSAFE-NEXT: v_pk_fma_f16 v0, v0, v2, v4 +; GFX9-UNSAFE-NEXT: v_pk_fma_f16 v1, v1, v3, v5 +; GFX9-UNSAFE-NEXT: s_setpc_b64 s[30:31] +; GFX10-UNSAFE-LABEL: test_3xhalf_add_mul_rhs_contract: ; GFX10-UNSAFE: ; %bb.0: ; %.entry ; GFX10-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX10-UNSAFE-NEXT: v_pk_fma_f16 v0, v0, v2, v4 ; GFX10-UNSAFE-NEXT: v_pk_fma_f16 v1, v1, v3, v5 ; GFX10-UNSAFE-NEXT: s_setpc_b64 s[30:31] .entry: - %a = fmul <3 x half> %x, %y - %b = fadd <3 x half> %z, %a + %a = fmul contract <3 x half> %x, %y + %b = fadd contract <3 x half> %z, %a ret <3 x half> %b } @@ -844,15 +1377,6 @@ define <4 x double> @test_4xdouble_add_mul(<4 x double> %x, <4 x double> %y, <4 ; GFX9-DENORM-NEXT: v_add_f64 v[6:7], v[6:7], v[22:23] ; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-UNSAFE-LABEL: test_4xdouble_add_mul: -; GFX9-UNSAFE: ; %bb.0: ; %.entry -; GFX9-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-UNSAFE-NEXT: v_fma_f64 v[0:1], v[0:1], v[8:9], v[16:17] -; GFX9-UNSAFE-NEXT: v_fma_f64 v[2:3], v[2:3], v[10:11], v[18:19] -; GFX9-UNSAFE-NEXT: v_fma_f64 v[4:5], v[4:5], v[12:13], v[20:21] -; GFX9-UNSAFE-NEXT: v_fma_f64 v[6:7], v[6:7], v[14:15], v[22:23] -; GFX9-UNSAFE-NEXT: s_setpc_b64 s[30:31] -; ; GFX10-LABEL: test_4xdouble_add_mul: ; GFX10: ; %bb.0: ; %.entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -887,7 +1411,14 @@ define <4 x double> @test_4xdouble_add_mul(<4 x double> %x, <4 x double> %y, <4 ; GFX10-DENORM-NEXT: v_add_f64 v[4:5], v[4:5], v[20:21] ; GFX10-DENORM-NEXT: v_add_f64 v[6:7], v[6:7], v[22:23] ; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31] -; +; GFX9-UNSAFE-LABEL: test_4xdouble_add_mul: +; GFX9-UNSAFE: ; %bb.0: ; %.entry +; GFX9-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-UNSAFE-NEXT: v_fma_f64 v[0:1], v[0:1], v[8:9], v[16:17] +; GFX9-UNSAFE-NEXT: v_fma_f64 v[2:3], v[2:3], v[10:11], v[18:19] +; GFX9-UNSAFE-NEXT: v_fma_f64 v[4:5], v[4:5], v[12:13], v[20:21] +; GFX9-UNSAFE-NEXT: v_fma_f64 v[6:7], v[6:7], v[14:15], v[22:23] +; GFX9-UNSAFE-NEXT: s_setpc_b64 s[30:31] ; GFX10-UNSAFE-LABEL: test_4xdouble_add_mul: ; GFX10-UNSAFE: ; %bb.0: ; %.entry ; GFX10-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -902,6 +1433,66 @@ define <4 x double> @test_4xdouble_add_mul(<4 x double> %x, <4 x double> %y, <4 ret <4 x double> %b } +define <4 x double> @test_4xdouble_add_mul_contract(<4 x double> %x, <4 x double> %y, <4 x double> %z) { +; GFX9-LABEL: test_4xdouble_add_mul_contract: +; GFX9: ; %bb.0: ; %.entry +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_fma_f64 v[0:1], v[0:1], v[8:9], v[16:17] +; GFX9-NEXT: v_fma_f64 v[2:3], v[2:3], v[10:11], v[18:19] +; GFX9-NEXT: v_fma_f64 v[4:5], v[4:5], v[12:13], v[20:21] +; GFX9-NEXT: v_fma_f64 v[6:7], v[6:7], v[14:15], v[22:23] +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-CONTRACT-LABEL: test_4xdouble_add_mul_contract: +; GFX9-CONTRACT: ; %bb.0: ; %.entry +; GFX9-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-CONTRACT-NEXT: v_fma_f64 v[0:1], v[0:1], v[8:9], v[16:17] +; GFX9-CONTRACT-NEXT: v_fma_f64 v[2:3], v[2:3], v[10:11], v[18:19] +; GFX9-CONTRACT-NEXT: v_fma_f64 v[4:5], v[4:5], v[12:13], v[20:21] +; GFX9-CONTRACT-NEXT: v_fma_f64 v[6:7], v[6:7], v[14:15], v[22:23] +; GFX9-CONTRACT-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-DENORM-LABEL: 
test_4xdouble_add_mul_contract: +; GFX9-DENORM: ; %bb.0: ; %.entry +; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-DENORM-NEXT: v_fma_f64 v[0:1], v[0:1], v[8:9], v[16:17] +; GFX9-DENORM-NEXT: v_fma_f64 v[2:3], v[2:3], v[10:11], v[18:19] +; GFX9-DENORM-NEXT: v_fma_f64 v[4:5], v[4:5], v[12:13], v[20:21] +; GFX9-DENORM-NEXT: v_fma_f64 v[6:7], v[6:7], v[14:15], v[22:23] +; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: test_4xdouble_add_mul_contract: +; GFX10: ; %bb.0: ; %.entry +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: v_fma_f64 v[0:1], v[0:1], v[8:9], v[16:17] +; GFX10-NEXT: v_fma_f64 v[2:3], v[2:3], v[10:11], v[18:19] +; GFX10-NEXT: v_fma_f64 v[4:5], v[4:5], v[12:13], v[20:21] +; GFX10-NEXT: v_fma_f64 v[6:7], v[6:7], v[14:15], v[22:23] +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-CONTRACT-LABEL: test_4xdouble_add_mul_contract: +; GFX10-CONTRACT: ; %bb.0: ; %.entry +; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-CONTRACT-NEXT: v_fma_f64 v[0:1], v[0:1], v[8:9], v[16:17] +; GFX10-CONTRACT-NEXT: v_fma_f64 v[2:3], v[2:3], v[10:11], v[18:19] +; GFX10-CONTRACT-NEXT: v_fma_f64 v[4:5], v[4:5], v[12:13], v[20:21] +; GFX10-CONTRACT-NEXT: v_fma_f64 v[6:7], v[6:7], v[14:15], v[22:23] +; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-DENORM-LABEL: test_4xdouble_add_mul_contract: +; GFX10-DENORM: ; %bb.0: ; %.entry +; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-DENORM-NEXT: v_fma_f64 v[0:1], v[0:1], v[8:9], v[16:17] +; GFX10-DENORM-NEXT: v_fma_f64 v[2:3], v[2:3], v[10:11], v[18:19] +; GFX10-DENORM-NEXT: v_fma_f64 v[4:5], v[4:5], v[12:13], v[20:21] +; GFX10-DENORM-NEXT: v_fma_f64 v[6:7], v[6:7], v[14:15], v[22:23] +; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31] +.entry: + %a = fmul contract <4 x double> %x, %y + %b = fadd contract <4 x double> %a, %z + ret <4 x double> %b +} + define <3 x double> @test_3xdouble_add_mul_rhs(<3 x double> %x, <3 x double> %y, <3 x double> %z) { ; GFX9-LABEL: test_3xdouble_add_mul_rhs: ; GFX9: ; %bb.0: ; %.entry @@ -933,14 +1524,6 @@ define <3 x double> @test_3xdouble_add_mul_rhs(<3 x double> %x, <3 x double> %y, ; GFX9-DENORM-NEXT: v_add_f64 v[4:5], v[16:17], v[4:5] ; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31] ; -; GFX9-UNSAFE-LABEL: test_3xdouble_add_mul_rhs: -; GFX9-UNSAFE: ; %bb.0: ; %.entry -; GFX9-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX9-UNSAFE-NEXT: v_fma_f64 v[0:1], v[0:1], v[6:7], v[12:13] -; GFX9-UNSAFE-NEXT: v_fma_f64 v[2:3], v[2:3], v[8:9], v[14:15] -; GFX9-UNSAFE-NEXT: v_fma_f64 v[4:5], v[4:5], v[10:11], v[16:17] -; GFX9-UNSAFE-NEXT: s_setpc_b64 s[30:31] -; ; GFX10-LABEL: test_3xdouble_add_mul_rhs: ; GFX10: ; %bb.0: ; %.entry ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) @@ -970,7 +1553,13 @@ define <3 x double> @test_3xdouble_add_mul_rhs(<3 x double> %x, <3 x double> %y, ; GFX10-DENORM-NEXT: v_add_f64 v[2:3], v[14:15], v[2:3] ; GFX10-DENORM-NEXT: v_add_f64 v[4:5], v[16:17], v[4:5] ; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31] -; +; GFX9-UNSAFE-LABEL: test_3xdouble_add_mul_rhs: +; GFX9-UNSAFE: ; %bb.0: ; %.entry +; GFX9-UNSAFE-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-UNSAFE-NEXT: v_fma_f64 v[0:1], v[0:1], v[6:7], v[12:13] +; GFX9-UNSAFE-NEXT: v_fma_f64 v[2:3], v[2:3], v[8:9], v[14:15] +; GFX9-UNSAFE-NEXT: v_fma_f64 v[4:5], v[4:5], v[10:11], v[16:17] +; GFX9-UNSAFE-NEXT: s_setpc_b64 s[30:31] ; GFX10-UNSAFE-LABEL: test_3xdouble_add_mul_rhs: ; GFX10-UNSAFE: ; %bb.0: ; %.entry ; GFX10-UNSAFE-NEXT: s_waitcnt vmcnt(0) 
expcnt(0) lgkmcnt(0) @@ -983,3 +1572,57 @@ define <3 x double> @test_3xdouble_add_mul_rhs(<3 x double> %x, <3 x double> %y, %b = fadd <3 x double> %z, %a ret <3 x double> %b } + +define <3 x double> @test_3xdouble_add_mul_rhs_contract(<3 x double> %x, <3 x double> %y, <3 x double> %z) { +; GFX9-LABEL: test_3xdouble_add_mul_rhs_contract: +; GFX9: ; %bb.0: ; %.entry +; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-NEXT: v_fma_f64 v[0:1], v[0:1], v[6:7], v[12:13] +; GFX9-NEXT: v_fma_f64 v[2:3], v[2:3], v[8:9], v[14:15] +; GFX9-NEXT: v_fma_f64 v[4:5], v[4:5], v[10:11], v[16:17] +; GFX9-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-CONTRACT-LABEL: test_3xdouble_add_mul_rhs_contract: +; GFX9-CONTRACT: ; %bb.0: ; %.entry +; GFX9-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-CONTRACT-NEXT: v_fma_f64 v[0:1], v[0:1], v[6:7], v[12:13] +; GFX9-CONTRACT-NEXT: v_fma_f64 v[2:3], v[2:3], v[8:9], v[14:15] +; GFX9-CONTRACT-NEXT: v_fma_f64 v[4:5], v[4:5], v[10:11], v[16:17] +; GFX9-CONTRACT-NEXT: s_setpc_b64 s[30:31] +; +; GFX9-DENORM-LABEL: test_3xdouble_add_mul_rhs_contract: +; GFX9-DENORM: ; %bb.0: ; %.entry +; GFX9-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX9-DENORM-NEXT: v_fma_f64 v[0:1], v[0:1], v[6:7], v[12:13] +; GFX9-DENORM-NEXT: v_fma_f64 v[2:3], v[2:3], v[8:9], v[14:15] +; GFX9-DENORM-NEXT: v_fma_f64 v[4:5], v[4:5], v[10:11], v[16:17] +; GFX9-DENORM-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-LABEL: test_3xdouble_add_mul_rhs_contract: +; GFX10: ; %bb.0: ; %.entry +; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-NEXT: v_fma_f64 v[0:1], v[0:1], v[6:7], v[12:13] +; GFX10-NEXT: v_fma_f64 v[2:3], v[2:3], v[8:9], v[14:15] +; GFX10-NEXT: v_fma_f64 v[4:5], v[4:5], v[10:11], v[16:17] +; GFX10-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-CONTRACT-LABEL: test_3xdouble_add_mul_rhs_contract: +; GFX10-CONTRACT: ; %bb.0: ; %.entry +; GFX10-CONTRACT-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-CONTRACT-NEXT: v_fma_f64 v[0:1], v[0:1], v[6:7], v[12:13] +; GFX10-CONTRACT-NEXT: v_fma_f64 v[2:3], v[2:3], v[8:9], v[14:15] +; GFX10-CONTRACT-NEXT: v_fma_f64 v[4:5], v[4:5], v[10:11], v[16:17] +; GFX10-CONTRACT-NEXT: s_setpc_b64 s[30:31] +; +; GFX10-DENORM-LABEL: test_3xdouble_add_mul_rhs_contract: +; GFX10-DENORM: ; %bb.0: ; %.entry +; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GFX10-DENORM-NEXT: v_fma_f64 v[0:1], v[0:1], v[6:7], v[12:13] +; GFX10-DENORM-NEXT: v_fma_f64 v[2:3], v[2:3], v[8:9], v[14:15] +; GFX10-DENORM-NEXT: v_fma_f64 v[4:5], v[4:5], v[10:11], v[16:17] +; GFX10-DENORM-NEXT: s_setpc_b64 s[30:31] +.entry: + %a = fmul contract <3 x double> %x, %y + %b = fadd contract <3 x double> %z, %a + ret <3 x double> %b +} diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-unmerge-values.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-unmerge-values.mir index 2845a63..d9ac9a7 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-unmerge-values.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fma-unmerge-values.mir @@ -24,8 +24,8 @@ body: | %ptr:_(p1) = COPY $vgpr2_vgpr3 %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1) %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>) - %6:_(s32) = G_FMUL %0, %1 - %7:_(s32) = G_FADD %6, %el1 + %6:_(s32) = contract G_FMUL %0, %1 + %7:_(s32) = contract G_FADD %6, %el1 $vgpr0 = COPY %7(s32) ... 
@@ -54,8 +54,8 @@ body: | %ptr:_(p1) = COPY $vgpr2_vgpr3 %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1) %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>) - %6:_(s32) = G_FMUL %0, %1 - %7:_(s32) = G_FADD %el1, %6 + %6:_(s32) = contract G_FMUL %0, %1 + %7:_(s32) = contract G_FADD %el1, %6 $vgpr0 = COPY %7(s32) ... @@ -233,10 +233,10 @@ body: | %7:_(s16) = G_TRUNC %6(s32) %8:_(s32) = COPY $vgpr5 %9:_(s16) = G_TRUNC %8(s32) - %10:_(s16) = G_FMUL %7, %9 + %10:_(s16) = contract G_FMUL %7, %9 %11:_(s32) = G_FPEXT %10(s16) %12:_(s32) = G_FMA %0, %1, %11 - %13:_(s32) = G_FADD %12, %el1 + %13:_(s32) = contract G_FADD %12, %el1 $vgpr0 = COPY %13(s32) ... @@ -282,11 +282,11 @@ body: | %9:_(s16) = G_TRUNC %8(s32) %10:_(s32) = COPY $vgpr5 %11:_(s16) = G_TRUNC %10(s32) - %12:_(s16) = G_FMUL %9, %11 - %13:_(s16) = G_FMUL %1, %3 - %14:_(s16) = G_FADD %13, %12 + %12:_(s16) = contract G_FMUL %9, %11 + %13:_(s16) = contract G_FMUL %1, %3 + %14:_(s16) = contract G_FADD %13, %12 %15:_(s32) = G_FPEXT %14(s16) - %16:_(s32) = G_FADD %15, %el1 + %16:_(s32) = contract G_FADD %15, %el1 $vgpr0 = COPY %16(s32) ... @@ -326,10 +326,10 @@ body: | %7:_(s16) = G_TRUNC %6(s32) %8:_(s32) = COPY $vgpr5 %9:_(s16) = G_TRUNC %8(s32) - %10:_(s16) = G_FMUL %7, %9 + %10:_(s16) = contract G_FMUL %7, %9 %11:_(s32) = G_FPEXT %10(s16) %12:_(s32) = G_FMA %4, %5, %11 - %13:_(s32) = G_FADD %el1, %12 + %13:_(s32) = contract G_FADD %el1, %12 $vgpr0 = COPY %13(s32) ... @@ -375,11 +375,11 @@ body: | %9:_(s16) = G_TRUNC %8(s32) %10:_(s32) = COPY $vgpr5 %11:_(s16) = G_TRUNC %10(s32) - %12:_(s16) = G_FMUL %9, %11 - %13:_(s16) = G_FMUL %5, %7 - %14:_(s16) = G_FADD %13, %12 + %12:_(s16) = contract G_FMUL %9, %11 + %13:_(s16) = contract G_FMUL %5, %7 + %14:_(s16) = contract G_FADD %13, %12 %15:_(s32) = G_FPEXT %14(s16) - %16:_(s32) = G_FADD %el1, %15 + %16:_(s32) = contract G_FADD %el1, %15 $vgpr0 = COPY %16(s32) ... @@ -409,8 +409,8 @@ body: | %ptr:_(p1) = COPY $vgpr0_vgpr1 %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1) %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>) - %6:_(s32) = G_FMUL %0, %1 - %7:_(s32) = G_FSUB %6, %el1 + %6:_(s32) = contract G_FMUL %0, %1 + %7:_(s32) = contract G_FSUB %6, %el1 $vgpr0 = COPY %7(s32) ... @@ -440,7 +440,7 @@ body: | %ptr:_(p1) = COPY $vgpr2_vgpr3 %vec:_(<2 x s32>) = G_LOAD %ptr(p1) :: (load (<2 x s32>), addrspace 1) %el0:_(s32), %el1:_(s32) = G_UNMERGE_VALUES %vec(<2 x s32>) - %6:_(s32) = G_FMUL %0, %1 - %7:_(s32) = G_FSUB %el1, %6 + %6:_(s32) = contract G_FMUL %0, %1 + %7:_(s32) = contract G_FSUB %el1, %6 $vgpr0 = COPY %7(s32) ... 
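(For reference, a minimal standalone IR sketch of the pattern the combine-fma tests above exercise; it is not taken verbatim from any test in this patch, and the function name is illustrative only. With the `contract` fast-math flag present on both the fmul and the fadd, the combiner is permitted — but not required — to fuse the pair into a single FMA, which is what the v_fma_f32 FileCheck lines in these tests match.)

define float @contract_fma_sketch(float %x, float %y, float %z) {
.entry:
  ; `contract` on both operations allows fusing the mul/add pair
  %a = fmul contract float %x, %y
  ; may lower to a single v_fma_f32 v0, v0, v1, v2 on GFX9/GFX10
  %b = fadd contract float %a, %z
  ret float %b
}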
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f32.ll index c4d57ac..da25ac0 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f32.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f32.ll @@ -12,7 +12,7 @@ define amdgpu_ps void @flat_atomic_fadd_f32_no_rtn_intrinsic(ptr %ptr, float %da ; GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 ; GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX942-NEXT: FLAT_ATOMIC_ADD_F32 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr) + ; GFX942-NEXT: FLAT_ATOMIC_ADD_F32 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr, !noalias.addrspace !0) ; GFX942-NEXT: S_ENDPGM 0 ; ; GFX11-LABEL: name: flat_atomic_fadd_f32_no_rtn_intrinsic @@ -23,7 +23,7 @@ define amdgpu_ps void @flat_atomic_fadd_f32_no_rtn_intrinsic(ptr %ptr, float %da ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: FLAT_ATOMIC_ADD_F32 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr) + ; GFX11-NEXT: FLAT_ATOMIC_ADD_F32 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr, !noalias.addrspace !0) ; GFX11-NEXT: S_ENDPGM 0 %ret = call float @llvm.amdgcn.flat.atomic.fadd.f32.p1.f32(ptr %ptr, float %data) ret void @@ -38,7 +38,7 @@ define amdgpu_ps float @flat_atomic_fadd_f32_rtn_intrinsic(ptr %ptr, float %data ; GFX942-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX942-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 ; GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX942-NEXT: [[FLAT_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_F32_RTN [[REG_SEQUENCE]], [[COPY2]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr) + ; GFX942-NEXT: [[FLAT_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_F32_RTN [[REG_SEQUENCE]], [[COPY2]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr, !noalias.addrspace !0) ; GFX942-NEXT: $vgpr0 = COPY [[FLAT_ATOMIC_ADD_F32_RTN]] ; GFX942-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 ; @@ -50,7 +50,7 @@ define amdgpu_ps float @flat_atomic_fadd_f32_rtn_intrinsic(ptr %ptr, float %data ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 ; GFX11-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 ; GFX11-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 - ; GFX11-NEXT: [[FLAT_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_F32_RTN [[REG_SEQUENCE]], [[COPY2]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr) + ; GFX11-NEXT: [[FLAT_ATOMIC_ADD_F32_RTN:%[0-9]+]]:vgpr_32 = FLAT_ATOMIC_ADD_F32_RTN [[REG_SEQUENCE]], [[COPY2]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr, !noalias.addrspace !0) ; GFX11-NEXT: $vgpr0 = COPY 
[[FLAT_ATOMIC_ADD_F32_RTN]] ; GFX11-NEXT: SI_RETURN_TO_EPILOG implicit $vgpr0 %ret = call float @llvm.amdgcn.flat.atomic.fadd.f32.p1.f32(ptr %ptr, float %data) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f64.ll index c82ae2fb..bf36979 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f64.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/flat-atomic-fadd.f64.ll @@ -13,7 +13,7 @@ define amdgpu_ps void @flat_atomic_fadd_f64_no_rtn_atomicrmw(ptr %ptr, double %d ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1 - ; GFX90A_GFX942-NEXT: FLAT_ATOMIC_ADD_F64 [[REG_SEQUENCE]], [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr) + ; GFX90A_GFX942-NEXT: FLAT_ATOMIC_ADD_F64 [[REG_SEQUENCE]], [[REG_SEQUENCE1]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr, !noalias.addrspace !0) ; GFX90A_GFX942-NEXT: S_ENDPGM 0 %ret = atomicrmw fadd ptr %ptr, double %data syncscope("wavefront") monotonic, !noalias.addrspace !1, !amdgpu.no.fine.grained.memory !0 ret void @@ -30,7 +30,7 @@ define amdgpu_ps double @flat_atomic_fadd_f64_rtn_atomicrmw(ptr %ptr, double %da ; GFX90A_GFX942-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 ; GFX90A_GFX942-NEXT: [[COPY3:%[0-9]+]]:vgpr_32 = COPY $vgpr3 ; GFX90A_GFX942-NEXT: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64_align2 = REG_SEQUENCE [[COPY2]], %subreg.sub0, [[COPY3]], %subreg.sub1 - ; GFX90A_GFX942-NEXT: [[FLAT_ATOMIC_ADD_F64_RTN:%[0-9]+]]:vreg_64_align2 = FLAT_ATOMIC_ADD_F64_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr) + ; GFX90A_GFX942-NEXT: [[FLAT_ATOMIC_ADD_F64_RTN:%[0-9]+]]:vreg_64_align2 = FLAT_ATOMIC_ADD_F64_RTN [[REG_SEQUENCE]], [[REG_SEQUENCE1]], 0, 1, implicit $exec, implicit $flat_scr :: (load store syncscope("wavefront") monotonic (s64) on %ir.ptr, !noalias.addrspace !0) ; GFX90A_GFX942-NEXT: [[COPY4:%[0-9]+]]:vgpr_32 = COPY [[FLAT_ATOMIC_ADD_F64_RTN]].sub0 ; GFX90A_GFX942-NEXT: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[FLAT_ATOMIC_ADD_F64_RTN]].sub1 ; GFX90A_GFX942-NEXT: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY4]], implicit $exec diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fptrunc.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fptrunc.mir index f513de8..477ef32 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fptrunc.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fptrunc.mir @@ -385,117 +385,16 @@ body: | ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; CHECK-NEXT: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) - ; CHECK-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV]](s64) - ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 20 - ; CHECK-NEXT: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C]](s32) - ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 2047 - ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[LSHR]], [[C1]] - ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1008 - ; CHECK-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[AND]], [[C2]] - ; CHECK-NEXT: 
[[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 8 - ; CHECK-NEXT: [[LSHR1:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C3]](s32) - ; CHECK-NEXT: [[C4:%[0-9]+]]:_(s32) = G_CONSTANT i32 4094 - ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[LSHR1]], [[C4]] - ; CHECK-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 511 - ; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[UV3]], [[C5]] - ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[AND2]], [[UV2]] - ; CHECK-NEXT: [[C6:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 - ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[OR]](s32), [[C6]] - ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP]](s1) - ; CHECK-NEXT: [[OR1:%[0-9]+]]:_(s32) = G_OR [[AND1]], [[ZEXT]] - ; CHECK-NEXT: [[C7:%[0-9]+]]:_(s32) = G_CONSTANT i32 512 - ; CHECK-NEXT: [[ICMP1:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[OR1]](s32), [[C6]] - ; CHECK-NEXT: [[SELECT:%[0-9]+]]:_(s32) = G_SELECT [[ICMP1]](s1), [[C7]], [[C6]] - ; CHECK-NEXT: [[C8:%[0-9]+]]:_(s32) = G_CONSTANT i32 31744 - ; CHECK-NEXT: [[OR2:%[0-9]+]]:_(s32) = G_OR [[SELECT]], [[C8]] - ; CHECK-NEXT: [[C9:%[0-9]+]]:_(s32) = G_CONSTANT i32 12 - ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ADD]], [[C9]](s32) - ; CHECK-NEXT: [[OR3:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[SHL]] - ; CHECK-NEXT: [[C10:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[C10]], [[ADD]] - ; CHECK-NEXT: [[SMAX:%[0-9]+]]:_(s32) = G_SMAX [[SUB]], [[C6]] - ; CHECK-NEXT: [[C11:%[0-9]+]]:_(s32) = G_CONSTANT i32 13 - ; CHECK-NEXT: [[SMIN:%[0-9]+]]:_(s32) = G_SMIN [[SMAX]], [[C11]] - ; CHECK-NEXT: [[C12:%[0-9]+]]:_(s32) = G_CONSTANT i32 4096 - ; CHECK-NEXT: [[OR4:%[0-9]+]]:_(s32) = G_OR [[OR1]], [[C12]] - ; CHECK-NEXT: [[LSHR2:%[0-9]+]]:_(s32) = G_LSHR [[OR4]], [[SMIN]](s32) - ; CHECK-NEXT: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[LSHR2]], [[SMIN]](s32) - ; CHECK-NEXT: [[ICMP2:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL1]](s32), [[OR4]] - ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP2]](s1) - ; CHECK-NEXT: [[OR5:%[0-9]+]]:_(s32) = G_OR [[LSHR2]], [[ZEXT1]] - ; CHECK-NEXT: [[ICMP3:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[ADD]](s32), [[C10]] - ; CHECK-NEXT: [[SELECT1:%[0-9]+]]:_(s32) = G_SELECT [[ICMP3]](s1), [[OR5]], [[OR3]] - ; CHECK-NEXT: [[C13:%[0-9]+]]:_(s32) = G_CONSTANT i32 7 - ; CHECK-NEXT: [[AND3:%[0-9]+]]:_(s32) = G_AND [[SELECT1]], [[C13]] - ; CHECK-NEXT: [[C14:%[0-9]+]]:_(s32) = G_CONSTANT i32 2 - ; CHECK-NEXT: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[SELECT1]], [[C14]](s32) - ; CHECK-NEXT: [[C15:%[0-9]+]]:_(s32) = G_CONSTANT i32 3 - ; CHECK-NEXT: [[ICMP4:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AND3]](s32), [[C15]] - ; CHECK-NEXT: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP4]](s1) - ; CHECK-NEXT: [[C16:%[0-9]+]]:_(s32) = G_CONSTANT i32 5 - ; CHECK-NEXT: [[ICMP5:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[AND3]](s32), [[C16]] - ; CHECK-NEXT: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP5]](s1) - ; CHECK-NEXT: [[OR6:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[ZEXT3]] - ; CHECK-NEXT: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[LSHR3]], [[OR6]] - ; CHECK-NEXT: [[C17:%[0-9]+]]:_(s32) = G_CONSTANT i32 30 - ; CHECK-NEXT: [[ICMP6:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[ADD]](s32), [[C17]] - ; CHECK-NEXT: [[SELECT2:%[0-9]+]]:_(s32) = G_SELECT [[ICMP6]](s1), [[C8]], [[ADD1]] - ; CHECK-NEXT: [[C18:%[0-9]+]]:_(s32) = G_CONSTANT i32 1039 - ; CHECK-NEXT: [[ICMP7:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[ADD]](s32), [[C18]] - ; CHECK-NEXT: [[SELECT3:%[0-9]+]]:_(s32) = G_SELECT [[ICMP7]](s1), [[OR2]], [[SELECT2]] - ; CHECK-NEXT: [[C19:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 - ; CHECK-NEXT: 
[[LSHR4:%[0-9]+]]:_(s32) = G_LSHR [[UV3]], [[C19]](s32) - ; CHECK-NEXT: [[C20:%[0-9]+]]:_(s32) = G_CONSTANT i32 32768 - ; CHECK-NEXT: [[AND4:%[0-9]+]]:_(s32) = G_AND [[LSHR4]], [[C20]] - ; CHECK-NEXT: [[OR7:%[0-9]+]]:_(s32) = G_OR [[AND4]], [[SELECT3]] - ; CHECK-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[UV1]](s64) - ; CHECK-NEXT: [[LSHR5:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C]](s32) - ; CHECK-NEXT: [[AND5:%[0-9]+]]:_(s32) = G_AND [[LSHR5]], [[C1]] - ; CHECK-NEXT: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[AND5]], [[C2]] - ; CHECK-NEXT: [[LSHR6:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C3]](s32) - ; CHECK-NEXT: [[AND6:%[0-9]+]]:_(s32) = G_AND [[LSHR6]], [[C4]] - ; CHECK-NEXT: [[AND7:%[0-9]+]]:_(s32) = G_AND [[UV5]], [[C5]] - ; CHECK-NEXT: [[OR8:%[0-9]+]]:_(s32) = G_OR [[AND7]], [[UV4]] - ; CHECK-NEXT: [[ICMP8:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[OR8]](s32), [[C6]] - ; CHECK-NEXT: [[ZEXT4:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP8]](s1) - ; CHECK-NEXT: [[OR9:%[0-9]+]]:_(s32) = G_OR [[AND6]], [[ZEXT4]] - ; CHECK-NEXT: [[ICMP9:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[OR9]](s32), [[C6]] - ; CHECK-NEXT: [[SELECT4:%[0-9]+]]:_(s32) = G_SELECT [[ICMP9]](s1), [[C7]], [[C6]] - ; CHECK-NEXT: [[OR10:%[0-9]+]]:_(s32) = G_OR [[SELECT4]], [[C8]] - ; CHECK-NEXT: [[SHL2:%[0-9]+]]:_(s32) = G_SHL [[ADD2]], [[C9]](s32) - ; CHECK-NEXT: [[OR11:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[SHL2]] - ; CHECK-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[C10]], [[ADD2]] - ; CHECK-NEXT: [[SMAX1:%[0-9]+]]:_(s32) = G_SMAX [[SUB1]], [[C6]] - ; CHECK-NEXT: [[SMIN1:%[0-9]+]]:_(s32) = G_SMIN [[SMAX1]], [[C11]] - ; CHECK-NEXT: [[OR12:%[0-9]+]]:_(s32) = G_OR [[OR9]], [[C12]] - ; CHECK-NEXT: [[LSHR7:%[0-9]+]]:_(s32) = G_LSHR [[OR12]], [[SMIN1]](s32) - ; CHECK-NEXT: [[SHL3:%[0-9]+]]:_(s32) = G_SHL [[LSHR7]], [[SMIN1]](s32) - ; CHECK-NEXT: [[ICMP10:%[0-9]+]]:_(s1) = G_ICMP intpred(ne), [[SHL3]](s32), [[OR12]] - ; CHECK-NEXT: [[ZEXT5:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP10]](s1) - ; CHECK-NEXT: [[OR13:%[0-9]+]]:_(s32) = G_OR [[LSHR7]], [[ZEXT5]] - ; CHECK-NEXT: [[ICMP11:%[0-9]+]]:_(s1) = G_ICMP intpred(slt), [[ADD2]](s32), [[C10]] - ; CHECK-NEXT: [[SELECT5:%[0-9]+]]:_(s32) = G_SELECT [[ICMP11]](s1), [[OR13]], [[OR11]] - ; CHECK-NEXT: [[AND8:%[0-9]+]]:_(s32) = G_AND [[SELECT5]], [[C13]] - ; CHECK-NEXT: [[LSHR8:%[0-9]+]]:_(s32) = G_LSHR [[SELECT5]], [[C14]](s32) - ; CHECK-NEXT: [[ICMP12:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[AND8]](s32), [[C15]] - ; CHECK-NEXT: [[ZEXT6:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP12]](s1) - ; CHECK-NEXT: [[ICMP13:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[AND8]](s32), [[C16]] - ; CHECK-NEXT: [[ZEXT7:%[0-9]+]]:_(s32) = G_ZEXT [[ICMP13]](s1) - ; CHECK-NEXT: [[OR14:%[0-9]+]]:_(s32) = G_OR [[ZEXT6]], [[ZEXT7]] - ; CHECK-NEXT: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[LSHR8]], [[OR14]] - ; CHECK-NEXT: [[ICMP14:%[0-9]+]]:_(s1) = G_ICMP intpred(sgt), [[ADD2]](s32), [[C17]] - ; CHECK-NEXT: [[SELECT6:%[0-9]+]]:_(s32) = G_SELECT [[ICMP14]](s1), [[C8]], [[ADD3]] - ; CHECK-NEXT: [[ICMP15:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[ADD2]](s32), [[C18]] - ; CHECK-NEXT: [[SELECT7:%[0-9]+]]:_(s32) = G_SELECT [[ICMP15]](s1), [[OR10]], [[SELECT6]] - ; CHECK-NEXT: [[LSHR9:%[0-9]+]]:_(s32) = G_LSHR [[UV5]], [[C19]](s32) - ; CHECK-NEXT: [[AND9:%[0-9]+]]:_(s32) = G_AND [[LSHR9]], [[C20]] - ; CHECK-NEXT: [[OR15:%[0-9]+]]:_(s32) = G_OR [[AND9]], [[SELECT7]] - ; CHECK-NEXT: [[C21:%[0-9]+]]:_(s32) = G_CONSTANT i32 65535 - ; CHECK-NEXT: [[AND10:%[0-9]+]]:_(s32) = G_AND [[OR7]], [[C21]] - ; CHECK-NEXT: [[AND11:%[0-9]+]]:_(s32) = G_AND [[OR15]], [[C21]] - ; 
CHECK-NEXT: [[SHL4:%[0-9]+]]:_(s32) = G_SHL [[AND11]], [[C19]](s32) - ; CHECK-NEXT: [[OR16:%[0-9]+]]:_(s32) = G_OR [[AND10]], [[SHL4]] - ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR16]](s32) + ; CHECK-NEXT: [[FPTRUNC:%[0-9]+]]:_(s32) = afn G_FPTRUNC [[UV]](s64) + ; CHECK-NEXT: [[FPTRUNC1:%[0-9]+]]:_(s16) = afn G_FPTRUNC [[FPTRUNC]](s32) + ; CHECK-NEXT: [[FPTRUNC2:%[0-9]+]]:_(s32) = afn G_FPTRUNC [[UV1]](s64) + ; CHECK-NEXT: [[FPTRUNC3:%[0-9]+]]:_(s16) = afn G_FPTRUNC [[FPTRUNC2]](s32) + ; CHECK-NEXT: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) + ; CHECK-NEXT: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC3]](s16) + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 + ; CHECK-NEXT: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) + ; CHECK-NEXT: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] + ; CHECK-NEXT: [[BITCAST:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; CHECK-NEXT: $vgpr0 = COPY [[BITCAST]](<2 x s16>) %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(<2 x s16>) = afn G_FPTRUNC %0 diff --git a/llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll b/llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll index d0b41e1..57b4857 100644 --- a/llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll +++ b/llvm/test/CodeGen/AMDGPU/fptrunc.f16.ll @@ -1,16 +1,16 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2 -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -global-isel=0 -enable-unsafe-fp-math < %s | FileCheck -enable-var-scope -check-prefixes=SI-SDAG %s -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -global-isel=1 -enable-unsafe-fp-math < %s | FileCheck -check-prefixes=SI-GISEL %s -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=fiji -global-isel=0 -mattr=-flat-for-global -enable-unsafe-fp-math < %s | FileCheck -enable-var-scope -check-prefixes=VI-SDAG %s -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=fiji -global-isel=1 -mattr=-flat-for-global -enable-unsafe-fp-math < %s | FileCheck -enable-var-scope -check-prefixes=VI-GISEL %s -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx900 -global-isel=0 -mattr=-flat-for-global -denormal-fp-math=preserve-sign -enable-unsafe-fp-math < %s | FileCheck -enable-var-scope -check-prefixes=GFX9-SDAG %s -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx900 -global-isel=1 -mattr=-flat-for-global -denormal-fp-math=preserve-sign -enable-unsafe-fp-math < %s | FileCheck -enable-var-scope -check-prefixes=GFX9-GISEL %s -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx950 -global-isel=0 -mattr=-flat-for-global -denormal-fp-math=preserve-sign -enable-unsafe-fp-math < %s | FileCheck -enable-var-scope -check-prefixes=GFX950-SDAG %s -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx950 -global-isel=1 -mattr=-flat-for-global -denormal-fp-math=preserve-sign -enable-unsafe-fp-math < %s | FileCheck -enable-var-scope -check-prefixes=GFX950-GISEL %s -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -global-isel=0 -mattr=-flat-for-global,+real-true16 -denormal-fp-math=preserve-sign -enable-unsafe-fp-math < %s | FileCheck -enable-var-scope -check-prefixes=GFX11-SDAG-TRUE16 %s -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -global-isel=0 -mattr=-flat-for-global,-real-true16 -denormal-fp-math=preserve-sign -enable-unsafe-fp-math < %s | FileCheck -enable-var-scope -check-prefixes=GFX11-SDAG-FAKE16 %s 
-; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -global-isel=1 -mattr=-flat-for-global,+real-true16 -denormal-fp-math=preserve-sign -enable-unsafe-fp-math < %s | FileCheck -enable-var-scope -check-prefixes=GFX11-GISEL-TRUE16 %s -; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -global-isel=1 -mattr=-flat-for-global,-real-true16 -denormal-fp-math=preserve-sign -enable-unsafe-fp-math < %s | FileCheck -enable-var-scope -check-prefixes=GFX11-GISEL-FAKE16 %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -global-isel=0 < %s | FileCheck -enable-var-scope -check-prefixes=SI-SDAG %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -global-isel=1 < %s | FileCheck -check-prefixes=SI-GISEL %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=fiji -global-isel=0 -mattr=-flat-for-global < %s | FileCheck -enable-var-scope -check-prefixes=VI-SDAG %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=fiji -global-isel=1 -mattr=-flat-for-global < %s | FileCheck -enable-var-scope -check-prefixes=VI-GISEL %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx900 -global-isel=0 -mattr=-flat-for-global -denormal-fp-math=preserve-sign < %s | FileCheck -enable-var-scope -check-prefixes=GFX9-SDAG %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx900 -global-isel=1 -mattr=-flat-for-global -denormal-fp-math=preserve-sign < %s | FileCheck -enable-var-scope -check-prefixes=GFX9-GISEL %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx950 -global-isel=0 -mattr=-flat-for-global -denormal-fp-math=preserve-sign < %s | FileCheck -enable-var-scope -check-prefixes=GFX950-SDAG %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx950 -global-isel=1 -mattr=-flat-for-global -denormal-fp-math=preserve-sign < %s | FileCheck -enable-var-scope -check-prefixes=GFX950-GISEL %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -global-isel=0 -mattr=-flat-for-global,+real-true16 -denormal-fp-math=preserve-sign < %s | FileCheck -enable-var-scope -check-prefixes=GFX11-SDAG-TRUE16 %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -global-isel=0 -mattr=-flat-for-global,-real-true16 -denormal-fp-math=preserve-sign < %s | FileCheck -enable-var-scope -check-prefixes=GFX11-SDAG-FAKE16 %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -global-isel=1 -mattr=-flat-for-global,+real-true16 -denormal-fp-math=preserve-sign < %s | FileCheck -enable-var-scope -check-prefixes=GFX11-GISEL-TRUE16 %s +; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn -mcpu=gfx1100 -global-isel=1 -mattr=-flat-for-global,-real-true16 -denormal-fp-math=preserve-sign < %s | FileCheck -enable-var-scope -check-prefixes=GFX11-GISEL-FAKE16 %s define amdgpu_kernel void @fptrunc_f32_to_f16( ; SI-SDAG-LABEL: fptrunc_f32_to_f16: @@ -201,8 +201,8 @@ entry: ret void } -define amdgpu_kernel void @fptrunc_f64_to_f16( -; SI-SDAG-LABEL: fptrunc_f64_to_f16: +define amdgpu_kernel void @fptrunc_f32_to_f16_afn(ptr addrspace(1) %r, +; SI-SDAG-LABEL: fptrunc_f32_to_f16_afn: ; SI-SDAG: ; %bb.0: ; %entry ; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 ; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000 @@ -212,29 +212,27 @@ define amdgpu_kernel void @fptrunc_f64_to_f16( ; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) ; SI-SDAG-NEXT: s_mov_b32 s8, s2 ; SI-SDAG-NEXT: s_mov_b32 s9, s3 -; 
SI-SDAG-NEXT: buffer_load_dwordx2 v[0:1], off, s[8:11], 0 +; SI-SDAG-NEXT: buffer_load_dword v0, off, s[8:11], 0 ; SI-SDAG-NEXT: s_mov_b32 s4, s0 ; SI-SDAG-NEXT: s_mov_b32 s5, s1 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0) -; SI-SDAG-NEXT: v_cvt_f32_f64_e32 v0, v[0:1] ; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 ; SI-SDAG-NEXT: buffer_store_short v0, off, s[4:7], 0 ; SI-SDAG-NEXT: s_endpgm ; -; SI-GISEL-LABEL: fptrunc_f64_to_f16: +; SI-GISEL-LABEL: fptrunc_f32_to_f16_afn: ; SI-GISEL: ; %bb.0: ; %entry ; SI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 ; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; SI-GISEL-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x0 +; SI-GISEL-NEXT: s_load_dword s3, s[2:3], 0x0 ; SI-GISEL-NEXT: s_mov_b32 s2, -1 ; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; SI-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[4:5] -; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, s3 ; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000 ; SI-GISEL-NEXT: buffer_store_short v0, off, s[0:3], 0 ; SI-GISEL-NEXT: s_endpgm ; -; VI-SDAG-LABEL: fptrunc_f64_to_f16: +; VI-SDAG-LABEL: fptrunc_f32_to_f16_afn: ; VI-SDAG: ; %bb.0: ; %entry ; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 ; VI-SDAG-NEXT: s_mov_b32 s7, 0xf000 @@ -244,29 +242,27 @@ define amdgpu_kernel void @fptrunc_f64_to_f16( ; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) ; VI-SDAG-NEXT: s_mov_b32 s8, s2 ; VI-SDAG-NEXT: s_mov_b32 s9, s3 -; VI-SDAG-NEXT: buffer_load_dwordx2 v[0:1], off, s[8:11], 0 +; VI-SDAG-NEXT: buffer_load_dword v0, off, s[8:11], 0 ; VI-SDAG-NEXT: s_mov_b32 s4, s0 ; VI-SDAG-NEXT: s_mov_b32 s5, s1 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) -; VI-SDAG-NEXT: v_cvt_f32_f64_e32 v0, v[0:1] ; VI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 ; VI-SDAG-NEXT: buffer_store_short v0, off, s[4:7], 0 ; VI-SDAG-NEXT: s_endpgm ; -; VI-GISEL-LABEL: fptrunc_f64_to_f16: +; VI-GISEL-LABEL: fptrunc_f32_to_f16_afn: ; VI-GISEL: ; %bb.0: ; %entry ; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; VI-GISEL-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 +; VI-GISEL-NEXT: s_load_dword s2, s[2:3], 0x0 +; VI-GISEL-NEXT: s_mov_b32 s3, 0xf000 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; VI-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[2:3] +; VI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, s2 ; VI-GISEL-NEXT: s_mov_b32 s2, -1 -; VI-GISEL-NEXT: s_mov_b32 s3, 0xf000 -; VI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 ; VI-GISEL-NEXT: buffer_store_short v0, off, s[0:3], 0 ; VI-GISEL-NEXT: s_endpgm ; -; GFX9-SDAG-LABEL: fptrunc_f64_to_f16: +; GFX9-SDAG-LABEL: fptrunc_f32_to_f16_afn: ; GFX9-SDAG: ; %bb.0: ; %entry ; GFX9-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 ; GFX9-SDAG-NEXT: s_mov_b32 s7, 0xf000 @@ -276,29 +272,27 @@ define amdgpu_kernel void @fptrunc_f64_to_f16( ; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-SDAG-NEXT: s_mov_b32 s8, s2 ; GFX9-SDAG-NEXT: s_mov_b32 s9, s3 -; GFX9-SDAG-NEXT: buffer_load_dwordx2 v[0:1], off, s[8:11], 0 +; GFX9-SDAG-NEXT: buffer_load_dword v0, off, s[8:11], 0 ; GFX9-SDAG-NEXT: s_mov_b32 s4, s0 ; GFX9-SDAG-NEXT: s_mov_b32 s5, s1 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) -; GFX9-SDAG-NEXT: v_cvt_f32_f64_e32 v0, v[0:1] ; GFX9-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX9-SDAG-NEXT: buffer_store_short v0, off, s[4:7], 0 ; GFX9-SDAG-NEXT: s_endpgm ; -; GFX9-GISEL-LABEL: fptrunc_f64_to_f16: +; GFX9-GISEL-LABEL: fptrunc_f32_to_f16_afn: ; GFX9-GISEL: ; %bb.0: ; %entry ; GFX9-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 ; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-GISEL-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 +; GFX9-GISEL-NEXT: s_load_dword s2, s[2:3], 0x0 +; 
GFX9-GISEL-NEXT: s_mov_b32 s3, 0xf000 ; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[2:3] +; GFX9-GISEL-NEXT: v_cvt_f16_f32_e32 v0, s2 ; GFX9-GISEL-NEXT: s_mov_b32 s2, -1 -; GFX9-GISEL-NEXT: s_mov_b32 s3, 0xf000 -; GFX9-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX9-GISEL-NEXT: buffer_store_short v0, off, s[0:3], 0 ; GFX9-GISEL-NEXT: s_endpgm ; -; GFX950-SDAG-LABEL: fptrunc_f64_to_f16: +; GFX950-SDAG-LABEL: fptrunc_f32_to_f16_afn: ; GFX950-SDAG: ; %bb.0: ; %entry ; GFX950-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 ; GFX950-SDAG-NEXT: s_mov_b32 s7, 0xf000 @@ -308,23 +302,541 @@ define amdgpu_kernel void @fptrunc_f64_to_f16( ; GFX950-SDAG-NEXT: s_waitcnt lgkmcnt(0) ; GFX950-SDAG-NEXT: s_mov_b32 s8, s2 ; GFX950-SDAG-NEXT: s_mov_b32 s9, s3 -; GFX950-SDAG-NEXT: buffer_load_dwordx2 v[0:1], off, s[8:11], 0 +; GFX950-SDAG-NEXT: buffer_load_dword v0, off, s[8:11], 0 ; GFX950-SDAG-NEXT: s_mov_b32 s4, s0 ; GFX950-SDAG-NEXT: s_mov_b32 s5, s1 ; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) -; GFX950-SDAG-NEXT: v_cvt_f32_f64_e32 v0, v[0:1] ; GFX950-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX950-SDAG-NEXT: buffer_store_short v0, off, s[4:7], 0 ; GFX950-SDAG-NEXT: s_endpgm ; +; GFX950-GISEL-LABEL: fptrunc_f32_to_f16_afn: +; GFX950-GISEL: ; %bb.0: ; %entry +; GFX950-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX950-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX950-GISEL-NEXT: s_load_dword s2, s[2:3], 0x0 +; GFX950-GISEL-NEXT: s_mov_b32 s3, 0xf000 +; GFX950-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX950-GISEL-NEXT: v_cvt_f16_f32_e32 v0, s2 +; GFX950-GISEL-NEXT: s_mov_b32 s2, -1 +; GFX950-GISEL-NEXT: buffer_store_short v0, off, s[0:3], 0 +; GFX950-GISEL-NEXT: s_endpgm +; +; GFX11-SDAG-TRUE16-LABEL: fptrunc_f32_to_f16_afn: +; GFX11-SDAG-TRUE16: ; %bb.0: ; %entry +; GFX11-SDAG-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s6, -1 +; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s7, 0x31016000 +; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s10, s6 +; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s11, s7 +; GFX11-SDAG-TRUE16-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s8, s2 +; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s9, s3 +; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s4, s0 +; GFX11-SDAG-TRUE16-NEXT: buffer_load_b32 v0, off, s[8:11], 0 +; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s5, s1 +; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) +; GFX11-SDAG-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.l, v0 +; GFX11-SDAG-TRUE16-NEXT: buffer_store_b16 v0, off, s[4:7], 0 +; GFX11-SDAG-TRUE16-NEXT: s_endpgm +; +; GFX11-SDAG-FAKE16-LABEL: fptrunc_f32_to_f16_afn: +; GFX11-SDAG-FAKE16: ; %bb.0: ; %entry +; GFX11-SDAG-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s6, -1 +; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s7, 0x31016000 +; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s10, s6 +; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s11, s7 +; GFX11-SDAG-FAKE16-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s8, s2 +; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s9, s3 +; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s4, s0 +; GFX11-SDAG-FAKE16-NEXT: buffer_load_b32 v0, off, s[8:11], 0 +; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s5, s1 +; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) +; GFX11-SDAG-FAKE16-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX11-SDAG-FAKE16-NEXT: buffer_store_b16 v0, off, s[4:7], 0 +; GFX11-SDAG-FAKE16-NEXT: s_endpgm +; +; GFX11-GISEL-TRUE16-LABEL: fptrunc_f32_to_f16_afn: +; GFX11-GISEL-TRUE16: ; %bb.0: ; %entry +; GFX11-GISEL-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX11-GISEL-TRUE16-NEXT: s_waitcnt 
lgkmcnt(0) +; GFX11-GISEL-TRUE16-NEXT: s_load_b32 s2, s[2:3], 0x0 +; GFX11-GISEL-TRUE16-NEXT: s_mov_b32 s3, 0x31016000 +; GFX11-GISEL-TRUE16-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-GISEL-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.l, s2 +; GFX11-GISEL-TRUE16-NEXT: s_mov_b32 s2, -1 +; GFX11-GISEL-TRUE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0 +; GFX11-GISEL-TRUE16-NEXT: s_endpgm +; +; GFX11-GISEL-FAKE16-LABEL: fptrunc_f32_to_f16_afn: +; GFX11-GISEL-FAKE16: ; %bb.0: ; %entry +; GFX11-GISEL-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX11-GISEL-FAKE16-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-GISEL-FAKE16-NEXT: s_load_b32 s2, s[2:3], 0x0 +; GFX11-GISEL-FAKE16-NEXT: s_mov_b32 s3, 0x31016000 +; GFX11-GISEL-FAKE16-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-GISEL-FAKE16-NEXT: v_cvt_f16_f32_e32 v0, s2 +; GFX11-GISEL-FAKE16-NEXT: s_mov_b32 s2, -1 +; GFX11-GISEL-FAKE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0 +; GFX11-GISEL-FAKE16-NEXT: s_endpgm + ptr addrspace(1) %a) { +entry: + %a.val = load float, ptr addrspace(1) %a + %r.val = fptrunc afn float %a.val to half + store half %r.val, ptr addrspace(1) %r + ret void +} + +define amdgpu_kernel void @fptrunc_f64_to_f16( +; SI-SDAG-LABEL: fptrunc_f64_to_f16: +; SI-SDAG: ; %bb.0: ; %entry +; SI-SDAG-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x9 +; SI-SDAG-NEXT: s_mov_b32 s3, 0xf000 +; SI-SDAG-NEXT: s_mov_b32 s2, -1 +; SI-SDAG-NEXT: s_mov_b32 s10, s2 +; SI-SDAG-NEXT: s_mov_b32 s11, s3 +; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; SI-SDAG-NEXT: s_mov_b32 s8, s6 +; SI-SDAG-NEXT: s_mov_b32 s9, s7 +; SI-SDAG-NEXT: buffer_load_dwordx2 v[0:1], off, s[8:11], 0 +; SI-SDAG-NEXT: s_movk_i32 s0, 0x7e00 +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) +; SI-SDAG-NEXT: v_readfirstlane_b32 s1, v1 +; SI-SDAG-NEXT: s_and_b32 s6, s1, 0x1ff +; SI-SDAG-NEXT: s_lshr_b32 s7, s1, 8 +; SI-SDAG-NEXT: s_bfe_u32 s8, s1, 0xb0014 +; SI-SDAG-NEXT: v_or_b32_e32 v0, s6, v0 +; SI-SDAG-NEXT: s_and_b32 s6, s7, 0xffe +; SI-SDAG-NEXT: s_sub_i32 s7, 0x3f1, s8 +; SI-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; SI-SDAG-NEXT: v_med3_i32 v1, s7, 0, 13 +; SI-SDAG-NEXT: v_readfirstlane_b32 s7, v0 +; SI-SDAG-NEXT: v_readfirstlane_b32 s9, v1 +; SI-SDAG-NEXT: s_or_b32 s6, s6, s7 +; SI-SDAG-NEXT: s_or_b32 s7, s6, 0x1000 +; SI-SDAG-NEXT: s_lshr_b32 s10, s7, s9 +; SI-SDAG-NEXT: s_lshl_b32 s9, s10, s9 +; SI-SDAG-NEXT: s_cmp_lg_u32 s9, s7 +; SI-SDAG-NEXT: s_cselect_b32 s7, 1, 0 +; SI-SDAG-NEXT: s_addk_i32 s8, 0xfc10 +; SI-SDAG-NEXT: s_or_b32 s7, s10, s7 +; SI-SDAG-NEXT: s_lshl_b32 s9, s8, 12 +; SI-SDAG-NEXT: s_or_b32 s9, s6, s9 +; SI-SDAG-NEXT: s_cmp_lt_i32 s8, 1 +; SI-SDAG-NEXT: s_cselect_b32 s7, s7, s9 +; SI-SDAG-NEXT: s_and_b32 s9, s7, 7 +; SI-SDAG-NEXT: s_cmp_gt_i32 s9, 5 +; SI-SDAG-NEXT: s_cselect_b32 s10, 1, 0 +; SI-SDAG-NEXT: s_cmp_eq_u32 s9, 3 +; SI-SDAG-NEXT: s_cselect_b32 s9, 1, 0 +; SI-SDAG-NEXT: s_lshr_b32 s7, s7, 2 +; SI-SDAG-NEXT: s_or_b32 s9, s9, s10 +; SI-SDAG-NEXT: s_add_i32 s7, s7, s9 +; SI-SDAG-NEXT: s_cmp_lt_i32 s8, 31 +; SI-SDAG-NEXT: s_cselect_b32 s7, s7, 0x7c00 +; SI-SDAG-NEXT: s_cmp_lg_u32 s6, 0 +; SI-SDAG-NEXT: s_cselect_b32 s0, s0, 0x7c00 +; SI-SDAG-NEXT: s_cmpk_eq_i32 s8, 0x40f +; SI-SDAG-NEXT: s_cselect_b32 s0, s0, s7 +; SI-SDAG-NEXT: s_lshr_b32 s1, s1, 16 +; SI-SDAG-NEXT: s_and_b32 s1, s1, 0x8000 +; SI-SDAG-NEXT: s_or_b32 s6, s1, s0 +; SI-SDAG-NEXT: s_mov_b32 s0, s4 +; SI-SDAG-NEXT: s_mov_b32 s1, s5 +; SI-SDAG-NEXT: v_mov_b32_e32 v0, s6 +; SI-SDAG-NEXT: buffer_store_short v0, off, s[0:3], 0 +; SI-SDAG-NEXT: s_endpgm +; +; SI-GISEL-LABEL: fptrunc_f64_to_f16: +; SI-GISEL: ; 
%bb.0: ; %entry +; SI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 +; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; SI-GISEL-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x0 +; SI-GISEL-NEXT: s_mov_b32 s2, -1 +; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; SI-GISEL-NEXT: s_bfe_u32 s3, s5, 0xb0014 +; SI-GISEL-NEXT: s_lshr_b32 s6, s5, 8 +; SI-GISEL-NEXT: s_and_b32 s7, s5, 0x1ff +; SI-GISEL-NEXT: s_addk_i32 s3, 0xfc10 +; SI-GISEL-NEXT: s_and_b32 s6, s6, 0xffe +; SI-GISEL-NEXT: s_or_b32 s4, s7, s4 +; SI-GISEL-NEXT: s_cmp_lg_u32 s4, 0 +; SI-GISEL-NEXT: s_cselect_b32 s4, 1, 0 +; SI-GISEL-NEXT: s_or_b32 s4, s6, s4 +; SI-GISEL-NEXT: s_cmp_lg_u32 s4, 0 +; SI-GISEL-NEXT: s_cselect_b32 s6, 1, 0 +; SI-GISEL-NEXT: s_lshl_b32 s6, s6, 9 +; SI-GISEL-NEXT: s_lshl_b32 s7, s3, 12 +; SI-GISEL-NEXT: s_sub_i32 s8, 1, s3 +; SI-GISEL-NEXT: s_or_b32 s9, s4, 0x1000 +; SI-GISEL-NEXT: s_or_b32 s6, s6, 0x7c00 +; SI-GISEL-NEXT: s_or_b32 s4, s4, s7 +; SI-GISEL-NEXT: s_max_i32 s7, s8, 0 +; SI-GISEL-NEXT: s_min_i32 s7, s7, 13 +; SI-GISEL-NEXT: s_lshr_b32 s8, s9, s7 +; SI-GISEL-NEXT: s_lshl_b32 s7, s8, s7 +; SI-GISEL-NEXT: s_cmp_lg_u32 s7, s9 +; SI-GISEL-NEXT: s_cselect_b32 s7, 1, 0 +; SI-GISEL-NEXT: s_or_b32 s7, s8, s7 +; SI-GISEL-NEXT: s_cmp_lt_i32 s3, 1 +; SI-GISEL-NEXT: s_cselect_b32 s4, s7, s4 +; SI-GISEL-NEXT: s_and_b32 s7, s4, 7 +; SI-GISEL-NEXT: s_lshr_b32 s4, s4, 2 +; SI-GISEL-NEXT: s_cmp_eq_u32 s7, 3 +; SI-GISEL-NEXT: s_cselect_b32 s8, 1, 0 +; SI-GISEL-NEXT: s_cmp_gt_i32 s7, 5 +; SI-GISEL-NEXT: s_cselect_b32 s7, 1, 0 +; SI-GISEL-NEXT: s_or_b32 s7, s8, s7 +; SI-GISEL-NEXT: s_add_i32 s4, s4, s7 +; SI-GISEL-NEXT: s_cmp_gt_i32 s3, 30 +; SI-GISEL-NEXT: s_cselect_b32 s4, 0x7c00, s4 +; SI-GISEL-NEXT: s_cmpk_eq_i32 s3, 0x40f +; SI-GISEL-NEXT: s_cselect_b32 s3, s6, s4 +; SI-GISEL-NEXT: s_lshr_b32 s4, s5, 16 +; SI-GISEL-NEXT: s_and_b32 s4, s4, 0x8000 +; SI-GISEL-NEXT: s_or_b32 s4, s4, s3 +; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000 +; SI-GISEL-NEXT: v_mov_b32_e32 v0, s4 +; SI-GISEL-NEXT: buffer_store_short v0, off, s[0:3], 0 +; SI-GISEL-NEXT: s_endpgm +; +; VI-SDAG-LABEL: fptrunc_f64_to_f16: +; VI-SDAG: ; %bb.0: ; %entry +; VI-SDAG-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x24 +; VI-SDAG-NEXT: s_mov_b32 s3, 0xf000 +; VI-SDAG-NEXT: s_mov_b32 s2, -1 +; VI-SDAG-NEXT: s_mov_b32 s10, s2 +; VI-SDAG-NEXT: s_mov_b32 s11, s3 +; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; VI-SDAG-NEXT: s_mov_b32 s8, s6 +; VI-SDAG-NEXT: s_mov_b32 s9, s7 +; VI-SDAG-NEXT: buffer_load_dwordx2 v[0:1], off, s[8:11], 0 +; VI-SDAG-NEXT: s_mov_b32 s0, s4 +; VI-SDAG-NEXT: s_mov_b32 s1, s5 +; VI-SDAG-NEXT: s_movk_i32 s6, 0x7e00 +; VI-SDAG-NEXT: s_waitcnt vmcnt(0) +; VI-SDAG-NEXT: v_readfirstlane_b32 s4, v1 +; VI-SDAG-NEXT: s_and_b32 s5, s4, 0x1ff +; VI-SDAG-NEXT: v_or_b32_e32 v0, s5, v0 +; VI-SDAG-NEXT: s_lshr_b32 s7, s4, 8 +; VI-SDAG-NEXT: s_bfe_u32 s8, s4, 0xb0014 +; VI-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 +; VI-SDAG-NEXT: s_and_b32 s5, s7, 0xffe +; VI-SDAG-NEXT: s_sub_i32 s7, 0x3f1, s8 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; VI-SDAG-NEXT: v_med3_i32 v1, s7, 0, 13 +; VI-SDAG-NEXT: v_readfirstlane_b32 s7, v0 +; VI-SDAG-NEXT: s_or_b32 s5, s5, s7 +; VI-SDAG-NEXT: v_readfirstlane_b32 s9, v1 +; VI-SDAG-NEXT: s_or_b32 s7, s5, 0x1000 +; VI-SDAG-NEXT: s_lshr_b32 s10, s7, s9 +; VI-SDAG-NEXT: s_lshl_b32 s9, s10, s9 +; VI-SDAG-NEXT: s_cmp_lg_u32 s9, s7 +; VI-SDAG-NEXT: s_cselect_b32 s7, 1, 0 +; VI-SDAG-NEXT: s_addk_i32 s8, 0xfc10 +; VI-SDAG-NEXT: s_lshl_b32 s9, s8, 12 +; VI-SDAG-NEXT: s_or_b32 s7, s10, s7 +; VI-SDAG-NEXT: s_or_b32 s9, s5, s9 +; VI-SDAG-NEXT: s_cmp_lt_i32 s8, 1 +; 
VI-SDAG-NEXT: s_cselect_b32 s7, s7, s9 +; VI-SDAG-NEXT: s_and_b32 s9, s7, 7 +; VI-SDAG-NEXT: s_cmp_gt_i32 s9, 5 +; VI-SDAG-NEXT: s_cselect_b32 s10, 1, 0 +; VI-SDAG-NEXT: s_cmp_eq_u32 s9, 3 +; VI-SDAG-NEXT: s_cselect_b32 s9, 1, 0 +; VI-SDAG-NEXT: s_lshr_b32 s7, s7, 2 +; VI-SDAG-NEXT: s_or_b32 s9, s9, s10 +; VI-SDAG-NEXT: s_add_i32 s7, s7, s9 +; VI-SDAG-NEXT: s_cmp_lt_i32 s8, 31 +; VI-SDAG-NEXT: s_cselect_b32 s7, s7, 0x7c00 +; VI-SDAG-NEXT: s_cmp_lg_u32 s5, 0 +; VI-SDAG-NEXT: s_cselect_b32 s5, s6, 0x7c00 +; VI-SDAG-NEXT: s_cmpk_eq_i32 s8, 0x40f +; VI-SDAG-NEXT: s_cselect_b32 s5, s5, s7 +; VI-SDAG-NEXT: s_lshr_b32 s4, s4, 16 +; VI-SDAG-NEXT: s_and_b32 s4, s4, 0x8000 +; VI-SDAG-NEXT: s_or_b32 s4, s4, s5 +; VI-SDAG-NEXT: v_mov_b32_e32 v0, s4 +; VI-SDAG-NEXT: buffer_store_short v0, off, s[0:3], 0 +; VI-SDAG-NEXT: s_endpgm +; +; VI-GISEL-LABEL: fptrunc_f64_to_f16: +; VI-GISEL: ; %bb.0: ; %entry +; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; VI-GISEL-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 +; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; VI-GISEL-NEXT: s_bfe_u32 s4, s3, 0xb0014 +; VI-GISEL-NEXT: s_lshr_b32 s5, s3, 8 +; VI-GISEL-NEXT: s_and_b32 s6, s3, 0x1ff +; VI-GISEL-NEXT: s_addk_i32 s4, 0xfc10 +; VI-GISEL-NEXT: s_and_b32 s5, s5, 0xffe +; VI-GISEL-NEXT: s_or_b32 s2, s6, s2 +; VI-GISEL-NEXT: s_cmp_lg_u32 s2, 0 +; VI-GISEL-NEXT: s_cselect_b32 s2, 1, 0 +; VI-GISEL-NEXT: s_or_b32 s2, s5, s2 +; VI-GISEL-NEXT: s_cmp_lg_u32 s2, 0 +; VI-GISEL-NEXT: s_cselect_b32 s5, 1, 0 +; VI-GISEL-NEXT: s_sub_i32 s7, 1, s4 +; VI-GISEL-NEXT: s_lshl_b32 s6, s4, 12 +; VI-GISEL-NEXT: s_max_i32 s7, s7, 0 +; VI-GISEL-NEXT: s_or_b32 s6, s2, s6 +; VI-GISEL-NEXT: s_min_i32 s7, s7, 13 +; VI-GISEL-NEXT: s_bitset1_b32 s2, 12 +; VI-GISEL-NEXT: s_lshl_b32 s5, s5, 9 +; VI-GISEL-NEXT: s_lshr_b32 s8, s2, s7 +; VI-GISEL-NEXT: s_or_b32 s5, s5, 0x7c00 +; VI-GISEL-NEXT: s_lshl_b32 s7, s8, s7 +; VI-GISEL-NEXT: s_cmp_lg_u32 s7, s2 +; VI-GISEL-NEXT: s_cselect_b32 s2, 1, 0 +; VI-GISEL-NEXT: s_or_b32 s2, s8, s2 +; VI-GISEL-NEXT: s_cmp_lt_i32 s4, 1 +; VI-GISEL-NEXT: s_cselect_b32 s2, s2, s6 +; VI-GISEL-NEXT: s_and_b32 s6, s2, 7 +; VI-GISEL-NEXT: s_lshr_b32 s2, s2, 2 +; VI-GISEL-NEXT: s_cmp_eq_u32 s6, 3 +; VI-GISEL-NEXT: s_cselect_b32 s7, 1, 0 +; VI-GISEL-NEXT: s_cmp_gt_i32 s6, 5 +; VI-GISEL-NEXT: s_cselect_b32 s6, 1, 0 +; VI-GISEL-NEXT: s_or_b32 s6, s7, s6 +; VI-GISEL-NEXT: s_add_i32 s2, s2, s6 +; VI-GISEL-NEXT: s_cmp_gt_i32 s4, 30 +; VI-GISEL-NEXT: s_cselect_b32 s2, 0x7c00, s2 +; VI-GISEL-NEXT: s_cmpk_eq_i32 s4, 0x40f +; VI-GISEL-NEXT: s_cselect_b32 s2, s5, s2 +; VI-GISEL-NEXT: s_lshr_b32 s3, s3, 16 +; VI-GISEL-NEXT: s_and_b32 s3, s3, 0x8000 +; VI-GISEL-NEXT: s_or_b32 s2, s3, s2 +; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2 +; VI-GISEL-NEXT: s_mov_b32 s2, -1 +; VI-GISEL-NEXT: s_mov_b32 s3, 0xf000 +; VI-GISEL-NEXT: buffer_store_short v0, off, s[0:3], 0 +; VI-GISEL-NEXT: s_endpgm +; +; GFX9-SDAG-LABEL: fptrunc_f64_to_f16: +; GFX9-SDAG: ; %bb.0: ; %entry +; GFX9-SDAG-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x24 +; GFX9-SDAG-NEXT: s_mov_b32 s3, 0xf000 +; GFX9-SDAG-NEXT: s_mov_b32 s2, -1 +; GFX9-SDAG-NEXT: s_mov_b32 s6, s2 +; GFX9-SDAG-NEXT: s_mov_b32 s7, s3 +; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-SDAG-NEXT: s_mov_b32 s4, s10 +; GFX9-SDAG-NEXT: s_mov_b32 s5, s11 +; GFX9-SDAG-NEXT: buffer_load_dwordx2 v[0:1], off, s[4:7], 0 +; GFX9-SDAG-NEXT: s_mov_b32 s0, s8 +; GFX9-SDAG-NEXT: s_mov_b32 s1, s9 +; GFX9-SDAG-NEXT: s_movk_i32 s4, 0x7e00 +; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX9-SDAG-NEXT: 
v_readfirstlane_b32 s5, v1 +; GFX9-SDAG-NEXT: s_and_b32 s6, s5, 0x1ff +; GFX9-SDAG-NEXT: v_or_b32_e32 v0, s6, v0 +; GFX9-SDAG-NEXT: s_lshr_b32 s7, s5, 8 +; GFX9-SDAG-NEXT: s_bfe_u32 s8, s5, 0xb0014 +; GFX9-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 +; GFX9-SDAG-NEXT: s_and_b32 s6, s7, 0xffe +; GFX9-SDAG-NEXT: s_sub_i32 s7, 0x3f1, s8 +; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX9-SDAG-NEXT: v_med3_i32 v1, s7, 0, 13 +; GFX9-SDAG-NEXT: v_readfirstlane_b32 s7, v0 +; GFX9-SDAG-NEXT: s_or_b32 s6, s6, s7 +; GFX9-SDAG-NEXT: v_readfirstlane_b32 s9, v1 +; GFX9-SDAG-NEXT: s_or_b32 s7, s6, 0x1000 +; GFX9-SDAG-NEXT: s_lshr_b32 s10, s7, s9 +; GFX9-SDAG-NEXT: s_lshl_b32 s9, s10, s9 +; GFX9-SDAG-NEXT: s_cmp_lg_u32 s9, s7 +; GFX9-SDAG-NEXT: s_cselect_b32 s7, 1, 0 +; GFX9-SDAG-NEXT: s_addk_i32 s8, 0xfc10 +; GFX9-SDAG-NEXT: s_lshl_b32 s9, s8, 12 +; GFX9-SDAG-NEXT: s_or_b32 s7, s10, s7 +; GFX9-SDAG-NEXT: s_or_b32 s9, s6, s9 +; GFX9-SDAG-NEXT: s_cmp_lt_i32 s8, 1 +; GFX9-SDAG-NEXT: s_cselect_b32 s7, s7, s9 +; GFX9-SDAG-NEXT: s_and_b32 s9, s7, 7 +; GFX9-SDAG-NEXT: s_cmp_gt_i32 s9, 5 +; GFX9-SDAG-NEXT: s_cselect_b32 s10, 1, 0 +; GFX9-SDAG-NEXT: s_cmp_eq_u32 s9, 3 +; GFX9-SDAG-NEXT: s_cselect_b32 s9, 1, 0 +; GFX9-SDAG-NEXT: s_lshr_b32 s7, s7, 2 +; GFX9-SDAG-NEXT: s_or_b32 s9, s9, s10 +; GFX9-SDAG-NEXT: s_add_i32 s7, s7, s9 +; GFX9-SDAG-NEXT: s_cmp_lt_i32 s8, 31 +; GFX9-SDAG-NEXT: s_cselect_b32 s7, s7, 0x7c00 +; GFX9-SDAG-NEXT: s_cmp_lg_u32 s6, 0 +; GFX9-SDAG-NEXT: s_cselect_b32 s4, s4, 0x7c00 +; GFX9-SDAG-NEXT: s_cmpk_eq_i32 s8, 0x40f +; GFX9-SDAG-NEXT: s_cselect_b32 s4, s4, s7 +; GFX9-SDAG-NEXT: s_lshr_b32 s5, s5, 16 +; GFX9-SDAG-NEXT: s_and_b32 s5, s5, 0x8000 +; GFX9-SDAG-NEXT: s_or_b32 s4, s5, s4 +; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, s4 +; GFX9-SDAG-NEXT: buffer_store_short v0, off, s[0:3], 0 +; GFX9-SDAG-NEXT: s_endpgm +; +; GFX9-GISEL-LABEL: fptrunc_f64_to_f16: +; GFX9-GISEL: ; %bb.0: ; %entry +; GFX9-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-GISEL-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 +; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-GISEL-NEXT: s_bfe_u32 s4, s3, 0xb0014 +; GFX9-GISEL-NEXT: s_lshr_b32 s5, s3, 8 +; GFX9-GISEL-NEXT: s_and_b32 s6, s3, 0x1ff +; GFX9-GISEL-NEXT: s_addk_i32 s4, 0xfc10 +; GFX9-GISEL-NEXT: s_and_b32 s5, s5, 0xffe +; GFX9-GISEL-NEXT: s_or_b32 s2, s6, s2 +; GFX9-GISEL-NEXT: s_cmp_lg_u32 s2, 0 +; GFX9-GISEL-NEXT: s_cselect_b32 s2, 1, 0 +; GFX9-GISEL-NEXT: s_or_b32 s2, s5, s2 +; GFX9-GISEL-NEXT: s_cmp_lg_u32 s2, 0 +; GFX9-GISEL-NEXT: s_cselect_b32 s5, 1, 0 +; GFX9-GISEL-NEXT: s_sub_i32 s7, 1, s4 +; GFX9-GISEL-NEXT: s_lshl_b32 s6, s4, 12 +; GFX9-GISEL-NEXT: s_max_i32 s7, s7, 0 +; GFX9-GISEL-NEXT: s_or_b32 s6, s2, s6 +; GFX9-GISEL-NEXT: s_min_i32 s7, s7, 13 +; GFX9-GISEL-NEXT: s_bitset1_b32 s2, 12 +; GFX9-GISEL-NEXT: s_lshl_b32 s5, s5, 9 +; GFX9-GISEL-NEXT: s_lshr_b32 s8, s2, s7 +; GFX9-GISEL-NEXT: s_or_b32 s5, s5, 0x7c00 +; GFX9-GISEL-NEXT: s_lshl_b32 s7, s8, s7 +; GFX9-GISEL-NEXT: s_cmp_lg_u32 s7, s2 +; GFX9-GISEL-NEXT: s_cselect_b32 s2, 1, 0 +; GFX9-GISEL-NEXT: s_or_b32 s2, s8, s2 +; GFX9-GISEL-NEXT: s_cmp_lt_i32 s4, 1 +; GFX9-GISEL-NEXT: s_cselect_b32 s2, s2, s6 +; GFX9-GISEL-NEXT: s_and_b32 s6, s2, 7 +; GFX9-GISEL-NEXT: s_lshr_b32 s2, s2, 2 +; GFX9-GISEL-NEXT: s_cmp_eq_u32 s6, 3 +; GFX9-GISEL-NEXT: s_cselect_b32 s7, 1, 0 +; GFX9-GISEL-NEXT: s_cmp_gt_i32 s6, 5 +; GFX9-GISEL-NEXT: s_cselect_b32 s6, 1, 0 +; GFX9-GISEL-NEXT: s_or_b32 s6, s7, s6 +; GFX9-GISEL-NEXT: s_add_i32 s2, s2, s6 +; GFX9-GISEL-NEXT: s_cmp_gt_i32 s4, 30 
+; GFX9-GISEL-NEXT: s_cselect_b32 s2, 0x7c00, s2 +; GFX9-GISEL-NEXT: s_cmpk_eq_i32 s4, 0x40f +; GFX9-GISEL-NEXT: s_cselect_b32 s2, s5, s2 +; GFX9-GISEL-NEXT: s_lshr_b32 s3, s3, 16 +; GFX9-GISEL-NEXT: s_and_b32 s3, s3, 0x8000 +; GFX9-GISEL-NEXT: s_or_b32 s2, s3, s2 +; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s2 +; GFX9-GISEL-NEXT: s_mov_b32 s2, -1 +; GFX9-GISEL-NEXT: s_mov_b32 s3, 0xf000 +; GFX9-GISEL-NEXT: buffer_store_short v0, off, s[0:3], 0 +; GFX9-GISEL-NEXT: s_endpgm +; +; GFX950-SDAG-LABEL: fptrunc_f64_to_f16: +; GFX950-SDAG: ; %bb.0: ; %entry +; GFX950-SDAG-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x24 +; GFX950-SDAG-NEXT: s_mov_b32 s3, 0xf000 +; GFX950-SDAG-NEXT: s_mov_b32 s2, -1 +; GFX950-SDAG-NEXT: s_mov_b32 s6, s2 +; GFX950-SDAG-NEXT: s_mov_b32 s7, s3 +; GFX950-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX950-SDAG-NEXT: s_mov_b32 s4, s10 +; GFX950-SDAG-NEXT: s_mov_b32 s5, s11 +; GFX950-SDAG-NEXT: buffer_load_dwordx2 v[0:1], off, s[4:7], 0 +; GFX950-SDAG-NEXT: s_mov_b32 s0, s8 +; GFX950-SDAG-NEXT: s_mov_b32 s1, s9 +; GFX950-SDAG-NEXT: s_movk_i32 s4, 0x7e00 +; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) +; GFX950-SDAG-NEXT: v_readfirstlane_b32 s5, v1 +; GFX950-SDAG-NEXT: s_and_b32 s6, s5, 0x1ff +; GFX950-SDAG-NEXT: v_or_b32_e32 v0, s6, v0 +; GFX950-SDAG-NEXT: s_lshr_b32 s7, s5, 8 +; GFX950-SDAG-NEXT: s_bfe_u32 s8, s5, 0xb0014 +; GFX950-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 +; GFX950-SDAG-NEXT: s_and_b32 s6, s7, 0xffe +; GFX950-SDAG-NEXT: s_sub_i32 s7, 0x3f1, s8 +; GFX950-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX950-SDAG-NEXT: v_med3_i32 v1, s7, 0, 13 +; GFX950-SDAG-NEXT: v_readfirstlane_b32 s7, v0 +; GFX950-SDAG-NEXT: s_or_b32 s6, s6, s7 +; GFX950-SDAG-NEXT: v_readfirstlane_b32 s9, v1 +; GFX950-SDAG-NEXT: s_or_b32 s7, s6, 0x1000 +; GFX950-SDAG-NEXT: s_lshr_b32 s10, s7, s9 +; GFX950-SDAG-NEXT: s_lshl_b32 s9, s10, s9 +; GFX950-SDAG-NEXT: s_cmp_lg_u32 s9, s7 +; GFX950-SDAG-NEXT: s_cselect_b32 s7, 1, 0 +; GFX950-SDAG-NEXT: s_addk_i32 s8, 0xfc10 +; GFX950-SDAG-NEXT: s_lshl_b32 s9, s8, 12 +; GFX950-SDAG-NEXT: s_or_b32 s7, s10, s7 +; GFX950-SDAG-NEXT: s_or_b32 s9, s6, s9 +; GFX950-SDAG-NEXT: s_cmp_lt_i32 s8, 1 +; GFX950-SDAG-NEXT: s_cselect_b32 s7, s7, s9 +; GFX950-SDAG-NEXT: s_and_b32 s9, s7, 7 +; GFX950-SDAG-NEXT: s_cmp_gt_i32 s9, 5 +; GFX950-SDAG-NEXT: s_cselect_b32 s10, 1, 0 +; GFX950-SDAG-NEXT: s_cmp_eq_u32 s9, 3 +; GFX950-SDAG-NEXT: s_cselect_b32 s9, 1, 0 +; GFX950-SDAG-NEXT: s_lshr_b32 s7, s7, 2 +; GFX950-SDAG-NEXT: s_or_b32 s9, s9, s10 +; GFX950-SDAG-NEXT: s_add_i32 s7, s7, s9 +; GFX950-SDAG-NEXT: s_cmp_lt_i32 s8, 31 +; GFX950-SDAG-NEXT: s_cselect_b32 s7, s7, 0x7c00 +; GFX950-SDAG-NEXT: s_cmp_lg_u32 s6, 0 +; GFX950-SDAG-NEXT: s_cselect_b32 s4, s4, 0x7c00 +; GFX950-SDAG-NEXT: s_cmpk_eq_i32 s8, 0x40f +; GFX950-SDAG-NEXT: s_cselect_b32 s4, s4, s7 +; GFX950-SDAG-NEXT: s_lshr_b32 s5, s5, 16 +; GFX950-SDAG-NEXT: s_and_b32 s5, s5, 0x8000 +; GFX950-SDAG-NEXT: s_or_b32 s4, s5, s4 +; GFX950-SDAG-NEXT: v_mov_b32_e32 v0, s4 +; GFX950-SDAG-NEXT: buffer_store_short v0, off, s[0:3], 0 +; GFX950-SDAG-NEXT: s_endpgm +; ; GFX950-GISEL-LABEL: fptrunc_f64_to_f16: ; GFX950-GISEL: ; %bb.0: ; %entry ; GFX950-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 ; GFX950-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX950-GISEL-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0 ; GFX950-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX950-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[2:3] -; GFX950-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX950-GISEL-NEXT: s_bfe_u32 s4, s3, 0xb0014 +; GFX950-GISEL-NEXT: s_lshr_b32 s5, s3, 8 +; GFX950-GISEL-NEXT: s_and_b32 s6, 
s3, 0x1ff +; GFX950-GISEL-NEXT: s_addk_i32 s4, 0xfc10 +; GFX950-GISEL-NEXT: s_and_b32 s5, s5, 0xffe +; GFX950-GISEL-NEXT: s_or_b32 s2, s6, s2 +; GFX950-GISEL-NEXT: s_cmp_lg_u32 s2, 0 +; GFX950-GISEL-NEXT: s_cselect_b32 s2, 1, 0 +; GFX950-GISEL-NEXT: s_or_b32 s2, s5, s2 +; GFX950-GISEL-NEXT: s_cmp_lg_u32 s2, 0 +; GFX950-GISEL-NEXT: s_cselect_b32 s5, 1, 0 +; GFX950-GISEL-NEXT: s_sub_i32 s7, 1, s4 +; GFX950-GISEL-NEXT: s_lshl_b32 s6, s4, 12 +; GFX950-GISEL-NEXT: s_max_i32 s7, s7, 0 +; GFX950-GISEL-NEXT: s_or_b32 s6, s2, s6 +; GFX950-GISEL-NEXT: s_min_i32 s7, s7, 13 +; GFX950-GISEL-NEXT: s_bitset1_b32 s2, 12 +; GFX950-GISEL-NEXT: s_lshl_b32 s5, s5, 9 +; GFX950-GISEL-NEXT: s_lshr_b32 s8, s2, s7 +; GFX950-GISEL-NEXT: s_or_b32 s5, s5, 0x7c00 +; GFX950-GISEL-NEXT: s_lshl_b32 s7, s8, s7 +; GFX950-GISEL-NEXT: s_cmp_lg_u32 s7, s2 +; GFX950-GISEL-NEXT: s_cselect_b32 s2, 1, 0 +; GFX950-GISEL-NEXT: s_or_b32 s2, s8, s2 +; GFX950-GISEL-NEXT: s_cmp_lt_i32 s4, 1 +; GFX950-GISEL-NEXT: s_cselect_b32 s2, s2, s6 +; GFX950-GISEL-NEXT: s_and_b32 s6, s2, 7 +; GFX950-GISEL-NEXT: s_lshr_b32 s2, s2, 2 +; GFX950-GISEL-NEXT: s_cmp_eq_u32 s6, 3 +; GFX950-GISEL-NEXT: s_cselect_b32 s7, 1, 0 +; GFX950-GISEL-NEXT: s_cmp_gt_i32 s6, 5 +; GFX950-GISEL-NEXT: s_cselect_b32 s6, 1, 0 +; GFX950-GISEL-NEXT: s_or_b32 s6, s7, s6 +; GFX950-GISEL-NEXT: s_add_i32 s2, s2, s6 +; GFX950-GISEL-NEXT: s_cmp_gt_i32 s4, 30 +; GFX950-GISEL-NEXT: s_cselect_b32 s2, 0x7c00, s2 +; GFX950-GISEL-NEXT: s_cmpk_eq_i32 s4, 0x40f +; GFX950-GISEL-NEXT: s_cselect_b32 s2, s5, s2 +; GFX950-GISEL-NEXT: s_lshr_b32 s3, s3, 16 +; GFX950-GISEL-NEXT: s_and_b32 s3, s3, 0x8000 +; GFX950-GISEL-NEXT: s_or_b32 s2, s3, s2 +; GFX950-GISEL-NEXT: v_mov_b32_e32 v0, s2 ; GFX950-GISEL-NEXT: s_mov_b32 s2, -1 ; GFX950-GISEL-NEXT: s_mov_b32 s3, 0xf000 ; GFX950-GISEL-NEXT: buffer_store_short v0, off, s[0:3], 0 @@ -340,13 +852,60 @@ define amdgpu_kernel void @fptrunc_f64_to_f16( ; GFX11-SDAG-TRUE16-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s8, s2 ; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s9, s3 -; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s4, s0 ; GFX11-SDAG-TRUE16-NEXT: buffer_load_b64 v[0:1], off, s[8:11], 0 -; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s5, s1 ; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: v_cvt_f32_f64_e32 v0, v[0:1] -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.l, v0 +; GFX11-SDAG-TRUE16-NEXT: v_readfirstlane_b32 s2, v1 +; GFX11-SDAG-TRUE16-NEXT: s_and_b32 s3, s2, 0x1ff +; GFX11-SDAG-TRUE16-NEXT: s_lshr_b32 s5, s2, 8 +; GFX11-SDAG-TRUE16-NEXT: v_or_b32_e32 v0, s3, v0 +; GFX11-SDAG-TRUE16-NEXT: s_bfe_u32 s3, s2, 0xb0014 +; GFX11-SDAG-TRUE16-NEXT: s_and_b32 s5, s5, 0xffe +; GFX11-SDAG-TRUE16-NEXT: s_sub_i32 s4, 0x3f1, s3 +; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) +; GFX11-SDAG-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 +; GFX11-SDAG-TRUE16-NEXT: v_med3_i32 v1, s4, 0, 13 +; GFX11-SDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo +; GFX11-SDAG-TRUE16-NEXT: v_readfirstlane_b32 s8, v1 +; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) +; GFX11-SDAG-TRUE16-NEXT: v_readfirstlane_b32 s4, v0 +; GFX11-SDAG-TRUE16-NEXT: s_or_b32 s4, s5, s4 +; GFX11-SDAG-TRUE16-NEXT: s_or_b32 s5, s4, 0x1000 +; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX11-SDAG-TRUE16-NEXT: s_lshr_b32 s9, s5, s8 +; GFX11-SDAG-TRUE16-NEXT: s_lshl_b32 s8, s9, 
s8 +; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_4) | instid1(SALU_CYCLE_1) +; GFX11-SDAG-TRUE16-NEXT: s_cmp_lg_u32 s8, s5 +; GFX11-SDAG-TRUE16-NEXT: s_cselect_b32 s5, 1, 0 +; GFX11-SDAG-TRUE16-NEXT: s_addk_i32 s3, 0xfc10 +; GFX11-SDAG-TRUE16-NEXT: s_or_b32 s5, s9, s5 +; GFX11-SDAG-TRUE16-NEXT: s_lshl_b32 s8, s3, 12 +; GFX11-SDAG-TRUE16-NEXT: s_or_b32 s8, s4, s8 +; GFX11-SDAG-TRUE16-NEXT: s_cmp_lt_i32 s3, 1 +; GFX11-SDAG-TRUE16-NEXT: s_cselect_b32 s5, s5, s8 +; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX11-SDAG-TRUE16-NEXT: s_and_b32 s8, s5, 7 +; GFX11-SDAG-TRUE16-NEXT: s_cmp_gt_i32 s8, 5 +; GFX11-SDAG-TRUE16-NEXT: s_cselect_b32 s9, 1, 0 +; GFX11-SDAG-TRUE16-NEXT: s_cmp_eq_u32 s8, 3 +; GFX11-SDAG-TRUE16-NEXT: s_cselect_b32 s8, 1, 0 +; GFX11-SDAG-TRUE16-NEXT: s_lshr_b32 s5, s5, 2 +; GFX11-SDAG-TRUE16-NEXT: s_or_b32 s8, s8, s9 +; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX11-SDAG-TRUE16-NEXT: s_add_i32 s5, s5, s8 +; GFX11-SDAG-TRUE16-NEXT: s_cmp_lt_i32 s3, 31 +; GFX11-SDAG-TRUE16-NEXT: s_movk_i32 s8, 0x7e00 +; GFX11-SDAG-TRUE16-NEXT: s_cselect_b32 s5, s5, 0x7c00 +; GFX11-SDAG-TRUE16-NEXT: s_cmp_lg_u32 s4, 0 +; GFX11-SDAG-TRUE16-NEXT: s_cselect_b32 s4, s8, 0x7c00 +; GFX11-SDAG-TRUE16-NEXT: s_cmpk_eq_i32 s3, 0x40f +; GFX11-SDAG-TRUE16-NEXT: s_cselect_b32 s3, s4, s5 +; GFX11-SDAG-TRUE16-NEXT: s_lshr_b32 s2, s2, 16 +; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s4, s0 +; GFX11-SDAG-TRUE16-NEXT: s_and_b32 s2, s2, 0x8000 +; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s5, s1 +; GFX11-SDAG-TRUE16-NEXT: s_or_b32 s2, s2, s3 +; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX11-SDAG-TRUE16-NEXT: v_mov_b32_e32 v0, s2 ; GFX11-SDAG-TRUE16-NEXT: buffer_store_b16 v0, off, s[4:7], 0 ; GFX11-SDAG-TRUE16-NEXT: s_endpgm ; @@ -360,13 +919,60 @@ define amdgpu_kernel void @fptrunc_f64_to_f16( ; GFX11-SDAG-FAKE16-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s8, s2 ; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s9, s3 -; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s4, s0 ; GFX11-SDAG-FAKE16-NEXT: buffer_load_b64 v[0:1], off, s[8:11], 0 -; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s5, s1 ; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: v_cvt_f32_f64_e32 v0, v[0:1] -; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-FAKE16-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX11-SDAG-FAKE16-NEXT: v_readfirstlane_b32 s2, v1 +; GFX11-SDAG-FAKE16-NEXT: s_and_b32 s3, s2, 0x1ff +; GFX11-SDAG-FAKE16-NEXT: s_lshr_b32 s5, s2, 8 +; GFX11-SDAG-FAKE16-NEXT: v_or_b32_e32 v0, s3, v0 +; GFX11-SDAG-FAKE16-NEXT: s_bfe_u32 s3, s2, 0xb0014 +; GFX11-SDAG-FAKE16-NEXT: s_and_b32 s5, s5, 0xffe +; GFX11-SDAG-FAKE16-NEXT: s_sub_i32 s4, 0x3f1, s3 +; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) +; GFX11-SDAG-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 +; GFX11-SDAG-FAKE16-NEXT: v_med3_i32 v1, s4, 0, 13 +; GFX11-SDAG-FAKE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo +; GFX11-SDAG-FAKE16-NEXT: v_readfirstlane_b32 s8, v1 +; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) +; GFX11-SDAG-FAKE16-NEXT: v_readfirstlane_b32 s4, v0 +; GFX11-SDAG-FAKE16-NEXT: s_or_b32 s4, s5, s4 +; GFX11-SDAG-FAKE16-NEXT: s_or_b32 s5, s4, 0x1000 +; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX11-SDAG-FAKE16-NEXT: s_lshr_b32 s9, s5, s8 +; GFX11-SDAG-FAKE16-NEXT: s_lshl_b32 s8, s9, s8 +; 
GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_4) | instid1(SALU_CYCLE_1) +; GFX11-SDAG-FAKE16-NEXT: s_cmp_lg_u32 s8, s5 +; GFX11-SDAG-FAKE16-NEXT: s_cselect_b32 s5, 1, 0 +; GFX11-SDAG-FAKE16-NEXT: s_addk_i32 s3, 0xfc10 +; GFX11-SDAG-FAKE16-NEXT: s_or_b32 s5, s9, s5 +; GFX11-SDAG-FAKE16-NEXT: s_lshl_b32 s8, s3, 12 +; GFX11-SDAG-FAKE16-NEXT: s_or_b32 s8, s4, s8 +; GFX11-SDAG-FAKE16-NEXT: s_cmp_lt_i32 s3, 1 +; GFX11-SDAG-FAKE16-NEXT: s_cselect_b32 s5, s5, s8 +; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX11-SDAG-FAKE16-NEXT: s_and_b32 s8, s5, 7 +; GFX11-SDAG-FAKE16-NEXT: s_cmp_gt_i32 s8, 5 +; GFX11-SDAG-FAKE16-NEXT: s_cselect_b32 s9, 1, 0 +; GFX11-SDAG-FAKE16-NEXT: s_cmp_eq_u32 s8, 3 +; GFX11-SDAG-FAKE16-NEXT: s_cselect_b32 s8, 1, 0 +; GFX11-SDAG-FAKE16-NEXT: s_lshr_b32 s5, s5, 2 +; GFX11-SDAG-FAKE16-NEXT: s_or_b32 s8, s8, s9 +; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX11-SDAG-FAKE16-NEXT: s_add_i32 s5, s5, s8 +; GFX11-SDAG-FAKE16-NEXT: s_cmp_lt_i32 s3, 31 +; GFX11-SDAG-FAKE16-NEXT: s_movk_i32 s8, 0x7e00 +; GFX11-SDAG-FAKE16-NEXT: s_cselect_b32 s5, s5, 0x7c00 +; GFX11-SDAG-FAKE16-NEXT: s_cmp_lg_u32 s4, 0 +; GFX11-SDAG-FAKE16-NEXT: s_cselect_b32 s4, s8, 0x7c00 +; GFX11-SDAG-FAKE16-NEXT: s_cmpk_eq_i32 s3, 0x40f +; GFX11-SDAG-FAKE16-NEXT: s_cselect_b32 s3, s4, s5 +; GFX11-SDAG-FAKE16-NEXT: s_lshr_b32 s2, s2, 16 +; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s4, s0 +; GFX11-SDAG-FAKE16-NEXT: s_and_b32 s2, s2, 0x8000 +; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s5, s1 +; GFX11-SDAG-FAKE16-NEXT: s_or_b32 s2, s2, s3 +; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX11-SDAG-FAKE16-NEXT: v_mov_b32_e32 v0, s2 ; GFX11-SDAG-FAKE16-NEXT: buffer_store_b16 v0, off, s[4:7], 0 ; GFX11-SDAG-FAKE16-NEXT: s_endpgm ; @@ -376,6 +982,555 @@ define amdgpu_kernel void @fptrunc_f64_to_f16( ; GFX11-GISEL-TRUE16-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-GISEL-TRUE16-NEXT: s_load_b64 s[2:3], s[2:3], 0x0 ; GFX11-GISEL-TRUE16-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-GISEL-TRUE16-NEXT: s_and_b32 s6, s3, 0x1ff +; GFX11-GISEL-TRUE16-NEXT: s_bfe_u32 s4, s3, 0xb0014 +; GFX11-GISEL-TRUE16-NEXT: s_lshr_b32 s5, s3, 8 +; GFX11-GISEL-TRUE16-NEXT: s_or_b32 s2, s6, s2 +; GFX11-GISEL-TRUE16-NEXT: s_addk_i32 s4, 0xfc10 +; GFX11-GISEL-TRUE16-NEXT: s_and_b32 s5, s5, 0xffe +; GFX11-GISEL-TRUE16-NEXT: s_cmp_lg_u32 s2, 0 +; GFX11-GISEL-TRUE16-NEXT: s_cselect_b32 s2, 1, 0 +; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX11-GISEL-TRUE16-NEXT: s_or_b32 s2, s5, s2 +; GFX11-GISEL-TRUE16-NEXT: s_cmp_lg_u32 s2, 0 +; GFX11-GISEL-TRUE16-NEXT: s_cselect_b32 s5, 1, 0 +; GFX11-GISEL-TRUE16-NEXT: s_sub_i32 s6, 1, s4 +; GFX11-GISEL-TRUE16-NEXT: s_or_b32 s8, s2, 0x1000 +; GFX11-GISEL-TRUE16-NEXT: s_max_i32 s6, s6, 0 +; GFX11-GISEL-TRUE16-NEXT: s_lshl_b32 s7, s4, 12 +; GFX11-GISEL-TRUE16-NEXT: s_min_i32 s6, s6, 13 +; GFX11-GISEL-TRUE16-NEXT: s_lshl_b32 s5, s5, 9 +; GFX11-GISEL-TRUE16-NEXT: s_lshr_b32 s9, s8, s6 +; GFX11-GISEL-TRUE16-NEXT: s_or_b32 s2, s2, s7 +; GFX11-GISEL-TRUE16-NEXT: s_lshl_b32 s6, s9, s6 +; GFX11-GISEL-TRUE16-NEXT: s_or_b32 s5, s5, 0x7c00 +; GFX11-GISEL-TRUE16-NEXT: s_cmp_lg_u32 s6, s8 +; GFX11-GISEL-TRUE16-NEXT: s_cselect_b32 s6, 1, 0 +; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1) +; GFX11-GISEL-TRUE16-NEXT: s_or_b32 s6, s9, s6 +; GFX11-GISEL-TRUE16-NEXT: s_cmp_lt_i32 s4, 1 +; GFX11-GISEL-TRUE16-NEXT: s_cselect_b32 s2, s6, 
s2 +; GFX11-GISEL-TRUE16-NEXT: s_and_b32 s6, s2, 7 +; GFX11-GISEL-TRUE16-NEXT: s_lshr_b32 s2, s2, 2 +; GFX11-GISEL-TRUE16-NEXT: s_cmp_eq_u32 s6, 3 +; GFX11-GISEL-TRUE16-NEXT: s_cselect_b32 s7, 1, 0 +; GFX11-GISEL-TRUE16-NEXT: s_cmp_gt_i32 s6, 5 +; GFX11-GISEL-TRUE16-NEXT: s_cselect_b32 s6, 1, 0 +; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX11-GISEL-TRUE16-NEXT: s_or_b32 s6, s7, s6 +; GFX11-GISEL-TRUE16-NEXT: s_add_i32 s2, s2, s6 +; GFX11-GISEL-TRUE16-NEXT: s_cmp_gt_i32 s4, 30 +; GFX11-GISEL-TRUE16-NEXT: s_cselect_b32 s2, 0x7c00, s2 +; GFX11-GISEL-TRUE16-NEXT: s_cmpk_eq_i32 s4, 0x40f +; GFX11-GISEL-TRUE16-NEXT: s_cselect_b32 s2, s5, s2 +; GFX11-GISEL-TRUE16-NEXT: s_lshr_b32 s3, s3, 16 +; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX11-GISEL-TRUE16-NEXT: s_and_b32 s3, s3, 0x8000 +; GFX11-GISEL-TRUE16-NEXT: s_or_b32 s2, s3, s2 +; GFX11-GISEL-TRUE16-NEXT: s_mov_b32 s3, 0x31016000 +; GFX11-GISEL-TRUE16-NEXT: v_mov_b32_e32 v0, s2 +; GFX11-GISEL-TRUE16-NEXT: s_mov_b32 s2, -1 +; GFX11-GISEL-TRUE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0 +; GFX11-GISEL-TRUE16-NEXT: s_endpgm +; +; GFX11-GISEL-FAKE16-LABEL: fptrunc_f64_to_f16: +; GFX11-GISEL-FAKE16: ; %bb.0: ; %entry +; GFX11-GISEL-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX11-GISEL-FAKE16-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-GISEL-FAKE16-NEXT: s_load_b64 s[2:3], s[2:3], 0x0 +; GFX11-GISEL-FAKE16-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-GISEL-FAKE16-NEXT: s_and_b32 s6, s3, 0x1ff +; GFX11-GISEL-FAKE16-NEXT: s_bfe_u32 s4, s3, 0xb0014 +; GFX11-GISEL-FAKE16-NEXT: s_lshr_b32 s5, s3, 8 +; GFX11-GISEL-FAKE16-NEXT: s_or_b32 s2, s6, s2 +; GFX11-GISEL-FAKE16-NEXT: s_addk_i32 s4, 0xfc10 +; GFX11-GISEL-FAKE16-NEXT: s_and_b32 s5, s5, 0xffe +; GFX11-GISEL-FAKE16-NEXT: s_cmp_lg_u32 s2, 0 +; GFX11-GISEL-FAKE16-NEXT: s_cselect_b32 s2, 1, 0 +; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX11-GISEL-FAKE16-NEXT: s_or_b32 s2, s5, s2 +; GFX11-GISEL-FAKE16-NEXT: s_cmp_lg_u32 s2, 0 +; GFX11-GISEL-FAKE16-NEXT: s_cselect_b32 s5, 1, 0 +; GFX11-GISEL-FAKE16-NEXT: s_sub_i32 s6, 1, s4 +; GFX11-GISEL-FAKE16-NEXT: s_or_b32 s8, s2, 0x1000 +; GFX11-GISEL-FAKE16-NEXT: s_max_i32 s6, s6, 0 +; GFX11-GISEL-FAKE16-NEXT: s_lshl_b32 s7, s4, 12 +; GFX11-GISEL-FAKE16-NEXT: s_min_i32 s6, s6, 13 +; GFX11-GISEL-FAKE16-NEXT: s_lshl_b32 s5, s5, 9 +; GFX11-GISEL-FAKE16-NEXT: s_lshr_b32 s9, s8, s6 +; GFX11-GISEL-FAKE16-NEXT: s_or_b32 s2, s2, s7 +; GFX11-GISEL-FAKE16-NEXT: s_lshl_b32 s6, s9, s6 +; GFX11-GISEL-FAKE16-NEXT: s_or_b32 s5, s5, 0x7c00 +; GFX11-GISEL-FAKE16-NEXT: s_cmp_lg_u32 s6, s8 +; GFX11-GISEL-FAKE16-NEXT: s_cselect_b32 s6, 1, 0 +; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1) +; GFX11-GISEL-FAKE16-NEXT: s_or_b32 s6, s9, s6 +; GFX11-GISEL-FAKE16-NEXT: s_cmp_lt_i32 s4, 1 +; GFX11-GISEL-FAKE16-NEXT: s_cselect_b32 s2, s6, s2 +; GFX11-GISEL-FAKE16-NEXT: s_and_b32 s6, s2, 7 +; GFX11-GISEL-FAKE16-NEXT: s_lshr_b32 s2, s2, 2 +; GFX11-GISEL-FAKE16-NEXT: s_cmp_eq_u32 s6, 3 +; GFX11-GISEL-FAKE16-NEXT: s_cselect_b32 s7, 1, 0 +; GFX11-GISEL-FAKE16-NEXT: s_cmp_gt_i32 s6, 5 +; GFX11-GISEL-FAKE16-NEXT: s_cselect_b32 s6, 1, 0 +; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX11-GISEL-FAKE16-NEXT: s_or_b32 s6, s7, s6 +; GFX11-GISEL-FAKE16-NEXT: s_add_i32 s2, s2, s6 +; GFX11-GISEL-FAKE16-NEXT: s_cmp_gt_i32 
s4, 30 +; GFX11-GISEL-FAKE16-NEXT: s_cselect_b32 s2, 0x7c00, s2 +; GFX11-GISEL-FAKE16-NEXT: s_cmpk_eq_i32 s4, 0x40f +; GFX11-GISEL-FAKE16-NEXT: s_cselect_b32 s2, s5, s2 +; GFX11-GISEL-FAKE16-NEXT: s_lshr_b32 s3, s3, 16 +; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX11-GISEL-FAKE16-NEXT: s_and_b32 s3, s3, 0x8000 +; GFX11-GISEL-FAKE16-NEXT: s_or_b32 s2, s3, s2 +; GFX11-GISEL-FAKE16-NEXT: s_mov_b32 s3, 0x31016000 +; GFX11-GISEL-FAKE16-NEXT: v_mov_b32_e32 v0, s2 +; GFX11-GISEL-FAKE16-NEXT: s_mov_b32 s2, -1 +; GFX11-GISEL-FAKE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0 +; GFX11-GISEL-FAKE16-NEXT: s_endpgm + ptr addrspace(1) %r, + ptr addrspace(1) %a) { +entry: + %a.val = load double, ptr addrspace(1) %a + %r.val = fptrunc double %a.val to half + store half %r.val, ptr addrspace(1) %r + ret void +} + +define amdgpu_kernel void @fptrunc_f64_to_f16_afn( +; SI-SDAG-LABEL: fptrunc_f64_to_f16_afn: +; SI-SDAG: ; %bb.0: ; %entry +; SI-SDAG-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x9 +; SI-SDAG-NEXT: s_mov_b32 s3, 0xf000 +; SI-SDAG-NEXT: s_mov_b32 s2, -1 +; SI-SDAG-NEXT: s_mov_b32 s10, s2 +; SI-SDAG-NEXT: s_mov_b32 s11, s3 +; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; SI-SDAG-NEXT: s_mov_b32 s8, s6 +; SI-SDAG-NEXT: s_mov_b32 s9, s7 +; SI-SDAG-NEXT: buffer_load_dwordx2 v[0:1], off, s[8:11], 0 +; SI-SDAG-NEXT: s_movk_i32 s0, 0x7e00 +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) +; SI-SDAG-NEXT: v_readfirstlane_b32 s1, v1 +; SI-SDAG-NEXT: s_and_b32 s6, s1, 0x1ff +; SI-SDAG-NEXT: s_lshr_b32 s7, s1, 8 +; SI-SDAG-NEXT: s_bfe_u32 s8, s1, 0xb0014 +; SI-SDAG-NEXT: v_or_b32_e32 v0, s6, v0 +; SI-SDAG-NEXT: s_and_b32 s6, s7, 0xffe +; SI-SDAG-NEXT: s_sub_i32 s7, 0x3f1, s8 +; SI-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; SI-SDAG-NEXT: v_med3_i32 v1, s7, 0, 13 +; SI-SDAG-NEXT: v_readfirstlane_b32 s7, v0 +; SI-SDAG-NEXT: v_readfirstlane_b32 s9, v1 +; SI-SDAG-NEXT: s_or_b32 s6, s6, s7 +; SI-SDAG-NEXT: s_or_b32 s7, s6, 0x1000 +; SI-SDAG-NEXT: s_lshr_b32 s10, s7, s9 +; SI-SDAG-NEXT: s_lshl_b32 s9, s10, s9 +; SI-SDAG-NEXT: s_cmp_lg_u32 s9, s7 +; SI-SDAG-NEXT: s_cselect_b32 s7, 1, 0 +; SI-SDAG-NEXT: s_addk_i32 s8, 0xfc10 +; SI-SDAG-NEXT: s_or_b32 s7, s10, s7 +; SI-SDAG-NEXT: s_lshl_b32 s9, s8, 12 +; SI-SDAG-NEXT: s_or_b32 s9, s6, s9 +; SI-SDAG-NEXT: s_cmp_lt_i32 s8, 1 +; SI-SDAG-NEXT: s_cselect_b32 s7, s7, s9 +; SI-SDAG-NEXT: s_and_b32 s9, s7, 7 +; SI-SDAG-NEXT: s_cmp_gt_i32 s9, 5 +; SI-SDAG-NEXT: s_cselect_b32 s10, 1, 0 +; SI-SDAG-NEXT: s_cmp_eq_u32 s9, 3 +; SI-SDAG-NEXT: s_cselect_b32 s9, 1, 0 +; SI-SDAG-NEXT: s_lshr_b32 s7, s7, 2 +; SI-SDAG-NEXT: s_or_b32 s9, s9, s10 +; SI-SDAG-NEXT: s_add_i32 s7, s7, s9 +; SI-SDAG-NEXT: s_cmp_lt_i32 s8, 31 +; SI-SDAG-NEXT: s_cselect_b32 s7, s7, 0x7c00 +; SI-SDAG-NEXT: s_cmp_lg_u32 s6, 0 +; SI-SDAG-NEXT: s_cselect_b32 s0, s0, 0x7c00 +; SI-SDAG-NEXT: s_cmpk_eq_i32 s8, 0x40f +; SI-SDAG-NEXT: s_cselect_b32 s0, s0, s7 +; SI-SDAG-NEXT: s_lshr_b32 s1, s1, 16 +; SI-SDAG-NEXT: s_and_b32 s1, s1, 0x8000 +; SI-SDAG-NEXT: s_or_b32 s6, s1, s0 +; SI-SDAG-NEXT: s_mov_b32 s0, s4 +; SI-SDAG-NEXT: s_mov_b32 s1, s5 +; SI-SDAG-NEXT: v_mov_b32_e32 v0, s6 +; SI-SDAG-NEXT: buffer_store_short v0, off, s[0:3], 0 +; SI-SDAG-NEXT: s_endpgm +; +; SI-GISEL-LABEL: fptrunc_f64_to_f16_afn: +; SI-GISEL: ; %bb.0: ; %entry +; SI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 +; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; SI-GISEL-NEXT: s_load_dwordx2 s[4:5], s[2:3], 0x0 +; SI-GISEL-NEXT: s_mov_b32 s2, -1 +; SI-GISEL-NEXT: s_waitcnt 
lgkmcnt(0)
+; SI-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[4:5]
+; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
+; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000
+; SI-GISEL-NEXT: buffer_store_short v0, off, s[0:3], 0
+; SI-GISEL-NEXT: s_endpgm
+;
+; VI-SDAG-LABEL: fptrunc_f64_to_f16_afn:
+; VI-SDAG: ; %bb.0: ; %entry
+; VI-SDAG-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x24
+; VI-SDAG-NEXT: s_mov_b32 s3, 0xf000
+; VI-SDAG-NEXT: s_mov_b32 s2, -1
+; VI-SDAG-NEXT: s_mov_b32 s10, s2
+; VI-SDAG-NEXT: s_mov_b32 s11, s3
+; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; VI-SDAG-NEXT: s_mov_b32 s8, s6
+; VI-SDAG-NEXT: s_mov_b32 s9, s7
+; VI-SDAG-NEXT: buffer_load_dwordx2 v[0:1], off, s[8:11], 0
+; VI-SDAG-NEXT: s_mov_b32 s0, s4
+; VI-SDAG-NEXT: s_mov_b32 s1, s5
+; VI-SDAG-NEXT: s_movk_i32 s6, 0x7e00
+; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
+; VI-SDAG-NEXT: v_readfirstlane_b32 s4, v1
+; VI-SDAG-NEXT: s_and_b32 s5, s4, 0x1ff
+; VI-SDAG-NEXT: v_or_b32_e32 v0, s5, v0
+; VI-SDAG-NEXT: s_lshr_b32 s7, s4, 8
+; VI-SDAG-NEXT: s_bfe_u32 s8, s4, 0xb0014
+; VI-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
+; VI-SDAG-NEXT: s_and_b32 s5, s7, 0xffe
+; VI-SDAG-NEXT: s_sub_i32 s7, 0x3f1, s8
+; VI-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; VI-SDAG-NEXT: v_med3_i32 v1, s7, 0, 13
+; VI-SDAG-NEXT: v_readfirstlane_b32 s7, v0
+; VI-SDAG-NEXT: s_or_b32 s5, s5, s7
+; VI-SDAG-NEXT: v_readfirstlane_b32 s9, v1
+; VI-SDAG-NEXT: s_or_b32 s7, s5, 0x1000
+; VI-SDAG-NEXT: s_lshr_b32 s10, s7, s9
+; VI-SDAG-NEXT: s_lshl_b32 s9, s10, s9
+; VI-SDAG-NEXT: s_cmp_lg_u32 s9, s7
+; VI-SDAG-NEXT: s_cselect_b32 s7, 1, 0
+; VI-SDAG-NEXT: s_addk_i32 s8, 0xfc10
+; VI-SDAG-NEXT: s_lshl_b32 s9, s8, 12
+; VI-SDAG-NEXT: s_or_b32 s7, s10, s7
+; VI-SDAG-NEXT: s_or_b32 s9, s5, s9
+; VI-SDAG-NEXT: s_cmp_lt_i32 s8, 1
+; VI-SDAG-NEXT: s_cselect_b32 s7, s7, s9
+; VI-SDAG-NEXT: s_and_b32 s9, s7, 7
+; VI-SDAG-NEXT: s_cmp_gt_i32 s9, 5
+; VI-SDAG-NEXT: s_cselect_b32 s10, 1, 0
+; VI-SDAG-NEXT: s_cmp_eq_u32 s9, 3
+; VI-SDAG-NEXT: s_cselect_b32 s9, 1, 0
+; VI-SDAG-NEXT: s_lshr_b32 s7, s7, 2
+; VI-SDAG-NEXT: s_or_b32 s9, s9, s10
+; VI-SDAG-NEXT: s_add_i32 s7, s7, s9
+; VI-SDAG-NEXT: s_cmp_lt_i32 s8, 31
+; VI-SDAG-NEXT: s_cselect_b32 s7, s7, 0x7c00
+; VI-SDAG-NEXT: s_cmp_lg_u32 s5, 0
+; VI-SDAG-NEXT: s_cselect_b32 s5, s6, 0x7c00
+; VI-SDAG-NEXT: s_cmpk_eq_i32 s8, 0x40f
+; VI-SDAG-NEXT: s_cselect_b32 s5, s5, s7
+; VI-SDAG-NEXT: s_lshr_b32 s4, s4, 16
+; VI-SDAG-NEXT: s_and_b32 s4, s4, 0x8000
+; VI-SDAG-NEXT: s_or_b32 s4, s4, s5
+; VI-SDAG-NEXT: v_mov_b32_e32 v0, s4
+; VI-SDAG-NEXT: buffer_store_short v0, off, s[0:3], 0
+; VI-SDAG-NEXT: s_endpgm
+;
+; VI-GISEL-LABEL: fptrunc_f64_to_f16_afn:
+; VI-GISEL: ; %bb.0: ; %entry
+; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; VI-GISEL-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0
+; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; VI-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[2:3]
+; VI-GISEL-NEXT: s_mov_b32 s2, -1
+; VI-GISEL-NEXT: s_mov_b32 s3, 0xf000
+; VI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
+; VI-GISEL-NEXT: buffer_store_short v0, off, s[0:3], 0
+; VI-GISEL-NEXT: s_endpgm
+;
+; GFX9-SDAG-LABEL: fptrunc_f64_to_f16_afn:
+; GFX9-SDAG: ; %bb.0: ; %entry
+; GFX9-SDAG-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x24
+; GFX9-SDAG-NEXT: s_mov_b32 s3, 0xf000
+; GFX9-SDAG-NEXT: s_mov_b32 s2, -1
+; GFX9-SDAG-NEXT: s_mov_b32 s6, s2
+; GFX9-SDAG-NEXT: s_mov_b32 s7, s3
+; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-SDAG-NEXT: s_mov_b32 s4, s10
+; GFX9-SDAG-NEXT: s_mov_b32 s5, s11
+; GFX9-SDAG-NEXT: buffer_load_dwordx2 v[0:1], off, s[4:7], 0
+; GFX9-SDAG-NEXT: s_mov_b32 s0, s8
+; GFX9-SDAG-NEXT: s_mov_b32 s1, s9
+; GFX9-SDAG-NEXT: s_movk_i32 s4, 0x7e00
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-SDAG-NEXT: v_readfirstlane_b32 s5, v1
+; GFX9-SDAG-NEXT: s_and_b32 s6, s5, 0x1ff
+; GFX9-SDAG-NEXT: v_or_b32_e32 v0, s6, v0
+; GFX9-SDAG-NEXT: s_lshr_b32 s7, s5, 8
+; GFX9-SDAG-NEXT: s_bfe_u32 s8, s5, 0xb0014
+; GFX9-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
+; GFX9-SDAG-NEXT: s_and_b32 s6, s7, 0xffe
+; GFX9-SDAG-NEXT: s_sub_i32 s7, 0x3f1, s8
+; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX9-SDAG-NEXT: v_med3_i32 v1, s7, 0, 13
+; GFX9-SDAG-NEXT: v_readfirstlane_b32 s7, v0
+; GFX9-SDAG-NEXT: s_or_b32 s6, s6, s7
+; GFX9-SDAG-NEXT: v_readfirstlane_b32 s9, v1
+; GFX9-SDAG-NEXT: s_or_b32 s7, s6, 0x1000
+; GFX9-SDAG-NEXT: s_lshr_b32 s10, s7, s9
+; GFX9-SDAG-NEXT: s_lshl_b32 s9, s10, s9
+; GFX9-SDAG-NEXT: s_cmp_lg_u32 s9, s7
+; GFX9-SDAG-NEXT: s_cselect_b32 s7, 1, 0
+; GFX9-SDAG-NEXT: s_addk_i32 s8, 0xfc10
+; GFX9-SDAG-NEXT: s_lshl_b32 s9, s8, 12
+; GFX9-SDAG-NEXT: s_or_b32 s7, s10, s7
+; GFX9-SDAG-NEXT: s_or_b32 s9, s6, s9
+; GFX9-SDAG-NEXT: s_cmp_lt_i32 s8, 1
+; GFX9-SDAG-NEXT: s_cselect_b32 s7, s7, s9
+; GFX9-SDAG-NEXT: s_and_b32 s9, s7, 7
+; GFX9-SDAG-NEXT: s_cmp_gt_i32 s9, 5
+; GFX9-SDAG-NEXT: s_cselect_b32 s10, 1, 0
+; GFX9-SDAG-NEXT: s_cmp_eq_u32 s9, 3
+; GFX9-SDAG-NEXT: s_cselect_b32 s9, 1, 0
+; GFX9-SDAG-NEXT: s_lshr_b32 s7, s7, 2
+; GFX9-SDAG-NEXT: s_or_b32 s9, s9, s10
+; GFX9-SDAG-NEXT: s_add_i32 s7, s7, s9
+; GFX9-SDAG-NEXT: s_cmp_lt_i32 s8, 31
+; GFX9-SDAG-NEXT: s_cselect_b32 s7, s7, 0x7c00
+; GFX9-SDAG-NEXT: s_cmp_lg_u32 s6, 0
+; GFX9-SDAG-NEXT: s_cselect_b32 s4, s4, 0x7c00
+; GFX9-SDAG-NEXT: s_cmpk_eq_i32 s8, 0x40f
+; GFX9-SDAG-NEXT: s_cselect_b32 s4, s4, s7
+; GFX9-SDAG-NEXT: s_lshr_b32 s5, s5, 16
+; GFX9-SDAG-NEXT: s_and_b32 s5, s5, 0x8000
+; GFX9-SDAG-NEXT: s_or_b32 s4, s5, s4
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, s4
+; GFX9-SDAG-NEXT: buffer_store_short v0, off, s[0:3], 0
+; GFX9-SDAG-NEXT: s_endpgm
+;
+; GFX9-GISEL-LABEL: fptrunc_f64_to_f16_afn:
+; GFX9-GISEL: ; %bb.0: ; %entry
+; GFX9-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-GISEL-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0
+; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[2:3]
+; GFX9-GISEL-NEXT: s_mov_b32 s2, -1
+; GFX9-GISEL-NEXT: s_mov_b32 s3, 0xf000
+; GFX9-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX9-GISEL-NEXT: buffer_store_short v0, off, s[0:3], 0
+; GFX9-GISEL-NEXT: s_endpgm
+;
+; GFX950-SDAG-LABEL: fptrunc_f64_to_f16_afn:
+; GFX950-SDAG: ; %bb.0: ; %entry
+; GFX950-SDAG-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x24
+; GFX950-SDAG-NEXT: s_mov_b32 s3, 0xf000
+; GFX950-SDAG-NEXT: s_mov_b32 s2, -1
+; GFX950-SDAG-NEXT: s_mov_b32 s6, s2
+; GFX950-SDAG-NEXT: s_mov_b32 s7, s3
+; GFX950-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX950-SDAG-NEXT: s_mov_b32 s4, s10
+; GFX950-SDAG-NEXT: s_mov_b32 s5, s11
+; GFX950-SDAG-NEXT: buffer_load_dwordx2 v[0:1], off, s[4:7], 0
+; GFX950-SDAG-NEXT: s_mov_b32 s0, s8
+; GFX950-SDAG-NEXT: s_mov_b32 s1, s9
+; GFX950-SDAG-NEXT: s_movk_i32 s4, 0x7e00
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: v_readfirstlane_b32 s5, v1
+; GFX950-SDAG-NEXT: s_and_b32 s6, s5, 0x1ff
+; GFX950-SDAG-NEXT: v_or_b32_e32 v0, s6, v0
+; GFX950-SDAG-NEXT: s_lshr_b32 s7, s5, 8
+; GFX950-SDAG-NEXT: s_bfe_u32 s8, s5, 0xb0014
+; GFX950-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
+; GFX950-SDAG-NEXT: s_and_b32 s6, s7, 0xffe
+; GFX950-SDAG-NEXT: s_sub_i32 s7, 0x3f1, s8
+; GFX950-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX950-SDAG-NEXT: v_med3_i32 v1, s7, 0, 13
+; GFX950-SDAG-NEXT: v_readfirstlane_b32 s7, v0
+; GFX950-SDAG-NEXT: s_or_b32 s6, s6, s7
+; GFX950-SDAG-NEXT: v_readfirstlane_b32 s9, v1
+; GFX950-SDAG-NEXT: s_or_b32 s7, s6, 0x1000
+; GFX950-SDAG-NEXT: s_lshr_b32 s10, s7, s9
+; GFX950-SDAG-NEXT: s_lshl_b32 s9, s10, s9
+; GFX950-SDAG-NEXT: s_cmp_lg_u32 s9, s7
+; GFX950-SDAG-NEXT: s_cselect_b32 s7, 1, 0
+; GFX950-SDAG-NEXT: s_addk_i32 s8, 0xfc10
+; GFX950-SDAG-NEXT: s_lshl_b32 s9, s8, 12
+; GFX950-SDAG-NEXT: s_or_b32 s7, s10, s7
+; GFX950-SDAG-NEXT: s_or_b32 s9, s6, s9
+; GFX950-SDAG-NEXT: s_cmp_lt_i32 s8, 1
+; GFX950-SDAG-NEXT: s_cselect_b32 s7, s7, s9
+; GFX950-SDAG-NEXT: s_and_b32 s9, s7, 7
+; GFX950-SDAG-NEXT: s_cmp_gt_i32 s9, 5
+; GFX950-SDAG-NEXT: s_cselect_b32 s10, 1, 0
+; GFX950-SDAG-NEXT: s_cmp_eq_u32 s9, 3
+; GFX950-SDAG-NEXT: s_cselect_b32 s9, 1, 0
+; GFX950-SDAG-NEXT: s_lshr_b32 s7, s7, 2
+; GFX950-SDAG-NEXT: s_or_b32 s9, s9, s10
+; GFX950-SDAG-NEXT: s_add_i32 s7, s7, s9
+; GFX950-SDAG-NEXT: s_cmp_lt_i32 s8, 31
+; GFX950-SDAG-NEXT: s_cselect_b32 s7, s7, 0x7c00
+; GFX950-SDAG-NEXT: s_cmp_lg_u32 s6, 0
+; GFX950-SDAG-NEXT: s_cselect_b32 s4, s4, 0x7c00
+; GFX950-SDAG-NEXT: s_cmpk_eq_i32 s8, 0x40f
+; GFX950-SDAG-NEXT: s_cselect_b32 s4, s4, s7
+; GFX950-SDAG-NEXT: s_lshr_b32 s5, s5, 16
+; GFX950-SDAG-NEXT: s_and_b32 s5, s5, 0x8000
+; GFX950-SDAG-NEXT: s_or_b32 s4, s5, s4
+; GFX950-SDAG-NEXT: v_mov_b32_e32 v0, s4
+; GFX950-SDAG-NEXT: buffer_store_short v0, off, s[0:3], 0
+; GFX950-SDAG-NEXT: s_endpgm
+;
+; GFX950-GISEL-LABEL: fptrunc_f64_to_f16_afn:
+; GFX950-GISEL: ; %bb.0: ; %entry
+; GFX950-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX950-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX950-GISEL-NEXT: s_load_dwordx2 s[2:3], s[2:3], 0x0
+; GFX950-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX950-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[2:3]
+; GFX950-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0
+; GFX950-GISEL-NEXT: s_mov_b32 s2, -1
+; GFX950-GISEL-NEXT: s_mov_b32 s3, 0xf000
+; GFX950-GISEL-NEXT: buffer_store_short v0, off, s[0:3], 0
+; GFX950-GISEL-NEXT: s_endpgm
+;
+; GFX11-SDAG-TRUE16-LABEL: fptrunc_f64_to_f16_afn:
+; GFX11-SDAG-TRUE16: ; %bb.0: ; %entry
+; GFX11-SDAG-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s6, -1
+; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s7, 0x31016000
+; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s10, s6
+; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s11, s7
+; GFX11-SDAG-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s8, s2
+; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s9, s3
+; GFX11-SDAG-TRUE16-NEXT: buffer_load_b64 v[0:1], off, s[8:11], 0
+; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-SDAG-TRUE16-NEXT: v_readfirstlane_b32 s2, v1
+; GFX11-SDAG-TRUE16-NEXT: s_and_b32 s3, s2, 0x1ff
+; GFX11-SDAG-TRUE16-NEXT: s_lshr_b32 s5, s2, 8
+; GFX11-SDAG-TRUE16-NEXT: v_or_b32_e32 v0, s3, v0
+; GFX11-SDAG-TRUE16-NEXT: s_bfe_u32 s3, s2, 0xb0014
+; GFX11-SDAG-TRUE16-NEXT: s_and_b32 s5, s5, 0xffe
+; GFX11-SDAG-TRUE16-NEXT: s_sub_i32 s4, 0x3f1, s3
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
+; GFX11-SDAG-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-SDAG-TRUE16-NEXT: v_med3_i32 v1, s4, 0, 13
+; GFX11-SDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX11-SDAG-TRUE16-NEXT: v_readfirstlane_b32 s8, v1
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
+; GFX11-SDAG-TRUE16-NEXT: v_readfirstlane_b32 s4, v0
+; GFX11-SDAG-TRUE16-NEXT: s_or_b32 s4, s5, s4
+; GFX11-SDAG-TRUE16-NEXT: s_or_b32 s5, s4, 0x1000
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-SDAG-TRUE16-NEXT: s_lshr_b32 s9, s5, s8
+; GFX11-SDAG-TRUE16-NEXT: s_lshl_b32 s8, s9, s8
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_4) | instid1(SALU_CYCLE_1)
+; GFX11-SDAG-TRUE16-NEXT: s_cmp_lg_u32 s8, s5
+; GFX11-SDAG-TRUE16-NEXT: s_cselect_b32 s5, 1, 0
+; GFX11-SDAG-TRUE16-NEXT: s_addk_i32 s3, 0xfc10
+; GFX11-SDAG-TRUE16-NEXT: s_or_b32 s5, s9, s5
+; GFX11-SDAG-TRUE16-NEXT: s_lshl_b32 s8, s3, 12
+; GFX11-SDAG-TRUE16-NEXT: s_or_b32 s8, s4, s8
+; GFX11-SDAG-TRUE16-NEXT: s_cmp_lt_i32 s3, 1
+; GFX11-SDAG-TRUE16-NEXT: s_cselect_b32 s5, s5, s8
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-SDAG-TRUE16-NEXT: s_and_b32 s8, s5, 7
+; GFX11-SDAG-TRUE16-NEXT: s_cmp_gt_i32 s8, 5
+; GFX11-SDAG-TRUE16-NEXT: s_cselect_b32 s9, 1, 0
+; GFX11-SDAG-TRUE16-NEXT: s_cmp_eq_u32 s8, 3
+; GFX11-SDAG-TRUE16-NEXT: s_cselect_b32 s8, 1, 0
+; GFX11-SDAG-TRUE16-NEXT: s_lshr_b32 s5, s5, 2
+; GFX11-SDAG-TRUE16-NEXT: s_or_b32 s8, s8, s9
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-SDAG-TRUE16-NEXT: s_add_i32 s5, s5, s8
+; GFX11-SDAG-TRUE16-NEXT: s_cmp_lt_i32 s3, 31
+; GFX11-SDAG-TRUE16-NEXT: s_movk_i32 s8, 0x7e00
+; GFX11-SDAG-TRUE16-NEXT: s_cselect_b32 s5, s5, 0x7c00
+; GFX11-SDAG-TRUE16-NEXT: s_cmp_lg_u32 s4, 0
+; GFX11-SDAG-TRUE16-NEXT: s_cselect_b32 s4, s8, 0x7c00
+; GFX11-SDAG-TRUE16-NEXT: s_cmpk_eq_i32 s3, 0x40f
+; GFX11-SDAG-TRUE16-NEXT: s_cselect_b32 s3, s4, s5
+; GFX11-SDAG-TRUE16-NEXT: s_lshr_b32 s2, s2, 16
+; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s4, s0
+; GFX11-SDAG-TRUE16-NEXT: s_and_b32 s2, s2, 0x8000
+; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s5, s1
+; GFX11-SDAG-TRUE16-NEXT: s_or_b32 s2, s2, s3
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b32_e32 v0, s2
+; GFX11-SDAG-TRUE16-NEXT: buffer_store_b16 v0, off, s[4:7], 0
+; GFX11-SDAG-TRUE16-NEXT: s_endpgm
+;
+; GFX11-SDAG-FAKE16-LABEL: fptrunc_f64_to_f16_afn:
+; GFX11-SDAG-FAKE16: ; %bb.0: ; %entry
+; GFX11-SDAG-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s6, -1
+; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s7, 0x31016000
+; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s10, s6
+; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s11, s7
+; GFX11-SDAG-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s8, s2
+; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s9, s3
+; GFX11-SDAG-FAKE16-NEXT: buffer_load_b64 v[0:1], off, s[8:11], 0
+; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-SDAG-FAKE16-NEXT: v_readfirstlane_b32 s2, v1
+; GFX11-SDAG-FAKE16-NEXT: s_and_b32 s3, s2, 0x1ff
+; GFX11-SDAG-FAKE16-NEXT: s_lshr_b32 s5, s2, 8
+; GFX11-SDAG-FAKE16-NEXT: v_or_b32_e32 v0, s3, v0
+; GFX11-SDAG-FAKE16-NEXT: s_bfe_u32 s3, s2, 0xb0014
+; GFX11-SDAG-FAKE16-NEXT: s_and_b32 s5, s5, 0xffe
+; GFX11-SDAG-FAKE16-NEXT: s_sub_i32 s4, 0x3f1, s3
+; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
+; GFX11-SDAG-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-SDAG-FAKE16-NEXT: v_med3_i32 v1, s4, 0, 13
+; GFX11-SDAG-FAKE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT: v_readfirstlane_b32 s8, v1
+; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
+; GFX11-SDAG-FAKE16-NEXT: v_readfirstlane_b32 s4, v0
+; GFX11-SDAG-FAKE16-NEXT: s_or_b32 s4, s5, s4
+; GFX11-SDAG-FAKE16-NEXT: s_or_b32 s5, s4, 0x1000
+; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-SDAG-FAKE16-NEXT: s_lshr_b32 s9, s5, s8
+; GFX11-SDAG-FAKE16-NEXT: s_lshl_b32 s8, s9, s8
+; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_4) | instid1(SALU_CYCLE_1)
+; GFX11-SDAG-FAKE16-NEXT: s_cmp_lg_u32 s8, s5
+; GFX11-SDAG-FAKE16-NEXT: s_cselect_b32 s5, 1, 0
+; GFX11-SDAG-FAKE16-NEXT: s_addk_i32 s3, 0xfc10
+; GFX11-SDAG-FAKE16-NEXT: s_or_b32 s5, s9, s5
+; GFX11-SDAG-FAKE16-NEXT: s_lshl_b32 s8, s3, 12
+; GFX11-SDAG-FAKE16-NEXT: s_or_b32 s8, s4, s8
+; GFX11-SDAG-FAKE16-NEXT: s_cmp_lt_i32 s3, 1
+; GFX11-SDAG-FAKE16-NEXT: s_cselect_b32 s5, s5, s8
+; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-SDAG-FAKE16-NEXT: s_and_b32 s8, s5, 7
+; GFX11-SDAG-FAKE16-NEXT: s_cmp_gt_i32 s8, 5
+; GFX11-SDAG-FAKE16-NEXT: s_cselect_b32 s9, 1, 0
+; GFX11-SDAG-FAKE16-NEXT: s_cmp_eq_u32 s8, 3
+; GFX11-SDAG-FAKE16-NEXT: s_cselect_b32 s8, 1, 0
+; GFX11-SDAG-FAKE16-NEXT: s_lshr_b32 s5, s5, 2
+; GFX11-SDAG-FAKE16-NEXT: s_or_b32 s8, s8, s9
+; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-SDAG-FAKE16-NEXT: s_add_i32 s5, s5, s8
+; GFX11-SDAG-FAKE16-NEXT: s_cmp_lt_i32 s3, 31
+; GFX11-SDAG-FAKE16-NEXT: s_movk_i32 s8, 0x7e00
+; GFX11-SDAG-FAKE16-NEXT: s_cselect_b32 s5, s5, 0x7c00
+; GFX11-SDAG-FAKE16-NEXT: s_cmp_lg_u32 s4, 0
+; GFX11-SDAG-FAKE16-NEXT: s_cselect_b32 s4, s8, 0x7c00
+; GFX11-SDAG-FAKE16-NEXT: s_cmpk_eq_i32 s3, 0x40f
+; GFX11-SDAG-FAKE16-NEXT: s_cselect_b32 s3, s4, s5
+; GFX11-SDAG-FAKE16-NEXT: s_lshr_b32 s2, s2, 16
+; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s4, s0
+; GFX11-SDAG-FAKE16-NEXT: s_and_b32 s2, s2, 0x8000
+; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s5, s1
+; GFX11-SDAG-FAKE16-NEXT: s_or_b32 s2, s2, s3
+; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-SDAG-FAKE16-NEXT: v_mov_b32_e32 v0, s2
+; GFX11-SDAG-FAKE16-NEXT: buffer_store_b16 v0, off, s[4:7], 0
+; GFX11-SDAG-FAKE16-NEXT: s_endpgm
+;
+; GFX11-GISEL-TRUE16-LABEL: fptrunc_f64_to_f16_afn:
+; GFX11-GISEL-TRUE16: ; %bb.0: ; %entry
+; GFX11-GISEL-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-GISEL-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-TRUE16-NEXT: s_load_b64 s[2:3], s[2:3], 0x0
+; GFX11-GISEL-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
 ; GFX11-GISEL-TRUE16-NEXT: v_cvt_f32_f64_e32 v0, s[2:3]
 ; GFX11-GISEL-TRUE16-NEXT: s_mov_b32 s2, -1
 ; GFX11-GISEL-TRUE16-NEXT: s_mov_b32 s3, 0x31016000
@@ -384,7 +1539,7 @@ define amdgpu_kernel void @fptrunc_f64_to_f16(
 ; GFX11-GISEL-TRUE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0
 ; GFX11-GISEL-TRUE16-NEXT: s_endpgm
 ;
-; GFX11-GISEL-FAKE16-LABEL: fptrunc_f64_to_f16:
+; GFX11-GISEL-FAKE16-LABEL: fptrunc_f64_to_f16_afn:
 ; GFX11-GISEL-FAKE16: ; %bb.0: ; %entry
 ; GFX11-GISEL-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
 ; GFX11-GISEL-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
@@ -401,7 +1556,7 @@ define amdgpu_kernel void @fptrunc_f64_to_f16(
     ptr addrspace(1) %a) {
 entry:
   %a.val = load double, ptr addrspace(1) %a
-  %r.val = fptrunc double %a.val to half
+  %r.val = fptrunc afn double %a.val to half
   store half %r.val, ptr addrspace(1) %r
   ret void
 }
@@ -626,25 +1781,106 @@ entry:
 define amdgpu_kernel void @fptrunc_v2f64_to_v2f16(
 ; SI-SDAG-LABEL: fptrunc_v2f64_to_v2f16:
 ; SI-SDAG: ; %bb.0: ; %entry
-; SI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9
-; SI-SDAG-NEXT: s_mov_b32 s7, 0xf000
-; SI-SDAG-NEXT: s_mov_b32 s6, -1
-; SI-SDAG-NEXT: s_mov_b32 s10, s6
-; SI-SDAG-NEXT: s_mov_b32 s11, s7
+; SI-SDAG-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x9
+; SI-SDAG-NEXT: s_mov_b32 s3, 0xf000
+; SI-SDAG-NEXT: s_mov_b32 s2, -1
+; SI-SDAG-NEXT: s_mov_b32 s10, s2
+; SI-SDAG-NEXT: s_mov_b32 s11, s3
 ; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
-; SI-SDAG-NEXT: s_mov_b32 s8, s2
-; SI-SDAG-NEXT: s_mov_b32 s9, s3
+; SI-SDAG-NEXT: s_mov_b32 s8, s6
+; SI-SDAG-NEXT: s_mov_b32 s9, s7
 ; SI-SDAG-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0
-; SI-SDAG-NEXT: s_mov_b32 s4, s0
-; SI-SDAG-NEXT: s_mov_b32 s5, s1
+; SI-SDAG-NEXT: s_movk_i32 s0, 0x7e00
 ; SI-SDAG-NEXT: s_waitcnt vmcnt(0)
-; SI-SDAG-NEXT: v_cvt_f32_f64_e32 v2, v[2:3]
-; SI-SDAG-NEXT: v_cvt_f32_f64_e32 v0, v[0:1]
-; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v2
-; SI-SDAG-NEXT: v_lshlrev_b32_e32 v1, 16, v1
-; SI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0
-; SI-SDAG-NEXT: v_or_b32_e32 v0, v0, v1
-; SI-SDAG-NEXT: buffer_store_dword v0, off, s[4:7], 0
+; SI-SDAG-NEXT: v_readfirstlane_b32 s1, v3
+; SI-SDAG-NEXT: v_readfirstlane_b32 s6, v1
+; SI-SDAG-NEXT: s_and_b32 s7, s1, 0x1ff
+; SI-SDAG-NEXT: s_lshr_b32 s8, s1, 8
+; SI-SDAG-NEXT: s_bfe_u32 s9, s1, 0xb0014
+; SI-SDAG-NEXT: v_or_b32_e32 v1, s7, v2
+; SI-SDAG-NEXT: s_and_b32 s7, s8, 0xffe
+; SI-SDAG-NEXT: s_sub_i32 s8, 0x3f1, s9
+; SI-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1
+; SI-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; SI-SDAG-NEXT: v_med3_i32 v2, s8, 0, 13
+; SI-SDAG-NEXT: v_readfirstlane_b32 s8, v1
+; SI-SDAG-NEXT: v_readfirstlane_b32 s10, v2
+; SI-SDAG-NEXT: s_or_b32 s7, s7, s8
+; SI-SDAG-NEXT: s_or_b32 s8, s7, 0x1000
+; SI-SDAG-NEXT: s_lshr_b32 s11, s8, s10
+; SI-SDAG-NEXT: s_lshl_b32 s10, s11, s10
+; SI-SDAG-NEXT: s_cmp_lg_u32 s10, s8
+; SI-SDAG-NEXT: s_cselect_b32 s8, 1, 0
+; SI-SDAG-NEXT: s_addk_i32 s9, 0xfc10
+; SI-SDAG-NEXT: s_or_b32 s8, s11, s8
+; SI-SDAG-NEXT: s_lshl_b32 s10, s9, 12
+; SI-SDAG-NEXT: s_or_b32 s10, s7, s10
+; SI-SDAG-NEXT: s_cmp_lt_i32 s9, 1
+; SI-SDAG-NEXT: s_cselect_b32 s8, s8, s10
+; SI-SDAG-NEXT: s_and_b32 s10, s8, 7
+; SI-SDAG-NEXT: s_cmp_gt_i32 s10, 5
+; SI-SDAG-NEXT: s_cselect_b32 s11, 1, 0
+; SI-SDAG-NEXT: s_cmp_eq_u32 s10, 3
+; SI-SDAG-NEXT: s_cselect_b32 s10, 1, 0
+; SI-SDAG-NEXT: s_lshr_b32 s8, s8, 2
+; SI-SDAG-NEXT: s_or_b32 s10, s10, s11
+; SI-SDAG-NEXT: s_add_i32 s8, s8, s10
+; SI-SDAG-NEXT: s_cmp_lt_i32 s9, 31
+; SI-SDAG-NEXT: s_cselect_b32 s8, s8, 0x7c00
+; SI-SDAG-NEXT: s_cmp_lg_u32 s7, 0
+; SI-SDAG-NEXT: s_cselect_b32 s7, s0, 0x7c00
+; SI-SDAG-NEXT: s_cmpk_eq_i32 s9, 0x40f
+; SI-SDAG-NEXT: s_cselect_b32 s7, s7, s8
+; SI-SDAG-NEXT: s_lshr_b32 s1, s1, 16
+; SI-SDAG-NEXT: s_and_b32 s8, s6, 0x1ff
+; SI-SDAG-NEXT: s_lshr_b32 s9, s6, 8
+; SI-SDAG-NEXT: s_bfe_u32 s10, s6, 0xb0014
+; SI-SDAG-NEXT: s_and_b32 s1, s1, 0x8000
+; SI-SDAG-NEXT: v_or_b32_e32 v0, s8, v0
+; SI-SDAG-NEXT: s_and_b32 s8, s9, 0xffe
+; SI-SDAG-NEXT: s_sub_i32 s9, 0x3f1, s10
+; SI-SDAG-NEXT: s_or_b32 s1, s1, s7
+; SI-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
+; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; SI-SDAG-NEXT: v_med3_i32 v1, s9, 0, 13
+; SI-SDAG-NEXT: s_lshl_b32 s1, s1, 16
+; SI-SDAG-NEXT: v_readfirstlane_b32 s7, v0
+; SI-SDAG-NEXT: v_readfirstlane_b32 s9, v1
+; SI-SDAG-NEXT: s_or_b32 s7, s8, s7
+; SI-SDAG-NEXT: s_or_b32 s8, s7, 0x1000
+; SI-SDAG-NEXT: s_lshr_b32 s11, s8, s9
+; SI-SDAG-NEXT: s_lshl_b32 s9, s11, s9
+; SI-SDAG-NEXT: s_cmp_lg_u32 s9, s8
+; SI-SDAG-NEXT: s_cselect_b32 s8, 1, 0
+; SI-SDAG-NEXT: s_addk_i32 s10, 0xfc10
+; SI-SDAG-NEXT: s_or_b32 s8, s11, s8
+; SI-SDAG-NEXT: s_lshl_b32 s9, s10, 12
+; SI-SDAG-NEXT: s_or_b32 s9, s7, s9
+; SI-SDAG-NEXT: s_cmp_lt_i32 s10, 1
+; SI-SDAG-NEXT: s_cselect_b32 s8, s8, s9
+; SI-SDAG-NEXT: s_and_b32 s9, s8, 7
+; SI-SDAG-NEXT: s_cmp_gt_i32 s9, 5
+; SI-SDAG-NEXT: s_cselect_b32 s11, 1, 0
+; SI-SDAG-NEXT: s_cmp_eq_u32 s9, 3
+; SI-SDAG-NEXT: s_cselect_b32 s9, 1, 0
+; SI-SDAG-NEXT: s_lshr_b32 s8, s8, 2
+; SI-SDAG-NEXT: s_or_b32 s9, s9, s11
+; SI-SDAG-NEXT: s_add_i32 s8, s8, s9
+; SI-SDAG-NEXT: s_cmp_lt_i32 s10, 31
+; SI-SDAG-NEXT: s_cselect_b32 s8, s8, 0x7c00
+; SI-SDAG-NEXT: s_cmp_lg_u32 s7, 0
+; SI-SDAG-NEXT: s_cselect_b32 s0, s0, 0x7c00
+; SI-SDAG-NEXT: s_cmpk_eq_i32 s10, 0x40f
+; SI-SDAG-NEXT: s_cselect_b32 s0, s0, s8
+; SI-SDAG-NEXT: s_lshr_b32 s6, s6, 16
+; SI-SDAG-NEXT: s_and_b32 s6, s6, 0x8000
+; SI-SDAG-NEXT: s_or_b32 s0, s6, s0
+; SI-SDAG-NEXT: s_and_b32 s0, s0, 0xffff
+; SI-SDAG-NEXT: s_or_b32 s6, s0, s1
+; SI-SDAG-NEXT: s_mov_b32 s0, s4
+; SI-SDAG-NEXT: s_mov_b32 s1, s5
+; SI-SDAG-NEXT: v_mov_b32_e32 v0, s6
+; SI-SDAG-NEXT: buffer_store_dword v0, off, s[0:3], 0
 ; SI-SDAG-NEXT: s_endpgm
 ;
 ; SI-GISEL-LABEL: fptrunc_v2f64_to_v2f16:
@@ -654,6 +1890,1251 @@ define amdgpu_kernel void @fptrunc_v2f64_to_v2f16(
 ; SI-GISEL-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0
 ; SI-GISEL-NEXT: s_mov_b32 s2, -1
 ; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; SI-GISEL-NEXT: s_bfe_u32 s3, s5, 0xb0014
+; SI-GISEL-NEXT: s_lshr_b32 s8, s5, 8
+; SI-GISEL-NEXT: s_and_b32 s9, s5, 0x1ff
+; SI-GISEL-NEXT: s_addk_i32 s3, 0xfc10
+; SI-GISEL-NEXT: s_and_b32 s8, s8, 0xffe
+; SI-GISEL-NEXT: s_or_b32 s4, s9, s4
+; SI-GISEL-NEXT: s_cmp_lg_u32 s4, 0
+; SI-GISEL-NEXT: s_cselect_b32 s4, 1, 0
+; SI-GISEL-NEXT: s_or_b32 s4, s8, s4
+; SI-GISEL-NEXT: s_cmp_lg_u32 s4, 0
+; SI-GISEL-NEXT: s_cselect_b32 s8, 1, 0
+; SI-GISEL-NEXT: s_lshl_b32 s8, s8, 9
+; SI-GISEL-NEXT: s_lshl_b32 s9, s3, 12
+; SI-GISEL-NEXT: s_sub_i32 s10, 1, s3
+; SI-GISEL-NEXT: s_or_b32 s11, s4, 0x1000
+; SI-GISEL-NEXT: s_or_b32 s8, s8, 0x7c00
+; SI-GISEL-NEXT: s_or_b32 s4, s4, s9
+; SI-GISEL-NEXT: s_max_i32 s9, s10, 0
+; SI-GISEL-NEXT: s_min_i32 s9, s9, 13
+; SI-GISEL-NEXT: s_lshr_b32 s10, s11, s9
+; SI-GISEL-NEXT: s_lshl_b32 s9, s10, s9
+; SI-GISEL-NEXT: s_cmp_lg_u32 s9, s11
+; SI-GISEL-NEXT: s_cselect_b32 s9, 1, 0
+; SI-GISEL-NEXT: s_or_b32 s9, s10, s9
+; SI-GISEL-NEXT: s_cmp_lt_i32 s3, 1
+; SI-GISEL-NEXT: s_cselect_b32 s4, s9, s4
+; SI-GISEL-NEXT: s_and_b32 s9, s4, 7
+; SI-GISEL-NEXT: s_lshr_b32 s4, s4, 2
+; SI-GISEL-NEXT: s_cmp_eq_u32 s9, 3
+; SI-GISEL-NEXT: s_cselect_b32 s10, 1, 0
+; SI-GISEL-NEXT: s_cmp_gt_i32 s9, 5
+; SI-GISEL-NEXT: s_cselect_b32 s9, 1, 0
+; SI-GISEL-NEXT: s_or_b32 s9, s10, s9
+; SI-GISEL-NEXT: s_add_i32 s4, s4, s9
+; SI-GISEL-NEXT: s_cmp_gt_i32 s3, 30
+; SI-GISEL-NEXT: s_cselect_b32 s4, 0x7c00, s4
+; SI-GISEL-NEXT: s_cmpk_eq_i32 s3, 0x40f
+; SI-GISEL-NEXT: s_cselect_b32 s3, s8, s4
+; SI-GISEL-NEXT: s_lshr_b32 s4, s5, 16
+; SI-GISEL-NEXT: s_bfe_u32 s5, s7, 0xb0014
+; SI-GISEL-NEXT: s_lshr_b32 s8, s7, 8
+; SI-GISEL-NEXT: s_and_b32 s9, s7, 0x1ff
+; SI-GISEL-NEXT: s_and_b32 s4, s4, 0x8000
+; SI-GISEL-NEXT: s_addk_i32 s5, 0xfc10
+; SI-GISEL-NEXT: s_and_b32 s8, s8, 0xffe
+; SI-GISEL-NEXT: s_or_b32 s6, s9, s6
+; SI-GISEL-NEXT: s_or_b32 s3, s4, s3
+; SI-GISEL-NEXT: s_cmp_lg_u32 s6, 0
+; SI-GISEL-NEXT: s_cselect_b32 s4, 1, 0
+; SI-GISEL-NEXT: s_or_b32 s4, s8, s4
+; SI-GISEL-NEXT: s_cmp_lg_u32 s4, 0
+; SI-GISEL-NEXT: s_cselect_b32 s6, 1, 0
+; SI-GISEL-NEXT: s_lshl_b32 s6, s6, 9
+; SI-GISEL-NEXT: s_lshl_b32 s8, s5, 12
+; SI-GISEL-NEXT: s_sub_i32 s9, 1, s5
+; SI-GISEL-NEXT: s_or_b32 s10, s4, 0x1000
+; SI-GISEL-NEXT: s_or_b32 s6, s6, 0x7c00
+; SI-GISEL-NEXT: s_or_b32 s4, s4, s8
+; SI-GISEL-NEXT: s_max_i32 s8, s9, 0
+; SI-GISEL-NEXT: s_min_i32 s8, s8, 13
+; SI-GISEL-NEXT: s_lshr_b32 s9, s10, s8
+; SI-GISEL-NEXT: s_lshl_b32 s8, s9, s8
+; SI-GISEL-NEXT: s_cmp_lg_u32 s8, s10
+; SI-GISEL-NEXT: s_cselect_b32 s8, 1, 0
+; SI-GISEL-NEXT: s_or_b32 s8, s9, s8
+; SI-GISEL-NEXT: s_cmp_lt_i32 s5, 1
+; SI-GISEL-NEXT: s_cselect_b32 s4, s8, s4
+; SI-GISEL-NEXT: s_and_b32 s8, s4, 7
+; SI-GISEL-NEXT: s_lshr_b32 s4, s4, 2
+; SI-GISEL-NEXT: s_cmp_eq_u32 s8, 3
+; SI-GISEL-NEXT: s_cselect_b32 s9, 1, 0
+; SI-GISEL-NEXT: s_cmp_gt_i32 s8, 5
+; SI-GISEL-NEXT: s_cselect_b32 s8, 1, 0
+; SI-GISEL-NEXT: s_or_b32 s8, s9, s8
+; SI-GISEL-NEXT: s_add_i32 s4, s4, s8
+; SI-GISEL-NEXT: s_cmp_gt_i32 s5, 30
+; SI-GISEL-NEXT: s_cselect_b32 s4, 0x7c00, s4
+; SI-GISEL-NEXT: s_cmpk_eq_i32 s5, 0x40f
+; SI-GISEL-NEXT: s_cselect_b32 s4, s6, s4
+; SI-GISEL-NEXT: s_lshr_b32 s5, s7, 16
+; SI-GISEL-NEXT: s_and_b32 s3, s3, 0xffff
+; SI-GISEL-NEXT: s_and_b32 s5, s5, 0x8000
+; SI-GISEL-NEXT: s_or_b32 s4, s5, s4
+; SI-GISEL-NEXT: s_and_b32 s4, s4, 0xffff
+; SI-GISEL-NEXT: s_lshl_b32 s4, s4, 16
+; SI-GISEL-NEXT: s_or_b32 s4, s3, s4
+; SI-GISEL-NEXT: s_mov_b32 s3, 0xf000
+; SI-GISEL-NEXT: v_mov_b32_e32 v0, s4
+; SI-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; SI-GISEL-NEXT: s_endpgm
+;
+; VI-SDAG-LABEL: fptrunc_v2f64_to_v2f16:
+; VI-SDAG: ; %bb.0: ; %entry
+; VI-SDAG-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x24
+; VI-SDAG-NEXT: s_mov_b32 s3, 0xf000
+; VI-SDAG-NEXT: s_mov_b32 s2, -1
+; VI-SDAG-NEXT: s_mov_b32 s10, s2
+; VI-SDAG-NEXT: s_mov_b32 s11, s3
+; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; VI-SDAG-NEXT: s_mov_b32 s8, s6
+; VI-SDAG-NEXT: s_mov_b32 s9, s7
+; VI-SDAG-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0
+; VI-SDAG-NEXT: s_mov_b32 s0, s4
+; VI-SDAG-NEXT: s_mov_b32 s1, s5
+; VI-SDAG-NEXT: s_movk_i32 s6, 0x7e00
+; VI-SDAG-NEXT: s_waitcnt vmcnt(0)
+; VI-SDAG-NEXT: v_readfirstlane_b32 s4, v3
+; VI-SDAG-NEXT: s_and_b32 s7, s4, 0x1ff
+; VI-SDAG-NEXT: v_readfirstlane_b32 s5, v1
+; VI-SDAG-NEXT: v_or_b32_e32 v1, s7, v2
+; VI-SDAG-NEXT: s_lshr_b32 s8, s4, 8
+; VI-SDAG-NEXT: s_bfe_u32 s9, s4, 0xb0014
+; VI-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1
+; VI-SDAG-NEXT: s_and_b32 s7, s8, 0xffe
+; VI-SDAG-NEXT: s_sub_i32 s8, 0x3f1, s9
+; VI-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; VI-SDAG-NEXT: v_med3_i32 v2, s8, 0, 13
+; VI-SDAG-NEXT: v_readfirstlane_b32 s8, v1
+; VI-SDAG-NEXT: s_or_b32 s7, s7, s8
+; VI-SDAG-NEXT: v_readfirstlane_b32 s10, v2
+; VI-SDAG-NEXT: s_or_b32 s8, s7, 0x1000
+; VI-SDAG-NEXT: s_lshr_b32 s11, s8, s10
+; VI-SDAG-NEXT: s_lshl_b32 s10, s11, s10
+; VI-SDAG-NEXT: s_cmp_lg_u32 s10, s8
+; VI-SDAG-NEXT: s_cselect_b32 s8, 1, 0
+; VI-SDAG-NEXT: s_addk_i32 s9, 0xfc10
+; VI-SDAG-NEXT: s_lshl_b32 s10, s9, 12
+; VI-SDAG-NEXT: s_or_b32 s8, s11, s8
+; VI-SDAG-NEXT: s_or_b32 s10, s7, s10
+; VI-SDAG-NEXT: s_cmp_lt_i32 s9, 1
+; VI-SDAG-NEXT: s_cselect_b32 s8, s8, s10
+; VI-SDAG-NEXT: s_and_b32 s10, s8, 7
+; VI-SDAG-NEXT: s_cmp_gt_i32 s10, 5
+; VI-SDAG-NEXT: s_cselect_b32 s11, 1, 0
+; VI-SDAG-NEXT: s_cmp_eq_u32 s10, 3
+; VI-SDAG-NEXT: s_cselect_b32 s10, 1, 0
+; VI-SDAG-NEXT: s_lshr_b32 s8, s8, 2
+; VI-SDAG-NEXT: s_or_b32 s10, s10, s11
+; VI-SDAG-NEXT: s_add_i32 s8, s8, s10
+; VI-SDAG-NEXT: s_cmp_lt_i32 s9, 31
+; VI-SDAG-NEXT: s_cselect_b32 s8, s8, 0x7c00
+; VI-SDAG-NEXT: s_cmp_lg_u32 s7, 0
+; VI-SDAG-NEXT: s_cselect_b32 s7, s6, 0x7c00
+; VI-SDAG-NEXT: s_cmpk_eq_i32 s9, 0x40f
+; VI-SDAG-NEXT: s_cselect_b32 s7, s7, s8
+; VI-SDAG-NEXT: s_and_b32 s8, s5, 0x1ff
+; VI-SDAG-NEXT: v_or_b32_e32 v0, s8, v0
+; VI-SDAG-NEXT: s_lshr_b32 s4, s4, 16
+; VI-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
+; VI-SDAG-NEXT: s_lshr_b32 s9, s5, 8
+; VI-SDAG-NEXT: s_bfe_u32 s10, s5, 0xb0014
+; VI-SDAG-NEXT: s_and_b32 s4, s4, 0x8000
+; VI-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; VI-SDAG-NEXT: s_and_b32 s8, s9, 0xffe
+; VI-SDAG-NEXT: s_sub_i32 s9, 0x3f1, s10
+; VI-SDAG-NEXT: s_or_b32 s4, s4, s7
+; VI-SDAG-NEXT: v_readfirstlane_b32 s7, v0
+; VI-SDAG-NEXT: v_med3_i32 v1, s9, 0, 13
+; VI-SDAG-NEXT: s_or_b32 s7, s8, s7
+; VI-SDAG-NEXT: v_readfirstlane_b32 s9, v1
+; VI-SDAG-NEXT: s_or_b32 s8, s7, 0x1000
+; VI-SDAG-NEXT: s_lshr_b32 s11, s8, s9
+; VI-SDAG-NEXT: s_lshl_b32 s4, s4, 16
+; VI-SDAG-NEXT: s_lshl_b32 s9, s11, s9
+; VI-SDAG-NEXT: s_cmp_lg_u32 s9, s8
+; VI-SDAG-NEXT: s_cselect_b32 s8, 1, 0
+; VI-SDAG-NEXT: s_addk_i32 s10, 0xfc10
+; VI-SDAG-NEXT: s_lshl_b32 s9, s10, 12
+; VI-SDAG-NEXT: s_or_b32 s8, s11, s8
+; VI-SDAG-NEXT: s_or_b32 s9, s7, s9
+; VI-SDAG-NEXT: s_cmp_lt_i32 s10, 1
+; VI-SDAG-NEXT: s_cselect_b32 s8, s8, s9
+; VI-SDAG-NEXT: s_and_b32 s9, s8, 7
+; VI-SDAG-NEXT: s_cmp_gt_i32 s9, 5
+; VI-SDAG-NEXT: s_cselect_b32 s11, 1, 0
+; VI-SDAG-NEXT: s_cmp_eq_u32 s9, 3
+; VI-SDAG-NEXT: s_cselect_b32 s9, 1, 0
+; VI-SDAG-NEXT: s_lshr_b32 s8, s8, 2
+; VI-SDAG-NEXT: s_or_b32 s9, s9, s11
+; VI-SDAG-NEXT: s_add_i32 s8, s8, s9
+; VI-SDAG-NEXT: s_cmp_lt_i32 s10, 31
+; VI-SDAG-NEXT: s_cselect_b32 s8, s8, 0x7c00
+; VI-SDAG-NEXT: s_cmp_lg_u32 s7, 0
+; VI-SDAG-NEXT: s_cselect_b32 s6, s6, 0x7c00
+; VI-SDAG-NEXT: s_cmpk_eq_i32 s10, 0x40f
+; VI-SDAG-NEXT: s_cselect_b32 s6, s6, s8
+; VI-SDAG-NEXT: s_lshr_b32 s5, s5, 16
+; VI-SDAG-NEXT: s_and_b32 s5, s5, 0x8000
+; VI-SDAG-NEXT: s_or_b32 s5, s5, s6
+; VI-SDAG-NEXT: s_and_b32 s5, s5, 0xffff
+; VI-SDAG-NEXT: s_or_b32 s4, s5, s4
+; VI-SDAG-NEXT: v_mov_b32_e32 v0, s4
+; VI-SDAG-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; VI-SDAG-NEXT: s_endpgm
+;
+; VI-GISEL-LABEL: fptrunc_v2f64_to_v2f16:
+; VI-GISEL: ; %bb.0: ; %entry
+; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; VI-GISEL-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0
+; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; VI-GISEL-NEXT: s_bfe_u32 s2, s5, 0xb0014
+; VI-GISEL-NEXT: s_lshr_b32 s3, s5, 8
+; VI-GISEL-NEXT: s_and_b32 s8, s5, 0x1ff
+; VI-GISEL-NEXT: s_addk_i32 s2, 0xfc10
+; VI-GISEL-NEXT: s_and_b32 s3, s3, 0xffe
+; VI-GISEL-NEXT: s_or_b32 s4, s8, s4
+; VI-GISEL-NEXT: s_cmp_lg_u32 s4, 0
+; VI-GISEL-NEXT: s_cselect_b32 s4, 1, 0
+; VI-GISEL-NEXT: s_or_b32 s3, s3, s4
+; VI-GISEL-NEXT: s_cmp_lg_u32 s3, 0
+; VI-GISEL-NEXT: s_cselect_b32 s4, 1, 0
+; VI-GISEL-NEXT: s_sub_i32 s9, 1, s2
+; VI-GISEL-NEXT: s_lshl_b32 s8, s2, 12
+; VI-GISEL-NEXT: s_max_i32 s9, s9, 0
+; VI-GISEL-NEXT: s_or_b32 s8, s3, s8
+; VI-GISEL-NEXT: s_min_i32 s9, s9, 13
+; VI-GISEL-NEXT: s_bitset1_b32 s3, 12
+; VI-GISEL-NEXT: s_lshl_b32 s4, s4, 9
+; VI-GISEL-NEXT: s_lshr_b32 s10, s3, s9
+; VI-GISEL-NEXT: s_or_b32 s4, s4, 0x7c00
+; VI-GISEL-NEXT: s_lshl_b32 s9, s10, s9
+; VI-GISEL-NEXT: s_cmp_lg_u32 s9, s3
+; VI-GISEL-NEXT: s_cselect_b32 s3, 1, 0
+; VI-GISEL-NEXT: s_or_b32 s3, s10, s3
+; VI-GISEL-NEXT: s_cmp_lt_i32 s2, 1
+; VI-GISEL-NEXT: s_cselect_b32 s3, s3, s8
+; VI-GISEL-NEXT: s_and_b32 s8, s3, 7
+; VI-GISEL-NEXT: s_lshr_b32 s3, s3, 2
+; VI-GISEL-NEXT: s_cmp_eq_u32 s8, 3
+; VI-GISEL-NEXT: s_cselect_b32 s9, 1, 0
+; VI-GISEL-NEXT: s_cmp_gt_i32 s8, 5
+; VI-GISEL-NEXT: s_cselect_b32 s8, 1, 0
+; VI-GISEL-NEXT: s_or_b32 s8, s9, s8
+; VI-GISEL-NEXT: s_add_i32 s3, s3, s8
+; VI-GISEL-NEXT: s_cmp_gt_i32 s2, 30
+; VI-GISEL-NEXT: s_cselect_b32 s3, 0x7c00, s3
+; VI-GISEL-NEXT: s_cmpk_eq_i32 s2, 0x40f
+; VI-GISEL-NEXT: s_cselect_b32 s2, s4, s3
+; VI-GISEL-NEXT: s_lshr_b32 s3, s5, 16
+; VI-GISEL-NEXT: s_and_b32 s3, s3, 0x8000
+; VI-GISEL-NEXT: s_or_b32 s2, s3, s2
+; VI-GISEL-NEXT: s_bfe_u32 s3, s7, 0xb0014
+; VI-GISEL-NEXT: s_lshr_b32 s4, s7, 8
+; VI-GISEL-NEXT: s_and_b32 s5, s7, 0x1ff
+; VI-GISEL-NEXT: s_addk_i32 s3, 0xfc10
+; VI-GISEL-NEXT: s_and_b32 s4, s4, 0xffe
+; VI-GISEL-NEXT: s_or_b32 s5, s5, s6
+; VI-GISEL-NEXT: s_cmp_lg_u32 s5, 0
+; VI-GISEL-NEXT: s_cselect_b32 s5, 1, 0
+; VI-GISEL-NEXT: s_or_b32 s4, s4, s5
+; VI-GISEL-NEXT: s_cmp_lg_u32 s4, 0
+; VI-GISEL-NEXT: s_cselect_b32 s5, 1, 0
+; VI-GISEL-NEXT: s_sub_i32 s8, 1, s3
+; VI-GISEL-NEXT: s_lshl_b32 s6, s3, 12
+; VI-GISEL-NEXT: s_max_i32 s8, s8, 0
+; VI-GISEL-NEXT: s_or_b32 s6, s4, s6
+; VI-GISEL-NEXT: s_min_i32 s8, s8, 13
+; VI-GISEL-NEXT: s_bitset1_b32 s4, 12
+; VI-GISEL-NEXT: s_lshl_b32 s5, s5, 9
+; VI-GISEL-NEXT: s_lshr_b32 s9, s4, s8
+; VI-GISEL-NEXT: s_or_b32 s5, s5, 0x7c00
+; VI-GISEL-NEXT: s_lshl_b32 s8, s9, s8
+; VI-GISEL-NEXT: s_cmp_lg_u32 s8, s4
+; VI-GISEL-NEXT: s_cselect_b32 s4, 1, 0
+; VI-GISEL-NEXT: s_or_b32 s4, s9, s4
+; VI-GISEL-NEXT: s_cmp_lt_i32 s3, 1
+; VI-GISEL-NEXT: s_cselect_b32 s4, s4, s6
+; VI-GISEL-NEXT: s_and_b32 s6, s4, 7
+; VI-GISEL-NEXT: s_lshr_b32 s4, s4, 2
+; VI-GISEL-NEXT: s_cmp_eq_u32 s6, 3
+; VI-GISEL-NEXT: s_cselect_b32 s8, 1, 0
+; VI-GISEL-NEXT: s_cmp_gt_i32 s6, 5
+; VI-GISEL-NEXT: s_cselect_b32 s6, 1, 0
+; VI-GISEL-NEXT: s_or_b32 s6, s8, s6
+; VI-GISEL-NEXT: s_add_i32 s4, s4, s6
+; VI-GISEL-NEXT: s_cmp_gt_i32 s3, 30
+; VI-GISEL-NEXT: s_cselect_b32 s4, 0x7c00, s4
+; VI-GISEL-NEXT: s_cmpk_eq_i32 s3, 0x40f
+; VI-GISEL-NEXT: s_cselect_b32 s3, s5, s4
+; VI-GISEL-NEXT: s_lshr_b32 s4, s7, 16
+; VI-GISEL-NEXT: s_and_b32 s4, s4, 0x8000
+; VI-GISEL-NEXT: s_or_b32 s3, s4, s3
+; VI-GISEL-NEXT: s_and_b32 s3, s3, 0xffff
+; VI-GISEL-NEXT: s_and_b32 s2, s2, 0xffff
+; VI-GISEL-NEXT: s_lshl_b32 s3, s3, 16
+; VI-GISEL-NEXT: s_or_b32 s2, s2, s3
+; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2
+; VI-GISEL-NEXT: s_mov_b32 s2, -1
+; VI-GISEL-NEXT: s_mov_b32 s3, 0xf000
+; VI-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; VI-GISEL-NEXT: s_endpgm
+;
+; GFX9-SDAG-LABEL: fptrunc_v2f64_to_v2f16:
+; GFX9-SDAG: ; %bb.0: ; %entry
+; GFX9-SDAG-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x24
+; GFX9-SDAG-NEXT: s_mov_b32 s3, 0xf000
+; GFX9-SDAG-NEXT: s_mov_b32 s2, -1
+; GFX9-SDAG-NEXT: s_mov_b32 s6, s2
+; GFX9-SDAG-NEXT: s_mov_b32 s7, s3
+; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-SDAG-NEXT: s_mov_b32 s4, s10
+; GFX9-SDAG-NEXT: s_mov_b32 s5, s11
+; GFX9-SDAG-NEXT: buffer_load_dwordx4 v[0:3], off, s[4:7], 0
+; GFX9-SDAG-NEXT: s_mov_b32 s0, s8
+; GFX9-SDAG-NEXT: s_mov_b32 s1, s9
+; GFX9-SDAG-NEXT: s_movk_i32 s4, 0x7e00
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX9-SDAG-NEXT: v_readfirstlane_b32 s5, v3
+; GFX9-SDAG-NEXT: s_and_b32 s7, s5, 0x1ff
+; GFX9-SDAG-NEXT: v_readfirstlane_b32 s6, v1
+; GFX9-SDAG-NEXT: v_or_b32_e32 v1, s7, v2
+; GFX9-SDAG-NEXT: s_lshr_b32 s8, s5, 8
+; GFX9-SDAG-NEXT: s_bfe_u32 s9, s5, 0xb0014
+; GFX9-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1
+; GFX9-SDAG-NEXT: s_and_b32 s7, s8, 0xffe
+; GFX9-SDAG-NEXT: s_sub_i32 s8, 0x3f1, s9
+; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GFX9-SDAG-NEXT: v_med3_i32 v2, s8, 0, 13
+; GFX9-SDAG-NEXT: v_readfirstlane_b32 s8, v1
+; GFX9-SDAG-NEXT: s_or_b32 s7, s7, s8
+; GFX9-SDAG-NEXT: v_readfirstlane_b32 s10, v2
+; GFX9-SDAG-NEXT: s_or_b32 s8, s7, 0x1000
+; GFX9-SDAG-NEXT: s_lshr_b32 s11, s8, s10
+; GFX9-SDAG-NEXT: s_lshl_b32 s10, s11, s10
+; GFX9-SDAG-NEXT: s_cmp_lg_u32 s10, s8
+; GFX9-SDAG-NEXT: s_cselect_b32 s8, 1, 0
+; GFX9-SDAG-NEXT: s_addk_i32 s9, 0xfc10
+; GFX9-SDAG-NEXT: s_lshl_b32 s10, s9, 12
+; GFX9-SDAG-NEXT: s_or_b32 s8, s11, s8
+; GFX9-SDAG-NEXT: s_or_b32 s10, s7, s10
+; GFX9-SDAG-NEXT: s_cmp_lt_i32 s9, 1
+; GFX9-SDAG-NEXT: s_cselect_b32 s8, s8, s10
+; GFX9-SDAG-NEXT: s_and_b32 s10, s8, 7
+; GFX9-SDAG-NEXT: s_cmp_gt_i32 s10, 5
+; GFX9-SDAG-NEXT: s_cselect_b32 s11, 1, 0
+; GFX9-SDAG-NEXT: s_cmp_eq_u32 s10, 3
+; GFX9-SDAG-NEXT: s_cselect_b32 s10, 1, 0
+; GFX9-SDAG-NEXT: s_lshr_b32 s8, s8, 2
+; GFX9-SDAG-NEXT: s_or_b32 s10, s10, s11
+; GFX9-SDAG-NEXT: s_add_i32 s8, s8, s10
+; GFX9-SDAG-NEXT: s_cmp_lt_i32 s9, 31
+; GFX9-SDAG-NEXT: s_cselect_b32 s8, s8, 0x7c00
+; GFX9-SDAG-NEXT: s_cmp_lg_u32 s7, 0
+; GFX9-SDAG-NEXT: s_cselect_b32 s7, s4, 0x7c00
+; GFX9-SDAG-NEXT: s_cmpk_eq_i32 s9, 0x40f
+; GFX9-SDAG-NEXT: s_cselect_b32 s7, s7, s8
+; GFX9-SDAG-NEXT: s_and_b32 s8, s6, 0x1ff
+; GFX9-SDAG-NEXT: v_or_b32_e32 v0, s8, v0
+; GFX9-SDAG-NEXT: s_lshr_b32 s5, s5, 16
+; GFX9-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
+; GFX9-SDAG-NEXT: s_lshr_b32 s9, s6, 8
+; GFX9-SDAG-NEXT: s_bfe_u32 s10, s6, 0xb0014
+; GFX9-SDAG-NEXT: s_and_b32 s5, s5, 0x8000
+; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX9-SDAG-NEXT: s_and_b32 s8, s9, 0xffe
+; GFX9-SDAG-NEXT: s_sub_i32 s9, 0x3f1, s10
+; GFX9-SDAG-NEXT: s_or_b32 s5, s5, s7
+; GFX9-SDAG-NEXT: v_readfirstlane_b32 s7, v0
+; GFX9-SDAG-NEXT: v_med3_i32 v1, s9, 0, 13
+; GFX9-SDAG-NEXT: s_or_b32 s7, s8, s7
+; GFX9-SDAG-NEXT: v_readfirstlane_b32 s9, v1
+; GFX9-SDAG-NEXT: s_or_b32 s8, s7, 0x1000
+; GFX9-SDAG-NEXT: s_lshr_b32 s11, s8, s9
+; GFX9-SDAG-NEXT: s_lshl_b32 s9, s11, s9
+; GFX9-SDAG-NEXT: s_cmp_lg_u32 s9, s8
+; GFX9-SDAG-NEXT: s_cselect_b32 s8, 1, 0
+; GFX9-SDAG-NEXT: s_addk_i32 s10, 0xfc10
+; GFX9-SDAG-NEXT: s_lshl_b32 s9, s10, 12
+; GFX9-SDAG-NEXT: s_or_b32 s8, s11, s8
+; GFX9-SDAG-NEXT: s_or_b32 s9, s7, s9
+; GFX9-SDAG-NEXT: s_cmp_lt_i32 s10, 1
+; GFX9-SDAG-NEXT: s_cselect_b32 s8, s8, s9
+; GFX9-SDAG-NEXT: s_and_b32 s9, s8, 7
+; GFX9-SDAG-NEXT: s_cmp_gt_i32 s9, 5
+; GFX9-SDAG-NEXT: s_cselect_b32 s11, 1, 0
+; GFX9-SDAG-NEXT: s_cmp_eq_u32 s9, 3
+; GFX9-SDAG-NEXT: s_cselect_b32 s9, 1, 0
+; GFX9-SDAG-NEXT: s_lshr_b32 s8, s8, 2
+; GFX9-SDAG-NEXT: s_or_b32 s9, s9, s11
+; GFX9-SDAG-NEXT: s_add_i32 s8, s8, s9
+; GFX9-SDAG-NEXT: s_cmp_lt_i32 s10, 31
+; GFX9-SDAG-NEXT: s_cselect_b32 s8, s8, 0x7c00
+; GFX9-SDAG-NEXT: s_cmp_lg_u32 s7, 0
+; GFX9-SDAG-NEXT: s_cselect_b32 s4, s4, 0x7c00
+; GFX9-SDAG-NEXT: s_cmpk_eq_i32 s10, 0x40f
+; GFX9-SDAG-NEXT: s_cselect_b32 s4, s4, s8
+; GFX9-SDAG-NEXT: s_lshr_b32 s6, s6, 16
+; GFX9-SDAG-NEXT: s_and_b32 s6, s6, 0x8000
+; GFX9-SDAG-NEXT: s_or_b32 s4, s6, s4
+; GFX9-SDAG-NEXT: s_pack_ll_b32_b16 s4, s4, s5
+; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, s4
+; GFX9-SDAG-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GFX9-SDAG-NEXT: s_endpgm
+;
+; GFX9-GISEL-LABEL: fptrunc_v2f64_to_v2f16:
+; GFX9-GISEL: ; %bb.0: ; %entry
+; GFX9-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-GISEL-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0
+; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX9-GISEL-NEXT: s_bfe_u32 s2, s5, 0xb0014
+; GFX9-GISEL-NEXT: s_lshr_b32 s3, s5, 8
+; GFX9-GISEL-NEXT: s_and_b32 s8, s5, 0x1ff
+; GFX9-GISEL-NEXT: s_addk_i32 s2, 0xfc10
+; GFX9-GISEL-NEXT: s_and_b32 s3, s3, 0xffe
+; GFX9-GISEL-NEXT: s_or_b32 s4, s8, s4
+; GFX9-GISEL-NEXT: s_cmp_lg_u32 s4, 0
+; GFX9-GISEL-NEXT: s_cselect_b32 s4, 1, 0
+; GFX9-GISEL-NEXT: s_or_b32 s3, s3, s4
+; GFX9-GISEL-NEXT: s_cmp_lg_u32 s3, 0
+; GFX9-GISEL-NEXT: s_cselect_b32 s4, 1, 0
+; GFX9-GISEL-NEXT: s_sub_i32 s9, 1, s2
+; GFX9-GISEL-NEXT: s_lshl_b32 s8, s2, 12
+; GFX9-GISEL-NEXT: s_max_i32 s9, s9, 0
+; GFX9-GISEL-NEXT: s_or_b32 s8, s3, s8
+; GFX9-GISEL-NEXT: s_min_i32 s9, s9, 13
+; GFX9-GISEL-NEXT: s_bitset1_b32 s3, 12
+; GFX9-GISEL-NEXT: s_lshl_b32 s4, s4, 9
+; GFX9-GISEL-NEXT: s_lshr_b32 s10, s3, s9
+; GFX9-GISEL-NEXT: s_or_b32 s4, s4, 0x7c00
+; GFX9-GISEL-NEXT: s_lshl_b32 s9, s10, s9
+; GFX9-GISEL-NEXT: s_cmp_lg_u32 s9, s3
+; GFX9-GISEL-NEXT: s_cselect_b32 s3, 1, 0
+; GFX9-GISEL-NEXT: s_or_b32 s3, s10, s3
+; GFX9-GISEL-NEXT: s_cmp_lt_i32 s2, 1
+; GFX9-GISEL-NEXT: s_cselect_b32 s3, s3, s8
+; GFX9-GISEL-NEXT: s_and_b32 s8, s3, 7
+; GFX9-GISEL-NEXT: s_lshr_b32 s3, s3, 2
+; GFX9-GISEL-NEXT: s_cmp_eq_u32 s8, 3
+; GFX9-GISEL-NEXT: s_cselect_b32 s9, 1, 0
+; GFX9-GISEL-NEXT: s_cmp_gt_i32 s8, 5
+; GFX9-GISEL-NEXT: s_cselect_b32 s8, 1, 0
+; GFX9-GISEL-NEXT: s_or_b32 s8, s9, s8
+; GFX9-GISEL-NEXT: s_add_i32 s3, s3, s8
+; GFX9-GISEL-NEXT: s_cmp_gt_i32 s2, 30
+; GFX9-GISEL-NEXT: s_cselect_b32 s3, 0x7c00, s3
+; GFX9-GISEL-NEXT: s_cmpk_eq_i32 s2, 0x40f
+; GFX9-GISEL-NEXT: s_cselect_b32 s2, s4, s3
+; GFX9-GISEL-NEXT: s_lshr_b32 s3, s5, 16
+; GFX9-GISEL-NEXT: s_and_b32 s3, s3, 0x8000
+; GFX9-GISEL-NEXT: s_or_b32 s2, s3, s2
+; GFX9-GISEL-NEXT: s_bfe_u32 s3, s7, 0xb0014
+; GFX9-GISEL-NEXT: s_lshr_b32 s4, s7, 8
+; GFX9-GISEL-NEXT: s_and_b32 s5, s7, 0x1ff
+; GFX9-GISEL-NEXT: s_addk_i32 s3, 0xfc10
+; GFX9-GISEL-NEXT: s_and_b32 s4, s4, 0xffe
+; GFX9-GISEL-NEXT: s_or_b32 s5, s5, s6
+; GFX9-GISEL-NEXT: s_cmp_lg_u32 s5, 0
+; GFX9-GISEL-NEXT: s_cselect_b32 s5, 1, 0
+; GFX9-GISEL-NEXT: s_or_b32 s4, s4, s5
+; GFX9-GISEL-NEXT: s_cmp_lg_u32 s4, 0
+; GFX9-GISEL-NEXT: s_cselect_b32 s5, 1, 0
+; GFX9-GISEL-NEXT: s_sub_i32 s8, 1, s3
+; GFX9-GISEL-NEXT: s_lshl_b32 s6, s3, 12
+; GFX9-GISEL-NEXT: s_max_i32 s8, s8, 0
+; GFX9-GISEL-NEXT: s_or_b32 s6, s4, s6
+; GFX9-GISEL-NEXT: s_min_i32 s8, s8, 13
+; GFX9-GISEL-NEXT: s_bitset1_b32 s4, 12
+; GFX9-GISEL-NEXT: s_lshl_b32 s5, s5, 9
+; GFX9-GISEL-NEXT: s_lshr_b32 s9, s4, s8
+; GFX9-GISEL-NEXT: s_or_b32 s5, s5, 0x7c00
+; GFX9-GISEL-NEXT: s_lshl_b32 s8, s9, s8
+; GFX9-GISEL-NEXT: s_cmp_lg_u32 s8, s4
+; GFX9-GISEL-NEXT: s_cselect_b32 s4, 1, 0
+; GFX9-GISEL-NEXT: s_or_b32 s4, s9, s4
+; GFX9-GISEL-NEXT: s_cmp_lt_i32 s3, 1
+; GFX9-GISEL-NEXT: s_cselect_b32 s4, s4, s6
+; GFX9-GISEL-NEXT: s_and_b32 s6, s4, 7
+; GFX9-GISEL-NEXT: s_lshr_b32 s4, s4, 2
+; GFX9-GISEL-NEXT: s_cmp_eq_u32 s6, 3
+; GFX9-GISEL-NEXT: s_cselect_b32 s8, 1, 0
+; GFX9-GISEL-NEXT: s_cmp_gt_i32 s6, 5
+; GFX9-GISEL-NEXT: s_cselect_b32 s6, 1, 0
+; GFX9-GISEL-NEXT: s_or_b32 s6, s8, s6
+; GFX9-GISEL-NEXT: s_add_i32 s4, s4, s6
+; GFX9-GISEL-NEXT: s_cmp_gt_i32 s3, 30
+; GFX9-GISEL-NEXT: s_cselect_b32 s4, 0x7c00, s4
+; GFX9-GISEL-NEXT: s_cmpk_eq_i32 s3, 0x40f
+; GFX9-GISEL-NEXT: s_cselect_b32 s3, s5, s4
+; GFX9-GISEL-NEXT: s_lshr_b32 s4, s7, 16
+; GFX9-GISEL-NEXT: s_and_b32 s4, s4, 0x8000
+; GFX9-GISEL-NEXT: s_or_b32 s3, s4, s3
+; GFX9-GISEL-NEXT: s_pack_ll_b32_b16 s2, s2, s3
+; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX9-GISEL-NEXT: s_mov_b32 s2, -1
+; GFX9-GISEL-NEXT: s_mov_b32 s3, 0xf000
+; GFX9-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GFX9-GISEL-NEXT: s_endpgm
+;
+; GFX950-SDAG-LABEL: fptrunc_v2f64_to_v2f16:
+; GFX950-SDAG: ; %bb.0: ; %entry
+; GFX950-SDAG-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x24
+; GFX950-SDAG-NEXT: s_mov_b32 s3, 0xf000
+; GFX950-SDAG-NEXT: s_mov_b32 s2, -1
+; GFX950-SDAG-NEXT: s_mov_b32 s6, s2
+; GFX950-SDAG-NEXT: s_mov_b32 s7, s3
+; GFX950-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX950-SDAG-NEXT: s_mov_b32 s4, s10
+; GFX950-SDAG-NEXT: s_mov_b32 s5, s11
+; GFX950-SDAG-NEXT: buffer_load_dwordx4 v[0:3], off, s[4:7], 0
+; GFX950-SDAG-NEXT: s_mov_b32 s0, s8
+; GFX950-SDAG-NEXT: s_mov_b32 s1, s9
+; GFX950-SDAG-NEXT: s_movk_i32 s4, 0x7e00
+; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0)
+; GFX950-SDAG-NEXT: v_readfirstlane_b32 s5, v3
+; GFX950-SDAG-NEXT: s_and_b32 s7, s5, 0x1ff
+; GFX950-SDAG-NEXT: v_readfirstlane_b32 s6, v1
+; GFX950-SDAG-NEXT: v_or_b32_e32 v1, s7, v2
+; GFX950-SDAG-NEXT: s_lshr_b32 s8, s5, 8
+; GFX950-SDAG-NEXT: s_bfe_u32 s9, s5, 0xb0014
+; GFX950-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1
+; GFX950-SDAG-NEXT: s_and_b32 s7, s8, 0xffe
+; GFX950-SDAG-NEXT: s_sub_i32 s8, 0x3f1, s9
+; GFX950-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc
+; GFX950-SDAG-NEXT: v_med3_i32 v2, s8, 0, 13
+; GFX950-SDAG-NEXT: v_readfirstlane_b32 s8, v1
+; GFX950-SDAG-NEXT: s_or_b32 s7, s7, s8
+; GFX950-SDAG-NEXT: v_readfirstlane_b32 s10, v2
+; GFX950-SDAG-NEXT: s_or_b32 s8, s7, 0x1000
+; GFX950-SDAG-NEXT: s_lshr_b32 s11, s8, s10
+; GFX950-SDAG-NEXT: s_lshl_b32 s10, s11, s10
+; GFX950-SDAG-NEXT: s_cmp_lg_u32 s10, s8
+; GFX950-SDAG-NEXT: s_cselect_b32 s8, 1, 0
+; GFX950-SDAG-NEXT: s_addk_i32 s9, 0xfc10
+; GFX950-SDAG-NEXT: s_lshl_b32 s10, s9, 12
+; GFX950-SDAG-NEXT: s_or_b32 s8, s11, s8
+; GFX950-SDAG-NEXT: s_or_b32 s10, s7, s10
+; GFX950-SDAG-NEXT: s_cmp_lt_i32 s9, 1
+; GFX950-SDAG-NEXT: s_cselect_b32 s8, s8, s10
+; GFX950-SDAG-NEXT: s_and_b32 s10, s8, 7
+; GFX950-SDAG-NEXT: s_cmp_gt_i32 s10, 5
+; GFX950-SDAG-NEXT: s_cselect_b32 s11, 1, 0
+; GFX950-SDAG-NEXT: s_cmp_eq_u32 s10, 3
+; GFX950-SDAG-NEXT: s_cselect_b32 s10, 1, 0
+; GFX950-SDAG-NEXT: s_lshr_b32 s8, s8, 2
+; GFX950-SDAG-NEXT: s_or_b32 s10, s10, s11
+; GFX950-SDAG-NEXT: s_add_i32 s8, s8, s10
+; GFX950-SDAG-NEXT: s_cmp_lt_i32 s9, 31
+; GFX950-SDAG-NEXT: s_cselect_b32 s8, s8, 0x7c00
+; GFX950-SDAG-NEXT: s_cmp_lg_u32 s7, 0
+; GFX950-SDAG-NEXT: s_cselect_b32 s7, s4, 0x7c00
+; GFX950-SDAG-NEXT: s_cmpk_eq_i32 s9, 0x40f
+; GFX950-SDAG-NEXT: s_cselect_b32 s7, s7, s8
+; GFX950-SDAG-NEXT: s_and_b32 s8, s6, 0x1ff
+; GFX950-SDAG-NEXT: v_or_b32_e32 v0, s8, v0
+; GFX950-SDAG-NEXT: s_lshr_b32 s5, s5, 16
+; GFX950-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
+; GFX950-SDAG-NEXT: s_lshr_b32 s9, s6, 8
+; GFX950-SDAG-NEXT: s_bfe_u32 s10, s6, 0xb0014
+; GFX950-SDAG-NEXT: s_and_b32 s5, s5, 0x8000
+; GFX950-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX950-SDAG-NEXT: s_and_b32 s8, s9, 0xffe
+; GFX950-SDAG-NEXT: s_sub_i32 s9, 0x3f1, s10
+; GFX950-SDAG-NEXT: s_or_b32 s5, s5, s7
+; GFX950-SDAG-NEXT: v_readfirstlane_b32 s7, v0
+; GFX950-SDAG-NEXT: v_med3_i32 v1, s9, 0, 13
+; GFX950-SDAG-NEXT: s_or_b32 s7, s8, s7
+; GFX950-SDAG-NEXT: v_readfirstlane_b32 s9, v1
+; GFX950-SDAG-NEXT: s_or_b32 s8, s7, 0x1000
+; GFX950-SDAG-NEXT: s_lshr_b32 s11, s8, s9
+; GFX950-SDAG-NEXT: s_lshl_b32 s9, s11, s9
+; GFX950-SDAG-NEXT: s_cmp_lg_u32 s9, s8
+; GFX950-SDAG-NEXT: s_cselect_b32 s8, 1, 0
+; GFX950-SDAG-NEXT: s_addk_i32 s10, 0xfc10
+; GFX950-SDAG-NEXT: s_lshl_b32 s9, s10, 12
+; GFX950-SDAG-NEXT: s_or_b32 s8, s11, s8
+; GFX950-SDAG-NEXT: s_or_b32 s9, s7, s9
+; GFX950-SDAG-NEXT: s_cmp_lt_i32 s10, 1
+; GFX950-SDAG-NEXT: s_cselect_b32 s8, s8, s9
+; GFX950-SDAG-NEXT: s_and_b32 s9, s8, 7
+; GFX950-SDAG-NEXT: s_cmp_gt_i32 s9, 5
+; GFX950-SDAG-NEXT: s_cselect_b32 s11, 1, 0
+; GFX950-SDAG-NEXT: s_cmp_eq_u32 s9, 3
+; GFX950-SDAG-NEXT: s_cselect_b32 s9, 1, 0
+; GFX950-SDAG-NEXT: s_lshr_b32 s8, s8, 2
+; GFX950-SDAG-NEXT: s_or_b32 s9, s9, s11
+; GFX950-SDAG-NEXT: s_add_i32 s8, s8, s9
+; GFX950-SDAG-NEXT: s_cmp_lt_i32 s10, 31
+; GFX950-SDAG-NEXT: s_cselect_b32 s8, s8, 0x7c00
+; GFX950-SDAG-NEXT: s_cmp_lg_u32 s7, 0
+; GFX950-SDAG-NEXT: s_cselect_b32 s4, s4, 0x7c00
+; GFX950-SDAG-NEXT: s_cmpk_eq_i32 s10, 0x40f
+; GFX950-SDAG-NEXT: s_cselect_b32 s4, s4, s8
+; GFX950-SDAG-NEXT: s_lshr_b32 s6, s6, 16
+; GFX950-SDAG-NEXT: s_and_b32 s6, s6, 0x8000
+; GFX950-SDAG-NEXT: s_or_b32 s4, s6, s4
+; GFX950-SDAG-NEXT: s_pack_ll_b32_b16 s4, s4, s5
+; GFX950-SDAG-NEXT: v_mov_b32_e32 v0, s4
+; GFX950-SDAG-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GFX950-SDAG-NEXT: s_endpgm
+;
+; GFX950-GISEL-LABEL: fptrunc_v2f64_to_v2f16:
+; GFX950-GISEL: ; %bb.0: ; %entry
+; GFX950-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
+; GFX950-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX950-GISEL-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0
+; GFX950-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX950-GISEL-NEXT: s_bfe_u32 s2, s5, 0xb0014
+; GFX950-GISEL-NEXT: s_lshr_b32 s3, s5, 8
+; GFX950-GISEL-NEXT: s_and_b32 s8, s5, 0x1ff
+; GFX950-GISEL-NEXT: s_addk_i32 s2, 0xfc10
+; GFX950-GISEL-NEXT: s_and_b32 s3, s3, 0xffe
+; GFX950-GISEL-NEXT: s_or_b32 s4, s8, s4
+; GFX950-GISEL-NEXT: s_cmp_lg_u32 s4, 0
+; GFX950-GISEL-NEXT: s_cselect_b32 s4, 1, 0
+; GFX950-GISEL-NEXT: s_or_b32 s3, s3, s4
+; GFX950-GISEL-NEXT: s_cmp_lg_u32 s3, 0
+; GFX950-GISEL-NEXT: s_cselect_b32 s4, 1, 0
+; GFX950-GISEL-NEXT: s_sub_i32 s9, 1, s2
+; GFX950-GISEL-NEXT: s_lshl_b32 s8, s2, 12
+; GFX950-GISEL-NEXT: s_max_i32 s9, s9, 0
+; GFX950-GISEL-NEXT: s_or_b32 s8, s3, s8
+; GFX950-GISEL-NEXT: s_min_i32 s9, s9, 13
+; GFX950-GISEL-NEXT: s_bitset1_b32 s3, 12
+; GFX950-GISEL-NEXT: s_lshl_b32 s4, s4, 9
+; GFX950-GISEL-NEXT: s_lshr_b32 s10, s3, s9
+; GFX950-GISEL-NEXT: s_or_b32 s4, s4, 0x7c00
+; GFX950-GISEL-NEXT: s_lshl_b32 s9, s10, s9
+; GFX950-GISEL-NEXT: s_cmp_lg_u32 s9, s3
+; GFX950-GISEL-NEXT: s_cselect_b32 s3, 1, 0
+; GFX950-GISEL-NEXT: s_or_b32 s3, s10, s3
+; GFX950-GISEL-NEXT: s_cmp_lt_i32 s2, 1
+; GFX950-GISEL-NEXT: s_cselect_b32 s3, s3, s8
+; GFX950-GISEL-NEXT: s_and_b32 s8, s3, 7
+; GFX950-GISEL-NEXT: s_lshr_b32 s3, s3, 2
+; GFX950-GISEL-NEXT: s_cmp_eq_u32 s8, 3
+; GFX950-GISEL-NEXT: s_cselect_b32 s9, 1, 0
+; GFX950-GISEL-NEXT: s_cmp_gt_i32 s8, 5
+; GFX950-GISEL-NEXT: s_cselect_b32 s8, 1, 0
+; GFX950-GISEL-NEXT: s_or_b32 s8, s9, s8
+; GFX950-GISEL-NEXT: s_add_i32 s3, s3, s8
+; GFX950-GISEL-NEXT: s_cmp_gt_i32 s2, 30
+; GFX950-GISEL-NEXT: s_cselect_b32 s3, 0x7c00, s3
+; GFX950-GISEL-NEXT: s_cmpk_eq_i32 s2, 0x40f
+; GFX950-GISEL-NEXT: s_cselect_b32 s2, s4, s3
+; GFX950-GISEL-NEXT: s_lshr_b32 s3, s5, 16
+; GFX950-GISEL-NEXT: s_and_b32 s3, s3, 0x8000
+; GFX950-GISEL-NEXT: s_or_b32 s2, s3, s2
+; GFX950-GISEL-NEXT: s_bfe_u32 s3, s7, 0xb0014
+; GFX950-GISEL-NEXT: s_lshr_b32 s4, s7, 8
+; GFX950-GISEL-NEXT: s_and_b32 s5, s7, 0x1ff
+; GFX950-GISEL-NEXT: s_addk_i32 s3, 0xfc10
+; GFX950-GISEL-NEXT: s_and_b32 s4, s4, 0xffe
+; GFX950-GISEL-NEXT: s_or_b32 s5, s5, s6
+; GFX950-GISEL-NEXT: s_cmp_lg_u32 s5, 0
+; GFX950-GISEL-NEXT: s_cselect_b32 s5, 1, 0
+; GFX950-GISEL-NEXT: s_or_b32 s4, s4, s5
+; GFX950-GISEL-NEXT: s_cmp_lg_u32 s4, 0
+; GFX950-GISEL-NEXT: s_cselect_b32 s5, 1, 0
+; GFX950-GISEL-NEXT: s_sub_i32 s8, 1, s3
+; GFX950-GISEL-NEXT: s_lshl_b32 s6, s3, 12
+; GFX950-GISEL-NEXT: s_max_i32 s8, s8, 0
+; GFX950-GISEL-NEXT: s_or_b32 s6, s4, s6
+; GFX950-GISEL-NEXT: s_min_i32 s8, s8, 13
+; GFX950-GISEL-NEXT: s_bitset1_b32 s4, 12
+; GFX950-GISEL-NEXT: s_lshl_b32 s5, s5, 9
+; GFX950-GISEL-NEXT: s_lshr_b32 s9, s4, s8
+; GFX950-GISEL-NEXT: s_or_b32 s5, s5, 0x7c00
+; GFX950-GISEL-NEXT: s_lshl_b32 s8, s9, s8
+; GFX950-GISEL-NEXT: s_cmp_lg_u32 s8, s4
+; GFX950-GISEL-NEXT: s_cselect_b32 s4, 1, 0
+; GFX950-GISEL-NEXT: s_or_b32 s4, s9, s4
+; GFX950-GISEL-NEXT: s_cmp_lt_i32 s3, 1
+; GFX950-GISEL-NEXT: s_cselect_b32 s4, s4, s6
+; GFX950-GISEL-NEXT: s_and_b32 s6, s4, 7
+; GFX950-GISEL-NEXT: s_lshr_b32 s4, s4, 2
+; GFX950-GISEL-NEXT: s_cmp_eq_u32 s6, 3
+; GFX950-GISEL-NEXT: s_cselect_b32 s8, 1, 0
+; GFX950-GISEL-NEXT: s_cmp_gt_i32 s6, 5
+; GFX950-GISEL-NEXT: s_cselect_b32 s6, 1, 0
+; GFX950-GISEL-NEXT: s_or_b32 s6, s8, s6
+; GFX950-GISEL-NEXT: s_add_i32 s4, s4, s6
+; GFX950-GISEL-NEXT: s_cmp_gt_i32 s3, 30
+; GFX950-GISEL-NEXT: s_cselect_b32 s4, 0x7c00, s4
+; GFX950-GISEL-NEXT: s_cmpk_eq_i32 s3, 0x40f
+; GFX950-GISEL-NEXT: s_cselect_b32 s3, s5, s4
+; GFX950-GISEL-NEXT: s_lshr_b32 s4, s7, 16
+; GFX950-GISEL-NEXT: s_and_b32 s4, s4, 0x8000
+; GFX950-GISEL-NEXT: s_or_b32 s3, s4, s3
+; GFX950-GISEL-NEXT: s_pack_ll_b32_b16 s2, s2, s3
+; GFX950-GISEL-NEXT: v_mov_b32_e32 v0, s2
+; GFX950-GISEL-NEXT: s_mov_b32 s2, -1
+; GFX950-GISEL-NEXT: s_mov_b32 s3, 0xf000
+; GFX950-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0
+; GFX950-GISEL-NEXT: s_endpgm
+;
+; GFX11-SDAG-TRUE16-LABEL: fptrunc_v2f64_to_v2f16:
+; GFX11-SDAG-TRUE16: ; %bb.0: ; %entry
+; GFX11-SDAG-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s6, -1
+; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s7, 0x31016000
+; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s10, s6
+; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s11, s7
+; GFX11-SDAG-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s8, s2
+; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s9, s3
+; GFX11-SDAG-TRUE16-NEXT: buffer_load_b128 v[0:3], off, s[8:11], 0
+; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-SDAG-TRUE16-NEXT: v_readfirstlane_b32 s2, v3
+; GFX11-SDAG-TRUE16-NEXT: s_and_b32 s3, s2, 0x1ff
+; GFX11-SDAG-TRUE16-NEXT: s_lshr_b32 s5, s2, 8
+; GFX11-SDAG-TRUE16-NEXT: v_or_b32_e32 v2, s3, v2
+; GFX11-SDAG-TRUE16-NEXT: s_bfe_u32 s3, s2, 0xb0014
+; GFX11-SDAG-TRUE16-NEXT: s_and_b32 s5, s5, 0xffe
+; GFX11-SDAG-TRUE16-NEXT: s_sub_i32 s4, 0x3f1, s3
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
+; GFX11-SDAG-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v2
+; GFX11-SDAG-TRUE16-NEXT: v_med3_i32 v3, s4, 0, 13
+; GFX11-SDAG-TRUE16-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo
+; GFX11-SDAG-TRUE16-NEXT: v_readfirstlane_b32 s8, v3
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
+; GFX11-SDAG-TRUE16-NEXT: v_readfirstlane_b32 s4, v2
+; GFX11-SDAG-TRUE16-NEXT: s_or_b32 s4, s5, s4
+; GFX11-SDAG-TRUE16-NEXT: s_or_b32 s5, s4, 0x1000
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-SDAG-TRUE16-NEXT: s_lshr_b32 s9, s5, s8
+; GFX11-SDAG-TRUE16-NEXT: s_lshl_b32 s8, s9, s8
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_4) | instid1(SALU_CYCLE_1)
+; GFX11-SDAG-TRUE16-NEXT: s_cmp_lg_u32 s8, s5
+; GFX11-SDAG-TRUE16-NEXT: s_cselect_b32 s5, 1, 0
+; GFX11-SDAG-TRUE16-NEXT: s_addk_i32 s3, 0xfc10
+; GFX11-SDAG-TRUE16-NEXT: s_or_b32 s5, s9, s5
+; GFX11-SDAG-TRUE16-NEXT: s_lshl_b32 s8, s3, 12
+; GFX11-SDAG-TRUE16-NEXT: s_or_b32 s8, s4, s8
+; GFX11-SDAG-TRUE16-NEXT: s_cmp_lt_i32 s3, 1
+; GFX11-SDAG-TRUE16-NEXT: s_cselect_b32 s5, s5, s8
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-SDAG-TRUE16-NEXT: s_and_b32 s8, s5, 7
+; GFX11-SDAG-TRUE16-NEXT: s_cmp_gt_i32 s8, 5
+; GFX11-SDAG-TRUE16-NEXT: s_cselect_b32 s9, 1, 0
+; GFX11-SDAG-TRUE16-NEXT: s_cmp_eq_u32 s8, 3
+; GFX11-SDAG-TRUE16-NEXT: s_cselect_b32 s8, 1, 0
+; GFX11-SDAG-TRUE16-NEXT: s_lshr_b32 s5, s5, 2
+; GFX11-SDAG-TRUE16-NEXT: s_or_b32 s8, s8, s9
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-SDAG-TRUE16-NEXT: s_add_i32 s5, s5, s8
+; GFX11-SDAG-TRUE16-NEXT: s_cmp_lt_i32 s3, 31
+; GFX11-SDAG-TRUE16-NEXT: s_movk_i32 s8, 0x7e00
+; GFX11-SDAG-TRUE16-NEXT: s_cselect_b32 s5, s5, 0x7c00
+; GFX11-SDAG-TRUE16-NEXT: s_cmp_lg_u32 s4, 0
+; GFX11-SDAG-TRUE16-NEXT: v_readfirstlane_b32 s4, v1
+; GFX11-SDAG-TRUE16-NEXT: s_cselect_b32 s9, s8, 0x7c00
+; GFX11-SDAG-TRUE16-NEXT: s_cmpk_eq_i32 s3, 0x40f
+; GFX11-SDAG-TRUE16-NEXT: s_cselect_b32 s3, s9, s5
+; GFX11-SDAG-TRUE16-NEXT: s_and_b32 s5, s4, 0x1ff
+; GFX11-SDAG-TRUE16-NEXT: s_lshr_b32 s10, s4, 8
+; GFX11-SDAG-TRUE16-NEXT: v_or_b32_e32 v0, s5, v0
+; GFX11-SDAG-TRUE16-NEXT: s_bfe_u32 s5, s4, 0xb0014
+; GFX11-SDAG-TRUE16-NEXT: s_and_b32 s10, s10, 0xffe
+; GFX11-SDAG-TRUE16-NEXT: s_sub_i32 s9, 0x3f1, s5
+; GFX11-SDAG-TRUE16-NEXT: s_lshr_b32 s2, s2, 16
+; GFX11-SDAG-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-SDAG-TRUE16-NEXT: v_med3_i32 v1, s9, 0, 13
+; GFX11-SDAG-TRUE16-NEXT: s_and_b32 s2, s2, 0x8000
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-SDAG-TRUE16-NEXT: s_or_b32 s2, s2, s3
+; GFX11-SDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX11-SDAG-TRUE16-NEXT: v_readfirstlane_b32 s11, v1
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
+; GFX11-SDAG-TRUE16-NEXT: v_readfirstlane_b32 s9, v0
+; GFX11-SDAG-TRUE16-NEXT: s_or_b32 s9, s10, s9
+; GFX11-SDAG-TRUE16-NEXT: s_or_b32 s10, s9, 0x1000
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-SDAG-TRUE16-NEXT: s_lshr_b32 s12, s10, s11
+; GFX11-SDAG-TRUE16-NEXT: s_lshl_b32 s11, s12, s11
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_4) | instid1(SALU_CYCLE_1)
+; GFX11-SDAG-TRUE16-NEXT: s_cmp_lg_u32 s11, s10
+; GFX11-SDAG-TRUE16-NEXT: s_cselect_b32 s3, 1, 0
+; GFX11-SDAG-TRUE16-NEXT: s_addk_i32 s5, 0xfc10
+; GFX11-SDAG-TRUE16-NEXT: s_or_b32 s3, s12, s3
+; GFX11-SDAG-TRUE16-NEXT: s_lshl_b32 s10, s5, 12
+; GFX11-SDAG-TRUE16-NEXT: s_or_b32 s10, s9, s10
+; GFX11-SDAG-TRUE16-NEXT: s_cmp_lt_i32 s5, 1
+; GFX11-SDAG-TRUE16-NEXT: s_cselect_b32 s3, s3, s10
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-SDAG-TRUE16-NEXT: s_and_b32 s10, s3, 7
+; GFX11-SDAG-TRUE16-NEXT: s_cmp_gt_i32 s10, 5
+; GFX11-SDAG-TRUE16-NEXT: s_cselect_b32 s11, 1, 0
+; GFX11-SDAG-TRUE16-NEXT: s_cmp_eq_u32 s10, 3
+; GFX11-SDAG-TRUE16-NEXT: s_cselect_b32 s10, 1, 0
+; GFX11-SDAG-TRUE16-NEXT: s_lshr_b32 s3, s3, 2
+; GFX11-SDAG-TRUE16-NEXT: s_or_b32 s10, s10, s11
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-SDAG-TRUE16-NEXT: s_add_i32 s3, s3, s10
+; GFX11-SDAG-TRUE16-NEXT: s_cmp_lt_i32 s5, 31
+; GFX11-SDAG-TRUE16-NEXT: s_cselect_b32 s3, s3, 0x7c00
+; GFX11-SDAG-TRUE16-NEXT: s_cmp_lg_u32 s9, 0
+; GFX11-SDAG-TRUE16-NEXT: s_cselect_b32 s8, s8, 0x7c00
+; GFX11-SDAG-TRUE16-NEXT: s_cmpk_eq_i32 s5, 0x40f
+; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s5, s1
+; GFX11-SDAG-TRUE16-NEXT: s_cselect_b32 s3, s8, s3
+; GFX11-SDAG-TRUE16-NEXT: s_lshr_b32 s4, s4, 16
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-SDAG-TRUE16-NEXT: s_and_b32 s4, s4, 0x8000
+; GFX11-SDAG-TRUE16-NEXT: s_or_b32 s3, s4, s3
+; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s4, s0
+; GFX11-SDAG-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s3, s2
+; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-SDAG-TRUE16-NEXT: v_mov_b32_e32 v0, s2
+; GFX11-SDAG-TRUE16-NEXT: buffer_store_b32 v0, off, s[4:7], 0
+; GFX11-SDAG-TRUE16-NEXT: s_endpgm
+;
+; GFX11-SDAG-FAKE16-LABEL: fptrunc_v2f64_to_v2f16:
+; GFX11-SDAG-FAKE16: ; %bb.0: ; %entry
+; GFX11-SDAG-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s6, -1
+; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s7, 0x31016000
+; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s10, s6
+; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s11, s7
+; GFX11-SDAG-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s8, s2
+; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s9, s3
+; GFX11-SDAG-FAKE16-NEXT: buffer_load_b128 v[0:3], off, s[8:11], 0
+; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0)
+; GFX11-SDAG-FAKE16-NEXT: v_readfirstlane_b32 s2, v3
+; GFX11-SDAG-FAKE16-NEXT: s_and_b32 s3, s2, 0x1ff
+; GFX11-SDAG-FAKE16-NEXT: s_lshr_b32 s5, s2, 8
+; GFX11-SDAG-FAKE16-NEXT: v_or_b32_e32 v2, s3, v2
+; GFX11-SDAG-FAKE16-NEXT: s_bfe_u32 s3, s2, 0xb0014
+; GFX11-SDAG-FAKE16-NEXT: s_and_b32 s5, s5, 0xffe
+; GFX11-SDAG-FAKE16-NEXT: s_sub_i32 s4, 0x3f1, s3
+; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2)
+; GFX11-SDAG-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v2
+; GFX11-SDAG-FAKE16-NEXT: v_med3_i32 v3, s4, 0, 13
+; GFX11-SDAG-FAKE16-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT: v_readfirstlane_b32 s8, v3
+; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
+; GFX11-SDAG-FAKE16-NEXT: v_readfirstlane_b32 s4, v2
+; GFX11-SDAG-FAKE16-NEXT: s_or_b32 s4, s5, s4
+; GFX11-SDAG-FAKE16-NEXT: s_or_b32 s5, s4, 0x1000
+; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-SDAG-FAKE16-NEXT: s_lshr_b32 s9, s5, s8
+; GFX11-SDAG-FAKE16-NEXT: s_lshl_b32 s8, s9, s8
+; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_4) | instid1(SALU_CYCLE_1)
+; GFX11-SDAG-FAKE16-NEXT: s_cmp_lg_u32 s8, s5
+; GFX11-SDAG-FAKE16-NEXT: s_cselect_b32 s5, 1, 0
+; GFX11-SDAG-FAKE16-NEXT: s_addk_i32 s3, 0xfc10
+; GFX11-SDAG-FAKE16-NEXT: s_or_b32 s5, s9, s5
+; GFX11-SDAG-FAKE16-NEXT: s_lshl_b32 s8, s3, 12
+; GFX11-SDAG-FAKE16-NEXT: s_or_b32 s8, s4, s8
+; GFX11-SDAG-FAKE16-NEXT: s_cmp_lt_i32 s3, 1
+; GFX11-SDAG-FAKE16-NEXT: s_cselect_b32 s5, s5, s8
+; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-SDAG-FAKE16-NEXT: s_and_b32 s8, s5, 7
+; GFX11-SDAG-FAKE16-NEXT: s_cmp_gt_i32 s8, 5
+; GFX11-SDAG-FAKE16-NEXT: s_cselect_b32 s9, 1, 0
+; GFX11-SDAG-FAKE16-NEXT: s_cmp_eq_u32 s8, 3
+; GFX11-SDAG-FAKE16-NEXT: s_cselect_b32 s8, 1, 0
+; GFX11-SDAG-FAKE16-NEXT: s_lshr_b32 s5, s5, 2
+; GFX11-SDAG-FAKE16-NEXT: s_or_b32 s8, s8, s9
+; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-SDAG-FAKE16-NEXT: s_add_i32 s5, s5, s8
+; GFX11-SDAG-FAKE16-NEXT: s_cmp_lt_i32 s3, 31
+; GFX11-SDAG-FAKE16-NEXT: s_movk_i32 s8, 0x7e00
+; GFX11-SDAG-FAKE16-NEXT: s_cselect_b32 s5, s5, 0x7c00
+; GFX11-SDAG-FAKE16-NEXT: s_cmp_lg_u32 s4, 0
+; GFX11-SDAG-FAKE16-NEXT: v_readfirstlane_b32 s4, v1
+; GFX11-SDAG-FAKE16-NEXT: s_cselect_b32 s9, s8, 0x7c00
+; GFX11-SDAG-FAKE16-NEXT: s_cmpk_eq_i32 s3, 0x40f
+; GFX11-SDAG-FAKE16-NEXT: s_cselect_b32 s3, s9, s5
+; GFX11-SDAG-FAKE16-NEXT: s_and_b32 s5, s4, 0x1ff
+; GFX11-SDAG-FAKE16-NEXT: s_lshr_b32 s10, s4, 8
+; GFX11-SDAG-FAKE16-NEXT: v_or_b32_e32 v0, s5, v0
+; GFX11-SDAG-FAKE16-NEXT: s_bfe_u32 s5, s4, 0xb0014
+; GFX11-SDAG-FAKE16-NEXT: s_and_b32 s10, s10, 0xffe
+; GFX11-SDAG-FAKE16-NEXT: s_sub_i32 s9, 0x3f1, s5
+; GFX11-SDAG-FAKE16-NEXT: s_lshr_b32 s2, s2, 16
+; GFX11-SDAG-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0
+; GFX11-SDAG-FAKE16-NEXT: v_med3_i32 v1, s9, 0, 13
+; GFX11-SDAG-FAKE16-NEXT: s_and_b32 s2, s2, 0x8000
+; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
+; GFX11-SDAG-FAKE16-NEXT: s_or_b32 s2, s2, s3
+; GFX11-SDAG-FAKE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo
+; GFX11-SDAG-FAKE16-NEXT: v_readfirstlane_b32 s11, v1
+; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(SALU_CYCLE_1)
+; GFX11-SDAG-FAKE16-NEXT: v_readfirstlane_b32 s9, v0
+; GFX11-SDAG-FAKE16-NEXT: s_or_b32 s9, s10, s9
+; GFX11-SDAG-FAKE16-NEXT: s_or_b32 s10, s9, 0x1000
+; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-SDAG-FAKE16-NEXT: s_lshr_b32 s12, s10, s11
+; GFX11-SDAG-FAKE16-NEXT: s_lshl_b32 s11, s12, s11
+; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_4) | instid1(SALU_CYCLE_1)
+; GFX11-SDAG-FAKE16-NEXT: s_cmp_lg_u32 s11, s10
+; GFX11-SDAG-FAKE16-NEXT: s_cselect_b32 s3, 1, 0
+; GFX11-SDAG-FAKE16-NEXT: s_addk_i32 s5, 0xfc10
+; GFX11-SDAG-FAKE16-NEXT: s_or_b32 s3, s12, s3
+; GFX11-SDAG-FAKE16-NEXT: s_lshl_b32 s10, s5, 12
+; GFX11-SDAG-FAKE16-NEXT: s_or_b32 s10, s9, s10
+; GFX11-SDAG-FAKE16-NEXT: s_cmp_lt_i32 s5, 1
+; GFX11-SDAG-FAKE16-NEXT: s_cselect_b32 s3, s3, s10
+; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-SDAG-FAKE16-NEXT: s_and_b32 s10, s3, 7
+; GFX11-SDAG-FAKE16-NEXT: s_cmp_gt_i32 s10, 5
+; GFX11-SDAG-FAKE16-NEXT: s_cselect_b32 s11, 1, 0
+; GFX11-SDAG-FAKE16-NEXT: s_cmp_eq_u32 s10, 3
+; GFX11-SDAG-FAKE16-NEXT: s_cselect_b32 s10, 1, 0
+; GFX11-SDAG-FAKE16-NEXT: s_lshr_b32 s3, s3, 2
+; GFX11-SDAG-FAKE16-NEXT: s_or_b32 s10, s10, s11
+; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-SDAG-FAKE16-NEXT: s_add_i32 s3, s3, s10
+; GFX11-SDAG-FAKE16-NEXT: s_cmp_lt_i32 s5, 31
+; GFX11-SDAG-FAKE16-NEXT: s_cselect_b32 s3, s3, 0x7c00
+; GFX11-SDAG-FAKE16-NEXT: s_cmp_lg_u32 s9, 0
+; GFX11-SDAG-FAKE16-NEXT: s_cselect_b32 s8, s8, 0x7c00
+; GFX11-SDAG-FAKE16-NEXT: s_cmpk_eq_i32 s5, 0x40f
+; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s5, s1
+; GFX11-SDAG-FAKE16-NEXT: s_cselect_b32 s3, s8, s3
+; GFX11-SDAG-FAKE16-NEXT: s_lshr_b32 s4, s4, 16
+; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-SDAG-FAKE16-NEXT: s_and_b32 s4, s4, 0x8000
+; GFX11-SDAG-FAKE16-NEXT: s_or_b32 s3, s4, s3
+; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s4, s0
+; GFX11-SDAG-FAKE16-NEXT: s_pack_ll_b32_b16 s2, s3, s2
+; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-SDAG-FAKE16-NEXT: v_mov_b32_e32 v0, s2
+; GFX11-SDAG-FAKE16-NEXT: buffer_store_b32 v0, off, s[4:7], 0
+; GFX11-SDAG-FAKE16-NEXT: s_endpgm
+;
+; GFX11-GISEL-TRUE16-LABEL: fptrunc_v2f64_to_v2f16:
+; GFX11-GISEL-TRUE16: ; %bb.0: ; %entry
+; GFX11-GISEL-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-GISEL-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-TRUE16-NEXT: s_load_b128 s[4:7], s[2:3], 0x0
+; GFX11-GISEL-TRUE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-TRUE16-NEXT: s_and_b32 s8, s5, 0x1ff
+; GFX11-GISEL-TRUE16-NEXT: s_bfe_u32 s2, s5, 0xb0014
+; GFX11-GISEL-TRUE16-NEXT: s_lshr_b32 s3, s5, 8
+; GFX11-GISEL-TRUE16-NEXT: s_or_b32 s4, s8, s4
+; GFX11-GISEL-TRUE16-NEXT: s_addk_i32 s2, 0xfc10
+; GFX11-GISEL-TRUE16-NEXT: s_and_b32 s3, s3, 0xffe
+; GFX11-GISEL-TRUE16-NEXT: s_cmp_lg_u32 s4, 0
+; GFX11-GISEL-TRUE16-NEXT: s_cselect_b32 s4, 1, 0
+; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-GISEL-TRUE16-NEXT: s_or_b32 s3, s3, s4
+; GFX11-GISEL-TRUE16-NEXT: s_cmp_lg_u32 s3, 0
+; GFX11-GISEL-TRUE16-NEXT: s_cselect_b32 s4, 1, 0
+; GFX11-GISEL-TRUE16-NEXT: s_sub_i32 s8, 1, s2
+; GFX11-GISEL-TRUE16-NEXT: s_or_b32 s10, s3, 0x1000
+; GFX11-GISEL-TRUE16-NEXT: s_max_i32 s8, s8, 0
+; GFX11-GISEL-TRUE16-NEXT: s_lshl_b32 s9, s2, 12
+; GFX11-GISEL-TRUE16-NEXT: s_min_i32 s8, s8, 13
+; GFX11-GISEL-TRUE16-NEXT: s_lshl_b32 s4, s4, 9
+; GFX11-GISEL-TRUE16-NEXT: s_lshr_b32 s11, s10, s8
+; GFX11-GISEL-TRUE16-NEXT: s_or_b32 s3, s3, s9
+; GFX11-GISEL-TRUE16-NEXT: s_lshl_b32 s8, s11, s8
+; GFX11-GISEL-TRUE16-NEXT: s_or_b32 s4, s4, 0x7c00
+; GFX11-GISEL-TRUE16-NEXT: s_cmp_lg_u32 s8, s10
+; GFX11-GISEL-TRUE16-NEXT: s_cselect_b32 s8, 1, 0
+; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
+; GFX11-GISEL-TRUE16-NEXT: s_or_b32 s8, s11, s8
+; GFX11-GISEL-TRUE16-NEXT: s_cmp_lt_i32 s2, 1
+; GFX11-GISEL-TRUE16-NEXT: s_cselect_b32 s3, s8, s3
+; GFX11-GISEL-TRUE16-NEXT: s_and_b32 s8, s3, 7
+; GFX11-GISEL-TRUE16-NEXT: s_lshr_b32 s3, s3, 2
+; GFX11-GISEL-TRUE16-NEXT: s_cmp_eq_u32 s8, 3
+; GFX11-GISEL-TRUE16-NEXT: s_cselect_b32 s9, 1, 0
+; GFX11-GISEL-TRUE16-NEXT: s_cmp_gt_i32 s8, 5
+; GFX11-GISEL-TRUE16-NEXT: s_cselect_b32 s8, 1, 0
+; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-GISEL-TRUE16-NEXT: s_or_b32 s8, s9, s8
+; GFX11-GISEL-TRUE16-NEXT: s_add_i32 s3, s3, s8
+; GFX11-GISEL-TRUE16-NEXT: s_cmp_gt_i32 s2, 30
+; GFX11-GISEL-TRUE16-NEXT: s_cselect_b32 s3, 0x7c00, s3
+; GFX11-GISEL-TRUE16-NEXT: s_cmpk_eq_i32 s2, 0x40f
+; GFX11-GISEL-TRUE16-NEXT: s_cselect_b32 s2, s4, s3
+; GFX11-GISEL-TRUE16-NEXT: s_lshr_b32 s3, s5, 16
+; GFX11-GISEL-TRUE16-NEXT: s_and_b32 s8, s7, 0x1ff
+; GFX11-GISEL-TRUE16-NEXT: s_bfe_u32 s4, s7, 0xb0014
+; GFX11-GISEL-TRUE16-NEXT: s_lshr_b32 s5, s7, 8
+; GFX11-GISEL-TRUE16-NEXT: s_and_b32 s3, s3, 0x8000
+; GFX11-GISEL-TRUE16-NEXT: s_or_b32 s6, s8, s6
+; GFX11-GISEL-TRUE16-NEXT: s_addk_i32 s4, 0xfc10
+; GFX11-GISEL-TRUE16-NEXT: s_and_b32 s5, s5, 0xffe
+; GFX11-GISEL-TRUE16-NEXT: s_or_b32 s2, s3, s2
+; GFX11-GISEL-TRUE16-NEXT: s_cmp_lg_u32 s6, 0
+; GFX11-GISEL-TRUE16-NEXT: s_cselect_b32 s3, 1, 0
+; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-GISEL-TRUE16-NEXT: s_or_b32 s3, s5, s3
+; GFX11-GISEL-TRUE16-NEXT: s_cmp_lg_u32 s3, 0
+; GFX11-GISEL-TRUE16-NEXT: s_cselect_b32 s5, 1, 0
+; GFX11-GISEL-TRUE16-NEXT: s_sub_i32 s6, 1, s4
+; GFX11-GISEL-TRUE16-NEXT: s_or_b32 s9, s3, 0x1000
+; GFX11-GISEL-TRUE16-NEXT: s_max_i32 s6, s6, 0
+; GFX11-GISEL-TRUE16-NEXT: s_lshl_b32 s8, s4, 12
+; GFX11-GISEL-TRUE16-NEXT: s_min_i32 s6, s6, 13
+; GFX11-GISEL-TRUE16-NEXT: s_lshl_b32 s5, s5, 9
+; GFX11-GISEL-TRUE16-NEXT: s_lshr_b32 s10, s9, s6
+; GFX11-GISEL-TRUE16-NEXT: s_or_b32 s3, s3, s8
+; GFX11-GISEL-TRUE16-NEXT: s_lshl_b32 s6, s10, s6
+; GFX11-GISEL-TRUE16-NEXT: s_or_b32 s5, s5, 0x7c00
+; GFX11-GISEL-TRUE16-NEXT: s_cmp_lg_u32 s6, s9
+; GFX11-GISEL-TRUE16-NEXT: s_cselect_b32 s6, 1, 0
+; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
+; GFX11-GISEL-TRUE16-NEXT: s_or_b32 s6, s10, s6
+; GFX11-GISEL-TRUE16-NEXT: s_cmp_lt_i32 s4, 1
+; GFX11-GISEL-TRUE16-NEXT: s_cselect_b32 s3, s6, s3
+; GFX11-GISEL-TRUE16-NEXT: s_and_b32 s6, s3, 7
+; GFX11-GISEL-TRUE16-NEXT: s_lshr_b32 s3, s3, 2
+; GFX11-GISEL-TRUE16-NEXT: s_cmp_eq_u32 s6, 3
+; GFX11-GISEL-TRUE16-NEXT: s_cselect_b32 s8, 1, 0
+; GFX11-GISEL-TRUE16-NEXT: s_cmp_gt_i32 s6, 5
+; GFX11-GISEL-TRUE16-NEXT: s_cselect_b32 s6, 1, 0
+; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-GISEL-TRUE16-NEXT: s_or_b32 s6, s8, s6
+; GFX11-GISEL-TRUE16-NEXT: s_add_i32 s3, s3, s6
+; GFX11-GISEL-TRUE16-NEXT: s_cmp_gt_i32 s4, 30
+; GFX11-GISEL-TRUE16-NEXT: s_cselect_b32 s3, 0x7c00, s3
+; GFX11-GISEL-TRUE16-NEXT: s_cmpk_eq_i32 s4, 0x40f
+; GFX11-GISEL-TRUE16-NEXT: s_cselect_b32 s3, s5, s3
+; GFX11-GISEL-TRUE16-NEXT: s_lshr_b32 s4, s7, 16
+; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-GISEL-TRUE16-NEXT: s_and_b32 s4, s4, 0x8000
+; GFX11-GISEL-TRUE16-NEXT: s_or_b32 s3, s4, s3
+; GFX11-GISEL-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
+; GFX11-GISEL-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s2, s3
+; GFX11-GISEL-TRUE16-NEXT: s_mov_b32 s3, 0x31016000
+; GFX11-GISEL-TRUE16-NEXT: v_mov_b32_e32 v0, s2
+; GFX11-GISEL-TRUE16-NEXT: s_mov_b32 s2, -1
+; GFX11-GISEL-TRUE16-NEXT: buffer_store_b32 v0, off, s[0:3], 0
+; GFX11-GISEL-TRUE16-NEXT: s_endpgm
+;
+; GFX11-GISEL-FAKE16-LABEL: fptrunc_v2f64_to_v2f16:
+; GFX11-GISEL-FAKE16: ; %bb.0: ; %entry
+; GFX11-GISEL-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24
+; GFX11-GISEL-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-FAKE16-NEXT: s_load_b128 s[4:7], s[2:3], 0x0
+; GFX11-GISEL-FAKE16-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-FAKE16-NEXT: s_and_b32 s8, s5, 0x1ff
+; GFX11-GISEL-FAKE16-NEXT: s_bfe_u32 s2, s5, 0xb0014
+; GFX11-GISEL-FAKE16-NEXT: s_lshr_b32 s3, s5, 8
+; GFX11-GISEL-FAKE16-NEXT: s_or_b32 s4, s8, s4
+; GFX11-GISEL-FAKE16-NEXT: s_addk_i32 s2, 0xfc10
+; GFX11-GISEL-FAKE16-NEXT: s_and_b32 s3, s3, 0xffe
+; GFX11-GISEL-FAKE16-NEXT: s_cmp_lg_u32 s4, 0
+; GFX11-GISEL-FAKE16-NEXT: s_cselect_b32 s4, 1, 0
+; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-GISEL-FAKE16-NEXT: s_or_b32 s3, s3, s4
+; GFX11-GISEL-FAKE16-NEXT: s_cmp_lg_u32 s3, 0
+; GFX11-GISEL-FAKE16-NEXT: s_cselect_b32 s4, 1, 0
+; GFX11-GISEL-FAKE16-NEXT: s_sub_i32 s8, 1, s2
+; GFX11-GISEL-FAKE16-NEXT: s_or_b32 s10, s3, 0x1000
+; GFX11-GISEL-FAKE16-NEXT: s_max_i32 s8, s8, 0
+; GFX11-GISEL-FAKE16-NEXT: s_lshl_b32 s9, s2, 12
+; GFX11-GISEL-FAKE16-NEXT: s_min_i32 s8, s8, 13
+; GFX11-GISEL-FAKE16-NEXT: s_lshl_b32 s4, s4, 9
+; GFX11-GISEL-FAKE16-NEXT: s_lshr_b32 s11, s10, s8
+; GFX11-GISEL-FAKE16-NEXT: s_or_b32 s3, s3, s9
+; GFX11-GISEL-FAKE16-NEXT: s_lshl_b32 s8, s11, s8
+; GFX11-GISEL-FAKE16-NEXT: s_or_b32 s4, s4, 0x7c00
+; GFX11-GISEL-FAKE16-NEXT: s_cmp_lg_u32 s8, s10
+; GFX11-GISEL-FAKE16-NEXT: s_cselect_b32 s8, 1, 0
+; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
+; GFX11-GISEL-FAKE16-NEXT: s_or_b32 s8, s11, s8
+; GFX11-GISEL-FAKE16-NEXT: s_cmp_lt_i32 s2, 1
+; GFX11-GISEL-FAKE16-NEXT: s_cselect_b32 s3, s8, s3
+; GFX11-GISEL-FAKE16-NEXT: s_and_b32 s8, s3, 7
+; GFX11-GISEL-FAKE16-NEXT: s_lshr_b32 s3, s3, 2
+; GFX11-GISEL-FAKE16-NEXT: s_cmp_eq_u32 s8, 3
+; GFX11-GISEL-FAKE16-NEXT: s_cselect_b32 s9, 1, 0
+; GFX11-GISEL-FAKE16-NEXT: s_cmp_gt_i32 s8, 5
+; GFX11-GISEL-FAKE16-NEXT: s_cselect_b32 s8, 1, 0
+; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-GISEL-FAKE16-NEXT: s_or_b32 s8, s9, s8
+; GFX11-GISEL-FAKE16-NEXT: s_add_i32 s3, s3, s8
+; GFX11-GISEL-FAKE16-NEXT: s_cmp_gt_i32 s2, 30
+; GFX11-GISEL-FAKE16-NEXT: s_cselect_b32 s3, 0x7c00, s3
+; GFX11-GISEL-FAKE16-NEXT: s_cmpk_eq_i32 s2, 0x40f
+; GFX11-GISEL-FAKE16-NEXT: s_cselect_b32 s2, s4, s3
+; GFX11-GISEL-FAKE16-NEXT: s_lshr_b32 s3, s5, 16
+; GFX11-GISEL-FAKE16-NEXT: s_and_b32 s8, s7, 0x1ff
+; GFX11-GISEL-FAKE16-NEXT: s_bfe_u32 s4, s7, 0xb0014
+; GFX11-GISEL-FAKE16-NEXT: s_lshr_b32 s5, s7, 8
+; GFX11-GISEL-FAKE16-NEXT: s_and_b32 s3, s3, 0x8000
+; GFX11-GISEL-FAKE16-NEXT: s_or_b32 s6, s8, s6
+; GFX11-GISEL-FAKE16-NEXT: s_addk_i32 s4, 0xfc10
+; GFX11-GISEL-FAKE16-NEXT: s_and_b32 s5, s5, 0xffe
+; GFX11-GISEL-FAKE16-NEXT: s_or_b32 s2, s3, s2
+; GFX11-GISEL-FAKE16-NEXT: s_cmp_lg_u32 s6, 0
+; GFX11-GISEL-FAKE16-NEXT: s_cselect_b32 s3, 1, 0
+; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-GISEL-FAKE16-NEXT: s_or_b32 s3, s5, s3
+; GFX11-GISEL-FAKE16-NEXT: s_cmp_lg_u32 s3, 0
+; GFX11-GISEL-FAKE16-NEXT: s_cselect_b32 s5, 1, 0
+; GFX11-GISEL-FAKE16-NEXT: s_sub_i32 s6, 1, s4
+; GFX11-GISEL-FAKE16-NEXT: s_or_b32 s9, s3, 0x1000
+; GFX11-GISEL-FAKE16-NEXT: s_max_i32 s6, s6, 0
+; GFX11-GISEL-FAKE16-NEXT: s_lshl_b32 s8, s4, 12
+; GFX11-GISEL-FAKE16-NEXT: s_min_i32 s6, s6, 13
+; GFX11-GISEL-FAKE16-NEXT: s_lshl_b32 s5, s5, 9
+; GFX11-GISEL-FAKE16-NEXT: s_lshr_b32 s10, s9, s6
+; GFX11-GISEL-FAKE16-NEXT: s_or_b32 s3, s3, s8
+; GFX11-GISEL-FAKE16-NEXT: s_lshl_b32 s6, s10, s6
+; GFX11-GISEL-FAKE16-NEXT: s_or_b32 s5, s5, 0x7c00
+; GFX11-GISEL-FAKE16-NEXT: s_cmp_lg_u32 s6, s9
+; GFX11-GISEL-FAKE16-NEXT: s_cselect_b32 s6, 1, 0
+; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1)
+; GFX11-GISEL-FAKE16-NEXT: s_or_b32 s6, s10, s6
+; GFX11-GISEL-FAKE16-NEXT: s_cmp_lt_i32 s4, 1
+; GFX11-GISEL-FAKE16-NEXT: s_cselect_b32 s3, s6, s3
+; GFX11-GISEL-FAKE16-NEXT: s_and_b32 s6, s3, 7
+; GFX11-GISEL-FAKE16-NEXT: s_lshr_b32 s3, s3, 2
+; GFX11-GISEL-FAKE16-NEXT: s_cmp_eq_u32 s6, 3
+; GFX11-GISEL-FAKE16-NEXT: s_cselect_b32 s8, 1, 0
+; GFX11-GISEL-FAKE16-NEXT: s_cmp_gt_i32 s6, 5
+; GFX11-GISEL-FAKE16-NEXT: s_cselect_b32 s6, 1, 0
+; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-GISEL-FAKE16-NEXT: s_or_b32 s6, s8, s6
+; GFX11-GISEL-FAKE16-NEXT: s_add_i32 s3, s3, s6
+; GFX11-GISEL-FAKE16-NEXT: s_cmp_gt_i32 s4, 30
+; GFX11-GISEL-FAKE16-NEXT: s_cselect_b32 s3, 0x7c00, s3
+; GFX11-GISEL-FAKE16-NEXT: s_cmpk_eq_i32 s4, 0x40f
+; GFX11-GISEL-FAKE16-NEXT: s_cselect_b32 s3, s5, s3
+; GFX11-GISEL-FAKE16-NEXT: s_lshr_b32 s4, s7, 16
+; GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1)
+; GFX11-GISEL-FAKE16-NEXT: s_and_b32 s4, s4, 0x8000
+; GFX11-GISEL-FAKE16-NEXT: s_or_b32 s3, s4, s3
+;
GFX11-GISEL-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX11-GISEL-FAKE16-NEXT: s_pack_ll_b32_b16 s2, s2, s3 +; GFX11-GISEL-FAKE16-NEXT: s_mov_b32 s3, 0x31016000 +; GFX11-GISEL-FAKE16-NEXT: v_mov_b32_e32 v0, s2 +; GFX11-GISEL-FAKE16-NEXT: s_mov_b32 s2, -1 +; GFX11-GISEL-FAKE16-NEXT: buffer_store_b32 v0, off, s[0:3], 0 +; GFX11-GISEL-FAKE16-NEXT: s_endpgm + ptr addrspace(1) %r, + ptr addrspace(1) %a) { +entry: + %a.val = load <2 x double>, ptr addrspace(1) %a + %r.val = fptrunc <2 x double> %a.val to <2 x half> + store <2 x half> %r.val, ptr addrspace(1) %r + ret void +} + +define amdgpu_kernel void @fptrunc_v2f64_to_v2f16_afn( +; SI-SDAG-LABEL: fptrunc_v2f64_to_v2f16_afn: +; SI-SDAG: ; %bb.0: ; %entry +; SI-SDAG-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x9 +; SI-SDAG-NEXT: s_mov_b32 s3, 0xf000 +; SI-SDAG-NEXT: s_mov_b32 s2, -1 +; SI-SDAG-NEXT: s_mov_b32 s10, s2 +; SI-SDAG-NEXT: s_mov_b32 s11, s3 +; SI-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; SI-SDAG-NEXT: s_mov_b32 s8, s6 +; SI-SDAG-NEXT: s_mov_b32 s9, s7 +; SI-SDAG-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 +; SI-SDAG-NEXT: s_movk_i32 s0, 0x7e00 +; SI-SDAG-NEXT: s_waitcnt vmcnt(0) +; SI-SDAG-NEXT: v_readfirstlane_b32 s1, v3 +; SI-SDAG-NEXT: v_readfirstlane_b32 s6, v1 +; SI-SDAG-NEXT: s_and_b32 s7, s1, 0x1ff +; SI-SDAG-NEXT: s_lshr_b32 s8, s1, 8 +; SI-SDAG-NEXT: s_bfe_u32 s9, s1, 0xb0014 +; SI-SDAG-NEXT: v_or_b32_e32 v1, s7, v2 +; SI-SDAG-NEXT: s_and_b32 s7, s8, 0xffe +; SI-SDAG-NEXT: s_sub_i32 s8, 0x3f1, s9 +; SI-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; SI-SDAG-NEXT: v_med3_i32 v2, s8, 0, 13 +; SI-SDAG-NEXT: v_readfirstlane_b32 s8, v1 +; SI-SDAG-NEXT: v_readfirstlane_b32 s10, v2 +; SI-SDAG-NEXT: s_or_b32 s7, s7, s8 +; SI-SDAG-NEXT: s_or_b32 s8, s7, 0x1000 +; SI-SDAG-NEXT: s_lshr_b32 s11, s8, s10 +; SI-SDAG-NEXT: s_lshl_b32 s10, s11, s10 +; SI-SDAG-NEXT: s_cmp_lg_u32 s10, s8 +; SI-SDAG-NEXT: s_cselect_b32 s8, 1, 0 +; SI-SDAG-NEXT: s_addk_i32 s9, 0xfc10 +; SI-SDAG-NEXT: s_or_b32 s8, s11, s8 +; SI-SDAG-NEXT: s_lshl_b32 s10, s9, 12 +; SI-SDAG-NEXT: s_or_b32 s10, s7, s10 +; SI-SDAG-NEXT: s_cmp_lt_i32 s9, 1 +; SI-SDAG-NEXT: s_cselect_b32 s8, s8, s10 +; SI-SDAG-NEXT: s_and_b32 s10, s8, 7 +; SI-SDAG-NEXT: s_cmp_gt_i32 s10, 5 +; SI-SDAG-NEXT: s_cselect_b32 s11, 1, 0 +; SI-SDAG-NEXT: s_cmp_eq_u32 s10, 3 +; SI-SDAG-NEXT: s_cselect_b32 s10, 1, 0 +; SI-SDAG-NEXT: s_lshr_b32 s8, s8, 2 +; SI-SDAG-NEXT: s_or_b32 s10, s10, s11 +; SI-SDAG-NEXT: s_add_i32 s8, s8, s10 +; SI-SDAG-NEXT: s_cmp_lt_i32 s9, 31 +; SI-SDAG-NEXT: s_cselect_b32 s8, s8, 0x7c00 +; SI-SDAG-NEXT: s_cmp_lg_u32 s7, 0 +; SI-SDAG-NEXT: s_cselect_b32 s7, s0, 0x7c00 +; SI-SDAG-NEXT: s_cmpk_eq_i32 s9, 0x40f +; SI-SDAG-NEXT: s_cselect_b32 s7, s7, s8 +; SI-SDAG-NEXT: s_lshr_b32 s1, s1, 16 +; SI-SDAG-NEXT: s_and_b32 s8, s6, 0x1ff +; SI-SDAG-NEXT: s_lshr_b32 s9, s6, 8 +; SI-SDAG-NEXT: s_bfe_u32 s10, s6, 0xb0014 +; SI-SDAG-NEXT: s_and_b32 s1, s1, 0x8000 +; SI-SDAG-NEXT: v_or_b32_e32 v0, s8, v0 +; SI-SDAG-NEXT: s_and_b32 s8, s9, 0xffe +; SI-SDAG-NEXT: s_sub_i32 s9, 0x3f1, s10 +; SI-SDAG-NEXT: s_or_b32 s1, s1, s7 +; SI-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 +; SI-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; SI-SDAG-NEXT: v_med3_i32 v1, s9, 0, 13 +; SI-SDAG-NEXT: s_lshl_b32 s1, s1, 16 +; SI-SDAG-NEXT: v_readfirstlane_b32 s7, v0 +; SI-SDAG-NEXT: v_readfirstlane_b32 s9, v1 +; SI-SDAG-NEXT: s_or_b32 s7, s8, s7 +; SI-SDAG-NEXT: s_or_b32 s8, s7, 0x1000 +; SI-SDAG-NEXT: s_lshr_b32 s11, s8, s9 +; SI-SDAG-NEXT: s_lshl_b32 s9, s11, s9 +; SI-SDAG-NEXT: 
s_cmp_lg_u32 s9, s8 +; SI-SDAG-NEXT: s_cselect_b32 s8, 1, 0 +; SI-SDAG-NEXT: s_addk_i32 s10, 0xfc10 +; SI-SDAG-NEXT: s_or_b32 s8, s11, s8 +; SI-SDAG-NEXT: s_lshl_b32 s9, s10, 12 +; SI-SDAG-NEXT: s_or_b32 s9, s7, s9 +; SI-SDAG-NEXT: s_cmp_lt_i32 s10, 1 +; SI-SDAG-NEXT: s_cselect_b32 s8, s8, s9 +; SI-SDAG-NEXT: s_and_b32 s9, s8, 7 +; SI-SDAG-NEXT: s_cmp_gt_i32 s9, 5 +; SI-SDAG-NEXT: s_cselect_b32 s11, 1, 0 +; SI-SDAG-NEXT: s_cmp_eq_u32 s9, 3 +; SI-SDAG-NEXT: s_cselect_b32 s9, 1, 0 +; SI-SDAG-NEXT: s_lshr_b32 s8, s8, 2 +; SI-SDAG-NEXT: s_or_b32 s9, s9, s11 +; SI-SDAG-NEXT: s_add_i32 s8, s8, s9 +; SI-SDAG-NEXT: s_cmp_lt_i32 s10, 31 +; SI-SDAG-NEXT: s_cselect_b32 s8, s8, 0x7c00 +; SI-SDAG-NEXT: s_cmp_lg_u32 s7, 0 +; SI-SDAG-NEXT: s_cselect_b32 s0, s0, 0x7c00 +; SI-SDAG-NEXT: s_cmpk_eq_i32 s10, 0x40f +; SI-SDAG-NEXT: s_cselect_b32 s0, s0, s8 +; SI-SDAG-NEXT: s_lshr_b32 s6, s6, 16 +; SI-SDAG-NEXT: s_and_b32 s6, s6, 0x8000 +; SI-SDAG-NEXT: s_or_b32 s0, s6, s0 +; SI-SDAG-NEXT: s_and_b32 s0, s0, 0xffff +; SI-SDAG-NEXT: s_or_b32 s6, s0, s1 +; SI-SDAG-NEXT: s_mov_b32 s0, s4 +; SI-SDAG-NEXT: s_mov_b32 s1, s5 +; SI-SDAG-NEXT: v_mov_b32_e32 v0, s6 +; SI-SDAG-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; SI-SDAG-NEXT: s_endpgm +; +; SI-GISEL-LABEL: fptrunc_v2f64_to_v2f16_afn: +; SI-GISEL: ; %bb.0: ; %entry +; SI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 +; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; SI-GISEL-NEXT: s_load_dwordx4 s[4:7], s[2:3], 0x0 +; SI-GISEL-NEXT: s_mov_b32 s2, -1 +; SI-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; SI-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[4:5] ; SI-GISEL-NEXT: v_cvt_f32_f64_e32 v1, s[6:7] ; SI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 @@ -664,29 +3145,111 @@ define amdgpu_kernel void @fptrunc_v2f64_to_v2f16( ; SI-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; SI-GISEL-NEXT: s_endpgm ; -; VI-SDAG-LABEL: fptrunc_v2f64_to_v2f16: +; VI-SDAG-LABEL: fptrunc_v2f64_to_v2f16_afn: ; VI-SDAG: ; %bb.0: ; %entry -; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; VI-SDAG-NEXT: s_mov_b32 s7, 0xf000 -; VI-SDAG-NEXT: s_mov_b32 s6, -1 -; VI-SDAG-NEXT: s_mov_b32 s10, s6 -; VI-SDAG-NEXT: s_mov_b32 s11, s7 +; VI-SDAG-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x24 +; VI-SDAG-NEXT: s_mov_b32 s3, 0xf000 +; VI-SDAG-NEXT: s_mov_b32 s2, -1 +; VI-SDAG-NEXT: s_mov_b32 s10, s2 +; VI-SDAG-NEXT: s_mov_b32 s11, s3 ; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) -; VI-SDAG-NEXT: s_mov_b32 s8, s2 -; VI-SDAG-NEXT: s_mov_b32 s9, s3 +; VI-SDAG-NEXT: s_mov_b32 s8, s6 +; VI-SDAG-NEXT: s_mov_b32 s9, s7 ; VI-SDAG-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 -; VI-SDAG-NEXT: s_mov_b32 s4, s0 -; VI-SDAG-NEXT: s_mov_b32 s5, s1 +; VI-SDAG-NEXT: s_mov_b32 s0, s4 +; VI-SDAG-NEXT: s_mov_b32 s1, s5 +; VI-SDAG-NEXT: s_movk_i32 s6, 0x7e00 ; VI-SDAG-NEXT: s_waitcnt vmcnt(0) -; VI-SDAG-NEXT: v_cvt_f32_f64_e32 v2, v[2:3] -; VI-SDAG-NEXT: v_cvt_f32_f64_e32 v0, v[0:1] -; VI-SDAG-NEXT: v_cvt_f16_f32_sdwa v1, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD -; VI-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 -; VI-SDAG-NEXT: v_or_b32_e32 v0, v0, v1 -; VI-SDAG-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; VI-SDAG-NEXT: v_readfirstlane_b32 s4, v3 +; VI-SDAG-NEXT: s_and_b32 s7, s4, 0x1ff +; VI-SDAG-NEXT: v_readfirstlane_b32 s5, v1 +; VI-SDAG-NEXT: v_or_b32_e32 v1, s7, v2 +; VI-SDAG-NEXT: s_lshr_b32 s8, s4, 8 +; VI-SDAG-NEXT: s_bfe_u32 s9, s4, 0xb0014 +; VI-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1 +; VI-SDAG-NEXT: s_and_b32 s7, s8, 0xffe +; VI-SDAG-NEXT: s_sub_i32 s8, 0x3f1, s9 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; VI-SDAG-NEXT: v_med3_i32 
v2, s8, 0, 13 +; VI-SDAG-NEXT: v_readfirstlane_b32 s8, v1 +; VI-SDAG-NEXT: s_or_b32 s7, s7, s8 +; VI-SDAG-NEXT: v_readfirstlane_b32 s10, v2 +; VI-SDAG-NEXT: s_or_b32 s8, s7, 0x1000 +; VI-SDAG-NEXT: s_lshr_b32 s11, s8, s10 +; VI-SDAG-NEXT: s_lshl_b32 s10, s11, s10 +; VI-SDAG-NEXT: s_cmp_lg_u32 s10, s8 +; VI-SDAG-NEXT: s_cselect_b32 s8, 1, 0 +; VI-SDAG-NEXT: s_addk_i32 s9, 0xfc10 +; VI-SDAG-NEXT: s_lshl_b32 s10, s9, 12 +; VI-SDAG-NEXT: s_or_b32 s8, s11, s8 +; VI-SDAG-NEXT: s_or_b32 s10, s7, s10 +; VI-SDAG-NEXT: s_cmp_lt_i32 s9, 1 +; VI-SDAG-NEXT: s_cselect_b32 s8, s8, s10 +; VI-SDAG-NEXT: s_and_b32 s10, s8, 7 +; VI-SDAG-NEXT: s_cmp_gt_i32 s10, 5 +; VI-SDAG-NEXT: s_cselect_b32 s11, 1, 0 +; VI-SDAG-NEXT: s_cmp_eq_u32 s10, 3 +; VI-SDAG-NEXT: s_cselect_b32 s10, 1, 0 +; VI-SDAG-NEXT: s_lshr_b32 s8, s8, 2 +; VI-SDAG-NEXT: s_or_b32 s10, s10, s11 +; VI-SDAG-NEXT: s_add_i32 s8, s8, s10 +; VI-SDAG-NEXT: s_cmp_lt_i32 s9, 31 +; VI-SDAG-NEXT: s_cselect_b32 s8, s8, 0x7c00 +; VI-SDAG-NEXT: s_cmp_lg_u32 s7, 0 +; VI-SDAG-NEXT: s_cselect_b32 s7, s6, 0x7c00 +; VI-SDAG-NEXT: s_cmpk_eq_i32 s9, 0x40f +; VI-SDAG-NEXT: s_cselect_b32 s7, s7, s8 +; VI-SDAG-NEXT: s_and_b32 s8, s5, 0x1ff +; VI-SDAG-NEXT: v_or_b32_e32 v0, s8, v0 +; VI-SDAG-NEXT: s_lshr_b32 s4, s4, 16 +; VI-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 +; VI-SDAG-NEXT: s_lshr_b32 s9, s5, 8 +; VI-SDAG-NEXT: s_bfe_u32 s10, s5, 0xb0014 +; VI-SDAG-NEXT: s_and_b32 s4, s4, 0x8000 +; VI-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; VI-SDAG-NEXT: s_and_b32 s8, s9, 0xffe +; VI-SDAG-NEXT: s_sub_i32 s9, 0x3f1, s10 +; VI-SDAG-NEXT: s_or_b32 s4, s4, s7 +; VI-SDAG-NEXT: v_readfirstlane_b32 s7, v0 +; VI-SDAG-NEXT: v_med3_i32 v1, s9, 0, 13 +; VI-SDAG-NEXT: s_or_b32 s7, s8, s7 +; VI-SDAG-NEXT: v_readfirstlane_b32 s9, v1 +; VI-SDAG-NEXT: s_or_b32 s8, s7, 0x1000 +; VI-SDAG-NEXT: s_lshr_b32 s11, s8, s9 +; VI-SDAG-NEXT: s_lshl_b32 s4, s4, 16 +; VI-SDAG-NEXT: s_lshl_b32 s9, s11, s9 +; VI-SDAG-NEXT: s_cmp_lg_u32 s9, s8 +; VI-SDAG-NEXT: s_cselect_b32 s8, 1, 0 +; VI-SDAG-NEXT: s_addk_i32 s10, 0xfc10 +; VI-SDAG-NEXT: s_lshl_b32 s9, s10, 12 +; VI-SDAG-NEXT: s_or_b32 s8, s11, s8 +; VI-SDAG-NEXT: s_or_b32 s9, s7, s9 +; VI-SDAG-NEXT: s_cmp_lt_i32 s10, 1 +; VI-SDAG-NEXT: s_cselect_b32 s8, s8, s9 +; VI-SDAG-NEXT: s_and_b32 s9, s8, 7 +; VI-SDAG-NEXT: s_cmp_gt_i32 s9, 5 +; VI-SDAG-NEXT: s_cselect_b32 s11, 1, 0 +; VI-SDAG-NEXT: s_cmp_eq_u32 s9, 3 +; VI-SDAG-NEXT: s_cselect_b32 s9, 1, 0 +; VI-SDAG-NEXT: s_lshr_b32 s8, s8, 2 +; VI-SDAG-NEXT: s_or_b32 s9, s9, s11 +; VI-SDAG-NEXT: s_add_i32 s8, s8, s9 +; VI-SDAG-NEXT: s_cmp_lt_i32 s10, 31 +; VI-SDAG-NEXT: s_cselect_b32 s8, s8, 0x7c00 +; VI-SDAG-NEXT: s_cmp_lg_u32 s7, 0 +; VI-SDAG-NEXT: s_cselect_b32 s6, s6, 0x7c00 +; VI-SDAG-NEXT: s_cmpk_eq_i32 s10, 0x40f +; VI-SDAG-NEXT: s_cselect_b32 s6, s6, s8 +; VI-SDAG-NEXT: s_lshr_b32 s5, s5, 16 +; VI-SDAG-NEXT: s_and_b32 s5, s5, 0x8000 +; VI-SDAG-NEXT: s_or_b32 s5, s5, s6 +; VI-SDAG-NEXT: s_and_b32 s5, s5, 0xffff +; VI-SDAG-NEXT: s_or_b32 s4, s5, s4 +; VI-SDAG-NEXT: v_mov_b32_e32 v0, s4 +; VI-SDAG-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; VI-SDAG-NEXT: s_endpgm ; -; VI-GISEL-LABEL: fptrunc_v2f64_to_v2f16: +; VI-GISEL-LABEL: fptrunc_v2f64_to_v2f16_afn: ; VI-GISEL: ; %bb.0: ; %entry ; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 ; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) @@ -702,29 +3265,109 @@ define amdgpu_kernel void @fptrunc_v2f64_to_v2f16( ; VI-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; VI-GISEL-NEXT: s_endpgm ; -; GFX9-SDAG-LABEL: fptrunc_v2f64_to_v2f16: +; GFX9-SDAG-LABEL: 
fptrunc_v2f64_to_v2f16_afn: ; GFX9-SDAG: ; %bb.0: ; %entry -; GFX9-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; GFX9-SDAG-NEXT: s_mov_b32 s7, 0xf000 -; GFX9-SDAG-NEXT: s_mov_b32 s6, -1 -; GFX9-SDAG-NEXT: s_mov_b32 s10, s6 -; GFX9-SDAG-NEXT: s_mov_b32 s11, s7 +; GFX9-SDAG-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x24 +; GFX9-SDAG-NEXT: s_mov_b32 s3, 0xf000 +; GFX9-SDAG-NEXT: s_mov_b32 s2, -1 +; GFX9-SDAG-NEXT: s_mov_b32 s6, s2 +; GFX9-SDAG-NEXT: s_mov_b32 s7, s3 ; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0) -; GFX9-SDAG-NEXT: s_mov_b32 s8, s2 -; GFX9-SDAG-NEXT: s_mov_b32 s9, s3 -; GFX9-SDAG-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 -; GFX9-SDAG-NEXT: s_mov_b32 s4, s0 -; GFX9-SDAG-NEXT: s_mov_b32 s5, s1 +; GFX9-SDAG-NEXT: s_mov_b32 s4, s10 +; GFX9-SDAG-NEXT: s_mov_b32 s5, s11 +; GFX9-SDAG-NEXT: buffer_load_dwordx4 v[0:3], off, s[4:7], 0 +; GFX9-SDAG-NEXT: s_mov_b32 s0, s8 +; GFX9-SDAG-NEXT: s_mov_b32 s1, s9 +; GFX9-SDAG-NEXT: s_movk_i32 s4, 0x7e00 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) -; GFX9-SDAG-NEXT: v_cvt_f32_f64_e32 v2, v[2:3] -; GFX9-SDAG-NEXT: v_cvt_f32_f64_e32 v0, v[0:1] -; GFX9-SDAG-NEXT: v_cvt_f16_f32_e32 v1, v2 -; GFX9-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX9-SDAG-NEXT: v_pack_b32_f16 v0, v0, v1 -; GFX9-SDAG-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GFX9-SDAG-NEXT: v_readfirstlane_b32 s5, v3 +; GFX9-SDAG-NEXT: s_and_b32 s7, s5, 0x1ff +; GFX9-SDAG-NEXT: v_readfirstlane_b32 s6, v1 +; GFX9-SDAG-NEXT: v_or_b32_e32 v1, s7, v2 +; GFX9-SDAG-NEXT: s_lshr_b32 s8, s5, 8 +; GFX9-SDAG-NEXT: s_bfe_u32 s9, s5, 0xb0014 +; GFX9-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1 +; GFX9-SDAG-NEXT: s_and_b32 s7, s8, 0xffe +; GFX9-SDAG-NEXT: s_sub_i32 s8, 0x3f1, s9 +; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 1, vcc +; GFX9-SDAG-NEXT: v_med3_i32 v2, s8, 0, 13 +; GFX9-SDAG-NEXT: v_readfirstlane_b32 s8, v1 +; GFX9-SDAG-NEXT: s_or_b32 s7, s7, s8 +; GFX9-SDAG-NEXT: v_readfirstlane_b32 s10, v2 +; GFX9-SDAG-NEXT: s_or_b32 s8, s7, 0x1000 +; GFX9-SDAG-NEXT: s_lshr_b32 s11, s8, s10 +; GFX9-SDAG-NEXT: s_lshl_b32 s10, s11, s10 +; GFX9-SDAG-NEXT: s_cmp_lg_u32 s10, s8 +; GFX9-SDAG-NEXT: s_cselect_b32 s8, 1, 0 +; GFX9-SDAG-NEXT: s_addk_i32 s9, 0xfc10 +; GFX9-SDAG-NEXT: s_lshl_b32 s10, s9, 12 +; GFX9-SDAG-NEXT: s_or_b32 s8, s11, s8 +; GFX9-SDAG-NEXT: s_or_b32 s10, s7, s10 +; GFX9-SDAG-NEXT: s_cmp_lt_i32 s9, 1 +; GFX9-SDAG-NEXT: s_cselect_b32 s8, s8, s10 +; GFX9-SDAG-NEXT: s_and_b32 s10, s8, 7 +; GFX9-SDAG-NEXT: s_cmp_gt_i32 s10, 5 +; GFX9-SDAG-NEXT: s_cselect_b32 s11, 1, 0 +; GFX9-SDAG-NEXT: s_cmp_eq_u32 s10, 3 +; GFX9-SDAG-NEXT: s_cselect_b32 s10, 1, 0 +; GFX9-SDAG-NEXT: s_lshr_b32 s8, s8, 2 +; GFX9-SDAG-NEXT: s_or_b32 s10, s10, s11 +; GFX9-SDAG-NEXT: s_add_i32 s8, s8, s10 +; GFX9-SDAG-NEXT: s_cmp_lt_i32 s9, 31 +; GFX9-SDAG-NEXT: s_cselect_b32 s8, s8, 0x7c00 +; GFX9-SDAG-NEXT: s_cmp_lg_u32 s7, 0 +; GFX9-SDAG-NEXT: s_cselect_b32 s7, s4, 0x7c00 +; GFX9-SDAG-NEXT: s_cmpk_eq_i32 s9, 0x40f +; GFX9-SDAG-NEXT: s_cselect_b32 s7, s7, s8 +; GFX9-SDAG-NEXT: s_and_b32 s8, s6, 0x1ff +; GFX9-SDAG-NEXT: v_or_b32_e32 v0, s8, v0 +; GFX9-SDAG-NEXT: s_lshr_b32 s5, s5, 16 +; GFX9-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 +; GFX9-SDAG-NEXT: s_lshr_b32 s9, s6, 8 +; GFX9-SDAG-NEXT: s_bfe_u32 s10, s6, 0xb0014 +; GFX9-SDAG-NEXT: s_and_b32 s5, s5, 0x8000 +; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX9-SDAG-NEXT: s_and_b32 s8, s9, 0xffe +; GFX9-SDAG-NEXT: s_sub_i32 s9, 0x3f1, s10 +; GFX9-SDAG-NEXT: s_or_b32 s5, s5, s7 +; GFX9-SDAG-NEXT: v_readfirstlane_b32 s7, v0 +; GFX9-SDAG-NEXT: v_med3_i32 v1, s9, 0, 13 +; GFX9-SDAG-NEXT: s_or_b32 
s7, s8, s7 +; GFX9-SDAG-NEXT: v_readfirstlane_b32 s9, v1 +; GFX9-SDAG-NEXT: s_or_b32 s8, s7, 0x1000 +; GFX9-SDAG-NEXT: s_lshr_b32 s11, s8, s9 +; GFX9-SDAG-NEXT: s_lshl_b32 s9, s11, s9 +; GFX9-SDAG-NEXT: s_cmp_lg_u32 s9, s8 +; GFX9-SDAG-NEXT: s_cselect_b32 s8, 1, 0 +; GFX9-SDAG-NEXT: s_addk_i32 s10, 0xfc10 +; GFX9-SDAG-NEXT: s_lshl_b32 s9, s10, 12 +; GFX9-SDAG-NEXT: s_or_b32 s8, s11, s8 +; GFX9-SDAG-NEXT: s_or_b32 s9, s7, s9 +; GFX9-SDAG-NEXT: s_cmp_lt_i32 s10, 1 +; GFX9-SDAG-NEXT: s_cselect_b32 s8, s8, s9 +; GFX9-SDAG-NEXT: s_and_b32 s9, s8, 7 +; GFX9-SDAG-NEXT: s_cmp_gt_i32 s9, 5 +; GFX9-SDAG-NEXT: s_cselect_b32 s11, 1, 0 +; GFX9-SDAG-NEXT: s_cmp_eq_u32 s9, 3 +; GFX9-SDAG-NEXT: s_cselect_b32 s9, 1, 0 +; GFX9-SDAG-NEXT: s_lshr_b32 s8, s8, 2 +; GFX9-SDAG-NEXT: s_or_b32 s9, s9, s11 +; GFX9-SDAG-NEXT: s_add_i32 s8, s8, s9 +; GFX9-SDAG-NEXT: s_cmp_lt_i32 s10, 31 +; GFX9-SDAG-NEXT: s_cselect_b32 s8, s8, 0x7c00 +; GFX9-SDAG-NEXT: s_cmp_lg_u32 s7, 0 +; GFX9-SDAG-NEXT: s_cselect_b32 s4, s4, 0x7c00 +; GFX9-SDAG-NEXT: s_cmpk_eq_i32 s10, 0x40f +; GFX9-SDAG-NEXT: s_cselect_b32 s4, s4, s8 +; GFX9-SDAG-NEXT: s_lshr_b32 s6, s6, 16 +; GFX9-SDAG-NEXT: s_and_b32 s6, s6, 0x8000 +; GFX9-SDAG-NEXT: s_or_b32 s4, s6, s4 +; GFX9-SDAG-NEXT: s_pack_ll_b32_b16 s4, s4, s5 +; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, s4 +; GFX9-SDAG-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX9-SDAG-NEXT: s_endpgm ; -; GFX9-GISEL-LABEL: fptrunc_v2f64_to_v2f16: +; GFX9-GISEL-LABEL: fptrunc_v2f64_to_v2f16_afn: ; GFX9-GISEL: ; %bb.0: ; %entry ; GFX9-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 ; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0) @@ -740,27 +3383,109 @@ define amdgpu_kernel void @fptrunc_v2f64_to_v2f16( ; GFX9-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX9-GISEL-NEXT: s_endpgm ; -; GFX950-SDAG-LABEL: fptrunc_v2f64_to_v2f16: +; GFX950-SDAG-LABEL: fptrunc_v2f64_to_v2f16_afn: ; GFX950-SDAG: ; %bb.0: ; %entry -; GFX950-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; GFX950-SDAG-NEXT: s_mov_b32 s7, 0xf000 -; GFX950-SDAG-NEXT: s_mov_b32 s6, -1 -; GFX950-SDAG-NEXT: s_mov_b32 s10, s6 -; GFX950-SDAG-NEXT: s_mov_b32 s11, s7 +; GFX950-SDAG-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x24 +; GFX950-SDAG-NEXT: s_mov_b32 s3, 0xf000 +; GFX950-SDAG-NEXT: s_mov_b32 s2, -1 +; GFX950-SDAG-NEXT: s_mov_b32 s6, s2 +; GFX950-SDAG-NEXT: s_mov_b32 s7, s3 ; GFX950-SDAG-NEXT: s_waitcnt lgkmcnt(0) -; GFX950-SDAG-NEXT: s_mov_b32 s8, s2 -; GFX950-SDAG-NEXT: s_mov_b32 s9, s3 -; GFX950-SDAG-NEXT: buffer_load_dwordx4 v[0:3], off, s[8:11], 0 -; GFX950-SDAG-NEXT: s_mov_b32 s4, s0 -; GFX950-SDAG-NEXT: s_mov_b32 s5, s1 +; GFX950-SDAG-NEXT: s_mov_b32 s4, s10 +; GFX950-SDAG-NEXT: s_mov_b32 s5, s11 +; GFX950-SDAG-NEXT: buffer_load_dwordx4 v[0:3], off, s[4:7], 0 +; GFX950-SDAG-NEXT: s_mov_b32 s0, s8 +; GFX950-SDAG-NEXT: s_mov_b32 s1, s9 +; GFX950-SDAG-NEXT: s_movk_i32 s4, 0x7e00 ; GFX950-SDAG-NEXT: s_waitcnt vmcnt(0) -; GFX950-SDAG-NEXT: v_cvt_f32_f64_e32 v2, v[2:3] -; GFX950-SDAG-NEXT: v_cvt_f32_f64_e32 v0, v[0:1] -; GFX950-SDAG-NEXT: v_cvt_pk_f16_f32 v0, v0, v2 -; GFX950-SDAG-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GFX950-SDAG-NEXT: v_readfirstlane_b32 s5, v3 +; GFX950-SDAG-NEXT: s_and_b32 s7, s5, 0x1ff +; GFX950-SDAG-NEXT: v_readfirstlane_b32 s6, v1 +; GFX950-SDAG-NEXT: v_or_b32_e32 v1, s7, v2 +; GFX950-SDAG-NEXT: s_lshr_b32 s8, s5, 8 +; GFX950-SDAG-NEXT: s_bfe_u32 s9, s5, 0xb0014 +; GFX950-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, 0, v1 +; GFX950-SDAG-NEXT: s_and_b32 s7, s8, 0xffe +; GFX950-SDAG-NEXT: s_sub_i32 s8, 0x3f1, s9 +; GFX950-SDAG-NEXT: v_cndmask_b32_e64 v1, 
0, 1, vcc +; GFX950-SDAG-NEXT: v_med3_i32 v2, s8, 0, 13 +; GFX950-SDAG-NEXT: v_readfirstlane_b32 s8, v1 +; GFX950-SDAG-NEXT: s_or_b32 s7, s7, s8 +; GFX950-SDAG-NEXT: v_readfirstlane_b32 s10, v2 +; GFX950-SDAG-NEXT: s_or_b32 s8, s7, 0x1000 +; GFX950-SDAG-NEXT: s_lshr_b32 s11, s8, s10 +; GFX950-SDAG-NEXT: s_lshl_b32 s10, s11, s10 +; GFX950-SDAG-NEXT: s_cmp_lg_u32 s10, s8 +; GFX950-SDAG-NEXT: s_cselect_b32 s8, 1, 0 +; GFX950-SDAG-NEXT: s_addk_i32 s9, 0xfc10 +; GFX950-SDAG-NEXT: s_lshl_b32 s10, s9, 12 +; GFX950-SDAG-NEXT: s_or_b32 s8, s11, s8 +; GFX950-SDAG-NEXT: s_or_b32 s10, s7, s10 +; GFX950-SDAG-NEXT: s_cmp_lt_i32 s9, 1 +; GFX950-SDAG-NEXT: s_cselect_b32 s8, s8, s10 +; GFX950-SDAG-NEXT: s_and_b32 s10, s8, 7 +; GFX950-SDAG-NEXT: s_cmp_gt_i32 s10, 5 +; GFX950-SDAG-NEXT: s_cselect_b32 s11, 1, 0 +; GFX950-SDAG-NEXT: s_cmp_eq_u32 s10, 3 +; GFX950-SDAG-NEXT: s_cselect_b32 s10, 1, 0 +; GFX950-SDAG-NEXT: s_lshr_b32 s8, s8, 2 +; GFX950-SDAG-NEXT: s_or_b32 s10, s10, s11 +; GFX950-SDAG-NEXT: s_add_i32 s8, s8, s10 +; GFX950-SDAG-NEXT: s_cmp_lt_i32 s9, 31 +; GFX950-SDAG-NEXT: s_cselect_b32 s8, s8, 0x7c00 +; GFX950-SDAG-NEXT: s_cmp_lg_u32 s7, 0 +; GFX950-SDAG-NEXT: s_cselect_b32 s7, s4, 0x7c00 +; GFX950-SDAG-NEXT: s_cmpk_eq_i32 s9, 0x40f +; GFX950-SDAG-NEXT: s_cselect_b32 s7, s7, s8 +; GFX950-SDAG-NEXT: s_and_b32 s8, s6, 0x1ff +; GFX950-SDAG-NEXT: v_or_b32_e32 v0, s8, v0 +; GFX950-SDAG-NEXT: s_lshr_b32 s5, s5, 16 +; GFX950-SDAG-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 +; GFX950-SDAG-NEXT: s_lshr_b32 s9, s6, 8 +; GFX950-SDAG-NEXT: s_bfe_u32 s10, s6, 0xb0014 +; GFX950-SDAG-NEXT: s_and_b32 s5, s5, 0x8000 +; GFX950-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc +; GFX950-SDAG-NEXT: s_and_b32 s8, s9, 0xffe +; GFX950-SDAG-NEXT: s_sub_i32 s9, 0x3f1, s10 +; GFX950-SDAG-NEXT: s_or_b32 s5, s5, s7 +; GFX950-SDAG-NEXT: v_readfirstlane_b32 s7, v0 +; GFX950-SDAG-NEXT: v_med3_i32 v1, s9, 0, 13 +; GFX950-SDAG-NEXT: s_or_b32 s7, s8, s7 +; GFX950-SDAG-NEXT: v_readfirstlane_b32 s9, v1 +; GFX950-SDAG-NEXT: s_or_b32 s8, s7, 0x1000 +; GFX950-SDAG-NEXT: s_lshr_b32 s11, s8, s9 +; GFX950-SDAG-NEXT: s_lshl_b32 s9, s11, s9 +; GFX950-SDAG-NEXT: s_cmp_lg_u32 s9, s8 +; GFX950-SDAG-NEXT: s_cselect_b32 s8, 1, 0 +; GFX950-SDAG-NEXT: s_addk_i32 s10, 0xfc10 +; GFX950-SDAG-NEXT: s_lshl_b32 s9, s10, 12 +; GFX950-SDAG-NEXT: s_or_b32 s8, s11, s8 +; GFX950-SDAG-NEXT: s_or_b32 s9, s7, s9 +; GFX950-SDAG-NEXT: s_cmp_lt_i32 s10, 1 +; GFX950-SDAG-NEXT: s_cselect_b32 s8, s8, s9 +; GFX950-SDAG-NEXT: s_and_b32 s9, s8, 7 +; GFX950-SDAG-NEXT: s_cmp_gt_i32 s9, 5 +; GFX950-SDAG-NEXT: s_cselect_b32 s11, 1, 0 +; GFX950-SDAG-NEXT: s_cmp_eq_u32 s9, 3 +; GFX950-SDAG-NEXT: s_cselect_b32 s9, 1, 0 +; GFX950-SDAG-NEXT: s_lshr_b32 s8, s8, 2 +; GFX950-SDAG-NEXT: s_or_b32 s9, s9, s11 +; GFX950-SDAG-NEXT: s_add_i32 s8, s8, s9 +; GFX950-SDAG-NEXT: s_cmp_lt_i32 s10, 31 +; GFX950-SDAG-NEXT: s_cselect_b32 s8, s8, 0x7c00 +; GFX950-SDAG-NEXT: s_cmp_lg_u32 s7, 0 +; GFX950-SDAG-NEXT: s_cselect_b32 s4, s4, 0x7c00 +; GFX950-SDAG-NEXT: s_cmpk_eq_i32 s10, 0x40f +; GFX950-SDAG-NEXT: s_cselect_b32 s4, s4, s8 +; GFX950-SDAG-NEXT: s_lshr_b32 s6, s6, 16 +; GFX950-SDAG-NEXT: s_and_b32 s6, s6, 0x8000 +; GFX950-SDAG-NEXT: s_or_b32 s4, s6, s4 +; GFX950-SDAG-NEXT: s_pack_ll_b32_b16 s4, s4, s5 +; GFX950-SDAG-NEXT: v_mov_b32_e32 v0, s4 +; GFX950-SDAG-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX950-SDAG-NEXT: s_endpgm ; -; GFX950-GISEL-LABEL: fptrunc_v2f64_to_v2f16: +; GFX950-GISEL-LABEL: fptrunc_v2f64_to_v2f16_afn: ; GFX950-GISEL: ; %bb.0: ; %entry ; GFX950-GISEL-NEXT: s_load_dwordx4 s[0:3], 
s[4:5], 0x24 ; GFX950-GISEL-NEXT: s_waitcnt lgkmcnt(0) @@ -776,7 +3501,7 @@ define amdgpu_kernel void @fptrunc_v2f64_to_v2f16( ; GFX950-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX950-GISEL-NEXT: s_endpgm ; -; GFX11-SDAG-TRUE16-LABEL: fptrunc_v2f64_to_v2f16: +; GFX11-SDAG-TRUE16-LABEL: fptrunc_v2f64_to_v2f16_afn: ; GFX11-SDAG-TRUE16: ; %bb.0: ; %entry ; GFX11-SDAG-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 ; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s6, -1 @@ -786,21 +3511,113 @@ define amdgpu_kernel void @fptrunc_v2f64_to_v2f16( ; GFX11-SDAG-TRUE16-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s8, s2 ; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s9, s3 -; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s4, s0 ; GFX11-SDAG-TRUE16-NEXT: buffer_load_b128 v[0:3], off, s[8:11], 0 -; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s5, s1 ; GFX11-SDAG-TRUE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-TRUE16-NEXT: v_cvt_f32_f64_e32 v2, v[2:3] -; GFX11-SDAG-TRUE16-NEXT: v_cvt_f32_f64_e32 v1, v[0:1] -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-SDAG-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.l, v2 -; GFX11-SDAG-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.h, v1 -; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-TRUE16-NEXT: v_pack_b32_f16 v0, v0.h, v0.l +; GFX11-SDAG-TRUE16-NEXT: v_readfirstlane_b32 s2, v3 +; GFX11-SDAG-TRUE16-NEXT: s_and_b32 s3, s2, 0x1ff +; GFX11-SDAG-TRUE16-NEXT: s_lshr_b32 s5, s2, 8 +; GFX11-SDAG-TRUE16-NEXT: v_or_b32_e32 v2, s3, v2 +; GFX11-SDAG-TRUE16-NEXT: s_bfe_u32 s3, s2, 0xb0014 +; GFX11-SDAG-TRUE16-NEXT: s_and_b32 s5, s5, 0xffe +; GFX11-SDAG-TRUE16-NEXT: s_sub_i32 s4, 0x3f1, s3 +; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) +; GFX11-SDAG-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v2 +; GFX11-SDAG-TRUE16-NEXT: v_med3_i32 v3, s4, 0, 13 +; GFX11-SDAG-TRUE16-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo +; GFX11-SDAG-TRUE16-NEXT: v_readfirstlane_b32 s8, v3 +; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) +; GFX11-SDAG-TRUE16-NEXT: v_readfirstlane_b32 s4, v2 +; GFX11-SDAG-TRUE16-NEXT: s_or_b32 s4, s5, s4 +; GFX11-SDAG-TRUE16-NEXT: s_or_b32 s5, s4, 0x1000 +; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX11-SDAG-TRUE16-NEXT: s_lshr_b32 s9, s5, s8 +; GFX11-SDAG-TRUE16-NEXT: s_lshl_b32 s8, s9, s8 +; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_4) | instid1(SALU_CYCLE_1) +; GFX11-SDAG-TRUE16-NEXT: s_cmp_lg_u32 s8, s5 +; GFX11-SDAG-TRUE16-NEXT: s_cselect_b32 s5, 1, 0 +; GFX11-SDAG-TRUE16-NEXT: s_addk_i32 s3, 0xfc10 +; GFX11-SDAG-TRUE16-NEXT: s_or_b32 s5, s9, s5 +; GFX11-SDAG-TRUE16-NEXT: s_lshl_b32 s8, s3, 12 +; GFX11-SDAG-TRUE16-NEXT: s_or_b32 s8, s4, s8 +; GFX11-SDAG-TRUE16-NEXT: s_cmp_lt_i32 s3, 1 +; GFX11-SDAG-TRUE16-NEXT: s_cselect_b32 s5, s5, s8 +; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX11-SDAG-TRUE16-NEXT: s_and_b32 s8, s5, 7 +; GFX11-SDAG-TRUE16-NEXT: s_cmp_gt_i32 s8, 5 +; GFX11-SDAG-TRUE16-NEXT: s_cselect_b32 s9, 1, 0 +; GFX11-SDAG-TRUE16-NEXT: s_cmp_eq_u32 s8, 3 +; GFX11-SDAG-TRUE16-NEXT: s_cselect_b32 s8, 1, 0 +; GFX11-SDAG-TRUE16-NEXT: s_lshr_b32 s5, s5, 2 +; GFX11-SDAG-TRUE16-NEXT: s_or_b32 s8, s8, s9 +; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX11-SDAG-TRUE16-NEXT: s_add_i32 s5, s5, s8 +; GFX11-SDAG-TRUE16-NEXT: s_cmp_lt_i32 s3, 31 +; 
GFX11-SDAG-TRUE16-NEXT: s_movk_i32 s8, 0x7e00 +; GFX11-SDAG-TRUE16-NEXT: s_cselect_b32 s5, s5, 0x7c00 +; GFX11-SDAG-TRUE16-NEXT: s_cmp_lg_u32 s4, 0 +; GFX11-SDAG-TRUE16-NEXT: v_readfirstlane_b32 s4, v1 +; GFX11-SDAG-TRUE16-NEXT: s_cselect_b32 s9, s8, 0x7c00 +; GFX11-SDAG-TRUE16-NEXT: s_cmpk_eq_i32 s3, 0x40f +; GFX11-SDAG-TRUE16-NEXT: s_cselect_b32 s3, s9, s5 +; GFX11-SDAG-TRUE16-NEXT: s_and_b32 s5, s4, 0x1ff +; GFX11-SDAG-TRUE16-NEXT: s_lshr_b32 s10, s4, 8 +; GFX11-SDAG-TRUE16-NEXT: v_or_b32_e32 v0, s5, v0 +; GFX11-SDAG-TRUE16-NEXT: s_bfe_u32 s5, s4, 0xb0014 +; GFX11-SDAG-TRUE16-NEXT: s_and_b32 s10, s10, 0xffe +; GFX11-SDAG-TRUE16-NEXT: s_sub_i32 s9, 0x3f1, s5 +; GFX11-SDAG-TRUE16-NEXT: s_lshr_b32 s2, s2, 16 +; GFX11-SDAG-TRUE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 +; GFX11-SDAG-TRUE16-NEXT: v_med3_i32 v1, s9, 0, 13 +; GFX11-SDAG-TRUE16-NEXT: s_and_b32 s2, s2, 0x8000 +; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX11-SDAG-TRUE16-NEXT: s_or_b32 s2, s2, s3 +; GFX11-SDAG-TRUE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo +; GFX11-SDAG-TRUE16-NEXT: v_readfirstlane_b32 s11, v1 +; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) +; GFX11-SDAG-TRUE16-NEXT: v_readfirstlane_b32 s9, v0 +; GFX11-SDAG-TRUE16-NEXT: s_or_b32 s9, s10, s9 +; GFX11-SDAG-TRUE16-NEXT: s_or_b32 s10, s9, 0x1000 +; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX11-SDAG-TRUE16-NEXT: s_lshr_b32 s12, s10, s11 +; GFX11-SDAG-TRUE16-NEXT: s_lshl_b32 s11, s12, s11 +; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_4) | instid1(SALU_CYCLE_1) +; GFX11-SDAG-TRUE16-NEXT: s_cmp_lg_u32 s11, s10 +; GFX11-SDAG-TRUE16-NEXT: s_cselect_b32 s3, 1, 0 +; GFX11-SDAG-TRUE16-NEXT: s_addk_i32 s5, 0xfc10 +; GFX11-SDAG-TRUE16-NEXT: s_or_b32 s3, s12, s3 +; GFX11-SDAG-TRUE16-NEXT: s_lshl_b32 s10, s5, 12 +; GFX11-SDAG-TRUE16-NEXT: s_or_b32 s10, s9, s10 +; GFX11-SDAG-TRUE16-NEXT: s_cmp_lt_i32 s5, 1 +; GFX11-SDAG-TRUE16-NEXT: s_cselect_b32 s3, s3, s10 +; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX11-SDAG-TRUE16-NEXT: s_and_b32 s10, s3, 7 +; GFX11-SDAG-TRUE16-NEXT: s_cmp_gt_i32 s10, 5 +; GFX11-SDAG-TRUE16-NEXT: s_cselect_b32 s11, 1, 0 +; GFX11-SDAG-TRUE16-NEXT: s_cmp_eq_u32 s10, 3 +; GFX11-SDAG-TRUE16-NEXT: s_cselect_b32 s10, 1, 0 +; GFX11-SDAG-TRUE16-NEXT: s_lshr_b32 s3, s3, 2 +; GFX11-SDAG-TRUE16-NEXT: s_or_b32 s10, s10, s11 +; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX11-SDAG-TRUE16-NEXT: s_add_i32 s3, s3, s10 +; GFX11-SDAG-TRUE16-NEXT: s_cmp_lt_i32 s5, 31 +; GFX11-SDAG-TRUE16-NEXT: s_cselect_b32 s3, s3, 0x7c00 +; GFX11-SDAG-TRUE16-NEXT: s_cmp_lg_u32 s9, 0 +; GFX11-SDAG-TRUE16-NEXT: s_cselect_b32 s8, s8, 0x7c00 +; GFX11-SDAG-TRUE16-NEXT: s_cmpk_eq_i32 s5, 0x40f +; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s5, s1 +; GFX11-SDAG-TRUE16-NEXT: s_cselect_b32 s3, s8, s3 +; GFX11-SDAG-TRUE16-NEXT: s_lshr_b32 s4, s4, 16 +; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX11-SDAG-TRUE16-NEXT: s_and_b32 s4, s4, 0x8000 +; GFX11-SDAG-TRUE16-NEXT: s_or_b32 s3, s4, s3 +; GFX11-SDAG-TRUE16-NEXT: s_mov_b32 s4, s0 +; GFX11-SDAG-TRUE16-NEXT: s_pack_ll_b32_b16 s2, s3, s2 +; GFX11-SDAG-TRUE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX11-SDAG-TRUE16-NEXT: v_mov_b32_e32 v0, s2 ; GFX11-SDAG-TRUE16-NEXT: buffer_store_b32 v0, off, s[4:7], 0 ; 
GFX11-SDAG-TRUE16-NEXT: s_endpgm ; -; GFX11-SDAG-FAKE16-LABEL: fptrunc_v2f64_to_v2f16: +; GFX11-SDAG-FAKE16-LABEL: fptrunc_v2f64_to_v2f16_afn: ; GFX11-SDAG-FAKE16: ; %bb.0: ; %entry ; GFX11-SDAG-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 ; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s6, -1 @@ -810,21 +3627,113 @@ define amdgpu_kernel void @fptrunc_v2f64_to_v2f16( ; GFX11-SDAG-FAKE16-NEXT: s_waitcnt lgkmcnt(0) ; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s8, s2 ; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s9, s3 -; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s4, s0 ; GFX11-SDAG-FAKE16-NEXT: buffer_load_b128 v[0:3], off, s[8:11], 0 -; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s5, s1 ; GFX11-SDAG-FAKE16-NEXT: s_waitcnt vmcnt(0) -; GFX11-SDAG-FAKE16-NEXT: v_cvt_f32_f64_e32 v2, v[2:3] -; GFX11-SDAG-FAKE16-NEXT: v_cvt_f32_f64_e32 v0, v[0:1] -; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) -; GFX11-SDAG-FAKE16-NEXT: v_cvt_f16_f32_e32 v1, v2 -; GFX11-SDAG-FAKE16-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-SDAG-FAKE16-NEXT: v_pack_b32_f16 v0, v0, v1 +; GFX11-SDAG-FAKE16-NEXT: v_readfirstlane_b32 s2, v3 +; GFX11-SDAG-FAKE16-NEXT: s_and_b32 s3, s2, 0x1ff +; GFX11-SDAG-FAKE16-NEXT: s_lshr_b32 s5, s2, 8 +; GFX11-SDAG-FAKE16-NEXT: v_or_b32_e32 v2, s3, v2 +; GFX11-SDAG-FAKE16-NEXT: s_bfe_u32 s3, s2, 0xb0014 +; GFX11-SDAG-FAKE16-NEXT: s_and_b32 s5, s5, 0xffe +; GFX11-SDAG-FAKE16-NEXT: s_sub_i32 s4, 0x3f1, s3 +; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_2) +; GFX11-SDAG-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v2 +; GFX11-SDAG-FAKE16-NEXT: v_med3_i32 v3, s4, 0, 13 +; GFX11-SDAG-FAKE16-NEXT: v_cndmask_b32_e64 v2, 0, 1, vcc_lo +; GFX11-SDAG-FAKE16-NEXT: v_readfirstlane_b32 s8, v3 +; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) +; GFX11-SDAG-FAKE16-NEXT: v_readfirstlane_b32 s4, v2 +; GFX11-SDAG-FAKE16-NEXT: s_or_b32 s4, s5, s4 +; GFX11-SDAG-FAKE16-NEXT: s_or_b32 s5, s4, 0x1000 +; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX11-SDAG-FAKE16-NEXT: s_lshr_b32 s9, s5, s8 +; GFX11-SDAG-FAKE16-NEXT: s_lshl_b32 s8, s9, s8 +; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_4) | instid1(SALU_CYCLE_1) +; GFX11-SDAG-FAKE16-NEXT: s_cmp_lg_u32 s8, s5 +; GFX11-SDAG-FAKE16-NEXT: s_cselect_b32 s5, 1, 0 +; GFX11-SDAG-FAKE16-NEXT: s_addk_i32 s3, 0xfc10 +; GFX11-SDAG-FAKE16-NEXT: s_or_b32 s5, s9, s5 +; GFX11-SDAG-FAKE16-NEXT: s_lshl_b32 s8, s3, 12 +; GFX11-SDAG-FAKE16-NEXT: s_or_b32 s8, s4, s8 +; GFX11-SDAG-FAKE16-NEXT: s_cmp_lt_i32 s3, 1 +; GFX11-SDAG-FAKE16-NEXT: s_cselect_b32 s5, s5, s8 +; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX11-SDAG-FAKE16-NEXT: s_and_b32 s8, s5, 7 +; GFX11-SDAG-FAKE16-NEXT: s_cmp_gt_i32 s8, 5 +; GFX11-SDAG-FAKE16-NEXT: s_cselect_b32 s9, 1, 0 +; GFX11-SDAG-FAKE16-NEXT: s_cmp_eq_u32 s8, 3 +; GFX11-SDAG-FAKE16-NEXT: s_cselect_b32 s8, 1, 0 +; GFX11-SDAG-FAKE16-NEXT: s_lshr_b32 s5, s5, 2 +; GFX11-SDAG-FAKE16-NEXT: s_or_b32 s8, s8, s9 +; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX11-SDAG-FAKE16-NEXT: s_add_i32 s5, s5, s8 +; GFX11-SDAG-FAKE16-NEXT: s_cmp_lt_i32 s3, 31 +; GFX11-SDAG-FAKE16-NEXT: s_movk_i32 s8, 0x7e00 +; GFX11-SDAG-FAKE16-NEXT: s_cselect_b32 s5, s5, 0x7c00 +; GFX11-SDAG-FAKE16-NEXT: s_cmp_lg_u32 s4, 0 +; GFX11-SDAG-FAKE16-NEXT: v_readfirstlane_b32 s4, v1 +; 
GFX11-SDAG-FAKE16-NEXT: s_cselect_b32 s9, s8, 0x7c00 +; GFX11-SDAG-FAKE16-NEXT: s_cmpk_eq_i32 s3, 0x40f +; GFX11-SDAG-FAKE16-NEXT: s_cselect_b32 s3, s9, s5 +; GFX11-SDAG-FAKE16-NEXT: s_and_b32 s5, s4, 0x1ff +; GFX11-SDAG-FAKE16-NEXT: s_lshr_b32 s10, s4, 8 +; GFX11-SDAG-FAKE16-NEXT: v_or_b32_e32 v0, s5, v0 +; GFX11-SDAG-FAKE16-NEXT: s_bfe_u32 s5, s4, 0xb0014 +; GFX11-SDAG-FAKE16-NEXT: s_and_b32 s10, s10, 0xffe +; GFX11-SDAG-FAKE16-NEXT: s_sub_i32 s9, 0x3f1, s5 +; GFX11-SDAG-FAKE16-NEXT: s_lshr_b32 s2, s2, 16 +; GFX11-SDAG-FAKE16-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 +; GFX11-SDAG-FAKE16-NEXT: v_med3_i32 v1, s9, 0, 13 +; GFX11-SDAG-FAKE16-NEXT: s_and_b32 s2, s2, 0x8000 +; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(VALU_DEP_2) +; GFX11-SDAG-FAKE16-NEXT: s_or_b32 s2, s2, s3 +; GFX11-SDAG-FAKE16-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc_lo +; GFX11-SDAG-FAKE16-NEXT: v_readfirstlane_b32 s11, v1 +; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) +; GFX11-SDAG-FAKE16-NEXT: v_readfirstlane_b32 s9, v0 +; GFX11-SDAG-FAKE16-NEXT: s_or_b32 s9, s10, s9 +; GFX11-SDAG-FAKE16-NEXT: s_or_b32 s10, s9, 0x1000 +; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX11-SDAG-FAKE16-NEXT: s_lshr_b32 s12, s10, s11 +; GFX11-SDAG-FAKE16-NEXT: s_lshl_b32 s11, s12, s11 +; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_4) | instid1(SALU_CYCLE_1) +; GFX11-SDAG-FAKE16-NEXT: s_cmp_lg_u32 s11, s10 +; GFX11-SDAG-FAKE16-NEXT: s_cselect_b32 s3, 1, 0 +; GFX11-SDAG-FAKE16-NEXT: s_addk_i32 s5, 0xfc10 +; GFX11-SDAG-FAKE16-NEXT: s_or_b32 s3, s12, s3 +; GFX11-SDAG-FAKE16-NEXT: s_lshl_b32 s10, s5, 12 +; GFX11-SDAG-FAKE16-NEXT: s_or_b32 s10, s9, s10 +; GFX11-SDAG-FAKE16-NEXT: s_cmp_lt_i32 s5, 1 +; GFX11-SDAG-FAKE16-NEXT: s_cselect_b32 s3, s3, s10 +; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX11-SDAG-FAKE16-NEXT: s_and_b32 s10, s3, 7 +; GFX11-SDAG-FAKE16-NEXT: s_cmp_gt_i32 s10, 5 +; GFX11-SDAG-FAKE16-NEXT: s_cselect_b32 s11, 1, 0 +; GFX11-SDAG-FAKE16-NEXT: s_cmp_eq_u32 s10, 3 +; GFX11-SDAG-FAKE16-NEXT: s_cselect_b32 s10, 1, 0 +; GFX11-SDAG-FAKE16-NEXT: s_lshr_b32 s3, s3, 2 +; GFX11-SDAG-FAKE16-NEXT: s_or_b32 s10, s10, s11 +; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX11-SDAG-FAKE16-NEXT: s_add_i32 s3, s3, s10 +; GFX11-SDAG-FAKE16-NEXT: s_cmp_lt_i32 s5, 31 +; GFX11-SDAG-FAKE16-NEXT: s_cselect_b32 s3, s3, 0x7c00 +; GFX11-SDAG-FAKE16-NEXT: s_cmp_lg_u32 s9, 0 +; GFX11-SDAG-FAKE16-NEXT: s_cselect_b32 s8, s8, 0x7c00 +; GFX11-SDAG-FAKE16-NEXT: s_cmpk_eq_i32 s5, 0x40f +; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s5, s1 +; GFX11-SDAG-FAKE16-NEXT: s_cselect_b32 s3, s8, s3 +; GFX11-SDAG-FAKE16-NEXT: s_lshr_b32 s4, s4, 16 +; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX11-SDAG-FAKE16-NEXT: s_and_b32 s4, s4, 0x8000 +; GFX11-SDAG-FAKE16-NEXT: s_or_b32 s3, s4, s3 +; GFX11-SDAG-FAKE16-NEXT: s_mov_b32 s4, s0 +; GFX11-SDAG-FAKE16-NEXT: s_pack_ll_b32_b16 s2, s3, s2 +; GFX11-SDAG-FAKE16-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX11-SDAG-FAKE16-NEXT: v_mov_b32_e32 v0, s2 ; GFX11-SDAG-FAKE16-NEXT: buffer_store_b32 v0, off, s[4:7], 0 ; GFX11-SDAG-FAKE16-NEXT: s_endpgm ; -; GFX11-GISEL-TRUE16-LABEL: fptrunc_v2f64_to_v2f16: +; GFX11-GISEL-TRUE16-LABEL: fptrunc_v2f64_to_v2f16_afn: ; GFX11-GISEL-TRUE16: ; %bb.0: ; %entry ; GFX11-GISEL-TRUE16-NEXT: s_load_b128 
s[0:3], s[4:5], 0x24 ; GFX11-GISEL-TRUE16-NEXT: s_waitcnt lgkmcnt(0) @@ -842,7 +3751,7 @@ define amdgpu_kernel void @fptrunc_v2f64_to_v2f16( ; GFX11-GISEL-TRUE16-NEXT: buffer_store_b32 v0, off, s[0:3], 0 ; GFX11-GISEL-TRUE16-NEXT: s_endpgm ; -; GFX11-GISEL-FAKE16-LABEL: fptrunc_v2f64_to_v2f16: +; GFX11-GISEL-FAKE16-LABEL: fptrunc_v2f64_to_v2f16_afn: ; GFX11-GISEL-FAKE16: ; %bb.0: ; %entry ; GFX11-GISEL-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 ; GFX11-GISEL-FAKE16-NEXT: s_waitcnt lgkmcnt(0) @@ -863,7 +3772,7 @@ define amdgpu_kernel void @fptrunc_v2f64_to_v2f16( ptr addrspace(1) %a) { entry: %a.val = load <2 x double>, ptr addrspace(1) %a - %r.val = fptrunc <2 x double> %a.val to <2 x half> + %r.val = fptrunc afn <2 x double> %a.val to <2 x half> store <2 x half> %r.val, ptr addrspace(1) %r ret void } diff --git a/llvm/test/CodeGen/AMDGPU/fptrunc.ll b/llvm/test/CodeGen/AMDGPU/fptrunc.ll index 2bd3659..4f8eab1 100644 --- a/llvm/test/CodeGen/AMDGPU/fptrunc.ll +++ b/llvm/test/CodeGen/AMDGPU/fptrunc.ll @@ -3,17 +3,15 @@ ; RUN: llc -mtriple=amdgcn -mcpu=tonga -global-isel=0 -mattr=-flat-for-global < %s | FileCheck -check-prefixes=VI-SDAG,VI-SAFE-SDAG %s ; RUN: llc -mtriple=amdgcn -mcpu=tonga -global-isel=1 -mattr=-flat-for-global < %s | FileCheck -check-prefixes=VI-GISEL,VI-SAFE-GISEL %s ; RUN: llc -mtriple=amdgcn -mcpu=tonga -global-isel=0 -mattr=-flat-for-global -enable-unsafe-fp-math < %s | FileCheck -check-prefixes=VI-SDAG,VI-UNSAFE-SDAG %s -; RUN: llc -mtriple=amdgcn -mcpu=tonga -global-isel=1 -mattr=-flat-for-global -enable-unsafe-fp-math < %s | FileCheck -check-prefixes=VI-GISEL,VI-UNSAFE-GISEL %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx1030 -global-isel=0 -mattr=-flat-for-global < %s | FileCheck -check-prefixes=GFX10-SDAG,GFX10-SAFE-SDAG %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx1030 -global-isel=1 -mattr=-flat-for-global < %s | FileCheck -check-prefixes=GFX10-GISEL,GFX10-SAFE-GISEL %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx1030 -global-isel=0 -mattr=-flat-for-global -enable-unsafe-fp-math < %s | FileCheck -check-prefixes=GFX10-SDAG,GFX10-UNSAFE-SDAG %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx1030 -global-isel=1 -mattr=-flat-for-global -enable-unsafe-fp-math < %s | FileCheck -check-prefixes=GFX10-GISEL,GFX10-UNSAFE-GISEL %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=0 -mattr=-flat-for-global < %s | FileCheck -check-prefixes=GFX11-SDAG,GFX11-SAFE-SDAG %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=1 -mattr=-flat-for-global < %s | FileCheck -check-prefixes=GFX11-GISEL,GFX11-SAFE-GISEL %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=0 -mattr=-flat-for-global,+real-true16 -enable-unsafe-fp-math < %s | FileCheck -check-prefixes=GFX11-SDAG,GFX11-UNSAFE-DAG-TRUE16 %s ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=0 -mattr=-flat-for-global,-real-true16 -enable-unsafe-fp-math < %s | FileCheck -check-prefixes=GFX11-SDAG,GFX11-UNSAFE-DAG-FAKE16 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=1 -mattr=-flat-for-global,+real-true16 -enable-unsafe-fp-math < %s | FileCheck -check-prefixes=GFX11-GISEL,GFX11-UNSAFE-GISEL-TRUE16 %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=1 -mattr=-flat-for-global,-real-true16 -enable-unsafe-fp-math < %s | FileCheck -check-prefixes=GFX11-GISEL,GFX11-UNSAFE-GISEL-FAKE16 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=1 -mattr=-flat-for-global,+real-true16 < %s | FileCheck -check-prefixes=GFX11-GISEL,GFX11-UNSAFE-GISEL-TRUE16 %s +; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -global-isel=1 
-mattr=-flat-for-global,-real-true16 < %s | FileCheck -check-prefixes=GFX11-GISEL,GFX11-UNSAFE-GISEL-FAKE16 %s define amdgpu_kernel void @fptrunc_f64_to_f32(ptr addrspace(1) %out, double %in) { ; SI-LABEL: fptrunc_f64_to_f32: @@ -94,6 +92,85 @@ define amdgpu_kernel void @fptrunc_f64_to_f32(ptr addrspace(1) %out, double %in) ret void } +define amdgpu_kernel void @fptrunc_f64_to_f32_afn(ptr addrspace(1) %out, double %in) { +; SI-LABEL: fptrunc_f64_to_f32_afn: +; SI: ; %bb.0: +; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x9 +; SI-NEXT: s_mov_b32 s7, 0xf000 +; SI-NEXT: s_mov_b32 s6, -1 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: s_mov_b32 s4, s0 +; SI-NEXT: s_mov_b32 s5, s1 +; SI-NEXT: v_cvt_f32_f64_e32 v0, s[2:3] +; SI-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; SI-NEXT: s_endpgm +; +; VI-SDAG-LABEL: fptrunc_f64_to_f32_afn: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; VI-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; VI-SDAG-NEXT: s_mov_b32 s6, -1 +; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; VI-SDAG-NEXT: v_cvt_f32_f64_e32 v0, s[2:3] +; VI-SDAG-NEXT: s_mov_b32 s4, s0 +; VI-SDAG-NEXT: s_mov_b32 s5, s1 +; VI-SDAG-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; VI-SDAG-NEXT: s_endpgm +; +; VI-GISEL-LABEL: fptrunc_f64_to_f32_afn: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; VI-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[2:3] +; VI-GISEL-NEXT: s_mov_b32 s2, -1 +; VI-GISEL-NEXT: s_mov_b32 s3, 0xf000 +; VI-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; VI-GISEL-NEXT: s_endpgm +; +; GFX10-SDAG-LABEL: fptrunc_f64_to_f32_afn: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX10-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-SDAG-NEXT: v_cvt_f32_f64_e32 v0, s[2:3] +; GFX10-SDAG-NEXT: s_mov_b32 s3, 0x31016000 +; GFX10-SDAG-NEXT: s_mov_b32 s2, -1 +; GFX10-SDAG-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; GFX10-SDAG-NEXT: s_endpgm +; +; GFX10-GISEL-LABEL: fptrunc_f64_to_f32_afn: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[2:3] +; GFX10-GISEL-NEXT: s_mov_b32 s2, -1 +; GFX10-GISEL-NEXT: s_mov_b32 s3, 0x31016000 +; GFX10-GISEL-NEXT: buffer_store_dword v0, off, s[0:3], 0 +; GFX10-GISEL-NEXT: s_endpgm +; +; GFX11-SDAG-LABEL: fptrunc_f64_to_f32_afn: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-SDAG-NEXT: v_cvt_f32_f64_e32 v0, s[2:3] +; GFX11-SDAG-NEXT: s_mov_b32 s3, 0x31016000 +; GFX11-SDAG-NEXT: s_mov_b32 s2, -1 +; GFX11-SDAG-NEXT: buffer_store_b32 v0, off, s[0:3], 0 +; GFX11-SDAG-NEXT: s_endpgm +; +; GFX11-GISEL-LABEL: fptrunc_f64_to_f32_afn: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[2:3] +; GFX11-GISEL-NEXT: s_mov_b32 s2, -1 +; GFX11-GISEL-NEXT: s_mov_b32 s3, 0x31016000 +; GFX11-GISEL-NEXT: buffer_store_b32 v0, off, s[0:3], 0 +; GFX11-GISEL-NEXT: s_endpgm + %result = fptrunc afn double %in to float + store float %result, ptr addrspace(1) %out + ret void +} + define amdgpu_kernel void @fptrunc_f64_to_f16(ptr addrspace(1) %out, double %in) { ; SI-LABEL: fptrunc_f64_to_f16: ; SI: ; %bb.0: @@ -203,56 +280,56 @@ define amdgpu_kernel void @fptrunc_f64_to_f16(ptr addrspace(1) %out, double %in) ; VI-SAFE-SDAG-NEXT: buffer_store_short v0, off, s[0:3], 0 ; VI-SAFE-SDAG-NEXT: 
s_endpgm ; -; VI-SAFE-GISEL-LABEL: fptrunc_f64_to_f16: -; VI-SAFE-GISEL: ; %bb.0: -; VI-SAFE-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; VI-SAFE-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; VI-SAFE-GISEL-NEXT: s_bfe_u32 s4, s3, 0xb0014 -; VI-SAFE-GISEL-NEXT: s_lshr_b32 s5, s3, 8 -; VI-SAFE-GISEL-NEXT: s_and_b32 s6, s3, 0x1ff -; VI-SAFE-GISEL-NEXT: s_addk_i32 s4, 0xfc10 -; VI-SAFE-GISEL-NEXT: s_and_b32 s5, s5, 0xffe -; VI-SAFE-GISEL-NEXT: s_or_b32 s2, s6, s2 -; VI-SAFE-GISEL-NEXT: s_cmp_lg_u32 s2, 0 -; VI-SAFE-GISEL-NEXT: s_cselect_b32 s2, 1, 0 -; VI-SAFE-GISEL-NEXT: s_or_b32 s2, s5, s2 -; VI-SAFE-GISEL-NEXT: s_cmp_lg_u32 s2, 0 -; VI-SAFE-GISEL-NEXT: s_cselect_b32 s5, 1, 0 -; VI-SAFE-GISEL-NEXT: s_sub_i32 s7, 1, s4 -; VI-SAFE-GISEL-NEXT: s_lshl_b32 s6, s4, 12 -; VI-SAFE-GISEL-NEXT: s_max_i32 s7, s7, 0 -; VI-SAFE-GISEL-NEXT: s_or_b32 s6, s2, s6 -; VI-SAFE-GISEL-NEXT: s_min_i32 s7, s7, 13 -; VI-SAFE-GISEL-NEXT: s_bitset1_b32 s2, 12 -; VI-SAFE-GISEL-NEXT: s_lshl_b32 s5, s5, 9 -; VI-SAFE-GISEL-NEXT: s_lshr_b32 s8, s2, s7 -; VI-SAFE-GISEL-NEXT: s_or_b32 s5, s5, 0x7c00 -; VI-SAFE-GISEL-NEXT: s_lshl_b32 s7, s8, s7 -; VI-SAFE-GISEL-NEXT: s_cmp_lg_u32 s7, s2 -; VI-SAFE-GISEL-NEXT: s_cselect_b32 s2, 1, 0 -; VI-SAFE-GISEL-NEXT: s_or_b32 s2, s8, s2 -; VI-SAFE-GISEL-NEXT: s_cmp_lt_i32 s4, 1 -; VI-SAFE-GISEL-NEXT: s_cselect_b32 s2, s2, s6 -; VI-SAFE-GISEL-NEXT: s_and_b32 s6, s2, 7 -; VI-SAFE-GISEL-NEXT: s_lshr_b32 s2, s2, 2 -; VI-SAFE-GISEL-NEXT: s_cmp_eq_u32 s6, 3 -; VI-SAFE-GISEL-NEXT: s_cselect_b32 s7, 1, 0 -; VI-SAFE-GISEL-NEXT: s_cmp_gt_i32 s6, 5 -; VI-SAFE-GISEL-NEXT: s_cselect_b32 s6, 1, 0 -; VI-SAFE-GISEL-NEXT: s_or_b32 s6, s7, s6 -; VI-SAFE-GISEL-NEXT: s_add_i32 s2, s2, s6 -; VI-SAFE-GISEL-NEXT: s_cmp_gt_i32 s4, 30 -; VI-SAFE-GISEL-NEXT: s_cselect_b32 s2, 0x7c00, s2 -; VI-SAFE-GISEL-NEXT: s_cmpk_eq_i32 s4, 0x40f -; VI-SAFE-GISEL-NEXT: s_cselect_b32 s2, s5, s2 -; VI-SAFE-GISEL-NEXT: s_lshr_b32 s3, s3, 16 -; VI-SAFE-GISEL-NEXT: s_and_b32 s3, s3, 0x8000 -; VI-SAFE-GISEL-NEXT: s_or_b32 s2, s3, s2 -; VI-SAFE-GISEL-NEXT: v_mov_b32_e32 v0, s2 -; VI-SAFE-GISEL-NEXT: s_mov_b32 s2, -1 -; VI-SAFE-GISEL-NEXT: s_mov_b32 s3, 0xf000 -; VI-SAFE-GISEL-NEXT: buffer_store_short v0, off, s[0:3], 0 -; VI-SAFE-GISEL-NEXT: s_endpgm +; VI-GISEL-LABEL: fptrunc_f64_to_f16: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; VI-GISEL-NEXT: s_bfe_u32 s4, s3, 0xb0014 +; VI-GISEL-NEXT: s_lshr_b32 s5, s3, 8 +; VI-GISEL-NEXT: s_and_b32 s6, s3, 0x1ff +; VI-GISEL-NEXT: s_addk_i32 s4, 0xfc10 +; VI-GISEL-NEXT: s_and_b32 s5, s5, 0xffe +; VI-GISEL-NEXT: s_or_b32 s2, s6, s2 +; VI-GISEL-NEXT: s_cmp_lg_u32 s2, 0 +; VI-GISEL-NEXT: s_cselect_b32 s2, 1, 0 +; VI-GISEL-NEXT: s_or_b32 s2, s5, s2 +; VI-GISEL-NEXT: s_cmp_lg_u32 s2, 0 +; VI-GISEL-NEXT: s_cselect_b32 s5, 1, 0 +; VI-GISEL-NEXT: s_sub_i32 s7, 1, s4 +; VI-GISEL-NEXT: s_lshl_b32 s6, s4, 12 +; VI-GISEL-NEXT: s_max_i32 s7, s7, 0 +; VI-GISEL-NEXT: s_or_b32 s6, s2, s6 +; VI-GISEL-NEXT: s_min_i32 s7, s7, 13 +; VI-GISEL-NEXT: s_bitset1_b32 s2, 12 +; VI-GISEL-NEXT: s_lshl_b32 s5, s5, 9 +; VI-GISEL-NEXT: s_lshr_b32 s8, s2, s7 +; VI-GISEL-NEXT: s_or_b32 s5, s5, 0x7c00 +; VI-GISEL-NEXT: s_lshl_b32 s7, s8, s7 +; VI-GISEL-NEXT: s_cmp_lg_u32 s7, s2 +; VI-GISEL-NEXT: s_cselect_b32 s2, 1, 0 +; VI-GISEL-NEXT: s_or_b32 s2, s8, s2 +; VI-GISEL-NEXT: s_cmp_lt_i32 s4, 1 +; VI-GISEL-NEXT: s_cselect_b32 s2, s2, s6 +; VI-GISEL-NEXT: s_and_b32 s6, s2, 7 +; VI-GISEL-NEXT: s_lshr_b32 s2, s2, 2 +; VI-GISEL-NEXT: s_cmp_eq_u32 s6, 3 +; 
VI-GISEL-NEXT: s_cselect_b32 s7, 1, 0 +; VI-GISEL-NEXT: s_cmp_gt_i32 s6, 5 +; VI-GISEL-NEXT: s_cselect_b32 s6, 1, 0 +; VI-GISEL-NEXT: s_or_b32 s6, s7, s6 +; VI-GISEL-NEXT: s_add_i32 s2, s2, s6 +; VI-GISEL-NEXT: s_cmp_gt_i32 s4, 30 +; VI-GISEL-NEXT: s_cselect_b32 s2, 0x7c00, s2 +; VI-GISEL-NEXT: s_cmpk_eq_i32 s4, 0x40f +; VI-GISEL-NEXT: s_cselect_b32 s2, s5, s2 +; VI-GISEL-NEXT: s_lshr_b32 s3, s3, 16 +; VI-GISEL-NEXT: s_and_b32 s3, s3, 0x8000 +; VI-GISEL-NEXT: s_or_b32 s2, s3, s2 +; VI-GISEL-NEXT: v_mov_b32_e32 v0, s2 +; VI-GISEL-NEXT: s_mov_b32 s2, -1 +; VI-GISEL-NEXT: s_mov_b32 s3, 0xf000 +; VI-GISEL-NEXT: buffer_store_short v0, off, s[0:3], 0 +; VI-GISEL-NEXT: s_endpgm ; ; VI-UNSAFE-SDAG-LABEL: fptrunc_f64_to_f16: ; VI-UNSAFE-SDAG: ; %bb.0: @@ -265,17 +342,6 @@ define amdgpu_kernel void @fptrunc_f64_to_f16(ptr addrspace(1) %out, double %in) ; VI-UNSAFE-SDAG-NEXT: buffer_store_short v0, off, s[0:3], 0 ; VI-UNSAFE-SDAG-NEXT: s_endpgm ; -; VI-UNSAFE-GISEL-LABEL: fptrunc_f64_to_f16: -; VI-UNSAFE-GISEL: ; %bb.0: -; VI-UNSAFE-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; VI-UNSAFE-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; VI-UNSAFE-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[2:3] -; VI-UNSAFE-GISEL-NEXT: s_mov_b32 s2, -1 -; VI-UNSAFE-GISEL-NEXT: s_mov_b32 s3, 0xf000 -; VI-UNSAFE-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 -; VI-UNSAFE-GISEL-NEXT: buffer_store_short v0, off, s[0:3], 0 -; VI-UNSAFE-GISEL-NEXT: s_endpgm -; ; GFX10-SAFE-SDAG-LABEL: fptrunc_f64_to_f16: ; GFX10-SAFE-SDAG: ; %bb.0: ; GFX10-SAFE-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 @@ -328,56 +394,56 @@ define amdgpu_kernel void @fptrunc_f64_to_f16(ptr addrspace(1) %out, double %in) ; GFX10-SAFE-SDAG-NEXT: buffer_store_short v0, off, s[0:3], 0 ; GFX10-SAFE-SDAG-NEXT: s_endpgm ; -; GFX10-SAFE-GISEL-LABEL: fptrunc_f64_to_f16: -; GFX10-SAFE-GISEL: ; %bb.0: -; GFX10-SAFE-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; GFX10-SAFE-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-SAFE-GISEL-NEXT: s_and_b32 s6, s3, 0x1ff -; GFX10-SAFE-GISEL-NEXT: s_bfe_u32 s4, s3, 0xb0014 -; GFX10-SAFE-GISEL-NEXT: s_lshr_b32 s5, s3, 8 -; GFX10-SAFE-GISEL-NEXT: s_or_b32 s2, s6, s2 -; GFX10-SAFE-GISEL-NEXT: s_addk_i32 s4, 0xfc10 -; GFX10-SAFE-GISEL-NEXT: s_and_b32 s5, s5, 0xffe -; GFX10-SAFE-GISEL-NEXT: s_cmp_lg_u32 s2, 0 -; GFX10-SAFE-GISEL-NEXT: s_cselect_b32 s2, 1, 0 -; GFX10-SAFE-GISEL-NEXT: s_or_b32 s2, s5, s2 -; GFX10-SAFE-GISEL-NEXT: s_cmp_lg_u32 s2, 0 -; GFX10-SAFE-GISEL-NEXT: s_cselect_b32 s5, 1, 0 -; GFX10-SAFE-GISEL-NEXT: s_sub_i32 s6, 1, s4 -; GFX10-SAFE-GISEL-NEXT: s_or_b32 s8, s2, 0x1000 -; GFX10-SAFE-GISEL-NEXT: s_max_i32 s6, s6, 0 -; GFX10-SAFE-GISEL-NEXT: s_lshl_b32 s7, s4, 12 -; GFX10-SAFE-GISEL-NEXT: s_min_i32 s6, s6, 13 -; GFX10-SAFE-GISEL-NEXT: s_lshl_b32 s5, s5, 9 -; GFX10-SAFE-GISEL-NEXT: s_lshr_b32 s9, s8, s6 -; GFX10-SAFE-GISEL-NEXT: s_or_b32 s2, s2, s7 -; GFX10-SAFE-GISEL-NEXT: s_lshl_b32 s6, s9, s6 -; GFX10-SAFE-GISEL-NEXT: s_or_b32 s5, s5, 0x7c00 -; GFX10-SAFE-GISEL-NEXT: s_cmp_lg_u32 s6, s8 -; GFX10-SAFE-GISEL-NEXT: s_cselect_b32 s6, 1, 0 -; GFX10-SAFE-GISEL-NEXT: s_or_b32 s6, s9, s6 -; GFX10-SAFE-GISEL-NEXT: s_cmp_lt_i32 s4, 1 -; GFX10-SAFE-GISEL-NEXT: s_cselect_b32 s2, s6, s2 -; GFX10-SAFE-GISEL-NEXT: s_and_b32 s6, s2, 7 -; GFX10-SAFE-GISEL-NEXT: s_lshr_b32 s2, s2, 2 -; GFX10-SAFE-GISEL-NEXT: s_cmp_eq_u32 s6, 3 -; GFX10-SAFE-GISEL-NEXT: s_cselect_b32 s7, 1, 0 -; GFX10-SAFE-GISEL-NEXT: s_cmp_gt_i32 s6, 5 -; GFX10-SAFE-GISEL-NEXT: s_cselect_b32 s6, 1, 0 -; GFX10-SAFE-GISEL-NEXT: s_or_b32 s6, s7, s6 -; GFX10-SAFE-GISEL-NEXT: s_add_i32 s2, s2, 
s6 -; GFX10-SAFE-GISEL-NEXT: s_cmp_gt_i32 s4, 30 -; GFX10-SAFE-GISEL-NEXT: s_cselect_b32 s2, 0x7c00, s2 -; GFX10-SAFE-GISEL-NEXT: s_cmpk_eq_i32 s4, 0x40f -; GFX10-SAFE-GISEL-NEXT: s_cselect_b32 s2, s5, s2 -; GFX10-SAFE-GISEL-NEXT: s_lshr_b32 s3, s3, 16 -; GFX10-SAFE-GISEL-NEXT: s_and_b32 s3, s3, 0x8000 -; GFX10-SAFE-GISEL-NEXT: s_or_b32 s2, s3, s2 -; GFX10-SAFE-GISEL-NEXT: s_mov_b32 s3, 0x31016000 -; GFX10-SAFE-GISEL-NEXT: v_mov_b32_e32 v0, s2 -; GFX10-SAFE-GISEL-NEXT: s_mov_b32 s2, -1 -; GFX10-SAFE-GISEL-NEXT: buffer_store_short v0, off, s[0:3], 0 -; GFX10-SAFE-GISEL-NEXT: s_endpgm +; GFX10-GISEL-LABEL: fptrunc_f64_to_f16: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-GISEL-NEXT: s_and_b32 s6, s3, 0x1ff +; GFX10-GISEL-NEXT: s_bfe_u32 s4, s3, 0xb0014 +; GFX10-GISEL-NEXT: s_lshr_b32 s5, s3, 8 +; GFX10-GISEL-NEXT: s_or_b32 s2, s6, s2 +; GFX10-GISEL-NEXT: s_addk_i32 s4, 0xfc10 +; GFX10-GISEL-NEXT: s_and_b32 s5, s5, 0xffe +; GFX10-GISEL-NEXT: s_cmp_lg_u32 s2, 0 +; GFX10-GISEL-NEXT: s_cselect_b32 s2, 1, 0 +; GFX10-GISEL-NEXT: s_or_b32 s2, s5, s2 +; GFX10-GISEL-NEXT: s_cmp_lg_u32 s2, 0 +; GFX10-GISEL-NEXT: s_cselect_b32 s5, 1, 0 +; GFX10-GISEL-NEXT: s_sub_i32 s6, 1, s4 +; GFX10-GISEL-NEXT: s_or_b32 s8, s2, 0x1000 +; GFX10-GISEL-NEXT: s_max_i32 s6, s6, 0 +; GFX10-GISEL-NEXT: s_lshl_b32 s7, s4, 12 +; GFX10-GISEL-NEXT: s_min_i32 s6, s6, 13 +; GFX10-GISEL-NEXT: s_lshl_b32 s5, s5, 9 +; GFX10-GISEL-NEXT: s_lshr_b32 s9, s8, s6 +; GFX10-GISEL-NEXT: s_or_b32 s2, s2, s7 +; GFX10-GISEL-NEXT: s_lshl_b32 s6, s9, s6 +; GFX10-GISEL-NEXT: s_or_b32 s5, s5, 0x7c00 +; GFX10-GISEL-NEXT: s_cmp_lg_u32 s6, s8 +; GFX10-GISEL-NEXT: s_cselect_b32 s6, 1, 0 +; GFX10-GISEL-NEXT: s_or_b32 s6, s9, s6 +; GFX10-GISEL-NEXT: s_cmp_lt_i32 s4, 1 +; GFX10-GISEL-NEXT: s_cselect_b32 s2, s6, s2 +; GFX10-GISEL-NEXT: s_and_b32 s6, s2, 7 +; GFX10-GISEL-NEXT: s_lshr_b32 s2, s2, 2 +; GFX10-GISEL-NEXT: s_cmp_eq_u32 s6, 3 +; GFX10-GISEL-NEXT: s_cselect_b32 s7, 1, 0 +; GFX10-GISEL-NEXT: s_cmp_gt_i32 s6, 5 +; GFX10-GISEL-NEXT: s_cselect_b32 s6, 1, 0 +; GFX10-GISEL-NEXT: s_or_b32 s6, s7, s6 +; GFX10-GISEL-NEXT: s_add_i32 s2, s2, s6 +; GFX10-GISEL-NEXT: s_cmp_gt_i32 s4, 30 +; GFX10-GISEL-NEXT: s_cselect_b32 s2, 0x7c00, s2 +; GFX10-GISEL-NEXT: s_cmpk_eq_i32 s4, 0x40f +; GFX10-GISEL-NEXT: s_cselect_b32 s2, s5, s2 +; GFX10-GISEL-NEXT: s_lshr_b32 s3, s3, 16 +; GFX10-GISEL-NEXT: s_and_b32 s3, s3, 0x8000 +; GFX10-GISEL-NEXT: s_or_b32 s2, s3, s2 +; GFX10-GISEL-NEXT: s_mov_b32 s3, 0x31016000 +; GFX10-GISEL-NEXT: v_mov_b32_e32 v0, s2 +; GFX10-GISEL-NEXT: s_mov_b32 s2, -1 +; GFX10-GISEL-NEXT: buffer_store_short v0, off, s[0:3], 0 +; GFX10-GISEL-NEXT: s_endpgm ; ; GFX10-UNSAFE-SDAG-LABEL: fptrunc_f64_to_f16: ; GFX10-UNSAFE-SDAG: ; %bb.0: @@ -390,17 +456,6 @@ define amdgpu_kernel void @fptrunc_f64_to_f16(ptr addrspace(1) %out, double %in) ; GFX10-UNSAFE-SDAG-NEXT: buffer_store_short v0, off, s[0:3], 0 ; GFX10-UNSAFE-SDAG-NEXT: s_endpgm ; -; GFX10-UNSAFE-GISEL-LABEL: fptrunc_f64_to_f16: -; GFX10-UNSAFE-GISEL: ; %bb.0: -; GFX10-UNSAFE-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 -; GFX10-UNSAFE-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX10-UNSAFE-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[2:3] -; GFX10-UNSAFE-GISEL-NEXT: s_mov_b32 s2, -1 -; GFX10-UNSAFE-GISEL-NEXT: s_mov_b32 s3, 0x31016000 -; GFX10-UNSAFE-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 -; GFX10-UNSAFE-GISEL-NEXT: buffer_store_short v0, off, s[0:3], 0 -; GFX10-UNSAFE-GISEL-NEXT: s_endpgm -; ; GFX11-SAFE-SDAG-LABEL: 
fptrunc_f64_to_f16: ; GFX11-SAFE-SDAG: ; %bb.0: ; GFX11-SAFE-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 @@ -461,62 +516,368 @@ define amdgpu_kernel void @fptrunc_f64_to_f16(ptr addrspace(1) %out, double %in) ; GFX11-SAFE-SDAG-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; GFX11-SAFE-SDAG-NEXT: s_endpgm ; -; GFX11-SAFE-GISEL-LABEL: fptrunc_f64_to_f16: +; GFX11-GISEL-LABEL: fptrunc_f64_to_f16: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-GISEL-NEXT: s_and_b32 s6, s3, 0x1ff +; GFX11-GISEL-NEXT: s_bfe_u32 s4, s3, 0xb0014 +; GFX11-GISEL-NEXT: s_lshr_b32 s5, s3, 8 +; GFX11-GISEL-NEXT: s_or_b32 s2, s6, s2 +; GFX11-GISEL-NEXT: s_addk_i32 s4, 0xfc10 +; GFX11-GISEL-NEXT: s_and_b32 s5, s5, 0xffe +; GFX11-GISEL-NEXT: s_cmp_lg_u32 s2, 0 +; GFX11-GISEL-NEXT: s_cselect_b32 s2, 1, 0 +; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX11-GISEL-NEXT: s_or_b32 s2, s5, s2 +; GFX11-GISEL-NEXT: s_cmp_lg_u32 s2, 0 +; GFX11-GISEL-NEXT: s_cselect_b32 s5, 1, 0 +; GFX11-GISEL-NEXT: s_sub_i32 s6, 1, s4 +; GFX11-GISEL-NEXT: s_or_b32 s8, s2, 0x1000 +; GFX11-GISEL-NEXT: s_max_i32 s6, s6, 0 +; GFX11-GISEL-NEXT: s_lshl_b32 s7, s4, 12 +; GFX11-GISEL-NEXT: s_min_i32 s6, s6, 13 +; GFX11-GISEL-NEXT: s_lshl_b32 s5, s5, 9 +; GFX11-GISEL-NEXT: s_lshr_b32 s9, s8, s6 +; GFX11-GISEL-NEXT: s_or_b32 s2, s2, s7 +; GFX11-GISEL-NEXT: s_lshl_b32 s6, s9, s6 +; GFX11-GISEL-NEXT: s_or_b32 s5, s5, 0x7c00 +; GFX11-GISEL-NEXT: s_cmp_lg_u32 s6, s8 +; GFX11-GISEL-NEXT: s_cselect_b32 s6, 1, 0 +; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1) +; GFX11-GISEL-NEXT: s_or_b32 s6, s9, s6 +; GFX11-GISEL-NEXT: s_cmp_lt_i32 s4, 1 +; GFX11-GISEL-NEXT: s_cselect_b32 s2, s6, s2 +; GFX11-GISEL-NEXT: s_and_b32 s6, s2, 7 +; GFX11-GISEL-NEXT: s_lshr_b32 s2, s2, 2 +; GFX11-GISEL-NEXT: s_cmp_eq_u32 s6, 3 +; GFX11-GISEL-NEXT: s_cselect_b32 s7, 1, 0 +; GFX11-GISEL-NEXT: s_cmp_gt_i32 s6, 5 +; GFX11-GISEL-NEXT: s_cselect_b32 s6, 1, 0 +; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX11-GISEL-NEXT: s_or_b32 s6, s7, s6 +; GFX11-GISEL-NEXT: s_add_i32 s2, s2, s6 +; GFX11-GISEL-NEXT: s_cmp_gt_i32 s4, 30 +; GFX11-GISEL-NEXT: s_cselect_b32 s2, 0x7c00, s2 +; GFX11-GISEL-NEXT: s_cmpk_eq_i32 s4, 0x40f +; GFX11-GISEL-NEXT: s_cselect_b32 s2, s5, s2 +; GFX11-GISEL-NEXT: s_lshr_b32 s3, s3, 16 +; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX11-GISEL-NEXT: s_and_b32 s3, s3, 0x8000 +; GFX11-GISEL-NEXT: s_or_b32 s2, s3, s2 +; GFX11-GISEL-NEXT: s_mov_b32 s3, 0x31016000 +; GFX11-GISEL-NEXT: v_mov_b32_e32 v0, s2 +; GFX11-GISEL-NEXT: s_mov_b32 s2, -1 +; GFX11-GISEL-NEXT: buffer_store_b16 v0, off, s[0:3], 0 +; GFX11-GISEL-NEXT: s_endpgm +; +; GFX11-UNSAFE-DAG-TRUE16-LABEL: fptrunc_f64_to_f16: +; GFX11-UNSAFE-DAG-TRUE16: ; %bb.0: +; GFX11-UNSAFE-DAG-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX11-UNSAFE-DAG-TRUE16-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-UNSAFE-DAG-TRUE16-NEXT: v_cvt_f32_f64_e32 v0, s[2:3] +; GFX11-UNSAFE-DAG-TRUE16-NEXT: s_mov_b32 s3, 0x31016000 +; GFX11-UNSAFE-DAG-TRUE16-NEXT: s_mov_b32 s2, -1 +; GFX11-UNSAFE-DAG-TRUE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-UNSAFE-DAG-TRUE16-NEXT: v_cvt_f16_f32_e32 v0.l, v0 +; GFX11-UNSAFE-DAG-TRUE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0 +; GFX11-UNSAFE-DAG-TRUE16-NEXT: s_endpgm +; +; GFX11-UNSAFE-DAG-FAKE16-LABEL: fptrunc_f64_to_f16: +; 
GFX11-UNSAFE-DAG-FAKE16: ; %bb.0: +; GFX11-UNSAFE-DAG-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX11-UNSAFE-DAG-FAKE16-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-UNSAFE-DAG-FAKE16-NEXT: v_cvt_f32_f64_e32 v0, s[2:3] +; GFX11-UNSAFE-DAG-FAKE16-NEXT: s_mov_b32 s3, 0x31016000 +; GFX11-UNSAFE-DAG-FAKE16-NEXT: s_mov_b32 s2, -1 +; GFX11-UNSAFE-DAG-FAKE16-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-UNSAFE-DAG-FAKE16-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX11-UNSAFE-DAG-FAKE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0 +; GFX11-UNSAFE-DAG-FAKE16-NEXT: s_endpgm + %result = fptrunc double %in to half + %result_i16 = bitcast half %result to i16 + store i16 %result_i16, ptr addrspace(1) %out + ret void +} + +define amdgpu_kernel void @fptrunc_f64_to_f16_afn(ptr addrspace(1) %out, double %in) { +; SI-LABEL: fptrunc_f64_to_f16_afn: +; SI: ; %bb.0: +; SI-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x9 +; SI-NEXT: s_mov_b32 s3, 0xf000 +; SI-NEXT: s_movk_i32 s2, 0x7e00 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: s_lshr_b32 s0, s7, 8 +; SI-NEXT: s_and_b32 s1, s7, 0x1ff +; SI-NEXT: s_and_b32 s8, s0, 0xffe +; SI-NEXT: s_or_b32 s0, s1, s6 +; SI-NEXT: s_cmp_lg_u32 s0, 0 +; SI-NEXT: s_cselect_b64 s[0:1], -1, 0 +; SI-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[0:1] +; SI-NEXT: s_bfe_u32 s0, s7, 0xb0014 +; SI-NEXT: v_readfirstlane_b32 s1, v0 +; SI-NEXT: s_sub_i32 s6, 0x3f1, s0 +; SI-NEXT: s_or_b32 s1, s8, s1 +; SI-NEXT: v_med3_i32 v0, s6, 0, 13 +; SI-NEXT: s_or_b32 s6, s1, 0x1000 +; SI-NEXT: v_readfirstlane_b32 s8, v0 +; SI-NEXT: s_lshr_b32 s9, s6, s8 +; SI-NEXT: s_lshl_b32 s8, s9, s8 +; SI-NEXT: s_cmp_lg_u32 s8, s6 +; SI-NEXT: s_cselect_b32 s6, 1, 0 +; SI-NEXT: s_addk_i32 s0, 0xfc10 +; SI-NEXT: s_or_b32 s6, s9, s6 +; SI-NEXT: s_lshl_b32 s8, s0, 12 +; SI-NEXT: s_or_b32 s8, s1, s8 +; SI-NEXT: s_cmp_lt_i32 s0, 1 +; SI-NEXT: s_cselect_b32 s6, s6, s8 +; SI-NEXT: s_and_b32 s8, s6, 7 +; SI-NEXT: s_cmp_gt_i32 s8, 5 +; SI-NEXT: s_cselect_b32 s9, 1, 0 +; SI-NEXT: s_cmp_eq_u32 s8, 3 +; SI-NEXT: s_cselect_b32 s8, 1, 0 +; SI-NEXT: s_lshr_b32 s6, s6, 2 +; SI-NEXT: s_or_b32 s8, s8, s9 +; SI-NEXT: s_add_i32 s6, s6, s8 +; SI-NEXT: s_cmp_lt_i32 s0, 31 +; SI-NEXT: s_cselect_b32 s6, s6, 0x7c00 +; SI-NEXT: s_cmp_lg_u32 s1, 0 +; SI-NEXT: s_cselect_b32 s1, s2, 0x7c00 +; SI-NEXT: s_cmpk_eq_i32 s0, 0x40f +; SI-NEXT: s_cselect_b32 s0, s1, s6 +; SI-NEXT: s_lshr_b32 s1, s7, 16 +; SI-NEXT: s_and_b32 s1, s1, 0x8000 +; SI-NEXT: s_or_b32 s6, s1, s0 +; SI-NEXT: s_mov_b32 s2, -1 +; SI-NEXT: s_mov_b32 s0, s4 +; SI-NEXT: s_mov_b32 s1, s5 +; SI-NEXT: v_mov_b32_e32 v0, s6 +; SI-NEXT: buffer_store_short v0, off, s[0:3], 0 +; SI-NEXT: s_endpgm +; +; VI-SAFE-SDAG-LABEL: fptrunc_f64_to_f16_afn: +; VI-SAFE-SDAG: ; %bb.0: +; VI-SAFE-SDAG-NEXT: s_load_dwordx4 s[4:7], s[4:5], 0x24 +; VI-SAFE-SDAG-NEXT: s_mov_b32 s3, 0xf000 +; VI-SAFE-SDAG-NEXT: s_mov_b32 s2, -1 +; VI-SAFE-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; VI-SAFE-SDAG-NEXT: s_mov_b32 s0, s4 +; VI-SAFE-SDAG-NEXT: s_lshr_b32 s4, s7, 8 +; VI-SAFE-SDAG-NEXT: s_and_b32 s8, s4, 0xffe +; VI-SAFE-SDAG-NEXT: s_and_b32 s4, s7, 0x1ff +; VI-SAFE-SDAG-NEXT: s_or_b32 s4, s4, s6 +; VI-SAFE-SDAG-NEXT: s_cmp_lg_u32 s4, 0 +; VI-SAFE-SDAG-NEXT: s_mov_b32 s1, s5 +; VI-SAFE-SDAG-NEXT: s_cselect_b64 s[4:5], -1, 0 +; VI-SAFE-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, s[4:5] +; VI-SAFE-SDAG-NEXT: v_readfirstlane_b32 s4, v0 +; VI-SAFE-SDAG-NEXT: s_bfe_u32 s6, s7, 0xb0014 +; VI-SAFE-SDAG-NEXT: s_or_b32 s4, s8, s4 +; VI-SAFE-SDAG-NEXT: s_sub_i32 s8, 0x3f1, s6 +; VI-SAFE-SDAG-NEXT: v_med3_i32 v0, s8, 0, 13 +; VI-SAFE-SDAG-NEXT: s_or_b32 s5, s4, 0x1000 
+; VI-SAFE-SDAG-NEXT: v_readfirstlane_b32 s8, v0 +; VI-SAFE-SDAG-NEXT: s_lshr_b32 s9, s5, s8 +; VI-SAFE-SDAG-NEXT: s_lshl_b32 s8, s9, s8 +; VI-SAFE-SDAG-NEXT: s_cmp_lg_u32 s8, s5 +; VI-SAFE-SDAG-NEXT: s_cselect_b32 s5, 1, 0 +; VI-SAFE-SDAG-NEXT: s_addk_i32 s6, 0xfc10 +; VI-SAFE-SDAG-NEXT: s_lshl_b32 s8, s6, 12 +; VI-SAFE-SDAG-NEXT: s_or_b32 s5, s9, s5 +; VI-SAFE-SDAG-NEXT: s_or_b32 s8, s4, s8 +; VI-SAFE-SDAG-NEXT: s_cmp_lt_i32 s6, 1 +; VI-SAFE-SDAG-NEXT: s_cselect_b32 s5, s5, s8 +; VI-SAFE-SDAG-NEXT: s_and_b32 s8, s5, 7 +; VI-SAFE-SDAG-NEXT: s_cmp_gt_i32 s8, 5 +; VI-SAFE-SDAG-NEXT: s_cselect_b32 s9, 1, 0 +; VI-SAFE-SDAG-NEXT: s_cmp_eq_u32 s8, 3 +; VI-SAFE-SDAG-NEXT: s_cselect_b32 s8, 1, 0 +; VI-SAFE-SDAG-NEXT: s_or_b32 s8, s8, s9 +; VI-SAFE-SDAG-NEXT: s_lshr_b32 s5, s5, 2 +; VI-SAFE-SDAG-NEXT: s_add_i32 s5, s5, s8 +; VI-SAFE-SDAG-NEXT: s_cmp_lt_i32 s6, 31 +; VI-SAFE-SDAG-NEXT: s_cselect_b32 s5, s5, 0x7c00 +; VI-SAFE-SDAG-NEXT: s_cmp_lg_u32 s4, 0 +; VI-SAFE-SDAG-NEXT: s_movk_i32 s4, 0x7e00 +; VI-SAFE-SDAG-NEXT: s_cselect_b32 s4, s4, 0x7c00 +; VI-SAFE-SDAG-NEXT: s_cmpk_eq_i32 s6, 0x40f +; VI-SAFE-SDAG-NEXT: s_cselect_b32 s4, s4, s5 +; VI-SAFE-SDAG-NEXT: s_lshr_b32 s5, s7, 16 +; VI-SAFE-SDAG-NEXT: s_and_b32 s5, s5, 0x8000 +; VI-SAFE-SDAG-NEXT: s_or_b32 s4, s5, s4 +; VI-SAFE-SDAG-NEXT: v_mov_b32_e32 v0, s4 +; VI-SAFE-SDAG-NEXT: buffer_store_short v0, off, s[0:3], 0 +; VI-SAFE-SDAG-NEXT: s_endpgm +; +; VI-GISEL-LABEL: fptrunc_f64_to_f16_afn: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; VI-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[2:3] +; VI-GISEL-NEXT: s_mov_b32 s2, -1 +; VI-GISEL-NEXT: s_mov_b32 s3, 0xf000 +; VI-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; VI-GISEL-NEXT: buffer_store_short v0, off, s[0:3], 0 +; VI-GISEL-NEXT: s_endpgm +; +; VI-UNSAFE-SDAG-LABEL: fptrunc_f64_to_f16_afn: +; VI-UNSAFE-SDAG: ; %bb.0: +; VI-UNSAFE-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; VI-UNSAFE-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; VI-UNSAFE-SDAG-NEXT: v_cvt_f32_f64_e32 v0, s[2:3] +; VI-UNSAFE-SDAG-NEXT: s_mov_b32 s3, 0xf000 +; VI-UNSAFE-SDAG-NEXT: s_mov_b32 s2, -1 +; VI-UNSAFE-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; VI-UNSAFE-SDAG-NEXT: buffer_store_short v0, off, s[0:3], 0 +; VI-UNSAFE-SDAG-NEXT: s_endpgm +; +; GFX10-SAFE-SDAG-LABEL: fptrunc_f64_to_f16_afn: +; GFX10-SAFE-SDAG: ; %bb.0: +; GFX10-SAFE-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX10-SAFE-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-SAFE-SDAG-NEXT: s_and_b32 s4, s3, 0x1ff +; GFX10-SAFE-SDAG-NEXT: s_lshr_b32 s5, s3, 8 +; GFX10-SAFE-SDAG-NEXT: s_or_b32 s2, s4, s2 +; GFX10-SAFE-SDAG-NEXT: s_and_b32 s4, s5, 0xffe +; GFX10-SAFE-SDAG-NEXT: s_cmp_lg_u32 s2, 0 +; GFX10-SAFE-SDAG-NEXT: s_cselect_b32 s2, -1, 0 +; GFX10-SAFE-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, s2 +; GFX10-SAFE-SDAG-NEXT: s_bfe_u32 s2, s3, 0xb0014 +; GFX10-SAFE-SDAG-NEXT: s_sub_i32 s5, 0x3f1, s2 +; GFX10-SAFE-SDAG-NEXT: v_med3_i32 v1, s5, 0, 13 +; GFX10-SAFE-SDAG-NEXT: v_readfirstlane_b32 s5, v0 +; GFX10-SAFE-SDAG-NEXT: v_readfirstlane_b32 s6, v1 +; GFX10-SAFE-SDAG-NEXT: s_or_b32 s4, s4, s5 +; GFX10-SAFE-SDAG-NEXT: s_or_b32 s5, s4, 0x1000 +; GFX10-SAFE-SDAG-NEXT: s_lshr_b32 s7, s5, s6 +; GFX10-SAFE-SDAG-NEXT: s_lshl_b32 s6, s7, s6 +; GFX10-SAFE-SDAG-NEXT: s_cmp_lg_u32 s6, s5 +; GFX10-SAFE-SDAG-NEXT: s_cselect_b32 s5, 1, 0 +; GFX10-SAFE-SDAG-NEXT: s_addk_i32 s2, 0xfc10 +; GFX10-SAFE-SDAG-NEXT: s_or_b32 s5, s7, s5 +; GFX10-SAFE-SDAG-NEXT: s_lshl_b32 s6, s2, 12 +; GFX10-SAFE-SDAG-NEXT: s_or_b32 s6, s4, s6 +; 
GFX10-SAFE-SDAG-NEXT: s_cmp_lt_i32 s2, 1 +; GFX10-SAFE-SDAG-NEXT: s_cselect_b32 s5, s5, s6 +; GFX10-SAFE-SDAG-NEXT: s_and_b32 s6, s5, 7 +; GFX10-SAFE-SDAG-NEXT: s_cmp_gt_i32 s6, 5 +; GFX10-SAFE-SDAG-NEXT: s_cselect_b32 s7, 1, 0 +; GFX10-SAFE-SDAG-NEXT: s_cmp_eq_u32 s6, 3 +; GFX10-SAFE-SDAG-NEXT: s_cselect_b32 s6, 1, 0 +; GFX10-SAFE-SDAG-NEXT: s_lshr_b32 s5, s5, 2 +; GFX10-SAFE-SDAG-NEXT: s_or_b32 s6, s6, s7 +; GFX10-SAFE-SDAG-NEXT: s_add_i32 s5, s5, s6 +; GFX10-SAFE-SDAG-NEXT: s_cmp_lt_i32 s2, 31 +; GFX10-SAFE-SDAG-NEXT: s_movk_i32 s6, 0x7e00 +; GFX10-SAFE-SDAG-NEXT: s_cselect_b32 s5, s5, 0x7c00 +; GFX10-SAFE-SDAG-NEXT: s_cmp_lg_u32 s4, 0 +; GFX10-SAFE-SDAG-NEXT: s_cselect_b32 s4, s6, 0x7c00 +; GFX10-SAFE-SDAG-NEXT: s_cmpk_eq_i32 s2, 0x40f +; GFX10-SAFE-SDAG-NEXT: s_cselect_b32 s2, s4, s5 +; GFX10-SAFE-SDAG-NEXT: s_lshr_b32 s3, s3, 16 +; GFX10-SAFE-SDAG-NEXT: s_and_b32 s3, s3, 0x8000 +; GFX10-SAFE-SDAG-NEXT: s_or_b32 s2, s3, s2 +; GFX10-SAFE-SDAG-NEXT: s_mov_b32 s3, 0x31016000 +; GFX10-SAFE-SDAG-NEXT: v_mov_b32_e32 v0, s2 +; GFX10-SAFE-SDAG-NEXT: s_mov_b32 s2, -1 +; GFX10-SAFE-SDAG-NEXT: buffer_store_short v0, off, s[0:3], 0 +; GFX10-SAFE-SDAG-NEXT: s_endpgm +; +; GFX10-GISEL-LABEL: fptrunc_f64_to_f16_afn: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[2:3] +; GFX10-GISEL-NEXT: s_mov_b32 s2, -1 +; GFX10-GISEL-NEXT: s_mov_b32 s3, 0x31016000 +; GFX10-GISEL-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX10-GISEL-NEXT: buffer_store_short v0, off, s[0:3], 0 +; GFX10-GISEL-NEXT: s_endpgm +; +; GFX10-UNSAFE-SDAG-LABEL: fptrunc_f64_to_f16_afn: +; GFX10-UNSAFE-SDAG: ; %bb.0: +; GFX10-UNSAFE-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24 +; GFX10-UNSAFE-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-UNSAFE-SDAG-NEXT: v_cvt_f32_f64_e32 v0, s[2:3] +; GFX10-UNSAFE-SDAG-NEXT: s_mov_b32 s3, 0x31016000 +; GFX10-UNSAFE-SDAG-NEXT: s_mov_b32 s2, -1 +; GFX10-UNSAFE-SDAG-NEXT: v_cvt_f16_f32_e32 v0, v0 +; GFX10-UNSAFE-SDAG-NEXT: buffer_store_short v0, off, s[0:3], 0 +; GFX10-UNSAFE-SDAG-NEXT: s_endpgm +; +; GFX11-SAFE-SDAG-LABEL: fptrunc_f64_to_f16_afn: +; GFX11-SAFE-SDAG: ; %bb.0: +; GFX11-SAFE-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 +; GFX11-SAFE-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-SAFE-SDAG-NEXT: s_and_b32 s4, s3, 0x1ff +; GFX11-SAFE-SDAG-NEXT: s_lshr_b32 s5, s3, 8 +; GFX11-SAFE-SDAG-NEXT: s_or_b32 s2, s4, s2 +; GFX11-SAFE-SDAG-NEXT: s_and_b32 s4, s5, 0xffe +; GFX11-SAFE-SDAG-NEXT: s_cmp_lg_u32 s2, 0 +; GFX11-SAFE-SDAG-NEXT: s_cselect_b32 s2, -1, 0 +; GFX11-SAFE-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) +; GFX11-SAFE-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 1, s2 +; GFX11-SAFE-SDAG-NEXT: s_bfe_u32 s2, s3, 0xb0014 +; GFX11-SAFE-SDAG-NEXT: s_sub_i32 s5, 0x3f1, s2 +; GFX11-SAFE-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_2) +; GFX11-SAFE-SDAG-NEXT: v_med3_i32 v1, s5, 0, 13 +; GFX11-SAFE-SDAG-NEXT: v_readfirstlane_b32 s5, v0 +; GFX11-SAFE-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(SKIP_1) | instid1(SALU_CYCLE_1) +; GFX11-SAFE-SDAG-NEXT: v_readfirstlane_b32 s6, v1 +; GFX11-SAFE-SDAG-NEXT: s_or_b32 s4, s4, s5 +; GFX11-SAFE-SDAG-NEXT: s_or_b32 s5, s4, 0x1000 +; GFX11-SAFE-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX11-SAFE-SDAG-NEXT: s_lshr_b32 s7, s5, s6 +; GFX11-SAFE-SDAG-NEXT: s_lshl_b32 s6, s7, s6 +; GFX11-SAFE-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | 
instskip(SKIP_4) | instid1(SALU_CYCLE_1) +; GFX11-SAFE-SDAG-NEXT: s_cmp_lg_u32 s6, s5 +; GFX11-SAFE-SDAG-NEXT: s_cselect_b32 s5, 1, 0 +; GFX11-SAFE-SDAG-NEXT: s_addk_i32 s2, 0xfc10 +; GFX11-SAFE-SDAG-NEXT: s_or_b32 s5, s7, s5 +; GFX11-SAFE-SDAG-NEXT: s_lshl_b32 s6, s2, 12 +; GFX11-SAFE-SDAG-NEXT: s_or_b32 s6, s4, s6 +; GFX11-SAFE-SDAG-NEXT: s_cmp_lt_i32 s2, 1 +; GFX11-SAFE-SDAG-NEXT: s_cselect_b32 s5, s5, s6 +; GFX11-SAFE-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX11-SAFE-SDAG-NEXT: s_and_b32 s6, s5, 7 +; GFX11-SAFE-SDAG-NEXT: s_cmp_gt_i32 s6, 5 +; GFX11-SAFE-SDAG-NEXT: s_cselect_b32 s7, 1, 0 +; GFX11-SAFE-SDAG-NEXT: s_cmp_eq_u32 s6, 3 +; GFX11-SAFE-SDAG-NEXT: s_cselect_b32 s6, 1, 0 +; GFX11-SAFE-SDAG-NEXT: s_lshr_b32 s5, s5, 2 +; GFX11-SAFE-SDAG-NEXT: s_or_b32 s6, s6, s7 +; GFX11-SAFE-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) +; GFX11-SAFE-SDAG-NEXT: s_add_i32 s5, s5, s6 +; GFX11-SAFE-SDAG-NEXT: s_cmp_lt_i32 s2, 31 +; GFX11-SAFE-SDAG-NEXT: s_movk_i32 s6, 0x7e00 +; GFX11-SAFE-SDAG-NEXT: s_cselect_b32 s5, s5, 0x7c00 +; GFX11-SAFE-SDAG-NEXT: s_cmp_lg_u32 s4, 0 +; GFX11-SAFE-SDAG-NEXT: s_cselect_b32 s4, s6, 0x7c00 +; GFX11-SAFE-SDAG-NEXT: s_cmpk_eq_i32 s2, 0x40f +; GFX11-SAFE-SDAG-NEXT: s_cselect_b32 s2, s4, s5 +; GFX11-SAFE-SDAG-NEXT: s_lshr_b32 s3, s3, 16 +; GFX11-SAFE-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) +; GFX11-SAFE-SDAG-NEXT: s_and_b32 s3, s3, 0x8000 +; GFX11-SAFE-SDAG-NEXT: s_or_b32 s2, s3, s2 +; GFX11-SAFE-SDAG-NEXT: s_mov_b32 s3, 0x31016000 +; GFX11-SAFE-SDAG-NEXT: v_mov_b32_e32 v0, s2 +; GFX11-SAFE-SDAG-NEXT: s_mov_b32 s2, -1 +; GFX11-SAFE-SDAG-NEXT: buffer_store_b16 v0, off, s[0:3], 0 +; GFX11-SAFE-SDAG-NEXT: s_endpgm +; +; GFX11-SAFE-GISEL-LABEL: fptrunc_f64_to_f16_afn: ; GFX11-SAFE-GISEL: ; %bb.0: ; GFX11-SAFE-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 ; GFX11-SAFE-GISEL-NEXT: s_waitcnt lgkmcnt(0) -; GFX11-SAFE-GISEL-NEXT: s_and_b32 s6, s3, 0x1ff -; GFX11-SAFE-GISEL-NEXT: s_bfe_u32 s4, s3, 0xb0014 -; GFX11-SAFE-GISEL-NEXT: s_lshr_b32 s5, s3, 8 -; GFX11-SAFE-GISEL-NEXT: s_or_b32 s2, s6, s2 -; GFX11-SAFE-GISEL-NEXT: s_addk_i32 s4, 0xfc10 -; GFX11-SAFE-GISEL-NEXT: s_and_b32 s5, s5, 0xffe -; GFX11-SAFE-GISEL-NEXT: s_cmp_lg_u32 s2, 0 -; GFX11-SAFE-GISEL-NEXT: s_cselect_b32 s2, 1, 0 -; GFX11-SAFE-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GFX11-SAFE-GISEL-NEXT: s_or_b32 s2, s5, s2 -; GFX11-SAFE-GISEL-NEXT: s_cmp_lg_u32 s2, 0 -; GFX11-SAFE-GISEL-NEXT: s_cselect_b32 s5, 1, 0 -; GFX11-SAFE-GISEL-NEXT: s_sub_i32 s6, 1, s4 -; GFX11-SAFE-GISEL-NEXT: s_or_b32 s8, s2, 0x1000 -; GFX11-SAFE-GISEL-NEXT: s_max_i32 s6, s6, 0 -; GFX11-SAFE-GISEL-NEXT: s_lshl_b32 s7, s4, 12 -; GFX11-SAFE-GISEL-NEXT: s_min_i32 s6, s6, 13 -; GFX11-SAFE-GISEL-NEXT: s_lshl_b32 s5, s5, 9 -; GFX11-SAFE-GISEL-NEXT: s_lshr_b32 s9, s8, s6 -; GFX11-SAFE-GISEL-NEXT: s_or_b32 s2, s2, s7 -; GFX11-SAFE-GISEL-NEXT: s_lshl_b32 s6, s9, s6 -; GFX11-SAFE-GISEL-NEXT: s_or_b32 s5, s5, 0x7c00 -; GFX11-SAFE-GISEL-NEXT: s_cmp_lg_u32 s6, s8 -; GFX11-SAFE-GISEL-NEXT: s_cselect_b32 s6, 1, 0 -; GFX11-SAFE-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(SKIP_2) | instid1(SALU_CYCLE_1) -; GFX11-SAFE-GISEL-NEXT: s_or_b32 s6, s9, s6 -; GFX11-SAFE-GISEL-NEXT: s_cmp_lt_i32 s4, 1 -; GFX11-SAFE-GISEL-NEXT: s_cselect_b32 s2, s6, s2 -; GFX11-SAFE-GISEL-NEXT: s_and_b32 s6, s2, 7 -; GFX11-SAFE-GISEL-NEXT: s_lshr_b32 s2, s2, 2 -; GFX11-SAFE-GISEL-NEXT: s_cmp_eq_u32 s6, 3 -; GFX11-SAFE-GISEL-NEXT: 
s_cselect_b32 s7, 1, 0 -; GFX11-SAFE-GISEL-NEXT: s_cmp_gt_i32 s6, 5 -; GFX11-SAFE-GISEL-NEXT: s_cselect_b32 s6, 1, 0 -; GFX11-SAFE-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GFX11-SAFE-GISEL-NEXT: s_or_b32 s6, s7, s6 -; GFX11-SAFE-GISEL-NEXT: s_add_i32 s2, s2, s6 -; GFX11-SAFE-GISEL-NEXT: s_cmp_gt_i32 s4, 30 -; GFX11-SAFE-GISEL-NEXT: s_cselect_b32 s2, 0x7c00, s2 -; GFX11-SAFE-GISEL-NEXT: s_cmpk_eq_i32 s4, 0x40f -; GFX11-SAFE-GISEL-NEXT: s_cselect_b32 s2, s5, s2 -; GFX11-SAFE-GISEL-NEXT: s_lshr_b32 s3, s3, 16 -; GFX11-SAFE-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(SALU_CYCLE_1) -; GFX11-SAFE-GISEL-NEXT: s_and_b32 s3, s3, 0x8000 -; GFX11-SAFE-GISEL-NEXT: s_or_b32 s2, s3, s2 -; GFX11-SAFE-GISEL-NEXT: s_mov_b32 s3, 0x31016000 -; GFX11-SAFE-GISEL-NEXT: v_mov_b32_e32 v0, s2 +; GFX11-SAFE-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[2:3] ; GFX11-SAFE-GISEL-NEXT: s_mov_b32 s2, -1 +; GFX11-SAFE-GISEL-NEXT: s_mov_b32 s3, 0x31016000 +; GFX11-SAFE-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_1) +; GFX11-SAFE-GISEL-NEXT: v_cvt_f16_f32_e32 v0.l, v0 ; GFX11-SAFE-GISEL-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; GFX11-SAFE-GISEL-NEXT: s_endpgm ; -; GFX11-UNSAFE-DAG-TRUE16-LABEL: fptrunc_f64_to_f16: +; GFX11-UNSAFE-DAG-TRUE16-LABEL: fptrunc_f64_to_f16_afn: ; GFX11-UNSAFE-DAG-TRUE16: ; %bb.0: ; GFX11-UNSAFE-DAG-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 ; GFX11-UNSAFE-DAG-TRUE16-NEXT: s_waitcnt lgkmcnt(0) @@ -528,7 +889,7 @@ define amdgpu_kernel void @fptrunc_f64_to_f16(ptr addrspace(1) %out, double %in) ; GFX11-UNSAFE-DAG-TRUE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; GFX11-UNSAFE-DAG-TRUE16-NEXT: s_endpgm ; -; GFX11-UNSAFE-DAG-FAKE16-LABEL: fptrunc_f64_to_f16: +; GFX11-UNSAFE-DAG-FAKE16-LABEL: fptrunc_f64_to_f16_afn: ; GFX11-UNSAFE-DAG-FAKE16: ; %bb.0: ; GFX11-UNSAFE-DAG-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 ; GFX11-UNSAFE-DAG-FAKE16-NEXT: s_waitcnt lgkmcnt(0) @@ -540,7 +901,7 @@ define amdgpu_kernel void @fptrunc_f64_to_f16(ptr addrspace(1) %out, double %in) ; GFX11-UNSAFE-DAG-FAKE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; GFX11-UNSAFE-DAG-FAKE16-NEXT: s_endpgm ; -; GFX11-UNSAFE-GISEL-TRUE16-LABEL: fptrunc_f64_to_f16: +; GFX11-UNSAFE-GISEL-TRUE16-LABEL: fptrunc_f64_to_f16_afn: ; GFX11-UNSAFE-GISEL-TRUE16: ; %bb.0: ; GFX11-UNSAFE-GISEL-TRUE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 ; GFX11-UNSAFE-GISEL-TRUE16-NEXT: s_waitcnt lgkmcnt(0) @@ -552,7 +913,7 @@ define amdgpu_kernel void @fptrunc_f64_to_f16(ptr addrspace(1) %out, double %in) ; GFX11-UNSAFE-GISEL-TRUE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; GFX11-UNSAFE-GISEL-TRUE16-NEXT: s_endpgm ; -; GFX11-UNSAFE-GISEL-FAKE16-LABEL: fptrunc_f64_to_f16: +; GFX11-UNSAFE-GISEL-FAKE16-LABEL: fptrunc_f64_to_f16_afn: ; GFX11-UNSAFE-GISEL-FAKE16: ; %bb.0: ; GFX11-UNSAFE-GISEL-FAKE16-NEXT: s_load_b128 s[0:3], s[4:5], 0x24 ; GFX11-UNSAFE-GISEL-FAKE16-NEXT: s_waitcnt lgkmcnt(0) @@ -563,7 +924,7 @@ define amdgpu_kernel void @fptrunc_f64_to_f16(ptr addrspace(1) %out, double %in) ; GFX11-UNSAFE-GISEL-FAKE16-NEXT: v_cvt_f16_f32_e32 v0, v0 ; GFX11-UNSAFE-GISEL-FAKE16-NEXT: buffer_store_b16 v0, off, s[0:3], 0 ; GFX11-UNSAFE-GISEL-FAKE16-NEXT: s_endpgm - %result = fptrunc double %in to half + %result = fptrunc afn double %in to half %result_i16 = bitcast half %result to i16 store i16 %result_i16, ptr addrspace(1) %out ret void @@ -662,6 +1023,99 @@ define amdgpu_kernel void @fptrunc_v2f64_to_v2f32(ptr addrspace(1) %out, <2 x do ret void } +define amdgpu_kernel void 
@fptrunc_v2f64_to_v2f32_afn(ptr addrspace(1) %out, <2 x double> %in) { +; SI-LABEL: fptrunc_v2f64_to_v2f32_afn: +; SI: ; %bb.0: +; SI-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0xd +; SI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x9 +; SI-NEXT: s_mov_b32 s7, 0xf000 +; SI-NEXT: s_mov_b32 s6, -1 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: v_cvt_f32_f64_e32 v1, s[2:3] +; SI-NEXT: v_cvt_f32_f64_e32 v0, s[0:1] +; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 +; SI-NEXT: s_endpgm +; +; VI-SDAG-LABEL: fptrunc_v2f64_to_v2f32_afn: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x34 +; VI-SDAG-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x24 +; VI-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; VI-SDAG-NEXT: s_mov_b32 s6, -1 +; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; VI-SDAG-NEXT: v_cvt_f32_f64_e32 v1, s[2:3] +; VI-SDAG-NEXT: v_cvt_f32_f64_e32 v0, s[0:1] +; VI-SDAG-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 +; VI-SDAG-NEXT: s_endpgm +; +; VI-GISEL-LABEL: fptrunc_v2f64_to_v2f32_afn: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x34 +; VI-GISEL-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x24 +; VI-GISEL-NEXT: s_mov_b32 s6, -1 +; VI-GISEL-NEXT: s_mov_b32 s7, 0xf000 +; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; VI-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[0:1] +; VI-GISEL-NEXT: v_cvt_f32_f64_e32 v1, s[2:3] +; VI-GISEL-NEXT: buffer_store_dwordx2 v[0:1], off, s[4:7], 0 +; VI-GISEL-NEXT: s_endpgm +; +; GFX10-SDAG-LABEL: fptrunc_v2f64_to_v2f32_afn: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x34 +; GFX10-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-SDAG-NEXT: v_cvt_f32_f64_e32 v1, s[2:3] +; GFX10-SDAG-NEXT: v_cvt_f32_f64_e32 v0, s[0:1] +; GFX10-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX10-SDAG-NEXT: s_mov_b32 s3, 0x31016000 +; GFX10-SDAG-NEXT: s_mov_b32 s2, -1 +; GFX10-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-SDAG-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 +; GFX10-SDAG-NEXT: s_endpgm +; +; GFX10-GISEL-LABEL: fptrunc_v2f64_to_v2f32_afn: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x34 +; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[0:1] +; GFX10-GISEL-NEXT: v_cvt_f32_f64_e32 v1, s[2:3] +; GFX10-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX10-GISEL-NEXT: s_mov_b32 s2, -1 +; GFX10-GISEL-NEXT: s_mov_b32 s3, 0x31016000 +; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-GISEL-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 +; GFX10-GISEL-NEXT: s_endpgm +; +; GFX11-SDAG-LABEL: fptrunc_v2f64_to_v2f32_afn: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x34 +; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-SDAG-NEXT: v_cvt_f32_f64_e32 v1, s[2:3] +; GFX11-SDAG-NEXT: v_cvt_f32_f64_e32 v0, s[0:1] +; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-SDAG-NEXT: s_mov_b32 s3, 0x31016000 +; GFX11-SDAG-NEXT: s_mov_b32 s2, -1 +; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-SDAG-NEXT: buffer_store_b64 v[0:1], off, s[0:3], 0 +; GFX11-SDAG-NEXT: s_endpgm +; +; GFX11-GISEL-LABEL: fptrunc_v2f64_to_v2f32_afn: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_load_b128 s[0:3], s[4:5], 0x34 +; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[0:1] +; GFX11-GISEL-NEXT: v_cvt_f32_f64_e32 v1, s[2:3] +; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-GISEL-NEXT: s_mov_b32 s2, -1 +; GFX11-GISEL-NEXT: s_mov_b32 s3, 0x31016000 +; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-GISEL-NEXT: buffer_store_b64 v[0:1], off, 
s[0:3], 0 +; GFX11-GISEL-NEXT: s_endpgm + %result = fptrunc afn <2 x double> %in to <2 x float> + store <2 x float> %result, ptr addrspace(1) %out + ret void +} + define amdgpu_kernel void @fptrunc_v3f64_to_v3f32(ptr addrspace(1) %out, <3 x double> %in) { ; SI-LABEL: fptrunc_v3f64_to_v3f32: ; SI: ; %bb.0: @@ -769,6 +1223,113 @@ define amdgpu_kernel void @fptrunc_v3f64_to_v3f32(ptr addrspace(1) %out, <3 x do ret void } +define amdgpu_kernel void @fptrunc_v3f64_to_v3f32_afn(ptr addrspace(1) %out, <3 x double> %in) { +; SI-LABEL: fptrunc_v3f64_to_v3f32_afn: +; SI: ; %bb.0: +; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 +; SI-NEXT: s_load_dwordx4 s[8:11], s[4:5], 0x11 +; SI-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x15 +; SI-NEXT: s_mov_b32 s3, 0xf000 +; SI-NEXT: s_mov_b32 s2, -1 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: v_cvt_f32_f64_e32 v1, s[10:11] +; SI-NEXT: v_cvt_f32_f64_e32 v0, s[8:9] +; SI-NEXT: v_cvt_f32_f64_e32 v2, s[4:5] +; SI-NEXT: buffer_store_dword v2, off, s[0:3], 0 offset:8 +; SI-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 +; SI-NEXT: s_endpgm +; +; VI-SDAG-LABEL: fptrunc_v3f64_to_v3f32_afn: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x54 +; VI-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x44 +; VI-SDAG-NEXT: s_load_dwordx2 s[4:5], s[4:5], 0x24 +; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; VI-SDAG-NEXT: v_cvt_f32_f64_e32 v2, s[6:7] +; VI-SDAG-NEXT: v_cvt_f32_f64_e32 v1, s[2:3] +; VI-SDAG-NEXT: v_cvt_f32_f64_e32 v0, s[0:1] +; VI-SDAG-NEXT: s_mov_b32 s7, 0xf000 +; VI-SDAG-NEXT: s_mov_b32 s6, -1 +; VI-SDAG-NEXT: buffer_store_dwordx3 v[0:2], off, s[4:7], 0 +; VI-SDAG-NEXT: s_endpgm +; +; VI-GISEL-LABEL: fptrunc_v3f64_to_v3f32_afn: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x44 +; VI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; VI-GISEL-NEXT: s_mov_b32 s2, -1 +; VI-GISEL-NEXT: s_mov_b32 s3, 0xf000 +; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; VI-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[8:9] +; VI-GISEL-NEXT: v_cvt_f32_f64_e32 v1, s[10:11] +; VI-GISEL-NEXT: v_cvt_f32_f64_e32 v2, s[12:13] +; VI-GISEL-NEXT: buffer_store_dwordx3 v[0:2], off, s[0:3], 0 +; VI-GISEL-NEXT: s_endpgm +; +; GFX10-SDAG-LABEL: fptrunc_v3f64_to_v3f32_afn: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_clause 0x1 +; GFX10-SDAG-NEXT: s_load_dwordx2 s[6:7], s[4:5], 0x54 +; GFX10-SDAG-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x44 +; GFX10-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-SDAG-NEXT: v_cvt_f32_f64_e32 v2, s[6:7] +; GFX10-SDAG-NEXT: v_cvt_f32_f64_e32 v1, s[2:3] +; GFX10-SDAG-NEXT: v_cvt_f32_f64_e32 v0, s[0:1] +; GFX10-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX10-SDAG-NEXT: s_mov_b32 s3, 0x31016000 +; GFX10-SDAG-NEXT: s_mov_b32 s2, -1 +; GFX10-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-SDAG-NEXT: buffer_store_dwordx3 v[0:2], off, s[0:3], 0 +; GFX10-SDAG-NEXT: s_endpgm +; +; GFX10-GISEL-LABEL: fptrunc_v3f64_to_v3f32_afn: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_clause 0x1 +; GFX10-GISEL-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x44 +; GFX10-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX10-GISEL-NEXT: s_mov_b32 s2, -1 +; GFX10-GISEL-NEXT: s_mov_b32 s3, 0x31016000 +; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[8:9] +; GFX10-GISEL-NEXT: v_cvt_f32_f64_e32 v1, s[10:11] +; GFX10-GISEL-NEXT: v_cvt_f32_f64_e32 v2, s[12:13] +; GFX10-GISEL-NEXT: buffer_store_dwordx3 v[0:2], off, s[0:3], 0 +; GFX10-GISEL-NEXT: s_endpgm +; +; GFX11-SDAG-LABEL: fptrunc_v3f64_to_v3f32_afn: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: 
s_clause 0x1 +; GFX11-SDAG-NEXT: s_load_b64 s[6:7], s[4:5], 0x54 +; GFX11-SDAG-NEXT: s_load_b128 s[0:3], s[4:5], 0x44 +; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-SDAG-NEXT: v_cvt_f32_f64_e32 v2, s[6:7] +; GFX11-SDAG-NEXT: v_cvt_f32_f64_e32 v1, s[2:3] +; GFX11-SDAG-NEXT: v_cvt_f32_f64_e32 v0, s[0:1] +; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-SDAG-NEXT: s_mov_b32 s3, 0x31016000 +; GFX11-SDAG-NEXT: s_mov_b32 s2, -1 +; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-SDAG-NEXT: buffer_store_b96 v[0:2], off, s[0:3], 0 +; GFX11-SDAG-NEXT: s_endpgm +; +; GFX11-GISEL-LABEL: fptrunc_v3f64_to_v3f32_afn: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_clause 0x1 +; GFX11-GISEL-NEXT: s_load_b256 s[8:15], s[4:5], 0x44 +; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-GISEL-NEXT: s_mov_b32 s2, -1 +; GFX11-GISEL-NEXT: s_mov_b32 s3, 0x31016000 +; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[8:9] +; GFX11-GISEL-NEXT: v_cvt_f32_f64_e32 v1, s[10:11] +; GFX11-GISEL-NEXT: v_cvt_f32_f64_e32 v2, s[12:13] +; GFX11-GISEL-NEXT: buffer_store_b96 v[0:2], off, s[0:3], 0 +; GFX11-GISEL-NEXT: s_endpgm + %result = fptrunc afn <3 x double> %in to <3 x float> + store <3 x float> %result, ptr addrspace(1) %out + ret void +} + define amdgpu_kernel void @fptrunc_v4f64_to_v4f32(ptr addrspace(1) %out, <4 x double> %in) { ; SI-LABEL: fptrunc_v4f64_to_v4f32: ; SI: ; %bb.0: @@ -876,6 +1437,113 @@ define amdgpu_kernel void @fptrunc_v4f64_to_v4f32(ptr addrspace(1) %out, <4 x do ret void } +define amdgpu_kernel void @fptrunc_v4f64_to_v4f32_afn(ptr addrspace(1) %out, <4 x double> %in) { +; SI-LABEL: fptrunc_v4f64_to_v4f32_afn: +; SI: ; %bb.0: +; SI-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x11 +; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 +; SI-NEXT: s_mov_b32 s3, 0xf000 +; SI-NEXT: s_mov_b32 s2, -1 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: v_cvt_f32_f64_e32 v3, s[14:15] +; SI-NEXT: v_cvt_f32_f64_e32 v2, s[12:13] +; SI-NEXT: v_cvt_f32_f64_e32 v1, s[10:11] +; SI-NEXT: v_cvt_f32_f64_e32 v0, s[8:9] +; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 +; SI-NEXT: s_endpgm +; +; VI-SDAG-LABEL: fptrunc_v4f64_to_v4f32_afn: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x44 +; VI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; VI-SDAG-NEXT: s_mov_b32 s3, 0xf000 +; VI-SDAG-NEXT: s_mov_b32 s2, -1 +; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; VI-SDAG-NEXT: v_cvt_f32_f64_e32 v3, s[14:15] +; VI-SDAG-NEXT: v_cvt_f32_f64_e32 v2, s[12:13] +; VI-SDAG-NEXT: v_cvt_f32_f64_e32 v1, s[10:11] +; VI-SDAG-NEXT: v_cvt_f32_f64_e32 v0, s[8:9] +; VI-SDAG-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 +; VI-SDAG-NEXT: s_endpgm +; +; VI-GISEL-LABEL: fptrunc_v4f64_to_v4f32_afn: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x44 +; VI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; VI-GISEL-NEXT: s_mov_b32 s2, -1 +; VI-GISEL-NEXT: s_mov_b32 s3, 0xf000 +; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; VI-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[8:9] +; VI-GISEL-NEXT: v_cvt_f32_f64_e32 v1, s[10:11] +; VI-GISEL-NEXT: v_cvt_f32_f64_e32 v2, s[12:13] +; VI-GISEL-NEXT: v_cvt_f32_f64_e32 v3, s[14:15] +; VI-GISEL-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 +; VI-GISEL-NEXT: s_endpgm +; +; GFX10-SDAG-LABEL: fptrunc_v4f64_to_v4f32_afn: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_clause 0x1 +; GFX10-SDAG-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x44 +; GFX10-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX10-SDAG-NEXT: s_mov_b32 s3, 0x31016000 +; 
GFX10-SDAG-NEXT: s_mov_b32 s2, -1 +; GFX10-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-SDAG-NEXT: v_cvt_f32_f64_e32 v3, s[14:15] +; GFX10-SDAG-NEXT: v_cvt_f32_f64_e32 v2, s[12:13] +; GFX10-SDAG-NEXT: v_cvt_f32_f64_e32 v1, s[10:11] +; GFX10-SDAG-NEXT: v_cvt_f32_f64_e32 v0, s[8:9] +; GFX10-SDAG-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 +; GFX10-SDAG-NEXT: s_endpgm +; +; GFX10-GISEL-LABEL: fptrunc_v4f64_to_v4f32_afn: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_clause 0x1 +; GFX10-GISEL-NEXT: s_load_dwordx8 s[8:15], s[4:5], 0x44 +; GFX10-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX10-GISEL-NEXT: s_mov_b32 s2, -1 +; GFX10-GISEL-NEXT: s_mov_b32 s3, 0x31016000 +; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[8:9] +; GFX10-GISEL-NEXT: v_cvt_f32_f64_e32 v1, s[10:11] +; GFX10-GISEL-NEXT: v_cvt_f32_f64_e32 v2, s[12:13] +; GFX10-GISEL-NEXT: v_cvt_f32_f64_e32 v3, s[14:15] +; GFX10-GISEL-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 +; GFX10-GISEL-NEXT: s_endpgm +; +; GFX11-SDAG-LABEL: fptrunc_v4f64_to_v4f32_afn: +; GFX11-SDAG: ; %bb.0: +; GFX11-SDAG-NEXT: s_clause 0x1 +; GFX11-SDAG-NEXT: s_load_b256 s[8:15], s[4:5], 0x44 +; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-SDAG-NEXT: s_mov_b32 s3, 0x31016000 +; GFX11-SDAG-NEXT: s_mov_b32 s2, -1 +; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-SDAG-NEXT: v_cvt_f32_f64_e32 v3, s[14:15] +; GFX11-SDAG-NEXT: v_cvt_f32_f64_e32 v2, s[12:13] +; GFX11-SDAG-NEXT: v_cvt_f32_f64_e32 v1, s[10:11] +; GFX11-SDAG-NEXT: v_cvt_f32_f64_e32 v0, s[8:9] +; GFX11-SDAG-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 +; GFX11-SDAG-NEXT: s_endpgm +; +; GFX11-GISEL-LABEL: fptrunc_v4f64_to_v4f32_afn: +; GFX11-GISEL: ; %bb.0: +; GFX11-GISEL-NEXT: s_clause 0x1 +; GFX11-GISEL-NEXT: s_load_b256 s[8:15], s[4:5], 0x44 +; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24 +; GFX11-GISEL-NEXT: s_mov_b32 s2, -1 +; GFX11-GISEL-NEXT: s_mov_b32 s3, 0x31016000 +; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX11-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[8:9] +; GFX11-GISEL-NEXT: v_cvt_f32_f64_e32 v1, s[10:11] +; GFX11-GISEL-NEXT: v_cvt_f32_f64_e32 v2, s[12:13] +; GFX11-GISEL-NEXT: v_cvt_f32_f64_e32 v3, s[14:15] +; GFX11-GISEL-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0 +; GFX11-GISEL-NEXT: s_endpgm + %result = fptrunc afn <4 x double> %in to <4 x float> + store <4 x float> %result, ptr addrspace(1) %out + ret void +} + define amdgpu_kernel void @fptrunc_v8f64_to_v8f32(ptr addrspace(1) %out, <8 x double> %in) { ; SI-LABEL: fptrunc_v8f64_to_v8f32: ; SI: ; %bb.0: @@ -1019,3 +1687,150 @@ define amdgpu_kernel void @fptrunc_v8f64_to_v8f32(ptr addrspace(1) %out, <8 x do store <8 x float> %result, ptr addrspace(1) %out ret void } + +define amdgpu_kernel void @fptrunc_v8f64_to_v8f32_afn(ptr addrspace(1) %out, <8 x double> %in) { +; SI-LABEL: fptrunc_v8f64_to_v8f32_afn: +; SI: ; %bb.0: +; SI-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x19 +; SI-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x9 +; SI-NEXT: s_mov_b32 s3, 0xf000 +; SI-NEXT: s_mov_b32 s2, -1 +; SI-NEXT: s_waitcnt lgkmcnt(0) +; SI-NEXT: v_cvt_f32_f64_e32 v3, s[14:15] +; SI-NEXT: v_cvt_f32_f64_e32 v2, s[12:13] +; SI-NEXT: v_cvt_f32_f64_e32 v1, s[10:11] +; SI-NEXT: v_cvt_f32_f64_e32 v0, s[8:9] +; SI-NEXT: v_cvt_f32_f64_e32 v7, s[22:23] +; SI-NEXT: v_cvt_f32_f64_e32 v6, s[20:21] +; SI-NEXT: v_cvt_f32_f64_e32 v5, s[18:19] +; SI-NEXT: v_cvt_f32_f64_e32 v4, s[16:17] +; SI-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16 +; SI-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 +; SI-NEXT: 
s_endpgm +; +; VI-SDAG-LABEL: fptrunc_v8f64_to_v8f32_afn: +; VI-SDAG: ; %bb.0: +; VI-SDAG-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x64 +; VI-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; VI-SDAG-NEXT: s_mov_b32 s3, 0xf000 +; VI-SDAG-NEXT: s_mov_b32 s2, -1 +; VI-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; VI-SDAG-NEXT: v_cvt_f32_f64_e32 v7, s[22:23] +; VI-SDAG-NEXT: v_cvt_f32_f64_e32 v6, s[20:21] +; VI-SDAG-NEXT: v_cvt_f32_f64_e32 v5, s[18:19] +; VI-SDAG-NEXT: v_cvt_f32_f64_e32 v4, s[16:17] +; VI-SDAG-NEXT: v_cvt_f32_f64_e32 v3, s[14:15] +; VI-SDAG-NEXT: v_cvt_f32_f64_e32 v2, s[12:13] +; VI-SDAG-NEXT: v_cvt_f32_f64_e32 v1, s[10:11] +; VI-SDAG-NEXT: v_cvt_f32_f64_e32 v0, s[8:9] +; VI-SDAG-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16 +; VI-SDAG-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 +; VI-SDAG-NEXT: s_endpgm +; +; VI-GISEL-LABEL: fptrunc_v8f64_to_v8f32_afn: +; VI-GISEL: ; %bb.0: +; VI-GISEL-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x64 +; VI-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; VI-GISEL-NEXT: s_mov_b32 s2, -1 +; VI-GISEL-NEXT: s_mov_b32 s3, 0xf000 +; VI-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; VI-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[8:9] +; VI-GISEL-NEXT: v_cvt_f32_f64_e32 v1, s[10:11] +; VI-GISEL-NEXT: v_cvt_f32_f64_e32 v2, s[12:13] +; VI-GISEL-NEXT: v_cvt_f32_f64_e32 v3, s[14:15] +; VI-GISEL-NEXT: v_cvt_f32_f64_e32 v4, s[16:17] +; VI-GISEL-NEXT: v_cvt_f32_f64_e32 v5, s[18:19] +; VI-GISEL-NEXT: v_cvt_f32_f64_e32 v6, s[20:21] +; VI-GISEL-NEXT: v_cvt_f32_f64_e32 v7, s[22:23] +; VI-GISEL-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 +; VI-GISEL-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16 +; VI-GISEL-NEXT: s_endpgm +; +; GFX10-SDAG-LABEL: fptrunc_v8f64_to_v8f32_afn: +; GFX10-SDAG: ; %bb.0: +; GFX10-SDAG-NEXT: s_clause 0x1 +; GFX10-SDAG-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x64 +; GFX10-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX10-SDAG-NEXT: s_mov_b32 s3, 0x31016000 +; GFX10-SDAG-NEXT: s_mov_b32 s2, -1 +; GFX10-SDAG-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-SDAG-NEXT: v_cvt_f32_f64_e32 v7, s[22:23] +; GFX10-SDAG-NEXT: v_cvt_f32_f64_e32 v6, s[20:21] +; GFX10-SDAG-NEXT: v_cvt_f32_f64_e32 v5, s[18:19] +; GFX10-SDAG-NEXT: v_cvt_f32_f64_e32 v4, s[16:17] +; GFX10-SDAG-NEXT: v_cvt_f32_f64_e32 v3, s[14:15] +; GFX10-SDAG-NEXT: v_cvt_f32_f64_e32 v2, s[12:13] +; GFX10-SDAG-NEXT: v_cvt_f32_f64_e32 v1, s[10:11] +; GFX10-SDAG-NEXT: v_cvt_f32_f64_e32 v0, s[8:9] +; GFX10-SDAG-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16 +; GFX10-SDAG-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 +; GFX10-SDAG-NEXT: s_endpgm +; +; GFX10-GISEL-LABEL: fptrunc_v8f64_to_v8f32_afn: +; GFX10-GISEL: ; %bb.0: +; GFX10-GISEL-NEXT: s_clause 0x1 +; GFX10-GISEL-NEXT: s_load_dwordx16 s[8:23], s[4:5], 0x64 +; GFX10-GISEL-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x24 +; GFX10-GISEL-NEXT: s_mov_b32 s2, -1 +; GFX10-GISEL-NEXT: s_mov_b32 s3, 0x31016000 +; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0) +; GFX10-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[8:9] +; GFX10-GISEL-NEXT: v_cvt_f32_f64_e32 v1, s[10:11] +; GFX10-GISEL-NEXT: v_cvt_f32_f64_e32 v2, s[12:13] +; GFX10-GISEL-NEXT: v_cvt_f32_f64_e32 v3, s[14:15] +; GFX10-GISEL-NEXT: v_cvt_f32_f64_e32 v4, s[16:17] +; GFX10-GISEL-NEXT: v_cvt_f32_f64_e32 v5, s[18:19] +; GFX10-GISEL-NEXT: v_cvt_f32_f64_e32 v6, s[20:21] +; GFX10-GISEL-NEXT: v_cvt_f32_f64_e32 v7, s[22:23] +; GFX10-GISEL-NEXT: buffer_store_dwordx4 v[0:3], off, s[0:3], 0 +; GFX10-GISEL-NEXT: buffer_store_dwordx4 v[4:7], off, s[0:3], 0 offset:16 +; GFX10-GISEL-NEXT: s_endpgm +; +; 
GFX11-SDAG-LABEL: fptrunc_v8f64_to_v8f32_afn:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_clause 0x1
+; GFX11-SDAG-NEXT: s_load_b512 s[8:23], s[4:5], 0x64
+; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX11-SDAG-NEXT: s_mov_b32 s3, 0x31016000
+; GFX11-SDAG-NEXT: s_mov_b32 s2, -1
+; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_cvt_f32_f64_e32 v7, s[22:23]
+; GFX11-SDAG-NEXT: v_cvt_f32_f64_e32 v6, s[20:21]
+; GFX11-SDAG-NEXT: v_cvt_f32_f64_e32 v5, s[18:19]
+; GFX11-SDAG-NEXT: v_cvt_f32_f64_e32 v4, s[16:17]
+; GFX11-SDAG-NEXT: v_cvt_f32_f64_e32 v3, s[14:15]
+; GFX11-SDAG-NEXT: v_cvt_f32_f64_e32 v2, s[12:13]
+; GFX11-SDAG-NEXT: v_cvt_f32_f64_e32 v1, s[10:11]
+; GFX11-SDAG-NEXT: v_cvt_f32_f64_e32 v0, s[8:9]
+; GFX11-SDAG-NEXT: s_clause 0x1
+; GFX11-SDAG-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0 offset:16
+; GFX11-SDAG-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
+; GFX11-SDAG-NEXT: s_endpgm
+;
+; GFX11-GISEL-LABEL: fptrunc_v8f64_to_v8f32_afn:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_clause 0x1
+; GFX11-GISEL-NEXT: s_load_b512 s[8:23], s[4:5], 0x64
+; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[4:5], 0x24
+; GFX11-GISEL-NEXT: s_mov_b32 s2, -1
+; GFX11-GISEL-NEXT: s_mov_b32 s3, 0x31016000
+; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
+; GFX11-GISEL-NEXT: v_cvt_f32_f64_e32 v0, s[8:9]
+; GFX11-GISEL-NEXT: v_cvt_f32_f64_e32 v1, s[10:11]
+; GFX11-GISEL-NEXT: v_cvt_f32_f64_e32 v2, s[12:13]
+; GFX11-GISEL-NEXT: v_cvt_f32_f64_e32 v3, s[14:15]
+; GFX11-GISEL-NEXT: v_cvt_f32_f64_e32 v4, s[16:17]
+; GFX11-GISEL-NEXT: v_cvt_f32_f64_e32 v5, s[18:19]
+; GFX11-GISEL-NEXT: v_cvt_f32_f64_e32 v6, s[20:21]
+; GFX11-GISEL-NEXT: v_cvt_f32_f64_e32 v7, s[22:23]
+; GFX11-GISEL-NEXT: s_clause 0x1
+; GFX11-GISEL-NEXT: buffer_store_b128 v[0:3], off, s[0:3], 0
+; GFX11-GISEL-NEXT: buffer_store_b128 v[4:7], off, s[0:3], 0 offset:16
+; GFX11-GISEL-NEXT: s_endpgm
+  %result = fptrunc afn <8 x double> %in to <8 x float>
+  store <8 x float> %result, ptr addrspace(1) %out
+  ret void
+}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; GFX10-SAFE-GISEL: {{.*}}
+; VI-SAFE-GISEL: {{.*}}
diff --git a/llvm/test/CodeGen/AMDGPU/gfx1250-no-scope-cu-stores.ll b/llvm/test/CodeGen/AMDGPU/gfx1250-no-scope-cu-stores.ll
new file mode 100644
index 0000000..d13d76f
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/gfx1250-no-scope-cu-stores.ll
@@ -0,0 +1,100 @@
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -O3 -mcpu=gfx1250 < %s | FileCheck --check-prefixes=GCN,CU %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -O3 -mcpu=gfx1250 -mattr=-cu-stores < %s | FileCheck --check-prefixes=GCN,NOCU %s
+
+; Check that if -mattr=-cu-stores is used, we use SCOPE_SE at minimum on all stores.
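+;
+; A rough sketch of the difference being checked (the register operands here
+; are illustrative placeholders, not copied from actual llc output):
+;
+;   cu-stores enabled (default):  global_store_b32 v0, v1, s[0:1]
+;   with -mattr=-cu-stores:       global_store_b32 v0, v1, s[0:1] scope:SCOPE_SE
+;
+; The chosen mode is also recorded in the kernel descriptor via
+; .amdhsa_uses_cu_stores (1 when CU-scope stores are allowed, 0 otherwise),
+; as the GCN check lines below assert.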
+ +; GCN: flat_store: +; CU: flat_store_b32 v{{.*}}, v{{.*}}, s{{.*}} scope:SCOPE_SE +; NOCU: flat_store_b32 v{{.*}}, v{{.*}}, s{{.*}} scope:SCOPE_SE +; GCN: .amdhsa_kernel flat_store +; CU: .amdhsa_uses_cu_stores 1 +; NOCU: .amdhsa_uses_cu_stores 0 +define amdgpu_kernel void @flat_store(ptr %dst, i32 %val) { +entry: + store i32 %val, ptr %dst + ret void +} + +; GCN: global_store: +; CU: global_store_b32 v{{.*}}, v{{.*}}, s{{.*}}{{$}} +; NOCU: global_store_b32 v{{.*}}, v{{.*}}, s{{.*}} scope:SCOPE_SE +; GCN: .amdhsa_kernel global_store +; CU: .amdhsa_uses_cu_stores 1 +; NOCU: .amdhsa_uses_cu_stores 0 +define amdgpu_kernel void @global_store(ptr addrspace(1) %dst, i32 %val) { +entry: + store i32 %val, ptr addrspace(1) %dst + ret void +} + +; GCN: local_store: +; CU: ds_store_b32 v{{.*}}, v{{.*}}{{$}} +; NOCU: ds_store_b32 v{{.*}}, v{{.*}}{{$}} +; GCN: .amdhsa_kernel local_store +; CU: .amdhsa_uses_cu_stores 1 +; NOCU: .amdhsa_uses_cu_stores 0 +define amdgpu_kernel void @local_store(ptr addrspace(3) %dst, i32 %val) { +entry: + store i32 %val, ptr addrspace(3) %dst + ret void +} + +; GCN: scratch_store: +; CU: scratch_store_b32 off, v{{.*}}, s{{.*}} scope:SCOPE_SE +; NOCU: scratch_store_b32 off, v{{.*}}, s{{.*}} scope:SCOPE_SE +; GCN: .amdhsa_kernel scratch_store +; CU: .amdhsa_uses_cu_stores 1 +; NOCU: .amdhsa_uses_cu_stores 0 +define amdgpu_kernel void @scratch_store(ptr addrspace(5) %dst, i32 %val) { +entry: + store i32 %val, ptr addrspace(5) %dst + ret void +} + +; GCN: flat_atomic_store: +; CU: flat_store_b32 v{{.*}}, v{{.*}}, s{{.*}} scope:SCOPE_SE +; NOCU: flat_store_b32 v{{.*}}, v{{.*}}, s{{.*}} scope:SCOPE_SE +; GCN: .amdhsa_kernel flat_atomic_store +; CU: .amdhsa_uses_cu_stores 1 +; NOCU: .amdhsa_uses_cu_stores 0 +define amdgpu_kernel void @flat_atomic_store(ptr %dst, i32 %val) { +entry: + store atomic i32 %val, ptr %dst syncscope("wavefront") unordered, align 4 + ret void +} + +; GCN: global_atomic_store: +; CU: global_store_b32 v{{.*}}, v{{.*}}, s{{.*}}{{$}} +; NOCU: global_store_b32 v{{.*}}, v{{.*}}, s{{.*}} scope:SCOPE_SE +; GCN: .amdhsa_kernel global_atomic_store +; CU: .amdhsa_uses_cu_stores 1 +; NOCU: .amdhsa_uses_cu_stores 0 +define amdgpu_kernel void @global_atomic_store(ptr addrspace(1) %dst, i32 %val) { +entry: + store atomic i32 %val, ptr addrspace(1) %dst syncscope("wavefront") unordered, align 4 + ret void +} + +; GCN: local_atomic_store: +; CU: ds_store_b32 v{{.*}}, v{{.*}}{{$}} +; NOCU: ds_store_b32 v{{.*}}, v{{.*}}{{$}} +; GCN: .amdhsa_kernel local_atomic_store +; CU: .amdhsa_uses_cu_stores 1 +; NOCU: .amdhsa_uses_cu_stores 0 +define amdgpu_kernel void @local_atomic_store(ptr addrspace(3) %dst, i32 %val) { +entry: + store atomic i32 %val, ptr addrspace(3) %dst syncscope("wavefront") unordered, align 4 + ret void +} + +; GCN: scratch_atomic_store: +; CU: scratch_store_b32 off, v{{.*}}, s{{.*}} scope:SCOPE_SE +; NOCU: scratch_store_b32 off, v{{.*}}, s{{.*}} scope:SCOPE_SE +; GCN: .amdhsa_kernel scratch_atomic_store +; CU: .amdhsa_uses_cu_stores 1 +; NOCU: .amdhsa_uses_cu_stores 0 +define amdgpu_kernel void @scratch_atomic_store(ptr addrspace(5) %dst, i32 %val) { +entry: + store atomic i32 %val, ptr addrspace(5) %dst syncscope("wavefront") unordered, align 4 + ret void +} diff --git a/llvm/test/CodeGen/ARM/fp16.ll b/llvm/test/CodeGen/ARM/fp16.ll index dc35fa3..9ff7010 100644 --- a/llvm/test/CodeGen/ARM/fp16.ll +++ b/llvm/test/CodeGen/ARM/fp16.ll @@ -86,8 +86,8 @@ define i16 @test_to_fp16(double %in) { ; CHECK-FP16-SAFE: bl __aeabi_d2h -; CHECK-FP16-UNSAFE: vcvt.f32.f64 
s0, d0 -; CHECK-FP16-UNSAFE-NEXT: vcvtb.f16.f32 s0, s0 +; CHECK-FP16-UNSAFE: vmov r0, r1, d0 +; CHECK-FP16-UNSAFE-NEXT: bl __aeabi_d2h ; CHECK-ARMV8: vcvtb.f16.f64 [[TMP:s[0-9]+]], d0 ; CHECK-ARMV8: vmov r0, [[TMP]] diff --git a/llvm/test/CodeGen/BPF/BTF/map-def-2.ll b/llvm/test/CodeGen/BPF/BTF/map-def-2.ll index 5f971ec..d4c836f 100644 --- a/llvm/test/CodeGen/BPF/BTF/map-def-2.ll +++ b/llvm/test/CodeGen/BPF/BTF/map-def-2.ll @@ -1,5 +1,6 @@ -; RUN: llc -mtriple=bpfel -filetype=asm -o - %s | FileCheck -check-prefixes=CHECK %s -; RUN: llc -mtriple=bpfeb -filetype=asm -o - %s | FileCheck -check-prefixes=CHECK %s +; RUN: llc -mtriple=bpfel -mcpu=v3 -filetype=obj -o %t1 %s +; RUN: llvm-objcopy --dump-section='.BTF'=%t2 %t1 +; RUN: %python %p/print_btf.py %t2 | FileCheck -check-prefixes=CHECK-BTF %s ; ; Source code: ; struct key_type { @@ -18,51 +19,17 @@ @hash_map = dso_local local_unnamed_addr global %struct.map_type zeroinitializer, section ".maps", align 8, !dbg !0 -; CHECK: .long 0 # BTF_KIND_PTR(id = 1) -; CHECK-NEXT: .long 33554432 # 0x2000000 -; CHECK-NEXT: .long 2 -; CHECK-NEXT: .long 1 # BTF_KIND_STRUCT(id = 2) -; CHECK-NEXT: .long 67108865 # 0x4000001 -; CHECK-NEXT: .long 4 -; CHECK-NEXT: .long 10 -; CHECK-NEXT: .long 3 -; CHECK-NEXT: .long 0 # 0x0 -; CHECK-NEXT: .long 13 # BTF_KIND_INT(id = 3) -; CHECK-NEXT: .long 16777216 # 0x1000000 -; CHECK-NEXT: .long 4 -; CHECK-NEXT: .long 16777248 # 0x1000020 -; CHECK-NEXT: .long 17 # BTF_KIND_TYPEDEF(id = 4) -; CHECK-NEXT: .long 134217728 # 0x8000000 -; CHECK-NEXT: .long 5 -; CHECK-NEXT: .long 28 # BTF_KIND_TYPEDEF(id = 5) -; CHECK-NEXT: .long 134217728 # 0x8000000 -; CHECK-NEXT: .long 6 -; CHECK-NEXT: .long 38 # BTF_KIND_STRUCT(id = 6) -; CHECK-NEXT: .long 67108865 # 0x4000001 -; CHECK-NEXT: .long 8 -; CHECK-NEXT: .long 47 -; CHECK-NEXT: .long 1 -; CHECK-NEXT: .long 0 # 0x0 -; CHECK-NEXT: .long 51 # BTF_KIND_VAR(id = 7) -; CHECK-NEXT: .long 234881024 # 0xe000000 -; CHECK-NEXT: .long 4 -; CHECK-NEXT: .long 1 -; CHECK-NEXT: .long 60 # BTF_KIND_DATASEC(id = 8) -; CHECK-NEXT: .long 251658241 # 0xf000001 -; CHECK-NEXT: .long 0 -; CHECK-NEXT: .long 7 -; CHECK-NEXT: .long hash_map -; CHECK-NEXT: .long 8 - -; CHECK: .ascii "key_type" # string offset=1 -; CHECK: .ascii "a1" # string offset=10 -; CHECK: .ascii "int" # string offset=13 -; CHECK: .ascii "__map_type" # string offset=17 -; CHECK: .ascii "_map_type" # string offset=28 -; CHECK: .ascii "map_type" # string offset=38 -; CHECK: .ascii "key" # string offset=47 -; CHECK: .ascii "hash_map" # string offset=51 -; CHECK: .ascii ".maps" # string offset=60 +; CHECK-BTF: [1] PTR '(anon)' type_id=2 +; CHECK-BTF-NEXT: [2] STRUCT 'key_type' size=4 vlen=1 +; CHECK-BTF-NEXT: 'a1' type_id=3 bits_offset=0 +; CHECK-BTF-NEXT: [3] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED +; CHECK-BTF-NEXT: [4] STRUCT 'map_type' size=8 vlen=1 +; CHECK-BTF-NEXT: 'key' type_id=1 bits_offset=0 +; CHECK-BTF-NEXT: [5] TYPEDEF '_map_type' type_id=4 +; CHECK-BTF-NEXT: [6] TYPEDEF '__map_type' type_id=5 +; CHECK-BTF-NEXT: [7] VAR 'hash_map' type_id=6, linkage=global +; CHECK-BTF-NEXT: [8] DATASEC '.maps' size=0 vlen=1 +; CHECK-BTF-NEXT: type_id=7 offset=0 size=8 !llvm.dbg.cu = !{!2} !llvm.module.flags = !{!16, !17, !18} diff --git a/llvm/test/CodeGen/BPF/BTF/map-def-3.ll b/llvm/test/CodeGen/BPF/BTF/map-def-3.ll index 6aa8af9..1d95f03 100644 --- a/llvm/test/CodeGen/BPF/BTF/map-def-3.ll +++ b/llvm/test/CodeGen/BPF/BTF/map-def-3.ll @@ -1,5 +1,6 @@ -; RUN: llc -mtriple=bpfel -filetype=asm -o - %s | FileCheck 
-check-prefixes=CHECK %s -; RUN: llc -mtriple=bpfeb -filetype=asm -o - %s | FileCheck -check-prefixes=CHECK %s +; RUN: llc -mtriple=bpfel -mcpu=v3 -filetype=obj -o %t1 %s +; RUN: llvm-objcopy --dump-section='.BTF'=%t2 %t1 +; RUN: %python %p/print_btf.py %t2 | FileCheck -check-prefixes=CHECK-BTF %s ; ; Source code: ; struct key_type { @@ -13,36 +14,13 @@ @hash_map = dso_local local_unnamed_addr constant %struct.key_type zeroinitializer, section ".maps", align 4, !dbg !0 -; CHECK: .long 1 # BTF_KIND_INT(id = 1) -; CHECK-NEXT: .long 16777216 # 0x1000000 -; CHECK-NEXT: .long 4 -; CHECK-NEXT: .long 16777248 # 0x1000020 -; CHECK-NEXT: .long 0 # BTF_KIND_CONST(id = 2) -; CHECK-NEXT: .long 167772160 # 0xa000000 -; CHECK-NEXT: .long 3 -; CHECK-NEXT: .long 5 # BTF_KIND_STRUCT(id = 3) -; CHECK-NEXT: .long 67108865 # 0x4000001 -; CHECK-NEXT: .long 4 -; CHECK-NEXT: .long 14 -; CHECK-NEXT: .long 1 -; CHECK-NEXT: .long 0 # 0x0 -; CHECK-NEXT: .long 17 # BTF_KIND_VAR(id = 4) -; CHECK-NEXT: .long 234881024 # 0xe000000 -; CHECK-NEXT: .long 2 -; CHECK-NEXT: .long 1 -; CHECK-NEXT: .long 26 # BTF_KIND_DATASEC(id = 5) -; CHECK-NEXT: .long 251658241 # 0xf000001 -; CHECK-NEXT: .long 0 -; CHECK-NEXT: .long 4 -; CHECK-NEXT: .long hash_map -; CHECK-NEXT: .long 4 - -; CHECK: .ascii "int" # string offset=1 -; CHECK: .ascii "key_type" # string offset=5 -; CHECK: .ascii "a1" # string offset=14 -; CHECK: .ascii "hash_map" # string offset=17 -; CHECK: .ascii ".maps" # string offset=26 - +; CHECK-BTF: [1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED +; CHECK-BTF-NEXT: [2] STRUCT 'key_type' size=4 vlen=1 +; CHECK-BTF-NEXT: 'a1' type_id=1 bits_offset=0 +; CHECK-BTF-NEXT: [3] CONST '(anon)' type_id=2 +; CHECK-BTF-NEXT: [4] VAR 'hash_map' type_id=3, linkage=global +; CHECK-BTF-NEXT: [5] DATASEC '.maps' size=0 vlen=1 +; CHECK-BTF-NEXT: type_id=4 offset=0 size=4 !llvm.dbg.cu = !{!2} !llvm.module.flags = !{!11, !12, !13} diff --git a/llvm/test/CodeGen/BPF/BTF/map-def-nested-array.ll b/llvm/test/CodeGen/BPF/BTF/map-def-nested-array.ll new file mode 100644 index 0000000..fc95daf --- /dev/null +++ b/llvm/test/CodeGen/BPF/BTF/map-def-nested-array.ll @@ -0,0 +1,75 @@ +; RUN: llc -mtriple=bpfel -mcpu=v3 -filetype=obj -o %t1 %s +; RUN: llvm-objcopy --dump-section='.BTF'=%t2 %t1 +; RUN: %python %p/print_btf.py %t2 | FileCheck -check-prefixes=CHECK-BTF-SHORT %s +; RUN: %python %p/print_btf.py %t2 | FileCheck -check-prefixes=CHECK-BTF %s +; Source: +; struct nested_value_type { +; int a1; +; }; +; struct map_type { +; struct { +; struct nested_value_type *value; +; } *values[]; +; }; +; Compilation flags: +; clang -target bpf -g -O2 -S -emit-llvm prog.c + +; ModuleID = 'prog.c' +source_filename = "prog.c" +target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128" +target triple = "bpf" + +%struct.map_type = type { [0 x ptr] } + +@array_of_maps = dso_local local_unnamed_addr global %struct.map_type zeroinitializer, section ".maps", align 8, !dbg !0 + +; We expect no forward declarations. +; +; CHECK-BTF-SHORT-NOT: FWD + +; Assert the whole BTF. 
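+; (For reference, the RUN pipeline above corresponds to these manual steps,
+; with prog.ll/prog.o/prog.btf as hypothetical file names:
+;   llc -mtriple=bpfel -mcpu=v3 -filetype=obj -o prog.o prog.ll
+;   llvm-objcopy --dump-section='.BTF'=prog.btf prog.o
+;   python print_btf.py prog.btf
+; print_btf.py pretty-prints the raw .BTF section into the textual form
+; asserted below.)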
+; +; CHECK-BTF: [1] PTR '(anon)' type_id=2 +; CHECK-BTF-NEXT: [2] STRUCT 'nested_value_type' size=4 vlen=1 +; CHECK-BTF-NEXT: 'a1' type_id=3 bits_offset=0 +; CHECK-BTF-NEXT: [3] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED +; CHECK-BTF-NEXT: [4] STRUCT '(anon)' size=8 vlen=1 +; CHECK-BTF-NEXT: 'value' type_id=1 bits_offset=0 +; CHECK-BTF-NEXT: [5] PTR '(anon)' type_id=4 +; CHECK-BTF-NEXT: [6] ARRAY '(anon)' type_id=5 index_type_id=7 nr_elems=0 +; CHECK-BTF-NEXT: [7] INT '__ARRAY_SIZE_TYPE__' size=4 bits_offset=0 nr_bits=32 encoding=(none) +; CHECK-BTF-NEXT: [8] STRUCT 'map_type' size=0 vlen=1 +; CHECK-BTF-NEXT: 'values' type_id=6 bits_offset=0 +; CHECK-BTF-NEXT: [9] VAR 'array_of_maps' type_id=8, linkage=global +; CHECK-BTF-NEXT: [10] DATASEC '.maps' size=0 vlen=1 +; CHECK-BTF-NEXT: type_id=9 offset=0 size=0 + +!llvm.dbg.cu = !{!2} +!llvm.module.flags = !{!20, !21, !22, !23} +!llvm.ident = !{!24} + +!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) +!1 = distinct !DIGlobalVariable(name: "array_of_maps", scope: !2, file: !3, line: 9, type: !5, isLocal: false, isDefinition: true) +!2 = distinct !DICompileUnit(language: DW_LANG_C11, file: !3, producer: "clang version 22.0.0git (git@github.com:llvm/llvm-project.git ed93eaa421b714028b85cc887d80c45991d7207f)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, globals: !4, splitDebugInlining: false, nameTableKind: None) +!3 = !DIFile(filename: "prog.c", directory: "/home/mtardy/llvm-bug-repro", checksumkind: CSK_MD5, checksum: "9381d9e83e9c0b235a14704224815e96") +!4 = !{!0} +!5 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "map_type", file: !3, line: 4, elements: !6) +!6 = !{!7} +!7 = !DIDerivedType(tag: DW_TAG_member, name: "values", scope: !5, file: !3, line: 7, baseType: !8) +!8 = !DICompositeType(tag: DW_TAG_array_type, baseType: !9, elements: !18) +!9 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !10, size: 64) +!10 = distinct !DICompositeType(tag: DW_TAG_structure_type, scope: !5, file: !3, line: 5, size: 64, elements: !11) +!11 = !{!12} +!12 = !DIDerivedType(tag: DW_TAG_member, name: "value", scope: !10, file: !3, line: 6, baseType: !13, size: 64) +!13 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !14, size: 64) +!14 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "nested_value_type", file: !3, line: 1, size: 32, elements: !15) +!15 = !{!16} +!16 = !DIDerivedType(tag: DW_TAG_member, name: "a1", scope: !14, file: !3, line: 2, baseType: !17, size: 32) +!17 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!18 = !{!19} +!19 = !DISubrange(count: -1) +!20 = !{i32 7, !"Dwarf Version", i32 5} +!21 = !{i32 2, !"Debug Info Version", i32 3} +!22 = !{i32 1, !"wchar_size", i32 4} +!23 = !{i32 7, !"frame-pointer", i32 2} +!24 = !{!"clang version 22.0.0git (git@github.com:llvm/llvm-project.git ed93eaa421b714028b85cc887d80c45991d7207f)"} diff --git a/llvm/test/CodeGen/MIR/AMDGPU/noalias-addrspace-expect-id.mir b/llvm/test/CodeGen/MIR/AMDGPU/noalias-addrspace-expect-id.mir new file mode 100644 index 0000000..4179ff2 --- /dev/null +++ b/llvm/test/CodeGen/MIR/AMDGPU/noalias-addrspace-expect-id.mir @@ -0,0 +1,29 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +# RUN: not llc -mtriple=amdgcn -mcpu=gfx1200 -run-pass none -o /dev/null %s 2>&1 | FileCheck %s + +--- | + define void @expect_id(ptr %ptr, float %data) #0 { + %1 = atomicrmw fadd ptr %ptr, float %data syncscope("agent") seq_cst, align 4, 
!noalias.addrspace !0 + ret void + } + + attributes #0 = { "target-cpu"="gfx1200" } + + !0 = !{i32 5, i32 6} +... + +--- +name: expect_id + +body: | + bb.1 (%ir-block.0): + liveins: $vgpr0, $vgpr1, $vgpr2 + + ; CHECK: expected metadata id after '!' + %2:vgpr_32 = COPY $vgpr0 + %3:vgpr_32 = COPY $vgpr1 + %0:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1 + %1:vgpr_32 = COPY $vgpr2 + FLAT_ATOMIC_ADD_F32 %0, %1, 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr, !noalias.addrspace !!) + S_ENDPGM 0 +... diff --git a/llvm/test/CodeGen/MIR/AMDGPU/noalias-addrspace-parse.mir b/llvm/test/CodeGen/MIR/AMDGPU/noalias-addrspace-parse.mir new file mode 100644 index 0000000..7fe6aa9 --- /dev/null +++ b/llvm/test/CodeGen/MIR/AMDGPU/noalias-addrspace-parse.mir @@ -0,0 +1,36 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +# RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -run-pass=none -o - %s | FileCheck %s + + +--- | + define void @test_parsing_printing(ptr %ptr, float %data) { + %1 = atomicrmw fadd ptr %ptr, float %data syncscope("agent") seq_cst, align 4, !noalias.addrspace !0 + ret void + } + + !0 = !{i32 5, i32 6} +... + +--- +name: test_parsing_printing + +body: | + bb.1 (%ir-block.0): + liveins: $vgpr0, $vgpr1, $vgpr2 + + ; CHECK-LABEL: name: test_parsing_printing + ; CHECK: liveins: $vgpr0, $vgpr1, $vgpr2 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0 + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1 + ; CHECK-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1 + ; CHECK-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2 + ; CHECK-NEXT: FLAT_ATOMIC_ADD_F32 [[REG_SEQUENCE]], [[COPY2]], 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr, !noalias.addrspace !0) + ; CHECK-NEXT: S_ENDPGM 0 + %2:vgpr_32 = COPY $vgpr0 + %3:vgpr_32 = COPY $vgpr1 + %0:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1 + %1:vgpr_32 = COPY $vgpr2 + FLAT_ATOMIC_ADD_F32 %0, %1, 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr, !noalias.addrspace !0) + S_ENDPGM 0 +... diff --git a/llvm/test/CodeGen/MIR/AMDGPU/noalias-addrspace-undefine-matadata.mir b/llvm/test/CodeGen/MIR/AMDGPU/noalias-addrspace-undefine-matadata.mir new file mode 100644 index 0000000..505b514 --- /dev/null +++ b/llvm/test/CodeGen/MIR/AMDGPU/noalias-addrspace-undefine-matadata.mir @@ -0,0 +1,28 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 5 +# RUN: not llc -mtriple=amdgcn -mcpu=gfx1200 -run-pass none -o /dev/null %s 2>&1 | FileCheck %s + + +--- | + define void @undefined_metadata(ptr %ptr, float %data) { + %1 = atomicrmw fadd ptr %ptr, float %data syncscope("agent") seq_cst, align 4, !noalias.addrspace !0 + ret void + } + + !0 = !{i32 5, i32 6} +... + +--- +name: undefined_metadata + +body: | + bb.1 (%ir-block.0): + liveins: $vgpr0, $vgpr1, $vgpr2 + + ; CHECK: use of undefined metadata '!3' + %2:vgpr_32 = COPY $vgpr0 + %3:vgpr_32 = COPY $vgpr1 + %0:vreg_64 = REG_SEQUENCE %2, %subreg.sub0, %3, %subreg.sub1 + %1:vgpr_32 = COPY $vgpr2 + FLAT_ATOMIC_ADD_F32 %0, %1, 0, 0, implicit $exec, implicit $flat_scr :: (load store syncscope("agent") seq_cst (s32) on %ir.ptr, !noalias.addrspace !3) + S_ENDPGM 0 +... 
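(Context for the three noalias-addrspace MIR tests above: at the IR level, `!noalias.addrspace` carries a list of half-open [start, end) address-space ranges the access is known not to touch, so `!{i32 5, i32 6}` excludes exactly address space 5. A minimal standalone sketch, reusing only constructs that appear in the tests themselves:

  define void @sketch(ptr %ptr, float %data) {
    %r = atomicrmw fadd ptr %ptr, float %data syncscope("agent") seq_cst, align 4, !noalias.addrspace !0
    ret void
  }
  !0 = !{i32 5, i32 6}

The MIR round-trip is expected to print the annotation back on the machine memory operand, as the parse/print test checks; the other two tests cover the malformed-id and undefined-metadata error paths.)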
diff --git a/llvm/test/CodeGen/MIR/X86/call-site-info-ambiguous-indirect-call-typeid.mir b/llvm/test/CodeGen/MIR/X86/call-site-info-ambiguous-indirect-call-typeid.mir
new file mode 100644
index 0000000..cb78898
--- /dev/null
+++ b/llvm/test/CodeGen/MIR/X86/call-site-info-ambiguous-indirect-call-typeid.mir
@@ -0,0 +1,31 @@
+# Test MIR printer and parser to check that a call instruction with multiple
+# callee types is handled correctly.
+
+# RUN: llc -mtriple=x86_64 --call-graph-section %s -run-pass=none -o - | FileCheck --match-full-lines %s
+# CHECK: name: ambiguous_caller
+# CHECK: callSites:
+# CHECK-NEXT: - { bb: {{.*}}, offset: {{.*}}, fwdArgRegs: {{.*}}, calleeTypeIds:
+# CHECK-NEXT: [ 1234, 5678 ] }
+
+--- |
+  define ptr @ambiguous_caller() {
+  entry:
+    %fn = alloca ptr, align 8
+    %call1 = call ptr %fn(i64 4), !callee_type !0
+    ret ptr %call1
+  }
+
+  !0 = !{!1, !2}
+  !1 = !{i64 0, !"callee_type0.generalized"}
+  !2 = !{i64 0, !"callee_type2.generalized"}
+...
+---
+name: ambiguous_caller
+callSites:
+  - { bb: 0, offset: 1, fwdArgRegs: [], calleeTypeIds: [ 1234, 5678 ] }
+body: |
+  bb.0.entry:
+    %0:gr64 = MOV32ri64 4
+    CALL64r killed %0, csr_64, implicit $rsp, implicit $ssp, implicit $rdi, implicit-def $rsp, implicit-def $ssp, implicit-def $rax
+    RET 0, $rax
+...
diff --git a/llvm/test/CodeGen/MIR/X86/call-site-info-direct-calls-typeid.mir b/llvm/test/CodeGen/MIR/X86/call-site-info-direct-calls-typeid.mir
new file mode 100644
index 0000000..faa021c
--- /dev/null
+++ b/llvm/test/CodeGen/MIR/X86/call-site-info-direct-calls-typeid.mir
@@ -0,0 +1,54 @@
+# Test MIR printer and parser to NOT have the `calleeTypeIds` field in callSites.
+# `calleeTypeIds` is used for propagating call site type identifiers for
+# indirect targets only. This test does not contain any indirect targets.
+
+# RUN: llc -mtriple=x86_64 --call-graph-section %s -run-pass=none -o - | FileCheck --match-full-lines %s
+# CHECK-NOT: calleeTypeIds
+# CHECK: name: bar
+# CHECK: callSites:
+# CHECK-NEXT: - { bb: {{.*}}, offset: {{.*}}, fwdArgRegs: [] }
+# CHECK-NEXT: - { bb: {{.*}}, offset: {{.*}}, fwdArgRegs: [] }
+# CHECK: name: foo
+# CHECK: callSites:
+# CHECK-NEXT: - { bb: {{.*}}, offset: {{.*}}, fwdArgRegs: [] }
+
+--- |
+  declare i32 @fizz(i32, i32)
+
+  declare i32 @buzz(i32, i32)
+
+  define i32 @bar(i32 %x, i32 %y) !type !0 {
+  entry:
+    %call = call i32 @buzz(i32 %x, i32 %x)
+    %call1 = call i32 @fizz(i32 %x, i32 %x)
+    ret i32 0
+  }
+
+  define i32 @foo(i32 %x, i32 %y) !type !0 {
+  entry:
+    %call1 = call i32 @bar(i32 %x, i32 %x)
+    ret i32 0
+  }
+
+  !0 = !{i64 0, !"_ZTSFiiiE.generalized"}
+...
+---
+name: bar
+callSites:
+  - { bb: 0, offset: 0, fwdArgRegs: [] }
+  - { bb: 0, offset: 1, fwdArgRegs: [] }
+body: |
+  bb.0.entry:
+    CALL64pcrel32 target-flags(x86-plt) @buzz, csr_64, implicit $rsp, implicit $ssp, implicit $edi, implicit $esi, implicit-def $rsp, implicit-def $ssp, implicit-def $eax
+    CALL64pcrel32 target-flags(x86-plt) @fizz, csr_64, implicit $rsp, implicit $ssp, implicit $edi, implicit $esi, implicit-def $rsp, implicit-def $ssp, implicit-def $eax
+
+...
+---
+name: foo
+callSites:
+  - { bb: 0, offset: 0, fwdArgRegs: [] }
+body: |
+  bb.0.entry:
+    CALL64pcrel32 target-flags(x86-plt) @bar, csr_64, implicit $rsp, implicit $ssp, implicit $edi, implicit $esi, implicit-def $rsp, implicit-def $ssp, implicit-def $eax
+
+...
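(For contrast with the ambiguous-call test above and the single-id test that follows, a minimal IR sketch of the construct that feeds calleeTypeIds — illustrative names and type strings; the numeric ids in these MIR inputs, 1234, 5678, and 123456789, are hand-written values that only exercise the parse/print round-trip. !callee_type is attached to indirect calls only, listing one generalized type per candidate callee signature, which is why the direct calls in bar and foo carry no calleeTypeIds:

define i32 @indirect(ptr %fn, i32 %x) {
entry:
  ; an indirect call site may match several callee signatures,
  ; so !callee_type lists one generalized type node per candidate
  %r = call i32 %fn(i32 %x), !callee_type !0
  ret i32 %r
}

!0 = !{!1, !2}
!1 = !{i64 0, !"_ZTSFiiE.generalized"}
!2 = !{i64 0, !"_ZTSFiiiE.generalized"})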
diff --git a/llvm/test/CodeGen/MIR/X86/call-site-info-typeid.mir b/llvm/test/CodeGen/MIR/X86/call-site-info-typeid.mir
new file mode 100644
index 0000000..303b8fa
--- /dev/null
+++ b/llvm/test/CodeGen/MIR/X86/call-site-info-typeid.mir
@@ -0,0 +1,28 @@
+# Test MIR printer and parser for the type id field in callSites. It is used
+# for propagating call site type identifiers emitted in the call graph section.
+
+# RUN: llc -mtriple=x86_64 --call-graph-section %s -run-pass=none -o - | FileCheck --match-full-lines %s
+# CHECK: name: call_foo
+# CHECK: callSites:
+# CHECK-NEXT: - { bb: {{.*}}, offset: {{.*}}, fwdArgRegs: [], calleeTypeIds:
+# CHECK-NEXT: [ 123456789 ] }
+
+--- |
+  define i32 @call_foo() {
+  entry:
+    %0 = load ptr, ptr null, align 8
+    call void %0(i8 0), !callee_type !0
+    ret i32 0
+  }
+
+  !0 = !{!1}
+  !1 = !{i64 0, !"_ZTSFvcE.generalized"}
+...
+---
+name: call_foo
+callSites:
+  - { bb: 0, offset: 0, fwdArgRegs: [], calleeTypeIds: [ 123456789 ] }
+body: |
+  bb.0.entry:
+    CALL64m $noreg, 1, $noreg, 0, $noreg, csr_64, implicit $rsp, implicit $ssp, implicit $edi, implicit-def $rsp, implicit-def $ssp :: (load (s64) from `ptr null`)
+...
diff --git a/llvm/test/CodeGen/NVPTX/aggregate-return.ll b/llvm/test/CodeGen/NVPTX/aggregate-return.ll
index 7f52e52..abc873e 100644
--- a/llvm/test/CodeGen/NVPTX/aggregate-return.ll
+++ b/llvm/test/CodeGen/NVPTX/aggregate-return.ll
@@ -16,8 +16,8 @@ define void @test_v2f32(<2 x float> %input, ptr %output) {
 ; CHECK-NEXT: ld.param.b64 %rd1, [test_v2f32_param_0];
 ; CHECK-NEXT: { // callseq 0, 0
 ; CHECK-NEXT: .param .align 8 .b8 param0[8];
-; CHECK-NEXT: st.param.b64 [param0], %rd1;
 ; CHECK-NEXT: .param .align 8 .b8 retval0[8];
+; CHECK-NEXT: st.param.b64 [param0], %rd1;
 ; CHECK-NEXT: call.uni (retval0), barv, (param0);
 ; CHECK-NEXT: ld.param.b64 %rd2, [retval0];
 ; CHECK-NEXT: } // callseq 0
@@ -32,24 +32,24 @@ define void @test_v2f32(<2 x float> %input, ptr %output) {
 define void @test_v3f32(<3 x float> %input, ptr %output) {
 ; CHECK-LABEL: test_v3f32(
 ; CHECK: {
-; CHECK-NEXT: .reg .b32 %r<10>;
-; CHECK-NEXT: .reg .b64 %rd<2>;
+; CHECK-NEXT: .reg .b32 %r<4>;
+; CHECK-NEXT: .reg .b64 %rd<5>;
 ; CHECK-EMPTY:
 ; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: ld.param.v2.b32 {%r1, %r2}, [test_v3f32_param_0];
-; CHECK-NEXT: ld.param.b32 %r3, [test_v3f32_param_0+8];
+; CHECK-NEXT: ld.param.b64 %rd1, [test_v3f32_param_0];
+; CHECK-NEXT: ld.param.b32 %r1, [test_v3f32_param_0+8];
 ; CHECK-NEXT: { // callseq 1, 0
 ; CHECK-NEXT: .param .align 16 .b8 param0[16];
-; CHECK-NEXT: st.param.v2.b32 [param0], {%r1, %r2};
-; CHECK-NEXT: st.param.b32 [param0+8], %r3;
 ; CHECK-NEXT: .param .align 16 .b8 retval0[16];
+; CHECK-NEXT: st.param.b32 [param0+8], %r1;
+; CHECK-NEXT: st.param.b64 [param0], %rd1;
 ; CHECK-NEXT: call.uni (retval0), barv3, (param0);
-; CHECK-NEXT: ld.param.v2.b32 {%r4, %r5}, [retval0];
-; CHECK-NEXT: ld.param.b32 %r6, [retval0+8];
+; CHECK-NEXT: ld.param.b32 %r2, [retval0+8];
+; CHECK-NEXT: ld.param.b64 %rd2, [retval0];
 ; CHECK-NEXT: } // callseq 1
-; CHECK-NEXT: ld.param.b64 %rd1, [test_v3f32_param_1];
-; CHECK-NEXT: st.v2.b32 [%rd1], {%r4, %r5};
-; CHECK-NEXT: st.b32 [%rd1+8], %r6;
+; CHECK-NEXT: ld.param.b64 %rd4, [test_v3f32_param_1];
+; CHECK-NEXT: st.b32 [%rd4+8], %r2;
+; CHECK-NEXT: st.b64 [%rd4], %rd2;
 ; CHECK-NEXT: ret;
 %call = tail call <3 x float> @barv3(<3 x float> %input)
 ; Make sure we don't load more values than we need to.
@@ -68,16 +68,16 @@ define void @test_a2f32([2 x float] %input, ptr %output) { ; CHECK-NEXT: ld.param.b32 %r2, [test_a2f32_param_0+4]; ; CHECK-NEXT: { // callseq 2, 0 ; CHECK-NEXT: .param .align 4 .b8 param0[8]; -; CHECK-NEXT: st.param.b32 [param0], %r1; -; CHECK-NEXT: st.param.b32 [param0+4], %r2; ; CHECK-NEXT: .param .align 4 .b8 retval0[8]; +; CHECK-NEXT: st.param.b32 [param0+4], %r2; +; CHECK-NEXT: st.param.b32 [param0], %r1; ; CHECK-NEXT: call.uni (retval0), bara, (param0); -; CHECK-NEXT: ld.param.b32 %r3, [retval0]; -; CHECK-NEXT: ld.param.b32 %r4, [retval0+4]; +; CHECK-NEXT: ld.param.b32 %r3, [retval0+4]; +; CHECK-NEXT: ld.param.b32 %r4, [retval0]; ; CHECK-NEXT: } // callseq 2 ; CHECK-NEXT: ld.param.b64 %rd1, [test_a2f32_param_1]; -; CHECK-NEXT: st.b32 [%rd1+4], %r4; -; CHECK-NEXT: st.b32 [%rd1], %r3; +; CHECK-NEXT: st.b32 [%rd1+4], %r3; +; CHECK-NEXT: st.b32 [%rd1], %r4; ; CHECK-NEXT: ret; %call = tail call [2 x float] @bara([2 x float] %input) store [2 x float] %call, ptr %output, align 4 @@ -95,16 +95,16 @@ define void @test_s2f32({float, float} %input, ptr %output) { ; CHECK-NEXT: ld.param.b32 %r2, [test_s2f32_param_0+4]; ; CHECK-NEXT: { // callseq 3, 0 ; CHECK-NEXT: .param .align 4 .b8 param0[8]; -; CHECK-NEXT: st.param.b32 [param0], %r1; -; CHECK-NEXT: st.param.b32 [param0+4], %r2; ; CHECK-NEXT: .param .align 4 .b8 retval0[8]; +; CHECK-NEXT: st.param.b32 [param0+4], %r2; +; CHECK-NEXT: st.param.b32 [param0], %r1; ; CHECK-NEXT: call.uni (retval0), bars, (param0); -; CHECK-NEXT: ld.param.b32 %r3, [retval0]; -; CHECK-NEXT: ld.param.b32 %r4, [retval0+4]; +; CHECK-NEXT: ld.param.b32 %r3, [retval0+4]; +; CHECK-NEXT: ld.param.b32 %r4, [retval0]; ; CHECK-NEXT: } // callseq 3 ; CHECK-NEXT: ld.param.b64 %rd1, [test_s2f32_param_1]; -; CHECK-NEXT: st.b32 [%rd1+4], %r4; -; CHECK-NEXT: st.b32 [%rd1], %r3; +; CHECK-NEXT: st.b32 [%rd1+4], %r3; +; CHECK-NEXT: st.b32 [%rd1], %r4; ; CHECK-NEXT: ret; %call = tail call {float, float} @bars({float, float} %input) store {float, float} %call, ptr %output, align 4 diff --git a/llvm/test/CodeGen/NVPTX/bf16x2-instructions.ll b/llvm/test/CodeGen/NVPTX/bf16x2-instructions.ll index ba5813c..b4641d0 100644 --- a/llvm/test/CodeGen/NVPTX/bf16x2-instructions.ll +++ b/llvm/test/CodeGen/NVPTX/bf16x2-instructions.ll @@ -208,13 +208,13 @@ define <2 x bfloat> @test_call(<2 x bfloat> %a, <2 x bfloat> %b) #0 { ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b32 %r1, [test_call_param_0]; -; CHECK-NEXT: ld.param.b32 %r2, [test_call_param_1]; ; CHECK-NEXT: { // callseq 0, 0 ; CHECK-NEXT: .param .align 4 .b8 param0[4]; -; CHECK-NEXT: st.param.b32 [param0], %r1; ; CHECK-NEXT: .param .align 4 .b8 param1[4]; -; CHECK-NEXT: st.param.b32 [param1], %r2; ; CHECK-NEXT: .param .align 4 .b8 retval0[4]; +; CHECK-NEXT: ld.param.b32 %r2, [test_call_param_1]; +; CHECK-NEXT: st.param.b32 [param1], %r2; +; CHECK-NEXT: st.param.b32 [param0], %r1; ; CHECK-NEXT: call.uni (retval0), test_callee, (param0, param1); ; CHECK-NEXT: ld.param.b32 %r3, [retval0]; ; CHECK-NEXT: } // callseq 0 diff --git a/llvm/test/CodeGen/NVPTX/byval-const-global.ll b/llvm/test/CodeGen/NVPTX/byval-const-global.ll index ad9e4b0..b4934e1a 100644 --- a/llvm/test/CodeGen/NVPTX/byval-const-global.ll +++ b/llvm/test/CodeGen/NVPTX/byval-const-global.ll @@ -13,12 +13,12 @@ define void @foo() { ; CHECK-NEXT: .reg .b64 %rd<3>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.global.b64 %rd1, [G]; -; CHECK-NEXT: ld.global.b64 %rd2, [G+8]; ; CHECK-NEXT: { // callseq 0, 0 ; CHECK-NEXT: .param .align 8 .b8 
param0[16]; -; CHECK-NEXT: st.param.b64 [param0], %rd1; -; CHECK-NEXT: st.param.b64 [param0+8], %rd2; +; CHECK-NEXT: ld.global.b64 %rd1, [G+8]; +; CHECK-NEXT: st.param.b64 [param0+8], %rd1; +; CHECK-NEXT: ld.global.b64 %rd2, [G]; +; CHECK-NEXT: st.param.b64 [param0], %rd2; ; CHECK-NEXT: call.uni bar, (param0); ; CHECK-NEXT: } // callseq 0 ; CHECK-NEXT: ret; diff --git a/llvm/test/CodeGen/NVPTX/call-with-alloca-buffer.ll b/llvm/test/CodeGen/NVPTX/call-with-alloca-buffer.ll index 0cd7058..0eb7f64 100644 --- a/llvm/test/CodeGen/NVPTX/call-with-alloca-buffer.ll +++ b/llvm/test/CodeGen/NVPTX/call-with-alloca-buffer.ll @@ -44,11 +44,11 @@ entry: %arrayidx7 = getelementptr inbounds [16 x i8], ptr %buf, i64 0, i64 3 store float %3, ptr %arrayidx7, align 4 -; CHECK: .param .b64 param0; -; CHECK-NEXT: st.param.b64 [param0], %rd[[A_REG]] -; CHECK-NEXT: .param .b64 param1; -; CHECK-NEXT: st.param.b64 [param1], %rd[[SP_REG]] -; CHECK-NEXT: call.uni callee, +; CHECK-DAG: .param .b64 param0; +; CHECK-DAG: .param .b64 param1; +; CHECK-DAG: st.param.b64 [param0], %rd[[A_REG]] +; CHECK-DAG: st.param.b64 [param1], %rd[[SP_REG]] +; CHECK: call.uni callee, call void @callee(ptr %a, ptr %buf) #2 ret void diff --git a/llvm/test/CodeGen/NVPTX/call_bitcast_byval.ll b/llvm/test/CodeGen/NVPTX/call_bitcast_byval.ll index f67145d..483d48a 100644 --- a/llvm/test/CodeGen/NVPTX/call_bitcast_byval.ll +++ b/llvm/test/CodeGen/NVPTX/call_bitcast_byval.ll @@ -14,11 +14,11 @@ target triple = "nvptx64-nvidia-cuda" %complex_half = type { half, half } ; CHECK: .param .align 2 .b8 param2[4]; -; CHECK: st.param.b16 [param2], %rs1; -; CHECK: st.param.b16 [param2+2], %rs2; ; CHECK: .param .align 2 .b8 retval0[4]; -; CHECK-NEXT: prototype_0 : .callprototype (.param .align 2 .b8 _[4]) _ (.param .b32 _, .param .b32 _, .param .align 2 .b8 _[4]); -; CHECK-NEXT: call (retval0), +; CHECK-DAG: st.param.b16 [param2], %rs{{[0-9]+}}; +; CHECK-DAG: st.param.b16 [param2+2], %rs{{[0-9]+}}; +; CHECK: prototype_0 : .callprototype (.param .align 2 .b8 _[4]) _ (.param .b32 _, .param .b32 _, .param .align 2 .b8 _[4]); +; CHECK: call (retval0), define weak_odr void @foo() { entry: %call.i.i.i = tail call %"class.complex" @_Z20__spirv_GroupCMulKHRjjN5__spv12complex_halfE(i32 0, i32 0, ptr byval(%"class.complex") null) @@ -36,10 +36,10 @@ define internal void @callee(ptr byval(%"class.complex") %byval_arg) { } define void @boom() { %fp = call ptr @usefp(ptr @callee) - ; CHECK: .param .align 2 .b8 param0[4]; - ; CHECK: st.param.b16 [param0], %rs1; - ; CHECK: st.param.b16 [param0+2], %rs2; - ; CHECK: .callprototype ()_ (.param .align 2 .b8 _[4]); + ; CHECK-DAG: .param .align 2 .b8 param0[4]; + ; CHECK-DAG: st.param.b16 [param0], %rs{{[0-9]+}}; + ; CHECK-DAG: st.param.b16 [param0+2], %rs{{[0-9]+}}; + ; CHECK-DAG: .callprototype ()_ (.param .align 2 .b8 _[4]); call void %fp(ptr byval(%"class.complex") null) ret void } diff --git a/llvm/test/CodeGen/NVPTX/combine-mad.ll b/llvm/test/CodeGen/NVPTX/combine-mad.ll index 2232810..da303b7 100644 --- a/llvm/test/CodeGen/NVPTX/combine-mad.ll +++ b/llvm/test/CodeGen/NVPTX/combine-mad.ll @@ -199,10 +199,10 @@ define i32 @test_mad_multi_use(i32 %a, i32 %b, i32 %c) { ; CHECK-NEXT: add.s32 %r5, %r3, %r4; ; CHECK-NEXT: { // callseq 0, 0 ; CHECK-NEXT: .param .b32 param0; -; CHECK-NEXT: st.param.b32 [param0], %r3; ; CHECK-NEXT: .param .b32 param1; -; CHECK-NEXT: st.param.b32 [param1], %r5; ; CHECK-NEXT: .param .b32 retval0; +; CHECK-NEXT: st.param.b32 [param0], %r3; +; CHECK-NEXT: st.param.b32 [param1], %r5; ; CHECK-NEXT: 
call.uni (retval0), use, (param0, param1); ; CHECK-NEXT: ld.param.b32 %r6, [retval0]; ; CHECK-NEXT: } // callseq 0 diff --git a/llvm/test/CodeGen/NVPTX/compare-int.ll b/llvm/test/CodeGen/NVPTX/compare-int.ll index b44ae47..9338172d 100644 --- a/llvm/test/CodeGen/NVPTX/compare-int.ll +++ b/llvm/test/CodeGen/NVPTX/compare-int.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc < %s -mtriple=nvptx -mcpu=sm_20 | FileCheck %s ; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_20 | FileCheck %s ; RUN: %if ptxas && !ptxas-12.0 %{ llc < %s -mtriple=nvptx -mcpu=sm_20 | %ptxas-verify %} @@ -11,90 +12,180 @@ ;;; i64 define i64 @icmp_eq_i64(i64 %a, i64 %b) { -; CHECK: setp.eq.b64 %p[[P0:[0-9]+]], %rd{{[0-9]+}}, %rd{{[0-9]+}} -; CHECK: selp.b64 %rd{{[0-9]+}}, 1, 0, %p[[P0]] -; CHECK: ret +; CHECK-LABEL: icmp_eq_i64( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<2>; +; CHECK-NEXT: .reg .b64 %rd<4>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b64 %rd1, [icmp_eq_i64_param_0]; +; CHECK-NEXT: ld.param.b64 %rd2, [icmp_eq_i64_param_1]; +; CHECK-NEXT: setp.eq.b64 %p1, %rd1, %rd2; +; CHECK-NEXT: selp.b64 %rd3, 1, 0, %p1; +; CHECK-NEXT: st.param.b64 [func_retval0], %rd3; +; CHECK-NEXT: ret; %cmp = icmp eq i64 %a, %b %ret = zext i1 %cmp to i64 ret i64 %ret } define i64 @icmp_ne_i64(i64 %a, i64 %b) { -; CHECK: setp.ne.b64 %p[[P0:[0-9]+]], %rd{{[0-9]+}}, %rd{{[0-9]+}} -; CHECK: selp.b64 %rd{{[0-9]+}}, 1, 0, %p[[P0]] -; CHECK: ret +; CHECK-LABEL: icmp_ne_i64( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<2>; +; CHECK-NEXT: .reg .b64 %rd<4>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b64 %rd1, [icmp_ne_i64_param_0]; +; CHECK-NEXT: ld.param.b64 %rd2, [icmp_ne_i64_param_1]; +; CHECK-NEXT: setp.ne.b64 %p1, %rd1, %rd2; +; CHECK-NEXT: selp.b64 %rd3, 1, 0, %p1; +; CHECK-NEXT: st.param.b64 [func_retval0], %rd3; +; CHECK-NEXT: ret; %cmp = icmp ne i64 %a, %b %ret = zext i1 %cmp to i64 ret i64 %ret } define i64 @icmp_ugt_i64(i64 %a, i64 %b) { -; CHECK: setp.gt.u64 %p[[P0:[0-9]+]], %rd{{[0-9]+}}, %rd{{[0-9]+}} -; CHECK: selp.b64 %rd{{[0-9]+}}, 1, 0, %p[[P0]] -; CHECK: ret +; CHECK-LABEL: icmp_ugt_i64( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<2>; +; CHECK-NEXT: .reg .b64 %rd<4>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b64 %rd1, [icmp_ugt_i64_param_0]; +; CHECK-NEXT: ld.param.b64 %rd2, [icmp_ugt_i64_param_1]; +; CHECK-NEXT: setp.gt.u64 %p1, %rd1, %rd2; +; CHECK-NEXT: selp.b64 %rd3, 1, 0, %p1; +; CHECK-NEXT: st.param.b64 [func_retval0], %rd3; +; CHECK-NEXT: ret; %cmp = icmp ugt i64 %a, %b %ret = zext i1 %cmp to i64 ret i64 %ret } define i64 @icmp_uge_i64(i64 %a, i64 %b) { -; CHECK: setp.ge.u64 %p[[P0:[0-9]+]], %rd{{[0-9]+}}, %rd{{[0-9]+}} -; CHECK: selp.b64 %rd{{[0-9]+}}, 1, 0, %p[[P0]] -; CHECK: ret +; CHECK-LABEL: icmp_uge_i64( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<2>; +; CHECK-NEXT: .reg .b64 %rd<4>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b64 %rd1, [icmp_uge_i64_param_0]; +; CHECK-NEXT: ld.param.b64 %rd2, [icmp_uge_i64_param_1]; +; CHECK-NEXT: setp.ge.u64 %p1, %rd1, %rd2; +; CHECK-NEXT: selp.b64 %rd3, 1, 0, %p1; +; CHECK-NEXT: st.param.b64 [func_retval0], %rd3; +; CHECK-NEXT: ret; %cmp = icmp uge i64 %a, %b %ret = zext i1 %cmp to i64 ret i64 %ret } define i64 @icmp_ult_i64(i64 %a, i64 %b) { -; CHECK: setp.lt.u64 %p[[P0:[0-9]+]], %rd{{[0-9]+}}, %rd{{[0-9]+}} -; CHECK: selp.b64 %rd{{[0-9]+}}, 1, 0, %p[[P0]] -; CHECK: ret +; CHECK-LABEL: icmp_ult_i64( +; CHECK: { +; 
CHECK-NEXT: .reg .pred %p<2>; +; CHECK-NEXT: .reg .b64 %rd<4>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b64 %rd1, [icmp_ult_i64_param_0]; +; CHECK-NEXT: ld.param.b64 %rd2, [icmp_ult_i64_param_1]; +; CHECK-NEXT: setp.lt.u64 %p1, %rd1, %rd2; +; CHECK-NEXT: selp.b64 %rd3, 1, 0, %p1; +; CHECK-NEXT: st.param.b64 [func_retval0], %rd3; +; CHECK-NEXT: ret; %cmp = icmp ult i64 %a, %b %ret = zext i1 %cmp to i64 ret i64 %ret } define i64 @icmp_ule_i64(i64 %a, i64 %b) { -; CHECK: setp.le.u64 %p[[P0:[0-9]+]], %rd{{[0-9]+}}, %rd{{[0-9]+}} -; CHECK: selp.b64 %rd{{[0-9]+}}, 1, 0, %p[[P0]] -; CHECK: ret +; CHECK-LABEL: icmp_ule_i64( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<2>; +; CHECK-NEXT: .reg .b64 %rd<4>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b64 %rd1, [icmp_ule_i64_param_0]; +; CHECK-NEXT: ld.param.b64 %rd2, [icmp_ule_i64_param_1]; +; CHECK-NEXT: setp.le.u64 %p1, %rd1, %rd2; +; CHECK-NEXT: selp.b64 %rd3, 1, 0, %p1; +; CHECK-NEXT: st.param.b64 [func_retval0], %rd3; +; CHECK-NEXT: ret; %cmp = icmp ule i64 %a, %b %ret = zext i1 %cmp to i64 ret i64 %ret } define i64 @icmp_sgt_i64(i64 %a, i64 %b) { -; CHECK: setp.gt.s64 %p[[P0:[0-9]+]], %rd{{[0-9]+}}, %rd{{[0-9]+}} -; CHECK: selp.b64 %rd{{[0-9]+}}, 1, 0, %p[[P0]] -; CHECK: ret +; CHECK-LABEL: icmp_sgt_i64( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<2>; +; CHECK-NEXT: .reg .b64 %rd<4>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b64 %rd1, [icmp_sgt_i64_param_0]; +; CHECK-NEXT: ld.param.b64 %rd2, [icmp_sgt_i64_param_1]; +; CHECK-NEXT: setp.gt.s64 %p1, %rd1, %rd2; +; CHECK-NEXT: selp.b64 %rd3, 1, 0, %p1; +; CHECK-NEXT: st.param.b64 [func_retval0], %rd3; +; CHECK-NEXT: ret; %cmp = icmp sgt i64 %a, %b %ret = zext i1 %cmp to i64 ret i64 %ret } define i64 @icmp_sge_i64(i64 %a, i64 %b) { -; CHECK: setp.ge.s64 %p[[P0:[0-9]+]], %rd{{[0-9]+}}, %rd{{[0-9]+}} -; CHECK: selp.b64 %rd{{[0-9]+}}, 1, 0, %p[[P0]] -; CHECK: ret +; CHECK-LABEL: icmp_sge_i64( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<2>; +; CHECK-NEXT: .reg .b64 %rd<4>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b64 %rd1, [icmp_sge_i64_param_0]; +; CHECK-NEXT: ld.param.b64 %rd2, [icmp_sge_i64_param_1]; +; CHECK-NEXT: setp.ge.s64 %p1, %rd1, %rd2; +; CHECK-NEXT: selp.b64 %rd3, 1, 0, %p1; +; CHECK-NEXT: st.param.b64 [func_retval0], %rd3; +; CHECK-NEXT: ret; %cmp = icmp sge i64 %a, %b %ret = zext i1 %cmp to i64 ret i64 %ret } define i64 @icmp_slt_i64(i64 %a, i64 %b) { -; CHECK: setp.lt.s64 %p[[P0:[0-9]+]], %rd{{[0-9]+}}, %rd{{[0-9]+}} -; CHECK: selp.b64 %rd{{[0-9]+}}, 1, 0, %p[[P0]] -; CHECK: ret +; CHECK-LABEL: icmp_slt_i64( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<2>; +; CHECK-NEXT: .reg .b64 %rd<4>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b64 %rd1, [icmp_slt_i64_param_0]; +; CHECK-NEXT: ld.param.b64 %rd2, [icmp_slt_i64_param_1]; +; CHECK-NEXT: setp.lt.s64 %p1, %rd1, %rd2; +; CHECK-NEXT: selp.b64 %rd3, 1, 0, %p1; +; CHECK-NEXT: st.param.b64 [func_retval0], %rd3; +; CHECK-NEXT: ret; %cmp = icmp slt i64 %a, %b %ret = zext i1 %cmp to i64 ret i64 %ret } define i64 @icmp_sle_i64(i64 %a, i64 %b) { -; CHECK: setp.le.s64 %p[[P0:[0-9]+]], %rd{{[0-9]+}}, %rd{{[0-9]+}} -; CHECK: selp.b64 %rd{{[0-9]+}}, 1, 0, %p[[P0]] -; CHECK: ret +; CHECK-LABEL: icmp_sle_i64( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<2>; +; CHECK-NEXT: .reg .b64 %rd<4>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b64 %rd1, [icmp_sle_i64_param_0]; +; CHECK-NEXT: ld.param.b64 %rd2, [icmp_sle_i64_param_1]; +; CHECK-NEXT: 
setp.le.s64 %p1, %rd1, %rd2; +; CHECK-NEXT: selp.b64 %rd3, 1, 0, %p1; +; CHECK-NEXT: st.param.b64 [func_retval0], %rd3; +; CHECK-NEXT: ret; %cmp = icmp sle i64 %a, %b %ret = zext i1 %cmp to i64 ret i64 %ret @@ -103,90 +194,180 @@ define i64 @icmp_sle_i64(i64 %a, i64 %b) { ;;; i32 define i32 @icmp_eq_i32(i32 %a, i32 %b) { -; CHECK: setp.eq.b32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}} -; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]] -; CHECK: ret +; CHECK-LABEL: icmp_eq_i32( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<2>; +; CHECK-NEXT: .reg .b32 %r<4>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b32 %r1, [icmp_eq_i32_param_0]; +; CHECK-NEXT: ld.param.b32 %r2, [icmp_eq_i32_param_1]; +; CHECK-NEXT: setp.eq.b32 %p1, %r1, %r2; +; CHECK-NEXT: selp.b32 %r3, 1, 0, %p1; +; CHECK-NEXT: st.param.b32 [func_retval0], %r3; +; CHECK-NEXT: ret; %cmp = icmp eq i32 %a, %b %ret = zext i1 %cmp to i32 ret i32 %ret } define i32 @icmp_ne_i32(i32 %a, i32 %b) { -; CHECK: setp.ne.b32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}} -; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]] -; CHECK: ret +; CHECK-LABEL: icmp_ne_i32( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<2>; +; CHECK-NEXT: .reg .b32 %r<4>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b32 %r1, [icmp_ne_i32_param_0]; +; CHECK-NEXT: ld.param.b32 %r2, [icmp_ne_i32_param_1]; +; CHECK-NEXT: setp.ne.b32 %p1, %r1, %r2; +; CHECK-NEXT: selp.b32 %r3, 1, 0, %p1; +; CHECK-NEXT: st.param.b32 [func_retval0], %r3; +; CHECK-NEXT: ret; %cmp = icmp ne i32 %a, %b %ret = zext i1 %cmp to i32 ret i32 %ret } define i32 @icmp_ugt_i32(i32 %a, i32 %b) { -; CHECK: setp.gt.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}} -; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]] -; CHECK: ret +; CHECK-LABEL: icmp_ugt_i32( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<2>; +; CHECK-NEXT: .reg .b32 %r<4>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b32 %r1, [icmp_ugt_i32_param_0]; +; CHECK-NEXT: ld.param.b32 %r2, [icmp_ugt_i32_param_1]; +; CHECK-NEXT: setp.gt.u32 %p1, %r1, %r2; +; CHECK-NEXT: selp.b32 %r3, 1, 0, %p1; +; CHECK-NEXT: st.param.b32 [func_retval0], %r3; +; CHECK-NEXT: ret; %cmp = icmp ugt i32 %a, %b %ret = zext i1 %cmp to i32 ret i32 %ret } define i32 @icmp_uge_i32(i32 %a, i32 %b) { -; CHECK: setp.ge.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}} -; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]] -; CHECK: ret +; CHECK-LABEL: icmp_uge_i32( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<2>; +; CHECK-NEXT: .reg .b32 %r<4>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b32 %r1, [icmp_uge_i32_param_0]; +; CHECK-NEXT: ld.param.b32 %r2, [icmp_uge_i32_param_1]; +; CHECK-NEXT: setp.ge.u32 %p1, %r1, %r2; +; CHECK-NEXT: selp.b32 %r3, 1, 0, %p1; +; CHECK-NEXT: st.param.b32 [func_retval0], %r3; +; CHECK-NEXT: ret; %cmp = icmp uge i32 %a, %b %ret = zext i1 %cmp to i32 ret i32 %ret } define i32 @icmp_ult_i32(i32 %a, i32 %b) { -; CHECK: setp.lt.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}} -; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]] -; CHECK: ret +; CHECK-LABEL: icmp_ult_i32( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<2>; +; CHECK-NEXT: .reg .b32 %r<4>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b32 %r1, [icmp_ult_i32_param_0]; +; CHECK-NEXT: ld.param.b32 %r2, [icmp_ult_i32_param_1]; +; CHECK-NEXT: setp.lt.u32 %p1, %r1, %r2; +; CHECK-NEXT: selp.b32 %r3, 1, 0, %p1; +; CHECK-NEXT: st.param.b32 [func_retval0], %r3; +; CHECK-NEXT: ret; %cmp = icmp ult i32 %a, %b %ret = zext i1 %cmp to i32 ret i32 %ret } define i32 
@icmp_ule_i32(i32 %a, i32 %b) { -; CHECK: setp.le.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}} -; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]] -; CHECK: ret +; CHECK-LABEL: icmp_ule_i32( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<2>; +; CHECK-NEXT: .reg .b32 %r<4>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b32 %r1, [icmp_ule_i32_param_0]; +; CHECK-NEXT: ld.param.b32 %r2, [icmp_ule_i32_param_1]; +; CHECK-NEXT: setp.le.u32 %p1, %r1, %r2; +; CHECK-NEXT: selp.b32 %r3, 1, 0, %p1; +; CHECK-NEXT: st.param.b32 [func_retval0], %r3; +; CHECK-NEXT: ret; %cmp = icmp ule i32 %a, %b %ret = zext i1 %cmp to i32 ret i32 %ret } define i32 @icmp_sgt_i32(i32 %a, i32 %b) { -; CHECK: setp.gt.s32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}} -; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]] -; CHECK: ret +; CHECK-LABEL: icmp_sgt_i32( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<2>; +; CHECK-NEXT: .reg .b32 %r<4>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b32 %r1, [icmp_sgt_i32_param_0]; +; CHECK-NEXT: ld.param.b32 %r2, [icmp_sgt_i32_param_1]; +; CHECK-NEXT: setp.gt.s32 %p1, %r1, %r2; +; CHECK-NEXT: selp.b32 %r3, 1, 0, %p1; +; CHECK-NEXT: st.param.b32 [func_retval0], %r3; +; CHECK-NEXT: ret; %cmp = icmp sgt i32 %a, %b %ret = zext i1 %cmp to i32 ret i32 %ret } define i32 @icmp_sge_i32(i32 %a, i32 %b) { -; CHECK: setp.ge.s32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}} -; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]] -; CHECK: ret +; CHECK-LABEL: icmp_sge_i32( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<2>; +; CHECK-NEXT: .reg .b32 %r<4>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b32 %r1, [icmp_sge_i32_param_0]; +; CHECK-NEXT: ld.param.b32 %r2, [icmp_sge_i32_param_1]; +; CHECK-NEXT: setp.ge.s32 %p1, %r1, %r2; +; CHECK-NEXT: selp.b32 %r3, 1, 0, %p1; +; CHECK-NEXT: st.param.b32 [func_retval0], %r3; +; CHECK-NEXT: ret; %cmp = icmp sge i32 %a, %b %ret = zext i1 %cmp to i32 ret i32 %ret } define i32 @icmp_slt_i32(i32 %a, i32 %b) { -; CHECK: setp.lt.s32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}} -; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]] -; CHECK: ret +; CHECK-LABEL: icmp_slt_i32( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<2>; +; CHECK-NEXT: .reg .b32 %r<4>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b32 %r1, [icmp_slt_i32_param_0]; +; CHECK-NEXT: ld.param.b32 %r2, [icmp_slt_i32_param_1]; +; CHECK-NEXT: setp.lt.s32 %p1, %r1, %r2; +; CHECK-NEXT: selp.b32 %r3, 1, 0, %p1; +; CHECK-NEXT: st.param.b32 [func_retval0], %r3; +; CHECK-NEXT: ret; %cmp = icmp slt i32 %a, %b %ret = zext i1 %cmp to i32 ret i32 %ret } define i32 @icmp_sle_i32(i32 %a, i32 %b) { -; CHECK: setp.le.s32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}} -; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]] -; CHECK: ret +; CHECK-LABEL: icmp_sle_i32( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<2>; +; CHECK-NEXT: .reg .b32 %r<4>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b32 %r1, [icmp_sle_i32_param_0]; +; CHECK-NEXT: ld.param.b32 %r2, [icmp_sle_i32_param_1]; +; CHECK-NEXT: setp.le.s32 %p1, %r1, %r2; +; CHECK-NEXT: selp.b32 %r3, 1, 0, %p1; +; CHECK-NEXT: st.param.b32 [func_retval0], %r3; +; CHECK-NEXT: ret; %cmp = icmp sle i32 %a, %b %ret = zext i1 %cmp to i32 ret i32 %ret @@ -196,90 +377,190 @@ define i32 @icmp_sle_i32(i32 %a, i32 %b) { ;;; i16 define i16 @icmp_eq_i16(i16 %a, i16 %b) { -; CHECK: setp.eq.b16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}} -; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]] -; CHECK: ret +; CHECK-LABEL: icmp_eq_i16( +; CHECK: { +; CHECK-NEXT: 
.reg .pred %p<2>; +; CHECK-NEXT: .reg .b16 %rs<3>; +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b16 %rs1, [icmp_eq_i16_param_0]; +; CHECK-NEXT: ld.param.b16 %rs2, [icmp_eq_i16_param_1]; +; CHECK-NEXT: setp.eq.b16 %p1, %rs1, %rs2; +; CHECK-NEXT: selp.b32 %r1, 1, 0, %p1; +; CHECK-NEXT: st.param.b32 [func_retval0], %r1; +; CHECK-NEXT: ret; %cmp = icmp eq i16 %a, %b %ret = zext i1 %cmp to i16 ret i16 %ret } define i16 @icmp_ne_i16(i16 %a, i16 %b) { -; CHECK: setp.ne.b16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}} -; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]] -; CHECK: ret +; CHECK-LABEL: icmp_ne_i16( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<2>; +; CHECK-NEXT: .reg .b16 %rs<3>; +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b16 %rs1, [icmp_ne_i16_param_0]; +; CHECK-NEXT: ld.param.b16 %rs2, [icmp_ne_i16_param_1]; +; CHECK-NEXT: setp.ne.b16 %p1, %rs1, %rs2; +; CHECK-NEXT: selp.b32 %r1, 1, 0, %p1; +; CHECK-NEXT: st.param.b32 [func_retval0], %r1; +; CHECK-NEXT: ret; %cmp = icmp ne i16 %a, %b %ret = zext i1 %cmp to i16 ret i16 %ret } define i16 @icmp_ugt_i16(i16 %a, i16 %b) { -; CHECK: setp.gt.u16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}} -; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]] -; CHECK: ret +; CHECK-LABEL: icmp_ugt_i16( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<2>; +; CHECK-NEXT: .reg .b16 %rs<3>; +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b16 %rs1, [icmp_ugt_i16_param_0]; +; CHECK-NEXT: ld.param.b16 %rs2, [icmp_ugt_i16_param_1]; +; CHECK-NEXT: setp.gt.u16 %p1, %rs1, %rs2; +; CHECK-NEXT: selp.b32 %r1, 1, 0, %p1; +; CHECK-NEXT: st.param.b32 [func_retval0], %r1; +; CHECK-NEXT: ret; %cmp = icmp ugt i16 %a, %b %ret = zext i1 %cmp to i16 ret i16 %ret } define i16 @icmp_uge_i16(i16 %a, i16 %b) { -; CHECK: setp.ge.u16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}} -; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]] -; CHECK: ret +; CHECK-LABEL: icmp_uge_i16( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<2>; +; CHECK-NEXT: .reg .b16 %rs<3>; +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b16 %rs1, [icmp_uge_i16_param_0]; +; CHECK-NEXT: ld.param.b16 %rs2, [icmp_uge_i16_param_1]; +; CHECK-NEXT: setp.ge.u16 %p1, %rs1, %rs2; +; CHECK-NEXT: selp.b32 %r1, 1, 0, %p1; +; CHECK-NEXT: st.param.b32 [func_retval0], %r1; +; CHECK-NEXT: ret; %cmp = icmp uge i16 %a, %b %ret = zext i1 %cmp to i16 ret i16 %ret } define i16 @icmp_ult_i16(i16 %a, i16 %b) { -; CHECK: setp.lt.u16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}} -; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]] -; CHECK: ret +; CHECK-LABEL: icmp_ult_i16( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<2>; +; CHECK-NEXT: .reg .b16 %rs<3>; +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b16 %rs1, [icmp_ult_i16_param_0]; +; CHECK-NEXT: ld.param.b16 %rs2, [icmp_ult_i16_param_1]; +; CHECK-NEXT: setp.lt.u16 %p1, %rs1, %rs2; +; CHECK-NEXT: selp.b32 %r1, 1, 0, %p1; +; CHECK-NEXT: st.param.b32 [func_retval0], %r1; +; CHECK-NEXT: ret; %cmp = icmp ult i16 %a, %b %ret = zext i1 %cmp to i16 ret i16 %ret } define i16 @icmp_ule_i16(i16 %a, i16 %b) { -; CHECK: setp.le.u16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}} -; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]] -; CHECK: ret +; CHECK-LABEL: icmp_ule_i16( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<2>; +; CHECK-NEXT: .reg .b16 %rs<3>; +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-EMPTY: +; 
CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b16 %rs1, [icmp_ule_i16_param_0]; +; CHECK-NEXT: ld.param.b16 %rs2, [icmp_ule_i16_param_1]; +; CHECK-NEXT: setp.le.u16 %p1, %rs1, %rs2; +; CHECK-NEXT: selp.b32 %r1, 1, 0, %p1; +; CHECK-NEXT: st.param.b32 [func_retval0], %r1; +; CHECK-NEXT: ret; %cmp = icmp ule i16 %a, %b %ret = zext i1 %cmp to i16 ret i16 %ret } define i16 @icmp_sgt_i16(i16 %a, i16 %b) { -; CHECK: setp.gt.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}} -; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]] -; CHECK: ret +; CHECK-LABEL: icmp_sgt_i16( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<2>; +; CHECK-NEXT: .reg .b16 %rs<3>; +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b16 %rs1, [icmp_sgt_i16_param_0]; +; CHECK-NEXT: ld.param.b16 %rs2, [icmp_sgt_i16_param_1]; +; CHECK-NEXT: setp.gt.s16 %p1, %rs1, %rs2; +; CHECK-NEXT: selp.b32 %r1, 1, 0, %p1; +; CHECK-NEXT: st.param.b32 [func_retval0], %r1; +; CHECK-NEXT: ret; %cmp = icmp sgt i16 %a, %b %ret = zext i1 %cmp to i16 ret i16 %ret } define i16 @icmp_sge_i16(i16 %a, i16 %b) { -; CHECK: setp.ge.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}} -; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]] -; CHECK: ret +; CHECK-LABEL: icmp_sge_i16( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<2>; +; CHECK-NEXT: .reg .b16 %rs<3>; +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b16 %rs1, [icmp_sge_i16_param_0]; +; CHECK-NEXT: ld.param.b16 %rs2, [icmp_sge_i16_param_1]; +; CHECK-NEXT: setp.ge.s16 %p1, %rs1, %rs2; +; CHECK-NEXT: selp.b32 %r1, 1, 0, %p1; +; CHECK-NEXT: st.param.b32 [func_retval0], %r1; +; CHECK-NEXT: ret; %cmp = icmp sge i16 %a, %b %ret = zext i1 %cmp to i16 ret i16 %ret } define i16 @icmp_slt_i16(i16 %a, i16 %b) { -; CHECK: setp.lt.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}} -; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]] -; CHECK: ret +; CHECK-LABEL: icmp_slt_i16( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<2>; +; CHECK-NEXT: .reg .b16 %rs<3>; +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b16 %rs1, [icmp_slt_i16_param_0]; +; CHECK-NEXT: ld.param.b16 %rs2, [icmp_slt_i16_param_1]; +; CHECK-NEXT: setp.lt.s16 %p1, %rs1, %rs2; +; CHECK-NEXT: selp.b32 %r1, 1, 0, %p1; +; CHECK-NEXT: st.param.b32 [func_retval0], %r1; +; CHECK-NEXT: ret; %cmp = icmp slt i16 %a, %b %ret = zext i1 %cmp to i16 ret i16 %ret } define i16 @icmp_sle_i16(i16 %a, i16 %b) { -; CHECK: setp.le.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}} -; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]] -; CHECK: ret +; CHECK-LABEL: icmp_sle_i16( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<2>; +; CHECK-NEXT: .reg .b16 %rs<3>; +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b16 %rs1, [icmp_sle_i16_param_0]; +; CHECK-NEXT: ld.param.b16 %rs2, [icmp_sle_i16_param_1]; +; CHECK-NEXT: setp.le.s16 %p1, %rs1, %rs2; +; CHECK-NEXT: selp.b32 %r1, 1, 0, %p1; +; CHECK-NEXT: st.param.b32 [func_retval0], %r1; +; CHECK-NEXT: ret; %cmp = icmp sle i16 %a, %b %ret = zext i1 %cmp to i16 ret i16 %ret @@ -290,9 +571,19 @@ define i16 @icmp_sle_i16(i16 %a, i16 %b) { define i8 @icmp_eq_i8(i8 %a, i8 %b) { ; Comparison happens in 16-bit -; CHECK: setp.eq.b16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}} -; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]] -; CHECK: ret +; CHECK-LABEL: icmp_eq_i8( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<2>; +; CHECK-NEXT: .reg .b16 %rs<3>; +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-EMPTY: +; 
CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b8 %rs1, [icmp_eq_i8_param_0]; +; CHECK-NEXT: ld.param.b8 %rs2, [icmp_eq_i8_param_1]; +; CHECK-NEXT: setp.eq.b16 %p1, %rs1, %rs2; +; CHECK-NEXT: selp.b32 %r1, 1, 0, %p1; +; CHECK-NEXT: st.param.b32 [func_retval0], %r1; +; CHECK-NEXT: ret; %cmp = icmp eq i8 %a, %b %ret = zext i1 %cmp to i8 ret i8 %ret @@ -300,9 +591,19 @@ define i8 @icmp_eq_i8(i8 %a, i8 %b) { define i8 @icmp_ne_i8(i8 %a, i8 %b) { ; Comparison happens in 16-bit -; CHECK: setp.ne.b16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}} -; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]] -; CHECK: ret +; CHECK-LABEL: icmp_ne_i8( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<2>; +; CHECK-NEXT: .reg .b16 %rs<3>; +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b8 %rs1, [icmp_ne_i8_param_0]; +; CHECK-NEXT: ld.param.b8 %rs2, [icmp_ne_i8_param_1]; +; CHECK-NEXT: setp.ne.b16 %p1, %rs1, %rs2; +; CHECK-NEXT: selp.b32 %r1, 1, 0, %p1; +; CHECK-NEXT: st.param.b32 [func_retval0], %r1; +; CHECK-NEXT: ret; %cmp = icmp ne i8 %a, %b %ret = zext i1 %cmp to i8 ret i8 %ret @@ -310,9 +611,19 @@ define i8 @icmp_ne_i8(i8 %a, i8 %b) { define i8 @icmp_ugt_i8(i8 %a, i8 %b) { ; Comparison happens in 16-bit -; CHECK: setp.gt.u16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}} -; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]] -; CHECK: ret +; CHECK-LABEL: icmp_ugt_i8( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<2>; +; CHECK-NEXT: .reg .b16 %rs<3>; +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b8 %rs1, [icmp_ugt_i8_param_0]; +; CHECK-NEXT: ld.param.b8 %rs2, [icmp_ugt_i8_param_1]; +; CHECK-NEXT: setp.gt.u16 %p1, %rs1, %rs2; +; CHECK-NEXT: selp.b32 %r1, 1, 0, %p1; +; CHECK-NEXT: st.param.b32 [func_retval0], %r1; +; CHECK-NEXT: ret; %cmp = icmp ugt i8 %a, %b %ret = zext i1 %cmp to i8 ret i8 %ret @@ -320,9 +631,19 @@ define i8 @icmp_ugt_i8(i8 %a, i8 %b) { define i8 @icmp_uge_i8(i8 %a, i8 %b) { ; Comparison happens in 16-bit -; CHECK: setp.ge.u16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}} -; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]] -; CHECK: ret +; CHECK-LABEL: icmp_uge_i8( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<2>; +; CHECK-NEXT: .reg .b16 %rs<3>; +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b8 %rs1, [icmp_uge_i8_param_0]; +; CHECK-NEXT: ld.param.b8 %rs2, [icmp_uge_i8_param_1]; +; CHECK-NEXT: setp.ge.u16 %p1, %rs1, %rs2; +; CHECK-NEXT: selp.b32 %r1, 1, 0, %p1; +; CHECK-NEXT: st.param.b32 [func_retval0], %r1; +; CHECK-NEXT: ret; %cmp = icmp uge i8 %a, %b %ret = zext i1 %cmp to i8 ret i8 %ret @@ -330,9 +651,19 @@ define i8 @icmp_uge_i8(i8 %a, i8 %b) { define i8 @icmp_ult_i8(i8 %a, i8 %b) { ; Comparison happens in 16-bit -; CHECK: setp.lt.u16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}} -; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]] -; CHECK: ret +; CHECK-LABEL: icmp_ult_i8( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<2>; +; CHECK-NEXT: .reg .b16 %rs<3>; +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b8 %rs1, [icmp_ult_i8_param_0]; +; CHECK-NEXT: ld.param.b8 %rs2, [icmp_ult_i8_param_1]; +; CHECK-NEXT: setp.lt.u16 %p1, %rs1, %rs2; +; CHECK-NEXT: selp.b32 %r1, 1, 0, %p1; +; CHECK-NEXT: st.param.b32 [func_retval0], %r1; +; CHECK-NEXT: ret; %cmp = icmp ult i8 %a, %b %ret = zext i1 %cmp to i8 ret i8 %ret @@ -340,9 +671,19 @@ define i8 @icmp_ult_i8(i8 %a, i8 %b) { define i8 @icmp_ule_i8(i8 %a, i8 %b) { ; Comparison happens in 16-bit -; 
CHECK: setp.le.u16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}} -; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]] -; CHECK: ret +; CHECK-LABEL: icmp_ule_i8( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<2>; +; CHECK-NEXT: .reg .b16 %rs<3>; +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b8 %rs1, [icmp_ule_i8_param_0]; +; CHECK-NEXT: ld.param.b8 %rs2, [icmp_ule_i8_param_1]; +; CHECK-NEXT: setp.le.u16 %p1, %rs1, %rs2; +; CHECK-NEXT: selp.b32 %r1, 1, 0, %p1; +; CHECK-NEXT: st.param.b32 [func_retval0], %r1; +; CHECK-NEXT: ret; %cmp = icmp ule i8 %a, %b %ret = zext i1 %cmp to i8 ret i8 %ret @@ -350,9 +691,19 @@ define i8 @icmp_ule_i8(i8 %a, i8 %b) { define i8 @icmp_sgt_i8(i8 %a, i8 %b) { ; Comparison happens in 16-bit -; CHECK: setp.gt.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}} -; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]] -; CHECK: ret +; CHECK-LABEL: icmp_sgt_i8( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<2>; +; CHECK-NEXT: .reg .b16 %rs<3>; +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.s8 %rs1, [icmp_sgt_i8_param_0]; +; CHECK-NEXT: ld.param.s8 %rs2, [icmp_sgt_i8_param_1]; +; CHECK-NEXT: setp.gt.s16 %p1, %rs1, %rs2; +; CHECK-NEXT: selp.b32 %r1, 1, 0, %p1; +; CHECK-NEXT: st.param.b32 [func_retval0], %r1; +; CHECK-NEXT: ret; %cmp = icmp sgt i8 %a, %b %ret = zext i1 %cmp to i8 ret i8 %ret @@ -360,9 +711,19 @@ define i8 @icmp_sgt_i8(i8 %a, i8 %b) { define i8 @icmp_sge_i8(i8 %a, i8 %b) { ; Comparison happens in 16-bit -; CHECK: setp.ge.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}} -; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]] -; CHECK: ret +; CHECK-LABEL: icmp_sge_i8( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<2>; +; CHECK-NEXT: .reg .b16 %rs<3>; +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.s8 %rs1, [icmp_sge_i8_param_0]; +; CHECK-NEXT: ld.param.s8 %rs2, [icmp_sge_i8_param_1]; +; CHECK-NEXT: setp.ge.s16 %p1, %rs1, %rs2; +; CHECK-NEXT: selp.b32 %r1, 1, 0, %p1; +; CHECK-NEXT: st.param.b32 [func_retval0], %r1; +; CHECK-NEXT: ret; %cmp = icmp sge i8 %a, %b %ret = zext i1 %cmp to i8 ret i8 %ret @@ -370,9 +731,19 @@ define i8 @icmp_sge_i8(i8 %a, i8 %b) { define i8 @icmp_slt_i8(i8 %a, i8 %b) { ; Comparison happens in 16-bit -; CHECK: setp.lt.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}} -; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]] -; CHECK: ret +; CHECK-LABEL: icmp_slt_i8( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<2>; +; CHECK-NEXT: .reg .b16 %rs<3>; +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.s8 %rs1, [icmp_slt_i8_param_0]; +; CHECK-NEXT: ld.param.s8 %rs2, [icmp_slt_i8_param_1]; +; CHECK-NEXT: setp.lt.s16 %p1, %rs1, %rs2; +; CHECK-NEXT: selp.b32 %r1, 1, 0, %p1; +; CHECK-NEXT: st.param.b32 [func_retval0], %r1; +; CHECK-NEXT: ret; %cmp = icmp slt i8 %a, %b %ret = zext i1 %cmp to i8 ret i8 %ret @@ -380,9 +751,19 @@ define i8 @icmp_slt_i8(i8 %a, i8 %b) { define i8 @icmp_sle_i8(i8 %a, i8 %b) { ; Comparison happens in 16-bit -; CHECK: setp.le.s16 %p[[P0:[0-9]+]], %rs{{[0-9]+}}, %rs{{[0-9]+}} -; CHECK: selp.b32 %r{{[0-9]+}}, 1, 0, %p[[P0]] -; CHECK: ret +; CHECK-LABEL: icmp_sle_i8( +; CHECK: { +; CHECK-NEXT: .reg .pred %p<2>; +; CHECK-NEXT: .reg .b16 %rs<3>; +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.s8 %rs1, [icmp_sle_i8_param_0]; +; CHECK-NEXT: ld.param.s8 %rs2, [icmp_sle_i8_param_1]; +; CHECK-NEXT: setp.le.s16 %p1, %rs1, %rs2; +; CHECK-NEXT: 
selp.b32 %r1, 1, 0, %p1; +; CHECK-NEXT: st.param.b32 [func_retval0], %r1; +; CHECK-NEXT: ret; %cmp = icmp sle i8 %a, %b %ret = zext i1 %cmp to i8 ret i8 %ret diff --git a/llvm/test/CodeGen/NVPTX/convert-call-to-indirect.ll b/llvm/test/CodeGen/NVPTX/convert-call-to-indirect.ll index d1b478d..48209a8 100644 --- a/llvm/test/CodeGen/NVPTX/convert-call-to-indirect.ll +++ b/llvm/test/CodeGen/NVPTX/convert-call-to-indirect.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_90 | FileCheck %s ; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_90 | %ptxas-verify -arch=sm_90 %} @@ -7,52 +8,203 @@ declare i64 @callee_variadic(ptr %p, ...); define %struct.64 @test_return_type_mismatch(ptr %p) { ; CHECK-LABEL: test_return_type_mismatch( -; CHECK: .param .align 1 .b8 retval0[8]; +; CHECK: { +; CHECK-NEXT: .reg .b64 %rd<40>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b64 %rd2, [test_return_type_mismatch_param_0]; +; CHECK-NEXT: { // callseq 0, 0 +; CHECK-NEXT: .param .b64 param0; +; CHECK-NEXT: .param .align 1 .b8 retval0[8]; +; CHECK-NEXT: st.param.b64 [param0], %rd2; ; CHECK-NEXT: prototype_0 : .callprototype (.param .align 1 .b8 _[8]) _ (.param .b64 _); -; CHECK-NEXT: call (retval0), %rd{{[0-9]+}}, (param0), prototype_0; +; CHECK-NEXT: mov.b64 %rd1, callee; +; CHECK-NEXT: call (retval0), %rd1, (param0), prototype_0; +; CHECK-NEXT: ld.param.b8 %rd3, [retval0+7]; +; CHECK-NEXT: ld.param.b8 %rd4, [retval0+6]; +; CHECK-NEXT: ld.param.b8 %rd5, [retval0+5]; +; CHECK-NEXT: ld.param.b8 %rd6, [retval0+4]; +; CHECK-NEXT: ld.param.b8 %rd7, [retval0+3]; +; CHECK-NEXT: ld.param.b8 %rd8, [retval0+2]; +; CHECK-NEXT: ld.param.b8 %rd9, [retval0+1]; +; CHECK-NEXT: ld.param.b8 %rd10, [retval0]; +; CHECK-NEXT: } // callseq 0 +; CHECK-NEXT: shl.b64 %rd13, %rd9, 8; +; CHECK-NEXT: or.b64 %rd14, %rd13, %rd10; +; CHECK-NEXT: shl.b64 %rd16, %rd8, 16; +; CHECK-NEXT: shl.b64 %rd18, %rd7, 24; +; CHECK-NEXT: or.b64 %rd19, %rd18, %rd16; +; CHECK-NEXT: or.b64 %rd20, %rd19, %rd14; +; CHECK-NEXT: shl.b64 %rd23, %rd5, 8; +; CHECK-NEXT: or.b64 %rd24, %rd23, %rd6; +; CHECK-NEXT: shl.b64 %rd26, %rd4, 16; +; CHECK-NEXT: shl.b64 %rd28, %rd3, 24; +; CHECK-NEXT: or.b64 %rd29, %rd28, %rd26; +; CHECK-NEXT: or.b64 %rd30, %rd29, %rd24; +; CHECK-NEXT: shl.b64 %rd31, %rd30, 32; +; CHECK-NEXT: or.b64 %rd32, %rd31, %rd20; +; CHECK-NEXT: st.param.b8 [func_retval0], %rd10; +; CHECK-NEXT: shr.u64 %rd33, %rd32, 56; +; CHECK-NEXT: st.param.b8 [func_retval0+7], %rd33; +; CHECK-NEXT: shr.u64 %rd34, %rd32, 48; +; CHECK-NEXT: st.param.b8 [func_retval0+6], %rd34; +; CHECK-NEXT: shr.u64 %rd35, %rd32, 40; +; CHECK-NEXT: st.param.b8 [func_retval0+5], %rd35; +; CHECK-NEXT: shr.u64 %rd36, %rd32, 32; +; CHECK-NEXT: st.param.b8 [func_retval0+4], %rd36; +; CHECK-NEXT: shr.u64 %rd37, %rd32, 24; +; CHECK-NEXT: st.param.b8 [func_retval0+3], %rd37; +; CHECK-NEXT: shr.u64 %rd38, %rd32, 16; +; CHECK-NEXT: st.param.b8 [func_retval0+2], %rd38; +; CHECK-NEXT: shr.u64 %rd39, %rd32, 8; +; CHECK-NEXT: st.param.b8 [func_retval0+1], %rd39; +; CHECK-NEXT: ret; %ret = call %struct.64 @callee(ptr %p) ret %struct.64 %ret } define i64 @test_param_type_mismatch(ptr %p) { ; CHECK-LABEL: test_param_type_mismatch( -; CHECK: .param .b64 retval0; +; CHECK: { +; CHECK-NEXT: .reg .b64 %rd<4>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: { // callseq 1, 0 +; CHECK-NEXT: .param .b64 param0; +; CHECK-NEXT: .param .b64 retval0; ; CHECK-NEXT: prototype_1 : 
.callprototype (.param .b64 _) _ (.param .b64 _); -; CHECK-NEXT: call (retval0), %rd{{[0-9]+}}, (param0), prototype_1; +; CHECK-NEXT: st.param.b64 [param0], 7; +; CHECK-NEXT: mov.b64 %rd1, callee; +; CHECK-NEXT: call (retval0), %rd1, (param0), prototype_1; +; CHECK-NEXT: ld.param.b64 %rd2, [retval0]; +; CHECK-NEXT: } // callseq 1 +; CHECK-NEXT: st.param.b64 [func_retval0], %rd2; +; CHECK-NEXT: ret; %ret = call i64 @callee(i64 7) ret i64 %ret } define i64 @test_param_count_mismatch(ptr %p) { ; CHECK-LABEL: test_param_count_mismatch( -; CHECK: .param .b64 retval0; +; CHECK: { +; CHECK-NEXT: .reg .b64 %rd<5>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b64 %rd2, [test_param_count_mismatch_param_0]; +; CHECK-NEXT: { // callseq 2, 0 +; CHECK-NEXT: .param .b64 param0; +; CHECK-NEXT: .param .b64 param1; +; CHECK-NEXT: .param .b64 retval0; +; CHECK-NEXT: st.param.b64 [param0], %rd2; ; CHECK-NEXT: prototype_2 : .callprototype (.param .b64 _) _ (.param .b64 _, .param .b64 _); -; CHECK-NEXT: call (retval0), %rd{{[0-9]+}}, (param0, param1), prototype_2; +; CHECK-NEXT: st.param.b64 [param1], 7; +; CHECK-NEXT: mov.b64 %rd1, callee; +; CHECK-NEXT: call (retval0), %rd1, (param0, param1), prototype_2; +; CHECK-NEXT: ld.param.b64 %rd3, [retval0]; +; CHECK-NEXT: } // callseq 2 +; CHECK-NEXT: st.param.b64 [func_retval0], %rd3; +; CHECK-NEXT: ret; %ret = call i64 @callee(ptr %p, i64 7) ret i64 %ret } define %struct.64 @test_return_type_mismatch_variadic(ptr %p) { ; CHECK-LABEL: test_return_type_mismatch_variadic( -; CHECK: .param .align 1 .b8 retval0[8]; +; CHECK: { +; CHECK-NEXT: .reg .b64 %rd<40>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b64 %rd2, [test_return_type_mismatch_variadic_param_0]; +; CHECK-NEXT: { // callseq 3, 0 +; CHECK-NEXT: .param .b64 param0; +; CHECK-NEXT: .param .align 1 .b8 retval0[8]; +; CHECK-NEXT: st.param.b64 [param0], %rd2; ; CHECK-NEXT: prototype_3 : .callprototype (.param .align 1 .b8 _[8]) _ (.param .b64 _); -; CHECK-NEXT: call (retval0), %rd{{[0-9]+}}, (param0), prototype_3; +; CHECK-NEXT: mov.b64 %rd1, callee_variadic; +; CHECK-NEXT: call (retval0), %rd1, (param0), prototype_3; +; CHECK-NEXT: ld.param.b8 %rd3, [retval0+7]; +; CHECK-NEXT: ld.param.b8 %rd4, [retval0+6]; +; CHECK-NEXT: ld.param.b8 %rd5, [retval0+5]; +; CHECK-NEXT: ld.param.b8 %rd6, [retval0+4]; +; CHECK-NEXT: ld.param.b8 %rd7, [retval0+3]; +; CHECK-NEXT: ld.param.b8 %rd8, [retval0+2]; +; CHECK-NEXT: ld.param.b8 %rd9, [retval0+1]; +; CHECK-NEXT: ld.param.b8 %rd10, [retval0]; +; CHECK-NEXT: } // callseq 3 +; CHECK-NEXT: shl.b64 %rd13, %rd9, 8; +; CHECK-NEXT: or.b64 %rd14, %rd13, %rd10; +; CHECK-NEXT: shl.b64 %rd16, %rd8, 16; +; CHECK-NEXT: shl.b64 %rd18, %rd7, 24; +; CHECK-NEXT: or.b64 %rd19, %rd18, %rd16; +; CHECK-NEXT: or.b64 %rd20, %rd19, %rd14; +; CHECK-NEXT: shl.b64 %rd23, %rd5, 8; +; CHECK-NEXT: or.b64 %rd24, %rd23, %rd6; +; CHECK-NEXT: shl.b64 %rd26, %rd4, 16; +; CHECK-NEXT: shl.b64 %rd28, %rd3, 24; +; CHECK-NEXT: or.b64 %rd29, %rd28, %rd26; +; CHECK-NEXT: or.b64 %rd30, %rd29, %rd24; +; CHECK-NEXT: shl.b64 %rd31, %rd30, 32; +; CHECK-NEXT: or.b64 %rd32, %rd31, %rd20; +; CHECK-NEXT: st.param.b8 [func_retval0], %rd10; +; CHECK-NEXT: shr.u64 %rd33, %rd32, 56; +; CHECK-NEXT: st.param.b8 [func_retval0+7], %rd33; +; CHECK-NEXT: shr.u64 %rd34, %rd32, 48; +; CHECK-NEXT: st.param.b8 [func_retval0+6], %rd34; +; CHECK-NEXT: shr.u64 %rd35, %rd32, 40; +; CHECK-NEXT: st.param.b8 [func_retval0+5], %rd35; +; CHECK-NEXT: shr.u64 %rd36, %rd32, 32; +; CHECK-NEXT: st.param.b8 
[func_retval0+4], %rd36; +; CHECK-NEXT: shr.u64 %rd37, %rd32, 24; +; CHECK-NEXT: st.param.b8 [func_retval0+3], %rd37; +; CHECK-NEXT: shr.u64 %rd38, %rd32, 16; +; CHECK-NEXT: st.param.b8 [func_retval0+2], %rd38; +; CHECK-NEXT: shr.u64 %rd39, %rd32, 8; +; CHECK-NEXT: st.param.b8 [func_retval0+1], %rd39; +; CHECK-NEXT: ret; %ret = call %struct.64 (ptr, ...) @callee_variadic(ptr %p) ret %struct.64 %ret } define i64 @test_param_type_mismatch_variadic(ptr %p) { ; CHECK-LABEL: test_param_type_mismatch_variadic( -; CHECK: .param .b64 retval0; +; CHECK: { +; CHECK-NEXT: .reg .b64 %rd<4>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b64 %rd1, [test_param_type_mismatch_variadic_param_0]; +; CHECK-NEXT: { // callseq 4, 0 +; CHECK-NEXT: .param .align 8 .b8 param1[8]; +; CHECK-NEXT: .param .b64 param0; +; CHECK-NEXT: .param .b64 retval0; +; CHECK-NEXT: st.param.b64 [param0], %rd1; +; CHECK-NEXT: st.param.b64 [param1], 7; ; CHECK-NEXT: call.uni (retval0), callee_variadic, (param0, param1); +; CHECK-NEXT: ld.param.b64 %rd2, [retval0]; +; CHECK-NEXT: } // callseq 4 +; CHECK-NEXT: st.param.b64 [func_retval0], %rd2; +; CHECK-NEXT: ret; %ret = call i64 (ptr, ...) @callee_variadic(ptr %p, i64 7) ret i64 %ret } define i64 @test_param_count_mismatch_variadic(ptr %p) { ; CHECK-LABEL: test_param_count_mismatch_variadic( -; CHECK: .param .b64 retval0; +; CHECK: { +; CHECK-NEXT: .reg .b64 %rd<4>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b64 %rd1, [test_param_count_mismatch_variadic_param_0]; +; CHECK-NEXT: { // callseq 5, 0 +; CHECK-NEXT: .param .align 8 .b8 param1[8]; +; CHECK-NEXT: .param .b64 param0; +; CHECK-NEXT: .param .b64 retval0; +; CHECK-NEXT: st.param.b64 [param0], %rd1; +; CHECK-NEXT: st.param.b64 [param1], 7; ; CHECK-NEXT: call.uni (retval0), callee_variadic, (param0, param1); +; CHECK-NEXT: ld.param.b64 %rd2, [retval0]; +; CHECK-NEXT: } // callseq 5 +; CHECK-NEXT: st.param.b64 [func_retval0], %rd2; +; CHECK-NEXT: ret; %ret = call i64 (ptr, ...) 
@callee_variadic(ptr %p, i64 7)
 ret i64 %ret
 }
diff --git a/llvm/test/CodeGen/NVPTX/dynamic_stackalloc.ll b/llvm/test/CodeGen/NVPTX/dynamic_stackalloc.ll
index 4d2ba7d..06fb8d2 100644
--- a/llvm/test/CodeGen/NVPTX/dynamic_stackalloc.ll
+++ b/llvm/test/CodeGen/NVPTX/dynamic_stackalloc.ll
@@ -22,8 +22,8 @@ define i32 @test_dynamic_stackalloc(i64 %n) {
 ; CHECK-32-NEXT: cvta.local.u32 %r5, %r4;
 ; CHECK-32-NEXT: { // callseq 0, 0
 ; CHECK-32-NEXT: .param .b32 param0;
-; CHECK-32-NEXT: st.param.b32 [param0], %r5;
 ; CHECK-32-NEXT: .param .b32 retval0;
+; CHECK-32-NEXT: st.param.b32 [param0], %r5;
 ; CHECK-32-NEXT: call.uni (retval0), bar, (param0);
 ; CHECK-32-NEXT: ld.param.b32 %r6, [retval0];
 ; CHECK-32-NEXT: } // callseq 0
@@ -43,8 +43,8 @@ define i32 @test_dynamic_stackalloc(i64 %n) {
 ; CHECK-64-NEXT: cvta.local.u64 %rd5, %rd4;
 ; CHECK-64-NEXT: { // callseq 0, 0
 ; CHECK-64-NEXT: .param .b64 param0;
-; CHECK-64-NEXT: st.param.b64 [param0], %rd5;
 ; CHECK-64-NEXT: .param .b32 retval0;
+; CHECK-64-NEXT: st.param.b64 [param0], %rd5;
 ; CHECK-64-NEXT: call.uni (retval0), bar, (param0);
 ; CHECK-64-NEXT: ld.param.b32 %r1, [retval0];
 ; CHECK-64-NEXT: } // callseq 0
diff --git a/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll b/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll
index 8918fbd..d4fcea3 100644
--- a/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll
+++ b/llvm/test/CodeGen/NVPTX/f16x2-instructions.ll
@@ -462,10 +462,10 @@ define <2 x half> @test_call(<2 x half> %a, <2 x half> %b) #0 {
 ; CHECK-NEXT: ld.param.b32 %r1, [test_call_param_0];
 ; CHECK-NEXT: { // callseq 0, 0
 ; CHECK-NEXT: .param .align 4 .b8 param0[4];
-; CHECK-NEXT: st.param.b32 [param0], %r1;
 ; CHECK-NEXT: .param .align 4 .b8 param1[4];
-; CHECK-NEXT: st.param.b32 [param1], %r2;
 ; CHECK-NEXT: .param .align 4 .b8 retval0[4];
+; CHECK-NEXT: st.param.b32 [param1], %r2;
+; CHECK-NEXT: st.param.b32 [param0], %r1;
 ; CHECK-NEXT: call.uni (retval0), test_callee, (param0, param1);
 ; CHECK-NEXT: ld.param.b32 %r3, [retval0];
 ; CHECK-NEXT: } // callseq 0
@@ -485,10 +485,10 @@ define <2 x half> @test_call_flipped(<2 x half> %a, <2 x half> %b) #0 {
 ; CHECK-NEXT: ld.param.b32 %r1, [test_call_flipped_param_0];
 ; CHECK-NEXT: { // callseq 1, 0
 ; CHECK-NEXT: .param .align 4 .b8 param0[4];
-; CHECK-NEXT: st.param.b32 [param0], %r2;
 ; CHECK-NEXT: .param .align 4 .b8 param1[4];
-; CHECK-NEXT: st.param.b32 [param1], %r1;
 ; CHECK-NEXT: .param .align 4 .b8 retval0[4];
+; CHECK-NEXT: st.param.b32 [param1], %r1;
+; CHECK-NEXT: st.param.b32 [param0], %r2;
 ; CHECK-NEXT: call.uni (retval0), test_callee, (param0, param1);
 ; CHECK-NEXT: ld.param.b32 %r3, [retval0];
 ; CHECK-NEXT: } // callseq 1
@@ -508,10 +508,10 @@ define <2 x half> @test_tailcall_flipped(<2 x half> %a, <2 x half> %b) #0 {
 ; CHECK-NEXT: ld.param.b32 %r1, [test_tailcall_flipped_param_0];
 ; CHECK-NEXT: { // callseq 2, 0
 ; CHECK-NEXT: .param .align 4 .b8 param0[4];
-; CHECK-NEXT: st.param.b32 [param0], %r2;
 ; CHECK-NEXT: .param .align 4 .b8 param1[4];
-; CHECK-NEXT: st.param.b32 [param1], %r1;
 ; CHECK-NEXT: .param .align 4 .b8 retval0[4];
+; CHECK-NEXT: st.param.b32 [param1], %r1;
+; CHECK-NEXT: st.param.b32 [param0], %r2;
 ; CHECK-NEXT: call.uni (retval0), test_callee, (param0, param1);
 ; CHECK-NEXT: ld.param.b32 %r3, [retval0];
 ; CHECK-NEXT: } // callseq 2
diff --git a/llvm/test/CodeGen/NVPTX/f32x2-instructions.ll b/llvm/test/CodeGen/NVPTX/f32x2-instructions.ll
index 30afd69..b84a0ec 100644
--- a/llvm/test/CodeGen/NVPTX/f32x2-instructions.ll
+++ b/llvm/test/CodeGen/NVPTX/f32x2-instructions.ll
@@ -859,10 +859,10 @@ define <2 x float> @test_call(<2 x float> %a, <2 x float> %b) #0 {
 ; CHECK-NEXT: ld.param.b64 %rd1, [test_call_param_0];
 ; CHECK-NEXT: { // callseq 0, 0
 ; CHECK-NEXT: .param .align 8 .b8 param0[8];
-; CHECK-NEXT: st.param.b64 [param0], %rd1;
 ; CHECK-NEXT: .param .align 8 .b8 param1[8];
-; CHECK-NEXT: st.param.b64 [param1], %rd2;
 ; CHECK-NEXT: .param .align 8 .b8 retval0[8];
+; CHECK-NEXT: st.param.b64 [param1], %rd2;
+; CHECK-NEXT: st.param.b64 [param0], %rd1;
 ; CHECK-NEXT: call.uni (retval0), test_callee, (param0, param1);
 ; CHECK-NEXT: ld.param.b64 %rd3, [retval0];
 ; CHECK-NEXT: } // callseq 0
@@ -882,10 +882,10 @@ define <2 x float> @test_call_flipped(<2 x float> %a, <2 x float> %b) #0 {
 ; CHECK-NEXT: ld.param.b64 %rd1, [test_call_flipped_param_0];
 ; CHECK-NEXT: { // callseq 1, 0
 ; CHECK-NEXT: .param .align 8 .b8 param0[8];
-; CHECK-NEXT: st.param.b64 [param0], %rd2;
 ; CHECK-NEXT: .param .align 8 .b8 param1[8];
-; CHECK-NEXT: st.param.b64 [param1], %rd1;
 ; CHECK-NEXT: .param .align 8 .b8 retval0[8];
+; CHECK-NEXT: st.param.b64 [param1], %rd1;
+; CHECK-NEXT: st.param.b64 [param0], %rd2;
 ; CHECK-NEXT: call.uni (retval0), test_callee, (param0, param1);
 ; CHECK-NEXT: ld.param.b64 %rd3, [retval0];
 ; CHECK-NEXT: } // callseq 1
@@ -905,10 +905,10 @@ define <2 x float> @test_tailcall_flipped(<2 x float> %a, <2 x float> %b) #0 {
 ; CHECK-NEXT: ld.param.b64 %rd1, [test_tailcall_flipped_param_0];
 ; CHECK-NEXT: { // callseq 2, 0
 ; CHECK-NEXT: .param .align 8 .b8 param0[8];
-; CHECK-NEXT: st.param.b64 [param0], %rd2;
 ; CHECK-NEXT: .param .align 8 .b8 param1[8];
-; CHECK-NEXT: st.param.b64 [param1], %rd1;
 ; CHECK-NEXT: .param .align 8 .b8 retval0[8];
+; CHECK-NEXT: st.param.b64 [param1], %rd1;
+; CHECK-NEXT: st.param.b64 [param0], %rd2;
 ; CHECK-NEXT: call.uni (retval0), test_callee, (param0, param1);
 ; CHECK-NEXT: ld.param.b64 %rd3, [retval0];
 ; CHECK-NEXT: } // callseq 2
diff --git a/llvm/test/CodeGen/NVPTX/fma.ll b/llvm/test/CodeGen/NVPTX/fma.ll
index 5aa12b0..87274aa 100644
--- a/llvm/test/CodeGen/NVPTX/fma.ll
+++ b/llvm/test/CodeGen/NVPTX/fma.ll
@@ -36,10 +36,10 @@ define ptx_device float @t2_f32(float %x, float %y, float %z, float %w) {
 ; CHECK-NEXT: fma.rn.f32 %r6, %r1, %r2, %r5;
 ; CHECK-NEXT: { // callseq 0, 0
 ; CHECK-NEXT: .param .b32 param0;
-; CHECK-NEXT: st.param.b32 [param0], %r4;
 ; CHECK-NEXT: .param .b32 param1;
-; CHECK-NEXT: st.param.b32 [param1], %r6;
 ; CHECK-NEXT: .param .b32 retval0;
+; CHECK-NEXT: st.param.b32 [param1], %r6;
+; CHECK-NEXT: st.param.b32 [param0], %r4;
 ; CHECK-NEXT: call.uni (retval0), dummy_f32, (param0, param1);
 ; CHECK-NEXT: ld.param.b32 %r7, [retval0];
 ; CHECK-NEXT: } // callseq 0
@@ -83,10 +83,10 @@ define ptx_device double @t2_f64(double %x, double %y, double %z, double %w) {
 ; CHECK-NEXT: fma.rn.f64 %rd6, %rd1, %rd2, %rd5;
 ; CHECK-NEXT: { // callseq 1, 0
 ; CHECK-NEXT: .param .b64 param0;
-; CHECK-NEXT: st.param.b64 [param0], %rd4;
 ; CHECK-NEXT: .param .b64 param1;
-; CHECK-NEXT: st.param.b64 [param1], %rd6;
 ; CHECK-NEXT: .param .b64 retval0;
+; CHECK-NEXT: st.param.b64 [param1], %rd6;
+; CHECK-NEXT: st.param.b64 [param0], %rd4;
 ; CHECK-NEXT: call.uni (retval0), dummy_f64, (param0, param1);
 ; CHECK-NEXT: ld.param.b64 %rd7, [retval0];
 ; CHECK-NEXT: } // callseq 1
diff --git a/llvm/test/CodeGen/NVPTX/forward-ld-param.ll b/llvm/test/CodeGen/NVPTX/forward-ld-param.ll
index ed8f6b4..636e12b 100644
--- a/llvm/test/CodeGen/NVPTX/forward-ld-param.ll
+++ b/llvm/test/CodeGen/NVPTX/forward-ld-param.ll
@@ -64,9 +64,9 @@ define void @test_ld_param_byval(ptr byval(i32) %a) {
 ; CHECK-NEXT: .reg .b64 %rd<2>;
 ; CHECK-EMPTY:
 ; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: ld.param.b32 %r1, [test_ld_param_byval_param_0];
 ; CHECK-NEXT: { // callseq 1, 0
 ; CHECK-NEXT: .param .align 4 .b8 param0[4];
+; CHECK-NEXT: ld.param.b32 %r1, [test_ld_param_byval_param_0];
 ; CHECK-NEXT: st.param.b32 [param0], %r1;
 ; CHECK-NEXT: call.uni byval_user, (param0);
 ; CHECK-NEXT: } // callseq 1
diff --git a/llvm/test/CodeGen/NVPTX/i128-param.ll b/llvm/test/CodeGen/NVPTX/i128-param.ll
index 4f4c2fe..79abca0 100644
--- a/llvm/test/CodeGen/NVPTX/i128-param.ll
+++ b/llvm/test/CodeGen/NVPTX/i128-param.ll
@@ -29,11 +29,11 @@ start:
 ; CHECK-DAG: ld.param.v2.b64 {%[[REG2:rd[0-9]+]], %[[REG3:rd[0-9]+]]}, [caller_kernel_param_1];
 ; CHECK: { // callseq [[CALLSEQ_ID:[0-9]]], 0
- ; CHECK: .param .align 16 .b8 param0[16];
- ; CHECK-NEXT: st.param.v2.b64 [param0], {%[[REG0]], %[[REG1]]}
- ; CHECK: .param .align 16 .b8 param1[16];
- ; CHECK-NEXT: st.param.v2.b64 [param1], {%[[REG2]], %[[REG3]]}
- ; CHECK: } // callseq [[CALLSEQ_ID]]
+ ; CHECK-DAG: .param .align 16 .b8 param0[16];
+ ; CHECK-DAG: .param .align 16 .b8 param1[16];
+ ; CHECK-DAG: st.param.v2.b64 [param0], {%[[REG0]], %[[REG1]]}
+ ; CHECK-DAG: st.param.v2.b64 [param1], {%[[REG2]], %[[REG3]]}
+ ; CHECK: } // callseq [[CALLSEQ_ID]]
 call void @callee(i128 %0, i128 %1, ptr %2)
 ret void
@@ -48,11 +48,11 @@ start:
 ; CHECK-DAG: ld.param.v2.b64 {%[[REG2:rd[0-9]+]], %[[REG3:rd[0-9]+]]}, [caller_func_param_1]
 ; CHECK: { // callseq [[CALLSEQ_ID:[0-9]]], 0
- ; CHECK: .param .align 16 .b8 param0[16];
- ; CHECK: st.param.v2.b64 [param0], {%[[REG0]], %[[REG1]]}
- ; CHECK: .param .align 16 .b8 param1[16];
- ; CHECK: st.param.v2.b64 [param1], {%[[REG2]], %[[REG3]]}
- ; CHECK: } // callseq [[CALLSEQ_ID]]
+ ; CHECK-DAG: .param .align 16 .b8 param0[16];
+ ; CHECK-DAG: .param .align 16 .b8 param1[16];
+ ; CHECK-DAG: st.param.v2.b64 [param0], {%[[REG0]], %[[REG1]]}
+ ; CHECK-DAG: st.param.v2.b64 [param1], {%[[REG2]], %[[REG3]]}
+ ; CHECK: } // callseq [[CALLSEQ_ID]]
 call void @callee(i128 %0, i128 %1, ptr %2)
 ret void
diff --git a/llvm/test/CodeGen/NVPTX/i16x2-instructions.ll b/llvm/test/CodeGen/NVPTX/i16x2-instructions.ll
index 2b7a06c..74136bb 100644
--- a/llvm/test/CodeGen/NVPTX/i16x2-instructions.ll
+++ b/llvm/test/CodeGen/NVPTX/i16x2-instructions.ll
@@ -642,10 +642,10 @@ define <2 x i16> @test_call(<2 x i16> %a, <2 x i16> %b) #0 {
 ; COMMON-NEXT: ld.param.b32 %r1, [test_call_param_0];
 ; COMMON-NEXT: { // callseq 0, 0
 ; COMMON-NEXT: .param .align 4 .b8 param0[4];
-; COMMON-NEXT: st.param.b32 [param0], %r1;
 ; COMMON-NEXT: .param .align 4 .b8 param1[4];
-; COMMON-NEXT: st.param.b32 [param1], %r2;
 ; COMMON-NEXT: .param .align 4 .b8 retval0[4];
+; COMMON-NEXT: st.param.b32 [param1], %r2;
+; COMMON-NEXT: st.param.b32 [param0], %r1;
 ; COMMON-NEXT: call.uni (retval0), test_callee, (param0, param1);
 ; COMMON-NEXT: ld.param.b32 %r3, [retval0];
 ; COMMON-NEXT: } // callseq 0
@@ -665,10 +665,10 @@ define <2 x i16> @test_call_flipped(<2 x i16> %a, <2 x i16> %b) #0 {
 ; COMMON-NEXT: ld.param.b32 %r1, [test_call_flipped_param_0];
 ; COMMON-NEXT: { // callseq 1, 0
 ; COMMON-NEXT: .param .align 4 .b8 param0[4];
-; COMMON-NEXT: st.param.b32 [param0], %r2;
 ; COMMON-NEXT: .param .align 4 .b8 param1[4];
-; COMMON-NEXT: st.param.b32 [param1], %r1;
 ; COMMON-NEXT: .param .align 4 .b8 retval0[4];
+; COMMON-NEXT: st.param.b32 [param1], %r1;
+; COMMON-NEXT: st.param.b32 [param0], %r2;
 ; COMMON-NEXT: call.uni (retval0), test_callee, (param0, param1);
 ; COMMON-NEXT: ld.param.b32 %r3, [retval0];
 ; COMMON-NEXT: } // callseq 1
@@ -688,10 +688,10 @@ define <2 x i16> @test_tailcall_flipped(<2 x i16> %a, <2 x i16> %b) #0 {
 ; COMMON-NEXT: ld.param.b32 %r1, [test_tailcall_flipped_param_0];
 ; COMMON-NEXT: { // callseq 2, 0
 ; COMMON-NEXT: .param .align 4 .b8 param0[4];
-; COMMON-NEXT: st.param.b32 [param0], %r2;
 ; COMMON-NEXT: .param .align 4 .b8 param1[4];
-; COMMON-NEXT: st.param.b32 [param1], %r1;
 ; COMMON-NEXT: .param .align 4 .b8 retval0[4];
+; COMMON-NEXT: st.param.b32 [param1], %r1;
+; COMMON-NEXT: st.param.b32 [param0], %r2;
 ; COMMON-NEXT: call.uni (retval0), test_callee, (param0, param1);
 ; COMMON-NEXT: ld.param.b32 %r3, [retval0];
 ; COMMON-NEXT: } // callseq 2
diff --git a/llvm/test/CodeGen/NVPTX/i8x2-instructions.ll b/llvm/test/CodeGen/NVPTX/i8x2-instructions.ll
index 3edd4e4..98f94bb 100644
--- a/llvm/test/CodeGen/NVPTX/i8x2-instructions.ll
+++ b/llvm/test/CodeGen/NVPTX/i8x2-instructions.ll
@@ -1,42 +1,107 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_90 -mattr=+ptx80 \
-; RUN: -O0 -disable-post-ra -frame-pointer=all -verify-machineinstrs \
-; RUN: | FileCheck %s
-; RUN: %if ptxas %{ \
-; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_90 -asm-verbose=false \
-; RUN: -O0 -disable-post-ra -frame-pointer=all -verify-machineinstrs \
-; RUN: | %ptxas-verify -arch=sm_90 \
+; RUN: llc < %s -mcpu=sm_90 -mattr=+ptx80 -disable-post-ra -frame-pointer=all \
+; RUN: -verify-machineinstrs -O0 | FileCheck %s --check-prefixes=O0,COMMON
+; RUN: llc < %s -mcpu=sm_90 -mattr=+ptx80 -disable-post-ra -frame-pointer=all \
+; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=O3,COMMON
+; RUN: %if ptxas %{ \
+; RUN: llc < %s -mcpu=sm_90 -mattr=+ptx80 -disable-post-ra -frame-pointer=all \
+; RUN: -verify-machineinstrs -O0 \
+; RUN: | %ptxas-verify -arch=sm_90 \
+; RUN: %}
+; RUN: %if ptxas %{ \
+; RUN: llc < %s -mcpu=sm_90 -mattr=+ptx80 -disable-post-ra -frame-pointer=all \
+; RUN: -verify-machineinstrs \
+; RUN: | %ptxas-verify -arch=sm_90 \
 ; RUN: %}
+target triple = "nvptx64-nvidia-cuda"
 target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
 define i16 @test_bitcast_2xi8_i16(<2 x i8> %a) {
-; CHECK-LABEL: test_bitcast_2xi8_i16(
-; CHECK: {
-; CHECK-NEXT: .reg .b16 %rs<5>;
-; CHECK-NEXT: .reg .b32 %r<3>;
-; CHECK-EMPTY:
-; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: ld.param.v2.b8 {%rs1, %rs2}, [test_bitcast_2xi8_i16_param_0];
-; CHECK-NEXT: mov.b32 %r1, {%rs1, %rs2};
-; CHECK-NEXT: shl.b16 %rs3, %rs2, 8;
-; CHECK-NEXT: or.b16 %rs4, %rs1, %rs3;
-; CHECK-NEXT: cvt.u32.u16 %r2, %rs4;
-; CHECK-NEXT: st.param.b32 [func_retval0], %r2;
-; CHECK-NEXT: ret;
+; O0-LABEL: test_bitcast_2xi8_i16(
+; O0: {
+; O0-NEXT: .reg .b16 %rs<5>;
+; O0-NEXT: .reg .b32 %r<3>;
+; O0-EMPTY:
+; O0-NEXT: // %bb.0:
+; O0-NEXT: ld.param.v2.b8 {%rs1, %rs2}, [test_bitcast_2xi8_i16_param_0];
+; O0-NEXT: mov.b32 %r1, {%rs1, %rs2};
+; O0-NEXT: shl.b16 %rs3, %rs2, 8;
+; O0-NEXT: or.b16 %rs4, %rs1, %rs3;
+; O0-NEXT: cvt.u32.u16 %r2, %rs4;
+; O0-NEXT: st.param.b32 [func_retval0], %r2;
+; O0-NEXT: ret;
+;
+; O3-LABEL: test_bitcast_2xi8_i16(
+; O3: {
+; O3-NEXT: .reg .b32 %r<2>;
+; O3-EMPTY:
+; O3-NEXT: // %bb.0:
+; O3-NEXT: ld.param.b16 %r1, [test_bitcast_2xi8_i16_param_0];
+; O3-NEXT: st.param.b32 [func_retval0], %r1;
+; O3-NEXT: ret;
 %res = bitcast <2 x i8> %a to i16
 ret i16 %res
 }
 define <2 x i8> @test_bitcast_i16_2xi8(i16 %a) {
-; CHECK-LABEL: test_bitcast_i16_2xi8(
-; CHECK: {
-; CHECK-NEXT: .reg .b16 %rs<2>;
-; CHECK-EMPTY:
-; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: ld.param.b16 %rs1, [test_bitcast_i16_2xi8_param_0];
-; CHECK-NEXT: st.param.b16 [func_retval0], %rs1;
-; CHECK-NEXT: ret;
+; O0-LABEL: test_bitcast_i16_2xi8(
+; O0: {
+; O0-NEXT: .reg .b16 %rs<2>;
+; O0-EMPTY:
+; O0-NEXT: // %bb.0:
+; O0-NEXT: ld.param.b16 %rs1, [test_bitcast_i16_2xi8_param_0];
+; O0-NEXT: st.param.b16 [func_retval0], %rs1;
+; O0-NEXT: ret;
+;
+; O3-LABEL: test_bitcast_i16_2xi8(
+; O3: {
+; O3-NEXT: .reg .b16 %rs<2>;
+; O3-EMPTY:
+; O3-NEXT: // %bb.0:
+; O3-NEXT: ld.param.b16 %rs1, [test_bitcast_i16_2xi8_param_0];
+; O3-NEXT: st.param.b16 [func_retval0], %rs1;
+; O3-NEXT: ret;
 %res = bitcast i16 %a to <2 x i8>
 ret <2 x i8> %res
 }
+
+define <2 x i8> @test_call_2xi8(<2 x i8> %a) {
+; O0-LABEL: test_call_2xi8(
+; O0: {
+; O0-NEXT: .reg .b16 %rs<7>;
+; O0-NEXT: .reg .b32 %r<2>;
+; O0-EMPTY:
+; O0-NEXT: // %bb.0:
+; O0-NEXT: ld.param.v2.b8 {%rs1, %rs2}, [test_call_2xi8_param_0];
+; O0-NEXT: mov.b32 %r1, {%rs1, %rs2};
+; O0-NEXT: { // callseq 0, 0
+; O0-NEXT: .param .align 2 .b8 param0[2];
+; O0-NEXT: .param .align 2 .b8 retval0[2];
+; O0-NEXT: st.param.v2.b8 [param0], {%rs1, %rs2};
+; O0-NEXT: call.uni (retval0), test_call_2xi8, (param0);
+; O0-NEXT: ld.param.v2.b8 {%rs3, %rs4}, [retval0];
+; O0-NEXT: } // callseq 0
+; O0-NEXT: st.param.v2.b8 [func_retval0], {%rs3, %rs4};
+; O0-NEXT: ret;
+;
+; O3-LABEL: test_call_2xi8(
+; O3: {
+; O3-NEXT: .reg .b16 %rs<7>;
+; O3-EMPTY:
+; O3-NEXT: // %bb.0:
+; O3-NEXT: ld.param.v2.b8 {%rs1, %rs2}, [test_call_2xi8_param_0];
+; O3-NEXT: { // callseq 0, 0
+; O3-NEXT: .param .align 2 .b8 param0[2];
+; O3-NEXT: .param .align 2 .b8 retval0[2];
+; O3-NEXT: st.param.v2.b8 [param0], {%rs1, %rs2};
+; O3-NEXT: call.uni (retval0), test_call_2xi8, (param0);
+; O3-NEXT: ld.param.v2.b8 {%rs3, %rs4}, [retval0];
+; O3-NEXT: } // callseq 0
+; O3-NEXT: st.param.v2.b8 [func_retval0], {%rs3, %rs4};
+; O3-NEXT: ret;
+ %res = call <2 x i8> @test_call_2xi8(<2 x i8> %a)
+ ret <2 x i8> %res
+}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; COMMON: {{.*}}
diff --git a/llvm/test/CodeGen/NVPTX/i8x4-instructions.ll b/llvm/test/CodeGen/NVPTX/i8x4-instructions.ll
index da99cec..06c2cc8 100644
--- a/llvm/test/CodeGen/NVPTX/i8x4-instructions.ll
+++ b/llvm/test/CodeGen/NVPTX/i8x4-instructions.ll
@@ -1273,10 +1273,10 @@ define <4 x i8> @test_call(<4 x i8> %a, <4 x i8> %b) #0 {
 ; O0-NEXT: ld.param.b32 %r1, [test_call_param_0];
 ; O0-NEXT: { // callseq 0, 0
 ; O0-NEXT: .param .align 4 .b8 param0[4];
-; O0-NEXT: st.param.b32 [param0], %r1;
 ; O0-NEXT: .param .align 4 .b8 param1[4];
-; O0-NEXT: st.param.b32 [param1], %r2;
 ; O0-NEXT: .param .align 4 .b8 retval0[4];
+; O0-NEXT: st.param.b32 [param1], %r2;
+; O0-NEXT: st.param.b32 [param0], %r1;
 ; O0-NEXT: call.uni (retval0), test_callee, (param0, param1);
 ; O0-NEXT: ld.param.b32 %r3, [retval0];
 ; O0-NEXT: } // callseq 0
@@ -1289,13 +1289,13 @@ define <4 x i8> @test_call(<4 x i8> %a, <4 x i8> %b) #0 {
 ; O3-EMPTY:
 ; O3-NEXT: // %bb.0:
 ; O3-NEXT: ld.param.b32 %r1, [test_call_param_0];
-; O3-NEXT: ld.param.b32 %r2, [test_call_param_1];
 ; O3-NEXT: { // callseq 0, 0
 ; O3-NEXT: .param .align 4 .b8 param0[4];
-; O3-NEXT: st.param.b32 [param0], %r1;
 ; O3-NEXT: .param .align 4 .b8 param1[4];
-; O3-NEXT: st.param.b32 [param1], %r2;
 ; O3-NEXT: .param .align 4 .b8 retval0[4];
+; O3-NEXT: ld.param.b32 %r2, [test_call_param_1];
+; O3-NEXT: st.param.b32 [param1], %r2;
+; O3-NEXT: st.param.b32 [param0], %r1;
 ; O3-NEXT: call.uni (retval0), test_callee, (param0, param1);
 ; O3-NEXT: ld.param.b32 %r3, [retval0];
 ; O3-NEXT: } // callseq 0
@@ -1315,10 +1315,10 @@ define <4 x i8> @test_call_flipped(<4 x i8> %a, <4 x i8> %b) #0 {
 ; O0-NEXT: ld.param.b32 %r1, [test_call_flipped_param_0];
 ; O0-NEXT: { // callseq 1, 0
 ; O0-NEXT: .param .align 4 .b8 param0[4];
-; O0-NEXT: st.param.b32 [param0], %r2;
 ; O0-NEXT: .param .align 4 .b8 param1[4];
-; O0-NEXT: st.param.b32 [param1], %r1;
 ; O0-NEXT: .param .align 4 .b8 retval0[4];
+; O0-NEXT: st.param.b32 [param1], %r1;
+; O0-NEXT: st.param.b32 [param0], %r2;
 ; O0-NEXT: call.uni (retval0), test_callee, (param0, param1);
 ; O0-NEXT: ld.param.b32 %r3, [retval0];
 ; O0-NEXT: } // callseq 1
@@ -1331,13 +1331,13 @@ define <4 x i8> @test_call_flipped(<4 x i8> %a, <4 x i8> %b) #0 {
 ; O3-EMPTY:
 ; O3-NEXT: // %bb.0:
 ; O3-NEXT: ld.param.b32 %r1, [test_call_flipped_param_0];
-; O3-NEXT: ld.param.b32 %r2, [test_call_flipped_param_1];
 ; O3-NEXT: { // callseq 1, 0
 ; O3-NEXT: .param .align 4 .b8 param0[4];
-; O3-NEXT: st.param.b32 [param0], %r2;
 ; O3-NEXT: .param .align 4 .b8 param1[4];
-; O3-NEXT: st.param.b32 [param1], %r1;
 ; O3-NEXT: .param .align 4 .b8 retval0[4];
+; O3-NEXT: st.param.b32 [param1], %r1;
+; O3-NEXT: ld.param.b32 %r2, [test_call_flipped_param_1];
+; O3-NEXT: st.param.b32 [param0], %r2;
 ; O3-NEXT: call.uni (retval0), test_callee, (param0, param1);
 ; O3-NEXT: ld.param.b32 %r3, [retval0];
 ; O3-NEXT: } // callseq 1
@@ -1357,10 +1357,10 @@ define <4 x i8> @test_tailcall_flipped(<4 x i8> %a, <4 x i8> %b) #0 {
 ; O0-NEXT: ld.param.b32 %r1, [test_tailcall_flipped_param_0];
 ; O0-NEXT: { // callseq 2, 0
 ; O0-NEXT: .param .align 4 .b8 param0[4];
-; O0-NEXT: st.param.b32 [param0], %r2;
 ; O0-NEXT: .param .align 4 .b8 param1[4];
-; O0-NEXT: st.param.b32 [param1], %r1;
 ; O0-NEXT: .param .align 4 .b8 retval0[4];
+; O0-NEXT: st.param.b32 [param1], %r1;
+; O0-NEXT: st.param.b32 [param0], %r2;
 ; O0-NEXT: call.uni (retval0), test_callee, (param0, param1);
 ; O0-NEXT: ld.param.b32 %r3, [retval0];
 ; O0-NEXT: } // callseq 2
@@ -1373,13 +1373,13 @@ define <4 x i8> @test_tailcall_flipped(<4 x i8> %a, <4 x i8> %b) #0 {
 ; O3-EMPTY:
 ; O3-NEXT: // %bb.0:
 ; O3-NEXT: ld.param.b32 %r1, [test_tailcall_flipped_param_0];
-; O3-NEXT: ld.param.b32 %r2, [test_tailcall_flipped_param_1];
 ; O3-NEXT: { // callseq 2, 0
 ; O3-NEXT: .param .align 4 .b8 param0[4];
-; O3-NEXT: st.param.b32 [param0], %r2;
 ; O3-NEXT: .param .align 4 .b8 param1[4];
-; O3-NEXT: st.param.b32 [param1], %r1;
 ; O3-NEXT: .param .align 4 .b8 retval0[4];
+; O3-NEXT: st.param.b32 [param1], %r1;
+; O3-NEXT: ld.param.b32 %r2, [test_tailcall_flipped_param_1];
+; O3-NEXT: st.param.b32 [param0], %r2;
 ; O3-NEXT: call.uni (retval0), test_callee, (param0, param1);
 ; O3-NEXT: ld.param.b32 %r3, [retval0];
 ; O3-NEXT: } // callseq 2
diff --git a/llvm/test/CodeGen/NVPTX/idioms.ll b/llvm/test/CodeGen/NVPTX/idioms.ll
index be84f9b..a3bf892 100644
--- a/llvm/test/CodeGen/NVPTX/idioms.ll
+++ b/llvm/test/CodeGen/NVPTX/idioms.ll
@@ -173,8 +173,8 @@ define %struct.S16 @i32_to_2xi16_shr(i32 noundef %i){
 ; CHECK-NEXT: } // callseq 0
 ; CHECK-NEXT: shr.s32 %r2, %r1, 16;
 ; CHECK-NEXT: shr.u32 %r3, %r2, 16;
-; CHECK-NEXT: st.param.b16 [func_retval0], %r2;
 ; CHECK-NEXT: st.param.b16 [func_retval0+2], %r3;
+; CHECK-NEXT: st.param.b16 [func_retval0], %r2;
 ; CHECK-NEXT: ret;
 call void @escape_int(i32 %i); // Force %i to be loaded completely.
 %i1 = ashr i32 %i, 16
diff --git a/llvm/test/CodeGen/NVPTX/indirect_byval.ll b/llvm/test/CodeGen/NVPTX/indirect_byval.ll
index eae0321..782e672 100644
--- a/llvm/test/CodeGen/NVPTX/indirect_byval.ll
+++ b/llvm/test/CodeGen/NVPTX/indirect_byval.ll
@@ -23,15 +23,15 @@ define internal i32 @foo() {
 ; CHECK-NEXT: mov.b64 %SPL, __local_depot0;
 ; CHECK-NEXT: cvta.local.u64 %SP, %SPL;
 ; CHECK-NEXT: ld.global.b64 %rd1, [ptr];
-; CHECK-NEXT: add.u64 %rd3, %SPL, 1;
-; CHECK-NEXT: ld.local.b8 %rs1, [%rd3];
-; CHECK-NEXT: add.u64 %rd4, %SP, 0;
 ; CHECK-NEXT: { // callseq 0, 0
 ; CHECK-NEXT: .param .align 1 .b8 param0[1];
-; CHECK-NEXT: st.param.b8 [param0], %rs1;
 ; CHECK-NEXT: .param .b64 param1;
-; CHECK-NEXT: st.param.b64 [param1], %rd4;
 ; CHECK-NEXT: .param .b32 retval0;
+; CHECK-NEXT: add.u64 %rd2, %SP, 0;
+; CHECK-NEXT: st.param.b64 [param1], %rd2;
+; CHECK-NEXT: add.u64 %rd4, %SPL, 1;
+; CHECK-NEXT: ld.local.b8 %rs1, [%rd4];
+; CHECK-NEXT: st.param.b8 [param0], %rs1;
 ; CHECK-NEXT: prototype_0 : .callprototype (.param .b32 _) _ (.param .align 1 .b8 _[1], .param .b64 _);
 ; CHECK-NEXT: call (retval0), %rd1, (param0, param1), prototype_0;
 ; CHECK-NEXT: ld.param.b32 %r1, [retval0];
@@ -60,15 +60,15 @@ define internal i32 @bar() {
 ; CHECK-NEXT: mov.b64 %SPL, __local_depot1;
 ; CHECK-NEXT: cvta.local.u64 %SP, %SPL;
 ; CHECK-NEXT: ld.global.b64 %rd1, [ptr];
-; CHECK-NEXT: add.u64 %rd3, %SPL, 8;
-; CHECK-NEXT: ld.local.b64 %rd4, [%rd3];
-; CHECK-NEXT: add.u64 %rd5, %SP, 0;
 ; CHECK-NEXT: { // callseq 1, 0
 ; CHECK-NEXT: .param .align 8 .b8 param0[8];
-; CHECK-NEXT: st.param.b64 [param0], %rd4;
 ; CHECK-NEXT: .param .b64 param1;
-; CHECK-NEXT: st.param.b64 [param1], %rd5;
 ; CHECK-NEXT: .param .b32 retval0;
+; CHECK-NEXT: add.u64 %rd2, %SP, 0;
+; CHECK-NEXT: st.param.b64 [param1], %rd2;
+; CHECK-NEXT: add.u64 %rd4, %SPL, 8;
+; CHECK-NEXT: ld.local.b64 %rd5, [%rd4];
+; CHECK-NEXT: st.param.b64 [param0], %rd5;
 ; CHECK-NEXT: prototype_1 : .callprototype (.param .b32 _) _ (.param .align 8 .b8 _[8], .param .b64 _);
 ; CHECK-NEXT: call (retval0), %rd1, (param0, param1), prototype_1;
 ; CHECK-NEXT: ld.param.b32 %r1, [retval0];
diff --git a/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll b/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll
index 321a624..38185c7b 100644
--- a/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll
+++ b/llvm/test/CodeGen/NVPTX/lower-args-gridconstant.ll
@@ -121,20 +121,18 @@ define ptx_kernel void @grid_const_struct(ptr byval(%struct.s) align 4 %input, p
 define ptx_kernel void @grid_const_escape(ptr byval(%struct.s) align 4 %input) {
 ; PTX-LABEL: grid_const_escape(
 ; PTX: {
-; PTX-NEXT: .reg .b32 %r<2>;
 ; PTX-NEXT: .reg .b64 %rd<4>;
 ; PTX-EMPTY:
 ; PTX-NEXT: // %bb.0:
 ; PTX-NEXT: mov.b64 %rd2, grid_const_escape_param_0;
 ; PTX-NEXT: cvta.param.u64 %rd3, %rd2;
-; PTX-NEXT: mov.b64 %rd1, escape;
 ; PTX-NEXT: { // callseq 0, 0
 ; PTX-NEXT: .param .b64 param0;
-; PTX-NEXT: st.param.b64 [param0], %rd3;
 ; PTX-NEXT: .param .b32 retval0;
+; PTX-NEXT: st.param.b64 [param0], %rd3;
 ; PTX-NEXT: prototype_0 : .callprototype (.param .b32 _) _ (.param .b64 _);
+; PTX-NEXT: mov.b64 %rd1, escape;
 ; PTX-NEXT: call (retval0), %rd1, (param0), prototype_0;
-; PTX-NEXT: ld.param.b32 %r1, [retval0];
 ; PTX-NEXT: } // callseq 0
 ; PTX-NEXT: ret;
 ; OPT-LABEL: define ptx_kernel void @grid_const_escape(
@@ -153,7 +151,7 @@ define ptx_kernel void @multiple_grid_const_escape(ptr byval(%struct.s) align 4
 ; PTX-NEXT: .local .align 4 .b8 __local_depot4[4];
 ; PTX-NEXT: .reg .b64 %SP;
 ; PTX-NEXT: .reg .b64 %SPL;
-; PTX-NEXT: .reg .b32 %r<3>;
+; PTX-NEXT: .reg .b32 %r<2>;
 ; PTX-NEXT: .reg .b64 %rd<8>;
 ; PTX-EMPTY:
 ; PTX-NEXT: // %bb.0:
@@ -167,18 +165,17 @@ define ptx_kernel void @multiple_grid_const_escape(ptr byval(%struct.s) align 4
 ; PTX-NEXT: add.u64 %rd6, %SP, 0;
 ; PTX-NEXT: add.u64 %rd7, %SPL, 0;
 ; PTX-NEXT: st.local.b32 [%rd7], %r1;
-; PTX-NEXT: mov.b64 %rd1, escape3;
 ; PTX-NEXT: { // callseq 1, 0
 ; PTX-NEXT: .param .b64 param0;
-; PTX-NEXT: st.param.b64 [param0], %rd5;
 ; PTX-NEXT: .param .b64 param1;
-; PTX-NEXT: st.param.b64 [param1], %rd6;
 ; PTX-NEXT: .param .b64 param2;
-; PTX-NEXT: st.param.b64 [param2], %rd4;
 ; PTX-NEXT: .param .b32 retval0;
+; PTX-NEXT: st.param.b64 [param2], %rd4;
+; PTX-NEXT: st.param.b64 [param1], %rd6;
+; PTX-NEXT: st.param.b64 [param0], %rd5;
 ; PTX-NEXT: prototype_1 : .callprototype (.param .b32 _) _ (.param .b64 _, .param .b64 _, .param .b64 _);
+; PTX-NEXT: mov.b64 %rd1, escape3;
 ; PTX-NEXT: call (retval0), %rd1, (param0, param1, param2), prototype_1;
-; PTX-NEXT: ld.param.b32 %r2, [retval0];
 ; PTX-NEXT: } // callseq 1
 ; PTX-NEXT: ret;
 ; OPT-LABEL: define ptx_kernel void @multiple_grid_const_escape(
@@ -255,7 +252,7 @@ define ptx_kernel void @grid_const_inlineasm_escape(ptr byval(%struct.s) align 4
 define ptx_kernel void @grid_const_partial_escape(ptr byval(i32) %input, ptr %output) {
 ; PTX-LABEL: grid_const_partial_escape(
 ; PTX: {
-; PTX-NEXT: .reg .b32 %r<4>;
+; PTX-NEXT: .reg .b32 %r<3>;
 ; PTX-NEXT: .reg .b64 %rd<6>;
 ; PTX-EMPTY:
 ; PTX-NEXT: // %bb.0:
@@ -266,14 +263,13 @@ define ptx_kernel void @grid_const_partial_escape(ptr byval(i32) %input, ptr %ou
 ; PTX-NEXT: ld.param.b32 %r1, [grid_const_partial_escape_param_0];
 ; PTX-NEXT: add.s32 %r2, %r1, %r1;
 ; PTX-NEXT: st.global.b32 [%rd4], %r2;
-; PTX-NEXT: mov.b64 %rd1, escape;
 ; PTX-NEXT: { // callseq 2, 0
 ; PTX-NEXT: .param .b64 param0;
-; PTX-NEXT: st.param.b64 [param0], %rd5;
 ; PTX-NEXT: .param .b32 retval0;
+; PTX-NEXT: st.param.b64 [param0], %rd5;
 ; PTX-NEXT: prototype_2 : .callprototype (.param .b32 _) _ (.param .b64 _);
+; PTX-NEXT: mov.b64 %rd1, escape;
 ; PTX-NEXT: call (retval0), %rd1, (param0), prototype_2;
-; PTX-NEXT: ld.param.b32 %r3, [retval0];
 ; PTX-NEXT: } // callseq 2
 ; PTX-NEXT: ret;
 ; OPT-LABEL: define ptx_kernel void @grid_const_partial_escape(
@@ -295,7 +291,7 @@ define ptx_kernel void @grid_const_partial_escape(ptr byval(i32) %input, ptr %ou
 define ptx_kernel i32 @grid_const_partial_escapemem(ptr byval(%struct.s) %input, ptr %output) {
 ; PTX-LABEL: grid_const_partial_escapemem(
 ; PTX: {
-; PTX-NEXT: .reg .b32 %r<5>;
+; PTX-NEXT: .reg .b32 %r<4>;
 ; PTX-NEXT: .reg .b64 %rd<6>;
 ; PTX-EMPTY:
 ; PTX-NEXT: // %bb.0:
@@ -307,14 +303,13 @@ define ptx_kernel i32 @grid_const_partial_escapemem(ptr byval(%struct.s) %input,
 ; PTX-NEXT: ld.param.b32 %r2, [grid_const_partial_escapemem_param_0+4];
 ; PTX-NEXT: st.global.b64 [%rd4], %rd5;
 ; PTX-NEXT: add.s32 %r3, %r1, %r2;
-; PTX-NEXT: mov.b64 %rd1, escape;
 ; PTX-NEXT: { // callseq 3, 0
 ; PTX-NEXT: .param .b64 param0;
-; PTX-NEXT: st.param.b64 [param0], %rd5;
 ; PTX-NEXT: .param .b32 retval0;
+; PTX-NEXT: st.param.b64 [param0], %rd5;
 ; PTX-NEXT: prototype_3 : .callprototype (.param .b32 _) _ (.param .b64 _);
+; PTX-NEXT: mov.b64 %rd1, escape;
 ; PTX-NEXT: call (retval0), %rd1, (param0), prototype_3;
-; PTX-NEXT: ld.param.b32 %r4, [retval0];
 ; PTX-NEXT: } // callseq 3
 ; PTX-NEXT: st.param.b32 [func_retval0], %r3;
 ; PTX-NEXT: ret;
@@ -535,9 +530,9 @@ define ptx_kernel void @test_forward_byval_arg(ptr byval(i32) align 4 %input) {
 ; PTX-NEXT: .reg .b32 %r<2>;
 ; PTX-EMPTY:
 ; PTX-NEXT: // %bb.0:
-; PTX-NEXT: ld.param.b32 %r1, [test_forward_byval_arg_param_0];
 ; PTX-NEXT: { // callseq 4, 0
 ; PTX-NEXT: .param .align 4 .b8 param0[4];
+; PTX-NEXT: ld.param.b32 %r1, [test_forward_byval_arg_param_0];
 ; PTX-NEXT: st.param.b32 [param0], %r1;
 ; PTX-NEXT: call.uni device_func, (param0);
 ; PTX-NEXT: } // callseq 4
diff --git a/llvm/test/CodeGen/NVPTX/lower-args.ll b/llvm/test/CodeGen/NVPTX/lower-args.ll
index c165de7..7c029ab 100644
--- a/llvm/test/CodeGen/NVPTX/lower-args.ll
+++ b/llvm/test/CodeGen/NVPTX/lower-args.ll
@@ -31,7 +31,7 @@ define void @load_alignment(ptr nocapture readonly byval(%class.outer) align 8 %
 ; PTX-LABEL: load_alignment(
 ; PTX: {
 ; PTX-NEXT: .reg .b32 %r<4>;
-; PTX-NEXT: .reg .b64 %rd<7>;
+; PTX-NEXT: .reg .b64 %rd<6>;
 ; PTX-EMPTY:
 ; PTX-NEXT: // %bb.0: // %entry
 ; PTX-NEXT: mov.b64 %rd1, load_alignment_param_0;
@@ -45,10 +45,9 @@ define void @load_alignment(ptr nocapture readonly byval(%class.outer) align 8 %
 ; PTX-NEXT: st.b32 [%rd3], %r3;
 ; PTX-NEXT: { // callseq 0, 0
 ; PTX-NEXT: .param .b64 param0;
-; PTX-NEXT: st.param.b64 [param0], %rd5;
 ; PTX-NEXT: .param .b64 retval0;
+; PTX-NEXT: st.param.b64 [param0], %rd5;
 ; PTX-NEXT: call.uni (retval0), escape, (param0);
-; PTX-NEXT: ld.param.b64 %rd6, [retval0];
 ; PTX-NEXT: } // callseq 0
 ; PTX-NEXT: ret;
 entry:
@@ -76,17 +75,16 @@ define void @load_padding(ptr nocapture readonly byval(%class.padded) %arg) {
 ;
 ; PTX-LABEL: load_padding(
 ; PTX: {
-; PTX-NEXT: .reg .b64 %rd<4>;
+; PTX-NEXT: .reg .b64 %rd<3>;
 ; PTX-EMPTY:
 ; PTX-NEXT: // %bb.0:
 ; PTX-NEXT: mov.b64 %rd1, load_padding_param_0;
 ; PTX-NEXT: cvta.local.u64 %rd2, %rd1;
 ; PTX-NEXT: { // callseq 1, 0
 ; PTX-NEXT: .param .b64 param0;
-; PTX-NEXT: st.param.b64 [param0], %rd2;
 ; PTX-NEXT: .param .b64 retval0;
+; PTX-NEXT: st.param.b64 [param0], %rd2;
 ; PTX-NEXT: call.uni (retval0), escape, (param0);
-; PTX-NEXT: ld.param.b64 %rd3, [retval0];
 ; PTX-NEXT: } // callseq 1
 ; PTX-NEXT: ret;
 %tmp = call ptr @escape(ptr nonnull align 16 %arg)
diff --git a/llvm/test/CodeGen/NVPTX/lower-byval-args.ll b/llvm/test/CodeGen/NVPTX/lower-byval-args.ll
index 4784d70..20a3519 100644
--- a/llvm/test/CodeGen/NVPTX/lower-byval-args.ll
+++ b/llvm/test/CodeGen/NVPTX/lower-byval-args.ll
@@ -911,9 +911,9 @@ define void @device_func(ptr byval(i32) align 4 %input) {
 ; PTX-NEXT: .reg .b64 %rd<2>;
 ; PTX-EMPTY:
 ; PTX-NEXT: // %bb.0:
-; PTX-NEXT: ld.param.b32 %r1, [device_func_param_0];
 ; PTX-NEXT: { // callseq 3, 0
 ; PTX-NEXT: .param .align 4 .b8 param0[4];
+; PTX-NEXT: ld.param.b32 %r1, [device_func_param_0];
 ; PTX-NEXT: st.param.b32 [param0], %r1;
 ; PTX-NEXT: call.uni device_func, (param0);
 ; PTX-NEXT: } // callseq 3
diff --git a/llvm/test/CodeGen/NVPTX/misched_func_call.ll b/llvm/test/CodeGen/NVPTX/misched_func_call.ll
index 8401f45..b2994c0 100644
--- a/llvm/test/CodeGen/NVPTX/misched_func_call.ll
+++ b/llvm/test/CodeGen/NVPTX/misched_func_call.ll
@@ -8,7 +8,7 @@ define ptx_kernel void @wombat(i32 %arg, i32 %arg1, i32 %arg2) {
 ; CHECK-LABEL: wombat(
 ; CHECK: {
 ; CHECK-NEXT: .reg .b32 %r<11>;
-; CHECK-NEXT: .reg .b64 %rd<6>;
+; CHECK-NEXT: .reg .b64 %rd<5>;
 ; CHECK-EMPTY:
 ; CHECK-NEXT: // %bb.0: // %bb
 ; CHECK-NEXT: ld.param.b32 %r4, [wombat_param_2];
@@ -19,19 +19,18 @@ define ptx_kernel void @wombat(i32 %arg, i32 %arg1, i32 %arg2) {
 ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT: { // callseq 0, 0
 ; CHECK-NEXT: .param .b64 param0;
-; CHECK-NEXT: st.param.b64 [param0], 0d0000000000000000;
 ; CHECK-NEXT: .param .b64 retval0;
+; CHECK-NEXT: st.param.b64 [param0], 0;
 ; CHECK-NEXT: call.uni (retval0), quux, (param0);
-; CHECK-NEXT: ld.param.b64 %rd1, [retval0];
 ; CHECK-NEXT: } // callseq 0
 ; CHECK-NEXT: mul.lo.s32 %r7, %r10, %r3;
 ; CHECK-NEXT: or.b32 %r8, %r4, %r7;
 ; CHECK-NEXT: mul.lo.s32 %r9, %r2, %r8;
-; CHECK-NEXT: cvt.rn.f64.s32 %rd2, %r9;
-; CHECK-NEXT: cvt.rn.f64.u32 %rd3, %r10;
-; CHECK-NEXT: add.rn.f64 %rd4, %rd3, %rd2;
-; CHECK-NEXT: mov.b64 %rd5, 0;
-; CHECK-NEXT: st.global.b64 [%rd5], %rd4;
+; CHECK-NEXT: cvt.rn.f64.s32 %rd1, %r9;
+; CHECK-NEXT: cvt.rn.f64.u32 %rd2, %r10;
+; CHECK-NEXT: add.rn.f64 %rd3, %rd2, %rd1;
+; CHECK-NEXT: mov.b64 %rd4, 0;
+; CHECK-NEXT: st.global.b64 [%rd4], %rd3;
 ; CHECK-NEXT: mov.b32 %r10, 1;
 ; CHECK-NEXT: bra.uni $L__BB0_1;
 bb:
diff --git a/llvm/test/CodeGen/NVPTX/param-add.ll b/llvm/test/CodeGen/NVPTX/param-add.ll
index 4fa1235..c5ea9f8 100644
--- a/llvm/test/CodeGen/NVPTX/param-add.ll
+++ b/llvm/test/CodeGen/NVPTX/param-add.ll
@@ -18,16 +18,16 @@ define i32 @test(%struct.1float alignstack(32) %data) {
 ; CHECK-EMPTY:
 ; CHECK-NEXT: // %bb.0:
 ; CHECK-NEXT: ld.param.b32 %r1, [test_param_0];
-; CHECK-NEXT: shr.u32 %r2, %r1, 8;
-; CHECK-NEXT: shr.u32 %r3, %r1, 16;
-; CHECK-NEXT: shr.u32 %r4, %r1, 24;
 ; CHECK-NEXT: { // callseq 0, 0
 ; CHECK-NEXT: .param .align 1 .b8 param0[4];
+; CHECK-NEXT: .param .b32 retval0;
 ; CHECK-NEXT: st.param.b8 [param0], %r1;
+; CHECK-NEXT: shr.u32 %r2, %r1, 8;
 ; CHECK-NEXT: st.param.b8 [param0+1], %r2;
+; CHECK-NEXT: shr.u32 %r3, %r1, 16;
 ; CHECK-NEXT: st.param.b8 [param0+2], %r3;
+; CHECK-NEXT: shr.u32 %r4, %r3, 8;
 ; CHECK-NEXT: st.param.b8 [param0+3], %r4;
-; CHECK-NEXT: .param .b32 retval0;
 ; CHECK-NEXT: call.uni (retval0), callee, (param0);
 ; CHECK-NEXT: ld.param.b32 %r5, [retval0];
 ; CHECK-NEXT: } // callseq 0
diff --git a/llvm/test/CodeGen/NVPTX/param-load-store.ll b/llvm/test/CodeGen/NVPTX/param-load-store.ll
index 6c52bfd..db3fbbc 100644
--- a/llvm/test/CodeGen/NVPTX/param-load-store.ll
+++ b/llvm/test/CodeGen/NVPTX/param-load-store.ll
@@ -27,10 +27,10 @@
 ; CHECK: ld.param.b8 [[A8:%rs[0-9]+]], [test_i1_param_0];
 ; CHECK: and.b16 [[A:%rs[0-9]+]], [[A8]], 1;
 ; CHECK: setp.ne.b16 %p1, [[A]], 0
+; CHECK-DAG: .param .b32 param0;
+; CHECK-DAG: .param .b32 retval0;
 ; CHECK: cvt.u32.u16 [[B:%r[0-9]+]], [[A8]]
-; CHECK: .param .b32 param0;
-; CHECK: st.param.b32 [param0], [[B]]
-; CHECK: .param .b32 retval0;
+; CHECK-DAG: st.param.b32 [param0], [[B]]
 ; CHECK: call.uni (retval0), test_i1,
 ; CHECK: ld.param.b32 [[R8:%r[0-9]+]], [retval0];
 ; CHECK: st.param.b32 [func_retval0], [[R8]];
@@ -47,11 +47,11 @@ define i1 @test_i1(i1 %a) {
 ; CHECK-NEXT: .param .b32 test_i1s_param_0
 ; CHECK: ld.param.b8 [[A8:%rs[0-9]+]], [test_i1s_param_0];
 ; CHECK: cvt.u32.u16 [[A32:%r[0-9]+]], [[A8]];
+; CHECK: .param .b32 param0;
+; CHECK: .param .b32 retval0;
 ; CHECK: and.b32 [[A1:%r[0-9]+]], [[A32]], 1;
 ; CHECK: neg.s32 [[A:%r[0-9]+]], [[A1]];
-; CHECK: .param .b32 param0;
 ; CHECK: st.param.b32 [param0], [[A]];
-; CHECK: .param .b32 retval0;
 ; CHECK: call.uni
 ; CHECK: ld.param.b32 [[R8:%r[0-9]+]], [retval0];
 ; CHECK: and.b32 [[R1:%r[0-9]+]], [[R8]], 1;
@@ -70,9 +70,9 @@ define signext i1 @test_i1s(i1 signext %a) {
 ; CHECK-DAG: ld.param.b8 [[E2:%rs[0-9]+]], [test_v3i1_param_0+2];
 ; CHECK-DAG: ld.param.b8 [[E0:%rs[0-9]+]], [test_v3i1_param_0]
 ; CHECK: .param .align 1 .b8 param0[1];
+; CHECK: .param .align 1 .b8 retval0[1];
 ; CHECK-DAG: st.param.b8 [param0], [[E0]];
 ; CHECK-DAG: st.param.b8 [param0+2], [[E2]];
-; CHECK: .param .align 1 .b8 retval0[1];
 ; CHECK: call.uni (retval0), test_v3i1,
 ; CHECK-DAG: ld.param.b8 [[RE0:%rs[0-9]+]], [retval0];
 ; CHECK-DAG: ld.param.b8 [[RE2:%rs[0-9]+]], [retval0+2];
@@ -89,8 +89,8 @@ define <3 x i1> @test_v3i1(<3 x i1> %a) {
 ; CHECK-NEXT: .param .align 1 .b8 test_v4i1_param_0[1]
 ; CHECK: ld.param.b8 [[E0:%rs[0-9]+]], [test_v4i1_param_0]
 ; CHECK: .param .align 1 .b8 param0[1];
-; CHECK: st.param.b8 [param0], [[E0]];
 ; CHECK: .param .align 1 .b8 retval0[1];
+; CHECK: st.param.b8 [param0], [[E0]];
 ; CHECK: call.uni (retval0), test_v4i1,
 ; CHECK: ld.param.b8 [[RE0:%rs[0-9]+]], [retval0];
 ; CHECK: ld.param.b8 [[RE1:%rs[0-9]+]], [retval0+1];
@@ -112,9 +112,9 @@ define <4 x i1> @test_v4i1(<4 x i1> %a) {
 ; CHECK-DAG: ld.param.b8 [[E4:%rs[0-9]+]], [test_v5i1_param_0+4];
 ; CHECK-DAG: ld.param.b8 [[E0:%rs[0-9]+]], [test_v5i1_param_0]
 ; CHECK: .param .align 1 .b8 param0[1];
+; CHECK: .param .align 1 .b8 retval0[1];
 ; CHECK-DAG: st.param.b8 [param0], [[E0]];
 ; CHECK-DAG: st.param.b8 [param0+4], [[E4]];
-; CHECK: .param .align 1 .b8 retval0[1];
 ; CHECK: call.uni (retval0), test_v5i1,
 ; CHECK-DAG: ld.param.b8 [[RE0:%rs[0-9]+]], [retval0];
 ; CHECK-DAG: ld.param.b8 [[RE4:%rs[0-9]+]], [retval0+4];
@@ -131,8 +131,8 @@ define <5 x i1> @test_v5i1(<5 x i1> %a) {
 ; CHECK-NEXT: .param .b32 test_i2_param_0
 ; CHECK: ld.param.b8 {{%rs[0-9]+}}, [test_i2_param_0];
 ; CHECK: .param .b32 param0;
-; CHECK: st.param.b32 [param0], {{%r[0-9]+}};
 ; CHECK: .param .b32 retval0;
+; CHECK: st.param.b32 [param0], {{%r[0-9]+}};
 ; CHECK: call.uni (retval0), test_i2,
 ; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0];
 ; CHECK: st.param.b32 [func_retval0], {{%r[0-9]+}};
@@ -147,8 +147,8 @@ define i2 @test_i2(i2 %a) {
 ; CHECK-NEXT: .param .b32 test_i3_param_0
 ; CHECK: ld.param.b8 {{%rs[0-9]+}}, [test_i3_param_0];
 ; CHECK: .param .b32 param0;
-; CHECK: st.param.b32 [param0], {{%r[0-9]+}};
 ; CHECK: .param .b32 retval0;
+; CHECK: st.param.b32 [param0], {{%r[0-9]+}};
 ; CHECK: call.uni (retval0), test_i3,
 ; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0];
 ; CHECK: st.param.b32 [func_retval0], {{%r[0-9]+}};
@@ -163,10 +163,10 @@ define i3 @test_i3(i3 %a) {
 ; CHECK-LABEL: test_i8(
 ; CHECK-NEXT: .param .b32 test_i8_param_0
 ; CHECK: ld.param.b8 [[A8:%rs[0-9]+]], [test_i8_param_0];
-; CHECK: cvt.u32.u16 [[A32:%r[0-9]+]], [[A8]];
 ; CHECK: .param .b32 param0;
-; CHECK: st.param.b32 [param0], [[A32]];
 ; CHECK: .param .b32 retval0;
+; CHECK: cvt.u32.u16 [[A32:%r[0-9]+]], [[A8]];
+; CHECK: st.param.b32 [param0], [[A32]];
 ; CHECK: call.uni (retval0), test_i8,
 ; CHECK: ld.param.b32 [[R32:%r[0-9]+]], [retval0];
 ; CHECK: st.param.b32 [func_retval0], [[R32]];
@@ -181,10 +181,10 @@ define i8 @test_i8(i8 %a) {
 ; CHECK-LABEL: test_i8s(
 ; CHECK-NEXT: .param .b32 test_i8s_param_0
 ; CHECK: ld.param.s8 [[A8:%rs[0-9]+]], [test_i8s_param_0];
-; CHECK: cvt.s32.s16 [[A:%r[0-9]+]], [[A8]];
 ; CHECK: .param .b32 param0;
-; CHECK: st.param.b32 [param0], [[A]];
 ; CHECK: .param .b32 retval0;
+; CHECK: cvt.s32.s16 [[A:%r[0-9]+]], [[A8]];
+; CHECK: st.param.b32 [param0], [[A]];
 ; CHECK: call.uni (retval0), test_i8s,
 ; CHECK: ld.param.b32 [[R32:%r[0-9]+]], [retval0];
 ; -- This is suspicious (though correct) -- why not cvt.u8.u32, cvt.s8.s32 ?
@@ -202,8 +202,8 @@ define signext i8 @test_i8s(i8 signext %a) {
 ; CHECK-NEXT: .param .align 4 .b8 test_v3i8_param_0[4]
 ; CHECK: ld.param.b32 [[R:%r[0-9]+]], [test_v3i8_param_0];
 ; CHECK: .param .align 4 .b8 param0[4];
-; CHECK: st.param.b32 [param0], [[R]]
 ; CHECK: .param .align 4 .b8 retval0[4];
+; CHECK: st.param.b32 [param0], [[R]]
 ; CHECK: call.uni (retval0), test_v3i8,
 ; CHECK: ld.param.b32 [[RE:%r[0-9]+]], [retval0];
 ; v4i8/i32->{v3i8 elements}->v4i8/i32 conversion is messy and not very
@@ -220,8 +220,8 @@ define <3 x i8> @test_v3i8(<3 x i8> %a) {
 ; CHECK-NEXT: .param .align 4 .b8 test_v4i8_param_0[4]
 ; CHECK: ld.param.b32 [[R:%r[0-9]+]], [test_v4i8_param_0]
 ; CHECK: .param .align 4 .b8 param0[4];
-; CHECK: st.param.b32 [param0], [[R]];
 ; CHECK: .param .align 4 .b8 retval0[4];
+; CHECK: st.param.b32 [param0], [[R]];
 ; CHECK: call.uni (retval0), test_v4i8,
 ; CHECK: ld.param.b32 [[RET:%r[0-9]+]], [retval0];
 ; CHECK: st.param.b32 [func_retval0], [[RET]];
@@ -237,20 +237,13 @@ define <4 x i8> @test_v4i8(<4 x i8> %a) {
 ; CHECK-DAG: ld.param.b32 [[E0:%r[0-9]+]], [test_v5i8_param_0]
 ; CHECK-DAG: ld.param.b8 [[E4:%rs[0-9]+]], [test_v5i8_param_0+4];
 ; CHECK: .param .align 8 .b8 param0[8];
-; CHECK-DAG: st.param.v4.b8 [param0],
-; CHECK-DAG: st.param.b8 [param0+4], [[E4]];
 ; CHECK: .param .align 8 .b8 retval0[8];
+; CHECK-DAG: st.param.b32 [param0], [[E0]];
+; CHECK-DAG: st.param.b8 [param0+4], [[E4]];
 ; CHECK: call.uni (retval0), test_v5i8,
-; CHECK-DAG: ld.param.v4.b8 {[[RE0:%rs[0-9]+]], [[RE1:%rs[0-9]+]], [[RE2:%rs[0-9]+]], [[RE3:%rs[0-9]+]]}, [retval0];
+; CHECK-DAG: ld.param.b32 [[RE0:%r[0-9]+]], [retval0];
 ; CHECK-DAG: ld.param.b8 [[RE4:%rs[0-9]+]], [retval0+4];
-; CHECK-DAG: cvt.u32.u16 [[R3:%r[0-9]+]], [[RE3]];
-; CHECK-DAG: cvt.u32.u16 [[R2:%r[0-9]+]], [[RE2]];
-; CHECK-DAG: prmt.b32 [[P0:%r[0-9]+]], [[R2]], [[R3]], 0x3340U;
-; CHECK-DAG: cvt.u32.u16 [[R1:%r[0-9]+]], [[RE1]];
-; CHECK-DAG: cvt.u32.u16 [[R0:%r[0-9]+]], [[RE0]];
-; CHECK-DAG: prmt.b32 [[P1:%r[0-9]+]], [[R0]], [[R1]], 0x3340U;
-; CHECK-DAG: prmt.b32 [[P2:%r[0-9]+]], [[P1]], [[P0]], 0x5410U;
-; CHECK-DAG: st.param.b32 [func_retval0], [[P2]];
+; CHECK-DAG: st.param.b32 [func_retval0], [[RE0]];
 ; CHECK-DAG: st.param.b8 [func_retval0+4], [[RE4]];
 ; CHECK-NEXT: ret;
 define <5 x i8> @test_v5i8(<5 x i8> %a) {
@@ -262,8 +255,8 @@ define <5 x i8> @test_v5i8(<5 x i8> %a) {
 ; CHECK-LABEL: test_i11(
 ; CHECK-NEXT: .param .b32 test_i11_param_0
 ; CHECK: ld.param.b16 {{%rs[0-9]+}}, [test_i11_param_0];
-; CHECK: st.param.b32 [param0], {{%r[0-9]+}};
 ; CHECK: .param .b32 retval0;
+; CHECK: st.param.b32 [param0], {{%r[0-9]+}};
 ; CHECK: call.uni (retval0), test_i11,
 ; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0];
 ; CHECK: st.param.b32 [func_retval0], {{%r[0-9]+}};
@@ -277,10 +270,10 @@ define i11 @test_i11(i11 %a) {
 ; CHECK-LABEL: test_i16(
 ; CHECK-NEXT: .param .b32 test_i16_param_0
 ; CHECK: ld.param.b16 [[E16:%rs[0-9]+]], [test_i16_param_0];
-; CHECK: cvt.u32.u16 [[E32:%r[0-9]+]], [[E16]];
 ; CHECK: .param .b32 param0;
-; CHECK: st.param.b32 [param0], [[E32]];
 ; CHECK: .param .b32 retval0;
+; CHECK: cvt.u32.u16 [[E32:%r[0-9]+]], [[E16]];
+; CHECK: st.param.b32 [param0], [[E32]];
 ; CHECK: call.uni (retval0), test_i16,
 ; CHECK: ld.param.b32 [[RE32:%r[0-9]+]], [retval0];
 ; CHECK: st.param.b32 [func_retval0], [[RE32]];
@@ -294,10 +287,10 @@ define i16 @test_i16(i16 %a) {
 ; CHECK-LABEL: test_i16s(
 ; CHECK-NEXT: .param .b32 test_i16s_param_0
 ; CHECK: ld.param.b16 [[E16:%rs[0-9]+]], [test_i16s_param_0];
-; CHECK: cvt.s32.s16 [[E32:%r[0-9]+]], [[E16]];
 ; CHECK: .param .b32 param0;
-; CHECK: st.param.b32 [param0], [[E32]];
 ; CHECK: .param .b32 retval0;
+; CHECK: cvt.s32.s16 [[E32:%r[0-9]+]], [[E16]];
+; CHECK: st.param.b32 [param0], [[E32]];
 ; CHECK: call.uni (retval0), test_i16s,
 ; CHECK: ld.param.b32 [[RE32:%r[0-9]+]], [retval0];
 ; CHECK: cvt.s32.s16 [[R:%r[0-9]+]], [[RE32]];
@@ -312,14 +305,15 @@ define signext i16 @test_i16s(i16 signext %a) {
 ; CHECK-LABEL: test_v3i16(
 ; CHECK-NEXT: .param .align 8 .b8 test_v3i16_param_0[8]
 ; CHECK-DAG: ld.param.b16 [[E2:%rs[0-9]+]], [test_v3i16_param_0+4];
-; CHECK-DAG: ld.param.v2.b16 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]]}, [test_v3i16_param_0];
+; CHECK-DAG: ld.param.b32 [[E0:%r[0-9]+]], [test_v3i16_param_0];
 ; CHECK: .param .align 8 .b8 param0[8];
-; CHECK: st.param.v2.b16 [param0], {[[E0]], [[E1]]};
-; CHECK: st.param.b16 [param0+4], [[E2]];
 ; CHECK: .param .align 8 .b8 retval0[8];
+; CHECK-DAG: st.param.b32 [param0], [[E0]];
+; CHECK-DAG: st.param.b16 [param0+4], [[E2]];
 ; CHECK: call.uni (retval0), test_v3i16,
-; CHECK: ld.param.v2.b16 {[[RE0:%rs[0-9]+]], [[RE1:%rs[0-9]+]]}, [retval0];
+; CHECK: ld.param.b32 [[RE:%r[0-9]+]], [retval0];
 ; CHECK: ld.param.b16 [[RE2:%rs[0-9]+]], [retval0+4];
+; CHECK-DAG: mov.b32 {[[RE0:%rs[0-9]+]], [[RE1:%rs[0-9]+]]}, [[RE]];
 ; CHECK-DAG: st.param.v2.b16 [func_retval0], {[[RE0]], [[RE1]]};
 ; CHECK-DAG: st.param.b16 [func_retval0+4], [[RE2]];
 ; CHECK-NEXT: ret;
@@ -333,8 +327,8 @@ define <3 x i16> @test_v3i16(<3 x i16> %a) {
 ; CHECK-NEXT: .param .align 8 .b8 test_v4i16_param_0[8]
 ; CHECK: ld.param.v2.b32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]]}, [test_v4i16_param_0]
 ; CHECK: .param .align 8 .b8 param0[8];
-; CHECK: st.param.v2.b32 [param0], {[[E0]], [[E1]]};
 ; CHECK: .param .align 8 .b8 retval0[8];
+; CHECK: st.param.v2.b32 [param0], {[[E0]], [[E1]]};
 ; CHECK: call.uni (retval0), test_v4i16,
 ; CHECK: ld.param.v2.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]]}, [retval0];
 ; CHECK: st.param.v2.b32 [func_retval0], {[[RE0]], [[RE1]]}
@@ -348,15 +342,15 @@ define <4 x i16> @test_v4i16(<4 x i16> %a) {
 ; CHECK-LABEL: test_v5i16(
 ; CHECK-NEXT: .param .align 16 .b8 test_v5i16_param_0[16]
 ; CHECK-DAG: ld.param.b16 [[E4:%rs[0-9]+]], [test_v5i16_param_0+8];
-; CHECK-DAG: ld.param.v4.b16 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]], [[E2:%rs[0-9]+]], [[E3:%rs[0-9]+]]}, [test_v5i16_param_0]
+; CHECK-DAG: ld.param.v2.b32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]]}, [test_v5i16_param_0]
 ; CHECK: .param .align 16 .b8 param0[16];
-; CHECK-DAG: st.param.v4.b16 [param0], {[[E0]], [[E1]], [[E2]], [[E3]]};
-; CHECK-DAG: st.param.b16 [param0+8], [[E4]];
 ; CHECK: .param .align 16 .b8 retval0[16];
+; CHECK-DAG: st.param.v2.b32 [param0], {[[E0]], [[E1]]};
+; CHECK-DAG: st.param.b16 [param0+8], [[E4]];
 ; CHECK: call.uni (retval0), test_v5i16,
-; CHECK-DAG: ld.param.v4.b16 {[[RE0:%rs[0-9]+]], [[RE1:%rs[0-9]+]], [[RE2:%rs[0-9]+]], [[RE3:%rs[0-9]+]]}, [retval0];
+; CHECK-DAG: ld.param.v2.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]]}, [retval0];
 ; CHECK-DAG: ld.param.b16 [[RE4:%rs[0-9]+]], [retval0+8];
-; CHECK-DAG: st.param.v4.b16 [func_retval0], {[[RE0]], [[RE1]], [[RE2]], [[RE3]]}
+; CHECK-DAG: st.param.v2.b32 [func_retval0], {[[RE0]], [[RE1]]}
 ; CHECK-DAG: st.param.b16 [func_retval0+8], [[RE4]];
 ; CHECK-NEXT: ret;
 define <5 x i16> @test_v5i16(<5 x i16> %a) {
@@ -369,8 +363,8 @@ define <5 x i16> @test_v5i16(<5 x i16> %a) {
 ; CHECK-NEXT: .param .align 2 .b8 test_f16_param_0[2]
 ; CHECK: ld.param.b16 [[E:%rs[0-9]+]], [test_f16_param_0];
 ; CHECK: .param .align 2 .b8 param0[2];
-; CHECK: st.param.b16 [param0], [[E]];
 ; CHECK: .param .align 2 .b8 retval0[2];
+; CHECK: st.param.b16 [param0], [[E]];
 ; CHECK: call.uni (retval0), test_f16,
 ; CHECK: ld.param.b16 [[R:%rs[0-9]+]], [retval0];
 ; CHECK: st.param.b16 [func_retval0], [[R]]
@@ -385,8 +379,8 @@ define half @test_f16(half %a) {
 ; CHECK-NEXT: .param .align 4 .b8 test_v2f16_param_0[4]
 ; CHECK: ld.param.b32 [[E:%r[0-9]+]], [test_v2f16_param_0];
 ; CHECK: .param .align 4 .b8 param0[4];
-; CHECK: st.param.b32 [param0], [[E]];
 ; CHECK: .param .align 4 .b8 retval0[4];
+; CHECK: st.param.b32 [param0], [[E]];
 ; CHECK: call.uni (retval0), test_v2f16,
 ; CHECK: ld.param.b32 [[R:%r[0-9]+]], [retval0];
 ; CHECK: st.param.b32 [func_retval0], [[R]]
@@ -401,8 +395,8 @@ define <2 x half> @test_v2f16(<2 x half> %a) {
 ; CHECK-NEXT: .param .align 2 .b8 test_bf16_param_0[2]
 ; CHECK: ld.param.b16 [[E:%rs[0-9]+]], [test_bf16_param_0];
 ; CHECK: .param .align 2 .b8 param0[2];
-; CHECK: st.param.b16 [param0], [[E]];
 ; CHECK: .param .align 2 .b8 retval0[2];
+; CHECK: st.param.b16 [param0], [[E]];
 ; CHECK: call.uni (retval0), test_bf16,
 ; CHECK: ld.param.b16 [[R:%rs[0-9]+]], [retval0];
 ; CHECK: st.param.b16 [func_retval0], [[R]]
@@ -417,8 +411,8 @@ define bfloat @test_bf16(bfloat %a) {
 ; CHECK-NEXT: .param .align 4 .b8 test_v2bf16_param_0[4]
 ; CHECK: ld.param.b32 [[E:%r[0-9]+]], [test_v2bf16_param_0];
 ; CHECK: .param .align 4 .b8 param0[4];
-; CHECK: st.param.b32 [param0], [[E]];
 ; CHECK: .param .align 4 .b8 retval0[4];
+; CHECK: st.param.b32 [param0], [[E]];
 ; CHECK: call.uni (retval0), test_v2bf16,
 ; CHECK: ld.param.b32 [[R:%r[0-9]+]], [retval0];
 ; CHECK: st.param.b32 [func_retval0], [[R]]
@@ -432,15 +426,16 @@ define <2 x bfloat> @test_v2bf16(<2 x bfloat> %a) {
 ; CHECK:.func (.param .align 8 .b8 func_retval0[8])
 ; CHECK-LABEL: test_v3f16(
 ; CHECK: .param .align 8 .b8 test_v3f16_param_0[8]
-; CHECK-DAG: ld.param.v2.b16 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]]}, [test_v3f16_param_0];
+; CHECK-DAG: ld.param.b32 [[E0:%r[0-9]+]], [test_v3f16_param_0];
 ; CHECK-DAG: ld.param.b16 [[E2:%rs[0-9]+]], [test_v3f16_param_0+4];
 ; CHECK: .param .align 8 .b8 param0[8];
-; CHECK-DAG: st.param.v2.b16 [param0], {[[E0]], [[E1]]};
-; CHECK-DAG: st.param.b16 [param0+4], [[E2]];
 ; CHECK: .param .align 8 .b8 retval0[8];
+; CHECK-DAG: st.param.b32 [param0], [[E0]];
+; CHECK-DAG: st.param.b16 [param0+4], [[E2]];
 ; CHECK: call.uni (retval0), test_v3f16,
-; CHECK-DAG: ld.param.v2.b16 {[[R0:%rs[0-9]+]], [[R1:%rs[0-9]+]]}, [retval0];
+; CHECK-DAG: ld.param.b32 [[R:%r[0-9]+]], [retval0];
 ; CHECK-DAG: ld.param.b16 [[R2:%rs[0-9]+]], [retval0+4];
+; CHECK-DAG: mov.b32 {[[R0:%rs[0-9]+]], [[R1:%rs[0-9]+]]}, [[R]];
 ; CHECK-DAG: st.param.v2.b16 [func_retval0], {[[R0]], [[R1]]};
 ; CHECK-DAG: st.param.b16 [func_retval0+4], [[R2]];
 ; CHECK: ret;
@@ -454,8 +449,8 @@ define <3 x half> @test_v3f16(<3 x half> %a) {
 ; CHECK: .param .align 8 .b8 test_v4f16_param_0[8]
 ; CHECK: ld.param.v2.b32 {[[R01:%r[0-9]+]], [[R23:%r[0-9]+]]}, [test_v4f16_param_0];
 ; CHECK: .param .align 8 .b8 param0[8];
-; CHECK: st.param.v2.b32 [param0], {[[R01]], [[R23]]};
 ; CHECK: .param .align 8 .b8 retval0[8];
+; CHECK: st.param.v2.b32 [param0], {[[R01]], [[R23]]};
 ; CHECK: call.uni (retval0), test_v4f16,
 ; CHECK: ld.param.v2.b32 {[[RH01:%r[0-9]+]], [[RH23:%r[0-9]+]]}, [retval0];
 ; CHECK: st.param.v2.b32 [func_retval0], {[[RH01]], [[RH23]]};
@@ -468,16 +463,16 @@ define <4 x half> @test_v4f16(<4 x half> %a) {
 ; CHECK:.func (.param .align 16 .b8 func_retval0[16])
 ; CHECK-LABEL: test_v5f16(
 ; CHECK: .param .align 16 .b8 test_v5f16_param_0[16]
-; CHECK-DAG: ld.param.v4.b16 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]], [[E2:%rs[0-9]+]], [[E3:%rs[0-9]+]]}, [test_v5f16_param_0];
+; CHECK-DAG: ld.param.v2.b32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]]}, [test_v5f16_param_0];
 ; CHECK-DAG: ld.param.b16 [[E4:%rs[0-9]+]], [test_v5f16_param_0+8];
 ; CHECK: .param .align 16 .b8 param0[16];
-; CHECK-DAG: st.param.v4.b16 [param0],
-; CHECK-DAG: st.param.b16 [param0+8], [[E4]];
 ; CHECK: .param .align 16 .b8 retval0[16];
+; CHECK-DAG: st.param.v2.b32 [param0], {[[E0]], [[E1]]};
+; CHECK-DAG: st.param.b16 [param0+8], [[E4]];
 ; CHECK: call.uni (retval0), test_v5f16,
-; CHECK-DAG: ld.param.v4.b16 {[[R0:%rs[0-9]+]], [[R1:%rs[0-9]+]], [[R2:%rs[0-9]+]], [[R3:%rs[0-9]+]]}, [retval0];
+; CHECK-DAG: ld.param.v2.b32 {[[R0:%r[0-9]+]], [[R1:%r[0-9]+]]}, [retval0];
 ; CHECK-DAG: ld.param.b16 [[R4:%rs[0-9]+]], [retval0+8];
-; CHECK-DAG: st.param.v4.b16 [func_retval0], {[[R0]], [[R1]], [[R2]], [[R3]]};
+; CHECK-DAG: st.param.v2.b32 [func_retval0], {[[R0]], [[R1]]};
 ; CHECK-DAG: st.param.b16 [func_retval0+8], [[R4]];
 ; CHECK: ret;
 define <5 x half> @test_v5f16(<5 x half> %a) {
@@ -490,8 +485,8 @@ define <5 x half> @test_v5f16(<5 x half> %a) {
 ; CHECK: .param .align 16 .b8 test_v8f16_param_0[16]
 ; CHECK: ld.param.v4.b32 {[[R01:%r[0-9]+]], [[R23:%r[0-9]+]], [[R45:%r[0-9]+]], [[R67:%r[0-9]+]]}, [test_v8f16_param_0];
 ; CHECK: .param .align 16 .b8 param0[16];
-; CHECK: st.param.v4.b32 [param0], {[[R01]], [[R23]], [[R45]], [[R67]]};
 ; CHECK: .param .align 16 .b8 retval0[16];
+; CHECK: st.param.v4.b32 [param0], {[[R01]], [[R23]], [[R45]], [[R67]]};
 ; CHECK: call.uni (retval0), test_v8f16,
 ; CHECK: ld.param.v4.b32 {[[RH01:%r[0-9]+]], [[RH23:%r[0-9]+]], [[RH45:%r[0-9]+]], [[RH67:%r[0-9]+]]}, [retval0];
 ; CHECK: st.param.v4.b32 [func_retval0], {[[RH01]], [[RH23]], [[RH45]], [[RH67]]};
@@ -504,20 +499,20 @@ define <8 x half> @test_v8f16(<8 x half> %a) {
 ; CHECK:.func (.param .align 32 .b8 func_retval0[32])
 ; CHECK-LABEL: test_v9f16(
 ; CHECK: .param .align 32 .b8 test_v9f16_param_0[32]
-; CHECK-DAG: ld.param.v4.b16 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]], [[E2:%rs[0-9]+]], [[E3:%rs[0-9]+]]}, [test_v9f16_param_0];
-; CHECK-DAG: ld.param.v4.b16 {[[E4:%rs[0-9]+]], [[E5:%rs[0-9]+]], [[E6:%rs[0-9]+]], [[E7:%rs[0-9]+]]}, [test_v9f16_param_0+8];
+; CHECK-DAG: ld.param.v2.b32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]]}, [test_v9f16_param_0];
+; CHECK-DAG: ld.param.v2.b32 {[[E2:%r[0-9]+]], [[E3:%r[0-9]+]]}, [test_v9f16_param_0+8];
 ; CHECK-DAG: ld.param.b16 [[E8:%rs[0-9]+]], [test_v9f16_param_0+16];
 ; CHECK: .param .align 32 .b8 param0[32];
-; CHECK-DAG: st.param.v4.b16 [param0],
-; CHECK-DAG: st.param.v4.b16 [param0+8],
-; CHECK-DAG: st.param.b16 [param0+16], [[E8]];
 ; CHECK: .param .align 32 .b8 retval0[32];
+; CHECK-DAG: st.param.v2.b32 [param0], {[[E0]], [[E1]]};
+; CHECK-DAG: st.param.v2.b32 [param0+8], {[[E2]], [[E3]]};
+; CHECK-DAG: st.param.b16 [param0+16], [[E8]];
 ; CHECK: call.uni (retval0), test_v9f16,
-; CHECK-DAG: ld.param.v4.b16 {[[R0:%rs[0-9]+]], [[R1:%rs[0-9]+]], [[R2:%rs[0-9]+]], [[R3:%rs[0-9]+]]}, [retval0];
-; CHECK-DAG: ld.param.v4.b16 {[[R4:%rs[0-9]+]], [[R5:%rs[0-9]+]], [[R6:%rs[0-9]+]], [[R7:%rs[0-9]+]]}, [retval0+8];
+; CHECK-DAG: ld.param.v2.b32 {[[R0:%r[0-9]+]], [[R1:%r[0-9]+]]}, [retval0];
+; CHECK-DAG: ld.param.v2.b32 {[[R2:%r[0-9]+]], [[R3:%r[0-9]+]]}, [retval0+8];
 ; CHECK-DAG: ld.param.b16 [[R8:%rs[0-9]+]], [retval0+16];
-; CHECK-DAG: st.param.v4.b16 [func_retval0], {[[R0]], [[R1]], [[R2]], [[R3]]};
-; CHECK-DAG: st.param.v4.b16 [func_retval0+8], {[[R4]], [[R5]], [[R6]], [[R7]]};
+; CHECK-DAG: st.param.v2.b32 [func_retval0], {[[R0]], [[R1]]};
+; CHECK-DAG: st.param.v2.b32 [func_retval0+8], {[[R2]], [[R3]]};
 ; CHECK-DAG: st.param.b16 [func_retval0+16], [[R8]];
 ; CHECK: ret;
 define <9 x half> @test_v9f16(<9 x half> %a) {
@@ -531,8 +526,8 @@ define <9 x half> @test_v9f16(<9 x half> %a) {
 ; CHECK-DAG: ld.param.b16 {{%r[0-9]+}}, [test_i19_param_0];
 ; CHECK-DAG: ld.param.b8 {{%r[0-9]+}}, [test_i19_param_0+2];
 ; CHECK: .param .b32 param0;
-; CHECK: st.param.b32 [param0], {{%r[0-9]+}};
 ; CHECK: .param .b32 retval0;
+; CHECK: st.param.b32 [param0], {{%r[0-9]+}};
 ; CHECK: call.uni (retval0), test_i19,
 ; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0];
 ; CHECK: st.param.b32 [func_retval0], {{%r[0-9]+}};
@@ -548,8 +543,8 @@ define i19 @test_i19(i19 %a) {
 ; CHECK-DAG: ld.param.b16 {{%r[0-9]+}}, [test_i23_param_0];
 ; CHECK-DAG: ld.param.b8 {{%r[0-9]+}}, [test_i23_param_0+2];
 ; CHECK: .param .b32 param0;
-; CHECK: st.param.b32 [param0], {{%r[0-9]+}};
 ; CHECK: .param .b32 retval0;
+; CHECK: st.param.b32 [param0], {{%r[0-9]+}};
 ; CHECK: call.uni (retval0), test_i23,
 ; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0];
 ; CHECK: st.param.b32 [func_retval0], {{%r[0-9]+}};
@@ -565,8 +560,8 @@ define i23 @test_i23(i23 %a) {
 ; CHECK-DAG: ld.param.b8 {{%r[0-9]+}}, [test_i24_param_0+2];
 ; CHECK-DAG: ld.param.b16 {{%r[0-9]+}}, [test_i24_param_0];
 ; CHECK: .param .b32 param0;
-; CHECK: st.param.b32 [param0], {{%r[0-9]+}};
 ; CHECK: .param .b32 retval0;
+; CHECK: st.param.b32 [param0], {{%r[0-9]+}};
 ; CHECK: call.uni (retval0), test_i24,
 ; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0];
 ; CHECK: st.param.b32 [func_retval0], {{%r[0-9]+}};
@@ -581,8 +576,8 @@ define i24 @test_i24(i24 %a) {
 ; CHECK-NEXT: .param .b32 test_i29_param_0
 ; CHECK: ld.param.b32 {{%r[0-9]+}}, [test_i29_param_0];
 ; CHECK: .param .b32 param0;
-; CHECK: st.param.b32 [param0], {{%r[0-9]+}};
 ; CHECK: .param .b32 retval0;
+; CHECK: st.param.b32 [param0], {{%r[0-9]+}};
 ; CHECK: call.uni (retval0), test_i29,
 ; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0];
 ; CHECK: st.param.b32 [func_retval0], {{%r[0-9]+}};
@@ -597,8 +592,8 @@ define i29 @test_i29(i29 %a) {
 ; CHECK-NEXT: .param .b32 test_i32_param_0
 ; CHECK: ld.param.b32 [[E:%r[0-9]+]], [test_i32_param_0];
 ; CHECK: .param .b32 param0;
-; CHECK: st.param.b32 [param0], [[E]];
 ; CHECK: .param .b32 retval0;
+; CHECK: st.param.b32 [param0], [[E]];
 ; CHECK: call.uni (retval0), test_i32,
 ; CHECK: ld.param.b32 [[R:%r[0-9]+]], [retval0];
 ; CHECK: st.param.b32 [func_retval0], [[R]];
@@ -613,10 +608,10 @@ define i32 @test_i32(i32 %a) {
 ; CHECK-NEXT: .param .align 16 .b8 test_v3i32_param_0[16]
 ; CHECK-DAG: ld.param.b32 [[E2:%r[0-9]+]], [test_v3i32_param_0+8];
 ; CHECK-DAG: ld.param.v2.b32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]]}, [test_v3i32_param_0];
-; CHECK: .param .align 16 .b8 param0[16];
-; CHECK: st.param.v2.b32 [param0], {[[E0]], [[E1]]};
-; CHECK: st.param.b32 [param0+8], [[E2]];
-; CHECK: .param .align 16 .b8 retval0[16];
+; CHECK-DAG: .param .align 16 .b8 param0[16];
+; CHECK-DAG: .param .align 16 .b8 retval0[16];
+; CHECK-DAG: st.param.v2.b32 [param0], {[[E0]], [[E1]]};
+; CHECK-DAG: st.param.b32 [param0+8], [[E2]];
 ; CHECK: call.uni (retval0), test_v3i32,
 ; CHECK: ld.param.v2.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]]}, [retval0];
 ; CHECK: ld.param.b32 [[RE2:%r[0-9]+]], [retval0+8];
@@ -632,9 +627,9 @@ define <3 x i32> @test_v3i32(<3 x i32> %a) {
 ; CHECK-LABEL: test_v4i32(
 ; CHECK-NEXT: .param .align 16 .b8 test_v4i32_param_0[16]
 ; CHECK: ld.param.v4.b32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]], [[E2:%r[0-9]+]], [[E3:%r[0-9]+]]}, [test_v4i32_param_0]
-; CHECK: .param .align 16 .b8 param0[16];
-; CHECK: st.param.v4.b32 [param0], {[[E0]], [[E1]], [[E2]], [[E3]]};
-; CHECK: .param .align 16 .b8 retval0[16];
+; CHECK-DAG: .param .align 16 .b8 param0[16];
+; CHECK-DAG: .param .align 16 .b8 retval0[16];
+; CHECK-DAG: st.param.v4.b32 [param0], {[[E0]], [[E1]], [[E2]], [[E3]]};
 ; CHECK: call.uni (retval0), test_v4i32,
 ; CHECK: ld.param.v4.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]], [[RE2:%r[0-9]+]], [[RE3:%r[0-9]+]]}, [retval0];
 ; CHECK: st.param.v4.b32 [func_retval0], {[[RE0]], [[RE1]], [[RE2]], [[RE3]]}
@@ -650,9 +645,9 @@ define <4 x i32> @test_v4i32(<4 x i32> %a) {
 ; CHECK-DAG: ld.param.b32 [[E4:%r[0-9]+]], [test_v5i32_param_0+16];
 ; CHECK-DAG: ld.param.v4.b32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]], [[E2:%r[0-9]+]], [[E3:%r[0-9]+]]}, [test_v5i32_param_0]
 ; CHECK: .param .align 32 .b8 param0[32];
+; CHECK: .param .align 32 .b8 retval0[32];
 ; CHECK-DAG: st.param.v4.b32 [param0], {[[E0]], [[E1]], [[E2]], [[E3]]};
 ; CHECK-DAG: st.param.b32 [param0+16], [[E4]];
-; CHECK: .param .align 32 .b8 retval0[32];
 ; CHECK: call.uni (retval0), test_v5i32,
 ; CHECK-DAG: ld.param.v4.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]], [[RE2:%r[0-9]+]], [[RE3:%r[0-9]+]]}, [retval0];
 ; CHECK-DAG: ld.param.b32 [[RE4:%r[0-9]+]], [retval0+16];
@@ -669,8 +664,8 @@ define <5 x i32> @test_v5i32(<5 x i32> %a) {
 ; CHECK-NEXT: .param .b32 test_f32_param_0
 ; CHECK: ld.param.b32 [[E:%r[0-9]+]], [test_f32_param_0];
 ; CHECK: .param .b32 param0;
-; CHECK: st.param.b32 [param0], [[E]];
 ; CHECK: .param .b32 retval0;
+; CHECK: st.param.b32 [param0], [[E]];
 ; CHECK: call.uni (retval0), test_f32,
 ; CHECK: ld.param.b32 [[R:%r[0-9]+]], [retval0];
 ; CHECK: st.param.b32 [func_retval0], [[R]];
@@ -686,8 +681,8 @@ define float @test_f32(float %a) {
 ; CHECK-DAG: ld.param.b8 {{%rd[0-9]+}}, [test_i40_param_0+4];
 ; CHECK-DAG: ld.param.b32 {{%rd[0-9]+}}, [test_i40_param_0];
 ; CHECK: .param .b64 param0;
-; CHECK: st.param.b64 [param0], {{%rd[0-9]+}};
 ; CHECK: .param .b64 retval0;
+; CHECK: st.param.b64 [param0], {{%rd[0-9]+}};
 ; CHECK: call.uni (retval0), test_i40,
 ; CHECK: ld.param.b64 {{%rd[0-9]+}}, [retval0];
 ; CHECK: st.param.b64 [func_retval0], {{%rd[0-9]+}};
@@ -703,8 +698,8 @@ define i40 @test_i40(i40 %a) {
 ; CHECK-DAG: ld.param.b16 {{%rd[0-9]+}}, [test_i47_param_0+4];
 ; CHECK-DAG: ld.param.b32 {{%rd[0-9]+}}, [test_i47_param_0];
 ; CHECK: .param .b64 param0;
-; CHECK: st.param.b64 [param0], {{%rd[0-9]+}};
 ; CHECK: .param .b64 retval0;
+; CHECK: st.param.b64 [param0], {{%rd[0-9]+}};
 ; CHECK: call.uni (retval0), test_i47,
 ; CHECK: ld.param.b64 {{%rd[0-9]+}}, [retval0];
 ; CHECK: st.param.b64 [func_retval0], {{%rd[0-9]+}};
@@ -720,8 +715,8 @@ define i47 @test_i47(i47 %a) {
 ; CHECK-DAG: ld.param.b16 {{%rd[0-9]+}}, [test_i48_param_0+4];
 ; CHECK-DAG: ld.param.b32 {{%rd[0-9]+}}, [test_i48_param_0];
 ; CHECK: .param .b64 param0;
-; CHECK: st.param.b64 [param0], {{%rd[0-9]+}};
 ; CHECK: .param .b64 retval0;
+; CHECK: st.param.b64 [param0], {{%rd[0-9]+}};
 ; CHECK: call.uni (retval0), test_i48,
 ; CHECK: ld.param.b64 {{%rd[0-9]+}}, [retval0];
 ; CHECK: st.param.b64 [func_retval0], {{%rd[0-9]+}};
@@ -738,8 +733,8 @@ define i48 @test_i48(i48 %a) {
 ; CHECK-DAG: ld.param.b16 {{%rd[0-9]+}}, [test_i51_param_0+4];
 ; CHECK-DAG: ld.param.b32 {{%rd[0-9]+}}, [test_i51_param_0];
 ; CHECK: .param .b64 param0;
-; CHECK: st.param.b64 [param0], {{%rd[0-9]+}};
 ; CHECK: .param .b64 retval0;
+; CHECK: st.param.b64 [param0], {{%rd[0-9]+}};
 ; CHECK: call.uni (retval0), test_i51,
 ; CHECK: ld.param.b64 {{%rd[0-9]+}}, [retval0];
 ; CHECK: st.param.b64 [func_retval0], {{%rd[0-9]+}};
@@ -756,8 +751,8 @@ define i51 @test_i51(i51 %a) {
 ; CHECK-DAG: ld.param.b16 {{%rd[0-9]+}}, [test_i56_param_0+4];
 ; CHECK-DAG: ld.param.b32 {{%rd[0-9]+}}, [test_i56_param_0];
 ; CHECK: .param .b64 param0;
-; CHECK: st.param.b64 [param0], {{%rd[0-9]+}};
 ; CHECK: .param .b64 retval0;
+; CHECK: st.param.b64 [param0], {{%rd[0-9]+}};
 ; CHECK: call.uni (retval0), test_i56,
 ; CHECK: ld.param.b64 {{%rd[0-9]+}}, [retval0];
 ; CHECK: st.param.b64 [func_retval0], {{%rd[0-9]+}};
@@ -772,8 +767,8 @@ define i56 @test_i56(i56 %a) {
 ; CHECK-NEXT: .param .b64 test_i57_param_0
 ; CHECK: ld.param.b64 {{%rd[0-9]+}}, [test_i57_param_0];
 ; CHECK: .param .b64 param0;
-; CHECK: st.param.b64 [param0], {{%rd[0-9]+}};
 ; CHECK: .param .b64 retval0;
+; CHECK: st.param.b64 [param0], {{%rd[0-9]+}};
 ; CHECK: call.uni (retval0), test_i57,
 ; CHECK: ld.param.b64 {{%rd[0-9]+}}, [retval0];
 ; CHECK: st.param.b64 [func_retval0], {{%rd[0-9]+}};
@@ -788,8 +783,8 @@ define i57 @test_i57(i57 %a) {
 ; CHECK-NEXT: .param .b64 test_i64_param_0
 ; CHECK: ld.param.b64 [[E:%rd[0-9]+]], [test_i64_param_0];
 ; CHECK: .param .b64 param0;
-; CHECK: st.param.b64 [param0], [[E]];
 ; CHECK: .param .b64 retval0;
+; CHECK: st.param.b64 [param0], [[E]];
 ; CHECK: call.uni (retval0), test_i64,
 ; CHECK: ld.param.b64 [[R:%rd[0-9]+]], [retval0];
 ; CHECK: st.param.b64 [func_retval0], [[R]];
@@ -805,9 +800,9 @@ define i64 @test_i64(i64 %a) {
 ; CHECK-DAG: ld.param.b64 [[E2:%rd[0-9]+]], [test_v3i64_param_0+16];
 ; CHECK-DAG: ld.param.v2.b64 {[[E0:%rd[0-9]+]], [[E1:%rd[0-9]+]]}, [test_v3i64_param_0];
 ; CHECK: .param .align 32 .b8 param0[32];
-; CHECK: st.param.v2.b64 [param0], {[[E0]], [[E1]]};
-; CHECK: st.param.b64 [param0+16], [[E2]];
 ; CHECK: .param .align 32 .b8 retval0[32];
+; CHECK-DAG: st.param.v2.b64 [param0], {[[E0]], [[E1]]};
+; CHECK-DAG: st.param.b64 [param0+16], [[E2]];
 ; CHECK: call.uni (retval0), test_v3i64,
 ; CHECK: ld.param.v2.b64 {[[RE0:%rd[0-9]+]], [[RE1:%rd[0-9]+]]}, [retval0];
 ; CHECK: ld.param.b64 [[RE2:%rd[0-9]+]], [retval0+16];
@@ -828,9 +823,9 @@ define <3 x i64> @test_v3i64(<3 x i64> %a) {
 ; CHECK-DAG: ld.param.v2.b64 {[[E2:%rd[0-9]+]], [[E3:%rd[0-9]+]]}, [test_v4i64_param_0+16];
 ; CHECK-DAG: ld.param.v2.b64 {[[E0:%rd[0-9]+]], [[E1:%rd[0-9]+]]}, [test_v4i64_param_0];
 ; CHECK: .param .align 32 .b8 param0[32];
-; CHECK: st.param.v2.b64 [param0], {[[E0]], [[E1]]};
-; CHECK: st.param.v2.b64 [param0+16], {[[E2]], [[E3]]};
 ; CHECK: .param .align 32 .b8 retval0[32];
+; CHECK-DAG: st.param.v2.b64 [param0], {[[E0]], [[E1]]};
+; CHECK-DAG: st.param.v2.b64 [param0+16], {[[E2]], [[E3]]};
 ; CHECK: call.uni (retval0), test_v4i64,
 ; CHECK: ld.param.v2.b64 {[[RE0:%rd[0-9]+]], [[RE1:%rd[0-9]+]]}, [retval0];
 ; CHECK: ld.param.v2.b64 {[[RE2:%rd[0-9]+]], [[RE3:%rd[0-9]+]]}, [retval0+16];
@@ -849,8 +844,8 @@ define <4 x i64> @test_v4i64(<4 x i64> %a) {
 ; CHECK-NEXT: .align 1 .b8 test_s_i1_param_0[1]
 ; CHECK: ld.param.b8 [[A:%rs[0-9]+]], [test_s_i1_param_0];
 ; CHECK: .param .align 1 .b8 param0[1];
-; CHECK: st.param.b8 [param0], [[A]]
 ; CHECK: .param .align 1 .b8 retval0[1];
+; CHECK: st.param.b8 [param0], [[A]]
 ; CHECK: call.uni (retval0), test_s_i1,
 ; CHECK: ld.param.b8 [[R:%rs[0-9]+]], [retval0];
 ; CHECK: st.param.b8 [func_retval0], [[R]];
@@ -865,8 +860,8 @@ define %s_i1 @test_s_i1(%s_i1 %a) {
 ; CHECK-NEXT: .param .align 1 .b8 test_s_i8_param_0[1]
 ; CHECK: ld.param.b8 [[A:%rs[0-9]+]], [test_s_i8_param_0];
 ; CHECK: .param .align 1 .b8 param0[1];
-; CHECK: st.param.b8 [param0], [[A]]
 ; CHECK: .param .align 1 .b8 retval0[1];
+; CHECK: st.param.b8 [param0], [[A]]
 ; CHECK: call.uni (retval0), test_s_i8,
 ; CHECK: ld.param.b8 [[R:%rs[0-9]+]], [retval0];
 ; CHECK: st.param.b8 [func_retval0], [[R]];
@@ -881,8 +876,8 @@ define %s_i8 @test_s_i8(%s_i8 %a) {
 ; CHECK-NEXT: .param .align 2 .b8 test_s_i16_param_0[2]
 ; CHECK: ld.param.b16 [[A:%rs[0-9]+]], [test_s_i16_param_0];
 ; CHECK: .param .align 2 .b8 param0[2];
-; CHECK: st.param.b16 [param0], [[A]]
 ; CHECK: .param .align 2 .b8 retval0[2];
+; CHECK: st.param.b16 [param0], [[A]]
 ; CHECK: call.uni (retval0), test_s_i16,
 ; CHECK: ld.param.b16 [[R:%rs[0-9]+]], [retval0];
 ; CHECK: st.param.b16 [func_retval0], [[R]];
@@ -897,8 +892,8 @@ define %s_i16 @test_s_i16(%s_i16 %a) {
 ; CHECK-NEXT: .param .align 2 .b8 test_s_f16_param_0[2]
 ; CHECK: ld.param.b16 [[A:%rs[0-9]+]], [test_s_f16_param_0];
 ; CHECK: .param .align 2 .b8 param0[2];
-; CHECK: st.param.b16 [param0], [[A]]
 ; CHECK: .param .align 2 .b8 retval0[2];
+; CHECK: st.param.b16 [param0], [[A]]
 ; CHECK: call.uni (retval0), test_s_f16,
 ; CHECK: ld.param.b16 [[R:%rs[0-9]+]], [retval0];
 ; CHECK: st.param.b16 [func_retval0], [[R]];
@@ -913,8 +908,8 @@ define %s_f16 @test_s_f16(%s_f16 %a) {
 ; CHECK-NEXT: .param .align 4 .b8 test_s_i32_param_0[4]
 ; CHECK: ld.param.b32 [[E:%r[0-9]+]], [test_s_i32_param_0];
 ; CHECK: .param .align 4 .b8 param0[4]
-; CHECK: st.param.b32 [param0], [[E]];
 ; CHECK: .param .align 4 .b8 retval0[4];
+; CHECK: st.param.b32 [param0], [[E]];
 ; CHECK: call.uni (retval0), test_s_i32,
 ; CHECK: ld.param.b32 [[R:%r[0-9]+]], [retval0];
 ; CHECK: st.param.b32 [func_retval0], [[R]];
@@ -929,8 +924,8 @@ define %s_i32 @test_s_i32(%s_i32 %a) {
 ; CHECK-NEXT: .param .align 4 .b8 test_s_f32_param_0[4]
 ; CHECK: ld.param.b32 [[E:%r[0-9]+]], [test_s_f32_param_0];
 ; CHECK: .param .align 4 .b8 param0[4]
-; CHECK: st.param.b32 [param0], [[E]];
 ; CHECK: .param .align 4 .b8 retval0[4];
+; CHECK: st.param.b32 [param0], [[E]];
 ; CHECK: call.uni (retval0), test_s_f32,
 ; CHECK: ld.param.b32 [[R:%r[0-9]+]], [retval0];
 ; CHECK: st.param.b32 [func_retval0], [[R]];
@@ -945,8 +940,8 @@ define %s_f32 @test_s_f32(%s_f32 %a) {
 ; CHECK-NEXT: .param .align 8 .b8 test_s_i64_param_0[8]
 ; CHECK: ld.param.b64 [[E:%rd[0-9]+]], [test_s_i64_param_0];
 ; CHECK: .param .align 8 .b8 param0[8];
-; CHECK: st.param.b64 [param0], [[E]];
 ; CHECK: .param .align 8 .b8 retval0[8];
+; CHECK: st.param.b64 [param0], [[E]];
 ; CHECK: call.uni (retval0), test_s_i64,
 ; CHECK: ld.param.b64 [[R:%rd[0-9]+]], [retval0];
 ; CHECK: st.param.b64 [func_retval0], [[R]];
@@ -966,12 +961,12 @@ define %s_i64 @test_s_i64(%s_i64 %a) {
 ; CHECK-DAG: ld.param.b32 [[E1:%r[0-9]+]], [test_s_i32f32_param_0+4];
 ; CHECK-DAG: ld.param.b32 [[E0:%r[0-9]+]], [test_s_i32f32_param_0];
 ; CHECK: .param .align 8 .b8 param0[24];
+; CHECK: .param .align 8 .b8 retval0[24];
 ; CHECK-DAG: st.param.b32 [param0], [[E0]];
 ; CHECK-DAG: st.param.b32 [param0+4], [[E1]];
 ; CHECK-DAG: st.param.b32 [param0+8], [[E2]];
 ; CHECK-DAG: st.param.b32 [param0+12], [[E3]];
 ; CHECK-DAG: st.param.b64 [param0+16], [[E4]];
-; CHECK: .param .align 8 .b8 retval0[24];
 ; CHECK: call.uni (retval0), test_s_i32f32,
 ; CHECK-DAG: ld.param.b32 [[RE0:%r[0-9]+]], [retval0];
 ; CHECK-DAG: ld.param.b32 [[RE1:%r[0-9]+]], [retval0+4];
@@ -997,10 +992,10 @@ define %s_i32f32 @test_s_i32f32(%s_i32f32 %a) {
 ; CHECK-DAG: ld.param.v2.b32 {[[E2:%r[0-9]+]], [[E3:%r[0-9]+]]}, [test_s_i32x4_param_0+8];
 ; CHECK-DAG: ld.param.v2.b32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]]}, [test_s_i32x4_param_0];
 ; CHECK: .param .align 8 .b8 param0[24];
-; CHECK: st.param.v2.b32 [param0], {[[E0]], [[E1]]};
-; CHECK: st.param.v2.b32 [param0+8], {[[E2]], [[E3]]};
-; CHECK: st.param.b64 [param0+16], [[E4]];
 ; CHECK: .param .align 8 .b8 retval0[24];
+; CHECK-DAG: st.param.v2.b32 [param0], {[[E0]], [[E1]]};
+; CHECK-DAG: st.param.v2.b32 [param0+8], {[[E2]], [[E3]]};
+; CHECK-DAG: st.param.b64 [param0+16], [[E4]];
 ; CHECK: call.uni (retval0), test_s_i32x4,
 ; CHECK: ld.param.v2.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]]}, [retval0];
 ; CHECK: ld.param.v2.b32 {[[RE2:%r[0-9]+]], [[RE3:%r[0-9]+]]}, [retval0+8];
@@ -1024,16 +1019,13 @@ define %s_i32x4 @test_s_i32x4(%s_i32x4 %a) {
 ; CHECK: ld.param.b8 [[E2:%rs[0-9]+]], [test_s_i1i32x4_param_0+8];
 ; CHECK: ld.param.v2.b32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]]}, [test_s_i1i32x4_param_0];
 ; CHECK: .param .align 8 .b8 param0[32];
-; CHECK: st.param.v2.b32 [param0], {[[E0]], [[E1]]};
-; CHECK: st.param.b8 [param0+8], [[E2]];
-; CHECK: st.param.b32 [param0+12], [[E3]];
-; CHECK: st.param.b32 [param0+16], [[E4]];
-; CHECK: st.param.b64 [param0+24], [[E5]];
 ; CHECK: .param .align 8 .b8 retval0[32];
-; CHECK: call.uni (retval0), test_s_i1i32x4,
-; CHECK: (
-; CHECK: param0
-; CHECK: );
+; CHECK-DAG: st.param.v2.b32 [param0], {[[E0]], [[E1]]};
+; CHECK-DAG: st.param.b8 [param0+8], [[E2]];
+; CHECK-DAG: st.param.b32 [param0+12], [[E3]];
+; CHECK-DAG: st.param.b32 [param0+16], [[E4]];
+; CHECK-DAG: st.param.b64 [param0+24], [[E5]];
+; CHECK: call.uni (retval0), test_s_i1i32x4, (param0);
 ; CHECK: ld.param.v2.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]]}, [retval0];
 ; CHECK: ld.param.b8 [[RE2:%rs[0-9]+]], [retval0+8];
 ; CHECK: ld.param.b32 [[RE3:%r[0-9]+]], [retval0+12];
@@ -1082,6 +1074,7 @@ define %s_i8i32x4 @test_s_i1i32x4(%s_i8i32x4 %a) {
 ; CHECK-DAG: ld.param.b8 %r{{.*}}, [test_s_i1i32x4p_param_0+1];
 ; CHECK-DAG: ld.param.b8 %r{{.*}}, [test_s_i1i32x4p_param_0];
 ; CHECK: .param .align 1 .b8 param0[25];
+; CHECK: .param .align 1 .b8 retval0[25];
 ; CHECK-DAG: st.param.b8 [param0],
 ; CHECK-DAG: st.param.b8 [param0+1],
 ; CHECK-DAG: st.param.b8 [param0+2],
@@ -1107,33 +1100,32 @@ define %s_i8i32x4 @test_s_i1i32x4(%s_i8i32x4 %a) {
 ; CHECK-DAG: st.param.b8 [param0+22],
 ; CHECK-DAG: st.param.b8 [param0+23],
 ; CHECK-DAG: st.param.b8 [param0+24],
-; CHECK: .param .align 1 .b8 retval0[25];
-; CHECK: call.uni (retval0), test_s_i1i32x4p,
-; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0];
-; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+1];
-; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+2];
-; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+3];
-; CHECK-DAG: ld.param.b8
%rs{{[0-9]+}}, [retval0+4]; -; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+5]; -; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+6]; -; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+7]; -; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+8]; -; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+9]; -; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+10]; -; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+11]; -; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+12]; -; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+13]; -; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+14]; -; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+15]; -; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+16]; -; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+17]; -; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+18]; -; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+19]; -; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+20]; -; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+21]; -; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+22]; -; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+23]; -; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+24]; +; CHECK: call.uni (retval0), test_s_i1i32x4p, (param0); +; CHECK-DAG: ld.param.b8 %rs{{[0-9]+}}, [retval0+8]; +; CHECK-DAG: ld.param.b8 %r{{[0-9]+}}, [retval0+3]; +; CHECK-DAG: ld.param.b8 %r{{[0-9]+}}, [retval0+2]; +; CHECK-DAG: ld.param.b8 %r{{[0-9]+}}, [retval0+1]; +; CHECK-DAG: ld.param.b8 %r{{[0-9]+}}, [retval0]; +; CHECK-DAG: ld.param.b8 %r{{[0-9]+}}, [retval0+7]; +; CHECK-DAG: ld.param.b8 %r{{[0-9]+}}, [retval0+6]; +; CHECK-DAG: ld.param.b8 %r{{[0-9]+}}, [retval0+5]; +; CHECK-DAG: ld.param.b8 %r{{[0-9]+}}, [retval0+4]; +; CHECK-DAG: ld.param.b8 %r{{[0-9]+}}, [retval0+12]; +; CHECK-DAG: ld.param.b8 %r{{[0-9]+}}, [retval0+11]; +; CHECK-DAG: ld.param.b8 %r{{[0-9]+}}, [retval0+10]; +; CHECK-DAG: ld.param.b8 %r{{[0-9]+}}, [retval0+9]; +; CHECK-DAG: ld.param.b8 %r{{[0-9]+}}, [retval0+16]; +; CHECK-DAG: ld.param.b8 %r{{[0-9]+}}, [retval0+15]; +; CHECK-DAG: ld.param.b8 %r{{[0-9]+}}, [retval0+14]; +; CHECK-DAG: ld.param.b8 %r{{[0-9]+}}, [retval0+13]; +; CHECK-DAG: ld.param.b8 %rd{{[0-9]+}}, [retval0+24]; +; CHECK-DAG: ld.param.b8 %rd{{[0-9]+}}, [retval0+23]; +; CHECK-DAG: ld.param.b8 %rd{{[0-9]+}}, [retval0+22]; +; CHECK-DAG: ld.param.b8 %rd{{[0-9]+}}, [retval0+21]; +; CHECK-DAG: ld.param.b8 %rd{{[0-9]+}}, [retval0+20]; +; CHECK-DAG: ld.param.b8 %rd{{[0-9]+}}, [retval0+19]; +; CHECK-DAG: ld.param.b8 %rd{{[0-9]+}}, [retval0+18]; +; CHECK-DAG: ld.param.b8 %rd{{[0-9]+}}, [retval0+17]; ; CHECK: } // callseq ; CHECK-DAG: st.param.b8 [func_retval0], ; CHECK-DAG: st.param.b8 [func_retval0+1], @@ -1177,13 +1169,13 @@ define %s_i8i32x4p @test_s_i1i32x4p(%s_i8i32x4p %a) { ; CHECK: ld.param.b32 [[E2:%r[0-9]+]], [test_s_crossfield_param_0+8]; ; CHECK: ld.param.v2.b32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]]}, [test_s_crossfield_param_0]; ; CHECK: .param .align 16 .b8 param0[80]; -; CHECK: st.param.v2.b32 [param0], {[[E0]], [[E1]]}; -; CHECK: st.param.b32 [param0+8], [[E2]]; -; CHECK: st.param.v4.b32 [param0+16], {[[E3]], [[E4]], [[E5]], [[E6]]}; -; CHECK: st.param.v4.b32 [param0+32], {[[E7]], [[E8]], [[E9]], [[E10]]}; -; CHECK: st.param.v4.b32 [param0+48], {[[E11]], [[E12]], [[E13]], [[E14]]}; -; CHECK: st.param.b32 [param0+64], [[E15]]; ; CHECK: .param .align 16 .b8 retval0[80]; +; CHECK-DAG: st.param.v2.b32 [param0], {[[E0]], [[E1]]}; +; CHECK-DAG: st.param.b32 [param0+8], [[E2]]; +; CHECK-DAG: st.param.v4.b32 [param0+16], {[[E3]], [[E4]], [[E5]], [[E6]]}; +; CHECK-DAG: st.param.v4.b32 [param0+32], 
{[[E7]], [[E8]], [[E9]], [[E10]]}; +; CHECK-DAG: st.param.v4.b32 [param0+48], {[[E11]], [[E12]], [[E13]], [[E14]]}; +; CHECK-DAG: st.param.b32 [param0+64], [[E15]]; ; CHECK: call.uni (retval0), test_s_crossfield, ; CHECK: ld.param.v2.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]]}, [retval0]; ; CHECK: ld.param.b32 [[RE2:%r[0-9]+]], [retval0+8]; diff --git a/llvm/test/CodeGen/NVPTX/param-overalign.ll b/llvm/test/CodeGen/NVPTX/param-overalign.ll index 88ad0b0..2155fb4 100644 --- a/llvm/test/CodeGen/NVPTX/param-overalign.ll +++ b/llvm/test/CodeGen/NVPTX/param-overalign.ll @@ -28,8 +28,8 @@ define float @caller_md(float %a, float %b) { ; CHECK-NEXT: ld.param.b32 %r2, [caller_md_param_1]; ; CHECK-NEXT: { // callseq 0, 0 ; CHECK-NEXT: .param .align 8 .b8 param0[8]; -; CHECK-NEXT: st.param.v2.b32 [param0], {%r1, %r2}; ; CHECK-NEXT: .param .b32 retval0; +; CHECK-NEXT: st.param.v2.b32 [param0], {%r1, %r2}; ; CHECK-NEXT: call.uni (retval0), callee_md, (param0); ; CHECK-NEXT: ld.param.b32 %r3, [retval0]; ; CHECK-NEXT: } // callseq 0 @@ -69,8 +69,8 @@ define float @caller(float %a, float %b) { ; CHECK-NEXT: ld.param.b32 %r2, [caller_param_1]; ; CHECK-NEXT: { // callseq 1, 0 ; CHECK-NEXT: .param .align 8 .b8 param0[8]; -; CHECK-NEXT: st.param.v2.b32 [param0], {%r1, %r2}; ; CHECK-NEXT: .param .b32 retval0; +; CHECK-NEXT: st.param.v2.b32 [param0], {%r1, %r2}; ; CHECK-NEXT: call.uni (retval0), callee, (param0); ; CHECK-NEXT: ld.param.b32 %r3, [retval0]; ; CHECK-NEXT: } // callseq 1 diff --git a/llvm/test/CodeGen/NVPTX/param-vectorize-device.ll b/llvm/test/CodeGen/NVPTX/param-vectorize-device.ll index a480984a..a592b82 100644 --- a/llvm/test/CodeGen/NVPTX/param-vectorize-device.ll +++ b/llvm/test/CodeGen/NVPTX/param-vectorize-device.ll @@ -84,8 +84,8 @@ define dso_local void @caller_St4x1(ptr nocapture noundef readonly byval(%struct ; CHECK: .param .b64 caller_St4x1_param_1 ; CHECK: ) ; CHECK: .param .b32 param0; - ; CHECK: st.param.b32 [param0], {{%r[0-9]+}}; ; CHECK: .param .align 16 .b8 retval0[4]; + ; CHECK: st.param.b32 [param0], {{%r[0-9]+}}; ; CHECK: call.uni (retval0), callee_St4x1, (param0); ; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0]; %1 = load i32, ptr %in, align 4 @@ -112,8 +112,8 @@ define dso_local void @caller_St4x2(ptr nocapture noundef readonly byval(%struct ; CHECK: .param .b64 caller_St4x2_param_1 ; CHECK: ) ; CHECK: .param .align 16 .b8 param0[8]; - ; CHECK: st.param.v2.b32 [param0], {{{%r[0-9]+}}, {{%r[0-9]+}}}; ; CHECK: .param .align 16 .b8 retval0[8]; + ; CHECK: st.param.v2.b32 [param0], {{{%r[0-9]+}}, {{%r[0-9]+}}}; ; CHECK: call.uni (retval0), callee_St4x2, (param0); ; CHECK: ld.param.v2.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0]; %agg.tmp = alloca %struct.St4x2, align 8 @@ -149,9 +149,9 @@ define dso_local void @caller_St4x3(ptr nocapture noundef readonly byval(%struct ; CHECK: .param .b64 caller_St4x3_param_1 ; CHECK: ) ; CHECK: .param .align 16 .b8 param0[12]; + ; CHECK: .param .align 16 .b8 retval0[12]; ; CHECK: st.param.v2.b32 [param0], {{{%r[0-9]+}}, {{%r[0-9]+}}}; ; CHECK: st.param.b32 [param0+8], {{%r[0-9]+}}; - ; CHECK: .param .align 16 .b8 retval0[12]; ; CHECK: call.uni (retval0), callee_St4x3, (param0); ; CHECK: ld.param.v2.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0]; ; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0+8]; @@ -193,8 +193,8 @@ define dso_local void @caller_St4x4(ptr nocapture noundef readonly byval(%struct ; CHECK: .param .b64 caller_St4x4_param_1 ; CHECK: ) ; CHECK: .param .align 16 .b8 param0[16]; - ; CHECK: st.param.v4.b32 [param0], {{{%r[0-9]+}}, {{%r[0-9]+}}, 
{{%r[0-9]+}}, {{%r[0-9]+}}}; ; CHECK: .param .align 16 .b8 retval0[16]; + ; CHECK: st.param.v4.b32 [param0], {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}; ; CHECK: call.uni (retval0), callee_St4x4, (param0); ; CHECK: ld.param.v4.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0]; %call = tail call fastcc [4 x i32] @callee_St4x4(ptr noundef nonnull byval(%struct.St4x4) align 4 %in) #2 @@ -239,9 +239,9 @@ define dso_local void @caller_St4x5(ptr nocapture noundef readonly byval(%struct ; CHECK: .param .b64 caller_St4x5_param_1 ; CHECK: ) ; CHECK: .param .align 16 .b8 param0[20]; + ; CHECK: .param .align 16 .b8 retval0[20]; ; CHECK: st.param.v4.b32 [param0], {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}; ; CHECK: st.param.b32 [param0+16], {{%r[0-9]+}}; - ; CHECK: .param .align 16 .b8 retval0[20]; ; CHECK: call.uni (retval0), callee_St4x5, (param0); ; CHECK: ld.param.v4.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0]; ; CHECK: ld.param.b32 {{%r[0-9]+}}, [retval0+16]; @@ -295,9 +295,9 @@ define dso_local void @caller_St4x6(ptr nocapture noundef readonly byval(%struct ; CHECK: .param .b64 caller_St4x6_param_1 ; CHECK: ) ; CHECK: .param .align 16 .b8 param0[24]; + ; CHECK: .param .align 16 .b8 retval0[24]; ; CHECK: st.param.v4.b32 [param0], {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}; ; CHECK: st.param.v2.b32 [param0+16], {{{%r[0-9]+}}, {{%r[0-9]+}}}; - ; CHECK: .param .align 16 .b8 retval0[24]; ; CHECK: call.uni (retval0), callee_St4x6, (param0); ; CHECK: ld.param.v4.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0]; ; CHECK: ld.param.v2.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0+16]; @@ -357,10 +357,10 @@ define dso_local void @caller_St4x7(ptr nocapture noundef readonly byval(%struct ; CHECK: .param .b64 caller_St4x7_param_1 ; CHECK: ) ; CHECK: .param .align 16 .b8 param0[28]; + ; CHECK: .param .align 16 .b8 retval0[28]; ; CHECK: st.param.v4.b32 [param0], {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}; ; CHECK: st.param.v2.b32 [param0+16], {{{%r[0-9]+}}, {{%r[0-9]+}}}; ; CHECK: st.param.b32 [param0+24], {{%r[0-9]+}}; - ; CHECK: .param .align 16 .b8 retval0[28]; ; CHECK: call.uni (retval0), callee_St4x7, (param0); ; CHECK: ld.param.v4.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0]; ; CHECK: ld.param.v2.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0+16]; @@ -429,9 +429,9 @@ define dso_local void @caller_St4x8(ptr nocapture noundef readonly byval(%struct ; CHECK: .param .b64 caller_St4x8_param_1 ; CHECK: ) ; CHECK: .param .align 16 .b8 param0[32]; - ; CHECK: st.param.v4.b32 [param0], {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}; - ; CHECK: st.param.v4.b32 [param0+16], {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}; ; CHECK: .param .align 16 .b8 retval0[32]; + ; CHECK-DAG: st.param.v4.b32 [param0], {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}; + ; CHECK-DAG: st.param.v4.b32 [param0+16], {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}; ; CHECK: call.uni (retval0), callee_St4x8, (param0); ; CHECK: ld.param.v4.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0]; ; CHECK: ld.param.v4.b32 {{{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}, {{%r[0-9]+}}}, [retval0+16]; @@ -503,8 +503,8 @@ define dso_local void @caller_St8x1(ptr nocapture noundef readonly byval(%struct ; CHECK: .param .b64 caller_St8x1_param_1 ; CHECK: ) ; CHECK: .param .b64 param0; - ; CHECK: st.param.b64 [param0], {{%rd[0-9]+}}; ; CHECK: .param .align 
16 .b8 retval0[8]; + ; CHECK: st.param.b64 [param0], {{%rd[0-9]+}}; ; CHECK: call.uni (retval0), callee_St8x1, (param0); ; CHECK: ld.param.b64 {{%rd[0-9]+}}, [retval0]; %1 = load i64, ptr %in, align 8 @@ -531,8 +531,8 @@ define dso_local void @caller_St8x2(ptr nocapture noundef readonly byval(%struct ; CHECK: .param .b64 caller_St8x2_param_1 ; CHECK: ) ; CHECK: .param .align 16 .b8 param0[16]; - ; CHECK: st.param.v2.b64 [param0], {{{%rd[0-9]+}}, {{%rd[0-9]+}}}; ; CHECK: .param .align 16 .b8 retval0[16]; + ; CHECK: st.param.v2.b64 [param0], {{{%rd[0-9]+}}, {{%rd[0-9]+}}}; ; CHECK: call.uni (retval0), callee_St8x2, (param0); ; CHECK: ld.param.v2.b64 {{{%rd[0-9]+}}, {{%rd[0-9]+}}}, [retval0]; %call = tail call fastcc [2 x i64] @callee_St8x2(ptr noundef nonnull byval(%struct.St8x2) align 8 %in) #2 @@ -565,9 +565,9 @@ define dso_local void @caller_St8x3(ptr nocapture noundef readonly byval(%struct ; CHECK: .param .b64 caller_St8x3_param_1 ; CHECK: ) ; CHECK: .param .align 16 .b8 param0[24]; + ; CHECK: .param .align 16 .b8 retval0[24]; ; CHECK: st.param.v2.b64 [param0], {{{%rd[0-9]+}}, {{%rd[0-9]+}}}; ; CHECK: st.param.b64 [param0+16], {{%rd[0-9]+}}; - ; CHECK: .param .align 16 .b8 retval0[24]; ; CHECK: call.uni (retval0), callee_St8x3, (param0); ; CHECK: ld.param.v2.b64 {{{%rd[0-9]+}}, {{%rd[0-9]+}}}, [retval0]; ; CHECK: ld.param.b64 {{%rd[0-9]+}}, [retval0+16]; @@ -609,9 +609,9 @@ define dso_local void @caller_St8x4(ptr nocapture noundef readonly byval(%struct ; CHECK: .param .b64 caller_St8x4_param_1 ; CHECK: ) ; CHECK: .param .align 16 .b8 param0[32]; - ; CHECK: st.param.v2.b64 [param0], {{{%rd[0-9]+}}, {{%rd[0-9]+}}}; - ; CHECK: st.param.v2.b64 [param0+16], {{{%rd[0-9]+}}, {{%rd[0-9]+}}}; ; CHECK: .param .align 16 .b8 retval0[32]; + ; CHECK-DAG: st.param.v2.b64 [param0], {{{%rd[0-9]+}}, {{%rd[0-9]+}}}; + ; CHECK-DAG: st.param.v2.b64 [param0+16], {{{%rd[0-9]+}}, {{%rd[0-9]+}}}; ; CHECK: call.uni (retval0), callee_St8x4, (param0); ; CHECK: ld.param.v2.b64 {{{%rd[0-9]+}}, {{%rd[0-9]+}}}, [retval0]; ; CHECK: ld.param.v2.b64 {{{%rd[0-9]+}}, {{%rd[0-9]+}}}, [retval0+16]; diff --git a/llvm/test/CodeGen/NVPTX/proxy-reg-erasure.mir b/llvm/test/CodeGen/NVPTX/proxy-reg-erasure.mir index 5d0d6f6..4a53152 100644 --- a/llvm/test/CodeGen/NVPTX/proxy-reg-erasure.mir +++ b/llvm/test/CodeGen/NVPTX/proxy-reg-erasure.mir @@ -77,7 +77,7 @@ constants: [] machineFunctionInfo: {} body: | bb.0: - %0:b32, %1:b32, %2:b32, %3:b32 = LoadParamMemV4I32 0 + %0:b32, %1:b32, %2:b32, %3:b32 = LDV_i32_v4 0, 0, 101, 3, 32, &retval0, 0 :: (load (s128), addrspace 101) ; CHECK-NOT: ProxyReg %4:b32 = ProxyRegB32 killed %0 %5:b32 = ProxyRegB32 killed %1 @@ -86,7 +86,7 @@ body: | ; CHECK: STV_i32_v4 killed %0, killed %1, killed %2, killed %3 STV_i32_v4 killed %4, killed %5, killed %6, killed %7, 0, 0, 101, 32, &func_retval0, 0 :: (store (s128), addrspace 101) - %8:b32 = LoadParamMemI32 0 + %8:b32 = LD_i32 0, 0, 101, 3, 32, &retval0, 0 :: (load (s32), addrspace 101) ; CHECK-NOT: ProxyReg %9:b32 = ProxyRegB32 killed %8 %10:b32 = ProxyRegB32 killed %9 diff --git a/llvm/test/CodeGen/NVPTX/st-param-imm.ll b/llvm/test/CodeGen/NVPTX/st-param-imm.ll index 6aa1119..f90435a 100644 --- a/llvm/test/CodeGen/NVPTX/st-param-imm.ll +++ b/llvm/test/CodeGen/NVPTX/st-param-imm.ll @@ -26,8 +26,8 @@ define void @st_param_i8_i16() { ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: { // callseq 0, 0 ; CHECK-NEXT: .param .align 2 .b8 param0[4]; -; CHECK-NEXT: st.param.b8 [param0], 1; ; CHECK-NEXT: st.param.b16 [param0+2], 2; +; CHECK-NEXT: st.param.b8 [param0], 1; ; 
CHECK-NEXT: call.uni call_i8_i16, (param0); ; CHECK-NEXT: } // callseq 0 ; CHECK-NEXT: ret; @@ -75,7 +75,7 @@ define void @st_param_f32() { ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: { // callseq 3, 0 ; CHECK-NEXT: .param .b32 param0; -; CHECK-NEXT: st.param.b32 [param0], 0f40A00000; +; CHECK-NEXT: st.param.b32 [param0], 1084227584; ; CHECK-NEXT: call.uni call_f32, (param0); ; CHECK-NEXT: } // callseq 3 ; CHECK-NEXT: ret; @@ -91,7 +91,7 @@ define void @st_param_f64() { ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: { // callseq 4, 0 ; CHECK-NEXT: .param .b64 param0; -; CHECK-NEXT: st.param.b64 [param0], 0d4018000000000000; +; CHECK-NEXT: st.param.b64 [param0], 4618441417868443648; ; CHECK-NEXT: call.uni call_f64, (param0); ; CHECK-NEXT: } // callseq 4 ; CHECK-NEXT: ret; @@ -165,7 +165,7 @@ define void @st_param_v2_i16_ii() { ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: { // callseq 8, 0 ; CHECK-NEXT: .param .align 4 .b8 param0[4]; -; CHECK-NEXT: st.param.v2.b16 [param0], {1, 2}; +; CHECK-NEXT: st.param.b32 [param0], 131073; ; CHECK-NEXT: call.uni call_v2_i16, (param0); ; CHECK-NEXT: } // callseq 8 ; CHECK-NEXT: ret; @@ -432,7 +432,7 @@ define void @st_param_v4_i8_iiii() { ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: { // callseq 23, 0 ; CHECK-NEXT: .param .align 4 .b8 param0[4]; -; CHECK-NEXT: st.param.v4.b8 [param0], {1, 2, 3, 4}; +; CHECK-NEXT: st.param.b32 [param0], 67305985; ; CHECK-NEXT: call.uni call_v4_i8, (param0); ; CHECK-NEXT: } // callseq 23 ; CHECK-NEXT: ret; @@ -442,15 +442,18 @@ define void @st_param_v4_i8_iiii() { define void @st_param_v4_i8_irrr(i8 %b, i8 %c, i8 %d) { ; CHECK-LABEL: st_param_v4_i8_irrr( ; CHECK: { -; CHECK-NEXT: .reg .b16 %rs<4>; +; CHECK-NEXT: .reg .b32 %r<7>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.b8 %rs1, [st_param_v4_i8_irrr_param_2]; -; CHECK-NEXT: ld.param.b8 %rs2, [st_param_v4_i8_irrr_param_1]; -; CHECK-NEXT: ld.param.b8 %rs3, [st_param_v4_i8_irrr_param_0]; +; CHECK-NEXT: ld.param.b8 %r1, [st_param_v4_i8_irrr_param_2]; +; CHECK-NEXT: ld.param.b8 %r2, [st_param_v4_i8_irrr_param_1]; +; CHECK-NEXT: prmt.b32 %r3, %r2, %r1, 0x3340U; +; CHECK-NEXT: ld.param.b8 %r4, [st_param_v4_i8_irrr_param_0]; +; CHECK-NEXT: prmt.b32 %r5, 1, %r4, 0x3340U; +; CHECK-NEXT: prmt.b32 %r6, %r5, %r3, 0x5410U; ; CHECK-NEXT: { // callseq 24, 0 ; CHECK-NEXT: .param .align 4 .b8 param0[4]; -; CHECK-NEXT: st.param.v4.b8 [param0], {1, %rs3, %rs2, %rs1}; +; CHECK-NEXT: st.param.b32 [param0], %r6; ; CHECK-NEXT: call.uni call_v4_i8, (param0); ; CHECK-NEXT: } // callseq 24 ; CHECK-NEXT: ret; @@ -464,15 +467,18 @@ define void @st_param_v4_i8_irrr(i8 %b, i8 %c, i8 %d) { define void @st_param_v4_i8_rirr(i8 %a, i8 %c, i8 %d) { ; CHECK-LABEL: st_param_v4_i8_rirr( ; CHECK: { -; CHECK-NEXT: .reg .b16 %rs<4>; +; CHECK-NEXT: .reg .b32 %r<7>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.b8 %rs1, [st_param_v4_i8_rirr_param_2]; -; CHECK-NEXT: ld.param.b8 %rs2, [st_param_v4_i8_rirr_param_1]; -; CHECK-NEXT: ld.param.b8 %rs3, [st_param_v4_i8_rirr_param_0]; +; CHECK-NEXT: ld.param.b8 %r1, [st_param_v4_i8_rirr_param_2]; +; CHECK-NEXT: ld.param.b8 %r2, [st_param_v4_i8_rirr_param_1]; +; CHECK-NEXT: prmt.b32 %r3, %r2, %r1, 0x3340U; +; CHECK-NEXT: ld.param.b8 %r4, [st_param_v4_i8_rirr_param_0]; +; CHECK-NEXT: prmt.b32 %r5, %r4, 2, 0x3340U; +; CHECK-NEXT: prmt.b32 %r6, %r5, %r3, 0x5410U; ; CHECK-NEXT: { // callseq 25, 0 ; CHECK-NEXT: .param .align 4 .b8 param0[4]; -; CHECK-NEXT: st.param.v4.b8 [param0], {%rs3, 2, %rs2, %rs1}; +; CHECK-NEXT: st.param.b32 [param0], %r6; ; CHECK-NEXT: call.uni 
call_v4_i8, (param0); ; CHECK-NEXT: } // callseq 25 ; CHECK-NEXT: ret; @@ -486,15 +492,18 @@ define void @st_param_v4_i8_rirr(i8 %a, i8 %c, i8 %d) { define void @st_param_v4_i8_rrir(i8 %a, i8 %b, i8 %d) { ; CHECK-LABEL: st_param_v4_i8_rrir( ; CHECK: { -; CHECK-NEXT: .reg .b16 %rs<4>; +; CHECK-NEXT: .reg .b32 %r<7>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.b8 %rs1, [st_param_v4_i8_rrir_param_2]; -; CHECK-NEXT: ld.param.b8 %rs2, [st_param_v4_i8_rrir_param_1]; -; CHECK-NEXT: ld.param.b8 %rs3, [st_param_v4_i8_rrir_param_0]; +; CHECK-NEXT: ld.param.b8 %r1, [st_param_v4_i8_rrir_param_1]; +; CHECK-NEXT: ld.param.b8 %r2, [st_param_v4_i8_rrir_param_0]; +; CHECK-NEXT: prmt.b32 %r3, %r2, %r1, 0x3340U; +; CHECK-NEXT: ld.param.b8 %r4, [st_param_v4_i8_rrir_param_2]; +; CHECK-NEXT: prmt.b32 %r5, 3, %r4, 0x3340U; +; CHECK-NEXT: prmt.b32 %r6, %r3, %r5, 0x5410U; ; CHECK-NEXT: { // callseq 26, 0 ; CHECK-NEXT: .param .align 4 .b8 param0[4]; -; CHECK-NEXT: st.param.v4.b8 [param0], {%rs3, %rs2, 3, %rs1}; +; CHECK-NEXT: st.param.b32 [param0], %r6; ; CHECK-NEXT: call.uni call_v4_i8, (param0); ; CHECK-NEXT: } // callseq 26 ; CHECK-NEXT: ret; @@ -508,15 +517,18 @@ define void @st_param_v4_i8_rrir(i8 %a, i8 %b, i8 %d) { define void @st_param_v4_i8_rrri(i8 %a, i8 %b, i8 %c) { ; CHECK-LABEL: st_param_v4_i8_rrri( ; CHECK: { -; CHECK-NEXT: .reg .b16 %rs<4>; +; CHECK-NEXT: .reg .b32 %r<7>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.b8 %rs1, [st_param_v4_i8_rrri_param_2]; -; CHECK-NEXT: ld.param.b8 %rs2, [st_param_v4_i8_rrri_param_1]; -; CHECK-NEXT: ld.param.b8 %rs3, [st_param_v4_i8_rrri_param_0]; +; CHECK-NEXT: ld.param.b8 %r1, [st_param_v4_i8_rrri_param_1]; +; CHECK-NEXT: ld.param.b8 %r2, [st_param_v4_i8_rrri_param_0]; +; CHECK-NEXT: prmt.b32 %r3, %r2, %r1, 0x3340U; +; CHECK-NEXT: ld.param.b8 %r4, [st_param_v4_i8_rrri_param_2]; +; CHECK-NEXT: prmt.b32 %r5, %r4, 4, 0x3340U; +; CHECK-NEXT: prmt.b32 %r6, %r3, %r5, 0x5410U; ; CHECK-NEXT: { // callseq 27, 0 ; CHECK-NEXT: .param .align 4 .b8 param0[4]; -; CHECK-NEXT: st.param.v4.b8 [param0], {%rs3, %rs2, %rs1, 4}; +; CHECK-NEXT: st.param.b32 [param0], %r6; ; CHECK-NEXT: call.uni call_v4_i8, (param0); ; CHECK-NEXT: } // callseq 27 ; CHECK-NEXT: ret; @@ -530,14 +542,16 @@ define void @st_param_v4_i8_rrri(i8 %a, i8 %b, i8 %c) { define void @st_param_v4_i8_iirr(i8 %c, i8 %d) { ; CHECK-LABEL: st_param_v4_i8_iirr( ; CHECK: { -; CHECK-NEXT: .reg .b16 %rs<3>; +; CHECK-NEXT: .reg .b32 %r<5>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.b8 %rs1, [st_param_v4_i8_iirr_param_1]; -; CHECK-NEXT: ld.param.b8 %rs2, [st_param_v4_i8_iirr_param_0]; +; CHECK-NEXT: ld.param.b8 %r1, [st_param_v4_i8_iirr_param_1]; +; CHECK-NEXT: ld.param.b8 %r2, [st_param_v4_i8_iirr_param_0]; +; CHECK-NEXT: prmt.b32 %r3, %r2, %r1, 0x3340U; +; CHECK-NEXT: prmt.b32 %r4, 513, %r3, 0x5410U; ; CHECK-NEXT: { // callseq 28, 0 ; CHECK-NEXT: .param .align 4 .b8 param0[4]; -; CHECK-NEXT: st.param.v4.b8 [param0], {1, 2, %rs2, %rs1}; +; CHECK-NEXT: st.param.b32 [param0], %r4; ; CHECK-NEXT: call.uni call_v4_i8, (param0); ; CHECK-NEXT: } // callseq 28 ; CHECK-NEXT: ret; @@ -551,14 +565,17 @@ define void @st_param_v4_i8_iirr(i8 %c, i8 %d) { define void @st_param_v4_i8_irir(i8 %b, i8 %d) { ; CHECK-LABEL: st_param_v4_i8_irir( ; CHECK: { -; CHECK-NEXT: .reg .b16 %rs<3>; +; CHECK-NEXT: .reg .b32 %r<6>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.b8 %rs1, [st_param_v4_i8_irir_param_1]; -; CHECK-NEXT: ld.param.b8 %rs2, [st_param_v4_i8_irir_param_0]; +; 
CHECK-NEXT: ld.param.b8 %r1, [st_param_v4_i8_irir_param_1]; +; CHECK-NEXT: prmt.b32 %r2, 3, %r1, 0x3340U; +; CHECK-NEXT: ld.param.b8 %r3, [st_param_v4_i8_irir_param_0]; +; CHECK-NEXT: prmt.b32 %r4, 1, %r3, 0x3340U; +; CHECK-NEXT: prmt.b32 %r5, %r4, %r2, 0x5410U; ; CHECK-NEXT: { // callseq 29, 0 ; CHECK-NEXT: .param .align 4 .b8 param0[4]; -; CHECK-NEXT: st.param.v4.b8 [param0], {1, %rs2, 3, %rs1}; +; CHECK-NEXT: st.param.b32 [param0], %r5; ; CHECK-NEXT: call.uni call_v4_i8, (param0); ; CHECK-NEXT: } // callseq 29 ; CHECK-NEXT: ret; @@ -572,14 +589,17 @@ define void @st_param_v4_i8_irir(i8 %b, i8 %d) { define void @st_param_v4_i8_irri(i8 %b, i8 %c) { ; CHECK-LABEL: st_param_v4_i8_irri( ; CHECK: { -; CHECK-NEXT: .reg .b16 %rs<3>; +; CHECK-NEXT: .reg .b32 %r<6>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.b8 %rs1, [st_param_v4_i8_irri_param_1]; -; CHECK-NEXT: ld.param.b8 %rs2, [st_param_v4_i8_irri_param_0]; +; CHECK-NEXT: ld.param.b8 %r1, [st_param_v4_i8_irri_param_1]; +; CHECK-NEXT: prmt.b32 %r2, %r1, 4, 0x3340U; +; CHECK-NEXT: ld.param.b8 %r3, [st_param_v4_i8_irri_param_0]; +; CHECK-NEXT: prmt.b32 %r4, 1, %r3, 0x3340U; +; CHECK-NEXT: prmt.b32 %r5, %r4, %r2, 0x5410U; ; CHECK-NEXT: { // callseq 30, 0 ; CHECK-NEXT: .param .align 4 .b8 param0[4]; -; CHECK-NEXT: st.param.v4.b8 [param0], {1, %rs2, %rs1, 4}; +; CHECK-NEXT: st.param.b32 [param0], %r5; ; CHECK-NEXT: call.uni call_v4_i8, (param0); ; CHECK-NEXT: } // callseq 30 ; CHECK-NEXT: ret; @@ -593,14 +613,17 @@ define void @st_param_v4_i8_irri(i8 %b, i8 %c) { define void @st_param_v4_i8_riir(i8 %a, i8 %d) { ; CHECK-LABEL: st_param_v4_i8_riir( ; CHECK: { -; CHECK-NEXT: .reg .b16 %rs<3>; +; CHECK-NEXT: .reg .b32 %r<6>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.b8 %rs1, [st_param_v4_i8_riir_param_1]; -; CHECK-NEXT: ld.param.b8 %rs2, [st_param_v4_i8_riir_param_0]; +; CHECK-NEXT: ld.param.b8 %r1, [st_param_v4_i8_riir_param_1]; +; CHECK-NEXT: prmt.b32 %r2, 3, %r1, 0x3340U; +; CHECK-NEXT: ld.param.b8 %r3, [st_param_v4_i8_riir_param_0]; +; CHECK-NEXT: prmt.b32 %r4, %r3, 2, 0x3340U; +; CHECK-NEXT: prmt.b32 %r5, %r4, %r2, 0x5410U; ; CHECK-NEXT: { // callseq 31, 0 ; CHECK-NEXT: .param .align 4 .b8 param0[4]; -; CHECK-NEXT: st.param.v4.b8 [param0], {%rs2, 2, 3, %rs1}; +; CHECK-NEXT: st.param.b32 [param0], %r5; ; CHECK-NEXT: call.uni call_v4_i8, (param0); ; CHECK-NEXT: } // callseq 31 ; CHECK-NEXT: ret; @@ -614,14 +637,17 @@ define void @st_param_v4_i8_riir(i8 %a, i8 %d) { define void @st_param_v4_i8_riri(i8 %a, i8 %c) { ; CHECK-LABEL: st_param_v4_i8_riri( ; CHECK: { -; CHECK-NEXT: .reg .b16 %rs<3>; +; CHECK-NEXT: .reg .b32 %r<6>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.b8 %rs1, [st_param_v4_i8_riri_param_1]; -; CHECK-NEXT: ld.param.b8 %rs2, [st_param_v4_i8_riri_param_0]; +; CHECK-NEXT: ld.param.b8 %r1, [st_param_v4_i8_riri_param_1]; +; CHECK-NEXT: prmt.b32 %r2, %r1, 4, 0x3340U; +; CHECK-NEXT: ld.param.b8 %r3, [st_param_v4_i8_riri_param_0]; +; CHECK-NEXT: prmt.b32 %r4, %r3, 2, 0x3340U; +; CHECK-NEXT: prmt.b32 %r5, %r4, %r2, 0x5410U; ; CHECK-NEXT: { // callseq 32, 0 ; CHECK-NEXT: .param .align 4 .b8 param0[4]; -; CHECK-NEXT: st.param.v4.b8 [param0], {%rs2, 2, %rs1, 4}; +; CHECK-NEXT: st.param.b32 [param0], %r5; ; CHECK-NEXT: call.uni call_v4_i8, (param0); ; CHECK-NEXT: } // callseq 32 ; CHECK-NEXT: ret; @@ -635,14 +661,16 @@ define void @st_param_v4_i8_riri(i8 %a, i8 %c) { define void @st_param_v4_i8_rrii(i8 %a, i8 %b) { ; CHECK-LABEL: st_param_v4_i8_rrii( ; CHECK: { -; CHECK-NEXT: .reg .b16 
%rs<3>; +; CHECK-NEXT: .reg .b32 %r<5>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.b8 %rs1, [st_param_v4_i8_rrii_param_1]; -; CHECK-NEXT: ld.param.b8 %rs2, [st_param_v4_i8_rrii_param_0]; +; CHECK-NEXT: ld.param.b8 %r1, [st_param_v4_i8_rrii_param_1]; +; CHECK-NEXT: ld.param.b8 %r2, [st_param_v4_i8_rrii_param_0]; +; CHECK-NEXT: prmt.b32 %r3, %r2, %r1, 0x3340U; +; CHECK-NEXT: prmt.b32 %r4, %r3, 1027, 0x5410U; ; CHECK-NEXT: { // callseq 33, 0 ; CHECK-NEXT: .param .align 4 .b8 param0[4]; -; CHECK-NEXT: st.param.v4.b8 [param0], {%rs2, %rs1, 3, 4}; +; CHECK-NEXT: st.param.b32 [param0], %r4; ; CHECK-NEXT: call.uni call_v4_i8, (param0); ; CHECK-NEXT: } // callseq 33 ; CHECK-NEXT: ret; @@ -656,13 +684,15 @@ define void @st_param_v4_i8_rrii(i8 %a, i8 %b) { define void @st_param_v4_i8_iiir(i8 %d) { ; CHECK-LABEL: st_param_v4_i8_iiir( ; CHECK: { -; CHECK-NEXT: .reg .b16 %rs<2>; +; CHECK-NEXT: .reg .b32 %r<4>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.b8 %rs1, [st_param_v4_i8_iiir_param_0]; ; CHECK-NEXT: { // callseq 34, 0 ; CHECK-NEXT: .param .align 4 .b8 param0[4]; -; CHECK-NEXT: st.param.v4.b8 [param0], {1, 2, 3, %rs1}; +; CHECK-NEXT: ld.param.b8 %r1, [st_param_v4_i8_iiir_param_0]; +; CHECK-NEXT: prmt.b32 %r2, 3, %r1, 0x3340U; +; CHECK-NEXT: prmt.b32 %r3, 513, %r2, 0x5410U; +; CHECK-NEXT: st.param.b32 [param0], %r3; ; CHECK-NEXT: call.uni call_v4_i8, (param0); ; CHECK-NEXT: } // callseq 34 ; CHECK-NEXT: ret; @@ -676,13 +706,15 @@ define void @st_param_v4_i8_iiir(i8 %d) { define void @st_param_v4_i8_iiri(i8 %c) { ; CHECK-LABEL: st_param_v4_i8_iiri( ; CHECK: { -; CHECK-NEXT: .reg .b16 %rs<2>; +; CHECK-NEXT: .reg .b32 %r<4>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.b8 %rs1, [st_param_v4_i8_iiri_param_0]; ; CHECK-NEXT: { // callseq 35, 0 ; CHECK-NEXT: .param .align 4 .b8 param0[4]; -; CHECK-NEXT: st.param.v4.b8 [param0], {1, 2, %rs1, 4}; +; CHECK-NEXT: ld.param.b8 %r1, [st_param_v4_i8_iiri_param_0]; +; CHECK-NEXT: prmt.b32 %r2, %r1, 4, 0x3340U; +; CHECK-NEXT: prmt.b32 %r3, 513, %r2, 0x5410U; +; CHECK-NEXT: st.param.b32 [param0], %r3; ; CHECK-NEXT: call.uni call_v4_i8, (param0); ; CHECK-NEXT: } // callseq 35 ; CHECK-NEXT: ret; @@ -696,13 +728,15 @@ define void @st_param_v4_i8_iiri(i8 %c) { define void @st_param_v4_i8_irii(i8 %b) { ; CHECK-LABEL: st_param_v4_i8_irii( ; CHECK: { -; CHECK-NEXT: .reg .b16 %rs<2>; +; CHECK-NEXT: .reg .b32 %r<4>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.b8 %rs1, [st_param_v4_i8_irii_param_0]; ; CHECK-NEXT: { // callseq 36, 0 ; CHECK-NEXT: .param .align 4 .b8 param0[4]; -; CHECK-NEXT: st.param.v4.b8 [param0], {1, %rs1, 3, 4}; +; CHECK-NEXT: ld.param.b8 %r1, [st_param_v4_i8_irii_param_0]; +; CHECK-NEXT: prmt.b32 %r2, 1, %r1, 0x3340U; +; CHECK-NEXT: prmt.b32 %r3, %r2, 1027, 0x5410U; +; CHECK-NEXT: st.param.b32 [param0], %r3; ; CHECK-NEXT: call.uni call_v4_i8, (param0); ; CHECK-NEXT: } // callseq 36 ; CHECK-NEXT: ret; @@ -716,13 +750,15 @@ define void @st_param_v4_i8_irii(i8 %b) { define void @st_param_v4_i8_riii(i8 %a) { ; CHECK-LABEL: st_param_v4_i8_riii( ; CHECK: { -; CHECK-NEXT: .reg .b16 %rs<2>; +; CHECK-NEXT: .reg .b32 %r<4>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.b8 %rs1, [st_param_v4_i8_riii_param_0]; ; CHECK-NEXT: { // callseq 37, 0 ; CHECK-NEXT: .param .align 4 .b8 param0[4]; -; CHECK-NEXT: st.param.v4.b8 [param0], {%rs1, 2, 3, 4}; +; CHECK-NEXT: ld.param.b8 %r1, [st_param_v4_i8_riii_param_0]; +; CHECK-NEXT: prmt.b32 %r2, %r1, 2, 0x3340U; +; CHECK-NEXT: prmt.b32 
%r3, %r2, 1027, 0x5410U; +; CHECK-NEXT: st.param.b32 [param0], %r3; ; CHECK-NEXT: call.uni call_v4_i8, (param0); ; CHECK-NEXT: } // callseq 37 ; CHECK-NEXT: ret; @@ -742,7 +778,7 @@ define void @st_param_v4_i16_iiii() { ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: { // callseq 38, 0 ; CHECK-NEXT: .param .align 8 .b8 param0[8]; -; CHECK-NEXT: st.param.v4.b16 [param0], {1, 2, 3, 4}; +; CHECK-NEXT: st.param.v2.b32 [param0], {131073, 262147}; ; CHECK-NEXT: call.uni call_v4_i16, (param0); ; CHECK-NEXT: } // callseq 38 ; CHECK-NEXT: ret; @@ -841,13 +877,15 @@ define void @st_param_v4_i16_iirr(i16 %c, i16 %d) { ; CHECK-LABEL: st_param_v4_i16_iirr( ; CHECK: { ; CHECK-NEXT: .reg .b16 %rs<3>; +; CHECK-NEXT: .reg .b32 %r<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b16 %rs1, [st_param_v4_i16_iirr_param_0]; ; CHECK-NEXT: ld.param.b16 %rs2, [st_param_v4_i16_iirr_param_1]; +; CHECK-NEXT: mov.b32 %r1, {%rs1, %rs2}; ; CHECK-NEXT: { // callseq 43, 0 ; CHECK-NEXT: .param .align 8 .b8 param0[8]; -; CHECK-NEXT: st.param.v4.b16 [param0], {1, 2, %rs1, %rs2}; +; CHECK-NEXT: st.param.v2.b32 [param0], {131073, %r1}; ; CHECK-NEXT: call.uni call_v4_i16, (param0); ; CHECK-NEXT: } // callseq 43 ; CHECK-NEXT: ret; @@ -946,13 +984,15 @@ define void @st_param_v4_i16_rrii(i16 %a, i16 %b) { ; CHECK-LABEL: st_param_v4_i16_rrii( ; CHECK: { ; CHECK-NEXT: .reg .b16 %rs<3>; +; CHECK-NEXT: .reg .b32 %r<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b16 %rs1, [st_param_v4_i16_rrii_param_0]; ; CHECK-NEXT: ld.param.b16 %rs2, [st_param_v4_i16_rrii_param_1]; +; CHECK-NEXT: mov.b32 %r1, {%rs1, %rs2}; ; CHECK-NEXT: { // callseq 48, 0 ; CHECK-NEXT: .param .align 8 .b8 param0[8]; -; CHECK-NEXT: st.param.v4.b16 [param0], {%rs1, %rs2, 3, 4}; +; CHECK-NEXT: st.param.v2.b32 [param0], {%r1, 262147}; ; CHECK-NEXT: call.uni call_v4_i16, (param0); ; CHECK-NEXT: } // callseq 48 ; CHECK-NEXT: ret; @@ -966,13 +1006,16 @@ define void @st_param_v4_i16_rrii(i16 %a, i16 %b) { define void @st_param_v4_i16_iiir(i16 %d) { ; CHECK-LABEL: st_param_v4_i16_iiir( ; CHECK: { -; CHECK-NEXT: .reg .b16 %rs<2>; +; CHECK-NEXT: .reg .b16 %rs<3>; +; CHECK-NEXT: .reg .b32 %r<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b16 %rs1, [st_param_v4_i16_iiir_param_0]; +; CHECK-NEXT: mov.b16 %rs2, 3; +; CHECK-NEXT: mov.b32 %r1, {%rs2, %rs1}; ; CHECK-NEXT: { // callseq 49, 0 ; CHECK-NEXT: .param .align 8 .b8 param0[8]; -; CHECK-NEXT: st.param.v4.b16 [param0], {1, 2, 3, %rs1}; +; CHECK-NEXT: st.param.v2.b32 [param0], {131073, %r1}; ; CHECK-NEXT: call.uni call_v4_i16, (param0); ; CHECK-NEXT: } // callseq 49 ; CHECK-NEXT: ret; @@ -986,13 +1029,16 @@ define void @st_param_v4_i16_iiir(i16 %d) { define void @st_param_v4_i16_iiri(i16 %c) { ; CHECK-LABEL: st_param_v4_i16_iiri( ; CHECK: { -; CHECK-NEXT: .reg .b16 %rs<2>; +; CHECK-NEXT: .reg .b16 %rs<3>; +; CHECK-NEXT: .reg .b32 %r<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b16 %rs1, [st_param_v4_i16_iiri_param_0]; +; CHECK-NEXT: mov.b16 %rs2, 4; +; CHECK-NEXT: mov.b32 %r1, {%rs1, %rs2}; ; CHECK-NEXT: { // callseq 50, 0 ; CHECK-NEXT: .param .align 8 .b8 param0[8]; -; CHECK-NEXT: st.param.v4.b16 [param0], {1, 2, %rs1, 4}; +; CHECK-NEXT: st.param.v2.b32 [param0], {131073, %r1}; ; CHECK-NEXT: call.uni call_v4_i16, (param0); ; CHECK-NEXT: } // callseq 50 ; CHECK-NEXT: ret; @@ -1006,13 +1052,16 @@ define void @st_param_v4_i16_iiri(i16 %c) { define void @st_param_v4_i16_irii(i16 %b) { ; CHECK-LABEL: st_param_v4_i16_irii( ; CHECK: { -; CHECK-NEXT: .reg .b16 %rs<2>; +; 
CHECK-NEXT: .reg .b16 %rs<3>; +; CHECK-NEXT: .reg .b32 %r<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b16 %rs1, [st_param_v4_i16_irii_param_0]; +; CHECK-NEXT: mov.b16 %rs2, 1; +; CHECK-NEXT: mov.b32 %r1, {%rs2, %rs1}; ; CHECK-NEXT: { // callseq 51, 0 ; CHECK-NEXT: .param .align 8 .b8 param0[8]; -; CHECK-NEXT: st.param.v4.b16 [param0], {1, %rs1, 3, 4}; +; CHECK-NEXT: st.param.v2.b32 [param0], {%r1, 262147}; ; CHECK-NEXT: call.uni call_v4_i16, (param0); ; CHECK-NEXT: } // callseq 51 ; CHECK-NEXT: ret; @@ -1026,13 +1075,16 @@ define void @st_param_v4_i16_irii(i16 %b) { define void @st_param_v4_i16_riii(i16 %a) { ; CHECK-LABEL: st_param_v4_i16_riii( ; CHECK: { -; CHECK-NEXT: .reg .b16 %rs<2>; +; CHECK-NEXT: .reg .b16 %rs<3>; +; CHECK-NEXT: .reg .b32 %r<2>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: ; CHECK-NEXT: ld.param.b16 %rs1, [st_param_v4_i16_riii_param_0]; +; CHECK-NEXT: mov.b16 %rs2, 2; +; CHECK-NEXT: mov.b32 %r1, {%rs1, %rs2}; ; CHECK-NEXT: { // callseq 52, 0 ; CHECK-NEXT: .param .align 8 .b8 param0[8]; -; CHECK-NEXT: st.param.v4.b16 [param0], {%rs1, 2, 3, 4}; +; CHECK-NEXT: st.param.v2.b32 [param0], {%r1, 262147}; ; CHECK-NEXT: call.uni call_v4_i16, (param0); ; CHECK-NEXT: } // callseq 52 ; CHECK-NEXT: ret; @@ -1672,13 +1724,12 @@ declare void @call_v4_f32(%struct.float4 alignstack(16)) define void @st_param_bfloat() { ; CHECK-LABEL: st_param_bfloat( ; CHECK: { -; CHECK-NEXT: .reg .b16 %rs<2>; +; CHECK-EMPTY: ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: mov.b16 %rs1, 0x4100; ; CHECK-NEXT: { // callseq 83, 0 ; CHECK-NEXT: .param .align 2 .b8 param0[2]; -; CHECK-NEXT: st.param.b16 [param0], %rs1; +; CHECK-NEXT: st.param.b16 [param0], 0x4100; ; CHECK-NEXT: call.uni call_bfloat, (param0); ; CHECK-NEXT: } // callseq 83 ; CHECK-NEXT: ret; diff --git a/llvm/test/CodeGen/NVPTX/store-undef.ll b/llvm/test/CodeGen/NVPTX/store-undef.ll index 5b31b5e..c8ca6b6 100644 --- a/llvm/test/CodeGen/NVPTX/store-undef.ll +++ b/llvm/test/CodeGen/NVPTX/store-undef.ll @@ -34,9 +34,9 @@ define void @test_store_param_def(i64 %param0, i32 %param1) { ; CHECK-NEXT: ld.param.b32 %r1, [test_store_param_def_param_1]; ; CHECK-NEXT: { // callseq 1, 0 ; CHECK-NEXT: .param .align 16 .b8 param0[32]; +; CHECK-NEXT: st.param.v4.b32 [param0+16], {%r2, %r1, %r3, %r4}; +; CHECK-NEXT: st.param.v2.b32 [param0+8], {%r5, %r1}; ; CHECK-NEXT: st.param.b64 [param0], %rd1; -; CHECK-NEXT: st.param.v2.b32 [param0+8], {%r2, %r1}; -; CHECK-NEXT: st.param.v4.b32 [param0+16], {%r3, %r1, %r4, %r5}; ; CHECK-NEXT: call.uni test_call, (param0); ; CHECK-NEXT: } // callseq 1 ; CHECK-NEXT: ret; diff --git a/llvm/test/CodeGen/NVPTX/tex-read-cuda.ll b/llvm/test/CodeGen/NVPTX/tex-read-cuda.ll index d6961a9..3138d7c 100644 --- a/llvm/test/CodeGen/NVPTX/tex-read-cuda.ll +++ b/llvm/test/CodeGen/NVPTX/tex-read-cuda.ll @@ -69,8 +69,8 @@ define ptx_kernel void @baz(ptr %red, i32 %idx) { ; CHECK-NEXT: tex.1d.v4.f32.s32 {%r2, %r3, %r4, %r5}, [tex0, {%r1}]; ; CHECK-NEXT: { // callseq 0, 0 ; CHECK-NEXT: .param .b64 param0; -; CHECK-NEXT: st.param.b64 [param0], %rd3; ; CHECK-NEXT: .param .b32 retval0; +; CHECK-NEXT: st.param.b64 [param0], %rd3; ; CHECK-NEXT: call.uni (retval0), texfunc, (param0); ; CHECK-NEXT: ld.param.b32 %r6, [retval0]; ; CHECK-NEXT: } // callseq 0 diff --git a/llvm/test/CodeGen/NVPTX/unaligned-param-load-store.ll b/llvm/test/CodeGen/NVPTX/unaligned-param-load-store.ll index 87e46b1..697eb90 100644 --- a/llvm/test/CodeGen/NVPTX/unaligned-param-load-store.ll +++ b/llvm/test/CodeGen/NVPTX/unaligned-param-load-store.ll 
@@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 ; Verifies correctness of load/store of parameters and return values. -; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_35 -O0 -verify-machineinstrs | FileCheck -allow-deprecated-dag-overlap %s -; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_35 -O0 -verify-machineinstrs | %ptxas-verify %} +; RUN: llc < %s -mtriple=nvptx64 -mcpu=sm_35 -verify-machineinstrs | FileCheck -allow-deprecated-dag-overlap %s +; RUN: %if ptxas %{ llc < %s -mtriple=nvptx64 -mcpu=sm_35 -verify-machineinstrs | %ptxas-verify %} %s_i8i16p = type { <{ i16, i8, i16 }>, i64 } %s_i8i32p = type { <{ i32, i8, i32 }>, i64 } @@ -24,37 +24,35 @@ define %s_i8i16p @test_s_i8i16p(%s_i8i16p %a) { ; CHECK-LABEL: test_s_i8i16p( ; CHECK: { -; CHECK-NEXT: .reg .b16 %rs<15>; +; CHECK-NEXT: .reg .b16 %rs<13>; +; CHECK-NEXT: .reg .b32 %r<2>; ; CHECK-NEXT: .reg .b64 %rd<4>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.b8 %rs4, [test_s_i8i16p_param_0+4]; -; CHECK-NEXT: shl.b16 %rs5, %rs4, 8; -; CHECK-NEXT: ld.param.b8 %rs6, [test_s_i8i16p_param_0+3]; -; CHECK-NEXT: or.b16 %rs3, %rs5, %rs6; +; CHECK-NEXT: ld.param.b32 %r1, [test_s_i8i16p_param_0]; ; CHECK-NEXT: ld.param.b64 %rd1, [test_s_i8i16p_param_0+8]; -; CHECK-NEXT: ld.param.b8 %rs2, [test_s_i8i16p_param_0+2]; -; CHECK-NEXT: ld.param.b16 %rs1, [test_s_i8i16p_param_0]; +; CHECK-NEXT: ld.param.b8 %rs1, [test_s_i8i16p_param_0+4]; ; CHECK-NEXT: { // callseq 0, 0 ; CHECK-NEXT: .param .align 8 .b8 param0[16]; -; CHECK-NEXT: st.param.b16 [param0], %rs1; -; CHECK-NEXT: st.param.b8 [param0+2], %rs2; -; CHECK-NEXT: st.param.b8 [param0+3], %rs3; -; CHECK-NEXT: st.param.b8 [param0+4], %rs4; -; CHECK-NEXT: st.param.b64 [param0+8], %rd1; ; CHECK-NEXT: .param .align 8 .b8 retval0[16]; +; CHECK-NEXT: st.param.b8 [param0+4], %rs1; +; CHECK-NEXT: st.param.b64 [param0+8], %rd1; +; CHECK-NEXT: st.param.b32 [param0], %r1; ; CHECK-NEXT: call.uni (retval0), test_s_i8i16p, (param0); -; CHECK-NEXT: ld.param.b16 %rs7, [retval0]; -; CHECK-NEXT: ld.param.b8 %rs8, [retval0+2]; -; CHECK-NEXT: ld.param.b8 %rs9, [retval0+3]; -; CHECK-NEXT: ld.param.b8 %rs10, [retval0+4]; ; CHECK-NEXT: ld.param.b64 %rd2, [retval0+8]; +; CHECK-NEXT: ld.param.b8 %rs2, [retval0+2]; +; CHECK-NEXT: ld.param.b16 %rs3, [retval0]; +; CHECK-NEXT: ld.param.b8 %rs4, [retval0+4]; +; CHECK-NEXT: ld.param.b8 %rs5, [retval0+3]; ; CHECK-NEXT: } // callseq 0 -; CHECK-NEXT: st.param.b16 [func_retval0], %rs7; -; CHECK-NEXT: st.param.b8 [func_retval0+2], %rs8; -; CHECK-NEXT: st.param.b8 [func_retval0+4], %rs10; -; CHECK-NEXT: st.param.b8 [func_retval0+3], %rs9; +; CHECK-NEXT: shl.b16 %rs8, %rs4, 8; +; CHECK-NEXT: or.b16 %rs9, %rs8, %rs5; +; CHECK-NEXT: st.param.b8 [func_retval0+3], %rs5; ; CHECK-NEXT: st.param.b64 [func_retval0+8], %rd2; +; CHECK-NEXT: st.param.b8 [func_retval0+2], %rs2; +; CHECK-NEXT: st.param.b16 [func_retval0], %rs3; +; CHECK-NEXT: shr.u16 %rs12, %rs9, 8; +; CHECK-NEXT: st.param.b8 [func_retval0+4], %rs12; ; CHECK-NEXT: ret; %r = tail call %s_i8i16p @test_s_i8i16p(%s_i8i16p %a) ret %s_i8i16p %r @@ -64,56 +62,51 @@ define %s_i8i16p @test_s_i8i16p(%s_i8i16p %a) { define %s_i8i32p @test_s_i8i32p(%s_i8i32p %a) { ; CHECK-LABEL: test_s_i8i32p( ; CHECK: { -; CHECK-NEXT: .reg .b16 %rs<12>; -; CHECK-NEXT: .reg .b32 %r<20>; +; CHECK-NEXT: .reg .b16 %rs<4>; +; CHECK-NEXT: .reg .b32 %r<24>; ; CHECK-NEXT: .reg .b64 %rd<4>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.b8 %r3, 
[test_s_i8i32p_param_0+6]; -; CHECK-NEXT: shl.b32 %r4, %r3, 8; -; CHECK-NEXT: ld.param.b8 %r5, [test_s_i8i32p_param_0+5]; -; CHECK-NEXT: or.b32 %r6, %r4, %r5; -; CHECK-NEXT: ld.param.b8 %r7, [test_s_i8i32p_param_0+7]; -; CHECK-NEXT: shl.b32 %r8, %r7, 16; -; CHECK-NEXT: ld.param.b8 %r9, [test_s_i8i32p_param_0+8]; -; CHECK-NEXT: shl.b32 %r10, %r9, 24; -; CHECK-NEXT: or.b32 %r11, %r10, %r8; -; CHECK-NEXT: or.b32 %r2, %r11, %r6; -; CHECK-NEXT: ld.param.b64 %rd1, [test_s_i8i32p_param_0+16]; -; CHECK-NEXT: ld.param.b8 %rs1, [test_s_i8i32p_param_0+4]; ; CHECK-NEXT: ld.param.b32 %r1, [test_s_i8i32p_param_0]; -; CHECK-NEXT: shr.u32 %r12, %r2, 8; -; CHECK-NEXT: shr.u32 %r13, %r11, 16; +; CHECK-NEXT: ld.param.b16 %rs1, [test_s_i8i32p_param_0+4]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_s_i8i32p_param_0+16]; +; CHECK-NEXT: ld.param.b8 %r2, [test_s_i8i32p_param_0+6]; +; CHECK-NEXT: ld.param.b8 %r3, [test_s_i8i32p_param_0+7]; +; CHECK-NEXT: ld.param.b8 %r4, [test_s_i8i32p_param_0+8]; ; CHECK-NEXT: { // callseq 1, 0 ; CHECK-NEXT: .param .align 8 .b8 param0[24]; -; CHECK-NEXT: st.param.b32 [param0], %r1; -; CHECK-NEXT: st.param.b8 [param0+4], %rs1; -; CHECK-NEXT: st.param.b8 [param0+5], %r2; -; CHECK-NEXT: st.param.b8 [param0+6], %r12; -; CHECK-NEXT: st.param.b8 [param0+7], %r13; -; CHECK-NEXT: st.param.b8 [param0+8], %r9; -; CHECK-NEXT: st.param.b64 [param0+16], %rd1; ; CHECK-NEXT: .param .align 8 .b8 retval0[24]; +; CHECK-NEXT: st.param.b8 [param0+8], %r4; +; CHECK-NEXT: st.param.b8 [param0+7], %r3; +; CHECK-NEXT: st.param.b8 [param0+6], %r2; +; CHECK-NEXT: st.param.b64 [param0+16], %rd1; +; CHECK-NEXT: st.param.b16 [param0+4], %rs1; +; CHECK-NEXT: st.param.b32 [param0], %r1; ; CHECK-NEXT: call.uni (retval0), test_s_i8i32p, (param0); -; CHECK-NEXT: ld.param.b32 %r14, [retval0]; -; CHECK-NEXT: ld.param.b8 %rs2, [retval0+4]; -; CHECK-NEXT: ld.param.b8 %rs3, [retval0+5]; -; CHECK-NEXT: ld.param.b8 %rs4, [retval0+6]; -; CHECK-NEXT: ld.param.b8 %rs5, [retval0+7]; -; CHECK-NEXT: ld.param.b8 %rs6, [retval0+8]; ; CHECK-NEXT: ld.param.b64 %rd2, [retval0+16]; +; CHECK-NEXT: ld.param.b8 %rs2, [retval0+4]; +; CHECK-NEXT: ld.param.b32 %r5, [retval0]; +; CHECK-NEXT: ld.param.b8 %r6, [retval0+8]; +; CHECK-NEXT: ld.param.b8 %r7, [retval0+7]; +; CHECK-NEXT: ld.param.b8 %r8, [retval0+6]; +; CHECK-NEXT: ld.param.b8 %r9, [retval0+5]; ; CHECK-NEXT: } // callseq 1 -; CHECK-NEXT: cvt.u32.u16 %r15, %rs3; -; CHECK-NEXT: cvt.u32.u16 %r16, %rs4; -; CHECK-NEXT: cvt.u32.u16 %r17, %rs5; -; CHECK-NEXT: cvt.u32.u16 %r18, %rs6; -; CHECK-NEXT: st.param.b32 [func_retval0], %r14; -; CHECK-NEXT: st.param.b8 [func_retval0+4], %rs2; -; CHECK-NEXT: st.param.b8 [func_retval0+8], %r18; -; CHECK-NEXT: st.param.b8 [func_retval0+7], %r17; -; CHECK-NEXT: st.param.b8 [func_retval0+6], %r16; -; CHECK-NEXT: st.param.b8 [func_retval0+5], %r15; +; CHECK-NEXT: shl.b32 %r12, %r8, 8; +; CHECK-NEXT: or.b32 %r13, %r12, %r9; +; CHECK-NEXT: shl.b32 %r15, %r7, 16; +; CHECK-NEXT: shl.b32 %r17, %r6, 24; +; CHECK-NEXT: or.b32 %r18, %r17, %r15; +; CHECK-NEXT: or.b32 %r19, %r18, %r13; +; CHECK-NEXT: st.param.b8 [func_retval0+5], %r9; ; CHECK-NEXT: st.param.b64 [func_retval0+16], %rd2; +; CHECK-NEXT: st.param.b8 [func_retval0+4], %rs2; +; CHECK-NEXT: st.param.b32 [func_retval0], %r5; +; CHECK-NEXT: shr.u32 %r21, %r19, 24; +; CHECK-NEXT: st.param.b8 [func_retval0+8], %r21; +; CHECK-NEXT: shr.u32 %r22, %r19, 16; +; CHECK-NEXT: st.param.b8 [func_retval0+7], %r22; +; CHECK-NEXT: shr.u32 %r23, %r19, 8; +; CHECK-NEXT: st.param.b8 [func_retval0+6], %r23; ; CHECK-NEXT: ret; 
%r = tail call %s_i8i32p @test_s_i8i32p(%s_i8i32p %a) ret %s_i8i32p %r @@ -123,112 +116,66 @@ define %s_i8i32p @test_s_i8i32p(%s_i8i32p %a) { define %s_i8i64p @test_s_i8i64p(%s_i8i64p %a) { ; CHECK-LABEL: test_s_i8i64p( ; CHECK: { -; CHECK-NEXT: .reg .b16 %rs<20>; -; CHECK-NEXT: .reg .b64 %rd<68>; +; CHECK-NEXT: .reg .b16 %rs<3>; +; CHECK-NEXT: .reg .b64 %rd<46>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.b8 %rd4, [test_s_i8i64p_param_0+10]; -; CHECK-NEXT: shl.b64 %rd5, %rd4, 8; -; CHECK-NEXT: ld.param.b8 %rd6, [test_s_i8i64p_param_0+9]; -; CHECK-NEXT: or.b64 %rd7, %rd5, %rd6; -; CHECK-NEXT: ld.param.b8 %rd8, [test_s_i8i64p_param_0+11]; -; CHECK-NEXT: shl.b64 %rd9, %rd8, 16; -; CHECK-NEXT: ld.param.b8 %rd10, [test_s_i8i64p_param_0+12]; -; CHECK-NEXT: shl.b64 %rd11, %rd10, 24; -; CHECK-NEXT: or.b64 %rd12, %rd11, %rd9; -; CHECK-NEXT: or.b64 %rd13, %rd12, %rd7; -; CHECK-NEXT: ld.param.b8 %rd14, [test_s_i8i64p_param_0+14]; -; CHECK-NEXT: shl.b64 %rd15, %rd14, 8; -; CHECK-NEXT: ld.param.b8 %rd16, [test_s_i8i64p_param_0+13]; -; CHECK-NEXT: or.b64 %rd17, %rd15, %rd16; -; CHECK-NEXT: ld.param.b8 %rd18, [test_s_i8i64p_param_0+15]; -; CHECK-NEXT: shl.b64 %rd19, %rd18, 16; -; CHECK-NEXT: ld.param.b8 %rd20, [test_s_i8i64p_param_0+16]; -; CHECK-NEXT: shl.b64 %rd21, %rd20, 24; -; CHECK-NEXT: or.b64 %rd22, %rd21, %rd19; -; CHECK-NEXT: or.b64 %rd23, %rd22, %rd17; -; CHECK-NEXT: shl.b64 %rd24, %rd23, 32; -; CHECK-NEXT: or.b64 %rd2, %rd24, %rd13; -; CHECK-NEXT: ld.param.b64 %rd3, [test_s_i8i64p_param_0+24]; -; CHECK-NEXT: ld.param.b8 %rs1, [test_s_i8i64p_param_0+8]; ; CHECK-NEXT: ld.param.b64 %rd1, [test_s_i8i64p_param_0]; -; CHECK-NEXT: shr.u64 %rd25, %rd2, 8; -; CHECK-NEXT: shr.u64 %rd26, %rd2, 16; -; CHECK-NEXT: shr.u64 %rd27, %rd2, 24; -; CHECK-NEXT: bfe.u64 %rd28, %rd23, 8, 24; -; CHECK-NEXT: bfe.u64 %rd29, %rd23, 16, 16; -; CHECK-NEXT: bfe.u64 %rd30, %rd23, 24, 8; +; CHECK-NEXT: ld.param.b64 %rd2, [test_s_i8i64p_param_0+8]; +; CHECK-NEXT: ld.param.b64 %rd3, [test_s_i8i64p_param_0+24]; +; CHECK-NEXT: ld.param.b8 %rd4, [test_s_i8i64p_param_0+16]; ; CHECK-NEXT: { // callseq 2, 0 ; CHECK-NEXT: .param .align 8 .b8 param0[32]; -; CHECK-NEXT: st.param.b64 [param0], %rd1; -; CHECK-NEXT: st.param.b8 [param0+8], %rs1; -; CHECK-NEXT: st.param.b8 [param0+9], %rd2; -; CHECK-NEXT: st.param.b8 [param0+10], %rd25; -; CHECK-NEXT: st.param.b8 [param0+11], %rd26; -; CHECK-NEXT: st.param.b8 [param0+12], %rd27; -; CHECK-NEXT: st.param.b8 [param0+13], %rd23; -; CHECK-NEXT: st.param.b8 [param0+14], %rd28; -; CHECK-NEXT: st.param.b8 [param0+15], %rd29; -; CHECK-NEXT: st.param.b8 [param0+16], %rd30; -; CHECK-NEXT: st.param.b64 [param0+24], %rd3; ; CHECK-NEXT: .param .align 8 .b8 retval0[32]; +; CHECK-NEXT: st.param.b8 [param0+16], %rd4; +; CHECK-NEXT: st.param.b64 [param0+24], %rd3; +; CHECK-NEXT: st.param.b64 [param0+8], %rd2; +; CHECK-NEXT: st.param.b64 [param0], %rd1; ; CHECK-NEXT: call.uni (retval0), test_s_i8i64p, (param0); -; CHECK-NEXT: ld.param.b64 %rd31, [retval0]; -; CHECK-NEXT: ld.param.b8 %rs2, [retval0+8]; -; CHECK-NEXT: ld.param.b8 %rs3, [retval0+9]; -; CHECK-NEXT: ld.param.b8 %rs4, [retval0+10]; -; CHECK-NEXT: ld.param.b8 %rs5, [retval0+11]; -; CHECK-NEXT: ld.param.b8 %rs6, [retval0+12]; -; CHECK-NEXT: ld.param.b8 %rs7, [retval0+13]; -; CHECK-NEXT: ld.param.b8 %rs8, [retval0+14]; -; CHECK-NEXT: ld.param.b8 %rs9, [retval0+15]; -; CHECK-NEXT: ld.param.b8 %rs10, [retval0+16]; -; CHECK-NEXT: ld.param.b64 %rd32, [retval0+24]; +; CHECK-NEXT: ld.param.b64 %rd5, [retval0+24]; +; CHECK-NEXT: 
ld.param.b8 %rs1, [retval0+8]; +; CHECK-NEXT: ld.param.b64 %rd6, [retval0]; +; CHECK-NEXT: ld.param.b8 %rd7, [retval0+16]; +; CHECK-NEXT: ld.param.b8 %rd8, [retval0+15]; +; CHECK-NEXT: ld.param.b8 %rd9, [retval0+14]; +; CHECK-NEXT: ld.param.b8 %rd10, [retval0+13]; +; CHECK-NEXT: ld.param.b8 %rd11, [retval0+12]; +; CHECK-NEXT: ld.param.b8 %rd12, [retval0+11]; +; CHECK-NEXT: ld.param.b8 %rd13, [retval0+10]; +; CHECK-NEXT: ld.param.b8 %rd14, [retval0+9]; ; CHECK-NEXT: } // callseq 2 -; CHECK-NEXT: cvt.u64.u16 %rd33, %rs3; -; CHECK-NEXT: and.b64 %rd34, %rd33, 255; -; CHECK-NEXT: cvt.u64.u16 %rd35, %rs4; -; CHECK-NEXT: and.b64 %rd36, %rd35, 255; -; CHECK-NEXT: shl.b64 %rd37, %rd36, 8; -; CHECK-NEXT: or.b64 %rd38, %rd34, %rd37; -; CHECK-NEXT: cvt.u64.u16 %rd39, %rs5; -; CHECK-NEXT: and.b64 %rd40, %rd39, 255; -; CHECK-NEXT: shl.b64 %rd41, %rd40, 16; -; CHECK-NEXT: or.b64 %rd42, %rd38, %rd41; -; CHECK-NEXT: cvt.u64.u16 %rd43, %rs6; -; CHECK-NEXT: and.b64 %rd44, %rd43, 255; -; CHECK-NEXT: shl.b64 %rd45, %rd44, 24; -; CHECK-NEXT: or.b64 %rd46, %rd42, %rd45; -; CHECK-NEXT: cvt.u64.u16 %rd47, %rs7; -; CHECK-NEXT: and.b64 %rd48, %rd47, 255; -; CHECK-NEXT: shl.b64 %rd49, %rd48, 32; -; CHECK-NEXT: or.b64 %rd50, %rd46, %rd49; -; CHECK-NEXT: cvt.u64.u16 %rd51, %rs8; -; CHECK-NEXT: and.b64 %rd52, %rd51, 255; -; CHECK-NEXT: shl.b64 %rd53, %rd52, 40; -; CHECK-NEXT: or.b64 %rd54, %rd50, %rd53; -; CHECK-NEXT: cvt.u64.u16 %rd55, %rs9; -; CHECK-NEXT: and.b64 %rd56, %rd55, 255; -; CHECK-NEXT: shl.b64 %rd57, %rd56, 48; -; CHECK-NEXT: or.b64 %rd58, %rd54, %rd57; -; CHECK-NEXT: cvt.u64.u16 %rd59, %rs10; -; CHECK-NEXT: shl.b64 %rd60, %rd59, 56; -; CHECK-NEXT: or.b64 %rd61, %rd58, %rd60; -; CHECK-NEXT: st.param.b64 [func_retval0], %rd31; -; CHECK-NEXT: st.param.b8 [func_retval0+8], %rs2; +; CHECK-NEXT: shl.b64 %rd17, %rd13, 8; +; CHECK-NEXT: or.b64 %rd18, %rd17, %rd14; +; CHECK-NEXT: shl.b64 %rd20, %rd12, 16; +; CHECK-NEXT: shl.b64 %rd22, %rd11, 24; +; CHECK-NEXT: or.b64 %rd23, %rd22, %rd20; +; CHECK-NEXT: or.b64 %rd24, %rd23, %rd18; +; CHECK-NEXT: shl.b64 %rd27, %rd9, 8; +; CHECK-NEXT: or.b64 %rd28, %rd27, %rd10; +; CHECK-NEXT: shl.b64 %rd30, %rd8, 16; +; CHECK-NEXT: shl.b64 %rd32, %rd7, 24; +; CHECK-NEXT: or.b64 %rd33, %rd32, %rd30; +; CHECK-NEXT: or.b64 %rd34, %rd33, %rd28; +; CHECK-NEXT: shl.b64 %rd35, %rd34, 32; +; CHECK-NEXT: or.b64 %rd36, %rd35, %rd24; +; CHECK-NEXT: st.param.b8 [func_retval0+9], %rd14; +; CHECK-NEXT: st.param.b64 [func_retval0+24], %rd5; +; CHECK-NEXT: st.param.b8 [func_retval0+8], %rs1; +; CHECK-NEXT: st.param.b64 [func_retval0], %rd6; +; CHECK-NEXT: shr.u64 %rd39, %rd36, 56; +; CHECK-NEXT: st.param.b8 [func_retval0+16], %rd39; +; CHECK-NEXT: shr.u64 %rd40, %rd36, 48; +; CHECK-NEXT: st.param.b8 [func_retval0+15], %rd40; +; CHECK-NEXT: shr.u64 %rd41, %rd36, 40; +; CHECK-NEXT: st.param.b8 [func_retval0+14], %rd41; +; CHECK-NEXT: shr.u64 %rd42, %rd36, 32; +; CHECK-NEXT: st.param.b8 [func_retval0+13], %rd42; +; CHECK-NEXT: shr.u64 %rd43, %rd36, 24; ; CHECK-NEXT: st.param.b8 [func_retval0+12], %rd43; -; CHECK-NEXT: st.param.b8 [func_retval0+11], %rd39; -; CHECK-NEXT: st.param.b8 [func_retval0+10], %rd35; -; CHECK-NEXT: st.param.b8 [func_retval0+9], %rd33; -; CHECK-NEXT: shr.u64 %rd64, %rd50, 32; -; CHECK-NEXT: st.param.b8 [func_retval0+13], %rd64; -; CHECK-NEXT: shr.u64 %rd65, %rd54, 40; -; CHECK-NEXT: st.param.b8 [func_retval0+14], %rd65; -; CHECK-NEXT: shr.u64 %rd66, %rd58, 48; -; CHECK-NEXT: st.param.b8 [func_retval0+15], %rd66; -; CHECK-NEXT: shr.u64 %rd67, %rd61, 56; -; CHECK-NEXT: st.param.b8 
[func_retval0+16], %rd67; -; CHECK-NEXT: st.param.b64 [func_retval0+24], %rd32; +; CHECK-NEXT: shr.u64 %rd44, %rd36, 16; +; CHECK-NEXT: st.param.b8 [func_retval0+11], %rd44; +; CHECK-NEXT: shr.u64 %rd45, %rd36, 8; +; CHECK-NEXT: st.param.b8 [func_retval0+10], %rd45; ; CHECK-NEXT: ret; %r = tail call %s_i8i64p @test_s_i8i64p(%s_i8i64p %a) ret %s_i8i64p %r @@ -242,33 +189,32 @@ define %s_i8f16p @test_s_i8f16p(%s_i8f16p %a) { ; CHECK-NEXT: .reg .b64 %rd<4>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.b8 %rs4, [test_s_i8f16p_param_0+4]; -; CHECK-NEXT: shl.b16 %rs5, %rs4, 8; -; CHECK-NEXT: ld.param.b8 %rs6, [test_s_i8f16p_param_0+3]; -; CHECK-NEXT: or.b16 %rs3, %rs5, %rs6; -; CHECK-NEXT: ld.param.b64 %rd1, [test_s_i8f16p_param_0+8]; -; CHECK-NEXT: ld.param.b8 %rs2, [test_s_i8f16p_param_0+2]; ; CHECK-NEXT: ld.param.b16 %rs1, [test_s_i8f16p_param_0]; +; CHECK-NEXT: ld.param.b16 %rs2, [test_s_i8f16p_param_0+2]; +; CHECK-NEXT: ld.param.b64 %rd1, [test_s_i8f16p_param_0+8]; +; CHECK-NEXT: ld.param.b8 %rs3, [test_s_i8f16p_param_0+4]; ; CHECK-NEXT: { // callseq 3, 0 ; CHECK-NEXT: .param .align 8 .b8 param0[16]; -; CHECK-NEXT: st.param.b16 [param0], %rs1; -; CHECK-NEXT: st.param.b8 [param0+2], %rs2; -; CHECK-NEXT: st.param.b8 [param0+3], %rs3; -; CHECK-NEXT: st.param.b8 [param0+4], %rs4; -; CHECK-NEXT: st.param.b64 [param0+8], %rd1; ; CHECK-NEXT: .param .align 8 .b8 retval0[16]; +; CHECK-NEXT: st.param.b8 [param0+4], %rs3; +; CHECK-NEXT: st.param.b64 [param0+8], %rd1; +; CHECK-NEXT: st.param.b16 [param0+2], %rs2; +; CHECK-NEXT: st.param.b16 [param0], %rs1; ; CHECK-NEXT: call.uni (retval0), test_s_i8f16p, (param0); -; CHECK-NEXT: ld.param.b16 %rs7, [retval0]; -; CHECK-NEXT: ld.param.b8 %rs8, [retval0+2]; -; CHECK-NEXT: ld.param.b8 %rs9, [retval0+3]; -; CHECK-NEXT: ld.param.b8 %rs10, [retval0+4]; ; CHECK-NEXT: ld.param.b64 %rd2, [retval0+8]; +; CHECK-NEXT: ld.param.b8 %rs4, [retval0+2]; +; CHECK-NEXT: ld.param.b16 %rs5, [retval0]; +; CHECK-NEXT: ld.param.b8 %rs6, [retval0+4]; +; CHECK-NEXT: ld.param.b8 %rs7, [retval0+3]; ; CHECK-NEXT: } // callseq 3 -; CHECK-NEXT: st.param.b16 [func_retval0], %rs7; -; CHECK-NEXT: st.param.b8 [func_retval0+2], %rs8; -; CHECK-NEXT: st.param.b8 [func_retval0+4], %rs10; -; CHECK-NEXT: st.param.b8 [func_retval0+3], %rs9; +; CHECK-NEXT: shl.b16 %rs10, %rs6, 8; +; CHECK-NEXT: or.b16 %rs11, %rs10, %rs7; +; CHECK-NEXT: st.param.b8 [func_retval0+3], %rs7; ; CHECK-NEXT: st.param.b64 [func_retval0+8], %rd2; +; CHECK-NEXT: st.param.b8 [func_retval0+2], %rs4; +; CHECK-NEXT: st.param.b16 [func_retval0], %rs5; +; CHECK-NEXT: shr.u16 %rs14, %rs11, 8; +; CHECK-NEXT: st.param.b8 [func_retval0+4], %rs14; ; CHECK-NEXT: ret; %r = tail call %s_i8f16p @test_s_i8f16p(%s_i8f16p %a) ret %s_i8f16p %r @@ -278,56 +224,51 @@ define %s_i8f16p @test_s_i8f16p(%s_i8f16p %a) { define %s_i8f16x2p @test_s_i8f16x2p(%s_i8f16x2p %a) { ; CHECK-LABEL: test_s_i8f16x2p( ; CHECK: { -; CHECK-NEXT: .reg .b16 %rs<12>; -; CHECK-NEXT: .reg .b32 %r<20>; +; CHECK-NEXT: .reg .b16 %rs<4>; +; CHECK-NEXT: .reg .b32 %r<24>; ; CHECK-NEXT: .reg .b64 %rd<4>; ; CHECK-EMPTY: ; CHECK-NEXT: // %bb.0: -; CHECK-NEXT: ld.param.b8 %r3, [test_s_i8f16x2p_param_0+6]; -; CHECK-NEXT: shl.b32 %r4, %r3, 8; -; CHECK-NEXT: ld.param.b8 %r5, [test_s_i8f16x2p_param_0+5]; -; CHECK-NEXT: or.b32 %r6, %r4, %r5; -; CHECK-NEXT: ld.param.b8 %r7, [test_s_i8f16x2p_param_0+7]; -; CHECK-NEXT: shl.b32 %r8, %r7, 16; -; CHECK-NEXT: ld.param.b8 %r9, [test_s_i8f16x2p_param_0+8]; -; CHECK-NEXT: shl.b32 %r10, %r9, 24; -; CHECK-NEXT: or.b32 %r11, 
%r10, %r8;
-; CHECK-NEXT: or.b32 %r2, %r11, %r6;
-; CHECK-NEXT: ld.param.b64 %rd1, [test_s_i8f16x2p_param_0+16];
-; CHECK-NEXT: ld.param.b8 %rs1, [test_s_i8f16x2p_param_0+4];
; CHECK-NEXT: ld.param.b32 %r1, [test_s_i8f16x2p_param_0];
-; CHECK-NEXT: shr.u32 %r12, %r2, 8;
-; CHECK-NEXT: shr.u32 %r13, %r11, 16;
+; CHECK-NEXT: ld.param.b16 %rs1, [test_s_i8f16x2p_param_0+4];
+; CHECK-NEXT: ld.param.b64 %rd1, [test_s_i8f16x2p_param_0+16];
+; CHECK-NEXT: ld.param.b8 %r2, [test_s_i8f16x2p_param_0+6];
+; CHECK-NEXT: ld.param.b8 %r3, [test_s_i8f16x2p_param_0+7];
+; CHECK-NEXT: ld.param.b8 %r4, [test_s_i8f16x2p_param_0+8];
; CHECK-NEXT: { // callseq 4, 0
; CHECK-NEXT: .param .align 8 .b8 param0[24];
-; CHECK-NEXT: st.param.b32 [param0], %r1;
-; CHECK-NEXT: st.param.b8 [param0+4], %rs1;
-; CHECK-NEXT: st.param.b8 [param0+5], %r2;
-; CHECK-NEXT: st.param.b8 [param0+6], %r12;
-; CHECK-NEXT: st.param.b8 [param0+7], %r13;
-; CHECK-NEXT: st.param.b8 [param0+8], %r9;
-; CHECK-NEXT: st.param.b64 [param0+16], %rd1;
; CHECK-NEXT: .param .align 8 .b8 retval0[24];
+; CHECK-NEXT: st.param.b8 [param0+8], %r4;
+; CHECK-NEXT: st.param.b8 [param0+7], %r3;
+; CHECK-NEXT: st.param.b8 [param0+6], %r2;
+; CHECK-NEXT: st.param.b64 [param0+16], %rd1;
+; CHECK-NEXT: st.param.b16 [param0+4], %rs1;
+; CHECK-NEXT: st.param.b32 [param0], %r1;
; CHECK-NEXT: call.uni (retval0), test_s_i8f16x2p, (param0);
-; CHECK-NEXT: ld.param.b32 %r14, [retval0];
-; CHECK-NEXT: ld.param.b8 %rs2, [retval0+4];
-; CHECK-NEXT: ld.param.b8 %rs3, [retval0+5];
-; CHECK-NEXT: ld.param.b8 %rs4, [retval0+6];
-; CHECK-NEXT: ld.param.b8 %rs5, [retval0+7];
-; CHECK-NEXT: ld.param.b8 %rs6, [retval0+8];
; CHECK-NEXT: ld.param.b64 %rd2, [retval0+16];
+; CHECK-NEXT: ld.param.b8 %rs2, [retval0+4];
+; CHECK-NEXT: ld.param.b32 %r5, [retval0];
+; CHECK-NEXT: ld.param.b8 %r6, [retval0+8];
+; CHECK-NEXT: ld.param.b8 %r7, [retval0+7];
+; CHECK-NEXT: ld.param.b8 %r8, [retval0+6];
+; CHECK-NEXT: ld.param.b8 %r9, [retval0+5];
; CHECK-NEXT: } // callseq 4
-; CHECK-NEXT: cvt.u32.u16 %r15, %rs3;
-; CHECK-NEXT: cvt.u32.u16 %r16, %rs4;
-; CHECK-NEXT: cvt.u32.u16 %r17, %rs5;
-; CHECK-NEXT: cvt.u32.u16 %r18, %rs6;
-; CHECK-NEXT: st.param.b32 [func_retval0], %r14;
-; CHECK-NEXT: st.param.b8 [func_retval0+4], %rs2;
-; CHECK-NEXT: st.param.b8 [func_retval0+8], %r18;
-; CHECK-NEXT: st.param.b8 [func_retval0+7], %r17;
-; CHECK-NEXT: st.param.b8 [func_retval0+6], %r16;
-; CHECK-NEXT: st.param.b8 [func_retval0+5], %r15;
+; CHECK-NEXT: shl.b32 %r12, %r8, 8;
+; CHECK-NEXT: or.b32 %r13, %r12, %r9;
+; CHECK-NEXT: shl.b32 %r15, %r7, 16;
+; CHECK-NEXT: shl.b32 %r17, %r6, 24;
+; CHECK-NEXT: or.b32 %r18, %r17, %r15;
+; CHECK-NEXT: or.b32 %r19, %r18, %r13;
+; CHECK-NEXT: st.param.b8 [func_retval0+5], %r9;
; CHECK-NEXT: st.param.b64 [func_retval0+16], %rd2;
+; CHECK-NEXT: st.param.b8 [func_retval0+4], %rs2;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r5;
+; CHECK-NEXT: shr.u32 %r21, %r19, 24;
+; CHECK-NEXT: st.param.b8 [func_retval0+8], %r21;
+; CHECK-NEXT: shr.u32 %r22, %r19, 16;
+; CHECK-NEXT: st.param.b8 [func_retval0+7], %r22;
+; CHECK-NEXT: shr.u32 %r23, %r19, 8;
+; CHECK-NEXT: st.param.b8 [func_retval0+6], %r23;
; CHECK-NEXT: ret;
%r = tail call %s_i8f16x2p @test_s_i8f16x2p(%s_i8f16x2p %a)
ret %s_i8f16x2p %r
@@ -337,56 +278,51 @@ define %s_i8f16x2p @test_s_i8f16x2p(%s_i8f16x2p %a) {
define %s_i8f32p @test_s_i8f32p(%s_i8f32p %a) {
; CHECK-LABEL: test_s_i8f32p(
; CHECK: {
-; CHECK-NEXT: .reg .b16 %rs<12>;
-; CHECK-NEXT: .reg .b32 %r<20>;
+; CHECK-NEXT: .reg .b16 %rs<4>;
+; CHECK-NEXT: .reg .b32 %r<24>;
; CHECK-NEXT: .reg .b64 %rd<4>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: ld.param.b8 %r3, [test_s_i8f32p_param_0+6];
-; CHECK-NEXT: shl.b32 %r4, %r3, 8;
-; CHECK-NEXT: ld.param.b8 %r5, [test_s_i8f32p_param_0+5];
-; CHECK-NEXT: or.b32 %r6, %r4, %r5;
-; CHECK-NEXT: ld.param.b8 %r7, [test_s_i8f32p_param_0+7];
-; CHECK-NEXT: shl.b32 %r8, %r7, 16;
-; CHECK-NEXT: ld.param.b8 %r9, [test_s_i8f32p_param_0+8];
-; CHECK-NEXT: shl.b32 %r10, %r9, 24;
-; CHECK-NEXT: or.b32 %r11, %r10, %r8;
-; CHECK-NEXT: or.b32 %r2, %r11, %r6;
-; CHECK-NEXT: ld.param.b64 %rd1, [test_s_i8f32p_param_0+16];
-; CHECK-NEXT: ld.param.b8 %rs1, [test_s_i8f32p_param_0+4];
; CHECK-NEXT: ld.param.b32 %r1, [test_s_i8f32p_param_0];
-; CHECK-NEXT: shr.u32 %r12, %r2, 8;
-; CHECK-NEXT: shr.u32 %r13, %r11, 16;
+; CHECK-NEXT: ld.param.b16 %rs1, [test_s_i8f32p_param_0+4];
+; CHECK-NEXT: ld.param.b64 %rd1, [test_s_i8f32p_param_0+16];
+; CHECK-NEXT: ld.param.b8 %r2, [test_s_i8f32p_param_0+6];
+; CHECK-NEXT: ld.param.b8 %r3, [test_s_i8f32p_param_0+7];
+; CHECK-NEXT: ld.param.b8 %r4, [test_s_i8f32p_param_0+8];
; CHECK-NEXT: { // callseq 5, 0
; CHECK-NEXT: .param .align 8 .b8 param0[24];
-; CHECK-NEXT: st.param.b32 [param0], %r1;
-; CHECK-NEXT: st.param.b8 [param0+4], %rs1;
-; CHECK-NEXT: st.param.b8 [param0+5], %r2;
-; CHECK-NEXT: st.param.b8 [param0+6], %r12;
-; CHECK-NEXT: st.param.b8 [param0+7], %r13;
-; CHECK-NEXT: st.param.b8 [param0+8], %r9;
-; CHECK-NEXT: st.param.b64 [param0+16], %rd1;
; CHECK-NEXT: .param .align 8 .b8 retval0[24];
+; CHECK-NEXT: st.param.b8 [param0+8], %r4;
+; CHECK-NEXT: st.param.b8 [param0+7], %r3;
+; CHECK-NEXT: st.param.b8 [param0+6], %r2;
+; CHECK-NEXT: st.param.b64 [param0+16], %rd1;
+; CHECK-NEXT: st.param.b16 [param0+4], %rs1;
+; CHECK-NEXT: st.param.b32 [param0], %r1;
; CHECK-NEXT: call.uni (retval0), test_s_i8f32p, (param0);
-; CHECK-NEXT: ld.param.b32 %r14, [retval0];
-; CHECK-NEXT: ld.param.b8 %rs2, [retval0+4];
-; CHECK-NEXT: ld.param.b8 %rs3, [retval0+5];
-; CHECK-NEXT: ld.param.b8 %rs4, [retval0+6];
-; CHECK-NEXT: ld.param.b8 %rs5, [retval0+7];
-; CHECK-NEXT: ld.param.b8 %rs6, [retval0+8];
; CHECK-NEXT: ld.param.b64 %rd2, [retval0+16];
+; CHECK-NEXT: ld.param.b8 %rs2, [retval0+4];
+; CHECK-NEXT: ld.param.b32 %r5, [retval0];
+; CHECK-NEXT: ld.param.b8 %r6, [retval0+8];
+; CHECK-NEXT: ld.param.b8 %r7, [retval0+7];
+; CHECK-NEXT: ld.param.b8 %r8, [retval0+6];
+; CHECK-NEXT: ld.param.b8 %r9, [retval0+5];
; CHECK-NEXT: } // callseq 5
-; CHECK-NEXT: cvt.u32.u16 %r15, %rs3;
-; CHECK-NEXT: cvt.u32.u16 %r16, %rs4;
-; CHECK-NEXT: cvt.u32.u16 %r17, %rs5;
-; CHECK-NEXT: cvt.u32.u16 %r18, %rs6;
-; CHECK-NEXT: st.param.b32 [func_retval0], %r14;
-; CHECK-NEXT: st.param.b8 [func_retval0+4], %rs2;
-; CHECK-NEXT: st.param.b8 [func_retval0+8], %r18;
-; CHECK-NEXT: st.param.b8 [func_retval0+7], %r17;
-; CHECK-NEXT: st.param.b8 [func_retval0+6], %r16;
-; CHECK-NEXT: st.param.b8 [func_retval0+5], %r15;
+; CHECK-NEXT: shl.b32 %r12, %r8, 8;
+; CHECK-NEXT: or.b32 %r13, %r12, %r9;
+; CHECK-NEXT: shl.b32 %r15, %r7, 16;
+; CHECK-NEXT: shl.b32 %r17, %r6, 24;
+; CHECK-NEXT: or.b32 %r18, %r17, %r15;
+; CHECK-NEXT: or.b32 %r19, %r18, %r13;
+; CHECK-NEXT: st.param.b8 [func_retval0+5], %r9;
; CHECK-NEXT: st.param.b64 [func_retval0+16], %rd2;
+; CHECK-NEXT: st.param.b8 [func_retval0+4], %rs2;
+; CHECK-NEXT: st.param.b32 [func_retval0], %r5;
+; CHECK-NEXT: shr.u32 %r21, %r19, 24;
+; CHECK-NEXT: st.param.b8 [func_retval0+8], %r21;
+; CHECK-NEXT: shr.u32 %r22, %r19, 16;
+; CHECK-NEXT: st.param.b8 [func_retval0+7], %r22;
+; CHECK-NEXT: shr.u32 %r23, %r19, 8;
+; CHECK-NEXT: st.param.b8 [func_retval0+6], %r23;
; CHECK-NEXT: ret;
%r = tail call %s_i8f32p @test_s_i8f32p(%s_i8f32p %a)
ret %s_i8f32p %r
@@ -396,112 +332,66 @@ define %s_i8f32p @test_s_i8f32p(%s_i8f32p %a) {
define %s_i8f64p @test_s_i8f64p(%s_i8f64p %a) {
; CHECK-LABEL: test_s_i8f64p(
; CHECK: {
-; CHECK-NEXT: .reg .b16 %rs<20>;
-; CHECK-NEXT: .reg .b64 %rd<68>;
+; CHECK-NEXT: .reg .b16 %rs<3>;
+; CHECK-NEXT: .reg .b64 %rd<46>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: ld.param.b8 %rd4, [test_s_i8f64p_param_0+10];
-; CHECK-NEXT: shl.b64 %rd5, %rd4, 8;
-; CHECK-NEXT: ld.param.b8 %rd6, [test_s_i8f64p_param_0+9];
-; CHECK-NEXT: or.b64 %rd7, %rd5, %rd6;
-; CHECK-NEXT: ld.param.b8 %rd8, [test_s_i8f64p_param_0+11];
-; CHECK-NEXT: shl.b64 %rd9, %rd8, 16;
-; CHECK-NEXT: ld.param.b8 %rd10, [test_s_i8f64p_param_0+12];
-; CHECK-NEXT: shl.b64 %rd11, %rd10, 24;
-; CHECK-NEXT: or.b64 %rd12, %rd11, %rd9;
-; CHECK-NEXT: or.b64 %rd13, %rd12, %rd7;
-; CHECK-NEXT: ld.param.b8 %rd14, [test_s_i8f64p_param_0+14];
-; CHECK-NEXT: shl.b64 %rd15, %rd14, 8;
-; CHECK-NEXT: ld.param.b8 %rd16, [test_s_i8f64p_param_0+13];
-; CHECK-NEXT: or.b64 %rd17, %rd15, %rd16;
-; CHECK-NEXT: ld.param.b8 %rd18, [test_s_i8f64p_param_0+15];
-; CHECK-NEXT: shl.b64 %rd19, %rd18, 16;
-; CHECK-NEXT: ld.param.b8 %rd20, [test_s_i8f64p_param_0+16];
-; CHECK-NEXT: shl.b64 %rd21, %rd20, 24;
-; CHECK-NEXT: or.b64 %rd22, %rd21, %rd19;
-; CHECK-NEXT: or.b64 %rd23, %rd22, %rd17;
-; CHECK-NEXT: shl.b64 %rd24, %rd23, 32;
-; CHECK-NEXT: or.b64 %rd2, %rd24, %rd13;
-; CHECK-NEXT: ld.param.b64 %rd3, [test_s_i8f64p_param_0+24];
-; CHECK-NEXT: ld.param.b8 %rs1, [test_s_i8f64p_param_0+8];
; CHECK-NEXT: ld.param.b64 %rd1, [test_s_i8f64p_param_0];
-; CHECK-NEXT: shr.u64 %rd25, %rd2, 8;
-; CHECK-NEXT: shr.u64 %rd26, %rd2, 16;
-; CHECK-NEXT: shr.u64 %rd27, %rd2, 24;
-; CHECK-NEXT: bfe.u64 %rd28, %rd23, 8, 24;
-; CHECK-NEXT: bfe.u64 %rd29, %rd23, 16, 16;
-; CHECK-NEXT: bfe.u64 %rd30, %rd23, 24, 8;
+; CHECK-NEXT: ld.param.b64 %rd2, [test_s_i8f64p_param_0+8];
+; CHECK-NEXT: ld.param.b64 %rd3, [test_s_i8f64p_param_0+24];
+; CHECK-NEXT: ld.param.b8 %rd4, [test_s_i8f64p_param_0+16];
; CHECK-NEXT: { // callseq 6, 0
; CHECK-NEXT: .param .align 8 .b8 param0[32];
-; CHECK-NEXT: st.param.b64 [param0], %rd1;
-; CHECK-NEXT: st.param.b8 [param0+8], %rs1;
-; CHECK-NEXT: st.param.b8 [param0+9], %rd2;
-; CHECK-NEXT: st.param.b8 [param0+10], %rd25;
-; CHECK-NEXT: st.param.b8 [param0+11], %rd26;
-; CHECK-NEXT: st.param.b8 [param0+12], %rd27;
-; CHECK-NEXT: st.param.b8 [param0+13], %rd23;
-; CHECK-NEXT: st.param.b8 [param0+14], %rd28;
-; CHECK-NEXT: st.param.b8 [param0+15], %rd29;
-; CHECK-NEXT: st.param.b8 [param0+16], %rd30;
-; CHECK-NEXT: st.param.b64 [param0+24], %rd3;
; CHECK-NEXT: .param .align 8 .b8 retval0[32];
+; CHECK-NEXT: st.param.b8 [param0+16], %rd4;
+; CHECK-NEXT: st.param.b64 [param0+24], %rd3;
+; CHECK-NEXT: st.param.b64 [param0+8], %rd2;
+; CHECK-NEXT: st.param.b64 [param0], %rd1;
; CHECK-NEXT: call.uni (retval0), test_s_i8f64p, (param0);
-; CHECK-NEXT: ld.param.b64 %rd31, [retval0];
-; CHECK-NEXT: ld.param.b8 %rs2, [retval0+8];
-; CHECK-NEXT: ld.param.b8 %rs3, [retval0+9];
-; CHECK-NEXT: ld.param.b8 %rs4, [retval0+10];
-; CHECK-NEXT: ld.param.b8 %rs5, [retval0+11];
-; CHECK-NEXT: ld.param.b8 %rs6, [retval0+12];
-; CHECK-NEXT: ld.param.b8 %rs7, [retval0+13];
-; CHECK-NEXT: ld.param.b8 %rs8, [retval0+14];
-; CHECK-NEXT: ld.param.b8 %rs9, [retval0+15];
-; CHECK-NEXT: ld.param.b8 %rs10, [retval0+16];
-; CHECK-NEXT: ld.param.b64 %rd32, [retval0+24];
+; CHECK-NEXT: ld.param.b64 %rd5, [retval0+24];
+; CHECK-NEXT: ld.param.b8 %rs1, [retval0+8];
+; CHECK-NEXT: ld.param.b64 %rd6, [retval0];
+; CHECK-NEXT: ld.param.b8 %rd7, [retval0+16];
+; CHECK-NEXT: ld.param.b8 %rd8, [retval0+15];
+; CHECK-NEXT: ld.param.b8 %rd9, [retval0+14];
+; CHECK-NEXT: ld.param.b8 %rd10, [retval0+13];
+; CHECK-NEXT: ld.param.b8 %rd11, [retval0+12];
+; CHECK-NEXT: ld.param.b8 %rd12, [retval0+11];
+; CHECK-NEXT: ld.param.b8 %rd13, [retval0+10];
+; CHECK-NEXT: ld.param.b8 %rd14, [retval0+9];
; CHECK-NEXT: } // callseq 6
-; CHECK-NEXT: cvt.u64.u16 %rd33, %rs3;
-; CHECK-NEXT: and.b64 %rd34, %rd33, 255;
-; CHECK-NEXT: cvt.u64.u16 %rd35, %rs4;
-; CHECK-NEXT: and.b64 %rd36, %rd35, 255;
-; CHECK-NEXT: shl.b64 %rd37, %rd36, 8;
-; CHECK-NEXT: or.b64 %rd38, %rd34, %rd37;
-; CHECK-NEXT: cvt.u64.u16 %rd39, %rs5;
-; CHECK-NEXT: and.b64 %rd40, %rd39, 255;
-; CHECK-NEXT: shl.b64 %rd41, %rd40, 16;
-; CHECK-NEXT: or.b64 %rd42, %rd38, %rd41;
-; CHECK-NEXT: cvt.u64.u16 %rd43, %rs6;
-; CHECK-NEXT: and.b64 %rd44, %rd43, 255;
-; CHECK-NEXT: shl.b64 %rd45, %rd44, 24;
-; CHECK-NEXT: or.b64 %rd46, %rd42, %rd45;
-; CHECK-NEXT: cvt.u64.u16 %rd47, %rs7;
-; CHECK-NEXT: and.b64 %rd48, %rd47, 255;
-; CHECK-NEXT: shl.b64 %rd49, %rd48, 32;
-; CHECK-NEXT: or.b64 %rd50, %rd46, %rd49;
-; CHECK-NEXT: cvt.u64.u16 %rd51, %rs8;
-; CHECK-NEXT: and.b64 %rd52, %rd51, 255;
-; CHECK-NEXT: shl.b64 %rd53, %rd52, 40;
-; CHECK-NEXT: or.b64 %rd54, %rd50, %rd53;
-; CHECK-NEXT: cvt.u64.u16 %rd55, %rs9;
-; CHECK-NEXT: and.b64 %rd56, %rd55, 255;
-; CHECK-NEXT: shl.b64 %rd57, %rd56, 48;
-; CHECK-NEXT: or.b64 %rd58, %rd54, %rd57;
-; CHECK-NEXT: cvt.u64.u16 %rd59, %rs10;
-; CHECK-NEXT: shl.b64 %rd60, %rd59, 56;
-; CHECK-NEXT: or.b64 %rd61, %rd58, %rd60;
-; CHECK-NEXT: st.param.b64 [func_retval0], %rd31;
-; CHECK-NEXT: st.param.b8 [func_retval0+8], %rs2;
+; CHECK-NEXT: shl.b64 %rd17, %rd13, 8;
+; CHECK-NEXT: or.b64 %rd18, %rd17, %rd14;
+; CHECK-NEXT: shl.b64 %rd20, %rd12, 16;
+; CHECK-NEXT: shl.b64 %rd22, %rd11, 24;
+; CHECK-NEXT: or.b64 %rd23, %rd22, %rd20;
+; CHECK-NEXT: or.b64 %rd24, %rd23, %rd18;
+; CHECK-NEXT: shl.b64 %rd27, %rd9, 8;
+; CHECK-NEXT: or.b64 %rd28, %rd27, %rd10;
+; CHECK-NEXT: shl.b64 %rd30, %rd8, 16;
+; CHECK-NEXT: shl.b64 %rd32, %rd7, 24;
+; CHECK-NEXT: or.b64 %rd33, %rd32, %rd30;
+; CHECK-NEXT: or.b64 %rd34, %rd33, %rd28;
+; CHECK-NEXT: shl.b64 %rd35, %rd34, 32;
+; CHECK-NEXT: or.b64 %rd36, %rd35, %rd24;
+; CHECK-NEXT: st.param.b8 [func_retval0+9], %rd14;
+; CHECK-NEXT: st.param.b64 [func_retval0+24], %rd5;
+; CHECK-NEXT: st.param.b8 [func_retval0+8], %rs1;
+; CHECK-NEXT: st.param.b64 [func_retval0], %rd6;
+; CHECK-NEXT: shr.u64 %rd39, %rd36, 56;
+; CHECK-NEXT: st.param.b8 [func_retval0+16], %rd39;
+; CHECK-NEXT: shr.u64 %rd40, %rd36, 48;
+; CHECK-NEXT: st.param.b8 [func_retval0+15], %rd40;
+; CHECK-NEXT: shr.u64 %rd41, %rd36, 40;
+; CHECK-NEXT: st.param.b8 [func_retval0+14], %rd41;
+; CHECK-NEXT: shr.u64 %rd42, %rd36, 32;
+; CHECK-NEXT: st.param.b8 [func_retval0+13], %rd42;
+; CHECK-NEXT: shr.u64 %rd43, %rd36, 24;
; CHECK-NEXT: st.param.b8 [func_retval0+12], %rd43;
-; CHECK-NEXT: st.param.b8 [func_retval0+11], %rd39;
-; CHECK-NEXT: st.param.b8 [func_retval0+10], %rd35;
-; CHECK-NEXT: st.param.b8 [func_retval0+9], %rd33;
-; CHECK-NEXT: shr.u64 %rd64, %rd50, 32;
-; CHECK-NEXT: st.param.b8 [func_retval0+13], %rd64;
-; CHECK-NEXT: shr.u64 %rd65, %rd54, 40;
-; CHECK-NEXT: st.param.b8 [func_retval0+14], %rd65;
-; CHECK-NEXT: shr.u64 %rd66, %rd58, 48;
-; CHECK-NEXT: st.param.b8 [func_retval0+15], %rd66;
-; CHECK-NEXT: shr.u64 %rd67, %rd61, 56;
-; CHECK-NEXT: st.param.b8 [func_retval0+16], %rd67;
-; CHECK-NEXT: st.param.b64 [func_retval0+24], %rd32;
+; CHECK-NEXT: shr.u64 %rd44, %rd36, 16;
+; CHECK-NEXT: st.param.b8 [func_retval0+11], %rd44;
+; CHECK-NEXT: shr.u64 %rd45, %rd36, 8;
+; CHECK-NEXT: st.param.b8 [func_retval0+10], %rd45;
; CHECK-NEXT: ret;
%r = tail call %s_i8f64p @test_s_i8f64p(%s_i8f64p %a)
ret %s_i8f64p %r
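The three param.ll functions above all churn for the same reason: the byte-wise packing and unpacking of the aggregate now happens inside the callseq braces, after the .param declarations, and the repacked word is split back out with shr/st.param.b8 pairs. As a rough, self-contained sketch of the IR shape that drives this kind of output (the %s_roundtrip type below is a hypothetical stand-in; the real %s_i8f16x2p/%s_i8f32p/%s_i8f64p definitions live earlier in param.ll and are not shown in this hunk):

; Hypothetical stand-in: a word-sized leader, a lone i8 that forces
; byte-granular param traffic, and an aligned 8-byte trailing field.
%s_roundtrip = type { i32, i8, float, i64 }

define %s_roundtrip @test_s_roundtrip(%s_roundtrip %a) {
  ; The self-call round-trips the aggregate through .param space in both
  ; directions, which is exactly what the CHECK lines above pin down.
  %r = tail call %s_roundtrip @test_s_roundtrip(%s_roundtrip %a)
  ret %s_roundtrip %r
}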
diff --git a/llvm/test/CodeGen/NVPTX/vaargs.ll b/llvm/test/CodeGen/NVPTX/vaargs.ll
index 3ca729f..9e312a2 100644
--- a/llvm/test/CodeGen/NVPTX/vaargs.ll
+++ b/llvm/test/CodeGen/NVPTX/vaargs.ll
@@ -89,14 +89,14 @@ define i32 @test_foo(i32 %i, i64 %l, double %d, ptr %p) {
; CHECK-NEXT: ld.param.b32 [[ARG_I32:%r[0-9]+]], [test_foo_param_0];
; Store arguments to an array
-; CHECK32: .param .align 8 .b8 param1[28];
-; CHECK64: .param .align 8 .b8 param1[32];
-; CHECK-NEXT: st.param.b32 [param1], [[ARG_I32]];
-; CHECK-NEXT: st.param.b64 [param1+8], [[ARG_I64]];
-; CHECK-NEXT: st.param.b64 [param1+16], [[ARG_DOUBLE]];
-; CHECK-NEXT: st.param.b[[BITS]] [param1+24], [[ARG_VOID_PTR]];
-; CHECK-NEXT: .param .b32 retval0;
-; CHECK-NEXT: prototype_1 : .callprototype (.param .b32 _) _ (.param .b32 _, .param .align 8 .b8 _[]
+; CHECK32: .param .align 8 .b8 param1[28];
+; CHECK64: .param .align 8 .b8 param1[32];
+; CHECK-DAG: .param .b32 retval0;
+; CHECK-DAG: st.param.b32 [param1], [[ARG_I32]];
+; CHECK-DAG: st.param.b64 [param1+8], [[ARG_I64]];
+; CHECK-DAG: st.param.b64 [param1+16], [[ARG_DOUBLE]];
+; CHECK-DAG: st.param.b[[BITS]] [param1+24], [[ARG_VOID_PTR]];
+; CHECK-DAG: prototype_1 : .callprototype (.param .b32 _) _ (.param .b32 _, .param .align 8 .b8 _[]
entry:
%ptr = load ptr, ptr addrspacecast (ptr addrspace(1) @foo_ptr to ptr), align 8
diff --git a/llvm/test/CodeGen/NVPTX/variadics-backend.ll b/llvm/test/CodeGen/NVPTX/variadics-backend.ll
index ad2e704..a9b3675 100644
--- a/llvm/test/CodeGen/NVPTX/variadics-backend.ll
+++ b/llvm/test/CodeGen/NVPTX/variadics-backend.ll
@@ -115,13 +115,13 @@ define dso_local i32 @foo() {
; CHECK-PTX-NEXT: st.b64 [%SP+16], 1;
; CHECK-PTX-NEXT: st.b64 [%SP+24], 4607182418800017408;
; CHECK-PTX-NEXT: st.b64 [%SP+32], 4607182418800017408;
-; CHECK-PTX-NEXT: add.u64 %rd1, %SP, 0;
; CHECK-PTX-NEXT: { // callseq 0, 0
; CHECK-PTX-NEXT: .param .b32 param0;
-; CHECK-PTX-NEXT: st.param.b32 [param0], 1;
; CHECK-PTX-NEXT: .param .b64 param1;
-; CHECK-PTX-NEXT: st.param.b64 [param1], %rd1;
; CHECK-PTX-NEXT: .param .b32 retval0;
+; CHECK-PTX-NEXT: add.u64 %rd1, %SP, 0;
+; CHECK-PTX-NEXT: st.param.b64 [param1], %rd1;
+; CHECK-PTX-NEXT: st.param.b32 [param0], 1;
; CHECK-PTX-NEXT: call.uni (retval0), variadics1, (param0, param1);
; CHECK-PTX-NEXT: ld.param.b32 %r1, [retval0];
; CHECK-PTX-NEXT: } // callseq 0
@@ -218,13 +218,13 @@ define dso_local i32 @bar() {
; CHECK-PTX-NEXT: st.b32 [%SP+8], 1;
; CHECK-PTX-NEXT: st.b8 [%SP+12], 1;
; CHECK-PTX-NEXT: st.b64 [%SP+16], 1;
-; CHECK-PTX-NEXT: add.u64 %rd3, %SP, 8;
; CHECK-PTX-NEXT: { // callseq 1, 0
; CHECK-PTX-NEXT: .param .b32 param0;
-; CHECK-PTX-NEXT: st.param.b32 [param0], 1;
; CHECK-PTX-NEXT: .param .b64 param1;
-; CHECK-PTX-NEXT: st.param.b64 [param1], %rd3;
; CHECK-PTX-NEXT: .param .b32 retval0;
+; CHECK-PTX-NEXT: add.u64 %rd3, %SP, 8;
+; CHECK-PTX-NEXT: st.param.b64 [param1], %rd3;
+; CHECK-PTX-NEXT: st.param.b32 [param0], 1;
; CHECK-PTX-NEXT: call.uni (retval0), variadics2, (param0, param1);
; CHECK-PTX-NEXT: ld.param.b32 %r1, [retval0];
; CHECK-PTX-NEXT: } // callseq 1
@@ -289,13 +289,13 @@ define dso_local i32 @baz() {
; CHECK-PTX-NEXT: mov.b64 %SPL, __local_depot5;
; CHECK-PTX-NEXT: cvta.local.u64 %SP, %SPL;
; CHECK-PTX-NEXT: st.v4.b32 [%SP], {1, 1, 1, 1};
-; CHECK-PTX-NEXT: add.u64 %rd1, %SP, 0;
; CHECK-PTX-NEXT: { // callseq 2, 0
; CHECK-PTX-NEXT: .param .b32 param0;
-; CHECK-PTX-NEXT: st.param.b32 [param0], 1;
; CHECK-PTX-NEXT: .param .b64 param1;
-; CHECK-PTX-NEXT: st.param.b64 [param1], %rd1;
; CHECK-PTX-NEXT: .param .b32 retval0;
+; CHECK-PTX-NEXT: add.u64 %rd1, %SP, 0;
+; CHECK-PTX-NEXT: st.param.b64 [param1], %rd1;
+; CHECK-PTX-NEXT: st.param.b32 [param0], 1;
; CHECK-PTX-NEXT: call.uni (retval0), variadics3, (param0, param1);
; CHECK-PTX-NEXT: ld.param.b32 %r1, [retval0];
; CHECK-PTX-NEXT: } // callseq 2
@@ -348,7 +348,6 @@ define dso_local void @qux() {
; CHECK-PTX-NEXT: .local .align 8 .b8 __local_depot7[24];
; CHECK-PTX-NEXT: .reg .b64 %SP;
; CHECK-PTX-NEXT: .reg .b64 %SPL;
-; CHECK-PTX-NEXT: .reg .b32 %r<2>;
; CHECK-PTX-NEXT: .reg .b64 %rd<8>;
; CHECK-PTX-EMPTY:
; CHECK-PTX-NEXT: // %bb.0: // %entry
@@ -360,18 +359,17 @@ define dso_local void @qux() {
; CHECK-PTX-NEXT: ld.global.nc.b64 %rd4, [__const_$_qux_$_s];
; CHECK-PTX-NEXT: st.local.b64 [%rd2], %rd4;
; CHECK-PTX-NEXT: st.b64 [%SP+16], 1;
-; CHECK-PTX-NEXT: ld.local.b64 %rd5, [%rd2];
-; CHECK-PTX-NEXT: ld.local.b64 %rd6, [%rd2+8];
-; CHECK-PTX-NEXT: add.u64 %rd7, %SP, 16;
; CHECK-PTX-NEXT: { // callseq 3, 0
; CHECK-PTX-NEXT: .param .align 8 .b8 param0[16];
-; CHECK-PTX-NEXT: st.param.b64 [param0], %rd5;
-; CHECK-PTX-NEXT: st.param.b64 [param0+8], %rd6;
; CHECK-PTX-NEXT: .param .b64 param1;
-; CHECK-PTX-NEXT: st.param.b64 [param1], %rd7;
; CHECK-PTX-NEXT: .param .b32 retval0;
+; CHECK-PTX-NEXT: add.u64 %rd5, %SP, 16;
+; CHECK-PTX-NEXT: st.param.b64 [param1], %rd5;
+; CHECK-PTX-NEXT: ld.local.b64 %rd6, [%rd2+8];
+; CHECK-PTX-NEXT: st.param.b64 [param0+8], %rd6;
+; CHECK-PTX-NEXT: ld.local.b64 %rd7, [%rd2];
+; CHECK-PTX-NEXT: st.param.b64 [param0], %rd7;
; CHECK-PTX-NEXT: call.uni (retval0), variadics4, (param0, param1);
-; CHECK-PTX-NEXT: ld.param.b32 %r1, [retval0];
; CHECK-PTX-NEXT: } // callseq 3
; CHECK-PTX-NEXT: ret;
entry:
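The vaargs.ll and variadics-backend.ll hunks are the FileCheck-side view of the same scheduling change: the add.u64/st.param instructions sink into the call sequence, so order-pinned CHECK-NEXT chains are either re-sequenced or relaxed to CHECK-DAG. As a reminder of the semantics being relied on (a toy illustration, not a directive from the tree): a block of consecutive CHECK-DAG directives matches its lines in any order, so

; CHECK-DAG: st.param.b32 [param1],
; CHECK-DAG: st.param.b64 [param1+8],

accepts the two stores whichever way the backend emits them, whereas CHECK-NEXT would pin one order and make the test brittle against harmless reordering.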
diff --git a/llvm/test/CodeGen/RISCV/rvv/vlseg-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vlseg-rv32.ll
index e6a98c9..eb3422d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vlseg-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vlseg-rv32.ll
@@ -2,4246 +2,3303 @@
; RUN: llc -mtriple=riscv32 -mattr=+zve64d,+f,+d,+zvfh,+zvfbfmin \
; RUN: -verify-machineinstrs < %s | FileCheck %s
-declare target("riscv.vector.tuple", <vscale x 1 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv1i8_2t(target("riscv.vector.tuple", <vscale x 1 x i8>, 2), ptr, i32, i32)
-declare target("riscv.vector.tuple", <vscale x 1 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv1i8_2t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 2), ptr, <vscale x 1 x i1>, i32, i32, i32)
-
-define <vscale x 1 x i8> @test_vlseg2_nxv1i8_triscv.vector.tuple_nxv1i8_2t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 1 x i8>, 2) @test_vlseg2_nxv1i8_triscv.vector.tuple_nxv1i8_2t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg2_nxv1i8_triscv.vector.tuple_nxv1i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
-; CHECK-NEXT: vlseg2e8.v v7, (a0)
+; CHECK-NEXT: vlseg2e8.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv1i8_2t(target("riscv.vector.tuple", <vscale x 1 x i8>, 2) undef, ptr %base, i32 %vl, i32 3)
- %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_2t(target("riscv.vector.tuple", <vscale x 1 x i8>, 2) %0, i32 1)
- ret <vscale x 1 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 1 x i8>, 2) %0
}
-
-define <vscale x 1 x i8> @test_vlseg2_mask_nxv1i8_triscv.vector.tuple_nxv1i8_2t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 1 x i8>, 2) @test_vlseg2_mask_nxv1i8_triscv.vector.tuple_nxv1i8_2t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg2_mask_nxv1i8_triscv.vector.tuple_nxv1i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
-; CHECK-NEXT: vlseg2e8.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg2e8.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv1i8_2t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 2) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 3)
- %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_2t(target("riscv.vector.tuple", <vscale x 1 x i8>, 2) %0, i32 1)
- ret <vscale x 1 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 1 x i8>, 2) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 2 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv2i8_2t(target("riscv.vector.tuple", <vscale x 2 x i8>, 2), ptr, i32, i32)
-declare target("riscv.vector.tuple", <vscale x 2 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv2i8_2t.nxv2i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 2), ptr, <vscale x 2 x i1>, i32, i32, i32)
-
-define <vscale x 2 x i8> @test_vlseg2_nxv2i8_triscv.vector.tuple_nxv2i8_2t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 2) @test_vlseg2_nxv2i8_triscv.vector.tuple_nxv2i8_2t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg2_nxv2i8_triscv.vector.tuple_nxv2i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
-; CHECK-NEXT: vlseg2e8.v v7, (a0)
+; CHECK-NEXT: vlseg2e8.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv2i8_2t(target("riscv.vector.tuple", <vscale x 2 x i8>, 2) undef, ptr %base, i32 %vl, i32 3)
- %1 = call <vscale x 2 x i8> @llvm.riscv.tuple.extract.nxv2i8.triscv.vector.tuple_nxv2i8_2t(target("riscv.vector.tuple", <vscale x 2 x i8>, 2) %0, i32 1)
- ret <vscale x 2 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 2) %0
}
-
-define <vscale x 2 x i8> @test_vlseg2_mask_nxv2i8_triscv.vector.tuple_nxv2i8_2t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 2) @test_vlseg2_mask_nxv2i8_triscv.vector.tuple_nxv2i8_2t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg2_mask_nxv2i8_triscv.vector.tuple_nxv2i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
-; CHECK-NEXT: vlseg2e8.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg2e8.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv2i8_2t.nxv2i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 2) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 3)
- %1 = call <vscale x 2 x i8> @llvm.riscv.tuple.extract.nxv2i8.triscv.vector.tuple_nxv2i8_2t(target("riscv.vector.tuple", <vscale x 2 x i8>, 2) %0, i32 1)
- ret <vscale x 2 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 2) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2), ptr, i32, i32)
-declare target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv4i8_2t.nxv4i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 2), ptr, <vscale x 4 x i1>, i32, i32, i32)
-
-define <vscale x 4 x i8> @test_vlseg2_nxv4i8_triscv.vector.tuple_nxv4i8_2t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @test_vlseg2_nxv4i8_triscv.vector.tuple_nxv4i8_2t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg2_nxv4i8_triscv.vector.tuple_nxv4i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
-; CHECK-NEXT: vlseg2e8.v v7, (a0)
+; CHECK-NEXT: vlseg2e8.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) undef, ptr %base, i32 %vl, i32 3)
- %1 = call <vscale x 4 x i8> @llvm.riscv.tuple.extract.nxv4i8.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0, i32 1)
- ret <vscale x 4 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0
}
-
-define <vscale x 4 x i8> @test_vlseg2_mask_nxv4i8_triscv.vector.tuple_nxv4i8_2t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @test_vlseg2_mask_nxv4i8_triscv.vector.tuple_nxv4i8_2t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: test_vlseg2_mask_nxv4i8_triscv.vector.tuple_nxv4i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
-; CHECK-NEXT: vlseg2e8.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg2e8.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv4i8_2t.nxv4i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) undef, ptr %base, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 3)
- %1 = call <vscale x 4 x i8> @llvm.riscv.tuple.extract.nxv4i8.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0, i32 1)
- ret <vscale x 4 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2), ptr, i32, i32)
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv8i8_2t.nxv8i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 2), ptr, <vscale x 8 x i1>, i32, i32, i32)
-
-define <vscale x 8 x i8> @test_vlseg2_nxv8i8_triscv.vector.tuple_nxv8i8_2t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @test_vlseg2_nxv8i8_triscv.vector.tuple_nxv8i8_2t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg2_nxv8i8_triscv.vector.tuple_nxv8i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
-; CHECK-NEXT: vlseg2e8.v v7, (a0)
+; CHECK-NEXT: vlseg2e8.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) undef, ptr %base, i32 %vl, i32 3)
- %1 = call <vscale x 8 x i8> @llvm.riscv.tuple.extract.nxv8i8.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0, i32 1)
- ret <vscale x 8 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0
}
-
-define <vscale x 8 x i8> @test_vlseg2_mask_nxv8i8_triscv.vector.tuple_nxv8i8_2t(ptr %base, i32 %vl, <vscale x 8 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @test_vlseg2_mask_nxv8i8_triscv.vector.tuple_nxv8i8_2t(ptr %base, i32 %vl, <vscale x 8 x i1> %mask) {
; CHECK-LABEL: test_vlseg2_mask_nxv8i8_triscv.vector.tuple_nxv8i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
-; CHECK-NEXT: vlseg2e8.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg2e8.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv8i8_2t.nxv8i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) undef, ptr %base, <vscale x 8 x i1> %mask, i32 %vl, i32 1, i32 3)
- %1 = call <vscale x 8 x i8> @llvm.riscv.tuple.extract.nxv8i8.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0, i32 1)
- ret <vscale x 8 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2), ptr, i32, i32)
-declare target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv16i8_2t.nxv16i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 2), ptr, <vscale x 16 x i1>, i32, i32, i32)
-
-define <vscale x 16 x i8> @test_vlseg2_nxv16i8_triscv.vector.tuple_nxv16i8_2t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @test_vlseg2_nxv16i8_triscv.vector.tuple_nxv16i8_2t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg2_nxv16i8_triscv.vector.tuple_nxv16i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
-; CHECK-NEXT: vlseg2e8.v v6, (a0)
+; CHECK-NEXT: vlseg2e8.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, i32 %vl, i32 3)
- %1 = call <vscale x 16 x i8> @llvm.riscv.tuple.extract.nxv16i8.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0, i32 1)
- ret <vscale x 16 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0
}
-
-define <vscale x 16 x i8> @test_vlseg2_mask_nxv16i8_triscv.vector.tuple_nxv16i8_2t(ptr %base, i32 %vl, <vscale x 16 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @test_vlseg2_mask_nxv16i8_triscv.vector.tuple_nxv16i8_2t(ptr %base, i32 %vl, <vscale x 16 x i1> %mask) {
; CHECK-LABEL: test_vlseg2_mask_nxv16i8_triscv.vector.tuple_nxv16i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
-; CHECK-NEXT: vlseg2e8.v v6, (a0), v0.t
+; CHECK-NEXT: vlseg2e8.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv16i8_2t.nxv16i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 16 x i1> %mask, i32 %vl, i32 1, i32 3)
- %1 = call <vscale x 16 x i8> @llvm.riscv.tuple.extract.nxv16i8.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0, i32 1)
- ret <vscale x 16 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2), ptr, i32, i32)
-declare target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv32i8_2t.nxv32i1(target("riscv.vector.tuple", <vscale x 32 x i8>, 2), ptr, <vscale x 32 x i1>, i32, i32, i32)
-
-define <vscale x 32 x i8> @test_vlseg2_nxv32i8_triscv.vector.tuple_nxv32i8_2t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @test_vlseg2_nxv32i8_triscv.vector.tuple_nxv32i8_2t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg2_nxv32i8_triscv.vector.tuple_nxv32i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma
-; CHECK-NEXT: vlseg2e8.v v4, (a0)
+; CHECK-NEXT: vlseg2e8.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, i32 %vl, i32 3)
- %1 = call <vscale x 32 x i8> @llvm.riscv.tuple.extract.nxv32i8.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0, i32 1)
- ret <vscale x 32 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0
}
-
-define <vscale x 32 x i8> @test_vlseg2_mask_nxv32i8_triscv.vector.tuple_nxv32i8_2t(ptr %base, i32 %vl, <vscale x 32 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @test_vlseg2_mask_nxv32i8_triscv.vector.tuple_nxv32i8_2t(ptr %base, i32 %vl, <vscale x 32 x i1> %mask) {
; CHECK-LABEL: test_vlseg2_mask_nxv32i8_triscv.vector.tuple_nxv32i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma
-; CHECK-NEXT: vlseg2e8.v v4, (a0), v0.t
+; CHECK-NEXT: vlseg2e8.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv32i8_2t.nxv32i1(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 32 x i1> %mask, i32 %vl, i32 1, i32 3)
- %1 = call <vscale x 32 x i8> @llvm.riscv.tuple.extract.nxv32i8.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0, i32 1)
- ret <vscale x 32 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 1 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv1i8_3t(target("riscv.vector.tuple", <vscale x 1 x i8>, 3), ptr, i32, i32)
-declare target("riscv.vector.tuple", <vscale x 1 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv1i8_3t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 3), ptr, <vscale x 1 x i1>, i32, i32, i32)
-
-define <vscale x 1 x i8> @test_vlseg3_nxv1i8_triscv.vector.tuple_nxv1i8_3t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 1 x i8>, 3) @test_vlseg3_nxv1i8_triscv.vector.tuple_nxv1i8_3t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg3_nxv1i8_triscv.vector.tuple_nxv1i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
-; CHECK-NEXT: vlseg3e8.v v7, (a0)
+; CHECK-NEXT: vlseg3e8.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv1i8_3t(target("riscv.vector.tuple", <vscale x 1 x i8>, 3) undef, ptr %base, i32 %vl, i32 3)
- %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_3t(target("riscv.vector.tuple", <vscale x 1 x i8>, 3) %0, i32 1)
- ret <vscale x 1 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 1 x i8>, 3) %0
}
-
-define <vscale x 1 x i8> @test_vlseg3_mask_nxv1i8_triscv.vector.tuple_nxv1i8_3t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 1 x i8>, 3) @test_vlseg3_mask_nxv1i8_triscv.vector.tuple_nxv1i8_3t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg3_mask_nxv1i8_triscv.vector.tuple_nxv1i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
-; CHECK-NEXT: vlseg3e8.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg3e8.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv1i8_3t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 3) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 3)
- %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_3t(target("riscv.vector.tuple", <vscale x 1 x i8>, 3) %0, i32 1)
- ret <vscale x 1 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 1 x i8>, 3) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 2 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv2i8_3t(target("riscv.vector.tuple", <vscale x 2 x i8>, 3), ptr, i32, i32)
-declare target("riscv.vector.tuple", <vscale x 2 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv2i8_3t.nxv2i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 3), ptr, <vscale x 2 x i1>, i32, i32, i32)
-
-define <vscale x 2 x i8> @test_vlseg3_nxv2i8_triscv.vector.tuple_nxv2i8_3t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 3) @test_vlseg3_nxv2i8_triscv.vector.tuple_nxv2i8_3t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg3_nxv2i8_triscv.vector.tuple_nxv2i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
-; CHECK-NEXT: vlseg3e8.v v7, (a0)
+; CHECK-NEXT: vlseg3e8.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv2i8_3t(target("riscv.vector.tuple", <vscale x 2 x i8>, 3) undef, ptr %base, i32 %vl, i32 3)
- %1 = call <vscale x 2 x i8> @llvm.riscv.tuple.extract.nxv2i8.triscv.vector.tuple_nxv2i8_3t(target("riscv.vector.tuple", <vscale x 2 x i8>, 3) %0, i32 1)
- ret <vscale x 2 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 3) %0
}
-
-define <vscale x 2 x i8> @test_vlseg3_mask_nxv2i8_triscv.vector.tuple_nxv2i8_3t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 3) @test_vlseg3_mask_nxv2i8_triscv.vector.tuple_nxv2i8_3t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg3_mask_nxv2i8_triscv.vector.tuple_nxv2i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
-; CHECK-NEXT: vlseg3e8.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg3e8.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv2i8_3t.nxv2i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 3) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 3)
- %1 = call <vscale x 2 x i8> @llvm.riscv.tuple.extract.nxv2i8.triscv.vector.tuple_nxv2i8_3t(target("riscv.vector.tuple", <vscale x 2 x i8>, 3) %0, i32 1)
- ret <vscale x 2 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 3) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3), ptr, i32, i32)
-declare target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv4i8_3t.nxv4i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 3), ptr, <vscale x 4 x i1>, i32, i32, i32)
-
-define <vscale x 4 x i8> @test_vlseg3_nxv4i8_triscv.vector.tuple_nxv4i8_3t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @test_vlseg3_nxv4i8_triscv.vector.tuple_nxv4i8_3t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg3_nxv4i8_triscv.vector.tuple_nxv4i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
-; CHECK-NEXT: vlseg3e8.v v7, (a0)
+; CHECK-NEXT: vlseg3e8.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) undef, ptr %base, i32 %vl, i32 3)
- %1 = call <vscale x 4 x i8> @llvm.riscv.tuple.extract.nxv4i8.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0, i32 1)
- ret <vscale x 4 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0
}
-
-define <vscale x 4 x i8> @test_vlseg3_mask_nxv4i8_triscv.vector.tuple_nxv4i8_3t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @test_vlseg3_mask_nxv4i8_triscv.vector.tuple_nxv4i8_3t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: test_vlseg3_mask_nxv4i8_triscv.vector.tuple_nxv4i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
-; CHECK-NEXT: vlseg3e8.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg3e8.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv4i8_3t.nxv4i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) undef, ptr %base, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 3)
- %1 = call <vscale x 4 x i8> @llvm.riscv.tuple.extract.nxv4i8.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0, i32 1)
- ret <vscale x 4 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3), ptr, i32, i32)
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv8i8_3t.nxv8i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 3), ptr, <vscale x 8 x i1>, i32, i32, i32)
-
-define <vscale x 8 x i8> @test_vlseg3_nxv8i8_triscv.vector.tuple_nxv8i8_3t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @test_vlseg3_nxv8i8_triscv.vector.tuple_nxv8i8_3t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg3_nxv8i8_triscv.vector.tuple_nxv8i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
-; CHECK-NEXT: vlseg3e8.v v7, (a0)
+; CHECK-NEXT: vlseg3e8.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) undef, ptr %base, i32 %vl, i32 3)
- %1 = call <vscale x 8 x i8> @llvm.riscv.tuple.extract.nxv8i8.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0, i32 1)
- ret <vscale x 8 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0
}
-
-define <vscale x 8 x i8> @test_vlseg3_mask_nxv8i8_triscv.vector.tuple_nxv8i8_3t(ptr %base, i32 %vl, <vscale x 8 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @test_vlseg3_mask_nxv8i8_triscv.vector.tuple_nxv8i8_3t(ptr %base, i32 %vl, <vscale x 8 x i1> %mask) {
; CHECK-LABEL: test_vlseg3_mask_nxv8i8_triscv.vector.tuple_nxv8i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
-; CHECK-NEXT: vlseg3e8.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg3e8.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv8i8_3t.nxv8i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) undef, ptr %base, <vscale x 8 x i1> %mask, i32 %vl, i32 1, i32 3)
- %1 = call <vscale x 8 x i8> @llvm.riscv.tuple.extract.nxv8i8.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0, i32 1)
- ret <vscale x 8 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3), ptr, i32, i32)
-declare target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv16i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 3), ptr, <vscale x 16 x i1>, i32, i32, i32)
-
-define <vscale x 16 x i8> @test_vlseg3_nxv16i8_triscv.vector.tuple_nxv16i8_3t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @test_vlseg3_nxv16i8_triscv.vector.tuple_nxv16i8_3t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg3_nxv16i8_triscv.vector.tuple_nxv16i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
-; CHECK-NEXT: vlseg3e8.v v6, (a0)
+; CHECK-NEXT: vlseg3e8.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, i32 %vl, i32 3)
- %1 = call <vscale x 16 x i8> @llvm.riscv.tuple.extract.nxv16i8.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0, i32 1)
- ret <vscale x 16 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0
}
-
-define <vscale x 16 x i8> @test_vlseg3_mask_nxv16i8_triscv.vector.tuple_nxv16i8_3t(ptr %base, i32 %vl, <vscale x 16 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @test_vlseg3_mask_nxv16i8_triscv.vector.tuple_nxv16i8_3t(ptr %base, i32 %vl, <vscale x 16 x i1> %mask) {
; CHECK-LABEL: test_vlseg3_mask_nxv16i8_triscv.vector.tuple_nxv16i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
-; CHECK-NEXT: vlseg3e8.v v6, (a0), v0.t
+; CHECK-NEXT: vlseg3e8.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv16i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 16 x i1> %mask, i32 %vl, i32 1, i32 3)
- %1 = call <vscale x 16 x i8> @llvm.riscv.tuple.extract.nxv16i8.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0, i32 1)
- ret <vscale x 16 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 1 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv1i8_4t(target("riscv.vector.tuple", <vscale x 1 x i8>, 4), ptr, i32, i32)
-declare target("riscv.vector.tuple", <vscale x 1 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv1i8_4t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 4), ptr, <vscale x 1 x i1>, i32, i32, i32)
-
-define <vscale x 1 x i8> @test_vlseg4_nxv1i8_triscv.vector.tuple_nxv1i8_4t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 1 x i8>, 4) @test_vlseg4_nxv1i8_triscv.vector.tuple_nxv1i8_4t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg4_nxv1i8_triscv.vector.tuple_nxv1i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
-; CHECK-NEXT: vlseg4e8.v v7, (a0)
+; CHECK-NEXT: vlseg4e8.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv1i8_4t(target("riscv.vector.tuple", <vscale x 1 x i8>, 4) undef, ptr %base, i32 %vl, i32 3)
- %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_4t(target("riscv.vector.tuple", <vscale x 1 x i8>, 4) %0, i32 1)
- ret <vscale x 1 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 1 x i8>, 4) %0
}
-
-define <vscale x 1 x i8> @test_vlseg4_mask_nxv1i8_triscv.vector.tuple_nxv1i8_4t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 1 x i8>, 4) @test_vlseg4_mask_nxv1i8_triscv.vector.tuple_nxv1i8_4t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg4_mask_nxv1i8_triscv.vector.tuple_nxv1i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
-; CHECK-NEXT: vlseg4e8.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg4e8.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv1i8_4t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 4) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 3)
- %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_4t(target("riscv.vector.tuple", <vscale x 1 x i8>, 4) %0, i32 1)
- ret <vscale x 1 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 1 x i8>, 4) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 2 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv2i8_4t(target("riscv.vector.tuple", <vscale x 2 x i8>, 4), ptr, i32, i32)
-declare target("riscv.vector.tuple", <vscale x 2 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv2i8_4t.nxv2i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 4), ptr, <vscale x 2 x i1>, i32, i32, i32)
-
-define <vscale x 2 x i8> @test_vlseg4_nxv2i8_triscv.vector.tuple_nxv2i8_4t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 4) @test_vlseg4_nxv2i8_triscv.vector.tuple_nxv2i8_4t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg4_nxv2i8_triscv.vector.tuple_nxv2i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
-; CHECK-NEXT: vlseg4e8.v v7, (a0)
+; CHECK-NEXT: vlseg4e8.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv2i8_4t(target("riscv.vector.tuple", <vscale x 2 x i8>, 4) undef, ptr %base, i32 %vl, i32 3)
- %1 = call <vscale x 2 x i8> @llvm.riscv.tuple.extract.nxv2i8.triscv.vector.tuple_nxv2i8_4t(target("riscv.vector.tuple", <vscale x 2 x i8>, 4) %0, i32 1)
- ret <vscale x 2 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 4) %0
}
-
-define <vscale x 2 x i8> @test_vlseg4_mask_nxv2i8_triscv.vector.tuple_nxv2i8_4t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 4) @test_vlseg4_mask_nxv2i8_triscv.vector.tuple_nxv2i8_4t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg4_mask_nxv2i8_triscv.vector.tuple_nxv2i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
-; CHECK-NEXT: vlseg4e8.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg4e8.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv2i8_4t.nxv2i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 4) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 3)
- %1 = call <vscale x 2 x i8> @llvm.riscv.tuple.extract.nxv2i8.triscv.vector.tuple_nxv2i8_4t(target("riscv.vector.tuple", <vscale x 2 x i8>, 4) %0, i32 1)
- ret <vscale x 2 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 4) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4), ptr, i32, i32)
-declare target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv4i8_4t.nxv4i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 4), ptr, <vscale x 4 x i1>, i32, i32, i32)
-
-define <vscale x 4 x i8> @test_vlseg4_nxv4i8_triscv.vector.tuple_nxv4i8_4t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @test_vlseg4_nxv4i8_triscv.vector.tuple_nxv4i8_4t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg4_nxv4i8_triscv.vector.tuple_nxv4i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
-; CHECK-NEXT: vlseg4e8.v v7, (a0)
+; CHECK-NEXT: vlseg4e8.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) undef, ptr %base, i32 %vl, i32 3)
- %1 = call <vscale x 4 x i8> @llvm.riscv.tuple.extract.nxv4i8.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0, i32 1)
- ret <vscale x 4 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0
}
-
-define <vscale x 4 x i8> @test_vlseg4_mask_nxv4i8_triscv.vector.tuple_nxv4i8_4t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @test_vlseg4_mask_nxv4i8_triscv.vector.tuple_nxv4i8_4t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: test_vlseg4_mask_nxv4i8_triscv.vector.tuple_nxv4i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
-; CHECK-NEXT: vlseg4e8.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg4e8.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv4i8_4t.nxv4i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) undef, ptr %base, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 3)
- %1 = call <vscale x 4 x i8> @llvm.riscv.tuple.extract.nxv4i8.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0, i32 1)
- ret <vscale x 4 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4), ptr, i32, i32)
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv8i8_4t.nxv8i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 4), ptr, <vscale x 8 x i1>, i32, i32, i32)
-
-define <vscale x 8 x i8> @test_vlseg4_nxv8i8_triscv.vector.tuple_nxv8i8_4t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @test_vlseg4_nxv8i8_triscv.vector.tuple_nxv8i8_4t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg4_nxv8i8_triscv.vector.tuple_nxv8i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
-; CHECK-NEXT: vlseg4e8.v v7, (a0)
+; CHECK-NEXT: vlseg4e8.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) undef, ptr %base, i32 %vl, i32 3)
- %1 = call <vscale x 8 x i8> @llvm.riscv.tuple.extract.nxv8i8.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0, i32 1)
- ret <vscale x 8 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0
}
-
-define <vscale x 8 x i8> @test_vlseg4_mask_nxv8i8_triscv.vector.tuple_nxv8i8_4t(ptr %base, i32 %vl, <vscale x 8 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @test_vlseg4_mask_nxv8i8_triscv.vector.tuple_nxv8i8_4t(ptr %base, i32 %vl, <vscale x 8 x i1> %mask) {
; CHECK-LABEL: test_vlseg4_mask_nxv8i8_triscv.vector.tuple_nxv8i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
-; CHECK-NEXT: vlseg4e8.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg4e8.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv8i8_4t.nxv8i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) undef, ptr %base, <vscale x 8 x i1> %mask, i32 %vl, i32 1, i32 3)
- %1 = call <vscale x 8 x i8> @llvm.riscv.tuple.extract.nxv8i8.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0, i32 1)
- ret <vscale x 8 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4), ptr, i32, i32)
-declare target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv16i8_4t.nxv16i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 4), ptr, <vscale x 16 x i1>, i32, i32, i32)
-
-define <vscale x 16 x i8> @test_vlseg4_nxv16i8_triscv.vector.tuple_nxv16i8_4t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @test_vlseg4_nxv16i8_triscv.vector.tuple_nxv16i8_4t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg4_nxv16i8_triscv.vector.tuple_nxv16i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
-; CHECK-NEXT: vlseg4e8.v v6, (a0)
+; CHECK-NEXT: vlseg4e8.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, i32 %vl, i32 3)
- %1 = call <vscale x 16 x i8> @llvm.riscv.tuple.extract.nxv16i8.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0, i32 1)
- ret <vscale x 16 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0
}
-
-define <vscale x 16 x i8> @test_vlseg4_mask_nxv16i8_triscv.vector.tuple_nxv16i8_4t(ptr %base, i32 %vl, <vscale x 16 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @test_vlseg4_mask_nxv16i8_triscv.vector.tuple_nxv16i8_4t(ptr %base, i32 %vl, <vscale x 16 x i1> %mask) {
; CHECK-LABEL: test_vlseg4_mask_nxv16i8_triscv.vector.tuple_nxv16i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
-; CHECK-NEXT: vlseg4e8.v v6, (a0), v0.t
+; CHECK-NEXT: vlseg4e8.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv16i8_4t.nxv16i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 16 x i1> %mask, i32 %vl, i32 1, i32 3)
- %1 = call <vscale x 16 x i8> @llvm.riscv.tuple.extract.nxv16i8.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0, i32 1)
- ret <vscale x 16 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 1 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv1i8_5t(target("riscv.vector.tuple", <vscale x 1 x i8>, 5), ptr, i32, i32)
-declare target("riscv.vector.tuple", <vscale x 1 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv1i8_5t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 5), ptr, <vscale x 1 x i1>, i32, i32, i32)
-
-define <vscale x 1 x i8> @test_vlseg5_nxv1i8_triscv.vector.tuple_nxv1i8_5t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 1 x i8>, 5) @test_vlseg5_nxv1i8_triscv.vector.tuple_nxv1i8_5t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg5_nxv1i8_triscv.vector.tuple_nxv1i8_5t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
-; CHECK-NEXT: vlseg5e8.v v7, (a0)
+; CHECK-NEXT: vlseg5e8.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv1i8_5t(target("riscv.vector.tuple", <vscale x 1 x i8>, 5) undef, ptr %base, i32 %vl, i32 3)
- %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_5t(target("riscv.vector.tuple", <vscale x 1 x i8>, 5) %0, i32 1)
- ret <vscale x 1 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 1 x i8>, 5) %0
}
-
-define <vscale x 1 x i8> @test_vlseg5_mask_nxv1i8_triscv.vector.tuple_nxv1i8_5t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 1 x i8>, 5) @test_vlseg5_mask_nxv1i8_triscv.vector.tuple_nxv1i8_5t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg5_mask_nxv1i8_triscv.vector.tuple_nxv1i8_5t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
-; CHECK-NEXT: vlseg5e8.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg5e8.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv1i8_5t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 5) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 3)
- %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_5t(target("riscv.vector.tuple", <vscale x 1 x i8>, 5) %0, i32 1)
- ret <vscale x 1 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 1 x i8>, 5) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 2 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv2i8_5t(target("riscv.vector.tuple", <vscale x 2 x i8>, 5), ptr, i32, i32)
-declare target("riscv.vector.tuple", <vscale x 2 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv2i8_5t.nxv2i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 5), ptr, <vscale x 2 x i1>, i32, i32, i32)
-
-define <vscale x 2 x i8> @test_vlseg5_nxv2i8_triscv.vector.tuple_nxv2i8_5t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 5) @test_vlseg5_nxv2i8_triscv.vector.tuple_nxv2i8_5t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg5_nxv2i8_triscv.vector.tuple_nxv2i8_5t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
-; CHECK-NEXT: vlseg5e8.v v7, (a0)
+; CHECK-NEXT: vlseg5e8.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv2i8_5t(target("riscv.vector.tuple", <vscale x 2 x i8>, 5) undef, ptr %base, i32 %vl, i32 3)
- %1 = call <vscale x 2 x i8> @llvm.riscv.tuple.extract.nxv2i8.triscv.vector.tuple_nxv2i8_5t(target("riscv.vector.tuple", <vscale x 2 x i8>, 5) %0, i32 1)
- ret <vscale x 2 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 5) %0
}
-
-define <vscale x 2 x i8> @test_vlseg5_mask_nxv2i8_triscv.vector.tuple_nxv2i8_5t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 5) @test_vlseg5_mask_nxv2i8_triscv.vector.tuple_nxv2i8_5t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg5_mask_nxv2i8_triscv.vector.tuple_nxv2i8_5t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
-; CHECK-NEXT: vlseg5e8.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg5e8.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv2i8_5t.nxv2i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 5) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 3)
- %1 = call <vscale x 2 x i8> @llvm.riscv.tuple.extract.nxv2i8.triscv.vector.tuple_nxv2i8_5t(target("riscv.vector.tuple", <vscale x 2 x i8>, 5) %0, i32 1)
- ret <vscale x 2 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 5) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5), ptr, i32, i32)
-declare target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv4i8_5t.nxv4i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 5), ptr, <vscale x 4 x i1>, i32, i32, i32)
-
-define <vscale x 4 x i8> @test_vlseg5_nxv4i8_triscv.vector.tuple_nxv4i8_5t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @test_vlseg5_nxv4i8_triscv.vector.tuple_nxv4i8_5t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg5_nxv4i8_triscv.vector.tuple_nxv4i8_5t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
-; CHECK-NEXT: vlseg5e8.v v7, (a0)
+; CHECK-NEXT: vlseg5e8.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) undef, ptr %base, i32 %vl, i32 3)
- %1 = call <vscale x 4 x i8> @llvm.riscv.tuple.extract.nxv4i8.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0, i32 1)
- ret <vscale x 4 x i8> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0
x i8>, 5) %0 } - -define <vscale x 4 x i8> @test_vlseg5_mask_nxv4i8_triscv.vector.tuple_nxv4i8_5t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @test_vlseg5_mask_nxv4i8_triscv.vector.tuple_nxv4i8_5t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) { ; CHECK-LABEL: test_vlseg5_mask_nxv4i8_triscv.vector.tuple_nxv4i8_5t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma -; CHECK-NEXT: vlseg5e8.v v7, (a0), v0.t +; CHECK-NEXT: vlseg5e8.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv4i8_5t.nxv4i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) undef, ptr %base, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 3) - %1 = call <vscale x 4 x i8> @llvm.riscv.tuple.extract.nxv4i8.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0, i32 1) - ret <vscale x 4 x i8> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0 } - -declare target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5), ptr, i32, i32) -declare target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv8i8_5t.nxv8i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 5), ptr, <vscale x 8 x i1>, i32, i32, i32) - -define <vscale x 8 x i8> @test_vlseg5_nxv8i8_triscv.vector.tuple_nxv8i8_5t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @test_vlseg5_nxv8i8_triscv.vector.tuple_nxv8i8_5t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg5_nxv8i8_triscv.vector.tuple_nxv8i8_5t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma -; CHECK-NEXT: vlseg5e8.v v7, (a0) +; CHECK-NEXT: vlseg5e8.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) undef, ptr %base, i32 %vl, i32 3) - %1 = call <vscale x 8 x i8> @llvm.riscv.tuple.extract.nxv8i8.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0, i32 1) - ret <vscale x 8 x i8> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0 } - -define <vscale x 8 x i8> @test_vlseg5_mask_nxv8i8_triscv.vector.tuple_nxv8i8_5t(ptr %base, i32 %vl, <vscale x 8 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @test_vlseg5_mask_nxv8i8_triscv.vector.tuple_nxv8i8_5t(ptr %base, i32 %vl, <vscale x 8 x i1> %mask) { ; CHECK-LABEL: test_vlseg5_mask_nxv8i8_triscv.vector.tuple_nxv8i8_5t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma -; CHECK-NEXT: vlseg5e8.v v7, (a0), v0.t +; CHECK-NEXT: vlseg5e8.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv8i8_5t.nxv8i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) undef, ptr %base, <vscale x 8 x i1> %mask, i32 %vl, i32 1, i32 3) - %1 = call <vscale x 8 x i8> @llvm.riscv.tuple.extract.nxv8i8.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0, i32 1) - ret <vscale x 8 x i8> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0 } - -declare target("riscv.vector.tuple", <vscale x 1 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv1i8_6t(target("riscv.vector.tuple", <vscale x 1 x i8>, 6), ptr, i32, i32) 
-declare target("riscv.vector.tuple", <vscale x 1 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv1i8_6t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 6), ptr, <vscale x 1 x i1>, i32, i32, i32) - -define <vscale x 1 x i8> @test_vlseg6_nxv1i8_triscv.vector.tuple_nxv1i8_6t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 1 x i8>, 6) @test_vlseg6_nxv1i8_triscv.vector.tuple_nxv1i8_6t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg6_nxv1i8_triscv.vector.tuple_nxv1i8_6t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma -; CHECK-NEXT: vlseg6e8.v v7, (a0) +; CHECK-NEXT: vlseg6e8.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv1i8_6t(target("riscv.vector.tuple", <vscale x 1 x i8>, 6) undef, ptr %base, i32 %vl, i32 3) - %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_6t(target("riscv.vector.tuple", <vscale x 1 x i8>, 6) %0, i32 1) - ret <vscale x 1 x i8> %1 + ret target("riscv.vector.tuple", <vscale x 1 x i8>, 6) %0 } - -define <vscale x 1 x i8> @test_vlseg6_mask_nxv1i8_triscv.vector.tuple_nxv1i8_6t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 1 x i8>, 6) @test_vlseg6_mask_nxv1i8_triscv.vector.tuple_nxv1i8_6t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) { ; CHECK-LABEL: test_vlseg6_mask_nxv1i8_triscv.vector.tuple_nxv1i8_6t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma -; CHECK-NEXT: vlseg6e8.v v7, (a0), v0.t +; CHECK-NEXT: vlseg6e8.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv1i8_6t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 6) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 3) - %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_6t(target("riscv.vector.tuple", <vscale x 1 x i8>, 6) %0, i32 1) - ret <vscale x 1 x i8> %1 + ret target("riscv.vector.tuple", <vscale x 1 x i8>, 6) %0 } - -declare target("riscv.vector.tuple", <vscale x 2 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv2i8_6t(target("riscv.vector.tuple", <vscale x 2 x i8>, 6), ptr, i32, i32) -declare target("riscv.vector.tuple", <vscale x 2 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv2i8_6t.nxv2i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 6), ptr, <vscale x 2 x i1>, i32, i32, i32) - -define <vscale x 2 x i8> @test_vlseg6_nxv2i8_triscv.vector.tuple_nxv2i8_6t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 2 x i8>, 6) @test_vlseg6_nxv2i8_triscv.vector.tuple_nxv2i8_6t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg6_nxv2i8_triscv.vector.tuple_nxv2i8_6t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma -; CHECK-NEXT: vlseg6e8.v v7, (a0) +; CHECK-NEXT: vlseg6e8.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv2i8_6t(target("riscv.vector.tuple", <vscale x 2 x i8>, 6) undef, ptr %base, i32 %vl, i32 3) - %1 = call <vscale x 2 x i8> @llvm.riscv.tuple.extract.nxv2i8.triscv.vector.tuple_nxv2i8_6t(target("riscv.vector.tuple", <vscale x 2 x i8>, 6) %0, i32 1) - ret <vscale x 2 x i8> %1 + ret target("riscv.vector.tuple", <vscale x 2 x i8>, 6) %0 } - -define <vscale x 2 x i8> @test_vlseg6_mask_nxv2i8_triscv.vector.tuple_nxv2i8_6t(ptr %base, i32 
%vl, <vscale x 2 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 2 x i8>, 6) @test_vlseg6_mask_nxv2i8_triscv.vector.tuple_nxv2i8_6t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) { ; CHECK-LABEL: test_vlseg6_mask_nxv2i8_triscv.vector.tuple_nxv2i8_6t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma -; CHECK-NEXT: vlseg6e8.v v7, (a0), v0.t +; CHECK-NEXT: vlseg6e8.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv2i8_6t.nxv2i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 6) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 3) - %1 = call <vscale x 2 x i8> @llvm.riscv.tuple.extract.nxv2i8.triscv.vector.tuple_nxv2i8_6t(target("riscv.vector.tuple", <vscale x 2 x i8>, 6) %0, i32 1) - ret <vscale x 2 x i8> %1 + ret target("riscv.vector.tuple", <vscale x 2 x i8>, 6) %0 } - -declare target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6), ptr, i32, i32) -declare target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv4i8_6t.nxv4i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 6), ptr, <vscale x 4 x i1>, i32, i32, i32) - -define <vscale x 4 x i8> @test_vlseg6_nxv4i8_triscv.vector.tuple_nxv4i8_6t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @test_vlseg6_nxv4i8_triscv.vector.tuple_nxv4i8_6t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg6_nxv4i8_triscv.vector.tuple_nxv4i8_6t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma -; CHECK-NEXT: vlseg6e8.v v7, (a0) +; CHECK-NEXT: vlseg6e8.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) undef, ptr %base, i32 %vl, i32 3) - %1 = call <vscale x 4 x i8> @llvm.riscv.tuple.extract.nxv4i8.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0, i32 1) - ret <vscale x 4 x i8> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0 } - -define <vscale x 4 x i8> @test_vlseg6_mask_nxv4i8_triscv.vector.tuple_nxv4i8_6t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @test_vlseg6_mask_nxv4i8_triscv.vector.tuple_nxv4i8_6t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) { ; CHECK-LABEL: test_vlseg6_mask_nxv4i8_triscv.vector.tuple_nxv4i8_6t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma -; CHECK-NEXT: vlseg6e8.v v7, (a0), v0.t +; CHECK-NEXT: vlseg6e8.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv4i8_6t.nxv4i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) undef, ptr %base, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 3) - %1 = call <vscale x 4 x i8> @llvm.riscv.tuple.extract.nxv4i8.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0, i32 1) - ret <vscale x 4 x i8> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0 } - -declare target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6), ptr, i32, i32) -declare target("riscv.vector.tuple", <vscale x 8 x i8>, 6) 
@llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv8i8_6t.nxv8i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 6), ptr, <vscale x 8 x i1>, i32, i32, i32) - -define <vscale x 8 x i8> @test_vlseg6_nxv8i8_triscv.vector.tuple_nxv8i8_6t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @test_vlseg6_nxv8i8_triscv.vector.tuple_nxv8i8_6t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg6_nxv8i8_triscv.vector.tuple_nxv8i8_6t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma -; CHECK-NEXT: vlseg6e8.v v7, (a0) +; CHECK-NEXT: vlseg6e8.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) undef, ptr %base, i32 %vl, i32 3) - %1 = call <vscale x 8 x i8> @llvm.riscv.tuple.extract.nxv8i8.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0, i32 1) - ret <vscale x 8 x i8> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0 } - -define <vscale x 8 x i8> @test_vlseg6_mask_nxv8i8_triscv.vector.tuple_nxv8i8_6t(ptr %base, i32 %vl, <vscale x 8 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @test_vlseg6_mask_nxv8i8_triscv.vector.tuple_nxv8i8_6t(ptr %base, i32 %vl, <vscale x 8 x i1> %mask) { ; CHECK-LABEL: test_vlseg6_mask_nxv8i8_triscv.vector.tuple_nxv8i8_6t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma -; CHECK-NEXT: vlseg6e8.v v7, (a0), v0.t +; CHECK-NEXT: vlseg6e8.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv8i8_6t.nxv8i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) undef, ptr %base, <vscale x 8 x i1> %mask, i32 %vl, i32 1, i32 3) - %1 = call <vscale x 8 x i8> @llvm.riscv.tuple.extract.nxv8i8.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0, i32 1) - ret <vscale x 8 x i8> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0 } - -declare target("riscv.vector.tuple", <vscale x 1 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv1i8_7t(target("riscv.vector.tuple", <vscale x 1 x i8>, 7), ptr, i32, i32) -declare target("riscv.vector.tuple", <vscale x 1 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv1i8_7t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 7), ptr, <vscale x 1 x i1>, i32, i32, i32) - -define <vscale x 1 x i8> @test_vlseg7_nxv1i8_triscv.vector.tuple_nxv1i8_7t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 1 x i8>, 7) @test_vlseg7_nxv1i8_triscv.vector.tuple_nxv1i8_7t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg7_nxv1i8_triscv.vector.tuple_nxv1i8_7t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma -; CHECK-NEXT: vlseg7e8.v v7, (a0) +; CHECK-NEXT: vlseg7e8.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv1i8_7t(target("riscv.vector.tuple", <vscale x 1 x i8>, 7) undef, ptr %base, i32 %vl, i32 3) - %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_7t(target("riscv.vector.tuple", <vscale x 1 x i8>, 7) %0, i32 1) - ret <vscale x 1 x i8> %1 + ret target("riscv.vector.tuple", <vscale x 1 x i8>, 7) %0 } - -define <vscale x 1 x i8> @test_vlseg7_mask_nxv1i8_triscv.vector.tuple_nxv1i8_7t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) { +define 
target("riscv.vector.tuple", <vscale x 1 x i8>, 7) @test_vlseg7_mask_nxv1i8_triscv.vector.tuple_nxv1i8_7t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) { ; CHECK-LABEL: test_vlseg7_mask_nxv1i8_triscv.vector.tuple_nxv1i8_7t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma -; CHECK-NEXT: vlseg7e8.v v7, (a0), v0.t +; CHECK-NEXT: vlseg7e8.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv1i8_7t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 7) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 3) - %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_7t(target("riscv.vector.tuple", <vscale x 1 x i8>, 7) %0, i32 1) - ret <vscale x 1 x i8> %1 + ret target("riscv.vector.tuple", <vscale x 1 x i8>, 7) %0 } - -declare target("riscv.vector.tuple", <vscale x 2 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv2i8_7t(target("riscv.vector.tuple", <vscale x 2 x i8>, 7), ptr, i32, i32) -declare target("riscv.vector.tuple", <vscale x 2 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv2i8_7t.nxv2i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 7), ptr, <vscale x 2 x i1>, i32, i32, i32) - -define <vscale x 2 x i8> @test_vlseg7_nxv2i8_triscv.vector.tuple_nxv2i8_7t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 2 x i8>, 7) @test_vlseg7_nxv2i8_triscv.vector.tuple_nxv2i8_7t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg7_nxv2i8_triscv.vector.tuple_nxv2i8_7t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma -; CHECK-NEXT: vlseg7e8.v v7, (a0) +; CHECK-NEXT: vlseg7e8.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv2i8_7t(target("riscv.vector.tuple", <vscale x 2 x i8>, 7) undef, ptr %base, i32 %vl, i32 3) - %1 = call <vscale x 2 x i8> @llvm.riscv.tuple.extract.nxv2i8.triscv.vector.tuple_nxv2i8_7t(target("riscv.vector.tuple", <vscale x 2 x i8>, 7) %0, i32 1) - ret <vscale x 2 x i8> %1 + ret target("riscv.vector.tuple", <vscale x 2 x i8>, 7) %0 } - -define <vscale x 2 x i8> @test_vlseg7_mask_nxv2i8_triscv.vector.tuple_nxv2i8_7t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 2 x i8>, 7) @test_vlseg7_mask_nxv2i8_triscv.vector.tuple_nxv2i8_7t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) { ; CHECK-LABEL: test_vlseg7_mask_nxv2i8_triscv.vector.tuple_nxv2i8_7t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma -; CHECK-NEXT: vlseg7e8.v v7, (a0), v0.t +; CHECK-NEXT: vlseg7e8.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv2i8_7t.nxv2i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 7) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 3) - %1 = call <vscale x 2 x i8> @llvm.riscv.tuple.extract.nxv2i8.triscv.vector.tuple_nxv2i8_7t(target("riscv.vector.tuple", <vscale x 2 x i8>, 7) %0, i32 1) - ret <vscale x 2 x i8> %1 + ret target("riscv.vector.tuple", <vscale x 2 x i8>, 7) %0 } - -declare target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7), ptr, i32, i32) -declare target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv4i8_7t.nxv4i1(target("riscv.vector.tuple", 
<vscale x 4 x i8>, 7), ptr, <vscale x 4 x i1>, i32, i32, i32) - -define <vscale x 4 x i8> @test_vlseg7_nxv4i8_triscv.vector.tuple_nxv4i8_7t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @test_vlseg7_nxv4i8_triscv.vector.tuple_nxv4i8_7t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg7_nxv4i8_triscv.vector.tuple_nxv4i8_7t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma -; CHECK-NEXT: vlseg7e8.v v7, (a0) +; CHECK-NEXT: vlseg7e8.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) undef, ptr %base, i32 %vl, i32 3) - %1 = call <vscale x 4 x i8> @llvm.riscv.tuple.extract.nxv4i8.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0, i32 1) - ret <vscale x 4 x i8> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0 } - -define <vscale x 4 x i8> @test_vlseg7_mask_nxv4i8_triscv.vector.tuple_nxv4i8_7t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @test_vlseg7_mask_nxv4i8_triscv.vector.tuple_nxv4i8_7t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) { ; CHECK-LABEL: test_vlseg7_mask_nxv4i8_triscv.vector.tuple_nxv4i8_7t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma -; CHECK-NEXT: vlseg7e8.v v7, (a0), v0.t +; CHECK-NEXT: vlseg7e8.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv4i8_7t.nxv4i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) undef, ptr %base, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 3) - %1 = call <vscale x 4 x i8> @llvm.riscv.tuple.extract.nxv4i8.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0, i32 1) - ret <vscale x 4 x i8> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0 } - -declare target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7), ptr, i32, i32) -declare target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv8i8_7t.nxv8i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 7), ptr, <vscale x 8 x i1>, i32, i32, i32) - -define <vscale x 8 x i8> @test_vlseg7_nxv8i8_triscv.vector.tuple_nxv8i8_7t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @test_vlseg7_nxv8i8_triscv.vector.tuple_nxv8i8_7t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg7_nxv8i8_triscv.vector.tuple_nxv8i8_7t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma -; CHECK-NEXT: vlseg7e8.v v7, (a0) +; CHECK-NEXT: vlseg7e8.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) undef, ptr %base, i32 %vl, i32 3) - %1 = call <vscale x 8 x i8> @llvm.riscv.tuple.extract.nxv8i8.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0, i32 1) - ret <vscale x 8 x i8> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0 } - -define <vscale x 8 x i8> @test_vlseg7_mask_nxv8i8_triscv.vector.tuple_nxv8i8_7t(ptr %base, i32 %vl, <vscale x 8 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @test_vlseg7_mask_nxv8i8_triscv.vector.tuple_nxv8i8_7t(ptr 
%base, i32 %vl, <vscale x 8 x i1> %mask) { ; CHECK-LABEL: test_vlseg7_mask_nxv8i8_triscv.vector.tuple_nxv8i8_7t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma -; CHECK-NEXT: vlseg7e8.v v7, (a0), v0.t +; CHECK-NEXT: vlseg7e8.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv8i8_7t.nxv8i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) undef, ptr %base, <vscale x 8 x i1> %mask, i32 %vl, i32 1, i32 3) - %1 = call <vscale x 8 x i8> @llvm.riscv.tuple.extract.nxv8i8.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0, i32 1) - ret <vscale x 8 x i8> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0 } - -declare target("riscv.vector.tuple", <vscale x 1 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv1i8_8t(target("riscv.vector.tuple", <vscale x 1 x i8>, 8), ptr, i32, i32) -declare target("riscv.vector.tuple", <vscale x 1 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv1i8_8t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 8), ptr, <vscale x 1 x i1>, i32, i32, i32) - -define <vscale x 1 x i8> @test_vlseg8_nxv1i8_triscv.vector.tuple_nxv1i8_8t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 1 x i8>, 8) @test_vlseg8_nxv1i8_triscv.vector.tuple_nxv1i8_8t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg8_nxv1i8_triscv.vector.tuple_nxv1i8_8t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma -; CHECK-NEXT: vlseg8e8.v v7, (a0) +; CHECK-NEXT: vlseg8e8.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv1i8_8t(target("riscv.vector.tuple", <vscale x 1 x i8>, 8) undef, ptr %base, i32 %vl, i32 3) - %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_8t(target("riscv.vector.tuple", <vscale x 1 x i8>, 8) %0, i32 1) - ret <vscale x 1 x i8> %1 + ret target("riscv.vector.tuple", <vscale x 1 x i8>, 8) %0 } - -define <vscale x 1 x i8> @test_vlseg8_mask_nxv1i8_triscv.vector.tuple_nxv1i8_8t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 1 x i8>, 8) @test_vlseg8_mask_nxv1i8_triscv.vector.tuple_nxv1i8_8t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) { ; CHECK-LABEL: test_vlseg8_mask_nxv1i8_triscv.vector.tuple_nxv1i8_8t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma -; CHECK-NEXT: vlseg8e8.v v7, (a0), v0.t +; CHECK-NEXT: vlseg8e8.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv1i8_8t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 8) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 3) - %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_8t(target("riscv.vector.tuple", <vscale x 1 x i8>, 8) %0, i32 1) - ret <vscale x 1 x i8> %1 + ret target("riscv.vector.tuple", <vscale x 1 x i8>, 8) %0 } - -declare target("riscv.vector.tuple", <vscale x 2 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv2i8_8t(target("riscv.vector.tuple", <vscale x 2 x i8>, 8), ptr, i32, i32) -declare target("riscv.vector.tuple", <vscale x 2 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv2i8_8t.nxv2i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 8), ptr, <vscale x 2 x i1>, i32, i32, i32) - -define <vscale x 2 x i8> 
@test_vlseg8_nxv2i8_triscv.vector.tuple_nxv2i8_8t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 2 x i8>, 8) @test_vlseg8_nxv2i8_triscv.vector.tuple_nxv2i8_8t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg8_nxv2i8_triscv.vector.tuple_nxv2i8_8t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma -; CHECK-NEXT: vlseg8e8.v v7, (a0) +; CHECK-NEXT: vlseg8e8.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv2i8_8t(target("riscv.vector.tuple", <vscale x 2 x i8>, 8) undef, ptr %base, i32 %vl, i32 3) - %1 = call <vscale x 2 x i8> @llvm.riscv.tuple.extract.nxv2i8.triscv.vector.tuple_nxv2i8_8t(target("riscv.vector.tuple", <vscale x 2 x i8>, 8) %0, i32 1) - ret <vscale x 2 x i8> %1 + ret target("riscv.vector.tuple", <vscale x 2 x i8>, 8) %0 } - -define <vscale x 2 x i8> @test_vlseg8_mask_nxv2i8_triscv.vector.tuple_nxv2i8_8t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 2 x i8>, 8) @test_vlseg8_mask_nxv2i8_triscv.vector.tuple_nxv2i8_8t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) { ; CHECK-LABEL: test_vlseg8_mask_nxv2i8_triscv.vector.tuple_nxv2i8_8t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma -; CHECK-NEXT: vlseg8e8.v v7, (a0), v0.t +; CHECK-NEXT: vlseg8e8.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv2i8_8t.nxv2i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 8) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 3) - %1 = call <vscale x 2 x i8> @llvm.riscv.tuple.extract.nxv2i8.triscv.vector.tuple_nxv2i8_8t(target("riscv.vector.tuple", <vscale x 2 x i8>, 8) %0, i32 1) - ret <vscale x 2 x i8> %1 + ret target("riscv.vector.tuple", <vscale x 2 x i8>, 8) %0 } - -declare target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8), ptr, i32, i32) -declare target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv4i8_8t.nxv4i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 8), ptr, <vscale x 4 x i1>, i32, i32, i32) - -define <vscale x 4 x i8> @test_vlseg8_nxv4i8_triscv.vector.tuple_nxv4i8_8t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @test_vlseg8_nxv4i8_triscv.vector.tuple_nxv4i8_8t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg8_nxv4i8_triscv.vector.tuple_nxv4i8_8t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma -; CHECK-NEXT: vlseg8e8.v v7, (a0) +; CHECK-NEXT: vlseg8e8.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) undef, ptr %base, i32 %vl, i32 3) - %1 = call <vscale x 4 x i8> @llvm.riscv.tuple.extract.nxv4i8.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0, i32 1) - ret <vscale x 4 x i8> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0 } - -define <vscale x 4 x i8> @test_vlseg8_mask_nxv4i8_triscv.vector.tuple_nxv4i8_8t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @test_vlseg8_mask_nxv4i8_triscv.vector.tuple_nxv4i8_8t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) { ; CHECK-LABEL: 
test_vlseg8_mask_nxv4i8_triscv.vector.tuple_nxv4i8_8t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma -; CHECK-NEXT: vlseg8e8.v v7, (a0), v0.t +; CHECK-NEXT: vlseg8e8.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv4i8_8t.nxv4i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) undef, ptr %base, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 3) - %1 = call <vscale x 4 x i8> @llvm.riscv.tuple.extract.nxv4i8.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0, i32 1) - ret <vscale x 4 x i8> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0 } - -declare target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8), ptr, i32, i32) -declare target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv8i8_8t.nxv8i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 8), ptr, <vscale x 8 x i1>, i32, i32, i32) - -define <vscale x 8 x i8> @test_vlseg8_nxv8i8_triscv.vector.tuple_nxv8i8_8t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @test_vlseg8_nxv8i8_triscv.vector.tuple_nxv8i8_8t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg8_nxv8i8_triscv.vector.tuple_nxv8i8_8t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma -; CHECK-NEXT: vlseg8e8.v v7, (a0) +; CHECK-NEXT: vlseg8e8.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) undef, ptr %base, i32 %vl, i32 3) - %1 = call <vscale x 8 x i8> @llvm.riscv.tuple.extract.nxv8i8.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0, i32 1) - ret <vscale x 8 x i8> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0 } - -define <vscale x 8 x i8> @test_vlseg8_mask_nxv8i8_triscv.vector.tuple_nxv8i8_8t(ptr %base, i32 %vl, <vscale x 8 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @test_vlseg8_mask_nxv8i8_triscv.vector.tuple_nxv8i8_8t(ptr %base, i32 %vl, <vscale x 8 x i1> %mask) { ; CHECK-LABEL: test_vlseg8_mask_nxv8i8_triscv.vector.tuple_nxv8i8_8t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma -; CHECK-NEXT: vlseg8e8.v v7, (a0), v0.t +; CHECK-NEXT: vlseg8e8.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv8i8_8t.nxv8i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) undef, ptr %base, <vscale x 8 x i1> %mask, i32 %vl, i32 1, i32 3) - %1 = call <vscale x 8 x i8> @llvm.riscv.tuple.extract.nxv8i8.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0, i32 1) - ret <vscale x 8 x i8> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0 } - -declare target("riscv.vector.tuple", <vscale x 2 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv2i8_2t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 2), ptr, <vscale x 1 x i1>, i32, i32, i32) - -define <vscale x 1 x i16> @test_vlseg2_nxv1i16_triscv.vector.tuple_nxv2i8_2t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 2 x i8>, 2) @test_vlseg2_nxv1i16_triscv.vector.tuple_nxv2i8_2t(ptr %base, i32 %vl) { ; CHECK-LABEL: 
test_vlseg2_nxv1i16_triscv.vector.tuple_nxv2i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma -; CHECK-NEXT: vlseg2e16.v v7, (a0) +; CHECK-NEXT: vlseg2e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv2i8_2t(target("riscv.vector.tuple", <vscale x 2 x i8>, 2) undef, ptr %base, i32 %vl, i32 4) - %1 = call <vscale x 1 x i16> @llvm.riscv.tuple.extract.nxv1i16.triscv.vector.tuple_nxv2i8_2t(target("riscv.vector.tuple", <vscale x 2 x i8>, 2) %0, i32 1) - ret <vscale x 1 x i16> %1 + ret target("riscv.vector.tuple", <vscale x 2 x i8>, 2) %0 } - -define <vscale x 1 x i16> @test_vlseg2_mask_nxv1i16_triscv.vector.tuple_nxv2i8_2t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 2 x i8>, 2) @test_vlseg2_mask_nxv1i16_triscv.vector.tuple_nxv2i8_2t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) { ; CHECK-LABEL: test_vlseg2_mask_nxv1i16_triscv.vector.tuple_nxv2i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma -; CHECK-NEXT: vlseg2e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg2e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv2i8_2t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 2) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 4) - %1 = call <vscale x 1 x i16> @llvm.riscv.tuple.extract.nxv1i16.triscv.vector.tuple_nxv2i8_2t(target("riscv.vector.tuple", <vscale x 2 x i8>, 2) %0, i32 1) - ret <vscale x 1 x i16> %1 + ret target("riscv.vector.tuple", <vscale x 2 x i8>, 2) %0 } - -declare target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv4i8_2t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 2), ptr, <vscale x 2 x i1>, i32, i32, i32) - -define <vscale x 2 x i16> @test_vlseg2_nxv2i16_triscv.vector.tuple_nxv4i8_2t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @test_vlseg2_nxv2i16_triscv.vector.tuple_nxv4i8_2t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg2_nxv2i16_triscv.vector.tuple_nxv4i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma -; CHECK-NEXT: vlseg2e16.v v7, (a0) +; CHECK-NEXT: vlseg2e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) undef, ptr %base, i32 %vl, i32 4) - %1 = call <vscale x 2 x i16> @llvm.riscv.tuple.extract.nxv2i16.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0, i32 1) - ret <vscale x 2 x i16> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0 } - -define <vscale x 2 x i16> @test_vlseg2_mask_nxv2i16_triscv.vector.tuple_nxv4i8_2t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @test_vlseg2_mask_nxv2i16_triscv.vector.tuple_nxv4i8_2t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) { ; CHECK-LABEL: test_vlseg2_mask_nxv2i16_triscv.vector.tuple_nxv4i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma -; CHECK-NEXT: vlseg2e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg2e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv4i8_2t.nxv2i1(target("riscv.vector.tuple", 
<vscale x 4 x i8>, 2) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 4) - %1 = call <vscale x 2 x i16> @llvm.riscv.tuple.extract.nxv2i16.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0, i32 1) - ret <vscale x 2 x i16> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0 } - -declare target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv8i8_2t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 2), ptr, <vscale x 4 x i1>, i32, i32, i32) - -define <vscale x 4 x i16> @test_vlseg2_nxv4i16_triscv.vector.tuple_nxv8i8_2t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @test_vlseg2_nxv4i16_triscv.vector.tuple_nxv8i8_2t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg2_nxv4i16_triscv.vector.tuple_nxv8i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vlseg2e16.v v7, (a0) +; CHECK-NEXT: vlseg2e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) undef, ptr %base, i32 %vl, i32 4) - %1 = call <vscale x 4 x i16> @llvm.riscv.tuple.extract.nxv4i16.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0, i32 1) - ret <vscale x 4 x i16> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0 } - -define <vscale x 4 x i16> @test_vlseg2_mask_nxv4i16_triscv.vector.tuple_nxv8i8_2t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @test_vlseg2_mask_nxv4i16_triscv.vector.tuple_nxv8i8_2t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) { ; CHECK-LABEL: test_vlseg2_mask_nxv4i16_triscv.vector.tuple_nxv8i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vlseg2e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg2e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv8i8_2t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) undef, ptr %base, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 4) - %1 = call <vscale x 4 x i16> @llvm.riscv.tuple.extract.nxv4i16.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0, i32 1) - ret <vscale x 4 x i16> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0 } - -declare target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv16i8_2t.nxv8i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 2), ptr, <vscale x 8 x i1>, i32, i32, i32) - -define <vscale x 8 x i16> @test_vlseg2_nxv8i16_triscv.vector.tuple_nxv16i8_2t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @test_vlseg2_nxv8i16_triscv.vector.tuple_nxv16i8_2t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg2_nxv8i16_triscv.vector.tuple_nxv16i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma -; CHECK-NEXT: vlseg2e16.v v6, (a0) +; CHECK-NEXT: vlseg2e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, i32 %vl, i32 4) - %1 = call <vscale x 8 x i16> @llvm.riscv.tuple.extract.nxv8i16.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) 
%0, i32 1) - ret <vscale x 8 x i16> %1 + ret target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0 } - -define <vscale x 8 x i16> @test_vlseg2_mask_nxv8i16_triscv.vector.tuple_nxv16i8_2t(ptr %base, i32 %vl, <vscale x 8 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @test_vlseg2_mask_nxv8i16_triscv.vector.tuple_nxv16i8_2t(ptr %base, i32 %vl, <vscale x 8 x i1> %mask) { ; CHECK-LABEL: test_vlseg2_mask_nxv8i16_triscv.vector.tuple_nxv16i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma -; CHECK-NEXT: vlseg2e16.v v6, (a0), v0.t +; CHECK-NEXT: vlseg2e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv16i8_2t.nxv8i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 8 x i1> %mask, i32 %vl, i32 1, i32 4) - %1 = call <vscale x 8 x i16> @llvm.riscv.tuple.extract.nxv8i16.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0, i32 1) - ret <vscale x 8 x i16> %1 + ret target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0 } - -declare target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv32i8_2t.nxv16i1(target("riscv.vector.tuple", <vscale x 32 x i8>, 2), ptr, <vscale x 16 x i1>, i32, i32, i32) - -define <vscale x 16 x i16> @test_vlseg2_nxv16i16_triscv.vector.tuple_nxv32i8_2t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @test_vlseg2_nxv16i16_triscv.vector.tuple_nxv32i8_2t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg2_nxv16i16_triscv.vector.tuple_nxv32i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma -; CHECK-NEXT: vlseg2e16.v v4, (a0) +; CHECK-NEXT: vlseg2e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, i32 %vl, i32 4) - %1 = call <vscale x 16 x i16> @llvm.riscv.tuple.extract.nxv16i16.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0, i32 1) - ret <vscale x 16 x i16> %1 + ret target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0 } - -define <vscale x 16 x i16> @test_vlseg2_mask_nxv16i16_triscv.vector.tuple_nxv32i8_2t(ptr %base, i32 %vl, <vscale x 16 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @test_vlseg2_mask_nxv16i16_triscv.vector.tuple_nxv32i8_2t(ptr %base, i32 %vl, <vscale x 16 x i1> %mask) { ; CHECK-LABEL: test_vlseg2_mask_nxv16i16_triscv.vector.tuple_nxv32i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma -; CHECK-NEXT: vlseg2e16.v v4, (a0), v0.t +; CHECK-NEXT: vlseg2e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv32i8_2t.nxv16i1(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 16 x i1> %mask, i32 %vl, i32 1, i32 4) - %1 = call <vscale x 16 x i16> @llvm.riscv.tuple.extract.nxv16i16.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0, i32 1) - ret <vscale x 16 x i16> %1 + ret target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0 } - -declare target("riscv.vector.tuple", <vscale x 2 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv2i8_3t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 3), ptr, 
<vscale x 1 x i1>, i32, i32, i32) - -define <vscale x 1 x i16> @test_vlseg3_nxv1i16_triscv.vector.tuple_nxv2i8_3t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 2 x i8>, 3) @test_vlseg3_nxv1i16_triscv.vector.tuple_nxv2i8_3t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg3_nxv1i16_triscv.vector.tuple_nxv2i8_3t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma -; CHECK-NEXT: vlseg3e16.v v7, (a0) +; CHECK-NEXT: vlseg3e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv2i8_3t(target("riscv.vector.tuple", <vscale x 2 x i8>, 3) undef, ptr %base, i32 %vl, i32 4) - %1 = call <vscale x 1 x i16> @llvm.riscv.tuple.extract.nxv1i16.triscv.vector.tuple_nxv2i8_3t(target("riscv.vector.tuple", <vscale x 2 x i8>, 3) %0, i32 1) - ret <vscale x 1 x i16> %1 + ret target("riscv.vector.tuple", <vscale x 2 x i8>, 3) %0 } - -define <vscale x 1 x i16> @test_vlseg3_mask_nxv1i16_triscv.vector.tuple_nxv2i8_3t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 2 x i8>, 3) @test_vlseg3_mask_nxv1i16_triscv.vector.tuple_nxv2i8_3t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) { ; CHECK-LABEL: test_vlseg3_mask_nxv1i16_triscv.vector.tuple_nxv2i8_3t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma -; CHECK-NEXT: vlseg3e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg3e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv2i8_3t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 3) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 4) - %1 = call <vscale x 1 x i16> @llvm.riscv.tuple.extract.nxv1i16.triscv.vector.tuple_nxv2i8_3t(target("riscv.vector.tuple", <vscale x 2 x i8>, 3) %0, i32 1) - ret <vscale x 1 x i16> %1 + ret target("riscv.vector.tuple", <vscale x 2 x i8>, 3) %0 } - -declare target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv4i8_3t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 3), ptr, <vscale x 2 x i1>, i32, i32, i32) - -define <vscale x 2 x i16> @test_vlseg3_nxv2i16_triscv.vector.tuple_nxv4i8_3t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @test_vlseg3_nxv2i16_triscv.vector.tuple_nxv4i8_3t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg3_nxv2i16_triscv.vector.tuple_nxv4i8_3t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma -; CHECK-NEXT: vlseg3e16.v v7, (a0) +; CHECK-NEXT: vlseg3e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) undef, ptr %base, i32 %vl, i32 4) - %1 = call <vscale x 2 x i16> @llvm.riscv.tuple.extract.nxv2i16.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0, i32 1) - ret <vscale x 2 x i16> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0 } - -define <vscale x 2 x i16> @test_vlseg3_mask_nxv2i16_triscv.vector.tuple_nxv4i8_3t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @test_vlseg3_mask_nxv2i16_triscv.vector.tuple_nxv4i8_3t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) { ; CHECK-LABEL: test_vlseg3_mask_nxv2i16_triscv.vector.tuple_nxv4i8_3t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, 
e16, mf2, ta, ma -; CHECK-NEXT: vlseg3e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg3e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv4i8_3t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 4) - %1 = call <vscale x 2 x i16> @llvm.riscv.tuple.extract.nxv2i16.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0, i32 1) - ret <vscale x 2 x i16> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0 } - -declare target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv8i8_3t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 3), ptr, <vscale x 4 x i1>, i32, i32, i32) - -define <vscale x 4 x i16> @test_vlseg3_nxv4i16_triscv.vector.tuple_nxv8i8_3t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @test_vlseg3_nxv4i16_triscv.vector.tuple_nxv8i8_3t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg3_nxv4i16_triscv.vector.tuple_nxv8i8_3t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vlseg3e16.v v7, (a0) +; CHECK-NEXT: vlseg3e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) undef, ptr %base, i32 %vl, i32 4) - %1 = call <vscale x 4 x i16> @llvm.riscv.tuple.extract.nxv4i16.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0, i32 1) - ret <vscale x 4 x i16> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0 } - -define <vscale x 4 x i16> @test_vlseg3_mask_nxv4i16_triscv.vector.tuple_nxv8i8_3t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @test_vlseg3_mask_nxv4i16_triscv.vector.tuple_nxv8i8_3t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) { ; CHECK-LABEL: test_vlseg3_mask_nxv4i16_triscv.vector.tuple_nxv8i8_3t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vlseg3e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg3e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv8i8_3t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) undef, ptr %base, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 4) - %1 = call <vscale x 4 x i16> @llvm.riscv.tuple.extract.nxv4i16.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0, i32 1) - ret <vscale x 4 x i16> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0 } - -declare target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv8i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 3), ptr, <vscale x 8 x i1>, i32, i32, i32) - -define <vscale x 8 x i16> @test_vlseg3_nxv8i16_triscv.vector.tuple_nxv16i8_3t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @test_vlseg3_nxv8i16_triscv.vector.tuple_nxv16i8_3t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg3_nxv8i16_triscv.vector.tuple_nxv16i8_3t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma -; CHECK-NEXT: vlseg3e16.v v6, (a0) +; CHECK-NEXT: vlseg3e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) 
@llvm.riscv.vlseg3.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, i32 %vl, i32 4) - %1 = call <vscale x 8 x i16> @llvm.riscv.tuple.extract.nxv8i16.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0, i32 1) - ret <vscale x 8 x i16> %1 + ret target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0 } - -define <vscale x 8 x i16> @test_vlseg3_mask_nxv8i16_triscv.vector.tuple_nxv16i8_3t(ptr %base, i32 %vl, <vscale x 8 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @test_vlseg3_mask_nxv8i16_triscv.vector.tuple_nxv16i8_3t(ptr %base, i32 %vl, <vscale x 8 x i1> %mask) { ; CHECK-LABEL: test_vlseg3_mask_nxv8i16_triscv.vector.tuple_nxv16i8_3t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma -; CHECK-NEXT: vlseg3e16.v v6, (a0), v0.t +; CHECK-NEXT: vlseg3e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv8i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 8 x i1> %mask, i32 %vl, i32 1, i32 4) - %1 = call <vscale x 8 x i16> @llvm.riscv.tuple.extract.nxv8i16.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0, i32 1) - ret <vscale x 8 x i16> %1 + ret target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0 } - -declare target("riscv.vector.tuple", <vscale x 2 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv2i8_4t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 4), ptr, <vscale x 1 x i1>, i32, i32, i32) - -define <vscale x 1 x i16> @test_vlseg4_nxv1i16_triscv.vector.tuple_nxv2i8_4t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 2 x i8>, 4) @test_vlseg4_nxv1i16_triscv.vector.tuple_nxv2i8_4t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg4_nxv1i16_triscv.vector.tuple_nxv2i8_4t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma -; CHECK-NEXT: vlseg4e16.v v7, (a0) +; CHECK-NEXT: vlseg4e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv2i8_4t(target("riscv.vector.tuple", <vscale x 2 x i8>, 4) undef, ptr %base, i32 %vl, i32 4) - %1 = call <vscale x 1 x i16> @llvm.riscv.tuple.extract.nxv1i16.triscv.vector.tuple_nxv2i8_4t(target("riscv.vector.tuple", <vscale x 2 x i8>, 4) %0, i32 1) - ret <vscale x 1 x i16> %1 + ret target("riscv.vector.tuple", <vscale x 2 x i8>, 4) %0 } - -define <vscale x 1 x i16> @test_vlseg4_mask_nxv1i16_triscv.vector.tuple_nxv2i8_4t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 2 x i8>, 4) @test_vlseg4_mask_nxv1i16_triscv.vector.tuple_nxv2i8_4t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) { ; CHECK-LABEL: test_vlseg4_mask_nxv1i16_triscv.vector.tuple_nxv2i8_4t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma -; CHECK-NEXT: vlseg4e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg4e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv2i8_4t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 4) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 4) - %1 = call <vscale x 1 x i16> @llvm.riscv.tuple.extract.nxv1i16.triscv.vector.tuple_nxv2i8_4t(target("riscv.vector.tuple", <vscale x 2 x i8>, 4) %0, i32 1) - ret <vscale x 1 x i16> %1 + ret 
target("riscv.vector.tuple", <vscale x 2 x i8>, 4) %0 } - -declare target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv4i8_4t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 4), ptr, <vscale x 2 x i1>, i32, i32, i32) - -define <vscale x 2 x i16> @test_vlseg4_nxv2i16_triscv.vector.tuple_nxv4i8_4t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @test_vlseg4_nxv2i16_triscv.vector.tuple_nxv4i8_4t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg4_nxv2i16_triscv.vector.tuple_nxv4i8_4t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma -; CHECK-NEXT: vlseg4e16.v v7, (a0) +; CHECK-NEXT: vlseg4e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) undef, ptr %base, i32 %vl, i32 4) - %1 = call <vscale x 2 x i16> @llvm.riscv.tuple.extract.nxv2i16.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0, i32 1) - ret <vscale x 2 x i16> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0 } - -define <vscale x 2 x i16> @test_vlseg4_mask_nxv2i16_triscv.vector.tuple_nxv4i8_4t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @test_vlseg4_mask_nxv2i16_triscv.vector.tuple_nxv4i8_4t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) { ; CHECK-LABEL: test_vlseg4_mask_nxv2i16_triscv.vector.tuple_nxv4i8_4t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma -; CHECK-NEXT: vlseg4e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg4e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv4i8_4t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 4) - %1 = call <vscale x 2 x i16> @llvm.riscv.tuple.extract.nxv2i16.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0, i32 1) - ret <vscale x 2 x i16> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0 } - -declare target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv8i8_4t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 4), ptr, <vscale x 4 x i1>, i32, i32, i32) - -define <vscale x 4 x i16> @test_vlseg4_nxv4i16_triscv.vector.tuple_nxv8i8_4t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @test_vlseg4_nxv4i16_triscv.vector.tuple_nxv8i8_4t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg4_nxv4i16_triscv.vector.tuple_nxv8i8_4t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vlseg4e16.v v7, (a0) +; CHECK-NEXT: vlseg4e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) undef, ptr %base, i32 %vl, i32 4) - %1 = call <vscale x 4 x i16> @llvm.riscv.tuple.extract.nxv4i16.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0, i32 1) - ret <vscale x 4 x i16> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0 } - -define <vscale x 4 x i16> @test_vlseg4_mask_nxv4i16_triscv.vector.tuple_nxv8i8_4t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 4) 
@test_vlseg4_mask_nxv4i16_triscv.vector.tuple_nxv8i8_4t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) { ; CHECK-LABEL: test_vlseg4_mask_nxv4i16_triscv.vector.tuple_nxv8i8_4t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vlseg4e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg4e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv8i8_4t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) undef, ptr %base, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 4) - %1 = call <vscale x 4 x i16> @llvm.riscv.tuple.extract.nxv4i16.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0, i32 1) - ret <vscale x 4 x i16> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0 } - -declare target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv16i8_4t.nxv8i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 4), ptr, <vscale x 8 x i1>, i32, i32, i32) - -define <vscale x 8 x i16> @test_vlseg4_nxv8i16_triscv.vector.tuple_nxv16i8_4t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @test_vlseg4_nxv8i16_triscv.vector.tuple_nxv16i8_4t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg4_nxv8i16_triscv.vector.tuple_nxv16i8_4t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma -; CHECK-NEXT: vlseg4e16.v v6, (a0) +; CHECK-NEXT: vlseg4e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, i32 %vl, i32 4) - %1 = call <vscale x 8 x i16> @llvm.riscv.tuple.extract.nxv8i16.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0, i32 1) - ret <vscale x 8 x i16> %1 + ret target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0 } - -define <vscale x 8 x i16> @test_vlseg4_mask_nxv8i16_triscv.vector.tuple_nxv16i8_4t(ptr %base, i32 %vl, <vscale x 8 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @test_vlseg4_mask_nxv8i16_triscv.vector.tuple_nxv16i8_4t(ptr %base, i32 %vl, <vscale x 8 x i1> %mask) { ; CHECK-LABEL: test_vlseg4_mask_nxv8i16_triscv.vector.tuple_nxv16i8_4t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma -; CHECK-NEXT: vlseg4e16.v v6, (a0), v0.t +; CHECK-NEXT: vlseg4e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv16i8_4t.nxv8i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 8 x i1> %mask, i32 %vl, i32 1, i32 4) - %1 = call <vscale x 8 x i16> @llvm.riscv.tuple.extract.nxv8i16.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0, i32 1) - ret <vscale x 8 x i16> %1 + ret target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0 } - -declare target("riscv.vector.tuple", <vscale x 2 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv2i8_5t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 5), ptr, <vscale x 1 x i1>, i32, i32, i32) - -define <vscale x 1 x i16> @test_vlseg5_nxv1i16_triscv.vector.tuple_nxv2i8_5t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 2 x i8>, 5) @test_vlseg5_nxv1i16_triscv.vector.tuple_nxv2i8_5t(ptr %base, i32 %vl) { ; CHECK-LABEL: 
test_vlseg5_nxv1i16_triscv.vector.tuple_nxv2i8_5t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma -; CHECK-NEXT: vlseg5e16.v v7, (a0) +; CHECK-NEXT: vlseg5e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv2i8_5t(target("riscv.vector.tuple", <vscale x 2 x i8>, 5) undef, ptr %base, i32 %vl, i32 4) - %1 = call <vscale x 1 x i16> @llvm.riscv.tuple.extract.nxv1i16.triscv.vector.tuple_nxv2i8_5t(target("riscv.vector.tuple", <vscale x 2 x i8>, 5) %0, i32 1) - ret <vscale x 1 x i16> %1 + ret target("riscv.vector.tuple", <vscale x 2 x i8>, 5) %0 } - -define <vscale x 1 x i16> @test_vlseg5_mask_nxv1i16_triscv.vector.tuple_nxv2i8_5t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 2 x i8>, 5) @test_vlseg5_mask_nxv1i16_triscv.vector.tuple_nxv2i8_5t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) { ; CHECK-LABEL: test_vlseg5_mask_nxv1i16_triscv.vector.tuple_nxv2i8_5t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma -; CHECK-NEXT: vlseg5e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg5e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv2i8_5t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 5) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 4) - %1 = call <vscale x 1 x i16> @llvm.riscv.tuple.extract.nxv1i16.triscv.vector.tuple_nxv2i8_5t(target("riscv.vector.tuple", <vscale x 2 x i8>, 5) %0, i32 1) - ret <vscale x 1 x i16> %1 + ret target("riscv.vector.tuple", <vscale x 2 x i8>, 5) %0 } - -declare target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv4i8_5t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 5), ptr, <vscale x 2 x i1>, i32, i32, i32) - -define <vscale x 2 x i16> @test_vlseg5_nxv2i16_triscv.vector.tuple_nxv4i8_5t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @test_vlseg5_nxv2i16_triscv.vector.tuple_nxv4i8_5t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg5_nxv2i16_triscv.vector.tuple_nxv4i8_5t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma -; CHECK-NEXT: vlseg5e16.v v7, (a0) +; CHECK-NEXT: vlseg5e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) undef, ptr %base, i32 %vl, i32 4) - %1 = call <vscale x 2 x i16> @llvm.riscv.tuple.extract.nxv2i16.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0, i32 1) - ret <vscale x 2 x i16> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0 } - -define <vscale x 2 x i16> @test_vlseg5_mask_nxv2i16_triscv.vector.tuple_nxv4i8_5t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @test_vlseg5_mask_nxv2i16_triscv.vector.tuple_nxv4i8_5t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) { ; CHECK-LABEL: test_vlseg5_mask_nxv2i16_triscv.vector.tuple_nxv4i8_5t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma -; CHECK-NEXT: vlseg5e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg5e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv4i8_5t.nxv2i1(target("riscv.vector.tuple", 
<vscale x 4 x i8>, 5) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 4) - %1 = call <vscale x 2 x i16> @llvm.riscv.tuple.extract.nxv2i16.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0, i32 1) - ret <vscale x 2 x i16> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0 } - -declare target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv8i8_5t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 5), ptr, <vscale x 4 x i1>, i32, i32, i32) - -define <vscale x 4 x i16> @test_vlseg5_nxv4i16_triscv.vector.tuple_nxv8i8_5t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @test_vlseg5_nxv4i16_triscv.vector.tuple_nxv8i8_5t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg5_nxv4i16_triscv.vector.tuple_nxv8i8_5t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vlseg5e16.v v7, (a0) +; CHECK-NEXT: vlseg5e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) undef, ptr %base, i32 %vl, i32 4) - %1 = call <vscale x 4 x i16> @llvm.riscv.tuple.extract.nxv4i16.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0, i32 1) - ret <vscale x 4 x i16> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0 } - -define <vscale x 4 x i16> @test_vlseg5_mask_nxv4i16_triscv.vector.tuple_nxv8i8_5t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @test_vlseg5_mask_nxv4i16_triscv.vector.tuple_nxv8i8_5t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) { ; CHECK-LABEL: test_vlseg5_mask_nxv4i16_triscv.vector.tuple_nxv8i8_5t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vlseg5e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg5e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv8i8_5t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) undef, ptr %base, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 4) - %1 = call <vscale x 4 x i16> @llvm.riscv.tuple.extract.nxv4i16.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0, i32 1) - ret <vscale x 4 x i16> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0 } - -declare target("riscv.vector.tuple", <vscale x 2 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv2i8_6t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 6), ptr, <vscale x 1 x i1>, i32, i32, i32) - -define <vscale x 1 x i16> @test_vlseg6_nxv1i16_triscv.vector.tuple_nxv2i8_6t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 2 x i8>, 6) @test_vlseg6_nxv1i16_triscv.vector.tuple_nxv2i8_6t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg6_nxv1i16_triscv.vector.tuple_nxv2i8_6t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma -; CHECK-NEXT: vlseg6e16.v v7, (a0) +; CHECK-NEXT: vlseg6e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv2i8_6t(target("riscv.vector.tuple", <vscale x 2 x i8>, 6) undef, ptr %base, i32 %vl, i32 4) - %1 = call <vscale x 1 x i16> @llvm.riscv.tuple.extract.nxv1i16.triscv.vector.tuple_nxv2i8_6t(target("riscv.vector.tuple", <vscale x 2 x i8>, 6) %0, i32 1) - 
-  ret <vscale x 1 x i16> %1
+  ret target("riscv.vector.tuple", <vscale x 2 x i8>, 6) %0
 }
-
-define <vscale x 1 x i16> @test_vlseg6_mask_nxv1i16_triscv.vector.tuple_nxv2i8_6t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 6) @test_vlseg6_mask_nxv1i16_triscv.vector.tuple_nxv2i8_6t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg6_mask_nxv1i16_triscv.vector.tuple_nxv2i8_6t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT:    vlseg6e16.v v7, (a0), v0.t
+; CHECK-NEXT:    vlseg6e16.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv2i8_6t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 6) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 4)
-  %1 = call <vscale x 1 x i16> @llvm.riscv.tuple.extract.nxv1i16.triscv.vector.tuple_nxv2i8_6t(target("riscv.vector.tuple", <vscale x 2 x i8>, 6) %0, i32 1)
-  ret <vscale x 1 x i16> %1
+  ret target("riscv.vector.tuple", <vscale x 2 x i8>, 6) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv4i8_6t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 6), ptr, <vscale x 2 x i1>, i32, i32, i32)
-
-define <vscale x 2 x i16> @test_vlseg6_nxv2i16_triscv.vector.tuple_nxv4i8_6t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @test_vlseg6_nxv2i16_triscv.vector.tuple_nxv4i8_6t(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg6_nxv2i16_triscv.vector.tuple_nxv4i8_6t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT:    vlseg6e16.v v7, (a0)
+; CHECK-NEXT:    vlseg6e16.v v8, (a0)
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) undef, ptr %base, i32 %vl, i32 4)
-  %1 = call <vscale x 2 x i16> @llvm.riscv.tuple.extract.nxv2i16.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0, i32 1)
-  ret <vscale x 2 x i16> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0
 }
-
-define <vscale x 2 x i16> @test_vlseg6_mask_nxv2i16_triscv.vector.tuple_nxv4i8_6t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @test_vlseg6_mask_nxv2i16_triscv.vector.tuple_nxv4i8_6t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg6_mask_nxv2i16_triscv.vector.tuple_nxv4i8_6t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT:    vlseg6e16.v v7, (a0), v0.t
+; CHECK-NEXT:    vlseg6e16.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv4i8_6t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 4)
-  %1 = call <vscale x 2 x i16> @llvm.riscv.tuple.extract.nxv2i16.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0, i32 1)
-  ret <vscale x 2 x i16> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv8i8_6t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 6), ptr, <vscale x 4 x i1>, i32, i32, i32)
-
-define <vscale x 4 x i16> @test_vlseg6_nxv4i16_triscv.vector.tuple_nxv8i8_6t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @test_vlseg6_nxv4i16_triscv.vector.tuple_nxv8i8_6t(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg6_nxv4i16_triscv.vector.tuple_nxv8i8_6t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT:    vlseg6e16.v v7, (a0)
+; CHECK-NEXT:    vlseg6e16.v v8, (a0)
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) undef, ptr %base, i32 %vl, i32 4)
-  %1 = call <vscale x 4 x i16> @llvm.riscv.tuple.extract.nxv4i16.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0, i32 1)
-  ret <vscale x 4 x i16> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0
 }
-
-define <vscale x 4 x i16> @test_vlseg6_mask_nxv4i16_triscv.vector.tuple_nxv8i8_6t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @test_vlseg6_mask_nxv4i16_triscv.vector.tuple_nxv8i8_6t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg6_mask_nxv4i16_triscv.vector.tuple_nxv8i8_6t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT:    vlseg6e16.v v7, (a0), v0.t
+; CHECK-NEXT:    vlseg6e16.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv8i8_6t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) undef, ptr %base, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 4)
-  %1 = call <vscale x 4 x i16> @llvm.riscv.tuple.extract.nxv4i16.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0, i32 1)
-  ret <vscale x 4 x i16> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 2 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv2i8_7t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 7), ptr, <vscale x 1 x i1>, i32, i32, i32)
-
-define <vscale x 1 x i16> @test_vlseg7_nxv1i16_triscv.vector.tuple_nxv2i8_7t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 7) @test_vlseg7_nxv1i16_triscv.vector.tuple_nxv2i8_7t(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg7_nxv1i16_triscv.vector.tuple_nxv2i8_7t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT:    vlseg7e16.v v7, (a0)
+; CHECK-NEXT:    vlseg7e16.v v8, (a0)
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv2i8_7t(target("riscv.vector.tuple", <vscale x 2 x i8>, 7) undef, ptr %base, i32 %vl, i32 4)
-  %1 = call <vscale x 1 x i16> @llvm.riscv.tuple.extract.nxv1i16.triscv.vector.tuple_nxv2i8_7t(target("riscv.vector.tuple", <vscale x 2 x i8>, 7) %0, i32 1)
-  ret <vscale x 1 x i16> %1
+  ret target("riscv.vector.tuple", <vscale x 2 x i8>, 7) %0
 }
-
-define <vscale x 1 x i16> @test_vlseg7_mask_nxv1i16_triscv.vector.tuple_nxv2i8_7t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 7) @test_vlseg7_mask_nxv1i16_triscv.vector.tuple_nxv2i8_7t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg7_mask_nxv1i16_triscv.vector.tuple_nxv2i8_7t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT:    vlseg7e16.v v7, (a0), v0.t
+; CHECK-NEXT:    vlseg7e16.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv2i8_7t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 7) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 4)
-  %1 = call <vscale x 1 x i16> @llvm.riscv.tuple.extract.nxv1i16.triscv.vector.tuple_nxv2i8_7t(target("riscv.vector.tuple", <vscale x 2 x i8>, 7) %0, i32 1)
-  ret <vscale x 1 x i16> %1
+  ret target("riscv.vector.tuple", <vscale x 2 x i8>, 7) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv4i8_7t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 7), ptr, <vscale x 2 x i1>, i32, i32, i32)
-
-define <vscale x 2 x i16> @test_vlseg7_nxv2i16_triscv.vector.tuple_nxv4i8_7t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @test_vlseg7_nxv2i16_triscv.vector.tuple_nxv4i8_7t(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg7_nxv2i16_triscv.vector.tuple_nxv4i8_7t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT:    vlseg7e16.v v7, (a0)
+; CHECK-NEXT:    vlseg7e16.v v8, (a0)
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) undef, ptr %base, i32 %vl, i32 4)
-  %1 = call <vscale x 2 x i16> @llvm.riscv.tuple.extract.nxv2i16.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0, i32 1)
-  ret <vscale x 2 x i16> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0
 }
-
-define <vscale x 2 x i16> @test_vlseg7_mask_nxv2i16_triscv.vector.tuple_nxv4i8_7t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @test_vlseg7_mask_nxv2i16_triscv.vector.tuple_nxv4i8_7t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg7_mask_nxv2i16_triscv.vector.tuple_nxv4i8_7t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT:    vlseg7e16.v v7, (a0), v0.t
+; CHECK-NEXT:    vlseg7e16.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv4i8_7t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 4)
-  %1 = call <vscale x 2 x i16> @llvm.riscv.tuple.extract.nxv2i16.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0, i32 1)
-  ret <vscale x 2 x i16> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv8i8_7t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 7), ptr, <vscale x 4 x i1>, i32, i32, i32)
-
-define <vscale x 4 x i16> @test_vlseg7_nxv4i16_triscv.vector.tuple_nxv8i8_7t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @test_vlseg7_nxv4i16_triscv.vector.tuple_nxv8i8_7t(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg7_nxv4i16_triscv.vector.tuple_nxv8i8_7t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT:    vlseg7e16.v v7, (a0)
+; CHECK-NEXT:    vlseg7e16.v v8, (a0)
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) undef, ptr %base, i32 %vl, i32 4)
-  %1 = call <vscale x 4 x i16> @llvm.riscv.tuple.extract.nxv4i16.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0, i32 1)
-  ret <vscale x 4 x i16> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0
 }
-
-define <vscale x 4 x i16> @test_vlseg7_mask_nxv4i16_triscv.vector.tuple_nxv8i8_7t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @test_vlseg7_mask_nxv4i16_triscv.vector.tuple_nxv8i8_7t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg7_mask_nxv4i16_triscv.vector.tuple_nxv8i8_7t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT:    vlseg7e16.v v7, (a0), v0.t
+; CHECK-NEXT:    vlseg7e16.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv8i8_7t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) undef, ptr %base, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 4)
-  %1 = call <vscale x 4 x i16> @llvm.riscv.tuple.extract.nxv4i16.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0, i32 1)
-  ret <vscale x 4 x i16> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 2 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv2i8_8t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 8), ptr, <vscale x 1 x i1>, i32, i32, i32)
-
-define <vscale x 1 x i16> @test_vlseg8_nxv1i16_triscv.vector.tuple_nxv2i8_8t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 8) @test_vlseg8_nxv1i16_triscv.vector.tuple_nxv2i8_8t(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg8_nxv1i16_triscv.vector.tuple_nxv2i8_8t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT:    vlseg8e16.v v7, (a0)
+; CHECK-NEXT:    vlseg8e16.v v8, (a0)
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv2i8_8t(target("riscv.vector.tuple", <vscale x 2 x i8>, 8) undef, ptr %base, i32 %vl, i32 4)
-  %1 = call <vscale x 1 x i16> @llvm.riscv.tuple.extract.nxv1i16.triscv.vector.tuple_nxv2i8_8t(target("riscv.vector.tuple", <vscale x 2 x i8>, 8) %0, i32 1)
-  ret <vscale x 1 x i16> %1
+  ret target("riscv.vector.tuple", <vscale x 2 x i8>, 8) %0
 }
-
-define <vscale x 1 x i16> @test_vlseg8_mask_nxv1i16_triscv.vector.tuple_nxv2i8_8t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 8) @test_vlseg8_mask_nxv1i16_triscv.vector.tuple_nxv2i8_8t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg8_mask_nxv1i16_triscv.vector.tuple_nxv2i8_8t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT:    vlseg8e16.v v7, (a0), v0.t
+; CHECK-NEXT:    vlseg8e16.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv2i8_8t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 8) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 4)
-  %1 = call <vscale x 1 x i16> @llvm.riscv.tuple.extract.nxv1i16.triscv.vector.tuple_nxv2i8_8t(target("riscv.vector.tuple", <vscale x 2 x i8>, 8) %0, i32 1)
-  ret <vscale x 1 x i16> %1
target("riscv.vector.tuple", <vscale x 2 x i8>, 8) %0 } - -declare target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv4i8_8t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 8), ptr, <vscale x 2 x i1>, i32, i32, i32) - -define <vscale x 2 x i16> @test_vlseg8_nxv2i16_triscv.vector.tuple_nxv4i8_8t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @test_vlseg8_nxv2i16_triscv.vector.tuple_nxv4i8_8t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg8_nxv2i16_triscv.vector.tuple_nxv4i8_8t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma -; CHECK-NEXT: vlseg8e16.v v7, (a0) +; CHECK-NEXT: vlseg8e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) undef, ptr %base, i32 %vl, i32 4) - %1 = call <vscale x 2 x i16> @llvm.riscv.tuple.extract.nxv2i16.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0, i32 1) - ret <vscale x 2 x i16> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0 } - -define <vscale x 2 x i16> @test_vlseg8_mask_nxv2i16_triscv.vector.tuple_nxv4i8_8t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @test_vlseg8_mask_nxv2i16_triscv.vector.tuple_nxv4i8_8t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) { ; CHECK-LABEL: test_vlseg8_mask_nxv2i16_triscv.vector.tuple_nxv4i8_8t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma -; CHECK-NEXT: vlseg8e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg8e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv4i8_8t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 4) - %1 = call <vscale x 2 x i16> @llvm.riscv.tuple.extract.nxv2i16.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0, i32 1) - ret <vscale x 2 x i16> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0 } - -declare target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv8i8_8t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 8), ptr, <vscale x 4 x i1>, i32, i32, i32) - -define <vscale x 4 x i16> @test_vlseg8_nxv4i16_triscv.vector.tuple_nxv8i8_8t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @test_vlseg8_nxv4i16_triscv.vector.tuple_nxv8i8_8t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg8_nxv4i16_triscv.vector.tuple_nxv8i8_8t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vlseg8e16.v v7, (a0) +; CHECK-NEXT: vlseg8e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) undef, ptr %base, i32 %vl, i32 4) - %1 = call <vscale x 4 x i16> @llvm.riscv.tuple.extract.nxv4i16.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0, i32 1) - ret <vscale x 4 x i16> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0 } - -define <vscale x 4 x i16> @test_vlseg8_mask_nxv4i16_triscv.vector.tuple_nxv8i8_8t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 8) 
@test_vlseg8_mask_nxv4i16_triscv.vector.tuple_nxv8i8_8t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) { ; CHECK-LABEL: test_vlseg8_mask_nxv4i16_triscv.vector.tuple_nxv8i8_8t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vlseg8e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg8e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv8i8_8t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) undef, ptr %base, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 4) - %1 = call <vscale x 4 x i16> @llvm.riscv.tuple.extract.nxv4i16.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0, i32 1) - ret <vscale x 4 x i16> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0 } - -declare target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv4i8_2t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 2), ptr, <vscale x 1 x i1>, i32, i32, i32) - -define <vscale x 1 x i32> @test_vlseg2_nxv1i32_triscv.vector.tuple_nxv4i8_2t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @test_vlseg2_nxv1i32_triscv.vector.tuple_nxv4i8_2t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg2_nxv1i32_triscv.vector.tuple_nxv4i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma -; CHECK-NEXT: vlseg2e32.v v7, (a0) +; CHECK-NEXT: vlseg2e32.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) undef, ptr %base, i32 %vl, i32 5) - %1 = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0, i32 1) - ret <vscale x 1 x i32> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0 } - -define <vscale x 1 x i32> @test_vlseg2_mask_nxv1i32_triscv.vector.tuple_nxv4i8_2t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @test_vlseg2_mask_nxv1i32_triscv.vector.tuple_nxv4i8_2t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) { ; CHECK-LABEL: test_vlseg2_mask_nxv1i32_triscv.vector.tuple_nxv4i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma -; CHECK-NEXT: vlseg2e32.v v7, (a0), v0.t +; CHECK-NEXT: vlseg2e32.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv4i8_2t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 5) - %1 = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0, i32 1) - ret <vscale x 1 x i32> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0 } - -declare target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv8i8_2t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 2), ptr, <vscale x 2 x i1>, i32, i32, i32) - -define <vscale x 2 x i32> @test_vlseg2_nxv2i32_triscv.vector.tuple_nxv8i8_2t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @test_vlseg2_nxv2i32_triscv.vector.tuple_nxv8i8_2t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg2_nxv2i32_triscv.vector.tuple_nxv8i8_2t: ; CHECK: # 
%bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma -; CHECK-NEXT: vlseg2e32.v v7, (a0) +; CHECK-NEXT: vlseg2e32.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) undef, ptr %base, i32 %vl, i32 5) - %1 = call <vscale x 2 x i32> @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0, i32 1) - ret <vscale x 2 x i32> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0 } - -define <vscale x 2 x i32> @test_vlseg2_mask_nxv2i32_triscv.vector.tuple_nxv8i8_2t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @test_vlseg2_mask_nxv2i32_triscv.vector.tuple_nxv8i8_2t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) { ; CHECK-LABEL: test_vlseg2_mask_nxv2i32_triscv.vector.tuple_nxv8i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma -; CHECK-NEXT: vlseg2e32.v v7, (a0), v0.t +; CHECK-NEXT: vlseg2e32.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv8i8_2t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 5) - %1 = call <vscale x 2 x i32> @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0, i32 1) - ret <vscale x 2 x i32> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0 } - -declare target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv16i8_2t.nxv4i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 2), ptr, <vscale x 4 x i1>, i32, i32, i32) - -define <vscale x 4 x i32> @test_vlseg2_nxv4i32_triscv.vector.tuple_nxv16i8_2t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @test_vlseg2_nxv4i32_triscv.vector.tuple_nxv16i8_2t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg2_nxv4i32_triscv.vector.tuple_nxv16i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; CHECK-NEXT: vlseg2e32.v v6, (a0) +; CHECK-NEXT: vlseg2e32.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, i32 %vl, i32 5) - %1 = call <vscale x 4 x i32> @llvm.riscv.tuple.extract.nxv4i32.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0, i32 1) - ret <vscale x 4 x i32> %1 + ret target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0 } - -define <vscale x 4 x i32> @test_vlseg2_mask_nxv4i32_triscv.vector.tuple_nxv16i8_2t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @test_vlseg2_mask_nxv4i32_triscv.vector.tuple_nxv16i8_2t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) { ; CHECK-LABEL: test_vlseg2_mask_nxv4i32_triscv.vector.tuple_nxv16i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma -; CHECK-NEXT: vlseg2e32.v v6, (a0), v0.t +; CHECK-NEXT: vlseg2e32.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv16i8_2t.nxv4i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale 
x 4 x i1> %mask, i32 %vl, i32 1, i32 5) - %1 = call <vscale x 4 x i32> @llvm.riscv.tuple.extract.nxv4i32.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0, i32 1) - ret <vscale x 4 x i32> %1 + ret target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0 } - -declare target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv32i8_2t.nxv8i1(target("riscv.vector.tuple", <vscale x 32 x i8>, 2), ptr, <vscale x 8 x i1>, i32, i32, i32) - -define <vscale x 8 x i32> @test_vlseg2_nxv8i32_triscv.vector.tuple_nxv32i8_2t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @test_vlseg2_nxv8i32_triscv.vector.tuple_nxv32i8_2t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg2_nxv8i32_triscv.vector.tuple_nxv32i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; CHECK-NEXT: vlseg2e32.v v4, (a0) +; CHECK-NEXT: vlseg2e32.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, i32 %vl, i32 5) - %1 = call <vscale x 8 x i32> @llvm.riscv.tuple.extract.nxv8i32.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0, i32 1) - ret <vscale x 8 x i32> %1 + ret target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0 } - -define <vscale x 8 x i32> @test_vlseg2_mask_nxv8i32_triscv.vector.tuple_nxv32i8_2t(ptr %base, i32 %vl, <vscale x 8 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @test_vlseg2_mask_nxv8i32_triscv.vector.tuple_nxv32i8_2t(ptr %base, i32 %vl, <vscale x 8 x i1> %mask) { ; CHECK-LABEL: test_vlseg2_mask_nxv8i32_triscv.vector.tuple_nxv32i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma -; CHECK-NEXT: vlseg2e32.v v4, (a0), v0.t +; CHECK-NEXT: vlseg2e32.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv32i8_2t.nxv8i1(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 8 x i1> %mask, i32 %vl, i32 1, i32 5) - %1 = call <vscale x 8 x i32> @llvm.riscv.tuple.extract.nxv8i32.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0, i32 1) - ret <vscale x 8 x i32> %1 + ret target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0 } - -declare target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv4i8_3t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 3), ptr, <vscale x 1 x i1>, i32, i32, i32) - -define <vscale x 1 x i32> @test_vlseg3_nxv1i32_triscv.vector.tuple_nxv4i8_3t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @test_vlseg3_nxv1i32_triscv.vector.tuple_nxv4i8_3t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg3_nxv1i32_triscv.vector.tuple_nxv4i8_3t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma -; CHECK-NEXT: vlseg3e32.v v7, (a0) +; CHECK-NEXT: vlseg3e32.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) undef, ptr %base, i32 %vl, i32 5) - %1 = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0, i32 1) - ret <vscale x 1 x i32> 
-  ret <vscale x 1 x i32> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0
 }
-
-define <vscale x 1 x i32> @test_vlseg3_mask_nxv1i32_triscv.vector.tuple_nxv4i8_3t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @test_vlseg3_mask_nxv1i32_triscv.vector.tuple_nxv4i8_3t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg3_mask_nxv1i32_triscv.vector.tuple_nxv4i8_3t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT:    vlseg3e32.v v7, (a0), v0.t
+; CHECK-NEXT:    vlseg3e32.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv4i8_3t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 5)
-  %1 = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0, i32 1)
-  ret <vscale x 1 x i32> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv8i8_3t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 3), ptr, <vscale x 2 x i1>, i32, i32, i32)
-
-define <vscale x 2 x i32> @test_vlseg3_nxv2i32_triscv.vector.tuple_nxv8i8_3t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @test_vlseg3_nxv2i32_triscv.vector.tuple_nxv8i8_3t(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg3_nxv2i32_triscv.vector.tuple_nxv8i8_3t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT:    vlseg3e32.v v7, (a0)
+; CHECK-NEXT:    vlseg3e32.v v8, (a0)
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) undef, ptr %base, i32 %vl, i32 5)
-  %1 = call <vscale x 2 x i32> @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0, i32 1)
-  ret <vscale x 2 x i32> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0
 }
-
-define <vscale x 2 x i32> @test_vlseg3_mask_nxv2i32_triscv.vector.tuple_nxv8i8_3t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @test_vlseg3_mask_nxv2i32_triscv.vector.tuple_nxv8i8_3t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg3_mask_nxv2i32_triscv.vector.tuple_nxv8i8_3t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT:    vlseg3e32.v v7, (a0), v0.t
+; CHECK-NEXT:    vlseg3e32.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv8i8_3t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 5)
-  %1 = call <vscale x 2 x i32> @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0, i32 1)
-  ret <vscale x 2 x i32> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv4i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 3), ptr, <vscale x 4 x i1>, i32, i32, i32)
-
-define <vscale x 4 x i32> @test_vlseg3_nxv4i32_triscv.vector.tuple_nxv16i8_3t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @test_vlseg3_nxv4i32_triscv.vector.tuple_nxv16i8_3t(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg3_nxv4i32_triscv.vector.tuple_nxv16i8_3t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT:    vlseg3e32.v v6, (a0)
+; CHECK-NEXT:    vlseg3e32.v v8, (a0)
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, i32 %vl, i32 5)
-  %1 = call <vscale x 4 x i32> @llvm.riscv.tuple.extract.nxv4i32.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0, i32 1)
-  ret <vscale x 4 x i32> %1
+  ret target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0
 }
-
-define <vscale x 4 x i32> @test_vlseg3_mask_nxv4i32_triscv.vector.tuple_nxv16i8_3t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @test_vlseg3_mask_nxv4i32_triscv.vector.tuple_nxv16i8_3t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg3_mask_nxv4i32_triscv.vector.tuple_nxv16i8_3t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT:    vlseg3e32.v v6, (a0), v0.t
+; CHECK-NEXT:    vlseg3e32.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv4i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 5)
-  %1 = call <vscale x 4 x i32> @llvm.riscv.tuple.extract.nxv4i32.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0, i32 1)
-  ret <vscale x 4 x i32> %1
+  ret target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv4i8_4t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 4), ptr, <vscale x 1 x i1>, i32, i32, i32)
-
-define <vscale x 1 x i32> @test_vlseg4_nxv1i32_triscv.vector.tuple_nxv4i8_4t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @test_vlseg4_nxv1i32_triscv.vector.tuple_nxv4i8_4t(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg4_nxv1i32_triscv.vector.tuple_nxv4i8_4t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT:    vlseg4e32.v v7, (a0)
+; CHECK-NEXT:    vlseg4e32.v v8, (a0)
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) undef, ptr %base, i32 %vl, i32 5)
-  %1 = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0, i32 1)
-  ret <vscale x 1 x i32> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0
 }
-
-define <vscale x 1 x i32> @test_vlseg4_mask_nxv1i32_triscv.vector.tuple_nxv4i8_4t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @test_vlseg4_mask_nxv1i32_triscv.vector.tuple_nxv4i8_4t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg4_mask_nxv1i32_triscv.vector.tuple_nxv4i8_4t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT:    vlseg4e32.v v7, (a0), v0.t
+; CHECK-NEXT:    vlseg4e32.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv4i8_4t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 5)
-  %1 = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0, i32 1)
-  ret <vscale x 1 x i32> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv8i8_4t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 4), ptr, <vscale x 2 x i1>, i32, i32, i32)
-
-define <vscale x 2 x i32> @test_vlseg4_nxv2i32_triscv.vector.tuple_nxv8i8_4t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @test_vlseg4_nxv2i32_triscv.vector.tuple_nxv8i8_4t(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg4_nxv2i32_triscv.vector.tuple_nxv8i8_4t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT:    vlseg4e32.v v7, (a0)
+; CHECK-NEXT:    vlseg4e32.v v8, (a0)
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) undef, ptr %base, i32 %vl, i32 5)
-  %1 = call <vscale x 2 x i32> @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0, i32 1)
-  ret <vscale x 2 x i32> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0
 }
-
-define <vscale x 2 x i32> @test_vlseg4_mask_nxv2i32_triscv.vector.tuple_nxv8i8_4t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @test_vlseg4_mask_nxv2i32_triscv.vector.tuple_nxv8i8_4t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg4_mask_nxv2i32_triscv.vector.tuple_nxv8i8_4t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT:    vlseg4e32.v v7, (a0), v0.t
+; CHECK-NEXT:    vlseg4e32.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv8i8_4t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 5)
-  %1 = call <vscale x 2 x i32> @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0, i32 1)
-  ret <vscale x 2 x i32> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv16i8_4t.nxv4i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 4), ptr, <vscale x 4 x i1>, i32, i32, i32)
-
-define <vscale x 4 x i32> @test_vlseg4_nxv4i32_triscv.vector.tuple_nxv16i8_4t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @test_vlseg4_nxv4i32_triscv.vector.tuple_nxv16i8_4t(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg4_nxv4i32_triscv.vector.tuple_nxv16i8_4t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT:    vlseg4e32.v v6, (a0)
+; CHECK-NEXT:    vlseg4e32.v v8, (a0)
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, i32 %vl, i32 5)
-  %1 = call <vscale x 4 x i32> @llvm.riscv.tuple.extract.nxv4i32.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0, i32 1)
-  ret <vscale x 4 x i32> %1
+  ret target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0
 }
-
-define <vscale x 4 x i32> @test_vlseg4_mask_nxv4i32_triscv.vector.tuple_nxv16i8_4t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @test_vlseg4_mask_nxv4i32_triscv.vector.tuple_nxv16i8_4t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg4_mask_nxv4i32_triscv.vector.tuple_nxv16i8_4t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT:    vlseg4e32.v v6, (a0), v0.t
+; CHECK-NEXT:    vlseg4e32.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv16i8_4t.nxv4i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 5)
-  %1 = call <vscale x 4 x i32> @llvm.riscv.tuple.extract.nxv4i32.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0, i32 1)
-  ret <vscale x 4 x i32> %1
+  ret target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv4i8_5t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 5), ptr, <vscale x 1 x i1>, i32, i32, i32)
-
-define <vscale x 1 x i32> @test_vlseg5_nxv1i32_triscv.vector.tuple_nxv4i8_5t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @test_vlseg5_nxv1i32_triscv.vector.tuple_nxv4i8_5t(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg5_nxv1i32_triscv.vector.tuple_nxv4i8_5t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT:    vlseg5e32.v v7, (a0)
+; CHECK-NEXT:    vlseg5e32.v v8, (a0)
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) undef, ptr %base, i32 %vl, i32 5)
-  %1 = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0, i32 1)
-  ret <vscale x 1 x i32> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0
 }
-
-define <vscale x 1 x i32> @test_vlseg5_mask_nxv1i32_triscv.vector.tuple_nxv4i8_5t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @test_vlseg5_mask_nxv1i32_triscv.vector.tuple_nxv4i8_5t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg5_mask_nxv1i32_triscv.vector.tuple_nxv4i8_5t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT:    vlseg5e32.v v7, (a0), v0.t
+; CHECK-NEXT:    vlseg5e32.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv4i8_5t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 5)
-  %1 = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0, i32 1)
-  ret <vscale x 1 x i32> %1
target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0 } - -declare target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv8i8_5t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 5), ptr, <vscale x 2 x i1>, i32, i32, i32) - -define <vscale x 2 x i32> @test_vlseg5_nxv2i32_triscv.vector.tuple_nxv8i8_5t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @test_vlseg5_nxv2i32_triscv.vector.tuple_nxv8i8_5t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg5_nxv2i32_triscv.vector.tuple_nxv8i8_5t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma -; CHECK-NEXT: vlseg5e32.v v7, (a0) +; CHECK-NEXT: vlseg5e32.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) undef, ptr %base, i32 %vl, i32 5) - %1 = call <vscale x 2 x i32> @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0, i32 1) - ret <vscale x 2 x i32> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0 } - -define <vscale x 2 x i32> @test_vlseg5_mask_nxv2i32_triscv.vector.tuple_nxv8i8_5t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @test_vlseg5_mask_nxv2i32_triscv.vector.tuple_nxv8i8_5t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) { ; CHECK-LABEL: test_vlseg5_mask_nxv2i32_triscv.vector.tuple_nxv8i8_5t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma -; CHECK-NEXT: vlseg5e32.v v7, (a0), v0.t +; CHECK-NEXT: vlseg5e32.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv8i8_5t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 5) - %1 = call <vscale x 2 x i32> @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0, i32 1) - ret <vscale x 2 x i32> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0 } - -declare target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv4i8_6t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 6), ptr, <vscale x 1 x i1>, i32, i32, i32) - -define <vscale x 1 x i32> @test_vlseg6_nxv1i32_triscv.vector.tuple_nxv4i8_6t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @test_vlseg6_nxv1i32_triscv.vector.tuple_nxv4i8_6t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg6_nxv1i32_triscv.vector.tuple_nxv4i8_6t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma -; CHECK-NEXT: vlseg6e32.v v7, (a0) +; CHECK-NEXT: vlseg6e32.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) undef, ptr %base, i32 %vl, i32 5) - %1 = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0, i32 1) - ret <vscale x 1 x i32> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0 } - -define <vscale x 1 x i32> @test_vlseg6_mask_nxv1i32_triscv.vector.tuple_nxv4i8_6t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 6) 
@test_vlseg6_mask_nxv1i32_triscv.vector.tuple_nxv4i8_6t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) { ; CHECK-LABEL: test_vlseg6_mask_nxv1i32_triscv.vector.tuple_nxv4i8_6t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma -; CHECK-NEXT: vlseg6e32.v v7, (a0), v0.t +; CHECK-NEXT: vlseg6e32.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv4i8_6t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 5) - %1 = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0, i32 1) - ret <vscale x 1 x i32> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0 } - -declare target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv8i8_6t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 6), ptr, <vscale x 2 x i1>, i32, i32, i32) - -define <vscale x 2 x i32> @test_vlseg6_nxv2i32_triscv.vector.tuple_nxv8i8_6t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @test_vlseg6_nxv2i32_triscv.vector.tuple_nxv8i8_6t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg6_nxv2i32_triscv.vector.tuple_nxv8i8_6t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma -; CHECK-NEXT: vlseg6e32.v v7, (a0) +; CHECK-NEXT: vlseg6e32.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) undef, ptr %base, i32 %vl, i32 5) - %1 = call <vscale x 2 x i32> @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0, i32 1) - ret <vscale x 2 x i32> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0 } - -define <vscale x 2 x i32> @test_vlseg6_mask_nxv2i32_triscv.vector.tuple_nxv8i8_6t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @test_vlseg6_mask_nxv2i32_triscv.vector.tuple_nxv8i8_6t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) { ; CHECK-LABEL: test_vlseg6_mask_nxv2i32_triscv.vector.tuple_nxv8i8_6t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma -; CHECK-NEXT: vlseg6e32.v v7, (a0), v0.t +; CHECK-NEXT: vlseg6e32.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv8i8_6t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 5) - %1 = call <vscale x 2 x i32> @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0, i32 1) - ret <vscale x 2 x i32> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0 } - -declare target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv4i8_7t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 7), ptr, <vscale x 1 x i1>, i32, i32, i32) - -define <vscale x 1 x i32> @test_vlseg7_nxv1i32_triscv.vector.tuple_nxv4i8_7t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @test_vlseg7_nxv1i32_triscv.vector.tuple_nxv4i8_7t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg7_nxv1i32_triscv.vector.tuple_nxv4i8_7t: ; CHECK: # %bb.0: 
# %entry ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma -; CHECK-NEXT: vlseg7e32.v v7, (a0) +; CHECK-NEXT: vlseg7e32.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) undef, ptr %base, i32 %vl, i32 5) - %1 = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0, i32 1) - ret <vscale x 1 x i32> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0 } - -define <vscale x 1 x i32> @test_vlseg7_mask_nxv1i32_triscv.vector.tuple_nxv4i8_7t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @test_vlseg7_mask_nxv1i32_triscv.vector.tuple_nxv4i8_7t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) { ; CHECK-LABEL: test_vlseg7_mask_nxv1i32_triscv.vector.tuple_nxv4i8_7t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma -; CHECK-NEXT: vlseg7e32.v v7, (a0), v0.t +; CHECK-NEXT: vlseg7e32.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv4i8_7t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 5) - %1 = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0, i32 1) - ret <vscale x 1 x i32> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0 } - -declare target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv8i8_7t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 7), ptr, <vscale x 2 x i1>, i32, i32, i32) - -define <vscale x 2 x i32> @test_vlseg7_nxv2i32_triscv.vector.tuple_nxv8i8_7t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @test_vlseg7_nxv2i32_triscv.vector.tuple_nxv8i8_7t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg7_nxv2i32_triscv.vector.tuple_nxv8i8_7t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma -; CHECK-NEXT: vlseg7e32.v v7, (a0) +; CHECK-NEXT: vlseg7e32.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) undef, ptr %base, i32 %vl, i32 5) - %1 = call <vscale x 2 x i32> @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0, i32 1) - ret <vscale x 2 x i32> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0 } - -define <vscale x 2 x i32> @test_vlseg7_mask_nxv2i32_triscv.vector.tuple_nxv8i8_7t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @test_vlseg7_mask_nxv2i32_triscv.vector.tuple_nxv8i8_7t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) { ; CHECK-LABEL: test_vlseg7_mask_nxv2i32_triscv.vector.tuple_nxv8i8_7t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma -; CHECK-NEXT: vlseg7e32.v v7, (a0), v0.t +; CHECK-NEXT: vlseg7e32.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv8i8_7t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, 
i32 1, i32 5) - %1 = call <vscale x 2 x i32> @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0, i32 1) - ret <vscale x 2 x i32> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0 } - -declare target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv4i8_8t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 8), ptr, <vscale x 1 x i1>, i32, i32, i32) - -define <vscale x 1 x i32> @test_vlseg8_nxv1i32_triscv.vector.tuple_nxv4i8_8t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @test_vlseg8_nxv1i32_triscv.vector.tuple_nxv4i8_8t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg8_nxv1i32_triscv.vector.tuple_nxv4i8_8t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma -; CHECK-NEXT: vlseg8e32.v v7, (a0) +; CHECK-NEXT: vlseg8e32.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) undef, ptr %base, i32 %vl, i32 5) - %1 = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0, i32 1) - ret <vscale x 1 x i32> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0 } - -define <vscale x 1 x i32> @test_vlseg8_mask_nxv1i32_triscv.vector.tuple_nxv4i8_8t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @test_vlseg8_mask_nxv1i32_triscv.vector.tuple_nxv4i8_8t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) { ; CHECK-LABEL: test_vlseg8_mask_nxv1i32_triscv.vector.tuple_nxv4i8_8t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma -; CHECK-NEXT: vlseg8e32.v v7, (a0), v0.t +; CHECK-NEXT: vlseg8e32.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv4i8_8t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 5) - %1 = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0, i32 1) - ret <vscale x 1 x i32> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0 } - -declare target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv8i8_8t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 8), ptr, <vscale x 2 x i1>, i32, i32, i32) - -define <vscale x 2 x i32> @test_vlseg8_nxv2i32_triscv.vector.tuple_nxv8i8_8t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @test_vlseg8_nxv2i32_triscv.vector.tuple_nxv8i8_8t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg8_nxv2i32_triscv.vector.tuple_nxv8i8_8t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma -; CHECK-NEXT: vlseg8e32.v v7, (a0) +; CHECK-NEXT: vlseg8e32.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) undef, ptr %base, i32 %vl, i32 5) - %1 = call <vscale x 2 x i32> @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0, i32 1) - ret <vscale x 2 x i32> %1 + ret target("riscv.vector.tuple", <vscale x 8 
x i8>, 8) %0 } - -define <vscale x 2 x i32> @test_vlseg8_mask_nxv2i32_triscv.vector.tuple_nxv8i8_8t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @test_vlseg8_mask_nxv2i32_triscv.vector.tuple_nxv8i8_8t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) { ; CHECK-LABEL: test_vlseg8_mask_nxv2i32_triscv.vector.tuple_nxv8i8_8t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma -; CHECK-NEXT: vlseg8e32.v v7, (a0), v0.t +; CHECK-NEXT: vlseg8e32.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv8i8_8t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 5) - %1 = call <vscale x 2 x i32> @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0, i32 1) - ret <vscale x 2 x i32> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0 } - -declare target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv8i8_2t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 2), ptr, <vscale x 1 x i1>, i32, i32, i32) - -define <vscale x 1 x i64> @test_vlseg2_nxv1i64_triscv.vector.tuple_nxv8i8_2t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @test_vlseg2_nxv1i64_triscv.vector.tuple_nxv8i8_2t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg2_nxv1i64_triscv.vector.tuple_nxv8i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma -; CHECK-NEXT: vlseg2e64.v v7, (a0) +; CHECK-NEXT: vlseg2e64.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) undef, ptr %base, i32 %vl, i32 6) - %1 = call <vscale x 1 x i64> @llvm.riscv.tuple.extract.nxv1i64.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0, i32 1) - ret <vscale x 1 x i64> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0 } - -define <vscale x 1 x i64> @test_vlseg2_mask_nxv1i64_triscv.vector.tuple_nxv8i8_2t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @test_vlseg2_mask_nxv1i64_triscv.vector.tuple_nxv8i8_2t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) { ; CHECK-LABEL: test_vlseg2_mask_nxv1i64_triscv.vector.tuple_nxv8i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma -; CHECK-NEXT: vlseg2e64.v v7, (a0), v0.t +; CHECK-NEXT: vlseg2e64.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv8i8_2t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 6) - %1 = call <vscale x 1 x i64> @llvm.riscv.tuple.extract.nxv1i64.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0, i32 1) - ret <vscale x 1 x i64> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0 } - -declare target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv16i8_2t.nxv2i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 2), ptr, <vscale x 2 x i1>, i32, i32, i32) - -define <vscale x 2 x i64> @test_vlseg2_nxv2i64_triscv.vector.tuple_nxv16i8_2t(ptr %base, i32 %vl) { 
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @test_vlseg2_nxv2i64_triscv.vector.tuple_nxv16i8_2t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg2_nxv2i64_triscv.vector.tuple_nxv16i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma -; CHECK-NEXT: vlseg2e64.v v6, (a0) +; CHECK-NEXT: vlseg2e64.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, i32 %vl, i32 6) - %1 = call <vscale x 2 x i64> @llvm.riscv.tuple.extract.nxv2i64.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0, i32 1) - ret <vscale x 2 x i64> %1 + ret target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0 } - -define <vscale x 2 x i64> @test_vlseg2_mask_nxv2i64_triscv.vector.tuple_nxv16i8_2t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @test_vlseg2_mask_nxv2i64_triscv.vector.tuple_nxv16i8_2t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) { ; CHECK-LABEL: test_vlseg2_mask_nxv2i64_triscv.vector.tuple_nxv16i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma -; CHECK-NEXT: vlseg2e64.v v6, (a0), v0.t +; CHECK-NEXT: vlseg2e64.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv16i8_2t.nxv2i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 6) - %1 = call <vscale x 2 x i64> @llvm.riscv.tuple.extract.nxv2i64.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0, i32 1) - ret <vscale x 2 x i64> %1 + ret target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0 } - -declare target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv32i8_2t.nxv4i1(target("riscv.vector.tuple", <vscale x 32 x i8>, 2), ptr, <vscale x 4 x i1>, i32, i32, i32) - -define <vscale x 4 x i64> @test_vlseg2_nxv4i64_triscv.vector.tuple_nxv32i8_2t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @test_vlseg2_nxv4i64_triscv.vector.tuple_nxv32i8_2t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg2_nxv4i64_triscv.vector.tuple_nxv32i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma -; CHECK-NEXT: vlseg2e64.v v4, (a0) +; CHECK-NEXT: vlseg2e64.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, i32 %vl, i32 6) - %1 = call <vscale x 4 x i64> @llvm.riscv.tuple.extract.nxv4i64.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0, i32 1) - ret <vscale x 4 x i64> %1 + ret target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0 } - -define <vscale x 4 x i64> @test_vlseg2_mask_nxv4i64_triscv.vector.tuple_nxv32i8_2t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @test_vlseg2_mask_nxv4i64_triscv.vector.tuple_nxv32i8_2t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) { ; CHECK-LABEL: test_vlseg2_mask_nxv4i64_triscv.vector.tuple_nxv32i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma -; CHECK-NEXT: vlseg2e64.v v4, (a0), v0.t +; CHECK-NEXT: vlseg2e64.v v8, (a0), v0.t ; 
CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv32i8_2t.nxv4i1(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 6) - %1 = call <vscale x 4 x i64> @llvm.riscv.tuple.extract.nxv4i64.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0, i32 1) - ret <vscale x 4 x i64> %1 + ret target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0 } - -declare target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv8i8_3t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 3), ptr, <vscale x 1 x i1>, i32, i32, i32) - -define <vscale x 1 x i64> @test_vlseg3_nxv1i64_triscv.vector.tuple_nxv8i8_3t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @test_vlseg3_nxv1i64_triscv.vector.tuple_nxv8i8_3t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg3_nxv1i64_triscv.vector.tuple_nxv8i8_3t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma -; CHECK-NEXT: vlseg3e64.v v7, (a0) +; CHECK-NEXT: vlseg3e64.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) undef, ptr %base, i32 %vl, i32 6) - %1 = call <vscale x 1 x i64> @llvm.riscv.tuple.extract.nxv1i64.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0, i32 1) - ret <vscale x 1 x i64> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0 } - -define <vscale x 1 x i64> @test_vlseg3_mask_nxv1i64_triscv.vector.tuple_nxv8i8_3t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @test_vlseg3_mask_nxv1i64_triscv.vector.tuple_nxv8i8_3t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) { ; CHECK-LABEL: test_vlseg3_mask_nxv1i64_triscv.vector.tuple_nxv8i8_3t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma -; CHECK-NEXT: vlseg3e64.v v7, (a0), v0.t +; CHECK-NEXT: vlseg3e64.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv8i8_3t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 6) - %1 = call <vscale x 1 x i64> @llvm.riscv.tuple.extract.nxv1i64.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0, i32 1) - ret <vscale x 1 x i64> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0 } - -declare target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv2i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 3), ptr, <vscale x 2 x i1>, i32, i32, i32) - -define <vscale x 2 x i64> @test_vlseg3_nxv2i64_triscv.vector.tuple_nxv16i8_3t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @test_vlseg3_nxv2i64_triscv.vector.tuple_nxv16i8_3t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg3_nxv2i64_triscv.vector.tuple_nxv16i8_3t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma -; CHECK-NEXT: vlseg3e64.v v6, (a0) +; CHECK-NEXT: vlseg3e64.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) 
undef, ptr %base, i32 %vl, i32 6) - %1 = call <vscale x 2 x i64> @llvm.riscv.tuple.extract.nxv2i64.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0, i32 1) - ret <vscale x 2 x i64> %1 + ret target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0 } - -define <vscale x 2 x i64> @test_vlseg3_mask_nxv2i64_triscv.vector.tuple_nxv16i8_3t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @test_vlseg3_mask_nxv2i64_triscv.vector.tuple_nxv16i8_3t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) { ; CHECK-LABEL: test_vlseg3_mask_nxv2i64_triscv.vector.tuple_nxv16i8_3t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma -; CHECK-NEXT: vlseg3e64.v v6, (a0), v0.t +; CHECK-NEXT: vlseg3e64.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv2i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 6) - %1 = call <vscale x 2 x i64> @llvm.riscv.tuple.extract.nxv2i64.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0, i32 1) - ret <vscale x 2 x i64> %1 + ret target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0 } - -declare target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv8i8_4t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 4), ptr, <vscale x 1 x i1>, i32, i32, i32) - -define <vscale x 1 x i64> @test_vlseg4_nxv1i64_triscv.vector.tuple_nxv8i8_4t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @test_vlseg4_nxv1i64_triscv.vector.tuple_nxv8i8_4t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg4_nxv1i64_triscv.vector.tuple_nxv8i8_4t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma -; CHECK-NEXT: vlseg4e64.v v7, (a0) +; CHECK-NEXT: vlseg4e64.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) undef, ptr %base, i32 %vl, i32 6) - %1 = call <vscale x 1 x i64> @llvm.riscv.tuple.extract.nxv1i64.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0, i32 1) - ret <vscale x 1 x i64> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0 } - -define <vscale x 1 x i64> @test_vlseg4_mask_nxv1i64_triscv.vector.tuple_nxv8i8_4t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @test_vlseg4_mask_nxv1i64_triscv.vector.tuple_nxv8i8_4t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) { ; CHECK-LABEL: test_vlseg4_mask_nxv1i64_triscv.vector.tuple_nxv8i8_4t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma -; CHECK-NEXT: vlseg4e64.v v7, (a0), v0.t +; CHECK-NEXT: vlseg4e64.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv8i8_4t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 6) - %1 = call <vscale x 1 x i64> @llvm.riscv.tuple.extract.nxv1i64.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0, i32 1) - ret <vscale x 1 x i64> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0 } - -declare target("riscv.vector.tuple", <vscale 
x 16 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv16i8_4t.nxv2i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 4), ptr, <vscale x 2 x i1>, i32, i32, i32) - -define <vscale x 2 x i64> @test_vlseg4_nxv2i64_triscv.vector.tuple_nxv16i8_4t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @test_vlseg4_nxv2i64_triscv.vector.tuple_nxv16i8_4t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg4_nxv2i64_triscv.vector.tuple_nxv16i8_4t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma -; CHECK-NEXT: vlseg4e64.v v6, (a0) +; CHECK-NEXT: vlseg4e64.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, i32 %vl, i32 6) - %1 = call <vscale x 2 x i64> @llvm.riscv.tuple.extract.nxv2i64.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0, i32 1) - ret <vscale x 2 x i64> %1 + ret target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0 } - -define <vscale x 2 x i64> @test_vlseg4_mask_nxv2i64_triscv.vector.tuple_nxv16i8_4t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @test_vlseg4_mask_nxv2i64_triscv.vector.tuple_nxv16i8_4t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) { ; CHECK-LABEL: test_vlseg4_mask_nxv2i64_triscv.vector.tuple_nxv16i8_4t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma -; CHECK-NEXT: vlseg4e64.v v6, (a0), v0.t +; CHECK-NEXT: vlseg4e64.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv16i8_4t.nxv2i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 6) - %1 = call <vscale x 2 x i64> @llvm.riscv.tuple.extract.nxv2i64.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0, i32 1) - ret <vscale x 2 x i64> %1 + ret target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0 } - -declare target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv8i8_5t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 5), ptr, <vscale x 1 x i1>, i32, i32, i32) - -define <vscale x 1 x i64> @test_vlseg5_nxv1i64_triscv.vector.tuple_nxv8i8_5t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @test_vlseg5_nxv1i64_triscv.vector.tuple_nxv8i8_5t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg5_nxv1i64_triscv.vector.tuple_nxv8i8_5t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma -; CHECK-NEXT: vlseg5e64.v v7, (a0) +; CHECK-NEXT: vlseg5e64.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) undef, ptr %base, i32 %vl, i32 6) - %1 = call <vscale x 1 x i64> @llvm.riscv.tuple.extract.nxv1i64.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0, i32 1) - ret <vscale x 1 x i64> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0 } - -define <vscale x 1 x i64> @test_vlseg5_mask_nxv1i64_triscv.vector.tuple_nxv8i8_5t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @test_vlseg5_mask_nxv1i64_triscv.vector.tuple_nxv8i8_5t(ptr %base, i32 %vl, 
<vscale x 1 x i1> %mask) { ; CHECK-LABEL: test_vlseg5_mask_nxv1i64_triscv.vector.tuple_nxv8i8_5t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma -; CHECK-NEXT: vlseg5e64.v v7, (a0), v0.t +; CHECK-NEXT: vlseg5e64.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv8i8_5t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 6) - %1 = call <vscale x 1 x i64> @llvm.riscv.tuple.extract.nxv1i64.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0, i32 1) - ret <vscale x 1 x i64> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0 } - -declare target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv8i8_6t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 6), ptr, <vscale x 1 x i1>, i32, i32, i32) - -define <vscale x 1 x i64> @test_vlseg6_nxv1i64_triscv.vector.tuple_nxv8i8_6t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @test_vlseg6_nxv1i64_triscv.vector.tuple_nxv8i8_6t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg6_nxv1i64_triscv.vector.tuple_nxv8i8_6t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma -; CHECK-NEXT: vlseg6e64.v v7, (a0) +; CHECK-NEXT: vlseg6e64.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) undef, ptr %base, i32 %vl, i32 6) - %1 = call <vscale x 1 x i64> @llvm.riscv.tuple.extract.nxv1i64.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0, i32 1) - ret <vscale x 1 x i64> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0 } - -define <vscale x 1 x i64> @test_vlseg6_mask_nxv1i64_triscv.vector.tuple_nxv8i8_6t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @test_vlseg6_mask_nxv1i64_triscv.vector.tuple_nxv8i8_6t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) { ; CHECK-LABEL: test_vlseg6_mask_nxv1i64_triscv.vector.tuple_nxv8i8_6t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma -; CHECK-NEXT: vlseg6e64.v v7, (a0), v0.t +; CHECK-NEXT: vlseg6e64.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv8i8_6t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 6) - %1 = call <vscale x 1 x i64> @llvm.riscv.tuple.extract.nxv1i64.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0, i32 1) - ret <vscale x 1 x i64> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0 } - -declare target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv8i8_7t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 7), ptr, <vscale x 1 x i1>, i32, i32, i32) - -define <vscale x 1 x i64> @test_vlseg7_nxv1i64_triscv.vector.tuple_nxv8i8_7t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @test_vlseg7_nxv1i64_triscv.vector.tuple_nxv8i8_7t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg7_nxv1i64_triscv.vector.tuple_nxv8i8_7t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma -; CHECK-NEXT: 
vlseg7e64.v v7, (a0) +; CHECK-NEXT: vlseg7e64.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) undef, ptr %base, i32 %vl, i32 6) - %1 = call <vscale x 1 x i64> @llvm.riscv.tuple.extract.nxv1i64.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0, i32 1) - ret <vscale x 1 x i64> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0 } - -define <vscale x 1 x i64> @test_vlseg7_mask_nxv1i64_triscv.vector.tuple_nxv8i8_7t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @test_vlseg7_mask_nxv1i64_triscv.vector.tuple_nxv8i8_7t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) { ; CHECK-LABEL: test_vlseg7_mask_nxv1i64_triscv.vector.tuple_nxv8i8_7t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma -; CHECK-NEXT: vlseg7e64.v v7, (a0), v0.t +; CHECK-NEXT: vlseg7e64.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv8i8_7t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 6) - %1 = call <vscale x 1 x i64> @llvm.riscv.tuple.extract.nxv1i64.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0, i32 1) - ret <vscale x 1 x i64> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0 } - -declare target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv8i8_8t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 8), ptr, <vscale x 1 x i1>, i32, i32, i32) - -define <vscale x 1 x i64> @test_vlseg8_nxv1i64_triscv.vector.tuple_nxv8i8_8t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @test_vlseg8_nxv1i64_triscv.vector.tuple_nxv8i8_8t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg8_nxv1i64_triscv.vector.tuple_nxv8i8_8t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma -; CHECK-NEXT: vlseg8e64.v v7, (a0) +; CHECK-NEXT: vlseg8e64.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) undef, ptr %base, i32 %vl, i32 6) - %1 = call <vscale x 1 x i64> @llvm.riscv.tuple.extract.nxv1i64.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0, i32 1) - ret <vscale x 1 x i64> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0 } - -define <vscale x 1 x i64> @test_vlseg8_mask_nxv1i64_triscv.vector.tuple_nxv8i8_8t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @test_vlseg8_mask_nxv1i64_triscv.vector.tuple_nxv8i8_8t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) { ; CHECK-LABEL: test_vlseg8_mask_nxv1i64_triscv.vector.tuple_nxv8i8_8t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma -; CHECK-NEXT: vlseg8e64.v v7, (a0), v0.t +; CHECK-NEXT: vlseg8e64.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv8i8_8t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 6) - %1 = call <vscale x 1 x i64> 
@llvm.riscv.tuple.extract.nxv1i64.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0, i32 1) - ret <vscale x 1 x i64> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0 } - - -define <vscale x 1 x half> @test_vlseg2_nxv1f16_triscv.vector.tuple_nxv2i8_2t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 2 x i8>, 2) @test_vlseg2_nxv1f16_triscv.vector.tuple_nxv2i8_2t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg2_nxv1f16_triscv.vector.tuple_nxv2i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma -; CHECK-NEXT: vlseg2e16.v v7, (a0) +; CHECK-NEXT: vlseg2e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv2i8_2t(target("riscv.vector.tuple", <vscale x 2 x i8>, 2) undef, ptr %base, i32 %vl, i32 4) - %1 = call <vscale x 1 x half> @llvm.riscv.tuple.extract.nxv1f16.triscv.vector.tuple_nxv2i8_2t(target("riscv.vector.tuple", <vscale x 2 x i8>, 2) %0, i32 1) - ret <vscale x 1 x half> %1 + ret target("riscv.vector.tuple", <vscale x 2 x i8>, 2) %0 } - -define <vscale x 1 x half> @test_vlseg2_mask_nxv1f16_triscv.vector.tuple_nxv2i8_2t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 2 x i8>, 2) @test_vlseg2_mask_nxv1f16_triscv.vector.tuple_nxv2i8_2t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) { ; CHECK-LABEL: test_vlseg2_mask_nxv1f16_triscv.vector.tuple_nxv2i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma -; CHECK-NEXT: vlseg2e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg2e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv2i8_2t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 2) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 4) - %1 = call <vscale x 1 x half> @llvm.riscv.tuple.extract.nxv1f16.triscv.vector.tuple_nxv2i8_2t(target("riscv.vector.tuple", <vscale x 2 x i8>, 2) %0, i32 1) - ret <vscale x 1 x half> %1 + ret target("riscv.vector.tuple", <vscale x 2 x i8>, 2) %0 } - - -define <vscale x 2 x half> @test_vlseg2_nxv2f16_triscv.vector.tuple_nxv4i8_2t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @test_vlseg2_nxv2f16_triscv.vector.tuple_nxv4i8_2t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg2_nxv2f16_triscv.vector.tuple_nxv4i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma -; CHECK-NEXT: vlseg2e16.v v7, (a0) +; CHECK-NEXT: vlseg2e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) undef, ptr %base, i32 %vl, i32 4) - %1 = call <vscale x 2 x half> @llvm.riscv.tuple.extract.nxv2f16.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0, i32 1) - ret <vscale x 2 x half> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0 } - -define <vscale x 2 x half> @test_vlseg2_mask_nxv2f16_triscv.vector.tuple_nxv4i8_2t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @test_vlseg2_mask_nxv2f16_triscv.vector.tuple_nxv4i8_2t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) { ; CHECK-LABEL: test_vlseg2_mask_nxv2f16_triscv.vector.tuple_nxv4i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma -; 
CHECK-NEXT: vlseg2e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg2e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv4i8_2t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 4) - %1 = call <vscale x 2 x half> @llvm.riscv.tuple.extract.nxv2f16.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0, i32 1) - ret <vscale x 2 x half> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0 } - - -define <vscale x 4 x half> @test_vlseg2_nxv4f16_triscv.vector.tuple_nxv8i8_2t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @test_vlseg2_nxv4f16_triscv.vector.tuple_nxv8i8_2t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg2_nxv4f16_triscv.vector.tuple_nxv8i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vlseg2e16.v v7, (a0) +; CHECK-NEXT: vlseg2e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) undef, ptr %base, i32 %vl, i32 4) - %1 = call <vscale x 4 x half> @llvm.riscv.tuple.extract.nxv4f16.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0, i32 1) - ret <vscale x 4 x half> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0 } - -define <vscale x 4 x half> @test_vlseg2_mask_nxv4f16_triscv.vector.tuple_nxv8i8_2t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @test_vlseg2_mask_nxv4f16_triscv.vector.tuple_nxv8i8_2t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) { ; CHECK-LABEL: test_vlseg2_mask_nxv4f16_triscv.vector.tuple_nxv8i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vlseg2e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg2e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv8i8_2t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) undef, ptr %base, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 4) - %1 = call <vscale x 4 x half> @llvm.riscv.tuple.extract.nxv4f16.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0, i32 1) - ret <vscale x 4 x half> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0 } - - -define <vscale x 8 x half> @test_vlseg2_nxv8f16_triscv.vector.tuple_nxv16i8_2t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @test_vlseg2_nxv8f16_triscv.vector.tuple_nxv16i8_2t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg2_nxv8f16_triscv.vector.tuple_nxv16i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma -; CHECK-NEXT: vlseg2e16.v v6, (a0) +; CHECK-NEXT: vlseg2e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, i32 %vl, i32 4) - %1 = call <vscale x 8 x half> @llvm.riscv.tuple.extract.nxv8f16.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0, i32 1) - ret <vscale x 8 x half> %1 + ret target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0 } - -define <vscale x 8 x half> 
@test_vlseg2_mask_nxv8f16_triscv.vector.tuple_nxv16i8_2t(ptr %base, i32 %vl, <vscale x 8 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @test_vlseg2_mask_nxv8f16_triscv.vector.tuple_nxv16i8_2t(ptr %base, i32 %vl, <vscale x 8 x i1> %mask) { ; CHECK-LABEL: test_vlseg2_mask_nxv8f16_triscv.vector.tuple_nxv16i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma -; CHECK-NEXT: vlseg2e16.v v6, (a0), v0.t +; CHECK-NEXT: vlseg2e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv16i8_2t.nxv8i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 8 x i1> %mask, i32 %vl, i32 1, i32 4) - %1 = call <vscale x 8 x half> @llvm.riscv.tuple.extract.nxv8f16.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0, i32 1) - ret <vscale x 8 x half> %1 + ret target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0 } - - -define <vscale x 16 x half> @test_vlseg2_nxv16f16_triscv.vector.tuple_nxv32i8_2t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @test_vlseg2_nxv16f16_triscv.vector.tuple_nxv32i8_2t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg2_nxv16f16_triscv.vector.tuple_nxv32i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma -; CHECK-NEXT: vlseg2e16.v v4, (a0) +; CHECK-NEXT: vlseg2e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, i32 %vl, i32 4) - %1 = call <vscale x 16 x half> @llvm.riscv.tuple.extract.nxv16f16.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0, i32 1) - ret <vscale x 16 x half> %1 + ret target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0 } - -define <vscale x 16 x half> @test_vlseg2_mask_nxv16f16_triscv.vector.tuple_nxv32i8_2t(ptr %base, i32 %vl, <vscale x 16 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @test_vlseg2_mask_nxv16f16_triscv.vector.tuple_nxv32i8_2t(ptr %base, i32 %vl, <vscale x 16 x i1> %mask) { ; CHECK-LABEL: test_vlseg2_mask_nxv16f16_triscv.vector.tuple_nxv32i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma -; CHECK-NEXT: vlseg2e16.v v4, (a0), v0.t +; CHECK-NEXT: vlseg2e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv32i8_2t.nxv16i1(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 16 x i1> %mask, i32 %vl, i32 1, i32 4) - %1 = call <vscale x 16 x half> @llvm.riscv.tuple.extract.nxv16f16.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0, i32 1) - ret <vscale x 16 x half> %1 + ret target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0 } - - -define <vscale x 1 x half> @test_vlseg3_nxv1f16_triscv.vector.tuple_nxv2i8_3t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 2 x i8>, 3) @test_vlseg3_nxv1f16_triscv.vector.tuple_nxv2i8_3t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg3_nxv1f16_triscv.vector.tuple_nxv2i8_3t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma -; CHECK-NEXT: vlseg3e16.v v7, (a0) +; CHECK-NEXT: vlseg3e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", 
<vscale x 2 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv2i8_3t(target("riscv.vector.tuple", <vscale x 2 x i8>, 3) undef, ptr %base, i32 %vl, i32 4) - %1 = call <vscale x 1 x half> @llvm.riscv.tuple.extract.nxv1f16.triscv.vector.tuple_nxv2i8_3t(target("riscv.vector.tuple", <vscale x 2 x i8>, 3) %0, i32 1) - ret <vscale x 1 x half> %1 + ret target("riscv.vector.tuple", <vscale x 2 x i8>, 3) %0 } - -define <vscale x 1 x half> @test_vlseg3_mask_nxv1f16_triscv.vector.tuple_nxv2i8_3t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 2 x i8>, 3) @test_vlseg3_mask_nxv1f16_triscv.vector.tuple_nxv2i8_3t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) { ; CHECK-LABEL: test_vlseg3_mask_nxv1f16_triscv.vector.tuple_nxv2i8_3t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma -; CHECK-NEXT: vlseg3e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg3e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv2i8_3t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 3) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 4) - %1 = call <vscale x 1 x half> @llvm.riscv.tuple.extract.nxv1f16.triscv.vector.tuple_nxv2i8_3t(target("riscv.vector.tuple", <vscale x 2 x i8>, 3) %0, i32 1) - ret <vscale x 1 x half> %1 + ret target("riscv.vector.tuple", <vscale x 2 x i8>, 3) %0 } - - -define <vscale x 2 x half> @test_vlseg3_nxv2f16_triscv.vector.tuple_nxv4i8_3t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @test_vlseg3_nxv2f16_triscv.vector.tuple_nxv4i8_3t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg3_nxv2f16_triscv.vector.tuple_nxv4i8_3t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma -; CHECK-NEXT: vlseg3e16.v v7, (a0) +; CHECK-NEXT: vlseg3e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) undef, ptr %base, i32 %vl, i32 4) - %1 = call <vscale x 2 x half> @llvm.riscv.tuple.extract.nxv2f16.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0, i32 1) - ret <vscale x 2 x half> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0 } - -define <vscale x 2 x half> @test_vlseg3_mask_nxv2f16_triscv.vector.tuple_nxv4i8_3t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @test_vlseg3_mask_nxv2f16_triscv.vector.tuple_nxv4i8_3t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) { ; CHECK-LABEL: test_vlseg3_mask_nxv2f16_triscv.vector.tuple_nxv4i8_3t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma -; CHECK-NEXT: vlseg3e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg3e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv4i8_3t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 4) - %1 = call <vscale x 2 x half> @llvm.riscv.tuple.extract.nxv2f16.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0, i32 1) - ret <vscale x 2 x half> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0 } - - -define <vscale x 4 x half> @test_vlseg3_nxv4f16_triscv.vector.tuple_nxv8i8_3t(ptr %base, i32 %vl) { +define 
target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @test_vlseg3_nxv4f16_triscv.vector.tuple_nxv8i8_3t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg3_nxv4f16_triscv.vector.tuple_nxv8i8_3t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vlseg3e16.v v7, (a0) +; CHECK-NEXT: vlseg3e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) undef, ptr %base, i32 %vl, i32 4) - %1 = call <vscale x 4 x half> @llvm.riscv.tuple.extract.nxv4f16.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0, i32 1) - ret <vscale x 4 x half> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0 } - -define <vscale x 4 x half> @test_vlseg3_mask_nxv4f16_triscv.vector.tuple_nxv8i8_3t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @test_vlseg3_mask_nxv4f16_triscv.vector.tuple_nxv8i8_3t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) { ; CHECK-LABEL: test_vlseg3_mask_nxv4f16_triscv.vector.tuple_nxv8i8_3t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vlseg3e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg3e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv8i8_3t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) undef, ptr %base, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 4) - %1 = call <vscale x 4 x half> @llvm.riscv.tuple.extract.nxv4f16.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0, i32 1) - ret <vscale x 4 x half> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0 } - - -define <vscale x 8 x half> @test_vlseg3_nxv8f16_triscv.vector.tuple_nxv16i8_3t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @test_vlseg3_nxv8f16_triscv.vector.tuple_nxv16i8_3t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg3_nxv8f16_triscv.vector.tuple_nxv16i8_3t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma -; CHECK-NEXT: vlseg3e16.v v6, (a0) +; CHECK-NEXT: vlseg3e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, i32 %vl, i32 4) - %1 = call <vscale x 8 x half> @llvm.riscv.tuple.extract.nxv8f16.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0, i32 1) - ret <vscale x 8 x half> %1 + ret target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0 } - -define <vscale x 8 x half> @test_vlseg3_mask_nxv8f16_triscv.vector.tuple_nxv16i8_3t(ptr %base, i32 %vl, <vscale x 8 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @test_vlseg3_mask_nxv8f16_triscv.vector.tuple_nxv16i8_3t(ptr %base, i32 %vl, <vscale x 8 x i1> %mask) { ; CHECK-LABEL: test_vlseg3_mask_nxv8f16_triscv.vector.tuple_nxv16i8_3t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma -; CHECK-NEXT: vlseg3e16.v v6, (a0), v0.t +; CHECK-NEXT: vlseg3e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv8i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 8 x 
i1> %mask, i32 %vl, i32 1, i32 4) - %1 = call <vscale x 8 x half> @llvm.riscv.tuple.extract.nxv8f16.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0, i32 1) - ret <vscale x 8 x half> %1 + ret target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0 } - - -define <vscale x 1 x half> @test_vlseg4_nxv1f16_triscv.vector.tuple_nxv2i8_4t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 2 x i8>, 4) @test_vlseg4_nxv1f16_triscv.vector.tuple_nxv2i8_4t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg4_nxv1f16_triscv.vector.tuple_nxv2i8_4t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma -; CHECK-NEXT: vlseg4e16.v v7, (a0) +; CHECK-NEXT: vlseg4e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv2i8_4t(target("riscv.vector.tuple", <vscale x 2 x i8>, 4) undef, ptr %base, i32 %vl, i32 4) - %1 = call <vscale x 1 x half> @llvm.riscv.tuple.extract.nxv1f16.triscv.vector.tuple_nxv2i8_4t(target("riscv.vector.tuple", <vscale x 2 x i8>, 4) %0, i32 1) - ret <vscale x 1 x half> %1 + ret target("riscv.vector.tuple", <vscale x 2 x i8>, 4) %0 } - -define <vscale x 1 x half> @test_vlseg4_mask_nxv1f16_triscv.vector.tuple_nxv2i8_4t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 2 x i8>, 4) @test_vlseg4_mask_nxv1f16_triscv.vector.tuple_nxv2i8_4t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) { ; CHECK-LABEL: test_vlseg4_mask_nxv1f16_triscv.vector.tuple_nxv2i8_4t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma -; CHECK-NEXT: vlseg4e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg4e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv2i8_4t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 4) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 4) - %1 = call <vscale x 1 x half> @llvm.riscv.tuple.extract.nxv1f16.triscv.vector.tuple_nxv2i8_4t(target("riscv.vector.tuple", <vscale x 2 x i8>, 4) %0, i32 1) - ret <vscale x 1 x half> %1 + ret target("riscv.vector.tuple", <vscale x 2 x i8>, 4) %0 } - - -define <vscale x 2 x half> @test_vlseg4_nxv2f16_triscv.vector.tuple_nxv4i8_4t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @test_vlseg4_nxv2f16_triscv.vector.tuple_nxv4i8_4t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg4_nxv2f16_triscv.vector.tuple_nxv4i8_4t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma -; CHECK-NEXT: vlseg4e16.v v7, (a0) +; CHECK-NEXT: vlseg4e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) undef, ptr %base, i32 %vl, i32 4) - %1 = call <vscale x 2 x half> @llvm.riscv.tuple.extract.nxv2f16.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0, i32 1) - ret <vscale x 2 x half> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0 } - -define <vscale x 2 x half> @test_vlseg4_mask_nxv2f16_triscv.vector.tuple_nxv4i8_4t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @test_vlseg4_mask_nxv2f16_triscv.vector.tuple_nxv4i8_4t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) { ; CHECK-LABEL: test_vlseg4_mask_nxv2f16_triscv.vector.tuple_nxv4i8_4t: ; CHECK: 
# %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma -; CHECK-NEXT: vlseg4e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg4e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv4i8_4t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 4) - %1 = call <vscale x 2 x half> @llvm.riscv.tuple.extract.nxv2f16.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0, i32 1) - ret <vscale x 2 x half> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0 } - - -define <vscale x 4 x half> @test_vlseg4_nxv4f16_triscv.vector.tuple_nxv8i8_4t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @test_vlseg4_nxv4f16_triscv.vector.tuple_nxv8i8_4t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg4_nxv4f16_triscv.vector.tuple_nxv8i8_4t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vlseg4e16.v v7, (a0) +; CHECK-NEXT: vlseg4e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) undef, ptr %base, i32 %vl, i32 4) - %1 = call <vscale x 4 x half> @llvm.riscv.tuple.extract.nxv4f16.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0, i32 1) - ret <vscale x 4 x half> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0 } - -define <vscale x 4 x half> @test_vlseg4_mask_nxv4f16_triscv.vector.tuple_nxv8i8_4t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @test_vlseg4_mask_nxv4f16_triscv.vector.tuple_nxv8i8_4t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) { ; CHECK-LABEL: test_vlseg4_mask_nxv4f16_triscv.vector.tuple_nxv8i8_4t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vlseg4e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg4e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv8i8_4t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) undef, ptr %base, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 4) - %1 = call <vscale x 4 x half> @llvm.riscv.tuple.extract.nxv4f16.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0, i32 1) - ret <vscale x 4 x half> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0 } - - -define <vscale x 8 x half> @test_vlseg4_nxv8f16_triscv.vector.tuple_nxv16i8_4t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @test_vlseg4_nxv8f16_triscv.vector.tuple_nxv16i8_4t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg4_nxv8f16_triscv.vector.tuple_nxv16i8_4t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma -; CHECK-NEXT: vlseg4e16.v v6, (a0) +; CHECK-NEXT: vlseg4e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, i32 %vl, i32 4) - %1 = call <vscale x 8 x half> @llvm.riscv.tuple.extract.nxv8f16.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0, i32 1) - ret <vscale x 8 x half> %1 + ret target("riscv.vector.tuple", <vscale x 16 
x i8>, 4) %0
}
-
-define <vscale x 8 x half> @test_vlseg4_mask_nxv8f16_triscv.vector.tuple_nxv16i8_4t(ptr %base, i32 %vl, <vscale x 8 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @test_vlseg4_mask_nxv8f16_triscv.vector.tuple_nxv16i8_4t(ptr %base, i32 %vl, <vscale x 8 x i1> %mask) {
; CHECK-LABEL: test_vlseg4_mask_nxv8f16_triscv.vector.tuple_nxv16i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma
-; CHECK-NEXT: vlseg4e16.v v6, (a0), v0.t
+; CHECK-NEXT: vlseg4e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv16i8_4t.nxv8i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 8 x i1> %mask, i32 %vl, i32 1, i32 4)
-  %1 = call <vscale x 8 x half> @llvm.riscv.tuple.extract.nxv8f16.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0, i32 1)
-  ret <vscale x 8 x half> %1
+  ret target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0
}
-
-
-define <vscale x 1 x half> @test_vlseg5_nxv1f16_triscv.vector.tuple_nxv2i8_5t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 5) @test_vlseg5_nxv1f16_triscv.vector.tuple_nxv2i8_5t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg5_nxv1f16_triscv.vector.tuple_nxv2i8_5t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT: vlseg5e16.v v7, (a0)
+; CHECK-NEXT: vlseg5e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv2i8_5t(target("riscv.vector.tuple", <vscale x 2 x i8>, 5) undef, ptr %base, i32 %vl, i32 4)
-  %1 = call <vscale x 1 x half> @llvm.riscv.tuple.extract.nxv1f16.triscv.vector.tuple_nxv2i8_5t(target("riscv.vector.tuple", <vscale x 2 x i8>, 5) %0, i32 1)
-  ret <vscale x 1 x half> %1
+  ret target("riscv.vector.tuple", <vscale x 2 x i8>, 5) %0
}
-
-define <vscale x 1 x half> @test_vlseg5_mask_nxv1f16_triscv.vector.tuple_nxv2i8_5t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 5) @test_vlseg5_mask_nxv1f16_triscv.vector.tuple_nxv2i8_5t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg5_mask_nxv1f16_triscv.vector.tuple_nxv2i8_5t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT: vlseg5e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg5e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv2i8_5t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 5) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 4)
-  %1 = call <vscale x 1 x half> @llvm.riscv.tuple.extract.nxv1f16.triscv.vector.tuple_nxv2i8_5t(target("riscv.vector.tuple", <vscale x 2 x i8>, 5) %0, i32 1)
-  ret <vscale x 1 x half> %1
+  ret target("riscv.vector.tuple", <vscale x 2 x i8>, 5) %0
}
-
-
-define <vscale x 2 x half> @test_vlseg5_nxv2f16_triscv.vector.tuple_nxv4i8_5t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @test_vlseg5_nxv2f16_triscv.vector.tuple_nxv4i8_5t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg5_nxv2f16_triscv.vector.tuple_nxv4i8_5t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT: vlseg5e16.v v7, (a0)
+; CHECK-NEXT: vlseg5e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) undef, ptr %base, i32 %vl, i32 4)
-  %1 = call <vscale x 2 x half> @llvm.riscv.tuple.extract.nxv2f16.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0, i32 1)
-  ret <vscale x 2 x half> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0
}
-
-define <vscale x 2 x half> @test_vlseg5_mask_nxv2f16_triscv.vector.tuple_nxv4i8_5t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @test_vlseg5_mask_nxv2f16_triscv.vector.tuple_nxv4i8_5t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg5_mask_nxv2f16_triscv.vector.tuple_nxv4i8_5t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT: vlseg5e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg5e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv4i8_5t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 4)
-  %1 = call <vscale x 2 x half> @llvm.riscv.tuple.extract.nxv2f16.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0, i32 1)
-  ret <vscale x 2 x half> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0
}
-
-
-define <vscale x 4 x half> @test_vlseg5_nxv4f16_triscv.vector.tuple_nxv8i8_5t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @test_vlseg5_nxv4f16_triscv.vector.tuple_nxv8i8_5t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg5_nxv4f16_triscv.vector.tuple_nxv8i8_5t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vlseg5e16.v v7, (a0)
+; CHECK-NEXT: vlseg5e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) undef, ptr %base, i32 %vl, i32 4)
-  %1 = call <vscale x 4 x half> @llvm.riscv.tuple.extract.nxv4f16.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0, i32 1)
-  ret <vscale x 4 x half> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0
}
-
-define <vscale x 4 x half> @test_vlseg5_mask_nxv4f16_triscv.vector.tuple_nxv8i8_5t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @test_vlseg5_mask_nxv4f16_triscv.vector.tuple_nxv8i8_5t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: test_vlseg5_mask_nxv4f16_triscv.vector.tuple_nxv8i8_5t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vlseg5e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg5e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv8i8_5t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) undef, ptr %base, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 4)
-  %1 = call <vscale x 4 x half> @llvm.riscv.tuple.extract.nxv4f16.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0, i32 1)
-  ret <vscale x 4 x half> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0
}
-
-
-define <vscale x 1 x half> @test_vlseg6_nxv1f16_triscv.vector.tuple_nxv2i8_6t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 6) @test_vlseg6_nxv1f16_triscv.vector.tuple_nxv2i8_6t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg6_nxv1f16_triscv.vector.tuple_nxv2i8_6t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT: vlseg6e16.v v7, (a0)
+; CHECK-NEXT: vlseg6e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv2i8_6t(target("riscv.vector.tuple", <vscale x 2 x i8>, 6) undef, ptr %base, i32 %vl, i32 4)
-  %1 = call <vscale x 1 x half> @llvm.riscv.tuple.extract.nxv1f16.triscv.vector.tuple_nxv2i8_6t(target("riscv.vector.tuple", <vscale x 2 x i8>, 6) %0, i32 1)
-  ret <vscale x 1 x half> %1
+  ret target("riscv.vector.tuple", <vscale x 2 x i8>, 6) %0
}
-
-define <vscale x 1 x half> @test_vlseg6_mask_nxv1f16_triscv.vector.tuple_nxv2i8_6t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 6) @test_vlseg6_mask_nxv1f16_triscv.vector.tuple_nxv2i8_6t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg6_mask_nxv1f16_triscv.vector.tuple_nxv2i8_6t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT: vlseg6e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg6e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv2i8_6t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 6) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 4)
-  %1 = call <vscale x 1 x half> @llvm.riscv.tuple.extract.nxv1f16.triscv.vector.tuple_nxv2i8_6t(target("riscv.vector.tuple", <vscale x 2 x i8>, 6) %0, i32 1)
-  ret <vscale x 1 x half> %1
+  ret target("riscv.vector.tuple", <vscale x 2 x i8>, 6) %0
}
-
-
-define <vscale x 2 x half> @test_vlseg6_nxv2f16_triscv.vector.tuple_nxv4i8_6t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @test_vlseg6_nxv2f16_triscv.vector.tuple_nxv4i8_6t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg6_nxv2f16_triscv.vector.tuple_nxv4i8_6t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT: vlseg6e16.v v7, (a0)
+; CHECK-NEXT: vlseg6e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) undef, ptr %base, i32 %vl, i32 4)
-  %1 = call <vscale x 2 x half> @llvm.riscv.tuple.extract.nxv2f16.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0, i32 1)
-  ret <vscale x 2 x half> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0
}
-
-define <vscale x 2 x half> @test_vlseg6_mask_nxv2f16_triscv.vector.tuple_nxv4i8_6t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @test_vlseg6_mask_nxv2f16_triscv.vector.tuple_nxv4i8_6t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg6_mask_nxv2f16_triscv.vector.tuple_nxv4i8_6t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT: vlseg6e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg6e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv4i8_6t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 4)
-  %1 = call <vscale x 2 x half> @llvm.riscv.tuple.extract.nxv2f16.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0, i32 1)
-  ret <vscale x 2 x half> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0
}
-
-
-define <vscale x 4 x half> @test_vlseg6_nxv4f16_triscv.vector.tuple_nxv8i8_6t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @test_vlseg6_nxv4f16_triscv.vector.tuple_nxv8i8_6t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg6_nxv4f16_triscv.vector.tuple_nxv8i8_6t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vlseg6e16.v v7, (a0)
+; CHECK-NEXT: vlseg6e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) undef, ptr %base, i32 %vl, i32 4)
-  %1 = call <vscale x 4 x half> @llvm.riscv.tuple.extract.nxv4f16.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0, i32 1)
-  ret <vscale x 4 x half> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0
}
-
-define <vscale x 4 x half> @test_vlseg6_mask_nxv4f16_triscv.vector.tuple_nxv8i8_6t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @test_vlseg6_mask_nxv4f16_triscv.vector.tuple_nxv8i8_6t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: test_vlseg6_mask_nxv4f16_triscv.vector.tuple_nxv8i8_6t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vlseg6e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg6e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv8i8_6t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) undef, ptr %base, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 4)
-  %1 = call <vscale x 4 x half> @llvm.riscv.tuple.extract.nxv4f16.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0, i32 1)
-  ret <vscale x 4 x half> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0
}
-
-
-define <vscale x 1 x half> @test_vlseg7_nxv1f16_triscv.vector.tuple_nxv2i8_7t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 7) @test_vlseg7_nxv1f16_triscv.vector.tuple_nxv2i8_7t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg7_nxv1f16_triscv.vector.tuple_nxv2i8_7t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT: vlseg7e16.v v7, (a0)
+; CHECK-NEXT: vlseg7e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv2i8_7t(target("riscv.vector.tuple", <vscale x 2 x i8>, 7) undef, ptr %base, i32 %vl, i32 4)
-  %1 = call <vscale x 1 x half> @llvm.riscv.tuple.extract.nxv1f16.triscv.vector.tuple_nxv2i8_7t(target("riscv.vector.tuple", <vscale x 2 x i8>, 7) %0, i32 1)
-  ret <vscale x 1 x half> %1
+  ret target("riscv.vector.tuple", <vscale x 2 x i8>, 7) %0
}
-
-define <vscale x 1 x half> @test_vlseg7_mask_nxv1f16_triscv.vector.tuple_nxv2i8_7t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 7) @test_vlseg7_mask_nxv1f16_triscv.vector.tuple_nxv2i8_7t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg7_mask_nxv1f16_triscv.vector.tuple_nxv2i8_7t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT: vlseg7e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg7e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv2i8_7t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 7) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 4)
-  %1 = call <vscale x 1 x half> @llvm.riscv.tuple.extract.nxv1f16.triscv.vector.tuple_nxv2i8_7t(target("riscv.vector.tuple", <vscale x 2 x i8>, 7) %0, i32 1)
-  ret <vscale x 1 x half> %1
+  ret target("riscv.vector.tuple", <vscale x 2 x i8>, 7) %0
}
-
-
-define <vscale x 2 x half> @test_vlseg7_nxv2f16_triscv.vector.tuple_nxv4i8_7t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @test_vlseg7_nxv2f16_triscv.vector.tuple_nxv4i8_7t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg7_nxv2f16_triscv.vector.tuple_nxv4i8_7t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT: vlseg7e16.v v7, (a0)
+; CHECK-NEXT: vlseg7e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) undef, ptr %base, i32 %vl, i32 4)
-  %1 = call <vscale x 2 x half> @llvm.riscv.tuple.extract.nxv2f16.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0, i32 1)
-  ret <vscale x 2 x half> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0
}
-
-define <vscale x 2 x half> @test_vlseg7_mask_nxv2f16_triscv.vector.tuple_nxv4i8_7t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @test_vlseg7_mask_nxv2f16_triscv.vector.tuple_nxv4i8_7t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg7_mask_nxv2f16_triscv.vector.tuple_nxv4i8_7t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT: vlseg7e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg7e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv4i8_7t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 4)
-  %1 = call <vscale x 2 x half> @llvm.riscv.tuple.extract.nxv2f16.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0, i32 1)
-  ret <vscale x 2 x half> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0
}
-
-
-define <vscale x 4 x half> @test_vlseg7_nxv4f16_triscv.vector.tuple_nxv8i8_7t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @test_vlseg7_nxv4f16_triscv.vector.tuple_nxv8i8_7t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg7_nxv4f16_triscv.vector.tuple_nxv8i8_7t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vlseg7e16.v v7, (a0)
+; CHECK-NEXT: vlseg7e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) undef, ptr %base, i32 %vl, i32 4)
-  %1 = call <vscale x 4 x half> @llvm.riscv.tuple.extract.nxv4f16.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0, i32 1)
-  ret <vscale x 4 x half> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0
}
-
-define <vscale x 4 x half> @test_vlseg7_mask_nxv4f16_triscv.vector.tuple_nxv8i8_7t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @test_vlseg7_mask_nxv4f16_triscv.vector.tuple_nxv8i8_7t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: test_vlseg7_mask_nxv4f16_triscv.vector.tuple_nxv8i8_7t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vlseg7e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg7e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv8i8_7t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) undef, ptr %base, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 4)
-  %1 = call <vscale x 4 x half> @llvm.riscv.tuple.extract.nxv4f16.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0, i32 1)
-  ret <vscale x 4 x half> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0
}
-
-
-define <vscale x 1 x half> @test_vlseg8_nxv1f16_triscv.vector.tuple_nxv2i8_8t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 8) @test_vlseg8_nxv1f16_triscv.vector.tuple_nxv2i8_8t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg8_nxv1f16_triscv.vector.tuple_nxv2i8_8t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT: vlseg8e16.v v7, (a0)
+; CHECK-NEXT: vlseg8e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv2i8_8t(target("riscv.vector.tuple", <vscale x 2 x i8>, 8) undef, ptr %base, i32 %vl, i32 4)
-  %1 = call <vscale x 1 x half> @llvm.riscv.tuple.extract.nxv1f16.triscv.vector.tuple_nxv2i8_8t(target("riscv.vector.tuple", <vscale x 2 x i8>, 8) %0, i32 1)
-  ret <vscale x 1 x half> %1
+  ret target("riscv.vector.tuple", <vscale x 2 x i8>, 8) %0
}
-
-define <vscale x 1 x half> @test_vlseg8_mask_nxv1f16_triscv.vector.tuple_nxv2i8_8t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 8) @test_vlseg8_mask_nxv1f16_triscv.vector.tuple_nxv2i8_8t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg8_mask_nxv1f16_triscv.vector.tuple_nxv2i8_8t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT: vlseg8e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg8e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv2i8_8t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 8) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 4)
-  %1 = call <vscale x 1 x half> @llvm.riscv.tuple.extract.nxv1f16.triscv.vector.tuple_nxv2i8_8t(target("riscv.vector.tuple", <vscale x 2 x i8>, 8) %0, i32 1)
-  ret <vscale x 1 x half> %1
+  ret target("riscv.vector.tuple", <vscale x 2 x i8>, 8) %0
}
-
-
-define <vscale x 2 x half> @test_vlseg8_nxv2f16_triscv.vector.tuple_nxv4i8_8t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @test_vlseg8_nxv2f16_triscv.vector.tuple_nxv4i8_8t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg8_nxv2f16_triscv.vector.tuple_nxv4i8_8t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT: vlseg8e16.v v7, (a0)
+; CHECK-NEXT: vlseg8e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) undef, ptr %base, i32 %vl, i32 4)
-  %1 = call <vscale x 2 x half> @llvm.riscv.tuple.extract.nxv2f16.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0, i32 1)
-  ret <vscale x 2 x half> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0
}
-
-define <vscale x 2 x half> @test_vlseg8_mask_nxv2f16_triscv.vector.tuple_nxv4i8_8t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @test_vlseg8_mask_nxv2f16_triscv.vector.tuple_nxv4i8_8t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg8_mask_nxv2f16_triscv.vector.tuple_nxv4i8_8t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT: vlseg8e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg8e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv4i8_8t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 4)
-  %1 = call <vscale x 2 x half> @llvm.riscv.tuple.extract.nxv2f16.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0, i32 1)
-  ret <vscale x 2 x half> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0
}
-
-
-define <vscale x 4 x half> @test_vlseg8_nxv4f16_triscv.vector.tuple_nxv8i8_8t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @test_vlseg8_nxv4f16_triscv.vector.tuple_nxv8i8_8t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg8_nxv4f16_triscv.vector.tuple_nxv8i8_8t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vlseg8e16.v v7, (a0)
+; CHECK-NEXT: vlseg8e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) undef, ptr %base, i32 %vl, i32 4)
-  %1 = call <vscale x 4 x half> @llvm.riscv.tuple.extract.nxv4f16.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0, i32 1)
-  ret <vscale x 4 x half> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0
}
-
-define <vscale x 4 x half> @test_vlseg8_mask_nxv4f16_triscv.vector.tuple_nxv8i8_8t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @test_vlseg8_mask_nxv4f16_triscv.vector.tuple_nxv8i8_8t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: test_vlseg8_mask_nxv4f16_triscv.vector.tuple_nxv8i8_8t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vlseg8e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg8e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv8i8_8t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) undef, ptr %base, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 4)
-  %1 = call <vscale x 4 x half> @llvm.riscv.tuple.extract.nxv4f16.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0, i32 1)
-  ret <vscale x 4 x half> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0
}
-
-
-define <vscale x 1 x float> @test_vlseg2_nxv1f32_triscv.vector.tuple_nxv4i8_2t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @test_vlseg2_nxv1f32_triscv.vector.tuple_nxv4i8_2t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg2_nxv1f32_triscv.vector.tuple_nxv4i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT: vlseg2e32.v v7, (a0)
+; CHECK-NEXT: vlseg2e32.v v8, (a0)
; CHECK-NEXT: ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) undef, ptr %base, i32 %vl, i32 5)
-  %1 = call <vscale x 1 x float> @llvm.riscv.tuple.extract.nxv1f32.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0, i32 1)
-  ret <vscale x 1 x float> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0
}
-
-define <vscale x 1 x float> @test_vlseg2_mask_nxv1f32_triscv.vector.tuple_nxv4i8_2t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @test_vlseg2_mask_nxv1f32_triscv.vector.tuple_nxv4i8_2t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg2_mask_nxv1f32_triscv.vector.tuple_nxv4i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT: vlseg2e32.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg2e32.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv4i8_2t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 5)
-  %1 = call <vscale x 1 x float> @llvm.riscv.tuple.extract.nxv1f32.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0, i32 1)
-  ret <vscale x 1 x float> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0
}
-
-
-define <vscale x 2 x float> @test_vlseg2_nxv2f32_triscv.vector.tuple_nxv8i8_2t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @test_vlseg2_nxv2f32_triscv.vector.tuple_nxv8i8_2t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg2_nxv2f32_triscv.vector.tuple_nxv8i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT: vlseg2e32.v v7, (a0)
+; CHECK-NEXT: vlseg2e32.v v8, (a0)
; CHECK-NEXT: ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) undef, ptr %base, i32 %vl, i32 5)
-  %1 = call <vscale x 2 x float> @llvm.riscv.tuple.extract.nxv2f32.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0, i32 1)
-  ret <vscale x 2 x float> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0
}
-
-define <vscale x 2 x float> @test_vlseg2_mask_nxv2f32_triscv.vector.tuple_nxv8i8_2t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @test_vlseg2_mask_nxv2f32_triscv.vector.tuple_nxv8i8_2t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg2_mask_nxv2f32_triscv.vector.tuple_nxv8i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT: vlseg2e32.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg2e32.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv8i8_2t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 5)
-  %1 = call <vscale x 2 x float> @llvm.riscv.tuple.extract.nxv2f32.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0, i32 1)
-  ret <vscale x 2 x float> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0
}
-
-
-define <vscale x 4 x float> @test_vlseg2_nxv4f32_triscv.vector.tuple_nxv16i8_2t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @test_vlseg2_nxv4f32_triscv.vector.tuple_nxv16i8_2t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg2_nxv4f32_triscv.vector.tuple_nxv16i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vlseg2e32.v v6, (a0)
+; CHECK-NEXT: vlseg2e32.v v8, (a0)
; CHECK-NEXT: ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, i32 %vl, i32 5)
-  %1 = call <vscale x 4 x float> @llvm.riscv.tuple.extract.nxv4f32.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0, i32 1)
-  ret <vscale x 4 x float> %1
+  ret target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0
}
-
-define <vscale x 4 x float> @test_vlseg2_mask_nxv4f32_triscv.vector.tuple_nxv16i8_2t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @test_vlseg2_mask_nxv4f32_triscv.vector.tuple_nxv16i8_2t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: test_vlseg2_mask_nxv4f32_triscv.vector.tuple_nxv16i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vlseg2e32.v v6, (a0), v0.t
+; CHECK-NEXT: vlseg2e32.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv16i8_2t.nxv4i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 5)
-  %1 = call <vscale x 4 x float> @llvm.riscv.tuple.extract.nxv4f32.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0, i32 1)
-  ret <vscale x 4 x float> %1
+  ret target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0
}
-
-
-define <vscale x 8 x float> @test_vlseg2_nxv8f32_triscv.vector.tuple_nxv32i8_2t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @test_vlseg2_nxv8f32_triscv.vector.tuple_nxv32i8_2t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg2_nxv8f32_triscv.vector.tuple_nxv32i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT: vlseg2e32.v v4, (a0)
+; CHECK-NEXT: vlseg2e32.v v8, (a0)
; CHECK-NEXT: ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, i32 %vl, i32 5)
-  %1 = call <vscale x 8 x float> @llvm.riscv.tuple.extract.nxv8f32.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0, i32 1)
-  ret <vscale x 8 x float> %1
+  ret target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0
}
-
-define <vscale x 8 x float> @test_vlseg2_mask_nxv8f32_triscv.vector.tuple_nxv32i8_2t(ptr %base, i32 %vl, <vscale x 8 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @test_vlseg2_mask_nxv8f32_triscv.vector.tuple_nxv32i8_2t(ptr %base, i32 %vl, <vscale x 8 x i1> %mask) {
; CHECK-LABEL: test_vlseg2_mask_nxv8f32_triscv.vector.tuple_nxv32i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT: vlseg2e32.v v4, (a0), v0.t
+; CHECK-NEXT: vlseg2e32.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv32i8_2t.nxv8i1(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 8 x i1> %mask, i32 %vl, i32 1, i32 5)
-  %1 = call <vscale x 8 x float> @llvm.riscv.tuple.extract.nxv8f32.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0, i32 1)
-  ret <vscale x 8 x float> %1
+  ret target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0
}
-
-
-define <vscale x 1 x float> @test_vlseg3_nxv1f32_triscv.vector.tuple_nxv4i8_3t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @test_vlseg3_nxv1f32_triscv.vector.tuple_nxv4i8_3t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg3_nxv1f32_triscv.vector.tuple_nxv4i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT: vlseg3e32.v v7, (a0)
+; CHECK-NEXT: vlseg3e32.v v8, (a0)
; CHECK-NEXT: ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) undef, ptr %base, i32 %vl, i32 5)
-  %1 = call <vscale x 1 x float> @llvm.riscv.tuple.extract.nxv1f32.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0, i32 1)
-  ret <vscale x 1 x float> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0
}
-
-define <vscale x 1 x float> @test_vlseg3_mask_nxv1f32_triscv.vector.tuple_nxv4i8_3t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @test_vlseg3_mask_nxv1f32_triscv.vector.tuple_nxv4i8_3t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg3_mask_nxv1f32_triscv.vector.tuple_nxv4i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT: vlseg3e32.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg3e32.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv4i8_3t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 5)
-  %1 = call <vscale x 1 x float> @llvm.riscv.tuple.extract.nxv1f32.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0, i32 1)
-  ret <vscale x 1 x float> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0
}
-
-
-define <vscale x 2 x float> @test_vlseg3_nxv2f32_triscv.vector.tuple_nxv8i8_3t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @test_vlseg3_nxv2f32_triscv.vector.tuple_nxv8i8_3t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg3_nxv2f32_triscv.vector.tuple_nxv8i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT: vlseg3e32.v v7, (a0)
+; CHECK-NEXT: vlseg3e32.v v8, (a0)
; CHECK-NEXT: ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) undef, ptr %base, i32 %vl, i32 5)
-  %1 = call <vscale x 2 x float> @llvm.riscv.tuple.extract.nxv2f32.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0, i32 1)
-  ret <vscale x 2 x float> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0
}
-
-define <vscale x 2 x float> @test_vlseg3_mask_nxv2f32_triscv.vector.tuple_nxv8i8_3t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @test_vlseg3_mask_nxv2f32_triscv.vector.tuple_nxv8i8_3t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg3_mask_nxv2f32_triscv.vector.tuple_nxv8i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT: vlseg3e32.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg3e32.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv8i8_3t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 5)
-  %1 = call <vscale x 2 x float> @llvm.riscv.tuple.extract.nxv2f32.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0, i32 1)
-  ret <vscale x 2 x float> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0
}
-
-
-define <vscale x 4 x float> @test_vlseg3_nxv4f32_triscv.vector.tuple_nxv16i8_3t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @test_vlseg3_nxv4f32_triscv.vector.tuple_nxv16i8_3t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg3_nxv4f32_triscv.vector.tuple_nxv16i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vlseg3e32.v v6, (a0)
+; CHECK-NEXT: vlseg3e32.v v8, (a0)
; CHECK-NEXT: ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, i32 %vl, i32 5)
-  %1 = call <vscale x 4 x float> @llvm.riscv.tuple.extract.nxv4f32.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0, i32 1)
-  ret <vscale x 4 x float> %1
+  ret target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0
}
-
-define <vscale x 4 x float> @test_vlseg3_mask_nxv4f32_triscv.vector.tuple_nxv16i8_3t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @test_vlseg3_mask_nxv4f32_triscv.vector.tuple_nxv16i8_3t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: test_vlseg3_mask_nxv4f32_triscv.vector.tuple_nxv16i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vlseg3e32.v v6, (a0), v0.t
+; CHECK-NEXT: vlseg3e32.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv4i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 5)
-  %1 = call <vscale x 4 x float> @llvm.riscv.tuple.extract.nxv4f32.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0, i32 1)
-  ret <vscale x 4 x float> %1
+  ret target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0
}
-
-
-define <vscale x 1 x float> @test_vlseg4_nxv1f32_triscv.vector.tuple_nxv4i8_4t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @test_vlseg4_nxv1f32_triscv.vector.tuple_nxv4i8_4t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg4_nxv1f32_triscv.vector.tuple_nxv4i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT: vlseg4e32.v v7, (a0)
+; CHECK-NEXT: vlseg4e32.v v8, (a0)
; CHECK-NEXT: ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) undef, ptr %base, i32 %vl, i32 5)
-  %1 = call <vscale x 1 x float> @llvm.riscv.tuple.extract.nxv1f32.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0, i32 1)
-  ret <vscale x 1 x float> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0
}
-
-define <vscale x 1 x float> @test_vlseg4_mask_nxv1f32_triscv.vector.tuple_nxv4i8_4t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @test_vlseg4_mask_nxv1f32_triscv.vector.tuple_nxv4i8_4t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg4_mask_nxv1f32_triscv.vector.tuple_nxv4i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT: vlseg4e32.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg4e32.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv4i8_4t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 5)
-  %1 = call <vscale x 1 x float> @llvm.riscv.tuple.extract.nxv1f32.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0, i32 1)
-  ret <vscale x 1 x float> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0
}
-
-
-define <vscale x 2 x float> @test_vlseg4_nxv2f32_triscv.vector.tuple_nxv8i8_4t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @test_vlseg4_nxv2f32_triscv.vector.tuple_nxv8i8_4t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg4_nxv2f32_triscv.vector.tuple_nxv8i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT: vlseg4e32.v v7, (a0)
+; CHECK-NEXT: vlseg4e32.v v8, (a0)
; CHECK-NEXT: ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) undef, ptr %base, i32 %vl, i32 5)
-  %1 = call <vscale x 2 x float> @llvm.riscv.tuple.extract.nxv2f32.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0, i32 1)
-  ret <vscale x 2 x float> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0
}
-
-define <vscale x 2 x float> @test_vlseg4_mask_nxv2f32_triscv.vector.tuple_nxv8i8_4t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @test_vlseg4_mask_nxv2f32_triscv.vector.tuple_nxv8i8_4t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg4_mask_nxv2f32_triscv.vector.tuple_nxv8i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT: vlseg4e32.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg4e32.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv8i8_4t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 5)
-  %1 = call <vscale x 2 x float> @llvm.riscv.tuple.extract.nxv2f32.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0, i32 1)
-  ret <vscale x 2 x float> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0
}
-
-
-define <vscale x 4 x float> @test_vlseg4_nxv4f32_triscv.vector.tuple_nxv16i8_4t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @test_vlseg4_nxv4f32_triscv.vector.tuple_nxv16i8_4t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg4_nxv4f32_triscv.vector.tuple_nxv16i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vlseg4e32.v v6, (a0)
+; CHECK-NEXT: vlseg4e32.v v8, (a0)
; CHECK-NEXT: ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, i32 %vl, i32 5)
-  %1 = call <vscale x 4 x float> @llvm.riscv.tuple.extract.nxv4f32.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0, i32 1)
-  ret <vscale x 4 x float> %1
+  ret target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0
}
-
-define <vscale x 4 x float> @test_vlseg4_mask_nxv4f32_triscv.vector.tuple_nxv16i8_4t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @test_vlseg4_mask_nxv4f32_triscv.vector.tuple_nxv16i8_4t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: test_vlseg4_mask_nxv4f32_triscv.vector.tuple_nxv16i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vlseg4e32.v v6, (a0), v0.t
+; CHECK-NEXT: vlseg4e32.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv16i8_4t.nxv4i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 5)
-  %1 = call <vscale x 4 x float> @llvm.riscv.tuple.extract.nxv4f32.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0, i32 1)
-  ret <vscale x 4 x float> %1
+  ret target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0
}
-
-
-define <vscale x 1 x float> @test_vlseg5_nxv1f32_triscv.vector.tuple_nxv4i8_5t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @test_vlseg5_nxv1f32_triscv.vector.tuple_nxv4i8_5t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg5_nxv1f32_triscv.vector.tuple_nxv4i8_5t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT: vlseg5e32.v v7, (a0)
+; CHECK-NEXT: vlseg5e32.v v8, (a0)
; CHECK-NEXT: ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) undef, ptr %base, i32 %vl, i32 5)
-  %1 = call <vscale x 1 x float> @llvm.riscv.tuple.extract.nxv1f32.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0, i32 1)
-  ret <vscale x 1 x float> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0
}
-
-define <vscale x 1 x float> @test_vlseg5_mask_nxv1f32_triscv.vector.tuple_nxv4i8_5t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @test_vlseg5_mask_nxv1f32_triscv.vector.tuple_nxv4i8_5t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg5_mask_nxv1f32_triscv.vector.tuple_nxv4i8_5t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT: vlseg5e32.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg5e32.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv4i8_5t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 5)
-  %1 = call <vscale x 1 x float> @llvm.riscv.tuple.extract.nxv1f32.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0, i32 1)
-  ret <vscale x 1 x float> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0
}
-
-
-define <vscale x 2 x float> @test_vlseg5_nxv2f32_triscv.vector.tuple_nxv8i8_5t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @test_vlseg5_nxv2f32_triscv.vector.tuple_nxv8i8_5t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg5_nxv2f32_triscv.vector.tuple_nxv8i8_5t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT: vlseg5e32.v v7, (a0)
+; CHECK-NEXT: vlseg5e32.v v8, (a0)
; CHECK-NEXT: ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) undef, ptr %base, i32 %vl, i32 5)
-  %1 = call <vscale x 2 x float> @llvm.riscv.tuple.extract.nxv2f32.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0, i32 1)
-  ret <vscale x 2 x float> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0
}
-
-define <vscale x 2 x float> @test_vlseg5_mask_nxv2f32_triscv.vector.tuple_nxv8i8_5t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @test_vlseg5_mask_nxv2f32_triscv.vector.tuple_nxv8i8_5t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg5_mask_nxv2f32_triscv.vector.tuple_nxv8i8_5t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT: vlseg5e32.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg5e32.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv8i8_5t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 5)
-  %1 = call <vscale x 2 x float> @llvm.riscv.tuple.extract.nxv2f32.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0, i32 1)
-  ret <vscale x 2 x float> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0
}
-
-
-define <vscale x 1 x float> @test_vlseg6_nxv1f32_triscv.vector.tuple_nxv4i8_6t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @test_vlseg6_nxv1f32_triscv.vector.tuple_nxv4i8_6t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg6_nxv1f32_triscv.vector.tuple_nxv4i8_6t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT: vlseg6e32.v v7, (a0)
+; CHECK-NEXT: vlseg6e32.v v8, (a0)
; CHECK-NEXT: ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) undef, ptr %base, i32 %vl, i32 5)
-  %1 = call <vscale x 1 x float> @llvm.riscv.tuple.extract.nxv1f32.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0, i32 1)
-  ret <vscale x 1 x float> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0
}
-
-define <vscale x 1 x float> @test_vlseg6_mask_nxv1f32_triscv.vector.tuple_nxv4i8_6t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @test_vlseg6_mask_nxv1f32_triscv.vector.tuple_nxv4i8_6t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg6_mask_nxv1f32_triscv.vector.tuple_nxv4i8_6t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT: vlseg6e32.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg6e32.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv4i8_6t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 5)
-  %1 = call <vscale x 1 x float> @llvm.riscv.tuple.extract.nxv1f32.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0, i32 1)
-  ret <vscale x 1 x float> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0
}
-
-
-define <vscale x 2 x float> @test_vlseg6_nxv2f32_triscv.vector.tuple_nxv8i8_6t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @test_vlseg6_nxv2f32_triscv.vector.tuple_nxv8i8_6t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg6_nxv2f32_triscv.vector.tuple_nxv8i8_6t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT: vlseg6e32.v v7, (a0)
+; CHECK-NEXT: vlseg6e32.v v8, (a0)
; CHECK-NEXT: ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) undef, ptr %base, i32 %vl, i32 5)
-  %1 = call <vscale x 2 x float> @llvm.riscv.tuple.extract.nxv2f32.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0, i32 1)
-  ret <vscale x 2 x float> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0
}
-
-define <vscale x 2 x float> @test_vlseg6_mask_nxv2f32_triscv.vector.tuple_nxv8i8_6t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @test_vlseg6_mask_nxv2f32_triscv.vector.tuple_nxv8i8_6t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg6_mask_nxv2f32_triscv.vector.tuple_nxv8i8_6t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT: vlseg6e32.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg6e32.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv8i8_6t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 5)
-  %1 = call <vscale x 2 x float> @llvm.riscv.tuple.extract.nxv2f32.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0, i32 1)
-  ret <vscale x 2 x float> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0
}
-
-
-define <vscale x 1 x float> @test_vlseg7_nxv1f32_triscv.vector.tuple_nxv4i8_7t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @test_vlseg7_nxv1f32_triscv.vector.tuple_nxv4i8_7t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg7_nxv1f32_triscv.vector.tuple_nxv4i8_7t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT: vlseg7e32.v v7, (a0)
+; CHECK-NEXT: vlseg7e32.v v8, (a0)
; CHECK-NEXT: ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) undef, ptr %base, i32 %vl, i32 5)
-  %1 = call <vscale x 1 x float> @llvm.riscv.tuple.extract.nxv1f32.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0, i32 1)
-  ret <vscale x 1 x float> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0
}
-
-define <vscale x 1 x float> @test_vlseg7_mask_nxv1f32_triscv.vector.tuple_nxv4i8_7t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @test_vlseg7_mask_nxv1f32_triscv.vector.tuple_nxv4i8_7t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg7_mask_nxv1f32_triscv.vector.tuple_nxv4i8_7t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT: vlseg7e32.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg7e32.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv4i8_7t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 5)
-  %1 = call <vscale x 1 x float> @llvm.riscv.tuple.extract.nxv1f32.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0, i32 1)
-  ret <vscale x 1 x float> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0
}
-
-
-define <vscale x 2 x float> @test_vlseg7_nxv2f32_triscv.vector.tuple_nxv8i8_7t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @test_vlseg7_nxv2f32_triscv.vector.tuple_nxv8i8_7t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg7_nxv2f32_triscv.vector.tuple_nxv8i8_7t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT: vlseg7e32.v v7, (a0)
+; CHECK-NEXT: vlseg7e32.v v8, (a0)
; CHECK-NEXT: ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) undef, ptr %base, i32 %vl, i32 5)
-  %1 = call <vscale x 2 x float> @llvm.riscv.tuple.extract.nxv2f32.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0, i32 1)
-  ret <vscale x 2 x float> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0
}
-
-define <vscale x 2 x float> @test_vlseg7_mask_nxv2f32_triscv.vector.tuple_nxv8i8_7t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @test_vlseg7_mask_nxv2f32_triscv.vector.tuple_nxv8i8_7t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg7_mask_nxv2f32_triscv.vector.tuple_nxv8i8_7t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT: vlseg7e32.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg7e32.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv8i8_7t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 5)
-  %1 = call <vscale x 2 x float> @llvm.riscv.tuple.extract.nxv2f32.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0, i32 1)
-  ret <vscale x 2 x float> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0
}
-
-
-define <vscale x 1 x float> @test_vlseg8_nxv1f32_triscv.vector.tuple_nxv4i8_8t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @test_vlseg8_nxv1f32_triscv.vector.tuple_nxv4i8_8t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg8_nxv1f32_triscv.vector.tuple_nxv4i8_8t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT: vlseg8e32.v v7, (a0)
+; CHECK-NEXT: vlseg8e32.v v8, (a0)
; CHECK-NEXT: ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) undef, ptr %base, i32 %vl, i32 5)
-  %1 = call <vscale x 1 x float> @llvm.riscv.tuple.extract.nxv1f32.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0, i32 1)
-  ret <vscale x 1 x float> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0
}
-
-define <vscale x 1 x float> @test_vlseg8_mask_nxv1f32_triscv.vector.tuple_nxv4i8_8t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @test_vlseg8_mask_nxv1f32_triscv.vector.tuple_nxv4i8_8t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg8_mask_nxv1f32_triscv.vector.tuple_nxv4i8_8t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT: vlseg8e32.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg8e32.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv4i8_8t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 5)
-  %1 = call <vscale x 1 x float> @llvm.riscv.tuple.extract.nxv1f32.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0, i32 1)
-  ret <vscale x 1 x float> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0
}
-
-
-define <vscale x 2 x float> @test_vlseg8_nxv2f32_triscv.vector.tuple_nxv8i8_8t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @test_vlseg8_nxv2f32_triscv.vector.tuple_nxv8i8_8t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg8_nxv2f32_triscv.vector.tuple_nxv8i8_8t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT: vlseg8e32.v v7, (a0)
+; CHECK-NEXT: vlseg8e32.v v8, (a0)
; CHECK-NEXT: ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) undef, ptr %base, i32 %vl, i32 5)
-  %1 = call <vscale x 2 x float> @llvm.riscv.tuple.extract.nxv2f32.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0, i32 1)
-  ret <vscale x 2 x float> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0
}
-
-define <vscale x 2 x float> @test_vlseg8_mask_nxv2f32_triscv.vector.tuple_nxv8i8_8t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @test_vlseg8_mask_nxv2f32_triscv.vector.tuple_nxv8i8_8t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg8_mask_nxv2f32_triscv.vector.tuple_nxv8i8_8t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT: vlseg8e32.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg8e32.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv8i8_8t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 5)
-  %1 = call <vscale x 2 x float> @llvm.riscv.tuple.extract.nxv2f32.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0, i32 1)
-  ret <vscale x 2 x float> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0
}
-
-
-define <vscale x 1 x double> @test_vlseg2_nxv1f64_triscv.vector.tuple_nxv8i8_2t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @test_vlseg2_nxv1f64_triscv.vector.tuple_nxv8i8_2t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg2_nxv1f64_triscv.vector.tuple_nxv8i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
-; CHECK-NEXT: vlseg2e64.v v7, (a0)
+; CHECK-NEXT: vlseg2e64.v v8, (a0)
; CHECK-NEXT: ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) undef, ptr %base, i32 %vl, i32 6)
-  %1 = call <vscale x 1 x double> @llvm.riscv.tuple.extract.nxv1f64.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0, i32 1)
-  ret <vscale x 1 x double> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0
}
-
-define <vscale x 1 x double> @test_vlseg2_mask_nxv1f64_triscv.vector.tuple_nxv8i8_2t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @test_vlseg2_mask_nxv1f64_triscv.vector.tuple_nxv8i8_2t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg2_mask_nxv1f64_triscv.vector.tuple_nxv8i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
-; CHECK-NEXT: vlseg2e64.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg2e64.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv8i8_2t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 6)
-  %1 = call <vscale x 1 x double> @llvm.riscv.tuple.extract.nxv1f64.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0, i32 1)
-  ret <vscale x 1 x double> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0
}
-
-
-define <vscale x 2 x double> @test_vlseg2_nxv2f64_triscv.vector.tuple_nxv16i8_2t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @test_vlseg2_nxv2f64_triscv.vector.tuple_nxv16i8_2t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg2_nxv2f64_triscv.vector.tuple_nxv16i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vlseg2e64.v v6, (a0)
+; CHECK-NEXT: vlseg2e64.v v8, (a0)
; CHECK-NEXT: ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, i32 %vl, i32 6)
-  %1 = call <vscale x 2 x double> @llvm.riscv.tuple.extract.nxv2f64.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0, i32 1)
-  ret <vscale x 2 x double> %1
+  ret target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0
}
-
-define <vscale x 2 x double> @test_vlseg2_mask_nxv2f64_triscv.vector.tuple_nxv16i8_2t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @test_vlseg2_mask_nxv2f64_triscv.vector.tuple_nxv16i8_2t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg2_mask_nxv2f64_triscv.vector.tuple_nxv16i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vlseg2e64.v v6, (a0), v0.t
+; CHECK-NEXT: vlseg2e64.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv16i8_2t.nxv2i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 6)
-  %1 = call <vscale x 2 x double> @llvm.riscv.tuple.extract.nxv2f64.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0, i32 1)
-  ret <vscale x 2 x double> %1
+  ret target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0
}
-
-
-define <vscale x 4 x double> @test_vlseg2_nxv4f64_triscv.vector.tuple_nxv32i8_2t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @test_vlseg2_nxv4f64_triscv.vector.tuple_nxv32i8_2t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg2_nxv4f64_triscv.vector.tuple_nxv32i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma
-; CHECK-NEXT: vlseg2e64.v v4, (a0)
+; CHECK-NEXT: vlseg2e64.v v8, (a0)
; CHECK-NEXT: ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, i32 %vl, i32 6)
-  %1 = call <vscale x 4 x double> @llvm.riscv.tuple.extract.nxv4f64.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0, i32 1)
-  ret <vscale x 4 x double> %1
+  ret target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0
}
-
-define <vscale x 4 x double> @test_vlseg2_mask_nxv4f64_triscv.vector.tuple_nxv32i8_2t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @test_vlseg2_mask_nxv4f64_triscv.vector.tuple_nxv32i8_2t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: test_vlseg2_mask_nxv4f64_triscv.vector.tuple_nxv32i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma
-; CHECK-NEXT: vlseg2e64.v v4, (a0), v0.t
+; CHECK-NEXT: vlseg2e64.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv32i8_2t.nxv4i1(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 6)
-  %1 = call <vscale x 4 x double> @llvm.riscv.tuple.extract.nxv4f64.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0, i32 1)
-  ret <vscale x 4 x double> %1
+  ret target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0
}
-
-
-define <vscale x 1 x double> @test_vlseg3_nxv1f64_triscv.vector.tuple_nxv8i8_3t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @test_vlseg3_nxv1f64_triscv.vector.tuple_nxv8i8_3t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg3_nxv1f64_triscv.vector.tuple_nxv8i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
-; CHECK-NEXT: vlseg3e64.v v7, (a0)
+; CHECK-NEXT: vlseg3e64.v v8, (a0)
; CHECK-NEXT: ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) undef, ptr %base, i32 %vl, i32 6)
-  %1 = call <vscale x 1 x double> @llvm.riscv.tuple.extract.nxv1f64.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0, i32 1)
-  ret <vscale x 1 x double> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0
}
-
-define <vscale x 1 x double> @test_vlseg3_mask_nxv1f64_triscv.vector.tuple_nxv8i8_3t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @test_vlseg3_mask_nxv1f64_triscv.vector.tuple_nxv8i8_3t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg3_mask_nxv1f64_triscv.vector.tuple_nxv8i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
-; CHECK-NEXT: vlseg3e64.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg3e64.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv8i8_3t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 6)
-  %1 = call <vscale x 1 x double> @llvm.riscv.tuple.extract.nxv1f64.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0, i32 1)
-  ret <vscale x 1 x double> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0
}
-
-
-define <vscale x 2 x double> @test_vlseg3_nxv2f64_triscv.vector.tuple_nxv16i8_3t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @test_vlseg3_nxv2f64_triscv.vector.tuple_nxv16i8_3t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg3_nxv2f64_triscv.vector.tuple_nxv16i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vlseg3e64.v v6, (a0)
+; CHECK-NEXT: vlseg3e64.v v8, (a0)
; CHECK-NEXT: ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, i32 %vl, i32 6)
-  %1 = call <vscale x 2 x double> @llvm.riscv.tuple.extract.nxv2f64.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0, i32 1)
-  ret <vscale x 2 x double> %1
+  ret target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0
}
-
-define <vscale x 2 x double> @test_vlseg3_mask_nxv2f64_triscv.vector.tuple_nxv16i8_3t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @test_vlseg3_mask_nxv2f64_triscv.vector.tuple_nxv16i8_3t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg3_mask_nxv2f64_triscv.vector.tuple_nxv16i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vlseg3e64.v v6, (a0), v0.t
+; CHECK-NEXT: vlseg3e64.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv2i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 6)
-  %1 = call <vscale x 2 x double> @llvm.riscv.tuple.extract.nxv2f64.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0, i32 1)
-  ret <vscale x 2 x double> %1
+  ret target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0
}
-
-
-define <vscale x 1 x double> @test_vlseg4_nxv1f64_triscv.vector.tuple_nxv8i8_4t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @test_vlseg4_nxv1f64_triscv.vector.tuple_nxv8i8_4t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg4_nxv1f64_triscv.vector.tuple_nxv8i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
-; CHECK-NEXT: vlseg4e64.v v7, (a0)
+; CHECK-NEXT: vlseg4e64.v v8, (a0)
; CHECK-NEXT: ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) undef, ptr %base, i32 %vl, i32 6)
-  %1 = call <vscale x 1 x double> @llvm.riscv.tuple.extract.nxv1f64.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0, i32 1)
-  ret <vscale x 1 x double> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0
}
-
-define <vscale x 1 x double> @test_vlseg4_mask_nxv1f64_triscv.vector.tuple_nxv8i8_4t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @test_vlseg4_mask_nxv1f64_triscv.vector.tuple_nxv8i8_4t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg4_mask_nxv1f64_triscv.vector.tuple_nxv8i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma
-; CHECK-NEXT: vlseg4e64.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg4e64.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv8i8_4t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 6)
-  %1 = call <vscale x 1 x double> @llvm.riscv.tuple.extract.nxv1f64.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0, i32 1)
-  ret <vscale x 1 x double> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0
}
-
-
-define <vscale x 2 x double> @test_vlseg4_nxv2f64_triscv.vector.tuple_nxv16i8_4t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @test_vlseg4_nxv2f64_triscv.vector.tuple_nxv16i8_4t(ptr %base, i32 %vl) {
; CHECK-LABEL: test_vlseg4_nxv2f64_triscv.vector.tuple_nxv16i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vlseg4e64.v v6, (a0)
+; CHECK-NEXT: vlseg4e64.v v8, (a0)
; CHECK-NEXT: ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, i32 %vl, i32 6)
-  %1 = call <vscale x 2 x double> @llvm.riscv.tuple.extract.nxv2f64.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0, i32 1)
-  ret <vscale x 2 x double> %1
+  ret target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0
}
-
-define <vscale x 2 x double> @test_vlseg4_mask_nxv2f64_triscv.vector.tuple_nxv16i8_4t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @test_vlseg4_mask_nxv2f64_triscv.vector.tuple_nxv16i8_4t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg4_mask_nxv2f64_triscv.vector.tuple_nxv16i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT: vlseg4e64.v v6, (a0), v0.t
+; CHECK-NEXT: vlseg4e64.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv16i8_4t.nxv2i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 6)
-  %1 = call <vscale x 2 x double> @llvm.riscv.tuple.extract.nxv2f64.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0, i32 1)
-  ret <vscale x 2 x double> %1
+  ret target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0
}
-
-
-define <vscale x 1 x double> @test_vlseg5_nxv1f64_triscv.vector.tuple_nxv8i8_5t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @test_vlseg5_nxv1f64_triscv.vector.tuple_nxv8i8_5t(ptr %base, i32 %vl) {
; CHECK-LABEL:
test_vlseg5_nxv1f64_triscv.vector.tuple_nxv8i8_5t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma -; CHECK-NEXT: vlseg5e64.v v7, (a0) +; CHECK-NEXT: vlseg5e64.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) undef, ptr %base, i32 %vl, i32 6) - %1 = call <vscale x 1 x double> @llvm.riscv.tuple.extract.nxv1f64.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0, i32 1) - ret <vscale x 1 x double> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0 } - -define <vscale x 1 x double> @test_vlseg5_mask_nxv1f64_triscv.vector.tuple_nxv8i8_5t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @test_vlseg5_mask_nxv1f64_triscv.vector.tuple_nxv8i8_5t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) { ; CHECK-LABEL: test_vlseg5_mask_nxv1f64_triscv.vector.tuple_nxv8i8_5t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma -; CHECK-NEXT: vlseg5e64.v v7, (a0), v0.t +; CHECK-NEXT: vlseg5e64.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv8i8_5t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 6) - %1 = call <vscale x 1 x double> @llvm.riscv.tuple.extract.nxv1f64.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0, i32 1) - ret <vscale x 1 x double> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0 } - - -define <vscale x 1 x double> @test_vlseg6_nxv1f64_triscv.vector.tuple_nxv8i8_6t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @test_vlseg6_nxv1f64_triscv.vector.tuple_nxv8i8_6t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg6_nxv1f64_triscv.vector.tuple_nxv8i8_6t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma -; CHECK-NEXT: vlseg6e64.v v7, (a0) +; CHECK-NEXT: vlseg6e64.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) undef, ptr %base, i32 %vl, i32 6) - %1 = call <vscale x 1 x double> @llvm.riscv.tuple.extract.nxv1f64.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0, i32 1) - ret <vscale x 1 x double> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0 } - -define <vscale x 1 x double> @test_vlseg6_mask_nxv1f64_triscv.vector.tuple_nxv8i8_6t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @test_vlseg6_mask_nxv1f64_triscv.vector.tuple_nxv8i8_6t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) { ; CHECK-LABEL: test_vlseg6_mask_nxv1f64_triscv.vector.tuple_nxv8i8_6t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma -; CHECK-NEXT: vlseg6e64.v v7, (a0), v0.t +; CHECK-NEXT: vlseg6e64.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv8i8_6t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 6) - %1 = call <vscale x 1 x double> 
@llvm.riscv.tuple.extract.nxv1f64.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0, i32 1) - ret <vscale x 1 x double> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0 } - - -define <vscale x 1 x double> @test_vlseg7_nxv1f64_triscv.vector.tuple_nxv8i8_7t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @test_vlseg7_nxv1f64_triscv.vector.tuple_nxv8i8_7t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg7_nxv1f64_triscv.vector.tuple_nxv8i8_7t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma -; CHECK-NEXT: vlseg7e64.v v7, (a0) +; CHECK-NEXT: vlseg7e64.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) undef, ptr %base, i32 %vl, i32 6) - %1 = call <vscale x 1 x double> @llvm.riscv.tuple.extract.nxv1f64.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0, i32 1) - ret <vscale x 1 x double> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0 } - -define <vscale x 1 x double> @test_vlseg7_mask_nxv1f64_triscv.vector.tuple_nxv8i8_7t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @test_vlseg7_mask_nxv1f64_triscv.vector.tuple_nxv8i8_7t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) { ; CHECK-LABEL: test_vlseg7_mask_nxv1f64_triscv.vector.tuple_nxv8i8_7t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma -; CHECK-NEXT: vlseg7e64.v v7, (a0), v0.t +; CHECK-NEXT: vlseg7e64.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv8i8_7t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 6) - %1 = call <vscale x 1 x double> @llvm.riscv.tuple.extract.nxv1f64.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0, i32 1) - ret <vscale x 1 x double> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0 } - - -define <vscale x 1 x double> @test_vlseg8_nxv1f64_triscv.vector.tuple_nxv8i8_8t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @test_vlseg8_nxv1f64_triscv.vector.tuple_nxv8i8_8t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg8_nxv1f64_triscv.vector.tuple_nxv8i8_8t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma -; CHECK-NEXT: vlseg8e64.v v7, (a0) +; CHECK-NEXT: vlseg8e64.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) undef, ptr %base, i32 %vl, i32 6) - %1 = call <vscale x 1 x double> @llvm.riscv.tuple.extract.nxv1f64.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0, i32 1) - ret <vscale x 1 x double> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0 } - -define <vscale x 1 x double> @test_vlseg8_mask_nxv1f64_triscv.vector.tuple_nxv8i8_8t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @test_vlseg8_mask_nxv1f64_triscv.vector.tuple_nxv8i8_8t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) { ; CHECK-LABEL: test_vlseg8_mask_nxv1f64_triscv.vector.tuple_nxv8i8_8t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, 
e64, m1, ta, ma -; CHECK-NEXT: vlseg8e64.v v7, (a0), v0.t +; CHECK-NEXT: vlseg8e64.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv8i8_8t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 6) - %1 = call <vscale x 1 x double> @llvm.riscv.tuple.extract.nxv1f64.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0, i32 1) - ret <vscale x 1 x double> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0 } - - -define <vscale x 1 x bfloat> @test_vlseg2_nxv1bf16_triscv.vector.tuple_nxv2i8_2t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 2 x i8>, 2) @test_vlseg2_nxv1bf16_triscv.vector.tuple_nxv2i8_2t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg2_nxv1bf16_triscv.vector.tuple_nxv2i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma -; CHECK-NEXT: vlseg2e16.v v7, (a0) +; CHECK-NEXT: vlseg2e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv2i8_2t(target("riscv.vector.tuple", <vscale x 2 x i8>, 2) undef, ptr %base, i32 %vl, i32 4) - %1 = call <vscale x 1 x bfloat> @llvm.riscv.tuple.extract.nxv1bf16.triscv.vector.tuple_nxv2i8_2t(target("riscv.vector.tuple", <vscale x 2 x i8>, 2) %0, i32 1) - ret <vscale x 1 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 2 x i8>, 2) %0 } - -define <vscale x 1 x bfloat> @test_vlseg2_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_2t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 2 x i8>, 2) @test_vlseg2_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_2t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) { ; CHECK-LABEL: test_vlseg2_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma -; CHECK-NEXT: vlseg2e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg2e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv2i8_2t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 2) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 4) - %1 = call <vscale x 1 x bfloat> @llvm.riscv.tuple.extract.nxv1bf16.triscv.vector.tuple_nxv2i8_2t(target("riscv.vector.tuple", <vscale x 2 x i8>, 2) %0, i32 1) - ret <vscale x 1 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 2 x i8>, 2) %0 } - - -define <vscale x 2 x bfloat> @test_vlseg2_nxv2bf16_triscv.vector.tuple_nxv4i8_2t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @test_vlseg2_nxv2bf16_triscv.vector.tuple_nxv4i8_2t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg2_nxv2bf16_triscv.vector.tuple_nxv4i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma -; CHECK-NEXT: vlseg2e16.v v7, (a0) +; CHECK-NEXT: vlseg2e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) undef, ptr %base, i32 %vl, i32 4) - %1 = call <vscale x 2 x bfloat> @llvm.riscv.tuple.extract.nxv2bf16.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0, i32 1) - ret <vscale x 2 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0 } - 
-define <vscale x 2 x bfloat> @test_vlseg2_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_2t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @test_vlseg2_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_2t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) { ; CHECK-LABEL: test_vlseg2_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma -; CHECK-NEXT: vlseg2e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg2e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv4i8_2t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 4) - %1 = call <vscale x 2 x bfloat> @llvm.riscv.tuple.extract.nxv2bf16.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0, i32 1) - ret <vscale x 2 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0 } - - -define <vscale x 4 x bfloat> @test_vlseg2_nxv4bf16_triscv.vector.tuple_nxv8i8_2t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @test_vlseg2_nxv4bf16_triscv.vector.tuple_nxv8i8_2t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg2_nxv4bf16_triscv.vector.tuple_nxv8i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vlseg2e16.v v7, (a0) +; CHECK-NEXT: vlseg2e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) undef, ptr %base, i32 %vl, i32 4) - %1 = call <vscale x 4 x bfloat> @llvm.riscv.tuple.extract.nxv4bf16.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0, i32 1) - ret <vscale x 4 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0 } - -define <vscale x 4 x bfloat> @test_vlseg2_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_2t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @test_vlseg2_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_2t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) { ; CHECK-LABEL: test_vlseg2_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vlseg2e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg2e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv8i8_2t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) undef, ptr %base, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 4) - %1 = call <vscale x 4 x bfloat> @llvm.riscv.tuple.extract.nxv4bf16.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0, i32 1) - ret <vscale x 4 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0 } - - -define <vscale x 8 x bfloat> @test_vlseg2_nxv8bf16_triscv.vector.tuple_nxv16i8_2t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @test_vlseg2_nxv8bf16_triscv.vector.tuple_nxv16i8_2t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg2_nxv8bf16_triscv.vector.tuple_nxv16i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma -; CHECK-NEXT: vlseg2e16.v v6, (a0) +; CHECK-NEXT: vlseg2e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call 
target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, i32 %vl, i32 4) - %1 = call <vscale x 8 x bfloat> @llvm.riscv.tuple.extract.nxv8bf16.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0, i32 1) - ret <vscale x 8 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0 } - -define <vscale x 8 x bfloat> @test_vlseg2_mask_nxv8bf16_triscv.vector.tuple_nxv16i8_2t(ptr %base, i32 %vl, <vscale x 8 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @test_vlseg2_mask_nxv8bf16_triscv.vector.tuple_nxv16i8_2t(ptr %base, i32 %vl, <vscale x 8 x i1> %mask) { ; CHECK-LABEL: test_vlseg2_mask_nxv8bf16_triscv.vector.tuple_nxv16i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma -; CHECK-NEXT: vlseg2e16.v v6, (a0), v0.t +; CHECK-NEXT: vlseg2e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv16i8_2t.nxv8i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 8 x i1> %mask, i32 %vl, i32 1, i32 4) - %1 = call <vscale x 8 x bfloat> @llvm.riscv.tuple.extract.nxv8bf16.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0, i32 1) - ret <vscale x 8 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0 } - - -define <vscale x 16 x bfloat> @test_vlseg2_nxv16bf16_triscv.vector.tuple_nxv32i8_2t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @test_vlseg2_nxv16bf16_triscv.vector.tuple_nxv32i8_2t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg2_nxv16bf16_triscv.vector.tuple_nxv32i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma -; CHECK-NEXT: vlseg2e16.v v4, (a0) +; CHECK-NEXT: vlseg2e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, i32 %vl, i32 4) - %1 = call <vscale x 16 x bfloat> @llvm.riscv.tuple.extract.nxv16bf16.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0, i32 1) - ret <vscale x 16 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0 } - -define <vscale x 16 x bfloat> @test_vlseg2_mask_nxv16bf16_triscv.vector.tuple_nxv32i8_2t(ptr %base, i32 %vl, <vscale x 16 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @test_vlseg2_mask_nxv16bf16_triscv.vector.tuple_nxv32i8_2t(ptr %base, i32 %vl, <vscale x 16 x i1> %mask) { ; CHECK-LABEL: test_vlseg2_mask_nxv16bf16_triscv.vector.tuple_nxv32i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma -; CHECK-NEXT: vlseg2e16.v v4, (a0), v0.t +; CHECK-NEXT: vlseg2e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv32i8_2t.nxv16i1(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 16 x i1> %mask, i32 %vl, i32 1, i32 4) - %1 = call <vscale x 16 x bfloat> @llvm.riscv.tuple.extract.nxv16bf16.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0, i32 1) - ret <vscale x 16 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0 } - - -define <vscale x 
1 x bfloat> @test_vlseg3_nxv1bf16_triscv.vector.tuple_nxv2i8_3t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 2 x i8>, 3) @test_vlseg3_nxv1bf16_triscv.vector.tuple_nxv2i8_3t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg3_nxv1bf16_triscv.vector.tuple_nxv2i8_3t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma -; CHECK-NEXT: vlseg3e16.v v7, (a0) +; CHECK-NEXT: vlseg3e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv2i8_3t(target("riscv.vector.tuple", <vscale x 2 x i8>, 3) undef, ptr %base, i32 %vl, i32 4) - %1 = call <vscale x 1 x bfloat> @llvm.riscv.tuple.extract.nxv1bf16.triscv.vector.tuple_nxv2i8_3t(target("riscv.vector.tuple", <vscale x 2 x i8>, 3) %0, i32 1) - ret <vscale x 1 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 2 x i8>, 3) %0 } - -define <vscale x 1 x bfloat> @test_vlseg3_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_3t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 2 x i8>, 3) @test_vlseg3_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_3t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) { ; CHECK-LABEL: test_vlseg3_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_3t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma -; CHECK-NEXT: vlseg3e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg3e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv2i8_3t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 3) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 4) - %1 = call <vscale x 1 x bfloat> @llvm.riscv.tuple.extract.nxv1bf16.triscv.vector.tuple_nxv2i8_3t(target("riscv.vector.tuple", <vscale x 2 x i8>, 3) %0, i32 1) - ret <vscale x 1 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 2 x i8>, 3) %0 } - - -define <vscale x 2 x bfloat> @test_vlseg3_nxv2bf16_triscv.vector.tuple_nxv4i8_3t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @test_vlseg3_nxv2bf16_triscv.vector.tuple_nxv4i8_3t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg3_nxv2bf16_triscv.vector.tuple_nxv4i8_3t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma -; CHECK-NEXT: vlseg3e16.v v7, (a0) +; CHECK-NEXT: vlseg3e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) undef, ptr %base, i32 %vl, i32 4) - %1 = call <vscale x 2 x bfloat> @llvm.riscv.tuple.extract.nxv2bf16.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0, i32 1) - ret <vscale x 2 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0 } - -define <vscale x 2 x bfloat> @test_vlseg3_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_3t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @test_vlseg3_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_3t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) { ; CHECK-LABEL: test_vlseg3_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_3t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma -; CHECK-NEXT: vlseg3e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg3e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 3) 
@llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv4i8_3t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 4) - %1 = call <vscale x 2 x bfloat> @llvm.riscv.tuple.extract.nxv2bf16.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0, i32 1) - ret <vscale x 2 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0 } - - -define <vscale x 4 x bfloat> @test_vlseg3_nxv4bf16_triscv.vector.tuple_nxv8i8_3t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @test_vlseg3_nxv4bf16_triscv.vector.tuple_nxv8i8_3t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg3_nxv4bf16_triscv.vector.tuple_nxv8i8_3t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vlseg3e16.v v7, (a0) +; CHECK-NEXT: vlseg3e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) undef, ptr %base, i32 %vl, i32 4) - %1 = call <vscale x 4 x bfloat> @llvm.riscv.tuple.extract.nxv4bf16.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0, i32 1) - ret <vscale x 4 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0 } - -define <vscale x 4 x bfloat> @test_vlseg3_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_3t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @test_vlseg3_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_3t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) { ; CHECK-LABEL: test_vlseg3_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_3t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vlseg3e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg3e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv8i8_3t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) undef, ptr %base, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 4) - %1 = call <vscale x 4 x bfloat> @llvm.riscv.tuple.extract.nxv4bf16.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0, i32 1) - ret <vscale x 4 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0 } - - -define <vscale x 8 x bfloat> @test_vlseg3_nxv8bf16_triscv.vector.tuple_nxv16i8_3t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @test_vlseg3_nxv8bf16_triscv.vector.tuple_nxv16i8_3t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg3_nxv8bf16_triscv.vector.tuple_nxv16i8_3t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma -; CHECK-NEXT: vlseg3e16.v v6, (a0) +; CHECK-NEXT: vlseg3e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, i32 %vl, i32 4) - %1 = call <vscale x 8 x bfloat> @llvm.riscv.tuple.extract.nxv8bf16.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0, i32 1) - ret <vscale x 8 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0 } - -define <vscale x 8 x bfloat> @test_vlseg3_mask_nxv8bf16_triscv.vector.tuple_nxv16i8_3t(ptr %base, i32 %vl, <vscale x 8 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 16 x 
i8>, 3) @test_vlseg3_mask_nxv8bf16_triscv.vector.tuple_nxv16i8_3t(ptr %base, i32 %vl, <vscale x 8 x i1> %mask) { ; CHECK-LABEL: test_vlseg3_mask_nxv8bf16_triscv.vector.tuple_nxv16i8_3t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma -; CHECK-NEXT: vlseg3e16.v v6, (a0), v0.t +; CHECK-NEXT: vlseg3e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv8i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 8 x i1> %mask, i32 %vl, i32 1, i32 4) - %1 = call <vscale x 8 x bfloat> @llvm.riscv.tuple.extract.nxv8bf16.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0, i32 1) - ret <vscale x 8 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0 } - - -define <vscale x 1 x bfloat> @test_vlseg4_nxv1bf16_triscv.vector.tuple_nxv2i8_4t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 2 x i8>, 4) @test_vlseg4_nxv1bf16_triscv.vector.tuple_nxv2i8_4t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg4_nxv1bf16_triscv.vector.tuple_nxv2i8_4t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma -; CHECK-NEXT: vlseg4e16.v v7, (a0) +; CHECK-NEXT: vlseg4e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv2i8_4t(target("riscv.vector.tuple", <vscale x 2 x i8>, 4) undef, ptr %base, i32 %vl, i32 4) - %1 = call <vscale x 1 x bfloat> @llvm.riscv.tuple.extract.nxv1bf16.triscv.vector.tuple_nxv2i8_4t(target("riscv.vector.tuple", <vscale x 2 x i8>, 4) %0, i32 1) - ret <vscale x 1 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 2 x i8>, 4) %0 } - -define <vscale x 1 x bfloat> @test_vlseg4_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_4t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 2 x i8>, 4) @test_vlseg4_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_4t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) { ; CHECK-LABEL: test_vlseg4_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_4t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma -; CHECK-NEXT: vlseg4e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg4e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv2i8_4t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 4) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 4) - %1 = call <vscale x 1 x bfloat> @llvm.riscv.tuple.extract.nxv1bf16.triscv.vector.tuple_nxv2i8_4t(target("riscv.vector.tuple", <vscale x 2 x i8>, 4) %0, i32 1) - ret <vscale x 1 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 2 x i8>, 4) %0 } - - -define <vscale x 2 x bfloat> @test_vlseg4_nxv2bf16_triscv.vector.tuple_nxv4i8_4t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @test_vlseg4_nxv2bf16_triscv.vector.tuple_nxv4i8_4t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg4_nxv2bf16_triscv.vector.tuple_nxv4i8_4t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma -; CHECK-NEXT: vlseg4e16.v v7, (a0) +; CHECK-NEXT: vlseg4e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) undef, ptr %base, i32 %vl, i32 4) - %1 = 
call <vscale x 2 x bfloat> @llvm.riscv.tuple.extract.nxv2bf16.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0, i32 1) - ret <vscale x 2 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0 } - -define <vscale x 2 x bfloat> @test_vlseg4_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_4t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @test_vlseg4_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_4t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) { ; CHECK-LABEL: test_vlseg4_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_4t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma -; CHECK-NEXT: vlseg4e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg4e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv4i8_4t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 4) - %1 = call <vscale x 2 x bfloat> @llvm.riscv.tuple.extract.nxv2bf16.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0, i32 1) - ret <vscale x 2 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0 } - - -define <vscale x 4 x bfloat> @test_vlseg4_nxv4bf16_triscv.vector.tuple_nxv8i8_4t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @test_vlseg4_nxv4bf16_triscv.vector.tuple_nxv8i8_4t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg4_nxv4bf16_triscv.vector.tuple_nxv8i8_4t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vlseg4e16.v v7, (a0) +; CHECK-NEXT: vlseg4e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) undef, ptr %base, i32 %vl, i32 4) - %1 = call <vscale x 4 x bfloat> @llvm.riscv.tuple.extract.nxv4bf16.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0, i32 1) - ret <vscale x 4 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0 } - -define <vscale x 4 x bfloat> @test_vlseg4_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_4t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @test_vlseg4_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_4t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) { ; CHECK-LABEL: test_vlseg4_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_4t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vlseg4e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg4e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv8i8_4t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) undef, ptr %base, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 4) - %1 = call <vscale x 4 x bfloat> @llvm.riscv.tuple.extract.nxv4bf16.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0, i32 1) - ret <vscale x 4 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0 } - - -define <vscale x 8 x bfloat> @test_vlseg4_nxv8bf16_triscv.vector.tuple_nxv16i8_4t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @test_vlseg4_nxv8bf16_triscv.vector.tuple_nxv16i8_4t(ptr %base, i32 %vl) { ; CHECK-LABEL: 
test_vlseg4_nxv8bf16_triscv.vector.tuple_nxv16i8_4t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma -; CHECK-NEXT: vlseg4e16.v v6, (a0) +; CHECK-NEXT: vlseg4e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, i32 %vl, i32 4) - %1 = call <vscale x 8 x bfloat> @llvm.riscv.tuple.extract.nxv8bf16.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0, i32 1) - ret <vscale x 8 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0 } - -define <vscale x 8 x bfloat> @test_vlseg4_mask_nxv8bf16_triscv.vector.tuple_nxv16i8_4t(ptr %base, i32 %vl, <vscale x 8 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @test_vlseg4_mask_nxv8bf16_triscv.vector.tuple_nxv16i8_4t(ptr %base, i32 %vl, <vscale x 8 x i1> %mask) { ; CHECK-LABEL: test_vlseg4_mask_nxv8bf16_triscv.vector.tuple_nxv16i8_4t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma -; CHECK-NEXT: vlseg4e16.v v6, (a0), v0.t +; CHECK-NEXT: vlseg4e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv16i8_4t.nxv8i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 8 x i1> %mask, i32 %vl, i32 1, i32 4) - %1 = call <vscale x 8 x bfloat> @llvm.riscv.tuple.extract.nxv8bf16.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0, i32 1) - ret <vscale x 8 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0 } - - -define <vscale x 1 x bfloat> @test_vlseg5_nxv1bf16_triscv.vector.tuple_nxv2i8_5t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 2 x i8>, 5) @test_vlseg5_nxv1bf16_triscv.vector.tuple_nxv2i8_5t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg5_nxv1bf16_triscv.vector.tuple_nxv2i8_5t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma -; CHECK-NEXT: vlseg5e16.v v7, (a0) +; CHECK-NEXT: vlseg5e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv2i8_5t(target("riscv.vector.tuple", <vscale x 2 x i8>, 5) undef, ptr %base, i32 %vl, i32 4) - %1 = call <vscale x 1 x bfloat> @llvm.riscv.tuple.extract.nxv1bf16.triscv.vector.tuple_nxv2i8_5t(target("riscv.vector.tuple", <vscale x 2 x i8>, 5) %0, i32 1) - ret <vscale x 1 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 2 x i8>, 5) %0 } - -define <vscale x 1 x bfloat> @test_vlseg5_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_5t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 2 x i8>, 5) @test_vlseg5_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_5t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) { ; CHECK-LABEL: test_vlseg5_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_5t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma -; CHECK-NEXT: vlseg5e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg5e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv2i8_5t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 5) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 4) - %1 = call <vscale x 1 x bfloat> 
@llvm.riscv.tuple.extract.nxv1bf16.triscv.vector.tuple_nxv2i8_5t(target("riscv.vector.tuple", <vscale x 2 x i8>, 5) %0, i32 1) - ret <vscale x 1 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 2 x i8>, 5) %0 } - - -define <vscale x 2 x bfloat> @test_vlseg5_nxv2bf16_triscv.vector.tuple_nxv4i8_5t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @test_vlseg5_nxv2bf16_triscv.vector.tuple_nxv4i8_5t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg5_nxv2bf16_triscv.vector.tuple_nxv4i8_5t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma -; CHECK-NEXT: vlseg5e16.v v7, (a0) +; CHECK-NEXT: vlseg5e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) undef, ptr %base, i32 %vl, i32 4) - %1 = call <vscale x 2 x bfloat> @llvm.riscv.tuple.extract.nxv2bf16.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0, i32 1) - ret <vscale x 2 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0 } - -define <vscale x 2 x bfloat> @test_vlseg5_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_5t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @test_vlseg5_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_5t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) { ; CHECK-LABEL: test_vlseg5_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_5t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma -; CHECK-NEXT: vlseg5e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg5e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv4i8_5t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 4) - %1 = call <vscale x 2 x bfloat> @llvm.riscv.tuple.extract.nxv2bf16.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0, i32 1) - ret <vscale x 2 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0 } - - -define <vscale x 4 x bfloat> @test_vlseg5_nxv4bf16_triscv.vector.tuple_nxv8i8_5t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @test_vlseg5_nxv4bf16_triscv.vector.tuple_nxv8i8_5t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg5_nxv4bf16_triscv.vector.tuple_nxv8i8_5t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vlseg5e16.v v7, (a0) +; CHECK-NEXT: vlseg5e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) undef, ptr %base, i32 %vl, i32 4) - %1 = call <vscale x 4 x bfloat> @llvm.riscv.tuple.extract.nxv4bf16.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0, i32 1) - ret <vscale x 4 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0 } - -define <vscale x 4 x bfloat> @test_vlseg5_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_5t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @test_vlseg5_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_5t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) { ; CHECK-LABEL: test_vlseg5_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_5t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: 
vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vlseg5e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg5e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv8i8_5t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) undef, ptr %base, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 4) - %1 = call <vscale x 4 x bfloat> @llvm.riscv.tuple.extract.nxv4bf16.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0, i32 1) - ret <vscale x 4 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0 } - - -define <vscale x 1 x bfloat> @test_vlseg6_nxv1bf16_triscv.vector.tuple_nxv2i8_6t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 2 x i8>, 6) @test_vlseg6_nxv1bf16_triscv.vector.tuple_nxv2i8_6t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg6_nxv1bf16_triscv.vector.tuple_nxv2i8_6t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma -; CHECK-NEXT: vlseg6e16.v v7, (a0) +; CHECK-NEXT: vlseg6e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv2i8_6t(target("riscv.vector.tuple", <vscale x 2 x i8>, 6) undef, ptr %base, i32 %vl, i32 4) - %1 = call <vscale x 1 x bfloat> @llvm.riscv.tuple.extract.nxv1bf16.triscv.vector.tuple_nxv2i8_6t(target("riscv.vector.tuple", <vscale x 2 x i8>, 6) %0, i32 1) - ret <vscale x 1 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 2 x i8>, 6) %0 } - -define <vscale x 1 x bfloat> @test_vlseg6_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_6t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 2 x i8>, 6) @test_vlseg6_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_6t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) { ; CHECK-LABEL: test_vlseg6_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_6t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma -; CHECK-NEXT: vlseg6e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg6e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv2i8_6t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 6) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 4) - %1 = call <vscale x 1 x bfloat> @llvm.riscv.tuple.extract.nxv1bf16.triscv.vector.tuple_nxv2i8_6t(target("riscv.vector.tuple", <vscale x 2 x i8>, 6) %0, i32 1) - ret <vscale x 1 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 2 x i8>, 6) %0 } - - -define <vscale x 2 x bfloat> @test_vlseg6_nxv2bf16_triscv.vector.tuple_nxv4i8_6t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @test_vlseg6_nxv2bf16_triscv.vector.tuple_nxv4i8_6t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg6_nxv2bf16_triscv.vector.tuple_nxv4i8_6t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma -; CHECK-NEXT: vlseg6e16.v v7, (a0) +; CHECK-NEXT: vlseg6e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) undef, ptr %base, i32 %vl, i32 4) - %1 = call <vscale x 2 x bfloat> @llvm.riscv.tuple.extract.nxv2bf16.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0, i32 1) - ret <vscale x 2 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 4 x 
i8>, 6) %0 } - -define <vscale x 2 x bfloat> @test_vlseg6_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_6t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @test_vlseg6_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_6t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) { ; CHECK-LABEL: test_vlseg6_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_6t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma -; CHECK-NEXT: vlseg6e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg6e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv4i8_6t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 4) - %1 = call <vscale x 2 x bfloat> @llvm.riscv.tuple.extract.nxv2bf16.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0, i32 1) - ret <vscale x 2 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0 } - - -define <vscale x 4 x bfloat> @test_vlseg6_nxv4bf16_triscv.vector.tuple_nxv8i8_6t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @test_vlseg6_nxv4bf16_triscv.vector.tuple_nxv8i8_6t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg6_nxv4bf16_triscv.vector.tuple_nxv8i8_6t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vlseg6e16.v v7, (a0) +; CHECK-NEXT: vlseg6e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) undef, ptr %base, i32 %vl, i32 4) - %1 = call <vscale x 4 x bfloat> @llvm.riscv.tuple.extract.nxv4bf16.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0, i32 1) - ret <vscale x 4 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0 } - -define <vscale x 4 x bfloat> @test_vlseg6_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_6t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @test_vlseg6_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_6t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) { ; CHECK-LABEL: test_vlseg6_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_6t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vlseg6e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg6e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv8i8_6t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) undef, ptr %base, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 4) - %1 = call <vscale x 4 x bfloat> @llvm.riscv.tuple.extract.nxv4bf16.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0, i32 1) - ret <vscale x 4 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0 } - - -define <vscale x 1 x bfloat> @test_vlseg7_nxv1bf16_triscv.vector.tuple_nxv2i8_7t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 2 x i8>, 7) @test_vlseg7_nxv1bf16_triscv.vector.tuple_nxv2i8_7t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg7_nxv1bf16_triscv.vector.tuple_nxv2i8_7t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma -; CHECK-NEXT: vlseg7e16.v v7, (a0) +; CHECK-NEXT: vlseg7e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call 
target("riscv.vector.tuple", <vscale x 2 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv2i8_7t(target("riscv.vector.tuple", <vscale x 2 x i8>, 7) undef, ptr %base, i32 %vl, i32 4) - %1 = call <vscale x 1 x bfloat> @llvm.riscv.tuple.extract.nxv1bf16.triscv.vector.tuple_nxv2i8_7t(target("riscv.vector.tuple", <vscale x 2 x i8>, 7) %0, i32 1) - ret <vscale x 1 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 2 x i8>, 7) %0 } - -define <vscale x 1 x bfloat> @test_vlseg7_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_7t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 2 x i8>, 7) @test_vlseg7_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_7t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) { ; CHECK-LABEL: test_vlseg7_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_7t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma -; CHECK-NEXT: vlseg7e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg7e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv2i8_7t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 7) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 4) - %1 = call <vscale x 1 x bfloat> @llvm.riscv.tuple.extract.nxv1bf16.triscv.vector.tuple_nxv2i8_7t(target("riscv.vector.tuple", <vscale x 2 x i8>, 7) %0, i32 1) - ret <vscale x 1 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 2 x i8>, 7) %0 } - - -define <vscale x 2 x bfloat> @test_vlseg7_nxv2bf16_triscv.vector.tuple_nxv4i8_7t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @test_vlseg7_nxv2bf16_triscv.vector.tuple_nxv4i8_7t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg7_nxv2bf16_triscv.vector.tuple_nxv4i8_7t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma -; CHECK-NEXT: vlseg7e16.v v7, (a0) +; CHECK-NEXT: vlseg7e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) undef, ptr %base, i32 %vl, i32 4) - %1 = call <vscale x 2 x bfloat> @llvm.riscv.tuple.extract.nxv2bf16.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0, i32 1) - ret <vscale x 2 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0 } - -define <vscale x 2 x bfloat> @test_vlseg7_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_7t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @test_vlseg7_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_7t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) { ; CHECK-LABEL: test_vlseg7_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_7t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma -; CHECK-NEXT: vlseg7e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg7e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv4i8_7t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 4) - %1 = call <vscale x 2 x bfloat> @llvm.riscv.tuple.extract.nxv2bf16.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0, i32 1) - ret <vscale x 2 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0 } - - -define <vscale x 4 x bfloat> 
@test_vlseg7_nxv4bf16_triscv.vector.tuple_nxv8i8_7t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @test_vlseg7_nxv4bf16_triscv.vector.tuple_nxv8i8_7t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg7_nxv4bf16_triscv.vector.tuple_nxv8i8_7t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vlseg7e16.v v7, (a0) +; CHECK-NEXT: vlseg7e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) undef, ptr %base, i32 %vl, i32 4) - %1 = call <vscale x 4 x bfloat> @llvm.riscv.tuple.extract.nxv4bf16.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0, i32 1) - ret <vscale x 4 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0 } - -define <vscale x 4 x bfloat> @test_vlseg7_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_7t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @test_vlseg7_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_7t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) { ; CHECK-LABEL: test_vlseg7_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_7t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vlseg7e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg7e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv8i8_7t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) undef, ptr %base, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 4) - %1 = call <vscale x 4 x bfloat> @llvm.riscv.tuple.extract.nxv4bf16.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0, i32 1) - ret <vscale x 4 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0 } - - -define <vscale x 1 x bfloat> @test_vlseg8_nxv1bf16_triscv.vector.tuple_nxv2i8_8t(ptr %base, i32 %vl) { +define target("riscv.vector.tuple", <vscale x 2 x i8>, 8) @test_vlseg8_nxv1bf16_triscv.vector.tuple_nxv2i8_8t(ptr %base, i32 %vl) { ; CHECK-LABEL: test_vlseg8_nxv1bf16_triscv.vector.tuple_nxv2i8_8t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma -; CHECK-NEXT: vlseg8e16.v v7, (a0) +; CHECK-NEXT: vlseg8e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv2i8_8t(target("riscv.vector.tuple", <vscale x 2 x i8>, 8) undef, ptr %base, i32 %vl, i32 4) - %1 = call <vscale x 1 x bfloat> @llvm.riscv.tuple.extract.nxv1bf16.triscv.vector.tuple_nxv2i8_8t(target("riscv.vector.tuple", <vscale x 2 x i8>, 8) %0, i32 1) - ret <vscale x 1 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 2 x i8>, 8) %0 } - -define <vscale x 1 x bfloat> @test_vlseg8_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_8t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 2 x i8>, 8) @test_vlseg8_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_8t(ptr %base, i32 %vl, <vscale x 1 x i1> %mask) { ; CHECK-LABEL: test_vlseg8_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_8t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma -; CHECK-NEXT: vlseg8e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg8e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 8) 
   %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv2i8_8t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 8) undef, ptr %base, <vscale x 1 x i1> %mask, i32 %vl, i32 1, i32 4)
-  %1 = call <vscale x 1 x bfloat> @llvm.riscv.tuple.extract.nxv1bf16.triscv.vector.tuple_nxv2i8_8t(target("riscv.vector.tuple", <vscale x 2 x i8>, 8) %0, i32 1)
-  ret <vscale x 1 x bfloat> %1
+  ret target("riscv.vector.tuple", <vscale x 2 x i8>, 8) %0
 }
-
-
-define <vscale x 2 x bfloat> @test_vlseg8_nxv2bf16_triscv.vector.tuple_nxv4i8_8t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @test_vlseg8_nxv2bf16_triscv.vector.tuple_nxv4i8_8t(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg8_nxv2bf16_triscv.vector.tuple_nxv4i8_8t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT: vlseg8e16.v v7, (a0)
+; CHECK-NEXT: vlseg8e16.v v8, (a0)
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) undef, ptr %base, i32 %vl, i32 4)
-  %1 = call <vscale x 2 x bfloat> @llvm.riscv.tuple.extract.nxv2bf16.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0, i32 1)
-  ret <vscale x 2 x bfloat> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0
 }
-
-define <vscale x 2 x bfloat> @test_vlseg8_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_8t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @test_vlseg8_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_8t(ptr %base, i32 %vl, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg8_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_8t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT: vlseg8e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg8e16.v v8, (a0), v0.t
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv4i8_8t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) undef, ptr %base, <vscale x 2 x i1> %mask, i32 %vl, i32 1, i32 4)
-  %1 = call <vscale x 2 x bfloat> @llvm.riscv.tuple.extract.nxv2bf16.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0, i32 1)
-  ret <vscale x 2 x bfloat> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0
 }
-
-
-define <vscale x 4 x bfloat> @test_vlseg8_nxv4bf16_triscv.vector.tuple_nxv8i8_8t(ptr %base, i32 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @test_vlseg8_nxv4bf16_triscv.vector.tuple_nxv8i8_8t(ptr %base, i32 %vl) {
 ; CHECK-LABEL: test_vlseg8_nxv4bf16_triscv.vector.tuple_nxv8i8_8t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vlseg8e16.v v7, (a0)
+; CHECK-NEXT: vlseg8e16.v v8, (a0)
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) undef, ptr %base, i32 %vl, i32 4)
-  %1 = call <vscale x 4 x bfloat> @llvm.riscv.tuple.extract.nxv4bf16.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0, i32 1)
-  ret <vscale x 4 x bfloat> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0
 }
-
-define <vscale x 4 x bfloat> @test_vlseg8_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_8t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @test_vlseg8_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_8t(ptr %base, i32 %vl, <vscale x 4 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg8_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_8t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vlseg8e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg8e16.v v8, (a0), v0.t
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv8i8_8t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) undef, ptr %base, <vscale x 4 x i1> %mask, i32 %vl, i32 1, i32 4)
-  %1 = call <vscale x 4 x bfloat> @llvm.riscv.tuple.extract.nxv4bf16.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0, i32 1)
-  ret <vscale x 4 x bfloat> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0
 }
-
diff --git a/llvm/test/CodeGen/RISCV/rvv/vlseg-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vlseg-rv64.ll
index 16e5e7b9..faeabaf 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vlseg-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vlseg-rv64.ll
@@ -2,4330 +2,3373 @@
 ; RUN: llc -mtriple=riscv64 -mattr=+zve64d,+f,+d,+zvfh,+zvfbfmin \
 ; RUN: -verify-machineinstrs < %s | FileCheck %s
-declare target("riscv.vector.tuple", <vscale x 1 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv1i8_2t(target("riscv.vector.tuple", <vscale x 1 x i8>, 2), ptr, i64, i64)
-declare target("riscv.vector.tuple", <vscale x 1 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv1i8_2t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 2), ptr, <vscale x 1 x i1>, i64, i64, i64)
-
-define <vscale x 1 x i8> @test_vlseg2_nxv1i8_triscv.vector.tuple_nxv1i8_2t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 1 x i8>, 2) @test_vlseg2_nxv1i8_triscv.vector.tuple_nxv1i8_2t(ptr %base, i64 %vl) {
 ; CHECK-LABEL: test_vlseg2_nxv1i8_triscv.vector.tuple_nxv1i8_2t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
-; CHECK-NEXT: vlseg2e8.v v7, (a0)
+; CHECK-NEXT: vlseg2e8.v v8, (a0)
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv1i8_2t(target("riscv.vector.tuple", <vscale x 1 x i8>, 2) undef, ptr %base, i64 %vl, i64 3)
-  %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_2t(target("riscv.vector.tuple", <vscale x 1 x i8>, 2) %0, i32 1)
-  ret <vscale x 1 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 1 x i8>, 2) %0
 }
-
-define <vscale x 1 x i8> @test_vlseg2_mask_nxv1i8_triscv.vector.tuple_nxv1i8_2t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 1 x i8>, 2) @test_vlseg2_mask_nxv1i8_triscv.vector.tuple_nxv1i8_2t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg2_mask_nxv1i8_triscv.vector.tuple_nxv1i8_2t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
-; CHECK-NEXT: vlseg2e8.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg2e8.v v8, (a0), v0.t
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv1i8_2t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 2) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 3)
-  %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_2t(target("riscv.vector.tuple", <vscale x 1 x i8>, 2) %0, i32 1)
-  ret <vscale x 1 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 1 x i8>, 2) %0
 }
-
-define <vscale x 1 x i8> @test_vlseg2_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_2t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 1 x i8>, 2) @test_vlseg2_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_2t(ptr %base, i64 %vl) {
 ; CHECK-LABEL: test_vlseg2_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_2t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
-; CHECK-NEXT: vlseg2e8.v v7, (a0)
+; CHECK-NEXT: vlseg2e8.v v8, (a0)
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv1i8_2t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 2) undef, ptr %base, <vscale x 1 x i1> splat (i1 true), i64 %vl, i64 1, i64 3)
-  %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_2t(target("riscv.vector.tuple", <vscale x 1 x i8>, 2) %0, i32 1)
-  ret <vscale x 1 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 1 x i8>, 2) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 2 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv2i8_2t(target("riscv.vector.tuple", <vscale x 2 x i8>, 2), ptr, i64, i64)
-declare target("riscv.vector.tuple", <vscale x 2 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv2i8_2t.nxv2i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 2), ptr, <vscale x 2 x i1>, i64, i64, i64)
-
-define <vscale x 2 x i8> @test_vlseg2_nxv2i8_triscv.vector.tuple_nxv2i8_2t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 2) @test_vlseg2_nxv2i8_triscv.vector.tuple_nxv2i8_2t(ptr %base, i64 %vl) {
 ; CHECK-LABEL: test_vlseg2_nxv2i8_triscv.vector.tuple_nxv2i8_2t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
-; CHECK-NEXT: vlseg2e8.v v7, (a0)
+; CHECK-NEXT: vlseg2e8.v v8, (a0)
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv2i8_2t(target("riscv.vector.tuple", <vscale x 2 x i8>, 2) undef, ptr %base, i64 %vl, i64 3)
-  %1 = call <vscale x 2 x i8> @llvm.riscv.tuple.extract.nxv2i8.triscv.vector.tuple_nxv2i8_2t(target("riscv.vector.tuple", <vscale x 2 x i8>, 2) %0, i32 1)
-  ret <vscale x 2 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 2 x i8>, 2) %0
 }
-
-define <vscale x 2 x i8> @test_vlseg2_mask_nxv2i8_triscv.vector.tuple_nxv2i8_2t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 2) @test_vlseg2_mask_nxv2i8_triscv.vector.tuple_nxv2i8_2t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg2_mask_nxv2i8_triscv.vector.tuple_nxv2i8_2t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
-; CHECK-NEXT: vlseg2e8.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg2e8.v v8, (a0), v0.t
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv2i8_2t.nxv2i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 2) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 3)
-  %1 = call <vscale x 2 x i8> @llvm.riscv.tuple.extract.nxv2i8.triscv.vector.tuple_nxv2i8_2t(target("riscv.vector.tuple", <vscale x 2 x i8>, 2) %0, i32 1)
-  ret <vscale x 2 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 2 x i8>, 2) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2), ptr, i64, i64)
-declare target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv4i8_2t.nxv4i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 2), ptr, <vscale x 4 x i1>, i64, i64, i64)
-
-define <vscale x 4 x i8> @test_vlseg2_nxv4i8_triscv.vector.tuple_nxv4i8_2t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @test_vlseg2_nxv4i8_triscv.vector.tuple_nxv4i8_2t(ptr %base, i64 %vl) {
 ; CHECK-LABEL: test_vlseg2_nxv4i8_triscv.vector.tuple_nxv4i8_2t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
-; CHECK-NEXT: vlseg2e8.v v7, (a0)
+; CHECK-NEXT: vlseg2e8.v v8, (a0)
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) undef, ptr %base, i64 %vl, i64 3)
-  %1 = call <vscale x 4 x i8> @llvm.riscv.tuple.extract.nxv4i8.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0, i32 1)
-  ret <vscale x 4 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0
 }
-
-define <vscale x 4 x i8> @test_vlseg2_mask_nxv4i8_triscv.vector.tuple_nxv4i8_2t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @test_vlseg2_mask_nxv4i8_triscv.vector.tuple_nxv4i8_2t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg2_mask_nxv4i8_triscv.vector.tuple_nxv4i8_2t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
-; CHECK-NEXT: vlseg2e8.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg2e8.v v8, (a0), v0.t
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv4i8_2t.nxv4i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) undef, ptr %base, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 3)
-  %1 = call <vscale x 4 x i8> @llvm.riscv.tuple.extract.nxv4i8.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0, i32 1)
-  ret <vscale x 4 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2), ptr, i64, i64)
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv8i8_2t.nxv8i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 2), ptr, <vscale x 8 x i1>, i64, i64, i64)
-
-define <vscale x 8 x i8> @test_vlseg2_nxv8i8_triscv.vector.tuple_nxv8i8_2t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @test_vlseg2_nxv8i8_triscv.vector.tuple_nxv8i8_2t(ptr %base, i64 %vl) {
 ; CHECK-LABEL: test_vlseg2_nxv8i8_triscv.vector.tuple_nxv8i8_2t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
-; CHECK-NEXT: vlseg2e8.v v7, (a0)
+; CHECK-NEXT: vlseg2e8.v v8, (a0)
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) undef, ptr %base, i64 %vl, i64 3)
-  %1 = call <vscale x 8 x i8> @llvm.riscv.tuple.extract.nxv8i8.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0, i32 1)
-  ret <vscale x 8 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0
 }
-
-define <vscale x 8 x i8> @test_vlseg2_mask_nxv8i8_triscv.vector.tuple_nxv8i8_2t(ptr %base, i64 %vl, <vscale x 8 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @test_vlseg2_mask_nxv8i8_triscv.vector.tuple_nxv8i8_2t(ptr %base, i64 %vl, <vscale x 8 x i1> %mask) {
target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @test_vlseg2_mask_nxv8i8_triscv.vector.tuple_nxv8i8_2t(ptr %base, i64 %vl, <vscale x 8 x i1> %mask) { ; CHECK-LABEL: test_vlseg2_mask_nxv8i8_triscv.vector.tuple_nxv8i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma -; CHECK-NEXT: vlseg2e8.v v7, (a0), v0.t +; CHECK-NEXT: vlseg2e8.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv8i8_2t.nxv8i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) undef, ptr %base, <vscale x 8 x i1> %mask, i64 %vl, i64 1, i64 3) - %1 = call <vscale x 8 x i8> @llvm.riscv.tuple.extract.nxv8i8.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0, i32 1) - ret <vscale x 8 x i8> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0 } - -declare target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2), ptr, i64, i64) -declare target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv16i8_2t.nxv16i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 2), ptr, <vscale x 16 x i1>, i64, i64, i64) - -define <vscale x 16 x i8> @test_vlseg2_nxv16i8_triscv.vector.tuple_nxv16i8_2t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @test_vlseg2_nxv16i8_triscv.vector.tuple_nxv16i8_2t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg2_nxv16i8_triscv.vector.tuple_nxv16i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; CHECK-NEXT: vlseg2e8.v v6, (a0) +; CHECK-NEXT: vlseg2e8.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, i64 %vl, i64 3) - %1 = call <vscale x 16 x i8> @llvm.riscv.tuple.extract.nxv16i8.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0, i32 1) - ret <vscale x 16 x i8> %1 + ret target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0 } - -define <vscale x 16 x i8> @test_vlseg2_mask_nxv16i8_triscv.vector.tuple_nxv16i8_2t(ptr %base, i64 %vl, <vscale x 16 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @test_vlseg2_mask_nxv16i8_triscv.vector.tuple_nxv16i8_2t(ptr %base, i64 %vl, <vscale x 16 x i1> %mask) { ; CHECK-LABEL: test_vlseg2_mask_nxv16i8_triscv.vector.tuple_nxv16i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma -; CHECK-NEXT: vlseg2e8.v v6, (a0), v0.t +; CHECK-NEXT: vlseg2e8.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv16i8_2t.nxv16i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 16 x i1> %mask, i64 %vl, i64 1, i64 3) - %1 = call <vscale x 16 x i8> @llvm.riscv.tuple.extract.nxv16i8.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0, i32 1) - ret <vscale x 16 x i8> %1 + ret target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0 } - -declare target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2), ptr, i64, i64) -declare target("riscv.vector.tuple", <vscale x 32 x i8>, 2) 
-
-define <vscale x 32 x i8> @test_vlseg2_nxv32i8_triscv.vector.tuple_nxv32i8_2t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @test_vlseg2_nxv32i8_triscv.vector.tuple_nxv32i8_2t(ptr %base, i64 %vl) {
 ; CHECK-LABEL: test_vlseg2_nxv32i8_triscv.vector.tuple_nxv32i8_2t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma
-; CHECK-NEXT: vlseg2e8.v v4, (a0)
+; CHECK-NEXT: vlseg2e8.v v8, (a0)
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, i64 %vl, i64 3)
-  %1 = call <vscale x 32 x i8> @llvm.riscv.tuple.extract.nxv32i8.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0, i32 1)
-  ret <vscale x 32 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0
 }
-
-define <vscale x 32 x i8> @test_vlseg2_mask_nxv32i8_triscv.vector.tuple_nxv32i8_2t(ptr %base, i64 %vl, <vscale x 32 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @test_vlseg2_mask_nxv32i8_triscv.vector.tuple_nxv32i8_2t(ptr %base, i64 %vl, <vscale x 32 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg2_mask_nxv32i8_triscv.vector.tuple_nxv32i8_2t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e8, m4, ta, ma
-; CHECK-NEXT: vlseg2e8.v v4, (a0), v0.t
+; CHECK-NEXT: vlseg2e8.v v8, (a0), v0.t
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv32i8_2t.nxv32i1(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 32 x i1> %mask, i64 %vl, i64 1, i64 3)
-  %1 = call <vscale x 32 x i8> @llvm.riscv.tuple.extract.nxv32i8.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0, i32 1)
-  ret <vscale x 32 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 1 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv1i8_3t(target("riscv.vector.tuple", <vscale x 1 x i8>, 3), ptr, i64, i64)
-declare target("riscv.vector.tuple", <vscale x 1 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv1i8_3t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 3), ptr, <vscale x 1 x i1>, i64, i64, i64)
-
-define <vscale x 1 x i8> @test_vlseg3_nxv1i8_triscv.vector.tuple_nxv1i8_3t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 1 x i8>, 3) @test_vlseg3_nxv1i8_triscv.vector.tuple_nxv1i8_3t(ptr %base, i64 %vl) {
 ; CHECK-LABEL: test_vlseg3_nxv1i8_triscv.vector.tuple_nxv1i8_3t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
-; CHECK-NEXT: vlseg3e8.v v7, (a0)
+; CHECK-NEXT: vlseg3e8.v v8, (a0)
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv1i8_3t(target("riscv.vector.tuple", <vscale x 1 x i8>, 3) undef, ptr %base, i64 %vl, i64 3)
-  %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_3t(target("riscv.vector.tuple", <vscale x 1 x i8>, 3) %0, i32 1)
-  ret <vscale x 1 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 1 x i8>, 3) %0
 }
-
-define <vscale x 1 x i8> @test_vlseg3_mask_nxv1i8_triscv.vector.tuple_nxv1i8_3t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 1 x i8>, 3) @test_vlseg3_mask_nxv1i8_triscv.vector.tuple_nxv1i8_3t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg3_mask_nxv1i8_triscv.vector.tuple_nxv1i8_3t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
-; CHECK-NEXT: vlseg3e8.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg3e8.v v8, (a0), v0.t
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv1i8_3t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 3) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 3)
-  %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_3t(target("riscv.vector.tuple", <vscale x 1 x i8>, 3) %0, i32 1)
-  ret <vscale x 1 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 1 x i8>, 3) %0
 }
-
-define <vscale x 1 x i8> @test_vlseg3_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_3t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 1 x i8>, 3) @test_vlseg3_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_3t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg3_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_3t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
-; CHECK-NEXT: vlseg3e8.v v7, (a0)
+; CHECK-NEXT: vlseg3e8.v v8, (a0)
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv1i8_3t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 3) undef, ptr %base, <vscale x 1 x i1> splat (i1 true), i64 %vl, i64 1, i64 3)
-  %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_3t(target("riscv.vector.tuple", <vscale x 1 x i8>, 3) %0, i32 1)
-  ret <vscale x 1 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 1 x i8>, 3) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 2 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv2i8_3t(target("riscv.vector.tuple", <vscale x 2 x i8>, 3), ptr, i64, i64)
-declare target("riscv.vector.tuple", <vscale x 2 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv2i8_3t.nxv2i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 3), ptr, <vscale x 2 x i1>, i64, i64, i64)
-
-define <vscale x 2 x i8> @test_vlseg3_nxv2i8_triscv.vector.tuple_nxv2i8_3t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 3) @test_vlseg3_nxv2i8_triscv.vector.tuple_nxv2i8_3t(ptr %base, i64 %vl) {
 ; CHECK-LABEL: test_vlseg3_nxv2i8_triscv.vector.tuple_nxv2i8_3t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
-; CHECK-NEXT: vlseg3e8.v v7, (a0)
+; CHECK-NEXT: vlseg3e8.v v8, (a0)
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv2i8_3t(target("riscv.vector.tuple", <vscale x 2 x i8>, 3) undef, ptr %base, i64 %vl, i64 3)
-  %1 = call <vscale x 2 x i8> @llvm.riscv.tuple.extract.nxv2i8.triscv.vector.tuple_nxv2i8_3t(target("riscv.vector.tuple", <vscale x 2 x i8>, 3) %0, i32 1)
-  ret <vscale x 2 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 2 x i8>, 3) %0
 }
-
-define <vscale x 2 x i8> @test_vlseg3_mask_nxv2i8_triscv.vector.tuple_nxv2i8_3t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 3) @test_vlseg3_mask_nxv2i8_triscv.vector.tuple_nxv2i8_3t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg3_mask_nxv2i8_triscv.vector.tuple_nxv2i8_3t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
-; CHECK-NEXT: vlseg3e8.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg3e8.v v8, (a0), v0.t
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv2i8_3t.nxv2i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 3) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 3)
-  %1 = call <vscale x 2 x i8> @llvm.riscv.tuple.extract.nxv2i8.triscv.vector.tuple_nxv2i8_3t(target("riscv.vector.tuple", <vscale x 2 x i8>, 3) %0, i32 1)
-  ret <vscale x 2 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 2 x i8>, 3) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3), ptr, i64, i64)
-declare target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv4i8_3t.nxv4i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 3), ptr, <vscale x 4 x i1>, i64, i64, i64)
-
-define <vscale x 4 x i8> @test_vlseg3_nxv4i8_triscv.vector.tuple_nxv4i8_3t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @test_vlseg3_nxv4i8_triscv.vector.tuple_nxv4i8_3t(ptr %base, i64 %vl) {
 ; CHECK-LABEL: test_vlseg3_nxv4i8_triscv.vector.tuple_nxv4i8_3t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
-; CHECK-NEXT: vlseg3e8.v v7, (a0)
+; CHECK-NEXT: vlseg3e8.v v8, (a0)
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) undef, ptr %base, i64 %vl, i64 3)
-  %1 = call <vscale x 4 x i8> @llvm.riscv.tuple.extract.nxv4i8.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0, i32 1)
-  ret <vscale x 4 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0
 }
-
-define <vscale x 4 x i8> @test_vlseg3_mask_nxv4i8_triscv.vector.tuple_nxv4i8_3t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @test_vlseg3_mask_nxv4i8_triscv.vector.tuple_nxv4i8_3t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg3_mask_nxv4i8_triscv.vector.tuple_nxv4i8_3t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
-; CHECK-NEXT: vlseg3e8.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg3e8.v v8, (a0), v0.t
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv4i8_3t.nxv4i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) undef, ptr %base, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 3)
-  %1 = call <vscale x 4 x i8> @llvm.riscv.tuple.extract.nxv4i8.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0, i32 1)
-  ret <vscale x 4 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3), ptr, i64, i64)
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv8i8_3t.nxv8i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 3), ptr, <vscale x 8 x i1>, i64, i64, i64)
-
-define <vscale x 8 x i8> @test_vlseg3_nxv8i8_triscv.vector.tuple_nxv8i8_3t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @test_vlseg3_nxv8i8_triscv.vector.tuple_nxv8i8_3t(ptr %base, i64 %vl) {
 ; CHECK-LABEL: test_vlseg3_nxv8i8_triscv.vector.tuple_nxv8i8_3t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
-; CHECK-NEXT: vlseg3e8.v v7, (a0)
+; CHECK-NEXT: vlseg3e8.v v8, (a0)
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) undef, ptr %base, i64 %vl, i64 3)
-  %1 = call <vscale x 8 x i8> @llvm.riscv.tuple.extract.nxv8i8.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0, i32 1)
-  ret <vscale x 8 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0
 }
-
-define <vscale x 8 x i8> @test_vlseg3_mask_nxv8i8_triscv.vector.tuple_nxv8i8_3t(ptr %base, i64 %vl, <vscale x 8 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @test_vlseg3_mask_nxv8i8_triscv.vector.tuple_nxv8i8_3t(ptr %base, i64 %vl, <vscale x 8 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg3_mask_nxv8i8_triscv.vector.tuple_nxv8i8_3t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
-; CHECK-NEXT: vlseg3e8.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg3e8.v v8, (a0), v0.t
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv8i8_3t.nxv8i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) undef, ptr %base, <vscale x 8 x i1> %mask, i64 %vl, i64 1, i64 3)
-  %1 = call <vscale x 8 x i8> @llvm.riscv.tuple.extract.nxv8i8.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0, i32 1)
-  ret <vscale x 8 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3), ptr, i64, i64)
-declare target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv16i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 3), ptr, <vscale x 16 x i1>, i64, i64, i64)
-
-define <vscale x 16 x i8> @test_vlseg3_nxv16i8_triscv.vector.tuple_nxv16i8_3t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @test_vlseg3_nxv16i8_triscv.vector.tuple_nxv16i8_3t(ptr %base, i64 %vl) {
 ; CHECK-LABEL: test_vlseg3_nxv16i8_triscv.vector.tuple_nxv16i8_3t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
-; CHECK-NEXT: vlseg3e8.v v6, (a0)
+; CHECK-NEXT: vlseg3e8.v v8, (a0)
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, i64 %vl, i64 3)
-  %1 = call <vscale x 16 x i8> @llvm.riscv.tuple.extract.nxv16i8.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0, i32 1)
-  ret <vscale x 16 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0
 }
-
-define <vscale x 16 x i8> @test_vlseg3_mask_nxv16i8_triscv.vector.tuple_nxv16i8_3t(ptr %base, i64 %vl, <vscale x 16 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @test_vlseg3_mask_nxv16i8_triscv.vector.tuple_nxv16i8_3t(ptr %base, i64 %vl, <vscale x 16 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg3_mask_nxv16i8_triscv.vector.tuple_nxv16i8_3t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
-; CHECK-NEXT: vlseg3e8.v v6, (a0), v0.t
+; CHECK-NEXT: vlseg3e8.v v8, (a0), v0.t
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv16i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 16 x i1> %mask, i64 %vl, i64 1, i64 3)
-  %1 = call <vscale x 16 x i8> @llvm.riscv.tuple.extract.nxv16i8.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0, i32 1)
-  ret <vscale x 16 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 1 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv1i8_4t(target("riscv.vector.tuple", <vscale x 1 x i8>, 4), ptr, i64, i64)
-declare target("riscv.vector.tuple", <vscale x 1 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv1i8_4t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 4), ptr, <vscale x 1 x i1>, i64, i64, i64)
-
-define <vscale x 1 x i8> @test_vlseg4_nxv1i8_triscv.vector.tuple_nxv1i8_4t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 1 x i8>, 4) @test_vlseg4_nxv1i8_triscv.vector.tuple_nxv1i8_4t(ptr %base, i64 %vl) {
 ; CHECK-LABEL: test_vlseg4_nxv1i8_triscv.vector.tuple_nxv1i8_4t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
-; CHECK-NEXT: vlseg4e8.v v7, (a0)
+; CHECK-NEXT: vlseg4e8.v v8, (a0)
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv1i8_4t(target("riscv.vector.tuple", <vscale x 1 x i8>, 4) undef, ptr %base, i64 %vl, i64 3)
-  %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_4t(target("riscv.vector.tuple", <vscale x 1 x i8>, 4) %0, i32 1)
-  ret <vscale x 1 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 1 x i8>, 4) %0
 }
-
-define <vscale x 1 x i8> @test_vlseg4_mask_nxv1i8_triscv.vector.tuple_nxv1i8_4t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 1 x i8>, 4) @test_vlseg4_mask_nxv1i8_triscv.vector.tuple_nxv1i8_4t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg4_mask_nxv1i8_triscv.vector.tuple_nxv1i8_4t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
-; CHECK-NEXT: vlseg4e8.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg4e8.v v8, (a0), v0.t
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv1i8_4t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 4) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 3)
-  %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_4t(target("riscv.vector.tuple", <vscale x 1 x i8>, 4) %0, i32 1)
-  ret <vscale x 1 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 1 x i8>, 4) %0
 }
-
-define <vscale x 1 x i8> @test_vlseg4_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_4t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 1 x i8>, 4) @test_vlseg4_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_4t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg4_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_4t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
-; CHECK-NEXT: vlseg4e8.v v7, (a0)
+; CHECK-NEXT: vlseg4e8.v v8, (a0)
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv1i8_4t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 4) undef, ptr %base, <vscale x 1 x i1> splat (i1 true), i64 %vl, i64 1, i64 3)
-  %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_4t(target("riscv.vector.tuple", <vscale x 1 x i8>, 4) %0, i32 1)
-  ret <vscale x 1 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 1 x i8>, 4) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 2 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv2i8_4t(target("riscv.vector.tuple", <vscale x 2 x i8>, 4), ptr, i64, i64)
-declare target("riscv.vector.tuple", <vscale x 2 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv2i8_4t.nxv2i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 4), ptr, <vscale x 2 x i1>, i64, i64, i64)
-
-define <vscale x 2 x i8> @test_vlseg4_nxv2i8_triscv.vector.tuple_nxv2i8_4t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 4) @test_vlseg4_nxv2i8_triscv.vector.tuple_nxv2i8_4t(ptr %base, i64 %vl) {
 ; CHECK-LABEL: test_vlseg4_nxv2i8_triscv.vector.tuple_nxv2i8_4t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
-; CHECK-NEXT: vlseg4e8.v v7, (a0)
+; CHECK-NEXT: vlseg4e8.v v8, (a0)
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv2i8_4t(target("riscv.vector.tuple", <vscale x 2 x i8>, 4) undef, ptr %base, i64 %vl, i64 3)
-  %1 = call <vscale x 2 x i8> @llvm.riscv.tuple.extract.nxv2i8.triscv.vector.tuple_nxv2i8_4t(target("riscv.vector.tuple", <vscale x 2 x i8>, 4) %0, i32 1)
-  ret <vscale x 2 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 2 x i8>, 4) %0
 }
-
-define <vscale x 2 x i8> @test_vlseg4_mask_nxv2i8_triscv.vector.tuple_nxv2i8_4t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 4) @test_vlseg4_mask_nxv2i8_triscv.vector.tuple_nxv2i8_4t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg4_mask_nxv2i8_triscv.vector.tuple_nxv2i8_4t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
-; CHECK-NEXT: vlseg4e8.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg4e8.v v8, (a0), v0.t
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv2i8_4t.nxv2i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 4) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 3)
-  %1 = call <vscale x 2 x i8> @llvm.riscv.tuple.extract.nxv2i8.triscv.vector.tuple_nxv2i8_4t(target("riscv.vector.tuple", <vscale x 2 x i8>, 4) %0, i32 1)
-  ret <vscale x 2 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 2 x i8>, 4) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4), ptr, i64, i64)
-declare target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv4i8_4t.nxv4i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 4), ptr, <vscale x 4 x i1>, i64, i64, i64)
-
-define <vscale x 4 x i8> @test_vlseg4_nxv4i8_triscv.vector.tuple_nxv4i8_4t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @test_vlseg4_nxv4i8_triscv.vector.tuple_nxv4i8_4t(ptr %base, i64 %vl) {
 ; CHECK-LABEL: test_vlseg4_nxv4i8_triscv.vector.tuple_nxv4i8_4t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
-; CHECK-NEXT: vlseg4e8.v v7, (a0)
+; CHECK-NEXT: vlseg4e8.v v8, (a0)
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) undef, ptr %base, i64 %vl, i64 3)
-  %1 = call <vscale x 4 x i8> @llvm.riscv.tuple.extract.nxv4i8.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0, i32 1)
-  ret <vscale x 4 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0
 }
-
-define <vscale x 4 x i8> @test_vlseg4_mask_nxv4i8_triscv.vector.tuple_nxv4i8_4t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @test_vlseg4_mask_nxv4i8_triscv.vector.tuple_nxv4i8_4t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg4_mask_nxv4i8_triscv.vector.tuple_nxv4i8_4t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
-; CHECK-NEXT: vlseg4e8.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg4e8.v v8, (a0), v0.t
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv4i8_4t.nxv4i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) undef, ptr %base, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 3)
-  %1 = call <vscale x 4 x i8> @llvm.riscv.tuple.extract.nxv4i8.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0, i32 1)
-  ret <vscale x 4 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4), ptr, i64, i64)
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv8i8_4t.nxv8i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 4), ptr, <vscale x 8 x i1>, i64, i64, i64)
-
-define <vscale x 8 x i8> @test_vlseg4_nxv8i8_triscv.vector.tuple_nxv8i8_4t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @test_vlseg4_nxv8i8_triscv.vector.tuple_nxv8i8_4t(ptr %base, i64 %vl) {
 ; CHECK-LABEL: test_vlseg4_nxv8i8_triscv.vector.tuple_nxv8i8_4t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
-; CHECK-NEXT: vlseg4e8.v v7, (a0)
+; CHECK-NEXT: vlseg4e8.v v8, (a0)
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) undef, ptr %base, i64 %vl, i64 3)
-  %1 = call <vscale x 8 x i8> @llvm.riscv.tuple.extract.nxv8i8.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0, i32 1)
-  ret <vscale x 8 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0
 }
-
-define <vscale x 8 x i8> @test_vlseg4_mask_nxv8i8_triscv.vector.tuple_nxv8i8_4t(ptr %base, i64 %vl, <vscale x 8 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @test_vlseg4_mask_nxv8i8_triscv.vector.tuple_nxv8i8_4t(ptr %base, i64 %vl, <vscale x 8 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg4_mask_nxv8i8_triscv.vector.tuple_nxv8i8_4t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
-; CHECK-NEXT: vlseg4e8.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg4e8.v v8, (a0), v0.t
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv8i8_4t.nxv8i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) undef, ptr %base, <vscale x 8 x i1> %mask, i64 %vl, i64 1, i64 3)
-  %1 = call <vscale x 8 x i8> @llvm.riscv.tuple.extract.nxv8i8.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0, i32 1)
-  ret <vscale x 8 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4), ptr, i64, i64)
-declare target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv16i8_4t.nxv16i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 4), ptr, <vscale x 16 x i1>, i64, i64, i64)
-
-define <vscale x 16 x i8> @test_vlseg4_nxv16i8_triscv.vector.tuple_nxv16i8_4t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @test_vlseg4_nxv16i8_triscv.vector.tuple_nxv16i8_4t(ptr %base, i64 %vl) {
 ; CHECK-LABEL: test_vlseg4_nxv16i8_triscv.vector.tuple_nxv16i8_4t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
-; CHECK-NEXT: vlseg4e8.v v6, (a0)
+; CHECK-NEXT: vlseg4e8.v v8, (a0)
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, i64 %vl, i64 3)
-  %1 = call <vscale x 16 x i8> @llvm.riscv.tuple.extract.nxv16i8.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0, i32 1)
-  ret <vscale x 16 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0
 }
-
-define <vscale x 16 x i8> @test_vlseg4_mask_nxv16i8_triscv.vector.tuple_nxv16i8_4t(ptr %base, i64 %vl, <vscale x 16 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @test_vlseg4_mask_nxv16i8_triscv.vector.tuple_nxv16i8_4t(ptr %base, i64 %vl, <vscale x 16 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg4_mask_nxv16i8_triscv.vector.tuple_nxv16i8_4t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e8, m2, ta, ma
-; CHECK-NEXT: vlseg4e8.v v6, (a0), v0.t
+; CHECK-NEXT: vlseg4e8.v v8, (a0), v0.t
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv16i8_4t.nxv16i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 16 x i1> %mask, i64 %vl, i64 1, i64 3)
-  %1 = call <vscale x 16 x i8> @llvm.riscv.tuple.extract.nxv16i8.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0, i32 1)
-  ret <vscale x 16 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 1 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv1i8_5t(target("riscv.vector.tuple", <vscale x 1 x i8>, 5), ptr, i64, i64)
-declare target("riscv.vector.tuple", <vscale x 1 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv1i8_5t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 5), ptr, <vscale x 1 x i1>, i64, i64, i64)
-
-define <vscale x 1 x i8> @test_vlseg5_nxv1i8_triscv.vector.tuple_nxv1i8_5t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 1 x i8>, 5) @test_vlseg5_nxv1i8_triscv.vector.tuple_nxv1i8_5t(ptr %base, i64 %vl) {
 ; CHECK-LABEL: test_vlseg5_nxv1i8_triscv.vector.tuple_nxv1i8_5t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
-; CHECK-NEXT: vlseg5e8.v v7, (a0)
+; CHECK-NEXT: vlseg5e8.v v8, (a0)
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv1i8_5t(target("riscv.vector.tuple", <vscale x 1 x i8>, 5) undef, ptr %base, i64 %vl, i64 3)
-  %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_5t(target("riscv.vector.tuple", <vscale x 1 x i8>, 5) %0, i32 1)
-  ret <vscale x 1 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 1 x i8>, 5) %0
 }
-
-define <vscale x 1 x i8> @test_vlseg5_mask_nxv1i8_triscv.vector.tuple_nxv1i8_5t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 1 x i8>, 5) @test_vlseg5_mask_nxv1i8_triscv.vector.tuple_nxv1i8_5t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg5_mask_nxv1i8_triscv.vector.tuple_nxv1i8_5t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
-; CHECK-NEXT: vlseg5e8.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg5e8.v v8, (a0), v0.t
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv1i8_5t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 5) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 3)
-  %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_5t(target("riscv.vector.tuple", <vscale x 1 x i8>, 5) %0, i32 1)
-  ret <vscale x 1 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 1 x i8>, 5) %0
 }
-
-define <vscale x 1 x i8> @test_vlseg5_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_5t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 1 x i8>, 5) @test_vlseg5_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_5t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg5_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_5t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
-; CHECK-NEXT: vlseg5e8.v v7, (a0)
+; CHECK-NEXT: vlseg5e8.v v8, (a0)
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv1i8_5t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 5) undef, ptr %base, <vscale x 1 x i1> splat (i1 true), i64 %vl, i64 1, i64 3)
-  %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_5t(target("riscv.vector.tuple", <vscale x 1 x i8>, 5) %0, i32 1)
-  ret <vscale x 1 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 1 x i8>, 5) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 2 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv2i8_5t(target("riscv.vector.tuple", <vscale x 2 x i8>, 5), ptr, i64, i64)
-declare target("riscv.vector.tuple", <vscale x 2 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv2i8_5t.nxv2i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 5), ptr, <vscale x 2 x i1>, i64, i64, i64)
-
-define <vscale x 2 x i8> @test_vlseg5_nxv2i8_triscv.vector.tuple_nxv2i8_5t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 5) @test_vlseg5_nxv2i8_triscv.vector.tuple_nxv2i8_5t(ptr %base, i64 %vl) {
 ; CHECK-LABEL: test_vlseg5_nxv2i8_triscv.vector.tuple_nxv2i8_5t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
-; CHECK-NEXT: vlseg5e8.v v7, (a0)
+; CHECK-NEXT: vlseg5e8.v v8, (a0)
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv2i8_5t(target("riscv.vector.tuple", <vscale x 2 x i8>, 5) undef, ptr %base, i64 %vl, i64 3)
-  %1 = call <vscale x 2 x i8> @llvm.riscv.tuple.extract.nxv2i8.triscv.vector.tuple_nxv2i8_5t(target("riscv.vector.tuple", <vscale x 2 x i8>, 5) %0, i32 1)
-  ret <vscale x 2 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 2 x i8>, 5) %0
 }
-
-define <vscale x 2 x i8> @test_vlseg5_mask_nxv2i8_triscv.vector.tuple_nxv2i8_5t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 5) @test_vlseg5_mask_nxv2i8_triscv.vector.tuple_nxv2i8_5t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg5_mask_nxv2i8_triscv.vector.tuple_nxv2i8_5t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
-; CHECK-NEXT: vlseg5e8.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg5e8.v v8, (a0), v0.t
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv2i8_5t.nxv2i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 5) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 3)
-  %1 = call <vscale x 2 x i8> @llvm.riscv.tuple.extract.nxv2i8.triscv.vector.tuple_nxv2i8_5t(target("riscv.vector.tuple", <vscale x 2 x i8>, 5) %0, i32 1)
-  ret <vscale x 2 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 2 x i8>, 5) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5), ptr, i64, i64)
-declare target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv4i8_5t.nxv4i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 5), ptr, <vscale x 4 x i1>, i64, i64, i64)
-
-define <vscale x 4 x i8> @test_vlseg5_nxv4i8_triscv.vector.tuple_nxv4i8_5t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @test_vlseg5_nxv4i8_triscv.vector.tuple_nxv4i8_5t(ptr %base, i64 %vl) {
 ; CHECK-LABEL: test_vlseg5_nxv4i8_triscv.vector.tuple_nxv4i8_5t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
-; CHECK-NEXT: vlseg5e8.v v7, (a0)
+; CHECK-NEXT: vlseg5e8.v v8, (a0)
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) undef, ptr %base, i64 %vl, i64 3)
-  %1 = call <vscale x 4 x i8> @llvm.riscv.tuple.extract.nxv4i8.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0, i32 1)
-  ret <vscale x 4 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0
 }
-
-define <vscale x 4 x i8> @test_vlseg5_mask_nxv4i8_triscv.vector.tuple_nxv4i8_5t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @test_vlseg5_mask_nxv4i8_triscv.vector.tuple_nxv4i8_5t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg5_mask_nxv4i8_triscv.vector.tuple_nxv4i8_5t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma
-; CHECK-NEXT: vlseg5e8.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg5e8.v v8, (a0), v0.t
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv4i8_5t.nxv4i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) undef, ptr %base, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 3)
-  %1 = call <vscale x 4 x i8> @llvm.riscv.tuple.extract.nxv4i8.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0, i32 1)
-  ret <vscale x 4 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5), ptr, i64, i64)
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv8i8_5t.nxv8i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 5), ptr, <vscale x 8 x i1>, i64, i64, i64)
-
-define <vscale x 8 x i8> @test_vlseg5_nxv8i8_triscv.vector.tuple_nxv8i8_5t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @test_vlseg5_nxv8i8_triscv.vector.tuple_nxv8i8_5t(ptr %base, i64 %vl) {
 ; CHECK-LABEL: test_vlseg5_nxv8i8_triscv.vector.tuple_nxv8i8_5t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
-; CHECK-NEXT: vlseg5e8.v v7, (a0)
+; CHECK-NEXT: vlseg5e8.v v8, (a0)
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) undef, ptr %base, i64 %vl, i64 3)
-  %1 = call <vscale x 8 x i8> @llvm.riscv.tuple.extract.nxv8i8.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0, i32 1)
-  ret <vscale x 8 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0
 }
-
-define <vscale x 8 x i8> @test_vlseg5_mask_nxv8i8_triscv.vector.tuple_nxv8i8_5t(ptr %base, i64 %vl, <vscale x 8 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @test_vlseg5_mask_nxv8i8_triscv.vector.tuple_nxv8i8_5t(ptr %base, i64 %vl, <vscale x 8 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg5_mask_nxv8i8_triscv.vector.tuple_nxv8i8_5t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma
-; CHECK-NEXT: vlseg5e8.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg5e8.v v8, (a0), v0.t
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv8i8_5t.nxv8i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) undef, ptr %base, <vscale x 8 x i1> %mask, i64 %vl, i64 1, i64 3)
-  %1 = call <vscale x 8 x i8> @llvm.riscv.tuple.extract.nxv8i8.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0, i32 1)
-  ret <vscale x 8 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 1 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv1i8_6t(target("riscv.vector.tuple", <vscale x 1 x i8>, 6), ptr, i64, i64)
-declare target("riscv.vector.tuple", <vscale x 1 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv1i8_6t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 6), ptr, <vscale x 1 x i1>, i64, i64, i64)
-
-define <vscale x 1 x i8> @test_vlseg6_nxv1i8_triscv.vector.tuple_nxv1i8_6t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 1 x i8>, 6) @test_vlseg6_nxv1i8_triscv.vector.tuple_nxv1i8_6t(ptr %base, i64 %vl) {
 ; CHECK-LABEL: test_vlseg6_nxv1i8_triscv.vector.tuple_nxv1i8_6t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
-; CHECK-NEXT: vlseg6e8.v v7, (a0)
+; CHECK-NEXT: vlseg6e8.v v8, (a0)
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv1i8_6t(target("riscv.vector.tuple", <vscale x 1 x i8>, 6) undef, ptr %base, i64 %vl, i64 3)
-  %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_6t(target("riscv.vector.tuple", <vscale x 1 x i8>, 6) %0, i32 1)
-  ret <vscale x 1 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 1 x i8>, 6) %0
 }
-
-define <vscale x 1 x i8> @test_vlseg6_mask_nxv1i8_triscv.vector.tuple_nxv1i8_6t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 1 x i8>, 6) @test_vlseg6_mask_nxv1i8_triscv.vector.tuple_nxv1i8_6t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg6_mask_nxv1i8_triscv.vector.tuple_nxv1i8_6t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
-; CHECK-NEXT: vlseg6e8.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg6e8.v v8, (a0), v0.t
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv1i8_6t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 6) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 3)
-  %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_6t(target("riscv.vector.tuple", <vscale x 1 x i8>, 6) %0, i32 1)
-  ret <vscale x 1 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 1 x i8>, 6) %0
 }
-
-define <vscale x 1 x i8> @test_vlseg6_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_6t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 1 x i8>, 6) @test_vlseg6_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_6t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg6_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_6t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma
-; CHECK-NEXT: vlseg6e8.v v7, (a0)
+; CHECK-NEXT: vlseg6e8.v v8, (a0)
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv1i8_6t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 6) undef, ptr %base, <vscale x 1 x i1> splat (i1 true), i64 %vl, i64 1, i64 3)
-  %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_6t(target("riscv.vector.tuple", <vscale x 1 x i8>, 6) %0, i32 1)
-  ret <vscale x 1 x i8> %1
+  ret target("riscv.vector.tuple", <vscale x 1 x i8>, 6) %0
 }
-
-declare target("riscv.vector.tuple", <vscale x 2 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv2i8_6t(target("riscv.vector.tuple", <vscale x 2 x i8>, 6), ptr, i64, i64)
-declare target("riscv.vector.tuple", <vscale x 2 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv2i8_6t.nxv2i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 6), ptr, <vscale x 2 x i1>, i64, i64, i64)
-
-define <vscale x 2 x i8> @test_vlseg6_nxv2i8_triscv.vector.tuple_nxv2i8_6t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 6) @test_vlseg6_nxv2i8_triscv.vector.tuple_nxv2i8_6t(ptr %base, i64 %vl) {
 ; CHECK-LABEL: test_vlseg6_nxv2i8_triscv.vector.tuple_nxv2i8_6t:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma
-; CHECK-NEXT: vlseg6e8.v v7, (a0)
+; CHECK-NEXT: vlseg6e8.v v8, (a0)
 ; CHECK-NEXT: ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv2i8_6t(target("riscv.vector.tuple", <vscale x 2 x i8>, 6) undef, ptr %base, i64 %vl, i64 3)
x 2 x i8>, 6) %0, i32 1) - ret <vscale x 2 x i8> %1 + ret target("riscv.vector.tuple", <vscale x 2 x i8>, 6) %0 } - -define <vscale x 2 x i8> @test_vlseg6_mask_nxv2i8_triscv.vector.tuple_nxv2i8_6t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 2 x i8>, 6) @test_vlseg6_mask_nxv2i8_triscv.vector.tuple_nxv2i8_6t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) { ; CHECK-LABEL: test_vlseg6_mask_nxv2i8_triscv.vector.tuple_nxv2i8_6t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma -; CHECK-NEXT: vlseg6e8.v v7, (a0), v0.t +; CHECK-NEXT: vlseg6e8.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv2i8_6t.nxv2i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 6) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 3) - %1 = call <vscale x 2 x i8> @llvm.riscv.tuple.extract.nxv2i8.triscv.vector.tuple_nxv2i8_6t(target("riscv.vector.tuple", <vscale x 2 x i8>, 6) %0, i32 1) - ret <vscale x 2 x i8> %1 + ret target("riscv.vector.tuple", <vscale x 2 x i8>, 6) %0 } - -declare target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6), ptr, i64, i64) -declare target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv4i8_6t.nxv4i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 6), ptr, <vscale x 4 x i1>, i64, i64, i64) - -define <vscale x 4 x i8> @test_vlseg6_nxv4i8_triscv.vector.tuple_nxv4i8_6t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @test_vlseg6_nxv4i8_triscv.vector.tuple_nxv4i8_6t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg6_nxv4i8_triscv.vector.tuple_nxv4i8_6t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma -; CHECK-NEXT: vlseg6e8.v v7, (a0) +; CHECK-NEXT: vlseg6e8.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) undef, ptr %base, i64 %vl, i64 3) - %1 = call <vscale x 4 x i8> @llvm.riscv.tuple.extract.nxv4i8.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0, i32 1) - ret <vscale x 4 x i8> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0 } - -define <vscale x 4 x i8> @test_vlseg6_mask_nxv4i8_triscv.vector.tuple_nxv4i8_6t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @test_vlseg6_mask_nxv4i8_triscv.vector.tuple_nxv4i8_6t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) { ; CHECK-LABEL: test_vlseg6_mask_nxv4i8_triscv.vector.tuple_nxv4i8_6t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma -; CHECK-NEXT: vlseg6e8.v v7, (a0), v0.t +; CHECK-NEXT: vlseg6e8.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv4i8_6t.nxv4i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) undef, ptr %base, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 3) - %1 = call <vscale x 4 x i8> @llvm.riscv.tuple.extract.nxv4i8.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0, i32 1) - ret <vscale x 4 x i8> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0 } - -declare target("riscv.vector.tuple", <vscale x 8 x i8>, 6) 
@llvm.riscv.vlseg6.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6), ptr, i64, i64) -declare target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv8i8_6t.nxv8i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 6), ptr, <vscale x 8 x i1>, i64, i64, i64) - -define <vscale x 8 x i8> @test_vlseg6_nxv8i8_triscv.vector.tuple_nxv8i8_6t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @test_vlseg6_nxv8i8_triscv.vector.tuple_nxv8i8_6t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg6_nxv8i8_triscv.vector.tuple_nxv8i8_6t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma -; CHECK-NEXT: vlseg6e8.v v7, (a0) +; CHECK-NEXT: vlseg6e8.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) undef, ptr %base, i64 %vl, i64 3) - %1 = call <vscale x 8 x i8> @llvm.riscv.tuple.extract.nxv8i8.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0, i32 1) - ret <vscale x 8 x i8> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0 } - -define <vscale x 8 x i8> @test_vlseg6_mask_nxv8i8_triscv.vector.tuple_nxv8i8_6t(ptr %base, i64 %vl, <vscale x 8 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @test_vlseg6_mask_nxv8i8_triscv.vector.tuple_nxv8i8_6t(ptr %base, i64 %vl, <vscale x 8 x i1> %mask) { ; CHECK-LABEL: test_vlseg6_mask_nxv8i8_triscv.vector.tuple_nxv8i8_6t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma -; CHECK-NEXT: vlseg6e8.v v7, (a0), v0.t +; CHECK-NEXT: vlseg6e8.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv8i8_6t.nxv8i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) undef, ptr %base, <vscale x 8 x i1> %mask, i64 %vl, i64 1, i64 3) - %1 = call <vscale x 8 x i8> @llvm.riscv.tuple.extract.nxv8i8.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0, i32 1) - ret <vscale x 8 x i8> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0 } - -declare target("riscv.vector.tuple", <vscale x 1 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv1i8_7t(target("riscv.vector.tuple", <vscale x 1 x i8>, 7), ptr, i64, i64) -declare target("riscv.vector.tuple", <vscale x 1 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv1i8_7t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 7), ptr, <vscale x 1 x i1>, i64, i64, i64) - -define <vscale x 1 x i8> @test_vlseg7_nxv1i8_triscv.vector.tuple_nxv1i8_7t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 1 x i8>, 7) @test_vlseg7_nxv1i8_triscv.vector.tuple_nxv1i8_7t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg7_nxv1i8_triscv.vector.tuple_nxv1i8_7t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma -; CHECK-NEXT: vlseg7e8.v v7, (a0) +; CHECK-NEXT: vlseg7e8.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv1i8_7t(target("riscv.vector.tuple", <vscale x 1 x i8>, 7) undef, ptr %base, i64 %vl, i64 3) - %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_7t(target("riscv.vector.tuple", <vscale x 1 x i8>, 7) %0, i32 1) - ret <vscale x 1 x i8> %1 + ret target("riscv.vector.tuple", <vscale x 1 x 
i8>, 7) %0 } - -define <vscale x 1 x i8> @test_vlseg7_mask_nxv1i8_triscv.vector.tuple_nxv1i8_7t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 1 x i8>, 7) @test_vlseg7_mask_nxv1i8_triscv.vector.tuple_nxv1i8_7t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { ; CHECK-LABEL: test_vlseg7_mask_nxv1i8_triscv.vector.tuple_nxv1i8_7t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma -; CHECK-NEXT: vlseg7e8.v v7, (a0), v0.t +; CHECK-NEXT: vlseg7e8.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv1i8_7t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 7) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 3) - %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_7t(target("riscv.vector.tuple", <vscale x 1 x i8>, 7) %0, i32 1) - ret <vscale x 1 x i8> %1 + ret target("riscv.vector.tuple", <vscale x 1 x i8>, 7) %0 } - -define <vscale x 1 x i8> @test_vlseg7_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_7t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 1 x i8>, 7) @test_vlseg7_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_7t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { ; CHECK-LABEL: test_vlseg7_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_7t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma -; CHECK-NEXT: vlseg7e8.v v7, (a0) +; CHECK-NEXT: vlseg7e8.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv1i8_7t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 7) undef, ptr %base, <vscale x 1 x i1> splat (i1 true), i64 %vl, i64 1, i64 3) - %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_7t(target("riscv.vector.tuple", <vscale x 1 x i8>, 7) %0, i32 1) - ret <vscale x 1 x i8> %1 + ret target("riscv.vector.tuple", <vscale x 1 x i8>, 7) %0 } - -declare target("riscv.vector.tuple", <vscale x 2 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv2i8_7t(target("riscv.vector.tuple", <vscale x 2 x i8>, 7), ptr, i64, i64) -declare target("riscv.vector.tuple", <vscale x 2 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv2i8_7t.nxv2i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 7), ptr, <vscale x 2 x i1>, i64, i64, i64) - -define <vscale x 2 x i8> @test_vlseg7_nxv2i8_triscv.vector.tuple_nxv2i8_7t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 2 x i8>, 7) @test_vlseg7_nxv2i8_triscv.vector.tuple_nxv2i8_7t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg7_nxv2i8_triscv.vector.tuple_nxv2i8_7t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma -; CHECK-NEXT: vlseg7e8.v v7, (a0) +; CHECK-NEXT: vlseg7e8.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv2i8_7t(target("riscv.vector.tuple", <vscale x 2 x i8>, 7) undef, ptr %base, i64 %vl, i64 3) - %1 = call <vscale x 2 x i8> @llvm.riscv.tuple.extract.nxv2i8.triscv.vector.tuple_nxv2i8_7t(target("riscv.vector.tuple", <vscale x 2 x i8>, 7) %0, i32 1) - ret <vscale x 2 x i8> %1 + ret target("riscv.vector.tuple", <vscale x 2 x i8>, 7) %0 } - -define <vscale x 2 x i8> @test_vlseg7_mask_nxv2i8_triscv.vector.tuple_nxv2i8_7t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) { +define 
target("riscv.vector.tuple", <vscale x 2 x i8>, 7) @test_vlseg7_mask_nxv2i8_triscv.vector.tuple_nxv2i8_7t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) { ; CHECK-LABEL: test_vlseg7_mask_nxv2i8_triscv.vector.tuple_nxv2i8_7t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma -; CHECK-NEXT: vlseg7e8.v v7, (a0), v0.t +; CHECK-NEXT: vlseg7e8.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv2i8_7t.nxv2i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 7) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 3) - %1 = call <vscale x 2 x i8> @llvm.riscv.tuple.extract.nxv2i8.triscv.vector.tuple_nxv2i8_7t(target("riscv.vector.tuple", <vscale x 2 x i8>, 7) %0, i32 1) - ret <vscale x 2 x i8> %1 + ret target("riscv.vector.tuple", <vscale x 2 x i8>, 7) %0 } - -declare target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7), ptr, i64, i64) -declare target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv4i8_7t.nxv4i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 7), ptr, <vscale x 4 x i1>, i64, i64, i64) - -define <vscale x 4 x i8> @test_vlseg7_nxv4i8_triscv.vector.tuple_nxv4i8_7t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @test_vlseg7_nxv4i8_triscv.vector.tuple_nxv4i8_7t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg7_nxv4i8_triscv.vector.tuple_nxv4i8_7t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma -; CHECK-NEXT: vlseg7e8.v v7, (a0) +; CHECK-NEXT: vlseg7e8.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) undef, ptr %base, i64 %vl, i64 3) - %1 = call <vscale x 4 x i8> @llvm.riscv.tuple.extract.nxv4i8.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0, i32 1) - ret <vscale x 4 x i8> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0 } - -define <vscale x 4 x i8> @test_vlseg7_mask_nxv4i8_triscv.vector.tuple_nxv4i8_7t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @test_vlseg7_mask_nxv4i8_triscv.vector.tuple_nxv4i8_7t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) { ; CHECK-LABEL: test_vlseg7_mask_nxv4i8_triscv.vector.tuple_nxv4i8_7t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma -; CHECK-NEXT: vlseg7e8.v v7, (a0), v0.t +; CHECK-NEXT: vlseg7e8.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv4i8_7t.nxv4i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) undef, ptr %base, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 3) - %1 = call <vscale x 4 x i8> @llvm.riscv.tuple.extract.nxv4i8.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0, i32 1) - ret <vscale x 4 x i8> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0 } - -declare target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7), ptr, i64, i64) -declare target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv8i8_7t.nxv8i1(target("riscv.vector.tuple", 
<vscale x 8 x i8>, 7), ptr, <vscale x 8 x i1>, i64, i64, i64) - -define <vscale x 8 x i8> @test_vlseg7_nxv8i8_triscv.vector.tuple_nxv8i8_7t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @test_vlseg7_nxv8i8_triscv.vector.tuple_nxv8i8_7t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg7_nxv8i8_triscv.vector.tuple_nxv8i8_7t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma -; CHECK-NEXT: vlseg7e8.v v7, (a0) +; CHECK-NEXT: vlseg7e8.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) undef, ptr %base, i64 %vl, i64 3) - %1 = call <vscale x 8 x i8> @llvm.riscv.tuple.extract.nxv8i8.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0, i32 1) - ret <vscale x 8 x i8> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0 } - -define <vscale x 8 x i8> @test_vlseg7_mask_nxv8i8_triscv.vector.tuple_nxv8i8_7t(ptr %base, i64 %vl, <vscale x 8 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @test_vlseg7_mask_nxv8i8_triscv.vector.tuple_nxv8i8_7t(ptr %base, i64 %vl, <vscale x 8 x i1> %mask) { ; CHECK-LABEL: test_vlseg7_mask_nxv8i8_triscv.vector.tuple_nxv8i8_7t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma -; CHECK-NEXT: vlseg7e8.v v7, (a0), v0.t +; CHECK-NEXT: vlseg7e8.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv8i8_7t.nxv8i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) undef, ptr %base, <vscale x 8 x i1> %mask, i64 %vl, i64 1, i64 3) - %1 = call <vscale x 8 x i8> @llvm.riscv.tuple.extract.nxv8i8.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0, i32 1) - ret <vscale x 8 x i8> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0 } - -declare target("riscv.vector.tuple", <vscale x 1 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv1i8_8t(target("riscv.vector.tuple", <vscale x 1 x i8>, 8), ptr, i64, i64) -declare target("riscv.vector.tuple", <vscale x 1 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv1i8_8t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 8), ptr, <vscale x 1 x i1>, i64, i64, i64) - -define <vscale x 1 x i8> @test_vlseg8_nxv1i8_triscv.vector.tuple_nxv1i8_8t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 1 x i8>, 8) @test_vlseg8_nxv1i8_triscv.vector.tuple_nxv1i8_8t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg8_nxv1i8_triscv.vector.tuple_nxv1i8_8t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma -; CHECK-NEXT: vlseg8e8.v v7, (a0) +; CHECK-NEXT: vlseg8e8.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv1i8_8t(target("riscv.vector.tuple", <vscale x 1 x i8>, 8) undef, ptr %base, i64 %vl, i64 3) - %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_8t(target("riscv.vector.tuple", <vscale x 1 x i8>, 8) %0, i32 1) - ret <vscale x 1 x i8> %1 + ret target("riscv.vector.tuple", <vscale x 1 x i8>, 8) %0 } - -define <vscale x 1 x i8> @test_vlseg8_mask_nxv1i8_triscv.vector.tuple_nxv1i8_8t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 1 x i8>, 8) @test_vlseg8_mask_nxv1i8_triscv.vector.tuple_nxv1i8_8t(ptr 
%base, i64 %vl, <vscale x 1 x i1> %mask) { ; CHECK-LABEL: test_vlseg8_mask_nxv1i8_triscv.vector.tuple_nxv1i8_8t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma -; CHECK-NEXT: vlseg8e8.v v7, (a0), v0.t +; CHECK-NEXT: vlseg8e8.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv1i8_8t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 8) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 3) - %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_8t(target("riscv.vector.tuple", <vscale x 1 x i8>, 8) %0, i32 1) - ret <vscale x 1 x i8> %1 + ret target("riscv.vector.tuple", <vscale x 1 x i8>, 8) %0 } - -define <vscale x 1 x i8> @test_vlseg8_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_8t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 1 x i8>, 8) @test_vlseg8_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_8t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { ; CHECK-LABEL: test_vlseg8_allonesmask_nxv1i8_triscv.vector.tuple_nxv1i8_8t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, mf8, ta, ma -; CHECK-NEXT: vlseg8e8.v v7, (a0) +; CHECK-NEXT: vlseg8e8.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 1 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv1i8_8t.nxv1i1(target("riscv.vector.tuple", <vscale x 1 x i8>, 8) undef, ptr %base, <vscale x 1 x i1> splat (i1 true), i64 %vl, i64 1, i64 3) - %1 = call <vscale x 1 x i8> @llvm.riscv.tuple.extract.nxv1i8.triscv.vector.tuple_nxv1i8_8t(target("riscv.vector.tuple", <vscale x 1 x i8>, 8) %0, i32 1) - ret <vscale x 1 x i8> %1 + ret target("riscv.vector.tuple", <vscale x 1 x i8>, 8) %0 } - -declare target("riscv.vector.tuple", <vscale x 2 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv2i8_8t(target("riscv.vector.tuple", <vscale x 2 x i8>, 8), ptr, i64, i64) -declare target("riscv.vector.tuple", <vscale x 2 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv2i8_8t.nxv2i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 8), ptr, <vscale x 2 x i1>, i64, i64, i64) - -define <vscale x 2 x i8> @test_vlseg8_nxv2i8_triscv.vector.tuple_nxv2i8_8t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 2 x i8>, 8) @test_vlseg8_nxv2i8_triscv.vector.tuple_nxv2i8_8t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg8_nxv2i8_triscv.vector.tuple_nxv2i8_8t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, mf4, ta, ma -; CHECK-NEXT: vlseg8e8.v v7, (a0) +; CHECK-NEXT: vlseg8e8.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv2i8_8t(target("riscv.vector.tuple", <vscale x 2 x i8>, 8) undef, ptr %base, i64 %vl, i64 3) - %1 = call <vscale x 2 x i8> @llvm.riscv.tuple.extract.nxv2i8.triscv.vector.tuple_nxv2i8_8t(target("riscv.vector.tuple", <vscale x 2 x i8>, 8) %0, i32 1) - ret <vscale x 2 x i8> %1 + ret target("riscv.vector.tuple", <vscale x 2 x i8>, 8) %0 } - -define <vscale x 2 x i8> @test_vlseg8_mask_nxv2i8_triscv.vector.tuple_nxv2i8_8t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 2 x i8>, 8) @test_vlseg8_mask_nxv2i8_triscv.vector.tuple_nxv2i8_8t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) { ; CHECK-LABEL: test_vlseg8_mask_nxv2i8_triscv.vector.tuple_nxv2i8_8t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, 
e8, mf4, ta, ma -; CHECK-NEXT: vlseg8e8.v v7, (a0), v0.t +; CHECK-NEXT: vlseg8e8.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv2i8_8t.nxv2i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 8) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 3) - %1 = call <vscale x 2 x i8> @llvm.riscv.tuple.extract.nxv2i8.triscv.vector.tuple_nxv2i8_8t(target("riscv.vector.tuple", <vscale x 2 x i8>, 8) %0, i32 1) - ret <vscale x 2 x i8> %1 + ret target("riscv.vector.tuple", <vscale x 2 x i8>, 8) %0 } - -declare target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8), ptr, i64, i64) -declare target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv4i8_8t.nxv4i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 8), ptr, <vscale x 4 x i1>, i64, i64, i64) - -define <vscale x 4 x i8> @test_vlseg8_nxv4i8_triscv.vector.tuple_nxv4i8_8t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @test_vlseg8_nxv4i8_triscv.vector.tuple_nxv4i8_8t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg8_nxv4i8_triscv.vector.tuple_nxv4i8_8t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma -; CHECK-NEXT: vlseg8e8.v v7, (a0) +; CHECK-NEXT: vlseg8e8.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) undef, ptr %base, i64 %vl, i64 3) - %1 = call <vscale x 4 x i8> @llvm.riscv.tuple.extract.nxv4i8.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0, i32 1) - ret <vscale x 4 x i8> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0 } - -define <vscale x 4 x i8> @test_vlseg8_mask_nxv4i8_triscv.vector.tuple_nxv4i8_8t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @test_vlseg8_mask_nxv4i8_triscv.vector.tuple_nxv4i8_8t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) { ; CHECK-LABEL: test_vlseg8_mask_nxv4i8_triscv.vector.tuple_nxv4i8_8t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, mf2, ta, ma -; CHECK-NEXT: vlseg8e8.v v7, (a0), v0.t +; CHECK-NEXT: vlseg8e8.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv4i8_8t.nxv4i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) undef, ptr %base, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 3) - %1 = call <vscale x 4 x i8> @llvm.riscv.tuple.extract.nxv4i8.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0, i32 1) - ret <vscale x 4 x i8> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0 } - -declare target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8), ptr, i64, i64) -declare target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv8i8_8t.nxv8i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 8), ptr, <vscale x 8 x i1>, i64, i64, i64) - -define <vscale x 8 x i8> @test_vlseg8_nxv8i8_triscv.vector.tuple_nxv8i8_8t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @test_vlseg8_nxv8i8_triscv.vector.tuple_nxv8i8_8t(ptr %base, 
i64 %vl) { ; CHECK-LABEL: test_vlseg8_nxv8i8_triscv.vector.tuple_nxv8i8_8t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma -; CHECK-NEXT: vlseg8e8.v v7, (a0) +; CHECK-NEXT: vlseg8e8.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) undef, ptr %base, i64 %vl, i64 3) - %1 = call <vscale x 8 x i8> @llvm.riscv.tuple.extract.nxv8i8.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0, i32 1) - ret <vscale x 8 x i8> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0 } - -define <vscale x 8 x i8> @test_vlseg8_mask_nxv8i8_triscv.vector.tuple_nxv8i8_8t(ptr %base, i64 %vl, <vscale x 8 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @test_vlseg8_mask_nxv8i8_triscv.vector.tuple_nxv8i8_8t(ptr %base, i64 %vl, <vscale x 8 x i1> %mask) { ; CHECK-LABEL: test_vlseg8_mask_nxv8i8_triscv.vector.tuple_nxv8i8_8t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e8, m1, ta, ma -; CHECK-NEXT: vlseg8e8.v v7, (a0), v0.t +; CHECK-NEXT: vlseg8e8.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv8i8_8t.nxv8i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) undef, ptr %base, <vscale x 8 x i1> %mask, i64 %vl, i64 1, i64 3) - %1 = call <vscale x 8 x i8> @llvm.riscv.tuple.extract.nxv8i8.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0, i32 1) - ret <vscale x 8 x i8> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0 } - -declare target("riscv.vector.tuple", <vscale x 2 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv2i8_2t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 2), ptr, <vscale x 1 x i1>, i64, i64, i64) - -define <vscale x 1 x i16> @test_vlseg2_nxv1i16_triscv.vector.tuple_nxv2i8_2t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 2 x i8>, 2) @test_vlseg2_nxv1i16_triscv.vector.tuple_nxv2i8_2t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg2_nxv1i16_triscv.vector.tuple_nxv2i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma -; CHECK-NEXT: vlseg2e16.v v7, (a0) +; CHECK-NEXT: vlseg2e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv2i8_2t(target("riscv.vector.tuple", <vscale x 2 x i8>, 2) undef, ptr %base, i64 %vl, i64 4) - %1 = call <vscale x 1 x i16> @llvm.riscv.tuple.extract.nxv1i16.triscv.vector.tuple_nxv2i8_2t(target("riscv.vector.tuple", <vscale x 2 x i8>, 2) %0, i32 1) - ret <vscale x 1 x i16> %1 + ret target("riscv.vector.tuple", <vscale x 2 x i8>, 2) %0 } - -define <vscale x 1 x i16> @test_vlseg2_mask_nxv1i16_triscv.vector.tuple_nxv2i8_2t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 2 x i8>, 2) @test_vlseg2_mask_nxv1i16_triscv.vector.tuple_nxv2i8_2t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { ; CHECK-LABEL: test_vlseg2_mask_nxv1i16_triscv.vector.tuple_nxv2i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma -; CHECK-NEXT: vlseg2e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg2e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 2) 
@llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv2i8_2t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 2) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 4) - %1 = call <vscale x 1 x i16> @llvm.riscv.tuple.extract.nxv1i16.triscv.vector.tuple_nxv2i8_2t(target("riscv.vector.tuple", <vscale x 2 x i8>, 2) %0, i32 1) - ret <vscale x 1 x i16> %1 + ret target("riscv.vector.tuple", <vscale x 2 x i8>, 2) %0 } - -declare target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv4i8_2t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 2), ptr, <vscale x 2 x i1>, i64, i64, i64) - -define <vscale x 2 x i16> @test_vlseg2_nxv2i16_triscv.vector.tuple_nxv4i8_2t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @test_vlseg2_nxv2i16_triscv.vector.tuple_nxv4i8_2t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg2_nxv2i16_triscv.vector.tuple_nxv4i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma -; CHECK-NEXT: vlseg2e16.v v7, (a0) +; CHECK-NEXT: vlseg2e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) undef, ptr %base, i64 %vl, i64 4) - %1 = call <vscale x 2 x i16> @llvm.riscv.tuple.extract.nxv2i16.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0, i32 1) - ret <vscale x 2 x i16> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0 } - -define <vscale x 2 x i16> @test_vlseg2_mask_nxv2i16_triscv.vector.tuple_nxv4i8_2t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @test_vlseg2_mask_nxv2i16_triscv.vector.tuple_nxv4i8_2t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) { ; CHECK-LABEL: test_vlseg2_mask_nxv2i16_triscv.vector.tuple_nxv4i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma -; CHECK-NEXT: vlseg2e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg2e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv4i8_2t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 4) - %1 = call <vscale x 2 x i16> @llvm.riscv.tuple.extract.nxv2i16.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0, i32 1) - ret <vscale x 2 x i16> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0 } - -declare target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv8i8_2t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 2), ptr, <vscale x 4 x i1>, i64, i64, i64) - -define <vscale x 4 x i16> @test_vlseg2_nxv4i16_triscv.vector.tuple_nxv8i8_2t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @test_vlseg2_nxv4i16_triscv.vector.tuple_nxv8i8_2t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg2_nxv4i16_triscv.vector.tuple_nxv8i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vlseg2e16.v v7, (a0) +; CHECK-NEXT: vlseg2e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) undef, ptr %base, i64 %vl, i64 4) - %1 = call <vscale x 4 x i16> 
@llvm.riscv.tuple.extract.nxv4i16.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0, i32 1) - ret <vscale x 4 x i16> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0 } - -define <vscale x 4 x i16> @test_vlseg2_mask_nxv4i16_triscv.vector.tuple_nxv8i8_2t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @test_vlseg2_mask_nxv4i16_triscv.vector.tuple_nxv8i8_2t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) { ; CHECK-LABEL: test_vlseg2_mask_nxv4i16_triscv.vector.tuple_nxv8i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vlseg2e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg2e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv8i8_2t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) undef, ptr %base, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 4) - %1 = call <vscale x 4 x i16> @llvm.riscv.tuple.extract.nxv4i16.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0, i32 1) - ret <vscale x 4 x i16> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0 } - -declare target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv16i8_2t.nxv8i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 2), ptr, <vscale x 8 x i1>, i64, i64, i64) - -define <vscale x 8 x i16> @test_vlseg2_nxv8i16_triscv.vector.tuple_nxv16i8_2t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @test_vlseg2_nxv8i16_triscv.vector.tuple_nxv16i8_2t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg2_nxv8i16_triscv.vector.tuple_nxv16i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma -; CHECK-NEXT: vlseg2e16.v v6, (a0) +; CHECK-NEXT: vlseg2e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, i64 %vl, i64 4) - %1 = call <vscale x 8 x i16> @llvm.riscv.tuple.extract.nxv8i16.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0, i32 1) - ret <vscale x 8 x i16> %1 + ret target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0 } - -define <vscale x 8 x i16> @test_vlseg2_mask_nxv8i16_triscv.vector.tuple_nxv16i8_2t(ptr %base, i64 %vl, <vscale x 8 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @test_vlseg2_mask_nxv8i16_triscv.vector.tuple_nxv16i8_2t(ptr %base, i64 %vl, <vscale x 8 x i1> %mask) { ; CHECK-LABEL: test_vlseg2_mask_nxv8i16_triscv.vector.tuple_nxv16i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma -; CHECK-NEXT: vlseg2e16.v v6, (a0), v0.t +; CHECK-NEXT: vlseg2e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv16i8_2t.nxv8i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 8 x i1> %mask, i64 %vl, i64 1, i64 4) - %1 = call <vscale x 8 x i16> @llvm.riscv.tuple.extract.nxv8i16.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0, i32 1) - ret <vscale x 8 x i16> %1 + ret target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0 } - -declare target("riscv.vector.tuple", <vscale x 32 x i8>, 2) 
@llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv32i8_2t.nxv16i1(target("riscv.vector.tuple", <vscale x 32 x i8>, 2), ptr, <vscale x 16 x i1>, i64, i64, i64) - -define <vscale x 16 x i16> @test_vlseg2_nxv16i16_triscv.vector.tuple_nxv32i8_2t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @test_vlseg2_nxv16i16_triscv.vector.tuple_nxv32i8_2t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg2_nxv16i16_triscv.vector.tuple_nxv32i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma -; CHECK-NEXT: vlseg2e16.v v4, (a0) +; CHECK-NEXT: vlseg2e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, i64 %vl, i64 4) - %1 = call <vscale x 16 x i16> @llvm.riscv.tuple.extract.nxv16i16.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0, i32 1) - ret <vscale x 16 x i16> %1 + ret target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0 } - -define <vscale x 16 x i16> @test_vlseg2_mask_nxv16i16_triscv.vector.tuple_nxv32i8_2t(ptr %base, i64 %vl, <vscale x 16 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @test_vlseg2_mask_nxv16i16_triscv.vector.tuple_nxv32i8_2t(ptr %base, i64 %vl, <vscale x 16 x i1> %mask) { ; CHECK-LABEL: test_vlseg2_mask_nxv16i16_triscv.vector.tuple_nxv32i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma -; CHECK-NEXT: vlseg2e16.v v4, (a0), v0.t +; CHECK-NEXT: vlseg2e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv32i8_2t.nxv16i1(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 16 x i1> %mask, i64 %vl, i64 1, i64 4) - %1 = call <vscale x 16 x i16> @llvm.riscv.tuple.extract.nxv16i16.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0, i32 1) - ret <vscale x 16 x i16> %1 + ret target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0 } - -declare target("riscv.vector.tuple", <vscale x 2 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv2i8_3t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 3), ptr, <vscale x 1 x i1>, i64, i64, i64) - -define <vscale x 1 x i16> @test_vlseg3_nxv1i16_triscv.vector.tuple_nxv2i8_3t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 2 x i8>, 3) @test_vlseg3_nxv1i16_triscv.vector.tuple_nxv2i8_3t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg3_nxv1i16_triscv.vector.tuple_nxv2i8_3t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma -; CHECK-NEXT: vlseg3e16.v v7, (a0) +; CHECK-NEXT: vlseg3e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv2i8_3t(target("riscv.vector.tuple", <vscale x 2 x i8>, 3) undef, ptr %base, i64 %vl, i64 4) - %1 = call <vscale x 1 x i16> @llvm.riscv.tuple.extract.nxv1i16.triscv.vector.tuple_nxv2i8_3t(target("riscv.vector.tuple", <vscale x 2 x i8>, 3) %0, i32 1) - ret <vscale x 1 x i16> %1 + ret target("riscv.vector.tuple", <vscale x 2 x i8>, 3) %0 } - -define <vscale x 1 x i16> @test_vlseg3_mask_nxv1i16_triscv.vector.tuple_nxv2i8_3t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 2 x i8>, 3) @test_vlseg3_mask_nxv1i16_triscv.vector.tuple_nxv2i8_3t(ptr %base, i64 %vl, 
<vscale x 1 x i1> %mask) { ; CHECK-LABEL: test_vlseg3_mask_nxv1i16_triscv.vector.tuple_nxv2i8_3t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma -; CHECK-NEXT: vlseg3e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg3e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv2i8_3t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 3) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 4) - %1 = call <vscale x 1 x i16> @llvm.riscv.tuple.extract.nxv1i16.triscv.vector.tuple_nxv2i8_3t(target("riscv.vector.tuple", <vscale x 2 x i8>, 3) %0, i32 1) - ret <vscale x 1 x i16> %1 + ret target("riscv.vector.tuple", <vscale x 2 x i8>, 3) %0 } - -declare target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv4i8_3t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 3), ptr, <vscale x 2 x i1>, i64, i64, i64) - -define <vscale x 2 x i16> @test_vlseg3_nxv2i16_triscv.vector.tuple_nxv4i8_3t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @test_vlseg3_nxv2i16_triscv.vector.tuple_nxv4i8_3t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg3_nxv2i16_triscv.vector.tuple_nxv4i8_3t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma -; CHECK-NEXT: vlseg3e16.v v7, (a0) +; CHECK-NEXT: vlseg3e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) undef, ptr %base, i64 %vl, i64 4) - %1 = call <vscale x 2 x i16> @llvm.riscv.tuple.extract.nxv2i16.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0, i32 1) - ret <vscale x 2 x i16> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0 } - -define <vscale x 2 x i16> @test_vlseg3_mask_nxv2i16_triscv.vector.tuple_nxv4i8_3t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @test_vlseg3_mask_nxv2i16_triscv.vector.tuple_nxv4i8_3t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) { ; CHECK-LABEL: test_vlseg3_mask_nxv2i16_triscv.vector.tuple_nxv4i8_3t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma -; CHECK-NEXT: vlseg3e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg3e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv4i8_3t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 4) - %1 = call <vscale x 2 x i16> @llvm.riscv.tuple.extract.nxv2i16.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0, i32 1) - ret <vscale x 2 x i16> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0 } - -declare target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv8i8_3t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 3), ptr, <vscale x 4 x i1>, i64, i64, i64) - -define <vscale x 4 x i16> @test_vlseg3_nxv4i16_triscv.vector.tuple_nxv8i8_3t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @test_vlseg3_nxv4i16_triscv.vector.tuple_nxv8i8_3t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg3_nxv4i16_triscv.vector.tuple_nxv8i8_3t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: 
vlseg3e16.v v7, (a0) +; CHECK-NEXT: vlseg3e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) undef, ptr %base, i64 %vl, i64 4) - %1 = call <vscale x 4 x i16> @llvm.riscv.tuple.extract.nxv4i16.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0, i32 1) - ret <vscale x 4 x i16> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0 } - -define <vscale x 4 x i16> @test_vlseg3_mask_nxv4i16_triscv.vector.tuple_nxv8i8_3t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @test_vlseg3_mask_nxv4i16_triscv.vector.tuple_nxv8i8_3t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) { ; CHECK-LABEL: test_vlseg3_mask_nxv4i16_triscv.vector.tuple_nxv8i8_3t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vlseg3e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg3e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv8i8_3t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) undef, ptr %base, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 4) - %1 = call <vscale x 4 x i16> @llvm.riscv.tuple.extract.nxv4i16.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0, i32 1) - ret <vscale x 4 x i16> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0 } - -declare target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv8i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 3), ptr, <vscale x 8 x i1>, i64, i64, i64) - -define <vscale x 8 x i16> @test_vlseg3_nxv8i16_triscv.vector.tuple_nxv16i8_3t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @test_vlseg3_nxv8i16_triscv.vector.tuple_nxv16i8_3t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg3_nxv8i16_triscv.vector.tuple_nxv16i8_3t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma -; CHECK-NEXT: vlseg3e16.v v6, (a0) +; CHECK-NEXT: vlseg3e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, i64 %vl, i64 4) - %1 = call <vscale x 8 x i16> @llvm.riscv.tuple.extract.nxv8i16.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0, i32 1) - ret <vscale x 8 x i16> %1 + ret target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0 } - -define <vscale x 8 x i16> @test_vlseg3_mask_nxv8i16_triscv.vector.tuple_nxv16i8_3t(ptr %base, i64 %vl, <vscale x 8 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @test_vlseg3_mask_nxv8i16_triscv.vector.tuple_nxv16i8_3t(ptr %base, i64 %vl, <vscale x 8 x i1> %mask) { ; CHECK-LABEL: test_vlseg3_mask_nxv8i16_triscv.vector.tuple_nxv16i8_3t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma -; CHECK-NEXT: vlseg3e16.v v6, (a0), v0.t +; CHECK-NEXT: vlseg3e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv8i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 8 x i1> %mask, i64 %vl, i64 1, i64 4) - %1 = call <vscale x 8 x i16> 
@llvm.riscv.tuple.extract.nxv8i16.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0, i32 1) - ret <vscale x 8 x i16> %1 + ret target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0 } - -declare target("riscv.vector.tuple", <vscale x 2 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv2i8_4t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 4), ptr, <vscale x 1 x i1>, i64, i64, i64) - -define <vscale x 1 x i16> @test_vlseg4_nxv1i16_triscv.vector.tuple_nxv2i8_4t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 2 x i8>, 4) @test_vlseg4_nxv1i16_triscv.vector.tuple_nxv2i8_4t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg4_nxv1i16_triscv.vector.tuple_nxv2i8_4t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma -; CHECK-NEXT: vlseg4e16.v v7, (a0) +; CHECK-NEXT: vlseg4e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv2i8_4t(target("riscv.vector.tuple", <vscale x 2 x i8>, 4) undef, ptr %base, i64 %vl, i64 4) - %1 = call <vscale x 1 x i16> @llvm.riscv.tuple.extract.nxv1i16.triscv.vector.tuple_nxv2i8_4t(target("riscv.vector.tuple", <vscale x 2 x i8>, 4) %0, i32 1) - ret <vscale x 1 x i16> %1 + ret target("riscv.vector.tuple", <vscale x 2 x i8>, 4) %0 } - -define <vscale x 1 x i16> @test_vlseg4_mask_nxv1i16_triscv.vector.tuple_nxv2i8_4t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 2 x i8>, 4) @test_vlseg4_mask_nxv1i16_triscv.vector.tuple_nxv2i8_4t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { ; CHECK-LABEL: test_vlseg4_mask_nxv1i16_triscv.vector.tuple_nxv2i8_4t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma -; CHECK-NEXT: vlseg4e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg4e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv2i8_4t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 4) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 4) - %1 = call <vscale x 1 x i16> @llvm.riscv.tuple.extract.nxv1i16.triscv.vector.tuple_nxv2i8_4t(target("riscv.vector.tuple", <vscale x 2 x i8>, 4) %0, i32 1) - ret <vscale x 1 x i16> %1 + ret target("riscv.vector.tuple", <vscale x 2 x i8>, 4) %0 } - -declare target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv4i8_4t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 4), ptr, <vscale x 2 x i1>, i64, i64, i64) - -define <vscale x 2 x i16> @test_vlseg4_nxv2i16_triscv.vector.tuple_nxv4i8_4t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @test_vlseg4_nxv2i16_triscv.vector.tuple_nxv4i8_4t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg4_nxv2i16_triscv.vector.tuple_nxv4i8_4t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma -; CHECK-NEXT: vlseg4e16.v v7, (a0) +; CHECK-NEXT: vlseg4e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) undef, ptr %base, i64 %vl, i64 4) - %1 = call <vscale x 2 x i16> @llvm.riscv.tuple.extract.nxv2i16.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0, i32 1) - ret <vscale x 2 x i16> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0 } - -define <vscale x 2 x 
i16> @test_vlseg4_mask_nxv2i16_triscv.vector.tuple_nxv4i8_4t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @test_vlseg4_mask_nxv2i16_triscv.vector.tuple_nxv4i8_4t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) { ; CHECK-LABEL: test_vlseg4_mask_nxv2i16_triscv.vector.tuple_nxv4i8_4t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma -; CHECK-NEXT: vlseg4e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg4e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv4i8_4t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 4) - %1 = call <vscale x 2 x i16> @llvm.riscv.tuple.extract.nxv2i16.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0, i32 1) - ret <vscale x 2 x i16> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0 } - -declare target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv8i8_4t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 4), ptr, <vscale x 4 x i1>, i64, i64, i64) - -define <vscale x 4 x i16> @test_vlseg4_nxv4i16_triscv.vector.tuple_nxv8i8_4t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @test_vlseg4_nxv4i16_triscv.vector.tuple_nxv8i8_4t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg4_nxv4i16_triscv.vector.tuple_nxv8i8_4t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vlseg4e16.v v7, (a0) +; CHECK-NEXT: vlseg4e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) undef, ptr %base, i64 %vl, i64 4) - %1 = call <vscale x 4 x i16> @llvm.riscv.tuple.extract.nxv4i16.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0, i32 1) - ret <vscale x 4 x i16> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0 } - -define <vscale x 4 x i16> @test_vlseg4_mask_nxv4i16_triscv.vector.tuple_nxv8i8_4t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @test_vlseg4_mask_nxv4i16_triscv.vector.tuple_nxv8i8_4t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) { ; CHECK-LABEL: test_vlseg4_mask_nxv4i16_triscv.vector.tuple_nxv8i8_4t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vlseg4e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg4e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv8i8_4t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) undef, ptr %base, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 4) - %1 = call <vscale x 4 x i16> @llvm.riscv.tuple.extract.nxv4i16.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0, i32 1) - ret <vscale x 4 x i16> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0 } - -declare target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv16i8_4t.nxv8i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 4), ptr, <vscale x 8 x i1>, i64, i64, i64) - -define <vscale x 8 x i16> @test_vlseg4_nxv8i16_triscv.vector.tuple_nxv16i8_4t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale 
x 16 x i8>, 4) @test_vlseg4_nxv8i16_triscv.vector.tuple_nxv16i8_4t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg4_nxv8i16_triscv.vector.tuple_nxv16i8_4t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma -; CHECK-NEXT: vlseg4e16.v v6, (a0) +; CHECK-NEXT: vlseg4e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, i64 %vl, i64 4) - %1 = call <vscale x 8 x i16> @llvm.riscv.tuple.extract.nxv8i16.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0, i32 1) - ret <vscale x 8 x i16> %1 + ret target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0 } - -define <vscale x 8 x i16> @test_vlseg4_mask_nxv8i16_triscv.vector.tuple_nxv16i8_4t(ptr %base, i64 %vl, <vscale x 8 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @test_vlseg4_mask_nxv8i16_triscv.vector.tuple_nxv16i8_4t(ptr %base, i64 %vl, <vscale x 8 x i1> %mask) { ; CHECK-LABEL: test_vlseg4_mask_nxv8i16_triscv.vector.tuple_nxv16i8_4t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma -; CHECK-NEXT: vlseg4e16.v v6, (a0), v0.t +; CHECK-NEXT: vlseg4e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv16i8_4t.nxv8i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 8 x i1> %mask, i64 %vl, i64 1, i64 4) - %1 = call <vscale x 8 x i16> @llvm.riscv.tuple.extract.nxv8i16.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0, i32 1) - ret <vscale x 8 x i16> %1 + ret target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0 } - -declare target("riscv.vector.tuple", <vscale x 2 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv2i8_5t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 5), ptr, <vscale x 1 x i1>, i64, i64, i64) - -define <vscale x 1 x i16> @test_vlseg5_nxv1i16_triscv.vector.tuple_nxv2i8_5t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 2 x i8>, 5) @test_vlseg5_nxv1i16_triscv.vector.tuple_nxv2i8_5t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg5_nxv1i16_triscv.vector.tuple_nxv2i8_5t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma -; CHECK-NEXT: vlseg5e16.v v7, (a0) +; CHECK-NEXT: vlseg5e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv2i8_5t(target("riscv.vector.tuple", <vscale x 2 x i8>, 5) undef, ptr %base, i64 %vl, i64 4) - %1 = call <vscale x 1 x i16> @llvm.riscv.tuple.extract.nxv1i16.triscv.vector.tuple_nxv2i8_5t(target("riscv.vector.tuple", <vscale x 2 x i8>, 5) %0, i32 1) - ret <vscale x 1 x i16> %1 + ret target("riscv.vector.tuple", <vscale x 2 x i8>, 5) %0 } - -define <vscale x 1 x i16> @test_vlseg5_mask_nxv1i16_triscv.vector.tuple_nxv2i8_5t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 2 x i8>, 5) @test_vlseg5_mask_nxv1i16_triscv.vector.tuple_nxv2i8_5t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { ; CHECK-LABEL: test_vlseg5_mask_nxv1i16_triscv.vector.tuple_nxv2i8_5t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma -; CHECK-NEXT: vlseg5e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg5e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call 
target("riscv.vector.tuple", <vscale x 2 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv2i8_5t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 5) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 4) - %1 = call <vscale x 1 x i16> @llvm.riscv.tuple.extract.nxv1i16.triscv.vector.tuple_nxv2i8_5t(target("riscv.vector.tuple", <vscale x 2 x i8>, 5) %0, i32 1) - ret <vscale x 1 x i16> %1 + ret target("riscv.vector.tuple", <vscale x 2 x i8>, 5) %0 } - -declare target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv4i8_5t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 5), ptr, <vscale x 2 x i1>, i64, i64, i64) - -define <vscale x 2 x i16> @test_vlseg5_nxv2i16_triscv.vector.tuple_nxv4i8_5t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @test_vlseg5_nxv2i16_triscv.vector.tuple_nxv4i8_5t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg5_nxv2i16_triscv.vector.tuple_nxv4i8_5t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma -; CHECK-NEXT: vlseg5e16.v v7, (a0) +; CHECK-NEXT: vlseg5e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) undef, ptr %base, i64 %vl, i64 4) - %1 = call <vscale x 2 x i16> @llvm.riscv.tuple.extract.nxv2i16.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0, i32 1) - ret <vscale x 2 x i16> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0 } - -define <vscale x 2 x i16> @test_vlseg5_mask_nxv2i16_triscv.vector.tuple_nxv4i8_5t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @test_vlseg5_mask_nxv2i16_triscv.vector.tuple_nxv4i8_5t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) { ; CHECK-LABEL: test_vlseg5_mask_nxv2i16_triscv.vector.tuple_nxv4i8_5t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma -; CHECK-NEXT: vlseg5e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg5e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv4i8_5t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 4) - %1 = call <vscale x 2 x i16> @llvm.riscv.tuple.extract.nxv2i16.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0, i32 1) - ret <vscale x 2 x i16> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0 } - -declare target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv8i8_5t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 5), ptr, <vscale x 4 x i1>, i64, i64, i64) - -define <vscale x 4 x i16> @test_vlseg5_nxv4i16_triscv.vector.tuple_nxv8i8_5t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @test_vlseg5_nxv4i16_triscv.vector.tuple_nxv8i8_5t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg5_nxv4i16_triscv.vector.tuple_nxv8i8_5t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vlseg5e16.v v7, (a0) +; CHECK-NEXT: vlseg5e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) undef, ptr %base, i64 %vl, i64 4) - %1 = call 
- %1 = call <vscale x 4 x i16> @llvm.riscv.tuple.extract.nxv4i16.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0, i32 1)
- ret <vscale x 4 x i16> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0
}
-
-define <vscale x 4 x i16> @test_vlseg5_mask_nxv4i16_triscv.vector.tuple_nxv8i8_5t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @test_vlseg5_mask_nxv4i16_triscv.vector.tuple_nxv8i8_5t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: test_vlseg5_mask_nxv4i16_triscv.vector.tuple_nxv8i8_5t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vlseg5e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg5e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv8i8_5t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) undef, ptr %base, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 4)
- %1 = call <vscale x 4 x i16> @llvm.riscv.tuple.extract.nxv4i16.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0, i32 1)
- ret <vscale x 4 x i16> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 2 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv2i8_6t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 6), ptr, <vscale x 1 x i1>, i64, i64, i64)
-
-define <vscale x 1 x i16> @test_vlseg6_nxv1i16_triscv.vector.tuple_nxv2i8_6t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 6) @test_vlseg6_nxv1i16_triscv.vector.tuple_nxv2i8_6t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg6_nxv1i16_triscv.vector.tuple_nxv2i8_6t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT: vlseg6e16.v v7, (a0)
+; CHECK-NEXT: vlseg6e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv2i8_6t(target("riscv.vector.tuple", <vscale x 2 x i8>, 6) undef, ptr %base, i64 %vl, i64 4)
- %1 = call <vscale x 1 x i16> @llvm.riscv.tuple.extract.nxv1i16.triscv.vector.tuple_nxv2i8_6t(target("riscv.vector.tuple", <vscale x 2 x i8>, 6) %0, i32 1)
- ret <vscale x 1 x i16> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 6) %0
}
-
-define <vscale x 1 x i16> @test_vlseg6_mask_nxv1i16_triscv.vector.tuple_nxv2i8_6t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 6) @test_vlseg6_mask_nxv1i16_triscv.vector.tuple_nxv2i8_6t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg6_mask_nxv1i16_triscv.vector.tuple_nxv2i8_6t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT: vlseg6e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg6e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv2i8_6t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 6) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 4)
- %1 = call <vscale x 1 x i16> @llvm.riscv.tuple.extract.nxv1i16.triscv.vector.tuple_nxv2i8_6t(target("riscv.vector.tuple", <vscale x 2 x i8>, 6) %0, i32 1)
- ret <vscale x 1 x i16> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 6) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv4i8_6t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 6), ptr, <vscale x 2 x i1>, i64, i64, i64)
-
-define <vscale x 2 x i16> @test_vlseg6_nxv2i16_triscv.vector.tuple_nxv4i8_6t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @test_vlseg6_nxv2i16_triscv.vector.tuple_nxv4i8_6t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg6_nxv2i16_triscv.vector.tuple_nxv4i8_6t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT: vlseg6e16.v v7, (a0)
+; CHECK-NEXT: vlseg6e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) undef, ptr %base, i64 %vl, i64 4)
- %1 = call <vscale x 2 x i16> @llvm.riscv.tuple.extract.nxv2i16.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0, i32 1)
- ret <vscale x 2 x i16> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0
}
-
-define <vscale x 2 x i16> @test_vlseg6_mask_nxv2i16_triscv.vector.tuple_nxv4i8_6t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @test_vlseg6_mask_nxv2i16_triscv.vector.tuple_nxv4i8_6t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg6_mask_nxv2i16_triscv.vector.tuple_nxv4i8_6t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT: vlseg6e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg6e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv4i8_6t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 4)
- %1 = call <vscale x 2 x i16> @llvm.riscv.tuple.extract.nxv2i16.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0, i32 1)
- ret <vscale x 2 x i16> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv8i8_6t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 6), ptr, <vscale x 4 x i1>, i64, i64, i64)
-
-define <vscale x 4 x i16> @test_vlseg6_nxv4i16_triscv.vector.tuple_nxv8i8_6t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @test_vlseg6_nxv4i16_triscv.vector.tuple_nxv8i8_6t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg6_nxv4i16_triscv.vector.tuple_nxv8i8_6t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vlseg6e16.v v7, (a0)
+; CHECK-NEXT: vlseg6e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) undef, ptr %base, i64 %vl, i64 4)
- %1 = call <vscale x 4 x i16> @llvm.riscv.tuple.extract.nxv4i16.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0, i32 1)
- ret <vscale x 4 x i16> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0
}
-
-define <vscale x 4 x i16> @test_vlseg6_mask_nxv4i16_triscv.vector.tuple_nxv8i8_6t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @test_vlseg6_mask_nxv4i16_triscv.vector.tuple_nxv8i8_6t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: test_vlseg6_mask_nxv4i16_triscv.vector.tuple_nxv8i8_6t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vlseg6e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg6e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv8i8_6t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) undef, ptr %base, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 4)
- %1 = call <vscale x 4 x i16> @llvm.riscv.tuple.extract.nxv4i16.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0, i32 1)
- ret <vscale x 4 x i16> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 2 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv2i8_7t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 7), ptr, <vscale x 1 x i1>, i64, i64, i64)
-
-define <vscale x 1 x i16> @test_vlseg7_nxv1i16_triscv.vector.tuple_nxv2i8_7t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 7) @test_vlseg7_nxv1i16_triscv.vector.tuple_nxv2i8_7t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg7_nxv1i16_triscv.vector.tuple_nxv2i8_7t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT: vlseg7e16.v v7, (a0)
+; CHECK-NEXT: vlseg7e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv2i8_7t(target("riscv.vector.tuple", <vscale x 2 x i8>, 7) undef, ptr %base, i64 %vl, i64 4)
- %1 = call <vscale x 1 x i16> @llvm.riscv.tuple.extract.nxv1i16.triscv.vector.tuple_nxv2i8_7t(target("riscv.vector.tuple", <vscale x 2 x i8>, 7) %0, i32 1)
- ret <vscale x 1 x i16> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 7) %0
}
-
-define <vscale x 1 x i16> @test_vlseg7_mask_nxv1i16_triscv.vector.tuple_nxv2i8_7t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 7) @test_vlseg7_mask_nxv1i16_triscv.vector.tuple_nxv2i8_7t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg7_mask_nxv1i16_triscv.vector.tuple_nxv2i8_7t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT: vlseg7e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg7e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv2i8_7t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 7) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 4)
- %1 = call <vscale x 1 x i16> @llvm.riscv.tuple.extract.nxv1i16.triscv.vector.tuple_nxv2i8_7t(target("riscv.vector.tuple", <vscale x 2 x i8>, 7) %0, i32 1)
- ret <vscale x 1 x i16> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 7) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv4i8_7t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 7), ptr, <vscale x 2 x i1>, i64, i64, i64)
-
-define <vscale x 2 x i16> @test_vlseg7_nxv2i16_triscv.vector.tuple_nxv4i8_7t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @test_vlseg7_nxv2i16_triscv.vector.tuple_nxv4i8_7t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg7_nxv2i16_triscv.vector.tuple_nxv4i8_7t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT: vlseg7e16.v v7, (a0)
+; CHECK-NEXT: vlseg7e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) undef, ptr %base, i64 %vl, i64 4)
- %1 = call <vscale x 2 x i16> @llvm.riscv.tuple.extract.nxv2i16.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0, i32 1)
- ret <vscale x 2 x i16> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0
}
-
-define <vscale x 2 x i16> @test_vlseg7_mask_nxv2i16_triscv.vector.tuple_nxv4i8_7t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @test_vlseg7_mask_nxv2i16_triscv.vector.tuple_nxv4i8_7t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg7_mask_nxv2i16_triscv.vector.tuple_nxv4i8_7t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT: vlseg7e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg7e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv4i8_7t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 4)
- %1 = call <vscale x 2 x i16> @llvm.riscv.tuple.extract.nxv2i16.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0, i32 1)
- ret <vscale x 2 x i16> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv8i8_7t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 7), ptr, <vscale x 4 x i1>, i64, i64, i64)
-
-define <vscale x 4 x i16> @test_vlseg7_nxv4i16_triscv.vector.tuple_nxv8i8_7t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @test_vlseg7_nxv4i16_triscv.vector.tuple_nxv8i8_7t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg7_nxv4i16_triscv.vector.tuple_nxv8i8_7t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vlseg7e16.v v7, (a0)
+; CHECK-NEXT: vlseg7e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) undef, ptr %base, i64 %vl, i64 4)
- %1 = call <vscale x 4 x i16> @llvm.riscv.tuple.extract.nxv4i16.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0, i32 1)
- ret <vscale x 4 x i16> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0
}
-
-define <vscale x 4 x i16> @test_vlseg7_mask_nxv4i16_triscv.vector.tuple_nxv8i8_7t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @test_vlseg7_mask_nxv4i16_triscv.vector.tuple_nxv8i8_7t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: test_vlseg7_mask_nxv4i16_triscv.vector.tuple_nxv8i8_7t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vlseg7e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg7e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv8i8_7t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) undef, ptr %base, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 4)
- %1 = call <vscale x 4 x i16> @llvm.riscv.tuple.extract.nxv4i16.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0, i32 1)
- ret <vscale x 4 x i16> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 2 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv2i8_8t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 8), ptr, <vscale x 1 x i1>, i64, i64, i64)
-
-define <vscale x 1 x i16> @test_vlseg8_nxv1i16_triscv.vector.tuple_nxv2i8_8t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 8) @test_vlseg8_nxv1i16_triscv.vector.tuple_nxv2i8_8t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg8_nxv1i16_triscv.vector.tuple_nxv2i8_8t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT: vlseg8e16.v v7, (a0)
+; CHECK-NEXT: vlseg8e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv2i8_8t(target("riscv.vector.tuple", <vscale x 2 x i8>, 8) undef, ptr %base, i64 %vl, i64 4)
- %1 = call <vscale x 1 x i16> @llvm.riscv.tuple.extract.nxv1i16.triscv.vector.tuple_nxv2i8_8t(target("riscv.vector.tuple", <vscale x 2 x i8>, 8) %0, i32 1)
- ret <vscale x 1 x i16> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 8) %0
}
-
-define <vscale x 1 x i16> @test_vlseg8_mask_nxv1i16_triscv.vector.tuple_nxv2i8_8t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 8) @test_vlseg8_mask_nxv1i16_triscv.vector.tuple_nxv2i8_8t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg8_mask_nxv1i16_triscv.vector.tuple_nxv2i8_8t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT: vlseg8e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg8e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv2i8_8t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 8) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 4)
- %1 = call <vscale x 1 x i16> @llvm.riscv.tuple.extract.nxv1i16.triscv.vector.tuple_nxv2i8_8t(target("riscv.vector.tuple", <vscale x 2 x i8>, 8) %0, i32 1)
- ret <vscale x 1 x i16> %1
+ ret target("riscv.vector.tuple", <vscale x 2 x i8>, 8) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv4i8_8t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 8), ptr, <vscale x 2 x i1>, i64, i64, i64)
-
-define <vscale x 2 x i16> @test_vlseg8_nxv2i16_triscv.vector.tuple_nxv4i8_8t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @test_vlseg8_nxv2i16_triscv.vector.tuple_nxv4i8_8t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg8_nxv2i16_triscv.vector.tuple_nxv4i8_8t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT: vlseg8e16.v v7, (a0)
+; CHECK-NEXT: vlseg8e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) undef, ptr %base, i64 %vl, i64 4)
- %1 = call <vscale x 2 x i16> @llvm.riscv.tuple.extract.nxv2i16.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0, i32 1)
- ret <vscale x 2 x i16> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0
}
-
-define <vscale x 2 x i16> @test_vlseg8_mask_nxv2i16_triscv.vector.tuple_nxv4i8_8t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @test_vlseg8_mask_nxv2i16_triscv.vector.tuple_nxv4i8_8t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg8_mask_nxv2i16_triscv.vector.tuple_nxv4i8_8t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT: vlseg8e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg8e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv4i8_8t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 4)
- %1 = call <vscale x 2 x i16> @llvm.riscv.tuple.extract.nxv2i16.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0, i32 1)
- ret <vscale x 2 x i16> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv8i8_8t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 8), ptr, <vscale x 4 x i1>, i64, i64, i64)
-
-define <vscale x 4 x i16> @test_vlseg8_nxv4i16_triscv.vector.tuple_nxv8i8_8t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @test_vlseg8_nxv4i16_triscv.vector.tuple_nxv8i8_8t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg8_nxv4i16_triscv.vector.tuple_nxv8i8_8t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vlseg8e16.v v7, (a0)
+; CHECK-NEXT: vlseg8e16.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) undef, ptr %base, i64 %vl, i64 4)
- %1 = call <vscale x 4 x i16> @llvm.riscv.tuple.extract.nxv4i16.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0, i32 1)
- ret <vscale x 4 x i16> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0
}
-
-define <vscale x 4 x i16> @test_vlseg8_mask_nxv4i16_triscv.vector.tuple_nxv8i8_8t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @test_vlseg8_mask_nxv4i16_triscv.vector.tuple_nxv8i8_8t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: test_vlseg8_mask_nxv4i16_triscv.vector.tuple_nxv8i8_8t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT: vlseg8e16.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg8e16.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv8i8_8t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) undef, ptr %base, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 4)
- %1 = call <vscale x 4 x i16> @llvm.riscv.tuple.extract.nxv4i16.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0, i32 1)
- ret <vscale x 4 x i16> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv4i8_2t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 2), ptr, <vscale x 1 x i1>, i64, i64, i64)
-
-define <vscale x 1 x i32> @test_vlseg2_nxv1i32_triscv.vector.tuple_nxv4i8_2t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @test_vlseg2_nxv1i32_triscv.vector.tuple_nxv4i8_2t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg2_nxv1i32_triscv.vector.tuple_nxv4i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT: vlseg2e32.v v7, (a0)
+; CHECK-NEXT: vlseg2e32.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) undef, ptr %base, i64 %vl, i64 5)
- %1 = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0, i32 1)
- ret <vscale x 1 x i32> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0
}
-
-define <vscale x 1 x i32> @test_vlseg2_mask_nxv1i32_triscv.vector.tuple_nxv4i8_2t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @test_vlseg2_mask_nxv1i32_triscv.vector.tuple_nxv4i8_2t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg2_mask_nxv1i32_triscv.vector.tuple_nxv4i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT: vlseg2e32.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg2e32.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv4i8_2t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 5)
- %1 = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0, i32 1)
- ret <vscale x 1 x i32> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv8i8_2t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 2), ptr, <vscale x 2 x i1>, i64, i64, i64)
-
-define <vscale x 2 x i32> @test_vlseg2_nxv2i32_triscv.vector.tuple_nxv8i8_2t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @test_vlseg2_nxv2i32_triscv.vector.tuple_nxv8i8_2t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg2_nxv2i32_triscv.vector.tuple_nxv8i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT: vlseg2e32.v v7, (a0)
+; CHECK-NEXT: vlseg2e32.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) undef, ptr %base, i64 %vl, i64 5)
- %1 = call <vscale x 2 x i32> @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0, i32 1)
- ret <vscale x 2 x i32> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0
}
-
-define <vscale x 2 x i32> @test_vlseg2_mask_nxv2i32_triscv.vector.tuple_nxv8i8_2t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @test_vlseg2_mask_nxv2i32_triscv.vector.tuple_nxv8i8_2t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg2_mask_nxv2i32_triscv.vector.tuple_nxv8i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT: vlseg2e32.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg2e32.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv8i8_2t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 5)
- %1 = call <vscale x 2 x i32> @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0, i32 1)
- ret <vscale x 2 x i32> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv16i8_2t.nxv4i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 2), ptr, <vscale x 4 x i1>, i64, i64, i64)
-
-define <vscale x 4 x i32> @test_vlseg2_nxv4i32_triscv.vector.tuple_nxv16i8_2t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @test_vlseg2_nxv4i32_triscv.vector.tuple_nxv16i8_2t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg2_nxv4i32_triscv.vector.tuple_nxv16i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vlseg2e32.v v6, (a0)
+; CHECK-NEXT: vlseg2e32.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, i64 %vl, i64 5)
- %1 = call <vscale x 4 x i32> @llvm.riscv.tuple.extract.nxv4i32.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0, i32 1)
- ret <vscale x 4 x i32> %1
+ ret target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0
}
-
-define <vscale x 4 x i32> @test_vlseg2_mask_nxv4i32_triscv.vector.tuple_nxv16i8_2t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @test_vlseg2_mask_nxv4i32_triscv.vector.tuple_nxv16i8_2t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: test_vlseg2_mask_nxv4i32_triscv.vector.tuple_nxv16i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vlseg2e32.v v6, (a0), v0.t
+; CHECK-NEXT: vlseg2e32.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv16i8_2t.nxv4i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 5)
- %1 = call <vscale x 4 x i32> @llvm.riscv.tuple.extract.nxv4i32.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0, i32 1)
- ret <vscale x 4 x i32> %1
+ ret target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv32i8_2t.nxv8i1(target("riscv.vector.tuple", <vscale x 32 x i8>, 2), ptr, <vscale x 8 x i1>, i64, i64, i64)
-
-define <vscale x 8 x i32> @test_vlseg2_nxv8i32_triscv.vector.tuple_nxv32i8_2t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @test_vlseg2_nxv8i32_triscv.vector.tuple_nxv32i8_2t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg2_nxv8i32_triscv.vector.tuple_nxv32i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT: vlseg2e32.v v4, (a0)
+; CHECK-NEXT: vlseg2e32.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, i64 %vl, i64 5)
- %1 = call <vscale x 8 x i32> @llvm.riscv.tuple.extract.nxv8i32.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0, i32 1)
- ret <vscale x 8 x i32> %1
+ ret target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0
}
-
-define <vscale x 8 x i32> @test_vlseg2_mask_nxv8i32_triscv.vector.tuple_nxv32i8_2t(ptr %base, i64 %vl, <vscale x 8 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @test_vlseg2_mask_nxv8i32_triscv.vector.tuple_nxv32i8_2t(ptr %base, i64 %vl, <vscale x 8 x i1> %mask) {
; CHECK-LABEL: test_vlseg2_mask_nxv8i32_triscv.vector.tuple_nxv32i8_2t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT: vlseg2e32.v v4, (a0), v0.t
+; CHECK-NEXT: vlseg2e32.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv32i8_2t.nxv8i1(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 8 x i1> %mask, i64 %vl, i64 1, i64 5)
- %1 = call <vscale x 8 x i32> @llvm.riscv.tuple.extract.nxv8i32.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0, i32 1)
- ret <vscale x 8 x i32> %1
+ ret target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv4i8_3t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 3), ptr, <vscale x 1 x i1>, i64, i64, i64)
-
-define <vscale x 1 x i32> @test_vlseg3_nxv1i32_triscv.vector.tuple_nxv4i8_3t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @test_vlseg3_nxv1i32_triscv.vector.tuple_nxv4i8_3t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg3_nxv1i32_triscv.vector.tuple_nxv4i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT: vlseg3e32.v v7, (a0)
+; CHECK-NEXT: vlseg3e32.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) undef, ptr %base, i64 %vl, i64 5)
- %1 = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0, i32 1)
- ret <vscale x 1 x i32> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0
}
-
-define <vscale x 1 x i32> @test_vlseg3_mask_nxv1i32_triscv.vector.tuple_nxv4i8_3t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @test_vlseg3_mask_nxv1i32_triscv.vector.tuple_nxv4i8_3t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg3_mask_nxv1i32_triscv.vector.tuple_nxv4i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT: vlseg3e32.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg3e32.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv4i8_3t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 5)
- %1 = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0, i32 1)
- ret <vscale x 1 x i32> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv8i8_3t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 3), ptr, <vscale x 2 x i1>, i64, i64, i64)
-
-define <vscale x 2 x i32> @test_vlseg3_nxv2i32_triscv.vector.tuple_nxv8i8_3t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @test_vlseg3_nxv2i32_triscv.vector.tuple_nxv8i8_3t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg3_nxv2i32_triscv.vector.tuple_nxv8i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT: vlseg3e32.v v7, (a0)
+; CHECK-NEXT: vlseg3e32.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) undef, ptr %base, i64 %vl, i64 5)
- %1 = call <vscale x 2 x i32> @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0, i32 1)
- ret <vscale x 2 x i32> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0
}
-
-define <vscale x 2 x i32> @test_vlseg3_mask_nxv2i32_triscv.vector.tuple_nxv8i8_3t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @test_vlseg3_mask_nxv2i32_triscv.vector.tuple_nxv8i8_3t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg3_mask_nxv2i32_triscv.vector.tuple_nxv8i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT: vlseg3e32.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg3e32.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv8i8_3t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 5)
- %1 = call <vscale x 2 x i32> @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0, i32 1)
- ret <vscale x 2 x i32> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv4i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 3), ptr, <vscale x 4 x i1>, i64, i64, i64)
-
-define <vscale x 4 x i32> @test_vlseg3_nxv4i32_triscv.vector.tuple_nxv16i8_3t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @test_vlseg3_nxv4i32_triscv.vector.tuple_nxv16i8_3t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg3_nxv4i32_triscv.vector.tuple_nxv16i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vlseg3e32.v v6, (a0)
+; CHECK-NEXT: vlseg3e32.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, i64 %vl, i64 5)
- %1 = call <vscale x 4 x i32> @llvm.riscv.tuple.extract.nxv4i32.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0, i32 1)
- ret <vscale x 4 x i32> %1
+ ret target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0
}
-
-define <vscale x 4 x i32> @test_vlseg3_mask_nxv4i32_triscv.vector.tuple_nxv16i8_3t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @test_vlseg3_mask_nxv4i32_triscv.vector.tuple_nxv16i8_3t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: test_vlseg3_mask_nxv4i32_triscv.vector.tuple_nxv16i8_3t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vlseg3e32.v v6, (a0), v0.t
+; CHECK-NEXT: vlseg3e32.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv4i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 5)
- %1 = call <vscale x 4 x i32> @llvm.riscv.tuple.extract.nxv4i32.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0, i32 1)
- ret <vscale x 4 x i32> %1
+ ret target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv4i8_4t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 4), ptr, <vscale x 1 x i1>, i64, i64, i64)
-
-define <vscale x 1 x i32> @test_vlseg4_nxv1i32_triscv.vector.tuple_nxv4i8_4t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @test_vlseg4_nxv1i32_triscv.vector.tuple_nxv4i8_4t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg4_nxv1i32_triscv.vector.tuple_nxv4i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT: vlseg4e32.v v7, (a0)
+; CHECK-NEXT: vlseg4e32.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) undef, ptr %base, i64 %vl, i64 5)
- %1 = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0, i32 1)
- ret <vscale x 1 x i32> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0
}
-
-define <vscale x 1 x i32> @test_vlseg4_mask_nxv1i32_triscv.vector.tuple_nxv4i8_4t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @test_vlseg4_mask_nxv1i32_triscv.vector.tuple_nxv4i8_4t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg4_mask_nxv1i32_triscv.vector.tuple_nxv4i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT: vlseg4e32.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg4e32.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv4i8_4t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 5)
- %1 = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0, i32 1)
- ret <vscale x 1 x i32> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv8i8_4t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 4), ptr, <vscale x 2 x i1>, i64, i64, i64)
-
-define <vscale x 2 x i32> @test_vlseg4_nxv2i32_triscv.vector.tuple_nxv8i8_4t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @test_vlseg4_nxv2i32_triscv.vector.tuple_nxv8i8_4t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg4_nxv2i32_triscv.vector.tuple_nxv8i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT: vlseg4e32.v v7, (a0)
+; CHECK-NEXT: vlseg4e32.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) undef, ptr %base, i64 %vl, i64 5)
- %1 = call <vscale x 2 x i32> @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0, i32 1)
- ret <vscale x 2 x i32> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0
}
-
-define <vscale x 2 x i32> @test_vlseg4_mask_nxv2i32_triscv.vector.tuple_nxv8i8_4t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @test_vlseg4_mask_nxv2i32_triscv.vector.tuple_nxv8i8_4t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg4_mask_nxv2i32_triscv.vector.tuple_nxv8i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT: vlseg4e32.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg4e32.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv8i8_4t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 5)
- %1 = call <vscale x 2 x i32> @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0, i32 1)
- ret <vscale x 2 x i32> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv16i8_4t.nxv4i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 4), ptr, <vscale x 4 x i1>, i64, i64, i64)
-
-define <vscale x 4 x i32> @test_vlseg4_nxv4i32_triscv.vector.tuple_nxv16i8_4t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @test_vlseg4_nxv4i32_triscv.vector.tuple_nxv16i8_4t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg4_nxv4i32_triscv.vector.tuple_nxv16i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vlseg4e32.v v6, (a0)
+; CHECK-NEXT: vlseg4e32.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, i64 %vl, i64 5)
- %1 = call <vscale x 4 x i32> @llvm.riscv.tuple.extract.nxv4i32.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0, i32 1)
- ret <vscale x 4 x i32> %1
+ ret target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0
}
-
-define <vscale x 4 x i32> @test_vlseg4_mask_nxv4i32_triscv.vector.tuple_nxv16i8_4t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @test_vlseg4_mask_nxv4i32_triscv.vector.tuple_nxv16i8_4t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: test_vlseg4_mask_nxv4i32_triscv.vector.tuple_nxv16i8_4t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT: vlseg4e32.v v6, (a0), v0.t
+; CHECK-NEXT: vlseg4e32.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv16i8_4t.nxv4i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 5)
- %1 = call <vscale x 4 x i32> @llvm.riscv.tuple.extract.nxv4i32.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0, i32 1)
- ret <vscale x 4 x i32> %1
+ ret target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv4i8_5t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 5), ptr, <vscale x 1 x i1>, i64, i64, i64)
-
-define <vscale x 1 x i32> @test_vlseg5_nxv1i32_triscv.vector.tuple_nxv4i8_5t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @test_vlseg5_nxv1i32_triscv.vector.tuple_nxv4i8_5t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg5_nxv1i32_triscv.vector.tuple_nxv4i8_5t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT: vlseg5e32.v v7, (a0)
+; CHECK-NEXT: vlseg5e32.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) undef, ptr %base, i64 %vl, i64 5)
- %1 = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0, i32 1)
- ret <vscale x 1 x i32> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0
}
-
-define <vscale x 1 x i32> @test_vlseg5_mask_nxv1i32_triscv.vector.tuple_nxv4i8_5t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @test_vlseg5_mask_nxv1i32_triscv.vector.tuple_nxv4i8_5t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg5_mask_nxv1i32_triscv.vector.tuple_nxv4i8_5t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT: vlseg5e32.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg5e32.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv4i8_5t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 5)
- %1 = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0, i32 1)
- ret <vscale x 1 x i32> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv8i8_5t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 5), ptr, <vscale x 2 x i1>, i64, i64, i64)
-
-define <vscale x 2 x i32> @test_vlseg5_nxv2i32_triscv.vector.tuple_nxv8i8_5t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @test_vlseg5_nxv2i32_triscv.vector.tuple_nxv8i8_5t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg5_nxv2i32_triscv.vector.tuple_nxv8i8_5t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT: vlseg5e32.v v7, (a0)
+; CHECK-NEXT: vlseg5e32.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) undef, ptr %base, i64 %vl, i64 5)
- %1 = call <vscale x 2 x i32> @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0, i32 1)
- ret <vscale x 2 x i32> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0
}
-
-define <vscale x 2 x i32> @test_vlseg5_mask_nxv2i32_triscv.vector.tuple_nxv8i8_5t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @test_vlseg5_mask_nxv2i32_triscv.vector.tuple_nxv8i8_5t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg5_mask_nxv2i32_triscv.vector.tuple_nxv8i8_5t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT: vlseg5e32.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg5e32.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv8i8_5t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 5)
- %1 = call <vscale x 2 x i32> @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0, i32 1)
- ret <vscale x 2 x i32> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv4i8_6t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 6), ptr, <vscale x 1 x i1>, i64, i64, i64)
-
-define <vscale x 1 x i32> @test_vlseg6_nxv1i32_triscv.vector.tuple_nxv4i8_6t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @test_vlseg6_nxv1i32_triscv.vector.tuple_nxv4i8_6t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg6_nxv1i32_triscv.vector.tuple_nxv4i8_6t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT: vlseg6e32.v v7, (a0)
+; CHECK-NEXT: vlseg6e32.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) undef, ptr %base, i64 %vl, i64 5)
- %1 = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0, i32 1)
- ret <vscale x 1 x i32> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0
}
-
-define <vscale x 1 x i32> @test_vlseg6_mask_nxv1i32_triscv.vector.tuple_nxv4i8_6t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @test_vlseg6_mask_nxv1i32_triscv.vector.tuple_nxv4i8_6t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg6_mask_nxv1i32_triscv.vector.tuple_nxv4i8_6t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT: vlseg6e32.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg6e32.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv4i8_6t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 5)
- %1 = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0, i32 1)
- ret <vscale x 1 x i32> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv8i8_6t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 6), ptr, <vscale x 2 x i1>, i64, i64, i64)
-
-define <vscale x 2 x i32> @test_vlseg6_nxv2i32_triscv.vector.tuple_nxv8i8_6t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @test_vlseg6_nxv2i32_triscv.vector.tuple_nxv8i8_6t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg6_nxv2i32_triscv.vector.tuple_nxv8i8_6t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT: vlseg6e32.v v7, (a0)
+; CHECK-NEXT: vlseg6e32.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) undef, ptr %base, i64 %vl, i64 5)
- %1 = call <vscale x 2 x i32> @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0, i32 1)
- ret <vscale x 2 x i32> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0
}
-
-define <vscale x 2 x i32> @test_vlseg6_mask_nxv2i32_triscv.vector.tuple_nxv8i8_6t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @test_vlseg6_mask_nxv2i32_triscv.vector.tuple_nxv8i8_6t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg6_mask_nxv2i32_triscv.vector.tuple_nxv8i8_6t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT: vlseg6e32.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg6e32.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv8i8_6t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 5)
- %1 = call <vscale x 2 x i32> @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0, i32 1)
- ret <vscale x 2 x i32> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv4i8_7t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 7), ptr, <vscale x 1 x i1>, i64, i64, i64)
-
-define <vscale x 1 x i32> @test_vlseg7_nxv1i32_triscv.vector.tuple_nxv4i8_7t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @test_vlseg7_nxv1i32_triscv.vector.tuple_nxv4i8_7t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg7_nxv1i32_triscv.vector.tuple_nxv4i8_7t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT: vlseg7e32.v v7, (a0)
+; CHECK-NEXT: vlseg7e32.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) undef, ptr %base, i64 %vl, i64 5)
- %1 = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0, i32 1)
- ret <vscale x 1 x i32> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0
}
-
-define <vscale x 1 x i32> @test_vlseg7_mask_nxv1i32_triscv.vector.tuple_nxv4i8_7t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @test_vlseg7_mask_nxv1i32_triscv.vector.tuple_nxv4i8_7t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg7_mask_nxv1i32_triscv.vector.tuple_nxv4i8_7t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT: vlseg7e32.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg7e32.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv4i8_7t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 5)
- %1 = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0, i32 1)
- ret <vscale x 1 x i32> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv8i8_7t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 7), ptr, <vscale x 2 x i1>, i64, i64, i64)
-
-define <vscale x 2 x i32> @test_vlseg7_nxv2i32_triscv.vector.tuple_nxv8i8_7t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @test_vlseg7_nxv2i32_triscv.vector.tuple_nxv8i8_7t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg7_nxv2i32_triscv.vector.tuple_nxv8i8_7t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT: vlseg7e32.v v7, (a0)
+; CHECK-NEXT: vlseg7e32.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) undef, ptr %base, i64 %vl, i64 5)
- %1 = call <vscale x 2 x i32> @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0, i32 1)
- ret <vscale x 2 x i32> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0
}
-
-define <vscale x 2 x i32> @test_vlseg7_mask_nxv2i32_triscv.vector.tuple_nxv8i8_7t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @test_vlseg7_mask_nxv2i32_triscv.vector.tuple_nxv8i8_7t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg7_mask_nxv2i32_triscv.vector.tuple_nxv8i8_7t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT: vlseg7e32.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg7e32.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv8i8_7t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 5)
- %1 = call <vscale x 2 x i32> @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0, i32 1)
- ret <vscale x 2 x i32> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv4i8_8t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 8), ptr, <vscale x 1 x i1>, i64, i64, i64)
-
-define <vscale x 1 x i32> @test_vlseg8_nxv1i32_triscv.vector.tuple_nxv4i8_8t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @test_vlseg8_nxv1i32_triscv.vector.tuple_nxv4i8_8t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg8_nxv1i32_triscv.vector.tuple_nxv4i8_8t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT: vlseg8e32.v v7, (a0)
+; CHECK-NEXT: vlseg8e32.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) undef, ptr %base, i64 %vl, i64 5)
- %1 = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0, i32 1)
- ret <vscale x 1 x i32> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0
}
-
-define <vscale x 1 x i32> @test_vlseg8_mask_nxv1i32_triscv.vector.tuple_nxv4i8_8t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @test_vlseg8_mask_nxv1i32_triscv.vector.tuple_nxv4i8_8t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg8_mask_nxv1i32_triscv.vector.tuple_nxv4i8_8t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT: vlseg8e32.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg8e32.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv4i8_8t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 5)
- %1 = call <vscale x 1 x i32> @llvm.riscv.tuple.extract.nxv1i32.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0, i32 1)
- ret <vscale x 1 x i32> %1
+ ret target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0
}
-
-declare target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv8i8_8t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 8), ptr, <vscale x 2 x i1>, i64, i64, i64)
-
-define <vscale x 2 x i32> @test_vlseg8_nxv2i32_triscv.vector.tuple_nxv8i8_8t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @test_vlseg8_nxv2i32_triscv.vector.tuple_nxv8i8_8t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg8_nxv2i32_triscv.vector.tuple_nxv8i8_8t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT: vlseg8e32.v v7, (a0)
+; CHECK-NEXT: vlseg8e32.v v8, (a0)
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) undef, ptr %base, i64 %vl, i64 5)
- %1 = call <vscale x 2 x i32> @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0, i32 1)
- ret <vscale x 2 x i32> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0
}
-
-define <vscale x 2 x i32> @test_vlseg8_mask_nxv2i32_triscv.vector.tuple_nxv8i8_8t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @test_vlseg8_mask_nxv2i32_triscv.vector.tuple_nxv8i8_8t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg8_mask_nxv2i32_triscv.vector.tuple_nxv8i8_8t:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT: vlseg8e32.v v7, (a0), v0.t
+; CHECK-NEXT: vlseg8e32.v v8, (a0), v0.t
; CHECK-NEXT: ret
entry:
%0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv8i8_8t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 5)
- %1 = call <vscale x 2 x i32> @llvm.riscv.tuple.extract.nxv2i32.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0, i32 1)
- ret <vscale x 2 x i32> %1
+ ret target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0
}
-
@llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv8i8_2t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 2), ptr, <vscale x 1 x i1>, i64, i64, i64) - -define <vscale x 1 x i64> @test_vlseg2_nxv1i64_triscv.vector.tuple_nxv8i8_2t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @test_vlseg2_nxv1i64_triscv.vector.tuple_nxv8i8_2t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg2_nxv1i64_triscv.vector.tuple_nxv8i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma -; CHECK-NEXT: vlseg2e64.v v7, (a0) +; CHECK-NEXT: vlseg2e64.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) undef, ptr %base, i64 %vl, i64 6) - %1 = call <vscale x 1 x i64> @llvm.riscv.tuple.extract.nxv1i64.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0, i32 1) - ret <vscale x 1 x i64> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0 } - -define <vscale x 1 x i64> @test_vlseg2_mask_nxv1i64_triscv.vector.tuple_nxv8i8_2t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @test_vlseg2_mask_nxv1i64_triscv.vector.tuple_nxv8i8_2t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { ; CHECK-LABEL: test_vlseg2_mask_nxv1i64_triscv.vector.tuple_nxv8i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma -; CHECK-NEXT: vlseg2e64.v v7, (a0), v0.t +; CHECK-NEXT: vlseg2e64.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv8i8_2t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 6) - %1 = call <vscale x 1 x i64> @llvm.riscv.tuple.extract.nxv1i64.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0, i32 1) - ret <vscale x 1 x i64> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0 } - -declare target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv16i8_2t.nxv2i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 2), ptr, <vscale x 2 x i1>, i64, i64, i64) - -define <vscale x 2 x i64> @test_vlseg2_nxv2i64_triscv.vector.tuple_nxv16i8_2t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @test_vlseg2_nxv2i64_triscv.vector.tuple_nxv16i8_2t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg2_nxv2i64_triscv.vector.tuple_nxv16i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma -; CHECK-NEXT: vlseg2e64.v v6, (a0) +; CHECK-NEXT: vlseg2e64.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, i64 %vl, i64 6) - %1 = call <vscale x 2 x i64> @llvm.riscv.tuple.extract.nxv2i64.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0, i32 1) - ret <vscale x 2 x i64> %1 + ret target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0 } - -define <vscale x 2 x i64> @test_vlseg2_mask_nxv2i64_triscv.vector.tuple_nxv16i8_2t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @test_vlseg2_mask_nxv2i64_triscv.vector.tuple_nxv16i8_2t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) { 
; CHECK-LABEL: test_vlseg2_mask_nxv2i64_triscv.vector.tuple_nxv16i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma -; CHECK-NEXT: vlseg2e64.v v6, (a0), v0.t +; CHECK-NEXT: vlseg2e64.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv16i8_2t.nxv2i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 6) - %1 = call <vscale x 2 x i64> @llvm.riscv.tuple.extract.nxv2i64.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0, i32 1) - ret <vscale x 2 x i64> %1 + ret target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0 } - -declare target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv32i8_2t.nxv4i1(target("riscv.vector.tuple", <vscale x 32 x i8>, 2), ptr, <vscale x 4 x i1>, i64, i64, i64) - -define <vscale x 4 x i64> @test_vlseg2_nxv4i64_triscv.vector.tuple_nxv32i8_2t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @test_vlseg2_nxv4i64_triscv.vector.tuple_nxv32i8_2t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg2_nxv4i64_triscv.vector.tuple_nxv32i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma -; CHECK-NEXT: vlseg2e64.v v4, (a0) +; CHECK-NEXT: vlseg2e64.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, i64 %vl, i64 6) - %1 = call <vscale x 4 x i64> @llvm.riscv.tuple.extract.nxv4i64.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0, i32 1) - ret <vscale x 4 x i64> %1 + ret target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0 } - -define <vscale x 4 x i64> @test_vlseg2_mask_nxv4i64_triscv.vector.tuple_nxv32i8_2t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @test_vlseg2_mask_nxv4i64_triscv.vector.tuple_nxv32i8_2t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) { ; CHECK-LABEL: test_vlseg2_mask_nxv4i64_triscv.vector.tuple_nxv32i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e64, m4, ta, ma -; CHECK-NEXT: vlseg2e64.v v4, (a0), v0.t +; CHECK-NEXT: vlseg2e64.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv32i8_2t.nxv4i1(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 6) - %1 = call <vscale x 4 x i64> @llvm.riscv.tuple.extract.nxv4i64.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0, i32 1) - ret <vscale x 4 x i64> %1 + ret target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0 } - -declare target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv8i8_3t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 3), ptr, <vscale x 1 x i1>, i64, i64, i64) - -define <vscale x 1 x i64> @test_vlseg3_nxv1i64_triscv.vector.tuple_nxv8i8_3t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @test_vlseg3_nxv1i64_triscv.vector.tuple_nxv8i8_3t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg3_nxv1i64_triscv.vector.tuple_nxv8i8_3t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma -; CHECK-NEXT: 
vlseg3e64.v v7, (a0) +; CHECK-NEXT: vlseg3e64.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) undef, ptr %base, i64 %vl, i64 6) - %1 = call <vscale x 1 x i64> @llvm.riscv.tuple.extract.nxv1i64.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0, i32 1) - ret <vscale x 1 x i64> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0 } - -define <vscale x 1 x i64> @test_vlseg3_mask_nxv1i64_triscv.vector.tuple_nxv8i8_3t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @test_vlseg3_mask_nxv1i64_triscv.vector.tuple_nxv8i8_3t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { ; CHECK-LABEL: test_vlseg3_mask_nxv1i64_triscv.vector.tuple_nxv8i8_3t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma -; CHECK-NEXT: vlseg3e64.v v7, (a0), v0.t +; CHECK-NEXT: vlseg3e64.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv8i8_3t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 6) - %1 = call <vscale x 1 x i64> @llvm.riscv.tuple.extract.nxv1i64.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0, i32 1) - ret <vscale x 1 x i64> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0 } - -declare target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv2i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 3), ptr, <vscale x 2 x i1>, i64, i64, i64) - -define <vscale x 2 x i64> @test_vlseg3_nxv2i64_triscv.vector.tuple_nxv16i8_3t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @test_vlseg3_nxv2i64_triscv.vector.tuple_nxv16i8_3t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg3_nxv2i64_triscv.vector.tuple_nxv16i8_3t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma -; CHECK-NEXT: vlseg3e64.v v6, (a0) +; CHECK-NEXT: vlseg3e64.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, i64 %vl, i64 6) - %1 = call <vscale x 2 x i64> @llvm.riscv.tuple.extract.nxv2i64.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0, i32 1) - ret <vscale x 2 x i64> %1 + ret target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0 } - -define <vscale x 2 x i64> @test_vlseg3_mask_nxv2i64_triscv.vector.tuple_nxv16i8_3t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @test_vlseg3_mask_nxv2i64_triscv.vector.tuple_nxv16i8_3t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) { ; CHECK-LABEL: test_vlseg3_mask_nxv2i64_triscv.vector.tuple_nxv16i8_3t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma -; CHECK-NEXT: vlseg3e64.v v6, (a0), v0.t +; CHECK-NEXT: vlseg3e64.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv2i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 6) - %1 = call <vscale x 2 x i64> 
@llvm.riscv.tuple.extract.nxv2i64.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0, i32 1) - ret <vscale x 2 x i64> %1 + ret target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0 } - -declare target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv8i8_4t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 4), ptr, <vscale x 1 x i1>, i64, i64, i64) - -define <vscale x 1 x i64> @test_vlseg4_nxv1i64_triscv.vector.tuple_nxv8i8_4t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @test_vlseg4_nxv1i64_triscv.vector.tuple_nxv8i8_4t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg4_nxv1i64_triscv.vector.tuple_nxv8i8_4t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma -; CHECK-NEXT: vlseg4e64.v v7, (a0) +; CHECK-NEXT: vlseg4e64.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) undef, ptr %base, i64 %vl, i64 6) - %1 = call <vscale x 1 x i64> @llvm.riscv.tuple.extract.nxv1i64.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0, i32 1) - ret <vscale x 1 x i64> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0 } - -define <vscale x 1 x i64> @test_vlseg4_mask_nxv1i64_triscv.vector.tuple_nxv8i8_4t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @test_vlseg4_mask_nxv1i64_triscv.vector.tuple_nxv8i8_4t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { ; CHECK-LABEL: test_vlseg4_mask_nxv1i64_triscv.vector.tuple_nxv8i8_4t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma -; CHECK-NEXT: vlseg4e64.v v7, (a0), v0.t +; CHECK-NEXT: vlseg4e64.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv8i8_4t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 6) - %1 = call <vscale x 1 x i64> @llvm.riscv.tuple.extract.nxv1i64.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0, i32 1) - ret <vscale x 1 x i64> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0 } - -declare target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv16i8_4t.nxv2i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 4), ptr, <vscale x 2 x i1>, i64, i64, i64) - -define <vscale x 2 x i64> @test_vlseg4_nxv2i64_triscv.vector.tuple_nxv16i8_4t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @test_vlseg4_nxv2i64_triscv.vector.tuple_nxv16i8_4t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg4_nxv2i64_triscv.vector.tuple_nxv16i8_4t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma -; CHECK-NEXT: vlseg4e64.v v6, (a0) +; CHECK-NEXT: vlseg4e64.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, i64 %vl, i64 6) - %1 = call <vscale x 2 x i64> @llvm.riscv.tuple.extract.nxv2i64.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0, i32 1) - ret <vscale x 2 x i64> %1 + ret target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0 } - -define 
<vscale x 2 x i64> @test_vlseg4_mask_nxv2i64_triscv.vector.tuple_nxv16i8_4t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @test_vlseg4_mask_nxv2i64_triscv.vector.tuple_nxv16i8_4t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) { ; CHECK-LABEL: test_vlseg4_mask_nxv2i64_triscv.vector.tuple_nxv16i8_4t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e64, m2, ta, ma -; CHECK-NEXT: vlseg4e64.v v6, (a0), v0.t +; CHECK-NEXT: vlseg4e64.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv16i8_4t.nxv2i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 6) - %1 = call <vscale x 2 x i64> @llvm.riscv.tuple.extract.nxv2i64.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0, i32 1) - ret <vscale x 2 x i64> %1 + ret target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0 } - -declare target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv8i8_5t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 5), ptr, <vscale x 1 x i1>, i64, i64, i64) - -define <vscale x 1 x i64> @test_vlseg5_nxv1i64_triscv.vector.tuple_nxv8i8_5t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @test_vlseg5_nxv1i64_triscv.vector.tuple_nxv8i8_5t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg5_nxv1i64_triscv.vector.tuple_nxv8i8_5t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma -; CHECK-NEXT: vlseg5e64.v v7, (a0) +; CHECK-NEXT: vlseg5e64.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) undef, ptr %base, i64 %vl, i64 6) - %1 = call <vscale x 1 x i64> @llvm.riscv.tuple.extract.nxv1i64.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0, i32 1) - ret <vscale x 1 x i64> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0 } - -define <vscale x 1 x i64> @test_vlseg5_mask_nxv1i64_triscv.vector.tuple_nxv8i8_5t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @test_vlseg5_mask_nxv1i64_triscv.vector.tuple_nxv8i8_5t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { ; CHECK-LABEL: test_vlseg5_mask_nxv1i64_triscv.vector.tuple_nxv8i8_5t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma -; CHECK-NEXT: vlseg5e64.v v7, (a0), v0.t +; CHECK-NEXT: vlseg5e64.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv8i8_5t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 6) - %1 = call <vscale x 1 x i64> @llvm.riscv.tuple.extract.nxv1i64.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0, i32 1) - ret <vscale x 1 x i64> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0 } - -declare target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv8i8_6t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 6), ptr, <vscale x 1 x i1>, i64, i64, i64) - -define <vscale x 1 x i64> @test_vlseg6_nxv1i64_triscv.vector.tuple_nxv8i8_6t(ptr %base, i64 %vl) { +define 
target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @test_vlseg6_nxv1i64_triscv.vector.tuple_nxv8i8_6t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg6_nxv1i64_triscv.vector.tuple_nxv8i8_6t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma -; CHECK-NEXT: vlseg6e64.v v7, (a0) +; CHECK-NEXT: vlseg6e64.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) undef, ptr %base, i64 %vl, i64 6) - %1 = call <vscale x 1 x i64> @llvm.riscv.tuple.extract.nxv1i64.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0, i32 1) - ret <vscale x 1 x i64> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0 } - -define <vscale x 1 x i64> @test_vlseg6_mask_nxv1i64_triscv.vector.tuple_nxv8i8_6t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @test_vlseg6_mask_nxv1i64_triscv.vector.tuple_nxv8i8_6t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { ; CHECK-LABEL: test_vlseg6_mask_nxv1i64_triscv.vector.tuple_nxv8i8_6t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma -; CHECK-NEXT: vlseg6e64.v v7, (a0), v0.t +; CHECK-NEXT: vlseg6e64.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv8i8_6t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 6) - %1 = call <vscale x 1 x i64> @llvm.riscv.tuple.extract.nxv1i64.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0, i32 1) - ret <vscale x 1 x i64> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0 } - -declare target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv8i8_7t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 7), ptr, <vscale x 1 x i1>, i64, i64, i64) - -define <vscale x 1 x i64> @test_vlseg7_nxv1i64_triscv.vector.tuple_nxv8i8_7t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @test_vlseg7_nxv1i64_triscv.vector.tuple_nxv8i8_7t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg7_nxv1i64_triscv.vector.tuple_nxv8i8_7t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma -; CHECK-NEXT: vlseg7e64.v v7, (a0) +; CHECK-NEXT: vlseg7e64.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) undef, ptr %base, i64 %vl, i64 6) - %1 = call <vscale x 1 x i64> @llvm.riscv.tuple.extract.nxv1i64.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0, i32 1) - ret <vscale x 1 x i64> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0 } - -define <vscale x 1 x i64> @test_vlseg7_mask_nxv1i64_triscv.vector.tuple_nxv8i8_7t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @test_vlseg7_mask_nxv1i64_triscv.vector.tuple_nxv8i8_7t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { ; CHECK-LABEL: test_vlseg7_mask_nxv1i64_triscv.vector.tuple_nxv8i8_7t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma -; CHECK-NEXT: vlseg7e64.v v7, (a0), v0.t +; CHECK-NEXT: vlseg7e64.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call 
target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv8i8_7t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 6) - %1 = call <vscale x 1 x i64> @llvm.riscv.tuple.extract.nxv1i64.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0, i32 1) - ret <vscale x 1 x i64> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0 } - -declare target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv8i8_8t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 8), ptr, <vscale x 1 x i1>, i64, i64, i64) - -define <vscale x 1 x i64> @test_vlseg8_nxv1i64_triscv.vector.tuple_nxv8i8_8t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @test_vlseg8_nxv1i64_triscv.vector.tuple_nxv8i8_8t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg8_nxv1i64_triscv.vector.tuple_nxv8i8_8t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma -; CHECK-NEXT: vlseg8e64.v v7, (a0) +; CHECK-NEXT: vlseg8e64.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) undef, ptr %base, i64 %vl, i64 6) - %1 = call <vscale x 1 x i64> @llvm.riscv.tuple.extract.nxv1i64.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0, i32 1) - ret <vscale x 1 x i64> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0 } - -define <vscale x 1 x i64> @test_vlseg8_mask_nxv1i64_triscv.vector.tuple_nxv8i8_8t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @test_vlseg8_mask_nxv1i64_triscv.vector.tuple_nxv8i8_8t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { ; CHECK-LABEL: test_vlseg8_mask_nxv1i64_triscv.vector.tuple_nxv8i8_8t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma -; CHECK-NEXT: vlseg8e64.v v7, (a0), v0.t +; CHECK-NEXT: vlseg8e64.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv8i8_8t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 6) - %1 = call <vscale x 1 x i64> @llvm.riscv.tuple.extract.nxv1i64.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0, i32 1) - ret <vscale x 1 x i64> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0 } - - -define <vscale x 1 x half> @test_vlseg2_nxv1f16_triscv.vector.tuple_nxv2i8_2t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 2 x i8>, 2) @test_vlseg2_nxv1f16_triscv.vector.tuple_nxv2i8_2t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg2_nxv1f16_triscv.vector.tuple_nxv2i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma -; CHECK-NEXT: vlseg2e16.v v7, (a0) +; CHECK-NEXT: vlseg2e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv2i8_2t(target("riscv.vector.tuple", <vscale x 2 x i8>, 2) undef, ptr %base, i64 %vl, i64 4) - %1 = call <vscale x 1 x half> @llvm.riscv.tuple.extract.nxv1f16.triscv.vector.tuple_nxv2i8_2t(target("riscv.vector.tuple", <vscale x 2 x i8>, 2) %0, i32 1) - ret <vscale x 1 x half> %1 + ret target("riscv.vector.tuple", <vscale 
x 2 x i8>, 2) %0 } - -define <vscale x 1 x half> @test_vlseg2_mask_nxv1f16_triscv.vector.tuple_nxv2i8_2t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 2 x i8>, 2) @test_vlseg2_mask_nxv1f16_triscv.vector.tuple_nxv2i8_2t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { ; CHECK-LABEL: test_vlseg2_mask_nxv1f16_triscv.vector.tuple_nxv2i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma -; CHECK-NEXT: vlseg2e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg2e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv2i8_2t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 2) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 4) - %1 = call <vscale x 1 x half> @llvm.riscv.tuple.extract.nxv1f16.triscv.vector.tuple_nxv2i8_2t(target("riscv.vector.tuple", <vscale x 2 x i8>, 2) %0, i32 1) - ret <vscale x 1 x half> %1 + ret target("riscv.vector.tuple", <vscale x 2 x i8>, 2) %0 } - - -define <vscale x 2 x half> @test_vlseg2_nxv2f16_triscv.vector.tuple_nxv4i8_2t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @test_vlseg2_nxv2f16_triscv.vector.tuple_nxv4i8_2t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg2_nxv2f16_triscv.vector.tuple_nxv4i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma -; CHECK-NEXT: vlseg2e16.v v7, (a0) +; CHECK-NEXT: vlseg2e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) undef, ptr %base, i64 %vl, i64 4) - %1 = call <vscale x 2 x half> @llvm.riscv.tuple.extract.nxv2f16.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0, i32 1) - ret <vscale x 2 x half> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0 } - -define <vscale x 2 x half> @test_vlseg2_mask_nxv2f16_triscv.vector.tuple_nxv4i8_2t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @test_vlseg2_mask_nxv2f16_triscv.vector.tuple_nxv4i8_2t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) { ; CHECK-LABEL: test_vlseg2_mask_nxv2f16_triscv.vector.tuple_nxv4i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma -; CHECK-NEXT: vlseg2e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg2e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv4i8_2t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 4) - %1 = call <vscale x 2 x half> @llvm.riscv.tuple.extract.nxv2f16.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0, i32 1) - ret <vscale x 2 x half> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0 } - - -define <vscale x 4 x half> @test_vlseg2_nxv4f16_triscv.vector.tuple_nxv8i8_2t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @test_vlseg2_nxv4f16_triscv.vector.tuple_nxv8i8_2t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg2_nxv4f16_triscv.vector.tuple_nxv8i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vlseg2e16.v v7, (a0) +; CHECK-NEXT: vlseg2e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", 
<vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) undef, ptr %base, i64 %vl, i64 4) - %1 = call <vscale x 4 x half> @llvm.riscv.tuple.extract.nxv4f16.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0, i32 1) - ret <vscale x 4 x half> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0 } - -define <vscale x 4 x half> @test_vlseg2_mask_nxv4f16_triscv.vector.tuple_nxv8i8_2t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @test_vlseg2_mask_nxv4f16_triscv.vector.tuple_nxv8i8_2t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) { ; CHECK-LABEL: test_vlseg2_mask_nxv4f16_triscv.vector.tuple_nxv8i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vlseg2e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg2e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv8i8_2t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) undef, ptr %base, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 4) - %1 = call <vscale x 4 x half> @llvm.riscv.tuple.extract.nxv4f16.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0, i32 1) - ret <vscale x 4 x half> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0 } - - -define <vscale x 8 x half> @test_vlseg2_nxv8f16_triscv.vector.tuple_nxv16i8_2t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @test_vlseg2_nxv8f16_triscv.vector.tuple_nxv16i8_2t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg2_nxv8f16_triscv.vector.tuple_nxv16i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma -; CHECK-NEXT: vlseg2e16.v v6, (a0) +; CHECK-NEXT: vlseg2e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, i64 %vl, i64 4) - %1 = call <vscale x 8 x half> @llvm.riscv.tuple.extract.nxv8f16.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0, i32 1) - ret <vscale x 8 x half> %1 + ret target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0 } - -define <vscale x 8 x half> @test_vlseg2_mask_nxv8f16_triscv.vector.tuple_nxv16i8_2t(ptr %base, i64 %vl, <vscale x 8 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @test_vlseg2_mask_nxv8f16_triscv.vector.tuple_nxv16i8_2t(ptr %base, i64 %vl, <vscale x 8 x i1> %mask) { ; CHECK-LABEL: test_vlseg2_mask_nxv8f16_triscv.vector.tuple_nxv16i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma -; CHECK-NEXT: vlseg2e16.v v6, (a0), v0.t +; CHECK-NEXT: vlseg2e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv16i8_2t.nxv8i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 8 x i1> %mask, i64 %vl, i64 1, i64 4) - %1 = call <vscale x 8 x half> @llvm.riscv.tuple.extract.nxv8f16.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0, i32 1) - ret <vscale x 8 x half> %1 + ret target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0 } - - -define <vscale x 16 x half> @test_vlseg2_nxv16f16_triscv.vector.tuple_nxv32i8_2t(ptr %base, i64 %vl) { +define 
target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @test_vlseg2_nxv16f16_triscv.vector.tuple_nxv32i8_2t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg2_nxv16f16_triscv.vector.tuple_nxv32i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma -; CHECK-NEXT: vlseg2e16.v v4, (a0) +; CHECK-NEXT: vlseg2e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, i64 %vl, i64 4) - %1 = call <vscale x 16 x half> @llvm.riscv.tuple.extract.nxv16f16.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0, i32 1) - ret <vscale x 16 x half> %1 + ret target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0 } - -define <vscale x 16 x half> @test_vlseg2_mask_nxv16f16_triscv.vector.tuple_nxv32i8_2t(ptr %base, i64 %vl, <vscale x 16 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @test_vlseg2_mask_nxv16f16_triscv.vector.tuple_nxv32i8_2t(ptr %base, i64 %vl, <vscale x 16 x i1> %mask) { ; CHECK-LABEL: test_vlseg2_mask_nxv16f16_triscv.vector.tuple_nxv32i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma -; CHECK-NEXT: vlseg2e16.v v4, (a0), v0.t +; CHECK-NEXT: vlseg2e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv32i8_2t.nxv16i1(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 16 x i1> %mask, i64 %vl, i64 1, i64 4) - %1 = call <vscale x 16 x half> @llvm.riscv.tuple.extract.nxv16f16.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0, i32 1) - ret <vscale x 16 x half> %1 + ret target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0 } - - -define <vscale x 1 x half> @test_vlseg3_nxv1f16_triscv.vector.tuple_nxv2i8_3t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 2 x i8>, 3) @test_vlseg3_nxv1f16_triscv.vector.tuple_nxv2i8_3t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg3_nxv1f16_triscv.vector.tuple_nxv2i8_3t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma -; CHECK-NEXT: vlseg3e16.v v7, (a0) +; CHECK-NEXT: vlseg3e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv2i8_3t(target("riscv.vector.tuple", <vscale x 2 x i8>, 3) undef, ptr %base, i64 %vl, i64 4) - %1 = call <vscale x 1 x half> @llvm.riscv.tuple.extract.nxv1f16.triscv.vector.tuple_nxv2i8_3t(target("riscv.vector.tuple", <vscale x 2 x i8>, 3) %0, i32 1) - ret <vscale x 1 x half> %1 + ret target("riscv.vector.tuple", <vscale x 2 x i8>, 3) %0 } - -define <vscale x 1 x half> @test_vlseg3_mask_nxv1f16_triscv.vector.tuple_nxv2i8_3t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 2 x i8>, 3) @test_vlseg3_mask_nxv1f16_triscv.vector.tuple_nxv2i8_3t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { ; CHECK-LABEL: test_vlseg3_mask_nxv1f16_triscv.vector.tuple_nxv2i8_3t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma -; CHECK-NEXT: vlseg3e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg3e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv2i8_3t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 3) undef, ptr 
%base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 4) - %1 = call <vscale x 1 x half> @llvm.riscv.tuple.extract.nxv1f16.triscv.vector.tuple_nxv2i8_3t(target("riscv.vector.tuple", <vscale x 2 x i8>, 3) %0, i32 1) - ret <vscale x 1 x half> %1 + ret target("riscv.vector.tuple", <vscale x 2 x i8>, 3) %0 } - - -define <vscale x 2 x half> @test_vlseg3_nxv2f16_triscv.vector.tuple_nxv4i8_3t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @test_vlseg3_nxv2f16_triscv.vector.tuple_nxv4i8_3t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg3_nxv2f16_triscv.vector.tuple_nxv4i8_3t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma -; CHECK-NEXT: vlseg3e16.v v7, (a0) +; CHECK-NEXT: vlseg3e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) undef, ptr %base, i64 %vl, i64 4) - %1 = call <vscale x 2 x half> @llvm.riscv.tuple.extract.nxv2f16.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0, i32 1) - ret <vscale x 2 x half> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0 } - -define <vscale x 2 x half> @test_vlseg3_mask_nxv2f16_triscv.vector.tuple_nxv4i8_3t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @test_vlseg3_mask_nxv2f16_triscv.vector.tuple_nxv4i8_3t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) { ; CHECK-LABEL: test_vlseg3_mask_nxv2f16_triscv.vector.tuple_nxv4i8_3t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma -; CHECK-NEXT: vlseg3e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg3e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv4i8_3t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 4) - %1 = call <vscale x 2 x half> @llvm.riscv.tuple.extract.nxv2f16.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0, i32 1) - ret <vscale x 2 x half> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0 } - - -define <vscale x 4 x half> @test_vlseg3_nxv4f16_triscv.vector.tuple_nxv8i8_3t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @test_vlseg3_nxv4f16_triscv.vector.tuple_nxv8i8_3t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg3_nxv4f16_triscv.vector.tuple_nxv8i8_3t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vlseg3e16.v v7, (a0) +; CHECK-NEXT: vlseg3e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) undef, ptr %base, i64 %vl, i64 4) - %1 = call <vscale x 4 x half> @llvm.riscv.tuple.extract.nxv4f16.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0, i32 1) - ret <vscale x 4 x half> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0 } - -define <vscale x 4 x half> @test_vlseg3_mask_nxv4f16_triscv.vector.tuple_nxv8i8_3t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @test_vlseg3_mask_nxv4f16_triscv.vector.tuple_nxv8i8_3t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) { ; CHECK-LABEL: 
test_vlseg3_mask_nxv4f16_triscv.vector.tuple_nxv8i8_3t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vlseg3e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg3e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv8i8_3t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) undef, ptr %base, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 4) - %1 = call <vscale x 4 x half> @llvm.riscv.tuple.extract.nxv4f16.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0, i32 1) - ret <vscale x 4 x half> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0 } - - -define <vscale x 8 x half> @test_vlseg3_nxv8f16_triscv.vector.tuple_nxv16i8_3t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @test_vlseg3_nxv8f16_triscv.vector.tuple_nxv16i8_3t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg3_nxv8f16_triscv.vector.tuple_nxv16i8_3t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma -; CHECK-NEXT: vlseg3e16.v v6, (a0) +; CHECK-NEXT: vlseg3e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, i64 %vl, i64 4) - %1 = call <vscale x 8 x half> @llvm.riscv.tuple.extract.nxv8f16.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0, i32 1) - ret <vscale x 8 x half> %1 + ret target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0 } - -define <vscale x 8 x half> @test_vlseg3_mask_nxv8f16_triscv.vector.tuple_nxv16i8_3t(ptr %base, i64 %vl, <vscale x 8 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @test_vlseg3_mask_nxv8f16_triscv.vector.tuple_nxv16i8_3t(ptr %base, i64 %vl, <vscale x 8 x i1> %mask) { ; CHECK-LABEL: test_vlseg3_mask_nxv8f16_triscv.vector.tuple_nxv16i8_3t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma -; CHECK-NEXT: vlseg3e16.v v6, (a0), v0.t +; CHECK-NEXT: vlseg3e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv8i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 8 x i1> %mask, i64 %vl, i64 1, i64 4) - %1 = call <vscale x 8 x half> @llvm.riscv.tuple.extract.nxv8f16.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0, i32 1) - ret <vscale x 8 x half> %1 + ret target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0 } - - -define <vscale x 1 x half> @test_vlseg4_nxv1f16_triscv.vector.tuple_nxv2i8_4t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 2 x i8>, 4) @test_vlseg4_nxv1f16_triscv.vector.tuple_nxv2i8_4t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg4_nxv1f16_triscv.vector.tuple_nxv2i8_4t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma -; CHECK-NEXT: vlseg4e16.v v7, (a0) +; CHECK-NEXT: vlseg4e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv2i8_4t(target("riscv.vector.tuple", <vscale x 2 x i8>, 4) undef, ptr %base, i64 %vl, i64 4) - %1 = call <vscale x 1 x half> @llvm.riscv.tuple.extract.nxv1f16.triscv.vector.tuple_nxv2i8_4t(target("riscv.vector.tuple", <vscale x 2 x i8>, 4) %0, i32 1) 
- ret <vscale x 1 x half> %1 + ret target("riscv.vector.tuple", <vscale x 2 x i8>, 4) %0 } - -define <vscale x 1 x half> @test_vlseg4_mask_nxv1f16_triscv.vector.tuple_nxv2i8_4t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 2 x i8>, 4) @test_vlseg4_mask_nxv1f16_triscv.vector.tuple_nxv2i8_4t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { ; CHECK-LABEL: test_vlseg4_mask_nxv1f16_triscv.vector.tuple_nxv2i8_4t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma -; CHECK-NEXT: vlseg4e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg4e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv2i8_4t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 4) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 4) - %1 = call <vscale x 1 x half> @llvm.riscv.tuple.extract.nxv1f16.triscv.vector.tuple_nxv2i8_4t(target("riscv.vector.tuple", <vscale x 2 x i8>, 4) %0, i32 1) - ret <vscale x 1 x half> %1 + ret target("riscv.vector.tuple", <vscale x 2 x i8>, 4) %0 } - - -define <vscale x 2 x half> @test_vlseg4_nxv2f16_triscv.vector.tuple_nxv4i8_4t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @test_vlseg4_nxv2f16_triscv.vector.tuple_nxv4i8_4t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg4_nxv2f16_triscv.vector.tuple_nxv4i8_4t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma -; CHECK-NEXT: vlseg4e16.v v7, (a0) +; CHECK-NEXT: vlseg4e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) undef, ptr %base, i64 %vl, i64 4) - %1 = call <vscale x 2 x half> @llvm.riscv.tuple.extract.nxv2f16.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0, i32 1) - ret <vscale x 2 x half> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0 } - -define <vscale x 2 x half> @test_vlseg4_mask_nxv2f16_triscv.vector.tuple_nxv4i8_4t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @test_vlseg4_mask_nxv2f16_triscv.vector.tuple_nxv4i8_4t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) { ; CHECK-LABEL: test_vlseg4_mask_nxv2f16_triscv.vector.tuple_nxv4i8_4t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma -; CHECK-NEXT: vlseg4e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg4e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv4i8_4t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 4) - %1 = call <vscale x 2 x half> @llvm.riscv.tuple.extract.nxv2f16.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0, i32 1) - ret <vscale x 2 x half> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0 } - - -define <vscale x 4 x half> @test_vlseg4_nxv4f16_triscv.vector.tuple_nxv8i8_4t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @test_vlseg4_nxv4f16_triscv.vector.tuple_nxv8i8_4t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg4_nxv4f16_triscv.vector.tuple_nxv8i8_4t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vlseg4e16.v v7, (a0) +; CHECK-NEXT: vlseg4e16.v v8, (a0) 
; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) undef, ptr %base, i64 %vl, i64 4) - %1 = call <vscale x 4 x half> @llvm.riscv.tuple.extract.nxv4f16.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0, i32 1) - ret <vscale x 4 x half> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0 } - -define <vscale x 4 x half> @test_vlseg4_mask_nxv4f16_triscv.vector.tuple_nxv8i8_4t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @test_vlseg4_mask_nxv4f16_triscv.vector.tuple_nxv8i8_4t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) { ; CHECK-LABEL: test_vlseg4_mask_nxv4f16_triscv.vector.tuple_nxv8i8_4t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vlseg4e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg4e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv8i8_4t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) undef, ptr %base, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 4) - %1 = call <vscale x 4 x half> @llvm.riscv.tuple.extract.nxv4f16.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0, i32 1) - ret <vscale x 4 x half> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0 } - - -define <vscale x 8 x half> @test_vlseg4_nxv8f16_triscv.vector.tuple_nxv16i8_4t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @test_vlseg4_nxv8f16_triscv.vector.tuple_nxv16i8_4t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg4_nxv8f16_triscv.vector.tuple_nxv16i8_4t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma -; CHECK-NEXT: vlseg4e16.v v6, (a0) +; CHECK-NEXT: vlseg4e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, i64 %vl, i64 4) - %1 = call <vscale x 8 x half> @llvm.riscv.tuple.extract.nxv8f16.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0, i32 1) - ret <vscale x 8 x half> %1 + ret target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0 } - -define <vscale x 8 x half> @test_vlseg4_mask_nxv8f16_triscv.vector.tuple_nxv16i8_4t(ptr %base, i64 %vl, <vscale x 8 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @test_vlseg4_mask_nxv8f16_triscv.vector.tuple_nxv16i8_4t(ptr %base, i64 %vl, <vscale x 8 x i1> %mask) { ; CHECK-LABEL: test_vlseg4_mask_nxv8f16_triscv.vector.tuple_nxv16i8_4t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma -; CHECK-NEXT: vlseg4e16.v v6, (a0), v0.t +; CHECK-NEXT: vlseg4e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv16i8_4t.nxv8i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 8 x i1> %mask, i64 %vl, i64 1, i64 4) - %1 = call <vscale x 8 x half> @llvm.riscv.tuple.extract.nxv8f16.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0, i32 1) - ret <vscale x 8 x half> %1 + ret target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0 } - - -define <vscale x 1 x half> 
@test_vlseg5_nxv1f16_triscv.vector.tuple_nxv2i8_5t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 2 x i8>, 5) @test_vlseg5_nxv1f16_triscv.vector.tuple_nxv2i8_5t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg5_nxv1f16_triscv.vector.tuple_nxv2i8_5t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma -; CHECK-NEXT: vlseg5e16.v v7, (a0) +; CHECK-NEXT: vlseg5e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv2i8_5t(target("riscv.vector.tuple", <vscale x 2 x i8>, 5) undef, ptr %base, i64 %vl, i64 4) - %1 = call <vscale x 1 x half> @llvm.riscv.tuple.extract.nxv1f16.triscv.vector.tuple_nxv2i8_5t(target("riscv.vector.tuple", <vscale x 2 x i8>, 5) %0, i32 1) - ret <vscale x 1 x half> %1 + ret target("riscv.vector.tuple", <vscale x 2 x i8>, 5) %0 } - -define <vscale x 1 x half> @test_vlseg5_mask_nxv1f16_triscv.vector.tuple_nxv2i8_5t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 2 x i8>, 5) @test_vlseg5_mask_nxv1f16_triscv.vector.tuple_nxv2i8_5t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { ; CHECK-LABEL: test_vlseg5_mask_nxv1f16_triscv.vector.tuple_nxv2i8_5t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma -; CHECK-NEXT: vlseg5e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg5e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv2i8_5t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 5) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 4) - %1 = call <vscale x 1 x half> @llvm.riscv.tuple.extract.nxv1f16.triscv.vector.tuple_nxv2i8_5t(target("riscv.vector.tuple", <vscale x 2 x i8>, 5) %0, i32 1) - ret <vscale x 1 x half> %1 + ret target("riscv.vector.tuple", <vscale x 2 x i8>, 5) %0 } - - -define <vscale x 2 x half> @test_vlseg5_nxv2f16_triscv.vector.tuple_nxv4i8_5t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @test_vlseg5_nxv2f16_triscv.vector.tuple_nxv4i8_5t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg5_nxv2f16_triscv.vector.tuple_nxv4i8_5t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma -; CHECK-NEXT: vlseg5e16.v v7, (a0) +; CHECK-NEXT: vlseg5e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) undef, ptr %base, i64 %vl, i64 4) - %1 = call <vscale x 2 x half> @llvm.riscv.tuple.extract.nxv2f16.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0, i32 1) - ret <vscale x 2 x half> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0 } - -define <vscale x 2 x half> @test_vlseg5_mask_nxv2f16_triscv.vector.tuple_nxv4i8_5t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @test_vlseg5_mask_nxv2f16_triscv.vector.tuple_nxv4i8_5t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) { ; CHECK-LABEL: test_vlseg5_mask_nxv2f16_triscv.vector.tuple_nxv4i8_5t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma -; CHECK-NEXT: vlseg5e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg5e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 5) 
@llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv4i8_5t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 4)
-  %1 = call <vscale x 2 x half> @llvm.riscv.tuple.extract.nxv2f16.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0, i32 1)
-  ret <vscale x 2 x half> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0
}
-
-
-define <vscale x 4 x half> @test_vlseg5_nxv4f16_triscv.vector.tuple_nxv8i8_5t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @test_vlseg5_nxv4f16_triscv.vector.tuple_nxv8i8_5t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg5_nxv4f16_triscv.vector.tuple_nxv8i8_5t:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT:    vlseg5e16.v v7, (a0)
+; CHECK-NEXT:    vlseg5e16.v v8, (a0)
; CHECK-NEXT:    ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) undef, ptr %base, i64 %vl, i64 4)
-  %1 = call <vscale x 4 x half> @llvm.riscv.tuple.extract.nxv4f16.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0, i32 1)
-  ret <vscale x 4 x half> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0
}
-
-define <vscale x 4 x half> @test_vlseg5_mask_nxv4f16_triscv.vector.tuple_nxv8i8_5t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @test_vlseg5_mask_nxv4f16_triscv.vector.tuple_nxv8i8_5t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: test_vlseg5_mask_nxv4f16_triscv.vector.tuple_nxv8i8_5t:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT:    vlseg5e16.v v7, (a0), v0.t
+; CHECK-NEXT:    vlseg5e16.v v8, (a0), v0.t
; CHECK-NEXT:    ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv8i8_5t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) undef, ptr %base, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 4)
-  %1 = call <vscale x 4 x half> @llvm.riscv.tuple.extract.nxv4f16.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0, i32 1)
-  ret <vscale x 4 x half> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0
}
-
-
-define <vscale x 1 x half> @test_vlseg6_nxv1f16_triscv.vector.tuple_nxv2i8_6t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 6) @test_vlseg6_nxv1f16_triscv.vector.tuple_nxv2i8_6t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg6_nxv1f16_triscv.vector.tuple_nxv2i8_6t:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT:    vlseg6e16.v v7, (a0)
+; CHECK-NEXT:    vlseg6e16.v v8, (a0)
; CHECK-NEXT:    ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv2i8_6t(target("riscv.vector.tuple", <vscale x 2 x i8>, 6) undef, ptr %base, i64 %vl, i64 4)
-  %1 = call <vscale x 1 x half> @llvm.riscv.tuple.extract.nxv1f16.triscv.vector.tuple_nxv2i8_6t(target("riscv.vector.tuple", <vscale x 2 x i8>, 6) %0, i32 1)
-  ret <vscale x 1 x half> %1
+  ret target("riscv.vector.tuple", <vscale x 2 x i8>, 6) %0
}
-
-define <vscale x 1 x half> @test_vlseg6_mask_nxv1f16_triscv.vector.tuple_nxv2i8_6t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 6) @test_vlseg6_mask_nxv1f16_triscv.vector.tuple_nxv2i8_6t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg6_mask_nxv1f16_triscv.vector.tuple_nxv2i8_6t:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT:    vlseg6e16.v v7, (a0), v0.t
+; CHECK-NEXT:    vlseg6e16.v v8, (a0), v0.t
; CHECK-NEXT:    ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv2i8_6t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 6) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 4)
-  %1 = call <vscale x 1 x half> @llvm.riscv.tuple.extract.nxv1f16.triscv.vector.tuple_nxv2i8_6t(target("riscv.vector.tuple", <vscale x 2 x i8>, 6) %0, i32 1)
-  ret <vscale x 1 x half> %1
+  ret target("riscv.vector.tuple", <vscale x 2 x i8>, 6) %0
}
-
-
-define <vscale x 2 x half> @test_vlseg6_nxv2f16_triscv.vector.tuple_nxv4i8_6t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @test_vlseg6_nxv2f16_triscv.vector.tuple_nxv4i8_6t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg6_nxv2f16_triscv.vector.tuple_nxv4i8_6t:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT:    vlseg6e16.v v7, (a0)
+; CHECK-NEXT:    vlseg6e16.v v8, (a0)
; CHECK-NEXT:    ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) undef, ptr %base, i64 %vl, i64 4)
-  %1 = call <vscale x 2 x half> @llvm.riscv.tuple.extract.nxv2f16.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0, i32 1)
-  ret <vscale x 2 x half> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0
}
-
-define <vscale x 2 x half> @test_vlseg6_mask_nxv2f16_triscv.vector.tuple_nxv4i8_6t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @test_vlseg6_mask_nxv2f16_triscv.vector.tuple_nxv4i8_6t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg6_mask_nxv2f16_triscv.vector.tuple_nxv4i8_6t:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT:    vlseg6e16.v v7, (a0), v0.t
+; CHECK-NEXT:    vlseg6e16.v v8, (a0), v0.t
; CHECK-NEXT:    ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv4i8_6t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 4)
-  %1 = call <vscale x 2 x half> @llvm.riscv.tuple.extract.nxv2f16.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0, i32 1)
-  ret <vscale x 2 x half> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0
}
-
-
-define <vscale x 4 x half> @test_vlseg6_nxv4f16_triscv.vector.tuple_nxv8i8_6t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @test_vlseg6_nxv4f16_triscv.vector.tuple_nxv8i8_6t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg6_nxv4f16_triscv.vector.tuple_nxv8i8_6t:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT:    vlseg6e16.v v7, (a0)
+; CHECK-NEXT:    vlseg6e16.v v8, (a0)
; CHECK-NEXT:    ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) undef, ptr %base, i64 %vl, i64 4)
-  %1 = call <vscale x 4 x half> @llvm.riscv.tuple.extract.nxv4f16.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0, i32 1)
-  ret <vscale x 4 x half> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0
}
-
-define <vscale x 4 x half> @test_vlseg6_mask_nxv4f16_triscv.vector.tuple_nxv8i8_6t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @test_vlseg6_mask_nxv4f16_triscv.vector.tuple_nxv8i8_6t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: test_vlseg6_mask_nxv4f16_triscv.vector.tuple_nxv8i8_6t:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT:    vlseg6e16.v v7, (a0), v0.t
+; CHECK-NEXT:    vlseg6e16.v v8, (a0), v0.t
; CHECK-NEXT:    ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv8i8_6t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) undef, ptr %base, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 4)
-  %1 = call <vscale x 4 x half> @llvm.riscv.tuple.extract.nxv4f16.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0, i32 1)
-  ret <vscale x 4 x half> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0
}
-
-
-define <vscale x 1 x half> @test_vlseg7_nxv1f16_triscv.vector.tuple_nxv2i8_7t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 7) @test_vlseg7_nxv1f16_triscv.vector.tuple_nxv2i8_7t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg7_nxv1f16_triscv.vector.tuple_nxv2i8_7t:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT:    vlseg7e16.v v7, (a0)
+; CHECK-NEXT:    vlseg7e16.v v8, (a0)
; CHECK-NEXT:    ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv2i8_7t(target("riscv.vector.tuple", <vscale x 2 x i8>, 7) undef, ptr %base, i64 %vl, i64 4)
-  %1 = call <vscale x 1 x half> @llvm.riscv.tuple.extract.nxv1f16.triscv.vector.tuple_nxv2i8_7t(target("riscv.vector.tuple", <vscale x 2 x i8>, 7) %0, i32 1)
-  ret <vscale x 1 x half> %1
+  ret target("riscv.vector.tuple", <vscale x 2 x i8>, 7) %0
}
-
-define <vscale x 1 x half> @test_vlseg7_mask_nxv1f16_triscv.vector.tuple_nxv2i8_7t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 7) @test_vlseg7_mask_nxv1f16_triscv.vector.tuple_nxv2i8_7t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg7_mask_nxv1f16_triscv.vector.tuple_nxv2i8_7t:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT:    vlseg7e16.v v7, (a0), v0.t
+; CHECK-NEXT:    vlseg7e16.v v8, (a0), v0.t
; CHECK-NEXT:    ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv2i8_7t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 7) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 4)
-  %1 = call <vscale x 1 x half> @llvm.riscv.tuple.extract.nxv1f16.triscv.vector.tuple_nxv2i8_7t(target("riscv.vector.tuple", <vscale x 2 x i8>, 7) %0, i32 1)
-  ret <vscale x 1 x half> %1
+  ret target("riscv.vector.tuple", <vscale x 2 x i8>, 7) %0
}
-
-
-define <vscale x 2 x half> @test_vlseg7_nxv2f16_triscv.vector.tuple_nxv4i8_7t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @test_vlseg7_nxv2f16_triscv.vector.tuple_nxv4i8_7t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg7_nxv2f16_triscv.vector.tuple_nxv4i8_7t:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT:    vlseg7e16.v v7, (a0)
+; CHECK-NEXT:    vlseg7e16.v v8, (a0)
; CHECK-NEXT:    ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) undef, ptr %base, i64 %vl, i64 4)
-  %1 = call <vscale x 2 x half> @llvm.riscv.tuple.extract.nxv2f16.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0, i32 1)
-  ret <vscale x 2 x half> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0
}
-
-define <vscale x 2 x half> @test_vlseg7_mask_nxv2f16_triscv.vector.tuple_nxv4i8_7t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @test_vlseg7_mask_nxv2f16_triscv.vector.tuple_nxv4i8_7t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg7_mask_nxv2f16_triscv.vector.tuple_nxv4i8_7t:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT:    vlseg7e16.v v7, (a0), v0.t
+; CHECK-NEXT:    vlseg7e16.v v8, (a0), v0.t
; CHECK-NEXT:    ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv4i8_7t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 4)
-  %1 = call <vscale x 2 x half> @llvm.riscv.tuple.extract.nxv2f16.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0, i32 1)
-  ret <vscale x 2 x half> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0
}
-
-
-define <vscale x 4 x half> @test_vlseg7_nxv4f16_triscv.vector.tuple_nxv8i8_7t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @test_vlseg7_nxv4f16_triscv.vector.tuple_nxv8i8_7t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg7_nxv4f16_triscv.vector.tuple_nxv8i8_7t:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT:    vlseg7e16.v v7, (a0)
+; CHECK-NEXT:    vlseg7e16.v v8, (a0)
; CHECK-NEXT:    ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) undef, ptr %base, i64 %vl, i64 4)
-  %1 = call <vscale x 4 x half> @llvm.riscv.tuple.extract.nxv4f16.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0, i32 1)
-  ret <vscale x 4 x half> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0
}
-
-define <vscale x 4 x half> @test_vlseg7_mask_nxv4f16_triscv.vector.tuple_nxv8i8_7t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @test_vlseg7_mask_nxv4f16_triscv.vector.tuple_nxv8i8_7t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: test_vlseg7_mask_nxv4f16_triscv.vector.tuple_nxv8i8_7t:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT:    vlseg7e16.v v7, (a0), v0.t
+; CHECK-NEXT:    vlseg7e16.v v8, (a0), v0.t
; CHECK-NEXT:    ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv8i8_7t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) undef, ptr %base, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 4)
-  %1 = call <vscale x 4 x half> @llvm.riscv.tuple.extract.nxv4f16.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0, i32 1)
-  ret <vscale x 4 x half> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0
}
-
-
-define <vscale x 1 x half> @test_vlseg8_nxv1f16_triscv.vector.tuple_nxv2i8_8t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 8) @test_vlseg8_nxv1f16_triscv.vector.tuple_nxv2i8_8t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg8_nxv1f16_triscv.vector.tuple_nxv2i8_8t:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT:    vlseg8e16.v v7, (a0)
+; CHECK-NEXT:    vlseg8e16.v v8, (a0)
; CHECK-NEXT:    ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv2i8_8t(target("riscv.vector.tuple", <vscale x 2 x i8>, 8) undef, ptr %base, i64 %vl, i64 4)
-  %1 = call <vscale x 1 x half> @llvm.riscv.tuple.extract.nxv1f16.triscv.vector.tuple_nxv2i8_8t(target("riscv.vector.tuple", <vscale x 2 x i8>, 8) %0, i32 1)
-  ret <vscale x 1 x half> %1
+  ret target("riscv.vector.tuple", <vscale x 2 x i8>, 8) %0
}
-
-define <vscale x 1 x half> @test_vlseg8_mask_nxv1f16_triscv.vector.tuple_nxv2i8_8t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 2 x i8>, 8) @test_vlseg8_mask_nxv1f16_triscv.vector.tuple_nxv2i8_8t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg8_mask_nxv1f16_triscv.vector.tuple_nxv2i8_8t:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a1, e16, mf4, ta, ma
-; CHECK-NEXT:    vlseg8e16.v v7, (a0), v0.t
+; CHECK-NEXT:    vlseg8e16.v v8, (a0), v0.t
; CHECK-NEXT:    ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv2i8_8t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 8) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 4)
-  %1 = call <vscale x 1 x half> @llvm.riscv.tuple.extract.nxv1f16.triscv.vector.tuple_nxv2i8_8t(target("riscv.vector.tuple", <vscale x 2 x i8>, 8) %0, i32 1)
-  ret <vscale x 1 x half> %1
+  ret target("riscv.vector.tuple", <vscale x 2 x i8>, 8) %0
}
-
-
-define <vscale x 2 x half> @test_vlseg8_nxv2f16_triscv.vector.tuple_nxv4i8_8t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @test_vlseg8_nxv2f16_triscv.vector.tuple_nxv4i8_8t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg8_nxv2f16_triscv.vector.tuple_nxv4i8_8t:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT:    vlseg8e16.v v7, (a0)
+; CHECK-NEXT:    vlseg8e16.v v8, (a0)
; CHECK-NEXT:    ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) undef, ptr %base, i64 %vl, i64 4)
-  %1 = call <vscale x 2 x half> @llvm.riscv.tuple.extract.nxv2f16.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0, i32 1)
-  ret <vscale x 2 x half> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0
}
-
-define <vscale x 2 x half> @test_vlseg8_mask_nxv2f16_triscv.vector.tuple_nxv4i8_8t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @test_vlseg8_mask_nxv2f16_triscv.vector.tuple_nxv4i8_8t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg8_mask_nxv2f16_triscv.vector.tuple_nxv4i8_8t:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT:    vlseg8e16.v v7, (a0), v0.t
+; CHECK-NEXT:    vlseg8e16.v v8, (a0), v0.t
; CHECK-NEXT:    ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv4i8_8t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 4)
-  %1 = call <vscale x 2 x half> @llvm.riscv.tuple.extract.nxv2f16.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0, i32 1)
-  ret <vscale x 2 x half> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0
}
-
-
-define <vscale x 4 x half> @test_vlseg8_nxv4f16_triscv.vector.tuple_nxv8i8_8t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @test_vlseg8_nxv4f16_triscv.vector.tuple_nxv8i8_8t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg8_nxv4f16_triscv.vector.tuple_nxv8i8_8t:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT:    vlseg8e16.v v7, (a0)
+; CHECK-NEXT:    vlseg8e16.v v8, (a0)
; CHECK-NEXT:    ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) undef, ptr %base, i64 %vl, i64 4)
-  %1 = call <vscale x 4 x half> @llvm.riscv.tuple.extract.nxv4f16.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0, i32 1)
-  ret <vscale x 4 x half> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0
}
-
-define <vscale x 4 x half> @test_vlseg8_mask_nxv4f16_triscv.vector.tuple_nxv8i8_8t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @test_vlseg8_mask_nxv4f16_triscv.vector.tuple_nxv8i8_8t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: test_vlseg8_mask_nxv4f16_triscv.vector.tuple_nxv8i8_8t:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT:    vlseg8e16.v v7, (a0), v0.t
+; CHECK-NEXT:    vlseg8e16.v v8, (a0), v0.t
; CHECK-NEXT:    ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv8i8_8t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) undef, ptr %base, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 4)
-  %1 = call <vscale x 4 x half> @llvm.riscv.tuple.extract.nxv4f16.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0, i32 1)
-  ret <vscale x 4 x half> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0
}
-
-
-define <vscale x 1 x float> @test_vlseg2_nxv1f32_triscv.vector.tuple_nxv4i8_2t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @test_vlseg2_nxv1f32_triscv.vector.tuple_nxv4i8_2t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg2_nxv1f32_triscv.vector.tuple_nxv4i8_2t:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT:    vlseg2e32.v v7, (a0)
+; CHECK-NEXT:    vlseg2e32.v v8, (a0)
; CHECK-NEXT:    ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) undef, ptr %base, i64 %vl, i64 5)
-  %1 = call <vscale x 1 x float> @llvm.riscv.tuple.extract.nxv1f32.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0, i32 1)
-  ret <vscale x 1 x float> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0
}
-
-define <vscale x 1 x float> @test_vlseg2_mask_nxv1f32_triscv.vector.tuple_nxv4i8_2t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @test_vlseg2_mask_nxv1f32_triscv.vector.tuple_nxv4i8_2t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg2_mask_nxv1f32_triscv.vector.tuple_nxv4i8_2t:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT:    vlseg2e32.v v7, (a0), v0.t
+; CHECK-NEXT:    vlseg2e32.v v8, (a0), v0.t
; CHECK-NEXT:    ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv4i8_2t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 5)
-  %1 = call <vscale x 1 x float> @llvm.riscv.tuple.extract.nxv1f32.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0, i32 1)
-  ret <vscale x 1 x float> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0
}
-
-
-define <vscale x 2 x float> @test_vlseg2_nxv2f32_triscv.vector.tuple_nxv8i8_2t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @test_vlseg2_nxv2f32_triscv.vector.tuple_nxv8i8_2t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg2_nxv2f32_triscv.vector.tuple_nxv8i8_2t:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT:    vlseg2e32.v v7, (a0)
+; CHECK-NEXT:    vlseg2e32.v v8, (a0)
; CHECK-NEXT:    ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) undef, ptr %base, i64 %vl, i64 5)
-  %1 = call <vscale x 2 x float> @llvm.riscv.tuple.extract.nxv2f32.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0, i32 1)
-  ret <vscale x 2 x float> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0
}
-
-define <vscale x 2 x float> @test_vlseg2_mask_nxv2f32_triscv.vector.tuple_nxv8i8_2t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @test_vlseg2_mask_nxv2f32_triscv.vector.tuple_nxv8i8_2t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg2_mask_nxv2f32_triscv.vector.tuple_nxv8i8_2t:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT:    vlseg2e32.v v7, (a0), v0.t
+; CHECK-NEXT:    vlseg2e32.v v8, (a0), v0.t
; CHECK-NEXT:    ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv8i8_2t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 5)
-  %1 = call <vscale x 2 x float> @llvm.riscv.tuple.extract.nxv2f32.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0, i32 1)
-  ret <vscale x 2 x float> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0
}
-
-
-define <vscale x 4 x float> @test_vlseg2_nxv4f32_triscv.vector.tuple_nxv16i8_2t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @test_vlseg2_nxv4f32_triscv.vector.tuple_nxv16i8_2t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg2_nxv4f32_triscv.vector.tuple_nxv16i8_2t:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT:    vlseg2e32.v v6, (a0)
+; CHECK-NEXT:    vlseg2e32.v v8, (a0)
; CHECK-NEXT:    ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, i64 %vl, i64 5)
-  %1 = call <vscale x 4 x float> @llvm.riscv.tuple.extract.nxv4f32.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0, i32 1)
-  ret <vscale x 4 x float> %1
+  ret target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0
}
-
-define <vscale x 4 x float> @test_vlseg2_mask_nxv4f32_triscv.vector.tuple_nxv16i8_2t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @test_vlseg2_mask_nxv4f32_triscv.vector.tuple_nxv16i8_2t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: test_vlseg2_mask_nxv4f32_triscv.vector.tuple_nxv16i8_2t:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT:    vlseg2e32.v v6, (a0), v0.t
+; CHECK-NEXT:    vlseg2e32.v v8, (a0), v0.t
; CHECK-NEXT:    ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv16i8_2t.nxv4i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 5)
-  %1 = call <vscale x 4 x float> @llvm.riscv.tuple.extract.nxv4f32.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0, i32 1)
-  ret <vscale x 4 x float> %1
+  ret target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0
}
-
-
-define <vscale x 8 x float> @test_vlseg2_nxv8f32_triscv.vector.tuple_nxv32i8_2t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @test_vlseg2_nxv8f32_triscv.vector.tuple_nxv32i8_2t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg2_nxv8f32_triscv.vector.tuple_nxv32i8_2t:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT:    vlseg2e32.v v4, (a0)
+; CHECK-NEXT:    vlseg2e32.v v8, (a0)
; CHECK-NEXT:    ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, i64 %vl, i64 5)
-  %1 = call <vscale x 8 x float> @llvm.riscv.tuple.extract.nxv8f32.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0, i32 1)
-  ret <vscale x 8 x float> %1
+  ret target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0
}
-
-define <vscale x 8 x float> @test_vlseg2_mask_nxv8f32_triscv.vector.tuple_nxv32i8_2t(ptr %base, i64 %vl, <vscale x 8 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @test_vlseg2_mask_nxv8f32_triscv.vector.tuple_nxv32i8_2t(ptr %base, i64 %vl, <vscale x 8 x i1> %mask) {
; CHECK-LABEL: test_vlseg2_mask_nxv8f32_triscv.vector.tuple_nxv32i8_2t:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a1, e32, m4, ta, ma
-; CHECK-NEXT:    vlseg2e32.v v4, (a0), v0.t
+; CHECK-NEXT:    vlseg2e32.v v8, (a0), v0.t
; CHECK-NEXT:    ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv32i8_2t.nxv8i1(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 8 x i1> %mask, i64 %vl, i64 1, i64 5)
-  %1 = call <vscale x 8 x float> @llvm.riscv.tuple.extract.nxv8f32.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0, i32 1)
-  ret <vscale x 8 x float> %1
+  ret target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0
}
-
-
-define <vscale x 1 x float> @test_vlseg3_nxv1f32_triscv.vector.tuple_nxv4i8_3t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @test_vlseg3_nxv1f32_triscv.vector.tuple_nxv4i8_3t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg3_nxv1f32_triscv.vector.tuple_nxv4i8_3t:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT:    vlseg3e32.v v7, (a0)
+; CHECK-NEXT:    vlseg3e32.v v8, (a0)
; CHECK-NEXT:    ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) undef, ptr %base, i64 %vl, i64 5)
-  %1 = call <vscale x 1 x float> @llvm.riscv.tuple.extract.nxv1f32.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0, i32 1)
-  ret <vscale x 1 x float> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0
}
-
-define <vscale x 1 x float> @test_vlseg3_mask_nxv1f32_triscv.vector.tuple_nxv4i8_3t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @test_vlseg3_mask_nxv1f32_triscv.vector.tuple_nxv4i8_3t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg3_mask_nxv1f32_triscv.vector.tuple_nxv4i8_3t:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT:    vlseg3e32.v v7, (a0), v0.t
+; CHECK-NEXT:    vlseg3e32.v v8, (a0), v0.t
; CHECK-NEXT:    ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv4i8_3t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 5)
-  %1 = call <vscale x 1 x float> @llvm.riscv.tuple.extract.nxv1f32.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0, i32 1)
-  ret <vscale x 1 x float> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0
}
-
-
-define <vscale x 2 x float> @test_vlseg3_nxv2f32_triscv.vector.tuple_nxv8i8_3t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @test_vlseg3_nxv2f32_triscv.vector.tuple_nxv8i8_3t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg3_nxv2f32_triscv.vector.tuple_nxv8i8_3t:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT:    vlseg3e32.v v7, (a0)
+; CHECK-NEXT:    vlseg3e32.v v8, (a0)
; CHECK-NEXT:    ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) undef, ptr %base, i64 %vl, i64 5)
-  %1 = call <vscale x 2 x float> @llvm.riscv.tuple.extract.nxv2f32.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0, i32 1)
-  ret <vscale x 2 x float> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0
}
-
-define <vscale x 2 x float> @test_vlseg3_mask_nxv2f32_triscv.vector.tuple_nxv8i8_3t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @test_vlseg3_mask_nxv2f32_triscv.vector.tuple_nxv8i8_3t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg3_mask_nxv2f32_triscv.vector.tuple_nxv8i8_3t:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT:    vlseg3e32.v v7, (a0), v0.t
+; CHECK-NEXT:    vlseg3e32.v v8, (a0), v0.t
; CHECK-NEXT:    ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv8i8_3t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 5)
-  %1 = call <vscale x 2 x float> @llvm.riscv.tuple.extract.nxv2f32.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0, i32 1)
-  ret <vscale x 2 x float> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0
}
-
-
-define <vscale x 4 x float> @test_vlseg3_nxv4f32_triscv.vector.tuple_nxv16i8_3t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @test_vlseg3_nxv4f32_triscv.vector.tuple_nxv16i8_3t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg3_nxv4f32_triscv.vector.tuple_nxv16i8_3t:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT:    vlseg3e32.v v6, (a0)
+; CHECK-NEXT:    vlseg3e32.v v8, (a0)
; CHECK-NEXT:    ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, i64 %vl, i64 5)
-  %1 = call <vscale x 4 x float> @llvm.riscv.tuple.extract.nxv4f32.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0, i32 1)
-  ret <vscale x 4 x float> %1
+  ret target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0
}
-
-define <vscale x 4 x float> @test_vlseg3_mask_nxv4f32_triscv.vector.tuple_nxv16i8_3t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @test_vlseg3_mask_nxv4f32_triscv.vector.tuple_nxv16i8_3t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: test_vlseg3_mask_nxv4f32_triscv.vector.tuple_nxv16i8_3t:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT:    vlseg3e32.v v6, (a0), v0.t
+; CHECK-NEXT:    vlseg3e32.v v8, (a0), v0.t
; CHECK-NEXT:    ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv4i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 5)
-  %1 = call <vscale x 4 x float> @llvm.riscv.tuple.extract.nxv4f32.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0, i32 1)
-  ret <vscale x 4 x float> %1
+  ret target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0
}
-
-
-define <vscale x 1 x float> @test_vlseg4_nxv1f32_triscv.vector.tuple_nxv4i8_4t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @test_vlseg4_nxv1f32_triscv.vector.tuple_nxv4i8_4t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg4_nxv1f32_triscv.vector.tuple_nxv4i8_4t:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT:    vlseg4e32.v v7, (a0)
+; CHECK-NEXT:    vlseg4e32.v v8, (a0)
; CHECK-NEXT:    ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) undef, ptr %base, i64 %vl, i64 5)
-  %1 = call <vscale x 1 x float> @llvm.riscv.tuple.extract.nxv1f32.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0, i32 1)
-  ret <vscale x 1 x float> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0
}
-
-define <vscale x 1 x float> @test_vlseg4_mask_nxv1f32_triscv.vector.tuple_nxv4i8_4t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @test_vlseg4_mask_nxv1f32_triscv.vector.tuple_nxv4i8_4t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg4_mask_nxv1f32_triscv.vector.tuple_nxv4i8_4t:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT:    vlseg4e32.v v7, (a0), v0.t
+; CHECK-NEXT:    vlseg4e32.v v8, (a0), v0.t
; CHECK-NEXT:    ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv4i8_4t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 5)
-  %1 = call <vscale x 1 x float> @llvm.riscv.tuple.extract.nxv1f32.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0, i32 1)
-  ret <vscale x 1 x float> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0
}
-
-
-define <vscale x 2 x float> @test_vlseg4_nxv2f32_triscv.vector.tuple_nxv8i8_4t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @test_vlseg4_nxv2f32_triscv.vector.tuple_nxv8i8_4t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg4_nxv2f32_triscv.vector.tuple_nxv8i8_4t:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT:    vlseg4e32.v v7, (a0)
+; CHECK-NEXT:    vlseg4e32.v v8, (a0)
; CHECK-NEXT:    ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) undef, ptr %base, i64 %vl, i64 5)
-  %1 = call <vscale x 2 x float> @llvm.riscv.tuple.extract.nxv2f32.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0, i32 1)
-  ret <vscale x 2 x float> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0
}
-
-define <vscale x 2 x float> @test_vlseg4_mask_nxv2f32_triscv.vector.tuple_nxv8i8_4t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @test_vlseg4_mask_nxv2f32_triscv.vector.tuple_nxv8i8_4t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg4_mask_nxv2f32_triscv.vector.tuple_nxv8i8_4t:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT:    vlseg4e32.v v7, (a0), v0.t
+; CHECK-NEXT:    vlseg4e32.v v8, (a0), v0.t
; CHECK-NEXT:    ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv8i8_4t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 5)
-  %1 = call <vscale x 2 x float> @llvm.riscv.tuple.extract.nxv2f32.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0, i32 1)
-  ret <vscale x 2 x float> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0
}
-
-
-define <vscale x 4 x float> @test_vlseg4_nxv4f32_triscv.vector.tuple_nxv16i8_4t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @test_vlseg4_nxv4f32_triscv.vector.tuple_nxv16i8_4t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg4_nxv4f32_triscv.vector.tuple_nxv16i8_4t:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT:    vlseg4e32.v v6, (a0)
+; CHECK-NEXT:    vlseg4e32.v v8, (a0)
; CHECK-NEXT:    ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, i64 %vl, i64 5)
-  %1 = call <vscale x 4 x float> @llvm.riscv.tuple.extract.nxv4f32.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0, i32 1)
-  ret <vscale x 4 x float> %1
+  ret target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0
}
-
-define <vscale x 4 x float> @test_vlseg4_mask_nxv4f32_triscv.vector.tuple_nxv16i8_4t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @test_vlseg4_mask_nxv4f32_triscv.vector.tuple_nxv16i8_4t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: test_vlseg4_mask_nxv4f32_triscv.vector.tuple_nxv16i8_4t:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a1, e32, m2, ta, ma
-; CHECK-NEXT:    vlseg4e32.v v6, (a0), v0.t
+; CHECK-NEXT:    vlseg4e32.v v8, (a0), v0.t
; CHECK-NEXT:    ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv16i8_4t.nxv4i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 5)
-  %1 = call <vscale x 4 x float> @llvm.riscv.tuple.extract.nxv4f32.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0, i32 1)
-  ret <vscale x 4 x float> %1
+  ret target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0
}
-
-
-define <vscale x 1 x float> @test_vlseg5_nxv1f32_triscv.vector.tuple_nxv4i8_5t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @test_vlseg5_nxv1f32_triscv.vector.tuple_nxv4i8_5t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg5_nxv1f32_triscv.vector.tuple_nxv4i8_5t:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT:    vlseg5e32.v v7, (a0)
+; CHECK-NEXT:    vlseg5e32.v v8, (a0)
; CHECK-NEXT:    ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) undef, ptr %base, i64 %vl, i64 5)
-  %1 = call <vscale x 1 x float> @llvm.riscv.tuple.extract.nxv1f32.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0, i32 1)
-  ret <vscale x 1 x float> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0
}
-
-define <vscale x 1 x float> @test_vlseg5_mask_nxv1f32_triscv.vector.tuple_nxv4i8_5t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @test_vlseg5_mask_nxv1f32_triscv.vector.tuple_nxv4i8_5t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg5_mask_nxv1f32_triscv.vector.tuple_nxv4i8_5t:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT:    vlseg5e32.v v7, (a0), v0.t
+; CHECK-NEXT:    vlseg5e32.v v8, (a0), v0.t
; CHECK-NEXT:    ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv4i8_5t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 5)
-  %1 = call <vscale x 1 x float> @llvm.riscv.tuple.extract.nxv1f32.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0, i32 1)
-  ret <vscale x 1 x float> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0
}
-
-
-define <vscale x 2 x float> @test_vlseg5_nxv2f32_triscv.vector.tuple_nxv8i8_5t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @test_vlseg5_nxv2f32_triscv.vector.tuple_nxv8i8_5t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg5_nxv2f32_triscv.vector.tuple_nxv8i8_5t:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT:    vlseg5e32.v v7, (a0)
+; CHECK-NEXT:    vlseg5e32.v v8, (a0)
; CHECK-NEXT:    ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) undef, ptr %base, i64 %vl, i64 5)
-  %1 = call <vscale x 2 x float> @llvm.riscv.tuple.extract.nxv2f32.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0, i32 1)
-  ret <vscale x 2 x float> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0
}
-
-define <vscale x 2 x float> @test_vlseg5_mask_nxv2f32_triscv.vector.tuple_nxv8i8_5t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @test_vlseg5_mask_nxv2f32_triscv.vector.tuple_nxv8i8_5t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg5_mask_nxv2f32_triscv.vector.tuple_nxv8i8_5t:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT:    vlseg5e32.v v7, (a0), v0.t
+; CHECK-NEXT:    vlseg5e32.v v8, (a0), v0.t
; CHECK-NEXT:    ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv8i8_5t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 5)
-  %1 = call <vscale x 2 x float> @llvm.riscv.tuple.extract.nxv2f32.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0, i32 1)
-  ret <vscale x 2 x float> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0
}
-
-
-define <vscale x 1 x float> @test_vlseg6_nxv1f32_triscv.vector.tuple_nxv4i8_6t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @test_vlseg6_nxv1f32_triscv.vector.tuple_nxv4i8_6t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg6_nxv1f32_triscv.vector.tuple_nxv4i8_6t:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT:    vlseg6e32.v v7, (a0)
+; CHECK-NEXT:    vlseg6e32.v v8, (a0)
; CHECK-NEXT:    ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) undef, ptr %base, i64 %vl, i64 5)
-  %1 = call <vscale x 1 x float> @llvm.riscv.tuple.extract.nxv1f32.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0, i32 1)
-  ret <vscale x 1 x float> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0
}
-
-define <vscale x 1 x float> @test_vlseg6_mask_nxv1f32_triscv.vector.tuple_nxv4i8_6t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @test_vlseg6_mask_nxv1f32_triscv.vector.tuple_nxv4i8_6t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg6_mask_nxv1f32_triscv.vector.tuple_nxv4i8_6t:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT:    vlseg6e32.v v7, (a0), v0.t
+; CHECK-NEXT:    vlseg6e32.v v8, (a0), v0.t
; CHECK-NEXT:    ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv4i8_6t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 5)
-  %1 = call <vscale x 1 x float> @llvm.riscv.tuple.extract.nxv1f32.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0, i32 1)
-  ret <vscale x 1 x float> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0
}
-
-
-define <vscale x 2 x float> @test_vlseg6_nxv2f32_triscv.vector.tuple_nxv8i8_6t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @test_vlseg6_nxv2f32_triscv.vector.tuple_nxv8i8_6t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg6_nxv2f32_triscv.vector.tuple_nxv8i8_6t:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT:    vlseg6e32.v v7, (a0)
+; CHECK-NEXT:    vlseg6e32.v v8, (a0)
; CHECK-NEXT:    ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) undef, ptr %base, i64 %vl, i64 5)
-  %1 = call <vscale x 2 x float> @llvm.riscv.tuple.extract.nxv2f32.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0, i32 1)
-  ret <vscale x 2 x float> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0
}
-
-define <vscale x 2 x float> @test_vlseg6_mask_nxv2f32_triscv.vector.tuple_nxv8i8_6t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @test_vlseg6_mask_nxv2f32_triscv.vector.tuple_nxv8i8_6t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg6_mask_nxv2f32_triscv.vector.tuple_nxv8i8_6t:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT:    vlseg6e32.v v7, (a0), v0.t
+; CHECK-NEXT:    vlseg6e32.v v8, (a0), v0.t
; CHECK-NEXT:    ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv8i8_6t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 5)
-  %1 = call <vscale x 2 x float> @llvm.riscv.tuple.extract.nxv2f32.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0, i32 1)
-  ret <vscale x 2 x float> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0
}
-
-
-define <vscale x 1 x float> @test_vlseg7_nxv1f32_triscv.vector.tuple_nxv4i8_7t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @test_vlseg7_nxv1f32_triscv.vector.tuple_nxv4i8_7t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg7_nxv1f32_triscv.vector.tuple_nxv4i8_7t:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT:    vlseg7e32.v v7, (a0)
+; CHECK-NEXT:    vlseg7e32.v v8, (a0)
; CHECK-NEXT:    ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) undef, ptr %base, i64 %vl, i64 5)
-  %1 = call <vscale x 1 x float> @llvm.riscv.tuple.extract.nxv1f32.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0, i32 1)
-  ret <vscale x 1 x float> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0
}
-
-define <vscale x 1 x float> @test_vlseg7_mask_nxv1f32_triscv.vector.tuple_nxv4i8_7t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @test_vlseg7_mask_nxv1f32_triscv.vector.tuple_nxv4i8_7t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg7_mask_nxv1f32_triscv.vector.tuple_nxv4i8_7t:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT:    vlseg7e32.v v7, (a0), v0.t
+; CHECK-NEXT:    vlseg7e32.v v8, (a0), v0.t
; CHECK-NEXT:    ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv4i8_7t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 5)
-  %1 = call <vscale x 1 x float> @llvm.riscv.tuple.extract.nxv1f32.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0, i32 1)
-  ret <vscale x 1 x float> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0
}
-
-
-define <vscale x 2 x float> @test_vlseg7_nxv2f32_triscv.vector.tuple_nxv8i8_7t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @test_vlseg7_nxv2f32_triscv.vector.tuple_nxv8i8_7t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg7_nxv2f32_triscv.vector.tuple_nxv8i8_7t:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT:    vlseg7e32.v v7, (a0)
+; CHECK-NEXT:    vlseg7e32.v v8, (a0)
; CHECK-NEXT:    ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) undef, ptr %base, i64 %vl, i64 5)
-  %1 = call <vscale x 2 x float> @llvm.riscv.tuple.extract.nxv2f32.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0, i32 1)
-  ret <vscale x 2 x float> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0
}
-
-define <vscale x 2 x float> @test_vlseg7_mask_nxv2f32_triscv.vector.tuple_nxv8i8_7t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @test_vlseg7_mask_nxv2f32_triscv.vector.tuple_nxv8i8_7t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg7_mask_nxv2f32_triscv.vector.tuple_nxv8i8_7t:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT:    vlseg7e32.v v7, (a0), v0.t
+; CHECK-NEXT:    vlseg7e32.v v8, (a0), v0.t
; CHECK-NEXT:    ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv8i8_7t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 5)
-  %1 = call <vscale x 2 x float> @llvm.riscv.tuple.extract.nxv2f32.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0, i32 1)
-  ret <vscale x 2 x float> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0
}
-
-
-define <vscale x 1 x float> @test_vlseg8_nxv1f32_triscv.vector.tuple_nxv4i8_8t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @test_vlseg8_nxv1f32_triscv.vector.tuple_nxv4i8_8t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg8_nxv1f32_triscv.vector.tuple_nxv4i8_8t:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT:    vlseg8e32.v v7, (a0)
+; CHECK-NEXT:    vlseg8e32.v v8, (a0)
; CHECK-NEXT:    ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) undef, ptr %base, i64 %vl, i64 5)
-  %1 = call <vscale x 1 x float> @llvm.riscv.tuple.extract.nxv1f32.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0, i32 1)
-  ret <vscale x 1 x float> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0
}
-
-define <vscale x 1 x float> @test_vlseg8_mask_nxv1f32_triscv.vector.tuple_nxv4i8_8t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @test_vlseg8_mask_nxv1f32_triscv.vector.tuple_nxv4i8_8t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg8_mask_nxv1f32_triscv.vector.tuple_nxv4i8_8t:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
-; CHECK-NEXT:    vlseg8e32.v v7, (a0), v0.t
+; CHECK-NEXT:    vlseg8e32.v v8, (a0), v0.t
; CHECK-NEXT:    ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv4i8_8t.nxv1i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 5)
-  %1 = call <vscale x 1 x float> @llvm.riscv.tuple.extract.nxv1f32.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0, i32 1)
-  ret <vscale x 1 x float> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0
}
-
-
-define <vscale x 2 x float> @test_vlseg8_nxv2f32_triscv.vector.tuple_nxv8i8_8t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @test_vlseg8_nxv2f32_triscv.vector.tuple_nxv8i8_8t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg8_nxv2f32_triscv.vector.tuple_nxv8i8_8t:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT:    vlseg8e32.v v7, (a0)
+; CHECK-NEXT:    vlseg8e32.v v8, (a0)
; CHECK-NEXT:    ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) undef, ptr %base, i64 %vl, i64 5)
-  %1 = call <vscale x 2 x float> @llvm.riscv.tuple.extract.nxv2f32.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0, i32 1)
-  ret <vscale x 2 x float> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0
}
-
-define <vscale x 2 x float> @test_vlseg8_mask_nxv2f32_triscv.vector.tuple_nxv8i8_8t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @test_vlseg8_mask_nxv2f32_triscv.vector.tuple_nxv8i8_8t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg8_mask_nxv2f32_triscv.vector.tuple_nxv8i8_8t:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
-; CHECK-NEXT:    vlseg8e32.v v7, (a0), v0.t
+; CHECK-NEXT:    vlseg8e32.v v8, (a0), v0.t
; CHECK-NEXT:    ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv8i8_8t.nxv2i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 5)
-  %1 = call <vscale x 2 x float> @llvm.riscv.tuple.extract.nxv2f32.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0, i32 1)
-  ret <vscale x 2 x float> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0
}
-
-
-define <vscale x 1 x double> @test_vlseg2_nxv1f64_triscv.vector.tuple_nxv8i8_2t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @test_vlseg2_nxv1f64_triscv.vector.tuple_nxv8i8_2t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg2_nxv1f64_triscv.vector.tuple_nxv8i8_2t:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, ma
-; CHECK-NEXT:    vlseg2e64.v v7, (a0)
+; CHECK-NEXT:    vlseg2e64.v v8, (a0)
; CHECK-NEXT:    ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) undef, ptr %base, i64 %vl, i64 6)
-  %1 = call <vscale x 1 x double> @llvm.riscv.tuple.extract.nxv1f64.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0, i32 1)
-  ret <vscale x 1 x double> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0
}
-
-define <vscale x 1 x double> @test_vlseg2_mask_nxv1f64_triscv.vector.tuple_nxv8i8_2t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @test_vlseg2_mask_nxv1f64_triscv.vector.tuple_nxv8i8_2t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg2_mask_nxv1f64_triscv.vector.tuple_nxv8i8_2t:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, ma
-; CHECK-NEXT:    vlseg2e64.v v7, (a0), v0.t
+; CHECK-NEXT:    vlseg2e64.v v8, (a0), v0.t
; CHECK-NEXT:    ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv8i8_2t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 6)
-  %1 = call <vscale x 1 x double> @llvm.riscv.tuple.extract.nxv1f64.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0, i32 1)
-  ret <vscale x 1 x double> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0
}
-
-
-define <vscale x 2 x double> @test_vlseg2_nxv2f64_triscv.vector.tuple_nxv16i8_2t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @test_vlseg2_nxv2f64_triscv.vector.tuple_nxv16i8_2t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg2_nxv2f64_triscv.vector.tuple_nxv16i8_2t:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT:    vlseg2e64.v v6, (a0)
+; CHECK-NEXT:    vlseg2e64.v v8, (a0)
; CHECK-NEXT:    ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, i64 %vl, i64 6)
-  %1 = call <vscale x 2 x double> @llvm.riscv.tuple.extract.nxv2f64.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0, i32 1)
-  ret <vscale x 2 x double> %1
+  ret target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0
}
-
-define <vscale x 2 x double> @test_vlseg2_mask_nxv2f64_triscv.vector.tuple_nxv16i8_2t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @test_vlseg2_mask_nxv2f64_triscv.vector.tuple_nxv16i8_2t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg2_mask_nxv2f64_triscv.vector.tuple_nxv16i8_2t:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT:    vlseg2e64.v v6, (a0), v0.t
+; CHECK-NEXT:    vlseg2e64.v v8, (a0), v0.t
; CHECK-NEXT:    ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv16i8_2t.nxv2i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 6)
-  %1 = call <vscale x 2 x double> @llvm.riscv.tuple.extract.nxv2f64.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0, i32 1)
-  ret <vscale x 2 x double> %1
+  ret target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0
}
-
-
-define <vscale x 4 x double> @test_vlseg2_nxv4f64_triscv.vector.tuple_nxv32i8_2t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @test_vlseg2_nxv4f64_triscv.vector.tuple_nxv32i8_2t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg2_nxv4f64_triscv.vector.tuple_nxv32i8_2t:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
-; CHECK-NEXT:    vlseg2e64.v v4, (a0)
+; CHECK-NEXT:    vlseg2e64.v v8, (a0)
; CHECK-NEXT:    ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, i64 %vl, i64 6)
-  %1 = call <vscale x 4 x double> @llvm.riscv.tuple.extract.nxv4f64.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0, i32 1)
-  ret <vscale x 4 x double> %1
+  ret target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0
}
-
-define <vscale x 4 x double> @test_vlseg2_mask_nxv4f64_triscv.vector.tuple_nxv32i8_2t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @test_vlseg2_mask_nxv4f64_triscv.vector.tuple_nxv32i8_2t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
; CHECK-LABEL: test_vlseg2_mask_nxv4f64_triscv.vector.tuple_nxv32i8_2t:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a1, e64, m4, ta, ma
-; CHECK-NEXT:    vlseg2e64.v v4, (a0), v0.t
+; CHECK-NEXT:    vlseg2e64.v v8, (a0), v0.t
; CHECK-NEXT:    ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv32i8_2t.nxv4i1(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 6)
-  %1 = call <vscale x 4 x double> @llvm.riscv.tuple.extract.nxv4f64.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0, i32 1)
-  ret <vscale x 4 x double> %1
+  ret target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0
}
-
-
-define <vscale x 1 x double> @test_vlseg3_nxv1f64_triscv.vector.tuple_nxv8i8_3t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @test_vlseg3_nxv1f64_triscv.vector.tuple_nxv8i8_3t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg3_nxv1f64_triscv.vector.tuple_nxv8i8_3t:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, ma
-; CHECK-NEXT:    vlseg3e64.v v7, (a0)
+; CHECK-NEXT:    vlseg3e64.v v8, (a0)
; CHECK-NEXT:    ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) undef, ptr %base, i64 %vl, i64 6)
-  %1 = call <vscale x 1 x double> @llvm.riscv.tuple.extract.nxv1f64.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0, i32 1)
-  ret <vscale x 1 x double> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0
}
-
-define <vscale x 1 x double> @test_vlseg3_mask_nxv1f64_triscv.vector.tuple_nxv8i8_3t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @test_vlseg3_mask_nxv1f64_triscv.vector.tuple_nxv8i8_3t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg3_mask_nxv1f64_triscv.vector.tuple_nxv8i8_3t:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, ma
-; CHECK-NEXT:    vlseg3e64.v v7, (a0), v0.t
+; CHECK-NEXT:    vlseg3e64.v v8, (a0), v0.t
; CHECK-NEXT:    ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv8i8_3t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 6)
-  %1 = call <vscale x 1 x double> @llvm.riscv.tuple.extract.nxv1f64.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0, i32 1)
-  ret <vscale x 1 x double> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0
}
-
-
-define <vscale x 2 x double> @test_vlseg3_nxv2f64_triscv.vector.tuple_nxv16i8_3t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @test_vlseg3_nxv2f64_triscv.vector.tuple_nxv16i8_3t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg3_nxv2f64_triscv.vector.tuple_nxv16i8_3t:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT:    vlseg3e64.v v6, (a0)
+; CHECK-NEXT:    vlseg3e64.v v8, (a0)
; CHECK-NEXT:    ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, i64 %vl, i64 6)
-  %1 = call <vscale x 2 x double> @llvm.riscv.tuple.extract.nxv2f64.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0, i32 1)
-  ret <vscale x 2 x double> %1
+  ret target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0
}
-
-define <vscale x 2 x double> @test_vlseg3_mask_nxv2f64_triscv.vector.tuple_nxv16i8_3t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @test_vlseg3_mask_nxv2f64_triscv.vector.tuple_nxv16i8_3t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg3_mask_nxv2f64_triscv.vector.tuple_nxv16i8_3t:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT:    vlseg3e64.v v6, (a0), v0.t
+; CHECK-NEXT:    vlseg3e64.v v8, (a0), v0.t
; CHECK-NEXT:    ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv2i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 6)
-  %1 = call <vscale x 2 x double> @llvm.riscv.tuple.extract.nxv2f64.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0, i32 1)
-  ret <vscale x 2 x double> %1
+  ret target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0
}
-
-
-define <vscale x 1 x double> @test_vlseg4_nxv1f64_triscv.vector.tuple_nxv8i8_4t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @test_vlseg4_nxv1f64_triscv.vector.tuple_nxv8i8_4t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg4_nxv1f64_triscv.vector.tuple_nxv8i8_4t:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, ma
-; CHECK-NEXT:    vlseg4e64.v v7, (a0)
+; CHECK-NEXT:    vlseg4e64.v v8, (a0)
; CHECK-NEXT:    ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) undef, ptr %base, i64 %vl, i64 6)
-  %1 = call <vscale x 1 x double> @llvm.riscv.tuple.extract.nxv1f64.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0, i32 1)
-  ret <vscale x 1 x double> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0
}
-
-define <vscale x 1 x double> @test_vlseg4_mask_nxv1f64_triscv.vector.tuple_nxv8i8_4t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @test_vlseg4_mask_nxv1f64_triscv.vector.tuple_nxv8i8_4t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
; CHECK-LABEL: test_vlseg4_mask_nxv1f64_triscv.vector.tuple_nxv8i8_4t:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, ma
-; CHECK-NEXT:    vlseg4e64.v v7, (a0), v0.t
+; CHECK-NEXT:    vlseg4e64.v v8, (a0), v0.t
; CHECK-NEXT:    ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv8i8_4t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 6)
-  %1 = call <vscale x 1 x double> @llvm.riscv.tuple.extract.nxv1f64.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0, i32 1)
-  ret <vscale x 1 x double> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0
}
-
-
-define <vscale x 2 x double> @test_vlseg4_nxv2f64_triscv.vector.tuple_nxv16i8_4t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @test_vlseg4_nxv2f64_triscv.vector.tuple_nxv16i8_4t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg4_nxv2f64_triscv.vector.tuple_nxv16i8_4t:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT:    vlseg4e64.v v6, (a0)
+; CHECK-NEXT:    vlseg4e64.v v8, (a0)
; CHECK-NEXT:    ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, i64 %vl, i64 6)
-  %1 = call <vscale x 2 x double> @llvm.riscv.tuple.extract.nxv2f64.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0, i32 1)
-  ret <vscale x 2 x double> %1
+  ret target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0
}
-
-define <vscale x 2 x double> @test_vlseg4_mask_nxv2f64_triscv.vector.tuple_nxv16i8_4t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @test_vlseg4_mask_nxv2f64_triscv.vector.tuple_nxv16i8_4t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
; CHECK-LABEL: test_vlseg4_mask_nxv2f64_triscv.vector.tuple_nxv16i8_4t:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a1, e64, m2, ta, ma
-; CHECK-NEXT:    vlseg4e64.v v6, (a0), v0.t
+; CHECK-NEXT:    vlseg4e64.v v8, (a0), v0.t
; CHECK-NEXT:    ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv16i8_4t.nxv2i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 6)
-  %1 = call <vscale x 2 x double> @llvm.riscv.tuple.extract.nxv2f64.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0, i32 1)
-  ret <vscale x 2 x double> %1
+  ret target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0
}
-
-
-define <vscale x 1 x double> @test_vlseg5_nxv1f64_triscv.vector.tuple_nxv8i8_5t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @test_vlseg5_nxv1f64_triscv.vector.tuple_nxv8i8_5t(ptr %base, i64 %vl) {
; CHECK-LABEL: test_vlseg5_nxv1f64_triscv.vector.tuple_nxv8i8_5t:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vsetvli zero, a1, e64, m1, ta, ma
-; CHECK-NEXT:    vlseg5e64.v v7, (a0)
+; CHECK-NEXT:    vlseg5e64.v v8, (a0)
; CHECK-NEXT:    ret
entry:
  %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) undef, ptr %base, i64 %vl, i64 6)
-  %1 = call <vscale x 1 x double> @llvm.riscv.tuple.extract.nxv1f64.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0, i32 1)
-  ret <vscale x 1 x double> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0
}
-
-define <vscale x 1 x double> @test_vlseg5_mask_nxv1f64_triscv.vector.tuple_nxv8i8_5t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 5)
@test_vlseg5_mask_nxv1f64_triscv.vector.tuple_nxv8i8_5t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { ; CHECK-LABEL: test_vlseg5_mask_nxv1f64_triscv.vector.tuple_nxv8i8_5t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma -; CHECK-NEXT: vlseg5e64.v v7, (a0), v0.t +; CHECK-NEXT: vlseg5e64.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv8i8_5t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 6) - %1 = call <vscale x 1 x double> @llvm.riscv.tuple.extract.nxv1f64.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0, i32 1) - ret <vscale x 1 x double> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0 } - - -define <vscale x 1 x double> @test_vlseg6_nxv1f64_triscv.vector.tuple_nxv8i8_6t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @test_vlseg6_nxv1f64_triscv.vector.tuple_nxv8i8_6t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg6_nxv1f64_triscv.vector.tuple_nxv8i8_6t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma -; CHECK-NEXT: vlseg6e64.v v7, (a0) +; CHECK-NEXT: vlseg6e64.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) undef, ptr %base, i64 %vl, i64 6) - %1 = call <vscale x 1 x double> @llvm.riscv.tuple.extract.nxv1f64.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0, i32 1) - ret <vscale x 1 x double> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0 } - -define <vscale x 1 x double> @test_vlseg6_mask_nxv1f64_triscv.vector.tuple_nxv8i8_6t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @test_vlseg6_mask_nxv1f64_triscv.vector.tuple_nxv8i8_6t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { ; CHECK-LABEL: test_vlseg6_mask_nxv1f64_triscv.vector.tuple_nxv8i8_6t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma -; CHECK-NEXT: vlseg6e64.v v7, (a0), v0.t +; CHECK-NEXT: vlseg6e64.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv8i8_6t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 6) - %1 = call <vscale x 1 x double> @llvm.riscv.tuple.extract.nxv1f64.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0, i32 1) - ret <vscale x 1 x double> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0 } - - -define <vscale x 1 x double> @test_vlseg7_nxv1f64_triscv.vector.tuple_nxv8i8_7t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @test_vlseg7_nxv1f64_triscv.vector.tuple_nxv8i8_7t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg7_nxv1f64_triscv.vector.tuple_nxv8i8_7t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma -; CHECK-NEXT: vlseg7e64.v v7, (a0) +; CHECK-NEXT: vlseg7e64.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) undef, ptr %base, i64 %vl, i64 6) - %1 = call <vscale x 1 x double> 
@llvm.riscv.tuple.extract.nxv1f64.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0, i32 1) - ret <vscale x 1 x double> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0 } - -define <vscale x 1 x double> @test_vlseg7_mask_nxv1f64_triscv.vector.tuple_nxv8i8_7t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @test_vlseg7_mask_nxv1f64_triscv.vector.tuple_nxv8i8_7t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { ; CHECK-LABEL: test_vlseg7_mask_nxv1f64_triscv.vector.tuple_nxv8i8_7t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma -; CHECK-NEXT: vlseg7e64.v v7, (a0), v0.t +; CHECK-NEXT: vlseg7e64.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv8i8_7t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 6) - %1 = call <vscale x 1 x double> @llvm.riscv.tuple.extract.nxv1f64.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0, i32 1) - ret <vscale x 1 x double> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0 } - - -define <vscale x 1 x double> @test_vlseg8_nxv1f64_triscv.vector.tuple_nxv8i8_8t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @test_vlseg8_nxv1f64_triscv.vector.tuple_nxv8i8_8t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg8_nxv1f64_triscv.vector.tuple_nxv8i8_8t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma -; CHECK-NEXT: vlseg8e64.v v7, (a0) +; CHECK-NEXT: vlseg8e64.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) undef, ptr %base, i64 %vl, i64 6) - %1 = call <vscale x 1 x double> @llvm.riscv.tuple.extract.nxv1f64.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0, i32 1) - ret <vscale x 1 x double> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0 } - -define <vscale x 1 x double> @test_vlseg8_mask_nxv1f64_triscv.vector.tuple_nxv8i8_8t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @test_vlseg8_mask_nxv1f64_triscv.vector.tuple_nxv8i8_8t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { ; CHECK-LABEL: test_vlseg8_mask_nxv1f64_triscv.vector.tuple_nxv8i8_8t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e64, m1, ta, ma -; CHECK-NEXT: vlseg8e64.v v7, (a0), v0.t +; CHECK-NEXT: vlseg8e64.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv8i8_8t.nxv1i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 6) - %1 = call <vscale x 1 x double> @llvm.riscv.tuple.extract.nxv1f64.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0, i32 1) - ret <vscale x 1 x double> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0 } - - -define <vscale x 1 x bfloat> @test_vlseg2_nxv1bf16_triscv.vector.tuple_nxv2i8_2t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 2 x i8>, 2) @test_vlseg2_nxv1bf16_triscv.vector.tuple_nxv2i8_2t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg2_nxv1bf16_triscv.vector.tuple_nxv2i8_2t: 
; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma -; CHECK-NEXT: vlseg2e16.v v7, (a0) +; CHECK-NEXT: vlseg2e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv2i8_2t(target("riscv.vector.tuple", <vscale x 2 x i8>, 2) undef, ptr %base, i64 %vl, i64 4) - %1 = call <vscale x 1 x bfloat> @llvm.riscv.tuple.extract.nxv1bf16.triscv.vector.tuple_nxv2i8_2t(target("riscv.vector.tuple", <vscale x 2 x i8>, 2) %0, i32 1) - ret <vscale x 1 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 2 x i8>, 2) %0 } - -define <vscale x 1 x bfloat> @test_vlseg2_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_2t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 2 x i8>, 2) @test_vlseg2_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_2t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { ; CHECK-LABEL: test_vlseg2_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma -; CHECK-NEXT: vlseg2e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg2e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv2i8_2t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 2) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 4) - %1 = call <vscale x 1 x bfloat> @llvm.riscv.tuple.extract.nxv1bf16.triscv.vector.tuple_nxv2i8_2t(target("riscv.vector.tuple", <vscale x 2 x i8>, 2) %0, i32 1) - ret <vscale x 1 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 2 x i8>, 2) %0 } - - -define <vscale x 2 x bfloat> @test_vlseg2_nxv2bf16_triscv.vector.tuple_nxv4i8_2t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @test_vlseg2_nxv2bf16_triscv.vector.tuple_nxv4i8_2t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg2_nxv2bf16_triscv.vector.tuple_nxv4i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma -; CHECK-NEXT: vlseg2e16.v v7, (a0) +; CHECK-NEXT: vlseg2e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) undef, ptr %base, i64 %vl, i64 4) - %1 = call <vscale x 2 x bfloat> @llvm.riscv.tuple.extract.nxv2bf16.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0, i32 1) - ret <vscale x 2 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0 } - -define <vscale x 2 x bfloat> @test_vlseg2_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_2t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @test_vlseg2_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_2t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) { ; CHECK-LABEL: test_vlseg2_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma -; CHECK-NEXT: vlseg2e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg2e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv4i8_2t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 2) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 4) - %1 = call <vscale x 2 x bfloat> @llvm.riscv.tuple.extract.nxv2bf16.triscv.vector.tuple_nxv4i8_2t(target("riscv.vector.tuple", <vscale x 
4 x i8>, 2) %0, i32 1) - ret <vscale x 2 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 2) %0 } - - -define <vscale x 4 x bfloat> @test_vlseg2_nxv4bf16_triscv.vector.tuple_nxv8i8_2t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @test_vlseg2_nxv4bf16_triscv.vector.tuple_nxv8i8_2t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg2_nxv4bf16_triscv.vector.tuple_nxv8i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vlseg2e16.v v7, (a0) +; CHECK-NEXT: vlseg2e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) undef, ptr %base, i64 %vl, i64 4) - %1 = call <vscale x 4 x bfloat> @llvm.riscv.tuple.extract.nxv4bf16.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0, i32 1) - ret <vscale x 4 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0 } - -define <vscale x 4 x bfloat> @test_vlseg2_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_2t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @test_vlseg2_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_2t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) { ; CHECK-LABEL: test_vlseg2_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vlseg2e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg2e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv8i8_2t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) undef, ptr %base, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 4) - %1 = call <vscale x 4 x bfloat> @llvm.riscv.tuple.extract.nxv4bf16.triscv.vector.tuple_nxv8i8_2t(target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0, i32 1) - ret <vscale x 4 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 2) %0 } - - -define <vscale x 8 x bfloat> @test_vlseg2_nxv8bf16_triscv.vector.tuple_nxv16i8_2t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @test_vlseg2_nxv8bf16_triscv.vector.tuple_nxv16i8_2t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg2_nxv8bf16_triscv.vector.tuple_nxv16i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma -; CHECK-NEXT: vlseg2e16.v v6, (a0) +; CHECK-NEXT: vlseg2e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, i64 %vl, i64 4) - %1 = call <vscale x 8 x bfloat> @llvm.riscv.tuple.extract.nxv8bf16.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0, i32 1) - ret <vscale x 8 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0 } - -define <vscale x 8 x bfloat> @test_vlseg2_mask_nxv8bf16_triscv.vector.tuple_nxv16i8_2t(ptr %base, i64 %vl, <vscale x 8 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @test_vlseg2_mask_nxv8bf16_triscv.vector.tuple_nxv16i8_2t(ptr %base, i64 %vl, <vscale x 8 x i1> %mask) { ; CHECK-LABEL: test_vlseg2_mask_nxv8bf16_triscv.vector.tuple_nxv16i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma -; CHECK-NEXT: vlseg2e16.v v6, (a0), v0.t +; CHECK-NEXT: 
vlseg2e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv16i8_2t.nxv8i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) undef, ptr %base, <vscale x 8 x i1> %mask, i64 %vl, i64 1, i64 4) - %1 = call <vscale x 8 x bfloat> @llvm.riscv.tuple.extract.nxv8bf16.triscv.vector.tuple_nxv16i8_2t(target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0, i32 1) - ret <vscale x 8 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 16 x i8>, 2) %0 } - - -define <vscale x 16 x bfloat> @test_vlseg2_nxv16bf16_triscv.vector.tuple_nxv32i8_2t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @test_vlseg2_nxv16bf16_triscv.vector.tuple_nxv32i8_2t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg2_nxv16bf16_triscv.vector.tuple_nxv32i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma -; CHECK-NEXT: vlseg2e16.v v4, (a0) +; CHECK-NEXT: vlseg2e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, i64 %vl, i64 4) - %1 = call <vscale x 16 x bfloat> @llvm.riscv.tuple.extract.nxv16bf16.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0, i32 1) - ret <vscale x 16 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0 } - -define <vscale x 16 x bfloat> @test_vlseg2_mask_nxv16bf16_triscv.vector.tuple_nxv32i8_2t(ptr %base, i64 %vl, <vscale x 16 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @test_vlseg2_mask_nxv16bf16_triscv.vector.tuple_nxv32i8_2t(ptr %base, i64 %vl, <vscale x 16 x i1> %mask) { ; CHECK-LABEL: test_vlseg2_mask_nxv16bf16_triscv.vector.tuple_nxv32i8_2t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m4, ta, ma -; CHECK-NEXT: vlseg2e16.v v4, (a0), v0.t +; CHECK-NEXT: vlseg2e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.vlseg2.mask.triscv.vector.tuple_nxv32i8_2t.nxv16i1(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) undef, ptr %base, <vscale x 16 x i1> %mask, i64 %vl, i64 1, i64 4) - %1 = call <vscale x 16 x bfloat> @llvm.riscv.tuple.extract.nxv16bf16.triscv.vector.tuple_nxv32i8_2t(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0, i32 1) - ret <vscale x 16 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %0 } - - -define <vscale x 1 x bfloat> @test_vlseg3_nxv1bf16_triscv.vector.tuple_nxv2i8_3t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 2 x i8>, 3) @test_vlseg3_nxv1bf16_triscv.vector.tuple_nxv2i8_3t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg3_nxv1bf16_triscv.vector.tuple_nxv2i8_3t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma -; CHECK-NEXT: vlseg3e16.v v7, (a0) +; CHECK-NEXT: vlseg3e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv2i8_3t(target("riscv.vector.tuple", <vscale x 2 x i8>, 3) undef, ptr %base, i64 %vl, i64 4) - %1 = call <vscale x 1 x bfloat> @llvm.riscv.tuple.extract.nxv1bf16.triscv.vector.tuple_nxv2i8_3t(target("riscv.vector.tuple", <vscale x 2 x i8>, 3) %0, i32 1) - ret <vscale x 1 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 2 x i8>, 3) %0 } - -define <vscale x 1 x bfloat> 
@test_vlseg3_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_3t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 2 x i8>, 3) @test_vlseg3_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_3t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { ; CHECK-LABEL: test_vlseg3_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_3t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma -; CHECK-NEXT: vlseg3e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg3e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv2i8_3t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 3) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 4) - %1 = call <vscale x 1 x bfloat> @llvm.riscv.tuple.extract.nxv1bf16.triscv.vector.tuple_nxv2i8_3t(target("riscv.vector.tuple", <vscale x 2 x i8>, 3) %0, i32 1) - ret <vscale x 1 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 2 x i8>, 3) %0 } - - -define <vscale x 2 x bfloat> @test_vlseg3_nxv2bf16_triscv.vector.tuple_nxv4i8_3t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @test_vlseg3_nxv2bf16_triscv.vector.tuple_nxv4i8_3t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg3_nxv2bf16_triscv.vector.tuple_nxv4i8_3t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma -; CHECK-NEXT: vlseg3e16.v v7, (a0) +; CHECK-NEXT: vlseg3e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) undef, ptr %base, i64 %vl, i64 4) - %1 = call <vscale x 2 x bfloat> @llvm.riscv.tuple.extract.nxv2bf16.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0, i32 1) - ret <vscale x 2 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0 } - -define <vscale x 2 x bfloat> @test_vlseg3_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_3t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @test_vlseg3_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_3t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) { ; CHECK-LABEL: test_vlseg3_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_3t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma -; CHECK-NEXT: vlseg3e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg3e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv4i8_3t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 4) - %1 = call <vscale x 2 x bfloat> @llvm.riscv.tuple.extract.nxv2bf16.triscv.vector.tuple_nxv4i8_3t(target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0, i32 1) - ret <vscale x 2 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 3) %0 } - - -define <vscale x 4 x bfloat> @test_vlseg3_nxv4bf16_triscv.vector.tuple_nxv8i8_3t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @test_vlseg3_nxv4bf16_triscv.vector.tuple_nxv8i8_3t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg3_nxv4bf16_triscv.vector.tuple_nxv8i8_3t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vlseg3e16.v v7, (a0) +; CHECK-NEXT: vlseg3e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 
3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) undef, ptr %base, i64 %vl, i64 4) - %1 = call <vscale x 4 x bfloat> @llvm.riscv.tuple.extract.nxv4bf16.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0, i32 1) - ret <vscale x 4 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0 } - -define <vscale x 4 x bfloat> @test_vlseg3_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_3t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @test_vlseg3_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_3t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) { ; CHECK-LABEL: test_vlseg3_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_3t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vlseg3e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg3e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv8i8_3t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) undef, ptr %base, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 4) - %1 = call <vscale x 4 x bfloat> @llvm.riscv.tuple.extract.nxv4bf16.triscv.vector.tuple_nxv8i8_3t(target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0, i32 1) - ret <vscale x 4 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 3) %0 } - - -define <vscale x 8 x bfloat> @test_vlseg3_nxv8bf16_triscv.vector.tuple_nxv16i8_3t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @test_vlseg3_nxv8bf16_triscv.vector.tuple_nxv16i8_3t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg3_nxv8bf16_triscv.vector.tuple_nxv16i8_3t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma -; CHECK-NEXT: vlseg3e16.v v6, (a0) +; CHECK-NEXT: vlseg3e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, i64 %vl, i64 4) - %1 = call <vscale x 8 x bfloat> @llvm.riscv.tuple.extract.nxv8bf16.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0, i32 1) - ret <vscale x 8 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0 } - -define <vscale x 8 x bfloat> @test_vlseg3_mask_nxv8bf16_triscv.vector.tuple_nxv16i8_3t(ptr %base, i64 %vl, <vscale x 8 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @test_vlseg3_mask_nxv8bf16_triscv.vector.tuple_nxv16i8_3t(ptr %base, i64 %vl, <vscale x 8 x i1> %mask) { ; CHECK-LABEL: test_vlseg3_mask_nxv8bf16_triscv.vector.tuple_nxv16i8_3t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma -; CHECK-NEXT: vlseg3e16.v v6, (a0), v0.t +; CHECK-NEXT: vlseg3e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 3) @llvm.riscv.vlseg3.mask.triscv.vector.tuple_nxv16i8_3t.nxv8i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) undef, ptr %base, <vscale x 8 x i1> %mask, i64 %vl, i64 1, i64 4) - %1 = call <vscale x 8 x bfloat> @llvm.riscv.tuple.extract.nxv8bf16.triscv.vector.tuple_nxv16i8_3t(target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0, i32 1) - ret <vscale x 8 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 16 x i8>, 3) %0 } - - -define <vscale x 1 x bfloat> @test_vlseg4_nxv1bf16_triscv.vector.tuple_nxv2i8_4t(ptr %base, i64 
%vl) { +define target("riscv.vector.tuple", <vscale x 2 x i8>, 4) @test_vlseg4_nxv1bf16_triscv.vector.tuple_nxv2i8_4t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg4_nxv1bf16_triscv.vector.tuple_nxv2i8_4t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma -; CHECK-NEXT: vlseg4e16.v v7, (a0) +; CHECK-NEXT: vlseg4e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv2i8_4t(target("riscv.vector.tuple", <vscale x 2 x i8>, 4) undef, ptr %base, i64 %vl, i64 4) - %1 = call <vscale x 1 x bfloat> @llvm.riscv.tuple.extract.nxv1bf16.triscv.vector.tuple_nxv2i8_4t(target("riscv.vector.tuple", <vscale x 2 x i8>, 4) %0, i32 1) - ret <vscale x 1 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 2 x i8>, 4) %0 } - -define <vscale x 1 x bfloat> @test_vlseg4_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_4t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 2 x i8>, 4) @test_vlseg4_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_4t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { ; CHECK-LABEL: test_vlseg4_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_4t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma -; CHECK-NEXT: vlseg4e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg4e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv2i8_4t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 4) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 4) - %1 = call <vscale x 1 x bfloat> @llvm.riscv.tuple.extract.nxv1bf16.triscv.vector.tuple_nxv2i8_4t(target("riscv.vector.tuple", <vscale x 2 x i8>, 4) %0, i32 1) - ret <vscale x 1 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 2 x i8>, 4) %0 } - - -define <vscale x 2 x bfloat> @test_vlseg4_nxv2bf16_triscv.vector.tuple_nxv4i8_4t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @test_vlseg4_nxv2bf16_triscv.vector.tuple_nxv4i8_4t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg4_nxv2bf16_triscv.vector.tuple_nxv4i8_4t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma -; CHECK-NEXT: vlseg4e16.v v7, (a0) +; CHECK-NEXT: vlseg4e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) undef, ptr %base, i64 %vl, i64 4) - %1 = call <vscale x 2 x bfloat> @llvm.riscv.tuple.extract.nxv2bf16.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0, i32 1) - ret <vscale x 2 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0 } - -define <vscale x 2 x bfloat> @test_vlseg4_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_4t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @test_vlseg4_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_4t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) { ; CHECK-LABEL: test_vlseg4_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_4t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma -; CHECK-NEXT: vlseg4e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg4e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv4i8_4t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 
4) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 4) - %1 = call <vscale x 2 x bfloat> @llvm.riscv.tuple.extract.nxv2bf16.triscv.vector.tuple_nxv4i8_4t(target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0, i32 1) - ret <vscale x 2 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 4) %0 } - - -define <vscale x 4 x bfloat> @test_vlseg4_nxv4bf16_triscv.vector.tuple_nxv8i8_4t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @test_vlseg4_nxv4bf16_triscv.vector.tuple_nxv8i8_4t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg4_nxv4bf16_triscv.vector.tuple_nxv8i8_4t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vlseg4e16.v v7, (a0) +; CHECK-NEXT: vlseg4e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) undef, ptr %base, i64 %vl, i64 4) - %1 = call <vscale x 4 x bfloat> @llvm.riscv.tuple.extract.nxv4bf16.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0, i32 1) - ret <vscale x 4 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0 } - -define <vscale x 4 x bfloat> @test_vlseg4_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_4t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @test_vlseg4_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_4t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) { ; CHECK-LABEL: test_vlseg4_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_4t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vlseg4e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg4e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv8i8_4t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) undef, ptr %base, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 4) - %1 = call <vscale x 4 x bfloat> @llvm.riscv.tuple.extract.nxv4bf16.triscv.vector.tuple_nxv8i8_4t(target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0, i32 1) - ret <vscale x 4 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 4) %0 } - - -define <vscale x 8 x bfloat> @test_vlseg4_nxv8bf16_triscv.vector.tuple_nxv16i8_4t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @test_vlseg4_nxv8bf16_triscv.vector.tuple_nxv16i8_4t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg4_nxv8bf16_triscv.vector.tuple_nxv16i8_4t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma -; CHECK-NEXT: vlseg4e16.v v6, (a0) +; CHECK-NEXT: vlseg4e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vlseg4.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, i64 %vl, i64 4) - %1 = call <vscale x 8 x bfloat> @llvm.riscv.tuple.extract.nxv8bf16.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0, i32 1) - ret <vscale x 8 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0 } - -define <vscale x 8 x bfloat> @test_vlseg4_mask_nxv8bf16_triscv.vector.tuple_nxv16i8_4t(ptr %base, i64 %vl, <vscale x 8 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @test_vlseg4_mask_nxv8bf16_triscv.vector.tuple_nxv16i8_4t(ptr %base, i64 %vl, <vscale x 8 x i1> %mask) 
{ ; CHECK-LABEL: test_vlseg4_mask_nxv8bf16_triscv.vector.tuple_nxv16i8_4t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m2, ta, ma -; CHECK-NEXT: vlseg4e16.v v6, (a0), v0.t +; CHECK-NEXT: vlseg4e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 16 x i8>, 4) @llvm.riscv.vlseg4.mask.triscv.vector.tuple_nxv16i8_4t.nxv8i1(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) undef, ptr %base, <vscale x 8 x i1> %mask, i64 %vl, i64 1, i64 4) - %1 = call <vscale x 8 x bfloat> @llvm.riscv.tuple.extract.nxv8bf16.triscv.vector.tuple_nxv16i8_4t(target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0, i32 1) - ret <vscale x 8 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 16 x i8>, 4) %0 } - - -define <vscale x 1 x bfloat> @test_vlseg5_nxv1bf16_triscv.vector.tuple_nxv2i8_5t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 2 x i8>, 5) @test_vlseg5_nxv1bf16_triscv.vector.tuple_nxv2i8_5t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg5_nxv1bf16_triscv.vector.tuple_nxv2i8_5t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma -; CHECK-NEXT: vlseg5e16.v v7, (a0) +; CHECK-NEXT: vlseg5e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv2i8_5t(target("riscv.vector.tuple", <vscale x 2 x i8>, 5) undef, ptr %base, i64 %vl, i64 4) - %1 = call <vscale x 1 x bfloat> @llvm.riscv.tuple.extract.nxv1bf16.triscv.vector.tuple_nxv2i8_5t(target("riscv.vector.tuple", <vscale x 2 x i8>, 5) %0, i32 1) - ret <vscale x 1 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 2 x i8>, 5) %0 } - -define <vscale x 1 x bfloat> @test_vlseg5_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_5t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 2 x i8>, 5) @test_vlseg5_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_5t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { ; CHECK-LABEL: test_vlseg5_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_5t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma -; CHECK-NEXT: vlseg5e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg5e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv2i8_5t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 5) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 4) - %1 = call <vscale x 1 x bfloat> @llvm.riscv.tuple.extract.nxv1bf16.triscv.vector.tuple_nxv2i8_5t(target("riscv.vector.tuple", <vscale x 2 x i8>, 5) %0, i32 1) - ret <vscale x 1 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 2 x i8>, 5) %0 } - - -define <vscale x 2 x bfloat> @test_vlseg5_nxv2bf16_triscv.vector.tuple_nxv4i8_5t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @test_vlseg5_nxv2bf16_triscv.vector.tuple_nxv4i8_5t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg5_nxv2bf16_triscv.vector.tuple_nxv4i8_5t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma -; CHECK-NEXT: vlseg5e16.v v7, (a0) +; CHECK-NEXT: vlseg5e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) undef, ptr %base, i64 %vl, i64 4) - %1 = call <vscale x 2 x bfloat> 
@llvm.riscv.tuple.extract.nxv2bf16.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0, i32 1) - ret <vscale x 2 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0 } - -define <vscale x 2 x bfloat> @test_vlseg5_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_5t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @test_vlseg5_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_5t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) { ; CHECK-LABEL: test_vlseg5_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_5t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma -; CHECK-NEXT: vlseg5e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg5e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv4i8_5t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 4) - %1 = call <vscale x 2 x bfloat> @llvm.riscv.tuple.extract.nxv2bf16.triscv.vector.tuple_nxv4i8_5t(target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0, i32 1) - ret <vscale x 2 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 5) %0 } - - -define <vscale x 4 x bfloat> @test_vlseg5_nxv4bf16_triscv.vector.tuple_nxv8i8_5t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @test_vlseg5_nxv4bf16_triscv.vector.tuple_nxv8i8_5t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg5_nxv4bf16_triscv.vector.tuple_nxv8i8_5t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vlseg5e16.v v7, (a0) +; CHECK-NEXT: vlseg5e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) undef, ptr %base, i64 %vl, i64 4) - %1 = call <vscale x 4 x bfloat> @llvm.riscv.tuple.extract.nxv4bf16.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0, i32 1) - ret <vscale x 4 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0 } - -define <vscale x 4 x bfloat> @test_vlseg5_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_5t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @test_vlseg5_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_5t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) { ; CHECK-LABEL: test_vlseg5_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_5t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vlseg5e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg5e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 5) @llvm.riscv.vlseg5.mask.triscv.vector.tuple_nxv8i8_5t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) undef, ptr %base, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 4) - %1 = call <vscale x 4 x bfloat> @llvm.riscv.tuple.extract.nxv4bf16.triscv.vector.tuple_nxv8i8_5t(target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0, i32 1) - ret <vscale x 4 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 5) %0 } - - -define <vscale x 1 x bfloat> @test_vlseg6_nxv1bf16_triscv.vector.tuple_nxv2i8_6t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 2 x i8>, 6) @test_vlseg6_nxv1bf16_triscv.vector.tuple_nxv2i8_6t(ptr %base, i64 %vl) { ; CHECK-LABEL: 
test_vlseg6_nxv1bf16_triscv.vector.tuple_nxv2i8_6t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma -; CHECK-NEXT: vlseg6e16.v v7, (a0) +; CHECK-NEXT: vlseg6e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv2i8_6t(target("riscv.vector.tuple", <vscale x 2 x i8>, 6) undef, ptr %base, i64 %vl, i64 4) - %1 = call <vscale x 1 x bfloat> @llvm.riscv.tuple.extract.nxv1bf16.triscv.vector.tuple_nxv2i8_6t(target("riscv.vector.tuple", <vscale x 2 x i8>, 6) %0, i32 1) - ret <vscale x 1 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 2 x i8>, 6) %0 } - -define <vscale x 1 x bfloat> @test_vlseg6_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_6t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 2 x i8>, 6) @test_vlseg6_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_6t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { ; CHECK-LABEL: test_vlseg6_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_6t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma -; CHECK-NEXT: vlseg6e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg6e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv2i8_6t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 6) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 4) - %1 = call <vscale x 1 x bfloat> @llvm.riscv.tuple.extract.nxv1bf16.triscv.vector.tuple_nxv2i8_6t(target("riscv.vector.tuple", <vscale x 2 x i8>, 6) %0, i32 1) - ret <vscale x 1 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 2 x i8>, 6) %0 } - - -define <vscale x 2 x bfloat> @test_vlseg6_nxv2bf16_triscv.vector.tuple_nxv4i8_6t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @test_vlseg6_nxv2bf16_triscv.vector.tuple_nxv4i8_6t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg6_nxv2bf16_triscv.vector.tuple_nxv4i8_6t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma -; CHECK-NEXT: vlseg6e16.v v7, (a0) +; CHECK-NEXT: vlseg6e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) undef, ptr %base, i64 %vl, i64 4) - %1 = call <vscale x 2 x bfloat> @llvm.riscv.tuple.extract.nxv2bf16.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0, i32 1) - ret <vscale x 2 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0 } - -define <vscale x 2 x bfloat> @test_vlseg6_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_6t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @test_vlseg6_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_6t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) { ; CHECK-LABEL: test_vlseg6_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_6t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma -; CHECK-NEXT: vlseg6e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg6e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv4i8_6t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 4) - %1 = call <vscale x 2 x bfloat> 
@llvm.riscv.tuple.extract.nxv2bf16.triscv.vector.tuple_nxv4i8_6t(target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0, i32 1) - ret <vscale x 2 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 6) %0 } - - -define <vscale x 4 x bfloat> @test_vlseg6_nxv4bf16_triscv.vector.tuple_nxv8i8_6t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @test_vlseg6_nxv4bf16_triscv.vector.tuple_nxv8i8_6t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg6_nxv4bf16_triscv.vector.tuple_nxv8i8_6t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vlseg6e16.v v7, (a0) +; CHECK-NEXT: vlseg6e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @llvm.riscv.vlseg6.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) undef, ptr %base, i64 %vl, i64 4) - %1 = call <vscale x 4 x bfloat> @llvm.riscv.tuple.extract.nxv4bf16.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0, i32 1) - ret <vscale x 4 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0 } - -define <vscale x 4 x bfloat> @test_vlseg6_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_6t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @test_vlseg6_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_6t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) { ; CHECK-LABEL: test_vlseg6_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_6t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vlseg6e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg6e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 6) @llvm.riscv.vlseg6.mask.triscv.vector.tuple_nxv8i8_6t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) undef, ptr %base, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 4) - %1 = call <vscale x 4 x bfloat> @llvm.riscv.tuple.extract.nxv4bf16.triscv.vector.tuple_nxv8i8_6t(target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0, i32 1) - ret <vscale x 4 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 6) %0 } - - -define <vscale x 1 x bfloat> @test_vlseg7_nxv1bf16_triscv.vector.tuple_nxv2i8_7t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 2 x i8>, 7) @test_vlseg7_nxv1bf16_triscv.vector.tuple_nxv2i8_7t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg7_nxv1bf16_triscv.vector.tuple_nxv2i8_7t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma -; CHECK-NEXT: vlseg7e16.v v7, (a0) +; CHECK-NEXT: vlseg7e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv2i8_7t(target("riscv.vector.tuple", <vscale x 2 x i8>, 7) undef, ptr %base, i64 %vl, i64 4) - %1 = call <vscale x 1 x bfloat> @llvm.riscv.tuple.extract.nxv1bf16.triscv.vector.tuple_nxv2i8_7t(target("riscv.vector.tuple", <vscale x 2 x i8>, 7) %0, i32 1) - ret <vscale x 1 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 2 x i8>, 7) %0 } - -define <vscale x 1 x bfloat> @test_vlseg7_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_7t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 2 x i8>, 7) @test_vlseg7_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_7t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { ; CHECK-LABEL: test_vlseg7_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_7t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: 
vsetvli zero, a1, e16, mf4, ta, ma -; CHECK-NEXT: vlseg7e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg7e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv2i8_7t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 7) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 4) - %1 = call <vscale x 1 x bfloat> @llvm.riscv.tuple.extract.nxv1bf16.triscv.vector.tuple_nxv2i8_7t(target("riscv.vector.tuple", <vscale x 2 x i8>, 7) %0, i32 1) - ret <vscale x 1 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 2 x i8>, 7) %0 } - - -define <vscale x 2 x bfloat> @test_vlseg7_nxv2bf16_triscv.vector.tuple_nxv4i8_7t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @test_vlseg7_nxv2bf16_triscv.vector.tuple_nxv4i8_7t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg7_nxv2bf16_triscv.vector.tuple_nxv4i8_7t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma -; CHECK-NEXT: vlseg7e16.v v7, (a0) +; CHECK-NEXT: vlseg7e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) undef, ptr %base, i64 %vl, i64 4) - %1 = call <vscale x 2 x bfloat> @llvm.riscv.tuple.extract.nxv2bf16.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0, i32 1) - ret <vscale x 2 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0 } - -define <vscale x 2 x bfloat> @test_vlseg7_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_7t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @test_vlseg7_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_7t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) { ; CHECK-LABEL: test_vlseg7_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_7t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma -; CHECK-NEXT: vlseg7e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg7e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv4i8_7t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 4) - %1 = call <vscale x 2 x bfloat> @llvm.riscv.tuple.extract.nxv2bf16.triscv.vector.tuple_nxv4i8_7t(target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0, i32 1) - ret <vscale x 2 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 4 x i8>, 7) %0 } - - -define <vscale x 4 x bfloat> @test_vlseg7_nxv4bf16_triscv.vector.tuple_nxv8i8_7t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @test_vlseg7_nxv4bf16_triscv.vector.tuple_nxv8i8_7t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg7_nxv4bf16_triscv.vector.tuple_nxv8i8_7t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vlseg7e16.v v7, (a0) +; CHECK-NEXT: vlseg7e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) undef, ptr %base, i64 %vl, i64 4) - %1 = call <vscale x 4 x bfloat> @llvm.riscv.tuple.extract.nxv4bf16.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0, i32 1) - ret <vscale x 4 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 8 x 
i8>, 7) %0 } - -define <vscale x 4 x bfloat> @test_vlseg7_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_7t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @test_vlseg7_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_7t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) { ; CHECK-LABEL: test_vlseg7_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_7t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, m1, ta, ma -; CHECK-NEXT: vlseg7e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg7e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 7) @llvm.riscv.vlseg7.mask.triscv.vector.tuple_nxv8i8_7t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) undef, ptr %base, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 4) - %1 = call <vscale x 4 x bfloat> @llvm.riscv.tuple.extract.nxv4bf16.triscv.vector.tuple_nxv8i8_7t(target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0, i32 1) - ret <vscale x 4 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 8 x i8>, 7) %0 } - - -define <vscale x 1 x bfloat> @test_vlseg8_nxv1bf16_triscv.vector.tuple_nxv2i8_8t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 2 x i8>, 8) @test_vlseg8_nxv1bf16_triscv.vector.tuple_nxv2i8_8t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg8_nxv1bf16_triscv.vector.tuple_nxv2i8_8t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma -; CHECK-NEXT: vlseg8e16.v v7, (a0) +; CHECK-NEXT: vlseg8e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv2i8_8t(target("riscv.vector.tuple", <vscale x 2 x i8>, 8) undef, ptr %base, i64 %vl, i64 4) - %1 = call <vscale x 1 x bfloat> @llvm.riscv.tuple.extract.nxv1bf16.triscv.vector.tuple_nxv2i8_8t(target("riscv.vector.tuple", <vscale x 2 x i8>, 8) %0, i32 1) - ret <vscale x 1 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 2 x i8>, 8) %0 } - -define <vscale x 1 x bfloat> @test_vlseg8_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_8t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { +define target("riscv.vector.tuple", <vscale x 2 x i8>, 8) @test_vlseg8_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_8t(ptr %base, i64 %vl, <vscale x 1 x i1> %mask) { ; CHECK-LABEL: test_vlseg8_mask_nxv1bf16_triscv.vector.tuple_nxv2i8_8t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf4, ta, ma -; CHECK-NEXT: vlseg8e16.v v7, (a0), v0.t +; CHECK-NEXT: vlseg8e16.v v8, (a0), v0.t ; CHECK-NEXT: ret entry: %0 = tail call target("riscv.vector.tuple", <vscale x 2 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv2i8_8t.nxv1i1(target("riscv.vector.tuple", <vscale x 2 x i8>, 8) undef, ptr %base, <vscale x 1 x i1> %mask, i64 %vl, i64 1, i64 4) - %1 = call <vscale x 1 x bfloat> @llvm.riscv.tuple.extract.nxv1bf16.triscv.vector.tuple_nxv2i8_8t(target("riscv.vector.tuple", <vscale x 2 x i8>, 8) %0, i32 1) - ret <vscale x 1 x bfloat> %1 + ret target("riscv.vector.tuple", <vscale x 2 x i8>, 8) %0 } - - -define <vscale x 2 x bfloat> @test_vlseg8_nxv2bf16_triscv.vector.tuple_nxv4i8_8t(ptr %base, i64 %vl) { +define target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @test_vlseg8_nxv2bf16_triscv.vector.tuple_nxv4i8_8t(ptr %base, i64 %vl) { ; CHECK-LABEL: test_vlseg8_nxv2bf16_triscv.vector.tuple_nxv4i8_8t: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: vsetvli zero, a1, e16, mf2, ta, ma -; CHECK-NEXT: vlseg8e16.v v7, (a0) +; CHECK-NEXT: vlseg8e16.v v8, (a0) ; CHECK-NEXT: ret entry: %0 = tail call 
target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) undef, ptr %base, i64 %vl, i64 4)
-  %1 = call <vscale x 2 x bfloat> @llvm.riscv.tuple.extract.nxv2bf16.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0, i32 1)
-  ret <vscale x 2 x bfloat> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0
 }
-
-define <vscale x 2 x bfloat> @test_vlseg8_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_8t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @test_vlseg8_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_8t(ptr %base, i64 %vl, <vscale x 2 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg8_mask_nxv2bf16_triscv.vector.tuple_nxv4i8_8t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, mf2, ta, ma
-; CHECK-NEXT:    vlseg8e16.v v7, (a0), v0.t
+; CHECK-NEXT:    vlseg8e16.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 4 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv4i8_8t.nxv2i1(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) undef, ptr %base, <vscale x 2 x i1> %mask, i64 %vl, i64 1, i64 4)
-  %1 = call <vscale x 2 x bfloat> @llvm.riscv.tuple.extract.nxv2bf16.triscv.vector.tuple_nxv4i8_8t(target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0, i32 1)
-  ret <vscale x 2 x bfloat> %1
+  ret target("riscv.vector.tuple", <vscale x 4 x i8>, 8) %0
 }
-
-
-define <vscale x 4 x bfloat> @test_vlseg8_nxv4bf16_triscv.vector.tuple_nxv8i8_8t(ptr %base, i64 %vl) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @test_vlseg8_nxv4bf16_triscv.vector.tuple_nxv8i8_8t(ptr %base, i64 %vl) {
 ; CHECK-LABEL: test_vlseg8_nxv4bf16_triscv.vector.tuple_nxv8i8_8t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT:    vlseg8e16.v v7, (a0)
+; CHECK-NEXT:    vlseg8e16.v v8, (a0)
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) undef, ptr %base, i64 %vl, i64 4)
-  %1 = call <vscale x 4 x bfloat> @llvm.riscv.tuple.extract.nxv4bf16.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0, i32 1)
-  ret <vscale x 4 x bfloat> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0
 }
-
-define <vscale x 4 x bfloat> @test_vlseg8_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_8t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
+define target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @test_vlseg8_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_8t(ptr %base, i64 %vl, <vscale x 4 x i1> %mask) {
 ; CHECK-LABEL: test_vlseg8_mask_nxv4bf16_triscv.vector.tuple_nxv8i8_8t:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    vsetvli zero, a1, e16, m1, ta, ma
-; CHECK-NEXT:    vlseg8e16.v v7, (a0), v0.t
+; CHECK-NEXT:    vlseg8e16.v v8, (a0), v0.t
 ; CHECK-NEXT:    ret
 entry:
   %0 = tail call target("riscv.vector.tuple", <vscale x 8 x i8>, 8) @llvm.riscv.vlseg8.mask.triscv.vector.tuple_nxv8i8_8t.nxv4i1(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) undef, ptr %base, <vscale x 4 x i1> %mask, i64 %vl, i64 1, i64 4)
-  %1 = call <vscale x 4 x bfloat> @llvm.riscv.tuple.extract.nxv4bf16.triscv.vector.tuple_nxv8i8_8t(target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0, i32 1)
-  ret <vscale x 4 x bfloat> %1
+  ret target("riscv.vector.tuple", <vscale x 8 x i8>, 8) %0
 }
-
diff --git a/llvm/test/CodeGen/WebAssembly/lower-em-sjlj-alloca.ll b/llvm/test/CodeGen/WebAssembly/lower-em-sjlj-alloca.ll
b/llvm/test/CodeGen/WebAssembly/lower-em-sjlj-alloca.ll new file mode 100644 index 0000000..0f968de --- /dev/null +++ b/llvm/test/CodeGen/WebAssembly/lower-em-sjlj-alloca.ll @@ -0,0 +1,129 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -S -wasm-lower-em-ehsjlj -wasm-enable-sjlj -mtriple=wasm32-unknown-emscripten < %s | FileCheck %s + +@buf = external global i8 +declare i32 @setjmp(ptr) returns_twice +declare void @dummy() + +define void @test_static() { +; CHECK-LABEL: define void @test_static() personality ptr @__gxx_wasm_personality_v0 { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: [[X:%.*]] = alloca i32, align 4 +; CHECK-NEXT: [[FUNCTIONINVOCATIONID:%.*]] = alloca i32, align 4 +; CHECK-NEXT: br label %[[SETJMP_DISPATCH:.*]] +; CHECK: [[SETJMP_DISPATCH]]: +; CHECK-NEXT: [[VAL1:%.*]] = phi i32 [ [[VAL:%.*]], %[[IF_END:.*]] ], [ undef, %[[ENTRY]] ] +; CHECK-NEXT: [[LABEL_PHI:%.*]] = phi i32 [ [[LABEL:%.*]], %[[IF_END]] ], [ -1, %[[ENTRY]] ] +; CHECK-NEXT: switch i32 [[LABEL_PHI]], label %[[ENTRY_SPLIT:.*]] [ +; CHECK-NEXT: i32 1, label %[[ENTRY_SPLIT_SPLIT:.*]] +; CHECK-NEXT: ] +; CHECK: [[ENTRY_SPLIT]]: +; CHECK-NEXT: call void @llvm.lifetime.start.p0(i64 4, ptr [[X]]) +; CHECK-NEXT: call void @__wasm_setjmp(ptr @buf, i32 1, ptr [[FUNCTIONINVOCATIONID]]) +; CHECK-NEXT: br label %[[ENTRY_SPLIT_SPLIT]] +; CHECK: [[ENTRY_SPLIT_SPLIT]]: +; CHECK-NEXT: [[SETJMP_RET:%.*]] = phi i32 [ 0, %[[ENTRY_SPLIT]] ], [ [[VAL1]], %[[SETJMP_DISPATCH]] ] +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[SETJMP_RET]], 0 +; CHECK-NEXT: br i1 [[CMP]], label %[[IF:.*]], label %[[ELSE:.*]] +; CHECK: [[IF]]: +; CHECK-NEXT: invoke void @dummy() +; CHECK-NEXT: to [[DOTNOEXC:label %.*]] unwind label %[[CATCH_DISPATCH_LONGJMP:.*]] +; CHECK: [[_NOEXC:.*:]] +; CHECK-NEXT: ret void +; CHECK: [[ELSE]]: +; CHECK-NEXT: call void @llvm.lifetime.end.p0(i64 4, ptr [[X]]) +; CHECK-NEXT: ret void +; CHECK: [[CATCH_DISPATCH_LONGJMP]]: +; CHECK-NEXT: [[TMP0:%.*]] = catchswitch within none [label %catch.longjmp] unwind to caller +; CHECK: [[CATCH_LONGJMP:.*:]] +; CHECK-NEXT: [[TMP1:%.*]] = catchpad within [[TMP0]] [] +; CHECK-NEXT: [[THROWN:%.*]] = call ptr @llvm.wasm.catch(i32 1) +; CHECK-NEXT: [[ENV_GEP:%.*]] = getelementptr { ptr, i32 }, ptr [[THROWN]], i32 0, i32 0 +; CHECK-NEXT: [[VAL_GEP:%.*]] = getelementptr { ptr, i32 }, ptr [[THROWN]], i32 0, i32 1 +; CHECK-NEXT: [[ENV:%.*]] = load ptr, ptr [[ENV_GEP]], align 4 +; CHECK-NEXT: [[VAL]] = load i32, ptr [[VAL_GEP]], align 4 +; CHECK-NEXT: [[LABEL]] = call i32 @__wasm_setjmp_test(ptr [[ENV]], ptr [[FUNCTIONINVOCATIONID]]) [ "funclet"(token [[TMP1]]) ] +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[LABEL]], 0 +; CHECK-NEXT: br i1 [[TMP2]], label %[[IF_THEN:.*]], label %[[IF_END]] +; CHECK: [[IF_THEN]]: +; CHECK-NEXT: call void @__wasm_longjmp(ptr [[ENV]], i32 [[VAL]]) [ "funclet"(token [[TMP1]]) ] +; CHECK-NEXT: unreachable +; CHECK: [[IF_END]]: +; CHECK-NEXT: catchret from [[TMP1]] to label %[[SETJMP_DISPATCH]] +; +entry: + %x = alloca i32, align 4 + call void @llvm.lifetime.start.p0(i64 4, ptr %x) + %call = call i32 @setjmp(ptr @buf) returns_twice + %cmp = icmp eq i32 %call, 0 + br i1 %cmp, label %if, label %else + +if: + call void @dummy() + ret void + +else: + call void @llvm.lifetime.end.p0(i64 4, ptr %x) + ret void +} + +define void @test_dynamic(i32 %size) { +; CHECK-LABEL: define void @test_dynamic( +; CHECK-SAME: i32 [[SIZE:%.*]]) personality ptr @__gxx_wasm_personality_v0 { +; CHECK-NEXT: [[ENTRY:.*]]: +; 
CHECK-NEXT: [[FUNCTIONINVOCATIONID:%.*]] = alloca i32, align 4 +; CHECK-NEXT: br label %[[SETJMP_DISPATCH:.*]] +; CHECK: [[SETJMP_DISPATCH]]: +; CHECK-NEXT: [[VAL1:%.*]] = phi i32 [ [[VAL:%.*]], %[[IF_END:.*]] ], [ undef, %[[ENTRY]] ] +; CHECK-NEXT: [[LABEL_PHI:%.*]] = phi i32 [ [[LABEL:%.*]], %[[IF_END]] ], [ -1, %[[ENTRY]] ] +; CHECK-NEXT: switch i32 [[LABEL_PHI]], label %[[ENTRY_SPLIT:.*]] [ +; CHECK-NEXT: i32 1, label %[[ENTRY_SPLIT_SPLIT:.*]] +; CHECK-NEXT: ] +; CHECK: [[ENTRY_SPLIT]]: +; CHECK-NEXT: [[X:%.*]] = alloca i32, i32 [[SIZE]], align 4 +; CHECK-NEXT: call void @__wasm_setjmp(ptr @buf, i32 1, ptr [[FUNCTIONINVOCATIONID]]) +; CHECK-NEXT: br label %[[ENTRY_SPLIT_SPLIT]] +; CHECK: [[ENTRY_SPLIT_SPLIT]]: +; CHECK-NEXT: [[SETJMP_RET:%.*]] = phi i32 [ 0, %[[ENTRY_SPLIT]] ], [ [[VAL1]], %[[SETJMP_DISPATCH]] ] +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[SETJMP_RET]], 0 +; CHECK-NEXT: br i1 [[CMP]], label %[[IF:.*]], label %[[ELSE:.*]] +; CHECK: [[IF]]: +; CHECK-NEXT: invoke void @dummy() +; CHECK-NEXT: to [[DOTNOEXC:label %.*]] unwind label %[[CATCH_DISPATCH_LONGJMP:.*]] +; CHECK: [[_NOEXC:.*:]] +; CHECK-NEXT: ret void +; CHECK: [[ELSE]]: +; CHECK-NEXT: ret void +; CHECK: [[CATCH_DISPATCH_LONGJMP]]: +; CHECK-NEXT: [[TMP0:%.*]] = catchswitch within none [label %catch.longjmp] unwind to caller +; CHECK: [[CATCH_LONGJMP:.*:]] +; CHECK-NEXT: [[TMP1:%.*]] = catchpad within [[TMP0]] [] +; CHECK-NEXT: [[THROWN:%.*]] = call ptr @llvm.wasm.catch(i32 1) +; CHECK-NEXT: [[ENV_GEP:%.*]] = getelementptr { ptr, i32 }, ptr [[THROWN]], i32 0, i32 0 +; CHECK-NEXT: [[VAL_GEP:%.*]] = getelementptr { ptr, i32 }, ptr [[THROWN]], i32 0, i32 1 +; CHECK-NEXT: [[ENV:%.*]] = load ptr, ptr [[ENV_GEP]], align 4 +; CHECK-NEXT: [[VAL]] = load i32, ptr [[VAL_GEP]], align 4 +; CHECK-NEXT: [[LABEL]] = call i32 @__wasm_setjmp_test(ptr [[ENV]], ptr [[FUNCTIONINVOCATIONID]]) [ "funclet"(token [[TMP1]]) ] +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[LABEL]], 0 +; CHECK-NEXT: br i1 [[TMP2]], label %[[IF_THEN:.*]], label %[[IF_END]] +; CHECK: [[IF_THEN]]: +; CHECK-NEXT: call void @__wasm_longjmp(ptr [[ENV]], i32 [[VAL]]) [ "funclet"(token [[TMP1]]) ] +; CHECK-NEXT: unreachable +; CHECK: [[IF_END]]: +; CHECK-NEXT: catchret from [[TMP1]] to label %[[SETJMP_DISPATCH]] +; +entry: + %x = alloca i32, i32 %size, align 4 + call void @llvm.lifetime.start.p0(i64 -1, ptr %x) + %call = call i32 @setjmp(ptr @buf) returns_twice + %cmp = icmp eq i32 %call, 0 + br i1 %cmp, label %if, label %else + +if: + call void @dummy() + ret void + +else: + call void @llvm.lifetime.end.p0(i64 -1, ptr %x) + ret void +} diff --git a/llvm/test/CodeGen/WebAssembly/lower-em-sjlj-debuginfo.ll b/llvm/test/CodeGen/WebAssembly/lower-em-sjlj-debuginfo.ll index fec9836..bab8403 100644 --- a/llvm/test/CodeGen/WebAssembly/lower-em-sjlj-debuginfo.ll +++ b/llvm/test/CodeGen/WebAssembly/lower-em-sjlj-debuginfo.ll @@ -16,10 +16,10 @@ entry: call void @foo(), !dbg !7 ret void, !dbg !8 ; CHECK: entry: - ; CHECK-NEXT: %functionInvocationId = alloca i32, align 4, !dbg ![[DL0:.*]] + ; CHECK-NEXT: %buf = alloca [1 x %struct.__jmp_buf_tag], align 16, !dbg ![[DL0:.*]] + ; CHECK-NEXT: %functionInvocationId = alloca i32, align 4, !dbg ![[DL0]] ; CHECK: entry.split: - ; CHECK: alloca {{.*}}, !dbg ![[DL0]] ; CHECK: call void @__wasm_setjmp{{.*}}, !dbg ![[DL1:.*]] ; CHECK-NEXT: br {{.*}}, !dbg ![[DL2:.*]] diff --git a/llvm/test/CodeGen/WebAssembly/lower-em-sjlj.ll b/llvm/test/CodeGen/WebAssembly/lower-em-sjlj.ll index b584342..51dcf2f 100644 --- 
a/llvm/test/CodeGen/WebAssembly/lower-em-sjlj.ll +++ b/llvm/test/CodeGen/WebAssembly/lower-em-sjlj.ll @@ -22,17 +22,17 @@ entry: call void @longjmp(ptr %buf, i32 1) #1 unreachable ; CHECK: entry: +; CHECK-NEXT: %buf = alloca [1 x %struct.__jmp_buf_tag], align 16 ; CHECK-NEXT: %functionInvocationId = alloca i32, align 4 ; CHECK-NEXT: br label %entry.split ; CHECK: entry.split -; CHECK-NEXT: %[[BUF:.*]] = alloca [1 x %struct.__jmp_buf_tag] -; CHECK-NEXT: call void @__wasm_setjmp(ptr %[[BUF]], i32 1, ptr %functionInvocationId) +; CHECK-NEXT: call void @__wasm_setjmp(ptr %buf, i32 1, ptr %functionInvocationId) ; CHECK-NEXT: br label %entry.split.split ; CHECK: entry.split.split: ; CHECK-NEXT: phi i32 [ 0, %entry.split ], [ %[[LONGJMP_RESULT:.*]], %if.end ] -; CHECK-NEXT: %[[JMPBUF:.*]] = ptrtoint ptr %[[BUF]] to [[PTR]] +; CHECK-NEXT: %[[JMPBUF:.*]] = ptrtoint ptr %buf to [[PTR]] ; CHECK-NEXT: store [[PTR]] 0, ptr @__THREW__ ; CHECK-NEXT: call cc{{.*}} void @__invoke_void_[[PTR]]_i32(ptr @emscripten_longjmp, [[PTR]] %[[JMPBUF]], i32 1) ; CHECK-NEXT: %[[__THREW__VAL:.*]] = load [[PTR]], ptr @__THREW__ diff --git a/llvm/test/CodeGen/WebAssembly/lower-wasm-ehsjlj.ll b/llvm/test/CodeGen/WebAssembly/lower-wasm-ehsjlj.ll index b4c93c4..9de6652 100644 --- a/llvm/test/CodeGen/WebAssembly/lower-wasm-ehsjlj.ll +++ b/llvm/test/CodeGen/WebAssembly/lower-wasm-ehsjlj.ll @@ -108,7 +108,7 @@ catch: ; preds = %catch.start call void @__cxa_end_catch() [ "funclet"(token %2) ] catchret from %2 to label %catchret.dest ; CHECK: catch: ; preds = %catch.start -; CHECK-NEXT: %exn = load ptr, ptr %exn.slot6, align 4 +; CHECK-NEXT: %exn = load ptr, ptr %exn.slot, align 4 ; CHECK-NEXT: %5 = call ptr @__cxa_begin_catch(ptr %exn) #3 [ "funclet"(token %2) ] ; CHECK-NEXT: invoke void @__cxa_end_catch() [ "funclet"(token %2) ] ; CHECK-NEXT: to label %.noexc unwind label %catch.dispatch.longjmp diff --git a/llvm/test/CodeGen/WebAssembly/lower-wasm-sjlj.ll b/llvm/test/CodeGen/WebAssembly/lower-wasm-sjlj.ll index 82c04e2..e1cb859 100644 --- a/llvm/test/CodeGen/WebAssembly/lower-wasm-sjlj.ll +++ b/llvm/test/CodeGen/WebAssembly/lower-wasm-sjlj.ll @@ -25,26 +25,24 @@ entry: unreachable ; CHECK: entry: +; CHECK-NEXT: %buf = alloca [1 x %struct.__jmp_buf_tag], align 16 ; CHECK-NEXT: %functionInvocationId = alloca i32, align 4 ; CHECK-NEXT: br label %setjmp.dispatch ; CHECK: setjmp.dispatch: ; CHECK-NEXT: %[[VAL2:.*]] = phi i32 [ %val, %if.end ], [ undef, %entry ] -; CHECK-NEXT: %[[BUF:.*]] = phi ptr [ %[[BUF2:.*]], %if.end ], [ undef, %entry ] ; CHECK-NEXT: %label.phi = phi i32 [ %label, %if.end ], [ -1, %entry ] ; CHECK-NEXT: switch i32 %label.phi, label %entry.split [ ; CHECK-NEXT: i32 1, label %entry.split.split ; CHECK-NEXT: ] ; CHECK: entry.split: -; CHECK-NEXT: %buf = alloca [1 x %struct.__jmp_buf_tag], align 16 ; CHECK-NEXT: call void @__wasm_setjmp(ptr %buf, i32 1, ptr %functionInvocationId) ; CHECK-NEXT: br label %entry.split.split ; CHECK: entry.split.split: -; CHECK-NEXT: %[[BUF2]] = phi ptr [ %[[BUF]], %setjmp.dispatch ], [ %buf, %entry.split ] ; CHECK-NEXT: %setjmp.ret = phi i32 [ 0, %entry.split ], [ %[[VAL2]], %setjmp.dispatch ] -; CHECK-NEXT: invoke void @__wasm_longjmp(ptr %[[BUF2]], i32 1) +; CHECK-NEXT: invoke void @__wasm_longjmp(ptr %buf, i32 1) ; CHECK-NEXT: to label %.noexc unwind label %catch.dispatch.longjmp ; CHECK: .noexc: diff --git a/llvm/test/CodeGen/WebAssembly/simd-arith.ll b/llvm/test/CodeGen/WebAssembly/simd-arith.ll index e3607e1..36637e1 100644 --- a/llvm/test/CodeGen/WebAssembly/simd-arith.ll +++ 
b/llvm/test/CodeGen/WebAssembly/simd-arith.ll @@ -199,139 +199,17 @@ define <16 x i8> @mul_v16i8(<16 x i8> %x, <16 x i8> %y) { ; SIMD128-LABEL: mul_v16i8: ; SIMD128: .functype mul_v16i8 (v128, v128) -> (v128) ; SIMD128-NEXT: # %bb.0: -; SIMD128-NEXT: i8x16.extract_lane_u $push4=, $0, 0 -; SIMD128-NEXT: i8x16.extract_lane_u $push3=, $1, 0 -; SIMD128-NEXT: i32.mul $push5=, $pop4, $pop3 -; SIMD128-NEXT: i8x16.splat $push6=, $pop5 -; SIMD128-NEXT: i8x16.extract_lane_u $push1=, $0, 1 -; SIMD128-NEXT: i8x16.extract_lane_u $push0=, $1, 1 -; SIMD128-NEXT: i32.mul $push2=, $pop1, $pop0 -; SIMD128-NEXT: i8x16.replace_lane $push7=, $pop6, 1, $pop2 -; SIMD128-NEXT: i8x16.extract_lane_u $push9=, $0, 2 -; SIMD128-NEXT: i8x16.extract_lane_u $push8=, $1, 2 -; SIMD128-NEXT: i32.mul $push10=, $pop9, $pop8 -; SIMD128-NEXT: i8x16.replace_lane $push11=, $pop7, 2, $pop10 -; SIMD128-NEXT: i8x16.extract_lane_u $push13=, $0, 3 -; SIMD128-NEXT: i8x16.extract_lane_u $push12=, $1, 3 -; SIMD128-NEXT: i32.mul $push14=, $pop13, $pop12 -; SIMD128-NEXT: i8x16.replace_lane $push15=, $pop11, 3, $pop14 -; SIMD128-NEXT: i8x16.extract_lane_u $push17=, $0, 4 -; SIMD128-NEXT: i8x16.extract_lane_u $push16=, $1, 4 -; SIMD128-NEXT: i32.mul $push18=, $pop17, $pop16 -; SIMD128-NEXT: i8x16.replace_lane $push19=, $pop15, 4, $pop18 -; SIMD128-NEXT: i8x16.extract_lane_u $push21=, $0, 5 -; SIMD128-NEXT: i8x16.extract_lane_u $push20=, $1, 5 -; SIMD128-NEXT: i32.mul $push22=, $pop21, $pop20 -; SIMD128-NEXT: i8x16.replace_lane $push23=, $pop19, 5, $pop22 -; SIMD128-NEXT: i8x16.extract_lane_u $push25=, $0, 6 -; SIMD128-NEXT: i8x16.extract_lane_u $push24=, $1, 6 -; SIMD128-NEXT: i32.mul $push26=, $pop25, $pop24 -; SIMD128-NEXT: i8x16.replace_lane $push27=, $pop23, 6, $pop26 -; SIMD128-NEXT: i8x16.extract_lane_u $push29=, $0, 7 -; SIMD128-NEXT: i8x16.extract_lane_u $push28=, $1, 7 -; SIMD128-NEXT: i32.mul $push30=, $pop29, $pop28 -; SIMD128-NEXT: i8x16.replace_lane $push31=, $pop27, 7, $pop30 -; SIMD128-NEXT: i8x16.extract_lane_u $push33=, $0, 8 -; SIMD128-NEXT: i8x16.extract_lane_u $push32=, $1, 8 -; SIMD128-NEXT: i32.mul $push34=, $pop33, $pop32 -; SIMD128-NEXT: i8x16.replace_lane $push35=, $pop31, 8, $pop34 -; SIMD128-NEXT: i8x16.extract_lane_u $push37=, $0, 9 -; SIMD128-NEXT: i8x16.extract_lane_u $push36=, $1, 9 -; SIMD128-NEXT: i32.mul $push38=, $pop37, $pop36 -; SIMD128-NEXT: i8x16.replace_lane $push39=, $pop35, 9, $pop38 -; SIMD128-NEXT: i8x16.extract_lane_u $push41=, $0, 10 -; SIMD128-NEXT: i8x16.extract_lane_u $push40=, $1, 10 -; SIMD128-NEXT: i32.mul $push42=, $pop41, $pop40 -; SIMD128-NEXT: i8x16.replace_lane $push43=, $pop39, 10, $pop42 -; SIMD128-NEXT: i8x16.extract_lane_u $push45=, $0, 11 -; SIMD128-NEXT: i8x16.extract_lane_u $push44=, $1, 11 -; SIMD128-NEXT: i32.mul $push46=, $pop45, $pop44 -; SIMD128-NEXT: i8x16.replace_lane $push47=, $pop43, 11, $pop46 -; SIMD128-NEXT: i8x16.extract_lane_u $push49=, $0, 12 -; SIMD128-NEXT: i8x16.extract_lane_u $push48=, $1, 12 -; SIMD128-NEXT: i32.mul $push50=, $pop49, $pop48 -; SIMD128-NEXT: i8x16.replace_lane $push51=, $pop47, 12, $pop50 -; SIMD128-NEXT: i8x16.extract_lane_u $push53=, $0, 13 -; SIMD128-NEXT: i8x16.extract_lane_u $push52=, $1, 13 -; SIMD128-NEXT: i32.mul $push54=, $pop53, $pop52 -; SIMD128-NEXT: i8x16.replace_lane $push55=, $pop51, 13, $pop54 -; SIMD128-NEXT: i8x16.extract_lane_u $push57=, $0, 14 -; SIMD128-NEXT: i8x16.extract_lane_u $push56=, $1, 14 -; SIMD128-NEXT: i32.mul $push58=, $pop57, $pop56 -; SIMD128-NEXT: i8x16.replace_lane $push59=, $pop55, 14, $pop58 -; 
SIMD128-NEXT: i8x16.extract_lane_u $push61=, $0, 15 -; SIMD128-NEXT: i8x16.extract_lane_u $push60=, $1, 15 -; SIMD128-NEXT: i32.mul $push62=, $pop61, $pop60 -; SIMD128-NEXT: i8x16.replace_lane $push63=, $pop59, 15, $pop62 -; SIMD128-NEXT: return $pop63 +; SIMD128-NEXT: i16x8.extmul_low_i8x16_u $push1=, $0, $1 +; SIMD128-NEXT: i16x8.extmul_high_i8x16_u $push0=, $0, $1 +; SIMD128-NEXT: i8x16.shuffle $push2=, $pop1, $pop0, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 +; SIMD128-NEXT: return $pop2 ; ; SIMD128-FAST-LABEL: mul_v16i8: ; SIMD128-FAST: .functype mul_v16i8 (v128, v128) -> (v128) ; SIMD128-FAST-NEXT: # %bb.0: -; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push5=, $0, 0 -; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push4=, $1, 0 -; SIMD128-FAST-NEXT: i32.mul $push6=, $pop5, $pop4 -; SIMD128-FAST-NEXT: i8x16.splat $push7=, $pop6 -; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push2=, $0, 1 -; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push1=, $1, 1 -; SIMD128-FAST-NEXT: i32.mul $push3=, $pop2, $pop1 -; SIMD128-FAST-NEXT: i8x16.replace_lane $push8=, $pop7, 1, $pop3 -; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push10=, $0, 2 -; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push9=, $1, 2 -; SIMD128-FAST-NEXT: i32.mul $push11=, $pop10, $pop9 -; SIMD128-FAST-NEXT: i8x16.replace_lane $push12=, $pop8, 2, $pop11 -; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push14=, $0, 3 -; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push13=, $1, 3 -; SIMD128-FAST-NEXT: i32.mul $push15=, $pop14, $pop13 -; SIMD128-FAST-NEXT: i8x16.replace_lane $push16=, $pop12, 3, $pop15 -; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push18=, $0, 4 -; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push17=, $1, 4 -; SIMD128-FAST-NEXT: i32.mul $push19=, $pop18, $pop17 -; SIMD128-FAST-NEXT: i8x16.replace_lane $push20=, $pop16, 4, $pop19 -; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push22=, $0, 5 -; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push21=, $1, 5 -; SIMD128-FAST-NEXT: i32.mul $push23=, $pop22, $pop21 -; SIMD128-FAST-NEXT: i8x16.replace_lane $push24=, $pop20, 5, $pop23 -; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push26=, $0, 6 -; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push25=, $1, 6 -; SIMD128-FAST-NEXT: i32.mul $push27=, $pop26, $pop25 -; SIMD128-FAST-NEXT: i8x16.replace_lane $push28=, $pop24, 6, $pop27 -; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push30=, $0, 7 -; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push29=, $1, 7 -; SIMD128-FAST-NEXT: i32.mul $push31=, $pop30, $pop29 -; SIMD128-FAST-NEXT: i8x16.replace_lane $push32=, $pop28, 7, $pop31 -; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push34=, $0, 8 -; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push33=, $1, 8 -; SIMD128-FAST-NEXT: i32.mul $push35=, $pop34, $pop33 -; SIMD128-FAST-NEXT: i8x16.replace_lane $push36=, $pop32, 8, $pop35 -; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push38=, $0, 9 -; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push37=, $1, 9 -; SIMD128-FAST-NEXT: i32.mul $push39=, $pop38, $pop37 -; SIMD128-FAST-NEXT: i8x16.replace_lane $push40=, $pop36, 9, $pop39 -; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push42=, $0, 10 -; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push41=, $1, 10 -; SIMD128-FAST-NEXT: i32.mul $push43=, $pop42, $pop41 -; SIMD128-FAST-NEXT: i8x16.replace_lane $push44=, $pop40, 10, $pop43 -; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push46=, $0, 11 -; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push45=, $1, 11 -; SIMD128-FAST-NEXT: i32.mul $push47=, $pop46, $pop45 -; SIMD128-FAST-NEXT: i8x16.replace_lane $push48=, $pop44, 11, $pop47 -; SIMD128-FAST-NEXT: i8x16.extract_lane_u 
$push50=, $0, 12 -; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push49=, $1, 12 -; SIMD128-FAST-NEXT: i32.mul $push51=, $pop50, $pop49 -; SIMD128-FAST-NEXT: i8x16.replace_lane $push52=, $pop48, 12, $pop51 -; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push54=, $0, 13 -; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push53=, $1, 13 -; SIMD128-FAST-NEXT: i32.mul $push55=, $pop54, $pop53 -; SIMD128-FAST-NEXT: i8x16.replace_lane $push56=, $pop52, 13, $pop55 -; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push58=, $0, 14 -; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push57=, $1, 14 -; SIMD128-FAST-NEXT: i32.mul $push59=, $pop58, $pop57 -; SIMD128-FAST-NEXT: i8x16.replace_lane $push60=, $pop56, 14, $pop59 -; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push62=, $0, 15 -; SIMD128-FAST-NEXT: i8x16.extract_lane_u $push61=, $1, 15 -; SIMD128-FAST-NEXT: i32.mul $push63=, $pop62, $pop61 -; SIMD128-FAST-NEXT: i8x16.replace_lane $push0=, $pop60, 15, $pop63 +; SIMD128-FAST-NEXT: i16x8.extmul_low_i8x16_u $push2=, $0, $1 +; SIMD128-FAST-NEXT: i16x8.extmul_high_i8x16_u $push1=, $0, $1 +; SIMD128-FAST-NEXT: i8x16.shuffle $push0=, $pop2, $pop1, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 ; SIMD128-FAST-NEXT: return $pop0 ; ; NO-SIMD128-LABEL: mul_v16i8: diff --git a/llvm/test/CodeGen/WebAssembly/simd-relaxed-fnma.ll b/llvm/test/CodeGen/WebAssembly/simd-relaxed-fnma.ll new file mode 100644 index 0000000..6e2d860 --- /dev/null +++ b/llvm/test/CodeGen/WebAssembly/simd-relaxed-fnma.ll @@ -0,0 +1,145 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+fp16,+simd128,+relaxed-simd | FileCheck %s --check-prefix=RELAXED +; RUN: llc < %s -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+fp16,+simd128 | FileCheck %s --check-prefix=STRICT + +target triple = "wasm32" + +define double @fsub_fmul_contract_f64(double %a, double %b, double %c) { +; RELAXED-LABEL: fsub_fmul_contract_f64: +; RELAXED: .functype fsub_fmul_contract_f64 (f64, f64, f64) -> (f64) +; RELAXED-NEXT: # %bb.0: +; RELAXED-NEXT: f64.mul $push0=, $1, $0 +; RELAXED-NEXT: f64.sub $push1=, $2, $pop0 +; RELAXED-NEXT: return $pop1 +; +; STRICT-LABEL: fsub_fmul_contract_f64: +; STRICT: .functype fsub_fmul_contract_f64 (f64, f64, f64) -> (f64) +; STRICT-NEXT: # %bb.0: +; STRICT-NEXT: f64.mul $push0=, $1, $0 +; STRICT-NEXT: f64.sub $push1=, $2, $pop0 +; STRICT-NEXT: return $pop1 + %mul = fmul contract double %b, %a + %sub = fsub contract double %c, %mul + ret double %sub +} + +define <4 x float> @fsub_fmul_contract_4xf32(<4 x float> %a, <4 x float> %b, <4 x float> %c) { +; RELAXED-LABEL: fsub_fmul_contract_4xf32: +; RELAXED: .functype fsub_fmul_contract_4xf32 (v128, v128, v128) -> (v128) +; RELAXED-NEXT: # %bb.0: +; RELAXED-NEXT: f32x4.relaxed_nmadd $push0=, $2, $1, $0 +; RELAXED-NEXT: return $pop0 +; +; STRICT-LABEL: fsub_fmul_contract_4xf32: +; STRICT: .functype fsub_fmul_contract_4xf32 (v128, v128, v128) -> (v128) +; STRICT-NEXT: # %bb.0: +; STRICT-NEXT: f32x4.mul $push0=, $1, $0 +; STRICT-NEXT: f32x4.sub $push1=, $2, $pop0 +; STRICT-NEXT: return $pop1 + %mul = fmul contract <4 x float> %b, %a + %sub = fsub contract <4 x float> %c, %mul + ret <4 x float> %sub +} + + +define <8 x half> @fsub_fmul_contract_8xf16(<8 x half> %a, <8 x half> %b, <8 x half> %c) { +; RELAXED-LABEL: fsub_fmul_contract_8xf16: +; RELAXED: .functype fsub_fmul_contract_8xf16 (v128, v128,
v128) -> (v128) +; RELAXED-NEXT: # %bb.0: +; RELAXED-NEXT: f16x8.relaxed_nmadd $push0=, $2, $1, $0 +; RELAXED-NEXT: return $pop0 +; +; STRICT-LABEL: fsub_fmul_contract_8xf16: +; STRICT: .functype fsub_fmul_contract_8xf16 (v128, v128, v128) -> (v128) +; STRICT-NEXT: # %bb.0: +; STRICT-NEXT: f16x8.mul $push0=, $1, $0 +; STRICT-NEXT: f16x8.sub $push1=, $2, $pop0 +; STRICT-NEXT: return $pop1 + %mul = fmul contract <8 x half> %b, %a + %sub = fsub contract <8 x half> %c, %mul + ret <8 x half> %sub +} + + +define <4 x float> @fsub_fmul_4xf32(<4 x float> %a, <4 x float> %b, <4 x float> %c) { +; RELAXED-LABEL: fsub_fmul_4xf32: +; RELAXED: .functype fsub_fmul_4xf32 (v128, v128, v128) -> (v128) +; RELAXED-NEXT: # %bb.0: +; RELAXED-NEXT: f32x4.mul $push0=, $1, $0 +; RELAXED-NEXT: f32x4.sub $push1=, $2, $pop0 +; RELAXED-NEXT: return $pop1 +; +; STRICT-LABEL: fsub_fmul_4xf32: +; STRICT: .functype fsub_fmul_4xf32 (v128, v128, v128) -> (v128) +; STRICT-NEXT: # %bb.0: +; STRICT-NEXT: f32x4.mul $push0=, $1, $0 +; STRICT-NEXT: f32x4.sub $push1=, $2, $pop0 +; STRICT-NEXT: return $pop1 + %mul = fmul <4 x float> %b, %a + %sub = fsub contract <4 x float> %c, %mul + ret <4 x float> %sub +} + +define <8 x float> @fsub_fmul_contract_8xf32(<8 x float> %a, <8 x float> %b, <8 x float> %c) { +; RELAXED-LABEL: fsub_fmul_contract_8xf32: +; RELAXED: .functype fsub_fmul_contract_8xf32 (i32, v128, v128, v128, v128, v128, v128) -> () +; RELAXED-NEXT: # %bb.0: +; RELAXED-NEXT: f32x4.relaxed_nmadd $push0=, $6, $4, $2 +; RELAXED-NEXT: v128.store 16($0), $pop0 +; RELAXED-NEXT: f32x4.relaxed_nmadd $push1=, $5, $3, $1 +; RELAXED-NEXT: v128.store 0($0), $pop1 +; RELAXED-NEXT: return +; +; STRICT-LABEL: fsub_fmul_contract_8xf32: +; STRICT: .functype fsub_fmul_contract_8xf32 (i32, v128, v128, v128, v128, v128, v128) -> () +; STRICT-NEXT: # %bb.0: +; STRICT-NEXT: f32x4.mul $push0=, $4, $2 +; STRICT-NEXT: f32x4.sub $push1=, $6, $pop0 +; STRICT-NEXT: v128.store 16($0), $pop1 +; STRICT-NEXT: f32x4.mul $push2=, $3, $1 +; STRICT-NEXT: f32x4.sub $push3=, $5, $pop2 +; STRICT-NEXT: v128.store 0($0), $pop3 +; STRICT-NEXT: return + %mul = fmul contract <8 x float> %b, %a + %sub = fsub contract <8 x float> %c, %mul + ret <8 x float> %sub +} + + +define <2 x double> @fsub_fmul_contract_2xf64(<2 x double> %a, <2 x double> %b, <2 x double> %c) { +; RELAXED-LABEL: fsub_fmul_contract_2xf64: +; RELAXED: .functype fsub_fmul_contract_2xf64 (v128, v128, v128) -> (v128) +; RELAXED-NEXT: # %bb.0: +; RELAXED-NEXT: f64x2.relaxed_nmadd $push0=, $2, $1, $0 +; RELAXED-NEXT: return $pop0 +; +; STRICT-LABEL: fsub_fmul_contract_2xf64: +; STRICT: .functype fsub_fmul_contract_2xf64 (v128, v128, v128) -> (v128) +; STRICT-NEXT: # %bb.0: +; STRICT-NEXT: f64x2.mul $push0=, $1, $0 +; STRICT-NEXT: f64x2.sub $push1=, $2, $pop0 +; STRICT-NEXT: return $pop1 + %mul = fmul contract <2 x double> %b, %a + %sub = fsub contract <2 x double> %c, %mul + ret <2 x double> %sub +} + +define float @fsub_fmul_contract_f32(float %a, float %b, float %c) { +; RELAXED-LABEL: fsub_fmul_contract_f32: +; RELAXED: .functype fsub_fmul_contract_f32 (f32, f32, f32) -> (f32) +; RELAXED-NEXT: # %bb.0: +; RELAXED-NEXT: f32.mul $push0=, $1, $0 +; RELAXED-NEXT: f32.sub $push1=, $2, $pop0 +; RELAXED-NEXT: return $pop1 +; +; STRICT-LABEL: fsub_fmul_contract_f32: +; STRICT: .functype fsub_fmul_contract_f32 (f32, f32, f32) -> (f32) +; STRICT-NEXT: # %bb.0: +; STRICT-NEXT: f32.mul $push0=, $1, $0 +; STRICT-NEXT: f32.sub $push1=, $2, $pop0 +; STRICT-NEXT: return $pop1 + %mul = fmul contract float %b, %a + 
%sub = fsub contract float %c, %mul + ret float %sub +} + diff --git a/llvm/test/CodeGen/WebAssembly/vector-reduce.ll b/llvm/test/CodeGen/WebAssembly/vector-reduce.ll index 1d194b6..4c30a3a 100644 --- a/llvm/test/CodeGen/WebAssembly/vector-reduce.ll +++ b/llvm/test/CodeGen/WebAssembly/vector-reduce.ll @@ -116,40 +116,28 @@ define i8 @pairwise_mul_v16i8(<16 x i8> %arg) { ; SIMD128-LABEL: pairwise_mul_v16i8: ; SIMD128: .functype pairwise_mul_v16i8 (v128) -> (i32) ; SIMD128-NEXT: # %bb.0: -; SIMD128-NEXT: i8x16.extract_lane_u $push26=, $0, 0 -; SIMD128-NEXT: i8x16.shuffle $push32=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0 -; SIMD128-NEXT: local.tee $push31=, $1=, $pop32 -; SIMD128-NEXT: i8x16.extract_lane_u $push25=, $pop31, 0 -; SIMD128-NEXT: i32.mul $push27=, $pop26, $pop25 -; SIMD128-NEXT: i8x16.extract_lane_u $push23=, $0, 4 -; SIMD128-NEXT: i8x16.extract_lane_u $push22=, $1, 4 -; SIMD128-NEXT: i32.mul $push24=, $pop23, $pop22 -; SIMD128-NEXT: i32.mul $push28=, $pop27, $pop24 -; SIMD128-NEXT: i8x16.extract_lane_u $push19=, $0, 2 -; SIMD128-NEXT: i8x16.extract_lane_u $push18=, $1, 2 -; SIMD128-NEXT: i32.mul $push20=, $pop19, $pop18 -; SIMD128-NEXT: i8x16.extract_lane_u $push16=, $0, 6 -; SIMD128-NEXT: i8x16.extract_lane_u $push15=, $1, 6 -; SIMD128-NEXT: i32.mul $push17=, $pop16, $pop15 -; SIMD128-NEXT: i32.mul $push21=, $pop20, $pop17 -; SIMD128-NEXT: i32.mul $push29=, $pop28, $pop21 -; SIMD128-NEXT: i8x16.extract_lane_u $push11=, $0, 1 -; SIMD128-NEXT: i8x16.extract_lane_u $push10=, $1, 1 -; SIMD128-NEXT: i32.mul $push12=, $pop11, $pop10 -; SIMD128-NEXT: i8x16.extract_lane_u $push8=, $0, 5 -; SIMD128-NEXT: i8x16.extract_lane_u $push7=, $1, 5 -; SIMD128-NEXT: i32.mul $push9=, $pop8, $pop7 -; SIMD128-NEXT: i32.mul $push13=, $pop12, $pop9 -; SIMD128-NEXT: i8x16.extract_lane_u $push4=, $0, 3 -; SIMD128-NEXT: i8x16.extract_lane_u $push3=, $1, 3 -; SIMD128-NEXT: i32.mul $push5=, $pop4, $pop3 -; SIMD128-NEXT: i8x16.extract_lane_u $push1=, $0, 7 -; SIMD128-NEXT: i8x16.extract_lane_u $push0=, $1, 7 -; SIMD128-NEXT: i32.mul $push2=, $pop1, $pop0 -; SIMD128-NEXT: i32.mul $push6=, $pop5, $pop2 -; SIMD128-NEXT: i32.mul $push14=, $pop13, $pop6 -; SIMD128-NEXT: i32.mul $push30=, $pop29, $pop14 -; SIMD128-NEXT: return $pop30 +; SIMD128-NEXT: i8x16.shuffle $push20=, $0, $0, 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0 +; SIMD128-NEXT: local.tee $push19=, $1=, $pop20 +; SIMD128-NEXT: i16x8.extmul_low_i8x16_u $push1=, $0, $pop19 +; SIMD128-NEXT: i16x8.extmul_high_i8x16_u $push0=, $0, $1 +; SIMD128-NEXT: i8x16.shuffle $push18=, $pop1, $pop0, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 +; SIMD128-NEXT: local.tee $push17=, $0=, $pop18 +; SIMD128-NEXT: i8x16.shuffle $push16=, $0, $0, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +; SIMD128-NEXT: local.tee $push15=, $1=, $pop16 +; SIMD128-NEXT: i16x8.extmul_low_i8x16_u $push3=, $pop17, $pop15 +; SIMD128-NEXT: i16x8.extmul_high_i8x16_u $push2=, $0, $1 +; SIMD128-NEXT: i8x16.shuffle $push14=, $pop3, $pop2, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 +; SIMD128-NEXT: local.tee $push13=, $0=, $pop14 +; SIMD128-NEXT: i8x16.shuffle $push12=, $0, $0, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +; SIMD128-NEXT: local.tee $push11=, $1=, $pop12 +; SIMD128-NEXT: i16x8.extmul_low_i8x16_u $push5=, $pop13, $pop11 +; SIMD128-NEXT: i16x8.extmul_high_i8x16_u $push4=, $0, $1 +; SIMD128-NEXT: i8x16.shuffle $push10=, $pop5, $pop4, 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 +; SIMD128-NEXT: local.tee 
$push9=, $0=, $pop10 +; SIMD128-NEXT: i8x16.shuffle $push6=, $0, $0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 +; SIMD128-NEXT: i16x8.extmul_low_i8x16_u $push7=, $pop9, $pop6 +; SIMD128-NEXT: i8x16.extract_lane_u $push8=, $pop7, 0 +; SIMD128-NEXT: return $pop8 %res = tail call i8 @llvm.vector.reduce.mul.v16i8(<16 x i8> %arg) ret i8 %res } diff --git a/llvm/test/CodeGen/X86/swap.ll b/llvm/test/CodeGen/X86/swap.ll index 1dc454dd..3330403 100644 --- a/llvm/test/CodeGen/X86/swap.ll +++ b/llvm/test/CodeGen/X86/swap.ll @@ -113,21 +113,17 @@ define dso_local void @onealloc_readback_1(ptr nocapture %a, ptr nocapture %b) l ; ; AA-LABEL: onealloc_readback_1: ; AA: # %bb.0: # %entry -; AA-NEXT: vmovups (%rdi), %xmm0 -; AA-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) ; AA-NEXT: vmovups (%rsi), %xmm0 ; AA-NEXT: vmovups %xmm0, (%rdi) ; AA-NEXT: retq entry: %alloc = alloca [16 x i8], i8 2, align 1 %part1 = getelementptr inbounds [16 x i8], ptr %alloc, i64 1, i64 0 - call void @llvm.lifetime.start.p0(i64 16, ptr nonnull %part1) - call void @llvm.lifetime.start.p0(i64 16, ptr nonnull %alloc) + call void @llvm.lifetime.start.p0(i64 32, ptr nonnull %alloc) call void @llvm.memcpy.p0.p0.i64(ptr nonnull align 1 %part1, ptr align 1 %a, i64 16, i1 false) call void @llvm.memcpy.p0.p0.i64(ptr nonnull align 1 %alloc, ptr align 1 %b, i64 16, i1 false) - call void @llvm.lifetime.end.p0(i64 16, ptr nonnull %part1) tail call void @llvm.memcpy.p0.p0.i64(ptr nonnull align 1 %a, ptr align 1 %alloc, i64 16, i1 false) - call void @llvm.lifetime.end.p0(i64 16, ptr nonnull %alloc) + call void @llvm.lifetime.end.p0(i64 32, ptr nonnull %alloc) ret void } @@ -144,19 +140,16 @@ define dso_local void @onealloc_readback_2(ptr nocapture %a, ptr nocapture %b) l ; AA-LABEL: onealloc_readback_2: ; AA: # %bb.0: # %entry ; AA-NEXT: vmovups (%rsi), %xmm0 -; AA-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) ; AA-NEXT: vmovups %xmm0, (%rdi) ; AA-NEXT: retq entry: %alloc = alloca [16 x i8], i8 2, align 1 %part2 = getelementptr inbounds [16 x i8], ptr %alloc, i64 1, i64 0 - call void @llvm.lifetime.start.p0(i64 16, ptr nonnull %alloc) - call void @llvm.lifetime.start.p0(i64 16, ptr nonnull %part2) + call void @llvm.lifetime.start.p0(i64 32, ptr nonnull %alloc) call void @llvm.memcpy.p0.p0.i64(ptr nonnull align 1 %alloc, ptr align 1 %a, i64 16, i1 false) call void @llvm.memcpy.p0.p0.i64(ptr nonnull align 1 %part2, ptr align 1 %b, i64 16, i1 false) - call void @llvm.lifetime.end.p0(i64 16, ptr nonnull %alloc) tail call void @llvm.memcpy.p0.p0.i64(ptr nonnull align 1 %a, ptr align 1 %part2, i64 16, i1 false) - call void @llvm.lifetime.end.p0(i64 16, ptr nonnull %part2) + call void @llvm.lifetime.end.p0(i64 32, ptr nonnull %alloc) ret void } diff --git a/llvm/test/DebugInfo/NVPTX/dbg-declare-alloca.ll b/llvm/test/DebugInfo/NVPTX/dbg-declare-alloca.ll index 8a9052c..fa42481 100644 --- a/llvm/test/DebugInfo/NVPTX/dbg-declare-alloca.ll +++ b/llvm/test/DebugInfo/NVPTX/dbg-declare-alloca.ll @@ -6,16 +6,12 @@ ; CHECK: .visible .func use_dbg_declare() ; CHECK: .local .align 8 .b8 __local_depot0[8]; ; CHECK: mov.b64 %SPL, __local_depot0; -; CHECK: add.u64 %rd1, %SP, 0; ; CHECK: .loc 1 5 3 // t.c:5:3 ; CHECK: { // callseq 0, 0 ; CHECK: .param .b64 param0; +; CHECK: add.u64 %rd1, %SP, 0; ; CHECK: st.param.b64 [param0], %rd1; -; CHECK: call.uni -; CHECK: escape_foo, -; CHECK: ( -; CHECK: param0 -; CHECK: ); +; CHECK: call.uni escape_foo, (param0); ; CHECK: } // callseq 0 ; CHECK: .loc 1 6 1 // t.c:6:1 ; CHECK: ret; diff --git 
a/llvm/test/DebugInfo/X86/branch-folder-dbg.mir b/llvm/test/DebugInfo/X86/branch-folder-dbg.mir index 7832598..11b37218 100644 --- a/llvm/test/DebugInfo/X86/branch-folder-dbg.mir +++ b/llvm/test/DebugInfo/X86/branch-folder-dbg.mir @@ -9,11 +9,15 @@ ## can be killed. ## ## Check DBG_PHIs are deleted rather than hoisted (implicit-check-not). +## +## Check DBG_LABELs are hoisted and not modified (and don't cause a crash). # CHECK: bb.0 # CHECK: CALL64pcrel32 @f, csr_64, implicit $rsp, implicit $ssp, implicit-def $rsp, implicit-def $ssp, implicit-def $rax ## --- Start splice from bb.2.if.else (and debug instructions from bb.1.if.then) --- +# CHECK-NEXT: DBG_LABEL 0 # CHECK-NEXT: DBG_VALUE $noreg, $noreg, ![[#]], !DIExpression(), debug-location ![[#]] +# CHECK-NEXT: DBG_LABEL 1 # CHECK-NEXT: DBG_VALUE $noreg, $noreg, ![[#]], !DIExpression(), debug-location ![[#]] # CHECK-NEXT: $edi = MOV32r0 implicit-def dead $eflags, debug-instr-number 2, debug-location !DILocation(line: 0, scope: ![[#]]) # CHECK-NEXT: DBG_VALUE $noreg, $noreg, ![[#]], !DIExpression(DW_OP_LLVM_arg, 0), debug-location ![[#]] @@ -98,6 +102,7 @@ body: | successors: %bb.3(0x80000000) DBG_PHI $esp, 3 + DBG_LABEL 0 DBG_VALUE $esi, $noreg, !11, !DIExpression(), debug-location !13 $edi = MOV32r0 implicit-def dead $eflags, debug-instr-number 1, debug-location !14 DBG_INSTR_REF !11, !DIExpression(DW_OP_LLVM_arg, 0), dbg-instr-ref(1, 0), debug-location !13 @@ -109,6 +114,7 @@ body: | successors: %bb.3(0x80000000) DBG_PHI $esp, 4 + DBG_LABEL 1 DBG_VALUE $esp, $noreg, !11, !DIExpression(), debug-location !13 $edi = MOV32r0 implicit-def dead $eflags, debug-instr-number 2, debug-location !16 DBG_INSTR_REF !11, !DIExpression(DW_OP_LLVM_arg, 0), dbg-instr-ref(2, 0), debug-location !13 diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vflat.s b/llvm/test/MC/AMDGPU/gfx1250_asm_vflat.s index b9eb2d2..c5288a7 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vflat.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vflat.s @@ -249,6 +249,250 @@ flat_load_monitor_b32 v1, v[2:3] offset:64 // GFX1250: flat_load_monitor_b32 v1, v[2:3] offset:64 ; encoding: [0x7c,0x00,0x1c,0xec,0x01,0x00,0x00,0x00,0x02,0x40,0x00,0x00] // GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU +flat_load_monitor_b32 v1, v[2:3] offset:-64 th:TH_LOAD_BYPASS scope:SCOPE_SYS +// GFX1250: flat_load_monitor_b32 v1, v[2:3] offset:-64 th:TH_LOAD_BYPASS scope:SCOPE_SYS ; encoding: [0x7c,0x00,0x1c,0xec,0x01,0x00,0x3c,0x00,0x02,0xc0,0xff,0xff] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + +flat_load_monitor_b64 v[0:1], v[2:3] +// GFX1250: flat_load_monitor_b64 v[0:1], v[2:3] ; encoding: [0x7c,0x40,0x1c,0xec,0x00,0x00,0x00,0x00,0x02,0x00,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + +flat_load_monitor_b64 v[0:1], v[2:3] offset:64 +// GFX1250: flat_load_monitor_b64 v[0:1], v[2:3] offset:64 ; encoding: [0x7c,0x40,0x1c,0xec,0x00,0x00,0x00,0x00,0x02,0x40,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + +flat_load_monitor_b64 v[0:1], v[2:3] offset:-64 th:TH_LOAD_BYPASS scope:SCOPE_SYS +// GFX1250: flat_load_monitor_b64 v[0:1], v[2:3] offset:-64 th:TH_LOAD_BYPASS scope:SCOPE_SYS ; encoding: [0x7c,0x40,0x1c,0xec,0x00,0x00,0x3c,0x00,0x02,0xc0,0xff,0xff] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + +flat_load_monitor_b128 v[0:3], v[4:5] +// GFX1250: flat_load_monitor_b128 v[0:3], v[4:5] ; encoding: 
[0x7c,0x80,0x1c,0xec,0x00,0x00,0x00,0x00,0x04,0x00,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + +flat_load_monitor_b128 v[0:3], v[4:5] offset:64 +// GFX1250: flat_load_monitor_b128 v[0:3], v[4:5] offset:64 ; encoding: [0x7c,0x80,0x1c,0xec,0x00,0x00,0x00,0x00,0x04,0x40,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + +flat_load_monitor_b128 v[0:3], v[4:5] offset:-64 th:TH_LOAD_BYPASS scope:SCOPE_SYS +// GFX1250: flat_load_monitor_b128 v[0:3], v[4:5] offset:-64 th:TH_LOAD_BYPASS scope:SCOPE_SYS ; encoding: [0x7c,0x80,0x1c,0xec,0x00,0x00,0x3c,0x00,0x04,0xc0,0xff,0xff] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + +flat_load_monitor_b32 v1, v2, s[4:5] offset:64 scale_offset +// GFX1250: flat_load_monitor_b32 v1, v2, s[4:5] offset:64 scale_offset ; encoding: [0x04,0x00,0x1c,0xec,0x01,0x00,0x01,0x00,0x02,0x40,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + +flat_load_monitor_b64 v[2:3], v2, s[4:5] offset:64 scale_offset +// GFX1250: flat_load_monitor_b64 v[2:3], v2, s[4:5] offset:64 scale_offset ; encoding: [0x04,0x40,0x1c,0xec,0x02,0x00,0x01,0x00,0x02,0x40,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + +global_store_async_from_lds_b8 v[2:3], v1, off th:TH_STORE_BYPASS scope:SCOPE_SYS +// GFX1250: global_store_async_from_lds_b8 v[2:3], v1, off th:TH_STORE_BYPASS scope:SCOPE_SYS ; encoding: [0x7c,0xc0,0x18,0xee,0x00,0x00,0xbc,0x00,0x02,0x00,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + +global_store_async_from_lds_b8 v[2:3], v1, off offset:64 +// GFX1250: global_store_async_from_lds_b8 v[2:3], v1, off offset:64 ; encoding: [0x7c,0xc0,0x18,0xee,0x00,0x00,0x80,0x00,0x02,0x40,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + +global_store_async_from_lds_b8 v[2:3], v1, off offset:-64 +// GFX1250: global_store_async_from_lds_b8 v[2:3], v1, off offset:-64 ; encoding: [0x7c,0xc0,0x18,0xee,0x00,0x00,0x80,0x00,0x02,0xc0,0xff,0xff] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + +global_store_async_from_lds_b8 v2, v1, s[2:3] th:TH_STORE_NT_HT scope:SCOPE_DEV +// GFX1250: global_store_async_from_lds_b8 v2, v1, s[2:3] th:TH_STORE_NT_HT scope:SCOPE_DEV ; encoding: [0x02,0xc0,0x18,0xee,0x00,0x00,0xe8,0x00,0x02,0x00,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + +global_store_async_from_lds_b8 v2, v1, s[2:3] offset:64 +// GFX1250: global_store_async_from_lds_b8 v2, v1, s[2:3] offset:64 ; encoding: [0x02,0xc0,0x18,0xee,0x00,0x00,0x80,0x00,0x02,0x40,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + +global_store_async_from_lds_b8 v2, v1, s[2:3] offset:-64 +// GFX1250: global_store_async_from_lds_b8 v2, v1, s[2:3] offset:-64 ; encoding: [0x02,0xc0,0x18,0xee,0x00,0x00,0x80,0x00,0x02,0xc0,0xff,0xff] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + +global_store_async_from_lds_b32 v[2:3], v1, off th:TH_STORE_BYPASS scope:SCOPE_SYS +// GFX1250: global_store_async_from_lds_b32 v[2:3], v1, off th:TH_STORE_BYPASS scope:SCOPE_SYS ; encoding: [0x7c,0x00,0x19,0xee,0x00,0x00,0xbc,0x00,0x02,0x00,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + 
+global_store_async_from_lds_b32 v[2:3], v1, off offset:64 +// GFX1250: global_store_async_from_lds_b32 v[2:3], v1, off offset:64 ; encoding: [0x7c,0x00,0x19,0xee,0x00,0x00,0x80,0x00,0x02,0x40,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + +global_store_async_from_lds_b32 v[2:3], v1, off offset:-64 +// GFX1250: global_store_async_from_lds_b32 v[2:3], v1, off offset:-64 ; encoding: [0x7c,0x00,0x19,0xee,0x00,0x00,0x80,0x00,0x02,0xc0,0xff,0xff] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + +global_store_async_from_lds_b32 v2, v1, s[2:3] th:TH_STORE_NT_HT scope:SCOPE_DEV +// GFX1250: global_store_async_from_lds_b32 v2, v1, s[2:3] th:TH_STORE_NT_HT scope:SCOPE_DEV ; encoding: [0x02,0x00,0x19,0xee,0x00,0x00,0xe8,0x00,0x02,0x00,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + +global_store_async_from_lds_b32 v2, v1, s[2:3] offset:64 +// GFX1250: global_store_async_from_lds_b32 v2, v1, s[2:3] offset:64 ; encoding: [0x02,0x00,0x19,0xee,0x00,0x00,0x80,0x00,0x02,0x40,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + +global_store_async_from_lds_b32 v2, v1, s[2:3] offset:-64 +// GFX1250: global_store_async_from_lds_b32 v2, v1, s[2:3] offset:-64 ; encoding: [0x02,0x00,0x19,0xee,0x00,0x00,0x80,0x00,0x02,0xc0,0xff,0xff] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + +global_store_async_from_lds_b64 v[2:3], v1, off th:TH_STORE_BYPASS scope:SCOPE_SYS +// GFX1250: global_store_async_from_lds_b64 v[2:3], v1, off th:TH_STORE_BYPASS scope:SCOPE_SYS ; encoding: [0x7c,0x40,0x19,0xee,0x00,0x00,0xbc,0x00,0x02,0x00,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + +global_store_async_from_lds_b64 v[2:3], v1, off offset:64 +// GFX1250: global_store_async_from_lds_b64 v[2:3], v1, off offset:64 ; encoding: [0x7c,0x40,0x19,0xee,0x00,0x00,0x80,0x00,0x02,0x40,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + +global_store_async_from_lds_b64 v[2:3], v1, off offset:-64 +// GFX1250: global_store_async_from_lds_b64 v[2:3], v1, off offset:-64 ; encoding: [0x7c,0x40,0x19,0xee,0x00,0x00,0x80,0x00,0x02,0xc0,0xff,0xff] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + +global_store_async_from_lds_b64 v2, v1, s[2:3] th:TH_STORE_NT_HT scope:SCOPE_DEV +// GFX1250: global_store_async_from_lds_b64 v2, v1, s[2:3] th:TH_STORE_NT_HT scope:SCOPE_DEV ; encoding: [0x02,0x40,0x19,0xee,0x00,0x00,0xe8,0x00,0x02,0x00,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + +global_store_async_from_lds_b64 v2, v1, s[2:3] offset:64 +// GFX1250: global_store_async_from_lds_b64 v2, v1, s[2:3] offset:64 ; encoding: [0x02,0x40,0x19,0xee,0x00,0x00,0x80,0x00,0x02,0x40,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + +global_store_async_from_lds_b64 v2, v1, s[2:3] offset:-64 +// GFX1250: global_store_async_from_lds_b64 v2, v1, s[2:3] offset:-64 ; encoding: [0x02,0x40,0x19,0xee,0x00,0x00,0x80,0x00,0x02,0xc0,0xff,0xff] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + +global_store_async_from_lds_b128 v[2:3], v1, off th:TH_STORE_BYPASS scope:SCOPE_SYS +// GFX1250: global_store_async_from_lds_b128 v[2:3], v1, off th:TH_STORE_BYPASS scope:SCOPE_SYS ; encoding: 
[0x7c,0x80,0x19,0xee,0x00,0x00,0xbc,0x00,0x02,0x00,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + +global_store_async_from_lds_b128 v[2:3], v1, off offset:64 +// GFX1250: global_store_async_from_lds_b128 v[2:3], v1, off offset:64 ; encoding: [0x7c,0x80,0x19,0xee,0x00,0x00,0x80,0x00,0x02,0x40,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + +global_store_async_from_lds_b128 v[2:3], v1, off offset:-64 +// GFX1250: global_store_async_from_lds_b128 v[2:3], v1, off offset:-64 ; encoding: [0x7c,0x80,0x19,0xee,0x00,0x00,0x80,0x00,0x02,0xc0,0xff,0xff] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + +global_store_async_from_lds_b128 v2, v1, s[2:3] th:TH_STORE_NT_HT scope:SCOPE_DEV +// GFX1250: global_store_async_from_lds_b128 v2, v1, s[2:3] th:TH_STORE_NT_HT scope:SCOPE_DEV ; encoding: [0x02,0x80,0x19,0xee,0x00,0x00,0xe8,0x00,0x02,0x00,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + +global_store_async_from_lds_b128 v2, v1, s[2:3] offset:64 +// GFX1250: global_store_async_from_lds_b128 v2, v1, s[2:3] offset:64 ; encoding: [0x02,0x80,0x19,0xee,0x00,0x00,0x80,0x00,0x02,0x40,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + +global_store_async_from_lds_b128 v2, v1, s[2:3] offset:-64 +// GFX1250: global_store_async_from_lds_b128 v2, v1, s[2:3] offset:-64 ; encoding: [0x02,0x80,0x19,0xee,0x00,0x00,0x80,0x00,0x02,0xc0,0xff,0xff] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + +global_store_async_from_lds_b32 v2, v1, s[4:5] scale_offset th:TH_STORE_BYPASS scope:SCOPE_SYS +// GFX1250: global_store_async_from_lds_b32 v2, v1, s[4:5] scale_offset th:TH_STORE_BYPASS scope:SCOPE_SYS ; encoding: [0x04,0x00,0x19,0xee,0x00,0x00,0xbd,0x00,0x02,0x00,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + +global_store_async_from_lds_b64 v2, v1, s[4:5] scale_offset th:TH_STORE_BYPASS scope:SCOPE_SYS +// GFX1250: global_store_async_from_lds_b64 v2, v1, s[4:5] scale_offset th:TH_STORE_BYPASS scope:SCOPE_SYS ; encoding: [0x04,0x40,0x19,0xee,0x00,0x00,0xbd,0x00,0x02,0x00,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + +global_load_async_to_lds_b8 v1, v[2:3], off th:TH_LOAD_BYPASS scope:SCOPE_SYS +// GFX1250: global_load_async_to_lds_b8 v1, v[2:3], off th:TH_LOAD_BYPASS scope:SCOPE_SYS ; encoding: [0x7c,0xc0,0x17,0xee,0x01,0x00,0x3c,0x00,0x02,0x00,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + +global_load_async_to_lds_b8 v1, v[2:3], off offset:64 +// GFX1250: global_load_async_to_lds_b8 v1, v[2:3], off offset:64 ; encoding: [0x7c,0xc0,0x17,0xee,0x01,0x00,0x00,0x00,0x02,0x40,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + +global_load_async_to_lds_b8 v1, v[2:3], off offset:-64 +// GFX1250: global_load_async_to_lds_b8 v1, v[2:3], off offset:-64 ; encoding: [0x7c,0xc0,0x17,0xee,0x01,0x00,0x00,0x00,0x02,0xc0,0xff,0xff] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + +global_load_async_to_lds_b8 v1, v2, s[2:3] th:TH_LOAD_NT_HT scope:SCOPE_DEV +// GFX1250: global_load_async_to_lds_b8 v1, v2, s[2:3] th:TH_LOAD_NT_HT scope:SCOPE_DEV ; encoding: [0x02,0xc0,0x17,0xee,0x01,0x00,0x68,0x00,0x02,0x00,0x00,0x00] +// GFX12-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + +global_load_async_to_lds_b8 v1, v2, s[2:3] offset:64 +// GFX1250: global_load_async_to_lds_b8 v1, v2, s[2:3] offset:64 ; encoding: [0x02,0xc0,0x17,0xee,0x01,0x00,0x00,0x00,0x02,0x40,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + +global_load_async_to_lds_b8 v1, v2, s[2:3] offset:-64 +// GFX1250: global_load_async_to_lds_b8 v1, v2, s[2:3] offset:-64 ; encoding: [0x02,0xc0,0x17,0xee,0x01,0x00,0x00,0x00,0x02,0xc0,0xff,0xff] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + +global_load_async_to_lds_b32 v1, v[2:3], off th:TH_LOAD_BYPASS scope:SCOPE_SYS +// GFX1250: global_load_async_to_lds_b32 v1, v[2:3], off th:TH_LOAD_BYPASS scope:SCOPE_SYS ; encoding: [0x7c,0x00,0x18,0xee,0x01,0x00,0x3c,0x00,0x02,0x00,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + +global_load_async_to_lds_b32 v1, v[2:3], off offset:64 +// GFX1250: global_load_async_to_lds_b32 v1, v[2:3], off offset:64 ; encoding: [0x7c,0x00,0x18,0xee,0x01,0x00,0x00,0x00,0x02,0x40,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + +global_load_async_to_lds_b32 v1, v[2:3], off offset:-64 +// GFX1250: global_load_async_to_lds_b32 v1, v[2:3], off offset:-64 ; encoding: [0x7c,0x00,0x18,0xee,0x01,0x00,0x00,0x00,0x02,0xc0,0xff,0xff] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + +global_load_async_to_lds_b32 v1, v2, s[2:3] th:TH_LOAD_NT_HT scope:SCOPE_DEV +// GFX1250: global_load_async_to_lds_b32 v1, v2, s[2:3] th:TH_LOAD_NT_HT scope:SCOPE_DEV ; encoding: [0x02,0x00,0x18,0xee,0x01,0x00,0x68,0x00,0x02,0x00,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + +global_load_async_to_lds_b32 v1, v2, s[2:3] offset:64 +// GFX1250: global_load_async_to_lds_b32 v1, v2, s[2:3] offset:64 ; encoding: [0x02,0x00,0x18,0xee,0x01,0x00,0x00,0x00,0x02,0x40,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + +global_load_async_to_lds_b32 v1, v2, s[2:3] offset:-64 +// GFX1250: global_load_async_to_lds_b32 v1, v2, s[2:3] offset:-64 ; encoding: [0x02,0x00,0x18,0xee,0x01,0x00,0x00,0x00,0x02,0xc0,0xff,0xff] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + +global_load_async_to_lds_b64 v1, v[2:3], off th:TH_LOAD_BYPASS scope:SCOPE_SYS +// GFX1250: global_load_async_to_lds_b64 v1, v[2:3], off th:TH_LOAD_BYPASS scope:SCOPE_SYS ; encoding: [0x7c,0x40,0x18,0xee,0x01,0x00,0x3c,0x00,0x02,0x00,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + +global_load_async_to_lds_b64 v1, v[2:3], off offset:64 +// GFX1250: global_load_async_to_lds_b64 v1, v[2:3], off offset:64 ; encoding: [0x7c,0x40,0x18,0xee,0x01,0x00,0x00,0x00,0x02,0x40,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + +global_load_async_to_lds_b64 v1, v[2:3], off offset:-64 +// GFX1250: global_load_async_to_lds_b64 v1, v[2:3], off offset:-64 ; encoding: [0x7c,0x40,0x18,0xee,0x01,0x00,0x00,0x00,0x02,0xc0,0xff,0xff] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + +global_load_async_to_lds_b64 v1, v2, s[2:3] th:TH_LOAD_NT_HT scope:SCOPE_DEV +// GFX1250: global_load_async_to_lds_b64 v1, v2, s[2:3] th:TH_LOAD_NT_HT scope:SCOPE_DEV ; encoding: 
[0x02,0x40,0x18,0xee,0x01,0x00,0x68,0x00,0x02,0x00,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + +global_load_async_to_lds_b64 v1, v2, s[2:3] offset:64 +// GFX1250: global_load_async_to_lds_b64 v1, v2, s[2:3] offset:64 ; encoding: [0x02,0x40,0x18,0xee,0x01,0x00,0x00,0x00,0x02,0x40,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + +global_load_async_to_lds_b64 v1, v2, s[2:3] offset:-64 +// GFX1250: global_load_async_to_lds_b64 v1, v2, s[2:3] offset:-64 ; encoding: [0x02,0x40,0x18,0xee,0x01,0x00,0x00,0x00,0x02,0xc0,0xff,0xff] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + +global_load_async_to_lds_b128 v1, v[2:3], off th:TH_LOAD_BYPASS scope:SCOPE_SYS +// GFX1250: global_load_async_to_lds_b128 v1, v[2:3], off th:TH_LOAD_BYPASS scope:SCOPE_SYS ; encoding: [0x7c,0x80,0x18,0xee,0x01,0x00,0x3c,0x00,0x02,0x00,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + +global_load_async_to_lds_b128 v1, v[2:3], off offset:64 +// GFX1250: global_load_async_to_lds_b128 v1, v[2:3], off offset:64 ; encoding: [0x7c,0x80,0x18,0xee,0x01,0x00,0x00,0x00,0x02,0x40,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + +global_load_async_to_lds_b128 v1, v[2:3], off offset:-64 +// GFX1250: global_load_async_to_lds_b128 v1, v[2:3], off offset:-64 ; encoding: [0x7c,0x80,0x18,0xee,0x01,0x00,0x00,0x00,0x02,0xc0,0xff,0xff] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + +global_load_async_to_lds_b128 v1, v2, s[2:3] th:TH_LOAD_NT_HT scope:SCOPE_DEV +// GFX1250: global_load_async_to_lds_b128 v1, v2, s[2:3] th:TH_LOAD_NT_HT scope:SCOPE_DEV ; encoding: [0x02,0x80,0x18,0xee,0x01,0x00,0x68,0x00,0x02,0x00,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + +global_load_async_to_lds_b128 v1, v2, s[2:3] offset:64 +// GFX1250: global_load_async_to_lds_b128 v1, v2, s[2:3] offset:64 ; encoding: [0x02,0x80,0x18,0xee,0x01,0x00,0x00,0x00,0x02,0x40,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + +global_load_async_to_lds_b128 v1, v2, s[2:3] offset:-64 +// GFX1250: global_load_async_to_lds_b128 v1, v2, s[2:3] offset:-64 ; encoding: [0x02,0x80,0x18,0xee,0x01,0x00,0x00,0x00,0x02,0xc0,0xff,0xff] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + +global_load_async_to_lds_b32 v2, v1, s[4:5] scale_offset th:TH_LOAD_BYPASS scope:SCOPE_SYS +// GFX1250: global_load_async_to_lds_b32 v2, v1, s[4:5] scale_offset th:TH_LOAD_BYPASS scope:SCOPE_SYS ; encoding: [0x04,0x00,0x18,0xee,0x02,0x00,0x3d,0x00,0x01,0x00,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + +global_load_async_to_lds_b64 v2, v1, s[4:5] scale_offset th:TH_LOAD_BYPASS scope:SCOPE_SYS +// GFX1250: global_load_async_to_lds_b64 v2, v1, s[4:5] scale_offset th:TH_LOAD_BYPASS scope:SCOPE_SYS ; encoding: [0x04,0x40,0x18,0xee,0x02,0x00,0x3d,0x00,0x01,0x00,0x00,0x00] +// GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU + tensor_save s[0:1] // GFX1250: tensor_save s[0:1] ; encoding: [0x00,0x80,0x1b,0xee,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00] // GFX12-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: instruction not supported on this GPU diff --git a/llvm/test/MC/AMDGPU/gfx1250_asm_vflat_err.s 
b/llvm/test/MC/AMDGPU/gfx1250_asm_vflat_err.s index 26d7ed3..c9fe702 100644 --- a/llvm/test/MC/AMDGPU/gfx1250_asm_vflat_err.s +++ b/llvm/test/MC/AMDGPU/gfx1250_asm_vflat_err.s @@ -57,3 +57,51 @@ scratch_load_b32 v5, off, off offset:32 scale_offset // GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: scale_offset is not supported for this instruction // GFX1250-ERR-NEXT:{{^}}scratch_load_b32 v5, off, off offset:32 scale_offset // GFX1250-ERR-NEXT:{{^}} ^ + +global_store_async_from_lds_b8 v[2:3], v1, off th:TH_LOAD_BYPASS scope:SCOPE_SYS +// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid th value for store instructions + +global_store_async_from_lds_b8 v1, v2, s[2:3] th:TH_LOAD_NT_HT scope:SCOPE_DEV +// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid th value for store instructions + +global_store_async_from_lds_b32 v[2:3], v1, off th:TH_LOAD_BYPASS scope:SCOPE_SYS +// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid th value for store instructions + +global_store_async_from_lds_b32 v1, v2, s[2:3] th:TH_LOAD_NT_HT scope:SCOPE_DEV +// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid th value for store instructions + +global_store_async_from_lds_b64 v[2:3], v1, off th:TH_LOAD_BYPASS scope:SCOPE_SYS +// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid th value for store instructions + +global_store_async_from_lds_b64 v1, v2, s[2:3] th:TH_LOAD_NT_HT scope:SCOPE_DEV +// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid th value for store instructions + +global_store_async_from_lds_b128 v[2:3], v1, off th:TH_LOAD_BYPASS scope:SCOPE_SYS +// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid th value for store instructions + +global_store_async_from_lds_b128 v1, v2, s[2:3] th:TH_LOAD_NT_HT scope:SCOPE_DEV +// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid th value for store instructions + +global_load_async_to_lds_b8 v1, v[2:3], off th:TH_STORE_BYPASS scope:SCOPE_SYS +// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid th value for load instructions + +global_load_async_to_lds_b8 v1, v2, s[2:3] th:TH_STORE_NT_HT scope:SCOPE_DEV +// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid th value for load instructions + +global_load_async_to_lds_b32 v1, v[2:3], off th:TH_STORE_BYPASS scope:SCOPE_SYS +// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid th value for load instructions + +global_load_async_to_lds_b32 v1, v2, s[2:3] th:TH_STORE_NT_HT scope:SCOPE_DEV +// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid th value for load instructions + +global_load_async_to_lds_b64 v1, v[2:3], off th:TH_STORE_BYPASS scope:SCOPE_SYS +// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid th value for load instructions + +global_load_async_to_lds_b64 v1, v2, s[2:3] th:TH_STORE_NT_HT scope:SCOPE_DEV +// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid th value for load instructions + +global_load_async_to_lds_b128 v1, v[2:3], off th:TH_STORE_BYPASS scope:SCOPE_SYS +// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid th value for load instructions + +global_load_async_to_lds_b128 v1, v2, s[2:3] th:TH_STORE_NT_HT scope:SCOPE_DEV +// GFX1250-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid th value for load instructions diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vflat.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vflat.txt index de7895f..291192b 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vflat.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1250_dasm_vflat.txt @@ -3177,6 +3177,162 @@ # GFX1250: global_load_monitor_b64 v[2:3], v2, 
s[4:5] offset:64 scale_offset ; encoding: [0x04,0x40,0x1c,0xee,0x02,0x00,0x01,0x00,0x02,0x40,0x00,0x00] 0x04,0x40,0x1c,0xee,0x02,0x00,0x01,0x00,0x02,0x40,0x00,0x00 +# GFX1250: global_load_async_to_lds_b128 v1, v[2:3], off offset:64 ; encoding: [0x7c,0x80,0x18,0xee,0x01,0x00,0x00,0x00,0x02,0x40,0x00,0x00] +0x7c,0x80,0x18,0xee,0x01,0x00,0x00,0x00,0x02,0x40,0x00,0x00 + +# GFX1250: global_load_async_to_lds_b128 v1, v[2:3], off offset:-64 ; encoding: [0x7c,0x80,0x18,0xee,0x01,0x00,0x00,0x00,0x02,0xc0,0xff,0xff] +0x7c,0x80,0x18,0xee,0x01,0x00,0x00,0x00,0x02,0xc0,0xff,0xff + +# GFX1250: global_load_async_to_lds_b128 v1, v[2:3], off th:TH_LOAD_BYPASS scope:SCOPE_SYS ; encoding: [0x7c,0x80,0x18,0xee,0x01,0x00,0x3c,0x00,0x02,0x00,0x00,0x00] +0x7c,0x80,0x18,0xee,0x01,0x00,0x3c,0x00,0x02,0x00,0x00,0x00 + +# GFX1250: global_load_async_to_lds_b128 v1, v2, s[2:3] offset:64 ; encoding: [0x02,0x80,0x18,0xee,0x01,0x00,0x00,0x00,0x02,0x40,0x00,0x00] +0x02,0x80,0x18,0xee,0x01,0x00,0x00,0x00,0x02,0x40,0x00,0x00 + +# GFX1250: global_load_async_to_lds_b128 v1, v2, s[2:3] offset:-64 ; encoding: [0x02,0x80,0x18,0xee,0x01,0x00,0x00,0x00,0x02,0xc0,0xff,0xff] +0x02,0x80,0x18,0xee,0x01,0x00,0x00,0x00,0x02,0xc0,0xff,0xff + +# GFX1250: global_load_async_to_lds_b128 v1, v2, s[2:3] th:TH_LOAD_NT_HT scope:SCOPE_DEV ; encoding: [0x02,0x80,0x18,0xee,0x01,0x00,0x68,0x00,0x02,0x00,0x00,0x00] +0x02,0x80,0x18,0xee,0x01,0x00,0x68,0x00,0x02,0x00,0x00,0x00 + +# GFX1250: global_load_async_to_lds_b32 v1, v[2:3], off offset:64 ; encoding: [0x7c,0x00,0x18,0xee,0x01,0x00,0x00,0x00,0x02,0x40,0x00,0x00] +0x7c,0x00,0x18,0xee,0x01,0x00,0x00,0x00,0x02,0x40,0x00,0x00 + +# GFX1250: global_load_async_to_lds_b32 v1, v[2:3], off offset:-64 ; encoding: [0x7c,0x00,0x18,0xee,0x01,0x00,0x00,0x00,0x02,0xc0,0xff,0xff] +0x7c,0x00,0x18,0xee,0x01,0x00,0x00,0x00,0x02,0xc0,0xff,0xff + +# GFX1250: global_load_async_to_lds_b32 v1, v[2:3], off th:TH_LOAD_BYPASS scope:SCOPE_SYS ; encoding: [0x7c,0x00,0x18,0xee,0x01,0x00,0x3c,0x00,0x02,0x00,0x00,0x00] +0x7c,0x00,0x18,0xee,0x01,0x00,0x3c,0x00,0x02,0x00,0x00,0x00 + +# GFX1250: global_load_async_to_lds_b32 v1, v2, s[2:3] offset:64 ; encoding: [0x02,0x00,0x18,0xee,0x01,0x00,0x00,0x00,0x02,0x40,0x00,0x00] +0x02,0x00,0x18,0xee,0x01,0x00,0x00,0x00,0x02,0x40,0x00,0x00 + +# GFX1250: global_load_async_to_lds_b32 v1, v2, s[2:3] offset:-64 ; encoding: [0x02,0x00,0x18,0xee,0x01,0x00,0x00,0x00,0x02,0xc0,0xff,0xff] +0x02,0x00,0x18,0xee,0x01,0x00,0x00,0x00,0x02,0xc0,0xff,0xff + +# GFX1250: global_load_async_to_lds_b32 v1, v2, s[2:3] th:TH_LOAD_NT_HT scope:SCOPE_DEV ; encoding: [0x02,0x00,0x18,0xee,0x01,0x00,0x68,0x00,0x02,0x00,0x00,0x00] +0x02,0x00,0x18,0xee,0x01,0x00,0x68,0x00,0x02,0x00,0x00,0x00 + +# GFX1250: global_load_async_to_lds_b64 v1, v[2:3], off offset:64 ; encoding: [0x7c,0x40,0x18,0xee,0x01,0x00,0x00,0x00,0x02,0x40,0x00,0x00] +0x7c,0x40,0x18,0xee,0x01,0x00,0x00,0x00,0x02,0x40,0x00,0x00 + +# GFX1250: global_load_async_to_lds_b64 v1, v[2:3], off offset:-64 ; encoding: [0x7c,0x40,0x18,0xee,0x01,0x00,0x00,0x00,0x02,0xc0,0xff,0xff] +0x7c,0x40,0x18,0xee,0x01,0x00,0x00,0x00,0x02,0xc0,0xff,0xff + +# GFX1250: global_load_async_to_lds_b64 v1, v[2:3], off th:TH_LOAD_BYPASS scope:SCOPE_SYS ; encoding: [0x7c,0x40,0x18,0xee,0x01,0x00,0x3c,0x00,0x02,0x00,0x00,0x00] +0x7c,0x40,0x18,0xee,0x01,0x00,0x3c,0x00,0x02,0x00,0x00,0x00 + +# GFX1250: global_load_async_to_lds_b64 v1, v2, s[2:3] offset:64 ; encoding: [0x02,0x40,0x18,0xee,0x01,0x00,0x00,0x00,0x02,0x40,0x00,0x00] +0x02,0x40,0x18,0xee,0x01,0x00,0x00,0x00,0x02,0x40,0x00,0x00 + +# 
GFX1250: global_load_async_to_lds_b64 v1, v2, s[2:3] offset:-64 ; encoding: [0x02,0x40,0x18,0xee,0x01,0x00,0x00,0x00,0x02,0xc0,0xff,0xff] +0x02,0x40,0x18,0xee,0x01,0x00,0x00,0x00,0x02,0xc0,0xff,0xff + +# GFX1250: global_load_async_to_lds_b64 v1, v2, s[2:3] th:TH_LOAD_NT_HT scope:SCOPE_DEV ; encoding: [0x02,0x40,0x18,0xee,0x01,0x00,0x68,0x00,0x02,0x00,0x00,0x00] +0x02,0x40,0x18,0xee,0x01,0x00,0x68,0x00,0x02,0x00,0x00,0x00 + +# GFX1250: global_load_async_to_lds_b8 v1, v[2:3], off offset:64 ; encoding: [0x7c,0xc0,0x17,0xee,0x01,0x00,0x00,0x00,0x02,0x40,0x00,0x00] +0x7c,0xc0,0x17,0xee,0x01,0x00,0x00,0x00,0x02,0x40,0x00,0x00 + +# GFX1250: global_load_async_to_lds_b8 v1, v[2:3], off offset:-64 ; encoding: [0x7c,0xc0,0x17,0xee,0x01,0x00,0x00,0x00,0x02,0xc0,0xff,0xff] +0x7c,0xc0,0x17,0xee,0x01,0x00,0x00,0x00,0x02,0xc0,0xff,0xff + +# GFX1250: global_load_async_to_lds_b8 v1, v[2:3], off th:TH_LOAD_BYPASS scope:SCOPE_SYS ; encoding: [0x7c,0xc0,0x17,0xee,0x01,0x00,0x3c,0x00,0x02,0x00,0x00,0x00] +0x7c,0xc0,0x17,0xee,0x01,0x00,0x3c,0x00,0x02,0x00,0x00,0x00 + +# GFX1250: global_load_async_to_lds_b8 v1, v2, s[2:3] offset:64 ; encoding: [0x02,0xc0,0x17,0xee,0x01,0x00,0x00,0x00,0x02,0x40,0x00,0x00] +0x02,0xc0,0x17,0xee,0x01,0x00,0x00,0x00,0x02,0x40,0x00,0x00 + +# GFX1250: global_load_async_to_lds_b8 v1, v2, s[2:3] offset:-64 ; encoding: [0x02,0xc0,0x17,0xee,0x01,0x00,0x00,0x00,0x02,0xc0,0xff,0xff] +0x02,0xc0,0x17,0xee,0x01,0x00,0x00,0x00,0x02,0xc0,0xff,0xff + +# GFX1250: global_load_async_to_lds_b8 v1, v2, s[2:3] th:TH_LOAD_NT_HT scope:SCOPE_DEV ; encoding: [0x02,0xc0,0x17,0xee,0x01,0x00,0x68,0x00,0x02,0x00,0x00,0x00] +0x02,0xc0,0x17,0xee,0x01,0x00,0x68,0x00,0x02,0x00,0x00,0x00 + +# GFX1250: global_load_async_to_lds_b32 v2, v1, s[4:5] scale_offset th:TH_LOAD_BYPASS scope:SCOPE_SYS ; encoding: [0x04,0x00,0x18,0xee,0x02,0x00,0x3d,0x00,0x01,0x00,0x00,0x00] +0x04,0x00,0x18,0xee,0x02,0x00,0x3d,0x00,0x01,0x00,0x00,0x00 + +# GFX1250: global_load_async_to_lds_b64 v2, v1, s[4:5] scale_offset th:TH_LOAD_BYPASS scope:SCOPE_SYS ; encoding: [0x04,0x40,0x18,0xee,0x02,0x00,0x3d,0x00,0x01,0x00,0x00,0x00] +0x04,0x40,0x18,0xee,0x02,0x00,0x3d,0x00,0x01,0x00,0x00,0x00 + +# GFX1250: global_store_async_from_lds_b128 v[2:3], v1, off offset:64 ; encoding: [0x7c,0x80,0x19,0xee,0x00,0x00,0x80,0x00,0x02,0x40,0x00,0x00] +0x7c,0x80,0x19,0xee,0x00,0x00,0x80,0x00,0x02,0x40,0x00,0x00 + +# GFX1250: global_store_async_from_lds_b128 v[2:3], v1, off offset:-64 ; encoding: [0x7c,0x80,0x19,0xee,0x00,0x00,0x80,0x00,0x02,0xc0,0xff,0xff] +0x7c,0x80,0x19,0xee,0x00,0x00,0x80,0x00,0x02,0xc0,0xff,0xff + +# GFX1250: global_store_async_from_lds_b128 v[2:3], v1, off th:TH_STORE_BYPASS scope:SCOPE_SYS ; encoding: [0x7c,0x80,0x19,0xee,0x00,0x00,0xbc,0x00,0x02,0x00,0x00,0x00] +0x7c,0x80,0x19,0xee,0x00,0x00,0xbc,0x00,0x02,0x00,0x00,0x00 + +# GFX1250: global_store_async_from_lds_b128 v2, v1, s[2:3] offset:64 ; encoding: [0x02,0x80,0x19,0xee,0x00,0x00,0x80,0x00,0x02,0x40,0x00,0x00] +0x02,0x80,0x19,0xee,0x00,0x00,0x80,0x00,0x02,0x40,0x00,0x00 + +# GFX1250: global_store_async_from_lds_b128 v2, v1, s[2:3] offset:-64 ; encoding: [0x02,0x80,0x19,0xee,0x00,0x00,0x80,0x00,0x02,0xc0,0xff,0xff] +0x02,0x80,0x19,0xee,0x00,0x00,0x80,0x00,0x02,0xc0,0xff,0xff + +# GFX1250: global_store_async_from_lds_b128 v2, v1, s[2:3] th:TH_STORE_NT_HT scope:SCOPE_DEV ; encoding: [0x02,0x80,0x19,0xee,0x00,0x00,0xe8,0x00,0x02,0x00,0x00,0x00] +0x02,0x80,0x19,0xee,0x00,0x00,0xe8,0x00,0x02,0x00,0x00,0x00 + +# GFX1250: global_store_async_from_lds_b32 v[2:3], v1, off offset:64 ; encoding: 
[0x7c,0x00,0x19,0xee,0x00,0x00,0x80,0x00,0x02,0x40,0x00,0x00] +0x7c,0x00,0x19,0xee,0x00,0x00,0x80,0x00,0x02,0x40,0x00,0x00 + +# GFX1250: global_store_async_from_lds_b32 v[2:3], v1, off offset:-64 ; encoding: [0x7c,0x00,0x19,0xee,0x00,0x00,0x80,0x00,0x02,0xc0,0xff,0xff] +0x7c,0x00,0x19,0xee,0x00,0x00,0x80,0x00,0x02,0xc0,0xff,0xff + +# GFX1250: global_store_async_from_lds_b32 v[2:3], v1, off th:TH_STORE_BYPASS scope:SCOPE_SYS ; encoding: [0x7c,0x00,0x19,0xee,0x00,0x00,0xbc,0x00,0x02,0x00,0x00,0x00] +0x7c,0x00,0x19,0xee,0x00,0x00,0xbc,0x00,0x02,0x00,0x00,0x00 + +# GFX1250: global_store_async_from_lds_b32 v2, v1, s[2:3] offset:64 ; encoding: [0x02,0x00,0x19,0xee,0x00,0x00,0x80,0x00,0x02,0x40,0x00,0x00] +0x02,0x00,0x19,0xee,0x00,0x00,0x80,0x00,0x02,0x40,0x00,0x00 + +# GFX1250: global_store_async_from_lds_b32 v2, v1, s[2:3] offset:-64 ; encoding: [0x02,0x00,0x19,0xee,0x00,0x00,0x80,0x00,0x02,0xc0,0xff,0xff] +0x02,0x00,0x19,0xee,0x00,0x00,0x80,0x00,0x02,0xc0,0xff,0xff + +# GFX1250: global_store_async_from_lds_b32 v2, v1, s[2:3] th:TH_STORE_NT_HT scope:SCOPE_DEV ; encoding: [0x02,0x00,0x19,0xee,0x00,0x00,0xe8,0x00,0x02,0x00,0x00,0x00] +0x02,0x00,0x19,0xee,0x00,0x00,0xe8,0x00,0x02,0x00,0x00,0x00 + +# GFX1250: global_store_async_from_lds_b64 v[2:3], v1, off offset:64 ; encoding: [0x7c,0x40,0x19,0xee,0x00,0x00,0x80,0x00,0x02,0x40,0x00,0x00] +0x7c,0x40,0x19,0xee,0x00,0x00,0x80,0x00,0x02,0x40,0x00,0x00 + +# GFX1250: global_store_async_from_lds_b64 v[2:3], v1, off offset:-64 ; encoding: [0x7c,0x40,0x19,0xee,0x00,0x00,0x80,0x00,0x02,0xc0,0xff,0xff] +0x7c,0x40,0x19,0xee,0x00,0x00,0x80,0x00,0x02,0xc0,0xff,0xff + +# GFX1250: global_store_async_from_lds_b64 v[2:3], v1, off th:TH_STORE_BYPASS scope:SCOPE_SYS ; encoding: [0x7c,0x40,0x19,0xee,0x00,0x00,0xbc,0x00,0x02,0x00,0x00,0x00] +0x7c,0x40,0x19,0xee,0x00,0x00,0xbc,0x00,0x02,0x00,0x00,0x00 + +# GFX1250: global_store_async_from_lds_b64 v2, v1, s[2:3] offset:64 ; encoding: [0x02,0x40,0x19,0xee,0x00,0x00,0x80,0x00,0x02,0x40,0x00,0x00] +0x02,0x40,0x19,0xee,0x00,0x00,0x80,0x00,0x02,0x40,0x00,0x00 + +# GFX1250: global_store_async_from_lds_b64 v2, v1, s[2:3] offset:-64 ; encoding: [0x02,0x40,0x19,0xee,0x00,0x00,0x80,0x00,0x02,0xc0,0xff,0xff] +0x02,0x40,0x19,0xee,0x00,0x00,0x80,0x00,0x02,0xc0,0xff,0xff + +# GFX1250: global_store_async_from_lds_b64 v2, v1, s[2:3] th:TH_STORE_NT_HT scope:SCOPE_DEV ; encoding: [0x02,0x40,0x19,0xee,0x00,0x00,0xe8,0x00,0x02,0x00,0x00,0x00] +0x02,0x40,0x19,0xee,0x00,0x00,0xe8,0x00,0x02,0x00,0x00,0x00 + +# GFX1250: global_store_async_from_lds_b8 v[2:3], v1, off offset:64 ; encoding: [0x7c,0xc0,0x18,0xee,0x00,0x00,0x80,0x00,0x02,0x40,0x00,0x00] +0x7c,0xc0,0x18,0xee,0x00,0x00,0x80,0x00,0x02,0x40,0x00,0x00 + +# GFX1250: global_store_async_from_lds_b8 v[2:3], v1, off offset:-64 ; encoding: [0x7c,0xc0,0x18,0xee,0x00,0x00,0x80,0x00,0x02,0xc0,0xff,0xff] +0x7c,0xc0,0x18,0xee,0x00,0x00,0x80,0x00,0x02,0xc0,0xff,0xff + +# GFX1250: global_store_async_from_lds_b8 v[2:3], v1, off th:TH_STORE_BYPASS scope:SCOPE_SYS ; encoding: [0x7c,0xc0,0x18,0xee,0x00,0x00,0xbc,0x00,0x02,0x00,0x00,0x00] +0x7c,0xc0,0x18,0xee,0x00,0x00,0xbc,0x00,0x02,0x00,0x00,0x00 + +# GFX1250: global_store_async_from_lds_b8 v2, v1, s[2:3] offset:64 ; encoding: [0x02,0xc0,0x18,0xee,0x00,0x00,0x80,0x00,0x02,0x40,0x00,0x00] +0x02,0xc0,0x18,0xee,0x00,0x00,0x80,0x00,0x02,0x40,0x00,0x00 + +# GFX1250: global_store_async_from_lds_b8 v2, v1, s[2:3] offset:-64 ; encoding: [0x02,0xc0,0x18,0xee,0x00,0x00,0x80,0x00,0x02,0xc0,0xff,0xff] +0x02,0xc0,0x18,0xee,0x00,0x00,0x80,0x00,0x02,0xc0,0xff,0xff + 
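+# A pattern worth noting in the encodings collected here (inferred from the
+# pairs in this file rather than taken from the ISA manual): byte 6 of each
+# encoding carries the cache policy. The async stores use 0x80 by default,
+# 0xbc for th:TH_STORE_BYPASS scope:SCOPE_SYS, and 0xe8 for th:TH_STORE_NT_HT
+# scope:SCOPE_DEV; the async loads use 0x00, 0x3c, and 0x68 for the
+# corresponding TH_LOAD variants, and scale_offset sets bit 0 (0xbc -> 0xbd).
+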
+# GFX1250: global_store_async_from_lds_b8 v2, v1, s[2:3] th:TH_STORE_NT_HT scope:SCOPE_DEV ; encoding: [0x02,0xc0,0x18,0xee,0x00,0x00,0xe8,0x00,0x02,0x00,0x00,0x00] +0x02,0xc0,0x18,0xee,0x00,0x00,0xe8,0x00,0x02,0x00,0x00,0x00 + +# GFX1250: global_store_async_from_lds_b32 v2, v1, s[4:5] scale_offset th:TH_STORE_BYPASS scope:SCOPE_SYS ; encoding: [0x04,0x00,0x19,0xee,0x00,0x00,0xbd,0x00,0x02,0x00,0x00,0x00] +0x04,0x00,0x19,0xee,0x00,0x00,0xbd,0x00,0x02,0x00,0x00,0x00 + +# GFX1250: global_store_async_from_lds_b64 v2, v1, s[4:5] scale_offset th:TH_STORE_BYPASS scope:SCOPE_SYS ; encoding: [0x04,0x40,0x19,0xee,0x00,0x00,0xbd,0x00,0x02,0x00,0x00,0x00] +0x04,0x40,0x19,0xee,0x00,0x00,0xbd,0x00,0x02,0x00,0x00,0x00 + # GFX1250: tensor_save s[0:1] ; encoding: [0x00,0x80,0x1b,0xee,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00] 0x00,0x80,0x1b,0xee,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 diff --git a/llvm/test/MC/Disassembler/AMDGPU/kernel-descriptor-errors.test b/llvm/test/MC/Disassembler/AMDGPU/kernel-descriptor-errors.test index fdca11b..369005f 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/kernel-descriptor-errors.test +++ b/llvm/test/MC/Disassembler/AMDGPU/kernel-descriptor-errors.test @@ -13,10 +13,10 @@ # RES_4_2: ; error decoding test.kd: kernel descriptor reserved bits in range (511:480) set # RES_4_2-NEXT: ; decoding failed region as bytes -# RUN: yaml2obj %s -DGPU=GFX90A -DKD=00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000006000000000000 \ -# RUN: | llvm-objdump --disassemble-symbols=test.kd - | FileCheck %s --check-prefix=RES_457 -# RES_457: ; error decoding test.kd: kernel descriptor reserved bits in range (457:455) set -# RES_457-NEXT: ; decoding failed region as bytes +# RUN: yaml2obj %s -DGPU=GFX90A -DKD=00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000003000000000000 \ +# RUN: | llvm-objdump --disassemble-symbols=test.kd - | FileCheck %s --check-prefix=RES_456 +# RES_456: ; error decoding test.kd: kernel descriptor reserved bits in range (456:455) set +# RES_456-NEXT: ; decoding failed region as bytes # RUN: yaml2obj %s -DGPU=GFX90A -DKD=0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000c000000000000 \ # RUN: | llvm-objdump --disassemble-symbols=test.kd - | FileCheck %s --check-prefix=WF32 diff --git a/llvm/test/MC/ELF/many-instructions.s b/llvm/test/MC/ELF/many-instructions.s new file mode 100644 index 0000000..cbdb2a7 --- /dev/null +++ b/llvm/test/MC/ELF/many-instructions.s @@ -0,0 +1,10 @@ +# REQUIRES: asserts +# RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o /dev/null -debug-only=mc-dump + +## Test that encodeInstruction may cause a new fragment to be created. 
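+## A quick check of the numbers (assuming the standard 10-byte encoding of
+## movabsq with a 64-bit immediate): .rept 16384/10 emits 1638 copies, i.e.
+## 16380 bytes in total, which mc-dump reports as a 16200-byte data fragment
+## followed by a 180-byte one once the first fragment fills up.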
+# CHECK: 0 Data Size:16200 +# CHECK: 16200 Data Size:180 + +.rept 16384/10 +movabsq $foo, %rax +.endr diff --git a/llvm/test/TableGen/CompressInstEmitter/suboperands.td b/llvm/test/TableGen/CompressInstEmitter/suboperands.td index cd724e9..f4e43d5 100644 --- a/llvm/test/TableGen/CompressInstEmitter/suboperands.td +++ b/llvm/test/TableGen/CompressInstEmitter/suboperands.td @@ -115,7 +115,7 @@ def : CompressPat<(BigInst3 RegsC:$dst, RegsC:$src, simm6:$imm), // CHECK-NEXT: ArchMCRegisterClasses[Arch::RegsCRegClassID].contains(MI.getOperand(0).getReg()) && // CHECK-NEXT: MI.getOperand(1).isReg() && // CHECK-NEXT: ArchMCRegisterClasses[Arch::RegsCRegClassID].contains(MI.getOperand(1).getReg()) && -// CHECK-NEXT: ArchValidateMCOperandForCompress(MI.getOperand(2), STI, 1)) { +// CHECK-NEXT: ArchValidateMCOperandForCompress(MI.getOperand(2), STI, 1 /* simm6 */)) { // CHECK-NEXT: // small $dst, $addr // CHECK-NEXT: OutInst.setOpcode(Arch::SmallInst); // CHECK-NEXT: // Operand: dst @@ -131,7 +131,7 @@ def : CompressPat<(BigInst3 RegsC:$dst, RegsC:$src, simm6:$imm), // CHECK-NEXT: ArchMCRegisterClasses[Arch::RegsCRegClassID].contains(MI.getOperand(0).getReg()) && // CHECK-NEXT: MI.getOperand(1).isReg() && // CHECK-NEXT: ArchMCRegisterClasses[Arch::RegsCRegClassID].contains(MI.getOperand(1).getReg()) && -// CHECK-NEXT: ArchValidateMCOperandForCompress(MI.getOperand(2), STI, 1)) { +// CHECK-NEXT: ArchValidateMCOperandForCompress(MI.getOperand(2), STI, 1 /* simm6 */)) { // CHECK-NEXT: // small $dst, $src, $imm // CHECK-NEXT: OutInst.setOpcode(Arch::SmallInst2); // CHECK-NEXT: // Operand: dst @@ -148,7 +148,7 @@ def : CompressPat<(BigInst3 RegsC:$dst, RegsC:$src, simm6:$imm), // CHECK-NEXT: ArchMCRegisterClasses[Arch::RegsCRegClassID].contains(MI.getOperand(0).getReg()) && // CHECK-NEXT: MI.getOperand(1).isReg() && // CHECK-NEXT: ArchMCRegisterClasses[Arch::RegsCRegClassID].contains(MI.getOperand(1).getReg()) && -// CHECK-NEXT: ArchValidateMCOperandForCompress(MI.getOperand(2), STI, 1)) { +// CHECK-NEXT: ArchValidateMCOperandForCompress(MI.getOperand(2), STI, 1 /* simm6 */)) { // CHECK-NEXT: // small $dst, $addr // CHECK-NEXT: OutInst.setOpcode(Arch::SmallInst3); // CHECK-NEXT: // Operand: dst @@ -170,7 +170,8 @@ def : CompressPat<(BigInst3 RegsC:$dst, RegsC:$src, simm6:$imm), // CHECK-NEXT: ArchMCRegisterClasses[Arch::RegsCRegClassID].contains(MI.getOperand(0).getReg()) && // CHECK-NEXT: MI.getOperand(1).isReg() && // CHECK-NEXT: ArchMCRegisterClasses[Arch::RegsCRegClassID].contains(MI.getOperand(1).getReg()) && -// CHECK-NEXT: ArchValidateMCOperandForUncompress(MI.getOperand(2), STI, 1)) { +// CHECK-NEXT: ArchValidateMCOperandForUncompress(MI.getOperand(2), STI, 1 /* simm6 */) && +// CHECK-NEXT: ArchValidateMCOperandForUncompress(MI.getOperand(2), STI, 2 /* simm12 */)) // CHECK-NEXT: // big $dst, $addr // CHECK-NEXT: OutInst.setOpcode(Arch::BigInst); // CHECK-NEXT: // Operand: dst @@ -186,7 +187,8 @@ def : CompressPat<(BigInst3 RegsC:$dst, RegsC:$src, simm6:$imm), // CHECK-NEXT: ArchMCRegisterClasses[Arch::RegsCRegClassID].contains(MI.getOperand(0).getReg()) && // CHECK-NEXT: MI.getOperand(1).isReg() && // CHECK-NEXT: ArchMCRegisterClasses[Arch::RegsCRegClassID].contains(MI.getOperand(1).getReg()) && -// CHECK-NEXT: ArchValidateMCOperandForUncompress(MI.getOperand(2), STI, 1)) { +// CHECK-NEXT: ArchValidateMCOperandForUncompress(MI.getOperand(2), STI, 1 /* simm6 */) && +// CHECK-NEXT: ArchValidateMCOperandForUncompress(MI.getOperand(2), STI, 2 /* simm12 */)) { // CHECK-NEXT: // big $dst, $addr // 
CHECK-NEXT: OutInst.setOpcode(Arch::BigInst2); // CHECK-NEXT: // Operand: dst @@ -202,7 +204,8 @@ def : CompressPat<(BigInst3 RegsC:$dst, RegsC:$src, simm6:$imm), // CHECK-NEXT: ArchMCRegisterClasses[Arch::RegsCRegClassID].contains(MI.getOperand(0).getReg()) && // CHECK-NEXT: MI.getOperand(1).isReg() && // CHECK-NEXT: ArchMCRegisterClasses[Arch::RegsCRegClassID].contains(MI.getOperand(1).getReg()) && -// CHECK-NEXT: ArchValidateMCOperandForUncompress(MI.getOperand(2), STI, 1)) { +// CHECK-NEXT: ArchValidateMCOperandForUncompress(MI.getOperand(2), STI, 1 /* simm6 */) && +// CHECK-NEXT: ArchValidateMCOperandForUncompress(MI.getOperand(2), STI, 2 /* simm12 */)) { // CHECK-NEXT: // big $dst, $src, $imm // CHECK-NEXT: OutInst.setOpcode(Arch::BigInst3); // CHECK-NEXT: // Operand: dst @@ -226,7 +229,7 @@ def : CompressPat<(BigInst3 RegsC:$dst, RegsC:$src, simm6:$imm), // CHECK-NEXT: MI.getOperand(1).isReg() && MI.getOperand(1).getReg().isPhysical() && // CHECK-NEXT: ArchMCRegisterClasses[Arch::RegsCRegClassID].contains(MI.getOperand(1).getReg()) && // CHECK-NEXT: MI.getOperand(2).isImm() && -// CHECK-NEXT: ArchValidateMachineOperand(MI.getOperand(2), &STI, 1)) { +// CHECK-NEXT: ArchValidateMachineOperand(MI.getOperand(2), &STI, 1 /* simm6 */)) { // CHECK-NEXT: // small $dst, $addr // CHECK-NEXT: // Operand: dst // CHECK-NEXT: // Operand: addr @@ -238,7 +241,7 @@ def : CompressPat<(BigInst3 RegsC:$dst, RegsC:$src, simm6:$imm), // CHECK-NEXT: MI.getOperand(1).isReg() && MI.getOperand(1).getReg().isPhysical() && // CHECK-NEXT: ArchMCRegisterClasses[Arch::RegsCRegClassID].contains(MI.getOperand(1).getReg()) && // CHECK-NEXT: MI.getOperand(2).isImm() && -// CHECK-NEXT: ArchValidateMachineOperand(MI.getOperand(2), &STI, 1)) { +// CHECK-NEXT: ArchValidateMachineOperand(MI.getOperand(2), &STI, 1 /* simm6 */)) { // CHECK-NEXT: // small $dst, $src, $imm // CHECK-NEXT: // Operand: dst // CHECK-NEXT: // Operand: src @@ -251,7 +254,7 @@ def : CompressPat<(BigInst3 RegsC:$dst, RegsC:$src, simm6:$imm), // CHECK-NEXT: MI.getOperand(1).isReg() && MI.getOperand(1).getReg().isPhysical() && // CHECK-NEXT: ArchMCRegisterClasses[Arch::RegsCRegClassID].contains(MI.getOperand(1).getReg()) && // CHECK-NEXT: MI.getOperand(2).isImm() && -// CHECK-NEXT: ArchValidateMachineOperand(MI.getOperand(2), &STI, 1)) { +// CHECK-NEXT: ArchValidateMachineOperand(MI.getOperand(2), &STI, 1 /* simm6 */)) { // CHECK-NEXT: // small $dst, $addr // CHECK-NEXT: // Operand: dst // CHECK-NEXT: // Operand: addr diff --git a/llvm/test/TableGen/getsetop.td b/llvm/test/TableGen/getsetop.td index aac644f..031606f 100644 --- a/llvm/test/TableGen/getsetop.td +++ b/llvm/test/TableGen/getsetop.td @@ -28,6 +28,7 @@ def bob : Super; def test { dag orig = (foo 1, 2:$a, $b); dag another = (qux "hello", $world); + dag named = (foo:$root 1, 2:$a, $b); // CHECK: dag replaceWithBar = (bar 1, 2:$a, ?:$b); dag replaceWithBar = !setop(orig, bar); @@ -41,6 +42,19 @@ def test { // CHECK: dag getopToSetop = (foo "hello", ?:$world); dag getopToSetop = !setdagop(another, !getdagop(orig)); + // CHECK: dag setOpName = (foo:$baz 1, 2:$a, ?:$b); + dag setOpName = !setdagopname(orig, "baz"); + + // CHECK: dag getopNameToSetOpName = (foo:$root 1, 2:$a, ?:$b); + dag getopNameToSetOpName = !setdagopname(orig, !getdagopname(named)); + + // CHECK: dag setOpNameExpl = (foo:$baz 1, 2:$a, ?:$b); + dag setOpNameExpl = !setdagopname((foo 1, 2:$a, $b), "baz"); + + // CHECK: dag getopNameToSetOpNameExpl = (foo:$root 1, 2:$a, ?:$b); + dag getopNameToSetOpNameExpl = + !setdagopname(orig, 
!getdagopname((foo:$root 1, 2:$a, $b))); + // CHECK: dag getopToBangDag = (foo 1:$a, 2:$b, 3:$c); dag getopToBangDag = !dag(!getdagop(orig), [1, 2, 3], ["a", "b", "c"]); diff --git a/llvm/test/TableGen/unsetop.td b/llvm/test/TableGen/unsetop.td index 7a4f98a..54ede19 100644 --- a/llvm/test/TableGen/unsetop.td +++ b/llvm/test/TableGen/unsetop.td @@ -16,6 +16,12 @@ def test { dag undefSecond = !con((op 1), (? 2)); // CHECK: dag undefBoth = (? 1, 2); dag undefBoth = !con((? 1), (? 2)); + // CHECK: dag namedLHS = (op:$lhs 1, 2); + dag namedLHS = !con((op:$lhs 1), (op 2)); + // CHECK: dag namedRHS = (op:$rhs 1, 2); + dag namedRHS = !con((op 1), (op:$rhs 2)); + // CHECK: dag namedBoth = (op:$lhs 1, 2); + dag namedBoth = !con((op:$lhs 1), (op:$rhs 2)); #ifdef ERROR // ERROR: Concatenated Dag operators do not match: '(op 1)' vs. '(otherop 2)' diff --git a/llvm/test/ThinLTO/X86/memprof_func_assign_fix.ll b/llvm/test/ThinLTO/X86/memprof_func_assign_fix.ll new file mode 100644 index 0000000..8303d6d --- /dev/null +++ b/llvm/test/ThinLTO/X86/memprof_func_assign_fix.ll @@ -0,0 +1,145 @@ +;; Make sure we assign the original callsite to a function clone (which will be +;; the original function clone), even when we cannot update its caller (due to +;; missing metadata e.g. from mismatched profiles). Otherwise we will try to use +;; the original function for a different clone, leading to confusion later when +;; rewriting the calls. + +;; -stats requires asserts +; REQUIRES: asserts + +; RUN: opt -thinlto-bc %s >%t.o +; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \ +; RUN: -supports-hot-cold-new \ +; RUN: -r=%t.o,A,plx \ +; RUN: -r=%t.o,B,plx \ +; RUN: -r=%t.o,C,plx \ +; RUN: -r=%t.o,D,plx \ +; RUN: -r=%t.o,E,plx \ +; RUN: -r=%t.o,F,plx \ +; RUN: -r=%t.o,G,plx \ +; RUN: -r=%t.o,A1,plx \ +; RUN: -r=%t.o,B1,plx \ +; RUN: -r=%t.o,_Znwm, \ +; RUN: -memprof-verify-ccg -memprof-verify-nodes -debug-only=memprof-context-disambiguation \ +; RUN: -stats -pass-remarks=memprof-context-disambiguation -save-temps \ +; RUN: -o %t.out 2>&1 | FileCheck %s \ +; RUN: --implicit-check-not="Mismatch in call clone assignment" \ +; RUN: --implicit-check-not="Number of callsites assigned to call multiple non-matching clones" + +; RUN: llvm-dis %t.out.1.4.opt.bc -o - | FileCheck %s --check-prefix=IR + +; ModuleID = '<stdin>' +source_filename = "reduced.ll" +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +; IR-LABEL: define dso_local void @A() +define void @A() #0 { + ; IR: call void @C() + call void @C() + ret void +} + +; IR-LABEL: define dso_local void @B() +define void @B() #0 { + ; IR: call void @C.memprof.1() + call void @C(), !callsite !1 + ret void +} + +; IR-LABEL: define dso_local void @C() +define void @C() #0 { + ; IR: call void @F() + call void @F(), !callsite !16 + ; IR: call void @D() + call void @D(), !callsite !2 + ret void +} + +; IR-LABEL: define dso_local void @D() +define void @D() #0 { + ; IR: call void @E() + call void @E(), !callsite !3 + ; IR: call void @G() + call void @G(), !callsite !17 + ret void +} + +; IR-LABEL: define dso_local void @E() +define void @E() #0 { + ; IR: call ptr @_Znwm(i64 0) #[[NOTCOLD:[0-9]+]] + %1 = call ptr @_Znwm(i64 0), !memprof !4, !callsite !9 + ret void +} + +; IR-LABEL: define dso_local void @F() +define void @F() #0 { + ; IR: call void @G() + call void @G(), !callsite !17 + ret void +} + +; IR-LABEL: define dso_local void @G() +define void @G() #0 { + ; IR: 
call ptr @_Znwm(i64 0) #[[NOTCOLD]] + %2 = call ptr @_Znwm(i64 0), !memprof !10, !callsite !15 + ret void +} + +; IR-LABEL: define dso_local void @A1() +define void @A1() #0 { + ; IR: call void @C() + call void @C(), !callsite !18 + ret void +} + +; IR-LABEL: define dso_local void @B1() +define void @B1() #0 { + ; IR: call void @C.memprof.1() + call void @C(), !callsite !19 + ret void +} + +; IR-LABEL: define dso_local void @C.memprof.1() + ; IR: call void @F.memprof.1() + ; IR: call void @D.memprof.1() + +; IR-LABEL: define dso_local void @D.memprof.1() + ; IR: call void @E.memprof.1() + ; IR: call void @G() + +; IR-LABEL: define dso_local void @E.memprof.1() + ; IR: call ptr @_Znwm(i64 0) #[[COLD:[0-9]+]] + +; IR-LABEL: define dso_local void @F.memprof.1() + ; IR: call void @G.memprof.1() + +; IR-LABEL: define dso_local void @G.memprof.1() + ; IR: call ptr @_Znwm(i64 0) #[[COLD]] + +declare ptr @_Znwm(i64) + +attributes #0 = { noinline optnone } +; IR: attributes #[[NOTCOLD]] = { "memprof"="notcold" } +; IR: attributes #[[COLD]] = { "memprof"="cold" } + +!0 = !{i64 123} +!1 = !{i64 234} +!2 = !{i64 345} +!3 = !{i64 456} +!4 = !{!5, !7} +!5 = !{!6, !"notcold"} +!6 = !{i64 567, i64 456, i64 345, i64 123} +!7 = !{!8, !"cold"} +!8 = !{i64 567, i64 456, i64 345, i64 234} +!9 = !{i64 567} +!10 = !{!11, !13} +!11 = !{!12, !"notcold"} +!12 = !{i64 678, i64 891, i64 789, i64 912} +!13 = !{!14, !"cold"} +!14 = !{i64 678, i64 891, i64 789, i64 812} +!15 = !{i64 678} +!16 = !{i64 789} +!17 = !{i64 891} +!18 = !{i64 912} +!19 = !{i64 812} diff --git a/llvm/test/Transforms/FunctionAttrs/noalias.ll b/llvm/test/Transforms/FunctionAttrs/noalias.ll index 8beb6fe..de8bd9e 100644 --- a/llvm/test/Transforms/FunctionAttrs/noalias.ll +++ b/llvm/test/Transforms/FunctionAttrs/noalias.ll @@ -235,7 +235,7 @@ define ptr @return_unknown_call(ptr %fn) { } define ptr @return_unknown_noalias_call(ptr %fn) { -; CHECK-LABEL: define ptr @return_unknown_noalias_call( +; CHECK-LABEL: define noalias ptr @return_unknown_noalias_call( ; CHECK-SAME: ptr readonly captures(none) [[FN:%.*]]) { ; CHECK-NEXT: [[A:%.*]] = call noalias ptr [[FN]]() ; CHECK-NEXT: ret ptr [[A]] diff --git a/llvm/test/Transforms/FunctionAttrs/nonnull.ll b/llvm/test/Transforms/FunctionAttrs/nonnull.ll index 9b17ded..8df242f 100644 --- a/llvm/test/Transforms/FunctionAttrs/nonnull.ll +++ b/llvm/test/Transforms/FunctionAttrs/nonnull.ll @@ -1412,7 +1412,7 @@ define ptr @unknown_func(ptr %fn) { } define ptr @unknown_nonnull_func(ptr %fn) { -; FNATTRS-LABEL: define ptr @unknown_nonnull_func( +; FNATTRS-LABEL: define nonnull ptr @unknown_nonnull_func( ; FNATTRS-SAME: ptr readonly captures(none) [[FN:%.*]]) { ; FNATTRS-NEXT: [[RES:%.*]] = call nonnull ptr [[FN]]() ; FNATTRS-NEXT: ret ptr [[RES]] diff --git a/llvm/test/Transforms/FunctionAttrs/nounwind.ll b/llvm/test/Transforms/FunctionAttrs/nounwind.ll index a64d9a6..076a7df 100644 --- a/llvm/test/Transforms/FunctionAttrs/nounwind.ll +++ b/llvm/test/Transforms/FunctionAttrs/nounwind.ll @@ -418,9 +418,10 @@ define void @unknown_call(ptr %fn) { } define void @unknown_nounwind_call(ptr %fn) { +; FNATTRS: Function Attrs: nounwind ; FNATTRS-LABEL: define {{[^@]+}}@unknown_nounwind_call -; FNATTRS-SAME: (ptr readonly captures(none) [[FN:%.*]]) { -; FNATTRS-NEXT: call void [[FN]]() #[[ATTR2:[0-9]+]] +; FNATTRS-SAME: (ptr readonly captures(none) [[FN:%.*]]) #[[ATTR2:[0-9]+]] { +; FNATTRS-NEXT: call void [[FN]]() #[[ATTR2]] ; FNATTRS-NEXT: ret void ; ; ATTRIBUTOR: Function Attrs: nounwind diff --git 
a/llvm/test/Transforms/HipStdPar/math-fixup.ll b/llvm/test/Transforms/HipStdPar/math-fixup.ll new file mode 100644 index 0000000..2c4622c --- /dev/null +++ b/llvm/test/Transforms/HipStdPar/math-fixup.ll @@ -0,0 +1,548 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -S -passes=hipstdpar-math-fixup %s | FileCheck %s + +define void @test_acos(double %dbl, float %flt) { +; CHECK-LABEL: define void @test_acos( +; CHECK-SAME: double [[DBL:%.*]], float [[FLT:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = call double @__hipstdpar_acos_f64(double [[DBL]]) +; CHECK-NEXT: [[TMP1:%.*]] = call float @__hipstdpar_acos_f32(float [[FLT]]) +; CHECK-NEXT: ret void +; +entry: + %0 = call double @llvm.acos.f64(double %dbl) + %1 = call float @llvm.acos.f32(float %flt) + ret void +} + +define void @test_acosh(double %dbl, float %flt) { +; CHECK-LABEL: define void @test_acosh( +; CHECK-SAME: double [[DBL:%.*]], float [[FLT:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = call double @__hipstdpar_acosh_f64(double [[DBL]]) +; CHECK-NEXT: [[TMP1:%.*]] = call float @__hipstdpar_acosh_f32(float [[FLT]]) +; CHECK-NEXT: ret void +; +entry: + %0 = call double @acosh(double %dbl) + %1 = call float @acoshf(float %flt) + ret void +} + +define void @test_asin(double %dbl, float %flt) { +; CHECK-LABEL: define void @test_asin( +; CHECK-SAME: double [[DBL:%.*]], float [[FLT:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = call double @__hipstdpar_asin_f64(double [[DBL]]) +; CHECK-NEXT: [[TMP1:%.*]] = call float @__hipstdpar_asin_f32(float [[FLT]]) +; CHECK-NEXT: ret void +; +entry: + %0 = call double @llvm.asin.f64(double %dbl) + %1 = call float @llvm.asin.f32(float %flt) + ret void +} + +define void @test_asinh(double %dbl, float %flt) { +; CHECK-LABEL: define void @test_asinh( +; CHECK-SAME: double [[DBL:%.*]], float [[FLT:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = call double @__hipstdpar_asinh_f64(double [[DBL]]) +; CHECK-NEXT: [[TMP1:%.*]] = call float @__hipstdpar_asinh_f32(float [[FLT]]) +; CHECK-NEXT: ret void +; +entry: + %0 = call double @asinh(double %dbl) + %1 = call float @asinhf(float %flt) + ret void +} + +define void @test_atan(double %dbl, float %flt) { +; CHECK-LABEL: define void @test_atan( +; CHECK-SAME: double [[DBL:%.*]], float [[FLT:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = call double @__hipstdpar_atan_f64(double [[DBL]]) +; CHECK-NEXT: [[TMP1:%.*]] = call float @__hipstdpar_atan_f32(float [[FLT]]) +; CHECK-NEXT: ret void +; +entry: + %0 = call double @llvm.atan.f64(double %dbl) + %1 = call float @llvm.atan.f32(float %flt) + ret void +} + +define void @test_atanh(double %dbl, float %flt) { +; CHECK-LABEL: define void @test_atanh( +; CHECK-SAME: double [[DBL:%.*]], float [[FLT:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = call double @__hipstdpar_atanh_f64(double [[DBL]]) +; CHECK-NEXT: [[TMP1:%.*]] = call float @__hipstdpar_atanh_f32(float [[FLT]]) +; CHECK-NEXT: ret void +; +entry: + %0 = call double @atanh(double %dbl) + %1 = call float @atanhf(float %flt) + ret void +} + +define void @test_atan2(double %dbl, float %flt) { +; CHECK-LABEL: define void @test_atan2( +; CHECK-SAME: double [[DBL:%.*]], float [[FLT:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = call double @__hipstdpar_atan2_f64(double [[DBL]], double [[DBL]]) +; CHECK-NEXT: [[TMP1:%.*]] = call float 
@__hipstdpar_atan2_f32(float [[FLT]], float [[FLT]]) +; CHECK-NEXT: ret void +; +entry: + %0 = call double @llvm.atan2.f64(double %dbl, double %dbl) + %1 = call float @llvm.atan2.f32(float %flt, float %flt) + ret void +} + +define void @test_cbrt(double %dbl, float %flt) { +; CHECK-LABEL: define void @test_cbrt( +; CHECK-SAME: double [[DBL:%.*]], float [[FLT:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = call double @__hipstdpar_cbrt_f64(double [[DBL]]) +; CHECK-NEXT: [[TMP1:%.*]] = call float @__hipstdpar_cbrt_f32(float [[FLT]]) +; CHECK-NEXT: ret void +; +entry: + %0 = call double @cbrt(double %dbl) + %1 = call float @cbrtf(float %flt) + ret void +} + +define void @test_cos(double %dbl) { +; CHECK-LABEL: define void @test_cos( +; CHECK-SAME: double [[DBL:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = call double @__hipstdpar_cos_f64(double [[DBL]]) +; CHECK-NEXT: ret void +; +entry: + %0 = call double @llvm.cos.f64(double %dbl) + ret void +} + +define void @test_cosh(double %dbl, float %flt) { +; CHECK-LABEL: define void @test_cosh( +; CHECK-SAME: double [[DBL:%.*]], float [[FLT:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = call double @__hipstdpar_cosh_f64(double [[DBL]]) +; CHECK-NEXT: [[TMP1:%.*]] = call float @__hipstdpar_cosh_f32(float [[FLT]]) +; CHECK-NEXT: ret void +; +entry: + %0 = call double @llvm.cosh.f64(double %dbl) + %1 = call float @llvm.cosh.f32(float %flt) + ret void +} + +define void @test_erf(double %dbl, float %flt) { +; CHECK-LABEL: define void @test_erf( +; CHECK-SAME: double [[DBL:%.*]], float [[FLT:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = call double @__hipstdpar_erf_f64(double [[DBL]]) +; CHECK-NEXT: [[TMP1:%.*]] = call float @__hipstdpar_erf_f32(float [[FLT]]) +; CHECK-NEXT: ret void +; +entry: + %0 = call double @erf(double %dbl) + %1 = call float @erff(float %flt) + ret void +} + +define void @test_erfc(double %dbl, float %flt) { +; CHECK-LABEL: define void @test_erfc( +; CHECK-SAME: double [[DBL:%.*]], float [[FLT:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = call double @__hipstdpar_erfc_f64(double [[DBL]]) +; CHECK-NEXT: [[TMP1:%.*]] = call float @__hipstdpar_erfc_f32(float [[FLT]]) +; CHECK-NEXT: ret void +; +entry: + %0 = call double @erfc(double %dbl) + %1 = call float @erfcf(float %flt) + ret void +} + +define void @test_exp(double %dbl) { +; CHECK-LABEL: define void @test_exp( +; CHECK-SAME: double [[DBL:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = call double @__hipstdpar_exp_f64(double [[DBL]]) +; CHECK-NEXT: ret void +; +entry: + %0 = call double @llvm.exp.f64(double %dbl) + ret void +} + +define void @test_exp2(double %dbl) { +; CHECK-LABEL: define void @test_exp2( +; CHECK-SAME: double [[DBL:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = call double @__hipstdpar_exp2_f64(double [[DBL]]) +; CHECK-NEXT: ret void +; +entry: + %0 = call double @llvm.exp2.f64(double %dbl) + ret void +} + +define void @test_expm1(double %dbl, float %flt) { +; CHECK-LABEL: define void @test_expm1( +; CHECK-SAME: double [[DBL:%.*]], float [[FLT:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = call double @__hipstdpar_expm1_f64(double [[DBL]]) +; CHECK-NEXT: [[TMP1:%.*]] = call float @__hipstdpar_expm1_f32(float [[FLT]]) +; CHECK-NEXT: ret void +; +entry: + %0 = call double @expm1(double %dbl) + %1 = call float @expm1f(float %flt) + ret void +} + +define void @test_fdim(double %dbl, float %flt) { +; 
CHECK-LABEL: define void @test_fdim( +; CHECK-SAME: double [[DBL:%.*]], float [[FLT:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = call double @__hipstdpar_fdim_f64(double [[DBL]], double [[DBL]]) +; CHECK-NEXT: [[TMP1:%.*]] = call float @__hipstdpar_fdim_f32(float [[FLT]], float [[FLT]]) +; CHECK-NEXT: ret void +; +entry: + %0 = call double @fdim(double %dbl, double %dbl) + %1 = call float @fdimf(float %flt, float %flt) + ret void +} + +define void @test_hypot(double %dbl, float %flt) { +; CHECK-LABEL: define void @test_hypot( +; CHECK-SAME: double [[DBL:%.*]], float [[FLT:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = call double @__hipstdpar_hypot_f64(double [[DBL]], double [[DBL]]) +; CHECK-NEXT: [[TMP1:%.*]] = call float @__hipstdpar_hypot_f32(float [[FLT]], float [[FLT]]) +; CHECK-NEXT: ret void +; +entry: + %0 = call double @hypot(double %dbl, double %dbl) + %1 = call float @hypotf(float %flt, float %flt) + ret void +} + +define void @test_lgamma(double %dbl, float %flt) { +; CHECK-LABEL: define void @test_lgamma( +; CHECK-SAME: double [[DBL:%.*]], float [[FLT:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = call double @__hipstdpar_lgamma_f64(double [[DBL]]) +; CHECK-NEXT: [[TMP1:%.*]] = call float @__hipstdpar_lgamma_f32(float [[FLT]]) +; CHECK-NEXT: ret void +; +entry: + %0 = call double @lgamma(double %dbl) + %1 = call float @lgammaf(float %flt) + ret void +} + +define void @test_log(double %dbl) { +; CHECK-LABEL: define void @test_log( +; CHECK-SAME: double [[DBL:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = call double @__hipstdpar_log_f64(double [[DBL]]) +; CHECK-NEXT: ret void +; +entry: + %0 = call double @llvm.log.f64(double %dbl) + ret void +} + +define void @test_log10(double %dbl) { +; CHECK-LABEL: define void @test_log10( +; CHECK-SAME: double [[DBL:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = call double @__hipstdpar_log10_f64(double [[DBL]]) +; CHECK-NEXT: ret void +; +entry: + %0 = call double @llvm.log10.f64(double %dbl) + ret void +} + +define void @test_log2(double %dbl) { +; CHECK-LABEL: define void @test_log2( +; CHECK-SAME: double [[DBL:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = call double @__hipstdpar_log2_f64(double [[DBL]]) +; CHECK-NEXT: ret void +; +entry: + %0 = call double @llvm.log2.f64(double %dbl) + ret void +} + +define void @test_log1p(double %dbl, float %flt) { +; CHECK-LABEL: define void @test_log1p( +; CHECK-SAME: double [[DBL:%.*]], float [[FLT:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = call double @__hipstdpar_log1p_f64(double [[DBL]]) +; CHECK-NEXT: [[TMP1:%.*]] = call float @__hipstdpar_log1p_f32(float [[FLT]]) +; CHECK-NEXT: ret void +; +entry: + %0 = call double @log1p(double %dbl) + %1 = call float @log1pf(float %flt) + ret void +} + +define void @test_modf(double %dbl, float %flt, ptr %pdbl, ptr %pflt) { +; CHECK-LABEL: define void @test_modf( +; CHECK-SAME: double [[DBL:%.*]], float [[FLT:%.*]], ptr [[PDBL:%.*]], ptr [[PFLT:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = tail call { double, double } @__hipstdpar_modf_f64(double [[DBL]]) +; CHECK-NEXT: [[TMP1:%.*]] = extractvalue { double, double } [[TMP0]], 0 +; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { double, double } [[TMP0]], 1 +; CHECK-NEXT: store double [[TMP2]], ptr [[PDBL]], align 8 +; CHECK-NEXT: [[TMP3:%.*]] = tail call { float, float } @__hipstdpar_modf_f32(float [[FLT]]) +; CHECK-NEXT: [[TMP4:%.*]] = 
extractvalue { float, float } [[TMP3]], 0 +; CHECK-NEXT: [[TMP5:%.*]] = extractvalue { float, float } [[TMP3]], 1 +; CHECK-NEXT: store float [[TMP5]], ptr [[PFLT]], align 4 +; CHECK-NEXT: ret void +; +entry: + %0 = tail call { double, double } @llvm.modf.f64(double %dbl) + %1 = extractvalue { double, double } %0, 0 + %2 = extractvalue { double, double } %0, 1 + store double %2, ptr %pdbl, align 8 + %3 = tail call { float, float } @llvm.modf.f32(float %flt) + %4 = extractvalue { float, float } %3, 0 + %5 = extractvalue { float, float } %3, 1 + store float %5, ptr %pflt, align 4 + ret void +} + +define void @test_pow(double %dbl) { +; CHECK-LABEL: define void @test_pow( +; CHECK-SAME: double [[DBL:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = call double @__hipstdpar_pow_f64(double [[DBL]], double [[DBL]]) +; CHECK-NEXT: ret void +; +entry: + %0 = call double @llvm.pow.f64(double %dbl, double %dbl) + ret void +} + +define void @test_remainder(double %dbl, float %flt) { +; CHECK-LABEL: define void @test_remainder( +; CHECK-SAME: double [[DBL:%.*]], float [[FLT:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = call double @__hipstdpar_remainder_f64(double [[DBL]], double [[DBL]]) +; CHECK-NEXT: [[TMP1:%.*]] = call float @__hipstdpar_remainder_f32(float [[FLT]], float [[FLT]]) +; CHECK-NEXT: ret void +; +entry: + %0 = call double @remainder(double %dbl, double %dbl) + %1 = call float @remainderf(float %flt, float %flt) + ret void +} + +define void @test_remquo(double %dbl, float %flt, ptr %p) { +; CHECK-LABEL: define void @test_remquo( +; CHECK-SAME: double [[DBL:%.*]], float [[FLT:%.*]], ptr [[P:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = call double @__hipstdpar_remquo_f64(double [[DBL]], double [[DBL]], ptr [[P]]) +; CHECK-NEXT: [[TMP1:%.*]] = call float @__hipstdpar_remquo_f32(float [[FLT]], float [[FLT]], ptr [[P]]) +; CHECK-NEXT: ret void +; +entry: + %0 = call double @remquo(double %dbl, double %dbl, ptr %p) + %1 = call float @remquof(float %flt, float %flt, ptr %p) + ret void +} + +define void @test_sin(double %dbl) { +; CHECK-LABEL: define void @test_sin( +; CHECK-SAME: double [[DBL:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = call double @__hipstdpar_sin_f64(double [[DBL]]) +; CHECK-NEXT: ret void +; +entry: + %0 = call double @llvm.sin.f64(double %dbl) + ret void +} + +define void @test_sinh(double %dbl, float %flt) { +; CHECK-LABEL: define void @test_sinh( +; CHECK-SAME: double [[DBL:%.*]], float [[FLT:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = call double @__hipstdpar_sinh_f64(double [[DBL]]) +; CHECK-NEXT: [[TMP1:%.*]] = call float @__hipstdpar_sinh_f32(float [[FLT]]) +; CHECK-NEXT: ret void +; +entry: + %0 = call double @llvm.sinh.f64(double %dbl) + %1 = call float @llvm.sinh.f32(float %flt) + ret void +} + +define void @test_tan(double %dbl, float %flt) { +; CHECK-LABEL: define void @test_tan( +; CHECK-SAME: double [[DBL:%.*]], float [[FLT:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = call double @__hipstdpar_tan_f64(double [[DBL]]) +; CHECK-NEXT: [[TMP1:%.*]] = call float @__hipstdpar_tan_f32(float [[FLT]]) +; CHECK-NEXT: ret void +; +entry: + %0 = call double @llvm.tan.f64(double %dbl) + %1 = call float @llvm.tan.f32(float %flt) + ret void +} + +define void @test_tanh(double %dbl, float %flt) { +; CHECK-LABEL: define void @test_tanh( +; CHECK-SAME: double [[DBL:%.*]], float [[FLT:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] 
= call double @__hipstdpar_tanh_f64(double [[DBL]]) +; CHECK-NEXT: [[TMP1:%.*]] = call float @__hipstdpar_tanh_f32(float [[FLT]]) +; CHECK-NEXT: ret void +; +entry: + %0 = call double @llvm.tanh.f64(double %dbl) + %1 = call float @llvm.tanh.f32(float %flt) + ret void +} + +define void @test_tgamma(double %dbl, float %flt) { +; CHECK-LABEL: define void @test_tgamma( +; CHECK-SAME: double [[DBL:%.*]], float [[FLT:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = call double @__hipstdpar_tgamma_f64(double [[DBL]]) +; CHECK-NEXT: [[TMP1:%.*]] = call float @__hipstdpar_tgamma_f32(float [[FLT]]) +; CHECK-NEXT: ret void +; +entry: + %0 = call double @tgamma(double %dbl) + %1 = call float @tgammaf(float %flt) + ret void +} + +@globdbl = global double 4.200000e+01 +@globflt = global float 4.200000e+01 + +define void @global_args() { +; CHECK-LABEL: define void @global_args() { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[DBL:%.*]] = load double, ptr @globdbl, align 8 +; CHECK-NEXT: [[FLT:%.*]] = load float, ptr @globflt, align 4 +; CHECK-NEXT: [[TMP0:%.*]] = call double @__hipstdpar_remquo_f64(double [[DBL]], double [[DBL]], ptr @globdbl) +; CHECK-NEXT: [[TMP1:%.*]] = call float @__hipstdpar_remquo_f32(float [[FLT]], float [[FLT]], ptr @globflt) +; CHECK-NEXT: ret void +; +entry: + %dbl = load double, ptr @globdbl + %flt = load float, ptr @globflt + %1 = call double @remquo(double %dbl, double %dbl, ptr @globdbl) + %2 = call float @remquof(float %flt, float %flt, ptr @globflt) + ret void +} + +declare hidden double @remainder(double, double) + +declare hidden float @remainderf(float, float) + +declare hidden double @remquo(double, double, ptr) + +declare hidden float @remquof(float, float, ptr) + +declare hidden double @fdim(double, double) + +declare hidden float @fdimf(float, float) + +declare double @llvm.exp.f64(double) + +declare float @llvm.exp.f32(float) + +declare double @llvm.exp2.f64(double) + +declare float @llvm.exp2.f32(float) + +declare hidden double @expm1(double) + +declare hidden float @expm1f(float) + +declare double @llvm.log.f64(double) + +declare double @llvm.log10.f64(double) + +declare double @llvm.log2.f64(double) + +declare hidden double @log1p(double) + +declare hidden float @log1pf(float) + +declare { float, float } @llvm.modf.f32(float) + +declare { double, double } @llvm.modf.f64(double) + +declare double @llvm.pow.f64(double, double) + +declare hidden double @cbrt(double) + +declare hidden float @cbrtf(float) + +declare hidden double @hypot(double, double) + +declare hidden float @hypotf(float, float) + +declare double @llvm.sin.f64(double) + +declare double @llvm.cos.f64(double) + +declare double @llvm.tan.f64(double) + +declare double @llvm.asin.f64(double) + +declare double @llvm.acos.f64(double) + +declare double @llvm.atan.f64(double) + +declare double @llvm.atan2.f64(double, double) + +declare double @llvm.sinh.f64(double) + +declare double @llvm.cosh.f64(double) + +declare double @llvm.tanh.f64(double) + +declare hidden double @asinh(double) + +declare hidden float @asinhf(float) + +declare hidden double @acosh(double) + +declare hidden float @acoshf(float) + +declare hidden double @atanh(double) + +declare hidden float @atanhf(float) + +declare hidden double @erf(double) + +declare hidden float @erff(float) + +declare hidden double @erfc(double) + +declare hidden float @erfcf(float) + +declare hidden double @tgamma(double) + +declare hidden float @tgammaf(float) + +declare hidden double @lgamma(double) + +declare hidden float @lgammaf(float) 
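+
+;; Shape of the rewrite exercised throughout this file, with @erf as a
+;; representative case (wrapper names follow the __hipstdpar_<name>_<type>
+;; convention checked above):
+;;   before:  %r = call double @erf(double %x)
+;;   after:   %r = call double @__hipstdpar_erf_f64(double %x)
+;; Both libm externs (e.g. @acosh) and LLVM intrinsics (e.g. @llvm.sin.f64)
+;; are retargeted in place with unchanged signatures, including the
+;; aggregate-returning modf and the pointer-taking remquo.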
diff --git a/llvm/test/Transforms/InstCombine/recurrence-binary-intrinsic.ll b/llvm/test/Transforms/InstCombine/recurrence-binary-intrinsic.ll new file mode 100644 index 0000000..c637481 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/recurrence-binary-intrinsic.ll @@ -0,0 +1,394 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -passes=instcombine -S < %s | FileCheck %s + +define i8 @simple_recurrence_intrinsic_smax(i8 %n, i8 %a, i8 %b) { +; CHECK-LABEL: define i8 @simple_recurrence_intrinsic_smax( +; CHECK-SAME: i8 [[N:%.*]], i8 [[A:%.*]], i8 [[B:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[SMAX_ACC:%.*]] = phi i8 [ [[SMAX:%.*]], %[[LOOP]] ], [ [[A]], %[[ENTRY]] ] +; CHECK-NEXT: [[SMAX]] = call i8 @llvm.smax.i8(i8 [[SMAX_ACC]], i8 [[B]]) +; CHECK-NEXT: [[IV_NEXT]] = add nuw i8 [[IV]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[IV_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP]], label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret i8 [[SMAX]] +; +entry: + br label %loop + +loop: + %iv = phi i8 [ %iv.next, %loop ], [ 0, %entry ] + %smax.acc = phi i8 [ %smax, %loop ], [ %a, %entry ] + %smax = call i8 @llvm.smax.i8(i8 %smax.acc, i8 %b) + %iv.next = add nuw i8 %iv, 1 + %cmp = icmp ult i8 %iv.next, %n + br i1 %cmp, label %loop, label %exit + +exit: + ret i8 %smax +} + +define i8 @simple_recurrence_intrinsic_smin(i8 %n, i8 %a, i8 %b) { +; CHECK-LABEL: define i8 @simple_recurrence_intrinsic_smin( +; CHECK-SAME: i8 [[N:%.*]], i8 [[A:%.*]], i8 [[B:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[SMIN_ACC:%.*]] = phi i8 [ [[SMIN:%.*]], %[[LOOP]] ], [ [[A]], %[[ENTRY]] ] +; CHECK-NEXT: [[SMIN]] = call i8 @llvm.smin.i8(i8 [[SMIN_ACC]], i8 [[B]]) +; CHECK-NEXT: [[IV_NEXT]] = add nuw i8 [[IV]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[IV_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP]], label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret i8 [[SMIN]] +; +entry: + br label %loop + +loop: + %iv = phi i8 [ %iv.next, %loop ], [ 0, %entry ] + %smin.acc = phi i8 [ %smin, %loop ], [ %a, %entry ] + %smin = call i8 @llvm.smin.i8(i8 %smin.acc, i8 %b) + %iv.next = add nuw i8 %iv, 1 + %cmp = icmp ult i8 %iv.next, %n + br i1 %cmp, label %loop, label %exit + +exit: + ret i8 %smin +} + +define i8 @simple_recurrence_intrinsic_umax(i8 %n, i8 %a, i8 %b) { +; CHECK-LABEL: define i8 @simple_recurrence_intrinsic_umax( +; CHECK-SAME: i8 [[N:%.*]], i8 [[A:%.*]], i8 [[B:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[UMAX_ACC:%.*]] = phi i8 [ [[UMAX:%.*]], %[[LOOP]] ], [ [[A]], %[[ENTRY]] ] +; CHECK-NEXT: [[UMAX]] = call i8 @llvm.umax.i8(i8 [[UMAX_ACC]], i8 [[B]]) +; CHECK-NEXT: [[IV_NEXT]] = add nuw i8 [[IV]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[IV_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP]], label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret i8 [[UMAX]] +; +entry: + br label %loop + +loop: + %iv = phi i8 [ %iv.next, %loop ], [ 0, %entry ] + %umax.acc = phi i8 [ %umax, %loop ], [ %a, %entry ] + %umax = call i8 @llvm.umax.i8(i8 %umax.acc, i8 %b) + 
%iv.next = add nuw i8 %iv, 1 + %cmp = icmp ult i8 %iv.next, %n + br i1 %cmp, label %loop, label %exit + +exit: + ret i8 %umax +} + +define i8 @simple_recurrence_intrinsic_umin(i8 %n, i8 %a, i8 %b) { +; CHECK-LABEL: define i8 @simple_recurrence_intrinsic_umin( +; CHECK-SAME: i8 [[N:%.*]], i8 [[A:%.*]], i8 [[B:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[UMIN_ACC:%.*]] = phi i8 [ [[UMIN:%.*]], %[[LOOP]] ], [ [[A]], %[[ENTRY]] ] +; CHECK-NEXT: [[UMIN]] = call i8 @llvm.umin.i8(i8 [[UMIN_ACC]], i8 [[B]]) +; CHECK-NEXT: [[IV_NEXT]] = add nuw i8 [[IV]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[IV_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP]], label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret i8 [[UMIN]] +; +entry: + br label %loop + +loop: + %iv = phi i8 [ %iv.next, %loop ], [ 0, %entry ] + %umin.acc = phi i8 [ %umin, %loop ], [ %a, %entry ] + %umin = call i8 @llvm.umin.i8(i8 %umin.acc, i8 %b) + %iv.next = add nuw i8 %iv, 1 + %cmp = icmp ult i8 %iv.next, %n + br i1 %cmp, label %loop, label %exit + +exit: + ret i8 %umin +} + +define float @simple_recurrence_intrinsic_maxnum(i32 %n, float %a, float %b) { +; CHECK-LABEL: define float @simple_recurrence_intrinsic_maxnum( +; CHECK-SAME: i32 [[N:%.*]], float [[A:%.*]], float [[B:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[FMAX_ACC:%.*]] = phi float [ [[FMAX:%.*]], %[[LOOP]] ], [ [[A]], %[[ENTRY]] ] +; CHECK-NEXT: [[FMAX]] = call float @llvm.maxnum.f32(float [[FMAX_ACC]], float [[B]]) +; CHECK-NEXT: [[IV_NEXT]] = add nuw i32 [[IV]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[IV_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP]], label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret float [[FMAX]] +; +entry: + br label %loop + +loop: + %iv = phi i32 [ %iv.next, %loop ], [ 0, %entry ] + %fmax.acc = phi float [ %fmax, %loop ], [ %a, %entry ] + %fmax = call float @llvm.maxnum.f32(float %fmax.acc, float %b) + %iv.next = add nuw i32 %iv, 1 + %cmp = icmp ult i32 %iv.next, %n + br i1 %cmp, label %loop, label %exit +exit: + ret float %fmax +} + +define float @simple_recurrence_intrinsic_minnum(i32 %n, float %a, float %b) { +; CHECK-LABEL: define float @simple_recurrence_intrinsic_minnum( +; CHECK-SAME: i32 [[N:%.*]], float [[A:%.*]], float [[B:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[FMIN_ACC:%.*]] = phi float [ [[FMIN:%.*]], %[[LOOP]] ], [ [[A]], %[[ENTRY]] ] +; CHECK-NEXT: [[FMIN]] = call float @llvm.minnum.f32(float [[FMIN_ACC]], float [[B]]) +; CHECK-NEXT: [[IV_NEXT]] = add nuw i32 [[IV]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[IV_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP]], label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret float [[FMIN]] +; +entry: + br label %loop + +loop: + %iv = phi i32 [ %iv.next, %loop ], [ 0, %entry ] + %fmin.acc = phi float [ %fmin, %loop ], [ %a, %entry ] + %fmin = call float @llvm.minnum.f32(float %fmin.acc, float %b) + %iv.next = add nuw i32 %iv, 1 + %cmp = icmp ult i32 %iv.next, %n + br i1 %cmp, label %loop, label %exit +exit: + ret float %fmin +} + +define float @simple_recurrence_intrinsic_maximum(i32 
%n, float %a, float %b) { +; CHECK-LABEL: define float @simple_recurrence_intrinsic_maximum( +; CHECK-SAME: i32 [[N:%.*]], float [[A:%.*]], float [[B:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[FMAX_ACC:%.*]] = phi float [ [[FMAX:%.*]], %[[LOOP]] ], [ [[A]], %[[ENTRY]] ] +; CHECK-NEXT: [[FMAX]] = call nnan float @llvm.maximum.f32(float [[FMAX_ACC]], float [[B]]) +; CHECK-NEXT: [[IV_NEXT]] = add nuw i32 [[IV]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[IV_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP]], label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret float [[FMAX]] +; +entry: + br label %loop + +loop: + %iv = phi i32 [ %iv.next, %loop ], [ 0, %entry ] + %fmax.acc = phi float [ %fmax, %loop ], [ %a, %entry ] + %fmax = call nnan float @llvm.maximum.f32(float %fmax.acc, float %b) + %iv.next = add nuw i32 %iv, 1 + %cmp = icmp ult i32 %iv.next, %n + br i1 %cmp, label %loop, label %exit +exit: + ret float %fmax +} + +define float @simple_recurrence_intrinsic_minimum(i32 %n, float %a, float %b) { +; CHECK-LABEL: define float @simple_recurrence_intrinsic_minimum( +; CHECK-SAME: i32 [[N:%.*]], float [[A:%.*]], float [[B:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[FMIN_ACC:%.*]] = phi float [ [[FMIN:%.*]], %[[LOOP]] ], [ [[A]], %[[ENTRY]] ] +; CHECK-NEXT: [[FMIN]] = call nnan float @llvm.minimum.f32(float [[FMIN_ACC]], float [[B]]) +; CHECK-NEXT: [[IV_NEXT]] = add nuw i32 [[IV]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[IV_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP]], label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret float [[FMIN]] +; +entry: + br label %loop + +loop: + %iv = phi i32 [ %iv.next, %loop ], [ 0, %entry ] + %fmin.acc = phi float [ %fmin, %loop ], [ %a, %entry ] + %fmin = call nnan float @llvm.minimum.f32(float %fmin.acc, float %b) + %iv.next = add nuw i32 %iv, 1 + %cmp = icmp ult i32 %iv.next, %n + br i1 %cmp, label %loop, label %exit +exit: + ret float %fmin +} + +define float @simple_recurrence_intrinsic_maximumnum(i32 %n, float %a, float %b) { +; CHECK-LABEL: define float @simple_recurrence_intrinsic_maximumnum( +; CHECK-SAME: i32 [[N:%.*]], float [[A:%.*]], float [[B:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[FMAX_ACC:%.*]] = phi float [ [[FMAX:%.*]], %[[LOOP]] ], [ [[A]], %[[ENTRY]] ] +; CHECK-NEXT: [[FMAX]] = call nnan float @llvm.maximumnum.f32(float [[FMAX_ACC]], float [[B]]) +; CHECK-NEXT: [[IV_NEXT]] = add nuw i32 [[IV]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[IV_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP]], label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret float [[FMAX]] +; +entry: + br label %loop + +loop: + %iv = phi i32 [ %iv.next, %loop ], [ 0, %entry ] + %fmax.acc = phi float [ %fmax, %loop ], [ %a, %entry ] + %fmax = call nnan float @llvm.maximumnum.f32(float %fmax.acc, float %b) + %iv.next = add nuw i32 %iv, 1 + %cmp = icmp ult i32 %iv.next, %n + br i1 %cmp, label %loop, label %exit +exit: + ret float %fmax +} + +define float @simple_recurrence_intrinsic_minimumnum(i32 %n, float %a, float %b) { +; CHECK-LABEL: define float 
@simple_recurrence_intrinsic_minimumnum( +; CHECK-SAME: i32 [[N:%.*]], float [[A:%.*]], float [[B:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[FMIN_ACC:%.*]] = phi float [ [[FMIN:%.*]], %[[LOOP]] ], [ [[A]], %[[ENTRY]] ] +; CHECK-NEXT: [[FMIN]] = call nnan float @llvm.minimumnum.f32(float [[FMIN_ACC]], float [[B]]) +; CHECK-NEXT: [[IV_NEXT]] = add nuw i32 [[IV]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[IV_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP]], label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret float [[FMIN]] +; +entry: + br label %loop + +loop: + %iv = phi i32 [ %iv.next, %loop ], [ 0, %entry ] + %fmin.acc = phi float [ %fmin, %loop ], [ %a, %entry ] + %fmin = call nnan float @llvm.minimumnum.f32(float %fmin.acc, float %b) + %iv.next = add nuw i32 %iv, 1 + %cmp = icmp ult i32 %iv.next, %n + br i1 %cmp, label %loop, label %exit +exit: + ret float %fmin +} + +define i8 @simple_recurrence_intrinsic_multiuse_phi(i8 %n, i8 %a, i8 %b) { +; CHECK-LABEL: define i8 @simple_recurrence_intrinsic_multiuse_phi( +; CHECK-SAME: i8 [[N:%.*]], i8 [[A:%.*]], i8 [[B:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: [[UMAX_ACC:%.*]] = phi i8 [ [[UMAX:%.*]], %[[LOOP]] ], [ [[A]], %[[ENTRY]] ] +; CHECK-NEXT: call void @use(i8 [[UMAX_ACC]]) +; CHECK-NEXT: [[UMAX]] = call i8 @llvm.umax.i8(i8 [[UMAX_ACC]], i8 [[B]]) +; CHECK-NEXT: [[IV_NEXT]] = add nuw i8 [[IV]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[IV_NEXT]], [[N]] +; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP]], label %[[EXIT:.*]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret i8 [[UMAX]] +; +entry: + br label %loop + +loop: + %iv = phi i8 [ %iv.next, %loop ], [ 0, %entry ] + %umax.acc = phi i8 [ %umax, %loop ], [ %a, %entry ] + call void @use(i8 %umax.acc) + %umax = call i8 @llvm.umax.i8(i8 %umax.acc, i8 %b) + %iv.next = add nuw i8 %iv, 1 + %cmp = icmp ult i8 %iv.next, %n + br i1 %cmp, label %loop, label %exit + +exit: + ret i8 %umax +} + +; Negative tests. 
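+; The recurrences above all have the form
+;   %acc = phi [ %a, %entry ], [ minmax(%acc, %b), %loop ]
+; with a loop-invariant %b. The two cases below deliberately break that
+; pattern: @llvm.uadd.sat accumulates on every iteration rather than
+; converging, and in the loop-variant case %b is recomputed from the
+; induction variable (%b = xor i8 %iv, 42), so neither may be treated like
+; the min/max recurrences above.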
+
+define i8 @simple_recurrence_intrinsic_uadd_sat(i8 %n, i8 %a, i8 %b) {
+; CHECK-LABEL: define i8 @simple_recurrence_intrinsic_uadd_sat(
+; CHECK-SAME: i8 [[N:%.*]], i8 [[A:%.*]], i8 [[B:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: [[UADD_SAT_ACC:%.*]] = phi i8 [ [[UADD_SAT:%.*]], %[[LOOP]] ], [ [[A]], %[[ENTRY]] ]
+; CHECK-NEXT: [[UADD_SAT]] = call i8 @llvm.uadd.sat.i8(i8 [[UADD_SAT_ACC]], i8 [[B]])
+; CHECK-NEXT: [[IV_NEXT]] = add nuw i8 [[IV]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[IV_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP]], label %[[EXIT:.*]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret i8 [[UADD_SAT]]
+;
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i8 [ %iv.next, %loop ], [ 0, %entry ]
+ %uadd.sat.acc = phi i8 [ %uadd.sat, %loop ], [ %a, %entry ]
+ %uadd.sat = call i8 @llvm.uadd.sat.i8(i8 %uadd.sat.acc, i8 %b)
+ %iv.next = add nuw i8 %iv, 1
+ %cmp = icmp ult i8 %iv.next, %n
+ br i1 %cmp, label %loop, label %exit
+
+exit:
+ ret i8 %uadd.sat
+}
+
+define i8 @simple_recurrence_intrinsic_arg_loop_variant(i8 %n, i8 %a) {
+; CHECK-LABEL: define i8 @simple_recurrence_intrinsic_arg_loop_variant(
+; CHECK-SAME: i8 [[N:%.*]], i8 [[A:%.*]]) {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br label %[[LOOP:.*]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i8 [ [[IV_NEXT:%.*]], %[[LOOP]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: [[UMAX_ACC:%.*]] = phi i8 [ [[UMAX:%.*]], %[[LOOP]] ], [ [[A]], %[[ENTRY]] ]
+; CHECK-NEXT: [[B:%.*]] = xor i8 [[IV]], 42
+; CHECK-NEXT: [[UMAX]] = call i8 @llvm.umax.i8(i8 [[UMAX_ACC]], i8 [[B]])
+; CHECK-NEXT: [[IV_NEXT]] = add nuw i8 [[IV]], 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 [[IV_NEXT]], [[N]]
+; CHECK-NEXT: br i1 [[CMP]], label %[[LOOP]], label %[[EXIT:.*]]
+; CHECK: [[EXIT]]:
+; CHECK-NEXT: ret i8 [[UMAX]]
+;
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i8 [ %iv.next, %loop ], [ 0, %entry ]
+ %umax.acc = phi i8 [ %umax, %loop ], [ %a, %entry ]
+ %b = xor i8 %iv, 42
+ %umax = call i8 @llvm.umax.i8(i8 %umax.acc, i8 %b)
+ %iv.next = add nuw i8 %iv, 1
+ %cmp = icmp ult i8 %iv.next, %n
+ br i1 %cmp, label %loop, label %exit
+
+exit:
+ ret i8 %umax
+}
+
+declare void @use(i8)
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-evl-crash.ll b/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-evl-crash.ll
index 27d7bd0..4da31a0 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-evl-crash.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/truncate-to-minimal-bitwidth-evl-crash.ll
@@ -25,13 +25,7 @@ define void @truncate_to_minimal_bitwidths_widen_cast_recipe(ptr %src) {
; CHECK-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[AVL:%.*]] = sub i64 9, [[EVL_BASED_IV]]
; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[AVL]], i32 8, i1 true)
-; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[EVL_BASED_IV]]
-; CHECK-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 8 x i8> @llvm.vp.load.nxv8i8.p0(ptr align 1 [[TMP5]], <vscale x 8 x i1> splat (i1 true), i32 [[TMP7]])
-; CHECK-NEXT: [[TMP8:%.*]] = zext <vscale x 8 x i8> [[VP_OP_LOAD]] to <vscale x 8 x i16>
-; CHECK-NEXT: [[TMP12:%.*]] = mul <vscale x 8 x i16> zeroinitializer, [[TMP8]]
-; CHECK-NEXT: [[TMP13:%.*]] = lshr <vscale x 8 x i16> [[TMP12]], splat (i16 1)
-; CHECK-NEXT: [[TMP14:%.*]] = trunc <vscale x 8 x i16> [[TMP13]] to <vscale x 8 x i8>
-; CHECK-NEXT: call void @llvm.vp.scatter.nxv8i8.nxv8p0(<vscale x 8 x i8> [[TMP14]], <vscale x 8 x ptr> align 1 zeroinitializer, <vscale x 8 x i1> splat (i1 true), i32 [[TMP7]])
+; CHECK-NEXT: call void @llvm.vp.scatter.nxv8i8.nxv8p0(<vscale x 8 x i8> zeroinitializer, <vscale x 8 x ptr> align 1 zeroinitializer, <vscale x 8 x i1> splat (i1 true), i32 [[TMP7]])
; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP7]] to i64
; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP9]], [[EVL_BASED_IV]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP4]]
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/type-info-cache-evl-crash.ll b/llvm/test/Transforms/LoopVectorize/RISCV/type-info-cache-evl-crash.ll
index 27abddf..bb2e099 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/type-info-cache-evl-crash.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/type-info-cache-evl-crash.ll
@@ -39,15 +39,13 @@ define void @type_info_cache_clobber(ptr %dstv, ptr %src, i64 %wide.trip.count)
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr i8, ptr [[SRC]], i64 [[EVL_BASED_IV]]
; CHECK-NEXT: [[VP_OP_LOAD:%.*]] = call <vscale x 8 x i8> @llvm.vp.load.nxv8i8.p0(ptr align 1 [[TMP13]], <vscale x 8 x i1> splat (i1 true), i32 [[TMP11]]), !alias.scope [[META0:![0-9]+]]
; CHECK-NEXT: [[TMP15:%.*]] = zext <vscale x 8 x i8> [[VP_OP_LOAD]] to <vscale x 8 x i32>
-; CHECK-NEXT: [[VP_OP:%.*]] = mul <vscale x 8 x i32> [[TMP15]], zeroinitializer
; CHECK-NEXT: [[TMP23:%.*]] = ashr <vscale x 8 x i32> [[TMP15]], zeroinitializer
; CHECK-NEXT: [[VP_OP3:%.*]] = or <vscale x 8 x i32> [[TMP23]], zeroinitializer
; CHECK-NEXT: [[TMP16:%.*]] = icmp ult <vscale x 8 x i32> [[TMP15]], zeroinitializer
; CHECK-NEXT: [[TMP17:%.*]] = select <vscale x 8 x i1> [[TMP16]], <vscale x 8 x i32> [[VP_OP3]], <vscale x 8 x i32> zeroinitializer
; CHECK-NEXT: [[TMP24:%.*]] = trunc <vscale x 8 x i32> [[TMP17]] to <vscale x 8 x i8>
; CHECK-NEXT: call void @llvm.vp.scatter.nxv8i8.nxv8p0(<vscale x 8 x i8> [[TMP24]], <vscale x 8 x ptr> align 1 [[BROADCAST_SPLAT]], <vscale x 8 x i1> splat (i1 true), i32 [[TMP11]]), !alias.scope [[META3:![0-9]+]], !noalias [[META0]]
-; CHECK-NEXT: [[TMP19:%.*]] = trunc <vscale x 8 x i32> [[VP_OP]] to <vscale x 8 x i16>
-; CHECK-NEXT: call void @llvm.vp.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> [[TMP19]], <vscale x 8 x ptr> align 2 zeroinitializer, <vscale x 8 x i1> splat (i1 true), i32 [[TMP11]])
+; CHECK-NEXT: call void @llvm.vp.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x ptr> align 2 zeroinitializer, <vscale x 8 x i1> splat (i1 true), i32 [[TMP11]])
; CHECK-NEXT: [[TMP20:%.*]] = zext i32 [[TMP11]] to i64
; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP20]], [[EVL_BASED_IV]]
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP10]]
diff --git a/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-early-exit.ll b/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-early-exit.ll
index 1ad75bb..3d44317 100644
--- a/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-early-exit.ll
+++ b/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-early-exit.ll
@@ -281,3 +281,147 @@ exit:
 %res = phi i64 [ %iv, %loop.header ], [ 1, %loop.latch ]
 ret i64 %res
}
+define i8 @test_early_exit_max_vector_tc_eq_16(ptr dereferenceable(17) %A) nosync nofree {
+; VF8UF1-LABEL: define i8 @test_early_exit_max_vector_tc_eq_16(
+; VF8UF1-SAME: ptr dereferenceable(17) [[A:%.*]]) #[[ATTR0]] {
+; VF8UF1-NEXT: [[ENTRY:.*]]:
+; VF8UF1-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; VF8UF1: [[VECTOR_PH]]:
+; VF8UF1-NEXT: br label %[[VECTOR_BODY:.*]]
+; VF8UF1: [[VECTOR_BODY]]:
+; VF8UF1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; VF8UF1-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[INDEX]]
+; VF8UF1-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[TMP0]], align 1
+; VF8UF1-NEXT: [[TMP1:%.*]] = icmp eq <8 x i8> [[WIDE_LOAD]], zeroinitializer
+; VF8UF1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
+; VF8UF1-NEXT: [[TMP2:%.*]] = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> [[TMP1]])
+; VF8UF1-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
+; VF8UF1-NEXT: [[TMP4:%.*]] = or i1 [[TMP2]], [[TMP3]]
+; VF8UF1-NEXT: br i1 [[TMP4]], label %[[MIDDLE_SPLIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; VF8UF1: [[MIDDLE_SPLIT]]:
+; VF8UF1-NEXT: br i1 [[TMP2]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]]
+; VF8UF1: [[MIDDLE_BLOCK]]:
+; VF8UF1-NEXT: br label %[[SCALAR_PH]]
+; VF8UF1: [[VECTOR_EARLY_EXIT]]:
+; VF8UF1-NEXT: br label %[[EXIT:.*]]
+; VF8UF1: [[SCALAR_PH]]:
+; VF8UF1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 16, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; VF8UF1-NEXT: br label %[[LOOP_HEADER:.*]]
+; VF8UF1: [[LOOP_HEADER]]:
+; VF8UF1-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
+; VF8UF1-NEXT: [[P_SRC:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV]]
+; VF8UF1-NEXT: [[L:%.*]] = load i8, ptr [[P_SRC]], align 1
+; VF8UF1-NEXT: [[C:%.*]] = icmp eq i8 [[L]], 0
+; VF8UF1-NEXT: br i1 [[C]], label %[[EXIT]], label %[[LOOP_LATCH]]
+; VF8UF1: [[LOOP_LATCH]]:
+; VF8UF1-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 1
+; VF8UF1-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 17
+; VF8UF1-NEXT: br i1 [[CMP]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP7:![0-9]+]]
+; VF8UF1: [[EXIT]]:
+; VF8UF1-NEXT: [[RES:%.*]] = phi i8 [ 0, %[[LOOP_HEADER]] ], [ 1, %[[LOOP_LATCH]] ], [ 0, %[[VECTOR_EARLY_EXIT]] ]
+; VF8UF1-NEXT: ret i8 [[RES]]
+;
+; VF8UF2-LABEL: define i8 @test_early_exit_max_vector_tc_eq_16(
+; VF8UF2-SAME: ptr dereferenceable(17) [[A:%.*]]) #[[ATTR0]] {
+; VF8UF2-NEXT: [[ENTRY:.*]]:
+; VF8UF2-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; VF8UF2: [[VECTOR_PH]]:
+; VF8UF2-NEXT: br label %[[VECTOR_BODY:.*]]
+; VF8UF2: [[VECTOR_BODY]]:
+; VF8UF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; VF8UF2-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[INDEX]]
+; VF8UF2-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, ptr [[TMP0]], i32 8
+; VF8UF2-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[TMP0]], align 1
+; VF8UF2-NEXT: [[WIDE_LOAD1:%.*]] = load <8 x i8>, ptr [[TMP1]], align 1
+; VF8UF2-NEXT: [[TMP2:%.*]] = icmp eq <8 x i8> [[WIDE_LOAD]], zeroinitializer
+; VF8UF2-NEXT: [[TMP3:%.*]] = icmp eq <8 x i8> [[WIDE_LOAD1]], zeroinitializer
+; VF8UF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
+; VF8UF2-NEXT: [[TMP4:%.*]] = or <8 x i1> [[TMP2]], [[TMP3]]
+; VF8UF2-NEXT: [[TMP5:%.*]] = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> [[TMP4]])
+; VF8UF2-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
+; VF8UF2-NEXT: [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]]
+; VF8UF2-NEXT: br i1 [[TMP7]], label %[[MIDDLE_SPLIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; VF8UF2: [[MIDDLE_SPLIT]]:
+; VF8UF2-NEXT: br i1 [[TMP5]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]]
+; VF8UF2: [[MIDDLE_BLOCK]]:
+; VF8UF2-NEXT: br label %[[SCALAR_PH]]
+; VF8UF2: [[VECTOR_EARLY_EXIT]]:
+; VF8UF2-NEXT: br label %[[EXIT:.*]]
+; VF8UF2: [[SCALAR_PH]]:
+; VF8UF2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 16, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; VF8UF2-NEXT: br label %[[LOOP_HEADER:.*]]
+; VF8UF2: [[LOOP_HEADER]]:
+; VF8UF2-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
+; VF8UF2-NEXT: [[P_SRC:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV]]
+; VF8UF2-NEXT: [[L:%.*]] = load i8, ptr [[P_SRC]], align 1
+; VF8UF2-NEXT: [[C:%.*]] = icmp eq i8 [[L]], 0
+; VF8UF2-NEXT: br i1 [[C]], label %[[EXIT]], label %[[LOOP_LATCH]]
+; VF8UF2: [[LOOP_LATCH]]:
+; VF8UF2-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 1
+; VF8UF2-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 17
+; VF8UF2-NEXT: br i1 [[CMP]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP5:![0-9]+]]
+; VF8UF2: [[EXIT]]:
+; VF8UF2-NEXT: [[RES:%.*]] = phi i8 [ 0, %[[LOOP_HEADER]] ], [ 1, %[[LOOP_LATCH]] ], [ 0, %[[VECTOR_EARLY_EXIT]] ]
+; VF8UF2-NEXT: ret i8 [[RES]]
+;
+; VF16UF1-LABEL: define i8 @test_early_exit_max_vector_tc_eq_16(
+; VF16UF1-SAME: ptr dereferenceable(17) [[A:%.*]]) #[[ATTR0]] {
+; VF16UF1-NEXT: [[ENTRY:.*]]:
+; VF16UF1-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; VF16UF1: [[VECTOR_PH]]:
+; VF16UF1-NEXT: br label %[[VECTOR_BODY:.*]]
+; VF16UF1: [[VECTOR_BODY]]:
+; VF16UF1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; VF16UF1-NEXT: [[TMP0:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[INDEX]]
+; VF16UF1-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP0]], align 1
+; VF16UF1-NEXT: [[TMP1:%.*]] = icmp eq <16 x i8> [[WIDE_LOAD]], zeroinitializer
+; VF16UF1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
+; VF16UF1-NEXT: [[TMP2:%.*]] = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> [[TMP1]])
+; VF16UF1-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
+; VF16UF1-NEXT: [[TMP4:%.*]] = or i1 [[TMP2]], [[TMP3]]
+; VF16UF1-NEXT: br i1 [[TMP4]], label %[[MIDDLE_SPLIT:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; VF16UF1: [[MIDDLE_SPLIT]]:
+; VF16UF1-NEXT: br i1 [[TMP2]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]]
+; VF16UF1: [[MIDDLE_BLOCK]]:
+; VF16UF1-NEXT: br label %[[SCALAR_PH]]
+; VF16UF1: [[VECTOR_EARLY_EXIT]]:
+; VF16UF1-NEXT: br label %[[EXIT:.*]]
+; VF16UF1: [[SCALAR_PH]]:
+; VF16UF1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 16, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; VF16UF1-NEXT: br label %[[LOOP_HEADER:.*]]
+; VF16UF1: [[LOOP_HEADER]]:
+; VF16UF1-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP_LATCH:.*]] ]
+; VF16UF1-NEXT: [[P_SRC:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[IV]]
+; VF16UF1-NEXT: [[L:%.*]] = load i8, ptr [[P_SRC]], align 1
+; VF16UF1-NEXT: [[C:%.*]] = icmp eq i8 [[L]], 0
+; VF16UF1-NEXT: br i1 [[C]], label %[[EXIT]], label %[[LOOP_LATCH]]
+; VF16UF1: [[LOOP_LATCH]]:
+; VF16UF1-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 1
+; VF16UF1-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 17
+; VF16UF1-NEXT: br i1 [[CMP]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP5:![0-9]+]]
+; VF16UF1: [[EXIT]]:
+; VF16UF1-NEXT: [[RES:%.*]] = phi i8 [ 0, %[[LOOP_HEADER]] ], [ 1, %[[LOOP_LATCH]] ], [ 0, %[[VECTOR_EARLY_EXIT]] ]
+; VF16UF1-NEXT: ret i8 [[RES]]
+;
+entry:
+ br label %loop.header
+
+loop.header:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
+ %p.src = getelementptr inbounds i8, ptr %A, i64 %iv
+ %l = load i8, ptr %p.src, align 1
+ %c = icmp eq i8 %l, 0
+ br i1 %c, label %exit, label %loop.latch
+
+loop.latch:
+ %iv.next = add nsw i64 %iv, 1
+ %cmp = icmp eq i64 %iv.next, 17
+ br i1 %cmp, label %exit, label %loop.header
+
+exit:
+ %res = phi i8 [ 0, %loop.header ], [ 1, %loop.latch ]
+ ret i8 %res
+}
+
+
diff --git a/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination.ll b/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination.ll
index b396e29..59c76ae 100644
--- a/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination.ll
+++ b/llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination.ll
@@ -1218,6 +1218,133 @@ exit:
 ret void
}
+define void @test_vector_tc_eq_16(ptr %A) {
+; VF8UF1-LABEL: define void @test_vector_tc_eq_16(
+; VF8UF1-SAME: ptr [[A:%.*]]) {
+; VF8UF1-NEXT: [[ENTRY:.*]]:
+; VF8UF1-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; VF8UF1: [[VECTOR_PH]]:
+; VF8UF1-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A]], i64 16
+; VF8UF1-NEXT: br label %[[VECTOR_BODY:.*]]
+; VF8UF1: [[VECTOR_BODY]]:
+; VF8UF1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; VF8UF1-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]]
+; VF8UF1-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[NEXT_GEP]], align 1
+; VF8UF1-NEXT: [[TMP1:%.*]] = add nsw <8 x i8> [[WIDE_LOAD]], splat (i8 10)
+; VF8UF1-NEXT: store <8 x i8> [[TMP1]], ptr [[NEXT_GEP]], align 1
+; VF8UF1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
+; VF8UF1-NEXT: [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
+; VF8UF1-NEXT: br i1 [[TMP2]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
+; VF8UF1: [[MIDDLE_BLOCK]]:
+; VF8UF1-NEXT: br label %[[SCALAR_PH]]
+; VF8UF1: [[SCALAR_PH]]:
+; VF8UF1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 16, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; VF8UF1-NEXT: [[BC_RESUME_VAL1:%.*]] = phi ptr [ [[TMP0]], %[[MIDDLE_BLOCK]] ], [ [[A]], %[[ENTRY]] ]
+; VF8UF1-NEXT: br label %[[LOOP:.*]]
+; VF8UF1: [[LOOP]]:
+; VF8UF1-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; VF8UF1-NEXT: [[P_SRC:%.*]] = phi ptr [ [[BC_RESUME_VAL1]], %[[SCALAR_PH]] ], [ [[P_SRC_NEXT:%.*]], %[[LOOP]] ]
+; VF8UF1-NEXT: [[P_SRC_NEXT]] = getelementptr inbounds i8, ptr [[P_SRC]], i64 1
+; VF8UF1-NEXT: [[L:%.*]] = load i8, ptr [[P_SRC]], align 1
+; VF8UF1-NEXT: [[ADD:%.*]] = add nsw i8 [[L]], 10
+; VF8UF1-NEXT: store i8 [[ADD]], ptr [[P_SRC]], align 1
+; VF8UF1-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 1
+; VF8UF1-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 17
+; VF8UF1-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP9:![0-9]+]]
+; VF8UF1: [[EXIT]]:
+; VF8UF1-NEXT: ret void
+;
+; VF8UF2-LABEL: define void @test_vector_tc_eq_16(
+; VF8UF2-SAME: ptr [[A:%.*]]) {
+; VF8UF2-NEXT: [[ENTRY:.*]]:
+; VF8UF2-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; VF8UF2: [[VECTOR_PH]]:
+; VF8UF2-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A]], i64 16
+; VF8UF2-NEXT: br label %[[VECTOR_BODY:.*]]
+; VF8UF2: [[VECTOR_BODY]]:
+; VF8UF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; VF8UF2-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]]
+; VF8UF2-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 8
+; VF8UF2-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[NEXT_GEP]], align 1
+; VF8UF2-NEXT: [[WIDE_LOAD1:%.*]] = load <8 x i8>, ptr [[TMP1]], align 1
+; VF8UF2-NEXT: [[TMP2:%.*]] = add nsw <8 x i8> [[WIDE_LOAD]], splat (i8 10)
+; VF8UF2-NEXT: [[TMP3:%.*]] = add nsw <8 x i8> [[WIDE_LOAD1]], splat (i8 10)
+; VF8UF2-NEXT: store <8 x i8> [[TMP2]], ptr [[NEXT_GEP]], align 1
+; VF8UF2-NEXT: store <8 x i8> [[TMP3]], ptr [[TMP1]], align 1
+; VF8UF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
+; VF8UF2-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
+; VF8UF2-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; VF8UF2: [[MIDDLE_BLOCK]]:
+; VF8UF2-NEXT: br label %[[SCALAR_PH]]
+; VF8UF2: [[SCALAR_PH]]:
+; VF8UF2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 16, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; VF8UF2-NEXT: [[BC_RESUME_VAL2:%.*]] = phi ptr [ [[TMP0]], %[[MIDDLE_BLOCK]] ], [ [[A]], %[[ENTRY]] ]
+; VF8UF2-NEXT: br label %[[LOOP:.*]]
+; VF8UF2: [[LOOP]]:
+; VF8UF2-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; VF8UF2-NEXT: [[P_SRC:%.*]] = phi ptr [ [[BC_RESUME_VAL2]], %[[SCALAR_PH]] ], [ [[P_SRC_NEXT:%.*]], %[[LOOP]] ]
+; VF8UF2-NEXT: [[P_SRC_NEXT]] = getelementptr inbounds i8, ptr [[P_SRC]], i64 1
+; VF8UF2-NEXT: [[L:%.*]] = load i8, ptr [[P_SRC]], align 1
+; VF8UF2-NEXT: [[ADD:%.*]] = add nsw i8 [[L]], 10
+; VF8UF2-NEXT: store i8 [[ADD]], ptr [[P_SRC]], align 1
+; VF8UF2-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 1
+; VF8UF2-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 17
+; VF8UF2-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP7:![0-9]+]]
+; VF8UF2: [[EXIT]]:
+; VF8UF2-NEXT: ret void
+;
+; VF16UF1-LABEL: define void @test_vector_tc_eq_16(
+; VF16UF1-SAME: ptr [[A:%.*]]) {
+; VF16UF1-NEXT: [[ENTRY:.*]]:
+; VF16UF1-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; VF16UF1: [[VECTOR_PH]]:
+; VF16UF1-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[A]], i64 16
+; VF16UF1-NEXT: br label %[[VECTOR_BODY:.*]]
+; VF16UF1: [[VECTOR_BODY]]:
+; VF16UF1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; VF16UF1-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[A]], i64 [[INDEX]]
+; VF16UF1-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[NEXT_GEP]], align 1
+; VF16UF1-NEXT: [[TMP1:%.*]] = add nsw <16 x i8> [[WIDE_LOAD]], splat (i8 10)
+; VF16UF1-NEXT: store <16 x i8> [[TMP1]], ptr [[NEXT_GEP]], align 1
+; VF16UF1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
+; VF16UF1-NEXT: [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
+; VF16UF1-NEXT: br i1 [[TMP2]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; VF16UF1: [[MIDDLE_BLOCK]]:
+; VF16UF1-NEXT: br label %[[SCALAR_PH]]
+; VF16UF1: [[SCALAR_PH]]:
+; VF16UF1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 16, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; VF16UF1-NEXT: [[BC_RESUME_VAL1:%.*]] = phi ptr [ [[TMP0]], %[[MIDDLE_BLOCK]] ], [ [[A]], %[[ENTRY]] ]
+; VF16UF1-NEXT: br label %[[LOOP:.*]]
+; VF16UF1: [[LOOP]]:
+; VF16UF1-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
+; VF16UF1-NEXT: [[P_SRC:%.*]] = phi ptr [ [[BC_RESUME_VAL1]], %[[SCALAR_PH]] ], [ [[P_SRC_NEXT:%.*]], %[[LOOP]] ]
+; VF16UF1-NEXT: [[P_SRC_NEXT]] = getelementptr inbounds i8, ptr [[P_SRC]], i64 1
+; VF16UF1-NEXT: [[L:%.*]] = load i8, ptr [[P_SRC]], align 1
+; VF16UF1-NEXT: [[ADD:%.*]] = add nsw i8 [[L]], 10
+; VF16UF1-NEXT: store i8 [[ADD]], ptr [[P_SRC]], align 1
+; VF16UF1-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], 1
+; VF16UF1-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 17
+; VF16UF1-NEXT: br i1 [[CMP]], label %[[EXIT:.*]], label %[[LOOP]], !llvm.loop [[LOOP7:![0-9]+]]
+; VF16UF1: [[EXIT]]:
+; VF16UF1-NEXT: ret void
+;
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+ %p.src = phi ptr [ %A, %entry ], [ %p.src.next, %loop ]
+ %p.src.next = getelementptr inbounds i8, ptr %p.src, i64 1
+ %l = load i8, ptr %p.src, align 1
+ %add = add nsw i8 %l, 10
+ store i8 %add, ptr %p.src
+ %iv.next = add nsw i64 %iv, 1
+ %cmp = icmp eq i64 %iv.next, 17
+ br i1 %cmp, label %exit, label %loop
+
+exit:
+ ret void
+}
;.
; VF8UF1: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
; VF8UF1: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
@@ -1227,6 +1354,8 @@ exit:
; VF8UF1: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]], [[META2]]}
; VF8UF1: [[LOOP6]] = distinct !{[[LOOP6]], [[META2]], [[META1]]}
; VF8UF1: [[LOOP7]] = distinct !{[[LOOP7]], [[META2]], [[META1]]}
+; VF8UF1: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]}
+; VF8UF1: [[LOOP9]] = distinct !{[[LOOP9]], [[META2]], [[META1]]}
;.
; VF8UF2: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
; VF8UF2: [[META1]] = !{!"llvm.loop.unroll.runtime.disable"}
@@ -1234,6 +1363,8 @@ exit:
; VF8UF2: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]], [[META2]]}
; VF8UF2: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
; VF8UF2: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]], [[META2]]}
+; VF8UF2: [[LOOP6]] = distinct !{[[LOOP6]], [[META2]], [[META1]]}
+; VF8UF2: [[LOOP7]] = distinct !{[[LOOP7]], [[META1]], [[META2]]}
;.
; VF16UF1: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
; VF16UF1: [[META1]] = !{!"llvm.loop.unroll.runtime.disable"}
@@ -1241,4 +1372,6 @@ exit:
; VF16UF1: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]], [[META2]]}
; VF16UF1: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
; VF16UF1: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]], [[META2]]}
+; VF16UF1: [[LOOP6]] = distinct !{[[LOOP6]], [[META2]], [[META1]]}
+; VF16UF1: [[LOOP7]] = distinct !{[[LOOP7]], [[META1]], [[META2]]}
;.
diff --git a/llvm/test/Transforms/MemProfContextDisambiguation/func_assign_fix.ll b/llvm/test/Transforms/MemProfContextDisambiguation/func_assign_fix.ll
new file mode 100644
index 0000000..d0450e0
--- /dev/null
+++ b/llvm/test/Transforms/MemProfContextDisambiguation/func_assign_fix.ll
@@ -0,0 +1,139 @@
+;; Make sure we assign the original callsite to a function clone (which will be
+;; the original function clone), even when we cannot update its caller (due to
+;; missing metadata, e.g. from mismatched profiles). Otherwise we will try to
+;; use the original function for a different clone, leading to confusion later
+;; when rewriting the calls.
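+;;
+;; A minimal sketch of the shape involved, using only the functions defined
+;; below (@C.memprof.1 is the clone the pass is expected to create): the call
+;; in @A carries no !callsite metadata, so its caller cannot be updated and
+;; must keep targeting the original @C, while the call in @B, which carries
+;; !callsite !1, is redirected to the clone:
+;;
+;;   call void @C()                ; in @A: no metadata, stays on original @C
+;;   call void @C(), !callsite !1  ; in @B: rewritten to call @C.memprof.1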
+
+;; -stats requires asserts
+; REQUIRES: asserts
+
+; RUN: opt -passes=memprof-context-disambiguation -supports-hot-cold-new \
+; RUN: -memprof-verify-ccg -memprof-verify-nodes -stats -debug \
+; RUN: -pass-remarks=memprof-context-disambiguation %s -S 2>&1 | \
+; RUN: FileCheck %s --implicit-check-not="Mismatch in call clone assignment" \
+; RUN: --implicit-check-not="Number of callsites assigned to call multiple non-matching clones"
+
+
+; ModuleID = '<stdin>'
+source_filename = "reduced.ll"
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; CHECK-LABEL: define void @A()
+define void @A() {
+ ; CHECK: call void @C()
+ call void @C()
+ ret void
+}
+
+; CHECK-LABEL: define void @B()
+define void @B() {
+ ; CHECK: call void @C.memprof.1()
+ call void @C(), !callsite !1
+ ret void
+}
+
+; CHECK-LABEL: define void @C()
+define void @C() {
+ ; CHECK: call void @F()
+ call void @F(), !callsite !16
+ ; CHECK: call void @D()
+ call void @D(), !callsite !2
+ ret void
+}
+
+; CHECK-LABEL: define void @D()
+define void @D() {
+ ; CHECK: call void @E()
+ call void @E(), !callsite !3
+ ; CHECK: call void @G()
+ call void @G(), !callsite !17
+ ret void
+}
+
+; CHECK-LABEL: define void @E()
+define void @E() {
+ ; CHECK: call ptr @_Znwm(i64 0) #[[NOTCOLD:[0-9]+]]
+ %1 = call ptr @_Znwm(i64 0), !memprof !4, !callsite !9
+ ret void
+}
+
+; CHECK-LABEL: define void @F()
+define void @F() {
+ ; CHECK: call void @G()
+ call void @G(), !callsite !17
+ ret void
+}
+
+; CHECK-LABEL: define void @G()
+define void @G() {
+ ; CHECK: call ptr @_Znwm(i64 0) #[[NOTCOLD]]
+ %2 = call ptr @_Znwm(i64 0), !memprof !10, !callsite !15
+ ret void
+}
+
+; CHECK-LABEL: define void @A1()
+define void @A1() {
+ ; CHECK: call void @C()
+ call void @C(), !callsite !18
+ ret void
+}
+
+; CHECK-LABEL: define void @B1()
+define void @B1() {
+ ; CHECK: call void @C.memprof.1()
+ call void @C(), !callsite !19
+ ret void
+}
+
+; CHECK-LABEL: define void @C.memprof.1()
+ ; CHECK: call void @F.memprof.1()
+ ; CHECK: call void @D.memprof.1()
+
+; CHECK-LABEL: define void @D.memprof.1()
+ ; CHECK: call void @E.memprof.1()
+ ; CHECK: call void @G()
+
+; CHECK-LABEL: define void @E.memprof.1()
+ ; CHECK: call ptr @_Znwm(i64 0) #[[COLD:[0-9]+]]
+
+; CHECK-LABEL: define void @F.memprof.1()
+ ; CHECK: call void @G.memprof.1()
+
+; CHECK-LABEL: define void @G.memprof.1()
+ ; CHECK: call ptr @_Znwm(i64 0) #[[COLD]]
+
+declare ptr @_Znwm(i64)
+
+; CHECK: attributes #[[NOTCOLD]] = { "memprof"="notcold" }
+; CHECK: attributes #[[COLD]] = { "memprof"="cold" }
+
+!0 = !{i64 123}
+!1 = !{i64 234}
+!2 = !{i64 345}
+!3 = !{i64 456}
+!4 = !{!5, !7}
+!5 = !{!6, !"notcold"}
+!6 = !{i64 567, i64 456, i64 345, i64 123}
+!7 = !{!8, !"cold"}
+!8 = !{i64 567, i64 456, i64 345, i64 234}
+!9 = !{i64 567}
+!10 = !{!11, !13}
+!11 = !{!12, !"notcold"}
+!12 = !{i64 678, i64 891, i64 789, i64 912}
+!13 = !{!14, !"cold"}
+!14 = !{i64 678, i64 891, i64 789, i64 812}
+!15 = !{i64 678}
+!16 = !{i64 789}
+!17 = !{i64 891}
+!18 = !{i64 912}
+!19 = !{i64 812}
diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/nvptx-basic.ll.expected b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/nvptx-basic.ll.expected
index 51cafac..e1da112 100644
--- a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/nvptx-basic.ll.expected
+++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/nvptx-basic.ll.expected
@@ -10,15 +10,15 @@ define dso_local void @caller_St8x4(ptr nocapture noundef readonly byval(%struct
; CHECK-NEXT: .reg .b64 %rd<13>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: ld.param.b64 %rd1, [caller_St8x4_param_0+8];
-; CHECK-NEXT: ld.param.b64 %rd2, [caller_St8x4_param_0];
-; CHECK-NEXT: ld.param.b64 %rd3, [caller_St8x4_param_0+24];
-; CHECK-NEXT: ld.param.b64 %rd4, [caller_St8x4_param_0+16];
; CHECK-NEXT: { // callseq 0, 0
; CHECK-NEXT: .param .align 16 .b8 param0[32];
-; CHECK-NEXT: st.param.v2.b64 [param0], {%rd2, %rd1};
-; CHECK-NEXT: st.param.v2.b64 [param0+16], {%rd4, %rd3};
; CHECK-NEXT: .param .align 16 .b8 retval0[32];
+; CHECK-NEXT: ld.param.b64 %rd1, [caller_St8x4_param_0+24];
+; CHECK-NEXT: ld.param.b64 %rd2, [caller_St8x4_param_0+16];
+; CHECK-NEXT: st.param.v2.b64 [param0+16], {%rd2, %rd1};
+; CHECK-NEXT: ld.param.b64 %rd3, [caller_St8x4_param_0+8];
+; CHECK-NEXT: ld.param.b64 %rd4, [caller_St8x4_param_0];
+; CHECK-NEXT: st.param.v2.b64 [param0], {%rd4, %rd3};
; CHECK-NEXT: call.uni (retval0), callee_St8x4, (param0);
; CHECK-NEXT: ld.param.v2.b64 {%rd5, %rd6}, [retval0];
; CHECK-NEXT: ld.param.v2.b64 {%rd7, %rd8}, [retval0+16];